Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 70
0.00% covered (danger)
0.00%
0 / 7
CRAP
0.00% covered (danger)
0.00%
0 / 1
MeanShift
0.00% covered (danger)
0.00%
0 / 70
0.00% covered (danger)
0.00%
0 / 7
506
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
 generateClusters
0.00% covered (danger)
0.00%
0 / 20
0.00% covered (danger)
0.00%
0 / 1
42
 shiftPoint
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
30
 groupPoints
0.00% covered (danger)
0.00%
0 / 12
0.00% covered (danger)
0.00%
0 / 1
12
 findNearestGroup
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
12
 distanceToGroup
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
12
 cluster
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2/**
3 * Jingga
4 *
5 * PHP Version 8.1
6 *
7 * @package   phpOMS\Algorithm\Clustering
8 * @copyright Dennis Eichhorn
9 * @license   OMS License 2.0
10 * @version   1.0.0
11 * @link      https://jingga.app
12 */
13declare(strict_types=1);
14
15namespace phpOMS\Algorithm\Clustering;
16
17use phpOMS\Math\Topology\KernelsND;
18use phpOMS\Math\Topology\MetricsND;
19
20/**
21 * Clustering points
22 *
23 * @package phpOMS\Algorithm\Clustering
24 * @license OMS License 2.0
25 * @link    https://jingga.app
26 * @see     ./clustering_overview.png
27 * @since   1.0.0
28 */
29final class MeanShift
30{
31    /**
32     * Min distance for clustering
33     *
34     * As long as a point is further away as the min distance the shifting is performed
35     *
36     * @var float
37     * @since 1.0.0
38     */
39    public const MIN_DISTANCE = 0.001;
40
41    /**
42     * Kernel function
43     *
44     * @var \Closure
45     * @since 1.0.0
46     */
47    private \Closure $kernel;
48
49    /**
50     * Metric function
51     *
52     * @var \Closure
53     * @since 1.0.0
54     */
55    private \Closure $metric;
56
57    private array $points;
58
59    /**
60     * Points outside of any cluster
61     *
62     * @var PointInterface[]
63     * @since 1.0.0
64     */
65    private array $noisePoints = [];
66
67    /**
68     * Cluster points
69     *
70     * Points in clusters (helper to avoid looping the cluster array)
71     *
72     * @var array
73     * @since 1.0.0
74     */
75    private array $clusters = [];
76
77    /**
78     * Points of the cluster centers
79     *
80     * @var PointInterface[]
81     * @since 1.0.0
82     */
83    private $clusterCenters = [];
84
85    /**
86     * Max distance to cluster to be still considered part of cluster
87     *
88     * @var float
89     * @since 1.0.0
90     */
91    public float $groupDistanceTolerance = 0.1;
92
93    /**
94     * Constructor
95     *
96     * Both the metric and kernel function need to be of the same dimension.
97     *
98     * @param null|\Closure $metric Metric to use for the distance between two points
99     * @param null|\Closure $kernel Kernel
100     *
101     * @since 1.0.0
102     */
103    public function __construct(\Closure $metric = null, \Closure $kernel = null)
104    {
105        $this->metric = $metric ?? function (PointInterface $a, PointInterface $b) {
106            $aCoordinates = $a->coordinates;
107            $bCoordinates = $b->coordinates;
108
109            return MetricsND::euclidean($aCoordinates, $bCoordinates);
110        };
111
112        $this->kernel = $kernel ?? function (array $distances, array $bandwidths) {
113            return KernelsND::gaussianKernel($distances, $bandwidths);
114        };
115    }
116
117    /**
118     * Generate the clusters of the points
119     *
120     * @param PointInterface[] $points    Points to cluster
121     * @param array<int|float> $bandwidth Bandwidth(s)
122     *
123     * @return void
124     *
125     * @since 1.0.0
126     */
127    public function generateClusters(array $points, array $bandwidth) : void
128    {
129        $shiftPoints = $points;
130        $maxMinDist  = 1;
131
132        $stillShifting = \array_fill(0, \count($points), true);
133
134        $pointLength = \count($shiftPoints);
135
136        while ($maxMinDist > self::MIN_DISTANCE) {
137            $maxMinDist = 0;
138
139            for ($i = 0; $i < $pointLength; ++$i) {
140                if (!$stillShifting[$i]) {
141                    continue;
142                }
143
144                $pNew      = $shiftPoints[$i];
145                $pNewStart = $pNew;
146                $pNew      = $this->shiftPoint($pNew, $points, $bandwidth);
147                $dist      = ($this->metric)($pNew, $pNewStart);
148
149                if ($dist > $maxMinDist) {
150                    $maxMinDist = $dist;
151                }
152
153                if ($dist < self::MIN_DISTANCE) {
154                    $stillShifting[$i] = false;
155                }
156
157                $shiftPoints[$i] = $pNew;
158            }
159        }
160
161        // @todo create an array of noisePoints like in the DBSCAN. That array can be empty or not depending on the bandwidth defined
162
163        $this->clusters       = $this->groupPoints($shiftPoints);
164        $this->clusterCenters = $shiftPoints;
165    }
166
167    /**
168     * Perform shift on a point
169     *
170     * @param PointInterface   $point     Point to shift
171     * @param PointInterface   $points    Array of all points
172     * @param array<int|float> $bandwidth Bandwidth(s)
173     *
174     * @return PointInterface
175     *
176     * @since 1.0.0
177     */
178    private function shiftPoint(PointInterface $point, array $points, array $bandwidth) : PointInterface
179    {
180        $scaleFactor = 0.0;
181
182        $shifted = clone $point;
183
184        foreach ($points as $pTemp) {
185            $dist   = ($this->metric)($point, $pTemp);
186            $weight = ($this->kernel)($dist, $bandwidth);
187
188            foreach ($point->coordinates as $idx => $_) {
189                if (!isset($shifted->coordinates[$idx])) {
190                    $shifted->coordinates[$idx] = 0;
191                }
192
193                $shifted->coordinates[$idx] += $pTemp->coordinates[$idx] * $weight;
194            }
195
196            $scaleFactor += $weight;
197        }
198
199        foreach ($shifted->coordinates as $idx => $_) {
200            $shifted->coordinates[$idx] /= $scaleFactor;
201        }
202
203        return $shifted;
204    }
205
206    /**
207     * Group points together into clusters
208     *
209     * @param PointInterface[] $points Array of points to assign to groups
210     *
211     * @return array
212     *
213     * @since 1.0.0
214     */
215    private function groupPoints(array $points) : array
216    {
217        $groupAssignment = [];
218        $groups          = [];
219        $groupIndex      = 0;
220
221        foreach ($points as $point) {
222            $nearestGroupIndex = $this->findNearestGroup($point, $groups);
223
224            if ($nearestGroupIndex === -1) {
225                // create new group
226                $groups[]          = [$point];
227                $groupAssignment[] = $groupIndex;
228
229                ++$groupIndex;
230            } else {
231                $groupAssignment[]            = $nearestGroupIndex;
232                $groups[$nearestGroupIndex][] = $point;
233            }
234        }
235
236        return $groupAssignment;
237    }
238
239    /**
240     * Find the closest cluster/group of a point
241     *
242     * @param PointInterface          $point  Point to find the cluster for
243     * @param array<PointInterface[]> $groups Clusters
244     *
245     * @return int
246     *
247     * @since 1.0.0
248     */
249    private function findNearestGroup(PointInterface $point, array $groups) : int
250    {
251        $nearestGroupIndex = -1;
252        $index             = 0;
253
254        foreach ($groups as $group) {
255            $distanceToGroup = $this->distanceToGroup($point, $group);
256
257            if ($distanceToGroup < $this->groupDistanceTolerance) {
258                $nearestGroupIndex = $index;
259
260                break;
261            }
262
263            ++$index;
264        }
265
266        return $nearestGroupIndex;
267    }
268
269    /**
270     * Find distance of point to best cluster/group
271     *
272     * @param PointInterface   $point Point to find the cluster for
273     * @param PointInterface[] $group Clusters
274     *
275     * @return float Distance
276     *
277     * @since 1.0.0
278     */
279    private function distanceToGroup(PointInterface $point, array $group) : float
280    {
281        $minDistance = \PHP_FLOAT_MAX;
282
283        foreach ($group as $pt) {
284            $dist = ($this->metric)($point, $pt);
285
286            if ($dist < $minDistance) {
287                $minDistance = $dist;
288            }
289        }
290
291        return $minDistance;
292    }
293
294    /**
295     * Find the cluster for a point
296     *
297     * @param PointInterface $point Point to find the cluster for
298     *
299     * @return null|PointInterface Cluster center point
300     *
301     * @since 1.0.0
302     */
303    public function cluster(PointInterface $point) : ?PointInterface
304    {
305        $clusterId = $this->findNearestGroup($point, $this->clusters);
306
307        return $this->clusterCenters[$clusterId] ?? null;
308    }
309}