Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 70 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
MeanShift | |
0.00% |
0 / 70 |
|
0.00% |
0 / 7 |
506 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
generateClusters | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
42 | |||
shiftPoint | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
30 | |||
groupPoints | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
findNearestGroup | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
distanceToGroup | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
cluster | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Jingga |
4 | * |
5 | * PHP Version 8.1 |
6 | * |
7 | * @package phpOMS\Algorithm\Clustering |
8 | * @copyright Dennis Eichhorn |
9 | * @license OMS License 2.0 |
10 | * @version 1.0.0 |
11 | * @link https://jingga.app |
12 | */ |
13 | declare(strict_types=1); |
14 | |
15 | namespace phpOMS\Algorithm\Clustering; |
16 | |
17 | use phpOMS\Math\Topology\KernelsND; |
18 | use phpOMS\Math\Topology\MetricsND; |
19 | |
20 | /** |
21 | * Clustering points |
22 | * |
23 | * @package phpOMS\Algorithm\Clustering |
24 | * @license OMS License 2.0 |
25 | * @link https://jingga.app |
26 | * @see ./clustering_overview.png |
27 | * @since 1.0.0 |
28 | */ |
29 | final class MeanShift |
30 | { |
31 | /** |
32 | * Min distance for clustering |
33 | * |
34 | * As long as a point is further away as the min distance the shifting is performed |
35 | * |
36 | * @var float |
37 | * @since 1.0.0 |
38 | */ |
39 | public const MIN_DISTANCE = 0.001; |
40 | |
41 | /** |
42 | * Kernel function |
43 | * |
44 | * @var \Closure |
45 | * @since 1.0.0 |
46 | */ |
47 | private \Closure $kernel; |
48 | |
49 | /** |
50 | * Metric function |
51 | * |
52 | * @var \Closure |
53 | * @since 1.0.0 |
54 | */ |
55 | private \Closure $metric; |
56 | |
57 | private array $points; |
58 | |
59 | /** |
60 | * Points outside of any cluster |
61 | * |
62 | * @var PointInterface[] |
63 | * @since 1.0.0 |
64 | */ |
65 | private array $noisePoints = []; |
66 | |
67 | /** |
68 | * Cluster points |
69 | * |
70 | * Points in clusters (helper to avoid looping the cluster array) |
71 | * |
72 | * @var array |
73 | * @since 1.0.0 |
74 | */ |
75 | private array $clusters = []; |
76 | |
77 | /** |
78 | * Points of the cluster centers |
79 | * |
80 | * @var PointInterface[] |
81 | * @since 1.0.0 |
82 | */ |
83 | private $clusterCenters = []; |
84 | |
85 | /** |
86 | * Max distance to cluster to be still considered part of cluster |
87 | * |
88 | * @var float |
89 | * @since 1.0.0 |
90 | */ |
91 | public float $groupDistanceTolerance = 0.1; |
92 | |
93 | /** |
94 | * Constructor |
95 | * |
96 | * Both the metric and kernel function need to be of the same dimension. |
97 | * |
98 | * @param null|\Closure $metric Metric to use for the distance between two points |
99 | * @param null|\Closure $kernel Kernel |
100 | * |
101 | * @since 1.0.0 |
102 | */ |
103 | public function __construct(\Closure $metric = null, \Closure $kernel = null) |
104 | { |
105 | $this->metric = $metric ?? function (PointInterface $a, PointInterface $b) { |
106 | $aCoordinates = $a->coordinates; |
107 | $bCoordinates = $b->coordinates; |
108 | |
109 | return MetricsND::euclidean($aCoordinates, $bCoordinates); |
110 | }; |
111 | |
112 | $this->kernel = $kernel ?? function (array $distances, array $bandwidths) { |
113 | return KernelsND::gaussianKernel($distances, $bandwidths); |
114 | }; |
115 | } |
116 | |
117 | /** |
118 | * Generate the clusters of the points |
119 | * |
120 | * @param PointInterface[] $points Points to cluster |
121 | * @param array<int|float> $bandwidth Bandwidth(s) |
122 | * |
123 | * @return void |
124 | * |
125 | * @since 1.0.0 |
126 | */ |
127 | public function generateClusters(array $points, array $bandwidth) : void |
128 | { |
129 | $shiftPoints = $points; |
130 | $maxMinDist = 1; |
131 | |
132 | $stillShifting = \array_fill(0, \count($points), true); |
133 | |
134 | $pointLength = \count($shiftPoints); |
135 | |
136 | while ($maxMinDist > self::MIN_DISTANCE) { |
137 | $maxMinDist = 0; |
138 | |
139 | for ($i = 0; $i < $pointLength; ++$i) { |
140 | if (!$stillShifting[$i]) { |
141 | continue; |
142 | } |
143 | |
144 | $pNew = $shiftPoints[$i]; |
145 | $pNewStart = $pNew; |
146 | $pNew = $this->shiftPoint($pNew, $points, $bandwidth); |
147 | $dist = ($this->metric)($pNew, $pNewStart); |
148 | |
149 | if ($dist > $maxMinDist) { |
150 | $maxMinDist = $dist; |
151 | } |
152 | |
153 | if ($dist < self::MIN_DISTANCE) { |
154 | $stillShifting[$i] = false; |
155 | } |
156 | |
157 | $shiftPoints[$i] = $pNew; |
158 | } |
159 | } |
160 | |
161 | // @todo create an array of noisePoints like in the DBSCAN. That array can be empty or not depending on the bandwidth defined |
162 | |
163 | $this->clusters = $this->groupPoints($shiftPoints); |
164 | $this->clusterCenters = $shiftPoints; |
165 | } |
166 | |
167 | /** |
168 | * Perform shift on a point |
169 | * |
170 | * @param PointInterface $point Point to shift |
171 | * @param PointInterface $points Array of all points |
172 | * @param array<int|float> $bandwidth Bandwidth(s) |
173 | * |
174 | * @return PointInterface |
175 | * |
176 | * @since 1.0.0 |
177 | */ |
178 | private function shiftPoint(PointInterface $point, array $points, array $bandwidth) : PointInterface |
179 | { |
180 | $scaleFactor = 0.0; |
181 | |
182 | $shifted = clone $point; |
183 | |
184 | foreach ($points as $pTemp) { |
185 | $dist = ($this->metric)($point, $pTemp); |
186 | $weight = ($this->kernel)($dist, $bandwidth); |
187 | |
188 | foreach ($point->coordinates as $idx => $_) { |
189 | if (!isset($shifted->coordinates[$idx])) { |
190 | $shifted->coordinates[$idx] = 0; |
191 | } |
192 | |
193 | $shifted->coordinates[$idx] += $pTemp->coordinates[$idx] * $weight; |
194 | } |
195 | |
196 | $scaleFactor += $weight; |
197 | } |
198 | |
199 | foreach ($shifted->coordinates as $idx => $_) { |
200 | $shifted->coordinates[$idx] /= $scaleFactor; |
201 | } |
202 | |
203 | return $shifted; |
204 | } |
205 | |
206 | /** |
207 | * Group points together into clusters |
208 | * |
209 | * @param PointInterface[] $points Array of points to assign to groups |
210 | * |
211 | * @return array |
212 | * |
213 | * @since 1.0.0 |
214 | */ |
215 | private function groupPoints(array $points) : array |
216 | { |
217 | $groupAssignment = []; |
218 | $groups = []; |
219 | $groupIndex = 0; |
220 | |
221 | foreach ($points as $point) { |
222 | $nearestGroupIndex = $this->findNearestGroup($point, $groups); |
223 | |
224 | if ($nearestGroupIndex === -1) { |
225 | // create new group |
226 | $groups[] = [$point]; |
227 | $groupAssignment[] = $groupIndex; |
228 | |
229 | ++$groupIndex; |
230 | } else { |
231 | $groupAssignment[] = $nearestGroupIndex; |
232 | $groups[$nearestGroupIndex][] = $point; |
233 | } |
234 | } |
235 | |
236 | return $groupAssignment; |
237 | } |
238 | |
239 | /** |
240 | * Find the closest cluster/group of a point |
241 | * |
242 | * @param PointInterface $point Point to find the cluster for |
243 | * @param array<PointInterface[]> $groups Clusters |
244 | * |
245 | * @return int |
246 | * |
247 | * @since 1.0.0 |
248 | */ |
249 | private function findNearestGroup(PointInterface $point, array $groups) : int |
250 | { |
251 | $nearestGroupIndex = -1; |
252 | $index = 0; |
253 | |
254 | foreach ($groups as $group) { |
255 | $distanceToGroup = $this->distanceToGroup($point, $group); |
256 | |
257 | if ($distanceToGroup < $this->groupDistanceTolerance) { |
258 | $nearestGroupIndex = $index; |
259 | |
260 | break; |
261 | } |
262 | |
263 | ++$index; |
264 | } |
265 | |
266 | return $nearestGroupIndex; |
267 | } |
268 | |
269 | /** |
270 | * Find distance of point to best cluster/group |
271 | * |
272 | * @param PointInterface $point Point to find the cluster for |
273 | * @param PointInterface[] $group Clusters |
274 | * |
275 | * @return float Distance |
276 | * |
277 | * @since 1.0.0 |
278 | */ |
279 | private function distanceToGroup(PointInterface $point, array $group) : float |
280 | { |
281 | $minDistance = \PHP_FLOAT_MAX; |
282 | |
283 | foreach ($group as $pt) { |
284 | $dist = ($this->metric)($point, $pt); |
285 | |
286 | if ($dist < $minDistance) { |
287 | $minDistance = $dist; |
288 | } |
289 | } |
290 | |
291 | return $minDistance; |
292 | } |
293 | |
294 | /** |
295 | * Find the cluster for a point |
296 | * |
297 | * @param PointInterface $point Point to find the cluster for |
298 | * |
299 | * @return null|PointInterface Cluster center point |
300 | * |
301 | * @since 1.0.0 |
302 | */ |
303 | public function cluster(PointInterface $point) : ?PointInterface |
304 | { |
305 | $clusterId = $this->findNearestGroup($point, $this->clusters); |
306 | |
307 | return $this->clusterCenters[$clusterId] ?? null; |
308 | } |
309 | } |