Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.24% |
60 / 63 |
|
66.67% |
4 / 6 |
CRAP | |
0.00% |
0 / 1 |
Kmeans | |
95.24% |
60 / 63 |
|
66.67% |
4 / 6 |
28 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
cluster | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
getCentroids | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
generateClusters | |
92.59% |
25 / 27 |
|
0.00% |
0 / 1 |
14.08 | |||
nearestClusterCenter | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
kpp | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * Jingga |
4 | * |
5 | * PHP Version 8.1 |
6 | * |
7 | * @package phpOMS\Algorithm\Clustering |
8 | * @copyright Dennis Eichhorn |
9 | * @license OMS License 2.0 |
10 | * @version 1.0.0 |
11 | * @link https://jingga.app |
12 | */ |
13 | declare(strict_types=1); |
14 | |
15 | namespace phpOMS\Algorithm\Clustering; |
16 | |
17 | use phpOMS\Math\Topology\MetricsND; |
18 | |
19 | /** |
20 | * Clustering points |
21 | * |
22 | * @package phpOMS\Algorithm\Clustering |
23 | * @license OMS License 2.0 |
24 | * @link https://jingga.app |
25 | * @see ./clustering_overview.png |
26 | * @since 1.0.0 |
27 | */ |
28 | final class Kmeans |
29 | { |
30 | /** |
31 | * Epsilon for float comparison. |
32 | * |
33 | * @var float |
34 | * @since 1.0.0 |
35 | */ |
36 | public const EPSILON = 4.88e-04; |
37 | |
38 | /** |
39 | * Metric to calculate the distance between two points |
40 | * |
41 | * @var \Closure |
42 | * @since 1.0.0 |
43 | */ |
44 | private \Closure $metric; |
45 | |
46 | /** |
47 | * Points of the cluster centers |
48 | * |
49 | * @var PointInterface[] |
50 | * @since 1.0.0 |
51 | */ |
52 | private $clusterCenters = []; |
53 | |
54 | /** |
55 | * Constructor |
56 | * |
57 | * @param null|\Closure $metric metric to use for the distance between two points |
58 | * |
59 | * @since 1.0.0 |
60 | */ |
61 | public function __construct(\Closure $metric = null) |
62 | { |
63 | $this->metric = $metric ?? function (PointInterface $a, PointInterface $b) { |
64 | $aCoordinates = $a->coordinates; |
65 | $bCoordinates = $b->coordinates; |
66 | |
67 | return MetricsND::euclidean($aCoordinates, $bCoordinates); |
68 | }; |
69 | |
70 | //$this->generateClusters($points, $clusters); |
71 | } |
72 | |
73 | /** |
74 | * Find the cluster for a point |
75 | * |
76 | * @param PointInterface $point Point to find the cluster for |
77 | * |
78 | * @return null|PointInterface Cluster center point |
79 | * |
80 | * @since 1.0.0 |
81 | */ |
82 | public function cluster(PointInterface $point) : ?PointInterface |
83 | { |
84 | $bestCluster = null; |
85 | $bestDistance = \PHP_FLOAT_MAX; |
86 | |
87 | foreach ($this->clusterCenters as $center) { |
88 | if (($distance = ($this->metric)($center, $point)) < $bestDistance) { |
89 | $bestCluster = $center; |
90 | $bestDistance = $distance; |
91 | } |
92 | } |
93 | |
94 | return $bestCluster; |
95 | } |
96 | |
97 | /** |
98 | * Get cluster centroids |
99 | * |
100 | * @return array |
101 | * |
102 | * @since 1.0.0 |
103 | */ |
104 | public function getCentroids() : array |
105 | { |
106 | return $this->clusterCenters; |
107 | } |
108 | |
109 | /** |
110 | * Generate the clusters of the points |
111 | * |
112 | * @param PointInterface[] $points Points to cluster |
113 | * @param int<0, max> $clusters Amount of clusters |
114 | * |
115 | * @return void |
116 | * |
117 | * @since 1.0.0 |
118 | */ |
119 | public function generateClusters(array $points, int $clusters) : void |
120 | { |
121 | $n = \count($points); |
122 | $clusterCenters = $this->kpp($points, $clusters); |
123 | $coordinates = \count($points[0]->coordinates); |
124 | |
125 | while (true) { |
126 | foreach ($clusterCenters as $center) { |
127 | for ($i = 0; $i < $coordinates; ++$i) { |
128 | $center->setCoordinate($i, 0); |
129 | } |
130 | } |
131 | |
132 | foreach ($points as $point) { |
133 | $clusterPoint = $clusterCenters[$point->group]; |
134 | |
135 | ++$clusterPoint->group; |
136 | for ($i = 0; $i < $coordinates; ++$i) { |
137 | $clusterPoint->setCoordinate($i, $clusterPoint->getCoordinate($i) + $point->getCoordinate($i)); |
138 | } |
139 | } |
140 | |
141 | foreach ($clusterCenters as $center) { |
142 | for ($i = 0; $i < $coordinates; ++$i) { |
143 | // @todo Invalid center coodinate value in like 5 % of the runs |
144 | $center->setCoordinate($i, $center->getCoordinate($i) / ($center->group === 0 ? 1 : $center->group)); |
145 | } |
146 | } |
147 | |
148 | $changed = 0; |
149 | foreach ($points as $point) { |
150 | $min = $this->nearestClusterCenter($point, $clusterCenters)[0]; |
151 | |
152 | if ($min !== $point->group) { |
153 | ++$changed; |
154 | $point->group = $min; |
155 | } |
156 | } |
157 | |
158 | if ($changed <= $n * self::EPSILON || $n * self::EPSILON < 2) { |
159 | break; |
160 | } |
161 | } |
162 | |
163 | foreach ($clusterCenters as $key => $center) { |
164 | $center->group = $key; |
165 | $center->name = (string) $key; |
166 | } |
167 | |
168 | $this->clusterCenters = $clusterCenters; |
169 | } |
170 | |
171 | /** |
172 | * Get the index and distance to the nearest cluster center |
173 | * |
174 | * @param PointInterface $point Point to get the cluster for |
175 | * @param PointInterface[] $clusterCenters All cluster centers |
176 | * |
177 | * @return array [index, distance] |
178 | * |
179 | * @since 1.0.0 |
180 | */ |
181 | private function nearestClusterCenter(PointInterface $point, array $clusterCenters) : array |
182 | { |
183 | $index = $point->group; |
184 | $dist = \PHP_FLOAT_MAX; |
185 | |
186 | foreach ($clusterCenters as $key => $cPoint) { |
187 | $d = ($this->metric)($cPoint, $point); |
188 | |
189 | if ($dist > $d) { |
190 | $dist = $d; |
191 | $index = $key; |
192 | } |
193 | } |
194 | |
195 | return [$index, $dist]; |
196 | } |
197 | |
198 | /** |
199 | * Initializae cluster centers |
200 | * |
201 | * @param PointInterface[] $points Points to use for the cluster center initialization |
202 | * @param int<0, max> $n Amount of clusters to use |
203 | * |
204 | * @return PointInterface[] |
205 | * |
206 | * @since 1.0.0 |
207 | */ |
208 | private function kpp(array $points, int $n) : array |
209 | { |
210 | $clusters = [clone $points[\mt_rand(0, \count($points) - 1)]]; |
211 | $d = \array_fill(0, $n, 0.0); |
212 | |
213 | for ($i = 1; $i < $n; ++$i) { |
214 | $sum = 0; |
215 | |
216 | foreach ($points as $key => $point) { |
217 | $d[$key] = $this->nearestClusterCenter($point, \array_slice($clusters, 0, 5))[1]; |
218 | $sum += $d[$key]; |
219 | } |
220 | |
221 | $sum *= \mt_rand(0, \mt_getrandmax()) / \mt_getrandmax(); |
222 | |
223 | foreach ($d as $key => $di) { |
224 | $sum -= $di; |
225 | |
226 | if ($sum <= 0) { |
227 | $clusters[$i] = clone $points[$key]; |
228 | } |
229 | } |
230 | } |
231 | |
232 | foreach ($points as $point) { |
233 | $point->group = ($this->nearestClusterCenter($point, $clusters)[0]); |
234 | } |
235 | |
236 | return $clusters; |
237 | } |
238 | } |