Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 59 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
DBSCAN | |
0.00% |
0 / 59 |
|
0.00% |
0 / 6 |
650 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
expandCluster | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
42 | |||
findNeighbors | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
generateDistanceMatrix | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
cluster | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 | |||
generateClusters | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | /** |
3 | * Jingga |
4 | * |
5 | * PHP Version 8.1 |
6 | * |
7 | * @package phpOMS\Algorithm\Clustering |
8 | * @copyright Dennis Eichhorn |
9 | * @license OMS License 2.0 |
10 | * @version 1.0.0 |
11 | * @link https://jingga.app |
12 | */ |
13 | declare(strict_types=1); |
14 | |
15 | namespace phpOMS\Algorithm\Clustering; |
16 | |
17 | use phpOMS\Math\Geometry\ConvexHull\MonotoneChain; |
18 | use phpOMS\Math\Geometry\Shape\D2\Polygon; |
19 | use phpOMS\Math\Topology\MetricsND; |
20 | |
21 | /** |
22 | * Clustering points |
23 | * |
24 | * @package phpOMS\Algorithm\Clustering |
25 | * @license OMS License 2.0 |
26 | * @link https://jingga.app |
27 | * @see ./clustering_overview.png |
28 | * @since 1.0.0 |
29 | * |
30 | * @todo Expand to n dimensions |
31 | */ |
32 | final class DBSCAN |
33 | { |
34 | /** |
35 | * Epsilon for float comparison. |
36 | * |
37 | * @var float |
38 | * @since 1.0.0 |
39 | */ |
40 | public const EPSILON = 4.88e-04; |
41 | |
42 | /** |
43 | * Metric to calculate the distance between two points |
44 | * |
45 | * @var \Closure |
46 | * @since 1.0.0 |
47 | */ |
48 | private \Closure $metric; |
49 | |
50 | /** |
51 | * Points outside of any cluster |
52 | * |
53 | * @var PointInterface[] |
54 | * @since 1.0.0 |
55 | */ |
56 | private array $noisePoints = []; |
57 | |
58 | /** |
59 | * All points |
60 | * |
61 | * @var PointInterface[] |
62 | * @since 1.0.0 |
63 | */ |
64 | private array $points = []; |
65 | |
66 | /** |
67 | * Clusters |
68 | * |
69 | * Array of points assigned to a cluster |
70 | * |
71 | * @var array<int, array> |
72 | * @since 1.0.0 |
73 | */ |
74 | private array $clusters = []; |
75 | |
76 | /** |
77 | * Convex hull of all clusters |
78 | * |
79 | * @var array<array> |
80 | * @since 1.0.0 |
81 | */ |
82 | private array $convexHulls = []; |
83 | |
84 | /** |
85 | * Cluster points |
86 | * |
87 | * Points in clusters (helper to avoid looping the cluster array) |
88 | * |
89 | * @var array |
90 | * @since 1.0.0 |
91 | */ |
92 | private array $clusteredPoints = []; |
93 | |
94 | /** |
95 | * Distance matrix |
96 | * |
97 | * Distances between points |
98 | * |
99 | * @var array<float[]> |
100 | * @since 1.0.0 |
101 | */ |
102 | private array $distanceMatrix = []; |
103 | |
104 | /** |
105 | * Constructor |
106 | * |
107 | * @param null|\Closure $metric metric to use for the distance between two points |
108 | * |
109 | * @since 1.0.0 |
110 | */ |
111 | public function __construct(\Closure $metric = null) |
112 | { |
113 | $this->metric = $metric ?? function (PointInterface $a, PointInterface $b) { |
114 | $aCoordinates = $a->coordinates; |
115 | $bCoordinates = $b->coordinates; |
116 | |
117 | return MetricsND::euclidean($aCoordinates, $bCoordinates); |
118 | }; |
119 | } |
120 | |
121 | /** |
122 | * Expand cluster with additional point and potential neighbors. |
123 | * |
124 | * @param PointInterface $point Point to add to a cluster |
125 | * @param array $neighbors Neighbors of point |
126 | * @param int $c Cluster id |
127 | * @param float $epsilon Max distance |
128 | * @param int $minPoints Min amount of points required for a cluster |
129 | * |
130 | * @return void |
131 | * |
132 | * @since 1.0.0 |
133 | */ |
134 | private function expandCluster( |
135 | PointInterface $point, |
136 | array $neighbors, |
137 | int $c, |
138 | float $epsilon, |
139 | int $minPoints |
140 | ) : void |
141 | { |
142 | $this->clusters[$c][] = $point; |
143 | $this->clusteredPoints[] = $point; |
144 | $nPoint = \reset($neighbors); |
145 | |
146 | while ($nPoint) { |
147 | $neighbors2 = $this->findNeighbors($nPoint, $epsilon); |
148 | |
149 | if (\count($neighbors2) >= $minPoints) { |
150 | foreach ($neighbors2 as $nPoint2) { |
151 | if (!isset($neighbors[$nPoint2->name])) { |
152 | $neighbors[$nPoint2->name] = $nPoint2; |
153 | } |
154 | } |
155 | } |
156 | |
157 | if (!\in_array($nPoint->name, $this->clusteredPoints)) { |
158 | $this->clusters[$c][] = $nPoint; |
159 | $this->clusteredPoints[] = $nPoint; |
160 | } |
161 | |
162 | $nPoint = \next($neighbors); |
163 | } |
164 | } |
165 | |
166 | /** |
167 | * Find neighbors of a point |
168 | * |
169 | * @param PointInterface $point Base point for potential neighbors |
170 | * @param float $epsilon Max distance to neighbor |
171 | * |
172 | * @return array |
173 | * |
174 | * @since 1.0.0 |
175 | */ |
176 | private function findNeighbors(PointInterface $point, float $epsilon) : array |
177 | { |
178 | $neighbors = []; |
179 | foreach ($this->points as $point2) { |
180 | if ($point->isEquals($point2)) { |
181 | $distance = isset($this->distanceMatrix[$point->name]) |
182 | ? $this->distanceMatrix[$point->name][$point2->name] |
183 | : $this->distanceMatrix[$point2->name][$point->name]; |
184 | |
185 | if ($distance < $epsilon) { |
186 | $neighbors[$point2->name] = $point2; |
187 | } |
188 | } |
189 | } |
190 | |
191 | return $neighbors; |
192 | } |
193 | |
194 | /** |
195 | * Generate distances between points |
196 | * |
197 | * @param array $points Array of all points |
198 | * |
199 | * @return float[] |
200 | * |
201 | * @since 1.0.0 |
202 | */ |
203 | private function generateDistanceMatrix(array $points) : array |
204 | { |
205 | $distances = []; |
206 | foreach ($points as $point) { |
207 | $distances[$point->name] = []; |
208 | foreach ($points as $point2) { |
209 | $distances[$point->name][$point2->name] = ($this->metric)($point, $point2); |
210 | } |
211 | } |
212 | |
213 | /** @var float[] $distances */ |
214 | return $distances; |
215 | } |
216 | |
217 | /** |
218 | * Find the cluster for a point |
219 | * |
220 | * @param PointInterface $point Point to find the cluster for |
221 | * |
222 | * @return int Cluster id |
223 | * |
224 | * @since 1.0.0 |
225 | */ |
226 | public function cluster(PointInterface $point) : int |
227 | { |
228 | if ($this->convexHulls === []) { |
229 | foreach ($this->clusters as $c => $cluster) { |
230 | $points = []; |
231 | foreach ($cluster as $p) { |
232 | $points[] = $p->coordinates; |
233 | } |
234 | |
235 | // @todo: this is only good for 2D. Fix this for ND. |
236 | $this->convexHulls[$c] = MonotoneChain::createConvexHull($points); |
237 | } |
238 | } |
239 | |
240 | foreach ($this->convexHulls as $c => $hull) { |
241 | if (Polygon::isPointInPolygon($point->coordinates, $hull) <= 0) { |
242 | return $c; |
243 | } |
244 | } |
245 | |
246 | return -1; |
247 | } |
248 | |
249 | /** |
250 | * Generate the clusters of the points |
251 | * |
252 | * @param PointInterface[] $points Points to cluster |
253 | * @param float $epsilon Max distance |
254 | * @param int $minPoints Min amount of points required for a cluster |
255 | * |
256 | * @return void |
257 | * |
258 | * @since 1.0.0 |
259 | */ |
260 | public function generateClusters(array $points, float $epsilon, int $minPoints) : void |
261 | { |
262 | $this->noisePoints = []; |
263 | $this->clusters = []; |
264 | $this->clusteredPoints = []; |
265 | $this->points = $points; |
266 | $this->convexHulls = []; |
267 | |
268 | $this->distanceMatrix = $this->generateDistanceMatrix($points); |
269 | |
270 | $c = 0; |
271 | $this->clusters[$c] = []; |
272 | |
273 | foreach ($this->points as $point) { |
274 | $neighbors = $this->findNeighbors($point, $epsilon); |
275 | |
276 | if (\count($neighbors) < $minPoints) { |
277 | $this->noisePoints[] = $point->name; |
278 | } elseif (!\in_array($point->name, $this->clusteredPoints)) { |
279 | $this->expandCluster($point, $neighbors, $c, $epsilon, $minPoints); |
280 | ++$c; |
281 | $this->clusters[$c] = []; |
282 | } |
283 | } |
284 | } |
285 | } |