Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 59 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
| DBSCAN | |
0.00% |
0 / 59 |
|
0.00% |
0 / 6 |
650 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
| expandCluster | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
42 | |||
| findNeighbors | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
| generateDistanceMatrix | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| cluster | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 | |||
| generateClusters | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Jingga |
| 4 | * |
| 5 | * PHP Version 8.1 |
| 6 | * |
| 7 | * @package phpOMS\Algorithm\Clustering |
| 8 | * @copyright Dennis Eichhorn |
| 9 | * @license OMS License 2.0 |
| 10 | * @version 1.0.0 |
| 11 | * @link https://jingga.app |
| 12 | */ |
| 13 | declare(strict_types=1); |
| 14 | |
| 15 | namespace phpOMS\Algorithm\Clustering; |
| 16 | |
| 17 | use phpOMS\Math\Geometry\ConvexHull\MonotoneChain; |
| 18 | use phpOMS\Math\Geometry\Shape\D2\Polygon; |
| 19 | use phpOMS\Math\Topology\MetricsND; |
| 20 | |
| 21 | /** |
| 22 | * Clustering points |
| 23 | * |
| 24 | * @package phpOMS\Algorithm\Clustering |
| 25 | * @license OMS License 2.0 |
| 26 | * @link https://jingga.app |
| 27 | * @see ./clustering_overview.png |
| 28 | * @since 1.0.0 |
| 29 | * |
| 30 | * @todo Expand to n dimensions |
| 31 | */ |
| 32 | final class DBSCAN |
| 33 | { |
| 34 | /** |
| 35 | * Epsilon for float comparison. |
| 36 | * |
| 37 | * @var float |
| 38 | * @since 1.0.0 |
| 39 | */ |
| 40 | public const EPSILON = 4.88e-04; |
| 41 | |
| 42 | /** |
| 43 | * Metric to calculate the distance between two points |
| 44 | * |
| 45 | * @var \Closure |
| 46 | * @since 1.0.0 |
| 47 | */ |
| 48 | private \Closure $metric; |
| 49 | |
| 50 | /** |
| 51 | * Points outside of any cluster |
| 52 | * |
| 53 | * @var PointInterface[] |
| 54 | * @since 1.0.0 |
| 55 | */ |
| 56 | private array $noisePoints = []; |
| 57 | |
| 58 | /** |
| 59 | * All points |
| 60 | * |
| 61 | * @var PointInterface[] |
| 62 | * @since 1.0.0 |
| 63 | */ |
| 64 | private array $points = []; |
| 65 | |
| 66 | /** |
| 67 | * Clusters |
| 68 | * |
| 69 | * Array of points assigned to a cluster |
| 70 | * |
| 71 | * @var array<int, array> |
| 72 | * @since 1.0.0 |
| 73 | */ |
| 74 | private array $clusters = []; |
| 75 | |
| 76 | /** |
| 77 | * Convex hull of all clusters |
| 78 | * |
| 79 | * @var array<array> |
| 80 | * @since 1.0.0 |
| 81 | */ |
| 82 | private array $convexHulls = []; |
| 83 | |
| 84 | /** |
| 85 | * Cluster points |
| 86 | * |
| 87 | * Points in clusters (helper to avoid looping the cluster array) |
| 88 | * |
| 89 | * @var array |
| 90 | * @since 1.0.0 |
| 91 | */ |
| 92 | private array $clusteredPoints = []; |
| 93 | |
| 94 | /** |
| 95 | * Distance matrix |
| 96 | * |
| 97 | * Distances between points |
| 98 | * |
| 99 | * @var array<float[]> |
| 100 | * @since 1.0.0 |
| 101 | */ |
| 102 | private array $distanceMatrix = []; |
| 103 | |
| 104 | /** |
| 105 | * Constructor |
| 106 | * |
| 107 | * @param null|\Closure $metric metric to use for the distance between two points |
| 108 | * |
| 109 | * @since 1.0.0 |
| 110 | */ |
| 111 | public function __construct(\Closure $metric = null) |
| 112 | { |
| 113 | $this->metric = $metric ?? function (PointInterface $a, PointInterface $b) { |
| 114 | $aCoordinates = $a->coordinates; |
| 115 | $bCoordinates = $b->coordinates; |
| 116 | |
| 117 | return MetricsND::euclidean($aCoordinates, $bCoordinates); |
| 118 | }; |
| 119 | } |
| 120 | |
| 121 | /** |
| 122 | * Expand cluster with additional point and potential neighbors. |
| 123 | * |
| 124 | * @param PointInterface $point Point to add to a cluster |
| 125 | * @param array $neighbors Neighbors of point |
| 126 | * @param int $c Cluster id |
| 127 | * @param float $epsilon Max distance |
| 128 | * @param int $minPoints Min amount of points required for a cluster |
| 129 | * |
| 130 | * @return void |
| 131 | * |
| 132 | * @since 1.0.0 |
| 133 | */ |
| 134 | private function expandCluster( |
| 135 | PointInterface $point, |
| 136 | array $neighbors, |
| 137 | int $c, |
| 138 | float $epsilon, |
| 139 | int $minPoints |
| 140 | ) : void |
| 141 | { |
| 142 | $this->clusters[$c][] = $point; |
| 143 | $this->clusteredPoints[] = $point; |
| 144 | $nPoint = \reset($neighbors); |
| 145 | |
| 146 | while ($nPoint) { |
| 147 | $neighbors2 = $this->findNeighbors($nPoint, $epsilon); |
| 148 | |
| 149 | if (\count($neighbors2) >= $minPoints) { |
| 150 | foreach ($neighbors2 as $nPoint2) { |
| 151 | if (!isset($neighbors[$nPoint2->name])) { |
| 152 | $neighbors[$nPoint2->name] = $nPoint2; |
| 153 | } |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | if (!\in_array($nPoint->name, $this->clusteredPoints)) { |
| 158 | $this->clusters[$c][] = $nPoint; |
| 159 | $this->clusteredPoints[] = $nPoint; |
| 160 | } |
| 161 | |
| 162 | $nPoint = \next($neighbors); |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | /** |
| 167 | * Find neighbors of a point |
| 168 | * |
| 169 | * @param PointInterface $point Base point for potential neighbors |
| 170 | * @param float $epsilon Max distance to neighbor |
| 171 | * |
| 172 | * @return array |
| 173 | * |
| 174 | * @since 1.0.0 |
| 175 | */ |
| 176 | private function findNeighbors(PointInterface $point, float $epsilon) : array |
| 177 | { |
| 178 | $neighbors = []; |
| 179 | foreach ($this->points as $point2) { |
| 180 | if ($point->isEquals($point2)) { |
| 181 | $distance = isset($this->distanceMatrix[$point->name]) |
| 182 | ? $this->distanceMatrix[$point->name][$point2->name] |
| 183 | : $this->distanceMatrix[$point2->name][$point->name]; |
| 184 | |
| 185 | if ($distance < $epsilon) { |
| 186 | $neighbors[$point2->name] = $point2; |
| 187 | } |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | return $neighbors; |
| 192 | } |
| 193 | |
| 194 | /** |
| 195 | * Generate distances between points |
| 196 | * |
| 197 | * @param array $points Array of all points |
| 198 | * |
| 199 | * @return float[] |
| 200 | * |
| 201 | * @since 1.0.0 |
| 202 | */ |
| 203 | private function generateDistanceMatrix(array $points) : array |
| 204 | { |
| 205 | $distances = []; |
| 206 | foreach ($points as $point) { |
| 207 | $distances[$point->name] = []; |
| 208 | foreach ($points as $point2) { |
| 209 | $distances[$point->name][$point2->name] = ($this->metric)($point, $point2); |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | /** @var float[] $distances */ |
| 214 | return $distances; |
| 215 | } |
| 216 | |
| 217 | /** |
| 218 | * Find the cluster for a point |
| 219 | * |
| 220 | * @param PointInterface $point Point to find the cluster for |
| 221 | * |
| 222 | * @return int Cluster id |
| 223 | * |
| 224 | * @since 1.0.0 |
| 225 | */ |
| 226 | public function cluster(PointInterface $point) : int |
| 227 | { |
| 228 | if ($this->convexHulls === []) { |
| 229 | foreach ($this->clusters as $c => $cluster) { |
| 230 | $points = []; |
| 231 | foreach ($cluster as $p) { |
| 232 | $points[] = $p->coordinates; |
| 233 | } |
| 234 | |
| 235 | // @todo: this is only good for 2D. Fix this for ND. |
| 236 | $this->convexHulls[$c] = MonotoneChain::createConvexHull($points); |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | foreach ($this->convexHulls as $c => $hull) { |
| 241 | if (Polygon::isPointInPolygon($point->coordinates, $hull) <= 0) { |
| 242 | return $c; |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | return -1; |
| 247 | } |
| 248 | |
| 249 | /** |
| 250 | * Generate the clusters of the points |
| 251 | * |
| 252 | * @param PointInterface[] $points Points to cluster |
| 253 | * @param float $epsilon Max distance |
| 254 | * @param int $minPoints Min amount of points required for a cluster |
| 255 | * |
| 256 | * @return void |
| 257 | * |
| 258 | * @since 1.0.0 |
| 259 | */ |
| 260 | public function generateClusters(array $points, float $epsilon, int $minPoints) : void |
| 261 | { |
| 262 | $this->noisePoints = []; |
| 263 | $this->clusters = []; |
| 264 | $this->clusteredPoints = []; |
| 265 | $this->points = $points; |
| 266 | $this->convexHulls = []; |
| 267 | |
| 268 | $this->distanceMatrix = $this->generateDistanceMatrix($points); |
| 269 | |
| 270 | $c = 0; |
| 271 | $this->clusters[$c] = []; |
| 272 | |
| 273 | foreach ($this->points as $point) { |
| 274 | $neighbors = $this->findNeighbors($point, $epsilon); |
| 275 | |
| 276 | if (\count($neighbors) < $minPoints) { |
| 277 | $this->noisePoints[] = $point->name; |
| 278 | } elseif (!\in_array($point->name, $this->clusteredPoints)) { |
| 279 | $this->expandCluster($point, $neighbors, $c, $epsilon, $minPoints); |
| 280 | ++$c; |
| 281 | $this->clusters[$c] = []; |
| 282 | } |
| 283 | } |
| 284 | } |
| 285 | } |