Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 70 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
| MeanShift | |
0.00% |
0 / 70 |
|
0.00% |
0 / 7 |
506 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
| generateClusters | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
42 | |||
| shiftPoint | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
30 | |||
| groupPoints | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
| findNearestGroup | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
| distanceToGroup | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| cluster | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Jingga |
| 4 | * |
| 5 | * PHP Version 8.1 |
| 6 | * |
| 7 | * @package phpOMS\Algorithm\Clustering |
| 8 | * @copyright Dennis Eichhorn |
| 9 | * @license OMS License 2.0 |
| 10 | * @version 1.0.0 |
| 11 | * @link https://jingga.app |
| 12 | */ |
| 13 | declare(strict_types=1); |
| 14 | |
| 15 | namespace phpOMS\Algorithm\Clustering; |
| 16 | |
| 17 | use phpOMS\Math\Topology\KernelsND; |
| 18 | use phpOMS\Math\Topology\MetricsND; |
| 19 | |
| 20 | /** |
| 21 | * Clustering points |
| 22 | * |
| 23 | * @package phpOMS\Algorithm\Clustering |
| 24 | * @license OMS License 2.0 |
| 25 | * @link https://jingga.app |
| 26 | * @see ./clustering_overview.png |
| 27 | * @since 1.0.0 |
| 28 | */ |
| 29 | final class MeanShift |
| 30 | { |
| 31 | /** |
| 32 | * Min distance for clustering |
| 33 | * |
| 34 | * As long as a point is further away as the min distance the shifting is performed |
| 35 | * |
| 36 | * @var float |
| 37 | * @since 1.0.0 |
| 38 | */ |
| 39 | public const MIN_DISTANCE = 0.001; |
| 40 | |
| 41 | /** |
| 42 | * Kernel function |
| 43 | * |
| 44 | * @var \Closure |
| 45 | * @since 1.0.0 |
| 46 | */ |
| 47 | private \Closure $kernel; |
| 48 | |
| 49 | /** |
| 50 | * Metric function |
| 51 | * |
| 52 | * @var \Closure |
| 53 | * @since 1.0.0 |
| 54 | */ |
| 55 | private \Closure $metric; |
| 56 | |
| 57 | private array $points; |
| 58 | |
| 59 | /** |
| 60 | * Points outside of any cluster |
| 61 | * |
| 62 | * @var PointInterface[] |
| 63 | * @since 1.0.0 |
| 64 | */ |
| 65 | private array $noisePoints = []; |
| 66 | |
| 67 | /** |
| 68 | * Cluster points |
| 69 | * |
| 70 | * Points in clusters (helper to avoid looping the cluster array) |
| 71 | * |
| 72 | * @var array |
| 73 | * @since 1.0.0 |
| 74 | */ |
| 75 | private array $clusters = []; |
| 76 | |
| 77 | /** |
| 78 | * Points of the cluster centers |
| 79 | * |
| 80 | * @var PointInterface[] |
| 81 | * @since 1.0.0 |
| 82 | */ |
| 83 | private $clusterCenters = []; |
| 84 | |
| 85 | /** |
| 86 | * Max distance to cluster to be still considered part of cluster |
| 87 | * |
| 88 | * @var float |
| 89 | * @since 1.0.0 |
| 90 | */ |
| 91 | public float $groupDistanceTolerance = 0.1; |
| 92 | |
| 93 | /** |
| 94 | * Constructor |
| 95 | * |
| 96 | * Both the metric and kernel function need to be of the same dimension. |
| 97 | * |
| 98 | * @param null|\Closure $metric Metric to use for the distance between two points |
| 99 | * @param null|\Closure $kernel Kernel |
| 100 | * |
| 101 | * @since 1.0.0 |
| 102 | */ |
| 103 | public function __construct(\Closure $metric = null, \Closure $kernel = null) |
| 104 | { |
| 105 | $this->metric = $metric ?? function (PointInterface $a, PointInterface $b) { |
| 106 | $aCoordinates = $a->coordinates; |
| 107 | $bCoordinates = $b->coordinates; |
| 108 | |
| 109 | return MetricsND::euclidean($aCoordinates, $bCoordinates); |
| 110 | }; |
| 111 | |
| 112 | $this->kernel = $kernel ?? function (array $distances, array $bandwidths) { |
| 113 | return KernelsND::gaussianKernel($distances, $bandwidths); |
| 114 | }; |
| 115 | } |
| 116 | |
| 117 | /** |
| 118 | * Generate the clusters of the points |
| 119 | * |
| 120 | * @param PointInterface[] $points Points to cluster |
| 121 | * @param array<int|float> $bandwidth Bandwidth(s) |
| 122 | * |
| 123 | * @return void |
| 124 | * |
| 125 | * @since 1.0.0 |
| 126 | */ |
| 127 | public function generateClusters(array $points, array $bandwidth) : void |
| 128 | { |
| 129 | $shiftPoints = $points; |
| 130 | $maxMinDist = 1; |
| 131 | |
| 132 | $stillShifting = \array_fill(0, \count($points), true); |
| 133 | |
| 134 | $pointLength = \count($shiftPoints); |
| 135 | |
| 136 | while ($maxMinDist > self::MIN_DISTANCE) { |
| 137 | $maxMinDist = 0; |
| 138 | |
| 139 | for ($i = 0; $i < $pointLength; ++$i) { |
| 140 | if (!$stillShifting[$i]) { |
| 141 | continue; |
| 142 | } |
| 143 | |
| 144 | $pNew = $shiftPoints[$i]; |
| 145 | $pNewStart = $pNew; |
| 146 | $pNew = $this->shiftPoint($pNew, $points, $bandwidth); |
| 147 | $dist = ($this->metric)($pNew, $pNewStart); |
| 148 | |
| 149 | if ($dist > $maxMinDist) { |
| 150 | $maxMinDist = $dist; |
| 151 | } |
| 152 | |
| 153 | if ($dist < self::MIN_DISTANCE) { |
| 154 | $stillShifting[$i] = false; |
| 155 | } |
| 156 | |
| 157 | $shiftPoints[$i] = $pNew; |
| 158 | } |
| 159 | } |
| 160 | |
| 161 | // @todo create an array of noisePoints like in the DBSCAN. That array can be empty or not depending on the bandwidth defined |
| 162 | |
| 163 | $this->clusters = $this->groupPoints($shiftPoints); |
| 164 | $this->clusterCenters = $shiftPoints; |
| 165 | } |
| 166 | |
| 167 | /** |
| 168 | * Perform shift on a point |
| 169 | * |
| 170 | * @param PointInterface $point Point to shift |
| 171 | * @param PointInterface $points Array of all points |
| 172 | * @param array<int|float> $bandwidth Bandwidth(s) |
| 173 | * |
| 174 | * @return PointInterface |
| 175 | * |
| 176 | * @since 1.0.0 |
| 177 | */ |
| 178 | private function shiftPoint(PointInterface $point, array $points, array $bandwidth) : PointInterface |
| 179 | { |
| 180 | $scaleFactor = 0.0; |
| 181 | |
| 182 | $shifted = clone $point; |
| 183 | |
| 184 | foreach ($points as $pTemp) { |
| 185 | $dist = ($this->metric)($point, $pTemp); |
| 186 | $weight = ($this->kernel)($dist, $bandwidth); |
| 187 | |
| 188 | foreach ($point->coordinates as $idx => $_) { |
| 189 | if (!isset($shifted->coordinates[$idx])) { |
| 190 | $shifted->coordinates[$idx] = 0; |
| 191 | } |
| 192 | |
| 193 | $shifted->coordinates[$idx] += $pTemp->coordinates[$idx] * $weight; |
| 194 | } |
| 195 | |
| 196 | $scaleFactor += $weight; |
| 197 | } |
| 198 | |
| 199 | foreach ($shifted->coordinates as $idx => $_) { |
| 200 | $shifted->coordinates[$idx] /= $scaleFactor; |
| 201 | } |
| 202 | |
| 203 | return $shifted; |
| 204 | } |
| 205 | |
| 206 | /** |
| 207 | * Group points together into clusters |
| 208 | * |
| 209 | * @param PointInterface[] $points Array of points to assign to groups |
| 210 | * |
| 211 | * @return array |
| 212 | * |
| 213 | * @since 1.0.0 |
| 214 | */ |
| 215 | private function groupPoints(array $points) : array |
| 216 | { |
| 217 | $groupAssignment = []; |
| 218 | $groups = []; |
| 219 | $groupIndex = 0; |
| 220 | |
| 221 | foreach ($points as $point) { |
| 222 | $nearestGroupIndex = $this->findNearestGroup($point, $groups); |
| 223 | |
| 224 | if ($nearestGroupIndex === -1) { |
| 225 | // create new group |
| 226 | $groups[] = [$point]; |
| 227 | $groupAssignment[] = $groupIndex; |
| 228 | |
| 229 | ++$groupIndex; |
| 230 | } else { |
| 231 | $groupAssignment[] = $nearestGroupIndex; |
| 232 | $groups[$nearestGroupIndex][] = $point; |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | return $groupAssignment; |
| 237 | } |
| 238 | |
| 239 | /** |
| 240 | * Find the closest cluster/group of a point |
| 241 | * |
| 242 | * @param PointInterface $point Point to find the cluster for |
| 243 | * @param array<PointInterface[]> $groups Clusters |
| 244 | * |
| 245 | * @return int |
| 246 | * |
| 247 | * @since 1.0.0 |
| 248 | */ |
| 249 | private function findNearestGroup(PointInterface $point, array $groups) : int |
| 250 | { |
| 251 | $nearestGroupIndex = -1; |
| 252 | $index = 0; |
| 253 | |
| 254 | foreach ($groups as $group) { |
| 255 | $distanceToGroup = $this->distanceToGroup($point, $group); |
| 256 | |
| 257 | if ($distanceToGroup < $this->groupDistanceTolerance) { |
| 258 | $nearestGroupIndex = $index; |
| 259 | |
| 260 | break; |
| 261 | } |
| 262 | |
| 263 | ++$index; |
| 264 | } |
| 265 | |
| 266 | return $nearestGroupIndex; |
| 267 | } |
| 268 | |
| 269 | /** |
| 270 | * Find distance of point to best cluster/group |
| 271 | * |
| 272 | * @param PointInterface $point Point to find the cluster for |
| 273 | * @param PointInterface[] $group Clusters |
| 274 | * |
| 275 | * @return float Distance |
| 276 | * |
| 277 | * @since 1.0.0 |
| 278 | */ |
| 279 | private function distanceToGroup(PointInterface $point, array $group) : float |
| 280 | { |
| 281 | $minDistance = \PHP_FLOAT_MAX; |
| 282 | |
| 283 | foreach ($group as $pt) { |
| 284 | $dist = ($this->metric)($point, $pt); |
| 285 | |
| 286 | if ($dist < $minDistance) { |
| 287 | $minDistance = $dist; |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | return $minDistance; |
| 292 | } |
| 293 | |
| 294 | /** |
| 295 | * Find the cluster for a point |
| 296 | * |
| 297 | * @param PointInterface $point Point to find the cluster for |
| 298 | * |
| 299 | * @return null|PointInterface Cluster center point |
| 300 | * |
| 301 | * @since 1.0.0 |
| 302 | */ |
| 303 | public function cluster(PointInterface $point) : ?PointInterface |
| 304 | { |
| 305 | $clusterId = $this->findNearestGroup($point, $this->clusters); |
| 306 | |
| 307 | return $this->clusterCenters[$clusterId] ?? null; |
| 308 | } |
| 309 | } |