Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
95.24% covered (success)
95.24%
60 / 63
66.67% covered (warning)
66.67%
4 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
Kmeans
95.24% covered (success)
95.24%
60 / 63
66.67% covered (warning)
66.67%
4 / 6
28
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 cluster
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
3
 getCentroids
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 generateClusters
92.59% covered (success)
92.59%
25 / 27
0.00% covered (danger)
0.00%
0 / 1
14.08
 nearestClusterCenter
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
3
 kpp
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
6
1<?php
2/**
3 * Jingga
4 *
5 * PHP Version 8.1
6 *
7 * @package   phpOMS\Algorithm\Clustering
8 * @copyright Dennis Eichhorn
9 * @license   OMS License 2.0
10 * @version   1.0.0
11 * @link      https://jingga.app
12 */
13declare(strict_types=1);
14
15namespace phpOMS\Algorithm\Clustering;
16
17use phpOMS\Math\Topology\MetricsND;
18
19/**
20 * Clustering points
21 *
22 * @package phpOMS\Algorithm\Clustering
23 * @license OMS License 2.0
24 * @link    https://jingga.app
25 * @see     ./clustering_overview.png
26 * @since   1.0.0
27 */
28final class Kmeans
29{
30    /**
31     * Epsilon for float comparison.
32     *
33     * @var float
34     * @since 1.0.0
35     */
36    public const EPSILON = 4.88e-04;
37
38    /**
39     * Metric to calculate the distance between two points
40     *
41     * @var \Closure
42     * @since 1.0.0
43     */
44    private \Closure $metric;
45
46    /**
47     * Points of the cluster centers
48     *
49     * @var PointInterface[]
50     * @since 1.0.0
51     */
52    private $clusterCenters = [];
53
54    /**
55     * Constructor
56     *
57     * @param null|\Closure $metric metric to use for the distance between two points
58     *
59     * @since 1.0.0
60     */
61    public function __construct(\Closure $metric = null)
62    {
63        $this->metric = $metric ?? function (PointInterface $a, PointInterface $b) {
64            $aCoordinates = $a->coordinates;
65            $bCoordinates = $b->coordinates;
66
67            return MetricsND::euclidean($aCoordinates, $bCoordinates);
68        };
69
70        //$this->generateClusters($points, $clusters);
71    }
72
73    /**
74     * Find the cluster for a point
75     *
76     * @param PointInterface $point Point to find the cluster for
77     *
78     * @return null|PointInterface Cluster center point
79     *
80     * @since 1.0.0
81     */
82    public function cluster(PointInterface $point) : ?PointInterface
83    {
84        $bestCluster  = null;
85        $bestDistance = \PHP_FLOAT_MAX;
86
87        foreach ($this->clusterCenters as $center) {
88            if (($distance = ($this->metric)($center, $point)) < $bestDistance) {
89                $bestCluster  = $center;
90                $bestDistance = $distance;
91            }
92        }
93
94        return $bestCluster;
95    }
96
97    /**
98     * Get cluster centroids
99     *
100     * @return array
101     *
102     * @since 1.0.0
103     */
104    public function getCentroids() : array
105    {
106        return $this->clusterCenters;
107    }
108
109    /**
110     * Generate the clusters of the points
111     *
112     * @param PointInterface[] $points   Points to cluster
113     * @param int<0, max>      $clusters Amount of clusters
114     *
115     * @return void
116     *
117     * @since 1.0.0
118     */
119    public function generateClusters(array $points, int $clusters) : void
120    {
121        $n              = \count($points);
122        $clusterCenters = $this->kpp($points, $clusters);
123        $coordinates    = \count($points[0]->coordinates);
124
125        while (true) {
126            foreach ($clusterCenters as $center) {
127                for ($i = 0; $i < $coordinates; ++$i) {
128                    $center->setCoordinate($i, 0);
129                }
130            }
131
132            foreach ($points as $point) {
133                $clusterPoint = $clusterCenters[$point->group];
134
135                ++$clusterPoint->group;
136                for ($i = 0; $i < $coordinates; ++$i) {
137                    $clusterPoint->setCoordinate($i, $clusterPoint->getCoordinate($i) + $point->getCoordinate($i));
138                }
139            }
140
141            foreach ($clusterCenters as $center) {
142                for ($i = 0; $i < $coordinates; ++$i) {
143                    // @todo Invalid center coodinate value in like 5 % of the runs
144                    $center->setCoordinate($i, $center->getCoordinate($i) / ($center->group === 0 ? 1 : $center->group));
145                }
146            }
147
148            $changed = 0;
149            foreach ($points as $point) {
150                $min = $this->nearestClusterCenter($point, $clusterCenters)[0];
151
152                if ($min !== $point->group) {
153                    ++$changed;
154                    $point->group = $min;
155                }
156            }
157
158            if ($changed <= $n * self::EPSILON || $n * self::EPSILON < 2) {
159                break;
160            }
161        }
162
163        foreach ($clusterCenters as $key => $center) {
164            $center->group = $key;
165            $center->name  = (string) $key;
166        }
167
168        $this->clusterCenters = $clusterCenters;
169    }
170
171    /**
172     * Get the index and distance to the nearest cluster center
173     *
174     * @param PointInterface   $point          Point to get the cluster for
175     * @param PointInterface[] $clusterCenters All cluster centers
176     *
177     * @return array [index, distance]
178     *
179     * @since 1.0.0
180     */
181    private function nearestClusterCenter(PointInterface $point, array $clusterCenters) : array
182    {
183        $index = $point->group;
184        $dist  = \PHP_FLOAT_MAX;
185
186        foreach ($clusterCenters as $key => $cPoint) {
187            $d = ($this->metric)($cPoint, $point);
188
189            if ($dist > $d) {
190                $dist  = $d;
191                $index = $key;
192            }
193        }
194
195        return [$index, $dist];
196    }
197
198    /**
199     * Initializae cluster centers
200     *
201     * @param PointInterface[] $points Points to use for the cluster center initialization
202     * @param int<0, max>      $n      Amount of clusters to use
203     *
204     * @return PointInterface[]
205     *
206     * @since 1.0.0
207     */
208    private function kpp(array $points, int $n) : array
209    {
210        $clusters = [clone $points[\mt_rand(0, \count($points) - 1)]];
211        $d        = \array_fill(0, $n, 0.0);
212
213        for ($i = 1; $i < $n; ++$i) {
214            $sum = 0;
215
216            foreach ($points as $key => $point) {
217                $d[$key] = $this->nearestClusterCenter($point, \array_slice($clusters, 0, 5))[1];
218                $sum    += $d[$key];
219            }
220
221            $sum *= \mt_rand(0, \mt_getrandmax()) / \mt_getrandmax();
222
223            foreach ($d as $key => $di) {
224                $sum -= $di;
225
226                if ($sum <= 0) {
227                    $clusters[$i] = clone $points[$key];
228                }
229            }
230        }
231
232        foreach ($points as $point) {
233            $point->group = ($this->nearestClusterCenter($point, $clusters)[0]);
234        }
235
236        return $clusters;
237    }
238}