Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 44 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
MemoryCF | |
0.00% |
0 / 44 |
|
0.00% |
0 / 6 |
420 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeRanking | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
euclideanDistance | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
cosineDistance | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
weightedItemRank | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
bestMatch | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
56 |
1 | <?php |
2 | /** |
3 | * Jingga |
4 | * |
5 | * PHP Version 8.1 |
6 | * |
7 | * @package phpOMS\Business\Recommendation |
8 | * @copyright Dennis Eichhorn |
9 | * @license OMS License 2.0 |
10 | * @version 1.0.0 |
11 | * @link https://jingga.app |
12 | */ |
13 | declare(strict_types=1); |
14 | |
15 | namespace phpOMS\Business\Recommendation; |
16 | |
17 | use phpOMS\Math\Topology\MetricsND; |
18 | |
19 | /** |
20 | * Memory based collaborative filtering |
21 | * |
22 | * Items or potential customers are found based on how much they like certain items. |
23 | * |
24 | * This requires a item/product rating of some sort in the backend. |
25 | * Such a rating could be either manual user ratings or a rating based on how often it is purchased or how long it is used. |
26 | * Most likely a combination is required. |
27 | * |
28 | * @package phpOMS\Business\Recommendation |
29 | * @license OMS License 2.0 |
30 | * @link https://jingga.app |
31 | * @see https://realpython.com/build-recommendation-engine-collaborative-filtering/ |
32 | * @since 1.0.0 |
33 | */ |
34 | final class MemoryCF |
35 | { |
36 | /** |
37 | * All rankings |
38 | * |
39 | * @var array<array> |
40 | * @since 1.0.0 |
41 | */ |
42 | private array $rankings = []; |
43 | |
44 | /** |
45 | * Constructor. |
46 | * |
47 | * @param array<array> $rankings Array of item ratings by users (or reverse to find users) |
48 | * |
49 | * @since 1.0.0 |
50 | */ |
51 | public function __construct(array $rankings) |
52 | { |
53 | $this->rankings = $this->normalizeRanking($rankings); |
54 | } |
55 | |
56 | /** |
57 | * Normalize all ratings. |
58 | * |
59 | * This is necessary because some users my give lower or higher ratings on average (bias). |
60 | * |
61 | * @param array<array> $rankings Item ratings/rankings |
62 | * |
63 | * @return array<array> |
64 | * |
65 | * @since 1.0.0 |
66 | */ |
67 | private function normalizeRanking(array $rankings) : array |
68 | { |
69 | foreach ($rankings as $idx => $items) { |
70 | $avg = \array_sum($items) / \count($items); |
71 | |
72 | foreach ($items as $idx2 => $_) { |
73 | $rankings[$idx][$idx2] -= $avg; |
74 | } |
75 | } |
76 | |
77 | return $rankings; |
78 | } |
79 | |
80 | /** |
81 | * Euclidean distance between users |
82 | * |
83 | * @param array $ranking Rating to find the distance for |
84 | * @param array<array> $rankings All ratings to find the distance to |
85 | * |
86 | * @return float[] |
87 | * |
88 | * @since 1.0.0 |
89 | */ |
90 | public function euclideanDistance(array $ranking, array $rankings) : array |
91 | { |
92 | $distances = []; |
93 | foreach ($rankings as $idx => $r) { |
94 | $distances[$idx] = \abs(MetricsND::euclidean($ranking, $r)); |
95 | } |
96 | |
97 | return $distances; |
98 | } |
99 | |
100 | /** |
101 | * Cosine distance between users |
102 | * |
103 | * @param array $ranking Rating to find the distance for |
104 | * @param array<array> $rankings All ratings to find the distance to |
105 | * |
106 | * @return float[] |
107 | * |
108 | * @since 1.0.0 |
109 | */ |
110 | public function cosineDistance(array $ranking, array $rankings) : array |
111 | { |
112 | $distances = []; |
113 | foreach ($rankings as $idx => $r) { |
114 | $distances[$idx] = \abs(MetricsND::cosine($ranking, $r)); |
115 | } |
116 | |
117 | return $distances; |
118 | } |
119 | |
120 | /** |
121 | * Assign a item rank/rating based on the distance to other items |
122 | * |
123 | * @param string $itemId Id of the item to rank |
124 | * @param array $distances Distance to other users |
125 | * @param array<array> $users All user ratings |
126 | * @param int $size Only consider the top n distances (best matches with other users) |
127 | * |
128 | * @return float Estimated item rank/rating based on similarity to other users |
129 | * |
130 | * @since 1.0.0 |
131 | */ |
132 | private function weightedItemRank(string $itemId, array $distances, array $users, int $size) : float |
133 | { |
134 | $rank = 0.0; |
135 | $count = 0; |
136 | foreach ($distances as $uId => $_) { |
137 | if ($count >= $size) { |
138 | break; |
139 | } |
140 | |
141 | if (!isset($users[$itemId])) { |
142 | continue; |
143 | } |
144 | |
145 | ++$count; |
146 | $rank += $users[$uId][$itemId]; |
147 | } |
148 | |
149 | return $count === 0 ? 0.0 : $rank / $count; |
150 | } |
151 | |
152 | /** |
153 | * Find potential items/users which are a good match for a user/item. |
154 | * |
155 | * The algorithm uses the ratings of a a user and tries to find other users who have similar rating behavior and then searches for high rated items that the user doesn't have yet. |
156 | * |
157 | * This can be used to find items for a specific user (aka might be interested in) or to find users who might be interested in this item |
158 | * |
159 | * option 1 - find items |
160 | * ranking[itemId] = itemRank (how much does specific user like item) |
161 | * rankings[userId][itemId] = itemRank |
162 | * |
163 | * option 2 - find user |
164 | * ranking[userId] = itemRank (how much does user like specific item) |
165 | * rankings[itemId][userId] = itemRank |
166 | * option 1 searches for items, option 2 searches for users |
167 | * |
168 | * @param array $ranking Array of item ratings (e.g. products, movies, ...) |
169 | * |
170 | * @return array |
171 | * |
172 | * @since 1.0.0 |
173 | */ |
174 | public function bestMatch(array $ranking, int $size = 10) : array |
175 | { |
176 | $ranking = $this->normalizeRanking([$ranking]); |
177 | $ranking = $ranking[0]; |
178 | |
179 | $euclidean = $this->euclideanDistance($ranking, $this->rankings); |
180 | $cosine = $this->cosineDistance($ranking, $this->rankings); |
181 | |
182 | \asort($euclidean); |
183 | \asort($cosine); |
184 | |
185 | $size = \min($size, \count($this->rankings)); |
186 | $matches = []; |
187 | |
188 | $distancePointer = \array_keys($euclidean); |
189 | $anglePointer = \array_keys($cosine); |
190 | |
191 | // Inspect items of the top n comparable users |
192 | for ($i = 1; $i <= $size; ++$i) { |
193 | $index = (int) ($i / 2) - 1; |
194 | |
195 | $uId = $i % 2 === 1 ? $distancePointer[$index] : $anglePointer[$index]; |
196 | $distances = $i % 2 === 1 ? $euclidean : $cosine; |
197 | |
198 | foreach ($this->rankings[$uId] as $iId => $_) { |
199 | // Item is not already in dataset and not in historic dataset (we are only interested in new) |
200 | if (isset($matches[$iId]) || isset($ranking[$iId])) { |
201 | continue; |
202 | } |
203 | |
204 | // Calculate the expected rating the user would give based on what the best comparable users did |
205 | $matches[$iId] = $this->weightedItemRank($iId, $distances, $this->rankings, $size); |
206 | } |
207 | } |
208 | |
209 | \asort($matches); |
210 | |
211 | return \array_reverse($matches, true); |
212 | } |
213 | } |