Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
69 / 69 |
|
100.00% |
8 / 8 |
CRAP | |
100.00% |
1 / 1 |
StringCompare | |
100.00% |
69 / 69 |
|
100.00% |
8 / 8 |
27 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
add | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
matchDictionary | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
jaro | |
100.00% |
37 / 37 |
|
100.00% |
1 / 1 |
12 | |||
valueWords | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
7 | |||
valuePhrase | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
valueLength | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fuzzyMatch | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * Jingga |
4 | * |
5 | * PHP Version 8.1 |
6 | * |
7 | * @package phpOMS\Utils |
8 | * @copyright Dennis Eichhorn |
9 | * @license OMS License 2.0 |
10 | * @version 1.0.0 |
11 | * @link https://jingga.app |
12 | */ |
13 | declare(strict_types=1); |
14 | |
15 | namespace phpOMS\Utils; |
16 | |
17 | /** |
18 | * String comparison class. |
19 | * |
20 | * This class helps to compare two strings |
21 | * |
22 | * @package phpOMS\Utils |
23 | * @license OMS License 2.0 |
24 | * @link https://jingga.app |
25 | * @since 1.0.0 |
26 | */ |
final class StringCompare
{
    /**
     * Dictionary.
     *
     * @var string[]
     * @since 1.0.0
     */
    private array $dictionary = [];

    /**
     * Constructor.
     *
     * The dictionary is optional; words can also be added later through add().
     *
     * @param string[] $dictionary Dictionary
     *
     * @since 1.0.0
     */
    public function __construct(array $dictionary = [])
    {
        $this->dictionary = $dictionary;
    }

    /**
     * Adds word to dictionary
     *
     * @param string $word Word to add to dictionary
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function add(string $word) : void
    {
        $this->dictionary[] = $word;
    }

    /**
     * Match word against dictionary.
     *
     * Every dictionary word is scored with fuzzyMatch() using the default weights
     * and the word with the lowest score wins. On equal scores the word that
     * comes first in the dictionary is kept.
     *
     * @param string $match Word to match against dictionary
     *
     * @return string Best match, empty string if the dictionary is empty
     *
     * @since 1.0.0
     */
    public function matchDictionary(string $match) : string
    {
        $bestScore = \PHP_INT_MAX;
        $bestMatch = '';

        foreach ($this->dictionary as $word) {
            $score = self::fuzzyMatch($word, $match);

            // Strict comparison keeps the first word when scores are equal
            if ($score < $bestScore) {
                $bestScore = $score;
                $bestMatch = $word;
            }
        }

        return $bestMatch;
    }

    /**
     * Jaro string similarity
     *
     * Returns 1.0 for identical strings (including two empty strings) and 0.0
     * if the strings have no matching characters.
     *
     * The strings are compared character by character if both are valid UTF-8,
     * otherwise they are compared byte by byte.
     *
     * @param string $s1 String1
     * @param string $s2 String2
     *
     * @return float Similarity between 0.0 and 1.0
     *
     * @since 1.0.0
     */
    public static function jaro(string $s1, string $s2) : float
    {
        [$chars1, $chars2] = self::splitCharacters($s1, $s2);

        $s1Size = \count($chars1);
        $s2Size = \count($chars2);

        if ($s1Size === 0) {
            return $s2Size === 0 ? 1.0 : 0.0;
        }

        // Maximum index offset at which two equal characters still count as a match
        $mDistance = (int) (\max($s1Size, $s2Size) / 2 - 1);

        $matches        = 0;
        $transpositions = 0;

        $s1Matches = [];
        $s2Matches = [];

        for ($i = 0; $i < $s1Size; ++$i) {
            $start = \max(0, $i - $mDistance);
            $end   = \min($i + $mDistance + 1, $s2Size);

            for ($j = $start; $j < $end; ++$j) {
                // Every character of the second string can only be matched once
                if (isset($s2Matches[$j])) {
                    continue;
                }

                if ($chars1[$i] !== $chars2[$j]) {
                    continue;
                }

                $s1Matches[$i] = true;
                $s2Matches[$j] = true;

                ++$matches;
                break;
            }
        }

        // Also covers an empty second string, no division by zero below
        if ($matches === 0) {
            return 0.0;
        }

        // Walk the matched characters of both strings in order,
        // every pair which differs is half a transposition
        $j = 0;
        for ($i = 0; $i < $s1Size; ++$i) {
            if (!isset($s1Matches[$i])) {
                continue;
            }

            // Both strings have the same amount of matched characters, $j stays in bounds
            while (!isset($s2Matches[$j])) {
                ++$j;
            }

            if ($chars1[$i] !== $chars2[$j]) {
                ++$transpositions;
            }

            ++$j;
        }

        $halfTranspositions = $transpositions / 2.0;

        return ($matches / $s1Size
            + $matches / $s2Size
            + ($matches - $halfTranspositions) / $matches)
            / 3.0;
    }

    /**
     * Split two strings into their characters.
     *
     * Both strings are split into UTF-8 characters. If at least one of them
     * is not valid UTF-8 both strings are split into bytes instead, so that
     * the elements of both results stay comparable.
     *
     * @param string $s1 String1
     * @param string $s2 String2
     *
     * @return array{0:string[], 1:string[]} Characters of $s1 and $s2
     *
     * @since 1.0.0
     */
    private static function splitCharacters(string $s1, string $s2) : array
    {
        $chars1 = \preg_split('//u', $s1, -1, \PREG_SPLIT_NO_EMPTY);
        $chars2 = \preg_split('//u', $s2, -1, \PREG_SPLIT_NO_EMPTY);

        if ($chars1 === false || $chars2 === false) {
            // str_split('') returns [''] before PHP 8.2, hence the explicit empty check
            return [
                $s1 === '' ? [] : \str_split($s1),
                $s2 === '' ? [] : \str_split($s2),
            ];
        }

        return [$chars1, $chars2];
    }

    /**
     * Calculate word match score.
     *
     * Both strings are split at spaces, underscores and hyphens. For every word
     * of $s1 the smallest levenshtein distance to any word of $s2 is summed up.
     * The distance per word is capped at the byte length of $s2.
     *
     * @param string $s1 Word 1
     * @param string $s2 Word 2
     *
     * @return int Sum of the best word distances (0 = every word of $s1 is found in $s2)
     *
     * @since 1.0.0
     */
    public static function valueWords(string $s1, string $s2) : int
    {
        $words1 = \preg_split('/[ _-]/', $s1);
        $words2 = \preg_split('/[ _-]/', $s2);
        $total  = 0;

        if ($words1 === false || $words2 === false) {
            return \PHP_INT_MAX; // @codeCoverageIgnore
        }

        foreach ($words1 as $word1) {
            $best = \strlen($s2);

            foreach ($words2 as $word2) {
                $wordDist = \levenshtein($word1, $word2);

                if ($wordDist < $best) {
                    $best = $wordDist;
                }

                // Exact match, no better distance possible
                if ($wordDist === 0) {
                    break;
                }
            }

            $total += $best;
        }

        return $total;
    }

    /**
     * Calculate phrase match score.
     *
     * @param string $s1 Word 1
     * @param string $s2 Word 2
     *
     * @return int Levenshtein distance of the two complete strings
     *
     * @since 1.0.0
     */
    public static function valuePhrase(string $s1, string $s2) : int
    {
        return \levenshtein($s1, $s2);
    }

    /**
     * Calculate word length score.
     *
     * @param string $s1 Word 1
     * @param string $s2 Word 2
     *
     * @return int Absolute difference of the two string lengths in bytes
     *
     * @since 1.0.0
     */
    public static function valueLength(string $s1, string $s2) : int
    {
        return \abs(\strlen($s1) - \strlen($s2));
    }

    /**
     * Calculate fuzzy match score.
     *
     * The score combines the weighted phrase score and the weighted word score.
     * The smaller of the two is multiplied with $minWeight, the larger one with
     * $maxWeight and the weighted length difference is added on top.
     * matchDictionary() treats a lower score as the better match.
     *
     * @param string $s1           Word 1
     * @param string $s2           Word 2
     * @param float  $phraseWeight Weighting for phrase score
     * @param float  $wordWeight   Weighting for word score
     * @param float  $minWeight    Weighting for the smaller one of the weighted phrase and word score
     * @param float  $maxWeight    Weighting for the larger one of the weighted phrase and word score
     * @param float  $lengthWeight Weighting for word length
     *
     * @return float
     *
     * @since 1.0.0
     */
    public static function fuzzyMatch(
        string $s1, string $s2,
        float $phraseWeight = 0.5, float $wordWeight = 1.0,
        float $minWeight = 10.0, float $maxWeight = 1.0,
        float $lengthWeight = -0.3
    ) : float
    {
        $weightedPhrase = self::valuePhrase($s1, $s2) * $phraseWeight;
        $weightedWords  = self::valueWords($s1, $s2) * $wordWeight;
        $weightedLength = self::valueLength($s1, $s2) * $lengthWeight;

        return \min($weightedPhrase, $weightedWords) * $minWeight
            + \max($weightedPhrase, $weightedWords) * $maxWeight
            + $weightedLength;
    }
}