Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
69 / 69
100.00% covered (success)
100.00%
8 / 8
CRAP
100.00% covered (success)
100.00%
1 / 1
StringCompare
100.00% covered (success)
100.00%
69 / 69
100.00% covered (success)
100.00%
8 / 8
27
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 add
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 matchDictionary
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
3
 jaro
100.00% covered (success)
100.00%
37 / 37
100.00% covered (success)
100.00%
1 / 1
12
 valueWords
100.00% covered (success)
100.00%
14 / 14
100.00% covered (success)
100.00%
1 / 1
7
 valuePhrase
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 valueLength
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 fuzzyMatch
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2/**
3 * Jingga
4 *
5 * PHP Version 8.1
6 *
7 * @package   phpOMS\Utils
8 * @copyright Dennis Eichhorn
9 * @license   OMS License 2.0
10 * @version   1.0.0
11 * @link      https://jingga.app
12 */
13declare(strict_types=1);
14
15namespace phpOMS\Utils;
16
/**
 * Fuzzy string comparison helper.
 *
 * Holds a dictionary of words and can pick the entry that scores best against
 * a search term. The individual scoring functions are exposed as static methods.
 *
 * @package phpOMS\Utils
 * @license OMS License 2.0
 * @link    https://jingga.app
 * @since   1.0.0
 */
final class StringCompare
{
    /**
     * Words the instance matches against.
     *
     * @var string[]
     * @since 1.0.0
     */
    private array $dictionary = [];

    /**
     * Create a comparer with an initial dictionary.
     *
     * @param string[] $dictionary Initial list of words
     *
     * @since 1.0.0
     */
    public function __construct(array $dictionary)
    {
        $this->dictionary = $dictionary;
    }

    /**
     * Append a single word to the dictionary.
     *
     * @param string $word Word to append
     *
     * @return void
     *
     * @since 1.0.0
     */
    public function add(string $word) : void
    {
        $this->dictionary[] = $word;
    }

    /**
     * Find the dictionary entry with the lowest fuzzy match score.
     *
     * If several entries share the lowest score the one that comes first in
     * the dictionary is returned. An empty dictionary yields an empty string.
     *
     * @param string $match Word to look up
     *
     * @return string Best matching dictionary entry
     *
     * @since 1.0.0
     */
    public function matchDictionary(string $match) : string
    {
        $closest = '';
        $lowest  = \PHP_INT_MAX;

        foreach ($this->dictionary as $candidate) {
            $current = self::fuzzyMatch($candidate, $match);

            // Only a strictly lower score replaces the current best entry
            if ($current >= $lowest) {
                continue;
            }

            $lowest  = $current;
            $closest = $candidate;
        }

        return $closest;
    }

    /**
     * Jaro similarity of two strings (byte-wise).
     *
     * Returns 1.0 for two empty strings and 0.0 if no characters match.
     *
     * @param string $s1 First string
     * @param string $s2 Second string
     *
     * @return float
     *
     * @since 1.0.0
     */
    public static function jaro(string $s1, string $s2) : float
    {
        $len1 = \strlen($s1);
        $len2 = \strlen($s2);

        if ($len1 === 0) {
            if ($len2 === 0) {
                return 1.0;
            }

            return 0.0;
        }

        // Characters only count as a match if they are at most this many positions apart
        $window = (int) (\max($len1, $len2) / 2 - 1);

        $used1 = [];
        $used2 = [];

        for ($pos1 = 0; $pos1 < $len1; ++$pos1) {
            $from = \max(0, $pos1 - $window);
            $to   = \min($pos1 + $window + 1, $len2);

            for ($pos2 = $from; $pos2 < $to; ++$pos2) {
                // Take the first unused, identical character inside the window
                if (!isset($used2[$pos2]) && $s1[$pos1] === $s2[$pos2]) {
                    $used1[$pos1] = true;
                    $used2[$pos2] = true;

                    break;
                }
            }
        }

        // Every matched position of $s1 was recorded exactly once
        $matches = \count($used1);
        if ($matches === 0) {
            return 0.0;
        }

        // $used1 is already in ascending order, $used2 has to be sorted by position
        \ksort($used2);
        $matched1 = \array_keys($used1);
        $matched2 = \array_keys($used2);

        // Compare the n-th matched character of $s1 with the n-th matched character of $s2
        $outOfOrder = 0;
        foreach ($matched1 as $rank => $pos1) {
            if ($s1[$pos1] !== $s2[$matched2[$rank]]) {
                ++$outOfOrder;
            }
        }

        $transpositions = $outOfOrder / 2.0;

        return ($matches / $len1
            + $matches / $len2
            + ($matches - $transpositions) / $matches)
            / 3.0;
    }

    /**
     * Word based match score.
     *
     * Both strings are split at spaces, underscores and hyphens. For every
     * word of the first string the smallest Levenshtein distance to a word of
     * the second string is used (at most the length of the second string) and
     * these values are summed up.
     *
     * @param string $s1 First string
     * @param string $s2 Second string
     *
     * @return int
     *
     * @since 1.0.0
     */
    public static function valueWords(string $s1, string $s2) : int
    {
        $left  = \preg_split('/[ _-]/', $s1);
        $right = \preg_split('/[ _-]/', $s2);

        if ($left === false || $right === false) {
            return \PHP_INT_MAX; // @codeCoverageIgnore
        }

        // Upper limit for the contribution of a single word
        $limit = \strlen($s2);
        $sum   = 0;

        foreach ($left as $leftWord) {
            $closest = $limit;

            foreach ($right as $rightWord) {
                $distance = \levenshtein($leftWord, $rightWord);
                $closest  = \min($closest, $distance);

                // An exact match cannot be improved
                if ($distance === 0) {
                    break;
                }
            }

            $sum += $closest;
        }

        return $sum;
    }

    /**
     * Phrase match score (Levenshtein distance of the complete strings).
     *
     * @param string $s1 First string
     * @param string $s2 Second string
     *
     * @return int
     *
     * @since 1.0.0
     */
    public static function valuePhrase(string $s1, string $s2) : int
    {
        $distance = \levenshtein($s1, $s2);

        return $distance;
    }

    /**
     * Length score (absolute difference of the byte lengths).
     *
     * @param string $s1 First string
     * @param string $s2 Second string
     *
     * @return int
     *
     * @since 1.0.0
     */
    public static function valueLength(string $s1, string $s2) : int
    {
        $difference = \strlen($s1) - \strlen($s2);

        return \abs($difference);
    }

    /**
     * Combined fuzzy match score.
     *
     * The weighted phrase and word scores are compared; the smaller one is
     * multiplied with the min weight, the larger one with the max weight and
     * the weighted length score is added. matchDictionary() treats the lowest
     * score as the best match.
     *
     * @param string $s1           First string
     * @param string $s2           Second string
     * @param float  $phraseWeight Factor applied to the phrase score
     * @param float  $wordWeight   Factor applied to the word score
     * @param float  $minWeight    Factor applied to the smaller weighted score
     * @param float  $maxWeight    Factor applied to the larger weighted score
     * @param float  $lengthWeight Factor applied to the length score
     *
     * @return float
     *
     * @since 1.0.0
     */
    public static function fuzzyMatch(
        string $s1, string $s2,
        float $phraseWeight = 0.5, float $wordWeight = 1.0,
        float $minWeight = 10.0, float $maxWeight = 1.0,
        float $lengthWeight = -0.3
    ) : float
    {
        $weightedPhrase = self::valuePhrase($s1, $s2) * $phraseWeight;
        $weightedWords  = self::valueWords($s1, $s2) * $wordWeight;
        $lengthValue    = self::valueLength($s1, $s2);

        return \min($weightedPhrase, $weightedWords) * $minWeight
            + \max($weightedPhrase, $weightedWords) * $maxWeight
            + $lengthValue * $lengthWeight;
    }
}
267}