Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
92.65% |
63 / 68 |
|
91.67% |
11 / 12 |
CRAP | |
0.00% |
0 / 1 |
MbStringUtils | |
92.65% |
63 / 68 |
|
91.67% |
11 / 12 |
34.46 | |
0.00% |
0 / 1 |
__construct | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
mb_contains | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
mb_startsWith | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
5 | |||
mb_endsWith | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
6 | |||
mb_ucfirst | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
mb_lcfirst | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
mb_trim | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
mb_rtrim | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
mb_ltrim | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
mb_entropy | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
mb_count_chars | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
utf8CharBoundary | |
68.75% |
11 / 16 |
|
0.00% |
0 / 1 |
5.76 | |||
hasMultiBytes | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * Jingga |
4 | * |
5 | * PHP Version 8.1 |
6 | * |
7 | * @package phpOMS\Utils |
8 | * @copyright Dennis Eichhorn |
9 | * @license OMS License 2.0 |
10 | * @version 1.0.0 |
11 | * @link https://jingga.app |
12 | */ |
13 | declare(strict_types=1); |
14 | |
15 | namespace phpOMS\Utils; |
16 | |
17 | use phpOMS\System\CharsetType; |
18 | |
19 | /** |
20 | * String utils class. |
21 | * |
22 | * This class provides static helper functionalities for strings. |
23 | * |
24 | * @package phpOMS\Utils |
25 | * @license OMS License 2.0 |
26 | * @link https://jingga.app |
27 | * @since 1.0.0 |
28 | * |
29 | * @SuppressWarnings(PHPMD.CamelCaseMethodName) |
30 | */ |
final class MbStringUtils
{
    /**
     * Constructor.
     *
     * This class is purely static and is preventing any initialization
     *
     * @since 1.0.0
     * @codeCoverageIgnore
     */
    private function __construct()
    {
    }

    /**
     * Check if a string contains any of the provided needles (case sensitive).
     *
     * The needles are tested in the given order and the search stops at the first hit.
     * An empty needle list always results in false.
     *
     * @param string   $haystack Haystack
     * @param string[] $needles  Needles to check if any of them are part of the haystack
     *
     * @example MbStringUtils::mb_contains('This string', ['This', 'test']); // true
     *
     * @return bool the function returns true if any of the needles is part of the haystack, false otherwise
     *
     * @since 1.0.0
     */
    public static function mb_contains(string $haystack, array $needles) : bool
    {
        foreach ($needles as $needle) {
            if (\mb_strpos($haystack, $needle) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tests if a multi byte string starts with a certain string (case sensitive).
     *
     * The function takes a string or an array of strings for the validation.
     * In case of an array the function will test if any of the needles is at the beginning of the haystack string.
     * An empty needle is considered to be a prefix of every haystack.
     *
     * @param string       $haystack Haystack
     * @param array|string $needles  needles to check if they are at the beginning of the haystack
     *
     * @example MbStringUtils::mb_startsWith('Test string', ['test1', 'Test']); // true
     * @example MbStringUtils::mb_startsWith('Test string', 'test'); // false
     *
     * @return bool the function returns true if any of the needles is at the beginning of the haystack, false otherwise
     *
     * @since 1.0.0
     */
    public static function mb_startsWith(string $haystack, string | array $needles) : bool
    {
        if (\is_string($needles)) {
            $needles = [$needles];
        }

        // A reverse search with an offset of -length can only match at character position 0.
        // The offset does not depend on the needle, therefore it is calculated only once.
        $offset = -\mb_strlen($haystack);

        foreach ($needles as $needle) {
            if ($needle === '' || \mb_strrpos($haystack, $needle, $offset) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tests if a multi byte string ends with a certain string (case sensitive).
     *
     * The function takes a string or an array of strings for the validation.
     * In case of an array the function will test if any of the needles is at the end of the haystack string.
     * An empty needle is considered to be a suffix of every haystack.
     *
     * @param string       $haystack Haystack
     * @param array|string $needles  needles to check if they are at the end of the haystack
     *
     * @example MbStringUtils::mb_endsWith('Test string', ['test1', 'string']); // true
     * @example MbStringUtils::mb_endsWith('Test string', 'string'); // true
     * @example MbStringUtils::mb_endsWith('Test string', 'String'); // false
     *
     * @return bool the function returns true if any of the needles is at the end of the haystack, false otherwise
     *
     * @since 1.0.0
     */
    public static function mb_endsWith(string $haystack, string | array $needles) : bool
    {
        if (\is_string($needles)) {
            $needles = [$needles];
        }

        $haystackLength = \mb_strlen($haystack);

        foreach ($needles as $needle) {
            if ($needle === '') {
                return true;
            }

            // Searching forward from this offset leaves exactly as many characters as the needle has,
            // hence a hit can only be a suffix. A negative offset means the needle is longer than the haystack.
            $offset = $haystackLength - \mb_strlen($needle);

            if ($offset >= 0 && \mb_strpos($haystack, $needle, $offset) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Makes first letter of a multi byte string upper case.
     *
     * @param string $string string to upper case first letter
     *
     * @return string multi byte string with first character as upper case
     *
     * @since 1.0.0
     */
    public static function mb_ucfirst(string $string) : string
    {
        // Omitting the length in mb_substr returns everything after the first character
        return \mb_strtoupper(\mb_substr($string, 0, 1)) . \mb_substr($string, 1);
    }

    /**
     * Makes first letter of a multi byte string lower case.
     *
     * @param string $string string to lower case first letter
     *
     * @return string multi byte string with first character as lower case
     *
     * @since 1.0.0
     */
    public static function mb_lcfirst(string $string) : string
    {
        // Omitting the length in mb_substr returns everything after the first character
        return \mb_strtolower(\mb_substr($string, 0, 1)) . \mb_substr($string, 1);
    }

    /**
     * Trim multi byte characters from both sides of a multi byte string.
     *
     * The default char list ' ' falls back to the native trim() with its default characters.
     * An empty char list returns the string unchanged.
     *
     * @param string $string   multi byte string to trim multi byte characters from
     * @param string $charlist Multi byte character list used for trimming
     *
     * @example MbStringUtils::mb_trim('ääTestää', 'ä'); // 'Test'
     *
     * @return string trimmed multi byte string (empty string if the regex engine fails, e.g. on invalid UTF-8)
     *
     * @since 1.0.0
     */
    public static function mb_trim(string $string, string $charlist = ' ') : string
    {
        if ($charlist === ' ') {
            return \trim($string);
        }

        if ($charlist === '') {
            // An empty character class is not a valid pattern, there is nothing to trim
            return $string;
        }

        $charlist = \preg_quote($charlist, '/');

        // \z only matches at the very end, a plain $ would also match in front of a trailing new line
        return \preg_replace('/(^[' . $charlist . ']+)|([' . $charlist . ']+\z)/u', '', $string) ?? '';
    }

    /**
     * Trim multi byte characters from the right of a multi byte string.
     *
     * The default char list ' ' falls back to the native rtrim() with its default characters.
     * An empty char list returns the string unchanged.
     *
     * @param string $string   multi byte string to trim multi byte characters from
     * @param string $charlist Multi byte character list used for trimming
     *
     * @example MbStringUtils::mb_rtrim('ääTestää', 'ä'); // 'ääTest'
     *
     * @return string trimmed multi byte string (empty string if the regex engine fails, e.g. on invalid UTF-8)
     *
     * @since 1.0.0
     */
    public static function mb_rtrim(string $string, string $charlist = ' ') : string
    {
        if ($charlist === ' ') {
            return \rtrim($string);
        }

        if ($charlist === '') {
            // An empty character class is not a valid pattern, there is nothing to trim
            return $string;
        }

        $charlist = \preg_quote($charlist, '/');

        // \z only matches at the very end, a plain $ would also match in front of a trailing new line
        return \preg_replace('/([' . $charlist . ']+\z)/u', '', $string) ?? '';
    }

    /**
     * Trim multi byte characters from the left of a multi byte string.
     *
     * The default char list ' ' falls back to the native ltrim() with its default characters.
     * An empty char list returns the string unchanged.
     *
     * @param string $string   multi byte string to trim multi byte characters from
     * @param string $charlist Multi byte character list used for trimming
     *
     * @example MbStringUtils::mb_ltrim('ääTestää', 'ä'); // 'Testää'
     *
     * @return string trimmed multi byte string (empty string if the regex engine fails, e.g. on invalid UTF-8)
     *
     * @since 1.0.0
     */
    public static function mb_ltrim(string $string, string $charlist = ' ') : string
    {
        if ($charlist === ' ') {
            return \ltrim($string);
        }

        if ($charlist === '') {
            // An empty character class is not a valid pattern, there is nothing to trim
            return $string;
        }

        $charlist = \preg_quote($charlist, '/');

        return \preg_replace('/(^[' . $charlist . ']+)/u', '', $string) ?? '';
    }

    /**
     * Calculate string entropy
     *
     * The result is the Shannon entropy (base 2) of the character distribution of the string.
     * An empty string has an entropy of 0.0.
     *
     * @param string $value string to analyze
     *
     * @return float
     *
     * @since 1.0.0
     */
    public static function mb_entropy(string $value) : float
    {
        $countChars = self::mb_count_chars($value);

        // The size is derived from the counts so that the probabilities always sum up to 1
        $size    = \array_sum($countChars);
        $log2    = \log(2);
        $entropy = 0.0;

        foreach ($countChars as $count) {
            $p        = $count / $size;
            $entropy -= $p * \log($p) / $log2;
        }

        return $entropy;
    }

    /**
     * Count chars of utf-8 string.
     *
     * The characters are the keys of the returned array in order of their first occurrence.
     * PHP converts numeric string keys to integers, therefore digits show up as int keys.
     *
     * @param string $input string to count chars
     *
     * @return array<int|string, int>
     *
     * @since 1.0.0
     */
    public static function mb_count_chars(string $input) : array
    {
        // Splitting once is linear, a mb_substr call per index has to re-scan the utf-8 string every time
        return \array_count_values(\mb_str_split($input, 1, 'UTF-8'));
    }

    /**
     * Get the utf-8 boundary of a string
     *
     * Looks backwards from $length in steps of 3 bytes (the size of one "=XX" sequence) for an encoded byte
     * and moves the boundary in front of it if the encoded byte starts a character.
     *
     * @param string $text   QP text to search for utf-8 boundary
     * @param int    $length Last character boundary prior to this length
     *
     * @return int
     *
     * @since 1.0.0
     */
    public static function utf8CharBoundary(string $text, int $length = 0) : int
    {
        $textLength = \strlen($text);
        $reset      = 3;

        do {
            $lastChunk  = \substr($text, $length - $reset, $reset);
            $encodedPos = \strpos($lastChunk, '=');

            if ($encodedPos === false) {
                break; // @codeCoverageIgnore
            }

            // Position of the two hex digits following the '='
            $hexPos = $length - $reset + $encodedPos + 1;
            $dec    = \hexdec(\substr($text, $hexPos, 2));

            if ($dec < 128) {
                // Single byte character, only move the boundary if it doesn't start the chunk
                if ($encodedPos > 0) {
                    $length -= $reset - $encodedPos;
                }

                break;
            } elseif ($dec >= 192) {
                // First byte of a multi byte character, split in front of it
                $length -= $reset - $encodedPos;
                break;
            }

            /* 128 <= $dec < 192: middle byte of a multi byte character, look further back */
            if ($hexPos <= -$textLength) {
                // The look back already got clamped to the start of the text. Every further iteration
                // would decode the exact same byte again and therefore never terminate (malformed input).
                break;
            }

            $reset += 3;
        } while (true);

        return $length;
    }

    /**
     * Test if a string has multibytes
     *
     * The string has multibytes if its byte length is larger than its character length in the given charset.
     *
     * @param string $text    Text to check
     * @param string $charset Charset to check
     *
     * @return bool
     *
     * @since 1.0.0
     */
    public static function hasMultiBytes(string $text, string $charset = CharsetType::UTF_8) : bool
    {
        return \strlen($text) > \mb_strlen($text, $charset);
    }
}