Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
CRAP | |
0.00% |
0 / 1 |
WhitespaceTokenizer | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |
0.00% |
0 / 1 |
tokenize | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 |
1 | <?php |
2 | /** |
3 | * Jingga |
4 | * |
5 | * PHP Version 8.1 |
6 | * |
7 | * @package phpOMS\Localization\LanguageDetection\Tokenizer |
8 | * @author Patrick Schur <patrick_schur@outlook.de> |
9 | * @copyright Patrick Schur |
10 | * @license https://opensource.org/licenses/mit-license.html MIT |
11 | * @link https://github.com/patrickschur/language-detection |
12 | */ |
13 | declare(strict_types=1); |
14 | |
15 | namespace phpOMS\Localization\LanguageDetection\Tokenizer; |
16 | |
17 | /** |
18 | * Whitespace tokenizer |
19 | * |
20 | * @package phpOMS\Localization\LanguageDetection\Tokenizer |
21 | * @license https://opensource.org/licenses/mit-license.html MIT |
22 | * @link https://github.com/patrickschur/language-detection |
23 | * @since 1.0.0 |
24 | */ |
class WhitespaceTokenizer
{
    /**
     * Break a string into words and wrap every word in underscores.
     *
     * The input is split on runs of non-letter characters (anything that is
     * not in the Unicode "letter" category). A run is not allowed to end in
     * an apostrophe (U+0027), a backtick (U+0060) or a right single quotation
     * mark (U+2019), so such a mark stays attached to the letters around it
     * (e.g. "don't" remains one word). Empty pieces are discarded.
     *
     * @param string $str String to tokenize
     *
     * @return array List of words, each in the form "_word_"; an empty array
     *               if the regular expression split fails
     *
     * @since 1.0.0
     */
    public function tokenize(string $str) : array
    {
        $tokens = [];

        // The "u" modifier makes pattern and subject be treated as UTF-8.
        $pieces = \preg_split('/[^\pL]+(?<![\x27\x60\x{2019}])/u', $str, -1, \PREG_SPLIT_NO_EMPTY);

        // preg_split() signals failure with false; in that case no tokens are produced.
        if ($pieces !== false) {
            foreach ($pieces as $piece) {
                $tokens[] = '_' . $piece . '_';
            }
        }

        return $tokens;
    }
}