Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
CRAP | |
0.00% |
0 / 1 |
| WhitespaceTokenizer | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |
0.00% |
0 / 1 |
| tokenize | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Jingga |
| 4 | * |
| 5 | * PHP Version 8.1 |
| 6 | * |
| 7 | * @package phpOMS\Localization\LanguageDetection\Tokenizer |
| 8 | * @author Patrick Schur <patrick_schur@outlook.de> |
| 9 | * @copyright Patrick Schur |
| 10 | * @license https://opensource.org/licenses/mit-license.html MIT |
| 11 | * @link https://github.com/patrickschur/language-detection |
| 12 | */ |
| 13 | declare(strict_types=1); |
| 14 | |
| 15 | namespace phpOMS\Localization\LanguageDetection\Tokenizer; |
| 16 | |
| 17 | /** |
| 18 | * Whitespace tokenizer |
| 19 | * |
| 20 | * @package phpOMS\Localization\LanguageDetection\Tokenizer |
| 21 | * @license https://opensource.org/licenses/mit-license.html MIT |
| 22 | * @link https://github.com/patrickschur/language-detection |
| 23 | * @since 1.0.0 |
| 24 | */ |
class WhitespaceTokenizer
{
    /**
     * Split a string into words and wrap each word in underscores
     *
     * Separators are runs of one or more non-letter characters (Unicode aware).
     * A separator is not allowed to end in an apostrophe (U+0027), a backtick
     * (U+0060) or a right single quotation mark (U+2019), which keeps words
     * such as "don't" in one piece. Empty pieces are discarded.
     *
     * @param string $str String to tokenize
     *
     * @return array List of words, each formatted as "_word_"; empty if the string
     *               contains no letters or if the split itself fails
     *
     * @since 1.0.0
     */
    public function tokenize(string $str) : array
    {
        $words = \preg_split('/[^\pL]+(?<![\x27\x60\x{2019}])/u', $str, -1, \PREG_SPLIT_NO_EMPTY);

        $tokens = [];

        // preg_split() yields false on failure; in that case the result stays empty.
        if ($words !== false) {
            foreach ($words as $word) {
                $tokens[] = "_{$word}_";
            }
        }

        return $tokens;
    }
}