Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
CRAP
0.00% covered (danger)
0.00%
0 / 1
WhitespaceTokenizer
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
2.01
0.00% covered (danger)
0.00%
0 / 1
 tokenize
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
2.01
1<?php
2/**
3 * Jingga
4 *
5 * PHP Version 8.1
6 *
7 * @package   phpOMS\Localization\LanguageDetection\Tokenizer
8 * @author    Patrick Schur <patrick_schur@outlook.de>
9 * @copyright Patrick Schur
10 * @license   https://opensource.org/licenses/mit-license.html MIT
11 * @link      https://github.com/patrickschur/language-detection
12 */
13declare(strict_types=1);
14
15namespace phpOMS\Localization\LanguageDetection\Tokenizer;
16
/**
 * Whitespace tokenizer
 *
 * Breaks a text into its words and wraps every word in underscores so that
 * word boundaries stay visible in the returned tokens.
 *
 * @package phpOMS\Localization\LanguageDetection\Tokenizer
 * @license https://opensource.org/licenses/mit-license.html MIT
 * @link    https://github.com/patrickschur/language-detection
 * @since   1.0.0
 */
class WhitespaceTokenizer
{
    /**
     * Tokenize string
     *
     * The input is split on runs of non-letter characters. Empty pieces are
     * discarded and every remaining piece is returned as "_piece_".
     *
     * @param string $str String to tokenize
     *
     * @return string[] Underscore-wrapped tokens; empty if nothing could be split
     *
     * @since 1.0.0
     */
    public function tokenize(string $str) : array
    {
        // Delimiter: one or more non-letters (\pL = any Unicode letter).
        // The negative lookbehind forbids a delimiter run from ending in an
        // apostrophe (U+0027), a backtick (U+0060) or a right single quotation
        // mark (U+2019), so e.g. "don't" is not broken apart at the apostrophe.
        $words = \preg_split(
            '/[^\pL]+(?<![\x27\x60\x{2019}])/u',
            $str,
            -1,
            \PREG_SPLIT_NO_EMPTY
        );

        // preg_split() yields false on failure (for example when the subject
        // is not valid UTF-8); report that as "no tokens".
        if ($words === false) {
            return [];
        }

        // The callback does not need $this, hence a static arrow function.
        return \array_map(
            static fn (string $word) : string => "_{$word}_",
            $words
        );
    }
}