Skip to content

Commit b3998f5

Browse files
committed
Tokenizer: float support + more tests
1 parent 14834b6 commit b3998f5

File tree

3 files changed

+99
-73
lines changed

src/Query/TokenType.php

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,25 @@
44

55
enum TokenType
66
{
7+
case T_IDENTIFIER;
78
case T_STRING;
89
case T_INTEGER;
9-
case T_WHITESPACE;
10-
case T_IDENTIFIER;
10+
case T_FLOAT;
11+
case T_TRUE;
12+
case T_FALSE;
13+
case T_NULL;
1114
case T_DOT;
15+
case T_COLON;
16+
case T_QUESTION_MARK;
1217
case T_OPEN_PAREN;
1318
case T_CLOSE_PAREN;
1419
case T_OPEN_BRACKET;
1520
case T_CLOSE_BRACKET;
16-
case T_QUESTION_MARK;
1721
case T_TERNARY_DEFAULT; // ?:
1822
case T_NULLSAFE; // ?.
19-
case T_COLON;
2023
case T_COALESCE; // ??
2124
case T_COMMA;
22-
case T_EOF;
23-
case T_TRUE;
24-
case T_FALSE;
25-
case T_NULL;
2625
case T_ARROW;
26+
case T_WHITESPACE;
27+
case T_EOF;
2728
}

src/Query/Tokenizer.php

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -54,24 +54,23 @@ public function __construct(
5454
* Matches a regex pattern at the current position in the source string.
5555
* Returns the matched lexeme, or null if the pattern does not match at that position.
5656
*
57-
* @param int $current Current position in the source string (used as offset for the regex)
57+
* @param int $offset Current position in the source string
5858
* @param string $regex Regex pattern without delimiters/flags
59-
* @param string $lexeme Matched lexeme will be stored in this variable
6059
*/
61-
protected static function match(
60+
public static function match(
6261
string $source,
63-
int $current,
62+
int $offset,
6463
string $regex,
65-
bool $caseIgnore = false
64+
bool $caseInsensitive = false
6665
): string|null {
6766
// add delimiters and flags to the regex
6867
$regex = '/\G' . $regex . '/u';
6968

70-
if ($caseIgnore === true) {
69+
if ($caseInsensitive === true) {
7170
$regex .= 'i';
7271
}
7372

74-
preg_match($regex, $source, $matches, 0, $current);
73+
preg_match($regex, $source, $matches, 0, $offset);
7574

7675
if (empty($matches[0]) === true) {
7776
return null;
@@ -88,13 +87,12 @@ protected static function match(
8887
*
8988
* @throws Exception If an unexpected character is encountered
9089
*/
91-
protected static function token(string $source, int $current): Token
90+
public static function token(string $source, int $current): Token
9291
{
93-
$lex = '';
9492
$char = $source[$current];
9593

9694
// single character tokens:
97-
$token = match($char) {
95+
$token = match ($char) {
9896
'.' => new Token(TokenType::T_DOT, '.'),
9997
'(' => new Token(TokenType::T_OPEN_PAREN, '('),
10098
')' => new Token(TokenType::T_CLOSE_PAREN, ')'),
@@ -175,13 +173,16 @@ protected static function token(string $source, int $current): Token
175173
);
176174
}
177175

176+
// float
177+
if ($lex = static::match($source, $current, '-?\d+\.\d+\b')) {
178+
return new Token(TokenType::T_FLOAT, $lex, (float)$lex);
179+
}
180+
178181
// int
179-
if ($lex = static::match($source, $current, '\d+\b')) {
182+
if ($lex = static::match($source, $current, '-?\d+\b')) {
180183
return new Token(TokenType::T_INTEGER, $lex, (int)$lex);
181184
}
182185

183-
// TODO: float?
184-
185186
// identifier
186187
if ($lex = static::match($source, $current, static::IDENTIFIER_REGEX)) {
187188
return new Token(TokenType::T_IDENTIFIER, $lex);

tests/Query/TokenizerTest.php

Lines changed: 76 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -10,70 +10,94 @@
1010
class TokenizerTest extends TestCase
1111
{
1212
/**
13-
* @covers ::__construct
14-
* @covers ::tokens
13+
* @covers ::match
1514
*/
16-
public function testTokens()
15+
public function testMatch(): void
1716
{
18-
$string = "user.likes(['(', ')']).drink";
19-
$tokenizer = new Tokenizer($string);
20-
$tokens = $tokenizer->tokens();
17+
$string = 'Find ?? a TRUE';
2118

22-
$token = $tokens->current();
23-
$this->assertSame(TokenType::T_IDENTIFIER, $token->type);
24-
$this->assertSame('user', $token->lexeme);
25-
26-
$tokens->next();
27-
$token = $tokens->current();
28-
$this->assertSame(TokenType::T_DOT, $token->type);
29-
30-
$tokens->next();
31-
$token = $tokens->current();
32-
$this->assertSame(TokenType::T_IDENTIFIER, $token->type);
33-
$this->assertSame('likes', $token->lexeme);
19+
$this->assertNull(Tokenizer::match($string, 0, '\?\?'));
20+
$this->assertSame('??', Tokenizer::match($string, 5, '\?\?'));
3421

35-
$tokens->next();
36-
$token = $tokens->current();
37-
$this->assertSame(TokenType::T_OPEN_PAREN, $token->type);
38-
39-
$tokens->next();
40-
$token = $tokens->current();
41-
$this->assertSame(TokenType::T_OPEN_BRACKET, $token->type);
42-
43-
$tokens->next();
44-
$token = $tokens->current();
45-
$this->assertSame(TokenType::T_STRING, $token->type);
46-
$this->assertSame("'('", $token->lexeme);
22+
$this->assertNull(Tokenizer::match($string, 10, 'true'));
23+
$this->assertSame('TRUE', Tokenizer::match($string, 10, 'true', true));
24+
}
4725

48-
$tokens->next();
49-
$token = $tokens->current();
50-
$this->assertSame(TokenType::T_COMMA, $token->type);
26+
public static function stringProvider(): string
27+
{
28+
return 'site?.([\'number\' => 3], null) ? (true ?: 4.1) : ("fox" ?? false)';
29+
}
5130

52-
$tokens->next();
53-
$token = $tokens->current();
54-
$this->assertSame(TokenType::T_STRING, $token->type);
55-
$this->assertSame("')'", $token->lexeme);
31+
public static function tokenProvider(): array
32+
{
33+
return [
34+
[0, TokenType::T_IDENTIFIER, 'site'],
35+
[4, TokenType::T_NULLSAFE, '?.'],
36+
[6, TokenType::T_OPEN_PAREN, '('],
37+
[7, TokenType::T_OPEN_BRACKET, '['],
38+
[8, TokenType::T_STRING, '\'number\'', 'number'],
39+
[16, TokenType::T_WHITESPACE, ' '],
40+
[17, TokenType::T_ARROW, '=>'],
41+
[20, TokenType::T_INTEGER, '3', 3],
42+
[21, TokenType::T_CLOSE_BRACKET, ']'],
43+
[22, TokenType::T_COMMA, ','],
44+
[24, TokenType::T_NULL, 'null', null],
45+
[28, TokenType::T_CLOSE_PAREN, ')'],
46+
[30, TokenType::T_QUESTION_MARK, '?'],
47+
[32, TokenType::T_OPEN_PAREN, '('],
48+
[33, TokenType::T_TRUE, 'true', true],
49+
[38, TokenType::T_TERNARY_DEFAULT, '?:'],
50+
[41, TokenType::T_FLOAT, '4.1', 4.1],
51+
[44, TokenType::T_CLOSE_PAREN, ')'],
52+
[46, TokenType::T_COLON, ':'],
53+
[48, TokenType::T_OPEN_PAREN, '('],
54+
[49, TokenType::T_STRING, '"fox"', 'fox'],
55+
[55, TokenType::T_COALESCE, '??'],
56+
[58, TokenType::T_FALSE, 'false', false],
57+
[63, TokenType::T_CLOSE_PAREN, ')']
58+
];
59+
}
5660

57-
$tokens->next();
58-
$token = $tokens->current();
59-
$this->assertSame(TokenType::T_CLOSE_BRACKET, $token->type);
61+
/**
62+
* @covers ::token
63+
* @dataProvider tokenProvider
64+
*/
65+
public function testToken(
66+
int $offset,
67+
TokenType $type,
68+
string $lexeme,
69+
mixed $literal = null
70+
): void {
71+
$string = static::stringProvider();
72+
$token = Tokenizer::token($string, $offset);
73+
$this->assertSame($type, $token->type);
74+
$this->assertSame($lexeme, $token->lexeme);
75+
$this->assertSame($literal, $token->literal);
76+
}
6077

61-
$tokens->next();
62-
$token = $tokens->current();
63-
$this->assertSame(TokenType::T_CLOSE_PAREN, $token->type);
78+
/**
79+
* @covers ::__construct
80+
* @covers ::tokens
81+
*/
82+
public function testTokens()
83+
{
84+
$string = static::stringProvider();
85+
$tokenizer = new Tokenizer($string);
86+
$tokens = $tokenizer->tokens();
6487

65-
$tokens->next();
66-
$token = $tokens->current();
67-
$this->assertSame(TokenType::T_DOT, $token->type);
88+
foreach (static::tokenProvider() as $expected) {
89+
if ($expected[1] === TokenType::T_WHITESPACE) {
90+
continue;
91+
}
6892

69-
$tokens->next();
70-
$token = $tokens->current();
71-
$this->assertSame(TokenType::T_IDENTIFIER, $token->type);
72-
$this->assertSame('drink', $token->lexeme);
93+
$token = $tokens->current();
94+
$this->assertSame($expected[1], $token->type);
95+
$this->assertSame($expected[2], $token->lexeme);
96+
$this->assertSame($expected[3] ?? null, $token->literal);
97+
$tokens->next();
98+
}
7399

74-
$tokens->next();
75100
$token = $tokens->current();
76101
$this->assertSame(TokenType::T_EOF, $token->type);
77102
}
78-
79103
}

0 commit comments

Comments
 (0)