Skip to content

Commit c2122b5

Browse files
committed
TASK: Implement Lexer
1 parent 5b3f7ca commit c2122b5

File tree

18 files changed

+2139
-0
lines changed

18 files changed

+2139
-0
lines changed

scripts/test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@
1717
--display-deprecations \
1818
--display-errors \
1919
--display-notices \
20+
--display-warnings \
2021
--coverage-html build/coverage-report \
2122
--coverage-filter src $@
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<?php
2+
3+
/**
4+
* PackageFactory.ComponentEngine - Universal View Components for PHP
5+
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
6+
*
7+
* This program is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
19+
*/
20+
21+
declare(strict_types=1);
22+
23+
namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream;
24+
25+
use PackageFactory\ComponentEngine\Parser\Source\Position;
26+
27+
/**
 * Forward-only reader that walks a source string one character at a time.
 *
 * A "character" is the lead byte plus the number of trailing bytes implied by
 * the lead byte's value (the thresholds 0x80 / 0xe0 / 0xf0 correspond to
 * UTF-8 sequences of 2, 3 and 4 bytes). The input is not validated.
 *
 * The constructor immediately loads the first character, so `current()` is
 * usable right after construction.
 */
final class CharacterStream
{
    /** Offset (in bytes) of the next unread byte of the source. */
    private int $byte;

    /** Tracks the position of the character under the cursor. */
    private Cursor $cursor;

    /** The character most recently read, or null once the source is exhausted. */
    private ?string $characterUnderCursor = null;

    public function __construct(private readonly string $source)
    {
        $this->byte = 0;
        $this->cursor = new Cursor();

        $this->next();
    }

    /**
     * Moves the stream forward by one character.
     *
     * The character that is being left is reported to the cursor first, then
     * the next character is assembled from the source. When no byte is left,
     * the character under the cursor becomes null.
     */
    public function next(): void
    {
        $this->cursor->advance($this->characterUnderCursor);

        // `??` keeps the out-of-range read at the end of the source silent.
        $nextCharacter = $this->source[$this->byte++] ?? null;
        if ($nextCharacter === null) {
            $this->characterUnderCursor = null;
            return;
        }

        $ord = ord($nextCharacter);
        $numberOfTrailingBytes = match (true) {
            $ord >= 0xf0 => 3,
            $ord >= 0xe0 => 2,
            $ord >= 0x80 => 1,
            default => 0,
        };

        if ($numberOfTrailingBytes > 0) {
            // substr() returns only the bytes that actually exist, so a source
            // that ends in the middle of a multi-byte sequence no longer
            // triggers an "Uninitialized string offset" warning.
            $nextCharacter .= substr($this->source, $this->byte, $numberOfTrailingBytes);
            $this->byte += $numberOfTrailingBytes;
        }

        $this->characterUnderCursor = $nextCharacter;
    }

    /**
     * @return string|null The character under the cursor, or null at the end of the source
     */
    public function current(): ?string
    {
        return $this->characterUnderCursor;
    }

    /**
     * @return bool True once there is no character under the cursor anymore
     */
    public function isEnd(): bool
    {
        return $this->characterUnderCursor === null;
    }

    /**
     * @return Position The cursor's current position
     */
    public function getCurrentPosition(): Position
    {
        return $this->cursor->getCurrentPosition();
    }

    /**
     * @return Position The cursor's previous position
     */
    public function getPreviousPosition(): Position
    {
        return $this->cursor->getPreviousPosition();
    }
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
<?php
2+
3+
/**
4+
* PackageFactory.ComponentEngine - Universal View Components for PHP
5+
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
6+
*
7+
* This program is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
19+
*/
20+
21+
declare(strict_types=1);
22+
23+
namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream;
24+
25+
use PackageFactory\ComponentEngine\Parser\Source\Position;
26+
27+
/**
 * Line/column bookkeeping for a character-wise walk through a source.
 *
 * Both coordinates start at 0. The "previous" coordinates start at -1 and
 * only become valid after the first non-null character has been passed to
 * `advance()`.
 */
final class Cursor
{
    private int $currentLineNumber = 0;
    private int $currentColumnNumber = 0;
    private int $previousLineNumber = -1;
    private int $previousColumnNumber = -1;

    /**
     * Moves the cursor past the given character.
     *
     * A null character leaves the cursor untouched. Otherwise the current
     * coordinates are remembered as the previous ones; a "\n" then starts a
     * new line at column 0, any other character moves one column to the right.
     */
    public function advance(?string $character): void
    {
        if ($character === null) {
            return;
        }

        $this->previousColumnNumber = $this->currentColumnNumber;
        $this->previousLineNumber = $this->currentLineNumber;

        $startsNewLine = $character === "\n";

        $this->currentLineNumber += $startsNewLine ? 1 : 0;
        $this->currentColumnNumber = $startsNewLine
            ? 0
            : $this->currentColumnNumber + 1;
    }

    /**
     * @return Position A new Position built from the current line and column
     */
    public function getCurrentPosition(): Position
    {
        return new Position(
            $this->currentLineNumber,
            $this->currentColumnNumber
        );
    }

    /**
     * Must only be called after the cursor has advanced at least once; this
     * is guarded by assertions.
     *
     * @return Position A new Position built from the previous line and column
     */
    public function getPreviousPosition(): Position
    {
        assert($this->previousLineNumber >= 0);
        assert($this->previousColumnNumber >= 0);

        return new Position(
            $this->previousLineNumber,
            $this->previousColumnNumber
        );
    }
}

src/Language/Lexer/Lexer.php

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
<?php
2+
3+
/**
4+
* PackageFactory.ComponentEngine - Universal View Components for PHP
5+
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
6+
*
7+
* This program is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
19+
*/
20+
21+
declare(strict_types=1);
22+
23+
namespace PackageFactory\ComponentEngine\Language\Lexer;
24+
25+
use PackageFactory\ComponentEngine\Language\Lexer\CharacterStream\CharacterStream;
26+
use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher;
27+
use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result;
28+
use PackageFactory\ComponentEngine\Language\Lexer\Token\Token;
29+
use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType;
30+
use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes;
31+
use PackageFactory\ComponentEngine\Parser\Source\Position;
32+
use PackageFactory\ComponentEngine\Parser\Source\Range;
33+
34+
/**
 * On-demand lexer: the caller states which token type(s) it expects next and
 * the lexer consumes characters from the source accordingly.
 *
 * After a successful read the token can be fetched via
 * `getTokenUnderCursor()`. Once a read has thrown a LexerException, that
 * exception is remembered and every further call trips an assertion.
 */
final class Lexer
{
    private readonly CharacterStream $characterStream;

    /** Position at which the most recent read started. */
    private ?Position $startPosition = null;

    /** Number of characters consumed for the token currently being read. */
    private int $offset = 0;

    /** Characters consumed for the token currently being read. */
    private string $buffer = '';

    private ?TokenType $tokenTypeUnderCursor = null;

    /** Lazily created by getTokenUnderCursor(), discarded on every read. */
    private ?Token $tokenUnderCursor = null;

    private ?LexerException $latestError = null;

    public function __construct(string $source)
    {
        $this->characterStream = new CharacterStream($source);
    }

    /**
     * Reads exactly one token of the given type, starting at the current
     * position of the character stream.
     *
     * Characters are fed to the token type's matcher one by one. While the
     * matcher answers Result::KEEP the character is consumed; on
     * Result::SATISFIED the read ends without consuming the current character.
     *
     * @throws LexerException If the source has ended already, or if the
     *                        matcher answers Result::CANCEL
     */
    public function read(TokenType $tokenType): void
    {
        assert($this->latestError === null);
        $this->startPosition = $this->characterStream->getCurrentPosition();

        if ($this->characterStream->isEnd()) {
            throw $this->latestError = LexerException::becauseOfUnexpectedEndOfSource(
                expectedTokenTypes: TokenTypes::from($tokenType),
                affectedRangeInSource: $this->startPosition->toRange()
            );
        }

        $this->resetTokenState();

        do {
            $character = $this->characterStream->current();
            $result = Matcher::for($tokenType)->match($character, $this->offset);

            if ($result === Result::CANCEL) {
                throw $this->latestError = LexerException::becauseOfUnexpectedCharacterSequence(
                    expectedTokenTypes: TokenTypes::from($tokenType),
                    affectedRangeInSource: Range::from(
                        $this->startPosition,
                        $this->characterStream->getCurrentPosition()
                    ),
                    actualCharacterSequence: $this->buffer . $character
                );
            }

            if ($result === Result::KEEP) {
                $this->offset++;
                $this->buffer .= $character;
                $this->characterStream->next();
            }
        } while ($result !== Result::SATISFIED);

        $this->tokenTypeUnderCursor = $tokenType;
    }

    /**
     * Reads one token whose type is any of the given candidates.
     *
     * Every remaining candidate is asked about each character. Candidates
     * answering Result::KEEP stay in the race; the first candidate (in
     * iteration order) answering Result::SATISFIED wins and ends the read.
     * If no candidate remains, the read fails.
     *
     * @throws LexerException If the source has ended already, or if all
     *                        candidates dropped out
     */
    public function readOneOf(TokenTypes $tokenTypes): void
    {
        assert($this->latestError === null);
        $this->startPosition = $this->characterStream->getCurrentPosition();

        if ($this->characterStream->isEnd()) {
            throw $this->latestError = LexerException::becauseOfUnexpectedEndOfSource(
                expectedTokenTypes: $tokenTypes,
                affectedRangeInSource: $this->startPosition->toRange()
            );
        }

        $this->resetTokenState();

        $remainingCandidates = $tokenTypes->items;
        while (count($remainingCandidates) > 0) {
            $character = $this->characterStream->current();

            $survivingCandidates = [];
            foreach ($remainingCandidates as $candidate) {
                $result = Matcher::for($candidate)->match($character, $this->offset);

                if ($result === Result::SATISFIED) {
                    $this->tokenTypeUnderCursor = $candidate;
                    return;
                }

                if ($result === Result::KEEP) {
                    $survivingCandidates[] = $candidate;
                }
            }

            // The character is consumed even if no candidate survived, so
            // that it shows up in the error reported below.
            $this->offset++;
            $this->buffer .= $character;
            $remainingCandidates = $survivingCandidates;
            $this->characterStream->next();
        }

        throw $this->latestError = LexerException::becauseOfUnexpectedCharacterSequence(
            expectedTokenTypes: $tokenTypes,
            affectedRangeInSource: Range::from(
                $this->startPosition,
                $this->characterStream->getPreviousPosition()
            ),
            actualCharacterSequence: $this->buffer
        );
    }

    /**
     * Consumes any run of SPACE and END_OF_LINE tokens at the current position.
     */
    public function skipSpace(): void
    {
        assert($this->latestError === null);
        $this->skip(TokenType::SPACE, TokenType::END_OF_LINE);
    }

    /**
     * Consumes any run of SPACE, END_OF_LINE and COMMENT tokens at the
     * current position.
     */
    public function skipSpaceAndComments(): void
    {
        assert($this->latestError === null);
        $this->skip(TokenType::SPACE, TokenType::END_OF_LINE, TokenType::COMMENT);
    }

    /**
     * Repeatedly reads tokens of the given types for as long as one of their
     * matchers answers Result::KEEP for the current character at offset 0.
     * Types are probed in the given order; the first hit is read.
     */
    private function skip(TokenType ...$tokenTypes): void
    {
        do {
            $hasSkippedToken = false;
            $character = $this->characterStream->current();

            foreach ($tokenTypes as $tokenType) {
                $matcher = Matcher::for($tokenType);

                if ($matcher->match($character, 0) === Result::KEEP) {
                    $this->read($tokenType);
                    $hasSkippedToken = true;
                    break;
                }
            }
        } while ($hasSkippedToken);
    }

    /**
     * Returns the token produced by the most recent read. The Token object is
     * created on first access and reused until the next read.
     *
     * Requires (by assertion) that a read has succeeded beforehand.
     */
    public function getTokenUnderCursor(): Token
    {
        assert($this->latestError === null);
        assert($this->startPosition !== null);
        assert($this->tokenTypeUnderCursor !== null);

        if ($this->tokenUnderCursor === null) {
            $this->tokenUnderCursor = new Token(
                rangeInSource: Range::from(
                    $this->startPosition,
                    $this->characterStream->getPreviousPosition()
                ),
                type: $this->tokenTypeUnderCursor,
                value: $this->buffer
            );
        }

        return $this->tokenUnderCursor;
    }

    /**
     * @return bool True if the underlying character stream is exhausted
     */
    public function isEnd(): bool
    {
        return $this->characterStream->isEnd();
    }

    /**
     * Forgets the previously read token and empties the character buffer
     * before a new token is read.
     */
    private function resetTokenState(): void
    {
        $this->tokenTypeUnderCursor = null;
        $this->tokenUnderCursor = null;
        $this->offset = 0;
        $this->buffer = '';
    }
}

0 commit comments

Comments
 (0)