diff --git a/src/Parser/Tokenizer/Tokenizer.php b/src/Parser/Tokenizer/Tokenizer.php index bb67fd38..a3571d7a 100644 --- a/src/Parser/Tokenizer/Tokenizer.php +++ b/src/Parser/Tokenizer/Tokenizer.php @@ -250,7 +250,7 @@ public static function period(\Iterator $fragments): \Iterator public static function symbol(\Iterator $fragments, ?Buffer $buffer = null): \Iterator { - $buffer = $buffer ?? Buffer::empty(); + $buffer ??= Buffer::empty(); $capture = true; while ($capture && $fragments->valid()) { @@ -324,7 +324,7 @@ public static function angle(\Iterator $fragments): \Iterator */ public static function tag(\Iterator $fragments, ?Buffer $buffer = null): \Iterator { - $buffer = $buffer ?? Buffer::empty(); + $buffer ??= Buffer::empty(); $isClosing = false; while ($fragments->valid()) { @@ -410,16 +410,19 @@ public static function tagContent(\Iterator $fragments): \Iterator yield from $buffer->flush(TokenType::STRING); yield from self::block($fragments); break; + case $fragment->value === '>': + throw new \Exception(sprintf('@TODO: Illegal Character "%s"', $fragment->value)); case $fragment->value === '<': $fragments->next(); - if ($fragments->current()?->value === '/') { + if (!$fragments->valid()) { + throw new \Exception("@TODO: Unexpected end of input"); + } + if ($fragments->current()->value === '/') { yield from $buffer->flush(TokenType::STRING); return Buffer::empty()->append($fragment); - } else if (!ctype_space($fragments->current()?->value)) { - yield from self::tag($fragments, Buffer::empty()->append($fragment)); - } else { - $buffer->append($fragment); } + yield from self::tag($fragments, Buffer::empty()->append($fragment)); + break; case ctype_space($fragment->value): yield from $buffer->flush(TokenType::STRING); yield from self::space($fragments); diff --git a/test/Unit/Parser/Tokenizer/TokenizerTest.php b/test/Unit/Parser/Tokenizer/TokenizerTest.php new file mode 100644 index 00000000..d25c1a58 --- /dev/null +++ b/test/Unit/Parser/Tokenizer/TokenizerTest.php @@ -0,0 +1,48 @@ + inside text" => [ + '
acd>def
', + '@TODO: Illegal Character ">"' + ]; + + yield "> between tags" => [ + '
>
', + '@TODO: Illegal Character ">"' + ]; + + /* + * @todo no exception is thrown on tokenizer level ... + * + *
abc < <