From 83f59ee27f2d32868854337696fbff2d4ee78b7c Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Sun, 26 Feb 2023 00:59:36 +0000 Subject: [PATCH 01/16] Fix flake8 pre-commit dependency --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 87725aa..e3b0dfd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,8 +4,8 @@ repos: hooks: - id: trailing-whitespace - id: end-of-file-fixer - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + - repo: https://github.com/pycqa/flake8.git + rev: 3.9.2 hooks: - id: flake8 language_version: python3.9 From c9911a096e7c000e8cf233ba15459e674724f2e3 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Sun, 26 Feb 2023 03:13:56 +0000 Subject: [PATCH 02/16] Add typed seq and combine alternatives (join/append/combine) --- README.rst | 8 +++++--- setup.py | 1 + src/parsy/__init__.py | 41 +++++++++++++++++++++++++++++++++++++---- tests/test_parsy.py | 21 +++++++++++++++++---- 4 files changed, 60 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index 6d90112..3b68947 100644 --- a/README.rst +++ b/README.rst @@ -13,10 +13,12 @@ incompatible** version of parsy that has strong type guarantees, with no This means removing anything that cannot be typed fully, and providing alternatives. Main changes: -* Removed ``seq``, and replaced it with ``&`` operator support which returns a 2-tuple +* Removed ``seq``, and replaced it with ``join`` which creates a 2-tuple result, and + ``append`` which takes an ``n``-tuple result and adds the result of another parser to + the end, producing an ``n+1``-tuple result. * Removed ``alt`` - you can use only ``|`` operator. -* Removed ``.combine`` and ``.combine_dict`` - you have to use ``.map`` instead, - which is type-safe but much trickier, especially once you have nested tuples. +* Removed ``.combine_dict`` - you have to use ``.map`` or ``.combine`` instead, + which is type-safe but loses the benefit of keyword sequence parsers. The docs have not been updated, you’ll need to look at the source code if you are interested. diff --git a/setup.py b/setup.py index bff016b..a5281d1 100755 --- a/setup.py +++ b/setup.py @@ -42,4 +42,5 @@ keywords="parser parsers parsing monad combinators", packages=find_packages("src"), package_dir={"": "src"}, + install_requires=["typing-extensions"], ) diff --git a/src/parsy/__init__.py b/src/parsy/__init__.py index 30865dc..693b754 100644 --- a/src/parsy/__init__.py +++ b/src/parsy/__init__.py @@ -3,21 +3,21 @@ # are mainly for internal use. from __future__ import annotations -import operator import enum - +import operator import re from dataclasses import dataclass from functools import reduce, wraps -from typing import Any, Callable, FrozenSet, Generator, Generic, Optional, TypeVar, Union +from typing import Any, Callable, FrozenSet, Generator, Generic, Optional, Tuple, TypeVar, Union +from typing_extensions import TypeVarTuple, Unpack from .version import __version__ # noqa: F401 - OUT = TypeVar("OUT") OUT1 = TypeVar("OUT1") OUT2 = TypeVar("OUT2") +OUT_T = TypeVarTuple("OUT_T") OUT_co = TypeVar("OUT_co", covariant=True) @@ -330,6 +330,39 @@ def seq_parser(stream: str, index: int) -> Result[tuple[OUT1, OUT2]]: return seq_parser + def join(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[tuple[OUT1, OUT2]]: + """TODO alternative name for `&`, decide on naming""" + return self & other + + def append(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[OUT2]) -> Parser[Tuple[Unpack[OUT_T], OUT2]]: + """ + Take a parser which produces a tuple of values, and add another parser's result + to the end of that tuple + """ + + @Parser + def tuple_parser(stream: str, index: int) -> Result[Tuple[Unpack[OUT_T], OUT2]]: + result0 = None + result1 = self(stream, index).aggregate(result0) + if not result1.status: + return Result(result1.status, result1.index, None, result1.furthest, result1.expected) # type: ignore + result2 = other(stream, result1.index).aggregate(result1) + if not result2.status: + return Result( + result2.status, result2.index, (*result1.value, result2.value), result2.furthest, result2.expected + ) + + return Result.success(result2.index, (*result1.value, result2.value)).aggregate(result2) + + return tuple_parser + + def combine(self: Parser[Tuple[Unpack[OUT_T]]], combine_fn: Callable[[Unpack[OUT_T]], OUT2]) -> Parser[OUT2]: + """ + Apply ``combine_fn`` to the parser result, which must be a tuple. The result + is passed as `*args` to ``combine_fn``. + """ + return self.bind(lambda value: success(combine_fn(*value))) + # haskelley operators, for fun # # >> diff --git a/tests/test_parsy.py b/tests/test_parsy.py index 02f6841..c906c5e 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -4,14 +4,12 @@ except ImportError: enum = None import re -from typing import Generator import unittest - -from typing import Any +from typing import Any, Generator from parsy import ( - Parser, ParseError, + Parser, any_char, char_from, decimal_digit, @@ -122,6 +120,21 @@ def test_and(self): parser = digit & letter self.assertEqual(parser.parse("1A"), ("1", "A")) + def test_append(self): + parser = digit.join(letter).append(letter) + self.assertEqual(parser.parse("1AB"), ("1", "A", "B")) + + def test_combine(self): + parser = digit.join(letter).append(letter).combine(lambda a, b, c: (c + b + a)) + self.assertEqual(parser.parse("1AB"), "BA1") + + def test_combine_mixed_types(self): + def demo(a: int, b: str, c: bool) -> tuple[int, str, bool]: + return (a, b, c) + + parser = digit.map(int).join(letter).append(digit.map(bool)).combine(demo) + self.assertEqual(parser.parse("1A1"), (1, "A", True)) + def test_or(self): self.assertEqual((letter | digit).parse("a"), "a") self.assertEqual((letter | digit).parse("1"), "1") From b4c5a38e5aa65c7748cb520c4f05c8c550fb0ce8 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Thu, 4 May 2023 22:52:42 +0100 Subject: [PATCH 03/16] Add dataclass parser prototype --- .gitignore | 4 + examples/dataclass_parsing.py | 58 +++++++++ examples/json.py | 40 +++--- examples/sequence.py | 34 +++++ examples/simple_logo_lexer.py | 2 +- pyproject.toml | 1 - src/parsy/__init__.py | 237 ++++++++++++++++++++++++++-------- 7 files changed, 304 insertions(+), 72 deletions(-) create mode 100644 examples/dataclass_parsing.py create mode 100644 examples/sequence.py diff --git a/.gitignore b/.gitignore index 5e3115d..eadab64 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,7 @@ src/parsy.egg-info docs/_build .cache +__pycache__ +.python-version +.venv +.vscode diff --git a/examples/dataclass_parsing.py b/examples/dataclass_parsing.py new file mode 100644 index 0000000..5cf7ee6 --- /dev/null +++ b/examples/dataclass_parsing.py @@ -0,0 +1,58 @@ +from dataclasses import dataclass +from typing import Optional + +from parsy import dataparser, parse_field, regex, string, whitespace + + +@dataclass +class Person: + name: str = parse_field(regex(r"\w+") << whitespace) + age: int = parse_field(regex(r"\d+").map(int) << whitespace) + note: str = parse_field(regex(".+")) + + +person_parser = dataparser(Person) +person = person_parser.parse("Rob 2000 how time flies") +print(person) +assert person == Person(name="Rob", age=2000, note="how time flies") + + +# Nesting dataclass parsers + + +@dataclass +class Id: + id: str = parse_field(regex(r"[^\s]+") << whitespace.optional()) + from_year: Optional[int] = parse_field( + regex("[0-9]+").map(int).desc("Numeric").optional() << whitespace.optional() + ) + + +@dataclass +class Name: + name: str = parse_field(regex(r"[a-zA-Z]+") << whitespace.optional()) + abbreviated: Optional[bool] = parse_field( + (string("T") | string("F")).map(lambda x: x == "T").optional() << whitespace.optional() + ) + + +@dataclass +class PersonDetail: + id: Id = parse_field(dataparser(Id)) + forename: Name = parse_field(dataparser(Name)) + surname: Optional[Name] = parse_field(dataparser(Name).optional()) + + +out_parser = dataparser(PersonDetail).many() + +new_person = out_parser.parse("007 2023 Rob T John 123 2004 Bob") +print(new_person) + +res = [ + PersonDetail( + id=Id(id="007", from_year=2023), + forename=Name(name="Rob", abbreviated=True), + surname=Name(name="John", abbreviated=None), + ), + PersonDetail(id=Id(id="123", from_year=2004), forename=Name(name="Bob", abbreviated=None), surname=None), +] diff --git a/examples/json.py b/examples/json.py index 7eab341..394456b 100644 --- a/examples/json.py +++ b/examples/json.py @@ -1,4 +1,5 @@ from typing import TypeVar + from parsy import Parser, forward_declaration, regex, string # Utilities @@ -39,20 +40,24 @@ def lexeme(p: Parser[T]) -> Parser[T]: quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"')) # Data structures -json_value = forward_declaration() +JSON = TypeVar("JSON") + +json_value = forward_declaration[JSON]() object_pair = (quoted << colon) & json_value -json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace +json_object = lbrace >> object_pair.sep_by(comma).map(lambda a: {g[0]: g[1] for g in a}) << rbrace array = lbrack >> json_value.sep_by(comma) << rbrack # Everything -json_value.become(quoted | number | json_object | array | true | false | null) +all = quoted | number | json_object | array | true | false | null +json_value = json_value.become(all) json_doc = whitespace >> json_value +# JSON = Union[Dict[str, JSON], List[JSON], str, int, float, bool, None] + def test(): - assert ( - json_doc.parse( - r""" + result = json_doc.parse( + r""" { "int": 1, "string": "hello", @@ -62,19 +67,18 @@ def test(): "other": [true, false, null] } """ - ) - == { - "int": 1, - "string": "hello", - "a list": [1, 2, 3], - "escapes": "\n ⓒ", - "nested": {"x": "y"}, - "other": [True, False, None], - } ) + print(result) + assert result == { + "int": 1, + "string": "hello", + "a list": [1, 2, 3], + "escapes": "\n ⓒ", + "nested": {"x": "y"}, + "other": [True, False, None], + } if __name__ == "__main__": - from sys import stdin - - print(repr(json_doc.parse(stdin.read()))) + test() + # print(repr(json_doc.parse(stdin.read()))) diff --git a/examples/sequence.py b/examples/sequence.py new file mode 100644 index 0000000..0ccdee1 --- /dev/null +++ b/examples/sequence.py @@ -0,0 +1,34 @@ +from dataclasses import dataclass +from typing import TypeVar + +from typing_extensions import TypeVarTuple + +from parsy import regex, seq, whitespace + +OUT1 = TypeVar("OUT1") +OUT2 = TypeVar("OUT2") +OUT3 = TypeVar("OUT3") +OUT4 = TypeVar("OUT4") +OUT5 = TypeVar("OUT5") +OUT6 = TypeVar("OUT6") +OUT_T = TypeVarTuple("OUT_T") + + +@dataclass +class Person: + name: str + age: int + note: str + + +person_parser = seq( + regex(r"\w+"), + whitespace >> regex(r"\d+").map(int), + whitespace >> regex(r".+"), +).combine(Person) + +person = person_parser.parse("Rob 1000 pretty old") + +print(person) + +assert person == Person(name="Rob", age=1000, note="pretty old") diff --git a/examples/simple_logo_lexer.py b/examples/simple_logo_lexer.py index 4f5ffb8..804e4ef 100644 --- a/examples/simple_logo_lexer.py +++ b/examples/simple_logo_lexer.py @@ -8,7 +8,7 @@ etc. """ -from parsy import eof, regex, string, string_from, whitespace, Parser +from parsy import Parser, eof, regex, string, string_from, whitespace command = string_from("fd", "bk", "rt", "lt") number = regex(r"[0-9]+").map(int) diff --git a/pyproject.toml b/pyproject.toml index 366edb3..57c1027 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,4 +9,3 @@ profile = "black" default_section = "THIRDPARTY" skip = [".tox", ".git", "docs", "dist", "build" , "todo"] known_first_party = "parsy" - diff --git a/src/parsy/__init__.py b/src/parsy/__init__.py index 693b754..cfac66e 100644 --- a/src/parsy/__init__.py +++ b/src/parsy/__init__.py @@ -6,20 +6,43 @@ import enum import operator import re -from dataclasses import dataclass +from dataclasses import Field, dataclass, field from functools import reduce, wraps -from typing import Any, Callable, FrozenSet, Generator, Generic, Optional, Tuple, TypeVar, Union - -from typing_extensions import TypeVarTuple, Unpack +from typing import ( + Any, + Callable, + ClassVar, + Dict, + FrozenSet, + Generator, + Generic, + Mapping, + Optional, + Protocol, + Tuple, + Type, + TypeVar, + Union, + cast, + overload, +) + +from typing_extensions import ParamSpec, TypeVarTuple, Unpack from .version import __version__ # noqa: F401 OUT = TypeVar("OUT") OUT1 = TypeVar("OUT1") OUT2 = TypeVar("OUT2") +OUT3 = TypeVar("OUT3") +OUT4 = TypeVar("OUT4") +OUT5 = TypeVar("OUT5") +OUT6 = TypeVar("OUT6") OUT_T = TypeVarTuple("OUT_T") OUT_co = TypeVar("OUT_co", covariant=True) +OUT2_co = TypeVar("OUT2_co", covariant=True) +P = ParamSpec("P") T = TypeVar("T") @@ -37,6 +60,14 @@ def line_info_at(stream: str, index: int) -> tuple[int, int]: return (line, col) +# @dataclass +# class Stream: +# stream: str + +# def at_index(self, index: int): +# return memoryview(self.stream) + + class ParseError(RuntimeError): def __init__(self, expected: FrozenSet[str], stream: str, index: int): self.expected: FrozenSet[str] = expected @@ -95,7 +126,7 @@ def aggregate(self: Result[OUT], other: Optional[Result[Any]]) -> Result[OUT]: return Result(self.status, self.index, self.value, other.furthest, other.expected) -class Parser(Generic[OUT]): +class Parser(Generic[OUT_co]): """ A Parser is an object that wraps a function whose arguments are a string to be parsed and the index on which to begin parsing. @@ -106,18 +137,18 @@ class Parser(Generic[OUT]): of the failure. """ - def __init__(self, wrapped_fn: Callable[[str, int], Result[OUT]]): - self.wrapped_fn: Callable[[str, int], Result[OUT]] = wrapped_fn + def __init__(self, wrapped_fn: Callable[[str, int], Result[OUT_co]]): + self.wrapped_fn: Callable[[str, int], Result[OUT_co]] = wrapped_fn - def __call__(self, stream: str, index: int) -> Result[OUT]: + def __call__(self, stream: str, index: int) -> Result[OUT_co]: return self.wrapped_fn(stream, index) - def parse(self, stream: str) -> OUT: + def parse(self, stream: str) -> OUT_co: """Parse a string and return the result or raise a ParseError.""" (result, _) = (self << eof).parse_partial(stream) return result - def parse_partial(self, stream: str) -> tuple[OUT, str]: + def parse_partial(self, stream: str) -> tuple[OUT_co, str]: """ Parse the longest possible prefix of a given string. Return a tuple of the result and the rest of the string, @@ -158,10 +189,10 @@ def skip(self: Parser[OUT1], other: Parser) -> Parser[OUT1]: def result(self: Parser, res: OUT2) -> Parser[OUT2]: return self >> success(res) - def many(self: Parser[OUT]) -> Parser[list[OUT]]: + def many(self: Parser[OUT_co]) -> Parser[list[OUT_co]]: return self.times(0, float("inf")) - def times(self: Parser[OUT], min: int, max: int | float | None = None) -> Parser[list[OUT]]: + def times(self: Parser[OUT_co], min: int, max: int | float | None = None) -> Parser[list[OUT_co]]: the_max: int | float if max is None: the_max = min @@ -170,8 +201,8 @@ def times(self: Parser[OUT], min: int, max: int | float | None = None) -> Parser # TODO - must execute at least once @Parser - def times_parser(stream: str, index: int) -> Result[list[OUT]]: - values: list[OUT] = [] + def times_parser(stream: str, index: int) -> Result[list[OUT_co]]: + values: list[OUT_co] = [] times = 0 result = None @@ -190,10 +221,10 @@ def times_parser(stream: str, index: int) -> Result[list[OUT]]: return times_parser - def at_most(self: Parser[OUT], n: int) -> Parser[list[OUT]]: + def at_most(self: Parser[OUT_co], n: int) -> Parser[list[OUT_co]]: return self.times(0, n) - def at_least(self: Parser[OUT], n: int) -> Parser[list[OUT]]: + def at_least(self: Parser[OUT_co], n: int) -> Parser[list[OUT_co]]: # TODO: I cannot for the life of me work out why mypy rejects the following. # Pyright does not reject it. return (self.times(n) & self.many()).map(lambda t: t[0] + t[1]) @@ -203,14 +234,14 @@ def optional(self: Parser[OUT1], default: OUT2 | None = None) -> Parser[OUT1 | O return self.times(0, 1).map(lambda v: v[0] if v else default) def until( - self: Parser[OUT], - other: Parser[OUT], + self: Parser[OUT_co], + other: Parser, min: int = 0, max: int | float = float("inf"), consume_other: bool = False, - ) -> Parser[list[OUT]]: + ) -> Parser[list[OUT_co]]: @Parser - def until_parser(stream: str, index: int) -> Result[list[OUT]]: + def until_parser(stream: str, index: int) -> Result[list[OUT_co]]: values = [] times = 0 while True: @@ -245,8 +276,10 @@ def until_parser(stream: str, index: int) -> Result[list[OUT]]: return until_parser - def sep_by(self: Parser[OUT], sep: Parser, *, min: int = 0, max: int | float = float("inf")) -> Parser[list[OUT]]: - zero_times: Parser[list[OUT]] = success([]) + def sep_by( + self: Parser[OUT_co], sep: Parser, *, min: int = 0, max: int | float = float("inf") + ) -> Parser[list[OUT_co]]: + zero_times: Parser[list[OUT_co]] = success([]) if max == 0: return zero_times res = (self.times(1) & (sep >> self).times(min - 1, max - 1)).map(lambda t: t[0] + t[1]) @@ -254,9 +287,9 @@ def sep_by(self: Parser[OUT], sep: Parser, *, min: int = 0, max: int | float = f res |= zero_times return res - def desc(self, description: str) -> Parser[OUT]: + def desc(self, description: str) -> Parser[OUT_co]: @Parser - def desc_parser(stream: str, index: int) -> Result[OUT]: + def desc_parser(stream: str, index: int) -> Result[OUT_co]: result = self(stream, index) if result.status: return result @@ -334,27 +367,15 @@ def join(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[tuple[OUT1, OUT2]]: """TODO alternative name for `&`, decide on naming""" return self & other - def append(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[OUT2]) -> Parser[Tuple[Unpack[OUT_T], OUT2]]: + def as_tuple(self: Parser[OUT_co]) -> Parser[Tuple[OUT_co]]: + return self.map(lambda value: (value,)) + + def append(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[OUT2_co]) -> Parser[Tuple[Unpack[OUT_T], OUT2_co]]: """ Take a parser which produces a tuple of values, and add another parser's result to the end of that tuple """ - - @Parser - def tuple_parser(stream: str, index: int) -> Result[Tuple[Unpack[OUT_T], OUT2]]: - result0 = None - result1 = self(stream, index).aggregate(result0) - if not result1.status: - return Result(result1.status, result1.index, None, result1.furthest, result1.expected) # type: ignore - result2 = other(stream, result1.index).aggregate(result1) - if not result2.status: - return Result( - result2.status, result2.index, (*result1.value, result2.value), result2.furthest, result2.expected - ) - - return Result.success(result2.index, (*result1.value, result2.value)).aggregate(result2) - - return tuple_parser + return self.bind(lambda self_value: other.bind(lambda other_value: success((*self_value, other_value)))) def combine(self: Parser[Tuple[Unpack[OUT_T]]], combine_fn: Callable[[Unpack[OUT_T]], OUT2]) -> Parser[OUT2]: """ @@ -366,11 +387,12 @@ def combine(self: Parser[Tuple[Unpack[OUT_T]]], combine_fn: Callable[[Unpack[OUT # haskelley operators, for fun # # >> - def __rshift__(self, other: Parser[OUT2]) -> Parser[OUT2]: + + def __rshift__(self, other: Parser[OUT2_co]) -> Parser[OUT2_co]: return self.then(other) # << - def __lshift__(self, other: Parser) -> Parser[OUT]: + def __lshift__(self, other: Parser) -> Parser[OUT_co]: return self.skip(other) @@ -439,9 +461,17 @@ def string_parser(stream, index): return string_parser -def regex(exp, flags=0, group=0) -> Parser[str]: - if isinstance(exp, (str, bytes)): - exp = re.compile(exp, flags) +# @overload +# def regex(pattern: str, flags:re.RegexFlag, group: int) -> Parser[str]: +# ... +# @overload +# def regex(pattern: str, *, flags:re.RegexFlag, group: str) -> Parser[str]: +# ... + + +def regex(pattern: str, *, flags=re.RegexFlag(0), group: Any = 0) -> Parser[str]: + if isinstance(pattern, str): + exp = re.compile(pattern, flags) if isinstance(group, (str, int)): group = (group,) @@ -456,6 +486,62 @@ def regex_parser(stream, index): return regex_parser +# Each number of args needs to be typed separately +@overload +def seq( + arg1: Parser[OUT1], + arg2: Parser[OUT2], + arg3: Parser[OUT3], + arg4: Parser[OUT4], + arg5: Parser[OUT5], + arg6: Parser[OUT6], + /, +) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5, OUT6]]: + ... + + +@overload +def seq( + arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], arg4: Parser[OUT4], arg5: Parser[OUT5], / +) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5]]: + ... + + +@overload +def seq( + arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], arg4: Parser[OUT4], / +) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4]]: + ... + + +@overload +def seq(arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], /) -> Parser[Tuple[OUT1, OUT2, OUT3]]: + ... + + +@overload +def seq(arg1: Parser[OUT1], arg2: Parser[OUT2], /) -> Parser[Tuple[OUT1, OUT2]]: + ... + + +@overload +def seq(arg1: Parser[OUT1], /) -> Parser[Tuple[OUT1]]: + ... + + +@overload +def seq(arg1: Parser, *args: Parser) -> Parser[Tuple]: + ... + + +def seq(arg1: Parser, *args: Parser) -> Parser[Tuple]: + arg1 = arg1.as_tuple() + + for p in args: + arg1 = arg1.append(p) + return arg1 + + # TODO the rest of the functions here need type annotations. # One problem is that `test_item` and `match_item` are assumning that the input @@ -499,10 +585,7 @@ def string_from(*strings: str, transform: Callable[[str], str] = noop) -> Parser # TODO drop bytes support here def char_from(string): - if isinstance(string, bytes): - return test_char(lambda c: c in string, b"[" + string + b"]") - else: - return test_char(lambda c: c in string, "[" + string + "]") + return test_char(lambda c: c in string, "[" + string + "]") def peek(parser): @@ -552,7 +635,7 @@ def from_enum(enum_cls: type[E], transform: Callable[[str], str] = noop) -> Pars # Cutting the recursive knot might be harder at the type level? -class forward_declaration(Parser): +class forward_declaration(Parser[OUT]): """ An empty parser that can be used as a forward declaration, especially for parsers that need to be defined recursively. @@ -569,6 +652,56 @@ def _raise_error(self, *args, **kwargs): parse = _raise_error parse_partial = _raise_error - def become(self, other: Parser) -> None: + def become(self, other: Parser[OUT2]) -> Parser[OUT2]: self.__dict__ = other.__dict__ self.__class__ = other.__class__ + self = cast(Parser[OUT2], self) + return self + + +# Dataclass parsers + + +_T = TypeVar("_T") + + +def parse_field( + parser: Parser[_T], + *, + default: _T = ..., + init: bool = ..., + repr: bool = ..., + hash: Union[bool, None] = ..., + compare: bool = ..., + metadata: Mapping[Any, Any] = ..., +) -> _T: + if metadata is Ellipsis: + metadata = {} + return field( + default=default, init=init, repr=repr, hash=hash, compare=compare, metadata={**metadata, "parser": parser} + ) + + +class DataClassProtocol(Protocol): + __dataclass_fields__: ClassVar[Dict[str, Field]] + + +OUT_D = TypeVar("OUT_D", bound=DataClassProtocol) + + +def dataparser(datatype: Type[OUT_D]) -> Parser[OUT_D]: + @Parser + def data_parser(stream: str, index: int) -> Result[OUT_D]: + fields: Dict[str, Any] = {} + result = Result.success(index, None) + for fieldname, field in datatype.__dataclass_fields__.items(): + parser: Parser[Any] = field.metadata["parser"] + result = parser(stream, index) + if not result.status: + return result + index = result.index + fields[fieldname] = result.value + + return Result.success(result.index, datatype(**fields)) + + return data_parser From 649e5eecf96edba53d3d3a7c47be97bd13ee7d2b Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Thu, 4 May 2023 23:33:40 +0100 Subject: [PATCH 04/16] Convert to poetry and add support for python 3.7 --- conftest.py | 3 +- examples/simple_logo_lexer.py | 3 +- {src/parsy => parsy}/__init__.py | 47 +++++---- {src/parsy => parsy}/py.typed | 0 poetry.lock | 161 +++++++++++++++++++++++++++++++ pyproject.toml | 20 ++++ src/parsy/version.py | 1 - tests/test_parsy.py | 45 ++++----- tests/test_sexpr.py | 2 +- 9 files changed, 229 insertions(+), 53 deletions(-) rename {src/parsy => parsy}/__init__.py (95%) rename {src/parsy => parsy}/py.typed (100%) create mode 100644 poetry.lock delete mode 100644 src/parsy/version.py diff --git a/conftest.py b/conftest.py index dce42a2..b521adf 100644 --- a/conftest.py +++ b/conftest.py @@ -1,6 +1,7 @@ import sys +from typing import List -collect_ignore: list[str] = [] +collect_ignore: List[str] = [] if sys.version_info < (3, 7): # Python 3.6 and below don't have `dataclasses` diff --git a/examples/simple_logo_lexer.py b/examples/simple_logo_lexer.py index 804e4ef..de8fba2 100644 --- a/examples/simple_logo_lexer.py +++ b/examples/simple_logo_lexer.py @@ -8,6 +8,7 @@ etc. """ +from typing import List from parsy import Parser, eof, regex, string, string_from, whitespace command = string_from("fd", "bk", "rt", "lt") @@ -15,7 +16,7 @@ optional_whitespace = regex(r"\s*") eol = string("\n") line = (optional_whitespace >> command) & (whitespace >> number) & (eof | eol | (whitespace >> eol)).result("\n") -lexer: Parser[list[object]] = line.many().map(lambda lines: sum(([t0, t1, t2] for ((t0, t1), t2) in lines), [])) +lexer: Parser[List[object]] = line.many().map(lambda lines: sum(([t0, t1, t2] for ((t0, t1), t2) in lines), [])) def test_lexer() -> None: diff --git a/src/parsy/__init__.py b/parsy/__init__.py similarity index 95% rename from src/parsy/__init__.py rename to parsy/__init__.py index cfac66e..2614ceb 100644 --- a/src/parsy/__init__.py +++ b/parsy/__init__.py @@ -16,9 +16,9 @@ FrozenSet, Generator, Generic, + List, Mapping, Optional, - Protocol, Tuple, Type, TypeVar, @@ -27,9 +27,7 @@ overload, ) -from typing_extensions import ParamSpec, TypeVarTuple, Unpack - -from .version import __version__ # noqa: F401 +from typing_extensions import ParamSpec, Protocol, TypeVarTuple, Unpack OUT = TypeVar("OUT") OUT1 = TypeVar("OUT1") @@ -51,7 +49,7 @@ def noop(val: T) -> T: return val -def line_info_at(stream: str, index: int) -> tuple[int, int]: +def line_info_at(stream: str, index: int) -> Tuple[int, int]: if index > len(stream): raise ValueError("invalid index") line = stream.count("\n", 0, index) @@ -148,7 +146,7 @@ def parse(self, stream: str) -> OUT_co: (result, _) = (self << eof).parse_partial(stream) return result - def parse_partial(self, stream: str) -> tuple[OUT_co, str]: + def parse_partial(self, stream: str) -> Tuple[OUT_co, str]: """ Parse the longest possible prefix of a given string. Return a tuple of the result and the rest of the string, @@ -177,7 +175,7 @@ def bound_parser(stream: str, index: int) -> Result[OUT2]: def map(self: Parser[OUT1], map_fn: Callable[[OUT1], OUT2]) -> Parser[OUT2]: return self.bind(lambda res: success(map_fn(res))) - def concat(self: Parser[list[str]]) -> Parser[str]: + def concat(self: Parser[List[str]]) -> Parser[str]: return self.map("".join) def then(self: Parser, other: Parser[OUT2]) -> Parser[OUT2]: @@ -189,10 +187,10 @@ def skip(self: Parser[OUT1], other: Parser) -> Parser[OUT1]: def result(self: Parser, res: OUT2) -> Parser[OUT2]: return self >> success(res) - def many(self: Parser[OUT_co]) -> Parser[list[OUT_co]]: + def many(self: Parser[OUT_co]) -> Parser[List[OUT_co]]: return self.times(0, float("inf")) - def times(self: Parser[OUT_co], min: int, max: int | float | None = None) -> Parser[list[OUT_co]]: + def times(self: Parser[OUT_co], min: int, max: int | float | None = None) -> Parser[List[OUT_co]]: the_max: int | float if max is None: the_max = min @@ -201,8 +199,8 @@ def times(self: Parser[OUT_co], min: int, max: int | float | None = None) -> Par # TODO - must execute at least once @Parser - def times_parser(stream: str, index: int) -> Result[list[OUT_co]]: - values: list[OUT_co] = [] + def times_parser(stream: str, index: int) -> Result[List[OUT_co]]: + values: List[OUT_co] = [] times = 0 result = None @@ -221,10 +219,10 @@ def times_parser(stream: str, index: int) -> Result[list[OUT_co]]: return times_parser - def at_most(self: Parser[OUT_co], n: int) -> Parser[list[OUT_co]]: + def at_most(self: Parser[OUT_co], n: int) -> Parser[List[OUT_co]]: return self.times(0, n) - def at_least(self: Parser[OUT_co], n: int) -> Parser[list[OUT_co]]: + def at_least(self: Parser[OUT_co], n: int) -> Parser[List[OUT_co]]: # TODO: I cannot for the life of me work out why mypy rejects the following. # Pyright does not reject it. return (self.times(n) & self.many()).map(lambda t: t[0] + t[1]) @@ -239,9 +237,9 @@ def until( min: int = 0, max: int | float = float("inf"), consume_other: bool = False, - ) -> Parser[list[OUT_co]]: + ) -> Parser[List[OUT_co]]: @Parser - def until_parser(stream: str, index: int) -> Result[list[OUT_co]]: + def until_parser(stream: str, index: int) -> Result[List[OUT_co]]: values = [] times = 0 while True: @@ -278,8 +276,8 @@ def until_parser(stream: str, index: int) -> Result[list[OUT_co]]: def sep_by( self: Parser[OUT_co], sep: Parser, *, min: int = 0, max: int | float = float("inf") - ) -> Parser[list[OUT_co]]: - zero_times: Parser[list[OUT_co]] = success([]) + ) -> Parser[List[OUT_co]]: + zero_times: Parser[List[OUT_co]] = success([]) if max == 0: return zero_times res = (self.times(1) & (sep >> self).times(min - 1, max - 1)).map(lambda t: t[0] + t[1]) @@ -472,6 +470,8 @@ def string_parser(stream, index): def regex(pattern: str, *, flags=re.RegexFlag(0), group: Any = 0) -> Parser[str]: if isinstance(pattern, str): exp = re.compile(pattern, flags) + else: + exp = pattern if isinstance(group, (str, int)): group = (group,) @@ -494,38 +494,37 @@ def seq( arg3: Parser[OUT3], arg4: Parser[OUT4], arg5: Parser[OUT5], - arg6: Parser[OUT6], - /, + arg6: Parser[OUT6] ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5, OUT6]]: ... @overload def seq( - arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], arg4: Parser[OUT4], arg5: Parser[OUT5], / + arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], arg4: Parser[OUT4], arg5: Parser[OUT5] ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5]]: ... @overload def seq( - arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], arg4: Parser[OUT4], / + arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], arg4: Parser[OUT4] ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4]]: ... @overload -def seq(arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], /) -> Parser[Tuple[OUT1, OUT2, OUT3]]: +def seq(arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3]) -> Parser[Tuple[OUT1, OUT2, OUT3]]: ... @overload -def seq(arg1: Parser[OUT1], arg2: Parser[OUT2], /) -> Parser[Tuple[OUT1, OUT2]]: +def seq(arg1: Parser[OUT1], arg2: Parser[OUT2]) -> Parser[Tuple[OUT1, OUT2]]: ... @overload -def seq(arg1: Parser[OUT1], /) -> Parser[Tuple[OUT1]]: +def seq(arg1: Parser[OUT1]) -> Parser[Tuple[OUT1]]: ... diff --git a/src/parsy/py.typed b/parsy/py.typed similarity index 100% rename from src/parsy/py.typed rename to parsy/py.typed diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..16c5eb3 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,161 @@ +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" + +[[package]] +name = "exceptiongroup" +version = "1.1.1" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "importlib-metadata" +version = "6.6.0" +description = "Read metadata from Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "packaging" +version = "23.1" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pytest" +version = "7.3.1" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "typing-extensions" +version = "4.5.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.7" +content-hash = "497757940fe550dacf532459ed493243fd21a456359d6093e9d341f115646e03" + +[metadata.files] +colorama = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, + {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, +] +importlib-metadata = [ + {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, + {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, +] +iniconfig = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] +packaging = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +pytest = [ + {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"}, + {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +typing-extensions = [ + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, +] +zipp = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] diff --git a/pyproject.toml b/pyproject.toml index 57c1027..57ef629 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,3 +9,23 @@ profile = "black" default_section = "THIRDPARTY" skip = [".tox", ".git", "docs", "dist", "build" , "todo"] known_first_party = "parsy" + +[tool.poetry] +name = "parsy" +version = "0.1.0" +description = "" +authors = ["Your Name "] +readme = "README.rst" +packages = [{include = "parsy"}] + +[tool.poetry.dependencies] +python = "^3.7" +typing-extensions = "^4.5.0" + + +[tool.poetry.group.dev.dependencies] +pytest = "^7.3.1" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/src/parsy/version.py b/src/parsy/version.py deleted file mode 100644 index f2dc0e4..0000000 --- a/src/parsy/version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "2.0" diff --git a/tests/test_parsy.py b/tests/test_parsy.py index c906c5e..722c4df 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -5,7 +5,7 @@ enum = None import re import unittest -from typing import Any, Generator +from typing import Any, Generator, Tuple from parsy import ( ParseError, @@ -62,13 +62,13 @@ def test_regex_str(self): self.assertRaises(ParseError, parser.parse, "x") - def test_regex_bytes(self): - parser = regex(rb"[0-9]") + # def test_regex_bytes(self): + # parser = regex(rb"[0-9]") - self.assertEqual(parser.parse(b"1"), b"1") - self.assertEqual(parser.parse(b"4"), b"4") + # self.assertEqual(parser.parse(b"1"), b"1") + # self.assertEqual(parser.parse(b"4"), b"4") - self.assertRaises(ParseError, parser.parse, b"x") + # self.assertRaises(ParseError, parser.parse, b"x") def test_regex_compiled(self): parser = regex(re.compile(r"[0-9]")) @@ -128,17 +128,12 @@ def test_combine(self): parser = digit.join(letter).append(letter).combine(lambda a, b, c: (c + b + a)) self.assertEqual(parser.parse("1AB"), "BA1") - def test_combine_mixed_types(self): - def demo(a: int, b: str, c: bool) -> tuple[int, str, bool]: - return (a, b, c) + # def test_combine_mixed_types(self): + # def demo(a: int, b: str, c: bool) -> Tuple[int, str, bool]: + # return (a, b, c) - parser = digit.map(int).join(letter).append(digit.map(bool)).combine(demo) - self.assertEqual(parser.parse("1A1"), (1, "A", True)) - - def test_or(self): - self.assertEqual((letter | digit).parse("a"), "a") - self.assertEqual((letter | digit).parse("1"), "1") - self.assertRaises(ParseError, (letter | digit).parse, ".") + # parser = digit.map(int).join(letter).append(digit.map(bool)).combine(demo) + # self.assertEqual(parser.parse("1A1"), (1, "A", True)) def test_concat(self): parser = letter.many().concat() @@ -427,16 +422,16 @@ def test_char_from_str(self): ex = err.exception self.assertEqual(str(ex), """expected '[ab]' at 0:0""") - def test_char_from_bytes(self): - ab = char_from(b"ab") - self.assertEqual(ab.parse(b"a"), b"a") - self.assertEqual(ab.parse(b"b"), b"b") + # def test_char_from_bytes(self): + # ab = char_from(b"ab") + # self.assertEqual(ab.parse(b"a"), b"a") + # self.assertEqual(ab.parse(b"b"), b"b") - with self.assertRaises(ParseError) as err: - ab.parse(b"x") + # with self.assertRaises(ParseError) as err: + # ab.parse(b"x") - ex = err.exception - self.assertEqual(str(ex), """expected b'[ab]' at 0""") + # ex = err.exception + # self.assertEqual(str(ex), """expected b'[ab]' at 0""") def test_string_from(self): titles = string_from("Mr", "Mr.", "Mrs", "Mrs.") @@ -487,7 +482,7 @@ def test_decimal_digit(self): def test_line_info(self): @generate - def foo() -> Generator[Any, Any, tuple[str, tuple[int, int]]]: + def foo() -> Generator[Any, Any, Tuple[str, Tuple[int, int]]]: i = yield line_info l = yield any_char return (l, i) diff --git a/tests/test_sexpr.py b/tests/test_sexpr.py index ec0fca6..1969c66 100644 --- a/tests/test_sexpr.py +++ b/tests/test_sexpr.py @@ -3,7 +3,7 @@ from parsy import generate, regex, string -whitespace = regex(r"\s+", re.MULTILINE) +whitespace = regex(r"\s+") comment = regex(r";.*") ignore = (whitespace | comment).many() From 86f919d3144df026d0769fb6415c611b761587a0 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Fri, 5 May 2023 00:48:07 +0100 Subject: [PATCH 05/16] Clean up sequence example --- examples/sequence.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/examples/sequence.py b/examples/sequence.py index 0ccdee1..08e5ef7 100644 --- a/examples/sequence.py +++ b/examples/sequence.py @@ -5,14 +5,6 @@ from parsy import regex, seq, whitespace -OUT1 = TypeVar("OUT1") -OUT2 = TypeVar("OUT2") -OUT3 = TypeVar("OUT3") -OUT4 = TypeVar("OUT4") -OUT5 = TypeVar("OUT5") -OUT6 = TypeVar("OUT6") -OUT_T = TypeVarTuple("OUT_T") - @dataclass class Person: @@ -20,12 +12,12 @@ class Person: age: int note: str - -person_parser = seq( +person_arg_sequence = seq( regex(r"\w+"), whitespace >> regex(r"\d+").map(int), whitespace >> regex(r".+"), -).combine(Person) +) +person_parser = person_arg_sequence.combine(Person) person = person_parser.parse("Rob 1000 pretty old") From edffd48b66e8d68a41a7410dd14f86689bad7d1a Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Fri, 5 May 2023 00:48:27 +0100 Subject: [PATCH 06/16] Add homogeneous typed generator example --- examples/generator_typed.py | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 examples/generator_typed.py diff --git a/examples/generator_typed.py b/examples/generator_typed.py new file mode 100644 index 0000000..ea79a52 --- /dev/null +++ b/examples/generator_typed.py @@ -0,0 +1,40 @@ + + + +from dataclasses import dataclass +from typing import Generator, Union +from parsy import Parser, generate, regex, success, whitespace + + +@dataclass +class Person: + name: str + age: int + note: str + + +def person_parser(): + @generate + def person_parser() -> Generator[Parser[str], str, Person]: + # By yielding parsers of a single type, the type system works. + # Homogeneous generator types don't exist. + name = yield regex(r"\w+") << whitespace + + # But every parser starts by matching a string anyway: other types only come + # from further function logic, which doesn't need to be part of the parser when + # using a generator: + age_text = yield regex(r"\d+") << whitespace + age = int(age_text) + if age > 20: + # Parsing depends on previously parsed values + note = yield regex(".+") >> success("Older than a score") + else: + note = yield regex(".+") + + return Person(name, age, note) + + return person_parser + +person = person_parser().parse("Rob 21 once upon a time") + +print(person) From e6ddc7743562bf95ffbaefbabea4bf96bbd04323 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Fri, 5 May 2023 01:33:04 +0100 Subject: [PATCH 07/16] Allow a mix of parser and non-parser dataclass fields --- examples/dataclass_parsing.py | 26 +++++++++++++++++++++++++- parsy/__init__.py | 2 ++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/examples/dataclass_parsing.py b/examples/dataclass_parsing.py index 5cf7ee6..e43b64c 100644 --- a/examples/dataclass_parsing.py +++ b/examples/dataclass_parsing.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional from parsy import dataparser, parse_field, regex, string, whitespace @@ -56,3 +56,27 @@ class PersonDetail: ), PersonDetail(id=Id(id="123", from_year=2004), forename=Name(name="Bob", abbreviated=None), surname=None), ] + +# Dataclass parsing where not all fields have a parsy parser + + + +@dataclass +class PersonWithRarity: + name: str = parse_field(regex(r"\w+") << whitespace) + age: int = parse_field(regex(r"\d+").map(int) << whitespace) + note: str = parse_field(regex(".+")) + rare: bool = False + + def __post_init__(self): + if self.age > 70: + self.rare = True + +person_parser = dataparser(PersonWithRarity) +person = person_parser.parse("Rob 20 whippersnapper") +print(person) +assert person == PersonWithRarity(name="Rob", age=20, note="whippersnapper", rare=False) + +person = person_parser.parse("Rob 2000 how time flies") +print(person) +assert person == PersonWithRarity(name="Rob", age=2000, note="how time flies", rare=True) diff --git a/parsy/__init__.py b/parsy/__init__.py index 2614ceb..cd17ad8 100644 --- a/parsy/__init__.py +++ b/parsy/__init__.py @@ -694,6 +694,8 @@ def data_parser(stream: str, index: int) -> Result[OUT_D]: fields: Dict[str, Any] = {} result = Result.success(index, None) for fieldname, field in datatype.__dataclass_fields__.items(): + if "parser" not in field.metadata: + continue parser: Parser[Any] = field.metadata["parser"] result = parser(stream, index) if not result.status: From 9030ee9f0e29848236cf18233b6485c8d6a40b41 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Sun, 7 May 2023 16:34:45 +0100 Subject: [PATCH 08/16] Add full dataclass parsing example --- examples/dataclass_parsing.py | 16 ++++- examples/log_parser.py | 115 ++++++++++++++++++++++++++++++++++ parsy/__init__.py | 38 ++++++++--- 3 files changed, 159 insertions(+), 10 deletions(-) create mode 100644 examples/log_parser.py diff --git a/examples/dataclass_parsing.py b/examples/dataclass_parsing.py index e43b64c..886c2ed 100644 --- a/examples/dataclass_parsing.py +++ b/examples/dataclass_parsing.py @@ -1,7 +1,7 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Optional -from parsy import dataparser, parse_field, regex, string, whitespace +from parsy import DataParser, dataparser, parse_field, regex, string, whitespace @dataclass @@ -60,7 +60,6 @@ class PersonDetail: # Dataclass parsing where not all fields have a parsy parser - @dataclass class PersonWithRarity: name: str = parse_field(regex(r"\w+") << whitespace) @@ -72,6 +71,7 @@ def __post_init__(self): if self.age > 70: self.rare = True + person_parser = dataparser(PersonWithRarity) person = person_parser.parse("Rob 20 whippersnapper") print(person) @@ -80,3 +80,13 @@ def __post_init__(self): person = person_parser.parse("Rob 2000 how time flies") print(person) assert person == PersonWithRarity(name="Rob", age=2000, note="how time flies", rare=True) + + +@dataclass +class PersonFromBase(DataParser): + name: str = parse_field(regex(r"\w+") << whitespace) + age: int = parse_field(regex(r"\d+").map(int) << whitespace) + note: str = parse_field(regex(".+")) + + +print(PersonFromBase.parser().parse("Rob 2000 how time flies")) diff --git a/examples/log_parser.py b/examples/log_parser.py new file mode 100644 index 0000000..0d9f7e0 --- /dev/null +++ b/examples/log_parser.py @@ -0,0 +1,115 @@ +from dataclasses import dataclass +from typing import List + +from parsy import dataparser, parse_field, regex, string + +text = """Sample text + +A selection of students from Riverdale High and Hogwarts took part in a quiz. This is a record of their scores. + +School = Riverdale High +Grade = 1 +Student number, Name +0, Phoebe +1, Rachel + +Student number, Score +0, 3 +1, 7 + +Grade = 2 +Student number, Name +0, Angela +1, Tristan +2, Aurora + +Student number, Score +0, 6 +1, 3 +2, 9 + +School = Hogwarts +Grade = 1 +Student number, Name +0, Ginny +1, Luna + +Student number, Score +0, 8 +1, 7 + +Grade = 2 +Student number, Name +0, Harry +1, Hermione + +Student number, Score +0, 5 +1, 10 + +Grade = 3 +Student number, Name +0, Fred +1, George + +Student number, Score +0, 0 +1, 0 +""" + + +integer = regex(r"\d+").map(int) +any_text = regex(r"[^\n]+") + + +@dataclass +class Student: + number: int = parse_field(integer << string(", ")) + name: str = parse_field(any_text << string("\n")) + + +@dataclass +class Score: + number: int = parse_field(integer << string(", ")) + score: int = parse_field(integer << string("\n")) + + +@dataclass +class StudentWithScore: + name: str + number: int + score: int + + +@dataclass +class Grade: + grade: int = parse_field(string("Grade = ") >> integer << string("\n")) + students: List[Student] = parse_field( + string("Student number, Name\n") >> dataparser(Student).many() << regex(r"\n*") + ) + scores: List[Score] = parse_field(string("Student number, Score\n") >> dataparser(Score).many() << regex(r"\n*")) + + @property + def students_with_scores(self) -> List[StudentWithScore]: + names = {st.number: st.name for st in self.students} + return [StudentWithScore(names[score.number], score.number, score.score) for score in self.scores] + + +@dataclass +class School: + name: str = parse_field(string("School = ") >> any_text << string("\n")) + grades: List[Grade] = parse_field(dataparser(Grade).many()) + + +@dataclass +class File: + header: str = parse_field(regex(r"[\s\S]*?(?=School =)")) + schools: List[School] = parse_field(dataparser(School).many()) + + +parser = dataparser(File) + + +if __name__ == "__main__": + file = parser.parse(text) + print(file.schools) diff --git a/parsy/__init__.py b/parsy/__init__.py index cd17ad8..14fd3aa 100644 --- a/parsy/__init__.py +++ b/parsy/__init__.py @@ -494,7 +494,7 @@ def seq( arg3: Parser[OUT3], arg4: Parser[OUT4], arg5: Parser[OUT5], - arg6: Parser[OUT6] + arg6: Parser[OUT6], ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5, OUT6]]: ... @@ -661,19 +661,16 @@ def become(self, other: Parser[OUT2]) -> Parser[OUT2]: # Dataclass parsers -_T = TypeVar("_T") - - def parse_field( - parser: Parser[_T], + parser: Parser[OUT], *, - default: _T = ..., + default: OUT = ..., init: bool = ..., repr: bool = ..., hash: Union[bool, None] = ..., compare: bool = ..., metadata: Mapping[Any, Any] = ..., -) -> _T: +) -> OUT: if metadata is Ellipsis: metadata = {} return field( @@ -706,3 +703,30 @@ def data_parser(stream: str, index: int) -> Result[OUT_D]: return Result.success(result.index, datatype(**fields)) return data_parser + + +@dataclass +class DataParser: + @classmethod + def parser(cls): + @Parser + def data_parser(stream: str, index: int) -> Result[cls]: + fields: Dict[str, Any] = {} + result = Result.success(index, None) + at_least_one_parser = False + for fieldname, field in cls.__dataclass_fields__.items(): + if "parser" not in field.metadata: + continue + at_least_one_parser = True + parser: Parser[Any] = field.metadata["parser"] + result = parser(stream, index) + if not result.status: + return result + index = result.index + fields[fieldname] = result.value + + if not at_least_one_parser: + raise ValueError("Called data_parser on a class containing no parser fields") + return Result.success(result.index, cls(**fields)) + + return data_parser From 5b755e823e3afafd25a0c0cdf28cfd9a128dc987 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Sun, 7 May 2023 16:42:43 +0100 Subject: [PATCH 09/16] Move dataclass example, change dataclass field access --- examples/{log_parser.py => dataclass_parser_demo.py} | 12 +++++------- parsy/__init__.py | 10 +++++----- 2 files changed, 10 insertions(+), 12 deletions(-) rename examples/{log_parser.py => dataclass_parser_demo.py} (90%) diff --git a/examples/log_parser.py b/examples/dataclass_parser_demo.py similarity index 90% rename from examples/log_parser.py rename to examples/dataclass_parser_demo.py index 0d9f7e0..52193e3 100644 --- a/examples/log_parser.py +++ b/examples/dataclass_parser_demo.py @@ -1,7 +1,8 @@ -from dataclasses import dataclass -from typing import List +from dataclasses import dataclass, field, fields +from typing import List, Optional + +from parsy import DataParser, dataparser, parse_field, regex, string, whitespace -from parsy import dataparser, parse_field, regex, string text = """Sample text @@ -107,9 +108,6 @@ class File: schools: List[School] = parse_field(dataparser(School).many()) -parser = dataparser(File) - - if __name__ == "__main__": - file = parser.parse(text) + file = dataparser(File).parse(text) print(file.schools) diff --git a/parsy/__init__.py b/parsy/__init__.py index 14fd3aa..d97d757 100644 --- a/parsy/__init__.py +++ b/parsy/__init__.py @@ -6,7 +6,7 @@ import enum import operator import re -from dataclasses import Field, dataclass, field +from dataclasses import Field, dataclass, field, fields from functools import reduce, wraps from typing import ( Any, @@ -688,9 +688,9 @@ class DataClassProtocol(Protocol): def dataparser(datatype: Type[OUT_D]) -> Parser[OUT_D]: @Parser def data_parser(stream: str, index: int) -> Result[OUT_D]: - fields: Dict[str, Any] = {} + parsed_fields: Dict[str, Any] = {} result = Result.success(index, None) - for fieldname, field in datatype.__dataclass_fields__.items(): + for field in fields(datatype): if "parser" not in field.metadata: continue parser: Parser[Any] = field.metadata["parser"] @@ -698,9 +698,9 @@ def data_parser(stream: str, index: int) -> Result[OUT_D]: if not result.status: return result index = result.index - fields[fieldname] = result.value + parsed_fields[field.name] = result.value - return Result.success(result.index, datatype(**fields)) + return Result.success(result.index, datatype(**parsed_fields)) return data_parser From 4c093e86f325d373fd9fe29694e88e8342398611 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Sun, 7 May 2023 16:43:21 +0100 Subject: [PATCH 10/16] Run pre-commit on all files --- examples/dataclass_parser_demo.py | 7 +++---- examples/generator_typed.py | 7 +++---- examples/sequence.py | 3 +-- examples/simple_logo_lexer.py | 1 + tests/test_sexpr.py | 1 - 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/examples/dataclass_parser_demo.py b/examples/dataclass_parser_demo.py index 52193e3..4c1cf34 100644 --- a/examples/dataclass_parser_demo.py +++ b/examples/dataclass_parser_demo.py @@ -1,8 +1,7 @@ -from dataclasses import dataclass, field, fields -from typing import List, Optional - -from parsy import DataParser, dataparser, parse_field, regex, string, whitespace +from dataclasses import dataclass +from typing import List +from parsy import dataparser, parse_field, regex, string text = """Sample text diff --git a/examples/generator_typed.py b/examples/generator_typed.py index ea79a52..110ed21 100644 --- a/examples/generator_typed.py +++ b/examples/generator_typed.py @@ -1,8 +1,6 @@ - - - from dataclasses import dataclass -from typing import Generator, Union +from typing import Generator + from parsy import Parser, generate, regex, success, whitespace @@ -35,6 +33,7 @@ def person_parser() -> Generator[Parser[str], str, Person]: return person_parser + person = person_parser().parse("Rob 21 once upon a time") print(person) diff --git a/examples/sequence.py b/examples/sequence.py index 08e5ef7..e48b5f2 100644 --- a/examples/sequence.py +++ b/examples/sequence.py @@ -1,7 +1,5 @@ from dataclasses import dataclass -from typing import TypeVar -from typing_extensions import TypeVarTuple from parsy import regex, seq, whitespace @@ -12,6 +10,7 @@ class Person: age: int note: str + person_arg_sequence = seq( regex(r"\w+"), whitespace >> regex(r"\d+").map(int), diff --git a/examples/simple_logo_lexer.py b/examples/simple_logo_lexer.py index de8fba2..bd90d97 100644 --- a/examples/simple_logo_lexer.py +++ b/examples/simple_logo_lexer.py @@ -9,6 +9,7 @@ """ from typing import List + from parsy import Parser, eof, regex, string, string_from, whitespace command = string_from("fd", "bk", "rt", "lt") diff --git a/tests/test_sexpr.py b/tests/test_sexpr.py index 1969c66..535afeb 100644 --- a/tests/test_sexpr.py +++ b/tests/test_sexpr.py @@ -1,4 +1,3 @@ -import re import unittest from parsy import generate, regex, string From de680488af6eaec4662c8e82076adf27109c9f45 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Mon, 12 Jun 2023 20:27:49 +0100 Subject: [PATCH 11/16] Improve type annotations --- examples/dataclass_parsing.py | 12 +- examples/simple_eval.py | 106 +++++++------- examples/simple_logo_lexer.py | 32 ++++- examples/simple_logo_parser.py | 49 ------- parsy/__init__.py | 251 ++++++++++++++++++--------------- poetry.lock | 194 ++++++++++++++++++++++++- pyproject.toml | 2 + tests/test_parsy.py | 86 +++-------- 8 files changed, 434 insertions(+), 298 deletions(-) delete mode 100644 examples/simple_logo_parser.py diff --git a/examples/dataclass_parsing.py b/examples/dataclass_parsing.py index 886c2ed..e240b3d 100644 --- a/examples/dataclass_parsing.py +++ b/examples/dataclass_parsing.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Optional -from parsy import DataParser, dataparser, parse_field, regex, string, whitespace +from parsy import dataparser, parse_field, regex, string, whitespace @dataclass @@ -80,13 +80,3 @@ def __post_init__(self): person = person_parser.parse("Rob 2000 how time flies") print(person) assert person == PersonWithRarity(name="Rob", age=2000, note="how time flies", rare=True) - - -@dataclass -class PersonFromBase(DataParser): - name: str = parse_field(regex(r"\w+") << whitespace) - age: int = parse_field(regex(r"\d+").map(int) << whitespace) - note: str = parse_field(regex(".+")) - - -print(PersonFromBase.parser().parse("Rob 2000 how time flies")) diff --git a/examples/simple_eval.py b/examples/simple_eval.py index 1debc7c..0f77bc5 100644 --- a/examples/simple_eval.py +++ b/examples/simple_eval.py @@ -1,70 +1,70 @@ -from parsy import digit, generate, match_item, regex, string, success, test_item +# from parsy import digit, generate, match_char, regex, string, success -def lexer(code): - whitespace = regex(r"\s*") - integer = digit.at_least(1).concat().map(int) - float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float) - parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many() - return parser.parse(code) +# def lexer(code): +# whitespace = regex(r"\s*") +# integer = digit.at_least(1).concat().map(int) +# float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float) +# parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many() +# return parser.parse(code) -def eval_tokens(tokens): - # This function parses and evaluates at the same time. +# def eval_tokens(tokens): +# # This function parses and evaluates at the same time. - lparen = match_item("(") - rparen = match_item(")") +# lparen = match_char("(") +# rparen = match_char(")") - @generate - def additive(): - res = yield multiplicative - sign = match_item("+") | match_item("-") - while True: - operation = yield sign | success("") - if not operation: - break - operand = yield multiplicative - if operation == "+": - res += operand - elif operation == "-": - res -= operand - return res +# @generate +# def additive(): +# res = yield multiplicative +# sign = match_char("+") | match_char("-") +# while True: +# operation = yield sign | success("") +# if not operation: +# break +# operand = yield multiplicative +# if operation == "+": +# res += operand +# elif operation == "-": +# res -= operand +# return res - @generate - def multiplicative(): - res = yield simple - op = match_item("*") | match_item("/") - while True: - operation = yield op | success("") - if not operation: - break - operand = yield simple - if operation == "*": - res *= operand - elif operation == "/": - res /= operand - return res +# @generate +# def multiplicative(): +# res = yield simple +# op = match_char("*") | match_char("/") +# while True: +# operation = yield op | success("") +# if not operation: +# break +# operand = yield simple +# if operation == "*": +# res *= operand +# elif operation == "/": +# res /= operand +# return res - @generate - def number(): - sign = yield match_item("+") | match_item("-") | success("+") - value = yield test_item(lambda x: isinstance(x, (int, float)), "number") - return value if sign == "+" else -value +# @generate +# def number(): +# sign = yield match_char("+") | match_char("-") | success("+") +# value = yield test_item(lambda x: isinstance(x, (int, float)), "number") +# return value if sign == "+" else -value - expr = additive - simple = (lparen >> expr << rparen) | number +# expr = additive +# simple = (lparen >> expr << rparen) | number - return expr.parse(tokens) +# return expr.parse(tokens) -def simple_eval(expr): - return eval_tokens(lexer(expr)) +# def simple_eval(expr): +# return eval_tokens(lexer(expr)) -import pytest # noqa isort:skip +# import pytest # noqa isort:skip -test_item = pytest.mark.skip(test_item) # This is not a test +# test_item = pytest.mark.skip(test_item) # This is not a test -if __name__ == "__main__": - print(simple_eval(input())) +# if __name__ == "__main__": +# print(simple_eval(input())) diff --git a/examples/simple_logo_lexer.py b/examples/simple_logo_lexer.py index bd90d97..1001056 100644 --- a/examples/simple_logo_lexer.py +++ b/examples/simple_logo_lexer.py @@ -8,16 +8,17 @@ etc. """ +from dataclasses import dataclass from typing import List -from parsy import Parser, eof, regex, string, string_from, whitespace +from parsy import Parser, dataparser, eof, parse_field, regex, string, string_from, whitespace command = string_from("fd", "bk", "rt", "lt") number = regex(r"[0-9]+").map(int) optional_whitespace = regex(r"\s*") eol = string("\n") -line = (optional_whitespace >> command) & (whitespace >> number) & (eof | eol | (whitespace >> eol)).result("\n") -lexer: Parser[List[object]] = line.many().map(lambda lines: sum(([t0, t1, t2] for ((t0, t1), t2) in lines), [])) +line = (optional_whitespace >> command).join(whitespace >> number) << (eof | eol | (whitespace >> eol)) +lexer = line.many() def test_lexer() -> None: @@ -27,5 +28,28 @@ def test_lexer() -> None: bk 2 """ ) - == ["fd", 1, "\n", "bk", 2, "\n"] + == [("fd", 1), ("bk", 2)] ) + + +""" +Alternative which creates a more structured output +""" + + +@dataclass +class Instruction: + command: str = parse_field(optional_whitespace >> command) + distance: int = parse_field(whitespace >> number << (eof | eol | (whitespace >> eol))) + + +instruction_parser = dataparser(Instruction).many() + +assert ( + instruction_parser.parse( + """fd 1 +bk 2 +""" + ) + == [Instruction("fd", 1), Instruction("bk", 2)] +) diff --git a/examples/simple_logo_parser.py b/examples/simple_logo_parser.py deleted file mode 100644 index 37e9570..0000000 --- a/examples/simple_logo_parser.py +++ /dev/null @@ -1,49 +0,0 @@ -from parsy import generate, match_item, test_item - - -class Command: - def __init__(self, parameter): - self.parameter = parameter - - def __repr__(self): - return f"{self.__class__.__name__}({self.parameter})" - - -class Forward(Command): - pass - - -class Backward(Command): - pass - - -class Right(Command): - pass - - -class Left(Command): - pass - - -commands = { - "fd": Forward, - "bk": Backward, - "rt": Right, - "lt": Left, -} - - -@generate -def statement(): - cmd_name = yield test_item(lambda i: i in commands.keys(), "command") - parameter = yield test_item(lambda i: isinstance(i, int), "number") - yield match_item("\n") - return commands[cmd_name](int(parameter)) - - -program = statement.many() - - -import pytest # noqa isort:skip - -test_item = pytest.mark.skip(test_item) # This is not a test diff --git a/parsy/__init__.py b/parsy/__init__.py index d97d757..533bfef 100644 --- a/parsy/__init__.py +++ b/parsy/__init__.py @@ -17,6 +17,7 @@ Generator, Generic, List, + Literal, Mapping, Optional, Tuple, @@ -178,13 +179,13 @@ def map(self: Parser[OUT1], map_fn: Callable[[OUT1], OUT2]) -> Parser[OUT2]: def concat(self: Parser[List[str]]) -> Parser[str]: return self.map("".join) - def then(self: Parser, other: Parser[OUT2]) -> Parser[OUT2]: + def then(self: Parser[Any], other: Parser[OUT2]) -> Parser[OUT2]: return (self & other).map(lambda t: t[1]) - def skip(self: Parser[OUT1], other: Parser) -> Parser[OUT1]: + def skip(self: Parser[OUT1], other: Parser[Any]) -> Parser[OUT1]: return (self & other).map(lambda t: t[0]) - def result(self: Parser, res: OUT2) -> Parser[OUT2]: + def result(self: Parser[Any], res: OUT2) -> Parser[OUT2]: return self >> success(res) def many(self: Parser[OUT_co]) -> Parser[List[OUT_co]]: @@ -223,17 +224,22 @@ def at_most(self: Parser[OUT_co], n: int) -> Parser[List[OUT_co]]: return self.times(0, n) def at_least(self: Parser[OUT_co], n: int) -> Parser[List[OUT_co]]: - # TODO: I cannot for the life of me work out why mypy rejects the following. - # Pyright does not reject it. - return (self.times(n) & self.many()).map(lambda t: t[0] + t[1]) + return self.times(min=n, max=float("inf")) + + @overload + def optional(self: Parser[OUT1], default: None = None) -> Parser[OUT1 | None]: + pass + + @overload + def optional(self: Parser[OUT1], default: OUT2) -> Parser[OUT1 | OUT2]: + pass - # TODO overloads to distinguish calling with and without default def optional(self: Parser[OUT1], default: OUT2 | None = None) -> Parser[OUT1 | OUT2 | None]: return self.times(0, 1).map(lambda v: v[0] if v else default) def until( self: Parser[OUT_co], - other: Parser, + other: Parser[Any], min: int = 0, max: int | float = float("inf"), consume_other: bool = False, @@ -243,7 +249,6 @@ def until_parser(stream: str, index: int) -> Result[List[OUT_co]]: values = [] times = 0 while True: - # try parser first res = other(stream, index) if res.status and times >= min: @@ -275,14 +280,15 @@ def until_parser(stream: str, index: int) -> Result[List[OUT_co]]: return until_parser def sep_by( - self: Parser[OUT_co], sep: Parser, *, min: int = 0, max: int | float = float("inf") + self: Parser[OUT_co], sep: Parser[Any], *, min: int = 0, max: int | float = float("inf") ) -> Parser[List[OUT_co]]: zero_times: Parser[List[OUT_co]] = success([]) if max == 0: return zero_times - res = (self.times(1) & (sep >> self).times(min - 1, max - 1)).map(lambda t: t[0] + t[1]) + # TODO + res = (self & (sep >> self).times(min - 1, max - 1)).map(lambda t: [t[0], *t[1]]) if min == 0: - res |= zero_times + res = res | zero_times return res def desc(self, description: str) -> Parser[OUT_co]: @@ -296,22 +302,15 @@ def desc_parser(stream: str, index: int) -> Result[OUT_co]: return desc_parser - def mark(self): - @generate - def marked(): - start = yield line_info - body = yield self - end = yield line_info - return (start, body, end) - - return marked + def mark(self: Parser[OUT_co]) -> Parser[Tuple[Tuple[int, int], OUT_co, Tuple[int, int]]]: + return seq(line_info, self, line_info) - def tag(self, name): + def tag(self: Parser[OUT], name: str) -> Parser[Tuple[str, OUT]]: return self.map(lambda v: (name, v)) - def should_fail(self, description): + def should_fail(self: Parser[OUT], description: str) -> Parser[Result[OUT]]: @Parser - def fail_parser(stream, index): + def fail_parser(stream: str, index: int) -> Result[Result[OUT]]: res = self(stream, index) if res.status: return Result.failure(index, description) @@ -327,7 +326,7 @@ def __add__(self: Parser[str], other: Parser[str]) -> Parser[str]: # `str` and `list`, which satisfy that. return (self & other).map(lambda t: t[0] + t[1]) - def __mul__(self, other): + def __mul__(self: Parser[OUT], other: range | int) -> Parser[List[OUT]]: if isinstance(other, range): return self.times(other.start, other.stop - 1) return self.times(other) @@ -365,13 +364,13 @@ def join(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[tuple[OUT1, OUT2]]: """TODO alternative name for `&`, decide on naming""" return self & other - def as_tuple(self: Parser[OUT_co]) -> Parser[Tuple[OUT_co]]: + def as_tuple(self: Parser[OUT]) -> Parser[Tuple[OUT]]: return self.map(lambda value: (value,)) - def append(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[OUT2_co]) -> Parser[Tuple[Unpack[OUT_T], OUT2_co]]: + def append(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[OUT2]) -> Parser[Tuple[Unpack[OUT_T], OUT2]]: """ Take a parser which produces a tuple of values, and add another parser's result - to the end of that tuple + to the end of that tuples """ return self.bind(lambda self_value: other.bind(lambda other_value: success((*self_value, other_value)))) @@ -386,11 +385,11 @@ def combine(self: Parser[Tuple[Unpack[OUT_T]]], combine_fn: Callable[[Unpack[OUT # >> - def __rshift__(self, other: Parser[OUT2_co]) -> Parser[OUT2_co]: + def __rshift__(self, other: Parser[OUT]) -> Parser[OUT]: return self.then(other) # << - def __lshift__(self, other: Parser) -> Parser[OUT_co]: + def __lshift__(self, other: Parser[Any]) -> Parser[OUT_co]: return self.skip(other) @@ -450,7 +449,7 @@ def string(s: str, transform: Callable[[str], str] = noop) -> Parser[str]: transformed_s = transform(s) @Parser - def string_parser(stream, index): + def string_parser(stream: str, index: int) -> Result[str]: if transform(stream[index : index + slen]) == transformed_s: return Result.success(index + slen, s) else: @@ -458,28 +457,91 @@ def string_parser(stream, index): return string_parser +PatternType = Union[str, re.Pattern[str]] + +@overload +def regex( + pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Literal[0] = 0 +) -> Parser[str]: + ... + + +@overload +def regex( + pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: str | int +) -> Parser[str]: + ... + + +@overload +def regex( + pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int] +) -> Parser[Tuple[str]]: + ... + + +@overload +def regex( + pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int] +) -> Parser[Tuple[str, str]]: + ... + +@overload +def regex( + pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int, str | int] +) -> Parser[Tuple[str, str, str]]: + ... + +@overload +def regex( + pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int, str | int, str | int] +) -> Parser[Tuple[str, str, str, str]]: + ... -# @overload -# def regex(pattern: str, flags:re.RegexFlag, group: int) -> Parser[str]: -# ... -# @overload -# def regex(pattern: str, *, flags:re.RegexFlag, group: str) -> Parser[str]: -# ... +@overload +def regex( + pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int, str | int, str | int, str | int] +) -> Parser[Tuple[str, str, str, str, str]]: + ... -def regex(pattern: str, *, flags=re.RegexFlag(0), group: Any = 0) -> Parser[str]: +def regex( + pattern: PatternType, + *, + flags: re.RegexFlag = re.RegexFlag(0), + group: str | int | Tuple[str | int, ...] = 0, +) -> Parser[str | Tuple[str, ...]]: if isinstance(pattern, str): exp = re.compile(pattern, flags) else: exp = pattern - if isinstance(group, (str, int)): - group = (group,) + + if isinstance(group, tuple) and len(group) >= 2: + first_group, second_group, *groups = group + + @Parser + def regex_parser_tuple(stream: str, index: int) -> Result[Tuple[str, ...]]: + match = exp.match(stream, index) + if match: + match_result = match.group(first_group, second_group, *groups) + return Result.success(match.end(), match_result) + else: + return Result.failure(index, exp.pattern) + + return regex_parser_tuple + + if isinstance(group, tuple) and len(group) == 1: + target_group = group[0] + elif isinstance(group, tuple): + target_group = 0 + else: + target_group = group @Parser - def regex_parser(stream, index): + def regex_parser(stream: str, index: int) -> Result[str]: match = exp.match(stream, index) if match: - return Result.success(match.end(), match.group(*group)) + return Result.success(match.end(), match.group(target_group)) else: return Result.failure(index, exp.pattern) @@ -489,56 +551,58 @@ def regex_parser(stream, index): # Each number of args needs to be typed separately @overload def seq( - arg1: Parser[OUT1], - arg2: Parser[OUT2], - arg3: Parser[OUT3], - arg4: Parser[OUT4], - arg5: Parser[OUT5], - arg6: Parser[OUT6], + __arg1: Parser[OUT1], + __arg2: Parser[OUT2], + __arg3: Parser[OUT3], + __arg4: Parser[OUT4], + __arg5: Parser[OUT5], + __arg6: Parser[OUT6], ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5, OUT6]]: ... @overload def seq( - arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], arg4: Parser[OUT4], arg5: Parser[OUT5] + __arg1: Parser[OUT1], __arg2: Parser[OUT2], __arg3: Parser[OUT3], __arg4: Parser[OUT4], __arg5: Parser[OUT5] ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5]]: ... @overload def seq( - arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3], arg4: Parser[OUT4] + __arg1: Parser[OUT1], __arg2: Parser[OUT2], __arg3: Parser[OUT3], __arg4: Parser[OUT4] ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4]]: ... @overload -def seq(arg1: Parser[OUT1], arg2: Parser[OUT2], arg3: Parser[OUT3]) -> Parser[Tuple[OUT1, OUT2, OUT3]]: +def seq(__arg1: Parser[OUT1], __arg2: Parser[OUT2], __arg3: Parser[OUT3]) -> Parser[Tuple[OUT1, OUT2, OUT3]]: ... @overload -def seq(arg1: Parser[OUT1], arg2: Parser[OUT2]) -> Parser[Tuple[OUT1, OUT2]]: +def seq(__arg1: Parser[OUT1], __arg2: Parser[OUT2]) -> Parser[Tuple[OUT1, OUT2]]: ... @overload -def seq(arg1: Parser[OUT1]) -> Parser[Tuple[OUT1]]: +def seq(__arg1: Parser[OUT1]) -> Parser[Tuple[OUT1]]: ... @overload -def seq(arg1: Parser, *args: Parser) -> Parser[Tuple]: +def seq(*args: Parser[Any]) -> Parser[Tuple[Any, ...]]: ... -def seq(arg1: Parser, *args: Parser) -> Parser[Tuple]: - arg1 = arg1.as_tuple() - - for p in args: - arg1 = arg1.append(p) - return arg1 +def seq(*args: Parser[Any]) -> Parser[Tuple[Any, ...]]: + if not args: + raise ValueError() + first, *remainder = args + parser = first.as_tuple() + for p in remainder: + parser = parser.append(p) # type: ignore + return parser # TODO the rest of the functions here need type annotations. @@ -549,32 +613,21 @@ def seq(arg1: Parser, *args: Parser) -> Parser[Tuple]: # yet, for simplicity. -def test_item(func, description): +def test_char(func: Callable[[str], bool], description: str) -> Parser[str]: @Parser - def test_item_parser(stream, index): + def test_char_parser(stream: str, index: int) -> Result[str]: if index < len(stream): - if isinstance(stream, bytes): - # Subscripting bytes with `[index]` instead of - # `[index:index + 1]` returns an int - item = stream[index : index + 1] - else: - item = stream[index] - if func(item): - return Result.success(index + 1, item) + if func(stream[index]): + return Result.success(index + 1, stream[index]) return Result.failure(index, description) - return test_item_parser + return test_char_parser -def test_char(func: Callable[[str], bool], description: str) -> Parser[str]: - # Implementation is identical to test_item - return test_item(func, description) - - -def match_item(item, description=None): +def match_char(char: str, description: Optional[str] = None) -> Parser[str]: if description is None: - description = str(item) - return test_item(lambda i: item == i, description) + description = char + return test_char(lambda i: char == i, description) def string_from(*strings: str, transform: Callable[[str], str] = noop) -> Parser[str]: @@ -583,13 +636,13 @@ def string_from(*strings: str, transform: Callable[[str], str] = noop) -> Parser # TODO drop bytes support here -def char_from(string): +def char_from(string: str) -> Parser[str]: return test_char(lambda c: c in string, "[" + string + "]") -def peek(parser): +def peek(parser: Parser[OUT]) -> Parser[OUT]: @Parser - def peek_parser(stream, index): + def peek_parser(stream: str, index: int) -> Result[OUT]: result = parser(stream, index) if result.status: return Result.success(index, result.value) @@ -645,7 +698,7 @@ class forward_declaration(Parser[OUT]): def __init__(self) -> None: pass - def _raise_error(self, *args, **kwargs): + def _raise_error(self, *args: Any, **kwargs: Any) -> Any: raise ValueError("You must use 'become' before attempting to call `parse` or `parse_partial`") parse = _raise_error @@ -679,7 +732,8 @@ def parse_field( class DataClassProtocol(Protocol): - __dataclass_fields__: ClassVar[Dict[str, Field]] + __dataclass_fields__: ClassVar[Dict[str, Field[Any]]] + __init__: Callable OUT_D = TypeVar("OUT_D", bound=DataClassProtocol) @@ -696,37 +750,10 @@ def data_parser(stream: str, index: int) -> Result[OUT_D]: parser: Parser[Any] = field.metadata["parser"] result = parser(stream, index) if not result.status: - return result + return result # type: ignore index = result.index parsed_fields[field.name] = result.value return Result.success(result.index, datatype(**parsed_fields)) return data_parser - - -@dataclass -class DataParser: - @classmethod - def parser(cls): - @Parser - def data_parser(stream: str, index: int) -> Result[cls]: - fields: Dict[str, Any] = {} - result = Result.success(index, None) - at_least_one_parser = False - for fieldname, field in cls.__dataclass_fields__.items(): - if "parser" not in field.metadata: - continue - at_least_one_parser = True - parser: Parser[Any] = field.metadata["parser"] - result = parser(stream, index) - if not result.status: - return result - index = result.index - fields[fieldname] = result.value - - if not at_least_one_parser: - raise ValueError("Called data_parser on a class containing no parser fields") - return Result.success(result.index, cls(**fields)) - - return data_parser diff --git a/poetry.lock b/poetry.lock index 16c5eb3..67d12e9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,3 +1,39 @@ +[[package]] +name = "black" +version = "23.3.0" +description = "The uncompromising code formatter." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} + [[package]] name = "colorama" version = "0.4.6" @@ -42,6 +78,34 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "mypy" +version = "1.3.0" +description = "Optional static typing for Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" +optional = false +python-versions = ">=3.5" + [[package]] name = "packaging" version = "23.1" @@ -50,6 +114,29 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "pathspec" +version = "0.11.1" +description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "platformdirs" +version = "3.5.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing-extensions = {version = ">=4.5", markers = "python_version < \"3.8\""} + +[package.extras] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.2.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] + [[package]] name = "pluggy" version = "1.0.0" @@ -93,6 +180,14 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "typed-ast" +version = "1.5.4" +description = "a fork of Python 2 and 3 ast modules with type comment support" +category = "dev" +optional = false +python-versions = ">=3.6" + [[package]] name = "typing-extensions" version = "4.5.0" @@ -116,9 +211,40 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "497757940fe550dacf532459ed493243fd21a456359d6093e9d341f115646e03" +content-hash = "3305bba6acd3af12565b1a45daa6ba063b24fc0ac09cfdbf79602a0201497f21" [metadata.files] +black = [ + {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"}, + {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"}, + {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"}, + {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"}, + {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"}, + {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"}, + {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"}, + {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"}, + {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"}, + {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"}, + {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"}, + {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"}, + {file = "black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"}, + {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"}, +] +click = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] colorama = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -135,10 +261,50 @@ iniconfig = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +mypy = [ + {file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"}, + {file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"}, + {file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"}, + {file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"}, + {file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"}, + {file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"}, + {file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"}, + {file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"}, + {file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"}, + {file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"}, + {file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"}, + {file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"}, + {file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"}, + {file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"}, + {file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"}, + {file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"}, + {file = "mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"}, + {file = "mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"}, + {file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"}, + {file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"}, + {file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"}, + {file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"}, + {file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"}, + {file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"}, + {file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"}, + {file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"}, +] +mypy-extensions = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] packaging = [ {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] +pathspec = [ + {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, + {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, +] +platformdirs = [ + {file = "platformdirs-3.5.1-py3-none-any.whl", hash = "sha256:e2378146f1964972c03c085bb5662ae80b2b8c06226c54b2ff4aa9483e8a13a5"}, + {file = "platformdirs-3.5.1.tar.gz", hash = "sha256:412dae91f52a6f84830f39a8078cecd0e866cb72294a5c66808e74d5e88d251f"}, +] pluggy = [ {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, @@ -151,6 +317,32 @@ tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +typed-ast = [ + {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, + {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, + {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, + {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"}, + {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"}, + {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"}, + {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"}, + {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"}, + {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"}, + {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"}, + {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"}, + {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"}, + {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"}, + {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"}, + {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"}, + {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"}, + {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"}, + {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"}, + {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"}, + {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"}, + {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"}, + {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"}, + {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, + {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, +] typing-extensions = [ {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, diff --git a/pyproject.toml b/pyproject.toml index 57ef629..44343a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,8 @@ typing-extensions = "^4.5.0" [tool.poetry.group.dev.dependencies] pytest = "^7.3.1" +mypy = "^1.3.0" +black = "^23.3.0" [build-system] requires = ["poetry-core"] diff --git a/tests/test_parsy.py b/tests/test_parsy.py index 722c4df..4c2f9e9 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -1,8 +1,5 @@ # -*- code: utf8 -*- -try: - import enum -except ImportError: - enum = None +import enum import re import unittest from typing import Any, Generator, Tuple @@ -10,6 +7,7 @@ from parsy import ( ParseError, Parser, + Result, any_char, char_from, decimal_digit, @@ -21,14 +19,12 @@ letter, line_info, line_info_at, - match_item, peek, regex, string, string_from, ) from parsy import test_char as parsy_test_char # to stop pytest thinking this function is a test -from parsy import test_item as parsy_test_item # to stop pytest thinking this function is a test from parsy import whitespace @@ -285,7 +281,6 @@ def test_at_least(self): self.assertEqual(ab.at_least(2).parse_partial("abababc"), (["ab", "ab", "ab"], "c")) def test_until(self): - until = string("s").until(string("x")) s = "ssssx" @@ -305,7 +300,6 @@ def test_until(self): self.assertEqual(until.parse_partial("xxxx"), ([], "xxxx")) def test_until_with_consume_other(self): - until = string("s").until(string("x"), consume_other=True) self.assertEqual(until.parse("ssssx"), 4 * ["s"] + ["x"]) @@ -317,7 +311,6 @@ def test_until_with_consume_other(self): self.assertRaises(ParseError, until.parse, "xssssxy") def test_until_with_min(self): - until = string("s").until(string("x"), min=3) self.assertEqual(until.parse_partial("sssx"), (3 * ["s"], "x")) @@ -326,7 +319,6 @@ def test_until_with_min(self): self.assertRaises(ParseError, until.parse_partial, "ssx") def test_until_with_max(self): - # until with max until = string("s").until(string("x"), max=3) @@ -336,7 +328,6 @@ def test_until_with_max(self): self.assertRaises(ParseError, until.parse_partial, "ssssx") def test_until_with_min_max(self): - until = string("s").until(string("x"), min=3, max=5) self.assertEqual(until.parse_partial("sssx"), (3 * ["s"], "x")) @@ -512,17 +503,24 @@ def test_should_fail(self): self.assertRaises(ParseError, not_a_digit.parse, "8ab") - if enum is not None: + def test_should_fail_isolated(self): + not_a_digit = digit.should_fail("not a digit") - def test_from_enum_string(self): - class Pet(enum.Enum): - CAT = "cat" - DOG = "dog" + self.assertEqual( + not_a_digit.parse_partial("a"), + (Result(status=False, index=-1, value=None, furthest=0, expected=frozenset({"a digit"})), "a"), + ) + self.assertRaises(ParseError, not_a_digit.parse_partial, "1") - pet = from_enum(Pet) - self.assertEqual(pet.parse("cat"), Pet.CAT) - self.assertEqual(pet.parse("dog"), Pet.DOG) - self.assertRaises(ParseError, pet.parse, "foo") + def test_from_enum_string(self): + class Pet(enum.Enum): + CAT = "cat" + DOG = "dog" + + pet = from_enum(Pet) + self.assertEqual(pet.parse("cat"), Pet.CAT) + self.assertEqual(pet.parse("dog"), Pet.DOG) + self.assertRaises(ParseError, pet.parse, "foo") def test_from_enum_int(self): class Position(enum.Enum): @@ -544,54 +542,6 @@ class Pet(enum.Enum): self.assertEqual(pet.parse("CAT"), Pet.CAT) -class TestParserTokens(unittest.TestCase): - """ - Tests that ensure that `.parse` can handle an arbitrary list of tokens, - rather than a string. - """ - - # Some opaque objects we will use in our stream: - START = object() - STOP = object() - - def test_test_item(self): - start_stop = parsy_test_item(lambda i: i in [self.START, self.STOP], "START/STOP") - self.assertEqual(start_stop.parse([self.START]), self.START) - self.assertEqual(start_stop.parse([self.STOP]), self.STOP) - with self.assertRaises(ParseError) as err: - start_stop.many().parse([self.START, "hello"]) - - ex = err.exception - self.assertEqual(str(ex), "expected one of 'EOF', 'START/STOP' at 1") - self.assertEqual(ex.expected, {"EOF", "START/STOP"}) - self.assertEqual(ex.index, 1) - - def test_match_item(self): - self.assertEqual(match_item(self.START).parse([self.START]), self.START) - with self.assertRaises(ParseError) as err: - match_item(self.START, "START").parse([]) - - ex = err.exception - self.assertEqual(str(ex), "expected 'START' at 0") - - def test_parse_tokens(self): - other_vals = parsy_test_item(lambda i: i not in [self.START, self.STOP], "not START/STOP") - - bracketed = match_item(self.START) >> other_vals.many() << match_item(self.STOP) - stream = [self.START, "hello", 1, 2, "goodbye", self.STOP] - result = bracketed.parse(stream) - self.assertEqual(result, ["hello", 1, 2, "goodbye"]) - - def test_index(self): - @generate - def foo(): - i = yield index - l = yield letter - return (l, i) - - self.assertEqual(foo.many().parse(["A", "B"]), [("A", 0), ("B", 1)]) - - class TestUtils(unittest.TestCase): def test_line_info_at(self): text = "abc\ndef" From 9d9ed23e02440fd3954fbde3270e14ca4c5b3f6c Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Thu, 15 Jun 2023 17:54:54 +0100 Subject: [PATCH 12/16] Improve type info further, remove things which are hard to type --- parsy/__init__.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/parsy/__init__.py b/parsy/__init__.py index 533bfef..b06cc41 100644 --- a/parsy/__init__.py +++ b/parsy/__init__.py @@ -242,20 +242,15 @@ def until( other: Parser[Any], min: int = 0, max: int | float = float("inf"), - consume_other: bool = False, ) -> Parser[List[OUT_co]]: @Parser def until_parser(stream: str, index: int) -> Result[List[OUT_co]]: - values = [] + values: List[OUT_co] = [] times = 0 while True: # try parser first res = other(stream, index) if res.status and times >= min: - if consume_other: - # consume other - values.append(res.value) - index = res.index return Result.success(index, values) # exceeded max? @@ -282,7 +277,7 @@ def until_parser(stream: str, index: int) -> Result[List[OUT_co]]: def sep_by( self: Parser[OUT_co], sep: Parser[Any], *, min: int = 0, max: int | float = float("inf") ) -> Parser[List[OUT_co]]: - zero_times: Parser[List[OUT_co]] = success([]) + zero_times = success(list[OUT_co]()) if max == 0: return zero_times # TODO @@ -523,8 +518,7 @@ def regex( def regex_parser_tuple(stream: str, index: int) -> Result[Tuple[str, ...]]: match = exp.match(stream, index) if match: - match_result = match.group(first_group, second_group, *groups) - return Result.success(match.end(), match_result) + return Result.success(match.end(), match.group(first_group, second_group, *groups)) else: return Result.failure(index, exp.pattern) @@ -733,7 +727,7 @@ def parse_field( class DataClassProtocol(Protocol): __dataclass_fields__: ClassVar[Dict[str, Field[Any]]] - __init__: Callable + __init__: Callable[..., Any] OUT_D = TypeVar("OUT_D", bound=DataClassProtocol) From 75a7c6a81255875186daabee638cec824c511326 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Fri, 16 Jun 2023 00:11:40 +0100 Subject: [PATCH 13/16] Remove uses of combine_other in 'until' parser --- docs/ref/methods_and_combinators.rst | 7 ++----- tests/test_parsy.py | 12 ------------ 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/docs/ref/methods_and_combinators.rst b/docs/ref/methods_and_combinators.rst index 3e37df5..69b7ab4 100644 --- a/docs/ref/methods_and_combinators.rst +++ b/docs/ref/methods_and_combinators.rst @@ -111,13 +111,12 @@ can be used and manipulated as below. Returns a parser that expects the initial parser at least ``n`` times, and produces a list of the results. - .. method:: until(other_parser, [min=0, max=inf, consume_other=False]) + .. method:: until(other_parser, [min=0, max=inf]) Returns a parser that expects the initial parser followed by ``other_parser``. The initial parser is expected at least ``min`` times and at most ``max`` times. By default, it does not consume ``other_parser`` and it produces a list of the - results excluding ``other_parser``. If ``consume_other`` is ``True`` then - ``other_parser`` is consumed and its result is included in the list of results. + results excluding ``other_parser``. .. code:: python @@ -125,8 +124,6 @@ can be used and manipulated as below. [['A','A','A'], 'BC'] >>> string('A').until(string('B')).then(string('BC')).parse('AAABC') 'BC' - >>> string('A').until(string('BC'), consume_other=True).parse('AAABC') - ['A', 'A', 'A', 'BC'] .. versionadded:: 2.0 diff --git a/tests/test_parsy.py b/tests/test_parsy.py index 4c2f9e9..163a435 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -15,7 +15,6 @@ forward_declaration, from_enum, generate, - index, letter, line_info, line_info_at, @@ -299,17 +298,6 @@ def test_until(self): until = regex(".").until(string("x")) self.assertEqual(until.parse_partial("xxxx"), ([], "xxxx")) - def test_until_with_consume_other(self): - until = string("s").until(string("x"), consume_other=True) - - self.assertEqual(until.parse("ssssx"), 4 * ["s"] + ["x"]) - self.assertEqual(until.parse_partial("ssssxy"), (4 * ["s"] + ["x"], "y")) - - self.assertEqual(until.parse_partial("xxx"), (["x"], "xx")) - - self.assertRaises(ParseError, until.parse, "ssssy") - self.assertRaises(ParseError, until.parse, "xssssxy") - def test_until_with_min(self): until = string("s").until(string("x"), min=3) From 3660432b0eb189410460d4d71458ea26acd5a329 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Wed, 21 Jun 2023 19:34:46 +0100 Subject: [PATCH 14/16] Add type annotations for __add__ --- parsy/__init__.py | 135 +++++++++++++++++++++++++++++++++----------- tests/test_parsy.py | 95 ++++++++++++++++++++++++------- 2 files changed, 178 insertions(+), 52 deletions(-) diff --git a/parsy/__init__.py b/parsy/__init__.py index b06cc41..db83858 100644 --- a/parsy/__init__.py +++ b/parsy/__init__.py @@ -17,9 +17,9 @@ Generator, Generic, List, - Literal, Mapping, Optional, + Pattern, Tuple, Type, TypeVar, @@ -28,7 +28,7 @@ overload, ) -from typing_extensions import ParamSpec, Protocol, TypeVarTuple, Unpack +from typing_extensions import Literal, ParamSpec, Protocol, TypeVarTuple, Unpack OUT = TypeVar("OUT") OUT1 = TypeVar("OUT1") @@ -38,12 +38,14 @@ OUT5 = TypeVar("OUT5") OUT6 = TypeVar("OUT6") OUT_T = TypeVarTuple("OUT_T") +OUT_T2 = TypeVarTuple("OUT_T2") OUT_co = TypeVar("OUT_co", covariant=True) OUT2_co = TypeVar("OUT2_co", covariant=True) P = ParamSpec("P") T = TypeVar("T") +T_co = TypeVar("T_co", covariant=True) def noop(val: T) -> T: @@ -125,6 +127,20 @@ def aggregate(self: Result[OUT], other: Optional[Result[Any]]) -> Result[OUT]: return Result(self.status, self.index, self.value, other.furthest, other.expected) +class Addable(Protocol): + def __add__(__self: T, __other: T) -> T: + ... + + +Tadd = TypeVar("Tadd", bound=Addable, covariant=True) + + +# def xx(a: Tuple[Unpack[OUT_T]], b: Tuple[Unpack[OUT_T2]]) -> Tuple[Unpack[OUT_T], Unpack[OUT_T2]]: +# return a + b + +a = tuple("a") + + class Parser(Generic[OUT_co]): """ A Parser is an object that wraps a function whose arguments are @@ -275,13 +291,13 @@ def until_parser(stream: str, index: int) -> Result[List[OUT_co]]: return until_parser def sep_by( - self: Parser[OUT_co], sep: Parser[Any], *, min: int = 0, max: int | float = float("inf") - ) -> Parser[List[OUT_co]]: - zero_times = success(list[OUT_co]()) + self: Parser[OUT], sep: Parser[Any], *, min: int = 0, max: int | float = float("inf") + ) -> Parser[List[OUT]]: + zero_times = success(cast(List[OUT], [])) if max == 0: return zero_times - # TODO - res = (self & (sep >> self).times(min - 1, max - 1)).map(lambda t: [t[0], *t[1]]) + + res = (self & (sep >> self).times(min - 1, max - 1)).combine(lambda first, repeats: [first, *repeats]) if min == 0: res = res | zero_times return res @@ -313,13 +329,60 @@ def fail_parser(stream: str, index: int) -> Result[Result[OUT]]: return fail_parser - def __add__(self: Parser[str], other: Parser[str]) -> Parser[str]: - # TODO it would be nice to get more generic type checks here. - # I want some way of saying "the input value can be any - # type that has an ``__add__`` method that returns the same type - # as the two inputs". This would allow us to use it for both - # `str` and `list`, which satisfy that. - return (self & other).map(lambda t: t[0] + t[1]) + # Special cases for adding tuples + # We have to unroll each number of elements of the second tuple because Pylance + # can only "Unpack" one tuple at a time + @overload + def __add__(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1]]) -> Parser[Tuple[Unpack[OUT_T], OUT1]]: + ... + + @overload + def __add__( + self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1, OUT2]] + ) -> Parser[Tuple[Unpack[OUT_T], OUT1, OUT2]]: + ... + + @overload + def __add__( + self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1, OUT2, OUT3]] + ) -> Parser[Tuple[Unpack[OUT_T], OUT1, OUT2, OUT3]]: + ... + + @overload + def __add__( + self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1, OUT2, OUT3, OUT4]] + ) -> Parser[Tuple[Unpack[OUT_T], OUT1, OUT2, OUT3, OUT4]]: + ... + + @overload + def __add__( + self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5]] + ) -> Parser[Tuple[Unpack[OUT_T], OUT1, OUT2, OUT3, OUT4, OUT5]]: + ... + + # This covers tuples where `other` has more elements than the above overloads + # and all types are the same in `self` and `other` + @overload + def __add__( + self: Parser[Tuple[OUT, ...]], other: Parser[Tuple[OUT, ...]] + ) -> Parser[Tuple[OUT, ...]]: + ... + + # Same as above, but for when all types are not the same + @overload + def __add__( + self: Parser[Tuple[Any, ...]], other: Parser[Tuple[Any, ...]] + ) -> Parser[Tuple[Any, ...]]: + ... + + + # Type annotations for any addable types + @overload + def __add__(self: Parser[Tadd], other: Parser[Tadd]) -> Parser[Tadd]: + ... + + def __add__(self: Parser[Any], other: Parser[Any]) -> Parser[Any]: + return (self & other).combine(lambda first, second: first + second) def __mul__(self: Parser[OUT], other: range | int) -> Parser[List[OUT]]: if isinstance(other, range): @@ -342,18 +405,17 @@ def alt_parser(stream: str, index: int) -> Result[Union[OUT1, OUT2]]: def __and__(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[tuple[OUT1, OUT2]]: @Parser - def seq_parser(stream: str, index: int) -> Result[tuple[OUT1, OUT2]]: - result0 = None - result1 = self(stream, index).aggregate(result0) - if not result1.status: - return result1 # type: ignore - result2 = other(stream, result1.index).aggregate(result1) - if not result2.status: - return result2 # type: ignore + def and_parser(stream: str, index: int) -> Result[tuple[OUT1, OUT2]]: + self_result = self(stream, index) + if not self_result.status: + return self_result # type: ignore + other_result = other(stream, self_result.index).aggregate(self_result) + if not other_result.status: + return other_result # type: ignore - return Result.success(result2.index, (result1.value, result2.value)).aggregate(result2) + return Result.success(other_result.index, (self_result.value, other_result.value)).aggregate(other_result) - return seq_parser + return and_parser def join(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[tuple[OUT1, OUT2]]: """TODO alternative name for `&`, decide on naming""" @@ -452,19 +514,17 @@ def string_parser(stream: str, index: int) -> Result[str]: return string_parser -PatternType = Union[str, re.Pattern[str]] + +PatternType = Union[str, Pattern[str]] + @overload -def regex( - pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Literal[0] = 0 -) -> Parser[str]: +def regex(pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Literal[0] = 0) -> Parser[str]: ... @overload -def regex( - pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: str | int -) -> Parser[str]: +def regex(pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: str | int) -> Parser[str]: ... @@ -481,21 +541,30 @@ def regex( ) -> Parser[Tuple[str, str]]: ... + @overload def regex( pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int, str | int] ) -> Parser[Tuple[str, str, str]]: ... + @overload def regex( - pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int, str | int, str | int] + pattern: PatternType, + *, + flags: re.RegexFlag = re.RegexFlag(0), + group: Tuple[str | int, str | int, str | int, str | int], ) -> Parser[Tuple[str, str, str, str]]: ... + @overload def regex( - pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int, str | int, str | int, str | int] + pattern: PatternType, + *, + flags: re.RegexFlag = re.RegexFlag(0), + group: Tuple[str | int, str | int, str | int, str | int, str | int], ) -> Parser[Tuple[str, str, str, str, str]]: ... diff --git a/tests/test_parsy.py b/tests/test_parsy.py index 163a435..6ae5309 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -2,7 +2,7 @@ import enum import re import unittest -from typing import Any, Generator, Tuple +from typing import Any, Generator, List, Tuple, cast from parsy import ( ParseError, @@ -95,7 +95,7 @@ def test_then(self): def test_bind(self): piped = None - def binder(x): + def binder(x: str): nonlocal piped piped = x return string("y") @@ -365,8 +365,65 @@ def test_sep_by_with_min_and_max(self): self.assertRaises(ParseError, digit_list.parse, "7.6") self.assertEqual(digit.sep_by(string(","), max=0).parse(""), []) - def test_add(self): - self.assertEqual((letter + digit).parse("a1"), "a1") + def test_add_tuple(self): + """This test code is for checking that pylance gives no type errors""" + letter_tuple = letter.as_tuple() + int_parser = regex(r"\d").map(int) + two_int_parser = int_parser & int_parser + barcode = letter_tuple + two_int_parser + + def my_foo(first: str, second: int, third: int) -> str: + return first + str(third + second) + + foo_parser = barcode.combine(my_foo) + + self.assertEqual(foo_parser.parse("a13"), "a4") + + def test_add_too_long_tuple_uniform_types(self): + """This test code is for checking that pylance gives no type errors""" + letter_tuple = letter.as_tuple() + int_parser = regex(r"\d") + six_int_parser = (int_parser & int_parser).append(int_parser).append(int_parser).append(int_parser).append(int_parser) + barcode = letter_tuple + six_int_parser + + def my_bar(first: str, *second: str) -> str: + return first + "-".join(second) + + foo_parser = barcode.combine(my_bar) + + self.assertEqual(foo_parser.parse("a123456"), "a1-2-3-4-5-6") + + def test_add_too_long_tuple_different_types(self): + """This test code is for checking that pylance gives no type errors""" + letter_tuple = letter.as_tuple() + int_parser = regex(r"\d").map(int) + six_int_parser = (int_parser & int_parser).append(int_parser).append(int_parser).append(int_parser).append(int_parser) + barcode = letter_tuple + six_int_parser + + def my_bar(first: str, *second: int) -> str: + return first + str(sum(second)) + + foo_parser = barcode.combine(my_bar) + + self.assertEqual(foo_parser.parse("a111111"), "a6") + + def test_add_list(self): + """This test code is for checking that pylance gives no type errors""" + letters = letter.many() + number_chars = regex(r"\d").many() + letters_numbers = letters + number_chars + + self.assertEqual(letters_numbers.parse("ab12"), ["a", "b", "1", "2"]) + + def test_add_unaddable_types(self): + """ + The type system warns us this isn't possible: + + `Operator "+" not supported for types "Parser[str]" and "Parser[int]"` + """ + bad_parser = letter + regex(r"\d").map(int) + + self.assertRaises(TypeError, bad_parser.parse, "a1") def test_multiply(self): self.assertEqual((letter * 3).parse("abc"), ["a", "b", "c"]) @@ -510,24 +567,24 @@ class Pet(enum.Enum): self.assertEqual(pet.parse("dog"), Pet.DOG) self.assertRaises(ParseError, pet.parse, "foo") - def test_from_enum_int(self): - class Position(enum.Enum): - FIRST = 1 - SECOND = 2 + def test_from_enum_int(self): + class Position(enum.Enum): + FIRST = 1 + SECOND = 2 - position = from_enum(Position) - self.assertEqual(position.parse("1"), Position.FIRST) - self.assertEqual(position.parse("2"), Position.SECOND) - self.assertRaises(ParseError, position.parse, "foo") + position = from_enum(Position) + self.assertEqual(position.parse("1"), Position.FIRST) + self.assertEqual(position.parse("2"), Position.SECOND) + self.assertRaises(ParseError, position.parse, "foo") - def test_from_enum_transform(self): - class Pet(enum.Enum): - CAT = "cat" - DOG = "dog" + def test_from_enum_transform(self): + class Pet(enum.Enum): + CAT = "cat" + DOG = "dog" - pet = from_enum(Pet, transform=lambda s: s.lower()) - self.assertEqual(pet.parse("cat"), Pet.CAT) - self.assertEqual(pet.parse("CAT"), Pet.CAT) + pet = from_enum(Pet, transform=lambda s: s.lower()) + self.assertEqual(pet.parse("cat"), Pet.CAT) + self.assertEqual(pet.parse("CAT"), Pet.CAT) class TestUtils(unittest.TestCase): From 3034711649b770005ab1513538dbbee92310f981 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Sun, 25 Jun 2023 01:00:16 +0100 Subject: [PATCH 15/16] Clean up __add__ type annotations --- parsy/__init__.py | 38 +++++++++++++++----------------------- tests/test_parsy.py | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/parsy/__init__.py b/parsy/__init__.py index db83858..7d9b5f8 100644 --- a/parsy/__init__.py +++ b/parsy/__init__.py @@ -47,6 +47,13 @@ T = TypeVar("T") T_co = TypeVar("T_co", covariant=True) +_T_contra = TypeVar("_T_contra", contravariant=True) + +_T_co = TypeVar("_T_co", covariant=True) + +class SupportsAdd(Protocol[_T_contra, _T_co]): + def __add__(self, __x: _T_contra) -> _T_co: ... + def noop(val: T) -> T: return val @@ -127,20 +134,6 @@ def aggregate(self: Result[OUT], other: Optional[Result[Any]]) -> Result[OUT]: return Result(self.status, self.index, self.value, other.furthest, other.expected) -class Addable(Protocol): - def __add__(__self: T, __other: T) -> T: - ... - - -Tadd = TypeVar("Tadd", bound=Addable, covariant=True) - - -# def xx(a: Tuple[Unpack[OUT_T]], b: Tuple[Unpack[OUT_T2]]) -> Tuple[Unpack[OUT_T], Unpack[OUT_T2]]: -# return a + b - -a = tuple("a") - - class Parser(Generic[OUT_co]): """ A Parser is an object that wraps a function whose arguments are @@ -330,8 +323,9 @@ def fail_parser(stream: str, index: int) -> Result[Result[OUT]]: return fail_parser # Special cases for adding tuples - # We have to unroll each number of elements of the second tuple because Pylance - # can only "Unpack" one tuple at a time + # We have to unroll each number of tuple elements for `other` because PEP-646 + # only allows one "Unpack" in a Tuple (if we could have two, the return + # type could use two Unpacks @overload def __add__(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1]]) -> Parser[Tuple[Unpack[OUT_T], OUT1]]: ... @@ -361,28 +355,27 @@ def __add__( ... # This covers tuples where `other` has more elements than the above overloads - # and all types are the same in `self` and `other` + # and the `self` and `other` tuples have the same homogeneous type @overload def __add__( self: Parser[Tuple[OUT, ...]], other: Parser[Tuple[OUT, ...]] ) -> Parser[Tuple[OUT, ...]]: ... - # Same as above, but for when all types are not the same + # Cover the rest of cases which can't return a homogeneous tuple @overload def __add__( self: Parser[Tuple[Any, ...]], other: Parser[Tuple[Any, ...]] ) -> Parser[Tuple[Any, ...]]: ... - - # Type annotations for any addable types + # Addable parsers which return the same type @overload - def __add__(self: Parser[Tadd], other: Parser[Tadd]) -> Parser[Tadd]: + def __add__(self: Parser[SupportsAdd[Any, _T_co]], other: Parser[SupportsAdd[Any, _T_co]]) -> Parser[_T_co]: ... def __add__(self: Parser[Any], other: Parser[Any]) -> Parser[Any]: - return (self & other).combine(lambda first, second: first + second) + return (self & other).combine(operator.add) def __mul__(self: Parser[OUT], other: range | int) -> Parser[List[OUT]]: if isinstance(other, range): @@ -749,7 +742,6 @@ def from_enum(enum_cls: type[E], transform: Callable[[str], str] = noop) -> Pars # mirroring the recursive definition issues that forward_declaration is designed to solve. # Cutting the recursive knot might be harder at the type level? - class forward_declaration(Parser[OUT]): """ An empty parser that can be used as a forward declaration, diff --git a/tests/test_parsy.py b/tests/test_parsy.py index 6ae5309..caeeda7 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -2,7 +2,7 @@ import enum import re import unittest -from typing import Any, Generator, List, Tuple, cast +from typing import Any, Generator, Generic, List, Tuple, TypeVar, Union from parsy import ( ParseError, @@ -20,6 +20,7 @@ line_info_at, peek, regex, + seq, string, string_from, ) @@ -424,6 +425,34 @@ def test_add_unaddable_types(self): bad_parser = letter + regex(r"\d").map(int) self.assertRaises(TypeError, bad_parser.parse, "a1") + + def test_add_numerics(self): + digit = regex(r"\d") + numeric_parser = digit.map(float) + digit.map(int) + + self.assertEqual(numeric_parser.parse("12"), 3.0) + + def test_seq(self): + + a = regex("a") + b = regex("b") + num = regex(r"[\d]").map(int) + + parser = seq(a, num, b, num, a|num) + + self.assertEqual(parser.parse("a1b2a"), ("a", 1, "b", 2, "a")) + self.assertEqual(parser.parse("a1b23"), ("a", 1, "b", 2, 3)) + + def test_add_tuples_like_seq(self): + """A possible alternative to `seq`""" + a = regex("a").as_tuple() + b = regex("b").as_tuple() + num = regex(r"[\d]").map(int).as_tuple() + + parser = a + num + b + num + (a | num) + + self.assertEqual(parser.parse("a1b2a"), ("a", 1, "b", 2, "a")) + self.assertEqual(parser.parse("a1b23"), ("a", 1, "b", 2, 3)) def test_multiply(self): self.assertEqual((letter * 3).parse("abc"), ["a", "b", "c"]) @@ -601,13 +630,13 @@ def test_line_info_at(self): class TestForwardDeclaration(unittest.TestCase): def test_forward_declaration_1(self): # This is the example from the docs + expr = forward_declaration() with self.assertRaises(ValueError): expr.parse("()") with self.assertRaises(ValueError): expr.parse_partial("()") - simple = regex("[0-9]+").map(int) group = string("(") >> expr.sep_by(string(" ")) << string(")") expr.become(simple | group) From e1f1ae02936b4cc673f1518695e295b72dd8c0c6 Mon Sep 17 00:00:00 2001 From: Rob Hornby Date: Wed, 28 Jun 2023 01:47:08 +0100 Subject: [PATCH 16/16] Remove forward_declaration replaced by generator approach, rename dataclass parsers --- examples/dataclass_parser_demo.py | 30 +++++---- examples/dataclass_parsing.py | 34 +++++----- examples/json.py | 23 ++++--- examples/sequence.py | 3 +- examples/simple_logo_lexer.py | 9 ++- parsy/__init__.py | 108 +++++++++++------------------- tests/test_parsy.py | 67 ++++++++---------- tests/test_sexpr.py | 31 +++++---- 8 files changed, 137 insertions(+), 168 deletions(-) diff --git a/examples/dataclass_parser_demo.py b/examples/dataclass_parser_demo.py index 4c1cf34..dc436d4 100644 --- a/examples/dataclass_parser_demo.py +++ b/examples/dataclass_parser_demo.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import List -from parsy import dataparser, parse_field, regex, string +from parsy import dataclass_parser, parser_field, regex, string text = """Sample text @@ -64,14 +64,14 @@ @dataclass class Student: - number: int = parse_field(integer << string(", ")) - name: str = parse_field(any_text << string("\n")) + number: int = parser_field(integer << string(", ")) + name: str = parser_field(any_text << string("\n")) @dataclass class Score: - number: int = parse_field(integer << string(", ")) - score: int = parse_field(integer << string("\n")) + number: int = parser_field(integer << string(", ")) + score: int = parser_field(integer << string("\n")) @dataclass @@ -83,11 +83,13 @@ class StudentWithScore: @dataclass class Grade: - grade: int = parse_field(string("Grade = ") >> integer << string("\n")) - students: List[Student] = parse_field( - string("Student number, Name\n") >> dataparser(Student).many() << regex(r"\n*") + grade: int = parser_field(string("Grade = ") >> integer << string("\n")) + students: List[Student] = parser_field( + string("Student number, Name\n") >> dataclass_parser(Student).many() << regex(r"\n*") + ) + scores: List[Score] = parser_field( + string("Student number, Score\n") >> dataclass_parser(Score).many() << regex(r"\n*") ) - scores: List[Score] = parse_field(string("Student number, Score\n") >> dataparser(Score).many() << regex(r"\n*")) @property def students_with_scores(self) -> List[StudentWithScore]: @@ -97,16 +99,16 @@ def students_with_scores(self) -> List[StudentWithScore]: @dataclass class School: - name: str = parse_field(string("School = ") >> any_text << string("\n")) - grades: List[Grade] = parse_field(dataparser(Grade).many()) + name: str = parser_field(string("School = ") >> any_text << string("\n")) + grades: List[Grade] = parser_field(dataclass_parser(Grade).many()) @dataclass class File: - header: str = parse_field(regex(r"[\s\S]*?(?=School =)")) - schools: List[School] = parse_field(dataparser(School).many()) + header: str = parser_field(regex(r"[\s\S]*?(?=School =)")) + schools: List[School] = parser_field(dataclass_parser(School).many()) if __name__ == "__main__": - file = dataparser(File).parse(text) + file = dataclass_parser(File).parse(text) print(file.schools) diff --git a/examples/dataclass_parsing.py b/examples/dataclass_parsing.py index e240b3d..00193b1 100644 --- a/examples/dataclass_parsing.py +++ b/examples/dataclass_parsing.py @@ -1,17 +1,17 @@ from dataclasses import dataclass from typing import Optional -from parsy import dataparser, parse_field, regex, string, whitespace +from parsy import dataclass_parser, parser_field, regex, string, whitespace @dataclass class Person: - name: str = parse_field(regex(r"\w+") << whitespace) - age: int = parse_field(regex(r"\d+").map(int) << whitespace) - note: str = parse_field(regex(".+")) + name: str = parser_field(regex(r"\w+") << whitespace) + age: int = parser_field(regex(r"\d+").map(int) << whitespace) + note: str = parser_field(regex(".+")) -person_parser = dataparser(Person) +person_parser = dataclass_parser(Person) person = person_parser.parse("Rob 2000 how time flies") print(person) assert person == Person(name="Rob", age=2000, note="how time flies") @@ -22,28 +22,28 @@ class Person: @dataclass class Id: - id: str = parse_field(regex(r"[^\s]+") << whitespace.optional()) - from_year: Optional[int] = parse_field( + id: str = parser_field(regex(r"[^\s]+") << whitespace.optional()) + from_year: Optional[int] = parser_field( regex("[0-9]+").map(int).desc("Numeric").optional() << whitespace.optional() ) @dataclass class Name: - name: str = parse_field(regex(r"[a-zA-Z]+") << whitespace.optional()) - abbreviated: Optional[bool] = parse_field( + name: str = parser_field(regex(r"[a-zA-Z]+") << whitespace.optional()) + abbreviated: Optional[bool] = parser_field( (string("T") | string("F")).map(lambda x: x == "T").optional() << whitespace.optional() ) @dataclass class PersonDetail: - id: Id = parse_field(dataparser(Id)) - forename: Name = parse_field(dataparser(Name)) - surname: Optional[Name] = parse_field(dataparser(Name).optional()) + id: Id = parser_field(dataclass_parser(Id)) + forename: Name = parser_field(dataclass_parser(Name)) + surname: Optional[Name] = parser_field(dataclass_parser(Name).optional()) -out_parser = dataparser(PersonDetail).many() +out_parser = dataclass_parser(PersonDetail).many() new_person = out_parser.parse("007 2023 Rob T John 123 2004 Bob") print(new_person) @@ -62,9 +62,9 @@ class PersonDetail: @dataclass class PersonWithRarity: - name: str = parse_field(regex(r"\w+") << whitespace) - age: int = parse_field(regex(r"\d+").map(int) << whitespace) - note: str = parse_field(regex(".+")) + name: str = parser_field(regex(r"\w+") << whitespace) + age: int = parser_field(regex(r"\d+").map(int) << whitespace) + note: str = parser_field(regex(".+")) rare: bool = False def __post_init__(self): @@ -72,7 +72,7 @@ def __post_init__(self): self.rare = True -person_parser = dataparser(PersonWithRarity) +person_parser = dataclass_parser(PersonWithRarity) person = person_parser.parse("Rob 20 whippersnapper") print(person) assert person == PersonWithRarity(name="Rob", age=20, note="whippersnapper", rare=False) diff --git a/examples/json.py b/examples/json.py index 394456b..dd159a7 100644 --- a/examples/json.py +++ b/examples/json.py @@ -1,6 +1,6 @@ -from typing import TypeVar +from typing import Dict, List, TypeVar, Union -from parsy import Parser, forward_declaration, regex, string +from parsy import Parser, ParserReference, generate, regex, string # Utilities whitespace = regex(r"\s*") @@ -40,19 +40,22 @@ def lexeme(p: Parser[T]) -> Parser[T]: quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"')) # Data structures -JSON = TypeVar("JSON") +JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None] -json_value = forward_declaration[JSON]() -object_pair = (quoted << colon) & json_value + +@generate +def _json_parser() -> ParserReference[JSON]: + return (yield json_parser) + + +object_pair = (quoted << colon) & _json_parser json_object = lbrace >> object_pair.sep_by(comma).map(lambda a: {g[0]: g[1] for g in a}) << rbrace -array = lbrack >> json_value.sep_by(comma) << rbrack +array = lbrack >> _json_parser.sep_by(comma) << rbrack # Everything -all = quoted | number | json_object | array | true | false | null -json_value = json_value.become(all) -json_doc = whitespace >> json_value +json_parser = quoted | number | json_object | array | true | false | null -# JSON = Union[Dict[str, JSON], List[JSON], str, int, float, bool, None] +json_doc = whitespace >> json_parser def test(): diff --git a/examples/sequence.py b/examples/sequence.py index e48b5f2..6dcca28 100644 --- a/examples/sequence.py +++ b/examples/sequence.py @@ -1,6 +1,5 @@ from dataclasses import dataclass - from parsy import regex, seq, whitespace @@ -14,7 +13,7 @@ class Person: person_arg_sequence = seq( regex(r"\w+"), whitespace >> regex(r"\d+").map(int), - whitespace >> regex(r".+"), + whitespace.then(regex(r".+")), ) person_parser = person_arg_sequence.combine(Person) diff --git a/examples/simple_logo_lexer.py b/examples/simple_logo_lexer.py index 1001056..a768fdc 100644 --- a/examples/simple_logo_lexer.py +++ b/examples/simple_logo_lexer.py @@ -9,9 +9,8 @@ """ from dataclasses import dataclass -from typing import List -from parsy import Parser, dataparser, eof, parse_field, regex, string, string_from, whitespace +from parsy import dataclass_parser, eof, parser_field, regex, string, string_from, whitespace command = string_from("fd", "bk", "rt", "lt") number = regex(r"[0-9]+").map(int) @@ -39,11 +38,11 @@ def test_lexer() -> None: @dataclass class Instruction: - command: str = parse_field(optional_whitespace >> command) - distance: int = parse_field(whitespace >> number << (eof | eol | (whitespace >> eol))) + command: str = parser_field(optional_whitespace >> command) + distance: int = parser_field(whitespace >> number << (eof | eol | (whitespace >> eol))) -instruction_parser = dataparser(Instruction).many() +instruction_parser = dataclass_parser(Instruction).many() assert ( instruction_parser.parse( diff --git a/parsy/__init__.py b/parsy/__init__.py index 7d9b5f8..b1d70e2 100644 --- a/parsy/__init__.py +++ b/parsy/__init__.py @@ -51,8 +51,10 @@ _T_co = TypeVar("_T_co", covariant=True) + class SupportsAdd(Protocol[_T_contra, _T_co]): - def __add__(self, __x: _T_contra) -> _T_co: ... + def __add__(self, __x: _T_contra) -> _T_co: + ... def noop(val: T) -> T: @@ -235,15 +237,7 @@ def at_most(self: Parser[OUT_co], n: int) -> Parser[List[OUT_co]]: def at_least(self: Parser[OUT_co], n: int) -> Parser[List[OUT_co]]: return self.times(min=n, max=float("inf")) - @overload - def optional(self: Parser[OUT1], default: None = None) -> Parser[OUT1 | None]: - pass - - @overload - def optional(self: Parser[OUT1], default: OUT2) -> Parser[OUT1 | OUT2]: - pass - - def optional(self: Parser[OUT1], default: OUT2 | None = None) -> Parser[OUT1 | OUT2 | None]: + def optional(self: Parser[OUT1], default: OUT2 = None) -> Parser[OUT1 | OUT2]: return self.times(0, 1).map(lambda v: v[0] if v else default) def until( @@ -357,16 +351,12 @@ def __add__( # This covers tuples where `other` has more elements than the above overloads # and the `self` and `other` tuples have the same homogeneous type @overload - def __add__( - self: Parser[Tuple[OUT, ...]], other: Parser[Tuple[OUT, ...]] - ) -> Parser[Tuple[OUT, ...]]: + def __add__(self: Parser[Tuple[OUT, ...]], other: Parser[Tuple[OUT, ...]]) -> Parser[Tuple[OUT, ...]]: ... # Cover the rest of cases which can't return a homogeneous tuple @overload - def __add__( - self: Parser[Tuple[Any, ...]], other: Parser[Tuple[Any, ...]] - ) -> Parser[Tuple[Any, ...]]: + def __add__(self: Parser[Tuple[Any, ...]], other: Parser[Tuple[Any, ...]]) -> Parser[Tuple[Any, ...]]: ... # Addable parsers which return the same type @@ -482,6 +472,10 @@ def generated(stream: str, index: int) -> Result[OUT]: return generated +# A convenience type for defining forward references to parsers using a generator +ParserReference = Generator[Parser[T], T, T] + + index = Parser(lambda _, index: Result.success(index, index)) line_info = Parser(lambda stream, index: Result.success(index, line_info_at(stream, index))) @@ -607,54 +601,60 @@ def regex_parser(stream: str, index: int) -> Result[str]: # Each number of args needs to be typed separately @overload def seq( - __arg1: Parser[OUT1], - __arg2: Parser[OUT2], - __arg3: Parser[OUT3], - __arg4: Parser[OUT4], - __arg5: Parser[OUT5], - __arg6: Parser[OUT6], + __parser_1: Parser[OUT1], + __parser_2: Parser[OUT2], + __parser_3: Parser[OUT3], + __parser_4: Parser[OUT4], + __parser_5: Parser[OUT5], + __parser_6: Parser[OUT6], ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5, OUT6]]: ... @overload def seq( - __arg1: Parser[OUT1], __arg2: Parser[OUT2], __arg3: Parser[OUT3], __arg4: Parser[OUT4], __arg5: Parser[OUT5] + __parser_1: Parser[OUT1], + __parser_2: Parser[OUT2], + __parser_3: Parser[OUT3], + __parser_4: Parser[OUT4], + __parser_5: Parser[OUT5], ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5]]: ... @overload def seq( - __arg1: Parser[OUT1], __arg2: Parser[OUT2], __arg3: Parser[OUT3], __arg4: Parser[OUT4] + __parser_1: Parser[OUT1], __parser_2: Parser[OUT2], __parser_3: Parser[OUT3], __parser_4: Parser[OUT4] ) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4]]: ... @overload -def seq(__arg1: Parser[OUT1], __arg2: Parser[OUT2], __arg3: Parser[OUT3]) -> Parser[Tuple[OUT1, OUT2, OUT3]]: +def seq( + __parser_1: Parser[OUT1], __parser_2: Parser[OUT2], __parser_3: Parser[OUT3] +) -> Parser[Tuple[OUT1, OUT2, OUT3]]: ... @overload -def seq(__arg1: Parser[OUT1], __arg2: Parser[OUT2]) -> Parser[Tuple[OUT1, OUT2]]: +def seq(__parser_1: Parser[OUT1], __parser_2: Parser[OUT2]) -> Parser[Tuple[OUT1, OUT2]]: ... @overload -def seq(__arg1: Parser[OUT1]) -> Parser[Tuple[OUT1]]: +def seq(__parser_1: Parser[OUT1]) -> Parser[Tuple[OUT1]]: ... @overload -def seq(*args: Parser[Any]) -> Parser[Tuple[Any, ...]]: +def seq(*parsers: Parser[Any]) -> Parser[Tuple[Any, ...]]: ... -def seq(*args: Parser[Any]) -> Parser[Tuple[Any, ...]]: - if not args: +def seq(*parsers: Parser[Any]) -> Parser[Tuple[Any, ...]]: + if not parsers: raise ValueError() - first, *remainder = args + first, *remainder = parsers parser = first.as_tuple() for p in remainder: parser = parser.append(p) # type: ignore @@ -737,39 +737,10 @@ def from_enum(enum_cls: type[E], transform: Callable[[str], str] = noop) -> Pars return reduce(operator.or_, [string(value, transform=transform).result(enum_item) for value, enum_item in items]) -# TODO how do we type a forward_declaration instance? For a typical usage, see -# examples/json.py. I think this is probably a recursive type issue which is probably -# mirroring the recursive definition issues that forward_declaration is designed to solve. -# Cutting the recursive knot might be harder at the type level? - -class forward_declaration(Parser[OUT]): - """ - An empty parser that can be used as a forward declaration, - especially for parsers that need to be defined recursively. - - You must use `.become(parser)` before using. - """ - - def __init__(self) -> None: - pass - - def _raise_error(self, *args: Any, **kwargs: Any) -> Any: - raise ValueError("You must use 'become' before attempting to call `parse` or `parse_partial`") - - parse = _raise_error - parse_partial = _raise_error - - def become(self, other: Parser[OUT2]) -> Parser[OUT2]: - self.__dict__ = other.__dict__ - self.__class__ = other.__class__ - self = cast(Parser[OUT2], self) - return self - - # Dataclass parsers -def parse_field( +def parser_field( parser: Parser[OUT], *, default: OUT = ..., @@ -788,27 +759,26 @@ def parse_field( class DataClassProtocol(Protocol): __dataclass_fields__: ClassVar[Dict[str, Field[Any]]] - __init__: Callable[..., Any] + __init__: Callable[..., None] OUT_D = TypeVar("OUT_D", bound=DataClassProtocol) -def dataparser(datatype: Type[OUT_D]) -> Parser[OUT_D]: +def dataclass_parser(datatype: Type[OUT_D]) -> Parser[OUT_D]: @Parser def data_parser(stream: str, index: int) -> Result[OUT_D]: parsed_fields: Dict[str, Any] = {} - result = Result.success(index, None) - for field in fields(datatype): - if "parser" not in field.metadata: + for dataclass_field in fields(datatype): + if "parser" not in dataclass_field.metadata: continue - parser: Parser[Any] = field.metadata["parser"] + parser: Parser[Any] = dataclass_field.metadata["parser"] result = parser(stream, index) if not result.status: return result # type: ignore index = result.index - parsed_fields[field.name] = result.value + parsed_fields[dataclass_field.name] = result.value - return Result.success(result.index, datatype(**parsed_fields)) + return Result.success(index, datatype(**parsed_fields)) return data_parser diff --git a/tests/test_parsy.py b/tests/test_parsy.py index caeeda7..c19ab15 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -2,17 +2,17 @@ import enum import re import unittest -from typing import Any, Generator, Generic, List, Tuple, TypeVar, Union +from typing import Any, Generator, List, Tuple, Union from parsy import ( ParseError, Parser, + ParserReference, Result, any_char, char_from, decimal_digit, digit, - forward_declaration, from_enum, generate, letter, @@ -384,7 +384,9 @@ def test_add_too_long_tuple_uniform_types(self): """This test code is for checking that pylance gives no type errors""" letter_tuple = letter.as_tuple() int_parser = regex(r"\d") - six_int_parser = (int_parser & int_parser).append(int_parser).append(int_parser).append(int_parser).append(int_parser) + six_int_parser = ( + (int_parser & int_parser).append(int_parser).append(int_parser).append(int_parser).append(int_parser) + ) barcode = letter_tuple + six_int_parser def my_bar(first: str, *second: str) -> str: @@ -398,7 +400,9 @@ def test_add_too_long_tuple_different_types(self): """This test code is for checking that pylance gives no type errors""" letter_tuple = letter.as_tuple() int_parser = regex(r"\d").map(int) - six_int_parser = (int_parser & int_parser).append(int_parser).append(int_parser).append(int_parser).append(int_parser) + six_int_parser = ( + (int_parser & int_parser).append(int_parser).append(int_parser).append(int_parser).append(int_parser) + ) barcode = letter_tuple + six_int_parser def my_bar(first: str, *second: int) -> str: @@ -425,7 +429,7 @@ def test_add_unaddable_types(self): bad_parser = letter + regex(r"\d").map(int) self.assertRaises(TypeError, bad_parser.parse, "a1") - + def test_add_numerics(self): digit = regex(r"\d") numeric_parser = digit.map(float) + digit.map(int) @@ -438,7 +442,7 @@ def test_seq(self): b = regex("b") num = regex(r"[\d]").map(int) - parser = seq(a, num, b, num, a|num) + parser = seq(a, num, b, num, a | num) self.assertEqual(parser.parse("a1b2a"), ("a", 1, "b", 2, "a")) self.assertEqual(parser.parse("a1b23"), ("a", 1, "b", 2, 3)) @@ -627,46 +631,33 @@ def test_line_info_at(self): self.assertRaises(ValueError, lambda: line_info_at(text, 8)) -class TestForwardDeclaration(unittest.TestCase): - def test_forward_declaration_1(self): - # This is the example from the docs - - expr = forward_declaration() - with self.assertRaises(ValueError): - expr.parse("()") - - with self.assertRaises(ValueError): - expr.parse_partial("()") - simple = regex("[0-9]+").map(int) - group = string("(") >> expr.sep_by(string(" ")) << string(")") - expr.become(simple | group) +# Type alias used in test_recursive_parser, has to be defined at module or class level +RT = Union[int, List["RT"]] - self.assertEqual(expr.parse("(0 1 (2 3))"), [0, 1, [2, 3]]) - def test_forward_declaration_2(self): - # Simplest example I could think of - expr = forward_declaration() - expr.become(string("A") + expr | string("Z")) +def test_recursive_parser(): + """ + A recursive parser can be defined by using generators. - self.assertEqual(expr.parse("Z"), "Z") - self.assertEqual(expr.parse("AZ"), "AZ") - self.assertEqual(expr.parse("AAAAAZ"), "AAAAAZ") + The type of the parser has to be explicitly declared with a type alias which + is also recursively defined using a forward-declaration. - with self.assertRaises(ParseError): - expr.parse("A") + This works because the generator can refer the target parser before the target + parser is defined. Then, when defining the parser, it can use `_parser` to + indirectly refer to itself, creating a recursive parser. + """ + digits = regex("[0-9]+").map(int) - with self.assertRaises(ParseError): - expr.parse("B") + @generate + def _parser() -> ParserReference[RT]: + return (yield parser) - self.assertEqual(expr.parse_partial("AAZXX"), ("AAZ", "XX")) + # The explicit type annotation of `Parser[RT]` could be omitted + parser: Parser[RT] = digits | string("(") >> _parser.sep_by(string(" ")) << string(")") - def test_forward_declaration_cant_become_twice(self): - dec = forward_declaration() - other = string("X") - dec.become(other) + result = parser.parse("(0 1 (2 3 (4 5)))") - with self.assertRaises((AttributeError, TypeError)): - dec.become(other) + assert result == [0, 1, [2, 3, [4, 5]]] if __name__ == "__main__": diff --git a/tests/test_sexpr.py b/tests/test_sexpr.py index 535afeb..227b7f6 100644 --- a/tests/test_sexpr.py +++ b/tests/test_sexpr.py @@ -1,12 +1,18 @@ import unittest +from typing import List, TypeVar, Union -from parsy import generate, regex, string +from parsy import Parser, ParserReference, generate, regex, string whitespace = regex(r"\s+") comment = regex(r";.*") ignore = (whitespace | comment).many() -lexeme = lambda p: p << ignore +T = TypeVar("T") + + +def lexeme(parser: Parser[T]) -> Parser[T]: + return parser << ignore + lparen = lexeme(string("(")) rparen = lexeme(string(")")) @@ -17,22 +23,21 @@ atom = true | false | number | symbol - -@generate -def form(): - yield lparen - els = yield expr.many() - yield rparen - return els +PT = Union[str, bool, int, List["PT"]] @generate -def quote(): - yield string("'") - e = yield expr - return ["quote", e] +def _expr() -> ParserReference[PT]: + # expr is referred to before it's defined + return (yield expr) + +# expr is indirectly used via _expr +form = lparen >> _expr.many() << rparen +quote = string("'") >> _expr.map(lambda e: ["quote", e]) +# Here, expr is finally defined, combining parsers which already refer to it via +# _expr, which creates a recursive parser expr = form | quote | atom program = ignore >> expr.many()