Skip to content

Commit c5ba3d5

Browse files
committed
add (Byte|Str)Stream for sources on input data
For backwards compatibility, these are only used to wrap input data when a source is given to `parse()` or `parse_partial()`. When a source is given, two behaviours change: (1) the primitive `line_info` parser returns a 3-tuple (row, column, source) instead of a 2-tuple (row, column); (2) `ParseError` objects include the source.
1 parent 3b72c71 commit c5ba3d5

File tree

1 file changed

+64
-8
lines changed

1 file changed

+64
-8
lines changed

src/parsy/__init__.py

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,54 @@
1212
noop = lambda x: x
1313

1414

15+
class StrStream(str):
    """A ``str`` to parse that also records where the text came from.

    The instance behaves exactly like the wrapped string; the only addition
    is the ``source`` attribute, a caller-chosen label such as a file path.

    Args:
        string: The text to parse.
        source: Label for the origin of ``string``. Defaults to ``None`` so
            that ``copy`` and ``pickle`` -- which rebuild the object by
            calling ``cls.__new__(cls, value)`` and only afterwards restore
            the instance attributes -- can reconstruct a stream.
    """

    def __new__(cls, string, source=None):
        # ``str`` is immutable, so the text has to be fixed in ``__new__``;
        # the label is then attached as an ordinary instance attribute.
        instance = super().__new__(cls, string)
        instance.source = source
        return instance
23+
24+
25+
class ByteStream(bytes):
    """A ``bytes`` object to parse that also records where the data came from.

    The instance behaves exactly like the wrapped ``bytes`` (so
    ``isinstance(stream, bytes)`` checks keep working); the only addition is
    the ``source`` attribute, a caller-chosen label such as a file path.

    Args:
        bs: The binary data to parse.
        source: Label for the origin of ``bs``. Defaults to ``None`` so that
            ``copy`` and ``pickle`` -- which rebuild the object by calling
            ``cls.__new__(cls, value)`` and only afterwards restore the
            instance attributes -- can reconstruct a stream.
    """

    def __new__(cls, bs, source=None):
        # The base class must be ``bytes``: building a ``str`` from a bytes
        # value would store its repr (``"b'...'"``) instead of the data.
        instance = super().__new__(cls, bs)
        instance.source = source
        return instance
33+
34+
35+
def make_stream(data: str | bytes, source: Any):
36+
"""Constructs an appropriate stream type for `data` when it's one of the
37+
three core supported datatypes of parsy (viz. str, bytes, list). Otherwise,
38+
the data is assumed to just support a minimum of __getitem__ and
39+
__len__."""
40+
if isinstance(data, str):
41+
return StrStream(data, source)
42+
43+
if isinstance(data, bytes):
44+
return ByteStream(data, source)
45+
46+
raise RuntimeError(
47+
"A Parsy stream can be formed only on str and bytes, but the given "
48+
f"data has type {type(data)}. If you are separately tokenizing the "
49+
"data to parse, consider instead equipping the tokens with source "
50+
"location metadata.",
51+
)
52+
1553
def line_info_at(stream, index):
    """Return the zero-based line and column of position `index` in `stream`.

    Args:
        stream: The text being parsed. It must support ``len`` plus
            ``count``/``rfind`` with a ``"\\n"`` needle, as ``str`` does.
        index: Offset into `stream`; ``len(stream)`` (end of input) is valid.

    Returns:
        ``(line, col)``, or ``(line, col, source)`` when `stream` has a
        ``source`` attribute. The source is appended last so that callers
        slicing ``[:2]`` still get the line and column.

    Raises:
        ValueError: If `index` is negative or greater than ``len(stream)``.
    """
    # A negative index would be read as "from the end" by count/rfind and
    # yield a meaningless position, so it is rejected like an overshoot.
    if index < 0 or index > len(stream):
        raise ValueError("invalid index")
    line = stream.count("\n", 0, index)
    # rfind returns -1 when there is no earlier newline, which makes the
    # column equal to the index itself on the first line.
    last_nl = stream.rfind("\n", 0, index)
    col = index - (last_nl + 1)
    if hasattr(stream, "source"):
        return (line, col, stream.source)
    return (line, col)
2263

2364

2465
class ParseError(RuntimeError):
@@ -29,7 +70,15 @@ def __init__(self, expected, stream, index):
2970

3071
def line_info(self):
    """Describe where the error occurred, as text.

    Returns:
        ``"row:col"`` for a plain string stream, ``"source:row:col"`` when
        the stream carries a source, or just the index as a string when
        the stream is not string-like.

    Raises:
        RuntimeError: If `line_info_at` returns a tuple whose length is
            neither 2 nor 3.
    """
    try:
        info = line_info_at(self.stream, self.index)
        if len(info) == 2:
            row, col = info
            return "{}:{}".format(row, col)
        elif len(info) == 3:
            # line_info_at appends the source as the LAST element
            # (line, col, source); it is only moved to the front here,
            # when formatting the message.
            row, col, source = info
            return "{}:{}:{}".format(source, row, col)
        else:
            raise RuntimeError("Internal line_info_at violates length expectation.")
    except (TypeError, AttributeError):  # not a str
        return str(self.index)
3584

@@ -90,20 +139,23 @@ def __init__(self, wrapped_fn: Callable[[str | bytes | list, int], Result]):
90139
"""
91140
self.wrapped_fn = wrapped_fn
92141

93-
def __call__(self, stream, index: int):
    """Run the wrapped parsing function on `stream`, starting at `index`."""
    parse_fn = self.wrapped_fn
    return parse_fn(stream, index)
95144

96-
def parse(self, stream, source=None) -> Any:
    """Parse a string or list of tokens and return the result, or raise a ParseError.

    `source` is passed on unchanged to `parse_partial`. The parser that is
    run is this one followed by `eof`; the remainder reported by
    `parse_partial` is discarded.
    """
    whole_input = self << eof
    result, _remainder = whole_input.parse_partial(stream, source)
    return result
100149

101-
def parse_partial(self, stream: str | bytes | list) -> tuple[Any, str | bytes | list]:
150+
def parse_partial(self, stream, source=None) -> tuple[Any, str | bytes | list]:
102151
"""
103152
Parses the longest possible prefix of a given string.
104153
Returns a tuple of the result and the unparsed remainder,
105154
or raises ParseError
106155
"""
156+
if source is not None:
157+
stream = make_stream(stream, source)
158+
107159
result = self(stream, 0)
108160

109161
if result.status:
@@ -346,6 +398,11 @@ def marked():
346398
start = yield line_info
347399
body = yield self
348400
end = yield line_info
401+
# line_info returns a 3-tuple including the source when a source
402+
# was given to `parse`, but older programs expect these tuples to
403+
# have length 2, consisting of just row and col
404+
start = start[:2]
405+
end = end[:2]
349406
return (start, body, end)
350407

351408
return marked
@@ -578,8 +635,7 @@ def test_item(func: Callable[..., bool], description: str) -> Parser:
578635
def test_item_parser(stream, index):
579636
if index < len(stream):
580637
if isinstance(stream, bytes):
581-
# Subscripting bytes with `[index]` instead of
582-
# `[index:index + 1]` returns an int
638+
# Slice instead of indexing: `stream[index]` on bytes gives an `int`
583639
item = stream[index : index + 1]
584640
else:
585641
item = stream[index]

0 commit comments

Comments
 (0)