Skip to content

Commit 30b3ee4

Browse files
committed
add Stream to represent input data
This primarily wraps the `str | bytes | list` data being parsed, and also adds a `source` metadata field to hold the filename, URL, etc. that the data came from. Introducing this class also paves the way for eventually supporting streaming input data.
1 parent 97fc01c commit 30b3ee4

File tree

1 file changed

+35
-19
lines changed

1 file changed

+35
-19
lines changed

src/parsy/__init__.py

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,31 @@
1111

1212
noop = lambda x: x
1313

14+
@dataclass
class Stream:
    """Data to parse, possibly equipped with a name for the source it's from.

    Wraps the ``str``, ``bytes`` or ``list`` being parsed and forwards
    ``len()`` and subscripting to it, so a ``Stream`` can be measured and
    indexed like the underlying data.
    """

    # The input being parsed.
    data: str | bytes | list
    # Name of where the data came from, e.g. a file path; None if not given.
    source: str | None = None

    def __len__(self):
        """Return the length of the wrapped data."""
        return len(self.data)

    def __getitem__(self, i):
        """Return the item (or slice) of the wrapped data at ``i``.

        For ``bytes`` data a single index yields a length-1 ``bytes`` object
        rather than an ``int``, so items of ``bytes`` input have the same
        type as the input, as they do for ``str``.
        """
        if isinstance(self.data, bytes) and not isinstance(i, slice):
            # Subscripting bytes with `[index]` instead of a one-element
            # slice returns an int. `i + 1 or None` keeps the slice
            # open-ended when i == -1; a plain `[i:i + 1]` would be `[-1:0]`
            # there and return b"" instead of the last byte.
            return self.data[i : i + 1 or None]
        return self.data[i]
1431

15-
def line_info_at(stream, index):
32+
def line_info_at(stream: Stream, index):
1633
if index > len(stream):
1734
raise ValueError("invalid index")
18-
line = stream.count("\n", 0, index)
19-
last_nl = stream.rfind("\n", 0, index)
35+
line = stream.data.count("\n", 0, index)
36+
last_nl = stream.data.rfind("\n", 0, index)
2037
col = index - (last_nl + 1)
21-
return (line, col)
38+
return (stream.source, line, col)
2239

2340

2441
class ParseError(RuntimeError):
@@ -29,7 +46,11 @@ def __init__(self, expected, stream, index):
2946

3047
def line_info(self):
3148
try:
32-
return "{}:{}".format(*line_info_at(self.stream, self.index))
49+
source, row, col = line_info_at(self.stream, self.index)
50+
if source is None:
51+
return "{}:{}".format(row, col)
52+
else:
53+
return "{}:{}:{}".format(source, row, col)
3354
except (TypeError, AttributeError): # not a str
3455
return str(self.index)
3556

@@ -83,22 +104,22 @@ class Parser:
83104
of the failure.
84105
"""
85106

86-
def __init__(self, wrapped_fn: Callable[[str | bytes | list, int], Result]):
107+
def __init__(self, wrapped_fn: Callable[[Stream, int], Result]):
87108
"""
88109
Creates a new Parser from a function that takes a stream
89110
and returns a Result.
90111
"""
91112
self.wrapped_fn = wrapped_fn
92113

93-
def __call__(self, stream: str | bytes | list, index: int):
114+
def __call__(self, stream: Stream, index: int):
94115
return self.wrapped_fn(stream, index)
95116

96-
def parse(self, stream: str | bytes | list) -> Any:
117+
def parse(self, stream: Stream) -> Any:
97118
"""Parses a string or list of tokens and returns the result or raise a ParseError."""
98119
(result, _) = (self << eof).parse_partial(stream)
99120
return result
100121

101-
def parse_partial(self, stream: str | bytes | list) -> tuple[Any, str | bytes | list]:
122+
def parse_partial(self, stream: Stream) -> tuple[Any, Stream]:
102123
"""
103124
Parses the longest possible prefix of a given string.
104125
Returns a tuple of the result and the unparsed remainder,
@@ -343,10 +364,10 @@ def mark(self) -> Parser:
343364

344365
@generate
345366
def marked():
346-
start = yield line_info
367+
_, *start = yield line_info
347368
body = yield self
348-
end = yield line_info
349-
return (start, body, end)
369+
_, *end = yield line_info
370+
return (tuple(start), body, tuple(end))
350371

351372
return marked
352373

@@ -557,7 +578,7 @@ def regex(exp: str, flags=0, group: int | str | tuple = 0) -> Parser:
557578

558579
@Parser
559580
def regex_parser(stream, index):
560-
match = exp.match(stream, index)
581+
match = exp.match(stream.data, index)
561582
if match:
562583
return Result.success(match.end(), match.group(*group))
563584
else:
@@ -577,12 +598,7 @@ def test_item(func: Callable[..., bool], description: str) -> Parser:
577598
@Parser
578599
def test_item_parser(stream, index):
579600
if index < len(stream):
580-
if isinstance(stream, bytes):
581-
# Subscripting bytes with `[index]` instead of
582-
# `[index:index + 1]` returns an int
583-
item = stream[index : index + 1]
584-
else:
585-
item = stream[index]
601+
item = stream[index]
586602
if func(item):
587603
return Result.success(index + 1, item)
588604
return Result.failure(index, description)

0 commit comments

Comments
 (0)