def noop(x):
    """Return ``x`` unchanged (identity function)."""
    return x
1313
1414
class StrStream(str):
    """A ``str`` to parse that can also carry a name for where it came from.

    The instance compares and behaves like the wrapped string. The extra
    ``source`` attribute holds a label for its origin, e.g. a file path.
    """

    def __new__(cls, string, source=None):
        """Create the stream.

        Args:
            string: The text to parse.
            source: A name for the origin of ``string``. It defaults to
                ``None`` so that ``copy`` and ``pickle``, which rebuild the
                object with ``cls.__new__(cls, value)`` and then restore the
                instance ``__dict__``, can reconstruct a stream.
        """
        # ``str`` is immutable, so the value has to be fixed in ``__new__``.
        instance = super().__new__(cls, string)
        instance.source = source
        return instance
23+
24+
class ByteStream(bytes):
    """A ``bytes`` to parse that can also carry a name for where it came from.

    The instance compares and behaves like the wrapped bytes. The extra
    ``source`` attribute holds a label for its origin, e.g. a file path.
    """

    def __new__(cls, bs, source=None):
        """Create the stream.

        Args:
            bs: The binary data to parse.
            source: A name for the origin of ``bs``. It defaults to ``None``
                so that ``copy`` and ``pickle``, which rebuild the object
                with ``cls.__new__(cls, value)`` and then restore the
                instance ``__dict__``, can reconstruct a stream.
        """
        # The base must be ``bytes``: building a ``str`` from a bytes object
        # would store its repr text (``"b'...'"``) instead of the data.
        instance = super().__new__(cls, bs)
        instance.source = source
        return instance
33+
34+
35+ def make_stream (data : str | bytes , source : Any ):
36+ """Constructs an appropriate stream type for `data` when it's one of the
37+ three core supported datatypes of parsy (viz. str, bytes, list). Otherwise,
38+ the data is assumed to just support a minimum of __getitem__ and
39+ __len__."""
40+ if isinstance (data , str ):
41+ return StrStream (data , source )
42+
43+ if isinstance (data , bytes ):
44+ return ByteStream (data , source )
45+
46+ raise RuntimeError (
47+ "A Parsy stream can be formed only on str and bytes, but the given "
48+ f"data has type { type (data )} . If you are separately tokenizing the "
49+ "data to parse, consider instead equipping the tokens with source "
50+ "location metadata." ,
51+ )
52+
def line_info_at(stream, index):
    """Return the zero-based line and column of ``index`` within ``stream``.

    Args:
        stream: The data being parsed. It must support ``len`` plus
            ``str``-style ``count`` and ``rfind`` with a string needle.
        index: Offset into ``stream``; it may equal ``len(stream)``.

    Returns:
        ``(line, col)``, or ``(line, col, source)`` when ``stream`` has a
        ``source`` attribute.

    Raises:
        ValueError: If ``index`` is greater than ``len(stream)``.
    """
    if index > len(stream):
        raise ValueError("invalid index")
    # The line number is the count of newlines before ``index``.
    line = stream.count("\n", 0, index)
    last_nl = stream.rfind("\n", 0, index)
    # ``rfind`` gives -1 when no newline precedes ``index``, which makes the
    # column equal to ``index`` on the first line.
    col = index - (last_nl + 1)
    if hasattr(stream, "source"):
        return (line, col, stream.source)
    return (line, col)
2263
2364
2465class ParseError (RuntimeError ):
@@ -29,7 +70,15 @@ def __init__(self, expected, stream, index):
2970
def line_info(self):
    """Describe the failure position for use in error messages.

    Returns:
        ``"row:col"`` when ``line_info_at`` yields a 2-tuple,
        ``"source:row:col"`` when it yields a 3-tuple, or the bare index as
        a string when the lookup raises ``TypeError`` or ``AttributeError``
        (e.g. the stream is not a ``str``).
    """
    try:
        info = line_info_at(self.stream, self.index)
        if len(info) == 2:
            row, col = info
            return "{}:{}".format(row, col)
        elif len(info) == 3:
            # line_info_at puts the source last: (line, col, source).
            row, col, source = info
            return "{}:{}:{}".format(source, row, col)
        else:
            raise RuntimeError("Internal line_info_at violates length expectation.")
    except (TypeError, AttributeError):  # not a str
        return str(self.index)
3584
@@ -90,20 +139,23 @@ def __init__(self, wrapped_fn: Callable[[str | bytes | list, int], Result]):
90139 """
91140 self .wrapped_fn = wrapped_fn
92141
def __call__(self, stream, index: int):
    """Run the wrapped parsing function on ``stream`` starting at ``index``.

    Returns whatever ``self.wrapped_fn`` returns for those two arguments.
    """
    outcome = self.wrapped_fn(stream, index)
    return outcome
95144
def parse(self, stream, source=None) -> Any:
    """Parse ``stream`` and return the result, or raise a ParseError.

    This parser is combined with ``eof`` via ``<<`` and the combination is
    run through ``parse_partial``; ``source`` is forwarded to it unchanged.
    Only the first element of the returned pair is handed back.
    """
    whole_input_parser = self << eof
    value, _unparsed = whole_input_parser.parse_partial(stream, source)
    return value
100149
101- def parse_partial (self , stream : str | bytes | list ) -> tuple [Any , str | bytes | list ]:
150+ def parse_partial (self , stream , source = None ) -> tuple [Any , str | bytes | list ]:
102151 """
103152 Parses the longest possible prefix of a given string.
104153 Returns a tuple of the result and the unparsed remainder,
105154 or raises ParseError
106155 """
156+ if source is not None :
157+ stream = make_stream (stream , source )
158+
107159 result = self (stream , 0 )
108160
109161 if result .status :
@@ -346,6 +398,11 @@ def marked():
346398 start = yield line_info
347399 body = yield self
348400 end = yield line_info
401+ # line_info returns a 3-tuple including the source when a source
402+ # was given to `parse`, but older programs expect these tuples to
403+ # have length 2, consisting of just row and col
404+ start = start [:2 ]
405+ end = end [:2 ]
349406 return (start , body , end )
350407
351408 return marked
@@ -578,8 +635,7 @@ def test_item(func: Callable[..., bool], description: str) -> Parser:
578635 def test_item_parser (stream , index ):
579636 if index < len (stream ):
580637 if isinstance (stream , bytes ):
581- # Subscripting bytes with `[index]` instead of
582- # `[index:index + 1]` returns an int
638+ # Otherwise directly indexing a bytes gives `int`
583639 item = stream [index : index + 1 ]
584640 else :
585641 item = stream [index ]
0 commit comments