Skip to content

Commit 78b87ac

Browse files
committed
some pattern parsing fixes; improve debugging output
1 parent a3e1901 commit 78b87ac

File tree

5 files changed

+104
-93
lines changed

5 files changed

+104
-93
lines changed

project/fortran-regex.depend

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
1662208326 source:/Users/federico/code/fortran-regex/test/testdata.f90
77

8-
1671094483 source:i:\fortran-regex\test\testdata.f90
8+
1671110187 source:i:\fortran-regex\test\testdata.f90
99

10-
1671094429 source:i:\fortran-regex\src\regex.f90
10+
1671110635 source:i:\fortran-regex\src\regex.f90
1111

12-
1671094129 source:i:\fortran-regex\test\test_1.f90
12+
1671094582 source:i:\fortran-regex\test\test_1.f90
1313

project/fortran-regex.layout

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@
22
<CodeBlocks_layout_file>
33
<FileVersion major="1" minor="0" />
44
<ActiveTarget name="Debug" />
5-
<File name="..\src\regex.f90" open="1" top="0" tabpos="2" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
5+
<File name="..\src\regex.f90" open="1" top="1" tabpos="2" split="0" active="1" splitpos="0" zoom_1="4" zoom_2="0">
66
<Cursor>
7-
<Cursor1 position="17593" topLine="28" />
7+
<Cursor1 position="15345" topLine="421" />
88
</Cursor>
99
</File>
1010
<File name="..\test\testdata.f90" open="1" top="0" tabpos="3" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
1111
<Cursor>
12-
<Cursor1 position="6700" topLine="86" />
12+
<Cursor1 position="672" topLine="21" />
1313
</Cursor>
1414
</File>
15-
<File name="..\test\test_1.f90" open="1" top="1" tabpos="1" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
15+
<File name="..\test\test_1.f90" open="1" top="0" tabpos="1" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
1616
<Cursor>
17-
<Cursor1 position="359" topLine="0" />
17+
<Cursor1 position="521" topLine="4" />
1818
</Cursor>
1919
</File>
2020
</CodeBlocks_layout_file>

project/regex_testdata.mod

-15 Bytes
Binary file not shown.

src/regex.f90

Lines changed: 53 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ module regex_module
2020
private
2121

2222
public :: parse_pattern
23-
!public :: re_matchp
2423
public :: regex
2524

2625
! Character kind
@@ -29,7 +28,7 @@ module regex_module
2928
logical, parameter, public :: RE_DOT_MATCHES_NEWLINE = .true. ! Define .false. if you DON'T want '.' to match '\r' + '\n'
3029
integer, parameter, public :: MAX_REGEXP_OBJECTS = 512 ! Max number of regex symbols in expression.
3130
integer, parameter, public :: MAX_CHAR_CLASS_LEN = 1024 ! Max length of character-class buffer in.
32-
logical, parameter :: DEBUG = .false.
31+
logical, parameter :: DEBUG = .true.
3332

3433
! Supported patterns
3534
integer, parameter :: UNUSED = 0
@@ -60,9 +59,9 @@ module regex_module
6059
character(kind=RCK), parameter :: UNDERSCORE = "_"
6160
character(kind=RCK), parameter :: SPACE = " "
6261
character(kind=RCK), parameter :: DASH = "-"
63-
character(kind=RCK), parameter :: BACKSLASH0 = achar(0,kind=RCK) ! \0 or null character
64-
character(kind=RCK), parameter :: NEWLINE = achar(10,kind=RCK) ! \n or line feed
65-
character(kind=RCK), parameter :: BACKSPCE = achar(8,kind=RCK) ! \b or backspace character
62+
character(kind=RCK), parameter :: BACKSLASH0 = achar( 0,kind=RCK) ! \0 or null character
63+
character(kind=RCK), parameter :: NEWLINE = achar(10,kind=RCK) ! \n or line feed
64+
character(kind=RCK), parameter :: BACKSPCE = achar( 8,kind=RCK) ! \b or backspace character
6665

6766

6867
! Regex pattern element
@@ -184,12 +183,12 @@ end function matchmetachar
184183

185184
elemental logical function isdigit(c)
186185
character(kind=RCK), intent(in) :: c
187-
isdigit = index(c,"01234567890")>0
186+
isdigit = index("1234567890",c)>0
188187
end function isdigit
189188

190189
elemental logical function isalpha(c)
191190
character(kind=RCK), intent(in) :: c
192-
isalpha = index(c,lowercase)>0 .or. index(c,uppercase)>0
191+
isalpha = index(lowercase,c)>0 .or. index(uppercase,c)>0
193192
end function isalpha
194193

195194
elemental logical function isalphanum(c)
@@ -216,44 +215,45 @@ elemental logical function matchrange(c,str)
216215

217216
end function matchrange
218217

219-
elemental logical function matchcharclass(c,str)
220-
character(kind=RCK), intent(in) :: c
221-
character(kind=RCK,len=*), intent(in) :: str
218+
elemental logical function matchcharclass(c,str) result(match)
219+
character(kind=RCK), intent(in) :: c ! The current character
220+
character(kind=RCK,len=*), intent(in) :: str ! The charclass contents
222221

223222
integer :: i
224223

225-
matchcharclass = .false.
224+
match = .false.
226225
i = 0
227226

228-
loop: do
227+
! All characters
228+
loop: do while (i<len_trim(str))
229229

230230
i = i+1
231231

232-
if (matchrange(c,str(i:))) then
233-
matchcharclass = .true.
234-
return
235-
elseif (str(i:i+1) == BACKSLASH0) then
236-
! Escape-char: increment str-ptr and match on next char
232+
! We're in a range: must check this further
233+
match = matchrange(c,str(i:)); if (match) return
234+
235+
! Escaped character? look what's next
236+
if (str(i:i) == '\') then
237+
237238
i = i+1
238239

239-
if (matchmetachar(c,str(i:))) then
240-
matchcharclass = .true.
241-
return
242-
elseif (c==str(i:i) .and. .not.ismetachar(c)) then
243-
matchcharclass = .true.
244-
return
245-
end if
240+
! Valid escaped sequence
241+
match = matchmetachar(c,str(i:)); if (match) return
242+
243+
match = (c==str(i:i) .and. .not.ismetachar(c)); if (match) return
244+
246245
elseif (c==str(i:i)) then
246+
247+
! Character match
247248
if (c==DASH) then
248-
matchcharclass = str(i-1:i-1)==BACKSLASH0 .or. str(i+1:i+1)==BACKSLASH0
249+
! If this is a range, there must be something both before and later
250+
match = i-1<=0 .or. i+1>len(str)
249251
else
250-
matchcharclass = .true.
252+
match = .true.
251253
end if
252254
return
253255
end if
254256

255-
if (i==len(str)) exit loop
256-
257257
end do loop
258258

259259
end function matchcharclass
@@ -263,12 +263,17 @@ logical function matchquestion(p, pattern, text, matchlength)
263263
character(len=*,kind=RCK), intent(in) :: text
264264
integer, intent(inout) :: matchlength
265265

266+
print *, 'question'
267+
266268
matchquestion = .false.
267269

268-
if (p%type == UNUSED .or. matchpattern(pattern, text, matchlength)) then
270+
if (p%type == UNUSED) then
269271
matchquestion = .true.
270272
return
271-
elseif (len(text)>0 .and. pat_match(p,text(2:))) then
273+
elseif (matchpattern(pattern, text, matchlength)) then
274+
matchquestion = .true.
275+
return
276+
elseif (len(text)>0 .and. pat_match(p,text)) then
272277
if (matchpattern(pattern,text(2:),matchlength)) then
273278
matchlength = matchlength+1
274279
matchquestion = .true.
@@ -333,12 +338,12 @@ logical function matchplus(p, pattern, text, matchlength)
333338
end function matchplus
334339

335340
! Find matches of the given pattern in the string
336-
integer function re_match(pattern, text, length) result(index)
341+
integer function re_match(text, pattern, length) result(index)
337342
character(*,kind=RCK), intent(in) :: pattern
338343
character(*,kind=RCK), intent(in) :: text
339344
integer, intent(out) :: length
340345

341-
index = re_matchp(parse_pattern(pattern),text,length)
346+
index = re_matchp(text,parse_pattern(pattern),length)
342347

343348
end function re_match
344349

@@ -353,7 +358,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
353358
! Initialize class
354359
call this%destroy()
355360

356-
if (DEBUG) print *, "[regex] parsing pattern: <"//pattern//">"
361+
if (DEBUG) print "('[regex] parsing pattern: <',a,'>')", trim(pattern)
357362

358363
i = 1 ! index in pattern
359364
j = 1 ! index in re-compiled
@@ -363,7 +368,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
363368
to_the_moon: do while (i<=lenp)
364369

365370
c = pattern(i:i)
366-
if (DEBUG) print *, "[regex] at location ",i,': <',c,'>'
371+
if (DEBUG) print "('[regex] at location ',i0,': <',a,'>')", i, c
367372

368373
select case (c)
369374

@@ -413,7 +418,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
413418
i = i+1 ! Increment i to avoid including "^" in the char-buffer
414419

415420
! Check this is not an incomplete pattern
416-
if (pattern(i+1:i+1)==BACKSLASH0) then
421+
if (i>=lenp) then
417422
stop 'incomplete pattern'
418423
return
419424
end if
@@ -427,7 +432,9 @@ type(regex_op) function parse_pattern(pattern) result(this)
427432

428433
if (loc>0) then
429434
ccl_buf = pattern(i+1:i+loc-1)
430-
i = i+loc+1
435+
i = i+loc
436+
if (DEBUG) print "('[regex] at end of multi-character pattern: ',a)", trim(ccl_buf)
437+
431438
else
432439
stop 'incomplete [] pattern'
433440
end if
@@ -447,7 +454,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
447454

448455
end select
449456

450-
if (DEBUG) print *, "[regex] added pattern ",j,': ',this%pattern(j)%print()
457+
if (DEBUG) print "('[regex] added pattern ',i0,': ',a)",j,this%pattern(j)%print()
451458

452459
! A pattern was added: move to next
453460
i = i+1
@@ -468,7 +475,7 @@ function print_pattern(pattern) result(msg)
468475
character(len=MAX_CHAR_CLASS_LEN,kind=RCK) :: buffer
469476
integer :: lt
470477

471-
write(buffer,1) types(pattern%type+1),trim(pattern%ccl)
478+
write(buffer,1) trim(types(pattern%type+1)),trim(pattern%ccl)
472479

473480
lt = len_trim(buffer)
474481
allocate(character(len=lt,kind=RCK) :: msg)
@@ -479,7 +486,7 @@ function print_pattern(pattern) result(msg)
479486
end function print_pattern
480487

481488
! Match a single pattern at the g
482-
elemental logical function pat_match(p, c) result(match)
489+
logical function pat_match(p, c) result(match)
483490
class(regex_pattern), intent(in) :: p
484491
character(kind=RCK), intent(in) :: c
485492

@@ -496,10 +503,12 @@ elemental logical function pat_match(p, c) result(match)
496503
case default; match = c==p%ccl(1:1)
497504
end select
498505

506+
if (DEBUG) print "('[regex] current pattern=',a,' at char=',a,' match? ',l1)", p%print(),c,match
507+
499508
end function pat_match
500509

501510

502-
integer function re_matchp(pattern, text, matchlength) result(index)
511+
integer function re_matchp(text, pattern, matchlength) result(index)
503512
type(regex_op) :: pattern
504513
character(len=*,kind=RCK), intent(in) :: text
505514
integer, intent(out) :: matchlength
@@ -552,6 +561,7 @@ logical function matchpattern(pattern, text, matchlength) result(match)
552561
integer :: pre,ip,it
553562

554563
pre = matchlength
564+
print *, 'initial length ',pre
555565
ip = 1
556566
it = 1
557567

@@ -581,10 +591,11 @@ logical function matchpattern(pattern, text, matchlength) result(match)
581591

582592
matchlength = matchlength+1
583593

594+
if (it>=len(text)) exit iterate
595+
if (.not. pat_match(pattern(ip), text(it:it))) exit iterate
596+
584597
ip = ip+1
585598
it = it+1
586-
if (it>len(text)) exit iterate
587-
if (.not. pat_match(pattern(ip), text(it:it))) exit iterate
588599

589600
end do iterate
590601

0 commit comments

Comments
 (0)