Skip to content

Commit afc5dfa

Browse files
committed
several bugfixes; most tests now working
1 parent 78b87ac commit afc5dfa

File tree

7 files changed

+104
-62
lines changed

7 files changed

+104
-62
lines changed

project/fortran-regex.cbp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
<Option compiler="gcc" />
1414
<Compiler>
1515
<Add option="-g" />
16+
<Add option="-fcheck=bounds" />
1617
</Compiler>
1718
</Target>
1819
<Target title="Release">
@@ -39,10 +40,10 @@
3940
<Option weight="0" />
4041
</Unit>
4142
<Unit filename="../test/test_1.f90">
42-
<Option weight="2" />
43+
<Option weight="1" />
4344
</Unit>
4445
<Unit filename="../test/testdata.f90">
45-
<Option weight="1" />
46+
<Option weight="0" />
4647
</Unit>
4748
<Extensions />
4849
</Project>

project/fortran-regex.depend

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
1662208326 source:/Users/federico/code/fortran-regex/test/testdata.f90
77

8-
1671110187 source:i:\fortran-regex\test\testdata.f90
8+
1671127283 source:i:\fortran-regex\test\testdata.f90
99

10-
1671110635 source:i:\fortran-regex\src\regex.f90
10+
1671125468 source:i:\fortran-regex\src\regex.f90
1111

12-
1671094582 source:i:\fortran-regex\test\test_1.f90
12+
1671123384 source:i:\fortran-regex\test\test_1.f90
1313

project/fortran-regex.layout

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@
22
<CodeBlocks_layout_file>
33
<FileVersion major="1" minor="0" />
44
<ActiveTarget name="Debug" />
5-
<File name="..\src\regex.f90" open="1" top="1" tabpos="2" split="0" active="1" splitpos="0" zoom_1="4" zoom_2="0">
5+
<File name="..\src\regex.f90" open="1" top="1" tabpos="2" split="0" active="1" splitpos="0" zoom_1="1" zoom_2="0">
66
<Cursor>
7-
<Cursor1 position="15345" topLine="421" />
7+
<Cursor1 position="12313" topLine="321" />
88
</Cursor>
99
</File>
1010
<File name="..\test\testdata.f90" open="1" top="0" tabpos="3" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
1111
<Cursor>
12-
<Cursor1 position="672" topLine="21" />
12+
<Cursor1 position="1106" topLine="3" />
1313
</Cursor>
1414
</File>
1515
<File name="..\test\test_1.f90" open="1" top="0" tabpos="1" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
1616
<Cursor>
17-
<Cursor1 position="521" topLine="4" />
17+
<Cursor1 position="575" topLine="0" />
1818
</Cursor>
1919
</File>
2020
</CodeBlocks_layout_file>

project/regex_testdata.mod

-44 Bytes
Binary file not shown.

src/regex.f90

Lines changed: 88 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ end function matchdot
163163

164164
elemental logical function ismetachar(c)
165165
character(kind=RCK), intent(in) :: c
166-
ismetachar = index(c,"sSwWdD")>0
166+
ismetachar = index("sSwWdD",c)>0
167167
end function ismetachar
168168

169169
pure logical function matchmetachar(c, str)
@@ -204,18 +204,18 @@ end function isspace
204204
! Match range of the tye 0-9 or 5-7 etc.
205205
elemental logical function matchrange(c,str)
206206
character(kind=RCK), intent(in) :: c
207-
character(kind=RCK,len=*), intent(in) :: str
207+
character(kind=RCK,len=*), intent(in) :: str ! the range pattern
208208

209-
matchrange = len(str)>=3 .and. &
210-
c /= DASH &
209+
matchrange = len(str)>=3 &
210+
.and. c /= DASH &
211211
.and. str(1:1) /= DASH &
212212
.and. str(2:2) == DASH &
213-
.and. ichar(c)>=ichar(str(1:1)) &
214-
.and. ichar(c)<=ichar(str(3:3)) ! Range (number/letters) is in increasing order
213+
.and. iachar(c)>=iachar(str(1:1)) &
214+
.and. iachar(c)<=iachar(str(3:3)) ! Range (number/letters) is in increasing order
215215

216216
end function matchrange
217217

218-
elemental logical function matchcharclass(c,str) result(match)
218+
logical function matchcharclass(c,str) result(match)
219219
character(kind=RCK), intent(in) :: c ! The current character
220220
character(kind=RCK,len=*), intent(in) :: str ! The charclass contents
221221

@@ -224,30 +224,40 @@ elemental logical function matchcharclass(c,str) result(match)
224224
match = .false.
225225
i = 0
226226

227-
! All characters
227+
! All characters in the charclass contents
228228
loop: do while (i<len_trim(str))
229229

230230
i = i+1
231231

232232
! We're in a range: must check this further
233-
match = matchrange(c,str(i:)); if (match) return
233+
if (matchrange(c,str(i:))) then
234+
match = .true.
235+
return
234236

235237
! Escaped character? look what's next
236-
if (str(i:i) == '\') then
238+
elseif (str(i:i) == '\') then
237239

238240
i = i+1
239241

240242
! Valid escaped sequence
241-
match = matchmetachar(c,str(i:)); if (match) return
242243

243-
match = (c==str(i:i) .and. .not.ismetachar(c)); if (match) return
244+
if (matchmetachar(c,str(i:))) then
245+
match = .true.
246+
print *, 'match meta, c=',c
247+
return
248+
elseif (c==str(i:i) .and. (.not.ismetachar(c))) then
249+
match = .true.
250+
print *, 'match not meta, c=',c
251+
return
252+
endif
244253

245254
elseif (c==str(i:i)) then
246255

247256
! Character match
248257
if (c==DASH) then
249-
! If this is a range, there must be something both before and later
250-
match = i-1<=0 .or. i+1>len(str)
258+
! If this is a range, the character must be in this range, that we evaluate with the ASCII collating sequence
259+
match = i<=0 .or. i+1>len_trim(str)
260+
print *, 'c is dash! match=',match
251261
else
252262
match = .true.
253263
end if
@@ -256,54 +266,66 @@ elemental logical function matchcharclass(c,str) result(match)
256266

257267
end do loop
258268

269+
print *, 'charclass: no match on i=',i,' str=',trim(str),' c=',c
270+
259271
end function matchcharclass
260272

261273
logical function matchquestion(p, pattern, text, matchlength)
262274
type(regex_pattern), intent(in) :: p, pattern(:)
263275
character(len=*,kind=RCK), intent(in) :: text
264276
integer, intent(inout) :: matchlength
265277

266-
print *, 'question'
267-
268278
matchquestion = .false.
269279

270280
if (p%type == UNUSED) then
271281
matchquestion = .true.
282+
print *, 'unused -> match'
272283
return
273284
elseif (matchpattern(pattern, text, matchlength)) then
285+
print *, 'matchquestion 2: length=',matchlength,' match=.true.'
274286
matchquestion = .true.
275287
return
276-
elseif (len(text)>0 .and. pat_match(p,text)) then
277-
if (matchpattern(pattern,text(2:),matchlength)) then
278-
matchlength = matchlength+1
279-
matchquestion = .true.
280-
return
288+
elseif (len(text)>0) then
289+
if (pat_match(p,text) .and. len(text)>1) then
290+
if (matchpattern(pattern,text(2:),matchlength)) then
291+
matchlength = matchlength+1
292+
matchquestion = .true.
293+
return
294+
endif
281295
end if
282296
end if
283297

284298
end function matchquestion
285299

286-
logical function matchstar(p, pattern, text, matchlength)
300+
logical function matchstar(p, pattern, text, it0, matchlength)
287301
type(regex_pattern), intent(in) :: p, pattern(:)
288302
character(len=*,kind=RCK), intent(in) :: text
303+
integer, intent(in) :: it0 ! starting point
289304
integer, intent(inout) :: matchlength
290305

291306
integer :: prelen,it
292307

308+
print *, 'match star, length=',matchlength,' it0=',it0,' lenm=',len(text)
309+
310+
if (len(text)<=0) then
311+
matchstar = .false.
312+
return
313+
end if
314+
293315
! Save input variables
294316
prelen = matchlength
295-
it = 1
317+
it = it0
296318

297-
do while (it<=len(text) .and. pat_match(p, text(it:)))
319+
do while (it>0 .and. it<=len(text))
320+
if (.not.pat_match(p, text(it:))) exit
298321
it = it+1
299322
matchlength = matchlength+1
300323
end do
301324

302-
do while (it>=1)
325+
do while (it>=it0)
303326
matchstar = matchpattern(pattern, text(it:), matchlength)
304-
if (matchstar) return
305-
306327
it = it-1
328+
if (matchstar) return
307329
matchlength = matchlength-1
308330

309331
end do
@@ -313,23 +335,27 @@ logical function matchstar(p, pattern, text, matchlength)
313335

314336
end function matchstar
315337

316-
logical function matchplus(p, pattern, text, matchlength)
338+
logical function matchplus(p, pattern, text, it0, matchlength)
317339
type(regex_pattern), intent(in) :: p, pattern(:)
318340
character(len=*,kind=RCK), intent(in) :: text
341+
integer, intent(in) :: it0
319342
integer, intent(inout) :: matchlength
320343

321344
integer :: it
322345

323-
it = 1
324-
do while (it<=len(text) .and. pat_match(p, text(it:)))
346+
print *, 'matchplus'
347+
348+
it = it0
349+
do while (it>0 .and. it<=len(text))
350+
if (.not. pat_match(p, text(it:))) exit
325351
it = it+1
326352
matchlength = matchlength+1
327353
end do
328354

329-
do while (it>1)
355+
do while (it>it0)
330356
matchplus = matchpattern(pattern, text(it:), matchlength)
331-
if (matchplus) return
332357
it = it-1
358+
if (matchplus) return
333359
matchlength = matchlength-1
334360
end do
335361

@@ -342,8 +368,10 @@ integer function re_match(text, pattern, length) result(index)
342368
character(*,kind=RCK), intent(in) :: pattern
343369
character(*,kind=RCK), intent(in) :: text
344370
integer, intent(out) :: length
371+
type (regex_op) :: command
345372

346-
index = re_matchp(text,parse_pattern(pattern),length)
373+
command = parse_pattern(pattern)
374+
index = re_matchp(text,command,length)
347375

348376
end function re_match
349377

@@ -411,6 +439,8 @@ type(regex_op) function parse_pattern(pattern) result(this)
411439
! Character class
412440
case ('[')
413441

442+
loc = 1
443+
414444
! First, check if this class is negated ("^")
415445
if (pattern(i+1:i+1)=='^') then
416446
this%pattern(j)%type = INV_CHAR_CLASS
@@ -427,22 +457,26 @@ type(regex_op) function parse_pattern(pattern) result(this)
427457
this%pattern(j)%type = AT_CHAR_CLASS
428458
end if
429459

430-
! Copy characters inside [..] to buffer */
460+
! Remove any escape characters
431461
loc = index(pattern(i+1:),']')
432-
433462
if (loc>0) then
434463
ccl_buf = pattern(i+1:i+loc-1)
435464
i = i+loc
436465
if (DEBUG) print "('[regex] at end of multi-character pattern: ',a)", trim(ccl_buf)
437-
438466
else
439467
stop 'incomplete [] pattern'
440468
end if
441469

442-
! the only escaped character between brackets is \\
443-
! if present, replace double backslash with a single one
444-
loc = index(ccl_buf,'\\')
445-
if (loc>0) ccl_buf = ccl_buf(:loc)//ccl_buf(loc+2:)
470+
! If there is any escape character(s), just check that the next is nonempty
471+
loc = index(ccl_buf,'\')
472+
if (loc>0) then
473+
if (loc>=len(ccl_buf)) then
474+
stop 'incomplete escaped character inside [] pattern'
475+
end if
476+
if (ccl_buf(loc+1:loc+1)==SPACE) then
477+
stop 'empty escaped character inside [] pattern'
478+
end if
479+
end if
446480

447481
! Terminate string
448482
this%pattern(j)%ccl = trim(ccl_buf)
@@ -520,16 +554,16 @@ integer function re_matchp(text, pattern, matchlength) result(index)
520554
if (pattern%pattern(1)%type == BEGIN_WITH) then
521555

522556
! String must begin with this pattern
523-
index = merge(1,0,matchpattern([pattern%pattern(2)], text, matchlength))
524-
!print *, 'begin with? index = ',index
557+
index = merge(1,0,matchpattern(pattern%pattern(2:), text, matchlength))
558+
print *, 'begin with? index = ',index
525559

526560
else
527561

528562
do index=1,len(text)
529563
if (matchpattern(pattern%pattern,text(index:),matchlength)) return
530564
end do
531565

532-
!print *, 'all patterns not matched'
566+
print *, 'all patterns not matched, ',text
533567

534568
index = 0
535569

@@ -561,39 +595,44 @@ logical function matchpattern(pattern, text, matchlength) result(match)
561595
integer :: pre,ip,it
562596

563597
pre = matchlength
564-
print *, 'initial length ',pre
565598
ip = 1
566599
it = 1
567600

568601
iterate: do while (ip<=size(pattern))
569602

603+
print *, 'trying pattern ',pattern(ip)%print(),' on character ',text(it:)
604+
570605
if (pattern(ip)%type == UNUSED .or. pattern(ip+1)%type == QUESTIONMARK) then
571606

607+
print *, 'before matchquestion = ',match,' length=',matchlength
572608
match = matchquestion(pattern(ip),pattern(ip+2:),text(it:),matchlength)
609+
print *, 'matchquestion = ',match,' length=',matchlength
573610
return
574611

575612
elseif (pattern(ip+1)%type == STAR) then
576613

577-
match = matchstar(pattern(ip),pattern(ip+2:), text(it:), matchlength)
614+
match = matchstar(pattern(ip),pattern(ip+2:), text, it, matchlength)
578615
return
579616

580617
elseif (pattern(ip+1)%type == PLUS) then
581618

582-
match = matchplus(pattern(ip),pattern(ip+2:), text(it:), matchlength)
619+
match = matchplus(pattern(ip),pattern(ip+2:), text, it, matchlength)
583620
return
584621

585622
elseif (pattern(ip)%type == END_WITH .and. pattern(ip+1)%type == UNUSED) then
586623

587-
match = len(text(it:))<=0
624+
print *, 'end pattern, len(text)',len(text(it:))
625+
626+
match = len(text(it:))<=1
588627
return
589628

590629
end if
591630

592631
matchlength = matchlength+1
593632

594-
if (it>=len(text)) exit iterate
633+
if (it>len(text)) exit iterate
634+
print *, 'pat match with ip=',ip,' it=',it,' len=',len(text),' matchlenght=',matchlength
595635
if (.not. pat_match(pattern(ip), text(it:it))) exit iterate
596-
597636
ip = ip+1
598637
it = it+1
599638

test/test_1.f90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ program tests
1818
do i=1,size(test1data,2)
1919
call get_test1(i,valid,pattern,str,length)
2020

21-
call add_test(test1(valid,pattern,str,length))
21+
call add_test(test1(valid,pattern,trim(str),length))
2222

2323
if (nfailed>0) stop 'test failed'
2424

0 commit comments

Comments
 (0)