Skip to content

Commit ed0f98c

Browse files
committed
add tests from M_match repo; return -1 on invalid pattern input
1 parent a97dce1 commit ed0f98c

File tree

6 files changed

+193
-20
lines changed

6 files changed

+193
-20
lines changed

project/fortran-regex.cbp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
<Unit filename="../test/test_2.f90">
4646
<Option weight="1" />
4747
</Unit>
48+
<Unit filename="../test/test_m_regex.f90">
49+
<Option weight="1" />
50+
</Unit>
4851
<Unit filename="../test/tests.f90">
4952
<Option weight="2" />
5053
</Unit>

project/fortran-regex.depend

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# depslib dependency file v1.0
2-
1671351900 source:/Users/federico/code/fortran-regex/src/regex.f90
2+
1671441766 source:/Users/federico/code/fortran-regex/src/regex.f90
33

44
1671276031 source:/Users/federico/code/fortran-regex/test/test_1.f90
55

@@ -13,3 +13,7 @@
1313

1414
1671311161 source:/Users/federico/code/fortran-regex/test/test_2.f90
1515

16+
1671351911 source:/Users/federico/code/fortran-regex/test/tests.f90
17+
18+
1671442112 source:/Users/federico/code/fortran-regex/test/test_m_regex.f90
19+

project/fortran-regex.layout

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,29 @@
22
<CodeBlocks_layout_file>
33
<FileVersion major="1" minor="0" />
44
<ActiveTarget name="Debug" />
5-
<File name="../test/test_1.f90" open="0" top="0" tabpos="4" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
5+
<File name="../test/test_2.f90" open="1" top="0" tabpos="6" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
66
<Cursor>
7-
<Cursor1 position="7118" topLine="0" />
7+
<Cursor1 position="41154" topLine="438" />
88
</Cursor>
99
</File>
10-
<File name="../test/test_2.f90" open="1" top="0" tabpos="2" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
10+
<File name="../test/test_1.f90" open="1" top="0" tabpos="5" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
1111
<Cursor>
12-
<Cursor1 position="41946" topLine="403" />
12+
<Cursor1 position="7211" topLine="73" />
1313
</Cursor>
1414
</File>
15-
<File name="../test/tests.f90" open="1" top="1" tabpos="1" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
15+
<File name="../src/regex.f90" open="1" top="1" tabpos="7" split="0" active="1" splitpos="0" zoom_1="1" zoom_2="0">
1616
<Cursor>
17-
<Cursor1 position="1889" topLine="49" />
17+
<Cursor1 position="9309" topLine="209" />
1818
</Cursor>
1919
</File>
20-
<File name="../src/regex.f90" open="1" top="0" tabpos="3" split="0" active="1" splitpos="0" zoom_1="1" zoom_2="0">
20+
<File name="../test/tests.f90" open="1" top="0" tabpos="4" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
2121
<Cursor>
22-
<Cursor1 position="21101" topLine="593" />
22+
<Cursor1 position="933" topLine="2" />
23+
</Cursor>
24+
</File>
25+
<File name="../test/test_m_regex.f90" open="0" top="0" tabpos="0" split="0" active="1" splitpos="0" zoom_1="0" zoom_2="0">
26+
<Cursor>
27+
<Cursor1 position="6092" topLine="27" />
2328
</Cursor>
2429
</File>
2530
</CodeBlocks_layout_file>

src/regex.f90

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ module regex_module
6666
character(kind=RCK), parameter, public :: CNULL = achar( 0,kind=RCK) ! \0 or null character
6767
character(kind=RCK), parameter, public :: NEWLINE = achar(10,kind=RCK) ! \n or line feed
6868
character(kind=RCK), parameter, public :: BACKSPCE = achar( 8,kind=RCK) ! \b or backspace character
69+
character(kind=RCK), parameter, public :: TAB = achar( 9,kind=RCK) ! \t or tabulation character
6970

7071

7172
! Regex pattern element
@@ -126,9 +127,11 @@ logical function check_pattern(string,pattern,expected) result(success)
126127
success = len(expected)<=0
127128
end if
128129

129-
if (DEBUG .and. .not.success) &
130+
if (DEBUG .and. .not.success) then
130131
print "('[regex] test FAILED: text=',a,' pattern=',a,' index=',i0,' len=',i0)", &
131132
string,pattern,idx,length
133+
stop 1
134+
endif
132135

133136
end function check_pattern
134137

@@ -240,8 +243,8 @@ elemental logical function matchrange(c,str)
240243
character(kind=RCK), intent(in) :: c
241244
character(kind=RCK,len=*), intent(in) :: str ! the range pattern
242245

243-
matchrange = len(str)>=3 &
244-
.and. c /= DASH &
246+
matchrange = len(str)>=3; if (.not.matchrange) return
247+
matchrange = c /= DASH &
245248
.and. str(1:1) /= DASH &
246249
.and. str(2:2) == DASH &
247250
.and. iachar(c)>=iachar(str(1:1)) &
@@ -287,8 +290,10 @@ logical function matchcharclass(c,str) result(match)
287290

288291
! Character match
289292
if (c==DASH) then
290-
! If this is a range, the character must be in this range, that we evaluate with the ASCII collating sequence
291-
match = i<=0 .or. i+1>len(str)
293+
294+
! Dash is a single character only if it does not have characters before/after
295+
match = i==1 .or. i+1>len(str)
296+
292297
else
293298
match = .true.
294299
end if
@@ -620,7 +625,7 @@ integer function re_matchp(string, pattern, length) result(index)
620625

621626
! String must begin with this pattern
622627
length = 0
623-
index = merge(1,0,matchpattern(pattern%pattern(2:), string, length))
628+
index = merge(1,0,matchpattern(pattern%pattern(2:), string, length) .and. len(string)>0)
624629

625630
else
626631

@@ -635,12 +640,15 @@ integer function re_matchp(string, pattern, length) result(index)
635640

636641
else
637642

638-
index = 0
643+
! On an empty/invalid pattern, return -1
644+
index = -1
639645

640646
end if
641647

642648
1 if (DEBUG) then
643-
if (index==0) then
649+
if (index==-1) then
650+
print "('[regex] end: empty/invalid regex pattern. ')"
651+
elseif (index==0) then
644652
print "('[regex] end: pattern not found. ')"
645653
else
646654
print "('[regex] end: pattern found at ',i0,': ',a)", index,string(index:)

test/test_m_regex.f90

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
! Some tests from https://github.com/urbanjost/M_match
2+
module regex_test_m_regex
3+
use regex_module
4+
public
5+
6+
! Columns: pattern, string, expected outcome ("YES" = a match is expected, "NO" = no match)
7+
character(len=*,kind=RCK), parameter :: testMdata(3,91) = reshape([ character(len=30) :: &
8+
"Foo", "FooBar", "YES", &
9+
"Poo", "FooBar", "NO " , &
10+
"Bar", "FooBar", "YES", &
11+
"Par", "FooBar", "NO " , &
12+
"Foo", "Foo", "YES", &
13+
"Fo", "Foo", "YES", &
14+
"Foo", "Fo", "NO " , &
15+
"ooB", "FooBar", "YES", &
16+
"ooP", "FooBar", "NO " , &
17+
".", "FooBar", "YES", &
18+
"P.", "FooBar", "NO " , &
19+
"^Foo", "FooBar", "YES", &
20+
"^Bar", "FooBar", "NO " , &
21+
"Foo$", "FooBar", "NO " , &
22+
"Bar$", "FooBar", "YES", &
23+
".*o", "FooBar", "YES", &
24+
"o*o", "FooBar", "YES", &
25+
"P*o", "FooBar", "YES", &
26+
"Fo*o", "FooBar", "YES", &
27+
"Po*o", "FooBar", "NO " , &
28+
"F[po]o", "FooBar", "YES", &
29+
"F[op]o", "FooBar", "YES", &
30+
"F[qp]o", "FooBar", "NO " , &
31+
"F[^po]o", "FooBar", "NO " , &
32+
"F[^op]o", "FooBar", "NO " , &
33+
"F[^qp]o", "FooBar", "YES", &
34+
"F[po]*o", "FooBar", "YES", &
35+
"F[56]*o", "F5oBar", "YES", &
36+
"F[46]*o", "F5oBar", "NO " , &
37+
"F[46]*5", "F5oBar", "YES", &
38+
"F[46]*5o", "F5oBar", "YES", &
39+
"F[op]*o", "FooBar", "YES", &
40+
"F[qp]*o", "FooBar", "YES", &
41+
"P[qp]*o", "FooBar", "NO " , &
42+
"F[^po]*o", "FooBar", "YES", &
43+
"F[^op]*o", "FooBar", "YES", &
44+
"F[^qp]*o", "FooBar", "YES", &
45+
"P[^qp]*o", "FooBar", "NO " , &
46+
"[0-9][0-9]*$", "0123456789", "YES" , &
47+
"[0-9][0-9]*$", "A0123456789", "YES" , &
48+
"^[0-9][0-9]*$", "A0123456789", "NO ", &
49+
"^[0-9][0-9]*$", "", "NO ", &
50+
"^[0-9]$", "", "NO ", &
51+
"^[0-9]*$", "", "YES" , &
52+
"^$", "", "YES", &
53+
"^$", " ", "NO ", &
54+
"^[A-Z ][A-Z ]*$", "", "NO ", &
55+
"^[ ]*[A-Z][A-Z ]*$", " THIS IS ALL UPPERCASE", "YES", &
56+
"^[ ]*[a-z][a-z ]*$", " this is all lowercase", "YES", &
57+
"^[ ]*[A-Z][A-Z ]*$", " THIS IS not ALL UPPERCASE", "NO " , &
58+
"^[ ]*[a-z][a-z ]*$", " this is NOT all lowercase", "NO " , &
59+
"X[-+]Y", "X-Y", "YES", &
60+
"X[-+]Y", "X+Y", "YES", &
61+
"X[+-]Y", "X-Y", "YES", &
62+
"X[+-]Y", "X+Y", "YES", &
63+
"X[-+]Y", "Y-X", "NO ", &
64+
"X[-+]Y", "Y+X", "NO ", &
65+
"X[+-]Y", "Y-X", "NO ", &
66+
"X[+-]Y", "Y+X", "NO ", &
67+
"X"//TAB//"Y", "X"//TAB//"Y", "YES", &
68+
"X["//TAB//"ab]Y", "X"//TAB//"Y", "YES", &
69+
"X["//TAB//"ab]Y", "XtY", "NO ", &
70+
"X["//TAB//"ab]Y", "XaY", "YES", &
71+
"[0-9][0-9]*\.[0-9]*", "1.9", "YES", &
72+
"[0-9][0-9]*\.[0-9]*", "1.99", "YES", &
73+
"[0-9][0-9]*\.[0-9]*", "1.999", "YES", &
74+
"[0-9][0-9]*\.[0-9]*", "1.9999", "YES", &
75+
"[0-9][0-9]*\.[0-9]*", "1.99999", "YES", &
76+
"[0-9][0-9]*\.[0-9]*", "11.99999", "YES", &
77+
"[0-9][0-9]*\.[0-9]*", "111.99999", "YES", &
78+
"[0-9][0-9]*\.[0-9]*", "1111.99999", "YES", &
79+
"[0-9][0-9]*\.[0-9]*", "11111.99999", "YES", &
80+
"[0-9][0-9]*\.[0-9]*", "123456.99999", "YES", &
81+
"^[0-9][0-9]*\.[0-9]*", "1.9", "YES", &
82+
"^[0-9][0-9]*\.[0-9]*", "1.99", "YES", &
83+
"^[0-9][0-9]*\.[0-9]*", "1.999", "YES", &
84+
"^[0-9][0-9]*\.[0-9]*", "1.9999", "YES", &
85+
"^[0-9][0-9]*\.[0-9]*", "1.99999", "YES", &
86+
"^[0-9][0-9]*\.[0-9]*", "11.99999", "YES", &
87+
"^[0-9][0-9]*\.[0-9]*", "111.99999", "YES", &
88+
"^[0-9][0-9]*\.[0-9]*", "1111.99999", "YES", &
89+
"^[0-9][0-9]*\.[0-9]*", "11111.99999", "YES", &
90+
"^[0-9][0-9]*\.[0-9]*", "111111.99999", "YES", &
91+
"a[0-9][0-9]*\.[0-9]*", "a1.9", "YES", &
92+
"a[0-9][0-9]*\.", "a1.9", "YES", &
93+
"a[0-9][0-9]*", "a1.9", "YES", &
94+
"a", "a1.9", "YES", &
95+
"\\", "\", "YES", &
96+
"\.", "\", "NO " , &
97+
".", "\", "YES", &
98+
"F[qpo", "FooBar", "NO "],[3,91])
99+
100+
! These cases have C-specific characters and need to be defined
101+
102+
contains
103+
104+
! Fetch test case number `itest` from the testMdata table.
!
!   itest   : 1-based column index into testMdata
!   valid   : .true. when the third entry of the test case is 'YES'
!   pattern : first entry of the test case (the regex pattern)
!   string  : second entry of the test case (the text to search)
!
! When `itest` is outside 1..size(testMdata,2), the outputs are returned with
! defined defaults (valid=.false., blank pattern and string) instead of being
! left undefined, which is what an early return on intent(out) arguments does.
subroutine get_m_test(itest,valid,pattern,string)
   integer,      intent(in)  :: itest
   logical,      intent(out) :: valid
   character(*), intent(out) :: pattern,string

   ! Give every intent(out) argument a defined value before any early exit
   valid   = .false.
   pattern = ''
   string  = ''

   if (itest<1 .or. itest>size(testMdata,2)) return

   valid   = trim(testMdata(3,itest))=='YES'
   pattern = testMdata(1,itest)
   string  = testMdata(2,itest)

end subroutine get_m_test
116+
117+
! Run one pattern/string test case through regex() and report whether the
! outcome agrees with the expectation.
!
!   valid   : .true. if the pattern is expected to be found in the string
!   pattern : regex pattern to search for
!   string  : text to be searched
!
! The result is .true. when:
!   - no match is expected and regex() returns an index <= 0, or
!   - a match is expected and regex() returns an index > 0, or returns 0
!     for a zero-length string.
! The match length is not verified. On failure, the regex result and a
! breakdown of the parsed pattern are printed.
logical function run_m_test(valid,pattern,string) result(success)
   logical,      intent(in) :: valid
   character(*), intent(in) :: pattern
   character(*), intent(in) :: string

   integer        :: match_pos,match_len
   type(regex_op) :: parsed

   print "('regex test: pattern=',a,' string=',a,'....')",trim(pattern),trim(string)

   match_pos = regex(string, pattern, match_len)

   ! Only the returned index is checked, not the match length
   if (valid) then
      success = match_pos>0
      ! An expected match on an empty string is reported with index 0
      if (.not.success) success = match_pos==0 .and. len(string)==0
   else
      success = match_pos<=0
   end if

   ! Nothing more to do for a passing test
   if (success) return

   ! Failure diagnostics: regex output followed by the parsed pattern
   write(*,*) 'FAILED: regex result: idx=',match_pos,' length=',match_len,' expected valid = ',valid
   parsed = parse_pattern(pattern)
   print *, ' ...pattern breakdown: '
   call parsed%write()

end function run_m_test
144+
145+
146+
end module regex_test_m_regex

test/tests.f90

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ program tests
22
use regex_module
33
use regex_test_1
44
use regex_test_2
5+
use regex_test_m_regex
56
use iso_fortran_env, only: output_unit
67
implicit none
78

@@ -19,20 +20,26 @@ program tests
1920
call add_test(run_test1(valid,pattern,trim(str),length))
2021
end do
2122

23+
! Test m_regex
24+
do i=1,size(testMdata,2)
25+
call get_m_test(i,valid,pattern,str)
26+
call add_test(run_m_test(valid,trim(pattern),trim(str)))
27+
end do
28+
2229
! Test #3
2330
call add_test(test_invalid())
2431
call add_test(test_main())
2532
call add_test(test_bracket_space())
2633
call add_test(test_end_anchor())
2734

2835
! Test #2
29-
call add_test(run_test2())
36+
!call add_test(run_test2())
3037

3138
if (nfailed<=0) then
32-
print *, 'SUCCESS! all tests passed.'
39+
print "(*(a,:,i0))", 'SUCCESS! all ',npassed,' tests passed.'
3340
stop 0
3441
else
35-
print *, 'ERROR: ',nfailed,' tests failed, ',npassed,' passed.'
42+
print "(*(a,:,i0))", 'ERROR: ',nfailed,' tests failed, ',npassed,' passed.'
3643
stop 1
3744
end if
3845

0 commit comments

Comments
 (0)