@@ -20,7 +20,6 @@ module regex_module
2020 private
2121
2222 public :: parse_pattern
23- ! public :: re_matchp
2423 public :: regex
2524
2625 ! Character kind
@@ -29,7 +28,7 @@ module regex_module
2928 logical , parameter , public :: RE_DOT_MATCHES_NEWLINE = .true. ! Define .false. if you DON'T want '.' to match '\r' + '\n'
3029 integer , parameter , public :: MAX_REGEXP_OBJECTS = 512 ! Max number of regex symbols in expression.
3130 integer , parameter , public :: MAX_CHAR_CLASS_LEN = 1024 ! Max length of character-class buffer in.
32- logical , parameter :: DEBUG = .false .
31+ logical , parameter :: DEBUG = .true .
3332
3433 ! Supported patterns
3534 integer , parameter :: UNUSED = 0
@@ -60,9 +59,9 @@ module regex_module
6059 character (kind= RCK), parameter :: UNDERSCORE = " _"
6160 character (kind= RCK), parameter :: SPACE = " "
6261 character (kind= RCK), parameter :: DASH = " -"
63- character (kind= RCK), parameter :: BACKSLASH0 = achar (0 ,kind= RCK) ! \0 or null character
64- character (kind= RCK), parameter :: NEWLINE = achar (10 ,kind= RCK) ! \n or line feed
65- character (kind= RCK), parameter :: BACKSPCE = achar (8 ,kind= RCK) ! \b or backspace character
62+ character (kind= RCK), parameter :: BACKSLASH0 = achar ( 0 ,kind= RCK) ! \0 or null character
63+ character (kind= RCK), parameter :: NEWLINE = achar (10 ,kind= RCK) ! \n or line feed
64+ character (kind= RCK), parameter :: BACKSPCE = achar ( 8 ,kind= RCK) ! \b or backspace character
6665
6766
6867 ! Regex pattern element
@@ -184,12 +183,12 @@ end function matchmetachar
184183
185184 elemental logical function isdigit(c)
186185 character (kind= RCK), intent (in ) :: c
187- isdigit = index (c, " 01234567890 " )>0
186+ isdigit = index (" 1234567890 " ,c )>0
188187 end function isdigit
189188
190189 elemental logical function isalpha(c)
191190 character (kind= RCK), intent (in ) :: c
192- isalpha = index (c, lowercase)>0 .or. index (c, uppercase)>0
191+ isalpha = index (lowercase,c )>0 .or. index (uppercase,c )>0
193192 end function isalpha
194193
195194 elemental logical function isalphanum(c)
@@ -216,44 +215,45 @@ elemental logical function matchrange(c,str)
216215
217216 end function matchrange
218217
219- elemental logical function matchcharclass(c,str)
220- character (kind= RCK), intent (in ) :: c
221- character (kind= RCK,len=* ), intent (in ) :: str
218+ elemental logical function matchcharclass(c,str) result(match)
219+ character (kind= RCK), intent (in ) :: c ! The current character
220+ character (kind= RCK,len=* ), intent (in ) :: str ! The charclass contents
222221
223222 integer :: i
224223
225- matchcharclass = .false.
224+ match = .false.
226225 i = 0
227226
228- loop: do
227+ ! All characters
228+ loop: do while (i< len_trim (str))
229229
230230 i = i+1
231231
232- if (matchrange(c,str(i:))) then
233- matchcharclass = .true.
234- return
235- elseif (str(i:i+1 ) == BACKSLASH0) then
236- ! Escape-char: increment str-ptr and match on next char
232+ ! We're in a range: must check this further
233+ match = matchrange(c,str(i:)); if (match) return
234+
235+ ! Escaped character? look what's next
236+ if (str(i:i) == ' \' ) then
237+
237238 i = i+1
238239
239- if (matchmetachar(c,str(i:))) then
240- matchcharclass = .true.
241- return
242- elseif (c== str(i:i) .and. .not. ismetachar(c)) then
243- matchcharclass = .true.
244- return
245- end if
240+ ! Valid escaped sequence
241+ match = matchmetachar(c,str(i:)); if (match) return
242+
243+ match = (c== str(i:i) .and. .not. ismetachar(c)); if (match) return
244+
246245 elseif (c== str(i:i)) then
246+
247+ ! Character match
247248 if (c== DASH) then
248- matchcharclass = str(i-1 :i-1 )==BACKSLASH0 .or. str(i+1 :i+1 )==BACKSLASH0
249+ ! If this is a range, there must be something both before and later
250+ match = i-1 <= 0 .or. i+1 > len (str)
249251 else
250- matchcharclass = .true.
252+ match = .true.
251253 end if
252254 return
253255 end if
254256
255- if (i== len (str)) exit loop
256-
257257 end do loop
258258
259259 end function matchcharclass
@@ -263,12 +263,17 @@ logical function matchquestion(p, pattern, text, matchlength)
263263 character (len=* ,kind= RCK), intent (in ) :: text
264264 integer , intent (inout ) :: matchlength
265265
266+ print * , ' question'
267+
266268 matchquestion = .false.
267269
268- if (p% type == UNUSED .or. matchpattern(pattern, text, matchlength) ) then
270+ if (p% type == UNUSED) then
269271 matchquestion = .true.
270272 return
271- elseif (len (text)>0 .and. pat_match(p,text(2 :))) then
273+ elseif (matchpattern(pattern, text, matchlength)) then
274+ matchquestion = .true.
275+ return
276+ elseif (len (text)>0 .and. pat_match(p,text)) then
272277 if (matchpattern(pattern,text(2 :),matchlength)) then
273278 matchlength = matchlength+1
274279 matchquestion = .true.
@@ -333,12 +338,12 @@ logical function matchplus(p, pattern, text, matchlength)
333338 end function matchplus
334339
335340 ! Find matches of the given pattern in the string
336- integer function re_match (pattern , text , length ) result(index)
341+ integer function re_match (text , pattern , length ) result(index)
337342 character (* ,kind= RCK), intent (in ) :: pattern
338343 character (* ,kind= RCK), intent (in ) :: text
339344 integer , intent (out ) :: length
340345
341- index = re_matchp(parse_pattern(pattern),text ,length)
346+ index = re_matchp(text, parse_pattern(pattern),length)
342347
343348 end function re_match
344349
@@ -353,7 +358,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
353358 ! Initialize class
354359 call this% destroy()
355360
356- if (DEBUG) print * , " [regex] parsing pattern: <" // pattern // " > "
361+ if (DEBUG) print " (' [regex] parsing pattern: <',a,'>') " , trim (pattern)
357362
358363 i = 1 ! index in pattern
359364 j = 1 ! index in re-compiled
@@ -363,7 +368,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
363368 to_the_moon: do while (i<= lenp)
364369
365370 c = pattern(i:i)
366- if (DEBUG) print * , " [regex] at location " ,i ,' : <' ,c ,' >'
371+ if (DEBUG) print " (' [regex] at location ',i0 ,': <',a ,'>') " , i, c
367372
368373 select case (c)
369374
@@ -413,7 +418,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
413418 i = i+1 ! Increment i to avoid including "^" in the char-buffer
414419
415420 ! Check this is not an incomplete pattern
416- if (pattern(i +1 :i +1 )==BACKSLASH0 ) then
421+ if (i >= lenp ) then
417422 stop ' incomplete pattern'
418423 return
419424 end if
@@ -427,7 +432,9 @@ type(regex_op) function parse_pattern(pattern) result(this)
427432
428433 if (loc> 0 ) then
429434 ccl_buf = pattern(i+1 :i+ loc-1 )
430- i = i+ loc+1
435+ i = i+ loc
436+ if (DEBUG) print " ('[regex] at end of multi-character pattern: ',a)" , trim (ccl_buf)
437+
431438 else
432439 stop ' incomplete [] pattern'
433440 end if
@@ -447,7 +454,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
447454
448455 end select
449456
450- if (DEBUG) print * , " [regex] added pattern " ,j ,' : ' ,this% pattern(j)% print ()
457+ if (DEBUG) print " (' [regex] added pattern ',i0 ,': ',a) " ,j ,this% pattern(j)% print ()
451458
452459 ! A pattern was added: move to next
453460 i = i+1
@@ -468,7 +475,7 @@ function print_pattern(pattern) result(msg)
468475 character (len= MAX_CHAR_CLASS_LEN,kind= RCK) :: buffer
469476 integer :: lt
470477
471- write (buffer,1 ) types(pattern% type+1 ),trim (pattern% ccl)
478+ write (buffer,1 ) trim ( types(pattern% type+1 ) ),trim (pattern% ccl)
472479
473480 lt = len_trim (buffer)
474481 allocate (character (len= lt,kind= RCK) :: msg)
@@ -479,7 +486,7 @@ function print_pattern(pattern) result(msg)
479486 end function print_pattern
480487
481488 ! Match a single pattern at the g
482- elemental logical function pat_match(p, c) result(match)
489+ logical function pat_match (p , c ) result(match)
483490 class(regex_pattern), intent (in ) :: p
484491 character (kind= RCK), intent (in ) :: c
485492
@@ -496,10 +503,12 @@ elemental logical function pat_match(p, c) result(match)
496503 case default ; match = c== p% ccl(1 :1 )
497504 end select
498505
506+ if (DEBUG) print " ('[regex] current pattern=',a,' at char=',a,' match? ',l1)" , p% print (),c,match
507+
499508 end function pat_match
500509
501510
502- integer function re_matchp (pattern , text , matchlength ) result(index)
511+ integer function re_matchp (text , pattern , matchlength ) result(index)
503512 type (regex_op) :: pattern
504513 character (len=* ,kind= RCK), intent (in ) :: text
505514 integer , intent (out ) :: matchlength
@@ -552,6 +561,7 @@ logical function matchpattern(pattern, text, matchlength) result(match)
552561 integer :: pre,ip,it
553562
554563 pre = matchlength
564+ print * , ' initial length ' ,pre
555565 ip = 1
556566 it = 1
557567
@@ -581,10 +591,11 @@ logical function matchpattern(pattern, text, matchlength) result(match)
581591
582592 matchlength = matchlength+1
583593
594+ if (it>= len (text)) exit iterate
595+ if (.not. pat_match(pattern(ip), text(it:it))) exit iterate
596+
584597 ip = ip+1
585598 it = it+1
586- if (it> len (text)) exit iterate
587- if (.not. pat_match(pattern(ip), text(it:it))) exit iterate
588599
589600 end do iterate
590601
0 commit comments