@@ -163,7 +163,7 @@ end function matchdot
163163
164164 elemental logical function ismetachar(c)
165165 character (kind= RCK), intent (in ) :: c
166- ismetachar = index (c, " sSwWdD" )>0
166+ ismetachar = index (" sSwWdD" ,c )>0
167167 end function ismetachar
168168
169169 pure logical function matchmetachar(c, str)
@@ -204,18 +204,18 @@ end function isspace
204204 ! Match range of the tye 0-9 or 5-7 etc.
205205 elemental logical function matchrange(c,str)
206206 character (kind= RCK), intent (in ) :: c
207- character (kind= RCK,len=* ), intent (in ) :: str
207+ character (kind= RCK,len=* ), intent (in ) :: str ! the range pattern
208208
209- matchrange = len (str)>= 3 .and. &
210- c /= DASH &
209+ matchrange = len (str)>= 3 &
210+ .and. c /= DASH &
211211 .and. str(1 :1 ) /= DASH &
212212 .and. str(2 :2 ) == DASH &
213- .and. ichar (c)>= ichar (str(1 :1 )) &
214- .and. ichar (c)<= ichar (str(3 :3 )) ! Range (number/letters) is in increasing order
213+ .and. iachar (c)>= iachar (str(1 :1 )) &
214+ .and. iachar (c)<= iachar (str(3 :3 )) ! Range (number/letters) is in increasing order
215215
216216 end function matchrange
217217
218- elemental logical function matchcharclass(c,str) result(match)
218+ logical function matchcharclass (c ,str ) result(match)
219219 character (kind= RCK), intent (in ) :: c ! The current character
220220 character (kind= RCK,len=* ), intent (in ) :: str ! The charclass contents
221221
@@ -224,30 +224,40 @@ elemental logical function matchcharclass(c,str) result(match)
224224 match = .false.
225225 i = 0
226226
227- ! All characters
227+ ! All characters in the charclass contents
228228 loop: do while (i< len_trim (str))
229229
230230 i = i+1
231231
232232 ! We're in a range: must check this further
233- match = matchrange(c,str(i:)); if (match) return
233+ if (matchrange(c,str(i:))) then
234+ match = .true.
235+ return
234236
235237 ! Escaped character? look what's next
236- if (str(i:i) == ' \' ) then
238+ elseif (str(i:i) == ' \' ) then
237239
238240 i = i+1
239241
240242 ! Valid escaped sequence
241- match = matchmetachar(c,str(i:)); if (match) return
242243
243- match = (c== str(i:i) .and. .not. ismetachar(c)); if (match) return
244+ if (matchmetachar(c,str(i:))) then
245+ match = .true.
246+ print * , ' match meta, c=' ,c
247+ return
248+ elseif (c== str(i:i) .and. (.not. ismetachar(c))) then
249+ match = .true.
250+ print * , ' match not meta, c=' ,c
251+ return
252+ endif
244253
245254 elseif (c== str(i:i)) then
246255
247256 ! Character match
248257 if (c== DASH) then
249- ! If this is a range, there must be something both before and later
250- match = i-1 <= 0 .or. i+1 > len (str)
258+ ! If this is a range, the character must be in this range, that we evaluate with the ASCII collating sequence
259+ match = i<= 0 .or. i+1 > len_trim (str)
260+ print * , ' c is dash! match=' ,match
251261 else
252262 match = .true.
253263 end if
@@ -256,54 +266,66 @@ elemental logical function matchcharclass(c,str) result(match)
256266
257267 end do loop
258268
269+ print * , ' charclass: no match on i=' ,i,' str=' ,trim (str),' c=' ,c
270+
259271 end function matchcharclass
260272
261273 logical function matchquestion (p , pattern , text , matchlength )
262274 type (regex_pattern), intent (in ) :: p, pattern(:)
263275 character (len=* ,kind= RCK), intent (in ) :: text
264276 integer , intent (inout ) :: matchlength
265277
266- print * , ' question'
267-
268278 matchquestion = .false.
269279
270280 if (p% type == UNUSED) then
271281 matchquestion = .true.
282+ print * , ' unused -> match'
272283 return
273284 elseif (matchpattern(pattern, text, matchlength)) then
285+ print * , ' matchquestion 2: length=' ,matchlength,' match=.true.'
274286 matchquestion = .true.
275287 return
276- elseif (len (text)>0 .and. pat_match(p,text)) then
277- if (matchpattern(pattern,text(2 :),matchlength)) then
278- matchlength = matchlength+1
279- matchquestion = .true.
280- return
288+ elseif (len (text)>0 ) then
289+ if (pat_match(p,text) .and. len (text)>1 ) then
290+ if (matchpattern(pattern,text(2 :),matchlength)) then
291+ matchlength = matchlength+1
292+ matchquestion = .true.
293+ return
294+ endif
281295 end if
282296 end if
283297
284298 end function matchquestion
285299
286- logical function matchstar (p , pattern , text , matchlength )
300+ logical function matchstar (p , pattern , text , it0 , matchlength )
287301 type (regex_pattern), intent (in ) :: p, pattern(:)
288302 character (len=* ,kind= RCK), intent (in ) :: text
303+ integer , intent (in ) :: it0 ! starting point
289304 integer , intent (inout ) :: matchlength
290305
291306 integer :: prelen,it
292307
308+ print * , ' match star, length=' ,matchlength,' it0=' ,it0,' lenm=' ,len (text)
309+
310+ if (len (text)<= 0 ) then
311+ matchstar = .false.
312+ return
313+ end if
314+
293315 ! Save input variables
294316 prelen = matchlength
295- it = 1
317+ it = it0
296318
297- do while (it<= len (text) .and. pat_match(p, text(it:)))
319+ do while (it> 0 .and. it<= len (text))
320+ if (.not. pat_match(p, text(it:))) exit
298321 it = it+1
299322 matchlength = matchlength+1
300323 end do
301324
302- do while (it>= 1 )
325+ do while (it>= it0 )
303326 matchstar = matchpattern(pattern, text(it:), matchlength)
304- if (matchstar) return
305-
306327 it = it-1
328+ if (matchstar) return
307329 matchlength = matchlength-1
308330
309331 end do
@@ -313,23 +335,27 @@ logical function matchstar(p, pattern, text, matchlength)
313335
314336 end function matchstar
315337
316- logical function matchplus (p , pattern , text , matchlength )
338+ logical function matchplus (p , pattern , text , it0 , matchlength )
317339 type (regex_pattern), intent (in ) :: p, pattern(:)
318340 character (len=* ,kind= RCK), intent (in ) :: text
341+ integer , intent (in ) :: it0
319342 integer , intent (inout ) :: matchlength
320343
321344 integer :: it
322345
323- it = 1
324- do while (it<= len (text) .and. pat_match(p, text(it:)))
346+ print * , ' matchplus'
347+
348+ it = it0
349+ do while (it> 0 .and. it<= len (text))
350+ if (.not. pat_match(p, text(it:))) exit
325351 it = it+1
326352 matchlength = matchlength+1
327353 end do
328354
329- do while (it> 1 )
355+ do while (it> it0 )
330356 matchplus = matchpattern(pattern, text(it:), matchlength)
331- if (matchplus) return
332357 it = it-1
358+ if (matchplus) return
333359 matchlength = matchlength-1
334360 end do
335361
@@ -342,8 +368,10 @@ integer function re_match(text, pattern, length) result(index)
342368 character (* ,kind= RCK), intent (in ) :: pattern
343369 character (* ,kind= RCK), intent (in ) :: text
344370 integer , intent (out ) :: length
371+ type (regex_op) :: command
345372
346- index = re_matchp(text,parse_pattern(pattern),length)
373+ command = parse_pattern(pattern)
374+ index = re_matchp(text,command,length)
347375
348376 end function re_match
349377
@@ -411,6 +439,8 @@ type(regex_op) function parse_pattern(pattern) result(this)
411439 ! Character class
412440 case (' [' )
413441
442+ loc = 1
443+
414444 ! First, check if this class is negated ("^")
415445 if (pattern(i+1 :i+1 )==' ^' ) then
416446 this% pattern(j)% type = INV_CHAR_CLASS
@@ -427,22 +457,26 @@ type(regex_op) function parse_pattern(pattern) result(this)
427457 this% pattern(j)% type = AT_CHAR_CLASS
428458 end if
429459
430- ! Copy characters inside [..] to buffer */
460+ ! Remove any escape characters
431461 loc = index (pattern(i+1 :),' ]' )
432-
433462 if (loc> 0 ) then
434463 ccl_buf = pattern(i+1 :i+ loc-1 )
435464 i = i+ loc
436465 if (DEBUG) print " ('[regex] at end of multi-character pattern: ',a)" , trim (ccl_buf)
437-
438466 else
439467 stop ' incomplete [] pattern'
440468 end if
441469
442- ! the only escaped character between brackets is \\
443- ! if present, replace double backslash with a single one
444- loc = index (ccl_buf,' \\' )
445- if (loc> 0 ) ccl_buf = ccl_buf(:loc)// ccl_buf(loc+2 :)
470+ ! If there is any escape character(s), just check that the next is nonempty
471+ loc = index (ccl_buf,' \' )
472+ if (loc> 0 ) then
473+ if (loc>= len (ccl_buf)) then
474+ stop ' incomplete escaped character inside [] pattern'
475+ end if
476+ if (ccl_buf(loc+1 :loc+1 )==SPACE) then
477+ stop ' empty escaped character inside [] pattern'
478+ end if
479+ end if
446480
447481 ! Terminate string
448482 this% pattern(j)% ccl = trim (ccl_buf)
@@ -520,16 +554,16 @@ integer function re_matchp(text, pattern, matchlength) result(index)
520554 if (pattern% pattern(1 )% type == BEGIN_WITH) then
521555
522556 ! String must begin with this pattern
523- index = merge (1 ,0 ,matchpattern([ pattern% pattern(2 )] , text, matchlength))
524- ! print *, 'begin with? index = ',index
557+ index = merge (1 ,0 ,matchpattern(pattern% pattern(2 :) , text, matchlength))
558+ print * , ' begin with? index = ' ,index
525559
526560 else
527561
528562 do index= 1 ,len (text)
529563 if (matchpattern(pattern% pattern,text(index:),matchlength)) return
530564 end do
531565
532- ! print *, 'all patterns not matched'
566+ print * , ' all patterns not matched, ' ,text
533567
534568 index = 0
535569
@@ -561,39 +595,44 @@ logical function matchpattern(pattern, text, matchlength) result(match)
561595 integer :: pre,ip,it
562596
563597 pre = matchlength
564- print * , ' initial length ' ,pre
565598 ip = 1
566599 it = 1
567600
568601 iterate: do while (ip<= size (pattern))
569602
603+ print * , ' trying pattern ' ,pattern(ip)% print (),' on character ' ,text(it:)
604+
570605 if (pattern(ip)% type == UNUSED .or. pattern(ip+1 )% type == QUESTIONMARK) then
571606
607+ print * , ' before matchquestion = ' ,match,' length=' ,matchlength
572608 match = matchquestion(pattern(ip),pattern(ip+2 :),text(it:),matchlength)
609+ print * , ' matchquestion = ' ,match,' length=' ,matchlength
573610 return
574611
575612 elseif (pattern(ip+1 )% type == STAR) then
576613
577- match = matchstar(pattern(ip),pattern(ip+2 :), text(it:) , matchlength)
614+ match = matchstar(pattern(ip),pattern(ip+2 :), text, it , matchlength)
578615 return
579616
580617 elseif (pattern(ip+1 )% type == PLUS) then
581618
582- match = matchplus(pattern(ip),pattern(ip+2 :), text(it:) , matchlength)
619+ match = matchplus(pattern(ip),pattern(ip+2 :), text, it , matchlength)
583620 return
584621
585622 elseif (pattern(ip)% type == END_WITH .and. pattern(ip+1 )% type == UNUSED) then
586623
587- match = len (text(it:))<= 0
624+ print * , ' end pattern, len(text)' ,len (text(it:))
625+
626+ match = len (text(it:))<= 1
588627 return
589628
590629 end if
591630
592631 matchlength = matchlength+1
593632
594- if (it>= len (text)) exit iterate
633+ if (it> len (text)) exit iterate
634+ print * , ' pat match with ip=' ,ip,' it=' ,it,' len=' ,len (text),' matchlenght=' ,matchlength
595635 if (.not. pat_match(pattern(ip), text(it:it))) exit iterate
596-
597636 ip = ip+1
598637 it = it+1
599638
0 commit comments