1616!
1717! *************************************************************************************************
1818module regex_module
19+ use iso_fortran_env, only: output_unit
1920 implicit none
2021 private
2122
@@ -28,7 +29,9 @@ module regex_module
2829 logical , parameter , public :: RE_DOT_MATCHES_NEWLINE = .true. ! Define .false. if you DON'T want '.' to match '\r' + '\n'
2930 integer , parameter , public :: MAX_REGEXP_OBJECTS = 512 ! Max number of regex symbols in expression.
3031 integer , parameter , public :: MAX_CHAR_CLASS_LEN = 1024 ! Max length of character-class buffer in.
31- logical , parameter :: DEBUG = .true.
32+
33+ ! Turn on verbosity for debugging
34+ logical , parameter :: DEBUG = .false.
3235
3336 ! Supported patterns
3437 integer , parameter :: UNUSED = 0
@@ -56,12 +59,12 @@ module regex_module
5659 character (kind= RCK,len=* ), parameter :: lowercase= " abcdefghijklmnopqrstuvwxyz"
5760 character (kind= RCK,len=* ), parameter :: uppercase= " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
5861
59- character (kind= RCK), parameter :: UNDERSCORE = " _"
60- character (kind= RCK), parameter :: SPACE = " "
61- character (kind= RCK), parameter :: DASH = " -"
62- character (kind= RCK), parameter :: BACKSLASH0 = achar ( 0 ,kind= RCK) ! \0 or null character
63- character (kind= RCK), parameter :: NEWLINE = achar (10 ,kind= RCK) ! \n or line feed
64- character (kind= RCK), parameter :: BACKSPCE = achar ( 8 ,kind= RCK) ! \b or backspace character
62+ character (kind= RCK), parameter , public :: UNDERSCORE = " _"
63+ character (kind= RCK), parameter , public :: SPACE = " "
64+ character (kind= RCK), parameter , public :: DASH = " -"
65+ character (kind= RCK), parameter , public :: CNULL = achar ( 0 ,kind= RCK) ! \0 or null character
66+ character (kind= RCK), parameter , public :: NEWLINE = achar (10 ,kind= RCK) ! \n or line feed
67+ character (kind= RCK), parameter , public :: BACKSPCE = achar ( 8 ,kind= RCK) ! \b or backspace character
6568
6669
6770 ! Regex pattern element
@@ -87,18 +90,20 @@ module regex_module
8790
8891 contains
8992
90- ! procedure :: parse => parse_pattern
91- procedure :: write = > write_pattern
93+ procedure :: new = > new_from_pattern
94+ procedure :: write = > write_pattern
9295 procedure :: nrules
9396 procedure :: destroy
94- final :: finalize
97+ final :: finalize
9598
9699 end type regex_op
97100
98101 ! Public interface
99102 interface regex
100103 module procedure re_match
104+ module procedure re_match_nolength
101105 module procedure re_matchp
106+ module procedure re_matchp_nolength
102107 end interface regex
103108
104109 contains
@@ -123,12 +128,18 @@ end function nrules
123128
124129 subroutine write_pattern (this ,iunit )
125130 class(regex_op), intent (in ) :: this
126- integer , intent (in ) :: iunit
131+ integer , optional , intent (in ) :: iunit
132+
133+ integer :: i,u
127134
128- integer :: i
135+ if (present (iunit)) then
136+ u = iunit
137+ else
138+ u = output_unit
139+ end if
129140
130141 do i= 1 ,this% nrules()
131- write (iunit ,' (a)' ) this% pattern(i)% print ()
142+ write (u ,' (a)' ) this% pattern(i)% print ()
132143 end do
133144
134145 end subroutine write_pattern
@@ -243,11 +254,9 @@ logical function matchcharclass(c,str) result(match)
243254
244255 if (matchmetachar(c,str(i:))) then
245256 match = .true.
246- print * , ' match meta, c=' ,c
247257 return
248258 elseif (c== str(i:i) .and. (.not. ismetachar(c))) then
249259 match = .true.
250- print * , ' match not meta, c=' ,c
251260 return
252261 endif
253262
@@ -257,7 +266,6 @@ logical function matchcharclass(c,str) result(match)
257266 if (c== DASH) then
258267 ! If this is a range, the character must be in this range, that we evaluate with the ASCII collating sequence
259268 match = i<= 0 .or. i+1 > len_trim (str)
260- print * , ' c is dash! match=' ,match
261269 else
262270 match = .true.
263271 end if
@@ -266,7 +274,7 @@ logical function matchcharclass(c,str) result(match)
266274
267275 end do loop
268276
269- print * , ' charclass: no match on i=' ,i,' str=' ,trim (str),' c=' ,c
277+ if (DEBUG) print * , ' charclass: no match on i=' ,i,' str=' ,trim (str),' c=' ,c
270278
271279 end function matchcharclass
272280
@@ -279,10 +287,8 @@ logical function matchquestion(p, pattern, text, matchlength)
279287
280288 if (p% type == UNUSED) then
281289 matchquestion = .true.
282- print * , ' unused -> match'
283290 return
284291 elseif (matchpattern(pattern, text, matchlength)) then
285- print * , ' matchquestion 2: length=' ,matchlength,' match=.true.'
286292 matchquestion = .true.
287293 return
288294 elseif (len (text)>0 ) then
@@ -305,7 +311,7 @@ logical function matchstar(p, pattern, text, it0, matchlength)
305311
306312 integer :: prelen,it
307313
308- print * , ' match star, length=' ,matchlength,' it0=' ,it0,' lenm=' ,len (text)
314+ if (DEBUG) print * , ' match star, length=' ,matchlength,' it0=' ,it0,' lenm=' ,len (text)
309315
310316 if (len (text)<= 0 ) then
311317 matchstar = .false.
@@ -343,7 +349,7 @@ logical function matchplus(p, pattern, text, it0, matchlength)
343349
344350 integer :: it
345351
346- print * , ' matchplus '
352+ if (DEBUG) print * , ' matching PLUS pattern '
347353
348354 it = it0
349355 do while (it> 0 .and. it<= len (text))
@@ -375,9 +381,30 @@ integer function re_match(text, pattern, length) result(index)
375381
376382 end function re_match
377383
384+ ! Find matches of the given pattern in the string
385+ integer function re_match_nolength (text , pattern ) result(index)
386+ character (* ,kind= RCK), intent (in ) :: pattern
387+ character (* ,kind= RCK), intent (in ) :: text
388+
389+ type (regex_op) :: command
390+ integer :: length
391+
392+ command = parse_pattern(pattern)
393+ index = re_matchp(text,command,length)
394+
395+ end function re_match_nolength
396+
378397 type (regex_op) function parse_pattern(pattern) result(this)
379398 character (* ,kind= RCK), intent (in ) :: pattern
380399
400+ call new_from_pattern(this,pattern)
401+
402+ end function parse_pattern
403+
404+ subroutine new_from_pattern (this ,pattern )
405+ class(regex_op), intent (inout ) :: this
406+ character (* ,kind= RCK), intent (in ) :: pattern
407+
381408 ! Local variables
382409 character (len= MAX_CHAR_CLASS_LEN,kind= RCK) :: ccl_buf ! size of buffer for chars in all char-classes in the expression. */
383410 integer :: loc,i,j,lenp
@@ -500,7 +527,7 @@ type(regex_op) function parse_pattern(pattern) result(this)
500527 ! Save number of patterns
501528 this% n = j-1
502529
503- end function parse_pattern
530+ end subroutine new_from_pattern
504531
505532 function print_pattern (pattern ) result(msg)
506533 class(regex_pattern), intent (in ) :: pattern
@@ -541,9 +568,16 @@ logical function pat_match(p, c) result(match)
541568
542569 end function pat_match
543570
571+ integer function re_matchp_nolength (text , pattern ) result(index)
572+ type (regex_op), intent (in ) :: pattern
573+ character (len=* ,kind= RCK), intent (in ) :: text
574+ integer :: matchlength
575+ index = re_matchp(text, pattern, matchlength)
576+ end function re_matchp_nolength
577+
544578
545579 integer function re_matchp (text , pattern , matchlength ) result(index)
546- type (regex_op) :: pattern
580+ type (regex_op), intent ( in ) :: pattern
547581 character (len=* ,kind= RCK), intent (in ) :: text
548582 integer , intent (out ) :: matchlength
549583
@@ -555,24 +589,19 @@ integer function re_matchp(text, pattern, matchlength) result(index)
555589
556590 ! String must begin with this pattern
557591 index = merge (1 ,0 ,matchpattern(pattern% pattern(2 :), text, matchlength))
558- print * , ' begin with? index = ' ,index
559592
560593 else
561594
562595 do index= 1 ,len (text)
563596 if (matchpattern(pattern% pattern,text(index:),matchlength)) return
564597 end do
565598
566- print * , ' all patterns not matched, ' ,text
567-
568599 index = 0
569600
570601 end if
571602
572603 else
573604
574- ! print *, 'pattern has no patterns'
575-
576605 index = 0
577606
578607 end if
@@ -600,13 +629,9 @@ logical function matchpattern(pattern, text, matchlength) result(match)
600629
601630 iterate: do while (ip<= size (pattern))
602631
603- print * , ' trying pattern ' ,pattern(ip)% print (),' on character ' ,text(it:)
604-
605632 if (pattern(ip)% type == UNUSED .or. pattern(ip+1 )% type == QUESTIONMARK) then
606633
607- print * , ' before matchquestion = ' ,match,' length=' ,matchlength
608634 match = matchquestion(pattern(ip),pattern(ip+2 :),text(it:),matchlength)
609- print * , ' matchquestion = ' ,match,' length=' ,matchlength
610635 return
611636
612637 elseif (pattern(ip+1 )% type == STAR) then
@@ -621,7 +646,6 @@ logical function matchpattern(pattern, text, matchlength) result(match)
621646
622647 elseif (pattern(ip)% type == END_WITH .and. pattern(ip+1 )% type == UNUSED) then
623648
624- print * , ' end pattern, len(text)' ,len (text(it:))
625649
626650 match = len (text(it:))<= 1
627651 return
@@ -631,7 +655,8 @@ logical function matchpattern(pattern, text, matchlength) result(match)
631655 matchlength = matchlength+1
632656
633657 if (it> len (text)) exit iterate
634- print * , ' pat match with ip=' ,ip,' it=' ,it,' len=' ,len (text),' matchlenght=' ,matchlength
658+
659+ if (DEBUG) print " ('[regex] matching ',i0,'-th pattern on chunk <',i0,':',i0,'>')" , ip,it,len (text)
635660 if (.not. pat_match(pattern(ip), text(it:it))) exit iterate
636661 ip = ip+1
637662 it = it+1
0 commit comments