(* Regular expression matching is a very useful technique for describing commonly occurring patterns. This program follows ideas from "Proof-directed debugging", R. Harper, Journal of Functional Programming 1(1), Jan 1993 We will give an algorithm for a simple regular expression matcher. Patterns describable by regular expressions: - Singelton : matching a specific character - Alternation: choice between two patterns - Concatenation: succession of patterns - Iteration: indefinite repetition of patterns Note: regular expressions provide no concept of nesting of one pattern inside another. For this we require a richer formalism, namely context-free language. We can describe regular expressions via a BNF grammar, inductively: r ::= a | r1 r2 | 0 | r1 + r2 | 1 | r* we say a string s matches a regular expression r iff s in L(r). s never matches 0; s matches 1 only if s = empty s matches a iff s = a s matches r1 + r2 iff either s matches r1 or r2 s matches r1 r2 iff s = s1 s2 where s1 matches r1 and s2 matches r2. s matches r* iff either s = empty or s = s1 s2 where s1 matches r and s2 matches r* *) datatype regexp = Char of char | Times of regexp * regexp | One | Zero | Plus of regexp * regexp | Star of regexp fun acc (Char(c)) [] k = false | acc (Char(c)) (c1::s) k = (c = c1) andalso (k s) | acc (Times(r1, r2)) s k = acc r1 s (fn s' => acc r2 s' k) | acc (One) s k = k s | acc (Plus(r1, r2)) s k = acc r1 s k orelse acc r2 s k | acc (Zero) s k = false | acc (Star(r)) s k = (k s) orelse acc r s (fn s' => not(s = s') andalso acc (Star(r)) s' k) (* accept : regexp * string -> bool *) fun accept r s = acc r (String.explode s) (fn l => l = []) ; (* ------------------------------------------------------------------------ *) (* Some examples *) (* r0 = a + 0 *) val r0 = Plus(Char(#"a"), Zero); val ra = (Times(Char(#"b"), Times (Star(Char (#"a")), Star(Times(Char(#"a"), Char(#"b")))))) (* r1 = aa *) val r1 = Times(Char(#"a"), Char(#"a")); (* r2 = (a + b)^* *) val r2 = Star(Plus(Char(#"a"), Char(#"b"))); (* r = r2 r1 r2 *) val r3 = Times(r2, Times(r1, r2)); (* r = (a + 1)(b + ba)^* *) val r4 = Times(Plus(Char(#"a"), One), Star(Plus(Char(#"b"), Times(Char(#"b"), Char#"a")))); (* r = g(1+r)(e+a)y *) val r5 = Times(Char(#"g"), Times(Plus(One, Char(#"r")), Times(Plus(Char(#"e"), Char(#"a")),Char(#"y")))); (* r = g(1+o)^* gle *) val r6 = Times(Char(#"g"), Times(Star(Plus(One, Char(#"o"))), Times(Times(Char(#"g"), Char(#"l")),Char(#"e")))); val r6' = Times(Char(#"g"), Times(Star(Char(#"o")), Times(Times(Char(#"g"), Char(#"l")),Char(#"e")))); (* accept r6 "google"; accept r6 "gogle"; accept r6 "ggle"; accept r6 "gooooogle"; accept r6 "gooooogle" accept r5 "gay"; accept r5 "gray"; accept r5 "grey"; *) (* apple, apply, ale, aply, aple, appppppple *) val r7 = Times(Char(#"a"), Times(Star(Char#"p"), Times(Char#"l", Plus(Times(Char(#"e"), Plus(One, Char#"s")), Char(#"y")))));