'**************************************************
' FILE      : Tokenizer.vb
' AUTHOR    : Paulo Santos
' CREATION  : 9/29/2007 10:18:09 PM
' COPYRIGHT : Copyright © 2007
'             PJ on Development
'             All Rights Reserved.
'
' Description:
'       Implements a class that retrieves tokens from an input.
'
' Change log:
' 0.1   9/29/2007 10:18:09 PM
'       Paulo Santos
'       Created.
'***************************************************

Imports System.Text
Imports System.Text.RegularExpressions
Imports System.Collections.Generic
Imports System.Diagnostics.CodeAnalysis

Namespace Tokenization

    ''' <summary>
    ''' A utility class to get tokens from an input.
    ''' </summary>
    ''' <remarks>
    ''' Macros are named, reusable pattern fragments; rules are the patterns that
    ''' actually produce tokens. Both may reference macros as <c>{name}</c>, and
    ''' rules may additionally embed double-quoted literals that are matched verbatim.
    ''' </remarks>
    Public Class Tokenizer

        ' NOTE(review): this constant is not referenced anywhere in this class, and
        ' "(?[" is not a valid .NET grouping construct. A named group (for example
        ' "(?<name>...)") was probably lost from the text - confirm against the
        ' original before using it.
        Private Const __MacroNamePattern As String = "\{(?[_A-Z].*?)\}"

        ' Matches a double-quoted literal inside a rule pattern. The escaped-quote
        ' alternative comes first and the loop is greedy so that \" stays inside
        ' the literal instead of terminating it.
        Private Const __LiteralPattern As String = """(?:\\""|[^""])*"""

        Private __Macros As New Dictionary(Of String, Macro)
        Private __Rules As New Dictionary(Of String, Rule)
        Private __Options As RegexOptions

#Region " Public Properties "

        ''' <summary>
        ''' Gets a read-only list of the macros of this instance of the class.
        ''' </summary>
        ''' <value>A new collection built from the macros currently registered.</value>
        Public ReadOnly Property Macros() As MacroCollection
            Get
                Return New MacroCollection(__Macros.Values)
            End Get
        End Property

        ''' <summary>
        ''' Gets a read-only list of the rules of this instance of the class.
        ''' </summary>
        ''' <value>A new collection built from the rules currently registered.</value>
        Public ReadOnly Property Rules() As RuleCollection
            Get
                Return New RuleCollection(__Rules.Values)
            End Get
        End Property

        ''' <summary>
        ''' Gets or sets the regular expression options used to match the rules.
        ''' </summary>
        ''' <value>The regular expression options used to match the rules.</value>
        ''' <remarks>
        ''' The value is handed to a rule at the moment the rule is expanded.
        ''' NOTE(review): rules that are already expanded are not re-expanded by this
        ''' class when the options change - confirm whether that is intended.
        ''' </remarks>
        Public Property Options() As RegexOptions
            Get
                Return __Options
            End Get
            Set(ByVal value As RegexOptions)
                __Options = value
            End Set
        End Property

#End Region

#Region " Public Methods "

        ''' <summary>
        ''' Adds a new macro to the list.
        ''' </summary>
        ''' <param name="macro">The macro to be added.</param>
        ''' <exception cref="ArgumentNullException"><paramref name="macro"/> is <see langword="Nothing"/>.</exception>
        ''' <exception cref="ReferencedMacroNotFoundException">One or more referenced macros was not found in the collection.</exception>
        ''' <exception cref="CircularReferenceException">The macro has a circular reference.</exception>
        ''' <exception cref="ArgumentException">A macro with the same name has already been added.</exception>
        Public Sub AddMacro(ByVal macro As Macro)
            If (macro Is Nothing) Then
                Throw New ArgumentNullException("macro")
            End If

            '*
            '* Every referenced macro must already be registered
            '*
            For Each s As String In macro.References
                If (Not __Macros.ContainsKey(s)) Then
                    Throw New ReferencedMacroNotFoundException(s)
                End If
            Next

            EnsureNoCircularReference(macro)
            Expand(macro)
            __Macros.Add(macro.Name, macro)
        End Sub

        ''' <summary>
        ''' Adds a new macro to the list.
        ''' </summary>
        ''' <param name="name">The name of the macro to add.</param>
        ''' <param name="pattern">The pattern of the macro to add.</param>
        ''' <exception cref="ArgumentNullException">
        ''' <paramref name="name"/> or <paramref name="pattern"/> is <see langword="Nothing"/> or empty.
        ''' </exception>
        Public Sub AddMacro(ByVal name As String, ByVal pattern As String)
            '*
            '* Separate guards keep the parameter name strongly typed
            '* (IIf returns Object, which forces late binding on the constructor).
            '*
            If (String.IsNullOrEmpty(name)) Then
                Throw New ArgumentNullException("name")
            End If
            If (String.IsNullOrEmpty(pattern)) Then
                Throw New ArgumentNullException("pattern")
            End If

            Dim m As New Macro
            m.Name = name
            m.Pattern = pattern
            Me.AddMacro(m)
        End Sub

        ''' <summary>
        ''' Adds a new rule to the list.
        ''' </summary>
        ''' <param name="rule">The rule to be added.</param>
        ''' <exception cref="ArgumentNullException"><paramref name="rule"/> is <see langword="Nothing"/>.</exception>
        ''' <exception cref="ReferencedMacroNotFoundException">One or more referenced macro was not found in the collection.</exception>
        ''' <exception cref="ArgumentException">A rule with the same name has already been added.</exception>
        Public Sub AddRule(ByVal rule As Rule)
            If (rule Is Nothing) Then
                Throw New ArgumentNullException("rule")
            End If

            '*
            '* Every referenced macro must already be registered
            '*
            For Each s As String In rule.References
                If (Not __Macros.ContainsKey(s)) Then
                    Throw New ReferencedMacroNotFoundException(s)
                End If
            Next

            Expand(rule)
            __Rules.Add(rule.Name, rule)
        End Sub

        ''' <summary>
        ''' Adds a new rule to the list.
        ''' </summary>
        ''' <param name="name">The name of the rule to add.</param>
        ''' <param name="type">The type of the rule to add.</param>
        ''' <param name="pattern">The pattern of the rule to add.</param>
        ''' <exception cref="ArgumentNullException">
        ''' <paramref name="name"/> or <paramref name="pattern"/> is <see langword="Nothing"/> or empty.
        ''' </exception>
        Public Sub AddRule(ByVal name As String, ByVal type As Integer, ByVal pattern As String)
            If (String.IsNullOrEmpty(name)) Then
                Throw New ArgumentNullException("name")
            End If
            If (String.IsNullOrEmpty(pattern)) Then
                Throw New ArgumentNullException("pattern")
            End If

            Dim r As New Rule
            r.Name = name
            r.RuleType = type
            r.Pattern = pattern
            Me.AddRule(r)
        End Sub

        ''' <summary>
        ''' Returns a token from the specified text, consuming it.
        ''' </summary>
        ''' <param name="text">The text from which to extract the token.</param>
        ''' <returns>
        ''' A token from the specified text if a matching rule is found; otherwise <see langword="Nothing"/>.
        ''' </returns>
        ''' <remarks>
        ''' The text is passed by reference and the token is consumed, i.e.,
        ''' the token will be removed from the specified text.
        ''' <para>
        ''' When several rules match, the one with the longest match wins; on a tie the
        ''' rule enumerated first is kept.
        ''' </para>
        ''' <para>
        ''' If the text is <see langword="Nothing"/> or empty, GetToken also returns <see langword="Nothing"/>.
        ''' </para>
        ''' </remarks>
        Public Function GetToken(ByRef text As String) As Token
            '*
            '* Check the passed parameters
            '*
            If (String.IsNullOrEmpty(text)) Then
                Return Nothing
            End If

            Dim matchedToken As Token = Nothing
            Dim maxLen As Integer = -1

            For Each r As Rule In __Rules.Values
                If (Not r.IsExpanded) Then
                    Expand(r)
                End If

                '*
                '* Try to match the rule
                '*
                Dim match As Match = r.Regex.Match(text)

                '*
                '* Only a match that starts at the beginning of the text is a token:
                '* the token is consumed from the front, so accepting a match found
                '* further along would strip unrelated leading characters. Match
                '* returns the leftmost match, so Index <> 0 means the rule does not
                '* match at position 0 at all.
                '*
                If (match.Success AndAlso match.Index = 0) Then
                    '*
                    '* Keep the rule that consumes the most characters
                    '*
                    If (maxLen < match.Value.Length) Then
                        maxLen = match.Value.Length
                        matchedToken = New Token(r.Name, r.RuleType, match.Value)
                    End If
                End If
            Next

            '*
            '* Was any token found?
            '*
            If (matchedToken IsNot Nothing) Then
                '*
                '* Consumes the token
                '*
                text = text.Substring(matchedToken.Value.Length)
            End If

            Return matchedToken
        End Function

#End Region

#Region " Private Methods "

        ''' <summary>
        ''' Throws if the specified macro, directly or through the macros it references,
        ''' ends up referencing itself.
        ''' </summary>
        ''' <param name="macro">The macro to be tested.</param>
        ''' <param name="path">
        ''' The chain of macro names, each wrapped in braces, visited so far. Callers
        ''' outside the recursion leave it empty.
        ''' </param>
        ''' <exception cref="CircularReferenceException">The macro name already appears in <paramref name="path"/>.</exception>
        Private Sub EnsureNoCircularReference(ByVal macro As Macro, Optional ByVal path As String = "")
            Dim extendedPath As String = path & "{" & macro.Name & "}"

            '* The name is looked up in the path ignoring case
            If (path.IndexOf("{" & macro.Name & "}", StringComparison.OrdinalIgnoreCase) <> (-1)) Then
                Throw New CircularReferenceException(extendedPath)
            End If

            For Each s As String In macro.References
                EnsureNoCircularReference(__Macros.Item(s), extendedPath)
            Next
        End Sub

        ''' <summary>
        ''' Expands all quoted literals and referenced macros in the rule.
        ''' </summary>
        ''' <param name="rule">The rule to be expanded.</param>
        ''' <remarks>
        ''' Literals are resolved first. Their braces are escaped, so a literal such as
        ''' <c>"{x}"</c> is not mistaken for a macro reference by the substitution that follows.
        ''' </remarks>
        Private Sub Expand(ByRef rule As Rule)
            '*
            '* Resolve literals
            '*
            Dim iPos As Integer = 0
            Dim sb As New StringBuilder
            For Each literal As Match In Regex.Matches(rule.Pattern, __LiteralPattern)
                '* Copy the text between the previous literal and this one untouched
                sb.Append(rule.Pattern.Substring(iPos, literal.Index - iPos))

                '* Strip the surrounding quotes, unescape \" and neutralize regex syntax
                Dim content As String = literal.Value.Substring(1, literal.Value.Length - 2)
                sb.Append(EscapeRegExpChars(content.Replace("\""", """")))

                iPos = literal.Index + literal.Length
            Next
            sb.Append(rule.Pattern.Substring(iPos))
            Dim expanded As String = sb.ToString()

            '*
            '* Expand macros
            '*
            For Each s As String In rule.References
                Dim referenced As Macro = __Macros(s)
                If (Not referenced.IsExpanded) Then
                    Expand(referenced)
                End If
                expanded = expanded.Replace("{" & referenced.Name & "}", "(" & referenced.ExpandedPattern & ")")
            Next

            rule.SetExpandedPattern(expanded, Me.Options)
        End Sub

        ''' <summary>
        ''' Expands all referenced macros in the macro.
        ''' </summary>
        ''' <param name="macro">The macro to be expanded.</param>
        ''' <remarks>
        ''' Each <c>{name}</c> reference is replaced by the referenced macro's expanded
        ''' pattern wrapped in parentheses.
        ''' </remarks>
        Private Sub Expand(ByRef macro As Macro)
            Dim expanded As String = macro.Pattern

            For Each s As String In macro.References
                Dim referenced As Macro = __Macros(s)
                If (Not referenced.IsExpanded) Then
                    Expand(referenced)
                End If
                expanded = expanded.Replace("{" & referenced.Name & "}", "(" & referenced.ExpandedPattern & ")")
            Next

            macro.SetExpandedPattern(expanded)
        End Sub

        ''' <summary>
        ''' Escapes all regular expression special characters.
        ''' </summary>
        ''' <param name="text">The literal text to be escaped.</param>
        ''' <returns>A string with all the regular expression special characters escaped.</returns>
        ''' <remarks>
        ''' <see cref="Regex.Escape"/> also covers <c>#</c> and white space, which matter
        ''' when the rule options include <see cref="RegexOptions.IgnorePatternWhitespace"/>.
        ''' It leaves <c>}</c> and <c>]</c> alone, so those are escaped here: the escaped
        ''' closing brace is what keeps a literal like <c>{x}</c> from being replaced as a
        ''' macro reference later on.
        ''' </remarks>
        Private Shared Function EscapeRegExpChars(ByVal text As String) As String
            Dim escaped As String = Regex.Escape(text)
            escaped = escaped.Replace("}", "\}")
            escaped = escaped.Replace("]", "\]")
            Return escaped
        End Function

#End Region

    End Class

End Namespace