'**************************************************
' FILE : Tokenizer.vb
' AUTHOR : Paulo Santos
' CREATION : 9/29/2007 10:18:09 PM
' COPYRIGHT : Copyright © 2007
' PJ on Development
' All Rights Reserved.
'
' Description:
' Implements a class that retrieves tokens from an input.
'
' Change log:
' 0.1 9/29/2007 10:18:09 PM
' Paulo Santos
' Created.
'***************************************************
Imports System.Text
Imports System.Text.RegularExpressions
Imports System.Collections.Generic
Imports System.Diagnostics.CodeAnalysis
Namespace Tokenization
''' <summary>
''' A utility class to get tokens from an input.
''' </summary>
_
Public Class Tokenizer
' Pattern apparently meant to locate macro references of the form {Name} in a pattern.
' NOTE(review): "(?[" is not a valid .NET grouping construct; a named group such as
' "(?<name>...)" seems to have been lost from this literal - confirm against the
' original source. The constant is not referenced anywhere in the visible code.
Private Const __MacroNamePattern As String = "\{(?[_A-Z].*?)\}"
' Macros registered through AddMacro, keyed by macro name.
Private __Macros As New Dictionary(Of String, Macro)
' Rules registered through AddRule, keyed by rule name.
Private __Rules As New Dictionary(Of String, Rule)
' Regular expression options handed to Rule.SetExpandedPattern when a rule is expanded.
Private __Options As RegexOptions
#Region " Public Properties "
''' <summary>
''' Gets the macros currently registered in this instance.
''' </summary>
''' <value>A new <see cref="MacroCollection"/> built from the registered macros.</value>
Public ReadOnly Property Macros() As MacroCollection
    Get
        '* A fresh collection is created on every read
        Dim registered As New MacroCollection(Me.__Macros.Values)
        Return registered
    End Get
End Property
''' <summary>
''' Gets the rules currently registered in this instance.
''' </summary>
''' <value>A new <see cref="RuleCollection"/> built from the registered rules.</value>
Public ReadOnly Property Rules() As RuleCollection
    Get
        '* A fresh collection is created on every read
        Dim registered As New RuleCollection(Me.__Rules.Values)
        Return registered
    End Get
End Property
''' <summary>
''' Gets or sets the regular expression options used to match the rules.
''' </summary>
''' <value>The regular expression options used to match the rules.</value>
''' <remarks>
''' The value is read at the moment a rule is expanded, and AddRule expands
''' the rule it receives; set the options before adding rules.
''' </remarks>
Public Property Options() As RegexOptions
    Get
        Return Me.__Options
    End Get
    Set(ByVal value As RegexOptions)
        Me.__Options = value
    End Set
End Property
#End Region
#Region " Public Methods "
''' <summary>
''' Adds a new macro to the list.
''' </summary>
''' <param name="macro">The macro to be added.</param>
''' <exception cref="ReferencedMacroNotFoundException">One or more referenced macros was not found in the collection.</exception>
''' <exception cref="CircularReferenceException">The macro has a circular reference.</exception>
Public Sub AddMacro(ByVal macro As Macro)
    '*
    '* Every macro referenced by the new one must already be registered
    '*
    For Each referenced As String In macro.References
        If (__Macros.ContainsKey(referenced)) Then
            Continue For
        End If
        Throw New ReferencedMacroNotFoundException(referenced)
    Next

    '*
    '* Validate, expand, and only then store the macro under its name
    '*
    EnsureNoCircularReference(macro)
    Expand(macro)
    __Macros.Add(macro.Name, macro)
End Sub
''' <summary>
''' Adds a new macro to the list.
''' </summary>
''' <param name="name">The name of the macro to add.</param>
''' <param name="pattern">The pattern of the macro to add.</param>
''' <exception cref="ArgumentNullException">
''' <paramref name="name"/> or <paramref name="pattern"/> is <c>Nothing</c> or empty.
''' </exception>
Public Sub AddMacro(ByVal name As String, ByVal pattern As String)
    '*
    '* Separate guards instead of IIf: IIf returns Object, which forced an
    '* implicit narrowing conversion to pick the String constructor overload.
    '* The name is still reported first when both arguments are invalid.
    '*
    If (String.IsNullOrEmpty(name)) Then
        Throw New ArgumentNullException("name")
    End If
    If (String.IsNullOrEmpty(pattern)) Then
        Throw New ArgumentNullException("pattern")
    End If

    Dim m As New Macro
    m.Name = name
    m.Pattern = pattern
    Me.AddMacro(m)
End Sub
''' <summary>
''' Adds a new rule to the list.
''' </summary>
''' <param name="rule">The rule to be added.</param>
''' <exception cref="ReferencedMacroNotFoundException">One or more referenced macros was not found in the collection.</exception>
Public Sub AddRule(ByVal rule As Rule)
    '*
    '* Every macro referenced by the rule must already be registered
    '*
    For Each referenced As String In rule.References
        If (__Macros.ContainsKey(referenced)) Then
            Continue For
        End If
        Throw New ReferencedMacroNotFoundException(referenced)
    Next

    '*
    '* Expand the rule and store it under its name
    '*
    Expand(rule)
    __Rules.Add(rule.Name, rule)
End Sub
''' <summary>
''' Adds a new rule to the list.
''' </summary>
''' <param name="name">The name of the rule to add.</param>
''' <param name="type">The type of the rule to add.</param>
''' <param name="pattern">The pattern of the rule to add.</param>
''' <exception cref="ArgumentNullException">
''' <paramref name="name"/> or <paramref name="pattern"/> is <c>Nothing</c> or empty.
''' </exception>
Public Sub AddRule(ByVal name As String, ByVal type As Integer, ByVal pattern As String)
    '*
    '* Separate guards instead of IIf: IIf returns Object, which forced an
    '* implicit narrowing conversion to pick the String constructor overload.
    '* The name is still reported first when both arguments are invalid.
    '*
    If (String.IsNullOrEmpty(name)) Then
        Throw New ArgumentNullException("name")
    End If
    If (String.IsNullOrEmpty(pattern)) Then
        Throw New ArgumentNullException("pattern")
    End If

    Dim m As New Rule
    m.Name = name
    m.RuleType = type
    m.Pattern = pattern
    Me.AddRule(m)
End Sub
''' <summary>
''' Returns a token from the specified text, consuming it.
''' </summary>
''' <param name="text">The text from which to extract the token.</param>
''' <returns>A token from the specified text if a matching rule is found; otherwise <c>Nothing</c>.</returns>
''' <remarks>
''' The text is passed by reference and the token is consumed, i.e.,
''' the token will be removed from the specified text.
''' <para>
''' If the text is <c>Nothing</c> or empty, GetToken also returns <c>Nothing</c>.
''' </para>
''' </remarks>
_
Public Function GetToken(ByRef text As String) As Token
    '*
    '* Purpose: try every registered rule against the text, keep the longest
    '* match (the first rule wins a tie), remove it from the front of the
    '* text and return it as a token. Returns Nothing when the text is
    '* Nothing or empty, or when no rule matches.
    '*
    If (String.IsNullOrEmpty(text)) Then
        Return Nothing
    End If

    Dim matchedToken As Token = Nothing
    Dim maxLen As Integer = -1

    '*
    '* Enumerate the pairs directly; no second lookup by key is needed
    '*
    For Each entry As KeyValuePair(Of String, Rule) In __Rules
        Dim r As Rule = entry.Value
        If (Not r.IsExpanded) Then
            Expand(r)
        End If

        '*
        '* Try to match the rule
        '*
        Dim match As Match = r.Regex.Match(text)

        '*
        '* The token is consumed from the start of the text, so only a match
        '* beginning at index 0 is usable. Whether the rule's regex is already
        '* anchored is not visible from here; this check keeps an unanchored
        '* pattern from producing a token found further inside the text.
        '*
        If (match.Success AndAlso match.Index = 0 AndAlso maxLen < match.Length) Then
            '*
            '* Longest match so far: build the token only for this candidate
            '*
            maxLen = match.Length
            matchedToken = New Token(r.Name, r.RuleType, match.Value)
        End If
    Next

    '*
    '* Was any token found? Then consume it.
    '*
    If (matchedToken IsNot Nothing) Then
        text = text.Substring(matchedToken.Value.Length)
    End If
    Return matchedToken
End Function
#End Region
#Region " Private Methods "
''' <summary>
''' Throws when the specified macro, followed through its references, leads back to itself.
''' </summary>
''' <param name="macro">The macro to be tested.</param>
''' <param name="path">The chain of "{name}" markers visited so far; empty for the initial call.</param>
''' <exception cref="CircularReferenceException">The macro name already appears in <paramref name="path"/>.</exception>
Private Sub EnsureNoCircularReference(ByVal macro As Macro, Optional ByVal path As String = "")
    Dim marker As String = "{" & macro.Name & "}"
    Dim trail As String = path & marker

    '* Seeing this macro's marker in the visited path means a cycle (case-insensitive)
    If (path.IndexOf(marker, StringComparison.OrdinalIgnoreCase) >= 0) Then
        Throw New CircularReferenceException(trail)
    End If

    '* Walk every referenced macro with the extended path
    For Each referenced As String In macro.References
        EnsureNoCircularReference(__Macros.Item(referenced), trail)
    Next
End Sub
''' <summary>
''' Expands all quoted literals and referenced macros in the rule.
''' </summary>
''' <param name="rule">The rule to be expanded.</param>
''' <remarks>
''' Text between double quotes is taken literally: its regular expression
''' special characters are escaped, and <c>\"</c> stands for a quote inside
''' the literal. Each <c>{Name}</c> macro reference is then replaced by the
''' macro's expanded pattern wrapped in parentheses.
''' </remarks>
Private Sub Expand(ByRef rule As Rule)
    '*
    '* Resolve literals
    '*
    '* The escaped quote alternative is tried before the "any character but a
    '* quote" alternative. In the opposite order the backslash was consumed on
    '* its own and the lazy quantifier closed the literal at the escaped quote,
    '* so a literal containing \" was cut short.
    '*
    Dim source As String = rule.Pattern
    Dim iPos As Integer = 0
    Dim sb As New StringBuilder
    For Each m As Match In Regex.Matches(source, """(?:\\""|[^""])*?""")
        '* Copy the text that precedes the literal unchanged
        sb.Append(source.Substring(iPos, m.Index - iPos))
        '* Drop the surrounding quotes, unescape \" and escape regex characters
        Dim literal As String = m.Value.Substring(1, m.Value.Length - 2)
        sb.Append(EscapeRegExpChars(literal.Replace("\""", """")))
        iPos = m.Index + m.Length
    Next
    sb.Append(source.Substring(iPos))
    Dim expanded As String = sb.ToString()
    '*
    '* Expand macros
    '*
    For Each s As String In rule.References
        Dim m As Macro = __Macros(s)
        If (Not m.IsExpanded) Then
            Expand(m)
        End If
        expanded = expanded.Replace("{" & m.Name & "}", "(" & m.ExpandedPattern & ")")
    Next
    rule.SetExpandedPattern(expanded, Me.Options)
End Sub
''' <summary>
''' Expands all referenced macros in the macro.
''' </summary>
''' <param name="macro">The macro to be expanded.</param>
Private Sub Expand(ByRef macro As Macro)
    Dim pattern As String = macro.Pattern
    For Each referenceName As String In macro.References
        Dim inner As Macro = __Macros(referenceName)
        '* A referenced macro is expanded first when it has not been yet
        If (Not inner.IsExpanded) Then
            Expand(inner)
        End If
        '* Substitute {Name} with the parenthesized expansion of the inner macro
        Dim placeholder As String = "{" & inner.Name & "}"
        Dim replacement As String = "(" & inner.ExpandedPattern & ")"
        pattern = pattern.Replace(placeholder, replacement)
    Next
    macro.SetExpandedPattern(pattern)
End Sub
''' <summary>
''' Escapes all regular expression special characters.
''' </summary>
''' <param name="text">The literal text to be escaped.</param>
''' <returns>A string with all the regular expression special characters escaped.</returns>
''' <remarks>
''' In addition to the characters handled by <see cref="Regex.Escape"/>
''' (which include '#' and white space, significant under
''' <see cref="RegexOptions.IgnorePatternWhitespace"/>), the closing
''' ']' and '}' are escaped as well.
''' </remarks>
Private Shared Function EscapeRegExpChars(ByVal text As String) As String
    '* Regex.Escape covers \ * + ? | { [ ( ) ^ $ . # and white space
    Dim escaped As String = Regex.Escape(text)
    '* Regex.Escape leaves ] and } alone. They must stay escaped here:
    '* an unescaped } would let a literal such as "{x}" be mistaken for a
    '* macro reference when macros are substituted afterwards.
    escaped = escaped.Replace("]", "\]")
    escaped = escaped.Replace("}", "\}")
    Return escaped
End Function
#End Region
End Class
End Namespace