You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
392 lines
12 KiB
392 lines
12 KiB
// Copyright (c) 2009-2010 Sun Microsystems, Inc.
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License version 2 as published by
|
|
// the Free Software Foundation
|
|
//
|
|
// There are special exceptions to the terms and conditions of the GPL
|
|
// as it is applied to this software. View the full text of the
|
|
// exception in file EXCEPTIONS in the directory of this software
|
|
// distribution.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Text;
|
|
using System.IO;
|
|
using System.Diagnostics;
|
|
using System.Collections;
|
|
using MySql.Data.MySqlClient.Properties;
|
|
|
|
namespace MySql.Data.Common
|
|
{
|
|
/// <summary>
/// Reduces a SQL statement to a normalized form: string and numeric literals
/// are replaced with "?", runs of whitespace are collapsed to a single space,
/// keywords are upper-cased, ordinary comments are dropped and multi-row
/// VALUES lists / IN lists are collapsed to their first element followed by
/// a "/* , ... */" marker.
/// </summary>
/// <remarks>
/// An instance keeps per-call state in fields (token list, scan position,
/// statement type), so one instance must not be used for two statements at
/// the same time.
/// </remarks>
internal class QueryNormalizer
{
    // Marker emitted in place of the elements removed from a collapsed list.
    private const string CollapsedListMarker = "/* , ... */";

    // Keyword list loaded from the embedded resource. It is sorted (ordinal)
    // once in the static constructor so IsKeyword can binary search it.
    private static readonly List<string> keywords = new List<string>();

    private readonly List<Token> tokens = new List<Token>();
    private int pos;
    private string fullSql;
    private string queryType;

    /// <summary>
    /// Loads the keyword list, one keyword per line, from Resources.keywords.
    /// </summary>
    static QueryNormalizer()
    {
        using (StringReader reader = new StringReader(Resources.keywords))
        {
            string keyword;
            while ((keyword = reader.ReadLine()) != null)
            {
                keywords.Add(keyword);
            }
        }

        // Ordinal order is required by the ordinal BinarySearch in IsKeyword.
        keywords.Sort(StringComparer.Ordinal);
    }

    /// <summary>
    /// Upper-cased text of the first keyword found by the most recent call to
    /// <see cref="Normalize"/>, or null when that statement had no keyword
    /// (or Normalize has not been called yet).
    /// </summary>
    public string QueryType
    {
        get { return queryType; }
    }

    /// <summary>
    /// Tokenizes <paramref name="sql"/> and returns its normalized text.
    /// </summary>
    /// <param name="sql">The SQL statement to normalize.</param>
    /// <returns>The concatenated text of every token still marked for output.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="sql"/> is null.</exception>
    public string Normalize(string sql)
    {
        if (sql == null)
        {
            throw new ArgumentNullException("sql");
        }

        tokens.Clear();
        // Reset so QueryType never reports the type of a previous statement.
        queryType = null;
        fullSql = sql;

        TokenizeSql(sql);
        DetermineStatementType(tokens);
        ProcessMathSymbols(tokens);
        CollapseValueLists(tokens);
        CollapseInLists(tokens);
        CollapseWhitespace(tokens);

        StringBuilder newSql = new StringBuilder(sql.Length);
        foreach (Token t in tokens)
        {
            if (t.Output)
            {
                newSql.Append(t.Text);
            }
        }

        return newSql.ToString();
    }

    /// <summary>
    /// Records the upper-cased text of the first keyword token as the statement type.
    /// </summary>
    /// <param name="tok">The token list to inspect.</param>
    private void DetermineStatementType(List<Token> tok)
    {
        foreach (Token t in tok)
        {
            if (t.Type == TokenType.Keyword)
            {
                queryType = t.Text.ToUpperInvariant();
                return;
            }
        }
    }

    /// <summary>
    /// Mark - or + signs that are unary ops as no output
    /// </summary>
    /// <param name="tok">The token list to process.</param>
    private void ProcessMathSymbols(List<Token> tok)
    {
        Token lastToken = null;

        foreach (Token t in tok)
        {
            bool isSign = t.Type == TokenType.Symbol && (t.Text == "-" || t.Text == "+");

            // A sign is binary when it follows something that can end an
            // operand; a sign that is the very first real token is left alone.
            if (isSign && lastToken != null && !EndsOperand(lastToken))
            {
                t.Output = false;
            }

            // A sign that was just hidden is no longer a "real" token, so it
            // does not become the previous token for the next iteration.
            if (t.IsRealToken)
            {
                lastToken = t;
            }
        }
    }

    /// <summary>
    /// Returns true when <paramref name="t"/> can be the last token of an
    /// operand: a number, a string literal, an identifier or a closing parenthesis.
    /// </summary>
    private static bool EndsOperand(Token t)
    {
        switch (t.Type)
        {
            case TokenType.Number:
            case TokenType.String:
            case TokenType.Identifier:
                return true;
            case TokenType.Symbol:
                return t.Text == ")";
            default:
                return false;
        }
    }

    /// <summary>
    /// Hides every whitespace token that directly follows another visible
    /// whitespace token, so the output never contains two in a row.
    /// </summary>
    /// <param name="tok">The token list to process.</param>
    private void CollapseWhitespace(List<Token> tok)
    {
        Token lastToken = null;

        foreach (Token t in tok)
        {
            if (t.Output &&
                t.Type == TokenType.Whitespace &&
                lastToken != null &&
                lastToken.Type == TokenType.Whitespace)
            {
                t.Output = false;
            }

            if (t.Output)
            {
                lastToken = t;
            }
        }
    }

    /// <summary>
    /// Collapses the value list following every keyword that starts with "VALUE".
    /// </summary>
    /// <param name="tok">The token list to process.</param>
    private void CollapseValueLists(List<Token> tok)
    {
        int index = -1;
        while (++index < tok.Count)
        {
            Token t = tok[index];
            if (t.Type != TokenType.Keyword) continue;
            if (!t.Text.StartsWith("VALUE", StringComparison.Ordinal)) continue;
            CollapseValueList(tok, ref index);
        }
    }

    /// <summary>
    /// Collapses a comma separated run of parenthesized groups so that only
    /// the first group is output, followed by the collapsed-list marker.
    /// </summary>
    /// <remarks>
    /// Groups are delimited by the next ")" symbol; parentheses nested inside
    /// a group are not tracked.
    /// </remarks>
    /// <param name="tok">The token list to process.</param>
    /// <param name="pos">
    /// On entry the index of the VALUE(S) keyword; on exit the index from
    /// which the caller resumes scanning.
    /// </param>
    private void CollapseValueList(List<Token> tok, ref int pos)
    {
        List<int> parenIndices = new List<int>();

        // this while loop will find all closing parens in this value list
        while (true)
        {
            // find the close ')'
            while (++pos < tok.Count)
            {
                if (tok[pos].Type == TokenType.Symbol && tok[pos].Text == ")") break;
            }

            // Truncated/malformed statement: no further ')' exists. Stop here
            // rather than recording (and later indexing) a position past the end.
            if (pos >= tok.Count) break;
            parenIndices.Add(pos);

            // now find the next "real" token
            while (++pos < tok.Count)
            {
                if (tok[pos].IsRealToken) break;
            }
            if (pos >= tok.Count) break;

            if (tok[pos].Text != ",")
            {
                // Not part of the list; step back so the caller re-examines it.
                pos--;
                break;
            }
        }

        // if we only have 1 value then we don't collapse
        if (parenIndices.Count < 2) return;

        // The two tokens after the first ')' are overwritten with the marker;
        // with at least two groups they always lie before the last ')'.
        int index = parenIndices[0];
        tok[++index] = new Token(TokenType.Whitespace, " ");
        tok[++index] = new Token(TokenType.Comment, CollapsedListMarker);
        index++;

        // now mark all the other tokens as no output
        int last = parenIndices[parenIndices.Count - 1];
        while (index <= last)
        {
            tok[index++].Output = false;
        }
    }

    /// <summary>
    /// Collapses the parenthesized list following every IN keyword.
    /// </summary>
    /// <param name="tok">The token list to process.</param>
    private void CollapseInLists(List<Token> tok)
    {
        int index = -1;
        while (++index < tok.Count)
        {
            Token t = tok[index];
            if (t.Type != TokenType.Keyword) continue;
            if (t.Text != "IN") continue;
            CollapseInList(tok, ref index);
        }
    }

    /// <summary>
    /// Advances <paramref name="pos"/> to the next token whose IsRealToken is true.
    /// </summary>
    /// <param name="tok">The token list to scan.</param>
    /// <param name="pos">Index to start after; updated to the index of the token found.</param>
    /// <returns>The token found, or null when the end of the list is reached.</returns>
    private Token GetNextRealToken(List<Token> tok, ref int pos)
    {
        while (++pos < tok.Count)
        {
            if (tok[pos].IsRealToken) return tok[pos];
        }
        return null;
    }

    /// <summary>
    /// Replaces "IN (a, b, c)" with "IN (a /* , ... */ )". The list is left
    /// untouched when IN is not followed by "(", when the list starts with a
    /// keyword (sub-select), contains a nested "(" or a command comment, is
    /// empty, or is never closed.
    /// </summary>
    /// <param name="tok">The token list to process.</param>
    /// <param name="pos">
    /// On entry the index of the IN keyword; on exit the index from which the
    /// caller resumes scanning.
    /// </param>
    private void CollapseInList(List<Token> tok, ref int pos)
    {
        // IN is not always followed by a list (e.g. "IN BOOLEAN MODE",
        // "SHOW TABLES IN db") and may even be the last token.
        Token t = GetNextRealToken(tok, ref pos);
        if (t == null || t.Text != "(") return;

        // if the first token is a keyword then we likely have a
        // SELECT .. IN (SELECT ...)
        t = GetNextRealToken(tok, ref pos);
        if (t == null || t.Type == TokenType.Keyword) return;
        // "IN ()" has no first element to keep.
        if (t.Type == TokenType.Symbol && t.Text == ")") return;

        int start = pos;
        // first find all the tokens that make up the in list
        while (++pos < tok.Count)
        {
            t = tok[pos];
            if (t.Type == TokenType.CommandComment) return;
            if (!t.IsRealToken) continue;
            if (t.Text == "(") return;
            if (t.Text == ")") break;
        }

        // Unterminated list: nothing sensible to collapse.
        if (pos >= tok.Count) return;
        int stop = pos;

        // Drop everything after the first element up to and including ')',
        // then re-insert the marker and the closing parenthesis.
        tok.RemoveRange(start + 1, stop - start);
        tok.Insert(++start, new Token(TokenType.Whitespace, " "));
        tok.Insert(++start, new Token(TokenType.Comment, CollapsedListMarker));
        tok.Insert(++start, new Token(TokenType.Whitespace, " "));
        tok.Insert(++start, new Token(TokenType.Symbol, ")"));

        // The list changed length; resume at the re-inserted ')' so that no
        // later token (such as a second IN list) is skipped.
        pos = start;
    }

    /// <summary>
    /// Splits <paramref name="sql"/> into tokens appended to the token list.
    /// </summary>
    /// <param name="sql">The statement text; the same string stored in fullSql.</param>
    private void TokenizeSql(string sql)
    {
        pos = 0;

        while (pos < sql.Length)
        {
            char c = sql[pos];

            if (LetterStartsComment(c) && ConsumeComment())
            {
                continue;
            }

            if (Char.IsWhiteSpace(c))
            {
                ConsumeWhitespace();
            }
            else if (c == '\'' || c == '\"' || c == '`')
            {
                ConsumeQuotedToken(c);
            }
            else if (!IsSpecialCharacter(c))
            {
                ConsumeUnquotedToken();
            }
            else
            {
                ConsumeSymbol();
            }
        }
    }

    /// <summary>
    /// Returns true when <paramref name="c"/> may be the first character of a comment.
    /// </summary>
    private bool LetterStartsComment(char c)
    {
        return c == '#' || c == '/' || c == '-';
    }

    /// <summary>
    /// Returns true when <paramref name="c"/> cannot be part of an unquoted
    /// word, i.e. it is not a letter, digit, '$', '_' or '.'.
    /// </summary>
    private bool IsSpecialCharacter(char c)
    {
        if (Char.IsLetterOrDigit(c) ||
            c == '$' || c == '_' || c == '.') return false;
        return true;
    }

    /// <summary>
    /// Consumes the comment starting at the current position, if there is one.
    /// Only comments beginning with "/*!" produce a token; all others are discarded.
    /// </summary>
    /// <returns>
    /// True when a comment was consumed and the position advanced; false when
    /// the current character does not actually begin a comment.
    /// </returns>
    private bool ConsumeComment()
    {
        char c = fullSql[pos];

        // make sure the comment starts correctly
        if (c == '/' && ((pos + 1) >= fullSql.Length || fullSql[pos + 1] != '*')) return false;
        if (c == '-' && !StartsDashComment()) return false;

        bool isBlock = c == '/';
        string endingPattern = isBlock ? "*/" : "\n";
        // For a block comment start searching after the opening "/*" so that
        // "/*/" is not mistaken for a complete comment.
        int searchFrom = isBlock ? pos + 2 : pos;

        int index = fullSql.IndexOf(endingPattern, searchFrom, StringComparison.Ordinal);
        if (index == -1)
        {
            // Unterminated comment: it runs to the end of the statement.
            // Using Length (not Length - 1) guarantees the scan advances.
            index = fullSql.Length;
        }
        else
        {
            index += endingPattern.Length;
        }

        string comment = fullSql.Substring(pos, index - pos);
        if (comment.StartsWith("/*!", StringComparison.Ordinal))
        {
            tokens.Add(new Token(TokenType.CommandComment, comment));
        }

        pos = index;
        return true;
    }

    /// <summary>
    /// Returns true when the text at the current position is "--" followed by
    /// a whitespace or control character, which is what MySQL requires for a
    /// double-dash comment.
    /// </summary>
    private bool StartsDashComment()
    {
        if ((pos + 2) >= fullSql.Length) return false;
        if (fullSql[pos + 1] != '-') return false;

        char next = fullSql[pos + 2];
        return Char.IsWhiteSpace(next) || Char.IsControl(next);
    }

    /// <summary>
    /// Emits the current character as a one-character symbol token.
    /// </summary>
    private void ConsumeSymbol()
    {
        char c = fullSql[pos++];
        tokens.Add(new Token(TokenType.Symbol, c.ToString()));
    }

    /// <summary>
    /// Consumes a quoted token. Single-quoted text becomes a String token
    /// with text "?"; double-quoted or backtick-quoted text becomes an
    /// Identifier token that keeps its original text, quotes included.
    /// </summary>
    /// <param name="c">The quote character found at the current position.</param>
    private void ConsumeQuotedToken(char c)
    {
        bool escaped = false;
        int start = pos;
        pos++;

        while (pos < fullSql.Length)
        {
            char x = fullSql[pos];

            if (x == c && !escaped)
            {
                // A doubled quote character is an escaped quote inside the
                // token, not its end.
                if (pos + 1 < fullSql.Length && fullSql[pos + 1] == c)
                {
                    pos += 2;
                    continue;
                }
                break;
            }

            if (escaped)
                escaped = false;
            else if (x == '\\')
                escaped = true;
            pos++;
        }

        // Step over the closing quote only when one was found; an
        // unterminated token simply ends at the end of the statement.
        if (pos < fullSql.Length)
        {
            pos++;
        }

        if (c == '\'')
            tokens.Add(new Token(TokenType.String, "?"));
        else
            tokens.Add(new Token(TokenType.Identifier, fullSql.Substring(start, pos - start)));
    }

    /// <summary>
    /// Consumes a run of non-special characters and emits it as a Number
    /// token with text "?", an upper-cased Keyword token, or an Identifier token.
    /// </summary>
    private void ConsumeUnquotedToken()
    {
        int startPos = pos;
        while (pos < fullSql.Length && !IsSpecialCharacter(fullSql[pos]))
        {
            pos++;
        }
        string word = fullSql.Substring(startPos, pos - startPos);

        // SQL numeric literals always use '.', so parse with the invariant
        // culture; the styles are the ones Double.TryParse(string) uses by default.
        double v;
        bool isNumber = Double.TryParse(
            word,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out v);

        if (isNumber)
        {
            tokens.Add(new Token(TokenType.Number, "?"));
            return;
        }

        Token t = new Token(TokenType.Identifier, word);
        if (IsKeyword(word))
        {
            t.Type = TokenType.Keyword;
            t.Text = t.Text.ToUpperInvariant();
        }
        tokens.Add(t);
    }

    /// <summary>
    /// Emits a single-space whitespace token and skips the whole whitespace run.
    /// </summary>
    private void ConsumeWhitespace()
    {
        tokens.Add(new Token(TokenType.Whitespace, " "));
        while (pos < fullSql.Length && Char.IsWhiteSpace(fullSql[pos]))
        {
            pos++;
        }
    }

    /// <summary>
    /// Returns true when the upper-cased <paramref name="word"/> exactly
    /// matches an entry of the keyword list.
    /// </summary>
    private bool IsKeyword(string word)
    {
        return keywords.BinarySearch(word.ToUpperInvariant(), StringComparer.Ordinal) >= 0;
    }
}
|
|
|
|
/// <summary>
/// One lexical element of a tokenized SQL statement.
/// </summary>
internal class Token
{
    /// <summary>Kind of token.</summary>
    public TokenType Type;

    /// <summary>Text written to the normalized statement for this token.</summary>
    public string Text;

    /// <summary>Whether the token is written to the normalized statement.</summary>
    public bool Output;

    /// <summary>
    /// Creates a token of the given type and text that is marked for output.
    /// </summary>
    /// <param name="type">Kind of token.</param>
    /// <param name="text">Text of the token.</param>
    public Token(TokenType type, string text)
    {
        this.Type = type;
        this.Text = text;
        this.Output = true;
    }

    /// <summary>
    /// True when the token is marked for output and is neither a comment,
    /// a command comment nor whitespace.
    /// </summary>
    public bool IsRealToken
    {
        get
        {
            switch (Type)
            {
                case TokenType.Comment:
                case TokenType.CommandComment:
                case TokenType.Whitespace:
                    return false;
                default:
                    return Output;
            }
        }
    }
}
|
|
|
|
/// <summary>
/// Kinds of token produced when a SQL statement is tokenized.
/// The numeric values are the ones the compiler assigned implicitly.
/// </summary>
internal enum TokenType
{
    /// <summary>A word found in the keyword list.</summary>
    Keyword = 0,

    /// <summary>A single-quoted literal.</summary>
    String = 1,

    /// <summary>An unquoted word that parses as a number.</summary>
    Number = 2,

    /// <summary>A single special character.</summary>
    Symbol = 3,

    /// <summary>An unquoted non-keyword word, or a double-quoted/backtick-quoted name.</summary>
    Identifier = 4,

    /// <summary>A comment token, such as the marker inserted for a collapsed list.</summary>
    Comment = 5,

    /// <summary>A comment that starts with "/*!".</summary>
    CommandComment = 6,

    /// <summary>A run of whitespace.</summary>
    Whitespace = 7
}
|
|
} |