// Copyright (c) 2009-2010 Sun Microsystems, Inc.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as published by
// the Free Software Foundation
//
// There are special exceptions to the terms and conditions of the GPL
// as it is applied to this software. View the full text of the
// exception in file EXCEPTIONS in the directory of this software
// distribution.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text;
using MySql.Data.MySqlClient.Properties;

namespace MySql.Data.Common
{
  /// <summary>
  /// Rewrites a SQL statement into a normalized form: string literals and
  /// numbers become "?", plain comments are dropped, runs of whitespace are
  /// collapsed, and multi-row VALUES lists and IN lists are shortened to their
  /// first element followed by a "/* , ... */" marker.
  /// </summary>
  /// <remarks>
  /// An instance keeps per-call state in fields (token list, scan position,
  /// current SQL text), so a single instance must not be used from several
  /// threads at once.
  /// </remarks>
  internal class QueryNormalizer
  {
    // Keyword list loaded once from the embedded "keywords" resource, one
    // keyword per line. Lookups upper-case the candidate first, so the resource
    // is presumably upper-case -- TODO confirm against the resource file.
    private static List<string> keywords = new List<string>();

    // Tokens of the statement currently being normalized.
    private List<Token> tokens = new List<Token>();
    // Index of the next unread character in fullSql.
    private int pos;
    // The statement currently being tokenized.
    private string fullSql;
    // Upper-cased first keyword of the last normalized statement, or null.
    private string queryType;

    static QueryNormalizer()
    {
      using (StringReader sr = new StringReader(Resources.keywords))
      {
        string keyword = sr.ReadLine();
        while (keyword != null)
        {
          keywords.Add(keyword);
          keyword = sr.ReadLine();
        }
      }
    }

    /// <summary>
    /// Gets the upper-cased first keyword of the statement most recently passed
    /// to <see cref="Normalize"/>, or null when that statement contained no
    /// keyword (or nothing has been normalized yet).
    /// </summary>
    public string QueryType
    {
      get { return queryType; }
    }

    /// <summary>
    /// Normalizes <paramref name="sql"/> and records its statement type.
    /// </summary>
    /// <param name="sql">The SQL text to normalize.</param>
    /// <returns>The normalized SQL text.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sql"/> is null.
    /// </exception>
    public string Normalize(string sql)
    {
      if (sql == null)
        throw new ArgumentNullException("sql");

      tokens.Clear();
      // Reset so a statement without any keyword does not report the type of
      // the previously normalized statement.
      queryType = null;
      fullSql = sql;

      TokenizeSql(sql);
      DetermineStatementType(tokens);
      ProcessMathSymbols(tokens);
      CollapseValueLists(tokens);
      CollapseInLists(tokens);
      CollapseWhitespace(tokens);

      StringBuilder newSql = new StringBuilder();
      foreach (Token t in tokens)
      {
        if (t.Output)
          newSql.Append(t.Text);
      }
      return newSql.ToString();
    }

    /// <summary>
    /// Sets <c>queryType</c> to the upper-cased text of the first keyword token.
    /// </summary>
    /// <param name="tok">The tokenized statement.</param>
    private void DetermineStatementType(List<Token> tok)
    {
      foreach (Token t in tok)
      {
        if (t.Type == TokenType.Keyword)
        {
          queryType = t.Text.ToUpperInvariant();
          break;
        }
      }
    }

    /// <summary>
    /// Mark - or + signs that are unary ops as no output
    /// </summary>
    /// <param name="tok">The tokenized statement.</param>
    private void ProcessMathSymbols(List<Token> tok)
    {
      Token lastToken = null;

      foreach (Token t in tok)
      {
        if (t.Type == TokenType.Symbol && (t.Text == "-" || t.Text == "+"))
        {
          // A sign is treated as binary only when it follows a number, an
          // identifier or a closing paren; after any other real token it is
          // considered unary and suppressed. A sign that is the very first
          // real token is left untouched.
          if (lastToken != null &&
              lastToken.Type != TokenType.Number &&
              lastToken.Type != TokenType.Identifier &&
              (lastToken.Type != TokenType.Symbol || lastToken.Text != ")"))
            t.Output = false;
        }
        if (t.IsRealToken)
          lastToken = t;
      }
    }

    /// <summary>
    /// Suppresses every whitespace token that directly follows another
    /// emitted whitespace token.
    /// </summary>
    /// <param name="tok">The tokenized statement.</param>
    private void CollapseWhitespace(List<Token> tok)
    {
      Token lastToken = null;

      foreach (Token t in tok)
      {
        if (t.Output &&
            t.Type == TokenType.Whitespace &&
            lastToken != null &&
            lastToken.Type == TokenType.Whitespace)
        {
          t.Output = false;
        }
        if (t.Output)
          lastToken = t;
      }
    }

    /// <summary>
    /// Finds every keyword starting with "VALUE" and collapses the value list
    /// that follows it.
    /// </summary>
    /// <param name="tok">The tokenized statement.</param>
    private void CollapseValueLists(List<Token> tok)
    {
      int pos = -1;

      while (++pos < tok.Count)
      {
        Token t = tok[pos];
        if (t.Type != TokenType.Keyword) continue;
        // Keyword text was upper-cased by the tokenizer, so an ordinal
        // comparison is sufficient.
        if (!t.Text.StartsWith("VALUE", StringComparison.Ordinal)) continue;
        CollapseValueList(tok, ref pos);
      }
    }

    /// <summary>
    /// Collapses "(...), (...), ..." into the first group followed by a
    /// "/* , ... */" marker. A list with a single group is left unchanged.
    /// </summary>
    /// <param name="tok">The tokenized statement.</param>
    /// <param name="pos">
    /// On entry, the index of the VALUE(S) keyword; on exit, the index the
    /// caller should resume scanning after.
    /// </param>
    private void CollapseValueList(List<Token> tok, ref int pos)
    {
      List<int> parenIndices = new List<int>();

      // this while loop will find all closing parens in this value list
      while (true)
      {
        // find the close ')'
        while (++pos < tok.Count)
        {
          if (tok[pos].Type == TokenType.Symbol && tok[pos].Text == ")")
            break;
        }
        // Unterminated group (malformed SQL): stop without recording an
        // out-of-range index.
        if (pos >= tok.Count) break;
        parenIndices.Add(pos);

        // now find the next "real" token
        while (++pos < tok.Count)
        {
          if (tok[pos].IsRealToken) break;
        }
        if (pos >= tok.Count) break;

        if (tok[pos].Text != ",")
        {
          // Not part of the value list; step back so the caller re-examines it.
          pos--;
          break;
        }
      }

      // if we only have 1 value then we don't collapse
      if (parenIndices.Count < 2) return;

      // The two slots right after the first ')' are overwritten with the
      // marker. They always exist: a second ')' can only have been recorded
      // after a ',' that itself follows the first ')'.
      int index = parenIndices[0];
      tok[++index] = new Token(TokenType.Whitespace, " ");
      tok[++index] = new Token(TokenType.Comment, "/* , ... */");
      index++;

      // now mark all the other tokens as no output
      int last = parenIndices[parenIndices.Count - 1];
      while (index <= last)
        tok[index++].Output = false;
    }

    /// <summary>
    /// Finds every IN keyword and collapses the literal list that follows it.
    /// </summary>
    /// <param name="tok">The tokenized statement.</param>
    private void CollapseInLists(List<Token> tok)
    {
      int pos = -1;

      while (++pos < tok.Count)
      {
        Token t = tok[pos];
        if (t.Type != TokenType.Keyword) continue;
        if (t.Text != "IN") continue;
        CollapseInList(tok, ref pos);
      }
    }

    /// <summary>
    /// Advances <paramref name="pos"/> to the next real token.
    /// </summary>
    /// <param name="tok">The tokenized statement.</param>
    /// <param name="pos">
    /// Index to start after; set to the index of the returned token, or to
    /// <c>tok.Count</c> when there is none.
    /// </param>
    /// <returns>The next real token, or null when the list is exhausted.</returns>
    private Token GetNextRealToken(List<Token> tok, ref int pos)
    {
      while (++pos < tok.Count)
      {
        if (tok[pos].IsRealToken)
          return tok[pos];
      }
      return null;
    }

    /// <summary>
    /// Rewrites "IN (a, b, c)" as "IN (a /* , ... */ )". Lists that start with
    /// a keyword (sub-selects), contain a nested paren or a command comment,
    /// are empty, or are not properly closed are left unchanged.
    /// </summary>
    /// <param name="tok">The tokenized statement; modified in place.</param>
    /// <param name="pos">
    /// On entry, the index of the IN keyword; on exit, the index the caller
    /// should resume scanning after.
    /// </param>
    private void CollapseInList(List<Token> tok, ref int pos)
    {
      Token t = GetNextRealToken(tok, ref pos);
      // IN must be followed by '('; otherwise (including end of input) there
      // is no list to collapse.
      if (t == null || t.Text != "(") return;

      // if the first token is a keyword then we likely have a
      // SELECT .. IN (SELECT ...)
      t = GetNextRealToken(tok, ref pos);
      if (t == null || t.Type == TokenType.Keyword) return;
      // Empty list or nested paren as first element: nothing safe to collapse.
      if (t.Text == "(" || t.Text == ")") return;

      int start = pos;
      // first find all the tokens that make up the in list
      while (++pos < tok.Count)
      {
        t = tok[pos];
        if (t.Type == TokenType.CommandComment) return;
        if (!t.IsRealToken) continue;
        if (t.Text == "(") return;
        if (t.Text == ")") break;
      }
      // No closing paren (malformed SQL): leave the tokens untouched.
      if (pos >= tok.Count) return;

      int stop = pos;
      // Drop everything after the first element up to and including ')'.
      tok.RemoveRange(start + 1, stop - start);
      tok.Insert(++start, new Token(TokenType.Whitespace, " "));
      tok.Insert(++start, new Token(TokenType.Comment, "/* , ... */"));
      tok.Insert(++start, new Token(TokenType.Whitespace, " "));
      tok.Insert(++start, new Token(TokenType.Symbol, ")"));

      // The list changed length; resume right after the re-inserted ')' so no
      // following token (e.g. another IN) is skipped.
      pos = start;
    }

    /// <summary>
    /// Splits <paramref name="sql"/> into tokens and appends them to
    /// <c>tokens</c>. Plain comments produce no token.
    /// </summary>
    /// <param name="sql">The SQL text; must be the same string as fullSql.</param>
    private void TokenizeSql(string sql)
    {
      pos = 0;

      while (pos < sql.Length)
      {
        char c = sql[pos];
        if (LetterStartsComment(c) && ConsumeComment())
          continue;
        if (Char.IsWhiteSpace(c))
          ConsumeWhitespace();
        else if (c == '\'' || c == '\"' || c == '`')
          ConsumeQuotedToken(c);
        else if (!IsSpecialCharacter(c))
          ConsumeUnquotedToken();
        else
          ConsumeSymbol();
      }
    }

    /// <summary>
    /// Returns true when <paramref name="c"/> could be the first character of
    /// a comment ('#', '/' or '-').
    /// </summary>
    private bool LetterStartsComment(char c)
    {
      return c == '#' || c == '/' || c == '-';
    }

    /// <summary>
    /// Returns true when <paramref name="c"/> cannot be part of an unquoted
    /// word, i.e. it is not a letter, digit, '$', '_' or '.'.
    /// </summary>
    private bool IsSpecialCharacter(char c)
    {
      if (Char.IsLetterOrDigit(c) || c == '$' || c == '_' || c == '.')
        return false;
      return true;
    }

    /// <summary>
    /// Consumes a comment starting at the current position, if there is one.
    /// Only "/*!" comments are kept, as CommandComment tokens; all other
    /// comments are discarded.
    /// </summary>
    /// <returns>
    /// True when a comment was consumed and <c>pos</c> advanced; false when the
    /// character at <c>pos</c> does not actually begin a comment.
    /// </returns>
    private bool ConsumeComment()
    {
      char c = fullSql[pos];

      // make sure the comment starts correctly
      if (c == '/' && ((pos + 1) >= fullSql.Length || fullSql[pos + 1] != '*'))
        return false;
      if (c == '-' &&
          ((pos + 2) >= fullSql.Length || fullSql[pos + 1] != '-' || fullSql[pos + 2] != ' '))
        return false;

      string endingPattern = "\n";
      int searchFrom = pos;
      if (c == '/')
      {
        endingPattern = "*/";
        // Skip the opening "/*" so that "/*/" is not taken as a complete comment.
        searchFrom = pos + 2;
      }

      int index = fullSql.IndexOf(endingPattern, searchFrom, StringComparison.Ordinal);
      if (index == -1)
        // Unterminated comment runs to the end of the text. Using Length (not
        // Length - 1) guarantees pos always advances.
        index = fullSql.Length;
      else
        index += endingPattern.Length;

      string comment = fullSql.Substring(pos, index - pos);
      if (comment.StartsWith("/*!", StringComparison.Ordinal))
        tokens.Add(new Token(TokenType.CommandComment, comment));
      pos = index;
      return true;
    }

    /// <summary>
    /// Consumes one character as a Symbol token.
    /// </summary>
    private void ConsumeSymbol()
    {
      char c = fullSql[pos++];
      tokens.Add(new Token(TokenType.Symbol, c.ToString()));
    }

    /// <summary>
    /// Consumes a quoted run starting at the current position. Single-quoted
    /// text becomes a String token with text "?"; double-quoted or back-quoted
    /// text becomes an Identifier token that keeps its quotes.
    /// </summary>
    /// <param name="c">The opening quote character.</param>
    private void ConsumeQuotedToken(char c)
    {
      bool escaped = false;
      int start = pos;
      pos++;

      while (pos < fullSql.Length)
      {
        char x = fullSql[pos];

        if (x == c && !escaped) break;

        // A backslash escapes exactly the next character.
        if (escaped)
          escaped = false;
        else if (x == '\\')
          escaped = true;
        pos++;
      }
      // Step over the closing quote; when the quote is unterminated pos is
      // already at the end and must not move past it.
      if (pos < fullSql.Length)
        pos++;

      if (c == '\'')
        tokens.Add(new Token(TokenType.String, "?"));
      else
        tokens.Add(new Token(TokenType.Identifier, fullSql.Substring(start, pos - start)));
    }

    /// <summary>
    /// Consumes an unquoted word. Words that parse as a number become a Number
    /// token with text "?"; known keywords become upper-cased Keyword tokens;
    /// anything else becomes an Identifier token.
    /// </summary>
    private void ConsumeUnquotedToken()
    {
      int startPos = pos;
      while (pos < fullSql.Length && !IsSpecialCharacter(fullSql[pos]))
        pos++;

      string word = fullSql.Substring(startPos, pos - startPos);
      double v;
      // SQL numeric literals are culture-independent, so parse them with the
      // invariant culture rather than the thread's current culture.
      if (Double.TryParse(word, NumberStyles.Float | NumberStyles.AllowThousands,
          CultureInfo.InvariantCulture, out v))
      {
        tokens.Add(new Token(TokenType.Number, "?"));
      }
      else
      {
        Token t = new Token(TokenType.Identifier, word);
        if (IsKeyword(word))
        {
          t.Type = TokenType.Keyword;
          t.Text = t.Text.ToUpperInvariant();
        }
        tokens.Add(t);
      }
    }

    /// <summary>
    /// Consumes a run of whitespace and emits it as a single-space token.
    /// </summary>
    private void ConsumeWhitespace()
    {
      tokens.Add(new Token(TokenType.Whitespace, " "));
      while (pos < fullSql.Length && Char.IsWhiteSpace(fullSql[pos]))
        pos++;
    }

    /// <summary>
    /// Returns true when the upper-cased <paramref name="word"/> is in the
    /// keyword list.
    /// </summary>
    private bool IsKeyword(string word)
    {
      return keywords.Contains(word.ToUpperInvariant());
    }
  }

  /// <summary>
  /// A single lexical element of a SQL statement.
  /// </summary>
  internal class Token
  {
    /// <summary>The token category.</summary>
    public TokenType Type;
    /// <summary>The text written to the normalized statement.</summary>
    public string Text;
    /// <summary>False when the token is to be left out of the output.</summary>
    public bool Output;

    /// <summary>
    /// Creates a token that is emitted by default.
    /// </summary>
    public Token(TokenType type, string text)
    {
      Type = type;
      Text = text;
      Output = true;
    }

    /// <summary>
    /// Gets whether the token is emitted and is something other than a
    /// comment, command comment or whitespace.
    /// </summary>
    public bool IsRealToken
    {
      get
      {
        return Type != TokenType.Comment &&
               Type != TokenType.CommandComment &&
               Type != TokenType.Whitespace &&
               Output;
      }
    }
  }

  /// <summary>
  /// Categories of <see cref="Token"/>.
  /// </summary>
  internal enum TokenType
  {
    Keyword,
    String,
    Number,
    Symbol,
    Identifier,
    Comment,
    CommandComment,
    Whitespace
  }
}