/*******************************************************************************
* You may amend and distribute as you like, but don't remove this header!
*
* EPPlus provides server-side generation of Excel 2007/2010 spreadsheets.
* See http://www.codeplex.com/EPPlus for details.
*
* Copyright (C) 2011 Jan Källman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The GNU Lesser General Public License can be viewed at http://www.opensource.org/licenses/lgpl-license.php
* If you are unfamiliar with this license or have questions about it, here is a FAQ: http://www.gnu.org/licenses/gpl-faq.html
*
* All code and executables are provided "as is" with no warranty either express or implied.
* The author accepts no liability for any damage or loss of business that this product may cause.
*
* Code change notes:
*
* Author Change Date
* ******************************************************************************
* Mats Alm Added 2013-03-01 (Prior file history on https://github.com/swmal/ExcelFormulaParser)
*******************************************************************************/
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using OfficeOpenXml.FormulaParsing.Excel.Functions;
namespace OfficeOpenXml.FormulaParsing.LexicalAnalysis
{
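// Splits a spreadsheet formula string into a stream of tokens (operators, addresses,
// functions, string literals etc.) using a token separator provider and a token factory.
// Typical use: SourceCodeTokenizer.Default.Tokenize("A1+B1").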
public class SourceCodeTokenizer : ISourceCodeTokenizer
{
public static ISourceCodeTokenizer Default
{
get { return new SourceCodeTokenizer(FunctionNameProvider.Empty, NameValueProvider.Empty); }
}
public SourceCodeTokenizer(IFunctionNameProvider functionRepository, INameValueProvider nameValueProvider)
: this(new TokenFactory(functionRepository, nameValueProvider), new TokenSeparatorProvider())
{
}
public SourceCodeTokenizer(ITokenFactory tokenFactory, ITokenSeparatorProvider tokenProvider)
{
_tokenFactory = tokenFactory;
_tokenProvider = tokenProvider;
}
private readonly ITokenSeparatorProvider _tokenProvider;
private readonly ITokenFactory _tokenFactory;
public IEnumerable<Token> Tokenize(string input)
{
return Tokenize(input, null);
}
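// Tokenizes a formula. The worksheet name, when supplied, is passed on to the token
// factory so that created tokens can be resolved against that sheet.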
public IEnumerable<Token> Tokenize(string input, string worksheet)
{
if (string.IsNullOrEmpty(input))
{
return Enumerable.Empty<Token>();
}
// MA 1401: Ignore leading plus in formula.
input = input.TrimStart('+');
var context = new TokenizerContext(input);
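// Walk the formula character by character and split it on separator characters.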
for (int i = 0; i < context.FormulaChars.Length; i++)
{
var c = context.FormulaChars[i];
Token tokenSeparator;
if (CharIsTokenSeparator(c, out tokenSeparator))
{
if (context.IsInString)
{
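// Inside a string literal: a doubled quote is an escaped quote, and any other
// separator character is treated as part of the string content.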
if (IsDoubleQuote(tokenSeparator, i, context))
{
i++;
context.AppendToCurrentToken(c);
continue;
}
if (tokenSeparator.TokenType != TokenType.String)
{
context.AppendToCurrentToken(c);
continue;
}
}
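// Everything between brackets is kept inside the current token.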
if (tokenSeparator.TokenType == TokenType.OpeningBracket)
{
context.AppendToCurrentToken(c);
context.BracketCount++;
continue;
}
if (tokenSeparator.TokenType == TokenType.ClosingBracket)
{
context.AppendToCurrentToken(c);
context.BracketCount--;
continue;
}
if (context.BracketCount > 0)
{
context.AppendToCurrentToken(c);
continue;
}
// two operators in sequence could be "<=" or ">="
if (IsPartOfMultipleCharSeparator(context, c))
{
var sOp = context.LastToken.Value + c.ToString(CultureInfo.InvariantCulture);
var op = _tokenProvider.Tokens[sOp];
context.ReplaceLastToken(op);
context.NewToken();
continue;
}
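// A double quote starts or ends a string literal (a quote directly after an opening
// enumerable is kept in the current token). When a string is closed, the accumulated
// text is emitted as StringContent before the String token for the quote itself.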
if (tokenSeparator.TokenType == TokenType.String)
{
if (context.LastToken != null && context.LastToken.TokenType == TokenType.OpeningEnumerable)
{
context.AppendToCurrentToken(c);
context.ToggleIsInString();
continue;
}
if (context.LastToken != null && context.LastToken.TokenType == TokenType.String)
{
context.AddToken(!context.CurrentTokenHasValue
? new Token(string.Empty, TokenType.StringContent)
: new Token(context.CurrentToken, TokenType.StringContent));
}
context.AddToken(new Token("\"", TokenType.String));
context.ToggleIsInString();
context.NewToken();
continue;
}
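// A separator ends the current token, so flush the accumulated value first.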
if (context.CurrentTokenHasValue)
{
if (Regex.IsMatch(context.CurrentToken, "^\"*$"))
{
context.AddToken(_tokenFactory.Create(context.CurrentToken, TokenType.StringContent));
}
else
{
context.AddToken(CreateToken(context, worksheet));
}
// If the next token is an opening parenthesis and the previous token was interpreted as an address or name, the current token is actually a function.
if (tokenSeparator.TokenType == TokenType.OpeningParenthesis && (context.LastToken.TokenType == TokenType.ExcelAddress || context.LastToken.TokenType == TokenType.NameValue))
{
context.LastToken.TokenType = TokenType.Function;
}
}
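// A "-" in a position where a binary operator cannot appear is a unary negator.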
if (tokenSeparator.Value == "-")
{
if (TokenIsNegator(context))
{
context.AddToken(new Token("-", TokenType.Negator));
continue;
}
}
context.AddToken(tokenSeparator);
context.NewToken();
continue;
}
context.AppendToCurrentToken(c);
}
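// Flush any token that remains after the last character, then normalize the result.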
if (context.CurrentTokenHasValue)
{
context.AddToken(CreateToken(context, worksheet));
}
CleanupTokens(context, _tokenProvider.Tokens);
return context.Result;
}
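// True when a string separator is immediately followed by another double quote,
// i.e. an escaped quote inside a string literal.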
private static bool IsDoubleQuote(Token tokenSeparator, int formulaCharIndex, TokenizerContext context)
{
return tokenSeparator.TokenType == TokenType.String && formulaCharIndex + 1 < context.FormulaChars.Length && context.FormulaChars[formulaCharIndex + 1] == '\"';
}
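// Post-pass over the token list: resolve unrecognized tokens to functions or names
// and collapse redundant "+"/"-" sequences.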
private static void CleanupTokens(TokenizerContext context, IDictionary<string, Token> tokens)
{
for (int i = 0; i < context.Result.Count; i++)
{
var token = context.Result[i];
if (token.TokenType == TokenType.Unrecognized)
{
if (i < context.Result.Count - 1)
{
if (context.Result[i+1].TokenType == TokenType.OpeningParenthesis)
{
token.TokenType = TokenType.Function;
}
else
{
token.TokenType = TokenType.NameValue;
}
}
else
{
token.TokenType = TokenType.NameValue;
}
}
else if ((token.TokenType == TokenType.Operator || token.TokenType == TokenType.Negator) && i < context.Result.Count - 1 &&
(token.Value == "+" || token.Value == "-"))
{
if (i > 0 && token.Value == "+") // Remove a "+" that directly follows an opening parenthesis.
{
if (context.Result[i - 1].TokenType == TokenType.OpeningParenthesis)
{
context.Result.RemoveAt(i);
SetNegatorOperator(context, i, tokens);
i--;
continue;
}
}
var nextToken = context.Result[i + 1];
if (nextToken.TokenType == TokenType.Operator || nextToken.TokenType == TokenType.Negator)
{
if (token.Value == "+" && (nextToken.Value=="+" || nextToken.Value == "-"))
{
//Remove first
context.Result.RemoveAt(i);
SetNegatorOperator(context, i, tokens);
i--;
}
else if (token.Value == "-" && nextToken.Value == "+")
{
//Remove second
context.Result.RemoveAt(i+1);
SetNegatorOperator(context, i, tokens);
i--;
}
else if (token.Value == "-" && nextToken.Value == "-")
{
//Remove first and set operator to +
context.Result.RemoveAt(i);
if (i == 0)
{
context.Result.RemoveAt(i+1);
i += 2;
}
else
{
//context.Result[i].TokenType = TokenType.Operator;
//context.Result[i].Value = "+";
context.Result[i] = tokens["+"];
SetNegatorOperator(context, i, tokens);
i--;
}
}
}
}
}
}
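// Re-types a "-" token as a negator when the preceding token does not allow a
// binary minus; otherwise keeps it as the "-" operator.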
private static void SetNegatorOperator(TokenizerContext context, int i, IDictionary<string, Token> tokens)
{
if (context.Result[i].Value == "-" && i > 0 && (context.Result[i].TokenType == TokenType.Operator || context.Result[i].TokenType == TokenType.Negator))
{
if (TokenIsNegator(context.Result[i - 1]))
{
context.Result[i] = new Token("-", TokenType.Negator);
}
else
{
context.Result[i] = tokens["-"];
}
}
}
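// A minus acts as a negator when it has no preceding token or follows an operator,
// a list separator, an opening parenthesis or an opening enumerable.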
private static bool TokenIsNegator(TokenizerContext context)
{
return TokenIsNegator(context.LastToken);
}
private static bool TokenIsNegator(Token t)
{
return t == null
||
t.TokenType == TokenType.Operator
||
t.TokenType == TokenType.OpeningParenthesis
||
t.TokenType == TokenType.Comma
||
t.TokenType == TokenType.SemiColon
||
t.TokenType == TokenType.OpeningEnumerable;
}
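// True when the current character can extend the previous operator into a
// multi-character operator such as "<=" or ">=".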
private bool IsPartOfMultipleCharSeparator(TokenizerContext context, char c)
{
var lastToken = context.LastToken != null ? context.LastToken.Value : string.Empty;
return _tokenProvider.IsOperator(lastToken)
&& _tokenProvider.IsPossibleLastPartOfMultipleCharOperator(c.ToString(CultureInfo.InvariantCulture))
&& !context.CurrentTokenHasValue;
}
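// Creates a token from the accumulated characters, treating a lone "-" as a
// negator where appropriate.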
private Token CreateToken(TokenizerContext context, string worksheet)
{
if (context.CurrentToken == "-")
{
if (context.LastToken == null && context.LastToken.TokenType == TokenType.Operator)
{
return new Token("-", TokenType.Negator);
}
}
return _tokenFactory.Create(context.Result, context.CurrentToken, worksheet);
}
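// Looks up the character in the separator table and returns the matching
// separator token when one exists.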
private bool CharIsTokenSeparator(char c, out Token token)
{
var result = _tokenProvider.Tokens.ContainsKey(c.ToString());
token = result ? _tokenProvider.Tokens[c.ToString()] : null;
return result;
}
}
}