blob: 3a6a006858f37be1933bfd507e4fdcea2fcf7ea0 [file] [log] [blame]
/*******************************************************************************
* You may amend and distribute as you like, but don't remove this header!
*
* EPPlus provides server-side generation of Excel 2007/2010 spreadsheets.
* See http://www.codeplex.com/EPPlus for details.
*
* Copyright (C) 2011 Jan Källman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The GNU Lesser General Public License can be viewed at http://www.opensource.org/licenses/lgpl-license.php
* If you are unfamiliar with this license or have questions about it, here is an FAQ: http://www.gnu.org/licenses/gpl-faq.html
*
* All code and executables are provided "as is" with no warranty either express or implied.
* The author accepts no liability for any damage or loss of business that this product may cause.
*
* Code change notes:
*
* Author Change Date
* ******************************************************************************
* Mats Alm Added 2013-03-01 (Prior file history on https://github.com/swmal/ExcelFormulaParser)
*******************************************************************************/
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;
namespace AppsheetEpplus;
public class SourceCodeTokenizer : ISourceCodeTokenizer {
/// <summary>
/// Gets a tokenizer built from <see cref="FunctionNameProvider.Empty"/> and
/// <see cref="NameValueProvider.Empty"/>. A new instance is created on every access.
/// </summary>
public static ISourceCodeTokenizer Default {
  get {
    return new SourceCodeTokenizer(FunctionNameProvider.Empty, NameValueProvider.Empty);
  }
}
/// <summary>
/// Creates a tokenizer that uses a <see cref="TokenFactory"/> built from the given providers
/// and a new <see cref="TokenSeparatorProvider"/>.
/// </summary>
/// <param name="functionRepository">Function name provider handed to the token factory.</param>
/// <param name="nameValueProvider">Name/value provider handed to the token factory.</param>
public SourceCodeTokenizer(
    IFunctionNameProvider functionRepository,
    INameValueProvider nameValueProvider)
    : this(
        tokenFactory: new TokenFactory(functionRepository, nameValueProvider),
        tokenProvider: new TokenSeparatorProvider()) {
}
/// <summary>
/// Creates a tokenizer from an explicit token factory and separator provider.
/// </summary>
/// <param name="tokenFactory">Factory used to create tokens from accumulated text.</param>
/// <param name="tokenProvider">Provider of the separator tokens and operator checks.</param>
public SourceCodeTokenizer(ITokenFactory tokenFactory, ITokenSeparatorProvider tokenProvider)
    => (_tokenFactory, _tokenProvider) = (tokenFactory, tokenProvider);
// Supplies the separator tokens (Tokens) and the operator checks used while scanning.
private readonly ITokenSeparatorProvider _tokenProvider;
// Creates tokens from the text accumulated between separators.
private readonly ITokenFactory _tokenFactory;
/// <summary>
/// Tokenizes <paramref name="input"/> without a worksheet (a null worksheet is forwarded).
/// </summary>
/// <param name="input">The formula text to tokenize.</param>
/// <returns>The tokens produced by the two-argument overload.</returns>
public IEnumerable<Token> Tokenize(string input) => Tokenize(input, worksheet: null);
/// <summary>
/// Splits a formula into tokens by scanning it character by character.
/// </summary>
/// <param name="input">The formula text. Null or empty input yields an empty result.</param>
/// <param name="worksheet">Worksheet name forwarded to the token factory when tokens are created.</param>
/// <returns>The token list collected in the tokenizer context, after cleanup.</returns>
public IEnumerable<Token> Tokenize(string input, string worksheet) {
if (string.IsNullOrEmpty(input)) {
return [];
}
// MA 1401: Ignore leading plus in formula.
input = input.TrimStart('+');
var context = new TokenizerContext(input);
// Remembers which quote character opened the current string, so the other kind of quote
// can be treated as ordinary content (see CHANGE 2 / CHANGE 3 below).
bool isSingleQuoteString = false;
for (int i = 0; i < context.FormulaChars.Length; i++) {
var c = context.FormulaChars[i];
if (CharIsTokenSeparator(c, out var tokenSeparator)) {
if (context.IsInString) {
// Escaped quote inside a string: keep this character and skip the following one.
if (IsDoubleQuote(tokenSeparator, i, context)) {
i++;
context.AppendToCurrentToken(c);
continue;
}
// Any separator other than a string delimiter is plain string content.
if (tokenSeparator.TokenType != TokenType.String) {
context.AppendToCurrentToken(c);
continue;
}
// CHANGE 2
// A quote character that differs from the one that opened the string is plain content.
if ((isSingleQuoteString && c != '\'') || (!isSingleQuoteString && c != '"')) {
context.AppendToCurrentToken(c);
continue;
}
}
// Bracket characters are kept in the current token; the nesting depth is tracked so that
// separators inside brackets are not split into tokens of their own.
if (tokenSeparator.TokenType == TokenType.OpeningBracket) {
context.AppendToCurrentToken(c);
context.BracketCount++;
continue;
}
if (tokenSeparator.TokenType == TokenType.ClosingBracket) {
context.AppendToCurrentToken(c);
context.BracketCount--;
continue;
}
if (context.BracketCount > 0) {
context.AppendToCurrentToken(c);
continue;
}
// two operators in sequence could be "<=" or ">="
if (IsPartOfMultipleCharSeparator(context, c)) {
// Merge the previous operator token with this character and replace it with the combined token.
var sOp = context.LastToken.Value + c.ToString(CultureInfo.InvariantCulture);
var op = _tokenProvider.Tokens[sOp];
context.ReplaceLastToken(op);
context.NewToken();
continue;
}
if (tokenSeparator.TokenType == TokenType.String) {
// CHANGE3 :
// Record which quote character is being processed.
isSingleQuoteString = (c == '\'');
// Directly after an opening enumerable only the in-string state is toggled; no String token is added.
if (context.LastToken != null
&& context.LastToken.TokenType == TokenType.OpeningEnumerable) {
// context.AppendToCurrentToken(c); // Praveen's change of 10/28/2015
context.ToggleIsInString();
continue;
}
// The previous token is a string delimiter: emit the text collected since then
// (possibly empty) as the string content.
if (context.LastToken != null && context.LastToken.TokenType == TokenType.String) {
context.AddToken(
!context.CurrentTokenHasValue
? new(string.Empty, TokenType.StringContent)
: new Token(context.CurrentToken, TokenType.StringContent));
}
// The delimiter token is always added as a double quote, whichever quote character was read.
context.AddToken(new("\"", TokenType.String));
context.ToggleIsInString();
context.NewToken();
continue;
}
// Flush the text accumulated before this separator.
if (context.CurrentTokenHasValue) {
// Text that is empty or consists only of double-quote characters becomes string content.
if (Regex.IsMatch(context.CurrentToken, "^\"*$")) {
context.AddToken(_tokenFactory.Create(context.CurrentToken, TokenType.StringContent));
} else {
context.AddToken(CreateToken(context, worksheet));
}
// If the next token is an opening parenthesis and the previous token was interpreted as an address or name, then that token is actually a function.
if (tokenSeparator.TokenType == TokenType.OpeningParenthesis
&& (context.LastToken.TokenType == TokenType.ExcelAddress
|| context.LastToken.TokenType == TokenType.NameValue)) {
context.LastToken.TokenType = TokenType.Function;
}
}
// A minus in a position where TokenIsNegator holds is added as a Negator token
// instead of the separator's own token.
if (tokenSeparator.Value == "-") {
if (TokenIsNegator(context)) {
context.AddToken(new("-", TokenType.Negator));
continue;
}
}
context.AddToken(tokenSeparator);
context.NewToken();
continue;
}
// Not a separator: accumulate the character into the current token.
context.AppendToCurrentToken(c);
}
// Flush whatever text remains after the last separator.
if (context.CurrentTokenHasValue) {
context.AddToken(CreateToken(context, worksheet));
}
CleanupTokens(context, _tokenProvider.Tokens);
return context.Result;
}
/// <summary>
/// Determines whether the character at <paramref name="formulaCharIndex"/> starts an escaped
/// double quote, i.e. a <c>"</c> that is immediately followed by another <c>"</c>.
/// </summary>
/// <param name="tokenSeparator">The separator token matched for the current character.</param>
/// <param name="formulaCharIndex">Index of the current character in the formula.</param>
/// <param name="context">The tokenizer context holding the formula characters.</param>
/// <returns>True when the current and the following character are both double quotes.</returns>
private static bool IsDoubleQuote(
    Token tokenSeparator,
    int formulaCharIndex,
    TokenizerContext context) {
  if (tokenSeparator.TokenType != TokenType.String) {
    return false;
  }
  var chars = context.FormulaChars;
  // Both characters must be double quotes. Checking only the following character would treat
  // a single quote directly before a closing double quote (as in "'") as an escape sequence
  // and swallow the closing quote.
  return formulaCharIndex + 1 < chars.Length
      && chars[formulaCharIndex] == '"'
      && chars[formulaCharIndex + 1] == '"';
}
/// <summary>
/// Post-processes the tokens in <paramref name="context"/>:
/// Unrecognized tokens become Function (when followed by an opening parenthesis) or NameValue,
/// Function tokens not followed by an opening parenthesis become Unrecognized, and
/// sequences of consecutive "+"/"-" signs are collapsed.
/// </summary>
/// <param name="context">The tokenizer context whose result list is modified in place.</param>
/// <param name="tokens">Separator tokens keyed by their text; "+" and "-" are looked up here.</param>
private static void CleanupTokens(TokenizerContext context, IDictionary<string, Token> tokens) {
  for (int i = 0; i < context.Result.Count; i++) {
    var token = context.Result[i];
    bool hasNext = i < context.Result.Count - 1;
    bool followedByParenthesis =
        hasNext && context.Result[i + 1].TokenType == TokenType.OpeningParenthesis;

    if (token.TokenType == TokenType.Unrecognized) {
      token.TokenType = followedByParenthesis ? TokenType.Function : TokenType.NameValue;
      continue;
    }
    if (token.TokenType == TokenType.Function) {
      token.TokenType = followedByParenthesis ? TokenType.Function : TokenType.Unrecognized;
      continue;
    }

    bool isSign =
        (token.TokenType == TokenType.Operator || token.TokenType == TokenType.Negator)
        && (token.Value == "+" || token.Value == "-");
    if (!isSign || !hasNext) {
      continue;
    }

    // Remove any + with an opening parenthesis before.
    if (i > 0
        && token.Value == "+"
        && context.Result[i - 1].TokenType == TokenType.OpeningParenthesis) {
      context.Result.RemoveAt(i);
      SetNegatorOperator(context, i, tokens);
      i--;  // re-examine the token that moved into this position
      continue;
    }

    var nextToken = context.Result[i + 1];
    if (nextToken.TokenType != TokenType.Operator && nextToken.TokenType != TokenType.Negator) {
      continue;
    }

    if (token.Value == "+" && (nextToken.Value == "+" || nextToken.Value == "-")) {
      // Remove first
      context.Result.RemoveAt(i);
      SetNegatorOperator(context, i, tokens);
      i--;
    } else if (token.Value == "-" && nextToken.Value == "+") {
      // Remove second
      context.Result.RemoveAt(i + 1);
      SetNegatorOperator(context, i, tokens);
      i--;
    } else if (token.Value == "-" && nextToken.Value == "-") {
      // Remove first and set operator to +
      context.Result.RemoveAt(i);
      if (i == 0) {
        // Two leading minus signs cancel each other out. After the first removal the second
        // minus sits at index i, so remove it there; removing at i + 1 would drop the operand
        // instead (or throw when nothing follows).
        context.Result.RemoveAt(i);
        i--;  // re-examine the token that is now first
      } else {
        context.Result[i] = tokens["+"];
        SetNegatorOperator(context, i, tokens);
        i--;
      }
    }
  }
}
/// <summary>
/// Re-classifies the "-" token at index <paramref name="i"/>: it becomes a Negator when the
/// preceding token allows a negator, otherwise the "-" token from <paramref name="tokens"/>.
/// Does nothing for the first token, for tokens other than "-", or for tokens that are
/// neither Operator nor Negator.
/// </summary>
/// <param name="context">The tokenizer context whose result list is updated.</param>
/// <param name="i">Index of the token to re-classify.</param>
/// <param name="tokens">Separator tokens keyed by their text.</param>
private static void SetNegatorOperator(
    TokenizerContext context,
    int i,
    IDictionary<string, Token> tokens) {
  var current = context.Result[i];
  if (current.Value != "-" || i <= 0) {
    return;
  }
  var currentType = current.TokenType;
  if (currentType != TokenType.Operator && currentType != TokenType.Negator) {
    return;
  }
  context.Result[i] = TokenIsNegator(context.Result[i - 1])
      ? new Token("-", TokenType.Negator)
      : tokens["-"];
}
/// <summary>
/// Checks whether a "-" following the context's last token should be treated as a negator.
/// </summary>
private static bool TokenIsNegator(TokenizerContext context) => TokenIsNegator(context.LastToken);
/// <summary>
/// Checks whether a "-" following <paramref name="t"/> should be treated as a negator:
/// true when there is no preceding token, or when it is an operator, opening parenthesis,
/// comma, semicolon or opening enumerable.
/// </summary>
private static bool TokenIsNegator(Token t) {
  if (t == null) {
    return true;
  }
  var type = t.TokenType;
  return type == TokenType.Operator
      || type == TokenType.OpeningParenthesis
      || type == TokenType.Comma
      || type == TokenType.SemiColon
      || type == TokenType.OpeningEnumerable;
}
/// <summary>
/// Checks whether <paramref name="c"/> could complete a multi-character operator together
/// with the previous token: the previous token must be an operator, <paramref name="c"/> must
/// be a possible last part of such an operator, and no token text may have been accumulated.
/// </summary>
private bool IsPartOfMultipleCharSeparator(TokenizerContext context, char c) {
  var previousValue = context.LastToken == null ? string.Empty : context.LastToken.Value;
  if (!_tokenProvider.IsOperator(previousValue)) {
    return false;
  }
  var candidate = c.ToString(CultureInfo.InvariantCulture);
  if (!_tokenProvider.IsPossibleLastPartOfMultipleCharOperator(candidate)) {
    return false;
  }
  return !context.CurrentTokenHasValue;
}
/// <summary>
/// Creates a token from the text accumulated in <paramref name="context"/>.
/// A lone "-" with no preceding token, or following an operator, becomes a Negator token;
/// everything else is delegated to the token factory.
/// </summary>
/// <param name="context">The tokenizer context holding the current token text and result list.</param>
/// <param name="worksheet">Worksheet name forwarded to the token factory.</param>
/// <returns>The created token.</returns>
private Token CreateToken(TokenizerContext context, string worksheet) {
  if (context.CurrentToken == "-") {
    var previous = context.LastToken;
    // "||" rather than "&&": with "&&" the second operand dereferenced a null LastToken
    // (NullReferenceException) and the condition could never be true otherwise.
    if (previous == null || previous.TokenType == TokenType.Operator) {
      return new Token("-", TokenType.Negator);
    }
  }
  return _tokenFactory.Create(context.Result, context.CurrentToken, worksheet);
}
/// <summary>
/// Looks up <paramref name="c"/> among the provider's separator tokens.
/// </summary>
/// <param name="c">The character to look up.</param>
/// <param name="token">The matching separator token, or null when there is none.</param>
/// <returns>True when <paramref name="c"/> is a token separator.</returns>
private bool CharIsTokenSeparator(char c, out Token token) =>
    // Single lookup instead of ContainsKey followed by the indexer.
    _tokenProvider.Tokens.TryGetValue(c.ToString(), out token);
}