blob: 08c1c37b3a181ad2ad04487cd490a49fa278b132 [file] [log] [blame]
/*******************************************************************************
* You may amend and distribute as you like, but don't remove this header!
*
* EPPlus provides server-side generation of Excel 2007/2010 spreadsheets.
* See http://www.codeplex.com/EPPlus for details.
*
* Copyright (C) 2011 Jan Källman
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The GNU Lesser General Public License can be viewed at http://www.opensource.org/licenses/lgpl-license.php
* If you unfamiliar with this license or have questions about it, here is an http://www.gnu.org/licenses/gpl-faq.html
*
* All code and executables are provided "as is" with no warranty either express or implied.
* The author accepts no liability for any damage or loss of business that this product may cause.
*
* Code change notes:
*
* Author Change Date
* ******************************************************************************
* Mats Alm Added 2013-03-01 (Prior file history on https://github.com/swmal/ExcelFormulaParser)
*******************************************************************************/
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Text.RegularExpressions;
namespace EpplusFormulaParser;
internal static partial class SourceCodeTokenizer {
/// <summary>
/// Splits a formula string into a list of <see cref="Token"/> instances.
/// </summary>
/// <param name="input">The formula text. Leading <c>+</c> characters are ignored.</param>
/// <returns>
/// The tokens produced for <paramref name="input"/> after <c>CleanupTokens</c> has run,
/// or an empty list when <paramref name="input"/> is <c>null</c> or empty.
/// </returns>
public static List<Token> Tokenize(string input) {
if (string.IsNullOrEmpty(input)) {
return [];
}
// MA 1401: Ignore leading plus in formula.
input = input.TrimStart('+');
var context = new TokenizerContext(input);
// Tracks which quote character delimits the string being read: true for ', false for ".
var isSingleQuoteString = false;
for (var i = 0; i < context.Formula.Length; i++) {
var c = context.Formula[i];
// Characters registered in TokenSeparatorProvider.Tokens drive the state machine below;
// every other character is simply accumulated into the current token (end of loop body).
if (CharIsTokenSeparator(c, out var tokenSeparator)) {
if (context.IsInString) {
// When IsDoubleQuote reports an escaped quote, append the current character once
// and skip the following one.
if (IsDoubleQuote(tokenSeparator, i, context)) {
i++;
context.AppendToCurrentToken(c);
continue;
}
// Separators other than string delimiters are literal text inside a string.
if (tokenSeparator.TokenType != TokenType.String) {
context.AppendToCurrentToken(c);
continue;
}
// (CHANGE 2) A quote character of the other kind than the one that opened the string
// is literal text; only the matching quote character falls through to close the string.
if ((isSingleQuoteString && c != '\'') || (!isSingleQuoteString && c != '"')) {
context.AppendToCurrentToken(c);
continue;
}
}
// Brackets are kept as part of the current token; BracketCount tracks the nesting depth.
if (tokenSeparator.TokenType == TokenType.OpeningBracket) {
context.AppendToCurrentToken(c);
context.BracketCount++;
continue;
}
if (tokenSeparator.TokenType == TokenType.ClosingBracket) {
context.AppendToCurrentToken(c);
context.BracketCount--;
continue;
}
// While inside brackets, separators do not split tokens.
if (context.BracketCount > 0) {
context.AppendToCurrentToken(c);
continue;
}
// two operators in sequence could be "<=" or ">="
if (IsPartOfMultipleCharSeparator(context, c)) {
// Combine the previous operator with this character and replace the previous token
// with the separator registered for the combined text.
// NOTE(review): the indexer presumably throws if the combination is not registered
// in TokenSeparatorProvider.Tokens - confirm.
var sOp = context.LastToken!.Value + c.ToString(CultureInfo.InvariantCulture);
var op = TokenSeparatorProvider.Tokens[sOp];
context.ReplaceLastToken(op);
context.NewToken();
continue;
}
if (tokenSeparator.TokenType == TokenType.String) {
// (CHANGE 3) Record whether this delimiter is a single quote; the in-string checks
// above use it to decide which quote character closes the string.
isSingleQuoteString = c == '\'';
if (context.LastToken is { TokenType: TokenType.OpeningEnumerable }) {
// Directly after an opening enumerable only the in-string state is toggled; the quote
// is neither appended to the current token nor emitted as a token (changed 10/28/2015).
context.ToggleIsInString();
continue;
}
// If the previous token is a string delimiter, emit the accumulated text
// (or an empty string when nothing was accumulated) as the string's content.
if (context.LastToken is { TokenType: TokenType.String }) {
context.AddToken(
!context.CurrentTokenHasValue
? new(string.Empty, TokenType.StringContent)
: new Token(context.CurrentToken, TokenType.StringContent));
}
// The delimiter token is always emitted with the value ", whichever quote character was read.
context.AddToken(new("\"", TokenType.String));
context.ToggleIsInString();
context.NewToken();
continue;
}
// Any other separator ends the token accumulated so far.
if (context.CurrentTokenHasValue) {
// Text matched by StringRegex (only double-quote characters) is emitted as string content;
// everything else is classified by CreateToken.
if (StringRegex().IsMatch(context.CurrentToken)) {
context.AddToken(new(context.CurrentToken, TokenType.StringContent));
} else {
context.AddToken(CreateToken(context));
}
// If the separator is an opening parenthesis and the token just added was interpreted
// as an address or name, then that token is a function.
if (tokenSeparator.TokenType == TokenType.OpeningParenthesis
&& context.LastToken?.TokenType is TokenType.ExcelAddress or TokenType.NameValue) {
context.LastToken.TokenType = TokenType.Function;
}
}
// A minus in a position accepted by TokenIsNegator (nothing before it, or after an operator,
// "(", ",", ";" or an opening enumerable) is emitted as a Negator instead of the operator token.
if (tokenSeparator.Value == "-") {
if (TokenIsNegator(context)) {
context.AddToken(new("-", TokenType.Negator));
continue;
}
}
context.AddToken(tokenSeparator);
context.NewToken();
continue;
}
context.AppendToCurrentToken(c);
}
// Emit whatever text is still accumulated once the input is exhausted.
if (context.CurrentTokenHasValue) {
context.AddToken(CreateToken(context));
}
// Post-process the token list (function/name classification and sign collapsing).
CleanupTokens(context);
return context.Result;
}
/// <summary>
/// Determines whether the character at <paramref name="formulaCharIndex"/> starts an escaped
/// double quote, i.e. both that character and the one following it are <c>"</c>.
/// </summary>
/// <param name="tokenSeparator">The separator token looked up for the current character.</param>
/// <param name="formulaCharIndex">Index of the current character in <c>context.Formula</c>.</param>
/// <param name="context">Tokenizer state holding the formula being scanned.</param>
/// <returns>
/// <c>true</c> when the separator is a string delimiter and the formula contains two consecutive
/// double-quote characters starting at <paramref name="formulaCharIndex"/>; otherwise <c>false</c>.
/// </returns>
private static bool IsDoubleQuote(
    Token tokenSeparator,
    int formulaCharIndex,
    TokenizerContext context) {
    if (tokenSeparator.TokenType != TokenType.String) {
        return false;
    }

    // The current character must itself be a double quote. Tokenize also handles ' as a
    // String-type separator, so checking only the next character would treat a single quote
    // followed by the closing " (as in "abc'") as an escaped quote and swallow the terminator.
    if (context.Formula[formulaCharIndex] != '"') {
        return false;
    }

    var nextIndex = formulaCharIndex + 1;
    return nextIndex < context.Formula.Length
        && context.Formula[nextIndex] == '"';
}
/// <summary>
/// Post-processes the token list in <c>context.Result</c> in place.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>An <c>Unrecognized</c> token becomes a <c>Function</c> when it is directly followed by an
/// opening parenthesis, otherwise a <c>NameValue</c>.</item>
/// <item>A <c>Function</c> token that is not directly followed by an opening parenthesis is
/// demoted to <c>Unrecognized</c>.</item>
/// <item>Runs of <c>+</c>/<c>-</c> sign tokens are collapsed: a <c>+</c> right after an opening
/// parenthesis is dropped, <c>+</c> followed by a sign is dropped, <c>-+</c> keeps the minus, and
/// <c>--</c> becomes <c>+</c> (or disappears entirely at the very start of the formula).</item>
/// </list>
/// Sign tokens are only examined when another token follows them.
/// </remarks>
/// <param name="context">Tokenizer state whose <c>Result</c> list is rewritten.</param>
private static void CleanupTokens(TokenizerContext context) {
    var tokens = context.Result;
    for (var i = 0; i < tokens.Count; i++) {
        var token = tokens[i];
        var hasNext = i < tokens.Count - 1;

        if (token.TokenType == TokenType.Unrecognized) {
            token.TokenType = hasNext && tokens[i + 1].TokenType == TokenType.OpeningParenthesis
                ? TokenType.Function
                : TokenType.NameValue;
            continue;
        }

        if (token.TokenType == TokenType.Function) {
            if (!hasNext || tokens[i + 1].TokenType != TokenType.OpeningParenthesis) {
                token.TokenType = TokenType.Unrecognized;
            }
            continue;
        }

        var isSign = token.TokenType is (TokenType.Operator or TokenType.Negator)
            && token.Value is ("+" or "-");
        if (!isSign || !hasNext) {
            continue;
        }

        // Remove any + with an opening parenthesis before it.
        if (i > 0
            && token.Value == "+"
            && tokens[i - 1].TokenType == TokenType.OpeningParenthesis) {
            tokens.RemoveAt(i);
            SetNegatorOperator(context, i);
            i--; // re-examine the token that moved into this slot
            continue;
        }

        var nextToken = tokens[i + 1];
        if (nextToken.TokenType is not (TokenType.Operator or TokenType.Negator)) {
            continue;
        }

        if (token.Value == "+" && nextToken.Value is ("+" or "-")) {
            // "++" / "+-": the leading plus carries no meaning, remove it.
            tokens.RemoveAt(i);
            SetNegatorOperator(context, i);
            i--;
        } else if (token.Value == "-" && nextToken.Value == "+") {
            // "-+": remove the plus and keep the minus.
            tokens.RemoveAt(i + 1);
            SetNegatorOperator(context, i);
            i--;
        } else if (token.Value == "-" && nextToken.Value == "-") {
            if (i == 0) {
                // A leading "--" cancels out, so drop BOTH signs. After the first removal the
                // second minus sits at index 0; removing index 1 instead would delete the
                // operand (or throw when nothing follows the two signs).
                tokens.RemoveAt(0);
                tokens.RemoveAt(0);
                i--; // re-examine the token that is now first
            } else {
                // "--" in the middle of a formula: drop the first minus, turn the second into "+".
                tokens.RemoveAt(i);
                tokens[i] = TokenSeparatorProvider.Tokens["+"];
                SetNegatorOperator(context, i);
                i--;
            }
        }
    }
}
/// <summary>
/// Re-evaluates the minus token at index <paramref name="i"/> of <c>context.Result</c> and
/// replaces it with either a <c>Negator</c> token or the registered <c>-</c> separator token,
/// depending on the token that precedes it.
/// </summary>
/// <param name="context">Tokenizer state whose <c>Result</c> list is updated.</param>
/// <param name="i">Index of the token to re-evaluate; nothing happens when it is not positive.</param>
private static void SetNegatorOperator(TokenizerContext context, int i) {
    var candidate = context.Result[i];
    if (candidate.Value != "-" || i <= 0) {
        return;
    }

    if (candidate.TokenType != TokenType.Operator && candidate.TokenType != TokenType.Negator) {
        return;
    }

    // The preceding token decides whether this minus is a sign or a binary operator.
    var previous = context.Result[i - 1];
    context.Result[i] = TokenIsNegator(previous)
        ? new Token("-", TokenType.Negator)
        : TokenSeparatorProvider.Tokens["-"];
}
/// <summary>
/// Tells whether a minus read now would be a sign, judged by the last token emitted so far.
/// </summary>
/// <param name="context">Tokenizer state providing <c>LastToken</c>.</param>
/// <returns>The result of the token-based overload applied to <c>context.LastToken</c>.</returns>
private static bool TokenIsNegator(TokenizerContext context) =>
    TokenIsNegator(context.LastToken);
/// <summary>
/// Tells whether a minus that follows <paramref name="t"/> acts as a sign (negator).
/// </summary>
/// <param name="t">The preceding token, or <c>null</c> when there is none.</param>
/// <returns>
/// <c>true</c> when there is no preceding token, or when it is an operator, an opening
/// parenthesis, a comma, a semicolon or an opening enumerable; otherwise <c>false</c>.
/// </returns>
private static bool TokenIsNegator(Token? t) {
    if (t == null) {
        return true;
    }

    return t.TokenType switch {
        TokenType.Operator
            or TokenType.OpeningParenthesis
            or TokenType.Comma
            or TokenType.SemiColon
            or TokenType.OpeningEnumerable => true,
        _ => false,
    };
}
/// <summary>
/// Checks whether <paramref name="c"/> could be the second character of a multi-character
/// operator whose first part is the last emitted token.
/// </summary>
/// <param name="context">Tokenizer state providing the last token and the current token buffer.</param>
/// <param name="c">The separator character currently being read.</param>
/// <returns>
/// <c>true</c> when the last token's value is an operator, <paramref name="c"/> is a possible
/// last part of a multi-character operator, and no text has been accumulated since the last token.
/// </returns>
private static bool IsPartOfMultipleCharSeparator(TokenizerContext context, char c) {
    var previousValue = context.LastToken is { } previous ? previous.Value : string.Empty;
    if (!TokenSeparatorProvider.IsOperator(previousValue)) {
        return false;
    }

    var candidate = c.ToString(CultureInfo.InvariantCulture);
    if (!TokenSeparatorProvider.IsPossibleLastPartOfMultipleCharOperator(candidate)) {
        return false;
    }

    // Text accumulated between the two characters means they are not adjacent.
    return !context.CurrentTokenHasValue;
}
/// <summary>
/// Creates a token from the text currently accumulated in <paramref name="context"/>.
/// </summary>
/// <param name="context">Tokenizer state providing the current token text and the tokens emitted so far.</param>
/// <returns>The token produced by <c>TokenFactory.Create</c> for the current token text.</returns>
/// <exception cref="NullReferenceException">
/// Thrown when the current token text is <c>-</c> and no token has been emitted yet.
/// </exception>
private static Token CreateToken(TokenizerContext context) {
    var isMinusWithoutPredecessor = context.CurrentToken is "-" && context.LastToken is null;
    if (isMinusWithoutPredecessor) {
        // NOTE(review): throwing NullReferenceException directly is discouraged (CA2201);
        // the type is kept because callers may depend on it.
        throw new NullReferenceException();
    }

    return TokenFactory.Create(context.Result, context.CurrentToken);
}
/// <summary>
/// Looks up the separator token registered for a single character.
/// </summary>
/// <param name="c">The character to look up.</param>
/// <param name="token">The registered separator token when the lookup succeeds.</param>
/// <returns><c>true</c> when <c>TokenSeparatorProvider.Tokens</c> contains an entry for <paramref name="c"/>.</returns>
private static bool CharIsTokenSeparator(char c, [NotNullWhen(true)] out Token? token) =>
    TokenSeparatorProvider.Tokens.TryGetValue(c.ToString(), out token);
/// <summary>
/// Source-generated regular expression matching text that consists of zero or more
/// double-quote characters and nothing else (the empty string matches as well).
/// </summary>
[GeneratedRegex("^\"*$")]
private static partial Regex StringRegex();
}