﻿/***************************************************************************

Copyright (c) Microsoft Corporation 2012-2015.

This code is licensed using the Microsoft Public License (Ms-PL).  The text of the license can be found here:

http://www.microsoft.com/resources/sharedsource/licensingbasics/publiclicense.mspx

Published at http://OpenXmlDeveloper.org
Resource Center and Documentation: http://openxmldeveloper.org/wiki/w/wiki/powertools-for-open-xml.aspx

Developer: Eric White
Blog: http://www.ericwhite.com
Twitter: @EricWhiteDev
Email: eric@ericwhite.com

***************************************************************************/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml.Linq;

namespace OpenXmlPowerTools
{
    public class OpenXmlRegex
    {
        // Grouping key used by PmlSearchAndReplaceTransform for paragraph children that must
        // be emitted as they are instead of being merged with adjacent runs.
        private const string DontConsolidate = "DontConsolidate";

        // Names of the revision tracking elements whose w:id attribute is checked by
        // ReplaceInternal: after a WordprocessingML search/replace, elements with these names
        // that lack a w:id get one, and duplicate w:id values among them are renumbered.
        private static readonly HashSet<XName> RevTrackMarkupWithId = new HashSet<XName>
        {
            W.cellDel,
            W.cellIns,
            W.cellMerge,
            W.customXmlDelRangeEnd,
            W.customXmlDelRangeStart,
            W.customXmlInsRangeEnd,
            W.customXmlInsRangeStart,
            W.customXmlMoveFromRangeEnd,
            W.customXmlMoveFromRangeStart,
            W.customXmlMoveToRangeEnd,
            W.customXmlMoveToRangeStart,
            W.del,
            W.ins,
            W.moveFrom,
            W.moveFromRangeEnd,
            W.moveFromRangeStart,
            W.moveTo,
            W.moveToRangeEnd,
            W.moveToRangeStart,
            W.pPrChange,
            W.rPrChange,
            W.sectPrChange,
            W.tblGridChange,
            W.tblPrChange,
            W.tblPrExChange,
            W.tcPrChange
        };

        /// <summary>
        /// Searches <paramref name="content"/> for <paramref name="regex"/> without supplying
        /// a replacement string or a per-match callback.
        /// </summary>
        /// <param name="content">The elements to search.</param>
        /// <param name="regex">The regular expression to look for.</param>
        /// <returns>The match count reported by the shared search routine.</returns>
        public static int Match(IEnumerable<XElement> content, Regex regex)
        {
            return ReplaceInternal(
                content,
                regex,
                replacement: null,
                callback: null,
                trackRevisions: false,
                revisionTrackingAuthor: null,
                coalesceContent: true);
        }

        /// <summary>
        /// Searches <paramref name="content"/> for <paramref name="regex"/> without replacing
        /// anything. When <paramref name="found"/> is not null it is handed to the search
        /// routine as a per-match notification; when it is null nothing is notified.
        /// </summary>
        /// <param name="content">The elements to search.</param>
        /// <param name="regex">The regular expression to look for.</param>
        /// <param name="found">Optional action receiving an element and the match; may be null.</param>
        /// <returns>The match count reported by the shared search routine.</returns>
        public static int Match(IEnumerable<XElement> content, Regex regex, Action<XElement, Match> found)
        {
            // Adapt the optional Action to the Func-based callback used internally.
            // The adapter itself is never null, and it always answers true.
            Func<XElement, Match, bool> notify = (matchedElement, match) =>
            {
                if (found != null)
                {
                    found.Invoke(matchedElement, match);
                }

                return true;
            };

            return ReplaceInternal(
                content,
                regex,
                replacement: null,
                callback: notify,
                trackRevisions: false,
                revisionTrackingAuthor: null,
                coalesceContent: true);
        }

        /// <summary>
        /// Replaces (or deletes) text matching <paramref name="regex"/> in
        /// <paramref name="content"/>, without revision tracking and with content coalescing
        /// switched on.
        /// <list type="bullet">
        /// <item>Non-empty <paramref name="replacement"/>, null <paramref name="doReplacement"/>:
        /// every match is replaced.</item>
        /// <item>Empty <paramref name="replacement"/>, null <paramref name="doReplacement"/>:
        /// every match is deleted.</item>
        /// <item>Non-null <paramref name="doReplacement"/>: it is called for each match and
        /// returns true to replace / delete that match or false to leave it alone. Returning
        /// true once and false afterwards therefore changes only the first match.</item>
        /// </list>
        /// </summary>
        /// <param name="content">The elements to search.</param>
        /// <param name="regex">The regular expression to look for.</param>
        /// <param name="replacement">The replacement text; an empty string deletes the match.</param>
        /// <param name="doReplacement">Optional per-match predicate; may be null.</param>
        /// <returns>The match count reported by the shared search routine.</returns>
        public static int Replace(IEnumerable<XElement> content, Regex regex, string replacement,
            Func<XElement, Match, bool> doReplacement)
        {
            return ReplaceInternal(
                content,
                regex,
                replacement,
                doReplacement,
                trackRevisions: false,
                revisionTrackingAuthor: null,
                coalesceContent: true);
        }

        /// <summary>
        /// Same as the four-parameter overload, but lets the caller decide through
        /// <paramref name="coalesceContent"/> whether content is coalesced afterwards.
        /// Switching coalescing off is necessary for DocumentAssembler.
        /// </summary>
        /// <param name="content">The elements to search.</param>
        /// <param name="regex">The regular expression to look for.</param>
        /// <param name="replacement">The replacement text; an empty string deletes the match.</param>
        /// <param name="doReplacement">Optional per-match predicate; may be null.</param>
        /// <param name="coalesceContent">Forwarded unchanged to the shared search routine.</param>
        /// <returns>The match count reported by the shared search routine.</returns>
        public static int Replace(IEnumerable<XElement> content, Regex regex, string replacement,
            Func<XElement, Match, bool> doReplacement, bool coalesceContent)
        {
            return ReplaceInternal(
                content,
                regex,
                replacement,
                doReplacement,
                trackRevisions: false,
                revisionTrackingAuthor: null,
                coalesceContent: coalesceContent);
        }

        /// <summary>
        /// Replaces (or deletes) text matching <paramref name="regex"/> in
        /// <paramref name="content"/>, optionally recording the change as tracked revisions.
        /// Content coalescing is switched on.
        /// <list type="bullet">
        /// <item>Non-empty <paramref name="replacement"/>, null <paramref name="doReplacement"/>:
        /// every match is replaced.</item>
        /// <item>Empty <paramref name="replacement"/>, null <paramref name="doReplacement"/>:
        /// every match is deleted.</item>
        /// <item>Non-null <paramref name="doReplacement"/>: it is called for each match and
        /// returns true to replace / delete that match or false to leave it alone. Returning
        /// true once and false afterwards therefore changes only the first match.</item>
        /// <item><paramref name="trackRevisions"/> true: the change is written as revision
        /// tracking markup with <paramref name="author"/> as the revision author.</item>
        /// <item><paramref name="trackRevisions"/> true for PPTX content: an exception is
        /// thrown.</item>
        /// </list>
        /// </summary>
        /// <param name="content">The elements to search.</param>
        /// <param name="regex">The regular expression to look for.</param>
        /// <param name="replacement">The replacement text; an empty string deletes the match.</param>
        /// <param name="doReplacement">Optional per-match predicate; may be null.</param>
        /// <param name="trackRevisions">Whether to emit revision tracking markup.</param>
        /// <param name="author">The author recorded on the revision tracking markup.</param>
        /// <returns>The match count reported by the shared search routine.</returns>
        public static int Replace(IEnumerable<XElement> content, Regex regex, string replacement,
            Func<XElement, Match, bool> doReplacement, bool trackRevisions, string author)
        {
            return ReplaceInternal(
                content,
                regex,
                replacement,
                doReplacement,
                trackRevisions: trackRevisions,
                revisionTrackingAuthor: author,
                coalesceContent: true);
        }

        /// <summary>
        /// Shared implementation behind every public Match / Replace overload.
        /// The namespace of the first element of <paramref name="content"/> selects the
        /// transform: the WordprocessingML namespace uses the WML transform, the
        /// PresentationML / DrawingML namespaces use the PML transform, and anything else
        /// yields 0. Each element's child nodes are replaced in place with the transformed
        /// nodes. For WordprocessingML content the w:id attributes of revision tracking
        /// markup in the whole tree are made present and unique afterwards.
        /// </summary>
        /// <param name="content">The elements to search; must not be null.</param>
        /// <param name="regex">The regular expression to look for; must not be null.</param>
        /// <param name="replacement">Replacement text, or null for match-only operation.</param>
        /// <param name="callback">Optional per-match callback; may be null.</param>
        /// <param name="trackRevisions">Whether to emit revision tracking markup (WML only).</param>
        /// <param name="revisionTrackingAuthor">Author recorded on revision tracking markup.</param>
        /// <param name="coalesceContent">Forwarded to the WML transform.</param>
        /// <returns>The number of matches counted by the transform.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="content"/> or <paramref name="regex"/> is null.
        /// </exception>
        /// <exception cref="OpenXmlPowerToolsException">
        /// <paramref name="trackRevisions"/> is true for PresentationML / DrawingML content.
        /// </exception>
        private static int ReplaceInternal(IEnumerable<XElement> content, Regex regex, string replacement,
            Func<XElement, Match, bool> callback, bool trackRevisions, string revisionTrackingAuthor,
            bool coalesceContent)
        {
            if (content == null) throw new ArgumentNullException("content");
            if (regex == null) throw new ArgumentNullException("regex");

            // Materialize once; the sequence is enumerated more than once below.
            IEnumerable<XElement> contentList = content as IList<XElement> ?? content.ToList();

            // A null result means the sequence is empty (or starts with a null element);
            // either way there is nothing to search.
            XElement first = contentList.FirstOrDefault();
            if (first == null)
                return 0;

            if (first.Name.Namespace == W.w)
            {
                var replInfo = new ReplaceInternalInfo { Count = 0 };
                foreach (XElement c in contentList)
                {
                    var newC = (XElement) WmlSearchAndReplaceTransform(c, regex, replacement, callback, trackRevisions,
                        revisionTrackingAuthor, replInfo, coalesceContent);
                    c.ReplaceNodes(newC.Nodes());
                }

                // The topmost ancestor of the first content element is treated as the tree
                // whose revision ids must be consistent.
                XElement root = first.AncestorsAndSelf().Last();
                EnsureUniqueRevisionIds(root);

                return replInfo.Count;
            }

            if ((first.Name.Namespace == P.p) || (first.Name.Namespace == A.a))
            {
                if (trackRevisions)
                    throw new OpenXmlPowerToolsException("PPTX does not support revision tracking");

                var counter = new ReplaceInternalInfo { Count = 0 };
                foreach (XElement c in contentList)
                {
                    var newC = (XElement) PmlSearchAndReplaceTransform(c, regex, replacement, callback, counter);
                    c.ReplaceNodes(newC.Nodes());
                }

                return counter.Count;
            }

            return 0;
        }

        /// <summary>
        /// Gives every revision tracking element under <paramref name="root"/> a w:id and
        /// renumbers all but the first element of each group sharing the same w:id.
        /// </summary>
        private static void EnsureUniqueRevisionIds(XElement root)
        {
            // Start numbering after the largest id already in use (1 if there is none).
            int nextId = new[] { 0 }
                             .Concat(root
                                 .Descendants()
                                 .Where(d => RevTrackMarkupWithId.Contains(d.Name))
                                 .Attributes(W.id)
                                 .Select(a => (int) a))
                             .Max() + 1;

            // Pass 1: supply missing ids.
            IEnumerable<XElement> revTrackingWithoutId = root
                .DescendantsAndSelf()
                .Where(d => RevTrackMarkupWithId.Contains(d.Name) && (d.Attribute(W.id) == null));
            foreach (XElement item in revTrackingWithoutId)
                item.Add(new XAttribute(W.id, nextId++));

            // Pass 2: every element now has an id; renumber duplicates, keeping the first
            // element of each group untouched.
            List<IGrouping<int, XElement>> revTrackingWithDuplicateIds = root
                .DescendantsAndSelf()
                .Where(d => RevTrackMarkupWithId.Contains(d.Name))
                .GroupBy(d => (int) d.Attribute(W.id))
                .Where(g => g.Count() > 1)
                .ToList();
            foreach (IGrouping<int, XElement> group in revTrackingWithDuplicateIds)
                foreach (XElement gc in group.Skip(1))
                {
                    XAttribute xAttribute = gc.Attribute(W.id);
                    if (xAttribute != null) xAttribute.Value = nextId.ToString();
                    nextId++;
                }
        }

        /// <summary>
        /// Recursively transforms a WordprocessingML node for regex search / replace.
        /// Non-element nodes are returned unchanged. A w:p is searched and, when the regex
        /// matches its text, rebuilt with its runs split so that matches can be mapped back
        /// to runs. A w:ins containing runs and a w:r are expanded into sequences of
        /// elements. Any other element is rebuilt with its children transformed.
        /// </summary>
        /// <param name="node">The node to transform.</param>
        /// <param name="regex">The regular expression to look for.</param>
        /// <param name="replacement">Replacement text, or null for match-only operation.</param>
        /// <param name="callback">Optional per-match callback; may be null.</param>
        /// <param name="trackRevisions">Whether replacements are written as w:ins / w:del markup.</param>
        /// <param name="revisionTrackingAuthor">Author written on generated w:ins / w:del elements.</param>
        /// <param name="replInfo">Accumulator whose Count is increased by the number of matches.</param>
        /// <param name="coalesceContent">Whether the resulting paragraph is passed through
        /// CoalesceAdjacentRunsWithIdenticalFormatting.</param>
        /// <returns>
        /// The node itself, a new element, or a sequence of elements (for w:r and w:ins);
        /// hence the return type is object.
        /// </returns>
        private static object WmlSearchAndReplaceTransform(XNode node, Regex regex, string replacement,
            Func<XElement, Match, bool> callback, bool trackRevisions, string revisionTrackingAuthor,
            ReplaceInternalInfo replInfo, bool coalesceContent)
        {
            var element = node as XElement;
            if (element == null) return node;

            if (element.Name == W.p)
            {
                XElement paragraph = element;

                // Cheap pre-check on the unsplit paragraph: concatenate the text of its runs,
                // leaving out runs that sit directly inside a w:del.
                // NOTE(review): DescendantsTrimmed(W.txbxContent) presumably does not descend
                // into text box content - confirm against the helper.
                string preliminaryContent = paragraph
                    .DescendantsTrimmed(W.txbxContent)
                    .Where(d => d.Name == W.r && (d.Parent == null || d.Parent.Name != W.del))
                    .Select(UnicodeMapper.RunToString)
                    .StringConcatenate();
                if (regex.IsMatch(preliminaryContent))
                {
                    // Rebuild the paragraph; the recursive call splits each run (see the
                    // w:r branch below).
                    var paragraphWithSplitRuns = new XElement(W.p,
                        paragraph.Attributes(),
                        paragraph.Nodes().Select(n => WmlSearchAndReplaceTransform(n, regex, replacement, callback,
                            trackRevisions, revisionTrackingAuthor, replInfo, coalesceContent)));

                    IEnumerable<XElement> runsTrimmed = paragraphWithSplitRuns
                        .DescendantsTrimmed(W.txbxContent)
                        .Where(d => d.Name == W.r && (d.Parent == null || d.Parent.Name != W.del));

                    var charsAndRuns = runsTrimmed
                        .Select(r => new { Ch = UnicodeMapper.RunToString(r), r })
                        .ToList();

                    // content and alignedRuns are built from the same list, so a position in
                    // content is used below as a position in alignedRuns.
                    // NOTE(review): this assumes RunToString yields exactly one character per
                    // split run - confirm against UnicodeMapper.
                    string content = charsAndRuns.Select(t => t.Ch).StringConcatenate();
                    XElement[] alignedRuns = charsAndRuns.Select(t => t.r).ToArray();

                    // Every match is counted, whether or not the callback later accepts it.
                    MatchCollection matchCollection = regex.Matches(content);
                    replInfo.Count += matchCollection.Count;

                    // Process Match
                    // Match-only mode: report matches and return the ORIGINAL paragraph, not
                    // the rebuilt one; the callback's return value is ignored here.
                    if (replacement == null)
                    {
                        if (callback == null) return paragraph;

                        foreach (Match match in matchCollection.Cast<Match>())
                            callback(paragraph, match);

                        return paragraph;
                    }

                    // Process Replace
                    foreach (Match match in matchCollection.Cast<Match>())
                    {
                        // Zero-length matches select no runs; skip them before asking the callback.
                        if (match.Length == 0) continue;
                        if ((callback != null) && !callback(paragraph, match)) continue;

                        // uses the Skip / Take special semantics of array to implement efficient finding of sub array
                        List<XElement> runCollection = alignedRuns
                            .Skip(match.Index)
                            .Take(match.Length)
                            .ToList();

                        // save away first run properties
                        XElement firstRun = runCollection.First();
                        XElement firstRunProperties = firstRun.Elements(W.rPr).FirstOrDefault();

                        if (trackRevisions)
                        {
                            // A non-empty replacement is inserted as a w:ins in front of the
                            // first matched run (or in front of that run's enclosing w:ins).
                            if (replacement != "")
                            {
                                // We coalesce runs as some methods, e.g., in DocumentAssembler,
                                // will try to find the replacement string even though they
                                // set coalesceContent to false.
                                string newTextValue = match.Result(replacement);
                                List<XElement> newRuns = UnicodeMapper.StringToCoalescedRunList(newTextValue,
                                    firstRunProperties);
                                var newIns = new XElement(W.ins,
                                    new XAttribute(W.author, revisionTrackingAuthor),
                                    new XAttribute(W.date, DateTime.UtcNow.ToString("s") + "Z"),
                                    newRuns);

                                if (firstRun.Parent != null && firstRun.Parent.Name == W.ins)
                                    firstRun.Parent.AddBeforeSelf(newIns);
                                else
                                    firstRun.AddBeforeSelf(newIns);
                            }

                            // Mark every matched run as deleted.
                            foreach (XElement run in runCollection)
                            {
                                bool isInIns = run.Parent != null && run.Parent.Name == W.ins;
                                if (isInIns)
                                {
                                    XElement parentIns = run.Parent;
                                    XElement grandParentParagraph = parentIns.Parent;

                                    // A w:ins that has already been detached has no parent
                                    // and is left alone.
                                    if (grandParentParagraph != null)
                                    {
                                        if ((string) parentIns.Attributes(W.author).FirstOrDefault() ==
                                            revisionTrackingAuthor)
                                        {
                                            // Insertion by the same author: drop the w:ins
                                            // altogether. Only child elements of the parent
                                            // are kept by this rebuild.
                                            List<XElement> parentInsSiblings = grandParentParagraph
                                                .Elements()
                                                .Where(c => c != parentIns)
                                                .ToList();
                                            grandParentParagraph.ReplaceNodes(parentInsSiblings);
                                        }
                                        else
                                        {
                                            // Insertion by another author: keep the w:ins
                                            // (same attributes) and wrap its child elements,
                                            // converted by TransformToDelText, in a w:del
                                            // attributed to revisionTrackingAuthor.
                                            List<XElement> parentInsSiblings = grandParentParagraph
                                                .Elements()
                                                .Select(c => c == parentIns
                                                    ? new XElement(W.ins,
                                                        parentIns.Attributes(),
                                                        new XElement(W.del,
                                                            new XAttribute(W.author, revisionTrackingAuthor),
                                                            new XAttribute(W.date, DateTime.UtcNow.ToString("s") + "Z"),
                                                            parentIns.Elements().Select(TransformToDelText)))
                                                    : c)
                                                .ToList();
                                            grandParentParagraph.ReplaceNodes(parentInsSiblings);
                                        }
                                    }
                                }
                                else
                                {
                                    // Run outside any w:ins: replace it with a w:del holding
                                    // the run converted by TransformToDelText.
                                    var delRun = new XElement(W.del,
                                        new XAttribute(W.author, revisionTrackingAuthor),
                                        new XAttribute(W.date, DateTime.UtcNow.ToString("s") + "Z"),
                                        TransformToDelText(run));
                                    run.ReplaceWith(delRun);
                                }
                            }
                        }
                        else // not tracked revisions
                        {
                            // Remove every matched run except the first; a run inside a w:ins
                            // takes its whole w:ins parent with it.
                            foreach (XElement runToDelete in runCollection.Skip(1).ToList())
                                if (runToDelete.Parent != null && runToDelete.Parent.Name == W.ins)
                                    runToDelete.Parent.Remove();
                                else
                                    runToDelete.Remove();

                            // We coalesce runs as some methods, e.g., in DocumentAssembler,
                            // will try to find the replacement string even though they
                            // set coalesceContent to false.
                            string newTextValue = match.Result(replacement);
                            List<XElement> newRuns = UnicodeMapper.StringToCoalescedRunList(newTextValue,
                                firstRunProperties);

                            // The first matched run (or its enclosing w:ins) gives way to the
                            // replacement runs.
                            if (firstRun.Parent != null && firstRun.Parent.Name == W.ins)
                                firstRun.Parent.ReplaceWith(newRuns);
                            else
                                firstRun.ReplaceWith(newRuns);
                        }
                    }

                    return coalesceContent
                        ? WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(paragraphWithSplitRuns)
                        : paragraphWithSplitRuns;
                }

                // No match in this paragraph: rebuild it. Paragraph properties, runs with
                // text, any element with a direct w:tab child, and insertions containing runs
                // with text are passed through as they are; every other child element is
                // transformed recursively.
                var newParagraph = new XElement(W.p,
                    paragraph.Attributes(),
                    paragraph.Nodes().Select(n =>
                    {
                        var e = n as XElement;
                        if (e == null) return n;

                        if (e.Name == W.pPr)
                            return e;
                        if (((e.Name == W.r) && e.Elements(W.t).Any()) || e.Elements(W.tab).Any())
                            return e;
                        if ((e.Name == W.ins) && e.Elements(W.r).Elements(W.t).Any())
                            return e;

                        return WmlSearchAndReplaceTransform(e, regex, replacement, callback,
                            trackRevisions, revisionTrackingAuthor, replInfo, coalesceContent);
                    }));
                return coalesceContent
                    ? WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(newParagraph) // CoalesceContent(newParagraph)
                    : newParagraph;
            }

            // w:ins with runs: transform each child element; where a child expands into a
            // sequence of elements (split runs), wrap each of them in its own w:ins carrying
            // the original w:ins attributes.
            if (element.Name == W.ins && element.Elements(W.r).Any())
            {
                List<object> collectionOfCollections = element
                    .Elements()
                    .Select(n => WmlSearchAndReplaceTransform(n, regex, replacement, callback, trackRevisions,
                        revisionTrackingAuthor, replInfo, coalesceContent))
                    .ToList();
                List<object> collectionOfIns = collectionOfCollections
                    .Select(c =>
                    {
                        var elements = c as IEnumerable<XElement>;
                        return elements != null
                            ? elements.Select(ixc => new XElement(W.ins, element.Attributes(), ixc))
                            : c;
                    })
                    .ToList();
                return collectionOfIns;
            }

            // w:r: split the run. Each character of a w:t child becomes its own run, and every
            // other non-w:rPr child gets a run of its own; each new run repeats the original
            // run's w:rPr.
            // NOTE(review): XmlUtil.GetXmlSpaceAttribute presumably supplies the xml:space
            // attribute when the character needs it - confirm against XmlUtil.
            if (element.Name == W.r)
            {
                return element.Elements()
                    .Where(e => e.Name != W.rPr)
                    .Select(e => e.Name == W.t
                        ? ((string) e).Select(c =>
                            new XElement(W.r,
                                element.Elements(W.rPr),
                                new XElement(W.t, XmlUtil.GetXmlSpaceAttribute(c), c)))
                        : new[] { new XElement(W.r, element.Elements(W.rPr), e) })
                    .SelectMany(t => t);
            }

            // Any other element: same name and attributes, children transformed recursively.
            return new XElement(element.Name,
                element.Attributes(),
                element.Nodes()
                    .Select(n => WmlSearchAndReplaceTransform(n, regex, replacement, callback, trackRevisions,
                        revisionTrackingAuthor, replInfo, coalesceContent)));
        }

        /// <summary>
        /// Produces a copy of <paramref name="node"/> in which every w:t element is turned
        /// into a w:delText element with the same text. Non-element nodes are returned as
        /// they are; other elements are rebuilt with the same name and attributes and with
        /// their child nodes converted recursively.
        /// </summary>
        /// <param name="node">The node to convert.</param>
        /// <returns>The original node, or a newly created element.</returns>
        private static object TransformToDelText(XNode node)
        {
            var current = node as XElement;
            if (current == null)
            {
                return node;
            }

            if (current.Name != W.t)
            {
                // Not a text element: keep name and attributes, convert the children.
                IEnumerable<object> convertedChildren = current.Nodes().Select(TransformToDelText);
                return new XElement(current.Name, current.Attributes(), convertedChildren);
            }

            string text = current.Value;
            return new XElement(W.delText, XmlUtil.GetXmlSpaceAttribute(text), text);
        }

        /// <summary>
        /// Recursively transforms a PresentationML / DrawingML node for regex search / replace.
        /// Non-element nodes are returned unchanged. An a:p is searched and, when the regex
        /// matches its text, rebuilt with its text runs split into one run per character so
        /// that matches can be mapped back to runs. An a:r containing a:t is expanded into a
        /// sequence of runs. Any other element is rebuilt with its children transformed.
        /// </summary>
        /// <param name="node">The node to transform.</param>
        /// <param name="regex">The regular expression to look for.</param>
        /// <param name="replacement">
        /// Replacement text, or null for match-only operation. The text is inserted literally;
        /// substitution patterns are not expanded by this method.
        /// </param>
        /// <param name="callback">
        /// Optional per-match callback; may be null. In replace mode a false result leaves
        /// the match untouched; in match-only mode the result is ignored.
        /// </param>
        /// <param name="counter">Accumulator whose Count is increased by the number of matches.</param>
        /// <returns>
        /// The node itself, a new element, or a sequence (for a:r); hence the return type is
        /// object.
        /// </returns>
        private static object PmlSearchAndReplaceTransform(XNode node, Regex regex, string replacement,
            Func<XElement, Match, bool> callback, ReplaceInternalInfo counter)
        {
            var element = node as XElement;
            if (element == null) return node;

            if (element.Name == A.p)
            {
                XElement paragraph = element;

                // Cheap pre-check on the unsplit paragraph text.
                string contents = element.Descendants(A.t).Select(t => (string) t).StringConcatenate();
                if (!regex.IsMatch(contents))
                    return new XElement(element.Name, element.Attributes(), element.Nodes());

                // Rebuild the paragraph; the recursive call splits each text run into
                // single-character runs (see the a:r branch below).
                var paragraphWithSplitRuns = new XElement(A.p,
                    paragraph.Attributes(),
                    paragraph.Nodes()
                        .Select(n => PmlSearchAndReplaceTransform(n, regex, replacement, callback, counter)));

                List<XElement> runsTrimmed = paragraphWithSplitRuns
                    .Descendants(A.r)
                    .ToList();

                // A run without a:t is represented by the placeholder character U+0001 so
                // that it still occupies one position in the searched string.
                var charsAndRuns = runsTrimmed
                    .Select(r =>
                        r.Element(A.t) != null
                            ? new { Ch = r.Element(A.t).Value, r }
                            : new { Ch = "\x01", r })
                    .ToList();

                // content and alignedRuns are built from the same list, so a position in
                // content is used below as a position in alignedRuns.
                string content = charsAndRuns.Select(t => t.Ch).StringConcatenate();
                XElement[] alignedRuns = charsAndRuns.Select(t => t.r).ToArray();

                // Every match is counted, whether or not the callback later accepts it.
                MatchCollection matchCollection = regex.Matches(content);
                counter.Count += matchCollection.Count;

                if (replacement == null)
                {
                    // Match-only mode. The callback is optional (the two-argument Match
                    // overload passes null), so it must be checked before it is invoked.
                    if (callback != null)
                    {
                        foreach (Match match in matchCollection.Cast<Match>())
                            callback(paragraph, match);
                    }

                    // The original paragraph is returned, not the rebuilt one.
                    return paragraph;
                }

                foreach (Match match in matchCollection.Cast<Match>())
                {
                    // Zero-length matches select no runs, so there is nothing to replace;
                    // skipping them here mirrors the WordprocessingML transform.
                    if (match.Length == 0) continue;
                    if ((callback != null) && !callback(paragraph, match)) continue;

                    // Skip / Take on the aligned array selects the runs covered by the match.
                    List<XElement> runCollection = alignedRuns
                        .Skip(match.Index)
                        .Take(match.Length)
                        .ToList();

                    // Keep the first run: its run properties are reused for the replacement.
                    XElement firstRun = runCollection.First();

                    // Binds to the LINQ to XML Remove extension on IEnumerable<XElement>,
                    // which uses snapshot semantics and removes every element from its parent.
                    runCollection.Skip(1).Remove();

                    // New run with the first run's properties and the replacement text.
                    var newFirstRun = new XElement(A.r,
                        firstRun.Element(A.rPr),
                        new XElement(A.t, replacement));

                    // Swaps the new run into the first run's position in its parent.
                    firstRun.ReplaceWith(newFirstRun);
                }

                // Merge adjacent runs again. Runs are grouped by the serialized a:rPr; a
                // child that is not an a:r, or an a:r that is not exactly "optional a:rPr
                // plus one a:t", gets the DontConsolidate key and is emitted as it is.
                IEnumerable<IGrouping<string, XElement>> groupedAdjacentRunsWithIdenticalFormatting =
                    paragraphWithSplitRuns
                        .Elements()
                        .GroupAdjacent(ce =>
                        {
                            if (ce.Name != A.r)
                                return DontConsolidate;
                            if ((ce.Elements().Count(e => e.Name != A.rPr) != 1) || (ce.Element(A.t) == null))
                                return DontConsolidate;

                            XElement rPr = ce.Element(A.rPr);
                            return rPr == null ? "" : rPr.ToString(SaveOptions.None);
                        });

                // The paragraph's attributes are carried over so that the match path keeps
                // them just like the no-match path above does.
                var paragraphWithConsolidatedRuns = new XElement(A.p,
                    paragraphWithSplitRuns.Attributes(),
                    groupedAdjacentRunsWithIdenticalFormatting.Select(g =>
                    {
                        if (g.Key == DontConsolidate)
                            return (object) g;

                        string textValue = g.Select(r => r.Element(A.t).Value).StringConcatenate();
                        XAttribute xs = XmlUtil.GetXmlSpaceAttribute(textValue);
                        return new XElement(A.r,
                            g.First().Elements(A.rPr),
                            new XElement(A.t, xs, textValue));
                    }));

                return paragraphWithConsolidatedRuns;
            }

            // a:r with text: each character of an a:t child becomes its own run, and every
            // other non-a:rPr child gets a run of its own; each new run repeats the original
            // run's a:rPr.
            if ((element.Name == A.r) && element.Elements(A.t).Any())
            {
                return element.Elements()
                    .Where(e => e.Name != A.rPr)
                    .Select(e =>
                    {
                        if (e.Name == A.t)
                        {
                            var s = (string) e;
                            IEnumerable<XElement> collectionOfSubRuns = s.Select(c => new XElement(A.r,
                                element.Elements(A.rPr),
                                new XElement(A.t, XmlUtil.GetXmlSpaceAttribute(c), c)));
                            return (object) collectionOfSubRuns;
                        }

                        return new XElement(A.r,
                            element.Elements(A.rPr),
                            e);
                    });
            }

            // Any other element: same name and attributes, children transformed recursively.
            return new XElement(element.Name,
                element.Attributes(),
                element.Nodes().Select(n => PmlSearchAndReplaceTransform(n, regex, replacement, callback, counter)));
        }

        /// <summary>
        /// Mutable holder that lets the recursive transforms add to a shared match count.
        /// </summary>
        private class ReplaceInternalInfo
        {
            /// <summary>Running total of matches counted so far.</summary>
            public int Count { get; set; }
        }
    }
}
