﻿/***************************************************************************

Copyright (c) Microsoft Corporation 2012-2015.

This code is licensed using the Microsoft Public License (Ms-PL).  The text of the license can be found here:

http://www.microsoft.com/resources/sharedsource/licensingbasics/publiclicense.mspx

Published at http://OpenXmlDeveloper.org
Resource Center and Documentation: http://openxmldeveloper.org/wiki/w/wiki/powertools-for-open-xml.aspx

Developer: Eric White
Blog: http://www.ericwhite.com
Twitter: @EricWhiteDev
Email: eric@ericwhite.com

***************************************************************************/

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;

namespace OpenXmlPowerTools
{
    public partial class WmlDocument : OpenXmlPowerToolsDocument
    {
        public WmlDocument SearchAndReplace(string search, string replace, bool matchCase)
        {
            return TextReplacer.SearchAndReplace(this, search, replace, matchCase);
        }
    }

    public partial class PmlDocument : OpenXmlPowerToolsDocument
    {
        public PmlDocument SearchAndReplace(string search, string replace, bool matchCase)
        {
            return TextReplacer.SearchAndReplace(this, search, replace, matchCase);
        }
    }

    public class TextReplacer
    {
        private class MatchSemaphore
        {
            public int MatchId;
            public MatchSemaphore(int matchId)
            {
                MatchId = matchId;
            }
        }

        private static XObject CloneWithAnnotation(XNode node)
        {
            XElement element = node as XElement;
            if (element != null)
            {
                XElement newElement = new XElement(element.Name,
                    element.Attributes(),
                    element.Nodes().Select(n => CloneWithAnnotation(n)));
                if (element.Annotation<MatchSemaphore>() != null)
                    newElement.AddAnnotation(element.Annotation<MatchSemaphore>());
            }
            return node;
        }

        private static object WmlSearchAndReplaceTransform(XNode node,
            string search, string replace, bool matchCase)
        {
            XElement element = node as XElement;
            if (element != null)
            {
                if (element.Name == W.p)
                {
                    string contents = element.Descendants(W.t).Select(t => (string)t).StringConcatenate();
                    if (contents.Contains(search) ||
                        (!matchCase && contents.ToUpper().Contains(search.ToUpper())))
                    {
                        XElement paragraphWithSplitRuns = new XElement(W.p,
                            element.Attributes(),
                            element.Nodes().Select(n => WmlSearchAndReplaceTransform(n, search,
                                replace, matchCase)));
                        XElement[] subRunArray = paragraphWithSplitRuns
                            .Elements(W.r)
                            .Where(e => {
                                XElement subRunElement = e.Elements().FirstOrDefault(el => el.Name != W.rPr);
                                if (subRunElement == null)
                                    return false;
                                return W.SubRunLevelContent.Contains(subRunElement.Name);
                            })
                            .ToArray();
                        int paragraphChildrenCount = subRunArray.Length;
                        int matchId = 1;
                        foreach (var pc in subRunArray
                            .Take(paragraphChildrenCount - (search.Length - 1))
                            .Select((c, i) => new { Child = c, Index = i, }))
                        {
                            var subSequence = subRunArray.SequenceAt(pc.Index).Take(search.Length);
                            var zipped = subSequence.PtZip(search, (pcp, c) => new
                            {
                                ParagraphChildProjection = pcp,
                                CharacterToCompare = c,
                            });
                            bool dontMatch = zipped.Any(z => {
                                if (z.ParagraphChildProjection.Annotation<MatchSemaphore>() != null)
                                    return true;
                                bool b;
                                if (matchCase)
                                    b = z.ParagraphChildProjection.Value != z.CharacterToCompare.ToString();
                                else
                                    b = z.ParagraphChildProjection.Value.ToUpper() != z.CharacterToCompare.ToString().ToUpper();
                                return b;
                            });
                            bool match = !dontMatch;
                            if (match)
                            {
                                foreach (var item in subSequence)
                                    item.AddAnnotation(new MatchSemaphore(matchId));
                                ++matchId;
                            }
                        }

                        // The following code is locally impure, as this is the most expressive way to write it.
                        XElement paragraphWithReplacedRuns = (XElement)CloneWithAnnotation(paragraphWithSplitRuns);
                        for (int id = 1; id < matchId; ++id)
                        {
                            List<XElement> elementsToReplace = paragraphWithReplacedRuns
                                .Elements()
                                .Where(e => {
                                    var sem = e.Annotation<MatchSemaphore>();
                                    if (sem == null)
                                        return false;
                                    return sem.MatchId == id;
                                })
                                .ToList();
                            elementsToReplace.First().AddBeforeSelf(
                                new XElement(W.r,
                                    elementsToReplace.First().Elements(W.rPr),
                                    new XElement(W.t, replace)));
                            elementsToReplace.Remove();
                        }
                        var groupedAdjacentRunsWithIdenticalFormatting =
                            paragraphWithReplacedRuns
                            .Elements()
                            .GroupAdjacent(ce =>
                            {
                                if (ce.Name != W.r)
                                    return "DontConsolidate";
                                if (ce.Elements().Where(e => e.Name != W.rPr).Count() != 1 ||
                                    ce.Element(W.t) == null)
                                    return "DontConsolidate";
                                if (ce.Element(W.rPr) == null)
                                    return "";
                                return ce.Element(W.rPr).ToString(SaveOptions.None);
                            });
                        XElement paragraphWithConsolidatedRuns = new XElement(W.p,
                            groupedAdjacentRunsWithIdenticalFormatting.Select(g =>
                                {
                                    if (g.Key == "DontConsolidate")
                                        return (object)g;
                                    string textValue = g.Select(r => r.Element(W.t).Value).StringConcatenate();
                                    XAttribute xs = null;
                                    if (textValue[0] == ' ' || textValue[textValue.Length - 1] == ' ')
                                        xs = new XAttribute(XNamespace.Xml + "space", "preserve");
                                    return new XElement(W.r,
                                        g.First().Elements(W.rPr),
                                        new XElement(W.t, xs, textValue));
                                }));
                        return paragraphWithConsolidatedRuns;
                    }
                    return element;
                }
                if (element.Name == W.r && element.Elements(W.t).Any())
                {
                    var collectionOfRuns = element.Elements()
                        .Where(e => e.Name != W.rPr)
                        .Select(e =>
                            {
                                if (e.Name == W.t)
                                {
                                    string s = (string)e;
                                    IEnumerable<XElement> collectionOfSubRuns = s.Select(c =>
                                    {
                                        XElement newRun = new XElement(W.r,
                                            element.Elements(W.rPr),
                                            new XElement(W.t,
                                                c == ' ' ?
                                                new XAttribute(XNamespace.Xml + "space", "preserve") :
                                                null, c));
                                        return newRun;
                                    });
                                    return (object)collectionOfSubRuns;
                                }
                                else
                                {
                                    XElement newRun = new XElement(W.r,
                                        element.Elements(W.rPr),
                                        e);
                                    return newRun;
                                }
                            });
                    return collectionOfRuns;
                }
                return new XElement(element.Name,
                    element.Attributes(),
                    element.Nodes().Select(n => WmlSearchAndReplaceTransform(n,
                        search, replace, matchCase)));
            }
            return node;
        }

        private static void WmlSearchAndReplaceInXDocument(XDocument xDocument, string search,
            string replace, bool matchCase)
        {
            XElement newRoot = (XElement)WmlSearchAndReplaceTransform(xDocument.Root,
                search, replace, matchCase);
            xDocument.Elements().First().ReplaceWith(newRoot);
        }

        public static WmlDocument SearchAndReplace(WmlDocument doc, string search, string replace, bool matchCase)
        {
            using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(doc))
            {
                using (WordprocessingDocument document = streamDoc.GetWordprocessingDocument())
                {
                    SearchAndReplace(document, search, replace, matchCase);
                }
                return streamDoc.GetModifiedWmlDocument();
            }
        }

        public static void SearchAndReplace(WordprocessingDocument wordDoc, string search,
            string replace, bool matchCase)
        {
            if (RevisionAccepter.HasTrackedRevisions(wordDoc))
                throw new InvalidDataException(
                    "Search and replace will not work with documents " +
                    "that contain revision tracking.");
            XDocument xDoc;
            xDoc = wordDoc.MainDocumentPart.DocumentSettingsPart.GetXDocument();
            if (xDoc.Descendants(W.trackRevisions).Any())
                throw new InvalidDataException("Revision tracking is turned on for document.");

            xDoc = wordDoc.MainDocumentPart.GetXDocument();
            WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
            wordDoc.MainDocumentPart.PutXDocument();
            foreach (var part in wordDoc.MainDocumentPart.HeaderParts)
            {
                xDoc = part.GetXDocument();
                WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
                part.PutXDocument();
            }
            foreach (var part in wordDoc.MainDocumentPart.FooterParts)
            {
                xDoc = part.GetXDocument();
                WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
                part.PutXDocument();
            }
            if (wordDoc.MainDocumentPart.EndnotesPart != null)
            {
                xDoc = wordDoc.MainDocumentPart.EndnotesPart.GetXDocument();
                WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
                wordDoc.MainDocumentPart.EndnotesPart.PutXDocument();
            }
            if (wordDoc.MainDocumentPart.FootnotesPart != null)
            {
                xDoc = wordDoc.MainDocumentPart.FootnotesPart.GetXDocument();
                WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
                wordDoc.MainDocumentPart.FootnotesPart.PutXDocument();
            }
        }

        private static object PmlReplaceTextTransform(XNode node, string search, string replace,
            bool matchCase)
        {
            XElement element = node as XElement;
            if (element != null)
            {
                if (element.Name == A.p)
                {
                    string contents = element.Descendants(A.t).Select(t => (string)t).StringConcatenate();
                    if (contents.Contains(search) ||
                        (!matchCase && contents.ToUpper().Contains(search.ToUpper())))
                    {
                        XElement paragraphWithSplitRuns = new XElement(A.p,
                            element.Attributes(),
                            element.Nodes().Select(n => PmlReplaceTextTransform(n, search,
                                replace, matchCase)));
                        XElement[] subRunArray = paragraphWithSplitRuns
                            .Elements(A.r)
                            .Where(e =>
                            {
                                XElement subRunElement = e.Elements().FirstOrDefault(el => el.Name != A.rPr);
                                if (subRunElement == null)
                                    return false;
                                return subRunElement.Name == A.t;
                            })
                            .ToArray();
                        int paragraphChildrenCount = subRunArray.Length;
                        int matchId = 1;
                        foreach (var pc in subRunArray
                            .Take(paragraphChildrenCount - (search.Length - 1))
                            .Select((c, i) => new { Child = c, Index = i, }))
                        {
                            var subSequence = subRunArray.SequenceAt(pc.Index).Take(search.Length);
                            var zipped = subSequence.PtZip(search, (pcp, c) => new
                            {
                                ParagraphChildProjection = pcp,
                                CharacterToCompare = c,
                            });
                            bool dontMatch = zipped.Any(z =>
                            {
                                if (z.ParagraphChildProjection.Annotation<MatchSemaphore>() != null)
                                    return true;
                                bool b;
                                if (matchCase)
                                    b = z.ParagraphChildProjection.Value != z.CharacterToCompare.ToString();
                                else
                                    b = z.ParagraphChildProjection.Value.ToUpper() != z.CharacterToCompare.ToString().ToUpper();
                                return b;
                            });
                            bool match = !dontMatch;
                            if (match)
                            {
                                foreach (var item in subSequence)
                                    item.AddAnnotation(new MatchSemaphore(matchId));
                                ++matchId;
                            }
                        }

                        // The following code is locally impure, as this is the most expressive way to write it.
                        XElement paragraphWithReplacedRuns = (XElement)CloneWithAnnotation(paragraphWithSplitRuns);
                        for (int id = 1; id < matchId; ++id)
                        {
                            List<XElement> elementsToReplace = paragraphWithReplacedRuns
                                .Elements()
                                .Where(e =>
                                {
                                    var sem = e.Annotation<MatchSemaphore>();
                                    if (sem == null)
                                        return false;
                                    return sem.MatchId == id;
                                })
                                .ToList();
                            elementsToReplace.First().AddBeforeSelf(
                                new XElement(A.r,
                                    elementsToReplace.First().Elements(A.rPr),
                                    new XElement(A.t, replace)));
                            elementsToReplace.Remove();
                        }

                        var groupedAdjacentRunsWithIdenticalFormatting =
                            paragraphWithReplacedRuns
                            .Elements()
                            .GroupAdjacent(ce =>
                            {
                                if (ce.Name != A.r)
                                    return "DontConsolidate";
                                if (ce.Elements().Where(e => e.Name != A.rPr).Count() != 1 ||
                                    ce.Element(A.t) == null)
                                    return "DontConsolidate";
                                if (ce.Element(A.rPr) == null)
                                    return "";
                                return ce.Element(A.rPr).ToString(SaveOptions.None);
                            });
                        XElement paragraphWithConsolidatedRuns = new XElement(A.p,
                            groupedAdjacentRunsWithIdenticalFormatting.Select(g =>
                            {
                                if (g.Key == "DontConsolidate")
                                    return (object)g;
                                string textValue = g.Select(r => r.Element(A.t).Value).StringConcatenate();
                                return new XElement(A.r,
                                    g.First().Elements(A.rPr),
                                    new XElement(A.t, textValue));
                            }));
                        return paragraphWithConsolidatedRuns;
                    }
                }
                if (element.Name == A.r && element.Elements(A.t).Any())
                {
                    var collectionOfRuns = element.Elements()
                        .Where(e => e.Name != A.rPr)
                        .Select(e =>
                        {
                            if (e.Name == A.t)
                            {
                                string s = (string)e;
                                IEnumerable<XElement> collectionOfSubRuns = s.Select(c =>
                                {
                                    XElement newRun = new XElement(A.r,
                                        element.Elements(A.rPr),
                                        new XElement(A.t, c));
                                    return newRun;
                                });
                                return (object)collectionOfSubRuns;
                            }
                            else
                            {
                                XElement newRun = new XElement(A.r,
                                    element.Elements(A.rPr),
                                    e);
                                return newRun;
                            }
                        });
                    return collectionOfRuns;
                }
                return new XElement(element.Name,
                    element.Attributes(),
                    element.Nodes().Select(n => PmlReplaceTextTransform(n, search, replace, matchCase)));
            }
            return node;
        }

        public static PmlDocument SearchAndReplace(PmlDocument doc, string search, string replace, bool matchCase)
        {
            using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(doc))
            {
                using (PresentationDocument document = streamDoc.GetPresentationDocument())
                {
                    SearchAndReplace(document, search, replace, matchCase);
                }
                return streamDoc.GetModifiedPmlDocument();
            }
        }

        public static void SearchAndReplace(PresentationDocument pDoc, string search,
            string replace, bool matchCase)
        {
            PresentationPart presentationPart = pDoc.PresentationPart;
            foreach (var slidePart in presentationPart.SlideParts)
            {
                XDocument slideXDoc = slidePart.GetXDocument();
                XElement root = slideXDoc.Root;
                XElement newRoot = (XElement)PmlReplaceTextTransform(root, search, replace, matchCase);
                slidePart.PutXDocument(new XDocument(newRoot));
            }
        }
    }
}
