﻿/***************************************************************************

Copyright (c) Microsoft Corporation 2012-2015.

This code is licensed using the Microsoft Public License (Ms-PL).  The text of the license can be found here:

http://www.microsoft.com/resources/sharedsource/licensingbasics/publiclicense.mspx

Published at http://OpenXmlDeveloper.org
Resource Center and Documentation: http://openxmldeveloper.org/wiki/w/wiki/powertools-for-open-xml.aspx

Developer: Eric White
Blog: http://www.ericwhite.com
Twitter: @EricWhiteDev
Email: eric@ericwhite.com

***************************************************************************/

using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Packaging;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Validation;
using System.Globalization;

namespace OpenXmlPowerTools
{
    /// <summary>
    /// Options that select which optional sections the metrics routines produce.
    /// </summary>
    public class MetricsGetterSettings
    {
        // NOTE(review): not read in the portion of the file reviewed here; presumably controls
        // whether text inside content controls is reported - confirm against the consumer.
        public bool IncludeTextInContentControls;
        // NOTE(review): not read in the portion of the file reviewed here; presumably controls
        // whether cell data of spreadsheet tables is reported - confirm against the consumer.
        public bool IncludeXlsxTableCellData;
        // When true, the metrics include the list of namespace declarations found in the package.
        public bool RetrieveNamespaceList;
        // When true, the metrics include the list of distinct part content types in the package.
        public bool RetrieveContentTypeList;
    }

    public class MetricsGetter
    {
        /// <summary>
        /// Computes metrics for an Open XML file on disk, choosing the format-specific routine
        /// from the file extension.
        /// </summary>
        /// <param name="fileName">Path of the document to analyze.</param>
        /// <param name="settings">Options forwarded to the format-specific routine.</param>
        /// <returns>
        /// The metrics element produced by the format-specific routine, or null when the extension
        /// is not recognized as WordprocessingML, SpreadsheetML or PresentationML.
        /// </returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public static XElement GetMetrics(string fileName, MetricsGetterSettings settings)
        {
            FileInfo fi = new FileInfo(fileName);
            if (!fi.Exists)
            {
                // FileNotFoundException(message, fileName) does not treat the message as a format
                // string, so the path has to be substituted into the text explicitly.
                throw new FileNotFoundException(string.Format("{0} does not exist.", fi.FullName), fi.FullName);
            }

            if (Util.IsWordprocessingML(fi.Extension))
            {
                WmlDocument wmlDoc = new WmlDocument(fi.FullName, true);
                return GetDocxMetrics(wmlDoc, settings);
            }
            if (Util.IsSpreadsheetML(fi.Extension))
            {
                SmlDocument smlDoc = new SmlDocument(fi.FullName, true);
                return GetXlsxMetrics(smlDoc, settings);
            }
            if (Util.IsPresentationML(fi.Extension))
            {
                PmlDocument pmlDoc = new PmlDocument(fi.FullName, true);
                return GetPptxMetrics(pmlDoc, settings);
            }

            // Unrecognized extension: no metrics are produced.
            return null;
        }

        /// <summary>
        /// Computes metrics for a WordprocessingML document held in memory.
        /// </summary>
        /// <param name="wmlDoc">The document to analyze.</param>
        /// <param name="settings">Options forwarded to the metrics collection.</param>
        /// <returns>
        /// The metrics element. When the first attempt fails with an OpenXmlPowerToolsException that
        /// mentions "Invalid Hyperlink", the document is repaired and analyzed again; for any other
        /// OpenXmlPowerToolsException an element carrying only an error attribute is returned.
        /// </returns>
        public static XElement GetDocxMetrics(WmlDocument wmlDoc, MetricsGetterSettings settings)
        {
            try
            {
                return CollectDocxMetrics(wmlDoc, false, settings);
            }
            catch (OpenXmlPowerToolsException e)
            {
                bool causedByInvalidHyperlink = e.ToString().Contains("Invalid Hyperlink");
                if (causedByInvalidHyperlink)
                {
                    // Rewrite the broken hyperlink targets in a copy of the bytes, then retry on the copy.
                    WmlDocument repairedDoc;
                    using (MemoryStream repairStream = new MemoryStream())
                    {
                        repairStream.Write(wmlDoc.DocumentByteArray, 0, wmlDoc.DocumentByteArray.Length);
#if !NET35
                        UriFixer.FixInvalidUri(repairStream, brokenUri => FixUri(brokenUri));
#endif
                        repairedDoc = new WmlDocument("dummy.docx", repairStream.ToArray());
                    }
                    return CollectDocxMetrics(repairedDoc, true, settings);
                }
            }

            // Reached only when an OpenXmlPowerToolsException unrelated to hyperlinks was caught.
            return new XElement(H.Metrics,
                new XAttribute(H.FileName, wmlDoc.FileName),
                new XAttribute(H.FileType, "WordprocessingML"),
                new XAttribute(H.Error, "Unknown error, metrics not determined"));
        }

        // Opens an editable in-memory copy of the document, accepts tracked revisions if there are
        // any, gathers the metrics, and records on the result that revisions were present.
        private static XElement CollectDocxMetrics(WmlDocument sourceDoc, bool invalidHyperlink, MetricsGetterSettings settings)
        {
            using (MemoryStream buffer = new MemoryStream())
            {
                buffer.Write(sourceDoc.DocumentByteArray, 0, sourceDoc.DocumentByteArray.Length);
                using (WordprocessingDocument document = WordprocessingDocument.Open(buffer, true))
                {
                    bool hadTrackedRevisions = RevisionAccepter.HasTrackedRevisions(document);
                    if (hadTrackedRevisions)
                        RevisionAccepter.AcceptRevisions(document);

                    XElement result = GetWmlMetrics(sourceDoc.FileName, invalidHyperlink, document, settings);

                    if (hadTrackedRevisions)
                        result.Add(new XElement(H.RevisionTracking, new XAttribute(H.Val, true)));
                    return result;
                }
            }
        }

        // Replacement callback for unparsable hyperlink targets: every broken URI, whatever its
        // text, is mapped to the same fixed placeholder address.
        private static Uri FixUri(string brokenUri)
        {
            const string placeholderAddress = "http://broken-link/";
            return new Uri(placeholderAddress);
        }

        /// <summary>
        /// Assembles the complete metrics element for an open WordprocessingML document.
        /// </summary>
        /// <param name="fileName">Value written to the file-name attribute.</param>
        /// <param name="invalidHyperlink">Forwarded to the miscellaneous metrics collection.</param>
        /// <param name="wDoc">The open document.</param>
        /// <param name="settings">Selects the optional namespace and content-type lists.</param>
        /// <returns>The metrics element.</returns>
        private static XElement GetWmlMetrics(string fileName, bool invalidHyperlink, WordprocessingDocument wDoc, MetricsGetterSettings settings)
        {
            // Per-part metrics are gathered first; the wrapper is dropped when it ends up empty.
            XElement partMetrics = new XElement(H.Parts,
                wDoc.GetAllParts().Select(part => GetMetricsForWmlPart(part, settings)));
            XElement partsOrNothing = partMetrics.HasElements ? partMetrics : null;

            // The remaining pieces are produced in exactly this order; later steps see whatever
            // the earlier ones did to the document.
            var fileNameAttribute = new XAttribute(H.FileName, fileName);
            var fileTypeAttribute = new XAttribute(H.FileType, "WordprocessingML");
            var styleHierarchy = GetStyleHierarchy(wDoc);
            var miscMetrics = GetMiscWmlMetrics(wDoc, invalidHyperlink);

            XElement namespaceList = null;
            if (settings.RetrieveNamespaceList)
                namespaceList = RetrieveNamespaceList(wDoc);

            XElement contentTypeList = null;
            if (settings.RetrieveContentTypeList)
                contentTypeList = RetrieveContentTypeList(wDoc);

            return new XElement(H.Metrics,
                fileNameAttribute,
                fileTypeAttribute,
                styleHierarchy,
                miscMetrics,
                partsOrNothing,
                namespaceList,
                contentTypeList);
        }

        /// <summary>
        /// Lists the distinct content types of the package's parts, excluding relationship parts,
        /// in sorted order.
        /// </summary>
        /// <param name="oxPkg">The open package to inspect.</param>
        /// <returns>A content-types element with one child per distinct content type.</returns>
        private static XElement RetrieveContentTypeList(OpenXmlPackage oxPkg)
        {
            const string relationshipsContentType = "application/vnd.openxmlformats-package.relationships+xml";

            var distinctContentTypes =
                (from ZipPackagePart packagePart in oxPkg.Package.GetParts()
                 where packagePart.ContentType != relationshipsContentType
                 orderby packagePart.ContentType
                 select packagePart.ContentType)
                .Distinct();

            XElement result = new XElement(H.ContentTypes);
            foreach (string contentType in distinctContentTypes)
                result.Add(new XElement(H.ContentType, new XAttribute(H.Val, contentType)));
            return result;
        }

        /// <summary>
        /// Collects every distinct namespace declaration (prefix plus namespace name) found in the
        /// XML parts of the package.
        /// </summary>
        /// <param name="oxPkg">The open package to inspect.</param>
        /// <returns>
        /// A namespaces element with one child per distinct prefix/name pair, sorted by the
        /// combined "prefix|name" key. Parts that fail to load as XML are skipped.
        /// </returns>
        private static XElement RetrieveNamespaceList(OpenXmlPackage oxPkg)
        {
            Package pkg = oxPkg.Package;

            // Relationship parts are excluded; a part counts as XML when its content type ends in
            // "xml". The comparison is ordinal so the result does not depend on the current culture.
            var xmlParts = pkg.GetParts()
                .Cast<ZipPackagePart>()
                .Where(p => p.ContentType != "application/vnd.openxmlformats-package.relationships+xml")
                .Where(p => p.ContentType.EndsWith("xml", StringComparison.OrdinalIgnoreCase));

            var uniqueNamespaces = new HashSet<string>();
            foreach (var xp in xmlParts)
            {
                using (Stream st = xp.GetStream())
                {
                    try
                    {
                        XDocument xdoc = XDocument.Load(st);
                        var declarations = xdoc
                            .Descendants()
                            .Attributes()
                            .Where(a => a.IsNamespaceDeclaration);
                        // The set removes duplicates and the final query sorts, so no per-part
                        // ordering or de-duplication is needed here.
                        foreach (var declaration in declarations)
                            uniqueNamespaces.Add(string.Format("{0}|{1}", declaration.Name.LocalName, declaration.Value));
                    }
                    // Deliberately best-effort: the goal is the most complete survey possible of the
                    // namespaces in use, and a part that cannot be loaded is most likely damaged anyway.
                    catch (Exception)
                    {
                        continue;
                    }
                }
            }

            var xe = new XElement(H.Namespaces,
                uniqueNamespaces.OrderBy(t => t).Select(n =>
                {
                    // Split on the first '|' only: the prefix cannot contain one, but the namespace
                    // name is arbitrary text and must not be truncated if it does.
                    var spl = n.Split(new[] { '|' }, 2);
                    return new XElement(H.Namespace,
                        new XAttribute(H.NamespacePrefix, spl[0]),
                        new XAttribute(H.NamespaceName, spl[1]));
                }));
            return xe;
        }

        /// <summary>
        /// Produces the miscellaneous metric elements for a WordprocessingML document.
        /// </summary>
        /// <param name="document">The open document to inspect.</param>
        /// <param name="invalidHyperlink">When true, an invalid-hyperlink element is emitted first.</param>
        /// <returns>The list of metric elements.</returns>
        private static List<XElement> GetMiscWmlMetrics(WordprocessingDocument document, bool invalidHyperlink)
        {
            List<XElement> metrics = new List<XElement>();
            // Notes and counters are scratch state for the analysis; only 'metrics' is returned.
            List<string> notes = new List<string>();
            Dictionary<XName, int> elementCountDictionary = new Dictionary<XName, int>();

            if (invalidHyperlink)
                metrics.Add(new XElement(H.InvalidHyperlink, new XAttribute(H.Val, invalidHyperlink)));

            // Called for the elements it appends to 'metrics'; the validity flag it returns is not
            // part of this method's result.
            ValidateWordprocessingDocument(document, metrics, notes, elementCountDictionary);

            return metrics;
        }

        /// <summary>
        /// Runs SDK validation and gathers miscellaneous counts and flags for a WordprocessingML
        /// document, appending the resulting elements to <paramref name="metrics"/>.
        /// </summary>
        /// <param name="wDoc">The open document to inspect.</param>
        /// <param name="metrics">Receives one element per metric.</param>
        /// <param name="notes">Passed through to the font and character-set analysis.</param>
        /// <param name="metricCountDictionary">Per-feature counters; every entry is emitted as an element.</param>
        /// <returns>
        /// True when at least one of the validated file-format versions reported no errors and the
        /// main document part contains no w:saveThroughXslt element; otherwise false.
        /// </returns>
        private static bool ValidateWordprocessingDocument(WordprocessingDocument wDoc, List<XElement> metrics, List<string> notes, Dictionary<XName, int> metricCountDictionary)
        {
            // Each call records its own version-specific errors; the flags are OR-ed together.
            bool valid = ValidateAgainstSpecificVersion(wDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2007, H.SdkValidationError2007);
            valid |= ValidateAgainstSpecificVersion(wDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2010, H.SdkValidationError2010);
#if !NET35
            valid |= ValidateAgainstSpecificVersion(wDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2013, H.SdkValidationError2013);
#endif

            int elementCount = 0;
            int paragraphCount = 0;
            int textCount = 0;
            foreach (var part in wDoc.ContentParts())
            {
                XDocument xDoc = part.GetXDocument();
                foreach (var e in xDoc.Descendants())
                {
                    if (e.Name == W.txbxContent)
                        IncrementMetric(metricCountDictionary, H.TextBox);
                    else if (e.Name == W.sdt)
                        IncrementMetric(metricCountDictionary, H.ContentControl);
                    else if (e.Name == W.customXml)
                        IncrementMetric(metricCountDictionary, H.CustomXmlMarkup);
                    else if (e.Name == W.fldChar)
                        IncrementMetric(metricCountDictionary, H.ComplexField);
                    else if (e.Name == W.fldSimple)
                        IncrementMetric(metricCountDictionary, H.SimpleField);
                    else if (e.Name == W.altChunk)
                        IncrementMetric(metricCountDictionary, H.AltChunk);
                    else if (e.Name == W.tbl)
                        IncrementMetric(metricCountDictionary, H.Table);
                    else if (e.Name == W.hyperlink)
                        IncrementMetric(metricCountDictionary, H.Hyperlink);
                    else if (e.Name == W.framePr)
                        IncrementMetric(metricCountDictionary, H.LegacyFrame);
                    else if (e.Name == W.control)
                        IncrementMetric(metricCountDictionary, H.ActiveX);
                    else if (e.Name == W.subDoc)
                        IncrementMetric(metricCountDictionary, H.SubDocument);
                    else if (e.Name == VML.imagedata || e.Name == VML.fill || e.Name == VML.stroke || e.Name == A.blip)
                    {
                        // An image reference may be carried by any of these three attributes;
                        // each one that is present is checked against the part's relationships.
                        var relId = (string)e.Attribute(R.embed);
                        if (relId != null)
                            ValidateImageExists(part, relId, metricCountDictionary);
                        relId = (string)e.Attribute(R.pict);
                        if (relId != null)
                            ValidateImageExists(part, relId, metricCountDictionary);
                        relId = (string)e.Attribute(R.id);
                        if (relId != null)
                            ValidateImageExists(part, relId, metricCountDictionary);
                    }

                    // Only the main document part contributes to the element, paragraph and text totals.
                    if (part.Uri == wDoc.MainDocumentPart.Uri)
                    {
                        elementCount++;
                        if (e.Name == W.p)
                            paragraphCount++;
                        if (e.Name == W.t)
                            textCount += ((string)e).Length;
                    }
                }
            }

            foreach (var item in metricCountDictionary)
            {
                metrics.Add(
                    new XElement(item.Key, new XAttribute(H.Val, item.Value)));
            }

            metrics.Add(new XElement(H.ElementCount, new XAttribute(H.Val, elementCount)));

            // A document without paragraphs would otherwise divide by zero, and converting the
            // resulting NaN/infinity to int yields an unspecified value; report 0 instead.
            int averageParagraphLength = paragraphCount == 0
                ? 0
                : (int)((double)textCount / (double)paragraphCount);
            metrics.Add(new XElement(H.AverageParagraphLength, new XAttribute(H.Val, averageParagraphLength)));

            if (wDoc.GetAllParts().Any(part => part.ContentType == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"))
                metrics.Add(new XElement(H.EmbeddedXlsx, new XAttribute(H.Val, true)));

            NumberingFormatListAssembly(wDoc, metrics);

            XDocument wxDoc = wDoc.MainDocumentPart.GetXDocument();
            // The settings part may be absent; in that case no external relationship can match.
            var settingsPart = wDoc.MainDocumentPart.DocumentSettingsPart;

            foreach (var d in wxDoc.Descendants())
            {
                if (d.Name == W.saveThroughXslt)
                {
                    string rid = (string)d.Attribute(R.id);
                    var tempExternalRelationship = settingsPart == null
                        ? null
                        : settingsPart.ExternalRelationships.FirstOrDefault(h => h.Id == rid);
                    if (tempExternalRelationship == null)
                        metrics.Add(new XElement(H.InvalidSaveThroughXslt, new XAttribute(H.Val, true)));
                    // NOTE(review): the document is marked invalid for every w:saveThroughXslt
                    // element, whether or not its relationship resolves - confirm this is intended.
                    valid = false;
                }
                else if (d.Name == W.trackRevisions)
                    metrics.Add(new XElement(H.TrackRevisionsEnabled, new XAttribute(H.Val, true)));
                else if (d.Name == W.documentProtection)
                    metrics.Add(new XElement(H.DocumentProtection, new XAttribute(H.Val, true)));
            }

            FontAndCharSetAnalysis(wDoc, metrics, notes);

            return valid;
        }

        // Validates a WordprocessingML document against one file-format version; see
        // ValidatePackageAgainstSpecificVersion for what is recorded in 'metrics'.
        private static bool ValidateAgainstSpecificVersion(WordprocessingDocument wDoc, List<XElement> metrics, DocumentFormat.OpenXml.FileFormatVersions versionToValidateAgainst, XName versionSpecificMetricName)
        {
            return ValidatePackageAgainstSpecificVersion(wDoc, metrics, versionToValidateAgainst, versionSpecificMetricName);
        }

        // Validates a SpreadsheetML document against one file-format version; see
        // ValidatePackageAgainstSpecificVersion for what is recorded in 'metrics'.
        private static bool ValidateAgainstSpecificVersion(SpreadsheetDocument sDoc, List<XElement> metrics, DocumentFormat.OpenXml.FileFormatVersions versionToValidateAgainst, XName versionSpecificMetricName)
        {
            return ValidatePackageAgainstSpecificVersion(sDoc, metrics, versionToValidateAgainst, versionSpecificMetricName);
        }

        // Validates a PresentationML document against one file-format version; see
        // ValidatePackageAgainstSpecificVersion for what is recorded in 'metrics'.
        private static bool ValidateAgainstSpecificVersion(PresentationDocument pDoc, List<XElement> metrics, DocumentFormat.OpenXml.FileFormatVersions versionToValidateAgainst, XName versionSpecificMetricName)
        {
            return ValidatePackageAgainstSpecificVersion(pDoc, metrics, versionToValidateAgainst, versionSpecificMetricName);
        }

        /// <summary>
        /// Shared implementation of the three document-type overloads: validates a package against
        /// one file-format version and records the outcome.
        /// </summary>
        /// <param name="package">The open package to validate.</param>
        /// <param name="metrics">
        /// On failure receives the general validation-error element (once per list) and a
        /// version-specific element whose text describes the first few errors.
        /// </param>
        /// <param name="versionToValidateAgainst">File-format version handed to the validator.</param>
        /// <param name="versionSpecificMetricName">Name of the version-specific element.</param>
        /// <returns>True when the validator reported no errors.</returns>
        private static bool ValidatePackageAgainstSpecificVersion(OpenXmlPackage package, List<XElement> metrics, DocumentFormat.OpenXml.FileFormatVersions versionToValidateAgainst, XName versionSpecificMetricName)
        {
            const int maxReportedErrors = 3;
            const int maxDescriptionLength = 300;

            OpenXmlValidator validator = new OpenXmlValidator(versionToValidateAgainst);

            // Enumerate the validator's result exactly once and stop after the errors that will be
            // reported; counting the sequence and then enumerating it again can repeat the validation.
            List<ValidationErrorInfo> reportedErrors = validator.Validate(package).Take(maxReportedErrors).ToList();
            if (reportedErrors.Count == 0)
                return true;

            if (!metrics.Any(e => e.Name == H.SdkValidationError))
                metrics.Add(new XElement(H.SdkValidationError, new XAttribute(H.Val, true)));

            metrics.Add(new XElement(versionSpecificMetricName, new XAttribute(H.Val, true),
                reportedErrors.Select(err =>
                {
                    // Long descriptions are cut off so the metrics stay readable.
                    string description = err.Description.Length > maxDescriptionLength
                        ? err.Description.Substring(0, maxDescriptionLength) + " ... elided ..."
                        : err.Description;

                    StringBuilder sb = new StringBuilder();
                    sb.Append(PtUtils.MakeValidXml(description) + Environment.NewLine);
                    sb.Append("  in part " + PtUtils.MakeValidXml(err.Part.Uri.ToString()) + Environment.NewLine);
                    sb.Append("  at " + PtUtils.MakeValidXml(err.Path.XPath) + Environment.NewLine);
                    return sb.ToString();
                })));
            return false;
        }

        /// <summary>
        /// Adds one to the counter stored under <paramref name="xName"/>, creating the counter
        /// with a value of 1 when it does not exist yet.
        /// </summary>
        /// <param name="metricCountDictionary">The counters to update in place.</param>
        /// <param name="xName">The metric whose counter is incremented.</param>
        private static void IncrementMetric(Dictionary<XName, int> metricCountDictionary, XName xName)
        {
            // TryGetValue leaves 'current' at 0 for a missing key, so a single lookup followed by
            // a single store covers both the existing-key and new-key cases.
            int current;
            metricCountDictionary.TryGetValue(xName, out current);
            metricCountDictionary[xName] = current + 1;
        }

        // Counts a reference-to-null-image when none of the part's child parts is registered
        // under the given relationship id.
        private static void ValidateImageExists(OpenXmlPart part, string relId, Dictionary<XName, int> metrics)
        {
            bool targetExists = part.Parts.Any(pair => pair.RelationshipId == relId);
            if (targetExists)
                return;

            IncrementMetric(metrics, H.ReferenceToNullImage);
        }


        /// <summary>
        /// Determines which numbering formats are used by list-item paragraphs in the content
        /// parts and, when there are any, adds them to <paramref name="metrics"/> as one
        /// comma-separated value.
        /// </summary>
        /// <param name="wDoc">The open document to inspect.</param>
        /// <param name="metrics">Receives the numbering-format-list element.</param>
        private static void NumberingFormatListAssembly(WordprocessingDocument wDoc, List<XElement> metrics)
        {
            // The list keeps first-seen order; the set makes each format appear only once for the
            // whole document rather than once per part.
            List<string> numFmtList = new List<string>();
            HashSet<string> seenFormats = new HashSet<string>();

            foreach (var part in wDoc.ContentParts())
            {
                var xDoc = part.GetXDocument();
                foreach (var p in xDoc.Descendants(W.p))
                {
                    // RetrieveListItem is called for the annotation it leaves on the paragraph.
                    ListItemRetriever.RetrieveListItem(wDoc, p, null);
                    ListItemRetriever.ListItemInfo lif = p.Annotation<ListItemRetriever.ListItemInfo>();
                    if (lif == null || !lif.IsListItem)
                        continue;

                    // Look the level definition up once and reuse it below.
                    var lvl = lif.Lvl(ListItemRetriever.GetParagraphLevel(p));
                    if (lvl == null)
                        continue;

                    string numFmtForLevel = (string)lvl.Elements(W.numFmt).Attributes(W.val).FirstOrDefault();
                    if (numFmtForLevel == null)
                    {
                        // Fall back to a numFmt wrapped in mc:AlternateContent/mc:Choice; only a
                        // "custom" format is taken from there, using its format attribute.
                        var numFmtElement = lvl.Elements(MC.AlternateContent).Elements(MC.Choice).Elements(W.numFmt).FirstOrDefault();
                        if (numFmtElement != null && (string)numFmtElement.Attribute(W.val) == "custom")
                            numFmtForLevel = (string)numFmtElement.Attribute(W.format);
                    }

                    if (numFmtForLevel != null && seenFormats.Add(numFmtForLevel))
                        numFmtList.Add(numFmtForLevel);
                }
            }

            if (numFmtList.Any())
            {
                var nfls = numFmtList.StringConcatenate(s => s + ",").TrimEnd(',');
                metrics.Add(new XElement(H.NumberingFormatList, new XAttribute(H.Val, PtUtils.MakeValidXml(nfls))));
            }
        }

        // Accumulator for the run-level font and character-set analysis.
        class FormattingMetrics
        {
            // Run totals and anomalies.
            public int RunCount;
            public int RunWithoutRprCount;
            public int ZeroLengthText;
            public int MultiFontRun;

            // Characters counted per font type.
            public int AsciiCharCount;
            public int CSCharCount;
            public int EastAsiaCharCount;
            public int HAnsiCharCount;

            // Runs counted per font type.
            public int AsciiRunCount;
            public int CSRunCount;
            public int EastAsiaRunCount;
            public int HAnsiRunCount;

            // Languages encountered so far; starts out empty.
            public List<string> Languages = new List<string>();
        }

        /// <summary>
        /// Assembles formatting for the document, analyzes every run in its content parts, and
        /// appends the resulting run, character and language metrics to <paramref name="metrics"/>.
        /// </summary>
        /// <param name="wDoc">The open document; formatting assembly is applied to it.</param>
        /// <param name="metrics">Receives the metric elements.</param>
        /// <param name="notes">Forwarded to the per-run analysis.</param>
        private static void FontAndCharSetAnalysis(WordprocessingDocument wDoc, List<XElement> metrics, List<string> notes)
        {
            var assemblerSettings = new FormattingAssemblerSettings();
            assemblerSettings.RemoveStyleNamesFromParagraphAndRunProperties = false;
            assemblerSettings.ClearStyles = true;
            assemblerSettings.RestrictToSupportedNumberingFormats = false;
            assemblerSettings.RestrictToSupportedLanguages = false;
            FormattingAssembler.AssembleFormatting(wDoc, assemblerSettings);

            var totals = new FormattingMetrics();
            foreach (var contentPart in wDoc.ContentParts())
            {
                foreach (var run in contentPart.GetXDocument().Descendants(W.r))
                {
                    totals.RunCount++;
                    AnalyzeRun(run, metrics, notes, totals, contentPart.Uri.ToString());
                }
            }

            // The run count is always reported; every other counter only when it is positive.
            metrics.Add(new XElement(H.RunCount, new XAttribute(H.Val, totals.RunCount)));

            var optionalCounters = new[]
            {
                new KeyValuePair<XName, int>(H.RunWithoutRprCount, totals.RunWithoutRprCount),
                new KeyValuePair<XName, int>(H.ZeroLengthText, totals.ZeroLengthText),
                new KeyValuePair<XName, int>(H.MultiFontRun, totals.MultiFontRun),
                new KeyValuePair<XName, int>(H.AsciiCharCount, totals.AsciiCharCount),
                new KeyValuePair<XName, int>(H.CSCharCount, totals.CSCharCount),
                new KeyValuePair<XName, int>(H.EastAsiaCharCount, totals.EastAsiaCharCount),
                new KeyValuePair<XName, int>(H.HAnsiCharCount, totals.HAnsiCharCount),
                new KeyValuePair<XName, int>(H.AsciiRunCount, totals.AsciiRunCount),
                new KeyValuePair<XName, int>(H.CSRunCount, totals.CSRunCount),
                new KeyValuePair<XName, int>(H.EastAsiaRunCount, totals.EastAsiaRunCount),
                new KeyValuePair<XName, int>(H.HAnsiRunCount, totals.HAnsiRunCount),
            };
            foreach (var counter in optionalCounters)
            {
                if (counter.Value > 0)
                    metrics.Add(new XElement(counter.Key, new XAttribute(H.Val, counter.Value)));
            }

            if (totals.Languages.Any())
            {
                var languageList = totals.Languages.StringConcatenate(s => s + ",").TrimEnd(',');
                metrics.Add(new XElement(H.Languages, new XAttribute(H.Val, PtUtils.MakeValidXml(languageList))));
            }
        }

        /// <summary>
        /// Classifies the characters of one run by font type and folds the result into
        /// <paramref name="formattingMetrics"/>.
        /// </summary>
        /// <param name="run">The w:r element to analyze.</param>
        /// <param name="attList">Not used by this method.</param>
        /// <param name="notes">Receives a message for each non-empty run that has no w:rPr child.</param>
        /// <param name="formattingMetrics">Accumulator that is updated.</param>
        /// <param name="uri">URI of the part that contains the run; used only in the note text.</param>
        private static void AnalyzeRun(XElement run, List<XElement> attList, List<string> notes, FormattingMetrics formattingMetrics, string uri)
        {
            // Text of the run: its w:t and w:delText children, concatenated.
            var text = run.Elements()
                .Where(child => child.Name == W.t || child.Name == W.delText)
                .Select(child => (string)child)
                .StringConcatenate();
            if (text.Length == 0)
            {
                formattingMetrics.ZeroLengthText++;
                return;
            }

            // A run without properties is noted and then analyzed with an empty w:rPr.
            var runProperties = run.Element(W.rPr);
            if (runProperties == null)
            {
                formattingMetrics.RunWithoutRprCount++;
                notes.Add(PtUtils.MakeValidXml(string.Format("Error in part {0}: run without rPr at {1}", uri, run.GetXPath())));
                runProperties = new XElement(W.rPr);
            }

            var charAttributes = new FormattingAssembler.CharStyleAttributes(null, runProperties);
            var fontTypePerChar = text
                .Select(ch => FormattingAssembler.DetermineFontTypeFromCharacter(ch, charAttributes))
                .ToArray();
            var fontTypesInRun = fontTypePerChar.Distinct().ToArray();

            // Record the language of every font type in the run; a missing or empty language
            // falls back to the name of the current culture.
            foreach (var fontType in fontTypesInRun)
            {
                string language;
                switch (fontType)
                {
                    case FormattingAssembler.FontType.CS:
                        language = charAttributes.BidiLang;
                        break;
                    case FormattingAssembler.FontType.EastAsia:
                        language = charAttributes.EastAsiaLang;
                        break;
                    default: // Ascii, HAnsi
                        language = charAttributes.LatinLang;
                        break;
                }
                if (string.IsNullOrEmpty(language))
                    language = CultureInfo.CurrentCulture.Name;
                if (!formattingMetrics.Languages.Contains(language))
                    formattingMetrics.Languages.Add(language);
            }

            int distinctFontCount = fontTypesInRun
                .Select(fontType => GetFontFromFontType(charAttributes, fontType))
                .Distinct()
                .Count();

            if (distinctFontCount > 1)
            {
                // Several fonts in one run: only the per-character totals are updated.
                formattingMetrics.MultiFontRun++;
                formattingMetrics.AsciiCharCount += fontTypePerChar.Count(ft => ft == FormattingAssembler.FontType.Ascii);
                formattingMetrics.CSCharCount += fontTypePerChar.Count(ft => ft == FormattingAssembler.FontType.CS);
                formattingMetrics.EastAsiaCharCount += fontTypePerChar.Count(ft => ft == FormattingAssembler.FontType.EastAsia);
                formattingMetrics.HAnsiCharCount += fontTypePerChar.Count(ft => ft == FormattingAssembler.FontType.HAnsi);
                return;
            }

            // Single font: the whole run is attributed to the font type of its first character.
            var runFontType = fontTypePerChar[0];
            if (runFontType == FormattingAssembler.FontType.Ascii)
            {
                formattingMetrics.AsciiCharCount += text.Length;
                formattingMetrics.AsciiRunCount++;
            }
            else if (runFontType == FormattingAssembler.FontType.CS)
            {
                formattingMetrics.CSCharCount += text.Length;
                formattingMetrics.CSRunCount++;
            }
            else if (runFontType == FormattingAssembler.FontType.EastAsia)
            {
                formattingMetrics.EastAsiaCharCount += text.Length;
                formattingMetrics.EastAsiaRunCount++;
            }
            else if (runFontType == FormattingAssembler.FontType.HAnsi)
            {
                formattingMetrics.HAnsiCharCount += text.Length;
                formattingMetrics.HAnsiRunCount++;
            }
        }

        // Returns the font recorded in 'csa' for the given font type; any value other than CS,
        // EastAsia or HAnsi (including Ascii) yields the ASCII font.
        private static string GetFontFromFontType(FormattingAssembler.CharStyleAttributes csa, FormattingAssembler.FontType ft)
        {
            if (ft == FormattingAssembler.FontType.CS)
                return csa.CsFont;
            if (ft == FormattingAssembler.FontType.EastAsia)
                return csa.EastAsiaFont;
            if (ft == FormattingAssembler.FontType.HAnsi)
                return csa.HAnsiFont;
            return csa.AsciiFont;
        }

        /// <summary>
        /// Computes metrics for a SpreadsheetML document held in memory.
        /// </summary>
        /// <param name="smlDoc">The document to analyze.</param>
        /// <param name="settings">
        /// Forwarded to the table collection; also selects the optional namespace and
        /// content-type lists.
        /// </param>
        /// <returns>
        /// The metrics element: file name and type, SDK validation results, per-sheet table
        /// information and the optional lists.
        /// </returns>
        public static XElement GetXlsxMetrics(SmlDocument smlDoc, MetricsGetterSettings settings)
        {
            using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(smlDoc))
            {
                using (SpreadsheetDocument sDoc = streamDoc.GetSpreadsheetDocument())
                {
                    List<XElement> metrics = new List<XElement>();

                    // Called for the error elements they append to 'metrics'; the returned
                    // validity flags are not part of the result.
                    ValidateAgainstSpecificVersion(sDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2007, H.SdkValidationError2007);
                    ValidateAgainstSpecificVersion(sDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2010, H.SdkValidationError2010);
#if !NET35
                    ValidateAgainstSpecificVersion(sDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2013, H.SdkValidationError2013);
#endif

                    return new XElement(H.Metrics,
                        new XAttribute(H.FileName, smlDoc.FileName),
                        new XAttribute(H.FileType, "SpreadsheetML"),
                        metrics,
                        GetTableInfoForWorkbook(sDoc, settings),
                        settings.RetrieveNamespaceList ? RetrieveNamespaceList(sDoc) : null,
                        settings.RetrieveContentTypeList ? RetrieveContentTypeList(sDoc) : null);
                }
            }
        }

        /// <summary>
        /// Collects table information for every worksheet listed in the workbook part.
        /// </summary>
        /// <param name="spreadsheet">The open spreadsheet document.</param>
        /// <param name="settings">Forwarded unchanged to <c>GetTableInfoForSheet</c>.</param>
        /// <returns>
        /// A Sheets element with one child per worksheet for which
        /// <c>GetTableInfoForSheet</c> returned a non-null element. Sheets whose
        /// relationship does not resolve to a worksheet part are skipped.
        /// </returns>
        private static XElement GetTableInfoForWorkbook(SpreadsheetDocument spreadsheet, MetricsGetterSettings settings)
        {
            var workbookPart = spreadsheet.WorkbookPart;
            var xd = workbookPart.GetXDocument();
            var partInformation =
                new XElement(H.Sheets,
                    xd.Root
                    // Elements() rather than Element(): a workbook without a sheets
                    // element yields an empty sequence instead of a null reference.
                    .Elements(S.sheets)
                    .Elements(S.sheet)
                    .Select(sh =>
                    {
                        var rid = (string)sh.Attribute(R.id);
                        var sheetName = (string)sh.Attribute("name");

                        // A sheet entry can also reference a chart sheet or dialog sheet
                        // part; a hard cast to WorksheetPart would throw
                        // InvalidCastException for those. Only worksheets are inspected
                        // for tables, so skip anything else (null content is ignored
                        // by XElement).
                        WorksheetPart worksheetPart = workbookPart.GetPartById(rid) as WorksheetPart;
                        if (worksheetPart == null)
                            return null;

                        return GetTableInfoForSheet(spreadsheet, worksheetPart, sheetName, settings);
                    }));
            return partInformation;
        }

        /// <summary>
        /// Describes the tables attached to a single worksheet.
        /// </summary>
        /// <param name="spreadsheetDocument">The document that owns the sheet; used to look tables up by display name.</param>
        /// <param name="sheetPart">The worksheet part whose tableParts are enumerated.</param>
        /// <param name="sheetName">The sheet name written to the Name attribute of the result.</param>
        /// <param name="settings">
        /// When <c>IncludeXlsxTableCellData</c> is set, each table also gets a TableData
        /// element containing one Row per table row and one Cell per column.
        /// </param>
        /// <returns>
        /// A Sheet element with one Table child per table part, or <c>null</c> when the
        /// sheet has no tables.
        /// </returns>
        public static XElement GetTableInfoForSheet(SpreadsheetDocument spreadsheetDocument, WorksheetPart sheetPart, string sheetName,
            MetricsGetterSettings settings)
        {
            var xd = sheetPart.GetXDocument();
            XElement sheetInformation = new XElement(H.Sheet,
                    new XAttribute(H.Name, sheetName),
                    xd.Root.Elements(S.tableParts).Elements(S.tablePart).Select(tp =>
                    {
                        string rId = (string)tp.Attribute(R.id);
                        TableDefinitionPart tablePart = (TableDefinitionPart)sheetPart.GetPartById(rId);
                        var txd = tablePart.GetXDocument();
                        var tableName = (string)txd.Root.Attribute("displayName");

                        // The table's "name" attribute is optional in the SpreadsheetML
                        // schema (only displayName is required). XAttribute rejects a
                        // null value, so the Name attribute is emitted only when present.
                        var internalName = (string)txd.Root.Attribute("name");
                        XAttribute internalNameAttr = internalName != null
                            ? new XAttribute(H.Name, internalName)
                            : null;

                        XElement tableCellData = null;
                        if (settings.IncludeXlsxTableCellData)
                        {
                            var xlsxTable = spreadsheetDocument.Table(tableName);
                            tableCellData = new XElement(H.TableData,
                                xlsxTable.TableRows()
                                    .Select(row =>
                                    {
                                        var rowElement = new XElement(H.Row,
                                            xlsxTable.TableColumns().Select(col =>
                                            {
                                                // If the cell converts to a null string,
                                                // fall back to an empty value rather than
                                                // letting XAttribute throw.
                                                var cellElement = new XElement(H.Cell,
                                                    new XAttribute(H.Name, col.Name),
                                                    new XAttribute(H.Val, (string)row[col.Name] ?? string.Empty));
                                                return cellElement;
                                            }));
                                        return rowElement;
                                    }));
                        }
                        var table = new XElement(H.Table,
                            internalNameAttr,
                            new XAttribute(H.DisplayName, tableName),
                            new XElement(H.Columns,
                                // Elements() tolerates a missing tableColumns element.
                                txd.Root.Elements(S.tableColumns).Elements(S.tableColumn)
                                .Select(tc => new XElement(H.Column,
                                    new XAttribute(H.Name, (string)tc.Attribute("name"))))),
                            tableCellData
                            );
                        return table;
                    })
                );
            if (!sheetInformation.HasElements)
                return null;
            return sheetInformation;
        }

        /// <summary>
        /// Builds the metrics element for a PresentationML document.
        /// </summary>
        /// <param name="pmlDoc">The presentation to inspect; its FileName is written to the report.</param>
        /// <param name="settings">Controls whether the namespace list and content type list are appended.</param>
        /// <returns>
        /// A Metrics element carrying FileName and FileType ("PresentationML") attributes,
        /// the collected validation elements and the optional namespace / content type lists.
        /// </returns>
        public static XElement GetPptxMetrics(PmlDocument pmlDoc, MetricsGetterSettings settings)
        {
            using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(pmlDoc))
            using (PresentationDocument pDoc = streamDoc.GetPresentationDocument())
            {
                List<XElement> metrics = new List<XElement>();

                // Each call receives the metrics list (presumably to append validation
                // errors to it); the boolean results are not consumed by this method.
                ValidateAgainstSpecificVersion(pDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2007, H.SdkValidationError2007);
                ValidateAgainstSpecificVersion(pDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2010, H.SdkValidationError2010);
#if !NET35
                ValidateAgainstSpecificVersion(pDoc, metrics, DocumentFormat.OpenXml.FileFormatVersions.Office2013, H.SdkValidationError2013);
#endif

                XElement report = new XElement(H.Metrics,
                    new XAttribute(H.FileName, pmlDoc.FileName),
                    new XAttribute(H.FileType, "PresentationML"),
                    metrics);

                if (settings.RetrieveNamespaceList)
                    report.Add(RetrieveNamespaceList(pDoc));
                if (settings.RetrieveContentTypeList)
                    report.Add(RetrieveContentTypeList(pDoc));

                return report;
            }
        }

        /// <summary>
        /// Builds a tree of the document's styles, nesting each style under the style it
        /// is based on.
        /// </summary>
        /// <param name="document">The word-processing document whose style definitions part is read.</param>
        /// <returns>
        /// A StyleHierarchy element containing nested Style elements (Id and, when
        /// present, Type attributes), or <c>null</c> when the document has no style
        /// definitions part.
        /// </returns>
        /// <remarks>
        /// Malformed input is tolerated rather than crashing or hanging: styles without a
        /// styleId are ignored, a basedOn reference to a style that does not exist ends
        /// the chain (the style is then placed at the top level), and circular basedOn
        /// chains are cut at the first repeated style.
        /// </remarks>
        private static object GetStyleHierarchy(WordprocessingDocument document)
        {
            var stylePart = document.MainDocumentPart.StyleDefinitionsPart;
            if (stylePart == null)
                return null;
            var xd = stylePart.GetXDocument();

            // Index the styles once so that base-style lookups are not repeated linear
            // scans. When a styleId occurs more than once the first definition wins,
            // matching a First/FirstOrDefault scan in document order.
            var stylesById = new Dictionary<string, XElement>();
            foreach (var style in xd.Root.Elements(W.style))
            {
                var id = (string)style.Attribute(W.styleId);
                if (id != null && !stylesById.ContainsKey(id))
                    stylesById.Add(id, style);
            }

            var stylesWithPath = xd.Root
                .Elements(W.style)
                .Where(s => s.Attribute(W.styleId) != null)
                .Select(s =>
                {
                    // chain[0] is the outermost ancestor, the last entry is the style itself.
                    var chain = new List<string>();
                    chain.Add((string)s.Attribute(W.styleId));
                    var visited = new HashSet<string>(chain);
                    var thisStyle = s;
                    while (true)
                    {
                        var baseStyle = (string)thisStyle.Elements(W.basedOn).Attributes(W.val).FirstOrDefault();

                        // Stop at the root of the chain, or when the chain loops back
                        // onto a style already seen (which would otherwise never end).
                        if (baseStyle == null || !visited.Add(baseStyle))
                            break;

                        // A reference to an undefined style cannot be placed in the
                        // tree, so the chain ends here.
                        XElement baseElement;
                        if (!stylesById.TryGetValue(baseStyle, out baseElement))
                            break;

                        chain.Insert(0, baseStyle);
                        thisStyle = baseElement;
                    }
                    return new { SortKey = string.Join("/", chain.ToArray()), Chain = chain };
                })
                // A parent's path is a prefix of its children's paths, so sorting puts
                // every parent ahead of the styles nested beneath it.
                .OrderBy(p => p.SortKey)
                .ToList();

            XElement styleHierarchy = new XElement(H.StyleHierarchy);
            foreach (var item in stylesWithPath)
            {
                XElement elementToAddTo = styleHierarchy;
                for (int i = 0; i < item.Chain.Count - 1; i++)
                {
                    var ancestorId = item.Chain[i];
                    var next = elementToAddTo
                        .Elements(H.Style)
                        .FirstOrDefault(z => (string)z.Attribute(H.Id) == ancestorId);

                    // Only reachable for circular chains, where an ancestor may not have
                    // been placed yet: attach at the deepest ancestor that was found.
                    if (next == null)
                        break;
                    elementToAddTo = next;
                }

                var styleToAdd = item.Chain[item.Chain.Count - 1];
                var type = (string)stylesById[styleToAdd].Attribute(W.type);
                elementToAddTo.Add(
                    new XElement(H.Style,
                        new XAttribute(H.Id, styleToAdd),
                        // XAttribute rejects null values; omit Type when the style has none.
                        type != null ? new XAttribute(H.Type, type) : null));
            }
            return styleHierarchy;
        }

        /// <summary>
        /// Produces the Part metrics element for one part of a word-processing document.
        /// </summary>
        /// <param name="part">The part to inspect.</param>
        /// <param name="settings">Forwarded to the content-control transform.</param>
        /// <returns>
        /// A Part element (ContentType and Uri attributes) wrapping the part's content
        /// controls, or <c>null</c> when there is nothing to report for the part.
        /// </returns>
        private static XElement GetMetricsForWmlPart(OpenXmlPart part, MetricsGetterSettings settings)
        {
            // Only these part types are scanned for content controls.
            bool isScannedPartType =
                part is MainDocumentPart ||
                part is HeaderPart ||
                part is FooterPart ||
                part is FootnotesPart ||
                part is EndnotesPart;

            XElement contentControls = null;
            if (isScannedPartType)
            {
                var transformed = (XElement)GetContentControlsTransform(part.GetXDocument().Root, settings);
                if (transformed.HasElements)
                    contentControls = transformed;
            }

            var partMetrics = new XElement(H.Part,
                new XAttribute(H.ContentType, part.ContentType),
                new XAttribute(H.Uri, part.Uri.ToString()),
                contentControls);

            // A Part element with no child elements carries no information.
            return partMetrics.HasElements ? partMetrics : null;
        }

        /// <summary>
        /// Recursive transform that reduces a part's XML to its content controls.
        /// </summary>
        /// <param name="node">The node being transformed.</param>
        /// <param name="settings">
        /// When <c>IncludeTextInContentControls</c> is set, non-element nodes are passed
        /// through to the output; otherwise they are dropped.
        /// </param>
        /// <returns>
        /// For the document root, a ContentControls element; for a w:sdt element, a
        /// ContentControl element with Type, optional Tag and Alias, and XPath attributes
        /// plus its transformed children; for any other element, the sequence of its
        /// transformed children; for non-element nodes, the node itself or <c>null</c>.
        /// </returns>
        private static object GetContentControlsTransform(XNode node, MetricsGetterSettings settings)
        {
            XElement element = node as XElement;
            if (element == null)
            {
                if (settings.IncludeTextInContentControls)
                    return node;
                return null;
            }

            if (element == element.Document.Root)
                return new XElement(H.ContentControls,
                    element.Nodes().Select(child => GetContentControlsTransform(child, settings)));

            // Anything that is not a content control is flattened away: only its
            // transformed children survive.
            if (element.Name != W.sdt)
                return element.Nodes().Select(child => GetContentControlsTransform(child, settings));

            var sdtPr = element.Elements(W.sdtPr);

            XAttribute tagAttr = null;
            var tag = (string)sdtPr.Elements(W.tag).Attributes(W.val).FirstOrDefault();
            if (tag != null)
                tagAttr = new XAttribute(H.Tag, tag);

            XAttribute aliasAttr = null;
            var alias = (string)sdtPr.Elements(W.alias).Attributes(W.val).FirstOrDefault();
            if (alias != null)
                aliasAttr = new XAttribute(H.Alias, alias);

            var xPathAttr = new XAttribute(H.XPath, element.GetXPath());

            // When several kind markers are present the precedence is: RichText, Picture,
            // Group, Equation, DropDownList, DocPartObj, DocPartList, Date, ComboBox,
            // Citation, Bibliography, Text. A control with no marker at all is RichText.
            string type;
            if (sdtPr.Elements(W.richText).Any())
                type = "RichText";
            else if (sdtPr.Elements(W.picture).Any())
                type = "Picture";
            else if (sdtPr.Elements(W.group).Any())
                type = "Group";
            else if (sdtPr.Elements(W.equation).Any())
                type = "Equation";
            else if (sdtPr.Elements(W.dropDownList).Any())
                type = "DropDownList";
            else if (sdtPr.Elements(W.docPartObj).Any())
                type = "DocPartObj";
            else if (sdtPr.Elements(W.docPartList).Any())
                type = "DocPartList";
            else if (sdtPr.Elements(W.date).Any())
                type = "Date";
            else if (sdtPr.Elements(W.comboBox).Any())
                type = "ComboBox";
            else if (sdtPr.Elements(W.citation).Any())
                type = "Citation";
            else if (sdtPr.Elements(W.bibliography).Any())
                type = "Bibliography";
            else if (sdtPr.Elements(W.text).Any())
                type = "Text";
            else
                type = "RichText";

            return new XElement(H.ContentControl,
                new XAttribute(H.Type, type),
                tagAttr,
                aliasAttr,
                xPathAttr,
                element.Nodes().Select(child => GetContentControlsTransform(child, settings)));
        }
    }

    /// <summary>
    /// Un-namespaced XML names used as element and attribute names in the reports
    /// built by <c>MetricsGetter</c>. Several names are not referenced in this file;
    /// they are presumably consumed by other tooling.
    /// </summary>
    /// <remarks>
    /// The fields are read-only so that shared names cannot be reassigned at run time.
    /// </remarks>
    public static class H
    {
        public static readonly XName ActiveX = "ActiveX";
        public static readonly XName Alias = "Alias";
        public static readonly XName AltChunk = "AltChunk";
        public static readonly XName Arguments = "Arguments";
        public static readonly XName AsciiCharCount = "AsciiCharCount";
        public static readonly XName AsciiRunCount = "AsciiRunCount";
        public static readonly XName AverageParagraphLength = "AverageParagraphLength";
        public static readonly XName BaselineReport = "BaselineReport";
        public static readonly XName Batch = "Batch";
        public static readonly XName BatchName = "BatchName";
        public static readonly XName BatchSelector = "BatchSelector";
        public static readonly XName CSCharCount = "CSCharCount";
        public static readonly XName CSRunCount = "CSRunCount";
        public static readonly XName Catalog = "Catalog";
        public static readonly XName CatalogList = "CatalogList";
        public static readonly XName CatalogListFile = "CatalogListFile";
        public static readonly XName CaughtException = "CaughtException";
        public static readonly XName Cell = "Cell";
        public static readonly XName Column = "Column";
        public static readonly XName Columns = "Columns";
        public static readonly XName ComplexField = "ComplexField";
        public static readonly XName Computer = "Computer";
        public static readonly XName Computers = "Computers";
        public static readonly XName ContentControl = "ContentControl";
        public static readonly XName ContentControls = "ContentControls";
        public static readonly XName ContentType = "ContentType";
        public static readonly XName ContentTypes = "ContentTypes";
        public static readonly XName CustomXmlMarkup = "CustomXmlMarkup";
        public static readonly XName DLL = "DLL";
        public static readonly XName DefaultDialogValuesFile = "DefaultDialogValuesFile";
        public static readonly XName DefaultValues = "DefaultValues";
        public static readonly XName Dependencies = "Dependencies";
        public static readonly XName DestinationDir = "DestinationDir";
        public static readonly XName Directory = "Directory";
        public static readonly XName DirectoryPattern = "DirectoryPattern";
        public static readonly XName DisplayName = "DisplayName";
        public static readonly XName DoJobQueueName = "DoJobQueueName";
        public static readonly XName Document = "Document";
        public static readonly XName DocumentProtection = "DocumentProtection";
        public static readonly XName DocumentSelector = "DocumentSelector";
        public static readonly XName DocumentType = "DocumentType";
        public static readonly XName Documents = "Documents";
        public static readonly XName EastAsiaCharCount = "EastAsiaCharCount";
        public static readonly XName EastAsiaRunCount = "EastAsiaRunCount";
        public static readonly XName ElementCount = "ElementCount";
        public static readonly XName EmbeddedXlsx = "EmbeddedXlsx";
        public static readonly XName Error = "Error";
        public static readonly XName Exception = "Exception";
        public static readonly XName Exe = "Exe";
        public static readonly XName ExeRoot = "ExeRoot";
        public static readonly XName Extension = "Extension";
        public static readonly XName File = "File";
        public static readonly XName FileLength = "FileLength";
        public static readonly XName FileName = "FileName";
        public static readonly XName FilePattern = "FilePattern";
        public static readonly XName FileType = "FileType";
        public static readonly XName Guid = "Guid";
        public static readonly XName HAnsiCharCount = "HAnsiCharCount";
        public static readonly XName HAnsiRunCount = "HAnsiRunCount";
        public static readonly XName RevisionTracking = "RevisionTracking";
        public static readonly XName Hyperlink = "Hyperlink";
        public static readonly XName IPAddress = "IPAddress";
        public static readonly XName Id = "Id";
        public static readonly XName Invalid = "Invalid";
        public static readonly XName InvalidHyperlink = "InvalidHyperlink";
        public static readonly XName InvalidHyperlinkException = "InvalidHyperlinkException";
        public static readonly XName InvalidSaveThroughXslt = "InvalidSaveThroughXslt";
        public static readonly XName JobComplete = "JobComplete";
        public static readonly XName JobExe = "JobExe";
        public static readonly XName JobName = "JobName";
        public static readonly XName JobSpec = "JobSpec";
        public static readonly XName Languages = "Languages";
        public static readonly XName LegacyFrame = "LegacyFrame";
        public static readonly XName LocalDoJobQueue = "LocalDoJobQueue";
        public static readonly XName MachineName = "MachineName";
        public static readonly XName MaxConcurrentJobs = "MaxConcurrentJobs";
        public static readonly XName MaxDocumentsInJob = "MaxDocumentsInJob";
        public static readonly XName MaxParagraphLength = "MaxParagraphLength";
        public static readonly XName Message = "Message";
        public static readonly XName Metrics = "Metrics";
        public static readonly XName MultiDirectory = "MultiDirectory";
        public static readonly XName MultiFontRun = "MultiFontRun";
        public static readonly XName MultiServerQueue = "MultiServerQueue";
        public static readonly XName Name = "Name";
        public static readonly XName Namespaces = "Namespaces";
        public static readonly XName Namespace = "Namespace";
        public static readonly XName NamespaceName = "NamespaceName";
        public static readonly XName NamespacePrefix = "NamespacePrefix";
        public static readonly XName Note = "Note";
        public static readonly XName NumberingFormatList = "NumberingFormatList";
        public static readonly XName ObjectDisposedException = "ObjectDisposedException";
        public static readonly XName ParagraphCount = "ParagraphCount";
        public static readonly XName Part = "Part";
        public static readonly XName Parts = "Parts";
        public static readonly XName PassedDocuments = "PassedDocuments";
        public static readonly XName Path = "Path";
        public static readonly XName ProduceCatalog = "ProduceCatalog";
        public static readonly XName ReferenceToNullImage = "ReferenceToNullImage";
        public static readonly XName Report = "Report";
        public static readonly XName Root = "Root";
        public static readonly XName RootDirectory = "RootDirectory";
        public static readonly XName Row = "Row";
        public static readonly XName RunCount = "RunCount";
        public static readonly XName RunWithoutRprCount = "RunWithoutRprCount";
        public static readonly XName SdkValidationError = "SdkValidationError";
        public static readonly XName SdkValidationError2007 = "SdkValidationError2007";
        public static readonly XName SdkValidationError2010 = "SdkValidationError2010";
        public static readonly XName SdkValidationError2013 = "SdkValidationError2013";
        public static readonly XName Sheet = "Sheet";
        public static readonly XName Sheets = "Sheets";
        public static readonly XName SimpleField = "SimpleField";
        public static readonly XName Skip = "Skip";
        public static readonly XName SmartTag = "SmartTag";
        public static readonly XName SourceRootDir = "SourceRootDir";
        public static readonly XName SpawnerJobExeLocation = "SpawnerJobExeLocation";
        public static readonly XName SpawnerReady = "SpawnerReady";
        public static readonly XName Style = "Style";
        public static readonly XName StyleHierarchy = "StyleHierarchy";
        public static readonly XName SubDocument = "SubDocument";
        public static readonly XName Table = "Table";
        public static readonly XName TableData = "TableData";
        public static readonly XName Tag = "Tag";
        public static readonly XName Take = "Take";
        public static readonly XName TextBox = "TextBox";
        public static readonly XName TrackRevisionsEnabled = "TrackRevisionsEnabled";
        public static readonly XName Type = "Type";
        public static readonly XName Uri = "Uri";
        public static readonly XName Val = "Val";
        public static readonly XName Valid = "Valid";
        public static readonly XName WindowStyle = "WindowStyle";
        public static readonly XName XPath = "XPath";
        public static readonly XName ZeroLengthText = "ZeroLengthText";
        public static readonly XName custDataLst = "custDataLst";
        public static readonly XName custShowLst = "custShowLst";
        public static readonly XName kinsoku = "kinsoku";
        public static readonly XName modifyVerifier = "modifyVerifier";
        public static readonly XName photoAlbum = "photoAlbum";
    }
}
