blob: eae9a12154cf33677d892144e3076422b2ac020c [file] [log] [blame]
/***************************************************************************
Copyright (c) Eric White 2016. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
Published at http://EricWhite.com
Resource Center and Documentation: http://ericwhite.com/
Developer: Eric White
Blog: http://www.ericwhite.com
Twitter: @EricWhiteDev
Email: eric@ericwhite.com
***************************************************************************/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using System.Drawing;
namespace OpenXmlPowerTools
{
/// <summary>
/// Describes one rule used when content types are assigned to document content.
/// NOTE(review): the code that evaluates rules is outside this part of the file; field
/// descriptions marked "presumably" are inferred from names and should be confirmed there.
/// </summary>
public class ContentTypeRule
{
// Content type name of this rule. ApplyContentTypes compares it with the ContentType attribute of each
// Extension element in WmlToXmlSettings.ContentTypeRegexExtension to find the rule to extend.
public string ContentType;
// presumably the exact style name that this rule matches - TODO confirm against the rule-matching code
public string StyleName;
// presumably a pattern matched against the style name - TODO confirm
public Regex StyleNameRegex;
// Regular expressions belonging to this rule. ApplyContentTypes appends the patterns listed in
// WmlToXmlSettings.ContentTypeRegexExtension to this array.
public Regex[] RegexArray;
// Custom predicate that receives an element, this rule, the document and the settings.
// presumably returns true when the element matches the rule - TODO confirm
public Func<XElement, ContentTypeRule, WordprocessingDocument, WmlToXmlSettings, bool> MatchLambda;
// Defaults to true. presumably controls whether run-level content type rules are applied to content
// matched by this rule - TODO confirm
public bool ApplyRunContentTypes = true;
}
/// <summary>
/// Describes a validation rule whose lambda produces a list of <see cref="WmlToXmlValidationError"/>.
/// NOTE(review): the code that invokes these rules is not in this part of the file; the meaning of the
/// lambda arguments (two documents, one element) should be confirmed at the call site.
/// </summary>
public class GlobalValidationRule
{
// Names of the rule(s) implemented by GlobalRuleLambda.
public string[] RuleNames;
// presumably one description per entry in RuleNames - TODO confirm
public string[] RuleDescriptions;
// Validation callback: receives this rule, two WordprocessingDocument instances, an XElement and the settings,
// and returns the validation errors it found.
public Func<GlobalValidationRule, WordprocessingDocument, WordprocessingDocument, XElement, WmlToXmlSettings, List<WmlToXmlValidationError>> GlobalRuleLambda;
// presumably marks findings of this rule as warnings rather than errors - TODO confirm
public bool IsOnlyWarning;
// presumably the message reported when the rule is violated - TODO confirm
public string Message;
}
/// <summary>
/// Describes a validation rule whose lambda produces a list of <see cref="WmlToXmlValidationError"/>
/// for block-level content.
/// NOTE(review): the code that invokes these rules is not in this part of the file; the meaning of the
/// two XElement lambda arguments should be confirmed at the call site.
/// </summary>
public class BlockLevelContentValidationRule
{
// Names of the rule(s) implemented by BlockLevelContentRuleLambda.
public string[] RuleNames;
// presumably one description per entry in RuleNames - TODO confirm
public string[] RuleDescriptions;
// presumably restricts the rule to content whose style name matches this pattern - TODO confirm
public Regex StyleNameRegex;
// Validation callback: receives an XElement, this rule, the document, a second XElement and the settings,
// and returns the validation errors it found.
public Func<XElement, BlockLevelContentValidationRule, WordprocessingDocument, XElement, WmlToXmlSettings, List<WmlToXmlValidationError>> BlockLevelContentRuleLambda;
// presumably marks findings of this rule as warnings rather than errors - TODO confirm
public bool IsOnlyWarning;
// presumably the message reported when the rule is violated - TODO confirm
public string Message;
}
/// <summary>
/// A single finding returned by the validation rule lambdas
/// (<see cref="GlobalValidationRule"/> and <see cref="BlockLevelContentValidationRule"/>).
/// </summary>
public class WmlToXmlValidationError
{
// presumably the name of the rule that produced this finding - TODO confirm
public string RuleName;
// presumably the text describing the problem - TODO confirm
public string ErrorMessage;
// This string is the same as the unid that is in the source document. It should be sufficient to identify and find
// any invalid paragraph, table, row, cell, or anything else in the source document.
// For now an integer is put into this attribute / id, but this is expected to become more elaborate.
// Open question from the original author: research exactly how to move to a specific paragraph or table
// in a document from a VSTO app.
public string BlockLevelContentIdentifier;
}
/// <summary>
/// Progress notification passed to <c>WmlToXmlSettings.ProgressFunction</c>.
/// </summary>
public class WmlToXmlProgressInfo
{
// ApplyContentTypes reports 0 here for its preparation stages.
// presumably the number of content items processed so far in later stages - TODO confirm
public int ContentCount;
// ApplyContentTypes reports 0 here for its preparation stages.
// presumably the total number of content items to process in later stages - TODO confirm
public int ContentTotal;
// Text describing the current stage; ApplyContentTypes terminates it with Environment.NewLine.
public string InProgressMessage;
}
/// <summary>
/// Holder for information used during the transform.
/// NOTE(review): no use of this class is visible in this part of the file.
/// </summary>
public class TransformInfo
{
// presumably the default language read from the styles part of the document - TODO confirm
public string DefaultLangFromStylesPart;
}
/// <summary>
/// Settings consumed by the <c>WmlToXml</c> methods: the content type rule sets, validation rules,
/// hierarchy definition, XML generation lambdas and assorted options.
/// </summary>
public class WmlToXmlSettings
{
    // Content type rule sets. When a ContentTypeRegexExtension is applied, the rule to extend is searched
    // in the Document rules first, then the DocumentType rules, then the Global rules.
    public List<ContentTypeRule> GlobalContentTypeRules;
    public List<ContentTypeRule> DocumentTypeContentTypeRules;
    public List<ContentTypeRule> DocumentContentTypeRules;
    public List<ContentTypeRule> RunContentTypeRules;

    // Validation rule sets.
    public List<GlobalValidationRule> GlobalValidationRules;
    public List<BlockLevelContentValidationRule> BlockLevelContentValidationRules;

    // Passed to AssembleListItemInformation by ApplyContentTypes.
    public ListItemRetrieverSettings ListItemRetrieverSettings;
    public bool? InjectCommentForContentTypes;

    // Definition of the allowed nesting of content types; one of its child elements must carry IsRoot="true".
    // Only the constructor overload that takes this value sets it.
    public XElement ContentTypeHierarchyDefinition;

    // Predicate used to assign levels: content for which it returns true is placed at level 1, other content at level 2.
    public Func<XElement, WmlToXmlSettings, bool> ContentTypeHierarchyLambda;

    // Generation callbacks keyed by content type name; the keys "Run" and "Hyperlink" are looked up for
    // runs without a content type and for hyperlinks.
    public Dictionary<string, Func<string, OpenXmlPart, XElement, WmlToXmlSettings, object>> XmlGenerationLambdas;

    public DirectoryInfo ImageBase;
    public bool WriteImageFiles = true;

    // Optional progress callback; it is skipped when null.
    public Action<WmlToXmlProgressInfo> ProgressFunction;

    // Optional document with Extension elements whose Regex entries are appended to existing rules.
    public XDocument ContentTypeRegexExtension;

    // Language used when the content itself does not specify one.
    public string DefaultLang;
    public object UserData;

    /// <summary>
    /// Creates settings with a content type hierarchy definition and a freshly constructed
    /// <see cref="ListItemRetrieverSettings"/>.
    /// </summary>
    public WmlToXmlSettings(
        List<ContentTypeRule> globalContentTypeRules,
        List<ContentTypeRule> documentTypeContentTypeRules,
        List<ContentTypeRule> documentContentTypeRules,
        List<ContentTypeRule> runContentTypeRules,
        List<GlobalValidationRule> globalValidationRules,
        List<BlockLevelContentValidationRule> blockLevelContentValidationRules,
        XElement contentTypeHierarchyDefinition,
        Func<XElement, WmlToXmlSettings, bool> contentTypeHierarchyLambda,
        Dictionary<string, Func<string, OpenXmlPart, XElement, WmlToXmlSettings, object>> xmlGenerationLambdas,
        DirectoryInfo imageBase,
        XDocument contentTypeRegexExtension)
        : this(
            globalContentTypeRules,
            documentTypeContentTypeRules,
            documentContentTypeRules,
            runContentTypeRules,
            globalValidationRules,
            blockLevelContentValidationRules,
            contentTypeHierarchyLambda,
            xmlGenerationLambdas,
            new ListItemRetrieverSettings(),
            imageBase,
            contentTypeRegexExtension)
    {
        ContentTypeHierarchyDefinition = contentTypeHierarchyDefinition;
    }

    /// <summary>
    /// Creates settings with caller-supplied <see cref="ListItemRetrieverSettings"/>.
    /// This overload leaves <see cref="ContentTypeHierarchyDefinition"/> unset.
    /// </summary>
    public WmlToXmlSettings(
        List<ContentTypeRule> globalContentTypeRules,
        List<ContentTypeRule> documentTypeContentTypeRules,
        List<ContentTypeRule> documentContentTypeRules,
        List<ContentTypeRule> runContentTypeRules,
        List<GlobalValidationRule> globalValidationRules,
        List<BlockLevelContentValidationRule> blockLevelContentValidationRules,
        Func<XElement, WmlToXmlSettings, bool> contentTypeHierarchyLambda,
        Dictionary<string, Func<string, OpenXmlPart, XElement, WmlToXmlSettings, object>> xmlGenerationLambdas,
        ListItemRetrieverSettings listItemRetrieverSettings,
        DirectoryInfo imageBase,
        XDocument contentTypeRegexExtension)
    {
        GlobalContentTypeRules = globalContentTypeRules;
        DocumentTypeContentTypeRules = documentTypeContentTypeRules;
        DocumentContentTypeRules = documentContentTypeRules;
        RunContentTypeRules = runContentTypeRules;
        GlobalValidationRules = globalValidationRules;
        BlockLevelContentValidationRules = blockLevelContentValidationRules;
        ListItemRetrieverSettings = listItemRetrieverSettings;
        ContentTypeHierarchyLambda = contentTypeHierarchyLambda;
        XmlGenerationLambdas = xmlGenerationLambdas;
        ImageBase = imageBase;
        ContentTypeRegexExtension = contentTypeRegexExtension;
    }
}
public static class WmlToXml
{
/// <summary>
/// Applies content types to an in-memory copy of <paramref name="document"/> and returns the modified document.
/// </summary>
/// <param name="document">The source document.</param>
/// <param name="settings">Rules and options that drive the content type assignment.</param>
/// <returns>The modified document retrieved from the memory stream wrapper.</returns>
public static WmlDocument ApplyContentTypes(WmlDocument document, WmlToXmlSettings settings)
{
    using (OpenXmlMemoryStreamDocument memoryStreamDoc = new OpenXmlMemoryStreamDocument(document))
    {
        // The WordprocessingDocument is disposed before the modified document is retrieved.
        using (WordprocessingDocument wordDoc = memoryStreamDoc.GetWordprocessingDocument())
        {
            ApplyContentTypes(wordDoc, settings);
        }
        WmlDocument annotatedDocument = memoryStreamDoc.GetModifiedWmlDocument();
        return annotatedDocument;
    }
}
/// <summary>
/// Prepares <paramref name="wDoc"/> and applies the content type rules in <paramref name="settings"/> to it.
/// The steps, in order: merge <c>settings.ContentTypeRegexExtension</c> into the existing rules, simplify the
/// markup, assemble formatting, record the default paragraph/character/table style ids, assemble list item
/// information, and finally apply the rule set.
/// </summary>
/// <param name="wDoc">The document to annotate; it is modified in place.</param>
/// <param name="settings">
/// Rules and options. Note that merging the regex extension replaces <c>RegexArray</c> on the matching rule
/// objects, so calling this method repeatedly with the same settings appends the extension patterns again.
/// </param>
/// <exception cref="OpenXmlPowerToolsException">
/// An Extension element names a content type for which no rule exists.
/// </exception>
public static void ApplyContentTypes(WordprocessingDocument wDoc, WmlToXmlSettings settings)
{
    // Shape of the XML expected in settings.ContentTypeRegexExtension:
    //
    //   <Extensions>
    //     <Extension ContentType='Introduction'>
    //       <RegexExtension>
    //         <Regex>.*Infroduction.*</Regex>
    //         <Regex>.*Entroduction.*</Regex>
    //       </RegexExtension>
    //     </Extension>
    //   </Extensions>
    if (settings.ContentTypeRegexExtension != null)
    {
        // Search order: Document rules, then DocumentType rules, then Global rules.
        // A rule list that was not supplied is treated as empty instead of failing inside Concat.
        var rules = (settings.DocumentContentTypeRules ?? Enumerable.Empty<ContentTypeRule>())
            .Concat(settings.DocumentTypeContentTypeRules ?? Enumerable.Empty<ContentTypeRule>())
            .Concat(settings.GlobalContentTypeRules ?? Enumerable.Empty<ContentTypeRule>())
            .ToList();

        foreach (var ext in settings.ContentTypeRegexExtension.Root.Elements("Extension"))
        {
            var ct = (string)ext.Attribute("ContentType");
            var ruleToUpdate = rules.FirstOrDefault(r => r.ContentType == ct);
            if (ruleToUpdate == null)
                throw new OpenXmlPowerToolsException("ContentTypeRegexExtension refers to content type that does not exist");

            // A rule may have been defined without any regular expressions (RegexArray == null).
            var oldRegexRules = ruleToUpdate.RegexArray ?? new Regex[0];
            var newRegexRules = ext
                .Elements("RegexExtension")
                .Elements("Regex")
                .Select(z => new Regex(z.Value));
            ruleToUpdate.RegexArray = oldRegexRules.Concat(newRegexRules).ToArray();
        }
    }

    // Reports the start of a stage to the optional progress callback.
    Action<string> reportProgress = message =>
    {
        if (settings.ProgressFunction == null)
            return;
        WmlToXmlProgressInfo pi = new WmlToXmlProgressInfo()
        {
            ContentCount = 0,
            ContentTotal = 0,
            InProgressMessage = message + Environment.NewLine,
        };
        settings.ProgressFunction(pi);
    };

    reportProgress("Simplify markup");
    SimplifyMarkupSettings markupSimplifierSettings = new SimplifyMarkupSettings()
    {
        AcceptRevisions = true,
        NormalizeXml = true,
        RemoveBookmarks = false,
        RemoveComments = true,
        RemoveContentControls = false,
        RemoveEndAndFootNotes = false,
        RemoveFieldCodes = false,
        RemoveGoBackBookmark = true,
        RemoveHyperlinks = false,
        RemoveLastRenderedPageBreak = true,
        RemoveMarkupForDocumentComparison = false,
        RemovePermissions = true,
        RemoveProof = true,
        RemoveRsidInfo = true,
        RemoveSmartTags = true,
        RemoveSoftHyphens = false,
        RemoveWebHidden = true,
        ReplaceTabsWithSpaces = false,
    };
    MarkupSimplifier.SimplifyMarkup(wDoc, markupSimplifierSettings);

    reportProgress("Assemble formatting");
    FormattingAssemblerSettings formattingAssemblerSettings = new FormattingAssemblerSettings()
    {
        RemoveStyleNamesFromParagraphAndRunProperties = false,
        RestrictToSupportedLanguages = false,
        RestrictToSupportedNumberingFormats = false,
    };
    FormattingAssembler.AssembleFormatting(wDoc, formattingAssemblerSettings);

    // Record the ids of the styles flagged as default for each style type.
    ContentTypeApplierInfo ctai = new ContentTypeApplierInfo();
    XDocument sXDoc = wDoc.MainDocumentPart.StyleDefinitionsPart.GetXDocument();
    Func<string, XElement> findDefaultStyle = styleType => sXDoc
        .Root
        .Elements(W.style)
        .FirstOrDefault(st => st.Attribute(W._default).ToBoolean() == true &&
            (string)st.Attribute(W.type) == styleType);

    XElement defaultParagraphStyle = findDefaultStyle("paragraph");
    if (defaultParagraphStyle != null)
        ctai.DefaultParagraphStyleName = (string)defaultParagraphStyle.Attribute(W.styleId);

    XElement defaultCharacterStyle = findDefaultStyle("character");
    if (defaultCharacterStyle != null)
        ctai.DefaultCharacterStyleName = (string)defaultCharacterStyle.Attribute(W.styleId);

    XElement defaultTableStyle = findDefaultStyle("table");
    if (defaultTableStyle != null)
        ctai.DefaultTableStyleName = (string)defaultTableStyle.Attribute(W.styleId);

    reportProgress("Assemble list item information");
    AssembleListItemInformation(wDoc, settings.ListItemRetrieverSettings);

    ApplyContentTypesForRuleSet(settings, ctai, wDoc);
}
/// <summary>
/// Opens <paramref name="document"/> in memory and produces its content type XML.
/// </summary>
/// <param name="document">The document, already annotated with content types.</param>
/// <param name="settings">Settings that drive the XML generation.</param>
/// <returns>The generated <c>ContentTypeXml</c> element.</returns>
public static XElement ProduceContentTypeXml(WmlDocument document, WmlToXmlSettings settings)
{
    using (OpenXmlMemoryStreamDocument memoryStreamDoc = new OpenXmlMemoryStreamDocument(document))
    using (WordprocessingDocument wordDoc = memoryStreamDoc.GetWordprocessingDocument())
    {
        XElement contentTypeXml = ProduceContentTypeXml(wordDoc, settings);
        return contentTypeXml;
    }
}
/// <summary>
/// Produces the ContentTypeXml element for the main document part of <paramref name="wDoc"/>.
/// Levels are assigned to the content, runs that belong to numbering fields are annotated, each level 1 child
/// of the body is transformed together with its nested children, and the result is arranged according to the
/// content type hierarchy definition in <paramref name="settings"/>.
/// </summary>
/// <param name="wDoc">The document to transform; its main document part is modified and saved along the way.</param>
/// <param name="settings">Settings holding the hierarchy lambda, hierarchy definition and XML generation lambdas.</param>
/// <returns>The ContentTypeXml element; an empty one when the document contains no paragraph.</returns>
/// <exception cref="OpenXmlPowerToolsException">The main document part contains no w:body element.</exception>
public static XElement ProduceContentTypeXml(WordprocessingDocument wDoc, WmlToXmlSettings settings)
{
var mainPart = wDoc.MainDocumentPart;
var mainXDoc = mainPart.GetXDocument();
// Sample of a styles part with document defaults, kept for reference only (excluded from compilation).
#if false
<w:styles xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" mc:Ignorable="w14 w15 w16se">
<w:docDefaults>
<w:rPrDefault>
<w:rPr>
<w:rFonts w:ascii="Georgia" w:eastAsiaTheme="minorHAnsi" w:hAnsi="Georgia" w:cs="Times New Roman"/>
<w:lang w:val="en-US" w:eastAsia="en-US" w:bidi="ar-SA"/>
</w:rPr>
</w:rPrDefault>
<w:pPrDefault/>
</w:docDefaults>
#endif
AssignLevelsToContent(mainXDoc, settings);
// Call RetrieveListItem so that all paragraphs are initialized with ListItemInfo.
// The returned value itself is not used below.
var firstParagraph = mainXDoc.Descendants(W.p).FirstOrDefault();
// If there is no content, then return an empty document.
if (firstParagraph == null)
return new XElement("ContentTypeXml");
var listItem = ListItemRetriever.RetrieveListItem(wDoc, firstParagraph);
// Annotate runs associated with fields, so that hyperlinks that are stored as fields can be retrieved.
FieldRetriever.AnnotateWithFieldInfo(wDoc.MainDocumentPart);
AnnotateRunsThatUseFieldsForNumbering(mainXDoc);
// Rebuild the tree with the runs around SEQ simple fields marked as list item runs, then save it to the part.
var newRoot = (XElement)AnnotateRunsThatUseFldSimple(mainXDoc.Root);
mainXDoc.Root.ReplaceWith(newRoot);
wDoc.MainDocumentPart.PutXDocument();
// Annotate the field runs again after the root has been replaced and saved.
// presumably needed because the rebuilt tree does not carry the earlier annotations - TODO confirm
FieldRetriever.AnnotateWithFieldInfo(wDoc.MainDocumentPart);
mainXDoc = wDoc.MainDocumentPart.GetXDocument();
var body = mainXDoc.Root.Descendants(W.body).FirstOrDefault();
if (body == null)
throw new OpenXmlPowerToolsException("Internal error: invalid document");
// Only direct children of the body that were given a level take part in the transform.
var contentList = body.Elements()
.Where(e => e.Attribute(PtOpenXml.Level) != null)
.ToList();
var rootLevelContentList = contentList
.Where(h => (int)h.Attribute(PtOpenXml.Level) == 1)
.ToList();
// Transform every level 1 item and append its nested (deeper level) items to the generated element.
var contentTypeXml = new XElement("ContentTypeXml",
rootLevelContentList
.Select(h =>
{
var childrenHeadings = GetChildrenHeadings(mainPart, contentList, h, settings);
XElement xml = (XElement)ProduceXmlTransform(mainPart, h, settings);
if (xml != null)
xml.Add(childrenHeadings);
return xml;
}));
// Nest the flat list of generated elements as dictated by the hierarchy definition.
contentTypeXml = HierarchyPerSettings(contentTypeXml, settings);
return contentTypeXml;
}
/// <summary>
/// Nests the flat children of <paramref name="contentTypeXml"/> according to
/// <c>settings.ContentTypeHierarchyDefinition</c>.
/// Each child is first given an indent level by walking a stack of currently open ancestors: a child that is
/// allowed below the element on top of the stack goes one level deeper; otherwise the stack is unwound until an
/// element that allows it is found. The levels are then turned into real nesting and removed again.
/// </summary>
/// <param name="contentTypeXml">Element whose direct children are the generated content elements; the children
/// are annotated with a temporary indent level attribute.</param>
/// <param name="settings">Settings that hold the hierarchy definition.</param>
/// <returns>A new ContentTypeXml element with the children nested.</returns>
/// <exception cref="OpenXmlPowerToolsException">
/// The hierarchy definition is missing, has no child element with IsRoot="true", or does not mention the name
/// of one of the content elements.
/// </exception>
private static XElement HierarchyPerSettings(XElement contentTypeXml, WmlToXmlSettings settings)
{
    var hierarchyDefinition = settings.ContentTypeHierarchyDefinition;
    // One of the WmlToXmlSettings constructors never sets the definition; fail with a clear message.
    if (hierarchyDefinition == null)
        throw new OpenXmlPowerToolsException("Invalid content type hierarchy definition - no hierarchy definition in settings");

    HashSet<XName> hierarchyElements = new HashSet<XName>(hierarchyDefinition.DescendantsAndSelf().Select(d => d.Name));
    Stack<XElement> stack = new Stack<XElement>();

    // The nullable conversion yields null for elements without an IsRoot attribute, so they are skipped
    // instead of making the search throw.
    var rootElement = hierarchyDefinition
        .Elements()
        .FirstOrDefault(e => (bool?)e.Attribute("IsRoot") == true);
    if (rootElement == null)
        throw new OpenXmlPowerToolsException("Invalid content type hierarchy definition - no root element");
    stack.Push(rootElement);

    var currentlyLookingAt = hierarchyDefinition.Element(rootElement.Name);
    foreach (var item in contentTypeXml.Elements())
    {
        if (!hierarchyElements.Contains(item.Name))
            throw new OpenXmlPowerToolsException(string.Format("Invalid Content Type Hierarchy Definition - missing def for {0}", item.Name));

        // Allowed as a child of the element on top of the stack: go one level deeper.
        var possibleChildItem = currentlyLookingAt.Element(item.Name);
        if (possibleChildItem != null && ItemSatisfiesHierarchyDefinition(possibleChildItem, item))
        {
            item.Add(new XAttribute(PtOpenXml.IndentLevel, stack.Count));
            stack.Push(item);
            currentlyLookingAt = FindCurrentlyLookingAt(hierarchyDefinition, item);
            continue;
        }

        // Otherwise unwind the stack until an element that allows this item as a child is found.
        while (true)
        {
            if (stack.Count == 1)
            {
                // have encountered an unexpected hierarchy element. have gone up the stack, and no element up the stack allows for this as a child element.
                // Therefore, put it at level one, and let the Narrdoc transform generate invalid narrdoc.
                item.Add(new XAttribute(PtOpenXml.IndentLevel, stack.Count));
                break;
            }
            stack.Pop();
            var last = stack.Peek();
            currentlyLookingAt = FindCurrentlyLookingAt(hierarchyDefinition, last);
            var possibleChildItem2 = currentlyLookingAt.Element(item.Name);
            if (possibleChildItem2 != null && ItemSatisfiesHierarchyDefinition(possibleChildItem2, item))
            {
                item.Add(new XAttribute(PtOpenXml.IndentLevel, stack.Count));
                stack.Push(item);
                currentlyLookingAt = FindCurrentlyLookingAt(hierarchyDefinition, item);
                break;
            }
        }
    }

    var hierarchicalContentTypeXml = new XElement("ContentTypeXml",
        HierarchyPerSettingsTransform(contentTypeXml.Elements(), 1));
    hierarchicalContentTypeXml.DescendantsAndSelf().Attributes(PtOpenXml.IndentLevel).Remove();
    return hierarchicalContentTypeXml;
}

// Returns true when every attribute of the definition element is present on the item with the same value
// (trivially true for a definition element without attributes).
private static bool ItemSatisfiesHierarchyDefinition(XElement definitionElement, XElement item)
{
    return definitionElement.Attributes().All(definitionAttribute =>
    {
        var itemAttribute = item.Attribute(definitionAttribute.Name);
        return itemAttribute != null && itemAttribute.Value == definitionAttribute.Value;
    });
}
/// <summary>
/// Finds the direct child of <paramref name="hierarchyDefinition"/> that describes <paramref name="item"/>:
/// it has the same name, and every attribute it declares is present on the item with the same value.
/// Candidates with more attributes are tried first, so the most specific definition wins.
/// </summary>
/// <exception cref="OpenXmlPowerToolsException">No child of the definition matches the item.</exception>
private static XElement FindCurrentlyLookingAt(XElement hierarchyDefinition, XElement item)
{
    var mostSpecificFirst = hierarchyDefinition
        .Elements(item.Name)
        .OrderByDescending(e => e.Attributes().Count());

    foreach (var candidate in mostSpecificFirst)
    {
        bool everyAttributeMatches = true;
        foreach (var definitionAttribute in candidate.Attributes())
        {
            var itemAttribute = item.Attribute(definitionAttribute.Name);
            if (itemAttribute == null || itemAttribute.Value != definitionAttribute.Value)
            {
                everyAttributeMatches = false;
                break;
            }
        }
        // A candidate without attributes matches unconditionally.
        if (everyAttributeMatches)
            return candidate;
    }

    throw new OpenXmlPowerToolsException("Internal error");
}
/// <summary>
/// Converts a flat run of elements carrying indent level attributes into nested elements.
/// Every element whose indent level equals <paramref name="level"/> starts a new group; the elements that
/// follow it become its nested content, produced by recursing with <c>level + 1</c>.
/// </summary>
/// <param name="list">Elements in document order, each carrying an indent level attribute.</param>
/// <param name="level">The indent level that starts a new group at this depth.</param>
/// <returns>A list with one new element per group, or null when <paramref name="list"/> is empty.</returns>
/// <exception cref="OpenXmlPowerToolsException">An element has no indent level attribute.</exception>
private static object HierarchyPerSettingsTransform(IEnumerable<XElement> list, int level)
{
    // small optimization - the code below would have the same effect, but this is more efficient.
    if (!list.Any())
        return null;

    // Partition into consecutive groups. Elements that precede the first element at this level
    // form a leading group of their own.
    var groups = new List<List<XElement>>();
    foreach (var element in list)
    {
        var indentAttribute = element.Attribute(PtOpenXml.IndentLevel);
        if (indentAttribute == null)
            throw new OpenXmlPowerToolsException(string.Format("Invalid Content Type Hierarchy Definition - missing def for {0}", element.Name));

        bool startsGroup = (int)indentAttribute == level;
        if (startsGroup || groups.Count == 0)
            groups.Add(new List<XElement>());
        groups[groups.Count - 1].Add(element);
    }

    // The head of each group is rebuilt with its own attributes and children, followed by the nested rest.
    var newContent = new List<XElement>();
    foreach (var group in groups)
    {
        var head = group[0];
        var nested = HierarchyPerSettingsTransform(group.Skip(1), level + 1);
        newContent.Add(new XElement(head.Name,
            head.Attributes(),
            head.Elements(),
            nested));
    }
    return newContent;
}
// this is where we need to do the same type of run annotation as for complex fields, but for simple fields.
// I think that we may need to split up the run following the simple field
#if false
<w:p pt:StyleName="Caption" pt:ContentType="Caption" pt:Level="2">
<w:r pt:ContentType="Span">
<w:t xml:space="preserve">Table </w:t>
</w:r>
<w:r>
<w:fldChar w:fldCharType="begin" />
</w:r>
<w:r>
<w:instrText xml:space="preserve"> STYLEREF 1 \s </w:instrText>
</w:r>
<w:r>
<w:fldChar w:fldCharType="separate" />
</w:r>
<w:r pt:ContentType="Span">
<w:t>1</w:t>
</w:r>
<w:r>
<w:fldChar w:fldCharType="end" />
</w:r>
<w:r pt:ContentType="Span">
<w:t>.</w:t>
</w:r>
<w:r>
<w:fldChar w:fldCharType="begin" />
</w:r>
<w:r>
<w:instrText xml:space="preserve"> SEQ Table \* ARABIC </w:instrText>
</w:r>
<w:r>
<w:fldChar w:fldCharType="separate" />
</w:r>
<w:r pt:ContentType="Span">
<w:t>1</w:t>
</w:r>
<w:r>
<w:fldChar w:fldCharType="end" />
</w:r>
<w:r pt:ContentType="Span">
<w:t>Type the title here</w:t>
</w:r>
</w:p>
#endif
/// <summary>
/// Marks the text runs that make up a caption number built from SEQ / STYLEREF complex fields with a
/// list item run attribute holding the number text.
/// For each such field: the paragraph of its result runs is located, separator characters ('.' and ' ') at
/// the start of the text following the last numbering field are moved into the preceding text run, and every
/// text run up to and including the last numbering field is annotated.
/// Does nothing when the root element carries no cached field annotation information.
/// </summary>
/// <param name="mainXDoc">Main document XML, already annotated by the field retriever.</param>
/// <exception cref="OpenXmlPowerToolsException">A field result run is not inside a paragraph.</exception>
private static void AnnotateRunsThatUseFieldsForNumbering(XDocument mainXDoc)
{
    var cachedAnnotationInformation = mainXDoc.Root.Annotation<Dictionary<int, List<XElement>>>();
    if (cachedAnnotationInformation == null)
        return;

    // The trailing number of the caption text, e.g. "1.2" or "A.3".
    Regex listItemNumberRegex = new Regex("[A-F0-9.]+$");

    // True when the field with the given id is a SEQ or STYLEREF field.
    Func<int, bool> isNumberingField = fieldId =>
    {
        var instrText = FieldRetriever.InstrText(mainXDoc.Root, fieldId).TrimStart('{').TrimEnd('}');
        var fi = FieldRetriever.ParseField(instrText);
        return string.Equals(fi.FieldType, "SEQ", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(fi.FieldType, "STYLEREF", StringComparison.OrdinalIgnoreCase);
    };

    foreach (var item in cachedAnnotationInformation)
    {
        if (!isNumberingField(item.Key))
            continue;

        // Text runs that hold (part of) the result of this field.
        var runsForField = mainXDoc
            .Root
            .Descendants()
            .Where(d =>
            {
                Stack<FieldRetriever.FieldElementTypeInfo> stack = d.Annotation<Stack<FieldRetriever.FieldElementTypeInfo>>();
                if (stack == null)
                    return false;
                return stack.Any(stackItem => stackItem.Id == item.Key && stackItem.FieldElementType == FieldRetriever.FieldElementTypeEnum.Result);
            })
            .Select(d => d.AncestorsAndSelf(W.r).FirstOrDefault())
            .Where(z9 => z9 != null)
            .GroupAdjacent(o => o)
            .Select(g => g.First())
            .Where(r => r.Element(W.t) != null)
            .ToList();
        if (!runsForField.Any())
            continue;

        var lastRun = runsForField.Last();
        var para = lastRun
            .Ancestors(W.p)
            .FirstOrDefault();
        if (para == null)
            throw new OpenXmlPowerToolsException("Internal error - invalid document");

        // if already processed
        if (para.Descendants(W.r).Any(r => r.Attribute(PtOpenXml.ListItemRun) != null))
            continue;

        // Last fldChar run, directly below the paragraph, that belongs to a numbering field.
        var lastFldCharRun = para
            .Elements(W.r)
            .LastOrDefault(r =>
            {
                if (r.Element(W.fldChar) == null)
                    return false;
                Stack<FieldRetriever.FieldElementTypeInfo> stack = r.Annotation<Stack<FieldRetriever.FieldElementTypeInfo>>();
                if (stack == null)
                    return false;
                return stack.Any(stackItem => isNumberingField(stackItem.Id));
            });

        // The field runs may not be direct children of the paragraph; there is then nothing to anchor on.
        if (lastFldCharRun == null)
            continue;

        var elementAfter = lastFldCharRun
            .ElementsAfterSelf(W.r)
            .FirstOrDefault();

        // elementAfter may be null - that is ok - the rest of the routine works properly in this case.
        var listItemText = para
            .Elements(W.r)
            .TakeWhile(e => e != elementAfter)
            .Select(r1 => r1.Descendants(W.t).Select(t => (string)t).StringConcatenate())
            .StringConcatenate()
            .Trim();

        var nextRun = lastFldCharRun
            .ElementsAfterSelf(W.r)
            .FirstOrDefault(nr => nr.Element(W.t) != null);

        string sepCharsString = "";
        if (nextRun != null)
        {
            // Closest preceding sibling run with text; it receives the separator characters.
            var runBeforeWithText = lastFldCharRun
                .ElementsBeforeSelf(W.r)
                .LastOrDefault(r => r.Element(W.t) != null);
            if (runBeforeWithText != null)
            {
                var lastFldCharRunText = runBeforeWithText.Element(W.t);
                var nextRunTextElement = nextRun.Element(W.t);
                var nextRunText = nextRunTextElement.Value;
                var sepCharCount = nextRunText
                    .TakeWhile(ch => ch == '.' || ch == ' ')
                    .Count();
                sepCharsString = nextRunText.Substring(0, sepCharCount);
                nextRunTextElement.Value = nextRunText.Substring(sepCharCount);
                lastFldCharRunText.Value = lastFldCharRunText.Value + sepCharsString;
            }
        }

        Match m = listItemNumberRegex.Match(listItemText);
        if (!m.Success)
            continue;

        string matchedValue = m.Value + sepCharsString;
        matchedValue = matchedValue.TrimStart('.');
        matchedValue = matchedValue.TrimEnd('.', ' ');
        foreach (var run in para.Elements(W.r).TakeWhile(e => e != elementAfter).Where(e => e.Element(W.t) != null))
            run.Add(new XAttribute(PtOpenXml.ListItemRun, matchedValue));
    }
}
#if false
<w:p pt14:StyleName="Caption">
<w:r>
<w:t xml:space="preserve">Box </w:t>
</w:r>
<w:fldSimple w:instr=" SEQ Box \* ARABIC ">
<w:r>
<w:t>1</w:t>
</w:r>
</w:fldSimple>
<w:r>
<w:t>. Type the title here</w:t>
</w:r>
</w:p>
#endif
/// <summary>
/// Recursively rebuilds the tree below <paramref name="node"/>. A paragraph that directly contains a
/// w:fldSimple whose instruction starts with SEQ is replaced by a paragraph in which the content before the
/// field, the field's runs and the leading ". " of the run after the field carry a list item run attribute
/// with the field's text as value. Other elements are copied with their children transformed; non-element
/// nodes are returned unchanged.
/// </summary>
/// <param name="node">The node to transform.</param>
/// <returns>The transformed element, or <paramref name="node"/> itself for non-element nodes.</returns>
private static object AnnotateRunsThatUseFldSimple(XNode node)
{
    var element = node as XElement;
    if (element == null)
        return node;

    // True for a w:fldSimple whose instruction starts with SEQ; a missing w:instr attribute is not a match.
    Func<XElement, bool> isSeqFldSimple = candidate =>
    {
        if (candidate.Name != W.fldSimple)
            return false;
        var instrText = (string)candidate.Attribute(W.instr);
        return instrText != null && instrText.Trim().StartsWith("SEQ", StringComparison.Ordinal);
    };

    XElement fldSimple = null;
    if (element.Name == W.p)
        fldSimple = element.Elements(W.fldSimple).FirstOrDefault(isSeqFldSimple);

    if (fldSimple == null)
    {
        return new XElement(element.Name,
            element.Attributes(),
            element.Nodes().Select(n => AnnotateRunsThatUseFldSimple(n)));
    }

    var listItemNum = fldSimple.Elements(W.r).Elements(W.t).Select(t => (string)t).StringConcatenate();

    // Everything before the SEQ field is cloned; clones with text that are not yet marked get the attribute.
    var runsBefore = element
        .Elements()
        .TakeWhile(e => !isSeqFldSimple(e))
        .Select(e =>
        {
            var newE = new XElement(e); // clone
            if (e.Value != "" && e.Attribute(PtOpenXml.ListItemRun) == null)
                newE.Add(new XAttribute(PtOpenXml.ListItemRun, listItemNum));
            return newE;
        })
        .ToList();

    // The children of the field are lifted out of the w:fldSimple and marked.
    var fldSimpleRuns = fldSimple.Elements().Select(e =>
    {
        var newE = new XElement(e.Name,
            e.Attributes(),
            new XAttribute(PtOpenXml.ListItemRun, listItemNum),
            e.Elements());
        return newE;
    });

    // we have to do some funny business here because Word puts the ". " as part of the text following the
    // fldSimple, and we want that text to be part of the list item.
    // The run after the field is therefore split into a separator part (marked) and the remainder.
    // There is no such run when the field is the last run of the paragraph.
    var runAfter = fldSimple.ElementsAfterSelf(W.r).FirstOrDefault();
    XElement runAfterListItemElement = null;
    XElement runAfterRemainderElement = null;
    if (runAfter != null)
    {
        var runAfterText = runAfter.Elements(W.t).Select(t => (string)t).StringConcatenate();
        var separatorLength = runAfterText.Length - runAfterText.TrimStart('.', ' ').Length;
        if (separatorLength != 0)
        {
            runAfterListItemElement = new XElement(W.r,
                runAfter.Attributes(),
                new XAttribute(PtOpenXml.ListItemRun, listItemNum),
                runAfter.Elements(W.rPr),
                new XElement(W.t, runAfterText.Substring(0, separatorLength)));
        }
        runAfterRemainderElement = new XElement(W.r,
            runAfter.Attributes(),
            runAfter.Elements(W.rPr),
            new XElement(W.t, runAfterText.Substring(separatorLength)));
    }

    // NOTE(review): only w:r siblings after the field are carried over; any other element that follows
    // the field in the paragraph is not copied into the new paragraph - confirm this is intended.
    var newPara = new XElement(W.p,
        element.Attributes(),
        runsBefore,
        fldSimpleRuns,
        runAfterListItemElement,
        runAfterRemainderElement,
        fldSimple.ElementsAfterSelf(W.r).Skip(1));
    return newPara;
}
// This method produces the XML for an endnote or footnote. The blockLevelContentContainer is the w:endnote or
// w:footnote element; the method produces the content type XML for its contents, to be inserted in situ in the
// ContentTypeXml.
//
// Returns a lazily evaluated sequence with one generated element per level 1 child of the container
// (null entries for children that produce no XML). The container's content is annotated with level attributes.
public static object ProduceContentTypeXmlForBlockLevelContentContainer(WordprocessingDocument wDoc, WmlToXmlSettings settings, OpenXmlPart part, XElement blockLevelContentContainer)
{
    AssignLevelsToContentForEndFootNote(blockLevelContentContainer, settings);

    // Call RetrieveListItem so that all paragraphs are initialized with ListItemInfo.
    // A container without any paragraph has nothing to initialize.
    var firstParagraph = blockLevelContentContainer.Descendants(W.p).FirstOrDefault();
    if (firstParagraph != null)
        ListItemRetriever.RetrieveListItem(wDoc, firstParagraph);

    var contentList = blockLevelContentContainer.Elements()
        .Where(e => e.Attribute(PtOpenXml.Level) != null)
        .ToList();
    var rootLevelContentList = contentList
        .Where(h => (int)h.Attribute(PtOpenXml.Level) == 1)
        .ToList();
    var contentTypeXml = rootLevelContentList
        .Select(h =>
        {
            var childrenHeadings = GetChildrenHeadings(part, contentList, h, settings);
            XElement xml = (XElement)ProduceXmlTransform(part, h, settings);
            if (xml != null)
                xml.Add(childrenHeadings);
            return xml;
        });
    return contentTypeXml;
}
/// <summary>
/// Produces the XML for the direct children of <paramref name="parent"/>: the items that follow it in
/// <paramref name="contentList"/>, up to the first item whose level is not deeper than the parent's, and
/// whose level is exactly one deeper. Each child is generated together with its own children.
/// </summary>
/// <returns>A lazily evaluated sequence of generated elements (null for items that produce no XML).</returns>
private static object GetChildrenHeadings(OpenXmlPart part, List<XElement> contentList, XElement parent, WmlToXmlSettings settings)
{
    // Items after the parent that are nested below it at any depth.
    var nestedItems = contentList
        .SkipWhile(candidate => candidate != parent)
        .Skip(1)
        .TakeWhile(candidate => (int)candidate.Attribute(PtOpenXml.Level) > (int)parent.Attribute(PtOpenXml.Level));

    // Only the items exactly one level deeper are direct children.
    var directChildren = nestedItems
        .Where(candidate => (int)candidate.Attribute(PtOpenXml.Level) == (int)parent.Attribute(PtOpenXml.Level) + 1);

    Func<XElement, XElement> generate = child =>
    {
        var grandChildren = GetChildrenHeadings(part, contentList, child, settings);
        XElement generated = (XElement)ProduceXmlTransform(part, child, settings);
        if (generated != null)
            generated.Add(grandChildren);
        return generated;
    };

    return directChildren.Select(generate);
}
/// <summary>
/// Produces the generated XML for one node of the annotated document.
/// <list type="bullet">
/// <item>w:t and w:fldSimple: the transformed child nodes are returned.</item>
/// <item>w:r without a content type: the "Run" generation lambda is invoked.</item>
/// <item>w:hyperlink: the "Hyperlink" generation lambda is invoked.</item>
/// <item>Element with a content type: the lambda registered for that content type is invoked; when the result
/// is an element it receives a Lang attribute (for languages not starting with "en") and the Unid of the
/// source element. Without a registered lambda a default element named after the content type is built.</item>
/// <item>Any other element: null. Non-element nodes are returned unchanged.</item>
/// </list>
/// </summary>
/// <param name="part">The part that contains <paramref name="node"/>; handed to the generation lambdas.</param>
/// <param name="node">The node to transform.</param>
/// <param name="settings">Settings holding the XML generation lambdas and the default language.</param>
/// <exception cref="ArgumentOutOfRangeException"><c>settings.XmlGenerationLambdas</c> is null.</exception>
/// <exception cref="OpenXmlPowerToolsException">The required "Run" or "Hyperlink" lambda is missing.</exception>
public static object ProduceXmlTransform(OpenXmlPart part, XNode node, WmlToXmlSettings settings)
{
    var element = node as XElement;
    if (element != null)
    {
        if (settings.XmlGenerationLambdas == null)
            throw new ArgumentOutOfRangeException("settings", "Xml Generation Lambdas are required");

        var contentType = (string)element.Attribute(PtOpenXml.ContentType);
        if (element.Name == W.t || element.Name == W.fldSimple)
            return element.Nodes().Select(z => ProduceXmlTransform(part, z, settings));

        Func<string, OpenXmlPart, XElement, WmlToXmlSettings, object> lambda;

        if (contentType == null && element.Name == W.r)
        {
            if (!settings.XmlGenerationLambdas.TryGetValue("Run", out lambda))
                throw new OpenXmlPowerToolsException("Entry for Run content type in XML generation lambdas is required");
            return lambda(contentType, part, element, settings);
        }

        if (element.Name == W.hyperlink)
        {
            if (!settings.XmlGenerationLambdas.TryGetValue("Hyperlink", out lambda))
                throw new OpenXmlPowerToolsException("Entry for Hyperlink content type in XML generation lambdas is required");
            return lambda(contentType, part, element, settings);
        }

        if (contentType != null)
        {
            if (settings.XmlGenerationLambdas.TryGetValue(contentType, out lambda))
            {
                var newElement = lambda(contentType, part, element, settings);

                // Language of the paragraph mark, falling back to the default language of the settings.
                string lang = (string)element.Elements(W.pPr).Elements(W.rPr).Elements(W.lang).Attributes(W.val).FirstOrDefault();
                if (lang == null)
                    lang = settings.DefaultLang;

                // Lang and Unid can only be attached when the lambda produced an element.
                var generated = newElement as XElement;
                if (generated != null)
                {
                    if (lang != null && !lang.StartsWith("en", StringComparison.Ordinal)) // TODO we are not generating lang if English, but this needs revised after analysis
                        generated.Add(new XAttribute("Lang", lang));
                    var unid = element.Attribute(PtOpenXml.Unid);
                    if (unid != null)
                        generated.Add(new XAttribute("Unid", unid.Value));
                }
                return newElement;
            }

            // if there is no rule for this content type, then generate the default, for now.
            // todo this is not ideal in my mind. Need to think about this more. Maybe every content type
            // must have a generation lambda.
            return new XElement(contentType, new XElement("Content",
                element.Elements().Select(rce => ProduceXmlTransform(part, rce, settings))));
        }

        // ignore any other elements
        return null;
    }
#if false
    // The following code inserts an XML comment for unicode characters above 256
    // This could be made more efficient - group characters together and create fewer XText nodes.
    // As it is, it is pretty slow, so should be used only for debugging.
    var xt = node as XText;
    if (xt != null)
    {
        var newContent = xt.Value.Select(c =>
        {
            var ic = (int)c;
            if (ic < 256)
                return (object)new XText(c.ToString());
            return new[] {
                (object)new XText(c.ToString()),
                new XComment(ic.ToString("X")),
            };
        })
        .ToList();
        return newContent;
    }
#endif
    return node;
}
// Stamps a pt:Level attribute on every paragraph, table, row and cell of the main document
// that already carries a pt:ContentType attribute.
//
// Elements are visited in document order.  When GetIndentLevel yields a level, that level is
// written (unless the element already has a pt:Level attribute) and the running level becomes
// one deeper.  When it yields null, the element receives the current running level.
private static void AssignLevelsToContent(XDocument mainXDoc, WmlToXmlSettings settings)
{
    // Snapshot the candidates first so that adding attributes does not interact with the query.
    var typedBlocks = mainXDoc
        .Root
        .Descendants()
        .Where(e => (e.Name == W.p || e.Name == W.tbl || e.Name == W.tr || e.Name == W.tc)
            && e.Attribute(PtOpenXml.ContentType) != null)
        .ToList();

    int runningLevel = 1;
    foreach (var block in typedBlocks)
    {
        int? explicitLevel = GetIndentLevel(block, settings);
        if (!explicitLevel.HasValue)
        {
            // No level of its own: inherit the running level.
            block.Add(new XAttribute(PtOpenXml.Level, runningLevel));
            continue;
        }

        if (block.Attribute(PtOpenXml.Level) == null)
        {
            block.Add(new XAttribute(PtOpenXml.Level, explicitLevel.Value));
        }

        // Content that follows sits one level below this element.
        runningLevel = explicitLevel.Value + 1;
    }
}
// Gives every content-typed paragraph, table, row and cell below the supplied container a
// pt:Level attribute of 1.  The settings argument is not consulted.
private static void AssignLevelsToContentForEndFootNote(XElement blockLevelContentContainer, WmlToXmlSettings settings)
{
    // Materialize the matches before mutating them.
    List<XElement> typedBlocks = blockLevelContentContainer
        .Descendants()
        .Where(e => (e.Name == W.p || e.Name == W.tbl || e.Name == W.tr || e.Name == W.tc)
            && e.Attribute(PtOpenXml.ContentType) != null)
        .ToList();

    foreach (XElement block in typedBlocks)
    {
        block.Add(new XAttribute(PtOpenXml.Level, 1));
    }
}
// Maps a block-level element to an indent level using settings.ContentTypeHierarchyLambda:
// 1 when the lambda returns true for the element, 2 otherwise.  Although the return type is
// nullable, this implementation never returns null.
private static int? GetIndentLevel(XElement blockLevelContent, WmlToXmlSettings settings)
{
    return settings.ContentTypeHierarchyLambda(blockLevelContent, settings) ? 1 : 2;
}
// Apply the Document rules first, then apply the DocumentType rules, then apply the Global rules. First one that matches, wins.
// Runs the content type rules over the main document part and then, when they exist, over the
// endnotes part and the footnotes part (in that order).
private static void ApplyContentTypesForRuleSet(WmlToXmlSettings settings, ContentTypeApplierInfo ctai, WordprocessingDocument wDoc)
{
    var mainPart = wDoc.MainDocumentPart;
    ApplyRulesToPart(settings, ctai, wDoc, mainPart);

    var endnotesPart = mainPart.EndnotesPart;
    if (endnotesPart != null)
    {
        ApplyRulesToPart(settings, ctai, wDoc, endnotesPart);
    }

    var footnotesPart = mainPart.FootnotesPart;
    if (footnotesPart != null)
    {
        ApplyRulesToPart(settings, ctai, wDoc, footnotesPart);
    }
}
// Applies the block-level content type rules to every paragraph, table, row and cell of one part.
//
// For each block the rules are tried in the order Document rules, DocumentType rules, Global
// rules; the first rule whose every criterion passes wins.  A rule's criteria are checked in
// this order, stopping at the first failure:
//   1. StyleName      - case-insensitive comparison with the block's style name
//   2. StyleNameRegex - regex over the block's style name
//   3. RegexArray     - at least one regex must match the block's text content
//   4. MatchLambda    - arbitrary predicate
// The winning rule's ContentType is written to the block and, when the rule asks for it, run
// level content types are applied inside the block.
//
// settings.ProgressFunction, when set, is called before, during and after the pass.
// The part's XDocument is saved at the end; for the main document part the comments part is
// saved as well when it exists.
private static void ApplyRulesToPart(WmlToXmlSettings settings, ContentTypeApplierInfo ctai, WordprocessingDocument wDoc, OpenXmlPart part)
{
    var partXDoc = part.GetXDocument();
    var styleXDoc = wDoc.MainDocumentPart.StyleDefinitionsPart.GetXDocument();

    // Deferred query: enumerated once for the count (only when progress is reported) and once by the loop below.
    var blockContent = partXDoc.Descendants()
        .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr || d.Name == W.tc);

    // Resolves a style id of the given style type ("paragraph" or "table") to the style's name
    // (w:name/@w:val); null when there is no such style.
    Func<string, string, string> lookupStyleName = (styleId, styleType) =>
        (string)styleXDoc
            .Root
            .Elements(W.style)
            .Where(s => (string)s.Attribute(W.styleId) == styleId && (string)s.Attribute(W.type) == styleType)
            .Elements(W.name)
            .Attributes(W.val)
            .FirstOrDefault();

    int totalCount = 0;
    if (settings.ProgressFunction != null)
    {
        totalCount = blockContent.Count();
        string message;
        if (part is MainDocumentPart)
            message = "Apply rules to main document part";
        else if (part is EndnotesPart)
            message = "Apply rules to endnotes part";
        else
            message = "Apply rules to footnotes part";
        WmlToXmlProgressInfo pi = new WmlToXmlProgressInfo()
        {
            ContentTotal = totalCount,
            ContentCount = 0,
            InProgressMessage = message + Environment.NewLine,
        };
        settings.ProgressFunction(pi);
    }

    var count = 0;
    foreach (var blc in blockContent)
    {
        if (settings.ProgressFunction != null)
        {
            ++count;
            // Report every block at first, then every tenth block, and always the last one.
            if (count < 50 || count % 10 == 0 || count == totalCount)
            {
                var msg = string.Format(" {0} of {1}", count, totalCount);
                // Trailing backspaces move a console cursor back so the next message overwrites this one.
                msg += "".PadRight(msg.Length, '\b');
                WmlToXmlProgressInfo pi2 = new WmlToXmlProgressInfo()
                {
                    ContentTotal = totalCount,
                    ContentCount = count,
                    InProgressMessage = msg,
                };
                settings.ProgressFunction(pi2);
            }
        }

        // Style name of the block.  Only paragraphs and tables have one; it stays null for rows
        // and cells, and also when neither an explicit nor a default style name is available.
        string styleOfBlc = null;
        if (blc.Name == W.p)
        {
            var styleIdOfBlc = (string)blc.Elements(W.pPr).Elements(W.pStyle).Attributes(W.val).FirstOrDefault();
            if (styleIdOfBlc != null)
                styleOfBlc = lookupStyleName(styleIdOfBlc, "paragraph");
            if (styleOfBlc == null)
                styleOfBlc = ctai.DefaultParagraphStyleName;
        }
        else if (blc.Name == W.tbl)
        {
            var styleIdOfBlc = (string)blc.Elements(W.tblPr).Elements(W.tblStyle).Attributes(W.val).FirstOrDefault();
            if (styleIdOfBlc != null)
                styleOfBlc = lookupStyleName(styleIdOfBlc, "table");
            if (styleOfBlc == null)
                styleOfBlc = ctai.DefaultTableStyleName;
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // The following is useful to get a list of all content types and the code gen list
        //var contentTypeList = settings
        //    .DocumentContentTypeRules
        //    .Concat(settings.DocumentTypeContentTypeRules)
        //    .Concat(settings.GlobalContentTypeRules)
        //    .Select(ct => ct.ContentType)
        //    .Distinct()
        //    .OrderBy(n => n)
        //    .ToList();
        //var contentTypeCodeGenList = settings
        //    .XmlGenerationLambdas
        //    .Select(xgl => xgl.Key)
        //    .OrderBy(n => n)
        //    .ToList();
        //var rulesWithoutGenCode = contentTypeList
        //    .Except(contentTypeCodeGenList)
        //    .ToList();
        //var codeGenWithoutRules = contentTypeCodeGenList
        //    .Except(contentTypeList)
        //    .ToList();
        //var s10 = codeGenWithoutRules.Select(m => m + Environment.NewLine).StringConcatenate();
        //Console.WriteLine(s10);
        //var s9 = contentTypeList.Select(m => m + Environment.NewLine).StringConcatenate();
        //Console.WriteLine(s9);

        // Apply the Document rules first, then apply the DocumentType rules, then apply the Global rules. First one that matches, wins.
        foreach (var rule in settings.DocumentContentTypeRules.Concat(settings.DocumentTypeContentTypeRules).Concat(settings.GlobalContentTypeRules))
        {
            // 1. Style name.  Ordinal, case-insensitive: independent of the current culture and
            //    safe when the block has no style name (a null name never equals a rule's name).
            if (rule.StyleName != null && !string.Equals(rule.StyleName, styleOfBlc, StringComparison.OrdinalIgnoreCase))
                continue;

            // 2. Style name regex.  A block without a style name cannot satisfy a style regex.
            if (rule.StyleNameRegex != null && (styleOfBlc == null || !rule.StyleNameRegex.IsMatch(styleOfBlc)))
                continue;

            // 3. Content regexes: one match is enough; an empty array never matches.
            if (rule.RegexArray != null)
            {
                bool anyRegexMatched = false;
                for (int i = 0; i < rule.RegexArray.Length; i++)
                {
                    // clone the blc because OpenXmlRegex.Match replaces content, mucks with the run, probably should not if it only is used to find content.
                    var clonedBlc = new XElement(blc);

                    // following removes the subtitle created by a soft break, so that the pattern matches appropriately.
                    clonedBlc = RemoveContentAfterBR(clonedBlc);

                    // remove list item runs so that they are not matched in the content.  Such a run looks like:
                    //   <r p1:ListItemRun="1.1" p1:FontName="Georgia" p1:LanguageType="western">
                    //     <t xml:space="preserve">1.1</t>
                    //   </r>
                    clonedBlc.Elements(W.r).Where(r => r.Attribute(PtOpenXml.ListItemRun) != null).Remove();

                    if (OpenXmlRegex.Match(new[] { clonedBlc }, rule.RegexArray[i]) != 0)
                    {
                        anyRegexMatched = true;
                        break;
                    }
                }
                if (!anyRegexMatched)
                    continue;
            }

            // 4. Custom predicate.
            if (rule.MatchLambda != null && !rule.MatchLambda(blc, rule, wDoc, settings))
                continue;

            // Every criterion passed: this rule wins for the block.
            AddContentTypeToBlockContent(settings, part, blc, rule.ContentType);
            if (rule.ApplyRunContentTypes)
                ApplyRunContentTypes(settings, ctai, wDoc, blc, settings.RunContentTypeRules, part, partXDoc);
            break;
        }
    }

    if (settings.ProgressFunction != null)
    {
        WmlToXmlProgressInfo pi = new WmlToXmlProgressInfo()
        {
            ContentTotal = totalCount,
            ContentCount = totalCount,
            InProgressMessage = Environment.NewLine + " Done" + Environment.NewLine,
        };
        settings.ProgressFunction(pi);
    }

    part.PutXDocument();

    // Saves the comments part too when the main document has one (the comments XDocument is
    // edited by AddContentTypeToBlockContent / AddContentTypeToRunContent when comment injection is on).
    var mainPart = part as MainDocumentPart;
    if (mainPart != null)
    {
        if (mainPart.WordprocessingCommentsPart != null)
            mainPart.WordprocessingCommentsPart.PutXDocument();
    }
}
// For a paragraph, builds a new element with the same name and attributes that keeps only the
// leading child elements up to (not including) the first child that directly contains a w:br.
// Any other element is handed back untouched.
private static XElement RemoveContentAfterBR(XElement clonedBlc)
{
    if (clonedBlc.Name == W.p)
    {
        var childrenBeforeBreak = clonedBlc
            .Elements()
            .TakeWhile(child => child.Element(W.br) == null);

        return new XElement(clonedBlc.Name, clonedBlc.Attributes(), childrenBeforeBreak);
    }

    return clonedBlc;
}
// Assigns run-level content types inside one block-level element.
//
//   * w:hyperlink      - every run below it is tagged "Hyperlink"
//   * w:bookmarkStart  - tagged "Anchor"
//   * w:r and w:sdt    - the run rules are tried in list order; the first rule that applies wins
//
// A run rule applies when its StyleName (if any) equals the run's character style value and its
// MatchLambda (if any) returns true.  A run rule that carries a RegexArray is rejected with an
// OpenXmlPowerToolsException once it has passed the style check.
private static void ApplyRunContentTypes(WmlToXmlSettings settings, ContentTypeApplierInfo ctai, WordprocessingDocument wDoc,
    XElement blockLevelContent, List<ContentTypeRule> runContentTypeRuleList, OpenXmlPart part, XDocument mainXDoc)
{
    var runLevelContent = blockLevelContent
        .Descendants()
        .Where(e => e.Name == W.r || e.Name == W.hyperlink || e.Name == W.sdt || e.Name == W.bookmarkStart);

    foreach (var rlc in runLevelContent)
    {
        if (rlc.Name == W.hyperlink)
        {
            foreach (var linkedRun in rlc.Descendants(W.r))
            {
                AddContentTypeToRunContent(settings, part, linkedRun, "Hyperlink");
            }
            continue;
        }

        if (rlc.Name == W.bookmarkStart)
        {
            AddContentTypeToRunContent(settings, part, rlc, "Anchor");
            continue;
        }

        // Only w:r and w:sdt remain (the query above admits nothing else).
        var runStyle = (string)rlc.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault()
            ?? ctai.DefaultCharacterStyleName;

        foreach (var rule in runContentTypeRuleList)
        {
            bool styleMatches = rule.StyleName == null || rule.StyleName == runStyle;
            if (!styleMatches)
            {
                continue;
            }

            if (rule.RegexArray != null)
            {
                throw new OpenXmlPowerToolsException("Invalid Run ContentType Rule - Regex not allowed");
            }

            bool ruleApplies = rule.MatchLambda == null || rule.MatchLambda(rlc, rule, wDoc, settings);
            if (ruleApplies)
            {
                AddContentTypeToRunContent(settings, part, rlc, rule.ContentType);
                break;
            }
        }
    }
}
// Attributes for the root w:comments element of a comments part created by this class
// (see the `new XElement(W.comments, NamespaceAttributes)` calls): one xmlns declaration per
// prefix, followed by an mc:Ignorable attribute naming the w14, wp14, w15, w16se and pt prefixes.
private static XAttribute[] NamespaceAttributes =
{
new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc),
new XAttribute(XNamespace.Xmlns + "mc", MC.mc),
new XAttribute(XNamespace.Xmlns + "o", O.o),
new XAttribute(XNamespace.Xmlns + "r", R.r),
new XAttribute(XNamespace.Xmlns + "m", M.m),
new XAttribute(XNamespace.Xmlns + "v", VML.vml),
new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14),
new XAttribute(XNamespace.Xmlns + "wp", WP.wp),
new XAttribute(XNamespace.Xmlns + "w10", W10.w10),
new XAttribute(XNamespace.Xmlns + "w", W.w),
new XAttribute(XNamespace.Xmlns + "w14", W14.w14),
new XAttribute(XNamespace.Xmlns + "w15", W15.w15),
new XAttribute(XNamespace.Xmlns + "w16se", W16SE.w16se),
new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg),
new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi),
new XAttribute(XNamespace.Xmlns + "wne", WNE.wne),
new XAttribute(XNamespace.Xmlns + "wps", WPS.wps),
new XAttribute(XNamespace.Xmlns + "pt", PtOpenXml.pt),
// Must list only prefixes that are declared above.
new XAttribute(MC.Ignorable, "w14 wp14 w15 w16se pt"),
};
// Tags a block-level element with its content type.
//
// Always:
//   * adds the pt:ContentType attribute to the element;
//   * makes sure the root of the element's tree declares the "pt" namespace prefix and lists
//     "pt" in mc:Ignorable.
//
// Additionally, when the part is the main document part and settings.InjectCommentForContentTypes
// is true, a Word comment whose text is the content type is appended to the comments part
// (created on demand), a comment reference run is placed in the block when that is easy, and
// the comment-related styles are added to the styles part if they are missing.
//
// Throws ContentApplierException when comments are injected into a document without a styles
// part, or when the element has no ancestors.
private static void AddContentTypeToBlockContent(WmlToXmlSettings settings, OpenXmlPart part, XElement blc, string contentType)
{
    // Tag the block itself.
    blc.Add(new XAttribute(PtOpenXml.ContentType, contentType));

    // Comments are injected only into the main document part, and only on request.
    var mainPart = part as MainDocumentPart;
    bool injectComment = mainPart != null
        && settings.InjectCommentForContentTypes != null
        && (bool)settings.InjectCommentForContentTypes;

    if (injectComment)
    {
        int commentNumber;
        XDocument commentsXDoc;
        if (mainPart.WordprocessingCommentsPart == null)
        {
            // First comment in the document: create the part with an empty w:comments root.
            part.AddNewPart<WordprocessingCommentsPart>();
            commentsXDoc = mainPart.WordprocessingCommentsPart.GetXDocument();
            commentsXDoc.Declaration.Standalone = "yes";
            commentsXDoc.Declaration.Encoding = "UTF-8";
            commentsXDoc.Add(new XElement(W.comments, NamespaceAttributes));
            commentNumber = 1;
        }
        else
        {
            // Continue numbering after the highest comment id already in use.
            commentsXDoc = mainPart.WordprocessingCommentsPart.GetXDocument();
            commentsXDoc.Declaration.Standalone = "yes";
            commentsXDoc.Declaration.Encoding = "UTF-8";
            var usedIds = commentsXDoc.Root
                .Elements(W.comment)
                .Select(c => (int)c.Attribute(W.id))
                .ToList();
            commentNumber = usedIds.Count > 0 ? usedIds.Max() + 1 : 1;
        }

        // The comment: a CommentText paragraph holding the annotation mark and the content type text.
        var annotationMarkRun = new XElement(W.r,
            new XElement(W.rPr,
                new XElement(W.rStyle,
                    new XAttribute(W.val, "CommentReference"))),
            new XElement(W.annotationRef));
        var commentTextRun = new XElement(W.r,
            new XElement(W.t,
                new XText(contentType)));
        var commentParagraph = new XElement(W.p,
            new XElement(W.pPr,
                new XElement(W.pStyle,
                    new XAttribute(W.val, "CommentText"))),
            annotationMarkRun,
            commentTextRun);
        commentsXDoc.Root.Add(
            new XElement(W.comment,
                new XAttribute(W.id, commentNumber),
                commentParagraph));

        // The run that anchors the comment in the document body.
        var commentRun = new XElement(W.r,
            new XElement(W.rPr,
                new XElement(W.rStyle, new XAttribute(W.val, "CommentReference"))),
            new XElement(W.commentReference,
                new XAttribute(W.id, commentNumber)));

        // for now, only do the work of inserting a comment if it is easy. For content types for tables, rows and cells, not inserting a comment.
        var firstRun = blc
            .DescendantsTrimmed(W.txbxContent)
            .FirstOrDefault(e => e.Name == W.r);
        if (firstRun == null)
        {
            // No run at all: append the reference, but only to a paragraph.
            if (blc.Name == W.p)
                blc.Add(commentRun);
        }
        else if (firstRun.Parent.Name == W.p)
        {
            // Put the reference in front of the first run when that run sits directly in a paragraph.
            firstRun.AddBeforeSelf(commentRun);
        }

        var stylesPart = mainPart.StyleDefinitionsPart;
        if (stylesPart == null)
        {
            throw new ContentApplierException("Document does not have styles definition part");
        }

        // Style definitions the injected comment relies on; each is added only if AddIfMissing
        // finds no equivalent style.  The literals stay flush left because they are verbatim strings.
        string[] commentStyleDefinitions =
        {
            @"<w:style w:type=""paragraph""
w:styleId=""CommentText""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""annotation text""/>
<w:basedOn w:val=""Normal""/>
<w:link w:val=""CommentTextChar""/>
<w:semiHidden/>
<w:rPr>
<w:sz w:val=""20""/>
<w:szCs w:val=""20""/>
</w:rPr>
</w:style>
",
            @"<w:style w:type=""paragraph""
w:styleId=""CommentSubject""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""annotation subject""/>
<w:basedOn w:val=""CommentText""/>
<w:next w:val=""CommentText""/>
<w:semiHidden/>
<w:rPr>
<w:b/>
<w:bCs/>
</w:rPr>
</w:style>
",
            @"<w:style w:type=""character""
w:styleId=""CommentReference""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""annotation reference""/>
<w:basedOn w:val=""DefaultParagraphFont""/>
<w:uiPriority w:val=""99""/>
<w:semiHidden/>
<w:unhideWhenUsed/>
<w:rsid w:val=""00872729""/>
<w:rPr>
<w:sz w:val=""16""/>
<w:szCs w:val=""16""/>
</w:rPr>
</w:style>
",
            @"<w:style w:type=""character""
w:customStyle=""1""
w:styleId=""CommentTextChar""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""Comment Text Char""/>
<w:basedOn w:val=""DefaultParagraphFont""/>
<w:link w:val=""CommentText""/>
<w:semiHidden/>
<w:rsid w:val=""00A43CEC""/>
<w:rPr>
<w:lang w:val=""en-GB""
w:eastAsia=""zh-CN""/>
</w:rPr>
</w:style>
",
        };

        XDocument stylesXDoc = stylesPart.GetXDocument();
        foreach (var styleDefinition in commentStyleDefinitions)
        {
            AddIfMissing(stylesXDoc, styleDefinition);
        }
        stylesPart.PutXDocument();
    }

    // The pt:* attributes must be ignorable for consumers that do not know the namespace.
    var root = blc.Ancestors().LastOrDefault();
    if (root == null)
        throw new ContentApplierException("Internal error");

    if (root.Attribute(XNamespace.Xmlns + "pt") == null)
    {
        root.Add(new XAttribute(XNamespace.Xmlns + "pt", PtOpenXml.pt.NamespaceName));
    }

    var ignorableAttribute = root.Attribute(MC.Ignorable);
    if (ignorableAttribute == null)
    {
        root.Add(new XAttribute(MC.Ignorable, "pt"));
    }
    else if (!ignorableAttribute.Value.Split(' ').Contains("pt"))
    {
        ignorableAttribute.Value = ignorableAttribute.Value + " pt";
    }
}
// Tags a piece of run-level content (run, structured document tag, bookmark start, ...) with its
// content type.  The first content type assigned to an element wins: when the element already
// has a pt:ContentType attribute the method does nothing.
//
// Otherwise it:
//   * adds the pt:ContentType attribute to the element;
//   * when the part is the main document part and settings.InjectCommentForContentTypes is true,
//     appends a Word comment whose text is the content type to the comments part (created on
//     demand), places a comment reference run in front of the element if the element sits
//     directly in a paragraph, and adds the comment-related styles to the styles part if missing;
//   * makes sure the root of the element's tree declares the "pt" namespace prefix and lists
//     "pt" in mc:Ignorable.
//
// Throws ContentApplierException when comments are injected into a document without a styles
// part, or when the element has no ancestors.
private static void AddContentTypeToRunContent(WmlToXmlSettings settings, OpenXmlPart part, XElement rlc, string contentType)
{
    // if there is already a content type for this run level content, then nothing to do. First one wins.
    if (rlc.Attribute(PtOpenXml.ContentType) != null)
        return;

    // add the attribute to the run level content
    rlc.Add(new XAttribute(PtOpenXml.ContentType, contentType));

    var mainPart = part as MainDocumentPart;
    if (mainPart != null)
    {
        // add a comment, if appropriate
        if (settings.InjectCommentForContentTypes != null && (bool)settings.InjectCommentForContentTypes)
        {
            int commentNumber = 1;
            XDocument newComments;
            if (mainPart.WordprocessingCommentsPart != null)
            {
                // Continue numbering after the highest comment id already in use.
                newComments = mainPart.WordprocessingCommentsPart.GetXDocument();
                newComments.Declaration.Standalone = "yes";
                newComments.Declaration.Encoding = "UTF-8";
                var ids = newComments.Root.Elements(W.comment).Select(f => (int)f.Attribute(W.id));
                if (ids.Any())
                    commentNumber = ids.Max() + 1;
            }
            else
            {
                // First comment in the document: create the part with an empty w:comments root.
                mainPart.AddNewPart<WordprocessingCommentsPart>();
                newComments = mainPart.WordprocessingCommentsPart.GetXDocument();
                newComments.Declaration.Standalone = "yes";
                newComments.Declaration.Encoding = "UTF-8";
                newComments.Add(new XElement(W.comments, NamespaceAttributes));
            }

            // The comment: a CommentText paragraph holding the annotation mark and the content type text.
            XElement newElement = new XElement(W.comment,
                new XAttribute(W.id, commentNumber),
                new XElement(W.p,
                    new XElement(W.pPr,
                        new XElement(W.pStyle,
                            new XAttribute(W.val, "CommentText"))),
                    new XElement(W.r,
                        new XElement(W.rPr,
                            new XElement(W.rStyle,
                                new XAttribute(W.val, "CommentReference"))),
                        new XElement(W.annotationRef)),
                    new XElement(W.r,
                        new XElement(W.t,
                            new XText(contentType)))));
            newComments.Root.Add(newElement);

            // The run that anchors the comment in the document body.
            XElement commentRun = new XElement(W.r,
                new XElement(W.rPr,
                    new XElement(W.rStyle, new XAttribute(W.val, "CommentReference"))),
                new XElement(W.commentReference,
                    new XAttribute(W.id, commentNumber)));

            // for now, only do the work of inserting a comment if it is easy: the reference goes
            // in front of content that sits directly in a paragraph.  A detached element (no
            // parent) is left alone here and rejected by the root check further down.
            if (rlc.Parent != null && rlc.Parent.Name == W.p)
                rlc.AddBeforeSelf(commentRun);

            if (mainPart.StyleDefinitionsPart == null)
            {
                throw new ContentApplierException("Document does not have styles definition part");
            }

            // Style definitions the injected comment relies on; each is added only if AddIfMissing
            // finds no equivalent style.  The literals stay flush left because they are verbatim strings.
            string[] commentStyleDefinitions =
            {
                @"<w:style w:type=""paragraph""
w:styleId=""CommentText""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""annotation text""/>
<w:basedOn w:val=""Normal""/>
<w:link w:val=""CommentTextChar""/>
<w:semiHidden/>
<w:rPr>
<w:sz w:val=""20""/>
<w:szCs w:val=""20""/>
</w:rPr>
</w:style>
",
                @"<w:style w:type=""paragraph""
w:styleId=""CommentSubject""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""annotation subject""/>
<w:basedOn w:val=""CommentText""/>
<w:next w:val=""CommentText""/>
<w:semiHidden/>
<w:rPr>
<w:b/>
<w:bCs/>
</w:rPr>
</w:style>
",
                @"<w:style w:type=""character""
w:styleId=""CommentReference""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""annotation reference""/>
<w:basedOn w:val=""DefaultParagraphFont""/>
<w:uiPriority w:val=""99""/>
<w:semiHidden/>
<w:unhideWhenUsed/>
<w:rsid w:val=""00872729""/>
<w:rPr>
<w:sz w:val=""16""/>
<w:szCs w:val=""16""/>
</w:rPr>
</w:style>
",
                @"<w:style w:type=""character""
w:customStyle=""1""
w:styleId=""CommentTextChar""
xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:name w:val=""Comment Text Char""/>
<w:basedOn w:val=""DefaultParagraphFont""/>
<w:link w:val=""CommentText""/>
<w:semiHidden/>
<w:rsid w:val=""00A43CEC""/>
<w:rPr>
<w:lang w:val=""en-GB""
w:eastAsia=""zh-CN""/>
</w:rPr>
</w:style>
",
            };

            XDocument stylesXDoc = mainPart.StyleDefinitionsPart.GetXDocument();
            foreach (var styleDefinition in commentStyleDefinitions)
                AddIfMissing(stylesXDoc, styleDefinition);
            mainPart.StyleDefinitionsPart.PutXDocument();
        }
    }

    // The pt:* attributes must be ignorable for consumers that do not know the namespace.
    var root = rlc.Ancestors().LastOrDefault();
    if (root == null)
        throw new ContentApplierException("Internal error");
    var ptNamespace = root.Attribute(XNamespace.Xmlns + "pt");
    if (ptNamespace == null)
    {
        root.Add(new XAttribute(XNamespace.Xmlns + "pt", PtOpenXml.pt.NamespaceName));
    }
    var ignorable = (string)root.Attribute(MC.Ignorable);
    if (ignorable != null)
    {
        var list = ignorable.Split(' ');
        if (!list.Contains("pt"))
        {
            ignorable += " pt";
            root.Attribute(MC.Ignorable).Value = ignorable;
        }
    }
    else
    {
        root.Add(new XAttribute(MC.Ignorable, "pt"));
    }
}
// Parses commentStyle (the XML of one w:style element) and appends it to the root of the
// styles document unless an equivalent style is already there.  Two styles are considered
// equivalent when their w:type, w:customStyle and w:styleId attributes are all equal (an
// absent attribute only equals another absent attribute).
//
// NOTE(review): because w:customStyle takes part in the comparison, an existing style with the
// same styleId but a different customStyle value does not count as a match, and a second
// style with that styleId is appended - confirm this is intended.
private static void AddIfMissing(XDocument stylesXDoc, string commentStyle)
{
    XElement candidate = XElement.Parse(commentStyle);
    XName[] identifyingAttributes = { W.type, W.customStyle, W.styleId };

    bool alreadyPresent = stylesXDoc
        .Root
        .Elements(W.style)
        .Any(existing => identifyingAttributes.All(
            attributeName => (string)candidate.Attribute(attributeName) == (string)existing.Attribute(attributeName)));

    if (!alreadyPresent)
    {
        stylesXDoc.Root.Add(candidate);
    }
}
// Calls ListItemRetriever.RetrieveListItem for every paragraph in the main document part, in
// document order.  The return values are discarded - presumably the call is made for the
// information it attaches to the document (TODO confirm against ListItemRetriever).
private static void AssembleListItemInformation(WordprocessingDocument wordDoc, ListItemRetrieverSettings settings)
{
    var paragraphs = wordDoc.MainDocumentPart.GetXDocument().Descendants(W.p);
    foreach (var paragraph in paragraphs)
        ListItemRetriever.RetrieveListItem(wordDoc, paragraph, settings);
}
// Fallback style names handed to the rule-application methods.  Each one is used when a piece
// of content does not resolve to a style of its own.  The class relies on the implicit
// parameterless constructor.
private class ContentTypeApplierInfo
{
    // Used for paragraphs without a resolvable paragraph style.
    public string DefaultParagraphStyleName;

    // Used for runs without a character style.
    public string DefaultCharacterStyleName;

    // Used for tables without a resolvable table style.
    public string DefaultTableStyleName;
}
// Raised when content types cannot be applied to a document, e.g. when the styles part is
// missing while comments are being injected.
public class ContentApplierException : Exception
{
    // message: human readable description of the failure.
    public ContentApplierException(string message)
        : base(message)
    {
    }
}
// Runs the validation rules from settings and returns every error they report (an empty list
// when there are none).
//
//   wmlRawSourceDocument       - the document as it was before content types were applied
//   wmlWithContentTypeApplied  - the same document after content types were applied
//   contentTypeXml             - the XML produced from the document; passed through to the rules
//   settings                   - supplies GlobalValidationRules and BlockLevelContentValidationRules
//
// Global rules run once each.  Block-level rules run for every paragraph, table, row and cell of
// the main document part of the content-type-applied document; a rule with a StyleNameRegex only
// runs for blocks whose style name matches it.  Both documents are opened from in-memory
// copies, so the byte arrays of the WmlDocument arguments are not modified.
// A rule collection that is null is treated as empty.
public static List<WmlToXmlValidationError> ValidateContentTypeXml(WmlDocument wmlRawSourceDocument, WmlDocument wmlWithContentTypeApplied, XElement contentTypeXml, WmlToXmlSettings settings)
{
    List<WmlToXmlValidationError> errorList = new List<WmlToXmlValidationError>();
    using (MemoryStream msContentTypeApplied = new MemoryStream())
    using (MemoryStream msRawSourceDocument = new MemoryStream())
    {
        msContentTypeApplied.Write(wmlWithContentTypeApplied.DocumentByteArray, 0, wmlWithContentTypeApplied.DocumentByteArray.Length);
        msRawSourceDocument.Write(wmlRawSourceDocument.DocumentByteArray, 0, wmlRawSourceDocument.DocumentByteArray.Length);
        using (WordprocessingDocument wDocContentTypeApplied = WordprocessingDocument.Open(msContentTypeApplied, true))
        using (WordprocessingDocument wDocRawSourceDocument = WordprocessingDocument.Open(msRawSourceDocument, true))
        {
            // ---- global rules ----
            if (settings.GlobalValidationRules != null)
            {
                foreach (var vr in settings.GlobalValidationRules)
                {
                    if (vr.GlobalRuleLambda == null)
                        continue;
                    var valErrors = vr.GlobalRuleLambda(vr, wDocRawSourceDocument, wDocContentTypeApplied, contentTypeXml, settings);
                    if (valErrors != null)
                        errorList.AddRange(valErrors);
                }
            }

            // ---- block-level rules ----
            if (settings.BlockLevelContentValidationRules != null)
            {
                var mXDoc = wDocContentTypeApplied.MainDocumentPart.GetXDocument();
                var sXDoc = wDocContentTypeApplied.MainDocumentPart.StyleDefinitionsPart.GetXDocument();

                // The default *paragraph* style: w:default="1" also appears on the default
                // character, table and numbering styles, so the style type must be checked too.
                var defaultParagraphStyle = sXDoc
                    .Root
                    .Elements(W.style)
                    .FirstOrDefault(s => (string)s.Attribute(W.type) == "paragraph" && (string)s.Attribute(W._default) == "1");

                // Paragraphs with an explicit style are matched by style *name* (w:name/@w:val)
                // below, so the fallback has to be a name as well; the style id is only used
                // when the default style carries no name.
                string defaultParagraphStyleName = null;
                if (defaultParagraphStyle != null)
                {
                    defaultParagraphStyleName =
                        (string)defaultParagraphStyle.Elements(W.name).Attributes(W.val).FirstOrDefault()
                        ?? (string)defaultParagraphStyle.Attribute(W.styleId);
                }

                foreach (var blc in mXDoc.Root.Descendants().Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr || d.Name == W.tc))
                {
                    // Only w:pPr/w:pStyle is consulted, so tables, rows and cells normally end up
                    // without a style name.
                    var styleId = (string)blc
                        .Elements(W.pPr)
                        .Elements(W.pStyle)
                        .Attributes(W.val)
                        .FirstOrDefault();
                    var styleName = (string)sXDoc
                        .Root
                        .Elements(W.style)
                        .Where(s => (string)s.Attribute(W.styleId) == styleId)
                        .Elements(W.name)
                        .Attributes(W.val)
                        .FirstOrDefault();
                    if (styleName == null && blc.Name == W.p)
                        styleName = defaultParagraphStyleName;

                    foreach (var vr in settings.BlockLevelContentValidationRules)
                    {
                        // A rule without a style regex applies to every block; a rule with one
                        // never applies to a block that has no style name.
                        bool matchStyle = vr.StyleNameRegex == null
                            || (styleName != null && vr.StyleNameRegex.Match(styleName).Success);
                        if (!matchStyle || vr.BlockLevelContentRuleLambda == null)
                            continue;

                        var valErrors = vr.BlockLevelContentRuleLambda(blc, vr, wDocContentTypeApplied, contentTypeXml, settings);
                        if (valErrors != null)
                            errorList.AddRange(valErrors);
                    }
                }
            }
        }
    }
    return errorList;
}
}
// Helpers that prepare a document for conversion.
public static class WmlToXmlUtil
{
    // Returns a copy of the document in which the block-level content of the body carries a
    // pt:Unid attribute holding a running number ("1", "2", ...).  Existing pt:Unid values are
    // overwritten.  The numbered elements are the paragraphs and table rows found by
    // DetermineElementOrder, in the order it reports them.  The root element additionally gets
    // a "pt14" namespace prefix that is listed in mc:Ignorable.
    //
    // A main document part without a w:body element gets no unids (only the namespace
    // declaration is added).
    //
    // Throws ArgumentNullException when document is null.
    public static WmlDocument AssignUnidToBlc(WmlDocument document)
    {
        if (document == null)
            throw new ArgumentNullException("document");

        using (MemoryStream ms = new MemoryStream())
        {
            ms.Write(document.DocumentByteArray, 0, document.DocumentByteArray.Length);
            using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
            {
                var xDoc = wDoc.MainDocumentPart.GetXDocument();

                List<XElement> elementsInOrder = new List<XElement>();
                var body = xDoc.Root.Descendants(W.body).FirstOrDefault();
                if (body != null)
                    DetermineElementOrder(body, elementsInOrder);

                var unid = 1;
                foreach (var b in elementsInOrder)
                {
                    // Overwrites an existing pt:Unid value, otherwise adds the attribute.
                    b.SetAttributeValue(PtOpenXml.Unid, unid.ToString());
                    unid++;
                }

                IgnorePt14Namespace(xDoc.Root);
                wDoc.MainDocumentPart.PutXDocument();
            }

            // The package has to be closed (disposed) before the stream holds the final bytes.
            var result = new WmlDocument(document.FileName, ms.ToArray());
            return result;
        }
    }

    // Appends to elementList the elements below the given element that receive a unid:
    //   * a paragraph child is added directly (its content is not searched);
    //   * a table, cell, structured document tag or sdt content child is searched recursively;
    //   * for a table row child, the content of each of the row's children is searched first
    //     and the row itself is added afterwards.
    // Any other child is ignored.
    private static void DetermineElementOrder(XElement element, List<XElement> elementList)
    {
        foreach (var childElement in element.Elements())
        {
            var childName = childElement.Name;
            if (childName == W.p)
            {
                elementList.Add(childElement);
            }
            else if (childName == W.tbl || childName == W.tc || childName == W.sdt || childName == W.sdtContent)
            {
                DetermineElementOrder(childElement, elementList);
            }
            else if (childName == W.tr)
            {
                foreach (var rowChild in childElement.Elements())
                    DetermineElementOrder(rowChild, elementList);
                elementList.Add(childElement);
            }
        }
    }

    // Declares the "pt14" prefix for the PowerTools namespace on the root element (unless a
    // pt14 declaration already exists) and makes sure "pt14" is listed in mc:Ignorable.
    private static void IgnorePt14Namespace(XElement root)
    {
        if (root.Attribute(XNamespace.Xmlns + "pt14") == null)
        {
            root.Add(new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName));
        }

        var ignorableAttribute = root.Attribute(MC.Ignorable);
        if (ignorableAttribute == null)
        {
            root.Add(new XAttribute(MC.Ignorable, "pt14"));
        }
        else if (!ignorableAttribute.Value.Split(' ').Contains("pt14"))
        {
            ignorableAttribute.Value = ignorableAttribute.Value + " pt14";
        }
    }
}
}