| /*************************************************************************** |
| |
| Copyright (c) Microsoft Corporation 2012-2015. |
| |
| This code is licensed using the Microsoft Public License (Ms-PL). The text of the license can be found here: |
| |
| http://www.microsoft.com/resources/sharedsource/licensingbasics/publiclicense.mspx |
| |
| Published at http://OpenXmlDeveloper.org |
| Resource Center and Documentation: http://openxmldeveloper.org/wiki/w/wiki/powertools-for-open-xml.aspx |
| |
| Developer: Eric White |
| Blog: http://www.ericwhite.com |
| Twitter: @EricWhiteDev |
| Email: eric@ericwhite.com |
| |
| ***************************************************************************/ |
| |
| #define COPY_FILES_FOR_DEBUGGING |
| |
| // DO_CONVERSION_VIA_WORD is defined in the project OpenXmlPowerTools.Tests.OA.csproj, but not in the OpenXmlPowerTools.Tests.csproj |
| |
| using System; |
| using System.Collections.Generic; |
| using System.Drawing; |
| using System.Drawing.Imaging; |
| using System.IO; |
| using System.Linq; |
| using System.Text; |
| using System.Threading.Tasks; |
| using System.Xml.Linq; |
| using DocumentFormat.OpenXml.Packaging; |
| using OpenXmlPowerTools; |
| using Xunit; |
| |
| #if DO_CONVERSION_VIA_WORD |
| using Word = Microsoft.Office.Interop.Word; |
| #endif |
| |
| #if !ELIDE_XUNIT_TESTS |
| |
| namespace OxPt |
| { |
| public class HcTests |
| { |
| public static bool s_CopySourceFiles = true; |
| public static bool s_CopyFormattingAssembledDocx = true; |
| public static bool s_ConvertUsingWord = true; |
| |
| // PowerShell oneliner that generates InlineData for all files in a directory |
| // dir | % { '[InlineData("' + $_.Name + '")]' } | clip |
| |
| [Theory] |
| [InlineData("HC001-5DayTourPlanTemplate.docx")] |
| [InlineData("HC002-Hebrew-01.docx")] |
| [InlineData("HC003-Hebrew-02.docx")] |
| [InlineData("HC004-ResumeTemplate.docx")] |
| [InlineData("HC005-TaskPlanTemplate.docx")] |
| [InlineData("HC006-Test-01.docx")] |
| [InlineData("HC007-Test-02.docx")] |
| [InlineData("HC008-Test-03.docx")] |
| [InlineData("HC009-Test-04.docx")] |
| [InlineData("HC010-Test-05.docx")] |
| [InlineData("HC011-Test-06.docx")] |
| [InlineData("HC012-Test-07.docx")] |
| [InlineData("HC013-Test-08.docx")] |
| [InlineData("HC014-RTL-Table-01.docx")] |
| [InlineData("HC015-Vertical-Spacing-atLeast.docx")] |
| [InlineData("HC016-Horizontal-Spacing-firstLine.docx")] |
| [InlineData("HC017-Vertical-Alignment-Cell-01.docx")] |
| [InlineData("HC018-Vertical-Alignment-Para-01.docx")] |
| [InlineData("HC019-Hidden-Run.docx")] |
| [InlineData("HC020-Small-Caps.docx")] |
| [InlineData("HC021-Symbols.docx")] |
| [InlineData("HC022-Table-Of-Contents.docx")] |
| [InlineData("HC023-Hyperlink.docx")] |
| [InlineData("HC024-Tabs-01.docx")] |
| [InlineData("HC025-Tabs-02.docx")] |
| [InlineData("HC026-Tabs-03.docx")] |
| [InlineData("HC027-Tabs-04.docx")] |
| [InlineData("HC028-No-Break-Hyphen.docx")] |
| [InlineData("HC029-Table-Merged-Cells.docx")] |
| [InlineData("HC030-Content-Controls.docx")] |
| [InlineData("HC031-Complicated-Document.docx")] |
| [InlineData("HC032-Named-Color.docx")] |
| [InlineData("HC033-Run-With-Border.docx")] |
| [InlineData("HC034-Run-With-Position.docx")] |
| [InlineData("HC035-Strike-Through.docx")] |
| [InlineData("HC036-Super-Script.docx")] |
| [InlineData("HC037-Sub-Script.docx")] |
| [InlineData("HC038-Conflicting-Border-Weight.docx")] |
| [InlineData("HC039-Bold.docx")] |
| [InlineData("HC040-Hyperlink-Fieldcode-01.docx")] |
| [InlineData("HC041-Hyperlink-Fieldcode-02.docx")] |
| [InlineData("HC042-Image-Png.docx")] |
| [InlineData("HC043-Chart.docx")] |
| [InlineData("HC044-Embedded-Workbook.docx")] |
| [InlineData("HC045-Italic.docx")] |
| [InlineData("HC046-BoldAndItalic.docx")] |
| [InlineData("HC047-No-Section.docx")] |
| [InlineData("HC048-Excerpt.docx")] |
| [InlineData("HC049-Borders.docx")] |
| [InlineData("HC050-Shaded-Text-01.docx")] |
| [InlineData("HC051-Shaded-Text-02.docx")] |
| [InlineData("HC060-Image-with-Hyperlink.docx")] |
| [InlineData("HC061-Hyperlink-in-Field.docx")] |
| |
| public void HC001(string name) |
| { |
| FileInfo sourceDocx = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name)); |
| |
| #if COPY_FILES_FOR_DEBUGGING |
| var sourceCopiedToDestDocx = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceDocx.Name.Replace(".docx", "-1-Source.docx"))); |
| if (!sourceCopiedToDestDocx.Exists) |
| File.Copy(sourceDocx.FullName, sourceCopiedToDestDocx.FullName); |
| |
| var assembledFormattingDestDocx = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceDocx.Name.Replace(".docx", "-2-FormattingAssembled.docx"))); |
| if (!assembledFormattingDestDocx.Exists) |
| CopyFormattingAssembledDocx(sourceDocx, assembledFormattingDestDocx); |
| #endif |
| |
| var oxPtConvertedDestHtml = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceDocx.Name.Replace(".docx", "-3-OxPt.html"))); |
| ConvertToHtml(sourceDocx, oxPtConvertedDestHtml); |
| |
| #if DO_CONVERSION_VIA_WORD |
| var wordConvertedDocHtml = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceDocx.Name.Replace(".docx", "-4-Word.html"))); |
| ConvertToHtmlUsingWord(sourceDocx, wordConvertedDocHtml); |
| #endif |
| |
| } |
| |
| [Theory] |
| [InlineData("HC006-Test-01.docx")] |
| public void HC002_NoCssClasses(string name) |
| { |
| FileInfo sourceDocx = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name)); |
| |
| var oxPtConvertedDestHtml = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceDocx.Name.Replace(".docx", "-5-OxPt-No-CSS-Classes.html"))); |
| ConvertToHtmlNoCssClasses(sourceDocx, oxPtConvertedDestHtml); |
| } |
| |
| private static void CopyFormattingAssembledDocx(FileInfo source, FileInfo dest) |
| { |
| var ba = File.ReadAllBytes(source.FullName); |
| using (MemoryStream ms = new MemoryStream()) |
| { |
| ms.Write(ba, 0, ba.Length); |
| using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(ms, true)) |
| { |
| |
| RevisionAccepter.AcceptRevisions(wordDoc); |
| SimplifyMarkupSettings simplifyMarkupSettings = new SimplifyMarkupSettings |
| { |
| RemoveComments = true, |
| RemoveContentControls = true, |
| RemoveEndAndFootNotes = true, |
| RemoveFieldCodes = false, |
| RemoveLastRenderedPageBreak = true, |
| RemovePermissions = true, |
| RemoveProof = true, |
| RemoveRsidInfo = true, |
| RemoveSmartTags = true, |
| RemoveSoftHyphens = true, |
| RemoveGoBackBookmark = true, |
| ReplaceTabsWithSpaces = false, |
| }; |
| MarkupSimplifier.SimplifyMarkup(wordDoc, simplifyMarkupSettings); |
| |
| FormattingAssemblerSettings formattingAssemblerSettings = new FormattingAssemblerSettings |
| { |
| RemoveStyleNamesFromParagraphAndRunProperties = false, |
| ClearStyles = false, |
| RestrictToSupportedLanguages = false, |
| RestrictToSupportedNumberingFormats = false, |
| CreateHtmlConverterAnnotationAttributes = true, |
| OrderElementsPerStandard = false, |
| ListItemRetrieverSettings = |
| new ListItemRetrieverSettings() |
| { |
| ListItemTextImplementations = ListItemRetrieverSettings.DefaultListItemTextImplementations, |
| }, |
| }; |
| |
| FormattingAssembler.AssembleFormatting(wordDoc, formattingAssemblerSettings); |
| } |
| var newBa = ms.ToArray(); |
| File.WriteAllBytes(dest.FullName, newBa); |
| } |
| } |
| |
| private static void ConvertToHtml(FileInfo sourceDocx, FileInfo destFileName) |
| { |
| byte[] byteArray = File.ReadAllBytes(sourceDocx.FullName); |
| using (MemoryStream memoryStream = new MemoryStream()) |
| { |
| memoryStream.Write(byteArray, 0, byteArray.Length); |
| using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true)) |
| { |
| var outputDirectory = destFileName.Directory; |
| destFileName = new FileInfo(Path.Combine(outputDirectory.FullName, destFileName.Name)); |
| var imageDirectoryName = destFileName.FullName.Substring(0, destFileName.FullName.Length - 5) + "_files"; |
| int imageCounter = 0; |
| var pageTitle = (string)wDoc.CoreFilePropertiesPart.GetXDocument().Descendants(DC.title).FirstOrDefault(); |
| if (pageTitle == null) |
| pageTitle = sourceDocx.FullName; |
| |
| WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings() |
| { |
| PageTitle = pageTitle, |
| FabricateCssClasses = true, |
| CssClassPrefix = "pt-", |
| RestrictToSupportedLanguages = false, |
| RestrictToSupportedNumberingFormats = false, |
| ImageHandler = imageInfo => |
| { |
| DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName); |
| if (!localDirInfo.Exists) |
| localDirInfo.Create(); |
| ++imageCounter; |
| string extension = imageInfo.ContentType.Split('/')[1].ToLower(); |
| ImageFormat imageFormat = null; |
| if (extension == "png") |
| { |
| // Convert png to jpeg. |
| extension = "gif"; |
| imageFormat = ImageFormat.Gif; |
| } |
| else if (extension == "gif") |
| imageFormat = ImageFormat.Gif; |
| else if (extension == "bmp") |
| imageFormat = ImageFormat.Bmp; |
| else if (extension == "jpeg") |
| imageFormat = ImageFormat.Jpeg; |
| else if (extension == "tiff") |
| { |
| // Convert tiff to gif. |
| extension = "gif"; |
| imageFormat = ImageFormat.Gif; |
| } |
| else if (extension == "x-wmf") |
| { |
| extension = "wmf"; |
| imageFormat = ImageFormat.Wmf; |
| } |
| |
| // If the image format isn't one that we expect, ignore it, |
| // and don't return markup for the link. |
| if (imageFormat == null) |
| return null; |
| |
| string imageFileName = imageDirectoryName + "/image" + |
| imageCounter.ToString() + "." + extension; |
| try |
| { |
| imageInfo.Bitmap.Save(imageFileName, imageFormat); |
| } |
| catch (System.Runtime.InteropServices.ExternalException) |
| { |
| return null; |
| } |
| XElement img = new XElement(Xhtml.img, |
| new XAttribute(NoNamespace.src, imageFileName), |
| imageInfo.ImgStyleAttribute, |
| imageInfo.AltText != null ? |
| new XAttribute(NoNamespace.alt, imageInfo.AltText) : null); |
| return img; |
| } |
| }; |
| XElement html = WmlToHtmlConverter.ConvertToHtml(wDoc, settings); |
| |
| // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type |
| // XEntity. PtOpenXmlUtil.cs define the XEntity class. See |
| // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx |
| // for detailed explanation. |
| // |
| // If you further transform the XML tree returned by ConvertToHtmlTransform, you |
| // must do it correctly, or entities will not be serialized properly. |
| |
| var htmlString = html.ToString(SaveOptions.DisableFormatting); |
| File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8); |
| } |
| } |
| } |
| |
| private static void ConvertToHtmlNoCssClasses(FileInfo sourceDocx, FileInfo destFileName) |
| { |
| byte[] byteArray = File.ReadAllBytes(sourceDocx.FullName); |
| using (MemoryStream memoryStream = new MemoryStream()) |
| { |
| memoryStream.Write(byteArray, 0, byteArray.Length); |
| using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true)) |
| { |
| var outputDirectory = destFileName.Directory; |
| destFileName = new FileInfo(Path.Combine(outputDirectory.FullName, destFileName.Name)); |
| var imageDirectoryName = destFileName.FullName.Substring(0, destFileName.FullName.Length - 5) + "_files"; |
| int imageCounter = 0; |
| var pageTitle = (string)wDoc.CoreFilePropertiesPart.GetXDocument().Descendants(DC.title).FirstOrDefault(); |
| if (pageTitle == null) |
| pageTitle = sourceDocx.FullName; |
| |
| WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings() |
| { |
| PageTitle = pageTitle, |
| FabricateCssClasses = false, |
| RestrictToSupportedLanguages = false, |
| RestrictToSupportedNumberingFormats = false, |
| ImageHandler = imageInfo => |
| { |
| DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName); |
| if (!localDirInfo.Exists) |
| localDirInfo.Create(); |
| ++imageCounter; |
| string extension = imageInfo.ContentType.Split('/')[1].ToLower(); |
| ImageFormat imageFormat = null; |
| if (extension == "png") |
| { |
| // Convert png to jpeg. |
| extension = "gif"; |
| imageFormat = ImageFormat.Gif; |
| } |
| else if (extension == "gif") |
| imageFormat = ImageFormat.Gif; |
| else if (extension == "bmp") |
| imageFormat = ImageFormat.Bmp; |
| else if (extension == "jpeg") |
| imageFormat = ImageFormat.Jpeg; |
| else if (extension == "tiff") |
| { |
| // Convert tiff to gif. |
| extension = "gif"; |
| imageFormat = ImageFormat.Gif; |
| } |
| else if (extension == "x-wmf") |
| { |
| extension = "wmf"; |
| imageFormat = ImageFormat.Wmf; |
| } |
| |
| // If the image format isn't one that we expect, ignore it, |
| // and don't return markup for the link. |
| if (imageFormat == null) |
| return null; |
| |
| string imageFileName = imageDirectoryName + "/image" + |
| imageCounter.ToString() + "." + extension; |
| try |
| { |
| imageInfo.Bitmap.Save(imageFileName, imageFormat); |
| } |
| catch (System.Runtime.InteropServices.ExternalException) |
| { |
| return null; |
| } |
| XElement img = new XElement(Xhtml.img, |
| new XAttribute(NoNamespace.src, imageFileName), |
| imageInfo.ImgStyleAttribute, |
| imageInfo.AltText != null ? |
| new XAttribute(NoNamespace.alt, imageInfo.AltText) : null); |
| return img; |
| } |
| }; |
| XElement html = WmlToHtmlConverter.ConvertToHtml(wDoc, settings); |
| |
| // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type |
| // XEntity. PtOpenXmlUtil.cs define the XEntity class. See |
| // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx |
| // for detailed explanation. |
| // |
| // If you further transform the XML tree returned by ConvertToHtmlTransform, you |
| // must do it correctly, or entities will not be serialized properly. |
| |
| var htmlString = html.ToString(SaveOptions.DisableFormatting); |
| File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8); |
| } |
| } |
| } |
| |
| #if DO_CONVERSION_VIA_WORD |
| public static void ConvertToHtmlUsingWord(FileInfo sourceFileName, FileInfo destFileName) |
| { |
| Word.Application app = new Word.Application(); |
| app.Visible = false; |
| try |
| { |
| Word.Document doc = app.Documents.Open(sourceFileName.FullName); |
| doc.SaveAs2(destFileName.FullName, Word.WdSaveFormat.wdFormatFilteredHTML); |
| } |
| catch (System.Runtime.InteropServices.COMException) |
| { |
| Console.WriteLine("Caught unexpected COM exception."); |
| ((Microsoft.Office.Interop.Word._Application)app).Quit(); |
| Environment.Exit(0); |
| } |
| ((Microsoft.Office.Interop.Word._Application)app).Quit(); |
| } |
| #endif |
| } |
| } |
| |
| #endif |