// blob: 6c6bc372d6af7ee9173e930f2c31b068bc0589e8 [file] [log] [blame]
/***************************************************************************
Copyright (c) Microsoft Corporation 2012-2015.
This code is licensed using the Microsoft Public License (Ms-PL). The text of the license can be found here:
http://www.microsoft.com/resources/sharedsource/licensingbasics/publiclicense.mspx
Published at http://OpenXmlDeveloper.org
Resource Center and Documentation: http://openxmldeveloper.org/wiki/w/wiki/powertools-for-open-xml.aspx
Developer: Eric White
Blog: http://www.ericwhite.com
Twitter: @EricWhiteDev
Email: eric@ericwhite.com
***************************************************************************/
#define COPY_FILES_FOR_DEBUGGING
// DO_CONVERSION_VIA_WORD is defined in the project OpenXmlPowerTools.Tests.OA.csproj, but not in the OpenXmlPowerTools.Tests.csproj
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using OpenXmlPowerTools;
using Xunit;
#if DO_CONVERSION_VIA_WORD
using Word = Microsoft.Office.Interop.Word;
#endif
#if !ELIDE_XUNIT_TESTS
namespace OxPt
{
public class HcTests
{
// Public switches, all defaulting to true. None of them is read anywhere in the visible part
// of this file.
// NOTE(review): the COPY_FILES_FOR_DEBUGGING / DO_CONVERSION_VIA_WORD compilation symbols appear
// to control the behavior these names describe - confirm no other file reads them before removing.
public static bool s_CopySourceFiles = true;
public static bool s_CopyFormattingAssembledDocx = true;
public static bool s_ConvertUsingWord = true;
// PowerShell one-liner that generates InlineData for all files in a directory
// dir | % { '[InlineData("' + $_.Name + '")]' } | clip
/// <summary>
/// Converts one sample document to HTML with <c>ConvertToHtml</c>, writing the result into
/// <c>TestUtil.TempDir</c>. The test contains no assertions; it passes when nothing throws.
/// When COPY_FILES_FOR_DEBUGGING is defined, the source document and a formatting-assembled
/// copy are also placed in the temp directory. When DO_CONVERSION_VIA_WORD is defined, the
/// document is additionally converted through Word.
/// </summary>
/// <param name="name">File name of the sample document, resolved against <c>TestUtil.SourceDir</c>.</param>
[Theory]
[InlineData("HC001-5DayTourPlanTemplate.docx")]
[InlineData("HC002-Hebrew-01.docx")]
[InlineData("HC003-Hebrew-02.docx")]
[InlineData("HC004-ResumeTemplate.docx")]
[InlineData("HC005-TaskPlanTemplate.docx")]
[InlineData("HC006-Test-01.docx")]
[InlineData("HC007-Test-02.docx")]
[InlineData("HC008-Test-03.docx")]
[InlineData("HC009-Test-04.docx")]
[InlineData("HC010-Test-05.docx")]
[InlineData("HC011-Test-06.docx")]
[InlineData("HC012-Test-07.docx")]
[InlineData("HC013-Test-08.docx")]
[InlineData("HC014-RTL-Table-01.docx")]
[InlineData("HC015-Vertical-Spacing-atLeast.docx")]
[InlineData("HC016-Horizontal-Spacing-firstLine.docx")]
[InlineData("HC017-Vertical-Alignment-Cell-01.docx")]
[InlineData("HC018-Vertical-Alignment-Para-01.docx")]
[InlineData("HC019-Hidden-Run.docx")]
[InlineData("HC020-Small-Caps.docx")]
[InlineData("HC021-Symbols.docx")]
[InlineData("HC022-Table-Of-Contents.docx")]
[InlineData("HC023-Hyperlink.docx")]
[InlineData("HC024-Tabs-01.docx")]
[InlineData("HC025-Tabs-02.docx")]
[InlineData("HC026-Tabs-03.docx")]
[InlineData("HC027-Tabs-04.docx")]
[InlineData("HC028-No-Break-Hyphen.docx")]
[InlineData("HC029-Table-Merged-Cells.docx")]
[InlineData("HC030-Content-Controls.docx")]
[InlineData("HC031-Complicated-Document.docx")]
[InlineData("HC032-Named-Color.docx")]
[InlineData("HC033-Run-With-Border.docx")]
[InlineData("HC034-Run-With-Position.docx")]
[InlineData("HC035-Strike-Through.docx")]
[InlineData("HC036-Super-Script.docx")]
[InlineData("HC037-Sub-Script.docx")]
[InlineData("HC038-Conflicting-Border-Weight.docx")]
[InlineData("HC039-Bold.docx")]
[InlineData("HC040-Hyperlink-Fieldcode-01.docx")]
[InlineData("HC041-Hyperlink-Fieldcode-02.docx")]
[InlineData("HC042-Image-Png.docx")]
[InlineData("HC043-Chart.docx")]
[InlineData("HC044-Embedded-Workbook.docx")]
[InlineData("HC045-Italic.docx")]
[InlineData("HC046-BoldAndItalic.docx")]
[InlineData("HC047-No-Section.docx")]
[InlineData("HC048-Excerpt.docx")]
[InlineData("HC049-Borders.docx")]
[InlineData("HC050-Shaded-Text-01.docx")]
[InlineData("HC051-Shaded-Text-02.docx")]
[InlineData("HC060-Image-with-Hyperlink.docx")]
[InlineData("HC061-Hyperlink-in-Field.docx")]
public void HC001(string name)
{
    var sourceDocx = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name));

#if COPY_FILES_FOR_DEBUGGING
    // Step 1: keep an untouched copy of the input beside the generated output.
    string sourceCopyPath = Path.Combine(
        TestUtil.TempDir.FullName,
        sourceDocx.Name.Replace(".docx", "-1-Source.docx"));
    var sourceCopiedToDestDocx = new FileInfo(sourceCopyPath);
    if (!sourceCopiedToDestDocx.Exists)
    {
        File.Copy(sourceDocx.FullName, sourceCopiedToDestDocx.FullName);
    }

    // Step 2: write the formatting-assembled version of the document.
    string assembledPath = Path.Combine(
        TestUtil.TempDir.FullName,
        sourceDocx.Name.Replace(".docx", "-2-FormattingAssembled.docx"));
    var assembledFormattingDestDocx = new FileInfo(assembledPath);
    if (!assembledFormattingDestDocx.Exists)
    {
        CopyFormattingAssembledDocx(sourceDocx, assembledFormattingDestDocx);
    }
#endif

    // Step 3: the conversion under test.
    string oxPtHtmlPath = Path.Combine(
        TestUtil.TempDir.FullName,
        sourceDocx.Name.Replace(".docx", "-3-OxPt.html"));
    ConvertToHtml(sourceDocx, new FileInfo(oxPtHtmlPath));

#if DO_CONVERSION_VIA_WORD
    // Step 4: the same document converted by Word.
    string wordHtmlPath = Path.Combine(
        TestUtil.TempDir.FullName,
        sourceDocx.Name.Replace(".docx", "-4-Word.html"));
    ConvertToHtmlUsingWord(sourceDocx, new FileInfo(wordHtmlPath));
#endif
}
/// <summary>
/// Converts one sample document to HTML through <c>ConvertToHtmlNoCssClasses</c> and writes
/// the result into <c>TestUtil.TempDir</c>. The test contains no assertions; it passes when
/// nothing throws.
/// </summary>
/// <param name="name">File name of the sample document, resolved against <c>TestUtil.SourceDir</c>.</param>
[Theory]
[InlineData("HC006-Test-01.docx")]
public void HC002_NoCssClasses(string name)
{
    string sourcePath = Path.Combine(TestUtil.SourceDir.FullName, name);
    var sourceDocx = new FileInfo(sourcePath);

    string htmlPath = Path.Combine(
        TestUtil.TempDir.FullName,
        sourceDocx.Name.Replace(".docx", "-5-OxPt-No-CSS-Classes.html"));
    var oxPtConvertedDestHtml = new FileInfo(htmlPath);

    ConvertToHtmlNoCssClasses(sourceDocx, oxPtConvertedDestHtml);
}
/// <summary>
/// Loads <paramref name="source"/> into memory, runs <c>RevisionAccepter</c>,
/// <c>MarkupSimplifier</c> and <c>FormattingAssembler</c> over it in that order, and writes
/// the modified package to <paramref name="dest"/>. The source file itself is not modified.
/// </summary>
/// <param name="source">Document to read.</param>
/// <param name="dest">File that receives the processed document.</param>
private static void CopyFormattingAssembledDocx(FileInfo source, FileInfo dest)
{
    byte[] sourceBytes = File.ReadAllBytes(source.FullName);
    using (var buffer = new MemoryStream())
    {
        // Copy the bytes into a default-constructed stream; the package is opened for
        // editing directly on top of it.
        buffer.Write(sourceBytes, 0, sourceBytes.Length);
        using (var wordDoc = WordprocessingDocument.Open(buffer, true))
        {
            RevisionAccepter.AcceptRevisions(wordDoc);

            var markupSettings = new SimplifyMarkupSettings();
            markupSettings.RemoveComments = true;
            markupSettings.RemoveContentControls = true;
            markupSettings.RemoveEndAndFootNotes = true;
            markupSettings.RemoveFieldCodes = false;
            markupSettings.RemoveLastRenderedPageBreak = true;
            markupSettings.RemovePermissions = true;
            markupSettings.RemoveProof = true;
            markupSettings.RemoveRsidInfo = true;
            markupSettings.RemoveSmartTags = true;
            markupSettings.RemoveSoftHyphens = true;
            markupSettings.RemoveGoBackBookmark = true;
            markupSettings.ReplaceTabsWithSpaces = false;
            MarkupSimplifier.SimplifyMarkup(wordDoc, markupSettings);

            var assemblerSettings = new FormattingAssemblerSettings();
            assemblerSettings.RemoveStyleNamesFromParagraphAndRunProperties = false;
            assemblerSettings.ClearStyles = false;
            assemblerSettings.RestrictToSupportedLanguages = false;
            assemblerSettings.RestrictToSupportedNumberingFormats = false;
            assemblerSettings.CreateHtmlConverterAnnotationAttributes = true;
            assemblerSettings.OrderElementsPerStandard = false;

            var listItemSettings = new ListItemRetrieverSettings();
            listItemSettings.ListItemTextImplementations = ListItemRetrieverSettings.DefaultListItemTextImplementations;
            assemblerSettings.ListItemRetrieverSettings = listItemSettings;

            FormattingAssembler.AssembleFormatting(wordDoc, assemblerSettings);
        }

        // Read the stream only after the document has been disposed, so the bytes
        // include the changes made above.
        File.WriteAllBytes(dest.FullName, buffer.ToArray());
    }
}
/// <summary>
/// Converts <paramref name="sourceDocx"/> to HTML with fabricated CSS classes (class prefix
/// "pt-") and writes the markup, UTF-8 encoded, to <paramref name="destFileName"/>.
/// Images are saved to a sibling "&lt;name&gt;_files" directory.
/// </summary>
/// <param name="sourceDocx">Document to convert; it is read into memory and not modified on disk.</param>
/// <param name="destFileName">HTML file to write.</param>
private static void ConvertToHtml(FileInfo sourceDocx, FileInfo destFileName)
{
    ConvertToHtmlCore(sourceDocx, destFileName, true);
}

/// <summary>
/// Same as <c>ConvertToHtml</c>, but with <c>FabricateCssClasses</c> set to false and the
/// CSS class prefix left at the converter's default.
/// </summary>
/// <param name="sourceDocx">Document to convert; it is read into memory and not modified on disk.</param>
/// <param name="destFileName">HTML file to write.</param>
private static void ConvertToHtmlNoCssClasses(FileInfo sourceDocx, FileInfo destFileName)
{
    ConvertToHtmlCore(sourceDocx, destFileName, false);
}

// Shared implementation of ConvertToHtml / ConvertToHtmlNoCssClasses; the two public entry
// points differ only in whether CSS classes are fabricated.
private static void ConvertToHtmlCore(FileInfo sourceDocx, FileInfo destFileName, bool fabricateCssClasses)
{
    byte[] byteArray = File.ReadAllBytes(sourceDocx.FullName);
    using (MemoryStream memoryStream = new MemoryStream())
    {
        memoryStream.Write(byteArray, 0, byteArray.Length);
        using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
        {
            // Image directory sits next to the HTML file: "<name>.html" -> "<name>_files".
            // Strip whatever extension the destination actually has instead of assuming 5 chars.
            string destPath = destFileName.FullName;
            string imageDirectoryName =
                destPath.Substring(0, destPath.Length - destFileName.Extension.Length) + "_files";
            int imageCounter = 0;

            // A package is not required to contain a core-properties part, so guard against
            // null before reading the title; fall back to the source path as the page title.
            string pageTitle = null;
            var corePropertiesPart = wDoc.CoreFilePropertiesPart;
            if (corePropertiesPart != null)
            {
                pageTitle = (string)corePropertiesPart.GetXDocument().Descendants(DC.title).FirstOrDefault();
            }
            if (pageTitle == null)
            {
                pageTitle = sourceDocx.FullName;
            }

            WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
            {
                PageTitle = pageTitle,
                FabricateCssClasses = fabricateCssClasses,
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                ImageHandler = imageInfo =>
                {
                    DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName);
                    if (!localDirInfo.Exists)
                    {
                        localDirInfo.Create();
                    }

                    // The counter advances for every image, including ones that end up
                    // skipped, so file numbers follow the image's position in the document.
                    ++imageCounter;

                    // If the image format isn't one that we expect, ignore it,
                    // and don't return markup for the link.
                    ImageFormat imageFormat;
                    string extension;
                    if (!TryGetImageFormat(imageInfo.ContentType, out imageFormat, out extension))
                    {
                        return null;
                    }

                    string imageFileName = imageDirectoryName + "/image" +
                        imageCounter.ToString() + "." + extension;
                    try
                    {
                        imageInfo.Bitmap.Save(imageFileName, imageFormat);
                    }
                    catch (System.Runtime.InteropServices.ExternalException)
                    {
                        // The image could not be saved in this format; omit it from the output.
                        return null;
                    }

                    XElement img = new XElement(Xhtml.img,
                        new XAttribute(NoNamespace.src, imageFileName),
                        imageInfo.ImgStyleAttribute,
                        imageInfo.AltText != null ?
                            new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                    return img;
                }
            };

            // Only the CSS-class variant sets an explicit prefix; the other variant keeps
            // the settings object's default, exactly as before.
            if (fabricateCssClasses)
            {
                settings.CssClassPrefix = "pt-";
            }

            XElement html = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

            // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
            // XEntity. PtOpenXmlUtil.cs define the XEntity class. See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // for detailed explanation.
            //
            // If you further transform the XML tree returned by ConvertToHtmlTransform, you
            // must do it correctly, or entities will not be serialized properly.
            var htmlString = html.ToString(SaveOptions.DisableFormatting);
            File.WriteAllText(destPath, htmlString, Encoding.UTF8);
        }
    }
}

// Maps an image MIME content type (e.g. "image/png") to the format and file extension used
// when saving it. Returns false for a null, malformed or unsupported content type.
private static bool TryGetImageFormat(string contentType, out ImageFormat imageFormat, out string extension)
{
    imageFormat = null;
    extension = null;
    if (contentType == null)
    {
        return false;
    }

    string[] parts = contentType.Split('/');
    if (parts.Length < 2)
    {
        return false;
    }

    // Invariant lower-casing: MIME subtypes are not linguistic text, and culture-sensitive
    // casing (e.g. Turkish dotless i) would break the "gif"/"tiff" comparisons.
    switch (parts[1].ToLowerInvariant())
    {
        case "png":   // PNG is re-encoded as GIF.
        case "tiff":  // TIFF is re-encoded as GIF.
        case "gif":
            extension = "gif";
            imageFormat = ImageFormat.Gif;
            return true;
        case "bmp":
            extension = "bmp";
            imageFormat = ImageFormat.Bmp;
            return true;
        case "jpeg":
            extension = "jpeg";
            imageFormat = ImageFormat.Jpeg;
            return true;
        case "x-wmf":
            extension = "wmf";
            imageFormat = ImageFormat.Wmf;
            return true;
        default:
            return false;
    }
}
#if DO_CONVERSION_VIA_WORD
/// <summary>
/// Converts a document to filtered HTML by automating Microsoft Word: opens
/// <paramref name="sourceFileName"/> in a hidden Word instance and saves it as
/// <paramref name="destFileName"/>. Only compiled when DO_CONVERSION_VIA_WORD is defined.
/// </summary>
/// <param name="sourceFileName">Document for Word to open.</param>
/// <param name="destFileName">HTML file for Word to write.</param>
/// <exception cref="System.Runtime.InteropServices.COMException">
/// Rethrown after being logged, so the calling test fails instead of the process exiting.
/// </exception>
public static void ConvertToHtmlUsingWord(FileInfo sourceFileName, FileInfo destFileName)
{
    Word.Application app = new Word.Application();
    try
    {
        app.Visible = false;
        Word.Document doc = app.Documents.Open(sourceFileName.FullName);
        doc.SaveAs2(destFileName.FullName, Word.WdSaveFormat.wdFormatFilteredHTML);
    }
    catch (System.Runtime.InteropServices.COMException)
    {
        // Previously this called Environment.Exit(0), which terminated the whole test
        // process with a success exit code. Log and rethrow so the failure is reported.
        Console.WriteLine("Caught unexpected COM exception.");
        throw;
    }
    finally
    {
        // Always shut Word down, whatever happened above, so no Word instance is left
        // running. The cast selects the Quit method on the _Application interface.
        ((Microsoft.Office.Interop.Word._Application)app).Quit();
    }
}
#endif
}
}
#endif