blob: d277ee92d5b16571d0680006901b46ad178af450 [file] [log] [blame] [edit]
/***************************************************************************
Copyright (c) Microsoft Corporation 2010.
This code is licensed using the Microsoft Public License (Ms-PL). The text of the license
can be found here:
http://www.microsoft.com/resources/sharedsource/licensingbasics/publiclicense.mspx
***************************************************************************/
using System;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using OpenXmlPowerTools;
using System.Collections.Generic;
class WmlToHtmlConverterHelper
{
static void Main(string[] args)
{
var n = DateTime.Now;
var tempDi = new DirectoryInfo(string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", n.Year - 2000, n.Month, n.Day, n.Hour, n.Minute, n.Second));
tempDi.Create();
/*
* This example loads each document into a byte array, then into a memory stream, so that the document can be opened for writing without
* modifying the source document.
*/
foreach (var file in Directory.GetFiles("../../", "*.docx"))
{
ConvertToHtml(file, tempDi.FullName);
}
}
public static void ConvertToHtml(string file, string outputDirectory)
{
var fi = new FileInfo(file);
Console.WriteLine(fi.Name);
byte[] byteArray = File.ReadAllBytes(fi.FullName);
using (MemoryStream memoryStream = new MemoryStream())
{
memoryStream.Write(byteArray, 0, byteArray.Length);
using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
{
var destFileName = new FileInfo(fi.Name.Replace(".docx", ".html"));
if (outputDirectory != null && outputDirectory != string.Empty)
{
DirectoryInfo di = new DirectoryInfo(outputDirectory);
if (!di.Exists)
{
throw new OpenXmlPowerToolsException("Output directory does not exist");
}
destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
}
var imageDirectoryName = destFileName.FullName.Substring(0, destFileName.FullName.Length - 5) + "_files";
int imageCounter = 0;
var pageTitle = fi.FullName;
var part = wDoc.CoreFilePropertiesPart;
if (part != null)
{
pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? fi.FullName;
}
// TODO: Determine max-width from size of content area.
WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
{
AdditionalCss = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
PageTitle = pageTitle,
FabricateCssClasses = true,
CssClassPrefix = "pt-",
RestrictToSupportedLanguages = false,
RestrictToSupportedNumberingFormats = false,
ImageHandler = imageInfo =>
{
++imageCounter;
string extension = imageInfo.ContentType.Split('/')[1].ToLower();
ImageFormat imageFormat = null;
if (extension == "png")
imageFormat = ImageFormat.Png;
else if (extension == "gif")
imageFormat = ImageFormat.Gif;
else if (extension == "bmp")
imageFormat = ImageFormat.Bmp;
else if (extension == "jpeg")
imageFormat = ImageFormat.Jpeg;
else if (extension == "tiff")
{
// Convert tiff to gif.
extension = "gif";
imageFormat = ImageFormat.Gif;
}
else if (extension == "x-wmf")
{
extension = "wmf";
imageFormat = ImageFormat.Wmf;
}
// If the image format isn't one that we expect, ignore it,
// and don't return markup for the link.
if (imageFormat == null)
return null;
string base64 = null;
try
{
using (MemoryStream ms = new MemoryStream())
{
imageInfo.Bitmap.Save(ms, imageFormat);
var ba = ms.ToArray();
base64 = System.Convert.ToBase64String(ba);
}
}
catch (System.Runtime.InteropServices.ExternalException)
{
return null;
}
ImageFormat format = imageInfo.Bitmap.RawFormat;
ImageCodecInfo codec = ImageCodecInfo.GetImageDecoders().First(c => c.FormatID == format.Guid);
string mimeType = codec.MimeType;
string imageSource = string.Format("data:{0};base64,{1}", mimeType, base64);
XElement img = new XElement(Xhtml.img,
new XAttribute(NoNamespace.src, imageSource),
imageInfo.ImgStyleAttribute,
imageInfo.AltText != null ?
new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
return img;
}
};
XElement htmlElement = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);
// Produce HTML document with <!DOCTYPE html > declaration to tell the browser
// we are using HTML5.
var html = new XDocument(
new XDocumentType("html", null, null, null),
htmlElement);
// Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
// XEntity. PtOpenXmlUtil.cs define the XEntity class. See
// http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
// for detailed explanation.
//
// If you further transform the XML tree returned by ConvertToHtmlTransform, you
// must do it correctly, or entities will not be serialized properly.
var htmlString = html.ToString(SaveOptions.DisableFormatting);
File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
}
}
}
}