HTML to DOC

This example demonstrates how to convert HTML to a Word document

Convert HTML from:
Source HTML file:
URL to the webpage to convert:
HTML Content:

Creating a doc from HTML.

HTML data can be converted into a document file.

First, there is a file browser for selecting the source HTML file whose content needs to be converted into another file format.

Second, there is a URL field, which is used to fetch HTML content from a specific server URL; the fetched content is then parsed into the document.

Alternatively, HTML content can be entered directly into the text box; it will be parsed into the document.

Finally, the output can be produced in several formats; use the dropdown to choose one of the following:

  • Word Doc
  • Word Docx
  • Word ML
  • RTF
  • MS Text format
  • E-book format
  • DLS based xml file format
  • HTML

Clicking the generate button produces a file in the format chosen above; this file contains the content of the selected file, the content fetched from the URL, or the plain HTML content entered in the text box.

using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Text;

using OfficeComponent.Word;
using System.Net;

namespace OfficeComponent.Samples
{
    /// <summary>
    /// Sample that converts HTML into a Word document. The HTML is read from a
    /// file, downloaded from a URL, or taken directly from <see cref="Value"/>,
    /// depending on <see cref="Type"/>.
    /// </summary>
    class HTMLtoDOCExample : WordExampleBase
#if WEB
, IUIExample
#endif
    {
        #region Inputs
        /// <summary>
        /// The input to convert. Interpreted according to <see cref="Type"/>:
        /// a file path (0), a URL (1), or the HTML text itself (any other value).
        /// </summary>
        public string Value { get; set; }

        /// <summary>
        /// Selects how <see cref="Value"/> is interpreted:
        /// 0 = path to an HTML file, 1 = URL of a web page, otherwise raw HTML text.
        /// </summary>
        public int Type { get; set; }
        #endregion

        /// <summary>
        /// Creates the example, passing null for both base-class constructor arguments.
        /// </summary>
        public HTMLtoDOCExample()
            : base(null, null)
        {
        }

        /// <summary>
        /// Creates the example, forwarding both arguments to the base class.
        /// </summary>
        /// <param name="commonDataPath">Forwarded to the base constructor.</param>
        /// <param name="outputDir">Forwarded to the base constructor.</param>
        public HTMLtoDOCExample(string commonDataPath, string outputDir)
            : base(commonDataPath, outputDir)
        {
        }

        /// <summary>
        /// Creates the example, forwarding all arguments to the base class.
        /// </summary>
        /// <param name="commonDataPath">Forwarded to the base constructor.</param>
        /// <param name="outputDir">Forwarded to the base constructor.</param>
        /// <param name="xmlFile">Forwarded to the base constructor.</param>
        public HTMLtoDOCExample(string commonDataPath, string outputDir, string xmlFile)
            : base(commonDataPath, outputDir, xmlFile)
        {
        }

        /// <summary>
        /// Loads the HTML selected by <see cref="Type"/> and <see cref="Value"/>,
        /// validates it as transitional XHTML, inserts it into a new Word document
        /// and saves that document into the output directory.
        /// </summary>
        /// <returns>
        /// The path of the generated file, or null when the input is missing,
        /// cannot be read, or fails XHTML validation (the problem is reported
        /// through ShowError in those cases).
        /// </returns>
        public override string Execute()
        {
#if WEB
            ProcessForm();
#endif
            string htmlContent;
            string error;

            if (Type == 0)
            {
                if (string.IsNullOrWhiteSpace(Value))
                {
                    ShowError("Please specify the path to an HTML file.");
                    return null;
                }

                error = GetFileContent(Value, out htmlContent);
                if (error != null)
                {
                    ShowError("File Read Error. " + error);
                    return null;
                }
            }
            else if (Type == 1)
            {
                if (string.IsNullOrWhiteSpace(Value))
                {
                    ShowError("Please specify the URL to the webpage to convert.");
                    return null;
                }

                error = GetWebContent(Value, out htmlContent);
                if (error != null)
                {
                    ShowError("Web Page Read Error. " + error);
                    return null;
                }
            }
            else // HTML content
            {
                if (string.IsNullOrWhiteSpace(Value))
                {
                    ShowError("Please specify the HTML text to convert.");
                    return null;
                }

                htmlContent = Value;
            }

            // Create a new instance of the WordDocument class.
            WordDocument document = new WordDocument();
            try
            {
                Section section = document.AddSection();
                section.AddParagraph();

                // Check the HTML content.
                // You can skip the validation step to improve performance.
                if (!section.Body.IsValidXHTML(htmlContent, XHTMLValidationType.Transitional, out error))
                {
                    ShowError("XHTML Validation Error. " + error);
                    return null;
                }

                document.XHTMLValidateOption = XHTMLValidationType.Transitional;
                section.Body.InsertXHTML(htmlContent);

                // A GUID in the file name keeps repeated runs from overwriting each other.
                string fileName = Path.Combine(OutputDir, this.GetType().Name + "_" + Guid.NewGuid().ToString() + GetExtension(SaveAsFormat));

                // Save the document.
                document.Save(fileName, SaveAsFormat);

                // Hand the path of the generated file back to the caller.
                return fileName;
            }
            finally
            {
                // Close the document on every path, including when InsertXHTML or Save throws.
                document.Close();
            }
        }

        /// <summary>
        /// Downloads the content of a web page as a string.
        /// </summary>
        /// <param name="url">The address to download from.</param>
        /// <param name="content">The downloaded content, or null on failure.</param>
        /// <returns>null if no error; otherwise the error message.</returns>
        static string GetWebContent(string url, out string content)
        {
            // NOTE(review): the URL comes straight from the Value input; if that is
            // user-controlled in a server deployment, consider restricting the
            // allowed schemes/hosts before downloading.
            try
            {
                // WebClient is disposable; release it as soon as the download completes.
                using (WebClient client = new WebClient())
                {
                    content = client.DownloadString(url);
                }

                return null;
            }
            catch (Exception ex)
            {
                content = null;
                return ex.Message;
            }
        }

        /// <summary>
        /// Reads a file.
        /// </summary>
        /// <param name="fileName">The file path.</param>
        /// <param name="content">The output content, or null on failure.</param>
        /// <returns>null if no error; otherwise the error message.</returns>
        static string GetFileContent(string fileName, out string content)
        {
            try
            {
                // Dispose the reader so the file handle is released immediately
                // rather than whenever the finalizer happens to run.
                using (StreamReader reader = new StreamReader(fileName))
                {
                    content = reader.ReadToEnd();
                }

                return null;
            }
            catch (Exception ex)
            {
                content = null;
                return ex.Message;
            }
        }

        /// <summary>
        /// Gets the action title for this example, "Convert".
        /// </summary>
        public override string ActionTitle
        {
            get
            {
                return "Convert";
            }
        }

#if WEB
        /// <summary>
        /// For file input (Type 0), replaces <see cref="Value"/> with the result of
        /// GetPostFile for the "SourceFile" form field.
        /// </summary>
        void ProcessForm()
        {
            // presumably GetPostFile returns the server-side path of the uploaded file - verify against the base class
            if (Type == 0)
                Value = GetPostFile("SourceFile");
        }
#endif
    }
}
Imports System.Drawing.Imaging
Imports System.IO
Imports System.Text

Imports OfficeComponent.Word
Imports System.Net

Namespace OfficeComponent.Samples
' Sample that converts HTML into a Word document. The HTML is read from a file,
' downloaded from a URL, or taken directly from Value, depending on Type.
#If WEB Then
	Friend Class HTMLtoDOCExample
		Inherits WordExampleBase
		Implements IUIExample
#Else
	Friend Class HTMLtoDOCExample
		Inherits WordExampleBase
#End If
		#Region "Inputs"
		Private privateValue As String
		''' <summary>
		''' The input to convert. Interpreted according to Type:
		''' a file path (0), a URL (1), or the HTML text itself (any other value).
		''' </summary>
		Public Property Value() As String
			Get
				Return privateValue
			End Get
			Set(ByVal value As String)
				privateValue = value
			End Set
		End Property
		Private privateType As Integer
		''' <summary>
		''' Selects how Value is interpreted:
		''' 0 = path to an HTML file, 1 = URL of a web page, otherwise raw HTML text.
		''' </summary>
		Public Property Type() As Integer
			Get
				Return privateType
			End Get
			Set(ByVal value As Integer)
				privateType = value
			End Set
		End Property
		#End Region

		''' <summary>
		''' Creates the example, passing Nothing for both base-class constructor arguments.
		''' </summary>
		Public Sub New()
			MyBase.New(Nothing, Nothing)

		End Sub

		''' <summary>
		''' Creates the example, forwarding both arguments to the base class.
		''' </summary>
		''' <param name="commonDataPath">Forwarded to the base constructor.</param>
		''' <param name="outputDir">Forwarded to the base constructor.</param>
		Public Sub New(ByVal commonDataPath As String, ByVal outputDir As String)
			MyBase.New(commonDataPath, outputDir)

		End Sub

		''' <summary>
		''' Creates the example, forwarding all arguments to the base class.
		''' </summary>
		''' <param name="commonDataPath">Forwarded to the base constructor.</param>
		''' <param name="outputDir">Forwarded to the base constructor.</param>
		''' <param name="xmlFile">Forwarded to the base constructor.</param>
		Public Sub New(ByVal commonDataPath As String, ByVal outputDir As String, ByVal xmlFile As String)
			MyBase.New(commonDataPath, outputDir, xmlFile)

		End Sub

		''' <summary>
		''' Loads the HTML selected by Type and Value, validates it as transitional
		''' XHTML, inserts it into a new Word document and saves that document into
		''' the output directory.
		''' </summary>
		''' <returns>
		''' The path of the generated file, or Nothing when the input is missing,
		''' cannot be read, or fails XHTML validation (the problem is reported
		''' through ShowError in those cases).
		''' </returns>
		Public Overrides Function Execute() As String
#If WEB Then
			ProcessForm()
#End If
			Dim htmlContent As String
			Dim [error] As String

			If Type = 0 Then
				If String.IsNullOrWhiteSpace(Value) Then
					ShowError("Please specify the path to an HTML file.")
					Return Nothing
				End If

				[error] = GetFileContent(Value, htmlContent)
				If [error] IsNot Nothing Then
					ShowError("File Read Error. " & [error])
					Return Nothing
				End If
			ElseIf Type = 1 Then
				If String.IsNullOrWhiteSpace(Value) Then
					ShowError("Please specify the URL to the webpage to convert.")
					Return Nothing
				End If

				[error] = GetWebContent(Value, htmlContent)
				If [error] IsNot Nothing Then
					ShowError("Web Page Read Error. " & [error])
					Return Nothing
				End If
			Else ' HTML content
				If String.IsNullOrWhiteSpace(Value) Then
					ShowError("Please specify the HTML text to convert.")
					Return Nothing
				End If

				htmlContent = Value
			End If

			' Create a new instance of the WordDocument class.
			Dim document As New WordDocument()
			Try
				Dim section As Section = document.AddSection()
				section.AddParagraph()

				' Check the HTML content.
				' You can skip the validation step to improve performance.
				If Not section.Body.IsValidXHTML(htmlContent, XHTMLValidationType.Transitional, [error]) Then
					ShowError("XHTML Validation Error. " & [error])
					Return Nothing
				End If

				document.XHTMLValidateOption = XHTMLValidationType.Transitional
				section.Body.InsertXHTML(htmlContent)

				' A GUID in the file name keeps repeated runs from overwriting each other.
				Dim fileName As String = Path.Combine(OutputDir, Me.GetType().Name & "_" & Guid.NewGuid().ToString() & GetExtension(SaveAsFormat))

				' Save the document.
				document.Save(fileName, SaveAsFormat)

				' Hand the path of the generated file back to the caller.
				Return fileName
			Finally
				' Close the document on every path, including when InsertXHTML or Save throws.
				document.Close()
			End Try

		End Function

		''' <summary>
		''' Downloads the content of a web page as a string.
		''' </summary>
		''' <param name="url">The address to download from.</param>
		''' <param name="content">The downloaded content, or Nothing on failure.</param>
		''' <returns>Nothing if no error; otherwise the error message.</returns>
		Private Shared Function GetWebContent(ByVal url As String, ByRef content As String) As String
			' NOTE(review): the URL comes straight from the Value input; if that is
			' user-controlled in a server deployment, consider restricting the
			' allowed schemes/hosts before downloading.
			Try
				' WebClient is disposable; release it as soon as the download completes.
				Using client As New WebClient()
					content = client.DownloadString(url)
				End Using

				Return Nothing
			Catch ex As Exception
				content = Nothing
				Return ex.Message
			End Try
		End Function

		''' <summary>
		''' Reads a file.
		''' </summary>
		''' <param name="fileName">The file path.</param>
		''' <param name="content">The output content, or Nothing on failure.</param>
		''' <returns>Nothing if no error; otherwise the error message.</returns>
		Private Shared Function GetFileContent(ByVal fileName As String, ByRef content As String) As String
			Try
				' Dispose the reader so the file handle is released immediately
				' rather than whenever the finalizer happens to run.
				Using reader As New StreamReader(fileName)
					content = reader.ReadToEnd()
				End Using

				Return Nothing
			Catch ex As Exception
				content = Nothing
				Return ex.Message
			End Try
		End Function

		''' <summary>
		''' Gets the action title for this example, "Convert".
		''' </summary>
		Public Overrides ReadOnly Property ActionTitle() As String
			Get
				Return "Convert"
			End Get
		End Property

#If WEB Then
		''' <summary>
		''' For file input (Type 0), replaces Value with the result of
		''' GetPostFile for the "SourceFile" form field.
		''' </summary>
		Private Sub ProcessForm()
			' presumably GetPostFile returns the server-side path of the uploaded file - verify against the base class
			If Type = 0 Then
				Value = GetPostFile("SourceFile")
			End If
		End Sub
#End If
	End Class
End Namespace