Image Extractor

This example demonstrates how to extract images from a PDF document.

Source PDF file:

Use this example when you need to extract images from a PDF file for any purpose, for instance when the images you need are only available inside a PDF document.

In the file browser, choose a PDF file which contains images.

After locating the file, click the generate button to see the output.

The output is a folder named "Extracted Images" in bin/debug that contains the extracted images.

using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
#if WEB
using System.IO.Compression;
#endif
using System.Text;

using OfficeComponent.Pdf;
using OfficeComponent.Pdf.Graphics;

namespace OfficeComponent.Samples
{
    /// <summary>
    /// Sample that extracts the images found on each page of a PDF document and
    /// saves them as PNG files in an "Extracted Images" folder under the output
    /// directory.
    /// </summary>
    internal class ImageExtractorExample : ExampleBase
#if WEB
, IUIExample
#endif
    {
        #region Input

        /// <summary>
        /// Path of the PDF file whose images are extracted.
        /// </summary>
        public string SourcePdf
        {
            get;
            set;
        }

        #endregion

        /// <summary>
        /// Creates the example; both arguments are forwarded to the base class.
        /// </summary>
        /// <param name="commonDataPath">Forwarded unchanged to the base constructor.</param>
        /// <param name="outputDir">Forwarded unchanged to the base constructor.</param>
        public ImageExtractorExample(string commonDataPath, string outputDir)
            : base(commonDataPath, outputDir)
        {
        }

        /// <summary>
        /// Creates the example; all arguments are forwarded to the base class.
        /// </summary>
        /// <param name="commonDataPath">Forwarded unchanged to the base constructor.</param>
        /// <param name="outputDir">Forwarded unchanged to the base constructor.</param>
        /// <param name="xmlFile">Forwarded unchanged to the base constructor.</param>
        public ImageExtractorExample(string commonDataPath, string outputDir, string xmlFile)
            : base(commonDataPath, outputDir, xmlFile)
        {
        }

        /// <summary>
        /// Saves every image of <see cref="SourcePdf"/> as
        /// "&lt;pdf name&gt;_Page_&lt;p&gt;_Image_&lt;n&gt;.png" (both numbers are 1-based)
        /// inside the "Extracted Images" folder below <c>OutputDir</c>.
        /// </summary>
        /// <returns>
        /// The path of the "Extracted Images" folder, or, in WEB builds, the path of a
        /// zip archive created in that folder that contains the saved images.
        /// Returns <c>null</c> after calling <c>ShowError</c> when no source PDF is
        /// specified or when the document yields no images.
        /// </returns>
        public override string Execute()
        {
#if WEB
            ProcessForm();
#endif

            if (string.IsNullOrWhiteSpace(SourcePdf))
            {
                ShowError("Please specify a PDF document.");
                return null;
            }

            // CreateDirectory is a no-op when the folder already exists, so no
            // separate existence check is needed.
            string outputPath = Path.Combine(OutputDir, "Extracted Images");
            Directory.CreateDirectory(outputPath);

            string name = Path.GetFileNameWithoutExtension(SourcePdf);
            List<string> savedFiles = new List<string>();

            // Load the existing PDF; it is closed as soon as all images are saved.
            using (PdfImportedDocument ldoc = new PdfImportedDocument(SourcePdf))
            {
                PdfImportedPageCollection loadedPages = ldoc.Pages;

                for (int pageIndex = 0; pageIndex < loadedPages.Count; pageIndex++)
                {
                    PdfImportedPage lpage = (PdfImportedPage) loadedPages[pageIndex];
                    Image[] images = lpage.ExtractImages();

                    if (images == null)
                    {
                        continue;
                    }

                    // NOTE(review): the extracted Image instances are not disposed here
                    // because it is not visible who owns them - confirm against the
                    // OfficeComponent documentation.
                    for (int imageIndex = 0; imageIndex < images.Length; imageIndex++)
                    {
                        string fileName = name
                            + "_Page_" + (pageIndex + 1).ToString()
                            + "_Image_" + (imageIndex + 1).ToString()
                            + ".png";
                        string imagePath = Path.Combine(outputPath, fileName);

                        images[imageIndex].Save(imagePath, ImageFormat.Png);
                        savedFiles.Add(imagePath);
                    }
                }
            }

            if (savedFiles.Count == 0)
            {
                ShowError("No images found in the source PDF file.");
                return null;
            }

#if WEB
            // Web builds hand back a single archive instead of a folder.
            outputPath = Path.Combine(outputPath, name + ".zip");
            using (ZipStorer zip = ZipStorer.Create(outputPath, ""))
            {
                foreach (string savedFile in savedFiles)
                {
                    zip.AddFile(ZipStorer.Compression.Store, savedFile, Path.GetFileName(savedFile), "");
                }
            }
#endif

            return outputPath;
        }

        /// <summary>
        /// Caption of the action that runs this example.
        /// </summary>
        public override string ActionTitle
        {
            get { return "Extract Images"; }
        }

#if WEB
        /// <summary>
        /// Reads the posted "SourceFile" value into <see cref="SourcePdf"/>.
        /// </summary>
        private void ProcessForm()
        {
            SourcePdf = GetPostFile("SourceFile");
        }
#endif
    }
}
Imports System.Drawing.Imaging
Imports System.IO
#If WEB Then
Imports System.IO.Compression
#End If
Imports System.Text

Imports OfficeComponent.Pdf
Imports OfficeComponent.Pdf.Graphics

Namespace OfficeComponent.Samples
#If WEB Then
	''' <summary>
	''' Sample that extracts the images found on each page of a PDF document and
	''' saves them as PNG files in an "Extracted Images" folder under the output
	''' directory.
	''' </summary>
	Friend Class ImageExtractorExample
		Inherits ExampleBase
		Implements IUIExample
#Else
	''' <summary>
	''' Sample that extracts the images found on each page of a PDF document and
	''' saves them as PNG files in an "Extracted Images" folder under the output
	''' directory.
	''' </summary>
	Friend Class ImageExtractorExample
		Inherits ExampleBase
#End If
		#Region "Input"

		Private privateSourcePdf As String

		''' <summary>
		''' Path of the PDF file whose images are extracted.
		''' </summary>
		Public Property SourcePdf() As String
			Get
				Return privateSourcePdf
			End Get
			Set(ByVal value As String)
				privateSourcePdf = value
			End Set
		End Property

		#End Region

		''' <summary>
		''' Creates the example; both arguments are forwarded to the base class.
		''' </summary>
		Public Sub New(ByVal commonDataPath As String, ByVal outputDir As String)
			MyBase.New(commonDataPath, outputDir)
		End Sub

		''' <summary>
		''' Creates the example; all arguments are forwarded to the base class.
		''' </summary>
		Public Sub New(ByVal commonDataPath As String, ByVal outputDir As String, ByVal xmlFile As String)
			MyBase.New(commonDataPath, outputDir, xmlFile)
		End Sub

		''' <summary>
		''' Saves every image of SourcePdf as
		''' "&lt;pdf name&gt;_Page_&lt;p&gt;_Image_&lt;n&gt;.png" (both numbers are 1-based)
		''' inside the "Extracted Images" folder below OutputDir.
		''' </summary>
		''' <returns>
		''' The path of the "Extracted Images" folder, or, in WEB builds, the path of a
		''' zip archive created in that folder that contains the saved images.
		''' Returns Nothing after calling ShowError when no source PDF is specified or
		''' when the document yields no images.
		''' </returns>
		Public Overrides Function Execute() As String
#If WEB Then
			ProcessForm()
#End If

			If String.IsNullOrWhiteSpace(SourcePdf) Then
				ShowError("Please specify a PDF document.")
				Return Nothing
			End If

			' CreateDirectory is a no-op when the folder already exists, so no
			' separate existence check is needed.
			Dim outputPath As String = Path.Combine(OutputDir, "Extracted Images")
			Directory.CreateDirectory(outputPath)

			Dim name As String = Path.GetFileNameWithoutExtension(SourcePdf)
			Dim savedFiles As New List(Of String)()

			' Load the existing PDF; it is closed as soon as all images are saved.
			Using ldoc As New PdfImportedDocument(SourcePdf)
				Dim loadedPages As PdfImportedPageCollection = ldoc.Pages

				For pageIndex As Integer = 0 To loadedPages.Count - 1
					Dim lpage As PdfImportedPage = CType(loadedPages(pageIndex), PdfImportedPage)
					Dim images() As Image = lpage.ExtractImages()

					If images Is Nothing Then
						Continue For
					End If

					' NOTE(review): the extracted Image instances are not disposed here
					' because it is not visible who owns them - confirm against the
					' OfficeComponent documentation.
					For imageIndex As Integer = 0 To images.Length - 1
						Dim fileName As String = name & "_Page_" & (pageIndex + 1).ToString() & "_Image_" & (imageIndex + 1).ToString() & ".png"
						Dim imagePath As String = Path.Combine(outputPath, fileName)

						images(imageIndex).Save(imagePath, ImageFormat.Png)
						savedFiles.Add(imagePath)
					Next imageIndex
				Next pageIndex
			End Using

			If savedFiles.Count = 0 Then
				ShowError("No images found in the source PDF file.")
				Return Nothing
			End If

#If WEB Then
			' Web builds hand back a single archive instead of a folder.
			outputPath = Path.Combine(outputPath, name & ".zip")
			Using zip As ZipStorer = ZipStorer.Create(outputPath, "")
				For Each savedFile As String In savedFiles
					zip.AddFile(ZipStorer.Compression.Store, savedFile, Path.GetFileName(savedFile), "")
				Next savedFile
			End Using
#End If

			Return outputPath
		End Function

		''' <summary>
		''' Caption of the action that runs this example.
		''' </summary>
		Public Overrides ReadOnly Property ActionTitle() As String
			Get
				Return "Extract Images"
			End Get
		End Property

#If WEB Then
		''' <summary>
		''' Reads the posted "SourceFile" value into SourcePdf.
		''' </summary>
		Private Sub ProcessForm()
			SourcePdf = GetPostFile("SourceFile")
		End Sub
#End If
	End Class
End Namespace