This example demonstrates how to extract images from a PDF document.
Use it when you need to extract images from a PDF file for any purpose, for example when the images you need are only provided in PDF format.
In the file browser, choose a PDF file which contains images.
After locating the file, click the Generate button to produce the output.
The output is a folder named "Extracted Images" in bin/debug that contains the extracted images.
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
#if WEB
using System.IO.Compression;
#endif
using System.Text;
using OfficeComponent.Pdf;
using OfficeComponent.Pdf.Graphics;
namespace OfficeComponent.Samples
{
    /// <summary>
    /// Sample that extracts every image found on the pages of a PDF document
    /// and saves each one as a PNG file in an "Extracted Images" folder.
    /// </summary>
    class ImageExtractorExample : ExampleBase
#if WEB
        , IUIExample
#endif
    {
        /// <summary>
        /// Name of the sub-folder of the output directory that receives the extracted images.
        /// </summary>
        private const string ExtractedImagesFolderName = "Extracted Images";

        #region Input

        /// <summary>
        /// Path of the PDF document to extract images from.
        /// </summary>
        public string SourcePdf { get; set; }

        #endregion

        /// <summary>
        /// Creates the example; both arguments are forwarded to the <see cref="ExampleBase"/> constructor.
        /// </summary>
        public ImageExtractorExample(string commonDataPath, string outputDir)
            : base(commonDataPath, outputDir)
        {
        }

        /// <summary>
        /// Creates the example; all arguments are forwarded to the <see cref="ExampleBase"/> constructor.
        /// </summary>
        public ImageExtractorExample(string commonDataPath, string outputDir, string xmlFile)
            : base(commonDataPath, outputDir, xmlFile)
        {
        }

        /// <summary>
        /// Extracts all images from <see cref="SourcePdf"/> and saves them as
        /// "&lt;pdf name&gt;_Page_&lt;p&gt;_Image_&lt;n&gt;.png" (1-based indices).
        /// </summary>
        /// <returns>
        /// The folder containing the extracted images (in WEB builds, the path of a zip
        /// archive holding them), or <c>null</c> after reporting an error when no source
        /// document is specified or the document contains no images.
        /// </returns>
        public override string Execute()
        {
#if WEB
            ProcessForm();
#endif
            if (string.IsNullOrWhiteSpace(SourcePdf))
            {
                ShowError("Please specify a PDF document.");
                return null;
            }

            // Directory.CreateDirectory does nothing when the folder already exists,
            // so no separate existence check is required.
            string outputFolder = Path.Combine(OutputDir, ExtractedImagesFolderName);
            Directory.CreateDirectory(outputFolder);

            string name = Path.GetFileNameWithoutExtension(SourcePdf);

            // Load the existing PDF.
            using (PdfImportedDocument ldoc = new PdfImportedDocument(SourcePdf))
            {
                PdfImportedPageCollection loadedPages = ldoc.Pages;
                List<string> savedFiles = new List<string>();

                // Extract the images page by page.
                for (int i = 0; i < loadedPages.Count; i++)
                {
                    PdfImportedPage page = (PdfImportedPage)loadedPages[i];
                    Image[] images = page.ExtractImages();
                    if (images == null || images.Length == 0)
                    {
                        continue;
                    }

                    try
                    {
                        for (int j = 0; j < images.Length; j++)
                        {
                            string fileName = name + "_Page_" + (i + 1).ToString() + "_Image_" + (j + 1).ToString() + ".png";
                            string imagePath = Path.Combine(outputFolder, fileName);
                            images[j].Save(imagePath, ImageFormat.Png);
                            savedFiles.Add(imagePath);
                        }
                    }
                    finally
                    {
                        // Image wraps GDI+ resources, so release them once the page is done.
                        // Disposal happens after all saves so a repeated instance in the array
                        // is never used after being disposed.
                        // NOTE(review): assumes the caller owns the images returned by
                        // ExtractImages() - confirm against the library documentation.
                        foreach (Image image in images)
                        {
                            if (image != null)
                            {
                                image.Dispose();
                            }
                        }
                    }
                }

                if (savedFiles.Count == 0)
                {
                    ShowError("No images found in the source PDF file.");
                    return null;
                }

#if WEB
                // Bundle the extracted images into a single archive next to them.
                string zipPath = Path.Combine(outputFolder, name + ".zip");
                using (ZipStorer zip = ZipStorer.Create(zipPath, ""))
                {
                    foreach (string file in savedFiles)
                    {
                        zip.AddFile(ZipStorer.Compression.Store, file, Path.GetFileName(file), "");
                    }
                }

                return zipPath;
#else
                return outputFolder;
#endif
            }
        }

        /// <summary>
        /// Title of the action performed by this example.
        /// </summary>
        public override string ActionTitle
        {
            get { return "Extract Images"; }
        }

#if WEB
        /// <summary>
        /// Reads the posted "SourceFile" value into <see cref="SourcePdf"/>.
        /// </summary>
        private void ProcessForm()
        {
            SourcePdf = GetPostFile("SourceFile");
        }
#endif
    }
}
Imports System.Drawing.Imaging
Imports System.IO
#If WEB Then
Imports System.IO.Compression
#End If
Imports System.Text
Imports OfficeComponent.Pdf
Imports OfficeComponent.Pdf.Graphics
Namespace OfficeComponent.Samples
    ' Sample that extracts every image found on the pages of a PDF document
    ' and saves each one as a PNG file in an "Extracted Images" folder.
#If WEB Then
    Friend Class ImageExtractorExample
        Inherits ExampleBase
        Implements IUIExample
#Else
    Friend Class ImageExtractorExample
        Inherits ExampleBase
#End If
        ''' <summary>
        ''' Name of the sub-folder of the output directory that receives the extracted images.
        ''' </summary>
        Private Const ExtractedImagesFolderName As String = "Extracted Images"

#Region "Input"
        ''' <summary>
        ''' Path of the PDF document to extract images from.
        ''' </summary>
        Public Property SourcePdf As String
#End Region

        ''' <summary>
        ''' Creates the example; both arguments are forwarded to the ExampleBase constructor.
        ''' </summary>
        Public Sub New(ByVal commonDataPath As String, ByVal outputDir As String)
            MyBase.New(commonDataPath, outputDir)
        End Sub

        ''' <summary>
        ''' Creates the example; all arguments are forwarded to the ExampleBase constructor.
        ''' </summary>
        Public Sub New(ByVal commonDataPath As String, ByVal outputDir As String, ByVal xmlFile As String)
            MyBase.New(commonDataPath, outputDir, xmlFile)
        End Sub

        ''' <summary>
        ''' Extracts all images from SourcePdf and saves them as
        ''' "&lt;pdf name&gt;_Page_&lt;p&gt;_Image_&lt;n&gt;.png" (1-based indices).
        ''' </summary>
        ''' <returns>
        ''' The folder containing the extracted images (in WEB builds, the path of a zip
        ''' archive holding them), or Nothing after reporting an error when no source
        ''' document is specified or the document contains no images.
        ''' </returns>
        Public Overrides Function Execute() As String
#If WEB Then
            ProcessForm()
#End If
            If String.IsNullOrWhiteSpace(SourcePdf) Then
                ShowError("Please specify a PDF document.")
                Return Nothing
            End If

            ' Directory.CreateDirectory does nothing when the folder already exists,
            ' so no separate existence check is required.
            Dim outputFolder As String = Path.Combine(OutputDir, ExtractedImagesFolderName)
            Directory.CreateDirectory(outputFolder)

            Dim name As String = Path.GetFileNameWithoutExtension(SourcePdf)

            ' Load the existing PDF.
            Using ldoc As New PdfImportedDocument(SourcePdf)
                Dim loadedPages As PdfImportedPageCollection = ldoc.Pages
                Dim savedFiles As New List(Of String)()

                ' Extract the images page by page.
                For i As Integer = 0 To loadedPages.Count - 1
                    Dim page As PdfImportedPage = CType(loadedPages(i), PdfImportedPage)
                    Dim images() As Image = page.ExtractImages()
                    If images Is Nothing OrElse images.Length = 0 Then
                        Continue For
                    End If

                    Try
                        For j As Integer = 0 To images.Length - 1
                            Dim fileName As String = name & "_Page_" & (i + 1).ToString() & "_Image_" & (j + 1).ToString() & ".png"
                            Dim imagePath As String = Path.Combine(outputFolder, fileName)
                            images(j).Save(imagePath, ImageFormat.Png)
                            savedFiles.Add(imagePath)
                        Next j
                    Finally
                        ' Image wraps GDI+ resources, so release them once the page is done.
                        ' Disposal happens after all saves so a repeated instance in the array
                        ' is never used after being disposed.
                        ' NOTE(review): assumes the caller owns the images returned by
                        ' ExtractImages() - confirm against the library documentation.
                        For Each extracted As Image In images
                            If extracted IsNot Nothing Then
                                extracted.Dispose()
                            End If
                        Next extracted
                    End Try
                Next i

                If savedFiles.Count = 0 Then
                    ShowError("No images found in the source PDF file.")
                    Return Nothing
                End If

#If WEB Then
                ' Bundle the extracted images into a single archive next to them.
                Dim zipPath As String = Path.Combine(outputFolder, name & ".zip")
                Using zip As ZipStorer = ZipStorer.Create(zipPath, "")
                    For Each savedFile As String In savedFiles
                        zip.AddFile(ZipStorer.Compression.Store, savedFile, Path.GetFileName(savedFile), "")
                    Next savedFile
                End Using

                Return zipPath
#Else
                Return outputFolder
#End If
            End Using
        End Function

        ''' <summary>
        ''' Title of the action performed by this example.
        ''' </summary>
        Public Overrides ReadOnly Property ActionTitle() As String
            Get
                Return "Extract Images"
            End Get
        End Property

#If WEB Then
        ''' <summary>
        ''' Reads the posted "SourceFile" value into SourcePdf.
        ''' </summary>
        Private Sub ProcessForm()
            SourcePdf = GetPostFile("SourceFile")
        End Sub
#End If
    End Class
End Namespace