CSharp中PDF转图片、Word转图片

使用PDFRender4NET

无水印DLL

链接:https://pan.baidu.com/s/1HILw9Ztl6xNr4kMB1HGuWQ
提取码:psvm

工具类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using O2S.Components.PDFRender4NET;

namespace Z.Tools
{
/// <summary>
/// Helpers for inspecting PDF files and rendering their pages to image files
/// with PDFRender4NET.
/// </summary>
public class PdfHelper
{
    /// <summary>
    /// Rendering quality presets. The numeric value is multiplied by
    /// <see cref="BaseResolution"/> and passed as the second argument of
    /// <c>PDFFile.GetPageImage</c>; larger values give sharper images.
    /// </summary>
    public enum Definition
    {
        Low = 2,
        Standard = 3,
        High = 5,
        SuperHigh = 8,
        UltraHigh = 10
    }

    /// <summary>Factor applied to a <see cref="Definition"/> value when rendering a page.</summary>
    private const int BaseResolution = 56;

    /// <summary>File name prefix of every generated image; the page number is appended.</summary>
    private const string ImageNamePrefix = "pdfimg";

    /// <summary>
    /// Gets the number of pages in a PDF file.
    /// </summary>
    /// <param name="pdfInputPath">Path of the PDF file.</param>
    /// <returns>The page count reported by the opened document.</returns>
    public static int GetPageNum(string pdfInputPath)
    {
        PDFFile pdfFile = PDFFile.Open(pdfInputPath);
        try
        {
            return pdfFile.PageCount;
        }
        finally
        {
            // Release the document even when reading the page count throws.
            pdfFile.Dispose();
        }
    }

    /// <summary>
    /// Renders a range of PDF pages to image files named
    /// <c>pdfimg&lt;page&gt;.&lt;format&gt;</c> inside <paramref name="imageOutputPath"/>.
    /// </summary>
    /// <param name="pdfInputPath">Path of the PDF file.</param>
    /// <param name="imageOutputPath">Directory that receives the images; created when missing.</param>
    /// <param name="startPageNum">1-based first page to convert; values below 1 are treated as 1.</param>
    /// <param name="endPageNum">
    /// 1-based last page to convert (inclusive); values below 1 or beyond the document
    /// are treated as the last page.
    /// </param>
    /// <param name="imageFormat">Target image format; JPEG when null.</param>
    /// <param name="definition">Rendering quality; larger values give sharper images.</param>
    /// <returns>The paths of the generated image files, in page order.</returns>
    public static List<string> ConvertPdfToImage(
        string pdfInputPath,
        string imageOutputPath,
        int startPageNum = 1,
        int endPageNum = int.MaxValue,
        ImageFormat imageFormat = null,
        Definition definition = Definition.Standard
    )
    {
        List<string> imgList = new List<string>();
        if (imageFormat == null)
        {
            imageFormat = ImageFormat.Jpeg;
        }

        PDFFile pdfFile = PDFFile.Open(pdfInputPath);
        try
        {
            if (!Directory.Exists(imageOutputPath))
            {
                Directory.CreateDirectory(imageOutputPath);
            }

            int pageCount = pdfFile.PageCount;
            if (pageCount <= 0)
            {
                // Nothing to render; avoids asking the renderer for page index -1.
                return imgList;
            }

            if (startPageNum <= 0)
            {
                startPageNum = 1;
            }

            if (endPageNum <= 0)
            {
                endPageNum = pageCount;
            }

            // Swap a reversed range BEFORE clamping so that both bounds end up inside the document.
            if (startPageNum > endPageNum)
            {
                int swapped = startPageNum;
                startPageNum = endPageNum;
                endPageNum = swapped;
            }

            if (endPageNum > pageCount)
            {
                endPageNum = pageCount;
            }

            if (startPageNum > pageCount)
            {
                startPageNum = pageCount;
            }

            int resolution = BaseResolution * (int)definition;
            for (int page = startPageNum; page <= endPageNum; page++)
            {
                // GetPageImage takes a 0-based page index.
                using (Bitmap pageImage = pdfFile.GetPageImage(page - 1, resolution))
                {
                    // Path.Combine keeps the file inside the output directory even when the
                    // caller omits the trailing directory separator.
                    string filePath = Path.Combine(
                        imageOutputPath,
                        ImageNamePrefix + page + "." + imageFormat);
                    pageImage.Save(filePath, imageFormat);
                    imgList.Add(filePath);
                }
            }

            return imgList;
        }
        finally
        {
            // Release the document even when rendering or saving fails.
            pdfFile.Dispose();
        }
    }
}
}

调用

1
2
3
4
5
// Source PDF and the directory that receives the rendered page images.
const string pdfPath = @"D:\Tools\DocTest\水印.pdf";
const string outputDir = @"D:\Tools\DocTest\Pic\";

// Print the number of pages in the document.
int pageCount = PdfHelper.GetPageNum(pdfPath);
Console.WriteLine($@"文档页数:{pageCount}");

// Convert every page with the default settings and print the generated file paths, one per line.
var generatedFiles = PdfHelper.ConvertPdfToImage(pdfPath, outputDir);
Console.WriteLine(string.Join("\n", generatedFiles));

Word转图片

1
Install-Package Aspose.Words -Version 19.10.0

调用前设置授权

1
2
3
4
5
6
// Load the Aspose license from a file kept outside source control instead of embedding
// the license text in code. ASPOSE_LICENSE_PATH must hold the full path of the .lic file;
// when it is unset or the file is missing, no license is applied.
string licensePath = Environment.GetEnvironmentVariable("ASPOSE_LICENSE_PATH");
if (!string.IsNullOrEmpty(licensePath) && File.Exists(licensePath))
{
    // Dispose the stream once the license has been read.
    using (FileStream licenseStream = File.OpenRead(licensePath))
    {
        new License().SetLicense(licenseStream);
    }
}

调用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
using System;
using System.Collections.Generic;
using System.Drawing.Imaging;
using System.IO;
using Aspose.Words;
using Aspose.Words.Saving;

namespace Z.Utils
{
/// <summary>
/// Converts the pages of a Word document to image files with Aspose.Words.
/// </summary>
public class Word2Img
{
    /// <summary>
    /// Renders a range of pages of a Word document to image files named
    /// <c>&lt;imageName&gt;_&lt;n&gt;.&lt;format&gt;</c>, where <c>n</c> counts the generated
    /// images starting at 1. Example:
    /// <c>ConvertWordToImage("F:\\WordFile.doc", "F:\\", "ImageFile", 1, 20, ImageFormat.Png, 256)</c>.
    /// </summary>
    /// <param name="wordInputPath">Path of the Word document.</param>
    /// <param name="imageOutputPath">
    /// Output directory; when null or blank, the directory of the Word document is used.
    /// The directory is created when missing.
    /// </param>
    /// <param name="imageName">
    /// Image file name without extension; when null or blank, a new GUID is used.
    /// </param>
    /// <param name="startPageNum">1-based first page to convert; values below 1 are treated as 1.</param>
    /// <param name="endPageNum">
    /// 1-based last page to convert (inclusive); values below 1 or beyond the document
    /// are treated as the last page.
    /// </param>
    /// <param name="imageFormat">Target image format; PNG when null.</param>
    /// <param name="resolution">
    /// Resolution assigned to the save options; values of 0 or less are treated as 128.
    /// </param>
    /// <returns>The paths of the generated image files, in page order.</returns>
    /// <exception cref="Exception">
    /// Thrown when loading or saving fails; the original exception is attached as
    /// <see cref="Exception.InnerException"/>.
    /// </exception>
    public static List<string> ConvertWordToImage(
        string wordInputPath,
        string imageOutputPath,
        string imageName,
        int startPageNum,
        int endPageNum,
        ImageFormat imageFormat,
        float resolution
    )
    {
        // Paths of the generated images.
        List<string> images = new List<string>();
        try
        {
            Document doc = new Document(wordInputPath);

            if (string.IsNullOrWhiteSpace(imageOutputPath))
            {
                // GetDirectoryName yields null/empty for a bare file name; an empty directory
                // makes Path.Combine produce a path relative to the current directory.
                imageOutputPath = Path.GetDirectoryName(wordInputPath) ?? string.Empty;
            }

            if (imageOutputPath.Length > 0 && !Directory.Exists(imageOutputPath))
            {
                Directory.CreateDirectory(imageOutputPath);
            }

            if (string.IsNullOrWhiteSpace(imageName))
            {
                imageName = Guid.NewGuid().ToString("N");
            }

            imageFormat ??= ImageFormat.Png;

            if (resolution <= 0)
            {
                resolution = 128;
            }

            int pageCount = doc.PageCount;
            if (pageCount <= 0)
            {
                // Nothing to render; avoids requesting page index -1.
                return images;
            }

            if (startPageNum <= 0)
            {
                startPageNum = 1;
            }

            if (endPageNum <= 0)
            {
                endPageNum = pageCount;
            }

            // Swap a reversed range BEFORE clamping so that both bounds end up inside the document.
            if (startPageNum > endPageNum)
            {
                int swapped = startPageNum;
                startPageNum = endPageNum;
                endPageNum = swapped;
            }

            if (endPageNum > pageCount)
            {
                endPageNum = pageCount;
            }

            if (startPageNum > pageCount)
            {
                startPageNum = pageCount;
            }

            ImageSaveOptions imageSaveOptions = new ImageSaveOptions(GetSaveFormat(imageFormat))
            {
                Resolution = resolution
            };

            // The file suffix counts generated images (1, 2, ...) rather than source page numbers.
            int num = 1;
            for (int page = startPageNum; page <= endPageNum; page++)
            {
                // PageIndex is 0-based.
                imageSaveOptions.PageIndex = page - 1;
                string filePath = Path.Combine(imageOutputPath, imageName) + "_" + num + "." + imageFormat;
                doc.Save(filePath, imageSaveOptions);
                images.Add(filePath);
                num++;
            }
        }
        catch (Exception ex)
        {
            // Keep the user-facing messages, but attach the original exception so the real
            // cause is not lost.
            string message = string.Equals(
                "The document appears to be corrupted and cannot be loaded.",
                ex.Message,
                StringComparison.Ordinal)
                ? "文件似乎已损坏,无法加载。"
                : "文件被占用请关闭后重新导入";
            throw new Exception(message, ex);
        }

        return images;
    }

    /// <summary>
    /// Maps a <see cref="ImageFormat"/> to the matching Aspose <see cref="SaveFormat"/>.
    /// </summary>
    /// <param name="imageFormat">PNG, JPEG, TIFF or BMP.</param>
    /// <returns>The matching save format, or <see cref="SaveFormat.Unknown"/> for any other format.</returns>
    private static SaveFormat GetSaveFormat(ImageFormat imageFormat)
    {
        if (imageFormat.Equals(ImageFormat.Png))
        {
            return SaveFormat.Png;
        }

        if (imageFormat.Equals(ImageFormat.Jpeg))
        {
            return SaveFormat.Jpeg;
        }

        if (imageFormat.Equals(ImageFormat.Tiff))
        {
            return SaveFormat.Tiff;
        }

        if (imageFormat.Equals(ImageFormat.Bmp))
        {
            return SaveFormat.Bmp;
        }

        return SaveFormat.Unknown;
    }
}
}

使用程序Poppler

https://blog.alivate.com.au/poppler-windows/

转换PDF为图片

1
pdftoppm.exe -jpeg "D:\Tools\DocTest\水印.pdf" D:\Tools\DocTest\Pic\

这种方式转换的速度快,并且相同质量的前提下,文件比较小。

提取PDF中的图片

1
pdfimages.exe -j -p "D:\Tools\DocTest\水印.pdf" D:\Tools\DocTest\Pic\

使用程序Ghostscript

https://www.ghostscript.com/

这种方式效果较好,缺点是需要引用的exe和dll会使程序体积增加将近11MB。

在gs.exe所在的目录下运行下面的命令

获取页数

1
./gs -q -dNODISPLAY -c "(D:/Project/Node/Pdf2PngforWindows/doc/1.pdf) (r) file runpdfbegin pdfpagecount = quit"

转换某页

1
./gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r100 -dFirstPage=1 -dLastPage=1 -sOutputFile=C:\Users\ADMINI~1\AppData\Local\Temp\tmp-8468uujQmrsdRJ21.png "D:\Project\Node\Pdf2PngforWindows/doc/1.pdf"

我们就可以根据页数进行逐页转换。

相关参数如下说明:

  • -dQUIET, 安静的意思,指代执行过程中尽可能少的输出日志等信息。(也可以简写为-q)
  • -dNOSAFER, 关闭SAFER安全模式,允许PostScript程序访问文件系统;上面的命令使用的是-dPARANOIDSAFER(等同于-dSAFER),即启用安全模式
  • -dBATCH, 执行到最后一页后退出
  • -dNOPAUSE, 每一页转换之间没有停顿
  • -dNOPROMPT, 没有相关提示
  • -dFirstPage=1, 从第几页开始
  • -dLastPage=5, 到第几页结束
  • -sDEVICE=png16m, 转换输出的文件类型装置,默认值为x11alpha
  • -g720x1280, 图片像素(-g<width>x<height>),一般不指定,使用默认输出
  • -r300, 图片分辨率(即图片解析度为300dpi),光栅图片格式的默认值为72dpi
  • -sOutputFile=/opt/shanhy/error1png/%d.png, 图片输出路径,文件名中使用%d或%ld输出页码