CSharp中文档转换为PDF或图片不依赖WPS或Office

安装依赖

1
2
3
4
Install-Package Aspose.Words -Version 19.10.0
Install-Package Aspose.Cells -Version 19.10.0
Install-Package Aspose.Slides.NET -Version 19.10.0
Install-Package Aspose.PDF -Version 19.10.0

添加系统引用

1
System.Drawing

Word => PDF/IMG

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
using System;
using System.Collections.Generic;
using System.Drawing.Imaging;
using System.IO;
using Aspose.Words;
using Path = System.IO.Path;

namespace document_converter.Utils
{
/// <summary>
/// Converts Word documents to PDF, or to one image per page, using Aspose.Words.
/// </summary>
public class ConverterWord
{
    /// <summary>
    /// Converts a Word document to a PDF file.
    /// </summary>
    /// <param name="sourcePath">Path of the Word document to load.</param>
    /// <param name="targetPath">Path of the PDF file to write.</param>
    public static void ToPdf(
        string sourcePath,
        string targetPath
    )
    {
        SetLicense();
        Document doc = new Document(sourcePath);
        doc.Save(targetPath, SaveFormat.Pdf);
    }

    /// <summary>
    /// Converts a Word document to images by first writing an intermediate PDF
    /// and then handing that PDF to <c>ConverterPdf.ToImage</c>.
    /// </summary>
    /// <param name="sourcePath">Path of the Word document to load.</param>
    /// <param name="targetPath">Folder that receives the images.</param>
    /// <param name="startPageNum">First page to convert.</param>
    /// <param name="endPageNum">Last page to convert.</param>
    /// <param name="imageFormat">Image format, forwarded unchanged to <c>ConverterPdf.ToImage</c>.</param>
    /// <param name="resolution">Rendering resolution, forwarded unchanged to <c>ConverterPdf.ToImage</c>.</param>
    /// <returns>The list returned by <c>ConverterPdf.ToImage</c>.</returns>
    public static List<string> ToImage(
        string sourcePath,
        string targetPath,
        int startPageNum = 1,
        int endPageNum = int.MaxValue,
        ImageFormat imageFormat = null,
        int resolution = 128
    )
    {
        SetLicense();

        // The intermediate PDF sits next to the source and keeps its base name
        // (only the extension becomes ".temp").
        // NOTE(review): presumably so the images are named after the source file - confirm.
        string pdfPath = Path.ChangeExtension(sourcePath, "temp");
        try
        {
            ToPdf(sourcePath, pdfPath);
            return ConverterPdf.ToImage(pdfPath, targetPath, startPageNum, endPageNum, imageFormat, resolution);
        }
        finally
        {
            // Remove the intermediate file even when conversion or rendering throws,
            // so failed runs do not leave ".temp" files behind.
            if (File.Exists(pdfPath))
            {
                File.Delete(pdfPath);
            }
        }
    }

    /// <summary>
    /// Applies the Base64-encoded license from <c>ConvertLicense.LicenseCode</c> to Aspose.Words.
    /// </summary>
    private static void SetLicense()
    {
        byte[] licenseBytes = Convert.FromBase64String(ConvertLicense.LicenseCode);
        new License().SetLicense(new MemoryStream(licenseBytes));
    }
}
}

也可以只用Aspose.Words来保存为图片

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/// <summary>
/// Renders pages of a Word document to image files using Aspose.Words only.
/// Example call: ToImage2("F:\\PdfFile.doc", "F:\\ImageFile", 1, 20, ImageFormat.Png, 256);
/// </summary>
/// <param name="sourcePath">Path of the Word document to load.</param>
/// <param name="targetPath">
/// Output folder for the images. When null, empty or whitespace, the folder that
/// contains <paramref name="sourcePath"/> is used. The folder is created if missing.
/// </param>
/// <param name="startPageNum">
/// First page to convert (1-based). Values of 0 or less are treated as 1.
/// </param>
/// <param name="endPageNum">
/// Last page to convert (1-based, inclusive). Values of 0 or less, or beyond the
/// document's page count, are treated as the page count.
/// </param>
/// <param name="imageFormat">
/// Image format to write. Defaults to JPEG when null.
/// </param>
/// <param name="resolution">
/// Value assigned to <c>ImageSaveOptions.Resolution</c>. Values of 0 or less are replaced by 128.
/// </param>
/// <returns>Paths of the image files written, in page order.</returns>
/// <exception cref="Exception">
/// Any failure is rethrown as a plain <see cref="Exception"/> with one of two fixed
/// messages; the original exception is available as <c>InnerException</c>.
/// </exception>
public static List<string> ToImage2(
    string sourcePath,
    string targetPath,
    int startPageNum = 1,
    int endPageNum = int.MaxValue,
    ImageFormat imageFormat = null,
    float resolution = 256
)
{
    SetLicense();
    string imageName = Path.GetFileNameWithoutExtension(sourcePath).Replace(" ", "");

    // Paths of the images written so far.
    List<string> images = new List<string>();
    try
    {
        Document doc = new Document(sourcePath);

        if (string.IsNullOrWhiteSpace(targetPath))
        {
            // Resolve to a full path first so a bare file name still yields a usable folder.
            targetPath = Path.GetDirectoryName(Path.GetFullPath(sourcePath));
        }

        if (targetPath == null)
        {
            // No folder to write to: nothing is saved.
            return images;
        }

        if (!Directory.Exists(targetPath))
        {
            Directory.CreateDirectory(targetPath);
        }

        int pageCount = doc.PageCount;

        if (startPageNum <= 0)
        {
            startPageNum = 1;
        }

        if (endPageNum > pageCount || endPageNum <= 0)
        {
            endPageNum = pageCount;
        }

        if (startPageNum > endPageNum)
        {
            // Reversed range: swap the bounds (a temporary is required for a real swap).
            int previousStart = startPageNum;
            startPageNum = endPageNum;
            endPageNum = previousStart;

            // The former start may lie past the last page; clamp it after the swap.
            if (endPageNum > pageCount)
            {
                endPageNum = pageCount;
            }
        }

        if (imageFormat == null)
        {
            imageFormat = ImageFormat.Jpeg;
        }

        if (resolution <= 0)
        {
            resolution = 128;
        }

        ImageSaveOptions imageSaveOptions = new ImageSaveOptions(GetSaveFormat(imageFormat))
        {
            Resolution = resolution
        };

        // Invariant lower-casing keeps the extension stable regardless of the current culture.
        string extension = imageFormat.ToString().ToLowerInvariant();

        for (int i = startPageNum; i <= endPageNum; i++)
        {
            // Page numbers here are 1-based; PageIndex is set to the 0-based equivalent.
            imageSaveOptions.PageIndex = i - 1;

            string pathAll = Path.Combine(
                targetPath,
                imageName + "_" + i + "." + extension
            );
            doc.Save(pathAll, imageSaveOptions);
            images.Add(pathAll);
        }
    }
    catch (Exception ex)
    {
        // Messages are kept as-is for existing callers; the original exception is
        // attached so the real cause is not lost.
        throw new Exception(
            "The document appears to be corrupted and cannot be loaded.".Equals(ex.Message)
                ? "文件似乎已损坏,无法加载。"
                : "文件被占用请关闭后重新导入",
            ex
        );
    }

    return images;
}

/// <summary>
/// Translates a System.Drawing image format into the corresponding Aspose.Words save format.
/// </summary>
/// <param name="imageFormat">Image format to translate.</param>
/// <returns>
/// The Png, Jpeg, Tiff or Bmp save format for the matching image format;
/// <c>SaveFormat.Unknown</c> for anything else.
/// </returns>
private static SaveFormat GetSaveFormat(ImageFormat imageFormat)
{
    if (imageFormat.Equals(ImageFormat.Png))
    {
        return SaveFormat.Png;
    }

    if (imageFormat.Equals(ImageFormat.Jpeg))
    {
        return SaveFormat.Jpeg;
    }

    if (imageFormat.Equals(ImageFormat.Tiff))
    {
        return SaveFormat.Tiff;
    }

    return imageFormat.Equals(ImageFormat.Bmp)
        ? SaveFormat.Bmp
        : SaveFormat.Unknown;
}

Excel => PDF/IMG

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
using System;
using System.Collections.Generic;
using System.Drawing.Imaging;
using System.IO;
using Aspose.Cells;

namespace document_converter.Utils
{
/// <summary>
/// Converts Excel workbooks to PDF, or to one image per page, using Aspose.Cells.
/// </summary>
public class ConverterExcel
{
    /// <summary>
    /// Converts an Excel workbook to a PDF file.
    /// </summary>
    /// <param name="sourcePath">Path of the Excel file to load.</param>
    /// <param name="targetPath">Path of the PDF file to write.</param>
    public static void ToPdf(
        string sourcePath,
        string targetPath
    )
    {
        SetLicense();
        // Load the workbook and release it as soon as the PDF has been written.
        using (Workbook workbook = new Workbook(sourcePath))
        {
            PdfSaveOptions options = new PdfSaveOptions();
            workbook.Save(targetPath, options);
        }
    }


    /// <summary>
    /// Converts an Excel workbook to images by first writing an intermediate PDF
    /// and then handing that PDF to <c>ConverterPdf.ToImage</c>.
    /// </summary>
    /// <param name="sourcePath">Path of the Excel file to load.</param>
    /// <param name="targetPath">Folder that receives the images.</param>
    /// <param name="startPageNum">First page to convert.</param>
    /// <param name="endPageNum">Last page to convert.</param>
    /// <param name="imageFormat">Image format, forwarded unchanged to <c>ConverterPdf.ToImage</c>.</param>
    /// <param name="resolution">Rendering resolution, forwarded unchanged to <c>ConverterPdf.ToImage</c>.</param>
    /// <returns>The list returned by <c>ConverterPdf.ToImage</c>.</returns>
    public static List<string> ToImage(
        string sourcePath,
        string targetPath,
        int startPageNum = 1,
        int endPageNum = int.MaxValue,
        ImageFormat imageFormat = null,
        int resolution = 128
    )
    {
        SetLicense();

        // The intermediate PDF sits next to the source and keeps its base name
        // (only the extension becomes ".temp").
        string pdfPath = Path.ChangeExtension(sourcePath, "temp");
        try
        {
            ToPdf(sourcePath, pdfPath);
            return ConverterPdf.ToImage(pdfPath, targetPath, startPageNum, endPageNum, imageFormat, resolution);
        }
        finally
        {
            // Remove the intermediate file even when conversion or rendering throws,
            // so failed runs do not leave ".temp" files behind.
            if (File.Exists(pdfPath))
            {
                File.Delete(pdfPath);
            }
        }
    }

    /// <summary>
    /// Applies the Base64-encoded license from <c>ConvertLicense.LicenseCode</c> to Aspose.Cells.
    /// </summary>
    private static void SetLicense()
    {
        byte[] licenseBytes = Convert.FromBase64String(ConvertLicense.LicenseCode);
        new License().SetLicense(new MemoryStream(licenseBytes));
    }
}
}

PPT => PDF/IMG

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
using System;
using System.Collections.Generic;
using System.Drawing.Imaging;
using System.IO;
using Aspose.Slides;


namespace document_converter.Utils
{
/// <summary>
/// Converts PowerPoint presentations to PDF, or to one image per page, using Aspose.Slides.
/// </summary>
public class ConverterPpt
{
    /// <summary>
    /// Converts a presentation to a PDF file.
    /// </summary>
    /// <param name="sourcePath">Path of the presentation to load.</param>
    /// <param name="targetPath">Path of the PDF file to write.</param>
    public static void ToPdf(
        string sourcePath,
        string targetPath
    )
    {
        SetLicense();
        // Load the presentation and release it as soon as the PDF has been written.
        using (var presentation = new Presentation(sourcePath))
        {
            presentation.Save(targetPath, Aspose.Slides.Export.SaveFormat.Pdf);
        }
    }

    /// <summary>
    /// Converts a presentation to images by first writing an intermediate PDF
    /// and then handing that PDF to <c>ConverterPdf.ToImage</c>.
    /// </summary>
    /// <param name="sourcePath">Path of the presentation to load.</param>
    /// <param name="targetPath">Folder that receives the images.</param>
    /// <param name="startPageNum">First page to convert.</param>
    /// <param name="endPageNum">Last page to convert.</param>
    /// <param name="imageFormat">Image format, forwarded unchanged to <c>ConverterPdf.ToImage</c>.</param>
    /// <param name="resolution">Rendering resolution, forwarded unchanged to <c>ConverterPdf.ToImage</c>.</param>
    /// <returns>The list returned by <c>ConverterPdf.ToImage</c>.</returns>
    public static List<string> ToImage(
        string sourcePath,
        string targetPath,
        int startPageNum = 1,
        int endPageNum = int.MaxValue,
        ImageFormat imageFormat = null,
        int resolution = 128
    )
    {
        SetLicense();

        // The intermediate PDF sits next to the source and keeps its base name
        // (only the extension becomes ".temp").
        string pdfPath = Path.ChangeExtension(sourcePath, "temp");
        try
        {
            ToPdf(sourcePath, pdfPath);
            return ConverterPdf.ToImage(pdfPath, targetPath, startPageNum, endPageNum, imageFormat, resolution);
        }
        finally
        {
            // Remove the intermediate file even when conversion or rendering throws,
            // so failed runs do not leave ".temp" files behind.
            if (File.Exists(pdfPath))
            {
                File.Delete(pdfPath);
            }
        }
    }

    /// <summary>
    /// Applies the Base64-encoded license from <c>ConvertLicense.LicenseCode</c> to Aspose.Slides.
    /// </summary>
    private static void SetLicense()
    {
        byte[] licenseBytes = Convert.FromBase64String(ConvertLicense.LicenseCode);
        new License().SetLicense(new MemoryStream(licenseBytes));
    }
}
}

PDF => IMG

使用Aspose.PDF

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using Aspose.Pdf;
using Aspose.Pdf.Devices;


namespace document_converter.Utils
{
/// <summary>
/// Renders pages of a PDF document to image files using Aspose.PDF.
/// </summary>
public class ConverterPdf
{
    /// <summary>
    /// Renders a range of PDF pages to image files.
    /// Example call: ToImage("D:\\PdfFile.pdf", "D:\\ImageFile", 1, 20, ImageFormat.Png, 256);
    /// </summary>
    /// <param name="sourcePath">Path of the PDF file to load.</param>
    /// <param name="targetPath">
    /// Output folder for the images. When null, empty or whitespace, the folder that
    /// contains <paramref name="sourcePath"/> is used. The folder is created if missing.
    /// </param>
    /// <param name="startPageNum">
    /// First page to convert (1-based). Values of 0 or less are treated as 1.
    /// </param>
    /// <param name="endPageNum">
    /// Last page to convert (1-based, inclusive). Values of 0 or less, or beyond the
    /// document's page count, are treated as the page count.
    /// </param>
    /// <param name="imageFormat">
    /// Format used when saving each image. Defaults to JPEG when null.
    /// </param>
    /// <param name="resolution">
    /// Value passed to the <c>Resolution</c> of the rendering device. Values of 0 or less are replaced by 128.
    /// </param>
    /// <returns>Paths of the image files written, in page order.</returns>
    /// <exception cref="Exception">
    /// Any failure is rethrown as a plain <see cref="Exception"/> carrying the original
    /// message; the original exception is available as <c>InnerException</c>.
    /// </exception>
    public static List<string> ToImage(
        string sourcePath,
        string targetPath,
        int startPageNum = 1,
        int endPageNum = int.MaxValue,
        ImageFormat imageFormat = null,
        int resolution = 128
    )
    {
        SetLicense();
        string imageName = Path.GetFileNameWithoutExtension(sourcePath).Replace(" ", "");

        // Paths of the images written so far.
        List<string> images = new List<string>();
        try
        {
            if (string.IsNullOrWhiteSpace(targetPath))
            {
                // Resolve to a full path first so a bare file name still yields a usable folder.
                targetPath = Path.GetDirectoryName(Path.GetFullPath(sourcePath));
            }

            if (targetPath != null && !Directory.Exists(targetPath))
            {
                Directory.CreateDirectory(targetPath);
            }

            using (var pdfDocument = new Document(sourcePath))
            {
                int pageCount = pdfDocument.Pages.Count;

                if (targetPath == null || pageCount <= 0)
                {
                    // Nowhere to write, or nothing to render.
                    return images;
                }

                if (startPageNum <= 0)
                {
                    startPageNum = 1;
                }

                if (endPageNum > pageCount || endPageNum <= 0)
                {
                    endPageNum = pageCount;
                }

                if (startPageNum > endPageNum)
                {
                    // Reversed range: swap the bounds (a temporary is required for a real swap).
                    int previousStart = startPageNum;
                    startPageNum = endPageNum;
                    endPageNum = previousStart;

                    // The former start may lie past the last page; clamp it after the swap.
                    if (endPageNum > pageCount)
                    {
                        endPageNum = pageCount;
                    }
                }

                if (imageFormat == null)
                {
                    imageFormat = ImageFormat.Jpeg;
                }

                if (resolution <= 0)
                {
                    resolution = 128;
                }

                // The device settings do not change between pages, so build it once.
                var jpegDevice = new JpegDevice(new Resolution(resolution), 100);

                // Invariant lower-casing keeps the extension stable regardless of the current culture.
                string extension = imageFormat.ToString().ToLowerInvariant();

                for (int i = startPageNum; i <= endPageNum; i++)
                {
                    using (var imageStream = new MemoryStream())
                    {
                        // Each page is first rendered as JPEG into memory ...
                        jpegDevice.Process(pdfDocument.Pages[i], imageStream);
                        imageStream.Position = 0;

                        // ... then re-encoded to the requested format when saved to disk.
                        using (var image = new Bitmap(imageStream))
                        {
                            string pathAll = Path.Combine(
                                targetPath,
                                imageName + "_" + i + "." + extension
                            );
                            image.Save(pathAll, imageFormat);
                            images.Add(pathAll);
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Same exception type and message as before, but the original exception
            // (and its stack trace) is now preserved as InnerException.
            throw new Exception(ex.Message, ex);
        }

        return images;
    }

    /// <summary>
    /// Applies the Base64-encoded license from <c>ConvertLicense.LicenseCode</c> to Aspose.PDF.
    /// </summary>
    private static void SetLicense()
    {
        byte[] licenseBytes = Convert.FromBase64String(ConvertLicense.LicenseCode);
        new License().SetLicense(new MemoryStream(licenseBytes));
    }
}
}

使用O2S.Components.PDFRender4NET(与上面的Aspose.PDF实现二选一:两个类同名,都是ConverterPdf,但这里ToImage的最后一个参数是Definition而不是int)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using O2S.Components.PDFRender4NET;

namespace document_converter.Utils
{
/// <summary>
/// Renders pages of a PDF document to image files using O2S.Components.PDFRender4NET.
/// </summary>
public class ConverterPdf
{
    /// <summary>
    /// Image quality presets. The numeric value is multiplied by 56 and passed as the
    /// second argument of <c>PDFFile.GetPageImage</c>, so larger values give sharper images.
    /// </summary>
    public enum Definition
    {
        Low = 2,
        Standard = 3,
        High = 5,
        SuperHigh = 8,
        UltraHigh = 10
    }

    /// <summary>
    /// Returns the number of pages in a PDF file.
    /// </summary>
    /// <param name="pdfInputPath">Path of the PDF file.</param>
    /// <returns>The page count reported by the opened file.</returns>
    public static int GetPageNum(string pdfInputPath)
    {
        PDFFile pdfFile = PDFFile.Open(pdfInputPath);
        try
        {
            return pdfFile.PageCount;
        }
        finally
        {
            // Release the file once the count has been read.
            pdfFile.Dispose();
        }
    }

    /// <summary>
    /// Renders a range of PDF pages to image files named "pdfimg{page}.{format}".
    /// </summary>
    /// <param name="pdfInputPath">Path of the PDF file.</param>
    /// <param name="imageOutputPath">Output folder for the images; created if missing.</param>
    /// <param name="startPageNum">
    /// First page to convert (1-based). Values of 0 or less are treated as 1.
    /// </param>
    /// <param name="endPageNum">
    /// Last page to convert (1-based, inclusive). Values of 0 or less, or beyond the
    /// page count, are treated as the page count.
    /// </param>
    /// <param name="imageFormat">Format used when saving each image. Defaults to JPEG when null.</param>
    /// <param name="definition">Quality preset; larger values give sharper images.</param>
    /// <returns>Paths of the image files written, in page order.</returns>
    public static List<string> ToImage(
        string pdfInputPath,
        string imageOutputPath,
        int startPageNum = 1,
        int endPageNum = int.MaxValue,
        ImageFormat imageFormat = null,
        Definition definition = Definition.Standard
    )
    {
        Console.WriteLine($@"Converting: [{pdfInputPath}] -> [{imageOutputPath}]");
        List<string> imgList = new List<string>();
        string imageName = "pdfimg";
        if (imageFormat == null)
        {
            imageFormat = ImageFormat.Jpeg;
        }

        PDFFile pdfFile = PDFFile.Open(pdfInputPath);
        try
        {
            if (!Directory.Exists(imageOutputPath))
            {
                Directory.CreateDirectory(imageOutputPath);
            }

            int pageCount = pdfFile.PageCount;
            if (pageCount <= 0)
            {
                // Nothing to render.
                return imgList;
            }

            if (startPageNum <= 0)
            {
                startPageNum = 1;
            }

            if (endPageNum > pageCount || endPageNum <= 0)
            {
                endPageNum = pageCount;
            }

            if (startPageNum > endPageNum)
            {
                // Reversed range: swap the bounds (a temporary is required for a real swap).
                int previousStart = startPageNum;
                startPageNum = endPageNum;
                endPageNum = previousStart;

                // The former start may lie past the last page; clamp it after the swap.
                if (endPageNum > pageCount)
                {
                    endPageNum = pageCount;
                }
            }

            // Invariant lower-casing keeps the extension stable regardless of the current culture.
            string extension = imageFormat.ToString().ToLowerInvariant();

            for (int i = startPageNum; i <= endPageNum; i++)
            {
                // GetPageImage is called with a 0-based page index.
                using (Bitmap pageImage = pdfFile.GetPageImage(i - 1, 56 * (int)definition))
                {
                    // Path.Combine works whether or not the folder ends with a separator.
                    string filePath = Path.Combine(imageOutputPath, imageName + i + "." + extension);
                    pageImage.Save(filePath, imageFormat);
                    imgList.Add(filePath);
                }
            }
        }
        finally
        {
            // Release the file even when rendering or saving throws.
            pdfFile.Dispose();
        }

        return imgList;
    }
}
}

其中DLL下载

链接:https://pan.baidu.com/s/19a8hC20b24l1mSynSw1blA
提取码:psvm

授权

1
2
3
4
5
6
7
8
namespace document_converter.Utils
{
/// <summary>
/// Holds the license data that the converter classes pass to <c>License.SetLicense</c>.
/// </summary>
public class ConvertLicense
{
/// <summary>
/// Base64-encoded license text. Callers decode it with <c>Convert.FromBase64String</c>
/// and wrap the bytes in a <c>MemoryStream</c> before handing it to the library.
/// NOTE(review): the field is public, static and writable, and the license is embedded
/// in source - confirm both are intended.
/// </summary>
public static string LicenseCode =
"PExpY2Vuc2U+CiAgPERhdGE+CiAgICA8TGljZW5zZWRUbz5TdXpob3UgQXVuYm94IFNvZnR3YXJlIENvLiwgTHRkLjwvTGljZW5zZWRUbz4KICAgIDxFbWFpbFRvPnNhbGVzQGF1bnRlYy5jb208L0VtYWlsVG8+CiAgICA8TGljZW5zZVR5cGU+RGV2ZWxvcGVyIE9FTTwvTGljZW5zZVR5cGU+CiAgICA8TGljZW5zZU5vdGU+TGltaXRlZCB0byAxIGRldmVsb3BlciwgdW5saW1pdGVkIHBoeXNpY2FsIGxvY2F0aW9uczwvTGljZW5zZU5vdGU+CiAgICA8T3JkZXJJRD4xOTA4MjYwODA3NTM8L09yZGVySUQ+CiAgICA8VXNlcklEPjEzNDk3NjAwNjwvVXNlcklEPgogICAgPE9FTT5UaGlzIGlzIGEgcmVkaXN0cmlidXRhYmxlIGxpY2Vuc2U8L09FTT4KICAgIDxQcm9kdWN0cz4KICAgICAgPFByb2R1Y3Q+QXNwb3NlLlRvdGFsIGZvciAuTkVUPC9Qcm9kdWN0PgogICAgPC9Qcm9kdWN0cz4KICAgIDxFZGl0aW9uVHlwZT5FbnRlcnByaXNlPC9FZGl0aW9uVHlwZT4KICAgIDxTZXJpYWxOdW1iZXI+M2U0NGRlMzAtZmNkMi00MTA2LWIzNWQtNDZjNmEzNzE1ZmMyPC9TZXJpYWxOdW1iZXI+CiAgICA8U3Vic2NyaXB0aW9uRXhwaXJ5PjIwMjAwODI3PC9TdWJzY3JpcHRpb25FeHBpcnk+CiAgICA8TGljZW5zZVZlcnNpb24+My4wPC9MaWNlbnNlVmVyc2lvbj4KICAgIDxMaWNlbnNlSW5zdHJ1Y3Rpb25zPmh0dHBzOi8vcHVyY2hhc2UuYXNwb3NlLmNvbS9wb2xpY2llcy91c2UtbGljZW5zZTwvTGljZW5zZUluc3RydWN0aW9ucz4KICA8L0RhdGE+CiAgPFNpZ25hdHVyZT53UGJtNUt3ZTYvRFZXWFNIY1o4d2FiVEFQQXlSR0pEOGI3L00zVkV4YWZpQnd5U2h3YWtrNGI5N2c2eGtnTjhtbUFGY3J0c0cwd1ZDcnp6MytVYk9iQjRYUndTZWxsTFdXeXNDL0haTDNpN01SMC9jZUFxaVZFOU0rWndOQkR4RnlRbE9uYTFQajhQMzhzR1grQ3ZsemJLZFZPZXk1S3A2dDN5c0dqYWtaL1E9PC9TaWduYXR1cmU+CjwvTGljZW5zZT4=";
}
}

注意

要分别为每个 Aspose 产品设置 License。即使所有组件共用一个许可证文件(例如 Aspose.Total.lic),您仍然需要为应用程序中使用的每个 Aspose 产品单独调用 SetLicense。

调用

完整代码

https://gitee.com/psvmc/document-converter.git

返回码

  • 0 转换成功
  • 1 转换失败
  • 2 格式不支持
  • 3 文件不存在

转为PDF

Word => PDF

1
2
document-converter.exe pdf "D:\\Tools\\Docs\\01.docx" "D:\\Tools\\Docs\\pdf\\01.pdf"
document-converter.exe pdf "D:\\Tools\\Docs\\01.doc" "D:\\Tools\\Docs\\pdf\\01.pdf"

PPT => PDF

1
document-converter.exe pdf "D:\\Tools\\Docs\\02.pptx" "D:\\Tools\\Docs\\pdf\\02.pdf"

缺少字体的文档测试

1
document-converter.exe pdf "D:\\Tools\\Docs\\more\\第一节 导数的概念及运算 定积分.ppt" "D:\\Tools\\Docs\\pdf\\第一节 导数的概念及运算 定积分.pdf"

Excel => PDF

1
document-converter.exe pdf "D:\\Tools\\Docs\\03.xlsx" "D:\\Tools\\Docs\\pdf\\03.pdf"

TXT => PDF

1
document-converter.exe pdf "D:\\Tools\\Docs\\04.txt" "D:\\Tools\\Docs\\pdf\\04.pdf"

XML => PDF

1
document-converter.exe pdf "D:\\Tools\\Docs\\05.xml" "D:\\Tools\\Docs\\pdf\\05.pdf"

HTML => PDF

1
document-converter.exe pdf "D:\\Tools\\Docs\\06.html" "D:\\Tools\\Docs\\pdf\\06.pdf"

转为图片

PDF => IMG

1
document-converter.exe img "D:\\Tools\\Docs\\pdf\\01.pdf" "D:\\Tools\\Docs\\images\\pdf\\"

Word => IMG

1
document-converter.exe img "D:\\Tools\\Docs\\01.docx" "D:\\Tools\\Docs\\images\\word\\"

PPT => IMG

1
document-converter.exe img "D:\\Tools\\Docs\\02.pptx" "D:\\Tools\\Docs\\images\\ppt\\"

Excel => IMG

1
document-converter.exe img "D:\\Tools\\Docs\\03.xlsx" "D:\\Tools\\Docs\\images\\excel\\"

TXT => IMG

1
document-converter.exe img "D:\\Tools\\Docs\\04.txt" "D:\\Tools\\Docs\\images\\txt\\"

XML => IMG

1
document-converter.exe img "D:\\Tools\\Docs\\05.xml" "D:\\Tools\\Docs\\images\\xml\\"

HTML => IMG

1
document-converter.exe img "D:\\Tools\\Docs\\06.html" "D:\\Tools\\Docs\\images\\html\\"

Aspose.PDF和O2S.Components.PDFRender4NET

Aspose.PDF是一个用于处理PDF文件的强大工具,它提供了一系列的功能,包括:

  1. 创建和编辑PDF文件:可以使用Aspose.PDF创建新的PDF文档、编辑和修改现有的PDF文档。

  2. 将PDF文件转换为其他格式:将PDF文件转换为多种文件格式,包括DOC、DOCX、HTML、XML、PPT和XPS等。

  3. 添加水印和加密:可以在PDF文档中添加水印、数字签名和加密等安全特性。

  4. 表单处理:用于创建、填写和提交PDF表单。

  5. 文本和图像处理:可以从PDF文档中提取文本和图像,还可以在文档中插入、更新和删除文本和图像等。

  6. 页面元素处理:可以在PDF页面上添加、更新、删除和移动页面元素,如链接、书签、注释、标签和旋转等。

总之,Aspose.PDF提供了广泛的功能,能够满足用户处理PDF文件的各种需求。

Aspose.PDF功能比较多,所以文件比较大,有32.9M。

O2S.Components.PDFRender4NET在转图片上效果也不错,大小只有944K,还不到1M,推荐使用。

网上有资料称,O2S.Components.PDFRender4NET依赖Ghostscript软件来渲染PDF文件。

但是实际测试时,不安装Ghostscript也能转换成功。