基于Docker Compose和Puppeteer搭建HTML转PDF服务

前言

官方文档

https://pptr.nodejs.cn/

NodeJS服务

package.json

package.json

1
2
3
4
5
6
{
"dependencies": {
"express": "^5.2.1",
"puppeteer": "^24.8.2"
}
}

index.js

index.js

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
const puppeteer = require('puppeteer');
const express = require('express');
const { readFile } = require('node:fs/promises');
// Express application instance. The listening port is taken from the PORT
// environment variable and falls back to 3000 when that is unset or empty.
const app = express();
const PORT = process.env.PORT || 3000;

// Body parsers: JSON and URL-encoded payloads share the same size cap.
const BODY_SIZE_LIMIT = '10mb';
app.use(express.json({ limit: BODY_SIZE_LIMIT }));
app.use(express.urlencoded({ extended: true, limit: BODY_SIZE_LIMIT }));

// Health check: replies with a JSON snapshot of the process
// (fixed status string, current time, Node.js version, memory usage).
app.get('/health', (_req, res) => {
  const timestamp = new Date().toISOString();
  const { version: node } = process;
  const memory = process.memoryUsage();

  res.json({ status: 'healthy', timestamp, node, memory });
});

// Home page: serves home.html with its {{url_all}} placeholder replaced by a
// ready-made sample /html2pdf link.
app.get('/', async (req, res) => {
  // Sample parameters for the demo link. URLSearchParams percent-encodes every
  // value, so the URLs and the selector survive as single query parameters.
  const sampleQuery = new URLSearchParams({
    header_url: 'http://49.4.11.108:8924/static/paperPDF/header.html',
    content_url: 'https://www.psvmc.cn/article/2026-01-07-android-audio-m4a-recorder.html',
    finish_selector: '.footer',
    top: 25,
    bottom: 10,
    timeout_sec: 180,
  });
  const sampleUrl = `/html2pdf?${sampleQuery}`;

  let template;
  try {
    template = await readFile('./home.html', 'utf8');
  } catch (err) {
    // Without the template there is nothing useful to render: report the
    // failure instead of answering 200 with an empty body.
    console.error('读取文件失败:', err.message);
    return res.status(500).json({ code: 1, msg: '读取首页模板失败' });
  }

  // The link is placed inside an href="..." attribute, so "&" must be written
  // as "&amp;". The function form of replace() inserts the text verbatim
  // (no "$&"-style replacement patterns are interpreted).
  const hrefValue = sampleUrl.replaceAll('&', '&amp;');
  res.send(template.replace('{{url_all}}', () => hrefValue));
});

// Smoke test: renders a one-line HTML snippet to an A4 PDF to prove that
// Chromium can be launched and can print inside this environment.
app.get('/test', async (req, res) => {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: "new",
      timeout: 1000 * 300,
      args: [
        "--no-sandbox", // key flag: disable the Chromium sandbox
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage", // avoid running out of /dev/shm space
      ],
    });
    const page = await browser.newPage();
    await page.setContent('<p>服务运行正常!</p>');
    const pdfBytes = await page.pdf({ format: 'A4' });

    res.set({
      'Content-Type': 'application/pdf',
      'Content-Disposition': 'inline; filename="test.pdf"'
    });
    // Recent Puppeteer releases resolve page.pdf() to a Uint8Array rather than
    // a Buffer; wrapping it guarantees res.send() receives binary data.
    res.send(Buffer.from(pdfBytes));
  } catch (error) {
    res.status(500).json({ code: 1, msg: error.message });
  } finally {
    // Always shut Chromium down, including when rendering failed; otherwise
    // every failed request leaves a browser process behind.
    if (browser) {
      await browser.close().catch((closeError) => {
        console.error('关闭浏览器失败:', closeError);
      });
    }
  }
});

/**
 * Parse a query-string value as a finite, non-negative number.
 *
 * @param {unknown} raw - Value taken from req.query (normally a string).
 * @param {number} fallback - Returned when raw is missing, blank, not a string,
 *   not numeric, or negative.
 * @returns {number} The parsed number, or fallback.
 */
function parseNonNegativeNumber(raw, fallback) {
  if (typeof raw !== 'string' || raw.trim() === '') {
    return fallback;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Tell whether a value is an absolute http:// or https:// URL.
 *
 * @param {unknown} raw - Candidate URL.
 * @returns {boolean} true only for strings that parse as http/https URLs.
 */
function isHttpUrl(raw) {
  if (typeof raw !== 'string') {
    return false;
  }
  try {
    const { protocol } = new URL(raw);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * Download the header template. This is best-effort: any failure is logged and
 * an empty string is returned so the PDF is still produced.
 *
 * @param {string|undefined} headerUrl - Address of the header HTML; falsy skips the download.
 * @param {number} timeoutMs - Abort the download after this many milliseconds; 0 means no limit.
 * @returns {Promise<string>} The template HTML, or '' when unavailable.
 */
async function fetchHeaderTemplate(headerUrl, timeoutMs) {
  if (!headerUrl) {
    return '';
  }
  try {
    // Without a signal a stalled header server would hang the request forever.
    const options = timeoutMs > 0 ? { signal: AbortSignal.timeout(timeoutMs) } : {};
    const response = await fetch(headerUrl, options);
    if (response.ok) {
      return await response.text();
    }
    console.error("获取远程页眉模板失败: HTTP", response.status);
  } catch (err) {
    console.error("获取远程页眉模板失败:", err);
  }
  return '';
}

// HTML to PDF: loads content_url in headless Chromium and returns it as an A4 PDF.
//
// Query parameters:
//   content_url      (required) page to print; must be an http/https URL
//   finish_selector  (optional) CSS selector that must appear before printing
//   header_url       (optional) URL of an HTML header template
//   top/bottom/left/right (optional) page margins in millimetres, default 5
//   timeout_sec      (optional) timeout in seconds, default 120 (0 disables it)
//
// Errors are reported as HTTP 500 with { code: 1, msg }. Validation failures
// deliberately keep the same status so existing clients see no change.
app.get('/html2pdf', async (req, res) => {
  let browser;
  try {
    const { header_url, content_url, finish_selector } = req.query;

    // Invalid numbers fall back to the defaults instead of reaching Puppeteer
    // as NaN timeouts or unparsable margins such as "abcmm".
    const timeout = parseNonNegativeNumber(req.query.timeout_sec, 120) * 1000;
    const margin = {
      top: `${parseNonNegativeNumber(req.query.top, 5)}mm`, // leaves room for the header
      left: `${parseNonNegativeNumber(req.query.left, 5)}mm`,
      right: `${parseNonNegativeNumber(req.query.right, 5)}mm`,
      bottom: `${parseNonNegativeNumber(req.query.bottom, 5)}mm`, // leaves room for the footer
    };

    if (!content_url) {
      return res.status(500).json({
        code: 1,
        msg: '内容地址不能为空'
      });
    }
    // content_url is caller-controlled: refuse file://, data: and similar
    // schemes so the service cannot be used to read local files.
    // NOTE(review): http(s) URLs pointing at internal hosts are still reachable
    // from here; restrict them at the network level if this service is exposed.
    if (!isHttpUrl(content_url)) {
      return res.status(500).json({
        code: 1,
        msg: '内容地址必须是 http 或 https 链接'
      });
    }

    browser = await puppeteer.launch({
      headless: "new",
      timeout,
      args: [
        "--no-sandbox", // key flag: disable the Chromium sandbox
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage", // avoid running out of /dev/shm space
      ],
    });
    const page = await browser.newPage();

    // Load the document and wait for network activity to settle.
    await page.goto(content_url, {
      waitUntil: "networkidle2",
      timeout
    });

    if (finish_selector) {
      await page.waitForSelector(finish_selector, { timeout });
    }

    // Give animations one second to finish before printing.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    const headerHtml = await fetchHeaderTemplate(header_url, timeout);

    const pdfOptions = {
      format: "A4",
      printBackground: true,
      displayHeaderFooter: true,
      headerTemplate: headerHtml,
      preferCSSPageSize: true,
      footerTemplate: `
        <div style="position: absolute;bottom:0;z-index:999;font-size: 12px; width: 100%; height:40px;line-height:40px; text-align: center;">
          <span class="pageNumber"></span> / <span class="totalPages"></span>
        </div>`,
      margin,
    };

    // Recent Puppeteer releases resolve page.pdf() to a Uint8Array rather than
    // a Buffer; wrapping it guarantees res.send() receives binary data.
    const pdfBuffer = Buffer.from(await page.pdf(pdfOptions));
    res.set({
      'Content-Type': 'application/pdf',
      'Content-Length': pdfBuffer.length,
      'Content-Disposition': 'inline; filename="document.pdf"'
    });

    res.send(pdfBuffer);
  } catch (error) {
    res.status(500).json({ code: 1, msg: error.message });
  } finally {
    // Always shut Chromium down. Navigation timeouts and selector misses are
    // routine here and would otherwise leak one browser process per failure.
    if (browser) {
      await browser.close().catch((closeError) => {
        console.error('关闭浏览器失败:', closeError);
      });
    }
  }
});

// Start the HTTP server and print a three-line startup banner:
// local URL, environment name and start time.
app.listen(PORT, () => {
  const environment = process.env.NODE_ENV || 'development';
  const startedAt = new Date().toISOString();
  const banner = [
    `PDF服务运行在 http://localhost:${PORT}`,
    `环境: ${environment}`,
    `服务启动时间: ${startedAt}`,
  ];

  for (const line of banner) {
    console.log(line);
  }
});

home.html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
<!DOCTYPE html>
<html lang="zh-CN">
  <head>
    <meta charset="utf-8" />
    <title>PDF生成服务</title>

    <style>
      body {
        font-family: Arial, sans-serif;
        margin: 20px;
      }
      h1 {
        color: #333;
      }
      p {
        color: #666;
      }
      ul {
        list-style-type: none;
        padding: 0;
      }
      li {
        margin-bottom: 10px;
      }
      a {
        color: #007bff;
        text-decoration: none;
      }
      a:hover {
        text-decoration: underline;
      }

      table {
        border-collapse: collapse;
      }
      th,
      td {
        border: 1px solid #ddd;
        padding: 8px 12px;
        text-align: left;
      }
      th {
        background-color: #f2f2f2;
      }
    </style>
  </head>
  <body>
    <h1>PDF生成服务</h1>
    <ul>
      <li><a href="/health" target="_blank">健康检查</a></li>
      <li><a href="/test" target="_blank">测试PDF生成1</a></li>
      <!-- {{url_all}} is substituted by the "/" route in index.js with a sample /html2pdf link. -->
      <li><a href="{{url_all}}" target="_blank">测试PDF生成2</a></li>
    </ul>
    <div>
      <p>接口说明</p>
      <p>接口地址:/html2pdf</p>
      <p>请求方法:GET</p>
      <p>参数说明:</p>

      <table>
        <tr>
          <th>参数名</th>
          <th>是否必填</th>
          <th>默认值</th>
          <th>说明</th>
        </tr>
        <tr>
          <td>content_url</td>
          <td>是</td>
          <td>-</td>
          <td>主体内容 URL</td>
        </tr>
        <tr>
          <td>finish_selector</td>
          <td>否</td>
          <td>-</td>
          <td>主体内容 结束选择器(该元素出现后才开始生成PDF)</td>
        </tr>
        <tr>
          <td>header_url</td>
          <td>否</td>
          <td>-</td>
          <td>页眉URL</td>
        </tr>
        <tr>
          <td>top</td>
          <td>否</td>
          <td>5</td>
          <td>顶部margin(单位:mm)</td>
        </tr>
        <tr>
          <td>bottom</td>
          <td>否</td>
          <td>5</td>
          <td>底部margin(单位:mm)</td>
        </tr>
        <tr>
          <td>left</td>
          <td>否</td>
          <td>5</td>
          <td>左侧margin(单位:mm)</td>
        </tr>
        <tr>
          <td>right</td>
          <td>否</td>
          <td>5</td>
          <td>右侧margin(单位:mm)</td>
        </tr>
        <tr>
          <td>timeout_sec</td>
          <td>否</td>
          <td>120</td>
          <td>超时时间(单位:秒)</td>
        </tr>
      </table>
    </div>
  </body>
</html>

Dockerfile

脚本

Dockerfile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Official Node.js base image (Debian bullseye, slim variant).
FROM node:18-bullseye-slim

# Time zone (optional), UTF-8 locale, and non-interactive apt so that package
# installation never stops to prompt.
ENV TZ=Asia/Shanghai \
    LANG=C.UTF-8 \
    DEBIAN_FRONTEND=noninteractive

# Point apt at the Aliyun mirror (more reliable from mainland China).
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    sed -i 's|security.debian.org/debian-security|mirrors.aliyun.com/debian-security|g' /etc/apt/sources.list

# CJK fonts (WenQuanYi Zen Hei / Micro Hei, Noto CJK) so Chinese text renders in
# the generated PDFs. apt-get is used instead of apt because apt's command-line
# interface is not meant for scripts. The package index is removed afterwards
# to keep the layer small.
RUN apt-get update && \
    apt-get install -y fonts-wqy-zenhei fonts-wqy-microhei fonts-noto-cjk && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Shared libraries Chromium needs at runtime. "apt-get update" and
# "apt-get install" run in the same layer so a cached, stale package index can
# never be paired with a fresh install step.
# NOTE(review): libappindicator3-1 may not be available on Debian bullseye;
# if apt cannot find it, confirm and replace it with libayatana-appindicator3-1.
RUN apt-get update && \
    apt-get install -y \
        wget \
        gnupg \
        ca-certificates \
        fonts-liberation \
        libappindicator3-1 \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libc6 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libexpat1 \
        libfontconfig1 \
        libgbm1 \
        libgcc1 \
        libglib2.0-0 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libstdc++6 \
        libx11-6 \
        libx11-xcb1 \
        libxcb1 \
        libxcomposite1 \
        libxcursor1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxi6 \
        libxrandr2 \
        libxrender1 \
        libxss1 \
        libxtst6 \
        lsb-release \
        xdg-utils \
    && rm -rf /var/lib/apt/lists/*

# Working directory for the application.
WORKDIR /app

# npm registry mirror configuration.
RUN echo "registry=https://registry.npmmirror.com" > ~/.npmrc && \
    echo "puppeteer_download_host=https://registry.npmmirror.com" >> ~/.npmrc

RUN npm install -g cnpm --registry=https://registry.npmmirror.com

# Install dependencies before copying the source so that editing index.js does
# not invalidate the (slow) dependency layer.
COPY package*.json ./
RUN cnpm install

# Application code. home.html is copied as well so the home page also works
# with plain "docker run", where no bind mount provides it. A wildcard source
# requires a destination that ends with "/".
COPY ./*.js ./
COPY home.html ./

EXPOSE 3000

# Start the PDF service.
CMD ["node", "index.js"]

构建(非必须)

下面的不是必须的,docker-compose在运行的时候会自动构建

1
2
3
4
# 直接构建
docker build -t puppeteer-api .
# 指定版本号构建
docker build -t puppeteer-api:0.2.0 .

测试

1
docker run -p 3000:3000 --name puppeteer-api puppeteer-api:0.2.0

如果运行报错可以这样测试

1
docker run -it --rm puppeteer-api:0.2.0 /bin/sh

DockerCompose

docker-compose.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
# Kept for older docker-compose releases.
version: "2.1"

services:
  puppeteer-api:
    # Build the image from the Dockerfile in this directory when the tagged
    # image does not exist yet.
    build:
      context: ./
      dockerfile: Dockerfile
    image: puppeteer-api:0.2.0
    volumes:
      # Bind-mount the source files over the copies inside the image.
      - ./index.js:/app/index.js
      - ./home.html:/app/home.html
    ports:
      # host:container - the service is reachable on host port 3001.
      - "3001:3000"

注意

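这种方式会在第一次运行的时候自动构建Docker镜像。如果后续修改了 Dockerfile 或依赖,可以更改版本号,或者执行 docker compose up -d --build 强制重新构建;index.js 和 home.html 是通过 volumes 挂载的,修改后只需重启容器即可生效,无需重新构建。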

启动/停止

启动

1
docker compose up -d

停止并删除

1
docker compose down

测试

1
http://localhost:3001/

设置镜像

查看版本

1
docker --version

针对Docker客户端版本大于 1.10.0 的用户

创建或修改 /etc/docker/daemon.json 文件

1
vi /etc/docker/daemon.json

添加或修改

1
2
3
4
5
6
{
"registry-mirrors": [
"https://docker.1ms.run",
"https://docker-0.unsee.tech"
]
}

命令修改

1
2
3
4
5
6
7
8
9
sudo mkdir -p /etc/docker
sudo tee /etc/docker/daemon.json <<EOF
{
"registry-mirrors": [
"https://docker.1ms.run",
"https://docker-0.unsee.tech"
]
}
EOF

不重启生效

1
sudo systemctl reload docker

查看是否生效

1
docker info | grep -A 5 "Registry Mirrors"

导出导入

如果服务器上镜像拉取失败可以本地导出再导入。

导出

1
docker save -o puppeteer-api.tar puppeteer-api:0.2.0

导入镜像

1
docker load -i puppeteer-api.tar

导入后,运行以下命令确认镜像已成功导入:

1
docker images | grep puppeteer-api