from PyPDF4 import PdfFileReader, PdfFileWriter
PdfFileReader 常用属性/方法:getNumPages() 返回总页数,getPage(n) 返回第 n 页的 PageObject。
# Open sample.pdf in binary mode and inspect it while the file is open.
with open('sample.pdf', 'rb') as f:
    reader = PdfFileReader(f)

    # Report the total number of pages.
    page_total = reader.getNumPages()
    print('总页数:', page_total)

    # Fetch the first page (index 0) and print its extracted text.
    page0 = reader.getPage(0)
    first_page_text = page0.extractText()
    print('第一页文字:', first_page_text)
PdfFileWriter 核心方法:addPage(page) 将已有的 PageObject 加入写入器,addBlankPage() 添加空白页,write() 将结果写入文件。
# Build a new document from one existing page plus one blank page.
writer = PdfFileWriter()

# NOTE(review): page0 was obtained from a reader whose underlying file was
# opened in a `with` block earlier; if the reader loads page objects lazily,
# that file may need to stay open until write() has finished -- confirm.
writer.addPage(page0)  # add an existing page
writer.addBlankPage(width=200, height=200)  # add a blank page

# Serialize the assembled document to output.pdf.
with open('output.pdf', 'wb') as output_stream:
    writer.write(output_stream)
PageObject 的 mergePage() 方法常用于添加水印或把两页内容合并。
# Use the first page of watermark.pdf as the overlay.
watermark_reader = PdfFileReader('watermark.pdf')
watermark_page = watermark_reader.getPage(0)

# Merge the overlay onto every page of source.pdf and collect the results.
pdf_reader = PdfFileReader('source.pdf')
pdf_writer = PdfFileWriter()
page_count = pdf_reader.getNumPages()
for index in range(page_count):
    page = pdf_reader.getPage(index)
    page.mergePage(watermark_page)  # overlay the watermark
    pdf_writer.addPage(page)

# Save the watermarked pages to watermarked.pdf.
with open('watermarked.pdf', 'wb') as output_stream:
    pdf_writer.write(output_stream)
如果只想把完整文档拼接在一起,使用 PdfFileMerger 更简洁。
from PyPDF4 import PdfFileMerger
# Concatenate three complete documents, in order, into merged.pdf.
merger = PdfFileMerger()
try:
    for pdf_path in ('first.pdf', 'second.pdf', 'third.pdf'):
        merger.append(pdf_path)
    merger.write('merged.pdf')
finally:
    # Always close the merger so the file descriptors it opened for the
    # appended inputs (and the output) are released, even if a step fails.
    merger.close()
加密示例(用户密码 + 所有者密码):
# Copy every page of plain.pdf into a fresh writer.
writer = PdfFileWriter()
source_reader = PdfFileReader('plain.pdf')
writer.appendPagesFromReader(source_reader)

# Set both a user password and an owner password, requesting 128-bit mode.
writer.encrypt(
    user_pwd='user123',
    owner_pwd='owner456',
    use_128bit=True,
)

# Write the encrypted document to protected.pdf.
with open('protected.pdf', 'wb') as output_stream:
    writer.write(output_stream)
解密示例:
# Open the protected document and unlock it before reading anything else.
reader = PdfFileReader('protected.pdf')
if reader.isEncrypted:
    # decrypt() reports failure through its return value (0 means the
    # password matched neither the user nor the owner password), so check
    # it instead of assuming the document was unlocked.
    if reader.decrypt('user123') == 0:
        raise ValueError('解密失败:密码不正确')
print('已解密,页数:', reader.getNumPages())
读取、修改 PDF 元信息:
# Read the existing document information.
reader = PdfFileReader('sample.pdf')
info = reader.getDocumentInfo()
# getDocumentInfo() may return None when the file carries no info
# dictionary; guard the attribute access so the example does not crash.
print('作者:', info.author if info is not None else None)

# Copy all pages into a new writer and attach updated metadata.
writer = PdfFileWriter()
writer.appendPagesFromReader(reader)
writer.addMetadata({
    '/Author': '张三',
    '/Title': '示例 PDF',
})

# Save the copy with the new metadata to with_meta.pdf.
with open('with_meta.pdf', 'wb') as out:
    writer.write(out)
# Transform the first page of the current reader: rotate, then scale.
page = reader.getPage(0)
page.rotateClockwise(90)  # rotate 90 degrees clockwise
page.scale(0.5, 0.5)  # scale to 50% of the original size

# Write the transformed page out as a single-page document.
writer = PdfFileWriter()
writer.addPage(page)
with open('transformed.pdf', 'wb') as output_stream:
    writer.write(output_stream)
以上示例均基于 PyPDF4 官方文档与社区常见用法整理,适用于 Python 3.x 环境。实际使用时请根据具体 PDF 的结构(是否加密、是否包含压缩流等)进行错误捕获。