python使用PyPDF2库进行PDF基本操作
内容导读
互联网集市收集整理的这篇技术教程文章主要介绍了python使用PyPDF2库进行PDF基本操作,小编现在分享给大家,供广大互联网技能从业者学习和参考。文章包含3479字,纯文字阅读大概需要5分钟。
内容图文
![python使用PyPDF2库进行PDF基本操作](/upload/InfoBanner/zyjiaocheng/608/935f69eb72664929b8fe45fc5fee92bd.jpg)
# -*- coding: utf-8 -*-
# @Time : 2021/1/26 10:09
# @Author :Mr
# @File :pdftest.py
# @Software :PyCharm
from PyPDF2 import PdfFileReader, PdfFileWriter
import os
# 创建输出文件目录
def mkdir(read_file_path, out_path=None):
    """Return the output directory for *read_file_path*, creating it if needed.

    When *out_path* is None the directory is placed next to the input file
    and named after the file's base name with its extension and all spaces
    removed (e.g. ``/a/my file.pdf`` -> ``/a/myfile``).

    Args:
        read_file_path: Path of the source file; only used to derive the
            default directory name.
        out_path: Directory to use instead of the derived one.

    Returns:
        The path of the output directory, which exists on return.

    Raises:
        FileExistsError: If the target path exists but is not a directory.
    """
    if out_path is None:
        # splitext drops the real extension, whatever its length, instead of
        # blindly cutting the last four characters off the file name.
        stem, _ = os.path.splitext(os.path.basename(read_file_path))
        out_path = os.path.join(os.path.dirname(read_file_path),
                                stem.replace(" ", ""))
    # exist_ok avoids the check-then-create race of a separate exists() test;
    # missing parent directories are created as well.
    os.makedirs(out_path, exist_ok=True)
    return out_path
# 切分每一页
def split_pdf(read_file_path, out_path=None):
    """Write every page of a PDF to its own single-page file.

    The files go into the directory returned by ``mkdir`` and are named
    after the zero-based page position: ``0.pdf``, ``1.pdf``, ...

    Args:
        read_file_path: Path of the PDF to split.
        out_path: Output directory; passed through to ``mkdir``.
    """
    target_dir = mkdir(read_file_path, out_path)
    reader = PdfFileReader(read_file_path)
    page_total = reader.getNumPages()
    for page_no in range(page_total):
        # A fresh writer per page so each output holds exactly one page.
        single_page = PdfFileWriter()
        single_page.addPage(reader.getPage(page_no))
        destination = os.path.join(target_dir, f'{page_no}.pdf')
        with open(destination, 'wb') as sink:
            single_page.write(sink)
# PDF文件按条件分割
def split_by_pages(read_file_path, out_detail, out_path=None):
    """Cut page ranges out of a PDF as listed in a UTF-8 text file.

    Each non-blank line of *out_detail* holds two whitespace-separated
    fields: a 1-based, inclusive page range ``START-END`` and a name. The
    name is only echoed to stdout; every range is saved as
    ``<START-END>页.pdf`` in the directory returned by ``mkdir``.

    Failures are reported on stdout instead of being raised: a range that
    runs past the last page skips only that range, while any other error
    (unreadable file, malformed line) stops the processing.

    Args:
        read_file_path: Path of the PDF to split.
        out_detail: Path of the text file describing the ranges.
        out_path: Output directory; passed through to ``mkdir``.
    """
    out_path = mkdir(read_file_path, out_path)
    try:
        # `with` closes the source even on errors and, unlike a `finally`
        # that closes a bare handle, cannot fail on an unbound name when the
        # PDF itself could not be opened.
        with open(read_file_path, 'rb') as fp_read_file:
            pdfReader = PdfFileReader(fp_read_file)
            page_count = pdfReader.getNumPages()
            print(page_count)
            with open(out_detail, 'r', encoding='utf-8') as fp:
                txt = fp.readlines()
            for detail in txt:
                if not detail.strip():
                    # Blank (e.g. trailing) lines carry no range.
                    continue
                pages, write_file = detail.split()
                print(pages, write_file)
                pdf_file = f'{pages}页.pdf'
                pdf_files = os.path.join(out_path, pdf_file)
                print(pdf_files)
                first_page, end_page = map(int, pages.split('-'))
                if first_page < 1 or end_page < first_page:
                    # A start below 1 would become a negative index and
                    # silently pick pages from the end of the document.
                    print(f'页码范围无效:{pages}')
                    continue
                try:
                    # Messages show the 1-based page numbers as written in
                    # the range file; only the page lookup is 0-based.
                    print(f'开始分割{first_page}页-{end_page}页,保存为{pdf_file}......')
                    pdf_output = PdfFileWriter()
                    for i in range(first_page - 1, end_page):
                        pdf_output.addPage(pdfReader.getPage(i))
                    # Written while the source is still open, since the
                    # writer may read page data from it at this point.
                    with open(pdf_files, 'wb') as sub_fp:
                        pdf_output.write(sub_fp)
                    print(f'完成分割{first_page}页-{end_page}页,保存为{pdf_file}!')
                except IndexError:
                    print('分割页数超过了PDF的页数')
    except Exception as e:
        # Best-effort tool: report the problem and return quietly.
        print(e)
    return
# 合并pdf
def merge_pdfs(out_path=None, out_file='new', *read_file_paths):
    """Concatenate several PDFs, in the order given, into a single file.

    The result is saved as ``<out_file>.pdf`` in the directory returned by
    ``mkdir`` for the first input file. When no input files are given a
    hint is printed and nothing is written.

    Args:
        out_path: Output directory; passed through to ``mkdir``.
        out_file: Name of the merged file, without the ``.pdf`` suffix.
        *read_file_paths: Paths of the PDFs to merge.
    """
    if not read_file_paths:
        print('请重新选择文件')
        return
    out_path = mkdir(read_file_paths[0], out_path)
    output = os.path.join(out_path, f'{out_file}.pdf')
    pdf_writer = PdfFileWriter()
    # The inputs stay open until the merged file has been written (the
    # writer may still read page data from them), then are always closed.
    sources = []
    try:
        for path in read_file_paths:
            handle = open(path, 'rb')
            sources.append(handle)
            pdfReader = PdfFileReader(handle)
            for index in range(pdfReader.getNumPages()):
                pdf_writer.addPage(pdfReader.getPage(index))
        with open(output, 'wb') as out:
            pdf_writer.write(out)
    finally:
        for handle in sources:
            handle.close()
    return
# 删除PDF页
def delete_pdf(read_file_path, index, out_path=None):
    """Save a copy of a PDF with the given pages left out.

    The copy is written to ``<name>new.pdf`` in the directory returned by
    ``mkdir``, where ``<name>`` is the input's base name without extension
    and spaces. The resulting path is printed.

    Args:
        read_file_path: Path of the source PDF.
        index: 1-based page number(s) to drop; a single int or any iterable
            of ints.
        out_path: Output directory; passed through to ``mkdir``.
    """
    out_path = mkdir(read_file_path, out_path)
    # Accept a bare page number as well, and use a set for O(1) membership.
    skipped = {index} if isinstance(index, int) else set(index)
    # The source is closed on exit; the copy is written while it is still
    # open because the writer may read page data from it at that point.
    with open(read_file_path, "rb") as source:
        pdfReader = PdfFileReader(source)
        pages = pdfReader.getNumPages()
        pdf_writer = PdfFileWriter()
        # splitext handles extensions of any length, unlike a fixed [:-4].
        stem = os.path.splitext(os.path.basename(read_file_path))[0]
        pdf_file = os.path.join(out_path, f'{stem.replace(" ", "")}new.pdf')
        print(pdf_file)
        for page in range(pages):
            if page + 1 in skipped:
                continue
            pdf_writer.addPage(pdfReader.getPage(page))
        with open(pdf_file, 'wb') as out:
            pdf_writer.write(out)
    return
内容总结
以上是互联网集市为您收集整理的《python使用PyPDF2库进行PDF基本操作》的全部内容,希望本文能够帮您解决使用PyPDF2库进行PDF基本操作时所遇到的程序开发问题。如果觉得互联网集市的技术教程内容还不错,欢迎将互联网集市网站推荐给程序员好友。
内容备注
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 gblab@vip.qq.com 举报,一经查实,本站将立刻删除。
内容手机端
扫描二维码推送至手机访问。