1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > 【初识python 2】对服务端发送PDF EXCEL文件URL转成图片后上传七牛并且返回图片url

【初识python 2】对服务端发送PDF EXCEL文件URL转成图片后上传七牛并且返回图片url

时间:2019-10-13 18:38:05

相关推荐

【初识python 2】对服务端发送PDF EXCEL文件URL转成图片后上传七牛并且返回图片url

序言

标题名字有点长,将就看吧 ^ _ ^

看此篇文章之前请先阅读【初识python 1】Python PPT PDF 转成图片

本篇实现了:

启动web服务,通过url下载文件并转换成图片,上传七牛云后返回图片url组,最后删除相关文件、图片、文件夹

问题及解决方案

环境

Windows10

WPS office

Python3

python模块

from flask import Flask, request

import comtypes.client

import os

import sys

from pdf2image import (

convert_from_path, convert_from_bytes

)

from qiniu import Auth, put_file, etag

from urllib.request import urlretrieve

import urllib.request

import pythoncom

模块安装注意

flask:为了实现web服务,安装及使用 【任意门】

qiniu:七牛云存储 安装及使用 【任意门】

urllib:用于文件下载及验证url是否可以访问

pythoncom:这是个坑,后面具体讲

具体实现代码

"""Flask service that converts a remote PPT/PPTX/PDF into images hosted on Qiniu.

POST ``/filetoimg`` with the form field ``fileUrl``.  The file is downloaded
next to this script (``filedown/``), rendered to one JPG per page/slide,
uploaded to a Qiniu bucket, and the local artefacts are removed afterwards.
Every endpoint answer has the shape ``{'code': ..., 'msg': ..., 'data': ...}``.

PPT conversion drives WPS Office through COM, so it only works on Windows.
"""
import http.client
import os
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
from urllib.request import urlretrieve

import comtypes.client
import win32
from pdf2image import (
    convert_from_path, convert_from_bytes
)
from flask import Flask, request
from qiniu import Auth, put_file, etag
import pythoncom

app = Flask(__name__)

# Only plain web URLs are accepted; this keeps file:// and ftp:// out of a
# service that downloads whatever address the client posts.
ALLOWED_URL_SCHEMES = ('http', 'https')
# Seconds to wait for a URL probe before treating the address as unreachable.
URL_CHECK_TIMEOUT = 10
# File extensions (lower case, without the dot) that can be converted.
SUPPORTED_EXTENSIONS = ('ppt', 'pptx', 'pdf')
# Location of the poppler binaries used by pdf2image.
POPPLER_PATH = r'C:\Program Files\poppler-0.67.0\bin'
# Captures the page number that directly precedes the file extension.
_PAGE_NUMBER_RE = re.compile(r'(\d+)\.[^.]+$')


@app.route("/")
def index():
    """Landing page; only shows an 'under construction' notice."""
    return '️ ⚠️️前方道路施工中 🚧️🚧....'


@app.route('/filetoimg', methods=['POST'])
def fileToImg():
    """Convert the document at form field ``fileUrl`` and return the image URLs."""
    fileUrl = request.form['fileUrl']
    # Make sure the address can be reached before downloading anything.
    if checkUrlVisit(fileUrl) != 200:
        return Response({'msg': '文件地址,无法访问', 'code': 550})
    # Reject unsupported or unsafe file names before touching the disk.
    if not _isSupportedFile(_fileNameFromUrl(fileUrl)):
        return Response({'msg': '文件类型不匹配,无法进行转换', 'code': 550})
    file, path = downloadFile(fileUrl)
    try:
        # Convert the downloaded file and collect the uploaded image URLs.
        rel = file2img(file, path)
    finally:
        # Always remove the local file, its images and its folder, even when
        # the conversion raised.
        deleteFolderAndFiles(file, path)
    return rel


def checkUrlVisit(url):
    """Return 200 when ``url`` answers successfully over http(s), else 550.

    Any failure (HTTP error status, DNS/connection error, timeout, malformed
    URL, unsupported scheme) is reported as 550 instead of raising.
    """
    try:
        if urllib.parse.urlsplit(url).scheme.lower() not in ALLOWED_URL_SCHEMES:
            raise ValueError('unsupported url scheme')
        probe = urllib.request.Request(url)
        # The context manager closes the connection; the body is never read.
        with urllib.request.urlopen(probe, timeout=URL_CHECK_TIMEOUT):
            pass
    except (OSError, ValueError, http.client.HTTPException):
        # HTTPError and URLError are OSError subclasses.
        print('不可以被访问')
        return 550
    print('可以被访问')
    return 200


def Response(reslut):
    """Build the standard answer dict from the optional keys of ``reslut``.

    Missing keys fall back to ``code=200``, ``msg='操作完成'`` and ``data={}``.
    """
    return {
        'code': reslut.get('code', 200),
        'msg': reslut.get('msg', '操作完成'),
        'data': reslut.get('data', {}),
    }


def _fileNameFromUrl(fileUrl):
    """Return the last path segment of ``fileUrl`` without query or fragment."""
    urlPath = urllib.parse.urlsplit(fileUrl).path
    # Backslashes are folded into '/' so a Windows-style path cannot sneak
    # a directory component into the name.
    return urlPath.replace('\\', '/').rsplit('/', 1)[-1]


def _splitFileName(file):
    """Split ``file`` at its last dot into ``(stem, extension)``.

    A name without a dot yields ``(file, '')``.
    """
    stem, dot, ext = file.rpartition('.')
    if not dot:
        return file, ''
    return stem, ext


def _isSafeFileName(file):
    """Tell whether ``file`` is a plain name usable below the download folder."""
    if not file or any(ch in file for ch in '/\\:'):
        return False
    stem, _ = _splitFileName(file)
    # A stem made only of dots/spaces would resolve to the download folder
    # itself (or its parent), which must never be converted into or deleted.
    return bool(stem.strip('. '))


def _isSupportedFile(file):
    """Tell whether ``file`` is a safe name with a convertible extension."""
    if not _isSafeFileName(file):
        return False
    return _splitFileName(file)[1].lower() in SUPPORTED_EXTENSIONS


def downloadFile(fileUrl):
    """Download ``fileUrl`` into ``<script dir>/filedown`` unless already there.

    Returns:
        ``[file name, download folder path]``.
    """
    scriptDir = os.path.abspath(os.path.dirname(sys.argv[0]))
    folderPath = scriptDir + '/filedown'
    os.makedirs(folderPath, exist_ok=True)
    fileName = _fileNameFromUrl(fileUrl)
    if not _isSafeFileName(fileName):
        # Nothing is written for an unusable name; the caller's type check
        # rejects it afterwards.
        return [fileName, folderPath]
    localPath = os.path.join(folderPath, fileName)
    # An existing file is not downloaded again.
    if not os.path.exists(localPath):
        try:
            urlretrieve(fileUrl, localPath, downloadSchedule)
        except Exception:
            # Drop a partial download so it is not mistaken for a finished
            # one on the next request.
            if os.path.exists(localPath):
                os.remove(localPath)
            raise
    return [fileName, folderPath]


def downloadSchedule(a, b, c):
    """Print the download progress; used as the ``urlretrieve`` report hook.

    Args:
        a: number of blocks transferred so far.
        b: size of one block.
        c: total size of the remote file; urlretrieve passes -1 when the
            server does not announce it.
    """
    if c <= 0:
        # Unknown or empty size: a percentage cannot be computed.
        print('%d bytes' % (a * b))
        return
    per = min(100.0, 100.0 * a * b / c)
    print('%.2f%%' % per)


def file2img(file, path):
    """Convert the local file ``path/file`` to images and upload them.

    Returns:
        The answer dict of the matching converter, or a 550 answer when the
        file type is not supported.
    """
    if not _isSupportedFile(file):
        return Response({'msg': '文件类型不匹配,无法进行转换', 'code': 550})
    # e.g. "1024.ppt" -> stem "1024", extension "ppt"; only the last dot
    # separates the extension, so "a.b.pdf" works too.
    filename, ext = _splitFileName(file)
    filePath = path + '/' + file
    # The images are written to a folder named after the file stem.
    folderPath = path + '/' + filename
    os.makedirs(folderPath, exist_ok=True)
    print(f"正在转换文件:{file}")
    if ext.lower() == 'pdf':
        return pdf2png(filePath, folderPath, filename)
    return ppt2png(filePath, folderPath, filename)


def _exportSlidesWithWps(filePath, folderPath):
    """Save every slide of ``filePath`` as an image in ``folderPath`` via WPS.

    Returns True on success and False on any failure.  All COM references
    are local to this function, so they are gone once it returns.
    """
    powerpoint = None
    ppt = None
    try:
        # WPS presentation application exposed through COM.
        powerpoint = comtypes.client.CreateObject("kwpp.Application", dynamic=True)
        ppt = powerpoint.Presentations.Open(filePath)
        # 17 is ppSaveAsJPG in the PowerPoint object model, which the WPS
        # automation interface appears to mirror.
        ppt.SaveAs(folderPath, 17)
        return True
    except Exception as exc:
        print(f"ppt conversion failed: {exc!r}")
        return False
    finally:
        # Best-effort shutdown so a failed conversion does not leave a WPS
        # process behind; errors here must not mask the real outcome.
        if ppt is not None:
            try:
                ppt.Close()
            except Exception:
                pass
        if powerpoint is not None:
            try:
                powerpoint.Quit()
            except Exception:
                pass


def ppt2png(filePath, folderPath, fileName):
    """Render a PPT/PPTX to images with WPS, then upload them to Qiniu."""
    # COM must be initialised in the thread that serves the request.
    pythoncom.CoInitialize()
    try:
        converted = _exportSlidesWithWps(filePath, folderPath)
    finally:
        # Balance the CoInitialize above once the COM objects are released.
        pythoncom.CoUninitialize()
    if not converted:
        return Response({'msg': '文件转换异常' + fileName + '.ppt', 'code': 550})
    return uploadFileToQiniu(folderPath, fileName, 'ppt')


def pdf2png(filePath, folderPath, fileName):
    """Render each PDF page to ``<fileName>_<n>.jpg``, then upload to Qiniu."""
    try:
        images = convert_from_path(filePath, poppler_path=POPPLER_PATH)
        for pageNo, image in enumerate(images, start=1):
            image.save(folderPath + '/' + fileName + '_' + str(pageNo) + '.jpg')
    except Exception as exc:
        print(f"pdf conversion failed: {exc!r}")
        return Response({'msg': '文件转换异常' + fileName + '.pdf', 'code': 550})
    return uploadFileToQiniu(folderPath, fileName, 'pdf')


# Qiniu settings - fill in your own values.
access_key = '自己弄'
secret_key = '自己弄'
bucket_name = '自己弄'
# Public domain that serves the bucket; it must end with a slash.
bucket_url = '自己弄'


def _pageOrderKey(name):
    """Sort key that orders image files by the number before the extension.

    Names without such a number sort after all numbered ones.
    """
    match = _PAGE_NUMBER_RE.search(name)
    if match is None:
        return (1, 0, name)
    return (0, int(match.group(1)), name)


def uploadFileToQiniu(folderPath, fileName, fileSort):
    """Upload every file in ``folderPath`` to Qiniu as ``<fileName>_<n>.jpg``.

    Args:
        folderPath: folder holding the rendered images.
        fileName: stem used for the uploaded object names.
        fileSort: 'ppt' or 'pdf'; kept for callers, both kinds are now
            ordered the same way.

    Returns:
        Answer dict whose ``data`` is the list of image URLs in page order.
        Images whose upload failed are left out.
    """
    # os.listdir gives no ordering guarantee, and a plain string sort would
    # put page 10 before page 2, so order by the page number in the name
    # ("幻灯片3.jpg" from WPS, "<stem>_3.jpg" from the PDF path).
    pathList = sorted(os.listdir(folderPath), key=_pageOrderKey)
    qiniuAuth = Auth(access_key, secret_key)
    newLinkList = []
    for pageNo, entry in enumerate(pathList, start=1):
        print(f"上传中:k:{pageNo} - v:{folderPath}/{entry}")
        name = fileName + '_' + str(pageNo) + '.jpg'
        # URL the image has once it is in the bucket.
        fileUrl = bucket_url + name
        # Skip the upload when an object is already reachable at that URL.
        if checkUrlVisit(fileUrl) == 200:
            newLinkList.append(fileUrl)
            continue
        filePath = folderPath + '/' + entry
        token = qiniuAuth.upload_token(bucket_name, name, 20)
        _ret, info = put_file(token, name, filePath, version='v2')
        print(f"上传结果:", info)
        if info.status_code != 200:
            print(f"上传七牛失败")
            continue
        newLinkList.append(fileUrl)
    return Response({'data': newLinkList})


def deleteFolderAndFiles(file, path):
    """Remove the downloaded ``path/file`` and the image folder derived from it."""
    print(f"删除文件及文件夹")
    if not _isSafeFileName(file):
        # An empty or dot-only stem would point at the download folder itself.
        return
    filename, _ = _splitFileName(file)
    filePath = path + '/' + file
    folderPath = path + '/' + filename
    if os.path.isdir(folderPath):
        # Bottom-up so every sub-folder is already empty when it is removed.
        for root, dirs, files in os.walk(folderPath, topdown=False):
            for f in files:
                print(f"{root}/{f}")
                os.remove(root + '/' + f)
            print(f"{root}")
            os.rmdir(root)
    if os.path.isfile(filePath):
        print(f"{filePath}")
        os.remove(filePath)
    print(f"删除文件夹及文件完成:{filename}")


if __name__ == "__main__":
    # Flask listens on port 5000 when no port is given.
    app.run(port=8800)

最终想要实现的结果

实现多线程,加快执行效率;文件记录到数据库,实现状态结果记录;部署到ubuntu上,可正常运行

遇到的坑及解决方式

上一篇【初识python 1】Python PPT PDF 转成图片中,PPT转图片可以正常运行,但是这次通过web访问就报错,无法正常执行了

Traceback (most recent call last):
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\flask\app.py", line 2070, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\flask\app.py", line 1515, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\flask\app.py", line 1513, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\flask\app.py", line 1499, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "D:\self\pytest\app.py", line 31, in fileToImg
    rel = file2img(file, path)
  File "D:\self\pytest\app.py", line 113, in file2img
    return ppt2png(filePath, folderPath, filename)
  File "D:\self\pytest\app.py", line 123, in ppt2png
    powerpoint = comtypes.client.CreateObject("kwpp.Application",dynamic = True)
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\comtypes\client\__init__.py", line 238, in CreateObject
    obj = comtypes.CoCreateInstance(clsid, clsctx=clsctx, interface=interface)
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\comtypes\__init__.py", line 1219, in CoCreateInstance
    _ole32.CoCreateInstance(byref(clsid), punkouter, clsctx, byref(iid), byref(p))
  File "_ctypes/callproc.c", line 997, in GetResult
OSError: [WinError -2147221008] 尚未调用 CoInitialize。

解决办法:

import pythoncom
#执行的地方加下面的内容-参考上面的代码
pythoncom.CoInitialize()

接下来启动服务仍会遇到问题

Traceback (most recent call last):
  File "D:\self\pytest\app.py", line 11, in <module>
    import pythoncom
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\pythoncom.py", line 2, in <module>
    import pywintypes
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\win32\lib\pywintypes.py", line 105, in <module>
    __import_pywin32_system_module__("pywintypes", globals())
  File "C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\win32\lib\pywintypes.py", line 87, in __import_pywin32_system_module__
    raise ImportError("No system module '%s' (%s)" % (modname, filename))
ImportError: No system module 'pywintypes' (pywintypes39.dll)

解决办法:

网上找了很多答案,基本上都是错误的,卡住了,差点就放弃了

说是在安装文件夹找到“pywin32_system32”文件夹复制到自己项目目录下一套文件夹下,然并卵

以上都不需要看了,下面公布正确答案【 任意门】

#文件夹位置
C:\Users\admin\AppData\Roaming\Python\Python39\site-packages\pywin32_system32
#拷贝到python安装目录这个文件夹下
C:\Program Files\Python39\Lib\site-packages

结果图

Ps:坚持到最后总会胜利的,别放弃

下一篇:【初识python 3】PPT PDF 转图片优化告一段落

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。