目录结构
前言 · 文档准备 · 引入Maven依赖 · 代码块 · 提取结果验证(ppt_demo.ppt 提取结果、pptx_demo.pptx 提取结果)
前言
应公司需求,需实现以下功能
PPT文本内容的替换;PPT文本内容的提取;PPT中图片的提取存放;
此文章将使用Spire.Presentation实现对PPT文件中文本内容及图片的提取;
Spire.Presentation for Java 是一个专业的 PowerPoint API,使开发人员能够在 Java 应用程序中创建、读取、编写、转换和保存 PowerPoint 文档。作为一个独立的 Java 库,Spire.Presentation 不需要在系统上安装 Microsoft PowerPoint。
文档准备
小编准备了以下两个文件:《ppt_demo.ppt》《pptx_demo.pptx》,分别代表不同版本的PPT,以便提取测试,如下图
引入Maven依赖
<repositories>
    <repository>
        <id>com.e-iceblue</id>
        <name>e-iceblue</name>
        <url>http://repo.e-iceblue.com/nexus/content/groups/public/</url>
    </repository>
</repositories>
<dependencies>
    <dependency>
        <groupId>e-iceblue</groupId>
        <artifactId>spire.presentation</artifactId>
        <version>4.9.2</version>
    </dependency>
</dependencies>
代码块
package com.bjzaxk.utils;

import com.spire.presentation.IAutoShape;
import com.spire.presentation.ISlide;
import com.spire.presentation.ParagraphEx;
import com.spire.presentation.Presentation;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Demo that uses Spire.Presentation to pull the text and the images out of a
 * PowerPoint file (.ppt or .pptx).
 */
public class Demo {

    /**
     * Runs both extractions against a hard-coded sample file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Source presentation. Swap in the commented-out path to test the .ppt sample instead.
        // String filePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\ppt_demo.ppt";
        String filePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\pptx_demo.pptx";

        // Path and file name of the text file that receives the extracted text.
        // String extractFilePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\ppt_demo.txt";
        String extractFilePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\pptx_demo.txt";

        // Directory prefix (including the trailing separator) that receives the extracted images.
        String imageFilePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\";

        pptTextExtract(filePath, extractFilePath);
        pptImageExtract(filePath, imageFilePath);
    }

    /**
     * Extracts the paragraph text of every auto shape on every slide and writes it,
     * one paragraph per line (CRLF-terminated), to a UTF-8 text file.
     *
     * <p>No file is written when the presentation yields no text. Failures are
     * reported on standard error and do not propagate to the caller.
     *
     * @param filePath        path of the presentation to read
     * @param extractFilePath path of the text file to create or overwrite
     * @author Mr.Jkx
     */
    public static void pptTextExtract(String filePath, String extractFilePath) {
        Presentation ppt = null;
        try {
            // Load the document.
            ppt = new Presentation();
            ppt.loadFromFile(filePath);

            // Walk every slide and collect the text of each paragraph.
            StringBuilder buffer = new StringBuilder();
            for (Object slide : ppt.getSlides()) {
                for (Object shape : ((ISlide) slide).getShapes()) {
                    if (!(shape instanceof IAutoShape)) {
                        continue;
                    }
                    IAutoShape autoShape = (IAutoShape) shape;
                    // Defensive: skip a shape that exposes no text frame instead of failing the whole run.
                    if (autoShape.getTextFrame() == null) {
                        continue;
                    }
                    for (Object tp : autoShape.getTextFrame().getParagraphs()) {
                        buffer.append(((ParagraphEx) tp).getText()).append("\r\n");
                    }
                }
            }

            if (buffer.length() > 0) {
                // try-with-resources closes the writer even when write() throws; an explicit
                // charset keeps non-ASCII slide text intact regardless of the platform default.
                try (Writer writer = Files.newBufferedWriter(
                        Paths.get(extractFilePath), StandardCharsets.UTF_8)) {
                    writer.write(buffer.toString());
                }
            }
        } catch (Exception e) {
            // Best-effort demo: report the failure and carry on.
            System.err.println("Failed to extract text from " + filePath);
            e.printStackTrace();
        } finally {
            if (ppt != null) {
                ppt.dispose();
            }
        }
    }

    /**
     * Extracts every image held by the presentation and saves each one as a PNG file
     * named {@code pptImage_<timestamp>_<index>.png}.
     *
     * <p>The index suffix keeps the names unique; a name based on the timestamp alone
     * collides when several images are written within the same millisecond. Failures
     * are reported on standard error and do not propagate to the caller.
     *
     * @param filePath      path of the presentation to read
     * @param imageFilePath prefix prepended to each image file name; it is concatenated
     *                      as-is, so a directory must end with a path separator
     * @author Mr.Jkx
     */
    public static void pptImageExtract(String filePath, String imageFilePath) {
        Presentation ppt = null;
        try {
            // Load the document.
            ppt = new Presentation();
            ppt.loadFromFile(filePath);

            // One timestamp per run groups the files; the index makes each name unique.
            long timestamp = System.currentTimeMillis();
            int imageCount = ppt.getImages().getCount();
            for (int i = 0; i < imageCount; i++) {
                BufferedImage image = ppt.getImages().get(i).getImage();
                File target = new File(imageFilePath + "pptImage_" + timestamp + "_" + i + ".png");
                ImageIO.write(image, "PNG", target);
            }
        } catch (Exception e) {
            // Best-effort demo: report the failure and carry on.
            System.err.println("Failed to extract images from " + filePath);
            e.printStackTrace();
        } finally {
            if (ppt != null) {
                ppt.dispose();
            }
        }
    }
}