1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > word 转 html cms Java 将Word文件转换为HTML格式文件

word 转 html cms Java 将Word文件转换为HTML格式文件

时间:2023-03-05 22:38:19

相关推荐

word 转 html cms Java 将Word文件转换为HTML格式文件

前言:很多时候我们需要在项目中导入Word文档,但后期在前端再次展示该文档时,前端往往需要的是HTML格式,于是就会有人提出这样的需求:存文档的时候能不能直接存成HTML格式?本文的内容正好可以满足这个需求。

我是通过MultipartFile类来实现的,上代码:

一、首先导入需要的依赖包:

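<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.17</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
    <version>2.0.1</version>
</dependency>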

二、编写代码:

package com.lmt.service.file;

import java.io.ByteArrayInputStream;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.FileWriter;

import java.io.IOException;

import java.io.InputStream;

import java.util.UUID;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.apache.poi.util.IOUtils;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.stereotype.Component;

import org.springframework.web.multipart.MultipartFile;

import org.w3c.dom.Document;

import fr.opensagres.poi.xwpf.converter.core.ImageManager;

import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;

import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;

@Component

public class WordToHtml {

private static final Logger logger = LoggerFactory.getLogger(WordToHtml.class);

/**
 * Saves an uploaded Word document to the upload directory and converts it to an HTML file.
 *
 * <p>The extension is validated (case-insensitively) before anything is written to disk,
 * so unsupported uploads no longer leave stray files behind. The stored copy is named with
 * a random UUID plus a canonical {@code .doc}/{@code .docx} suffix, so no part of the
 * client-supplied file name ends up in the path on disk.
 *
 * @param file the uploaded Word document ({@code .doc} or {@code .docx})
 * @return the generated HTML file, or {@code null} when the upload is missing, has an
 *         unsupported extension, cannot be saved, or fails to convert
 */
public File convert(MultipartFile file) {
    // getOriginalFilename() may return null; treat that like any other unsupported upload.
    String filename = (file == null) ? null : file.getOriginalFilename();
    if (filename == null) {
        logger.error("不支持的文件格式!");
        return null;
    }

    // Check ".docx" first: it is the longer suffix and must not be mistaken for ".doc".
    boolean isDocx = hasExtension(filename, ".docx");
    boolean isDoc = !isDocx && hasExtension(filename, ".doc");
    if (!isDocx && !isDoc) {
        logger.error("不支持的文件格式!");
        return null;
    }
    String suffix = isDocx ? ".docx" : ".doc";

    String newName = UUID.randomUUID().toString();

    // TODO: make the storage location configurable instead of hard-coding D:/test/
    File uploadDir = new File("D:/test/");
    if (!uploadDir.isDirectory() && !uploadDir.mkdirs()) {
        logger.error("上传文件出错!");
        return null;
    }
    File convFile = new File(uploadDir, newName + suffix);

    // FileOutputStream creates the file itself; try-with-resources guarantees it is closed.
    try (FileOutputStream fos = new FileOutputStream(convFile)) {
        fos.write(file.getBytes());
    } catch (IOException ex) {
        logger.error("上传文件出错!", ex);
        return null;
    }

    // The converters build paths by string concatenation, so the directory must end
    // with the platform separator (a literal backslash is only valid on Windows).
    String parentDirectory = convFile.getParent();
    if (!parentDirectory.endsWith(File.separator)) {
        parentDirectory = parentDirectory + File.separator;
    }

    if (isDocx) {
        return docxConvert(parentDirectory, convFile.getAbsolutePath(), newName);
    }
    return docConvert(parentDirectory, convFile.getAbsolutePath(), newName);
}

/** Returns whether {@code name} ends with {@code extension}, ignoring case. */
private static boolean hasExtension(String name, String extension) {
    int offset = name.length() - extension.length();
    return offset >= 0 && name.regionMatches(true, offset, extension, 0, extension.length());
}

/**
 * Converts a {@code .docx} file to an HTML file written next to it.
 *
 * @param parentDirectory directory (ending with a path separator) that receives the HTML
 *                        file and is handed to the image manager as its base directory
 * @param filename        absolute path of the {@code .docx} file to read
 * @param newName         base name (without extension) of the HTML file to create
 * @return the generated HTML file, or {@code null} if reading or writing fails
 */
private File docxConvert(String parentDirectory, String filename, String newName) {
    File htmlFile = new File(parentDirectory + newName + ".html");
    // Resources are opened in declaration order and closed in reverse, so the output file
    // is only created once the document has been parsed, and every handle is released
    // even when the conversion throws.
    try (FileInputStream in = new FileInputStream(filename);
         XWPFDocument document = new XWPFDocument(in);
         FileOutputStream out = new FileOutputStream(htmlFile)) {
        XHTMLOptions options = XHTMLOptions.create()
                .setImageManager(new ImageManager(new File(parentDirectory), UUID.randomUUID().toString()))
                .indent(4);
        XHTMLConverter.getInstance().convert(document, out, options);
        return htmlFile;
    } catch (IOException ex) {
        logger.error("word转化出错!", ex);
        return null;
    }
}

/**
 * Converts a legacy {@code .doc} file to an HTML file written next to it.
 *
 * <p>Embedded pictures are saved as separate files in {@code parentDirectory} and
 * referenced from the HTML by file name.
 *
 * @param parentDirectory directory (ending with a path separator) that receives the HTML
 *                        file and any extracted pictures
 * @param filename        absolute path of the {@code .doc} file to read
 * @param newName         base name (without extension) of the HTML file to create
 * @return the generated HTML file, or {@code null} if the conversion fails
 */
private File docConvert(String parentDirectory, String filename, String newName) {
    File htmlFile = new File(parentDirectory + newName + ".html");
    try (FileInputStream in = new FileInputStream(filename);
         HWPFDocument document = new HWPFDocument(in)) {
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // The converter does not handle pictures by default; store each one on disk
        // and return the name the generated HTML should reference.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] bytes, PictureType pictureType, String s, float v, float v1) {
                // Prefix the suggested name with a UUID so pictures never collide.
                File imageFile = new File(parentDirectory, UUID.randomUUID().toString() + s);
                imageFile.getParentFile().mkdirs();
                try (FileOutputStream imageOut = new FileOutputStream(imageFile)) {
                    imageOut.write(bytes);
                } catch (IOException ex) {
                    // Best effort: a picture that cannot be saved must not abort the conversion.
                    logger.error("word转化出错!", ex);
                }
                return imageFile.getName();
            }
        });

        wordToHtmlConverter.processDocument(document);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        // Serialize into memory first so a failed transform leaves no partial HTML file.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(buffer));

        // Write the serializer's UTF-8 bytes verbatim. Decoding and re-encoding them with
        // the platform default charset corrupts non-ASCII text on non-UTF-8 systems.
        try (FileOutputStream out = new FileOutputStream(htmlFile)) {
            out.write(buffer.toByteArray());
        }
        return htmlFile;
    } catch (IOException | TransformerException | ParserConfigurationException ex) {
        logger.error("word转化出错!", ex);
        // Report the failure to the caller instead of returning a file that may not exist.
        return null;
    }
}

/**

* 将上传的Word文档转化成HTML字符串

* @param attachfile

* @return

*/

public String convertToHtml(MultipartFile attachfile) {

String wordContent = "";

// 将Word文件转换为html

File file = convert(attachfile);

// 读取html文件

if (file != null) {

return "文件转换成功"

}

return "文件转换失败";

}

代码的含义已经写在各行的注释里了,如果发现哪里有问题,欢迎大家随时在下方评论区留言!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。