1200字范文 > 使用POI将doc文件转换为html

使用POI将doc文件转换为html

时间：2023-07-18 16:48:44

相关推荐

使用POI将doc文件转换为html

需要的jar包有：poi、poi-scratchpad、jsoup、flying-saucer（xhtmlrenderer）和 iText 等，其中有一些是依赖包，可以使用maven下载。

doc文件转换为html文件

package com.gsww.sxzz.controller.service;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.PicturesManager;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.hwpf.usermodel.Picture;import org.apache.poi.hwpf.usermodel.PictureType;import org.jsoup.Jsoup; import org.w3c.dom.Document;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.parsers.ParserConfigurationException;import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerException;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import java.io.*;import java.util.List;/*** Created by Carey on 15-2-2.*/public class docTohtml {public static void main(String argv[]) {try {convert2Html("D:\\b.doc","D:\\1.html");} catch (Exception e) {e.printStackTrace();}}//输出html文件 public static void writeFile(String content, String path) {FileOutputStream fos = null; BufferedWriter bw = null;org.jsoup.nodes.Document doc = Jsoup.parse(content);String styleOld=doc.getElementsByTag("style").html();//统一字体格式为宋体styleOld=styleOld.replaceAll("font-family:.+(?=;\\b)", "font-family:SimSun");doc.getElementsByTag("head").empty();doc.getElementsByTag("head").append("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");doc.getElementsByTag("head").append(" <style type=\"text/css\"></style>");doc.getElementsByTag("style").append(styleOld);/*正则表达式查询字体内容：font-family:.+(?=;\b)*/System.out.println(content);content=doc.html();content=content.replace("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">", "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");try {File file = new File(path);fos = new FileOutputStream(file);bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));bw.write(content);} catch (FileNotFoundException fnfe) {fnfe.printStackTrace();} catch 
(IOException ioe) {ioe.printStackTrace();} finally {try {if (bw != null)bw.close();if (fos != null)fos.close();} catch (IOException ie) {}}}//word 转 html public static void convert2Html(String fileName, String outPutFile)throws TransformerException, IOException,ParserConfigurationException {HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));//兼容以上版本// XSSFWorkbook xssfwork=new XSSFWorkbook(new FileInputStream(fileName));WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());wordToHtmlConverter.setPicturesManager( new PicturesManager(){public String savePicture( byte[] content,PictureType pictureType, String suggestedName,float widthInches, float heightInches ){return "test/"+suggestedName;}} );wordToHtmlConverter.processDocument(wordDocument);//save picturesList pics=wordDocument.getPicturesTable().getAllPictures();if(pics!=null){for(int i=0;i<pics.size();i++){Picture pic = (Picture)pics.get(i);System.out.println();try {pic.writeImageContent(new FileOutputStream("D:/test/"+ pic.suggestFullFileName()));} catch (FileNotFoundException e) {e.printStackTrace();}}}Document htmlDocument = wordToHtmlConverter.getDocument();ByteArrayOutputStream out = new ByteArrayOutputStream();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(out);TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "HTML");serializer.transform(domSource, streamResult);out.close();writeFile(new String(out.toByteArray()), outPutFile);}}

遇到的问题：当doc转换为html时，不会将图像的线条转换过来，只有在table表格中的内容才可以转换为span标签。如果要画下划线，可以放一个只设定了下边框的table单元格，这样就可以完美转换为html了。

将html转换为pdf

package com.gsww.sxzz.controller.service;import com.lowagie.text.pdf.BaseFont;import org.xhtmlrenderer.pdf.ITextFontResolver;import org.xhtmlrenderer.pdf.ITextRenderer;import java.io.File;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.OutputStream;/*** Created by Carey on 15-2-2.*/public class htmlToPdf {public boolean convertHtmlToPdf(String inputFile, String outputFile){try {OutputStreamos = new FileOutputStream(outputFile);ITextRenderer renderer = new ITextRenderer();String url = new File(inputFile).toURI().toURL().toString();renderer.setDocument(url);// 解决中文支持问题ITextFontResolver fontResolver = renderer.getFontResolver();/*fontResolver.addFont("C:\\Windows\\Fonts\\simsunb.ttf", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);*///宋体文件的相对路径fontResolver.addFont("C:\\Windows\\Fonts\\simsun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);renderer.getSharedContext().setBaseURL("file:/D:/");renderer.layout();renderer.createPDF(os);os.flush();os.close();} catch (Exception e) {// TODO Auto-generated catch block e.printStackTrace();}return true;}public static void main(String [] args){htmlToPdf html2Pdf =new htmlToPdf();try {html2Pdf.convertHtmlToPdf("D:\\1.html","D:\\index.pdf");} catch (Exception e) {e.printStackTrace();}}}

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。