转载于https://www.jb51.net/article/101730.htm
原博客用的是 poi 3.14,由于个人项目已经用了 poi 3.15,所以对引入的 jar 包版本做了修改;工具类仍沿用转载博客的代码,只对类名做了修改。后面发现转换出的样式效果并不理想(主要体现在:表格、括号、图片、标题编号……),就舍弃了 Word 转 HTML 的功能,只用了富文本框。
转载的目的在于备忘,后面如果再用到这个功能,再进行改造。
jar包:
<dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>3.15</version>
</dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>3.15</version>
</dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>3.15</version>
</dependency>
<dependency><groupId>fr.opensagres.xdocreport</groupId><artifactId>xdocreport</artifactId><version>1.0.5</version>
</dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml-schemas</artifactId><version>3.15</version>
</dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>ooxml-schemas</artifactId><version>1.3</version>
</dependency>
工具类:
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;public class PoiUtils {
// doc转换为htmlvoid docToHtml() throws Exception {
String sourceFileName = "C:\\aaa\\test.doc";String targetFileName = "C:\\aaa\\worldToHtml.html";String imagePathStr = "C:\\aaa\\image\\";HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName));Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);// 保存图片,并返回图片的相对路径wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
try (FileOutputStream out = new FileOutputStream(imagePathStr + name)) {
out.write(content);} catch (Exception e) {
e.printStackTrace();}return "image/" + name;});wordToHtmlConverter.processDocument(wordDocument);Document htmlDocument = wordToHtmlConverter.getDocument();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(new File(targetFileName));TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "html");serializer.transform(domSource, streamResult);}// docx转换为htmlpublic void docxToHtml() throws Exception {
String sourceFileName = "C:\\aaa\\test.docx";String targetFileName = "C:\\aaa\\worldToHtml.html";String imagePathStr = "C:\\aaa\\image\\";OutputStreamWriter outputStreamWriter = null;try {
XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName));XHTMLOptions options = XHTMLOptions.create();// 存放图片的文件夹options.setExtractor(new FileImageExtractor(new File(imagePathStr)));// html中图片的路径options.URIResolver(new BasicURIResolver("image"));outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();xhtmlConverter.convert(document, outputStreamWriter, options);} finally {
if (outputStreamWriter != null) {
outputStreamWriter.close();}}}
}
controller:
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
@RestController
@RequestMapping("/test")
public class TestController {

    /**
     * Handles {@code GET /test/test} by running the docx-to-HTML conversion of {@link PoiUtils}.
     *
     * @throws Exception if the conversion fails
     */
    @GetMapping("/test")
    public void qryAll() throws Exception {
        new PoiUtils().docxToHtml();
    }
}
若要改造,可在配置文件中配置存储目录(image、doc/docx、html);在 controller 上传文件后生成新的文件名,将文件存储到 doc/docx 文件夹,再调用 PoiUtils 的方法(docxToHtml/docToHtml,把文件名作为参数)即可。