Word 转换为 HTML 的代码如下。使用前需要先把 jacob 的 DLL 文件放到 C:\WINDOWS\system32 目录下(DLL 文件见附件)。Java 代码如下:
import java.io.File; import java.util.Calendar; import java.util.Date; import com.hifly.common.db.Db; import com.jacob.activeX.ActiveXComponent; import com.jacob.com.ComThread; import com.jacob.com.Dispatch; import com.jacob.com.Variant; /** * Word转换成Html * jacob jar与dll文件下载: http://www.matrix.org.cn/down_view.asp?id=13 * 下载了jacob并放到指定的路径之后(dll放到path,jar文件放到classpath),就可以写你自己的抽取程序了,下面是一个简单的例子: * * */ public class WordToHtml { /* * 转换单个word文件 * origPath为原地址,包括扩展名doc * destPath为转换后的文件地址,包括扩展名htm */ public static void word_To_Htm(String origPath , String destPath) throws Exception{ File ofile = new File(origPath); File dfile = new File(destPath); if(!destPath.endsWith(".htm")){ dfile = new File(destPath+".htm"); } if(ofile.exists() && !dfile.exists()){ ComThread.InitSTA(); ActiveXComponent app = new ActiveXComponent( "Word.Application"); try { app.setProperty("Visible", new Variant(false)); Dispatch docs = app.getProperty("Documents").toDispatch(); Dispatch doc = Dispatch.invoke( docs, "Open", Dispatch.Method, new Object[] {origPath, new Variant(false), new Variant(true) }, new int[1]) .toDispatch(); Dispatch.invoke(doc, "SaveAs", Dispatch.Method, new Object[] { destPath, new Variant(8) }, new int[1]); Variant f = new Variant(false); Dispatch.call(doc, "Close", f); } catch (Exception e) { throw e; } finally { app.invoke("Quit", new Variant[] {}); ComThread.Release(); } } } /** * 把word文件转换成mht文件.转换完成后并自动关闭WORD文件 */ public static void word_To_Mht(String wordFileName, String htmlFile) { ComThread.InitSTA();// 初始化com的线程,非常重要!!使用结束后要调用 realease方法 // Instantiate objWord //Declare word object ActiveXComponent objWord = new ActiveXComponent("Word.Application"); // Assign a local word object Dispatch wordObject = (Dispatch) objWord.getObject(); // Create a Dispatch Parameter to show the document that is opened Dispatch.put((Dispatch) wordObject, "Visible", new Variant(true));// new // Variant(true)表示word应用程序可见 // Instantiate the Documents Property Dispatch documents = 
objWord.getProperty("Documents").toDispatch(); // documents表示word的所有文档窗口,(word是多文档应用程序) // Add a new word document, Current Active Document Dispatch document = Dispatch.call(documents, "Open", wordFileName) .toDispatch(); // 使用Add命令创建一个新文档,用Open命令可以打开一个现有文档 Dispatch.invoke(document, "SaveAs", Dispatch.Method, new Object[] { htmlFile, new Variant(8) }, new int[1]); Dispatch.call(document, "Close"); Dispatch.call(wordObject, "quit"); ComThread.Release();// 释放com线程。根据jacob的帮助文档,com的线程回收不由java的垃圾回收器处理 } /** * 文档转换函数 * * @param docfile * word文档的绝对路径加文件名(包含扩展名) * @param htmlfile * 转换后的html文件绝对路径和文件名(不含扩展名) */ public static void change(String docfile, String htmlfile) { ActiveXComponent app = new ActiveXComponent("Word.Application"); // 启动word try { app.setProperty("Visible", new Variant(false)); // 设置word不可见 Object docs = app.getProperty("Documents").toDispatch(); // Assign a local word object Dispatch wordObject = (Dispatch) app.getObject(); // Create a Dispatch Parameter to show the document that is opened // Dispatch.put((Dispatch) wordObject, "Visible", new // Variant(true));// new Variant(true)表示word应用程序可见 // Tip:设置一个对象的属性的时候,利用Dispatch的put方法,给属性赋值。上面这行语句相当于vb的 // wordObject.Visible = true 语句 // // //Instantiate the Documents Property // Dispatch documents = // objWord.getProperty("Documents").toDispatch(); // //documents表示word的所有文档窗口,(word是多文档应用程序) // Add a new word document, Current Active Document // Dispatch document = Dispatch.call(app, "Add").toDispatch(); // // 使用Add命令创建一个新文档,用Open命令可以打开一个现有文档 Object doc = Dispatch.invoke( app, "Open", Dispatch.Method, new Object[] { docfile, new Variant(false), new Variant(true) }, new int[1]).toDispatch(); // 打开word文件 Dispatch.invoke(app, "SaveAs", Dispatch.Method, new Object[] { htmlfile, new Variant(8) }, new int[1]); // 作为html格式保存到临时文件 Variant f = new Variant(false); Dispatch.call(app, "Close", f); } catch (Exception e) { e.printStackTrace(); } finally { app.invoke("Quit", new Variant[] {}); } } public static void 
wordToHtmlExpm(String wordFileName, String htmlFile) { ComThread.InitSTA();// 初始化com的线程,非常重要!!使用结束后要调用 realease方法 // Instantiate objWord //Declare word object ActiveXComponent objWord = new ActiveXComponent("Word.Application"); // Assign a local word object Dispatch wordObject = (Dispatch) objWord.getObject(); // Create a Dispatch Parameter to show the document that is opened Dispatch.put((Dispatch) wordObject, "Visible", new Variant(true));// new // Variant(true)表示word应用程序可见 // Instantiate the Documents Property Dispatch documents = objWord.getProperty("Documents").toDispatch(); // documents表示word的所有文档窗口,(word是多文档应用程序) // Add a new word document, Current Active Document Dispatch document = Dispatch.call(documents, "Open", wordFileName) .toDispatch(); // 使用Add命令创建一个新文档,用Open命令可以打开一个现有文档 // Dispatch document = Dispatch.call(documents, "Add").toDispatch(); // // 使用Add命令创建一个新文档,用Open命令可以打开一个现有文档 // Dispatch wordContent = Dispatch.get(document, // "Content").toDispatch(); // 取得word文件的内容 // Dispatch.call(wordContent, "InsertAfter", "这里是一个段落的内容");//插入一个段落 // Dispatch paragraphs = Dispatch.get(wordContent, // "Paragraphs").toDispatch(); // 所有段落 // int paragraphCount = Dispatch.get(paragraphs, "Count").toInt(); // // 一共的段落数 // 找到刚输入的段落,设置格式 /* * Dispatch lastParagraph = Dispatch.call(paragraphs, "Item", new * Variant(paragraphCount)). toDispatch(); // 最后一段 Dispatch * lastParagraphRange = Dispatch.get(lastParagraph, "Range"). 
* toDispatch(); Dispatch font = Dispatch.get(lastParagraphRange, * "Font").toDispatch(); Dispatch.put(font, "Bold", new Variant(true)); // * 设置为黑体 Dispatch.put(font, "Italic", new Variant(true)); // 设置为斜体 * Dispatch.put(font, "Name", new Variant("宋体")); // Dispatch.put(font, * "Size", new Variant(12)); //小四 */ // Dispatch.call(document, "SaveAs", new Variant("C:\\abc.doc")); // //保存一个新文档 // Dispatch.get(document, "SaveAs") // Dispatch.call(document, "SaveAs", new Variant(htmlFile)); // 保存一个新文档 Dispatch.invoke(document, "SaveAs", Dispatch.Method, new Object[] { htmlFile, new Variant(9) }, new int[1]); Dispatch.call(document, "Close"); Dispatch.call(wordObject, "quit"); ComThread.Release();// 释放com线程。根据jacob的帮助文档,com的线程回收不由java的垃圾回收器处理 } public static void deal(File file,Date lastTime){ File[] files = file.listFiles(); for(File f : files){ if(f.isDirectory()){ deal(f,lastTime); }else { //if (f.getName().toLowerCase().endsWith(".doc") || f.getName().toLowerCase().endsWith(".docx")){ Long time = f.lastModified(); Calendar cd = Calendar.getInstance(); cd.setTimeInMillis(time); //System.out.println(DateUtils.format(cd.getTime(),"yyyy-MM-dd HH:mm:ss SSS")); //if(cd.getTime().before(lastTime)){ String name = f.getName().replace("“", "").replace("”", ""); String docfile = f.getPath(); String htmlfile = f.getParent()+"\\"+name.substring(0, name.indexOf("."))+".htm"; word_To_Mht(docfile,htmlfile); // String fileName = name.substring(0,name.indexOf("."))+".htm"; // isnertReport(f.getName(),"doc"); // isnertReport(fileName,"html"); //} } try { Thread.sleep(1000*2); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public static void start(){ String path = "e:\\file\\ab\\"; Date lastTime = new Date(); File file = new File(path); deal(file,lastTime); } public static void main(String[] strs) { start(); String wordFile = "D:\\temp\\1234.doc"; String htmlFile = "d:\\temp\\alarm\\" + System.currentTimeMillis() + ".mht"; 
WordToHtml.word_To_Mht(wordFile, htmlFile); } }
?