700字范文,内容丰富有趣,生活中的好帮手!
700字范文 > JAVA使用spire.doc将富文本和latex公式生成word文档

JAVA使用spire.doc将富文本和latex公式生成word文档

时间:2022-08-24 08:53:24

相关推荐

JAVA使用spire.doc将富文本和latex公式生成word文档

需求说明:数据库中存放的信息为富文本数据,并且包含了latex公式的代码,现在需要将富文本和latex公式数据输出到word文档中。

生成逻辑:将数据获取后进行遍历,获取每个节点,然后将节点增加到段落中,特别需要区别图片标签,需要获取图片的数据输出到文档中。这里使用了spire.doc包生成文档。

主要类:LatexToMathMLUtils(负责生成 Word 文档的主要逻辑)和 LatexUtils(LaTeX 公式相关的工具方法)。

LatexToMathMLUtils.java代码:

package com.xxx.util.pdf;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

import com.itextpdf.io.source.ByteArrayOutputStream;
import com.lowagie.text.Image;
// NOTE(review): the two "com.mon" imports below look truncated (possibly
// "com.xxx.common") - confirm the real package of PropertisUtil / StringUtil.
import com.mon.PropertisUtil;
import com.mon.StringUtil;
import com.spire.doc.Document;
import com.spire.doc.FileFormat;
import com.spire.doc.Section;
import com.spire.doc.documents.HorizontalAlignment;
import com.spire.doc.documents.Paragraph;
import com.spire.doc.documents.ParagraphStyle;
import com.spire.doc.documents.UnderlineStyle;
import com.spire.doc.fields.DocPicture;
import com.spire.doc.fields.TextRange;
import com.spire.doc.fields.omath.OfficeMath;

/**
 * Renders rich-text (HTML) questions that contain LaTeX formulas into a Word
 * document with Spire.Doc.
 *
 * <p>Each question is parsed with jsoup and walked node by node: plain text is
 * appended to the paragraph, {@code \( ... \)} / {@code \[ ... \]} formulas are
 * added as {@link OfficeMath} objects, and {@code <img>} tags are downloaded and
 * embedded as pictures.
 *
 * @author Administrator
 * @date May 10
 */
public class LatexToMathMLUtils {

    protected static final Logger logger = LogManager.getLogger(LatexToMathMLUtils.class);

    /** A picture whose width scaled by {@link #NORMAL_SCALE} exceeds this value uses {@link #REDUCED_SCALE}. */
    private static final float MAX_SCALED_WIDTH = 460f;
    private static final float NORMAL_SCALE = 0.65f;
    private static final float REDUCED_SCALE = 0.5f;

    private static final int CONNECT_TIMEOUT_MILLIS = 3 * 1000;
    /** Upper bound for a single blocking read so a stalled image server cannot hang generation. */
    private static final int READ_TIMEOUT_MILLIS = 10 * 1000;

    /** Host that image URLs are rewritten to on the "shaanxi" platform. */
    private static final String SHAANXI_INTERNAL_HOST = "http://192.168.0.252:83";

    /**
     * URL prefixes replaced by {@link #SHAANXI_INTERNAL_HOST} on the "shaanxi" platform.
     * NOTE(review): both values are empty in the available source (the original
     * host names appear to have been lost) - fill in the real prefixes. Empty
     * entries are skipped, because {@code String.replace("", x)} would insert
     * {@code x} between every character of the URL.
     */
    private static final String[] SHAANXI_PUBLIC_HOSTS = {"", ""};

    /** Matches a paragraph that holds only an item number such as {@code <p>12.</p>}. */
    private static final Pattern ITEM_NUMBER = Pattern.compile("\\<p>\\d{1,2}\\.</p>");

    /**
     * Writes one question (rich text, possibly with LaTeX) into {@code paragraph}.
     *
     * @return the same {@code paragraph}, for chaining
     */
    private static Paragraph writeQuestion(Document document, Paragraph paragraph, String question,
            String questionId) {
        if (question == null) {
            // Nothing to render; Jsoup.parse(null) would otherwise throw and abort the whole document.
            return paragraph;
        }
        // Questions are wrapped in a paragraph tag by default.
        if (!question.startsWith("<p")) {
            question = "<p>" + question + "</p>";
        }
        // Strip <br/> inside formulas so that every formula stays in one piece.
        question = LatexUtils.dealLatexTitle(question);
        org.jsoup.nodes.Document doc = Jsoup.parse(question);
        analyseElements(document, paragraph, doc.body().children(), questionId);
        return paragraph;
    }

    /**
     * Normalises LaTeX before it is handed to Spire: {@code leqslant}/{@code geqslant}
     * become {@code leq}/{@code geq}, and every run of characters in the range
     * U+4E00..U+9FA5 is wrapped in {@code \mbox{...}}.
     */
    private static String latexFormat(String latex) {
        String normalized = latex.replace("leqslant", "leq").replace("geqslant", "geq");
        StringBuilder out = new StringBuilder(normalized.length() + 16);
        boolean inChineseRun = false;
        for (int i = 0; i < normalized.length(); i++) {
            char c = normalized.charAt(i);
            boolean chinese = c >= '\u4E00' && c <= '\u9FA5';
            if (chinese && !inChineseRun) {
                out.append("\\mbox{");
            } else if (!chinese && inChineseRun) {
                out.append('}');
            }
            out.append(c);
            inChineseRun = chinese;
        }
        if (inChineseRun) {
            // Close the group when the formula ends inside a Chinese run.
            out.append('}');
        }
        return out.toString();
    }

    /** Walks the top-level elements of a parsed question and writes each one to {@code paragraph}. */
    private static void analyseElements(Document document, Paragraph paragraph, Elements elements,
            String questionId) {
        for (Element element : elements) {
            if (!element.children().isEmpty()) {
                analyseChildrenElement(document, paragraph, element, questionId);
                paragraph.appendText("\n");
                continue;
            }
            String tag = element.tagName();
            if ("img".equalsIgnoreCase(tag)) {
                appendImage(document, paragraph, element);
                paragraph.appendText("\n");
                continue;
            }
            // Plain element: its text may still contain LaTeX formulas.
            latexTextDeal(paragraph, element.text(), questionId);
            if ("p".equalsIgnoreCase(tag)) {
                logger.debug(element.toString());
                if (itemCheck(element.toString())) {
                    // A bare item number gets no line break after it.
                    continue;
                }
            }
            paragraph.appendText("\n");
        }
    }

    /** Recursively walks the child nodes of {@code parent} down to the leaves and writes them. */
    private static void analyseChildrenElement(Document document, Paragraph paragraph, Node parent,
            String questionId) {
        List<Node> childNodes = parent.childNodes();
        for (Node node : childNodes) {
            if (node.childNodes().size() > 0) {
                analyseChildrenElement(document, paragraph, node, questionId);
                continue;
            }
            String name = node.nodeName();
            // NOTE(review): text directly inside <sub>/<sup> is skipped, as in the
            // original logic - confirm that dropping it is intended.
            if ("#text".equalsIgnoreCase(name) && !isSubOrSup(node.parent())) {
                // getWholeText() is used because attr("text") depends on how a given
                // jsoup version stores text-node content and may return "".
                String content = node instanceof TextNode
                        ? ((TextNode) node).getWholeText()
                        : node.attr("text");
                latexTextDeal(paragraph, content, questionId);
            } else if ("img".equalsIgnoreCase(name)) {
                appendImage(document, paragraph, node);
            } else if ("br".equalsIgnoreCase(name)) {
                // Only break the line when the <br> sits between other content;
                // a lone <br> would just add empty lines.
                if (node.parent().childNodes().size() > 1) {
                    paragraph.appendText("\n");
                }
            }
        }
    }

    private static boolean isSubOrSup(Node node) {
        if (node == null) {
            return false;
        }
        String name = node.nodeName();
        return "sub".equalsIgnoreCase(name) || "sup".equalsIgnoreCase(name);
    }

    /**
     * Downloads the image referenced by {@code imgNode} and appends it to {@code paragraph}.
     * The picture is sized from the tag's width/height attributes when both are usable,
     * otherwise from the image's own dimensions. Nothing is appended when the download fails.
     */
    private static void appendImage(Document document, Paragraph paragraph, Node imgNode) {
        byte[] data = addImg2(document, imgNode);
        if (data == null) {
            return;
        }
        DocPicture picture = paragraph.appendPicture(data);
        Float width = parseDimension(imgNode.attr("width"));
        Float height = parseDimension(imgNode.attr("height"));
        if (width != null && height != null) {
            float scale = width * NORMAL_SCALE > MAX_SCALED_WIDTH ? REDUCED_SCALE : NORMAL_SCALE;
            picture.setWidth(width * scale);
            picture.setHeight(height * scale);
            return;
        }
        // NOTE(review): this fetches the image a second time just to read its size.
        Image intrinsic = addImg(document, imgNode);
        if (intrinsic != null) {
            picture.setWidth(intrinsic.getWidth() * REDUCED_SCALE);
            picture.setHeight(intrinsic.getHeight() * REDUCED_SCALE);
        }
        // Otherwise the picture keeps whatever size appendPicture gave it.
    }

    /**
     * Parses an HTML width/height attribute value.
     *
     * @return the positive numeric value (an optional "px" suffix is accepted), or
     *         {@code null} when the attribute is missing or not a plain number
     */
    private static Float parseDimension(String raw) {
        if (raw == null) {
            return null;
        }
        String value = raw.trim();
        if (value.regionMatches(true, value.length() - 2, "px", 0, 2)) {
            value = value.substring(0, value.length() - 2).trim();
        }
        if (value.isEmpty()) {
            return null;
        }
        try {
            float parsed = Float.parseFloat(value);
            return parsed > 0 && !Float.isInfinite(parsed) ? Float.valueOf(parsed) : null;
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * Resolves the URL to download for an image node, applying the platform-specific
     * host rewrite.
     *
     * @return the trimmed URL, or {@code null} when the node has no usable {@code src}
     */
    private static String resolveImageSrc(Node element) {
        if (element == null || !StringUtil.checkNotNull(element.attr("src"))) {
            return null;
        }
        String src = element.attr("src");
        if ("shaanxi".equals(PropertisUtil.getInstance().getPlatform())) {
            for (String publicHost : SHAANXI_PUBLIC_HOSTS) {
                if (!publicHost.isEmpty()) {
                    src = src.replace(publicHost, SHAANXI_INTERNAL_HOST);
                }
            }
        }
        logger.debug("element.attr2(src):--" + src);
        src = src.trim();
        return src.isEmpty() ? null : src;
    }

    /** Fetches the raw bytes of a network image; returns {@code null} on any failure. */
    private static byte[] addImg2(Document document, Node element) {
        String src = resolveImageSrc(element);
        if (src == null) {
            return null;
        }
        try {
            return getImageFromNetByUrl(src);
        } catch (Exception e) {
            logger.error("src:" + src, e);
            return null;
        }
    }

    /** Fetches a network image as an iText {@link Image}; returns {@code null} on any failure. */
    private static Image addImg(Document document, Node element) {
        String src = resolveImageSrc(element);
        if (src == null) {
            return null;
        }
        Image img;
        try {
            img = wordHttpImage(src);
        } catch (Exception e) {
            logger.error("src:" + src, e);
            return null;
        }
        Float width = parseDimension(element.attr("width"));
        if (width != null && width * NORMAL_SCALE <= MAX_SCALED_WIDTH) {
            img.scalePercent(65, 65);
        } else {
            img.scalePercent(50, 50);
        }
        return img;
    }

    /**
     * Loads an image from a URL as an iText {@link Image}, scaled to 50% and left-aligned
     * with text wrapping.
     */
    public static Image wordHttpImage(String imageUrl) throws Exception {
        Image img = Image.getInstance(new URL(imageUrl));
        img.scalePercent(50, 50);
        img.setAbsolutePosition(0, 0);
        img.setAlignment(Image.LEFT | Image.TEXTWRAP);
        return img;
    }

    /**
     * Splits {@code latexText} into text and formula pieces and writes them, in order,
     * to {@code paragraph}. A piece that cannot be written is logged and skipped so the
     * remaining pieces are still written.
     */
    private static void latexTextDeal(Paragraph paragraph, String latexText, String questionId) {
        logger.debug("latexText:" + latexText);
        String[] pieces;
        try {
            pieces = LatexUtils.titleGroup(latexText);
        } catch (Exception e) {
            logger.error("Failed to split text of question " + questionId, e);
            return;
        }
        if (pieces == null) {
            return;
        }
        for (String piece : pieces) {
            if (piece == null) {
                continue;
            }
            String trimmed = piece.trim();
            if (trimmed.isEmpty()) {
                continue;
            }
            try {
                Matcher formulaMatcher = LatexUtils.compile.matcher(trimmed);
                if (!formulaMatcher.find()) {
                    paragraph.appendText(piece);
                } else {
                    String latex = latexFormat(stripMathDelimiters(trimmed));
                    OfficeMath math = new OfficeMath(paragraph.getDocument());
                    paragraph.getItems().add(math);
                    math.fromLatexMathCode(latex);
                }
            } catch (Exception e) {
                logger.error("Failed to write a piece of question " + questionId + ": " + piece, e);
            }
        }
    }

    /** Removes a leading {@code \(} or {@code \[} and a trailing {@code \)} or {@code \]}, when present. */
    private static String stripMathDelimiters(String formula) {
        String body = formula;
        if (body.startsWith("\\(") || body.startsWith("\\[")) {
            body = body.substring(2);
        }
        if (body.endsWith("\\)") || body.endsWith("\\]")) {
            body = body.substring(0, body.length() - 2);
        }
        return body;
    }

    /**
     * Downloads an image over HTTP.
     *
     * @param strUrl the image URL
     * @return the image bytes, or {@code null} when the download fails for any reason
     */
    public static byte[] getImageFromNetByUrl(String strUrl) {
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(strUrl).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            conn.setReadTimeout(READ_TIMEOUT_MILLIS);
            try (InputStream inStream = conn.getInputStream()) {
                return readInputStream(inStream);
            }
        } catch (Exception e) {
            logger.error("Failed to download image from " + strUrl, e);
            return null;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Reads {@code inStream} to the end and closes it, also when reading fails.
     *
     * @return all bytes read from the stream
     */
    public static byte[] readInputStream(InputStream inStream) throws Exception {
        try (InputStream in = inStream) {
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            while ((len = in.read(buffer)) != -1) {
                outStream.write(buffer, 0, len);
            }
            return outStream.toByteArray();
        }
    }

    /**
     * Generates the Word document: a centred title, every question in {@code quesMap},
     * then (if any) an answer heading followed by every entry of {@code quesAnsMap}.
     * Does nothing when {@code quesMap} is {@code null}.
     *
     * @param paperName  document title
     * @param path       output file path
     * @param quesMap    question id to question rich text
     * @param quesAnsMap question id to answer rich text; may be {@code null} or empty
     */
    public static void produceItextWord(String paperName, String path, Map<String, String> quesMap,
            Map<String, String> quesAnsMap) throws Exception {
        if (quesMap == null) {
            return;
        }
        Document document = new Document();
        try {
            Section section = document.addSection();

            Paragraph pName = section.addParagraph();
            pName.getFormat().setHorizontalAlignment(HorizontalAlignment.Center);
            pName.getFormat().setLineSpacing(15);
            TextRange titleRange = pName.appendText(paperName);
            titleRange.getCharacterFormat().setFontSize(20f);
            titleRange.getCharacterFormat().setBold(true);

            // Question stems.
            for (Map.Entry<String, String> question : quesMap.entrySet()) {
                logger.debug("题干信息:" + question.getValue());
                Paragraph qc = section.addParagraph();
                writeQuestion(document, qc, question.getValue(), question.getKey());
            }

            // Answer list.
            if (quesAnsMap != null && quesAnsMap.size() > 0) {
                Paragraph answerHeading = section.addParagraph();
                TextRange headingRange = answerHeading.appendText("答案");
                answerHeading.getFormat().setHorizontalAlignment(HorizontalAlignment.Center);
                headingRange.getCharacterFormat().setFontSize(20f);
                headingRange.getCharacterFormat().setBold(true);
                headingRange.getCharacterFormat().setUnderlineStyle(UnderlineStyle.None);
                for (Map.Entry<String, String> answer : quesAnsMap.entrySet()) {
                    logger.debug("答案信息:" + answer.getValue());
                    Paragraph qa = section.addParagraph();
                    writeQuestion(document, qa, answer.getValue(), answer.getKey());
                }
            }

            ParagraphStyle style1 = new ParagraphStyle(document);
            style1.setName("style");
            style1.getCharacterFormat().setFontName("Courier New");
            document.getStyles().add(style1);
            pName.applyStyle(style1.getName());

            document.saveToFile(path, FileFormat.Docx);
        } finally {
            // Release the document even when rendering or saving fails.
            document.close();
        }
    }

    /**
     * Tells whether {@code elementContent} contains a paragraph that is only an item
     * number, e.g. {@code <p>3.</p>}.
     *
     * @return {@code false} for {@code null} input
     */
    public static boolean itemCheck(String elementContent) {
        if (elementContent == null) {
            return false;
        }
        return ITEM_NUMBER.matcher(elementContent).find();
    }

    public static void main(String[] args) throws Exception {
        Map<String, String> quesMap = new HashMap<>();
        Map<String, String> quesAnsMap = new HashMap<>();
        String questionAns = "1.<p>设函数\\(f(x)=\\left\\{\\begin{array}{c}x+a, x \\leq 0 \\\\ \\ln x, x&gt;0\\end{array}\\right.\\), 已知 \\(x_{1}&lt;x_{2}\\), 且 \\(f\\left(x_{1}\\right)=f\\left(x_{2}\\right)\\), 若 \\(x_{2}-x_{1}\\) 的最小值为 \\(\\frac{1}{e^{2}}\\), 则 \\(a\\) 的值为_______.</p>";
        quesMap.put("3", questionAns);
        quesAnsMap.put("1", questionAns);
        produceItextWord("重庆八中高级高二(下)数学周考(三)测试", "D:\\ParagraphAlignment.doc", quesMap, quesAnsMap);
    }
}

LatexUtils.java代码:

package com.xxx.util.pdf;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.imageio.ImageIO;
import javax.swing.JLabel;

import org.scilab.forge.jlatexmath.TeXConstants;
import org.scilab.forge.jlatexmath.TeXFormula;
import org.scilab.forge.jlatexmath.TeXIcon;

import com.lowagie.text.Image;

/**
 * Utilities for handling LaTeX formulas embedded in question text.
 *
 * @author liujg
 * @date October 15, 1:51:59 PM
 * @version V1.0
 */
public class LatexUtils {

    /**
     * Matches {@code \( ... \)} and {@code \[ ... \]} formulas in text whose
     * backslashes have been doubled. Kept for callers that use it directly; the
     * methods of this class match the undoubled text instead.
     */
    public static Pattern pattern = Pattern.compile("\\\\\\\\\\((.*?)\\\\\\\\\\)|\\\\\\\\\\[(.*?)\\\\\\\\\\]");

    /**
     * Detects whether a piece of text contains a formula opener ({@code \(} or {@code \[}).
     * NOTE(review): in the two "begin" alternatives the third backslash combines with
     * the letter b into a regex word boundary, so they match "\egin{...}" rather than
     * "\begin{...}". Left unchanged because callers that strip two characters from
     * each end of a matched piece rely on only the bracket openers matching - confirm
     * before fixing.
     */
    public static Pattern compile = Pattern.compile("(\\\\\\()|(\\\\\\[)|(\\\\\\begin\\{equation\\})|(\\\\\\begin\\{aligned\\})");

    /** Matches {@code \( ... \)} and {@code \[ ... \]} formulas in unmodified text. */
    private static final Pattern FORMULA = Pattern.compile("\\\\\\((.*?)\\\\\\)|\\\\\\[(.*?)\\\\\\]");

    // Question list (not used within this class).
    private static Map<String, String> quesMap = new LinkedHashMap<String, String>();

    /**
     * Renders a LaTeX formula to a PNG file on a white background.
     *
     * @param formulaStr the LaTeX source
     * @param path       the file to write
     * @return {@code path}
     * @throws UncheckedIOException if the image cannot be written; previously the
     *         failure was swallowed and a path to a missing file was returned
     */
    public static String latexImage(String formulaStr, String path) {
        TeXFormula formula = new TeXFormula(formulaStr);
        TeXIcon icon = formula.createTeXIcon(TeXConstants.STYLE_DISPLAY, 40);
        BufferedImage image = new BufferedImage(icon.getIconWidth(), icon.getIconHeight(),
                BufferedImage.TYPE_4BYTE_ABGR);
        Graphics2D g2d = image.createGraphics();
        try {
            g2d.setColor(Color.white);
            g2d.fillRect(0, 0, icon.getIconWidth(), icon.getIconHeight());
            JLabel label = new JLabel();
            label.setForeground(new Color(0, 0, 0));
            icon.paintIcon(label, g2d, 0, 0);
        } finally {
            g2d.dispose();
        }
        try {
            ImageIO.write(image, "png", new File(path));
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to write formula image to " + path, e);
        }
        return path;
    }

    /**
     * Loads a local image file as an iText {@link Image}, scaled to 30% and left-aligned
     * with text wrapping.
     */
    public static Image wordLocalImage(String imageUrl) throws Exception {
        Image img = Image.getInstance(imageUrl);
        // Proportional scaling.
        img.scalePercent(30, 30);
        img.setAbsolutePosition(0, 0);
        // Left-aligned, with text wrapping around the image.
        img.setAlignment(Image.LEFT | Image.TEXTWRAP);
        return img;
    }

    /**
     * Loads an image from a URL as an iText {@link Image}, scaled to 50% and left-aligned
     * with text wrapping.
     */
    public static Image wordHttpImage(String imageUrl) throws Exception {
        Image img = Image.getInstance(new URL(imageUrl));
        img.scalePercent(50, 50);
        img.setAbsolutePosition(0, 0);
        img.setAlignment(Image.LEFT | Image.TEXTWRAP);
        return img;
    }

    /**
     * Removes {@code <br/>} and backslash-escaped {@code &nbsp;} from inside every
     * LaTeX formula so each formula stays in one piece. Text outside formulas is
     * returned unchanged.
     *
     * <p>The result is built in a single pass over the matches, so formula text is
     * never interpreted as a regex replacement string; a {@code $} inside a formula
     * is therefore safe.
     *
     * @param title the rich text, may be {@code null}
     * @return the cleaned text, or {@code null} when {@code title} is {@code null}
     * @author liujg
     * @date October 27, 11:51:06 AM
     */
    public static String dealLatexTitle(String title) {
        if (title == null) {
            return null;
        }
        StringBuilder cleaned = new StringBuilder(title.length());
        Matcher matcher = FORMULA.matcher(title);
        int copiedUpTo = 0;
        while (matcher.find()) {
            cleaned.append(title, copiedUpTo, matcher.start());
            cleaned.append(matcher.group().replace("<br/>", "").replace("\\&nbsp;", ""));
            copiedUpTo = matcher.end();
        }
        cleaned.append(title, copiedUpTo, title.length());
        return cleaned.toString();
    }

    /**
     * Splits question text into alternating text and formula pieces, in document order.
     *
     * <p>{@code <p>} and {@code </p>} tags are removed, {@code <br/>} inside a formula
     * is dropped and {@code <br/>} elsewhere becomes a newline. Formula pieces keep
     * their {@code \( \)} / {@code \[ \]} delimiters. Pieces are separated internally
     * with a {@code #####} marker, so text that itself contains that marker is split
     * there too.
     *
     * @param title the question text, may be {@code null}
     * @return the pieces, or {@code null} when {@code title} is {@code null}
     */
    public static String[] titleGroup(String title) {
        if (title == null) {
            return null;
        }
        String text = title.replace("<p>", "").replace("</p>", "");
        StringBuilder marked = new StringBuilder(text.length() + 32);
        Matcher matcher = FORMULA.matcher(text);
        int copiedUpTo = 0;
        while (matcher.find()) {
            marked.append(text, copiedUpTo, matcher.start());
            marked.append("#####").append(matcher.group().replace("<br/>", "")).append("#####");
            copiedUpTo = matcher.end();
        }
        marked.append(text, copiedUpTo, text.length());
        return marked.toString().replace("<br/>", "\n").split("#####");
    }

    public static void main(String[] args) {
        String url = "xxxx";
        try {
            for (int i = 0; i < 1; i++) {
                Image img = LatexUtils.wordHttpImage(url);
                System.out.println(i + "--" + img);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

输出结果:

富文本html页面展示方式:

页面引入

<meta name="viewport" content="width=device-width, initial-scale=1">

<meta http-equiv="X-UA-Compatible" content="IE=edge">

<!-- <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script> -->

<script src="../../js/polyfill.min.js?features=es6"></script>

<!-- <script type="text/javascript" id="MathJax-script" async src="/npm/mathjax@3/es5/tex-chtml.js"></script> -->

<script type="text/javascript" id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>

数据在页面展示后使用js进行渲染:

MathJax.typesetPromise();

展示结果:

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。