首页 > 基础资料 博客日记

Java html 转 word,根据html文件生成word文档

2024-09-07 17:00:06 · 基础资料 · 围观 224

本文《Java html 转 word,根据html文件生成word文档》分享给大家,欢迎收藏Java资料网,专注分享技术知识。

首先获取 HTML 文件路径和 Word 模板路径;filePathPre 是 HTML 中图片相对路径所在目录的前缀。

/**
 * Demo entry point: converts a local HTML file into a Word document that is based on an
 * existing .docx template.
 */
public class Html2Word {

    /**
     * Loads the HTML file and the Word template, converts every top-level element of the HTML
     * body into Word content appended to the template document, and writes the result to a
     * new, timestamped .docx file.
     *
     * @param args unused
     * @throws Exception if reading the inputs, converting a node, or writing the output fails
     */
    public static void main(String[] args) throws Exception {

        String html2WordTemplatePath = "D:\\test\\test\\html2word\\html to word template (1).docx";
        // Directory prefix that is prepended to the image "src" values found in the HTML.
        String filePathPre = "D:\\test\\test\\html2word\\";

        Document htmlDocument = Jsoup.parse(new File("D:\\test\\test\\html2word\\test-1.htm"));

        // Both the template stream and the document are closed even if the conversion fails.
        try (java.io.InputStream templateStream = Files.newInputStream(Paths.get(html2WordTemplatePath));
             NiceXWPFDocument document = new NiceXWPFDocument(templateStream)) {

            // Single-element array used as an out-parameter: the converter sets it to true
            // once a table has been rendered, which ends the loop below.
            boolean[] returnFlag = {false};

            List<Node> nodes = htmlDocument.body().childNodes();
            for (Node node : nodes) {
                if (!(node instanceof Element)) {
                    continue;
                }
                if (StringUtils.isEmpty(node.toString().trim())) {
                    continue;
                }

                String nodeName = node.nodeName();
                boolean isSpan = nodeName.equalsIgnoreCase("span");

                // table / ul / span reuse the last paragraph; everything else gets a new one.
                XWPFParagraph paragraph;
                if (nodeName.equalsIgnoreCase("table") || nodeName.equalsIgnoreCase("ul") || isSpan) {
                    paragraph = document.getLastParagraph();
                } else {
                    paragraph = document.createParagraph();
                }
                if (isSpan) {
                    // A top-level span only adds spacing after the previous paragraph.
                    paragraph.setSpacingAfter(200);
                    continue;
                }
                Html2WordUtils.parseHtmlToWord(node, document, paragraph, returnFlag, filePathPre);
                if (returnFlag[0]) {
                    break;
                }
            }
            document.getLastParagraph().createRun().addBreak(BreakType.PAGE);

            // Write the document to the output file.
            // NOTE(review): there is no separator after "html2word", so the file is created as
            // D:\test\test\html2word<millis>.docx rather than inside the html2word directory —
            // confirm this is intended.
            String outPath = "D:\\test\\test\\html2word" + System.currentTimeMillis() + ".docx";
            try (OutputStream outputStream1 = Files.newOutputStream(Paths.get(outPath))) {
                document.write(outputStream1);
            }
        }
    }

}

读取 HTML 中的换行、颜色等格式信息,并写入 Word 文档:
 

public class Html2WordUtils {


    /**
     * Recursively converts an HTML node into Word content.
     *
     * <p>Children are processed first (depth-first). A {@code table} node is then delegated to
     * {@code parseTableToWord}; any other node only produces output when it is a leaf: text
     * nodes become runs in {@code xwpfParagraph} (or a new bullet paragraph when inside
     * {@code ul > li}), and empty {@code h1-h5 / li / img / br} nodes add a carriage return.
     *
     * @param node          HTML node to convert
     * @param doc           Word document being built
     * @param xwpfParagraph paragraph that receives the generated runs
     * @param returnFlag    single-element out-parameter; element 0 is set to {@code true} after a
     *                      table has been rendered
     * @param filePathPre   directory prefix prepended to image {@code src} values inside tables
     * @throws Exception if rendering a table fails (for example an image cannot be read)
     */
    public static void parseHtmlToWord(Node node, NiceXWPFDocument doc, XWPFParagraph xwpfParagraph,boolean[] returnFlag, String filePathPre) throws Exception {

        // NOTE(review): children of a <table> are also visited here before the table itself is
        // rendered, so text nested deeper than a direct <td> child (e.g. <td><b>x</b></td>) is
        // written to the paragraph as well as to the table — confirm whether that is intended.
        List<Node> nodes = node.childNodes();
        if (CollectionUtils.isNotEmpty(nodes)) {
            for (Node childNode : nodes) {
                parseHtmlToWord(childNode, doc, xwpfParagraph,returnFlag,filePathPre);
            }
        }
        // Handle the table tag.
        if ("table".equalsIgnoreCase(node.nodeName())) {
            parseTableToWord(doc, node, xwpfParagraph,filePathPre);
            returnFlag[0] = true;
            return;
        }

        // Only leaf nodes produce output below.
        if (CollectionUtils.isNotEmpty(node.childNodes())) {
            return;
        }

        // Serialized form of the node; only used for the markup guard further down.
        String nodeValue = node.toString();

        Node parent = node.parent();
        boolean boldFlag = false;
        String color = "";
        boolean subFlag = false;
        boolean supFlag = false;
        boolean ulFlag = false;
        boolean tableFlag = false;

        // Derive the formatting from the direct parent (and, for <font>, the grandparent).
        if (null != parent) {
            String parentNodeName = parent.nodeName();
            if (parentNodeName.equalsIgnoreCase("strong") || parentNodeName.equalsIgnoreCase("b")) {
                boldFlag = true;
            } else if (parentNodeName.equalsIgnoreCase("font")) {
                Node grandParent = Objects.requireNonNull(parent.parent());
                if (grandParent.nodeName().equalsIgnoreCase("strong")
                        || grandParent.nodeName().equalsIgnoreCase("b")) {
                    boldFlag = true;
                }
                String color1 = parent.attr("color");
                if (StringUtils.isNotEmpty(color1)){
                    // Content of a <font> with any colour other than #ff0000 is dropped.
                    if (!Objects.equals("#ff0000",color1)){
                        return;
                    }
                    // Strip the leading '#': the run colour is set without it.
                    color = color1.substring(1);
                }
                if (grandParent.nodeName().equalsIgnoreCase("li")) {
                    if (Objects.requireNonNull(grandParent.parent()).nodeName().equalsIgnoreCase("ul")) {
                        ulFlag = true;
                    }
                }
            } else if (parentNodeName.equalsIgnoreCase("sub")) {
                subFlag = true;
            } else if (parentNodeName.equalsIgnoreCase("sup")) {
                supFlag = true;
            } else if (parentNodeName.equalsIgnoreCase("li")) {
                if (Objects.requireNonNull(parent.parent()).nodeName().equalsIgnoreCase("ul")) {
                    ulFlag = true;
                }
            } else if (parentNodeName.equalsIgnoreCase("td")) {
                // Direct text of a table cell is rendered by the table code, not here.
                tableFlag = true;
            }
        }

        if (node.nodeName().equalsIgnoreCase("br")){
            Node preNode = node.previousSibling();
            if (null != preNode && null != preNode.parentNode()){
                Node fontNode = preNode.parentNode();
                if (fontNode.nodeName().equalsIgnoreCase("font")) {
                    // Read the colour from the enclosing <font> itself (the original read it
                    // from the previous sibling), so that a <br> following dropped non-red
                    // text is dropped as well.
                    String color1 = fontNode.attr("color");
                    if (StringUtils.isNotEmpty(color1)){
                        if (!Objects.equals("#ff0000",color1)){
                            return;
                        }
                    }
                }
            }
        }

        if ("#text".equalsIgnoreCase(node.nodeName()) && !tableFlag && !nodeValue.contains("<")) {
            // Use the decoded text instead of the serialized HTML: node.toString() escapes
            // entities, which would put literal "&amp;" / "&nbsp;" into the Word document.
            String text = node instanceof org.jsoup.nodes.TextNode
                    ? ((org.jsoup.nodes.TextNode) node).text()
                    : nodeValue;

            XWPFRun run = xwpfParagraph.createRun();
            run.setFontFamily("Times New Roman");
            run.setFontSize(10);
            if (boldFlag) {
                run.setBold(true);
            }
            if (StringUtils.isNotEmpty(color)) {
                run.setColor(color);
            }
            if (supFlag) {
                run.setSubscript(VerticalAlign.SUPERSCRIPT);
            }
            if (subFlag) {
                run.setSubscript(VerticalAlign.SUBSCRIPT);
            }
            if (ulFlag && StringUtils.isNotEmpty(text.trim())) {
                // List items get their own paragraph: a bullet run followed by the item text.
                XWPFParagraph paragraph = doc.createParagraph();
                paragraph.setIndentFromLeft(0);
                paragraph.setFirstLineIndent(0);
                paragraph.setIndentationLeftChars(125);
                XWPFRun run1 = paragraph.createRun();
                run1.setFontFamily("宋体");
                run1.setFontSize(8);
                run1.setText("● ");
                run1.addTab();
                XWPFRun run2 = paragraph.createRun();
                run2.setText(text.trim());
                run2.setFontFamily("宋体");
                run2.setFontSize(10);
            }
            if (StringUtils.isNotEmpty(text) && !ulFlag){
                run.setText(text.trim());
            }

        }

        // Leaf h1-h5 / li / img / br nodes add a line break to the paragraph.
        boolean enabledBreak = ReUtil.isMatch("(h[12345]|li|img|br)", node.nodeName().toLowerCase());
        if (enabledBreak) {
            XWPFRun run = xwpfParagraph.createRun();
            run.addCarriageReturn();
        }
    }

    /**
     * Renders an HTML table as a borderless Word table inserted at the position of
     * {@code paragraph}.
     *
     * <p>The table HTML is first normalised with {@code simplifyTable}. Images inside a cell are
     * scaled so that the images of one row share the usable page width in proportion to their
     * {@code width} attributes; cell text is rendered through {@code parseHtmlToWordTable}.
     *
     * @param doc         Word document being built
     * @param node        the HTML {@code table} node
     * @param paragraph   paragraph whose cursor marks where the table is inserted
     * @param filePathPre directory prefix prepended to each image {@code src}
     * @throws Exception if an image file cannot be read or added to the document
     */
    private static void parseTableToWord(NiceXWPFDocument doc, Node node, XWPFParagraph paragraph,String filePathPre) throws Exception {
        // Simplify the table HTML (expand colspan / rowspan into plain cells).
        String string = node.toString();
        org.jsoup.nodes.Document tableDoc = Jsoup.parse(Objects.requireNonNull(simplifyTable(string)));
        Elements trList = tableDoc.getElementsByTag("tr");

        // Read the page margins.
        BigInteger right = (BigInteger) doc.getDocument().getBody().getSectPr().getPgMar().getRight();
        BigInteger left = (BigInteger) doc.getDocument().getBody().getSectPr().getPgMar().getLeft();
        // Usable width of the page in centimetres.
        // NOTE(review): presumably assumes an A4 page (21 cm wide) and margins expressed in
        // twips (about 567 per cm) — confirm against the template.
        double wordWorkAreaWidth = 21 - ((double) (right.intValue() + left.intValue()) / 567);

        // Create the table.
        XWPFTable xwpfTable = doc.insertNewTbl(paragraph.getCTP().newCursor());
        if (null == xwpfTable) {
            return;
        }
        // Apply the table style.
        xwpfTable.setWidth("100%");

        // Fill in the rows and cells.
        for (int row = 0; row < trList.size(); row++) {
            XWPFTableRow tableRow = xwpfTable.getRow(row);
            if (null == tableRow){
                tableRow = xwpfTable.createRow();
            }
            Element trElement = trList.get(row);
            Elements tds = trElement.getElementsByTag("td");

            // First pass: sum the declared widths of all images in this row.
            double widthTotal = 0.0;
            for (int col = 0; col < tds.size(); col++) {

                Element colElement = tds.get(col);
                List<Node> nodes = colElement.childNodes();
                for (Node tdNode : nodes) {
                    if ("img".equalsIgnoreCase(tdNode.nodeName())) {
                        String width = tdNode.attr("width");
                        if (NumberUtils.isNumeric(width.trim())){
                            widthTotal = widthTotal + Double.parseDouble(width.trim());
                        }
                    }
                }
            }

            // Second pass: create the cells and render their content.
            for (int col = 0; col < tds.size(); col++) {

                XWPFTableCell tableCell = tableRow.getCell(col);
                if (null == tableCell){
                    tableCell = tableRow.createCell();
                }
                // Hide all four borders of the cell.
                CTTcPr tcPr = tableCell.getCTTc().isSetTcPr() ? tableCell.getCTTc().getTcPr() : tableCell.getCTTc().addNewTcPr();
                CTTcBorders ctTcBorders = tcPr.addNewTcBorders();
                ctTcBorders.addNewLeft().setVal(STBorder.NIL);
                ctTcBorders.addNewRight().setVal(STBorder.NIL);
                ctTcBorders.addNewTop().setVal(STBorder.NIL);
                ctTcBorders.addNewBottom().setVal(STBorder.NIL);

                Element colElement = tds.get(col);
                List<Node> nodes = colElement.childNodes();

                // The text renderer walks the whole cell, so it must run only once per cell;
                // calling it for every direct text child would duplicate the cell content.
                boolean cellTextRendered = false;

                for (Node tdNode : nodes) {
                    if ("img".equalsIgnoreCase(tdNode.nodeName())) {
                        String src = tdNode.attr("src");
                        String width = tdNode.attr("width");
                        String height = tdNode.attr("height");
                        // Decode the two URL escapes expected in file names.
                        src = src.replaceAll("%20", " ").replaceAll("%26","&");

                        String picturePath = filePathPre + src;

                        XWPFRun xwpfRun = tableCell.getParagraphs().get(0).createRun();

                        // Width is this image's share of the usable page width; height keeps
                        // the declared aspect ratio.
                        double picWidth = wordWorkAreaWidth * ( Double.parseDouble(width.trim()) / widthTotal);
                        double picHeight = picWidth * Double.parseDouble(height.trim()) / Double.parseDouble(width.trim());

                        // Close the image stream once the picture has been added.
                        try (InputStream inputStream = Files.newInputStream(Paths.get(picturePath))) {
                            xwpfRun.addPicture(inputStream, Document.PICTURE_TYPE_PNG, src,
                                    (int) (picWidth * Units.EMU_PER_CENTIMETER), (int) (picHeight * Units.EMU_PER_CENTIMETER));
                        }
                    }else if ("#text".equalsIgnoreCase(tdNode.nodeName()) && !cellTextRendered){
                        parseHtmlToWordTable(colElement, doc,  tableCell.getParagraphs().get(0));
                        cellTextRendered = true;
                    }
                }
            }
        }
    }

    /**
     * Recursively renders the content of a table cell into {@code xwpfParagraph}.
     *
     * <p>Children are processed first (depth-first). Nested {@code table} nodes and non-leaf
     * nodes produce no output of their own; text nodes become runs formatted according to their
     * parent tag, and empty {@code h1-h5 / li / img / br} nodes add a carriage return.
     *
     * @param node          HTML node to render (initially the {@code td} element)
     * @param doc           Word document being built; receives bullet paragraphs for list items
     * @param xwpfParagraph paragraph of the table cell that receives the generated runs
     */
    private static void parseHtmlToWordTable(Node node, NiceXWPFDocument doc, XWPFParagraph xwpfParagraph) {

        List<Node> nodes = node.childNodes();
        if (CollectionUtils.isNotEmpty(nodes)) {
            for (Node childNode : nodes) {
                parseHtmlToWordTable(childNode, doc, xwpfParagraph);
            }
        }
        // Nested tables are not rendered.
        if ("table".equalsIgnoreCase(node.nodeName())) {
            return;
        }

        // Only leaf nodes produce output below.
        if (CollectionUtils.isNotEmpty(node.childNodes())) {
            return;
        }
        // Serialized form of the node; only used for the markup guard further down.
        String nodeValue = node.toString();

        Node parent = node.parent();
        boolean boldFlag = false;
        String color = "";
        boolean subFlag = false;
        boolean supFlag = false;
        boolean ulFlag = false;

        // Derive the formatting from the direct parent (and, for <font>, the grandparent).
        if (null != parent) {
            String parentNodeName = parent.nodeName();
            if (parentNodeName.equalsIgnoreCase("strong") || parentNodeName.equalsIgnoreCase("b")) {
                boldFlag = true;
            } else if (parentNodeName.equalsIgnoreCase("font")) {
                Node grandParent = Objects.requireNonNull(parent.parent());
                if (grandParent.nodeName().equalsIgnoreCase("strong")
                        || grandParent.nodeName().equalsIgnoreCase("b")) {
                    boldFlag = true;
                }
                String color1 = parent.attr("color");
                // Only "#RRGGBB" values can be turned into a run colour by stripping the '#';
                // named colours such as "red" would otherwise become the invalid value "ed".
                if (StringUtils.isNotEmpty(color1) && color1.matches("#[0-9a-fA-F]{6}")){
                    color = color1.substring(1);
                }
                if (grandParent.nodeName().equalsIgnoreCase("li")) {
                    if (Objects.requireNonNull(grandParent.parent()).nodeName().equalsIgnoreCase("ul")) {
                        ulFlag = true;
                    }
                }
            } else if (parentNodeName.equalsIgnoreCase("sub")) {
                subFlag = true;
            } else if (parentNodeName.equalsIgnoreCase("sup")) {
                supFlag = true;
            } else if (parentNodeName.equalsIgnoreCase("li")) {
                if (Objects.requireNonNull(parent.parent()).nodeName().equalsIgnoreCase("ul")) {
                    ulFlag = true;
                }
            }
        }

        if ("#text".equalsIgnoreCase(node.nodeName()) && !nodeValue.contains("<")) {
            // Use the decoded text instead of the serialized HTML: node.toString() escapes
            // entities, which would put literal "&amp;" / "&nbsp;" into the Word document.
            String text = node instanceof org.jsoup.nodes.TextNode
                    ? ((org.jsoup.nodes.TextNode) node).text()
                    : nodeValue;

            XWPFRun run = xwpfParagraph.createRun();
            run.setFontFamily("Times New Roman");
            run.setFontSize(10);
            if (boldFlag) {
                run.setBold(true);
            }
            if (StringUtils.isNotEmpty(color)) {
                run.setColor(color);
            }
            if (supFlag) {
                run.setSubscript(VerticalAlign.SUPERSCRIPT);
            }
            if (subFlag) {
                run.setSubscript(VerticalAlign.SUBSCRIPT);
            }
            if (ulFlag && StringUtils.isNotEmpty(text.trim())) {
                // List items get their own paragraph: a bullet run followed by the item text.
                // NOTE(review): this paragraph is created on the document, not inside the
                // table cell — confirm that is the intended placement.
                XWPFParagraph paragraph = doc.createParagraph();
                paragraph.setIndentFromLeft(0);
                paragraph.setFirstLineIndent(0);
                paragraph.setIndentationLeftChars(125);
                XWPFRun run1 = paragraph.createRun();
                run1.setFontFamily("Times New Roman");
                run1.setFontSize(8);
                run1.setText("●");
                run1.addTab();
                XWPFRun run2 = paragraph.createRun();
                run2.setText(text.trim());
                run2.setFontFamily("Times New Roman");
                run2.setFontSize(10);
            }
            if (StringUtils.isNotEmpty(text) && !ulFlag) {
                run.setText(text.trim());
            }

        }

        // Leaf h1-h5 / li / img / br nodes add a line break to the paragraph.
        boolean enabledBreak = ReUtil.isMatch("(|h[12345]|li|img|br)", node.nodeName().toLowerCase());
        if (enabledBreak) {
            XWPFRun run = xwpfParagraph.createRun();
            run.addCarriageReturn();
        }
    }

    /**
     * Normalises the HTML of a table so that every row consists of plain cells: cells with a
     * {@code colspan} are followed by copies of themselves, and cells with a {@code rowspan}
     * are copied into the rows below. The {@code class} attribute is removed from every
     * {@code tr} and {@code td}.
     *
     * @param tableContent HTML of the table
     * @return the outer HTML of the first {@code table} element after normalisation;
     *         {@code null} when {@code tableContent} is empty; an empty string when the parsed
     *         content has no {@code tr} or no {@code table} element
     * @throws NumberFormatException if a {@code colspan} or {@code rowspan} value is not an integer
     */
    public static String simplifyTable(String tableContent) {
        if (StringUtils.isEmpty(tableContent)) {
            return null;
        }
        org.jsoup.nodes.Document tableDoc = Jsoup.parse(tableContent);
        Elements trElements = tableDoc.getElementsByTag("tr");
        // Expand colspan.
        for (Element trElement : trElements) {
            // Drop styling.
            trElement.removeAttr("class");
            Elements tdElements = trElement.getElementsByTag("td");
            // Iterate over a copy because cells are inserted into the row while looping.
            List<Element> tdEleList = covertElements2List(tdElements);
            for (Element curTdElement : tdEleList) {
                // Drop styling.
                curTdElement.removeAttr("class");
                Element ele = curTdElement.clone();
                String colspanValStr = curTdElement.attr("colspan");
                if (!StringUtils.isEmpty(colspanValStr)) {
                    ele.removeAttr("colspan");
                    int colspanVal = Integer.parseInt(colspanValStr.trim());
                    // One copy per additional column covered by the span.
                    for (int k = 0; k < colspanVal - 1; k++) {
                        curTdElement.after(ele.outerHtml());
                    }
                }
            }
        }
        // Expand rowspan.
        List<Element> trEleList = covertElements2List(trElements);
        Element firstTrEle = trElements.first();
        if (null == firstTrEle){
            return "";
        }
        // The first row defines the number of columns that are walked.
        Elements tdElements = firstTrEle.getElementsByTag("td");
        int tdCount = tdElements.size();
        // Walk the table column by column, visiting the cell of every row in that column.
        for (int i = 0; i < tdElements.size(); i++) {
            for (Element trElement : trEleList) {
                // Re-query the row each time: earlier iterations may have inserted cells.
                List<Element> tdElementList = covertElements2List(trElement.getElementsByTag("td"));
                if (i >= tdElementList.size()) {
                    // Ragged row with fewer cells than the first row: nothing to expand here
                    // (previously this raised an IndexOutOfBoundsException).
                    continue;
                }
                Node curTdNode = tdElementList.get(i);
                Node cNode = curTdNode.clone();
                String rowspanValStr = curTdNode.attr("rowspan");
                if (!StringUtils.isEmpty(rowspanValStr)) {
                    cNode.removeAttr("rowspan");
                    Element nextTrElement = trElement.nextElementSibling();
                    int rowspanVal = Integer.parseInt(rowspanValStr.trim());
                    // One copy per additional row covered by the span.
                    for (int j = 0; j < rowspanVal - 1; j++) {
                        Node tempNode = cNode.clone();
                        List<Node> nodeList = new ArrayList<Node>();
                        nodeList.add(tempNode);
                        // Advance to the next target row after the first copy.
                        if (j > 0 && null != nextTrElement) {
                            nextTrElement = nextTrElement.nextElementSibling();
                        }
                        // Child index at which the copy is inserted into the target row.
                        int indexNum = i + 1;
                        if (i == 0) {
                            indexNum = 0;
                        }
                        if (null != nextTrElement){
                            if (indexNum == tdCount) {
                                nextTrElement.appendChild(tempNode);
                            } else {
                                nextTrElement.insertChildren(indexNum, nodeList);
                            }
                        }
                    }
                }
            }
        }
        Element tableEle = tableDoc.getElementsByTag("table").first();
        if (null == tableEle){
            return "";
        }
        return tableEle.outerHtml();
    }

    /**
     * Copies the given elements into a new, independent list, so the result can be iterated
     * while the underlying DOM is being modified.
     *
     * @param curElements elements to copy
     * @return a new mutable list holding the same elements in the same order
     */
    private static List<Element> covertElements2List(Elements curElements) {
        List<Element> snapshot = new ArrayList<>(curElements.size());
        snapshot.addAll(curElements);
        return snapshot;
    }


}


文章来源:https://blog.csdn.net/m0_57605697/article/details/139346807
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若内容造成侵权/违法违规/事实不符,请联系邮箱:jacktools123@163.com进行投诉反馈,一经查实,立即删除!

标签:

相关文章

本站推荐

标签云