处理相关冲突

This commit is contained in:
chengli 2022-07-04 21:19:57 +08:00
parent 1c5c532317
commit 7bfc017dad
16 changed files with 1127 additions and 0 deletions

View File

@ -0,0 +1,15 @@
package com.actionsoft.apps.coe.pal.datamigration.util.htmltodocx;
/**
 * Shared constants for the HTML-to-DOCX conversion utilities.
 *
 * @author baizp
 * @date 2022/6/24 16:02
 */
public class CommonConStant {

    /** Name of the marker attribute written onto recognised HTML elements. */
    public static final String COMMONATTR = "data-class";
}

View File

@ -0,0 +1,55 @@
package com.actionsoft.apps.coe.pal.datamigration.util.htmltodocx;
/**
 * Maps HTML tag names to the marker value used for them during conversion.
 *
 * <p>Each constant carries the tag name ({@code code}), the marker value
 * ({@code value}, empty for the inline formatting tags) and a description
 * ({@code desc}).
 *
 * @author baizp
 * @date 2022/6/24 15:59
 */
public enum ElementEnum {
    H1("h1","h1","一级标题"),
    H2("h2","h2","二级标题"),
    H3("h3","h3","三级标题"),
    H7("h7","h7","小标题"),
    P("p", "paragraph", "段落"),
    STRONG("strong","","加粗"),
    I("i","","斜体"),
    U("u", "", "字体下划线"),
    IMG("img", "imgurl", "base64图片"),
    TABLE("table","table","表格"),
    BR("br","br","换行");

    private final String code;
    private final String value;
    private final String desc;

    ElementEnum(String code, String value, String desc) {
        this.code = code;
        this.value = value;
        this.desc = desc;
    }

    /** @return the HTML tag name this constant stands for */
    public String getCode() {
        return code;
    }

    /** @return the marker value associated with the tag (may be empty) */
    public String getValue() {
        return value;
    }

    /** @return the description of the tag */
    public String getDesc() {
        return desc;
    }

    /**
     * Looks up the marker value for a tag name, ignoring case.
     *
     * @param code HTML tag name; {@code null} matches nothing
     * @return the marker value of the first matching constant, or {@code null}
     *         when no constant matches
     */
    public static String getValueByCode(String code) {
        ElementEnum match = findByCode(code);
        return match == null ? null : match.value;
    }

    /** Returns the first constant whose tag name equals {@code tagName} ignoring case, or {@code null}. */
    private static ElementEnum findByCode(String tagName) {
        for (ElementEnum candidate : values()) {
            if (candidate.code.equalsIgnoreCase(tagName)) {
                return candidate;
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,610 @@
package com.actionsoft.apps.coe.pal.datamigration.util.htmltodocx;
/**
* @author baizp
* @Description:
* @date 2022/6/24 16:01
*/
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.jaxb.Context;
import org.docx4j.model.structure.SectionWrapper;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
import org.docx4j.openpackaging.parts.WordprocessingML.FooterPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.wml.*;
import org.docx4j.wml.PPrBase.Ind;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.math.BigInteger;
import java.util.List;
/**
* @program: htmltoword
* @description: html docx
* @author: corey
* @create: 2020-04-29 14:10
**/
public class HtmlToWord {
private static ObjectFactory factory;
private static WordprocessingMLPackage wordMLPackage;
/**
 * Converts a rich-text HTML fragment into the bytes of a DOCX document.
 *
 * <p>The fragment is parsed with jsoup, recognised elements are tagged through
 * {@link HtmlUtils#addElement(Document)}, and every tagged element is appended
 * to a freshly created package according to its marker value. A footer with a
 * page-number field is attached and a page break is appended at the end.
 *
 * <p>NOTE(review): the package and the object factory live in static fields
 * that are reassigned on every call, so concurrent calls would share state —
 * confirm callers never run this in parallel.
 *
 * @param data HTML fragment to convert
 * @return the serialized DOCX, or {@code null} when any step inside the
 *         conversion throws (the stack trace is printed)
 */
public static byte[] resolveHtml(String data) {
    Document document = Jsoup.parseBodyFragment(data, "UTF-8");
    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        wordMLPackage = WordprocessingMLPackage.createPackage();
        factory = Context.getWmlObjectFactory();
        Relationship relationship = createFooterPart();
        createFooterReference(relationship);
        MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
        alterStyleSheet();
        // Tag the recognised elements, then walk only the tagged ones.
        HtmlUtils.addElement(document);
        Elements elements = document.select("[" + CommonConStant.COMMONATTR + "]");
        for (Element em : elements) {
            switch (em.attr(CommonConStant.COMMONATTR)) {
                case "title":
                    documentPart.addStyledParagraphOfText("Title", em.text());
                    break;
                case "subtitle":
                    documentPart.addStyledParagraphOfText("Subtitle", em.text());
                    break;
                case "imgurl":
                    // NOTE(review): src is used as a local file path taken straight
                    // from the HTML — confirm the input is trusted.
                    String imgSrc = em.attr("src");
                    File file = new File(imgSrc);
                    byte[] bytes = convertImageToByteArray(file);
                    addImageToPackage(wordMLPackage, bytes);
                    break;
                case "imgbase64":
                    // No output is produced for this marker.
                    break;
                case "table":
                    Tbl table = addTable(em);
                    documentPart.addObject(table);
                    break;
                case "h1":
                    setNum1(1, documentPart.addStyledParagraphOfText("Heading1", em.text()));
                    break;
                case "h2":
                    setNum1(2, documentPart.addStyledParagraphOfText("Heading2", em.text()));
                    break;
                case "h3":
                    setNum1(3, documentPart.addStyledParagraphOfText("Heading3", em.text()));
                    break;
                case "paragraph":
                    P p = addParapraph(em.text());
                    // First-line indent for body paragraphs.
                    setFirstLine(p, "400");
                    documentPart.getContent().add(p);
                    break;
                default:
                    documentPart.addParagraphOfText(em.text());
                    break;
            }
        }
        addPageBreak(documentPart);
        wordMLPackage.save(out);
        return out.toByteArray();
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Replaces the paragraph properties of a style with numbering properties whose
 * indent level and numbering id are both set to {@code level}.
 *
 * @param level value written to both the indent level and the numbering id
 * @param style style whose paragraph properties are replaced
 */
public static void setNum(int level,Style style) {
    ObjectFactory wml = Context.getWmlObjectFactory();
    BigInteger levelValue = BigInteger.valueOf(level);

    PPrBase.NumPr.Ilvl indentLevel = wml.createPPrBaseNumPrIlvl();
    indentLevel.setVal(levelValue);

    PPrBase.NumPr.NumId numberingId = wml.createPPrBaseNumPrNumId();
    numberingId.setVal(levelValue);

    PPrBase.NumPr numbering = wml.createPPrBaseNumPr();
    numbering.setIlvl(indentLevel);
    numbering.setNumId(numberingId);

    PPr paragraphProps = wml.createPPr();
    paragraphProps.setNumPr(numbering);
    style.setPPr(paragraphProps);
}
/**
 * Attaches numbering properties to a paragraph, using {@code level} for both
 * the indent level and the numbering id.
 *
 * <p>The paragraph properties are created when the paragraph has none, and the
 * object factory is obtained locally so the method also works when it is
 * called before {@code resolveHtml} has initialised the shared factory.
 *
 * @param level value written to both the indent level and the numbering id
 * @param p     paragraph to number
 */
public static void setNum1(int level,P p){
    ObjectFactory wml = Context.getWmlObjectFactory();
    PPr ppr = getPPr(p);

    PPrBase.NumPr numPr = wml.createPPrBaseNumPr();
    ppr.setNumPr(numPr);

    PPrBase.NumPr.Ilvl ilvlElement = wml.createPPrBaseNumPrIlvl();
    ilvlElement.setVal(BigInteger.valueOf(level));
    numPr.setIlvl(ilvlElement);

    PPrBase.NumPr.NumId numIdElement = wml.createPPrBaseNumPrNumId();
    numIdElement.setVal(BigInteger.valueOf(level));
    numPr.setNumId(numIdElement);
}
/**
 * Builds a paragraph containing a single run with the given text and an
 * empty run-properties element. The shared object factory is refreshed first.
 *
 * @param text text placed in the run
 * @return the new paragraph (not yet attached to any document)
 */
private static P addParapraph(String text) {
    factory = Context.getWmlObjectFactory();

    Text content = factory.createText();
    content.setValue(text);

    R run = factory.createR();
    run.getContent().add(content);
    run.setRPr(factory.createRPr());

    P paragraph = factory.createP();
    paragraph.getContent().add(run);
    return paragraph;
}
/**
 * Sets the first-line indent of a paragraph, creating the paragraph
 * properties and the indent element when they are missing.
 *
 * @param p   paragraph to indent
 * @param str indent value as a decimal integer string
 */
private static void setFirstLine(P p, String str) {
    PPr props = getPPr(p);
    if (props.getInd() == null) {
        props.setInd(new Ind());
    }
    props.getInd().setFirstLine(new BigInteger(str));
}
/**
 * Returns the paragraph properties of {@code p}, creating and attaching an
 * empty instance first when the paragraph has none.
 *
 * @param p paragraph to inspect
 * @return the existing or newly attached paragraph properties
 */
private static PPr getPPr(P p) {
    if (p.getPPr() == null) {
        p.setPPr(new PPr());
    }
    return p.getPPr();
}
/**
 * Builds a bordered docx4j table from an HTML table element, one table row
 * per {@code tr} element found beneath it. The shared object factory is
 * refreshed first.
 *
 * @param table HTML element to read rows from
 * @return the new table (not yet attached to any document)
 */
private static Tbl addTable(Element table) {
    factory = Context.getWmlObjectFactory();
    Tbl tbl = factory.createTbl();
    addBorders(tbl);
    Elements rows = table.getElementsByTag("tr");
    for (int i = 0; i < rows.size(); i++) {
        tbl.getContent().add(addTableTr(rows.get(i)));
    }
    return tbl;
}
/**
 * Converts one HTML table row into a docx4j row.
 *
 * <p>Both {@code th} and {@code td} cells are taken, in document order, so a
 * row that mixes header and data cells keeps all of them. Every cell gets a
 * fixed width of 1000 and a single paragraph holding the cell's text.
 *
 * @param tr HTML row element
 * @return the new row
 */
private static Tr addTableTr(Element tr) {
    Elements cells = tr.select("th, td");
    Tr row = factory.createTr();
    for (Element cell : cells) {
        Tc tableCell = factory.createTc();
        setCellWidth(tableCell, 1000);
        tableCell.getContent().add(wordMLPackage.getMainDocumentPart().createParagraphOfText(cell.text()));
        row.getContent().add(tableCell);
    }
    return row;
}
/**
 * Gives a table cell a fresh property set containing only the given width.
 * Any properties previously set on the cell are replaced.
 *
 * @param tableCell cell to modify
 * @param width     width value stored in the cell's width element
 */
private static void setCellWidth(Tc tableCell, int width) {
    TblWidth cellWidth = new TblWidth();
    cellWidth.setW(BigInteger.valueOf(width));

    TcPr cellProps = new TcPr();
    cellProps.setTcW(cellWidth);
    tableCell.setTcPr(cellProps);
}
/**
 * Replaces the table's properties with a new set that draws the same single
 * line border on all four outer edges and on the inner grid lines.
 *
 * @param table table to decorate
 */
private static void addBorders(Tbl table) {
    // One border definition is shared by every edge.
    CTBorder line = new CTBorder();
    line.setVal(STBorder.SINGLE);
    line.setColor("auto");
    line.setSz(new BigInteger("4"));
    line.setSpace(new BigInteger("0"));

    TblBorders edges = new TblBorders();
    edges.setTop(line);
    edges.setBottom(line);
    edges.setLeft(line);
    edges.setRight(line);
    edges.setInsideH(line);
    edges.setInsideV(line);

    TblPr tableProps = new TblPr();
    tableProps.setTblBorders(edges);
    table.setTblPr(tableProps);
}
/**
 * Reads an image file fully into a byte array.
 *
 * <p>The stream is always closed, and a file that is too large for a single
 * array or that cannot be read to its reported length is reported as an
 * {@link IOException} instead of yielding a truncated or padded array.
 *
 * @param file file to read
 * @return the complete file contents
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException           if the file is too large or cannot be read completely
 */
private static byte[] convertImageToByteArray(File file) throws FileNotFoundException, IOException {
    long length = file.length();
    // Arrays are indexed by int, so anything larger cannot be loaded in one piece.
    if (length > Integer.MAX_VALUE) {
        throw new IOException("File too large: " + file.getName());
    }
    byte[] bytes = new byte[(int) length];
    try (InputStream is = new FileInputStream(file)) {
        int offset = 0;
        int numRead;
        while (offset < bytes.length && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
            offset += numRead;
        }
        // Make sure every byte was actually read.
        if (offset < bytes.length) {
            throw new IOException("Could not completely read file " + file.getName());
        }
    }
    return bytes;
}
/**
 * Creates an image part from raw bytes, wraps it in an inline drawing inside
 * a new paragraph, and appends that paragraph to the main document part.
 *
 * @param wordMLPackage package that receives the image
 * @param bytes         image data
 * @throws Exception if the image part or the inline drawing cannot be created
 */
private static void addImageToPackage(WordprocessingMLPackage wordMLPackage, byte[] bytes) throws Exception {
    // Ids passed to createImageInline for the drawing and for the picture.
    final int drawingPropsId = 1;
    final int picturePropsId = 2;

    Inline inlineImage = BinaryPartAbstractImage
            .createImagePart(wordMLPackage, bytes)
            .createImageInline("Filename hint", "Alternative text", drawingPropsId, picturePropsId, false);

    wordMLPackage.getMainDocumentPart().addObject(addInlineImageToParagraph(inlineImage));
}
/**
 * Wraps an inline image in a drawing, the drawing in a run, and the run in a
 * new paragraph.
 *
 * @param inline inline object holding the image
 * @return the paragraph containing the image
 */
private static P addInlineImageToParagraph(Inline inline) {
    ObjectFactory wml = new ObjectFactory();

    Drawing drawing = wml.createDrawing();
    drawing.getAnchorOrInline().add(inline);

    R run = wml.createR();
    run.getContent().add(drawing);

    P paragraph = wml.createP();
    paragraph.getContent().add(run);
    return paragraph;
}
/**
 * Adjusts selected styles of the package's style sheet.
 *
 * <p>"Normal", "Heading1" and "Heading2" get dedicated adjustments; "Title"
 * and "Subtitle" only have their theme font information removed. If the
 * style sheet cannot be loaded the stack trace is printed and the styles are
 * left untouched. Styles without an id are skipped.
 */
public static void alterStyleSheet() {
    StyleDefinitionsPart styleDefinitionsPart = wordMLPackage.getMainDocumentPart().getStyleDefinitionsPart();
    Styles styles;
    try {
        styles = styleDefinitionsPart.getContents();
    } catch (Docx4JException e) {
        e.printStackTrace();
        // Nothing to alter without a style sheet.
        return;
    }
    if (styles == null) {
        return;
    }
    for (Style style : styles.getStyle()) {
        String styleId = style.getStyleId();
        if (styleId == null) {
            continue;
        }
        switch (styleId) {
            case "Normal":
                alterNormalStyle(style);
                break;
            case "Heading1":
                alterHeading1Style(style);
                break;
            case "Heading2":
                alterHeading2Style(style);
                break;
            case "Title":
            case "Subtitle":
                getRunPropertiesAndRemoveThemeInfo(style);
                break;
            default:
                break;
        }
    }
}
/**
 * Replaces the run properties of the given style with a new set that only
 * specifies the Arial font and a font size value of 20 (half-points).
 *
 * @param style style to overwrite
 */
private static void alterNormalStyle(Style style) {
    // Start from empty run properties so none of the previous settings survive.
    RPr freshProps = new RPr();
    style.setRPr(freshProps);
    changeFontToArial(freshProps);
    changeFontSize(freshProps, 20);
}
/**
 * Adjusts the first-level heading style in place: theme font information is
 * removed, bold is switched off and the font size value is set to 28
 * (half-points).
 *
 * @param style heading style to adjust
 */
private static void alterHeading1Style(Style style) {
    RPr headingProps = getRunPropertiesAndRemoveThemeInfo(style);
    removeBoldStyle(headingProps);
    changeFontSize(headingProps, 28);
}
/**
 * Adjusts the second-level heading style in place: theme font information is
 * removed, bold is switched off and the font size value is set to 24
 * (half-points).
 *
 * @param style heading style to adjust
 */
private static void alterHeading2Style(Style style) {
    RPr headingProps = getRunPropertiesAndRemoveThemeInfo(style);
    removeBoldStyle(headingProps);
    changeFontSize(headingProps, 24);
}
/**
 * Returns the run properties of a style after stripping their theme font
 * information.
 *
 * <p>A style without run properties gets an empty set attached, so callers
 * always receive a usable, non-null object. Theme information is only
 * stripped when the run properties actually carry a font element.
 *
 * @param style style to read and adjust
 * @return the style's (possibly newly created) run properties
 */
private static RPr getRunPropertiesAndRemoveThemeInfo(Style style) {
    RPr rpr = style.getRPr();
    if (rpr == null) {
        rpr = new RPr();
        style.setRPr(rpr);
    }
    if (rpr.getRFonts() != null) {
        removeThemeFontInformation(rpr);
    }
    return rpr;
}
/**
 * Replaces the font element of the run properties with one that names Arial
 * for the ASCII and High ANSI slots.
 *
 * @param runProperties run properties to modify
 */
private static void changeFontToArial(RPr runProperties) {
    final String fontName = "Arial";
    RFonts arial = new RFonts();
    arial.setHAnsi(fontName);
    arial.setAscii(fontName);
    runProperties.setRFonts(arial);
}
/**
 * Sets the size element of the run properties.
 *
 * @param runProperties run properties to modify
 * @param fontSize      size in half-points, i.e. twice the desired point size
 */
private static void changeFontSize(RPr runProperties, int fontSize) {
    HpsMeasure halfPoints = new HpsMeasure();
    halfPoints.setVal(BigInteger.valueOf(fontSize));
    runProperties.setSz(halfPoints);
}
/**
 * Clears the ASCII and High ANSI theme references from the font element of
 * the run properties.
 *
 * <p>Does nothing when the run properties are {@code null} or carry no font
 * element, since there is no theme information to remove in that case.
 *
 * @param runProperties run properties to clean; may be {@code null}
 */
private static void removeThemeFontInformation(RPr runProperties) {
    if (runProperties == null) {
        return;
    }
    RFonts fonts = runProperties.getRFonts();
    if (fonts == null) {
        return;
    }
    fonts.setAsciiTheme(null);
    fonts.setHAnsiTheme(null);
}
/**
 * Switches bold off in the run properties.
 *
 * <p>When the run properties have no bold element yet, one is created so the
 * value can be set to {@code false} explicitly instead of failing.
 *
 * @param runProperties run properties to modify
 */
private static void removeBoldStyle(RPr runProperties) {
    BooleanDefaultTrue bold = runProperties.getB();
    if (bold == null) {
        bold = new BooleanDefaultTrue();
        runProperties.setB(bold);
    }
    bold.setVal(false);
}
/**
 * Creates a footer part holding the page-number footer, registers it with the
 * main document part and returns the resulting relationship.
 *
 * @return relationship between the main document part and the new footer
 * @throws InvalidFormatException if the part cannot be added
 */
private static Relationship createFooterPart() throws InvalidFormatException {
    FooterPart footer = new FooterPart();
    footer.setPackage(wordMLPackage);
    footer.setJaxbElement(createFooterWithPageNr());

    MainDocumentPart mainPart = wordMLPackage.getMainDocumentPart();
    return mainPart.addTargetPart(footer);
}
/**
 * Builds a footer whose only paragraph contains a page-number field: a field
 * begin character, the field instruction and a field end character.
 *
 * @return the new footer
 */
public static Ftr createFooterWithPageNr() {
    Ftr footer = factory.createFtr();
    P pageNumberParagraph = factory.createP();
    footer.getContent().add(pageNumberParagraph);

    // The field instruction has to sit between the begin and end markers.
    addFieldBegin(pageNumberParagraph);
    addPageNumberField(pageNumberParagraph);
    addFieldEnd(pageNumberParagraph);
    return footer;
}
/**
 * Appends a run carrying the PAGE field instruction (with the MERGEFORMAT
 * switch) to the paragraph.
 *
 * @param paragraph paragraph that receives the run
 */
private static void addPageNumberField(P paragraph) {
    Text instruction = new Text();
    instruction.setValue(" PAGE \\* MERGEFORMAT ");
    // Keep the leading and trailing blanks of the instruction.
    instruction.setSpace("preserve");

    R instructionRun = factory.createR();
    instructionRun.getContent().add(factory.createRInstrText(instruction));
    paragraph.getContent().add(instructionRun);
}
/**
 * Appends a run containing the field begin character to the paragraph.
 *
 * @param paragraph paragraph that receives the run
 */
private static void addFieldBegin(P paragraph) {
    FldChar beginMarker = factory.createFldChar();
    beginMarker.setFldCharType(STFldCharType.BEGIN);

    R markerRun = factory.createR();
    markerRun.getContent().add(beginMarker);
    paragraph.getContent().add(markerRun);
}
/**
 * Appends a run containing the field end character to the paragraph.
 *
 * @param paragraph paragraph that receives the run
 */
private static void addFieldEnd(P paragraph) {
    R markerRun = factory.createR();
    FldChar endMarker = factory.createFldChar();
    endMarker.setFldCharType(STFldCharType.END);
    markerRun.getContent().add(endMarker);
    paragraph.getContent().add(markerRun);
}
/**
 * Adds a default footer reference for the given relationship to the section
 * properties of the document's last section, creating those section
 * properties when the section has none.
 *
 * @param relationship relationship of the footer part to reference
 */
public static void createFooterReference(Relationship relationship) {
    List<SectionWrapper> sections = wordMLPackage.getDocumentModel().getSections();
    SectionWrapper lastSection = sections.get(sections.size() - 1);

    SectPr sectPr = lastSection.getSectPr();
    if (sectPr == null) {
        // The wrapper exists but has no section properties yet.
        sectPr = factory.createSectPr();
        wordMLPackage.getMainDocumentPart().addObject(sectPr);
        lastSection.setSectPr(sectPr);
    }

    FooterReference reference = factory.createFooterReference();
    reference.setId(relationship.getId());
    reference.setType(HdrFtrRef.DEFAULT);
    sectPr.getEGHdrFtrReferences().add(reference);
}
/**
 * Appends a paragraph containing a page break to the document body.
 *
 * <p>The break is placed inside a run, because a break element is run
 * content and is not a valid direct child of a paragraph. A failure to load
 * the document contents is printed and otherwise ignored.
 *
 * @param documentPart main document part to extend
 */
private static void addPageBreak(MainDocumentPart documentPart) {
    Br breakObj = new Br();
    breakObj.setType(STBrType.PAGE);

    R run = factory.createR();
    run.getContent().add(breakObj);

    P paragraph = factory.createP();
    paragraph.getContent().add(run);
    try {
        documentPart.getContents().getBody().getContent().add(paragraph);
    } catch (Docx4JException e) {
        e.printStackTrace();
    }
}
}

View File

@ -0,0 +1,202 @@
package com.actionsoft.apps.coe.pal.datamigration.util.htmltodocx;
/**
* @author baizp
* @Description:
* @date 2022/6/24 15:55
*/
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.ObjectUtils;
import org.springframework.util.StringUtils;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
public class HtmlUtils {
/**
 * Tags every element of the document whose tag name has a non-empty marker
 * value in {@link ElementEnum} by setting the marker attribute to that value.
 *
 * @param document parsed document to tag
 * @throws NullPointerException if {@code document} is empty according to
 *                              {@code ObjectUtils.isEmpty}
 */
public static void addElement(Document document) {
    if (ObjectUtils.isEmpty(document)) {
        throw new NullPointerException("不允许为空的对象添加元素");
    }
    for (Element node : document.getAllElements()) {
        String marker = ElementEnum.getValueByCode(node.tag().getName());
        if (StringUtils.isEmpty(marker)) {
            // Unknown tag or a tag without a marker value: leave untouched.
            continue;
        }
        node.attr(CommonConStant.COMMONATTR, marker);
    }
}
/**
 * Renders a rich-text HTML fragment into a Word document at the position of
 * the given paragraph.
 *
 * <p>Recognised elements are tagged through {@link #addElement(Document)} and
 * each tagged element is inserted at a cursor created on {@code paragraph}.
 * Headings (h1, h2, h3, h7), paragraphs, line breaks, tables and images
 * referenced by URL are rendered; the other markers produce no output. Any
 * exception raised while rendering is printed and ends the rendering.
 *
 * @param ritchText rich-text HTML fragment
 * @param doc       document that receives the content
 * @param paragraph paragraph marking the insertion position
 */
public static void resolveHtml(String ritchText, XWPFDocument doc, XWPFParagraph paragraph) {
    Document document = Jsoup.parseBodyFragment(ritchText, "UTF-8");
    try {
        // Tag the recognised elements, then walk only the tagged ones.
        HtmlUtils.addElement(document);
        Elements elements = document.select("[" + CommonConStant.COMMONATTR + "]");
        for (Element em : elements) {
            XmlCursor xmlCursor = paragraph.getCTP().newCursor();
            switch (em.attr(CommonConStant.COMMONATTR)) {
                case "title":
                case "subtitle":
                case "imgbase64":
                    // No output is produced for these markers.
                    break;
                case "imgurl":
                    // NOTE(review): the URL comes straight from the rich text and is
                    // fetched as-is — confirm the input is trusted.
                    InputStream inputStream = new URL(em.attr("src")).openConnection().getInputStream();
                    try {
                        XWPFParagraph imgurlparagraph = doc.insertNewParagraph(xmlCursor);
                        ParagraphStyleUtil.setImageCenter(imgurlparagraph);
                        // NOTE(review): the picture is always declared as PNG while the
                        // file name says jpeg — confirm which one is intended.
                        imgurlparagraph.createRun().addPicture(inputStream, XWPFDocument.PICTURE_TYPE_PNG, "图片.jpeg", Units.toEMU(150), Units.toEMU(150));
                    } finally {
                        // Release the connection stream even when inserting the picture fails.
                        closeStream(inputStream);
                    }
                    // NOTE(review): nothing in this method creates picture.jpg; this
                    // cleanup looks like a leftover — confirm before removing.
                    File file = new File("picture.jpg");
                    if (file.exists()) {
                        file.delete();
                    }
                    break;
                case "table":
                    XWPFTable xwpfTable = doc.insertNewTbl(xmlCursor);
                    addTable(xwpfTable, em);
                    // Centre the table itself, then the content of its cells.
                    ParagraphStyleUtil.setTableLocation(xwpfTable, "center");
                    ParagraphStyleUtil.setCellLocation(xwpfTable, "CENTER", "center");
                    break;
                case "h1":
                    ParagraphStyleUtil.setImageCenter(insertTitleParagraph(doc, xmlCursor, em.text(), TitleFontEnum.H1));
                    break;
                case "h2":
                    ParagraphStyleUtil.setImageCenter(insertTitleParagraph(doc, xmlCursor, em.text(), TitleFontEnum.H2));
                    break;
                case "h3":
                    insertTitleParagraph(doc, xmlCursor, em.text(), TitleFontEnum.H3);
                    break;
                case "paragraph":
                    XWPFParagraph paragraphd = doc.insertNewParagraph(xmlCursor);
                    // Body text is prefixed with the literal below as indentation.
                    paragraphd.createRun().setText(" " + em.text());
                    break;
                case "br":
                    XWPFParagraph br = doc.insertNewParagraph(xmlCursor);
                    XWPFRun run = br.createRun();
                    run.addBreak(BreakType.TEXT_WRAPPING);
                    // Without this break a line break also produced an h7 paragraph.
                    break;
                case "h7":
                    // NOTE(review): the original comment said "align left" while the
                    // code aligns right; the code is kept — confirm the intent.
                    ParagraphStyleUtil.AlignmentRight(insertTitleParagraph(doc, xmlCursor, em.text(), TitleFontEnum.H7));
                    break;
                default:
                    break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/** Inserts a paragraph at the cursor holding {@code text} in one run styled as the given title level. */
private static XWPFParagraph insertTitleParagraph(XWPFDocument doc, XmlCursor cursor, String text, TitleFontEnum level) {
    XWPFParagraph titleParagraph = doc.insertNewParagraph(cursor);
    XWPFRun titleRun = titleParagraph.createRun();
    titleRun.setText(text);
    ParagraphStyleUtil.setTitle(titleRun, level.getTitle());
    return titleParagraph;
}
/**
 * Closes every non-null resource passed in. A failure to close one resource
 * is printed and does not stop the remaining ones from being closed.
 *
 * @param closeables resources to close; individual entries may be {@code null}
 */
public static void closeStream(Closeable... closeables) {
    for (int i = 0; i < closeables.length; i++) {
        Closeable resource = closeables[i];
        if (resource == null) {
            continue;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Copies the rows of an HTML table into a Word table, passing each row to
 * {@code addTableTr} together with its zero-based position.
 *
 * @param xwpfTable Word table to fill
 * @param table     HTML element to read {@code tr} elements from
 */
private static void addTable(XWPFTable xwpfTable, Element table) {
    Elements rows = table.getElementsByTag("tr");
    // Row 0 is treated specially by addTableTr, so the index is passed along.
    for (int rownum = 0; rownum < rows.size(); rownum++) {
        addTableTr(xwpfTable, rows.get(rownum), rownum);
    }
}
/**
 * Copies the cells of one HTML table row into the Word table.
 *
 * <p>Both {@code th} and {@code td} cells are taken, in document order, so a
 * row that mixes header and data cells keeps all of them. Row 0 reuses the
 * table's existing first row; every later row is created on demand. Cells
 * that the target row does not have yet are appended, so a row with more
 * cells than the first one no longer fails. A row without cells adds nothing.
 *
 * @param xwpfTable Word table to fill
 * @param tr        HTML row element
 * @param rownum    zero-based position of the row within the HTML table
 */
private static void addTableTr(XWPFTable xwpfTable, Element tr, int rownum) {
    Elements cells = tr.select("th, td");
    if (cells.isEmpty()) {
        return;
    }
    XWPFTableRow row = (rownum == 0) ? xwpfTable.getRow(0) : xwpfTable.createRow();
    for (int i = 0, j = cells.size(); i < j; i++) {
        XWPFTableCell cell = row.getCell(i);
        if (cell == null) {
            // The row has fewer cells than this HTML row: grow it.
            cell = row.addNewTableCell();
        }
        cell.setText(cells.get(i).text());
    }
}
}

View File

@ -0,0 +1,105 @@
package com.actionsoft.apps.coe.pal.datamigration.util.htmltodocx;
/**
* @author baizp
* @Description:
* @date 2022/6/24 15:57
*/
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import java.util.List;
/**
 * Helpers that apply a small, fixed set of text, paragraph and table styles
 * to Apache POI Word objects.
 *
 * @author corey
 * @version 1.0
 * @date 2020/5/5 9:36 PM
 */
public class ParagraphStyleUtil {

    /**
     * Sets the first-line indent of the paragraph to 400.
     *
     * @param paragraph paragraph to indent
     */
    public static void setIndentationFirstLine(XWPFParagraph paragraph){
        paragraph.setFirstLineIndent(400);
    }

    /**
     * Styles a run as a title: bold, with the font size that
     * {@link TitleFontEnum} defines for the given title key. The font size is
     * left unchanged when the key is unknown.
     *
     * @param run   run to style
     * @param title title key, e.g. the value of {@code TitleFontEnum.H1.getTitle()}
     */
    public static void setTitle(XWPFRun run, String title){
        run.setBold(true);
        Integer fontSize = TitleFontEnum.getFontByTitle(title);
        // An unknown key yields null; unboxing it would throw.
        if (fontSize != null) {
            run.setFontSize(fontSize);
        }
    }

    /**
     * Sets the horizontal and vertical alignment of every cell of a table.
     * The horizontal alignment is applied to the first paragraph of each cell.
     *
     * @param xwpfTable          table to modify
     * @param verticalLoction    name of an {@code XWPFTableCell.XWPFVertAlign} constant,
     *                           e.g. {@code TOP}, {@code BOTTOM}, {@code CENTER}, {@code BOTH}
     * @param horizontalLocation justification name, e.g. {@code center}, {@code left},
     *                           {@code right}, {@code both}
     */
    public static void setCellLocation(XWPFTable xwpfTable, String verticalLoction, String horizontalLocation) {
        for (XWPFTableRow row : xwpfTable.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                CTP firstParagraph = cell.getCTTc().getPList().get(0);
                CTPPr paragraphProps = firstParagraph.getPPr();
                if (paragraphProps == null) {
                    paragraphProps = firstParagraph.addNewPPr();
                }
                CTJc justification = paragraphProps.getJc();
                if (justification == null) {
                    justification = paragraphProps.addNewJc();
                }
                justification.setVal(STJc.Enum.forString(horizontalLocation));
                cell.setVerticalAlignment(XWPFTableCell.XWPFVertAlign.valueOf(verticalLoction));
            }
        }
    }

    /**
     * Sets the alignment of a whole table. An existing justification element
     * is reused, so calling this repeatedly does not add duplicates.
     *
     * @param xwpfTable table to modify
     * @param location  justification name, e.g. {@code center}, {@code left},
     *                  {@code right}, {@code both}
     */
    public static void setTableLocation(XWPFTable xwpfTable, String location) {
        CTTbl cttbl = xwpfTable.getCTTbl();
        CTTblPr tblpr = cttbl.getTblPr() == null ? cttbl.addNewTblPr() : cttbl.getTblPr();
        CTJc cTJc = tblpr.getJc();
        if (cTJc == null) {
            cTJc = tblpr.addNewJc();
        }
        cTJc.setVal(STJc.Enum.forString(location));
    }

    /**
     * Centres the paragraph.
     *
     * @param xwpfParagraph paragraph to align
     */
    public static void setImageCenter(XWPFParagraph xwpfParagraph){
        xwpfParagraph.setAlignment(ParagraphAlignment.CENTER);
    }

    /**
     * Aligns the paragraph to the left.
     *
     * @param xwpfParagraph paragraph to align
     */
    public static void AlignmentLeft(XWPFParagraph xwpfParagraph){
        xwpfParagraph.setAlignment(ParagraphAlignment.LEFT);
    }

    /**
     * Aligns the paragraph to the right.
     *
     * @param xwpfParagraph paragraph to align
     */
    public static void AlignmentRight(XWPFParagraph xwpfParagraph){
        xwpfParagraph.setAlignment(ParagraphAlignment.RIGHT);
    }
}

View File

@ -0,0 +1,61 @@
package com.actionsoft.apps.coe.pal.datamigration.util.htmltodocx;
import java.io.*;
import java.util.List;
/**
 * Manual driver for the rich-text to DOCX conversion.
 *
 * @author baizp
 * @date 2022/6/24 15:53
 */
public class Test {

    /**
     * Wraps a sample rich text in a minimal HTML document, converts it and
     * hands the result to {@code CreateMaps.updateMaps}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String content = "富文本的内容";
        StringBuilder html = new StringBuilder();
        html.append("<html><body>");
        html.append(content);
        html.append("</body></html>");
        String contents = txt2String(html.toString());
        byte[] result = HtmlToWord.resolveHtml(contents);
        if (result == null) {
            // resolveHtml reports its own failure and returns null.
            throw new IllegalStateException("HTML to DOCX conversion failed");
        }
        String plRid = UUIDGener.getUUID();
        InputStream sbs = new ByteArrayInputStream(result);
        // NOTE(review): `name` is not declared anywhere in the visible source —
        // confirm where it is supposed to come from.
        new CreateMaps().updateMaps(plRid, sbs, name);
    }

    /**
     * Converts rich text to DOCX and writes the result to a file.
     *
     * <p>The output file is only opened once the conversion has succeeded, and
     * the stream is always closed. An I/O failure while writing is printed.
     *
     * @param contents    rich-text HTML
     * @param outFilePath path of the file to write
     * @throws IllegalStateException if the conversion yields no data
     */
    public static void outRichTextToDocx(String contents ,String outFilePath) {
        byte[] result = HtmlToWord.resolveHtml(txt2String(contents));
        if (result == null) {
            throw new IllegalStateException("HTML to DOCX conversion failed");
        }
        try (OutputStream out = new FileOutputStream(outFilePath)) {
            out.write(result);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the given text prefixed with the platform line separator.
     *
     * @param content rich text
     * @return the line separator followed by {@code content}
     */
    public static String txt2String(String content) {
        return System.lineSeparator() + content;
    }
}

View File

@ -0,0 +1,38 @@
package com.actionsoft.apps.coe.pal.datamigration.util.htmltodocx;
/**
 * Font sizes used for the supported title levels, keyed by title name.
 *
 * @author baizp
 * @date 2022/6/24 15:58
 */
public enum TitleFontEnum {
    H1("h1", 24),
    H2("h2", 22),
    H3("h3", 12),
    H7("h7",12)
    ;

    private final String title;
    private final Integer font;

    TitleFontEnum(String title, Integer font) {
        this.title = title;
        this.font = font;
    }

    /** @return the title key of this level */
    public String getTitle() {
        return title;
    }

    /** @return the font size of this level */
    public Integer getFont() {
        return font;
    }

    /**
     * Looks up the font size for a title key.
     *
     * @param title title key; {@code null} matches nothing
     * @return the font size of the matching level, or {@code null} when the
     *         key is unknown or {@code null}
     */
    public static Integer getFontByTitle(String title){
        for (TitleFontEnum e : TitleFontEnum.values()) {
            // Compare from the constant's side so a null key cannot throw.
            if (e.getTitle().equals(title)) {
                return e.getFont();
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,41 @@
package com.actionsoft.apps.coe.pal.datamigration.util.htmltodocx;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import java.util.List;
import java.util.Map;
/**
* @author baizp
* @Description:
* @date 2022/6/24 15:54
*/
public class XWPFDocumentUtil {
public static void wordInsertRitchText(XWPFDocument doc, List<Map<String, Object>> ritchtextMap) {
try {
int i = 0;
long beginTime = System.currentTimeMillis();
// 如果需要替换多份富文本通过Map来操作key:要替换的标记value要替换的富文本内容
for (Map<String, Object> mapList : ritchtextMap) {
for (Map.Entry<String, Object> entry : mapList.entrySet()) {
i++;
for (XWPFParagraph paragraph : doc.getParagraphs()) {
if (entry.getKey().equals(paragraph.getText().trim())) {
// 在标记处插入指定富文本内容
HtmlUtils.resolveHtml(entry.getValue().toString(), doc, paragraph);
if (i == ritchtextMap.size()) {
//当导出最后一个富文本时 删除需要替换的标记
doc.removeBodyElement(doc.getPosOfParagraph(paragraph));
}
break;
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}