Skip to content

Commit

Permalink
Merge pull request #73 from airslate-oss/ASP-28637-fix-nested-tables
Browse files Browse the repository at this point in the history
[ASP-28637] fixed XWPFWordExtractor extracting nested table text
  • Loading branch information
alekseytatarynov authored Dec 5, 2022
2 parents 19ce327 + 131674a commit 813a4cd
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 5 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ subprojects {
apply plugin: 'de.thetaphi.forbiddenapis'
apply plugin: 'com.github.spotbugs'

version = '5.2.3-AIRSLATE-26'
version = '5.2.3-AIRSLATE-26.2'
ext {
bouncyCastleVersion = '1.70'
commonsCodecVersion = '1.15'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -541,13 +541,19 @@ private void appendBodyElementText(StringBuilder text, IBodyElement e, boolean i
} else if (e instanceof XWPFTable) {
XWPFTable eTable = (XWPFTable) e;
for (XWPFTableRow row : eTable.getRows()) {
for (XWPFTableCell cell : row.getTableCells()) {
List<XWPFTableCell> tableCells = row.getTableCells();
for (int i = 0; i < tableCells.size(); i++) {
XWPFTableCell cell = tableCells.get(i);
List<IBodyElement> localBodyElements = cell.getBodyElements();
for (int i = 0; i < localBodyElements.size(); i++) {
boolean localIsLast = (i == localBodyElements.size() - 1);
appendBodyElementText(text, localBodyElements.get(i), localIsLast);
for (int j = 0; j < localBodyElements.size(); j++) {
boolean localIsLast = (j == localBodyElements.size() - 1);
appendBodyElementText(text, localBodyElements.get(j), localIsLast);
}
if (i < tableCells.size() - 1) {
text.append("\t");
}
}
text.append('\n');
}

if (!isLast) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,14 @@ Licensed to the Apache Software Foundation (ASF) under one or more
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.StringUtil;
import org.apache.poi.xwpf.XWPFTestDataSamples;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
Expand Down Expand Up @@ -478,4 +482,41 @@ void bug55966() throws IOException {
assertEquals(expected, actual);
}
}

/**
 * Verifies the text that {@link XWPFWordExtractor} produces for a document
 * containing tables nested inside table cells.
 *
 * <p>The expected text below pins the exact placement of tab and newline
 * characters around the outer and nested tables of the
 * {@code NestedTables.docx} sample.
 *
 * @throws Exception if the sample document cannot be opened or closed
 */
@Test
void testExtractorAddBreaksForNestedTable() throws Exception {
    String expected = "Table 1\n" +
        "{{table(a)}}\t\n" +
        "{{h}}Name\t\n" +
        "{{name}}\t{{table(b)}}\t\n" +
        "{{h}}Product\tFillableField\n" +
        "{{Product}}\t{{t:t;r:y;l:\"text_field_1\";}} \n" +
        "{{endtable}}\t\n" +
        "\n" +
        "\n" +
        "{{endtable}}\t\n" +
        "\n" +
        "\n" +
        "\n" +
        "Table 2\n" +
        "{{table(t1)}}\t\n" +
        "{{table(t2)}}\n" +
        "{{name}}\n" +
        "{{endtable}}\n" +
        "\n" +
        "\t{{name}}\n" +
        "\t\n" +
        "{{endtable}}\t\n" +
        "\n" +
        "\n";

    // Both the document and the extractor are managed by try-with-resources,
    // so the extractor is released even when getText() or the assertion throws.
    try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("NestedTables.docx");
         XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
        assertEquals(expected, extractor.getText());
    }
}
}
Binary file added test-data/document/NestedTables.docx
Binary file not shown.

0 comments on commit 813a4cd

Please sign in to comment.