代码拉取完成,页面将自动刷新
同步操作将从 ofdrw/ofdrw 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
package org.ofdrw.reader;
import org.junit.jupiter.api.Test;
import org.ofdrw.core.basicStructure.doc.CT_PageArea;
import org.ofdrw.core.basicStructure.pageObj.Page;
import org.ofdrw.core.basicType.ST_Box;
import org.ofdrw.reader.extractor.ExtractorFilter;
import org.ofdrw.reader.extractor.RegionTextExtractorFilter;
import java.awt.*;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Test cases for content (text) extraction from OFD documents.
 *
 * @since 2020-09-21 23:09:24
 */
class ContentExtractorTest {

    /** Sample document shared by most tests in this class. */
    private static final Path SRC = Paths.get("src/test/resources/helloworld.ofd");

    /** The single line of text the tests expect to extract from the sample documents. */
    private static final String EXPECTED_TEXT = "你好呀,OFD Reader&Writer!";

    /**
     * Extracts the text of a specific page.
     *
     * @throws IOException if the document cannot be opened or read
     */
    @Test
    void getPageContent() throws IOException {
        try (OFDReader reader = new OFDReader(SRC)) {
            ContentExtractor extractor = new ContentExtractor(reader);
            List<String> pageContent = extractor.getPageContent(1);
            System.out.println(pageContent);
            // JUnit 5 signature is assertEquals(expected, actual).
            assertEquals(1, pageContent.size());
            assertEquals(EXPECTED_TEXT, pageContent.get(0));
        }
    }

    /**
     * Extracts the text located inside a rectangular region of a page.
     *
     * @throws IOException if the document cannot be opened or read
     */
    @Test
    void extractByFilter() throws IOException {
        try (OFDReader reader = new OFDReader("src/test/resources/keyword.ofd")) {
            CT_PageArea area = reader.getPage(1).getArea();
            System.out.println(area.getPhysicalBox());

            Rectangle rectangle = new Rectangle(0, 0, 283, 120);
            ExtractorFilter filter = new RegionTextExtractorFilter(rectangle);
            ContentExtractor extractor = new ContentExtractor(reader, filter);
            List<String> pageContent = extractor.getPageContent(1);
            // NOTE(review): this test only prints the result and asserts nothing;
            // add assertions once the expected text for this region is known.
            System.out.println(pageContent);
        }
    }

    /**
     * Extracts the text that appears on all pages.
     *
     * @throws IOException if the document cannot be opened or read
     */
    @Test
    void extractAll() throws IOException {
        try (OFDReader reader = new OFDReader(SRC)) {
            ContentExtractor extractor = new ContentExtractor(reader);
            List<String> pageContent = extractor.extractAll();
            System.out.println(pageContent);
            assertEquals(1, pageContent.size());
            assertEquals(EXPECTED_TEXT, pageContent.get(0));
        }
    }

    /**
     * Extracts text from a document whose objects are wrapped in a PageBlock.
     *
     * @throws IOException if the document cannot be opened or read
     */
    @Test
    void extractAllPageBlock() throws IOException {
        Path pageBlockSrc = Paths.get("src/test/resources/helloworld_with_pageblock.ofd");
        try (OFDReader reader = new OFDReader(pageBlockSrc)) {
            ContentExtractor extractor = new ContentExtractor(reader);
            List<String> pageContent = extractor.extractAll();
            System.out.println(pageContent);
            assertEquals(1, pageContent.size());
            assertEquals(EXPECTED_TEXT, pageContent.get(0));
        }
    }

    /**
     * Page content traversal: the callback passed to {@code traverse} receives
     * a page number and that page's contents, allowing per-page processing.
     *
     * @throws IOException if the document cannot be opened or read
     */
    @Test
    void traverse() throws IOException {
        try (OFDReader reader = new OFDReader(SRC)) {
            ContentExtractor extractor = new ContentExtractor(reader);
            extractor.traverse((pageNum, contents) -> {
                // Any per-page processing can be done here.
                assertEquals(1, contents.size());
                assertEquals(EXPECTED_TEXT, contents.get(0));
            });
        }
    }
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。