From 91d17d5b9b575202019996b4bf111e088b88aa97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20T=C3=A2che?= Date: Mon, 18 Jan 2021 11:13:52 +0100 Subject: [PATCH 1/5] Adding Pattern constructor in TextNavigation and compiling pattern only once --- .../incubator/search/TextNavigation.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java index 9e90d27a64..61b39a738a 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java @@ -37,7 +37,7 @@ public class TextNavigation extends Navigation { private String mMatchedElementName = "text:p,text:h"; - private String mPatternText; + private Pattern mPattern; private OdfTextDocument mTextDocument; private TextSelection mCurrentSelectedItem; private String mCurrentText; @@ -51,10 +51,19 @@ public class TextNavigation extends Navigation { * @param doc the navigation scope */ public TextNavigation(String pattern, OdfTextDocument doc) { - this.mPatternText = pattern; - mTextDocument = doc; - mCurrentSelectedItem = null; - mbFinishFindInHeaderFooter = false; + this(Pattern.compile(pattern), doc); + } + + /** + * Construct TextNavigation with matched condition and navigation scope + * @param pattern the Pattern object to search with + * @param doc the navigation scope + */ + public TextNavigation(Pattern pattern, OdfTextDocument doc){ + this.mPattern = pattern; + mTextDocument = doc; + mCurrentSelectedItem = null; + mbFinishFindInHeaderFooter = false; } // the matched text might exist in header/footer @@ -70,8 +79,7 @@ private TextSelection findInHeaderFooter(TextSelection selected) { String content = textProcessor.getText(containerElement); int nextIndex = -1; - Pattern pattern = Pattern.compile(mPatternText); - Matcher matcher = pattern.matcher(content); + Matcher matcher = mPattern.matcher(content); // start from the end index of the selected item if (matcher.find(index + selected.getText().length())) { // here just consider \n\r\t occupy one char @@ -148,8 +156,7 @@ private TextSelection findnext(TextSelection selected) { String content = textProcessor.getText(containerElement); int nextIndex = -1; - Pattern pattern = Pattern.compile(mPatternText); - Matcher matcher = pattern.matcher(content); + Matcher matcher = mPattern.matcher(content); // start from the end index of the selected item if (matcher.find(index + selected.getText().length())) { // here just consider \n\r\t occupy one char @@ -203,9 +210,8 @@ public boolean match(Node element) { OdfWhitespaceProcessor textProcessor = new OdfWhitespaceProcessor(); String content = textProcessor.getText(element); - Pattern pattern = Pattern.compile(mPatternText); - Matcher matcher = pattern.matcher(content); - while (matcher.find()) { + Matcher matcher = mPattern.matcher(content); + if (matcher.find()) { // here just consider \n\r\t occupy one char mCurrentIndex = matcher.start(); int eIndex = matcher.end(); From 40b5be40afb8da0fb6ce3ed780c17a2ec5aea039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20T=C3=A2che?= Date: Mon, 18 Jan 2021 11:30:15 +0100 Subject: [PATCH 2/5] TextNavigation cleanup --- .../incubator/search/TextNavigation.java | 56 ++++++++----------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java index 61b39a738a..82611c9127 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java @@ -36,9 +36,9 @@ */ public class TextNavigation extends Navigation { - private String mMatchedElementName = "text:p,text:h"; - private Pattern mPattern; - private OdfTextDocument mTextDocument; + private static final String mMatchedElementName = "text:p,text:h"; + private final Pattern mPattern; + private final OdfTextDocument mTextDocument; private TextSelection mCurrentSelectedItem; private String mCurrentText; private int mCurrentIndex; @@ -73,20 +73,7 @@ private TextSelection findInHeaderFooter(TextSelection selected) { OdfElement element = null; if (selected != null) { - OdfElement containerElement = selected.getContainerElement(); - int index = selected.getIndex(); - OdfWhitespaceProcessor textProcessor = new OdfWhitespaceProcessor(); - String content = textProcessor.getText(containerElement); - - int nextIndex = -1; - Matcher matcher = mPattern.matcher(content); - // start from the end index of the selected item - if (matcher.find(index + selected.getText().length())) { - // here just consider \n\r\t occupy one char - nextIndex = matcher.start(); - int eIndex = matcher.end(); - mCurrentText = content.substring(nextIndex, eIndex); - } + int nextIndex = setCurrentTextAndGetIndex(selected); if (nextIndex != -1) { TextSelection item = new TextSelection(mCurrentText, selected.getContainerElement(), nextIndex); @@ -151,9 +138,26 @@ private TextSelection findnext(TextSelection selected) { } OdfElement containerElement = selected.getContainerElement(); + int nextIndex = setCurrentTextAndGetIndex(selected); + if (nextIndex != -1) { + TextSelection item = + new TextSelection(mCurrentText, containerElement, nextIndex); + return item; + } else { + OdfElement element = (OdfElement) getNextMatchElement(containerElement); + if (element != null) { + TextSelection item = new TextSelection(mCurrentText, element, mCurrentIndex); + return item; + } else { + return null; + } + } + } + + private int setCurrentTextAndGetIndex(TextSelection selected){ int index = selected.getIndex(); OdfWhitespaceProcessor textProcessor = new OdfWhitespaceProcessor(); - String content = textProcessor.getText(containerElement); + String content = textProcessor.getText(selected.getContainerElement()); int nextIndex = -1; Matcher matcher = mPattern.matcher(content); @@ -164,19 +168,7 @@ private TextSelection findnext(TextSelection selected) { int eIndex = matcher.end(); mCurrentText = content.substring(nextIndex, eIndex); } - if (nextIndex != -1) { - TextSelection item = - new TextSelection(mCurrentText, selected.getContainerElement(), nextIndex); - return item; - } else { - OdfElement element = (OdfElement) getNextMatchElement((Node) containerElement); - if (element != null) { - TextSelection item = new TextSelection(mCurrentText, element, mCurrentIndex); - return item; - } else { - return null; - } - } + return nextIndex; } /* (non-Javadoc) @@ -206,7 +198,7 @@ public boolean hasNext() { @Override public boolean match(Node element) { if (element instanceof OdfElement) { - if (mMatchedElementName.indexOf(element.getNodeName()) != -1) { + if (mMatchedElementName.contains(element.getNodeName())) { OdfWhitespaceProcessor textProcessor = new OdfWhitespaceProcessor(); String content = textProcessor.getText(element); From 6e7cf93863f96696e4bc9c82e31821c40cec7006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20T=C3=A2che?= Date: Wed, 20 Jan 2021 10:28:43 +0100 Subject: [PATCH 3/5] Parent refresh prototype --- .../odfdom/incubator/search/Selection.java | 56 ++++++++++++++++--- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Selection.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Selection.java index 5a0676f829..378c7d2a38 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Selection.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Selection.java @@ -20,7 +20,10 @@ import java.util.Hashtable; import java.util.Vector; + +import org.odftoolkit.odfdom.incubator.doc.text.OdfWhitespaceProcessor; import org.odftoolkit.odfdom.pkg.OdfElement; +import org.w3c.dom.Node; /** * Abstract class Selection describe one of the matched results The selection can be recognized by @@ -220,16 +223,51 @@ public static void unregisterItem(Selection item) { * @param offset the offset * @param positionIndex the mIndex of a certain position */ - public static synchronized void refresh( - OdfElement containerElement, int offset, int positionIndex) { - if (repository.containsKey(containerElement)) { - Vector selections = repository.get(containerElement); - for (int i = 0; i < selections.size(); i++) { - if (selections.get(i).getIndex() >= positionIndex) { - selections.get(i).refresh(offset); - } + public synchronized static void refresh(OdfElement containerElement, int offset, int positionIndex) { + refreshParent(containerElement, offset); + if (repository.containsKey(containerElement)) { + Vector selections = repository.get(containerElement); + for (Selection selection : selections) { + if (selection.getIndex() >= positionIndex) { + selection.refresh(offset); + } + } } - } + } + + private static void refreshParent(OdfElement containerElement, int offset) { + OdfElement parent = getOdfParent(containerElement); + while (parent != null) { + if (repository.containsKey(parent)) { + Vector selections = repository.get(parent); + for (Selection selection : selections) { + if (isAfter(selection, containerElement)) { + selection.refresh(offset); + } + } + } + parent = getOdfParent(parent); + } + } + + private static OdfElement getOdfParent(OdfElement element) { + Node parent = element.getParentNode(); + while (parent != null && !(parent instanceof OdfElement) && parent != parent.getParentNode()) { + parent = parent.getParentNode(); + } + return parent instanceof OdfElement ? (OdfElement) parent : null; + } + + private static boolean isAfter(Selection selection, OdfElement reference) { + //Assumes that reference is a child of selection.getElement + final OdfWhitespaceProcessor processor = new OdfWhitespaceProcessor(); + final String text = processor.getText(reference); + final int idx = processor.getText(selection.getElement()).indexOf(text); + if (idx == -1 || idx != processor.getText(selection.getElement()).lastIndexOf(text)) { + //TODO obviously don't do that, need to work with Text nodes perhaps + throw new IllegalStateException(); + } + return selection.getIndex() >= idx + text.length(); } private SelectionManager() {} From d629569317dbc3a10e0560fa17cf1a886c94ceca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20T=C3=A2che?= Date: Wed, 20 Jan 2021 10:59:53 +0100 Subject: [PATCH 4/5] Prototype for getNextMatchElement --- .../odfdom/incubator/search/Navigation.java | 12 ++++++++++++ .../odfdom/incubator/search/TextNavigation.java | 12 ++++++++++-- .../odfdom/incubator/search/TextStyleNavigation.java | 7 ++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Navigation.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Navigation.java index 02f81b521f..f0058a56b9 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Navigation.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Navigation.java @@ -20,6 +20,8 @@ import org.w3c.dom.Node; +import java.util.Set; + /** * Abstract class Navigation used to navigate the document and find the matched element by the user * defined conditions @@ -51,6 +53,13 @@ public abstract class Navigation { */ public abstract boolean match(Node element); + /** + * Checks if the given node can be matched by the navigation + * @param node the node + * @return true if the node is of interest for the navigation + */ + public abstract boolean isMatchingNode(Node node); + /** * get the next matched element in a whole dom tree * @@ -63,6 +72,9 @@ protected Node getNextMatchElement(Node startpoint) { Node currentpoint = startpoint; while ((matchedNode == null) && (currentpoint != null)) { + if (isMatchingNode(currentpoint) && currentpoint != startpoint){ + //TODO do something here + } Node sibling = currentpoint.getNextSibling(); if ((sibling != null) && (sibling.getNodeType() == Node.TEXT_NODE || sibling.getNodeType() == Node.ELEMENT_NODE) diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java index 82611c9127..b3bceab1cf 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java @@ -18,6 +18,9 @@ */ package org.odftoolkit.odfdom.incubator.search; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; @@ -36,7 +39,7 @@ */ public class TextNavigation extends Navigation { - private static final String mMatchedElementName = "text:p,text:h"; + private static final Set mMatchedElementNames = new HashSet<>(Arrays.asList("text:p", "text:h")); private final Pattern mPattern; private final OdfTextDocument mTextDocument; private TextSelection mCurrentSelectedItem; @@ -198,7 +201,7 @@ public boolean hasNext() { @Override public boolean match(Node element) { if (element instanceof OdfElement) { - if (mMatchedElementName.contains(element.getNodeName())) { + if (mMatchedElementNames.contains(element.getNodeName())) { OdfWhitespaceProcessor textProcessor = new OdfWhitespaceProcessor(); String content = textProcessor.getText(element); @@ -214,4 +217,9 @@ public boolean match(Node element) { } return false; } + + @Override + public boolean isMatchingNode(Node node) { + return mMatchedElementNames.contains(node.getNodeName()); + } } diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextStyleNavigation.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextStyleNavigation.java index ae5e086938..0397e3710a 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextStyleNavigation.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextStyleNavigation.java @@ -155,7 +155,12 @@ public boolean match(Node element) { return match; } - private void getIndex(NodeList nodes, Node element) { + @Override + public boolean isMatchingNode(final Node node) { + return node instanceof OdfStylableElement; + } + + private void getIndex(NodeList nodes, Node element) { for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); if (node == element) { From f6c3dd6ee0be755011cae8769f663d10dc9c434c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20T=C3=A2che?= Date: Wed, 20 Jan 2021 13:23:51 +0100 Subject: [PATCH 5/5] Prototype implementation of parentMatches --- .../odfdom/incubator/search/Navigation.java | 47 +++++++++++-------- .../incubator/search/TextNavigation.java | 34 ++++++++++++-- .../incubator/search/TextStyleNavigation.java | 8 +++- 3 files changed, 65 insertions(+), 24 deletions(-) diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Navigation.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Navigation.java index f0058a56b9..dc65ded7b6 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Navigation.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/Navigation.java @@ -72,31 +72,40 @@ protected Node getNextMatchElement(Node startpoint) { Node currentpoint = startpoint; while ((matchedNode == null) && (currentpoint != null)) { - if (isMatchingNode(currentpoint) && currentpoint != startpoint){ - //TODO do something here - } - Node sibling = currentpoint.getNextSibling(); - if ((sibling != null) - && (sibling.getNodeType() == Node.TEXT_NODE || sibling.getNodeType() == Node.ELEMENT_NODE) - && (match(sibling))) { - matchedNode = sibling; - } - while ((sibling != null) && (matchedNode == null)) { - if ((sibling.getNodeType() == Node.TEXT_NODE - || sibling.getNodeType() == Node.ELEMENT_NODE)) { - matchedNode = traverseTree(sibling); - } - sibling = sibling.getNextSibling(); - if (sibling != null && match(sibling)) { - matchedNode = sibling; - } + if (isMatchingNode(currentpoint) && currentpoint != startpoint && parentMatches(currentpoint, startpoint)){ + matchedNode = currentpoint; + } else { + Node sibling = currentpoint.getNextSibling(); + if ((sibling != null) + && (sibling.getNodeType() == Node.TEXT_NODE || sibling.getNodeType() == Node.ELEMENT_NODE) + && (match(sibling))) { + matchedNode = sibling; + } + while ((sibling != null) && (matchedNode == null)) { + if ((sibling.getNodeType() == Node.TEXT_NODE + || sibling.getNodeType() == Node.ELEMENT_NODE)) { + matchedNode = traverseTree(sibling); + } + sibling = sibling.getNextSibling(); + if (sibling != null && match(sibling)) { + matchedNode = sibling; + } + } + currentpoint = currentpoint.getParentNode(); } - currentpoint = currentpoint.getParentNode(); } return matchedNode; } + /** + * Checks that a parent node matches given the current node + * @param parent The parent node + * @param current The current node + * @return true if the parent matches, false otherwise + */ + protected abstract boolean parentMatches(final Node parent, final Node current); + /** * get the next matched element in a sub tree * diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java index b3bceab1cf..58c99a534a 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextNavigation.java @@ -218,8 +218,34 @@ public boolean match(Node element) { return false; } - @Override - public boolean isMatchingNode(Node node) { - return mMatchedElementNames.contains(node.getNodeName()); - } + @Override + public boolean isMatchingNode(Node node) { + return mMatchedElementNames.contains(node.getNodeName()); + } + + @Override + protected boolean parentMatches(final Node parent, final Node current) { + if (parent instanceof OdfElement) { + if (mMatchedElementNames.contains(parent.getNodeName())) { + OdfWhitespaceProcessor textProcessor = new OdfWhitespaceProcessor(); + String content = textProcessor.getText(parent); + String childContent = textProcessor.getText(current); + int idx = content.indexOf(childContent); + if (idx == -1 || content.lastIndexOf(childContent) != idx){ + //TODO + throw new IllegalStateException(); + } + + Matcher matcher = mPattern.matcher(content); + if (matcher.find(idx + childContent.length())) { + // here just consider \n\r\t occupy one char + mCurrentIndex = matcher.start(); + int eIndex = matcher.end(); + mCurrentText = content.substring(mCurrentIndex, eIndex); + return true; + } + } + } + return false; + } } diff --git a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextStyleNavigation.java b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextStyleNavigation.java index 0397e3710a..13164949cf 100644 --- a/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextStyleNavigation.java +++ b/odfdom/src/main/java/org/odftoolkit/odfdom/incubator/search/TextStyleNavigation.java @@ -157,7 +157,13 @@ public boolean match(Node element) { @Override public boolean isMatchingNode(final Node node) { - return node instanceof OdfStylableElement; + return node instanceof OdfStylableElement; + } + + @Override + protected boolean parentMatches(final Node parent, final Node current) { + //TODO? + return false; } private void getIndex(NodeList nodes, Node element) {