From f448fc6a13e0f8cd8819b90d834abe81f4255801 Mon Sep 17 00:00:00 2001 From: Henning von Bargen Date: Thu, 12 Jan 2023 16:09:21 +0100 Subject: [PATCH 1/3] Add special handling for Soft Hyphen (SHY) unicode symbol to DOCX emitter and PDF emitter. --- .../wpml/writer/AbstractWordXmlWriter.java | 14 +++++- .../report/engine/layout/pdf/hyphen/Word.java | 13 ++++++ .../engine/nLayout/area/impl/TextArea.java | 46 ++++++++++++++++--- .../nLayout/area/impl/TextCompositor.java | 16 ++++++- 4 files changed, 79 insertions(+), 10 deletions(-) diff --git a/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java b/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java index 307c1e60ecb..46415762a20 100644 --- a/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java +++ b/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java @@ -50,6 +50,8 @@ public abstract class AbstractWordXmlWriter { public static final int INDEX_NOTFOUND = -1; + protected final char SHY_CHAR = (char) 173; + protected int imageId = 75; protected int bookmarkId = 0; @@ -554,9 +556,17 @@ private void writeString(String txt, IStyle style) { start++; } end = start + 1; - continue; + } else if (ch == SHY_CHAR) { + // output previous text + writeText(txt.substring(start, end)); + writer.closeTag("w:t"); //$NON-NLS-1$ + writer.cdata(""); // $NON-LS-1$ + writer.openTag("w:t"); //$NON-NLS-1$ + start = end + 1; + end++; + } else { + end++; } - end++; } writeText(txt.substring(start)); diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java index 
a69bc3b17e1..e750812014a 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java @@ -18,6 +18,19 @@ public class Word { protected int end; protected String text; + private boolean keepLastSHY = true; + + /** + * Should a trailing SHY symbol be kept or omitted? + */ + public boolean isKeepLastSHY() { + return keepLastSHY; + } + + public void setKeepLastSHY(boolean keepLastSHY) { + this.keepLastSHY = keepLastSHY; + } + public Word(String text, int start, int end) { this.text = text; this.start = start; diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java index a7834baa665..63da0ecec42 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java @@ -28,6 +28,12 @@ public class TextArea extends AbstractArea implements ITextArea { protected String cachedText = null; + private final char SHY_CHAR = (char) 173; + + private boolean removeShy = true; // FIXME This should be configurable, depending on the emitter. 
+ + private boolean keepLastSHY = true; + protected int runLevel; protected TextStyle style; @@ -115,12 +121,30 @@ public int getTextLength() { return textLength; } - private String calculateText() { + public String getRawText() { + return text.substring(offset, offset + textLength); + } + + private String calculateText(boolean removeShy) { if (blankLine || text == null) { return ""; - } else { - return text.substring(offset, offset + textLength); } + String shyText = text.substring(offset, offset + textLength); + if (removeShy) { + if (shyText.indexOf("Flammen") >= 0) { + System.out.println(shyText); + } + // Remove all SHY characters except a trailing one. + // FIXME: This is possibly worth performance tuning! + int indxShy = shyText.indexOf(SHY_CHAR); + for (; indxShy >= 0; indxShy = shyText.indexOf(SHY_CHAR)) { + String remaining = shyText.substring(indxShy + 1); + if (keepLastSHY && remaining.strip().length() == 0) + break; + shyText = shyText.substring(0, indxShy) + remaining; + } + } + return shyText; } public void addWord(int textLength, float wordWidth) { @@ -164,7 +188,7 @@ public TextStyle getStyle() { @Override public String getLogicalOrderText() { - return calculateText(); + return calculateText(removeShy); } /** @@ -177,9 +201,9 @@ public String getLogicalOrderText() { public String getText() { if (cachedText == null) { if ((runLevel & 1) == 0) { - cachedText = calculateText(); + cachedText = calculateText(removeShy); } else { - cachedText = flip(calculateText()); + cachedText = flip(calculateText(removeShy)); } } return cachedText; @@ -237,4 +261,14 @@ public void setWhiteSpaceNumber(int whiteSpaceNumber) { public boolean needClip() { return needClip; } + + public boolean isKeepLastSHY() { + return keepLastSHY; + } + + public void setKeepLastSHY(boolean keepLastSHY) { + this.keepLastSHY = keepLastSHY; + } + + } diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java 
b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java index 3c75bcaaf3a..5c32898ba92 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java @@ -39,6 +39,8 @@ public class TextCompositor { private FontInfo fontInfo; private int runLevel; + private final String SHY_STRING = "\u00ad"; + /** offset relative to the text in the textContent. */ int offset = 0; @@ -157,8 +159,9 @@ private TextArea getNextTextArea(int maxLineWidth) { textArea.setMaxWidth(maxLineWidth); textArea.setWidth(0); addWordIntoTextArea(textArea, remainWord); + textArea.setKeepLastSHY(remainWord.isKeepLastSHY()); remainWord = null; - return textArea; + return textArea; // Why do we return here already? } // iterate the remainWords. if (null == remainWords || !remainWords.hasWord()) { @@ -250,13 +253,19 @@ private void addWordsIntoTextArea(TextArea textArea, IWordRecognizer words) { * */ private void addWordIntoTextArea(TextArea textArea, Word word) { + // get the word's size int textLength = word.getLength(); int wordWidth = getWordWidth(fontInfo, word); // append the letter spacing wordWidth += textStyle.getLetterSpacing() * textLength; int adjustWordSize = fontInfo.getItalicAdjust() + wordWidth; - if (textArea.hasSpace(adjustWordSize)) { + int hyphenWidth = 0; + if (word.getValue().endsWith(SHY_STRING)) { + hyphenWidth = getTextWidth(fontInfo, "-"); // We are using the minus for computing the hyphen size, because + // getTextWidth would return 0 width for SHY. 
+ } + if (textArea.hasSpace(adjustWordSize + hyphenWidth)) { addWord(textArea, textLength, wordWidth); wordVestige = null; if (remainWords.hasWord()) { @@ -289,6 +298,9 @@ private void addWordIntoTextArea(TextArea textArea, Word word) { } else { wordVestige = null; remainWord = word; + if (remainWords.hasWord()) { + remainWord.setKeepLastSHY(false); + } } textArea.setLineBreak(true); hasLineBreak = true; From 787c67a88f3f249d59ee8dc0b6f0690859a27e67 Mon Sep 17 00:00:00 2001 From: Henning von Bargen Date: Thu, 12 Jan 2023 16:09:21 +0100 Subject: [PATCH 2/3] Add special handling for Soft Hyphen (SHY) unicode symbol to DOCX emitter and PDF emitter. --- .../wpml/writer/AbstractWordXmlWriter.java | 14 +++++- .../report/engine/layout/pdf/hyphen/Word.java | 13 ++++++ .../engine/nLayout/area/impl/TextArea.java | 43 ++++++++++++++++--- .../nLayout/area/impl/TextCompositor.java | 16 ++++++- 4 files changed, 76 insertions(+), 10 deletions(-) diff --git a/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java b/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java index 307c1e60ecb..46415762a20 100644 --- a/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java +++ b/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java @@ -50,6 +50,8 @@ public abstract class AbstractWordXmlWriter { public static final int INDEX_NOTFOUND = -1; + protected final char SHY_CHAR = (char) 173; + protected int imageId = 75; protected int bookmarkId = 0; @@ -554,9 +556,17 @@ private void writeString(String txt, IStyle style) { start++; } end = start + 1; - continue; + } else if (ch == SHY_CHAR) { + // output previous text + writeText(txt.substring(start, end)); + writer.closeTag("w:t"); 
//$NON-NLS-1$ + writer.cdata(""); // $NON-LS-1$ + writer.openTag("w:t"); //$NON-NLS-1$ + start = end + 1; + end++; + } else { + end++; } - end++; } writeText(txt.substring(start)); diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java index a69bc3b17e1..e750812014a 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java @@ -18,6 +18,19 @@ public class Word { protected int end; protected String text; + private boolean keepLastSHY = true; + + /** + * Should a trailing SHY symbol be kept or omitted? + */ + public boolean isKeepLastSHY() { + return keepLastSHY; + } + + public void setKeepLastSHY(boolean keepLastSHY) { + this.keepLastSHY = keepLastSHY; + } + public Word(String text, int start, int end) { this.text = text; this.start = start; diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java index a7834baa665..13c003eeefd 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java @@ -28,6 +28,12 @@ public class TextArea extends AbstractArea implements ITextArea { protected String cachedText = null; + private final char SHY_CHAR = (char) 173; + + private boolean removeShy = true; // FIXME This should be configurable, depending on the emitter. 
+ + private boolean keepLastSHY = true; + protected int runLevel; protected TextStyle style; @@ -115,12 +121,27 @@ public int getTextLength() { return textLength; } - private String calculateText() { + public String getRawText() { + return text.substring(offset, offset + textLength); + } + + private String calculateText(boolean removeShy) { if (blankLine || text == null) { return ""; - } else { - return text.substring(offset, offset + textLength); } + String shyText = text.substring(offset, offset + textLength); + if (removeShy) { + // Remove all SHY characters except a trailing one. + // FIXME: This is possibly worth performance tuning! + int indxShy = shyText.indexOf(SHY_CHAR); + for (; indxShy >= 0; indxShy = shyText.indexOf(SHY_CHAR)) { + String remaining = shyText.substring(indxShy + 1); + if (keepLastSHY && remaining.strip().length() == 0) + break; + shyText = shyText.substring(0, indxShy) + remaining; + } + } + return shyText; } public void addWord(int textLength, float wordWidth) { @@ -164,7 +185,7 @@ public TextStyle getStyle() { @Override public String getLogicalOrderText() { - return calculateText(); + return calculateText(removeShy); } /** @@ -177,9 +198,9 @@ public String getLogicalOrderText() { public String getText() { if (cachedText == null) { if ((runLevel & 1) == 0) { - cachedText = calculateText(); + cachedText = calculateText(removeShy); } else { - cachedText = flip(calculateText()); + cachedText = flip(calculateText(removeShy)); } } return cachedText; @@ -237,4 +258,14 @@ public void setWhiteSpaceNumber(int whiteSpaceNumber) { public boolean needClip() { return needClip; } + + public boolean isKeepLastSHY() { + return keepLastSHY; + } + + public void setKeepLastSHY(boolean keepLastSHY) { + this.keepLastSHY = keepLastSHY; + } + + } diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java 
b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java index 3c75bcaaf3a..5c32898ba92 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java @@ -39,6 +39,8 @@ public class TextCompositor { private FontInfo fontInfo; private int runLevel; + private final String SHY_STRING = "\u00ad"; + /** offset relative to the text in the textContent. */ int offset = 0; @@ -157,8 +159,9 @@ private TextArea getNextTextArea(int maxLineWidth) { textArea.setMaxWidth(maxLineWidth); textArea.setWidth(0); addWordIntoTextArea(textArea, remainWord); + textArea.setKeepLastSHY(remainWord.isKeepLastSHY()); remainWord = null; - return textArea; + return textArea; // Why do we return here already? } // iterate the remainWords. if (null == remainWords || !remainWords.hasWord()) { @@ -250,13 +253,19 @@ private void addWordsIntoTextArea(TextArea textArea, IWordRecognizer words) { * */ private void addWordIntoTextArea(TextArea textArea, Word word) { + // get the word's size int textLength = word.getLength(); int wordWidth = getWordWidth(fontInfo, word); // append the letter spacing wordWidth += textStyle.getLetterSpacing() * textLength; int adjustWordSize = fontInfo.getItalicAdjust() + wordWidth; - if (textArea.hasSpace(adjustWordSize)) { + int hyphenWidth = 0; + if (word.getValue().endsWith(SHY_STRING)) { + hyphenWidth = getTextWidth(fontInfo, "-"); // We are using the minus for computing the hyphen size, because + // getTextWidth would return 0 width for SHY. 
+ } + if (textArea.hasSpace(adjustWordSize + hyphenWidth)) { addWord(textArea, textLength, wordWidth); wordVestige = null; if (remainWords.hasWord()) { @@ -289,6 +298,9 @@ private void addWordIntoTextArea(TextArea textArea, Word word) { } else { wordVestige = null; remainWord = word; + if (remainWords.hasWord()) { + remainWord.setKeepLastSHY(false); + } } textArea.setLineBreak(true); hasLineBreak = true; From 30d5a20f6a8f193b63383cb6edb2d40731aab937 Mon Sep 17 00:00:00 2001 From: Henning von Bargen Date: Mon, 16 Jan 2023 17:39:43 +0100 Subject: [PATCH 3/3] Improve readibility and documentation for SHY support --- .../wpml/writer/AbstractWordXmlWriter.java | 39 ++++-- .../report/engine/layout/pdf/hyphen/Word.java | 57 ++++++++- .../engine/nLayout/area/impl/TextArea.java | 119 ++++++++++++++---- .../nLayout/area/impl/TextCompositor.java | 44 +++++-- 4 files changed, 208 insertions(+), 51 deletions(-) diff --git a/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java b/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java index 46415762a20..adbb0197bd8 100644 --- a/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java +++ b/engine/org.eclipse.birt.report.engine.emitter.wpml/src/org/eclipse/birt/report/engine/emitter/wpml/writer/AbstractWordXmlWriter.java @@ -32,6 +32,10 @@ import org.eclipse.birt.report.engine.layout.pdf.util.PropertyUtil; import org.w3c.dom.css.CSSValue; +/** + * This is used for writing WordML by the DocxEmitter and by the old Word 2003 + * emitter. + */ public abstract class AbstractWordXmlWriter { protected XMLWriter writer; @@ -50,7 +54,29 @@ public abstract class AbstractWordXmlWriter { public static final int INDEX_NOTFOUND = -1; - protected final char SHY_CHAR = (char) 173; + /** + *

+ * The soft hyphen Unicode symbol is intended to be visible only when a line + * break occurs there. + *

+ *

+ * This hiding logic of the SHY symbol needs special attention in many emitters. + *

+ *

+ * SOFT HYPHEN is often abbreviated as SHY, which also is very descriptive, + * because this symbol is hiding inside the surrounding words most of the time. + *

+ *

+ * In most fonts, its width is defined as zero, which of course is correct only + * if it is hidden. If it is rendered, it looks similar to the minus sign. + *

+ *

+ * The Unicode standard also defines a HYPHEN symbol, which should look the same + * as the SHY symbol, but doesn't have the hiding logic. However, the HYPHEN + * symbol is rarely defined in TTF fonts. + *

+ */ + public static final char SOFT_HYPHEN = '\u00ad'; protected int imageId = 75; @@ -556,8 +582,8 @@ private void writeString(String txt, IStyle style) { start++; } end = start + 1; - } else if (ch == SHY_CHAR) { - // output previous text + } else if (ch == SOFT_HYPHEN) { + // Output a special WordML tag for the SHY symbol. writeText(txt.substring(start, end)); writer.closeTag("w:t"); //$NON-NLS-1$ writer.cdata(""); // $NON-LS-1$ @@ -1012,11 +1038,8 @@ public void writeTextInRun(int type, String txt, IStyle style, String fontFamily * @param cellWidth the width of the container in points * @return String with truncated words that surpasses the cell width */ - public String cropOverflowString(String text, IStyle style, String fontFamily, int cellWidth) {// TODO: retrieve - // font type and - // replace plain - // with - // corresponding + public String cropOverflowString(String text, IStyle style, String fontFamily, int cellWidth) { + // TODO: retrieve font type and replace plain with corresponding Font font = new Font(fontFamily, Font.PLAIN, WordUtil .parseFontSize(PropertyUtil.getDimensionValue(style.getProperty(StyleConstants.STYLE_FONT_SIZE)))); Canvas c = new Canvas(); diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java index e750812014a..cfbbc898599 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/layout/pdf/hyphen/Word.java @@ -10,27 +10,72 @@ * * Contributors: * Actuate Corporation - initial API and implementation + * Henning von Bargen - Added at least a bit of JavaDoc, added SOFT HYPHEN support. ***********************************************************************/ + package org.eclipse.birt.report.engine.layout.pdf.hyphen; +/** + *

+ * Despite its name, this describes a fragment of a word of text. + *

+ *

+ * If the word does not contain possible hyphenation / line-breaking points, + * then it is a whole word. But if the word contains Unicode MINUS or HYPHEN or + * SOFT HYPHEN symbols, then the {@link BreakIterator} splits this whole word + * into more than one Word instance. + *

+ *

+ * For example, "extra-ordinary" will be split into two Word instances "extra-" + * and "ordinary". + *

+ */ public class Word { protected int start; protected int end; protected String text; - private boolean keepLastSHY = true; + private boolean keepTrailingSoftHyphen = true; /** - * Should a trailing SHY symbol be kept or omitted? + * Should a trailing Unicode SOFT HYPHEN (SHY) symbol be kept or omitted? + * + * @return true if a trailing soft hyphen should be kept, false if it should be + * omitted. + * + * @since 4.13 */ - public boolean isKeepLastSHY() { - return keepLastSHY; + public boolean isKeepTrailingSoftHyphen() { + return keepTrailingSoftHyphen; } - public void setKeepLastSHY(boolean keepLastSHY) { - this.keepLastSHY = keepLastSHY; + /** + * Set whether a trailing Unicode SOFT HYPHEN (SHY) symbol should be kept or + * omitted. The default value is true, so this is usually only called + * to omit it. + * + * @apiNote This is not really used inside the Word class. But a Word object is + * used to transmit the information piggyback to the + * {@link org.eclipse.birt.report.engine.nLayout.area.impl.TextArea} + * object, where the information is needed. + * + * @param keepTrailingSoftHyphen whether to keep the last soft hyphen or not. + * + * @since 4.13 + */ + public void setKeepTrailingSoftHyphen(boolean keepTrailingSoftHyphen) { + this.keepTrailingSoftHyphen = keepTrailingSoftHyphen; } + /** + * Create a Word instance as a substring of a given text. + * + * @see String#substring(int,int) + * + * @param text Text + * @param start start index of the substring + * @param end end index of the substring (exclusive). 
+ */ public Word(String text, int start, int end) { this.text = text; this.start = start; diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java index 13c003eeefd..5a3e8ff8d6a 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextArea.java @@ -21,18 +21,63 @@ import org.eclipse.birt.report.engine.nLayout.area.style.TextStyle; import com.ibm.icu.text.Bidi; - +import com.ibm.icu.text.BreakIterator; + +/** + *

+ * An abstract representation of a line of styled text (e.g. with a font and font + * size specified etc.) or a fragment thereof. + *

+ */ public class TextArea extends AbstractArea implements ITextArea { protected String text; protected String cachedText = null; - private final char SHY_CHAR = (char) 173; + /** + *

+ * The soft hyphen Unicode symbol. + *

+ *

+ * It needs special handling, because it should only be visible when a + * line-break occurs there and hidden otherwise. + *

+ *

+ * See + * {@link org.eclipse.birt.report.engine.emitter.wpml.writer.AbstractWordXmlWriter#SOFT_HYPHEN} + * for more detail. + *

+ */ + private static final char SOFT_HYPHEN = '\u00ad'; - private boolean removeShy = true; // FIXME This should be configurable, depending on the emitter. + /** + *

+ * This controls if Unicode SOFT HYPHEN symbols in a text should be removed from + * the output. The default value is true - remove soft hyphens. + *

+ *

+ * By setting the system property org.eclipse.birt.softhyphen.remove to + * false, the old, incorrect behavior of keeping them can be restored. + *

+ */ + private boolean removeSoftHyphens = "true".equals(System.getProperty("org.eclipse.birt.softhyphen.remove", "true")); // $NON-NLS-1 - private boolean keepLastSHY = true; + /** + *

+ * This controls if a Unicode SOFT HYPHEN at the end of the text area should be + * kept in the output or removed with the other SOFT HYPHENs when + * {@link #removeSoftHyphens} is set. + *

+ *

+ * Note that sometimes the same visible line of text can consist of more than + * one TextAreas. The text content of these text areas are the result of a + * {@link BreakIterator}. A pre-hyphenated word, e.g. "extra\u00adordinary" will + * be split by the {@link BreakIterator} into two + * {@link org.eclipse.birt.report.engine.layout.pdf.hyphen.Word "words"} can + * result in two TextAreas with the texts " + */ + private boolean keepTrailingSoftHyphen = true; protected int runLevel; @@ -121,27 +166,35 @@ public int getTextLength() { return textLength; } - public String getRawText() { - return text.substring(offset, offset + textLength); - } - - private String calculateText(boolean removeShy) { + /** + *

+ * Get a string with the text this TextArea represents. + *

+ *

+ * SOFT HYPHEN Unicode symbols inside the text are usually removed (depending on + * {@link #removeSoftHyphens}), except a trailing one (depending on + * {@link #keepTrailingSoftHyphen}). + *

+ * + * @return The unformatted text. + */ + private String calculateText() { if (blankLine || text == null) { return ""; } - String shyText = text.substring(offset, offset + textLength); - if (removeShy) { - // Remove all SHY characters except a trailing one. + String textResult = text.substring(offset, offset + textLength); + if (removeSoftHyphens) { + // Remove all Unicode SOFT HYPHEN symbols except a trailing one. // FIXME: This is possibly worth performance tuning! - int indxShy = shyText.indexOf(SHY_CHAR); - for (; indxShy >= 0; indxShy = shyText.indexOf(SHY_CHAR)) { - String remaining = shyText.substring(indxShy + 1); - if (keepLastSHY && remaining.strip().length() == 0) + int indxSoftHyphen = textResult.indexOf(SOFT_HYPHEN); + for (; indxSoftHyphen >= 0; indxSoftHyphen = textResult.indexOf(SOFT_HYPHEN)) { + String remaining = textResult.substring(indxSoftHyphen + 1); + if (keepTrailingSoftHyphen && remaining.strip().length() == 0) break; - shyText = shyText.substring(0, indxShy) + remaining; + textResult = textResult.substring(0, indxSoftHyphen) + remaining; } } - return shyText; + return textResult; } public void addWord(int textLength, float wordWidth) { @@ -185,7 +238,7 @@ public TextStyle getStyle() { @Override public String getLogicalOrderText() { - return calculateText(removeShy); + return calculateText(); } /** @@ -198,9 +251,9 @@ public String getLogicalOrderText() { public String getText() { if (cachedText == null) { if ((runLevel & 1) == 0) { - cachedText = calculateText(removeShy); + cachedText = calculateText(); } else { - cachedText = flip(calculateText(removeShy)); + cachedText = flip(calculateText()); } } return cachedText; @@ -259,12 +312,28 @@ public boolean needClip() { return needClip; } - public boolean isKeepLastSHY() { - return keepLastSHY; + /** + * Whether a Unicode SOFT HYPHEN at the end of the text area should be kept in + * the output or removed. 
+ * + * @see #keepTrailingSoftHyphen + * + * @return true if the soft hyphen shall be kept. + */ + public boolean isKeepTrailingSoftHyphen() { + return keepTrailingSoftHyphen; } - public void setKeepLastSHY(boolean keepLastSHY) { - this.keepLastSHY = keepLastSHY; + /** + * Control whether a Unicode SOFT HYPHEN at the end of the text area should be + * kept in the output or removed. + * + * @see #keepTrailingSoftHyphen + * + * @param keepTrailingSoftHyphen true if the soft hyphen shall be kept. + */ + public void setKeepTrailingSoftHyphen(boolean keepTrailingSoftHyphen) { + this.keepTrailingSoftHyphen = keepTrailingSoftHyphen; } diff --git a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java index 5c32898ba92..331d944fbe4 100644 --- a/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java +++ b/engine/org.eclipse.birt.report.engine/src/org/eclipse/birt/report/engine/nLayout/area/impl/TextCompositor.java @@ -39,7 +39,10 @@ public class TextCompositor { private FontInfo fontInfo; private int runLevel; - private final String SHY_STRING = "\u00ad"; + /** + * @see TextArea#isKeepTrailingSoftHyphen() + */ + private static final String SOFT_HYPHEN = "\u00ad"; /** offset relative to the text in the textContent. 
*/ int offset = 0; @@ -50,7 +53,7 @@ public class TextCompositor { private IWordRecognizer remainWords; /** the remain word */ private Word remainWord; - /** the remain characters in current word after hyphenation */ + /** the remain characters in current word after word-breaking / hyphenation */ private Word wordVestige; /** @@ -159,9 +162,18 @@ private TextArea getNextTextArea(int maxLineWidth) { textArea.setMaxWidth(maxLineWidth); textArea.setWidth(0); addWordIntoTextArea(textArea, remainWord); - textArea.setKeepLastSHY(remainWord.isKeepLastSHY()); + textArea.setKeepTrailingSoftHyphen(remainWord.isKeepTrailingSoftHyphen()); remainWord = null; - return textArea; // Why do we return here already? + return textArea; + // FIXME: Why do we return here already? + // This return here in a way contradicts the idea of the algorithm, which is to + // stuff as many words as possible into a TextArea, + // because it results in a (e.g. PDF) text line consisting of two (more than + // one) TextAreas A and B, where A is a TextArea with exactly one Word (= word + // fragment) that did not fit into the previous line, and B contains the next + // Words. + // This results in slightly larger PDF files than necessary and it and makes it + // slightly harder for accessibility software to understand the file. } // iterate the remainWords. if (null == remainWords || !remainWords.hasWord()) { @@ -261,9 +273,10 @@ private void addWordIntoTextArea(TextArea textArea, Word word) { wordWidth += textStyle.getLetterSpacing() * textLength; int adjustWordSize = fontInfo.getItalicAdjust() + wordWidth; int hyphenWidth = 0; - if (word.getValue().endsWith(SHY_STRING)) { - hyphenWidth = getTextWidth(fontInfo, "-"); // We are using the minus for computing the hyphen size, because - // getTextWidth would return 0 width for SHY. 
+ if (word.getValue().endsWith(SOFT_HYPHEN)) { + hyphenWidth = getTextWidth(fontInfo, "-"); + // We are using the Unicode MINUS here for computing the hyphen dash size, + // because getTextWidth for the SOFT HYPHEN would return 0 width. } if (textArea.hasSpace(adjustWordSize + hyphenWidth)) { addWord(textArea, textLength, wordWidth); @@ -299,7 +312,16 @@ private void addWordIntoTextArea(TextArea textArea, Word word) { wordVestige = null; remainWord = word; if (remainWords.hasWord()) { - remainWord.setKeepLastSHY(false); + // The soft hyphen symbol should be omitted except for the last word in the + // line. + // Please Note: This condition is not quite correct, but OK for real-world data. + // If the soft hyphen is inside a word, then the breakIterator has at least + // one more "word", which is actually the (part of) the rest of this word. + // But if someone comes up with a word that *ends* with a soft-hyphen, + // then there might be no more remaining "words", so this results in + // hiding the soft hyphen. However, a word ending with a soft-hyphen + // doesn't make sense at all, so we don't care about this. + remainWord.setKeepTrailingSoftHyphen(false); } } textArea.setLineBreak(true); @@ -322,10 +344,8 @@ private void doWordBreak(String str, TextArea area) { if (endHyphenIndex == 0 && area.getWidth() == 0) { addWordVestige(area, 1, getTextWidth(fi, wb.getHyphenText(0, 1)), str.substring(1)); } else { - addWordVestige(area, endHyphenIndex, - getTextWidth(fi, wb.getHyphenText(0, endHyphenIndex)) - + textStyle.getLetterSpacing() * (endHyphenIndex - 1), - str.substring(endHyphenIndex)); + addWordVestige(area, endHyphenIndex, getTextWidth(fi, wb.getHyphenText(0, endHyphenIndex)) + + textStyle.getLetterSpacing() * (endHyphenIndex - 1), str.substring(endHyphenIndex)); } }