public class RemoveMinorityScriptsTextFilter extends java.lang.Object implements TextFilter
| Modifier and Type | Field and Description |
|---|---|
private double |
threshold |
| Modifier | Constructor and Description |
|---|---|
private |
RemoveMinorityScriptsTextFilter(double threshold) |
| Modifier and Type | Method and Description |
|---|---|
private java.util.Map<java.lang.Character.UnicodeScript,java.lang.Long> |
countByScript(java.lang.CharSequence text) |
java.lang.String |
filter(java.lang.CharSequence text) |
private long |
findMost(java.util.Map<java.lang.Character.UnicodeScript,java.lang.Long> counts) |
static RemoveMinorityScriptsTextFilter |
forThreshold(double threshold)
If a script accounts for less than this fraction of the content, relative to the most-used script, its text is removed.
|
private void |
increment(java.util.Map<java.lang.Character.UnicodeScript,java.lang.Long> counter,
java.lang.Character.UnicodeScript unicodeScript) |
private java.lang.String |
remove(java.lang.CharSequence text,
java.util.Set<java.lang.Character.UnicodeScript> toRemove) |
private RemoveMinorityScriptsTextFilter(double threshold)
public static RemoveMinorityScriptsTextFilter forThreshold(double threshold)
threshold - a value between 0 and 1; the suggested value is 0.3. A script whose proportion is smaller than the threshold is removed; one exactly equal to the threshold is kept.
public java.lang.String filter(java.lang.CharSequence text)
filter in interface TextFilter
private java.lang.String remove(java.lang.CharSequence text,
java.util.Set<java.lang.Character.UnicodeScript> toRemove)
private long findMost(java.util.Map<java.lang.Character.UnicodeScript,java.lang.Long> counts)
private java.util.Map<java.lang.Character.UnicodeScript,java.lang.Long> countByScript(java.lang.CharSequence text)
private void increment(java.util.Map<java.lang.Character.UnicodeScript,java.lang.Long> counter,
java.lang.Character.UnicodeScript unicodeScript)