fix(katana): do not add word boundary if we detect a special character

This commit is contained in:
tretrauit 2024-01-17 02:15:23 +07:00
parent d5ab4169f0
commit 744b1a9065
2 changed files with 14 additions and 12 deletions

View File

@ -18,3 +18,8 @@ panic = 'unwind'
incremental = false incremental = false
codegen-units = 16 codegen-units = 16
rpath = false rpath = false
[profile.release-debug]
inherits = "release"
debug = true
strip = false

View File

@ -174,9 +174,6 @@ fn regexify_text(text: &String) -> String {
partial_match = true; partial_match = true;
} else { } else {
partial_match = false; partial_match = false;
}
if short_text {
} }
let mut regex = String::new(); let mut regex = String::new();
let mut ascii_text = String::new(); let mut ascii_text = String::new();
@ -186,14 +183,8 @@ fn regexify_text(text: &String) -> String {
// The character "0" is sometimes used in place of "O" in names // The character "0" is sometimes used in place of "O" in names
if ['0', 'O'].contains(&c) { if ['0', 'O'].contains(&c) {
ascii_text.push_str("[0O]"); ascii_text.push_str("[0O]");
} else if ['u', 'v'].contains(&c) && prev_chars.len() > 0 { } else if ['u', 'v'].contains(&c) {
let prev_char = prev_chars[prev_chars.len() - 1]; ascii_text.push_str("[uv]");
if ['u', 'v'].contains(&prev_char) {
ascii_text.pop();
ascii_text.push_str("[uv][uv]");
} else {
ascii_text.push(c);
}
} else if ['t'].contains(&c) { } else if ['t'].contains(&c) {
ascii_text.push_str("[ti]"); ascii_text.push_str("[ti]");
} else if ['I', 'l', '!', '1'].contains(&c) { } else if ['I', 'l', '!', '1'].contains(&c) {
@ -267,6 +258,7 @@ fn regexify_text(text: &String) -> String {
regex.push_str("(?=.*"); regex.push_str("(?=.*");
let processed_word = word.to_lowercase(); let processed_word = word.to_lowercase();
if partial_match && processed_word.len() > 4 { if partial_match && processed_word.len() > 4 {
// Remove first two and last two characters for "partial match"
if !processed_word[0..3].contains(|c: char| REGEX_CHARS.contains(&c)) if !processed_word[0..3].contains(|c: char| REGEX_CHARS.contains(&c))
&& !processed_word[word.len() - 2..word.len()] && !processed_word[word.len() - 2..word.len()]
.contains(|c: char| REGEX_CHARS.contains(&c)) .contains(|c: char| REGEX_CHARS.contains(&c))
@ -276,7 +268,12 @@ fn regexify_text(text: &String) -> String {
regex.push_str(&processed_word.as_str()); regex.push_str(&processed_word.as_str());
} }
} else { } else {
regex.push_str(format!("\\b{}\\b", &processed_word.as_str()).as_str()); // Only wrap the word in `\b` word boundaries when it contains at least one alphanumeric character; a word made up solely of special characters like "!" is pushed as-is
if processed_word.contains(|c: char| c.is_alphanumeric()) {
regex.push_str(format!("\\b{}\\b", &processed_word.as_str()).as_str());
} else {
regex.push_str(format!("{}", &processed_word.as_str()).as_str());
}
} }
regex.push_str(")"); regex.push_str(")");
} }