From 8e2a62942702f5b8b3a91095d14a56d40e776e72 Mon Sep 17 00:00:00 2001
From: tretrauit <tretrauit@gmail.com>
Date: Sat, 10 Feb 2024 21:44:44 +0700
Subject: [PATCH] fix(tesseract/regex): fix wrong length of string because of
 unicode

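The slice bounds applied to `p_word_unicode` were computed from
`word.len()`, which can differ from `p_word_unicode.len()` when the word
contains non-ASCII characters, so the wrong range was checked and copied.
Use the length of `p_word_unicode` itself for both the trailing-characters
check and the extracted middle section.

Also drop a redundant `format!("{}", ...)` wrapper around a string that is
pushed onto the regex unchanged.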
---
 swordfish/src/tesseract/utils.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/swordfish/src/tesseract/utils.rs b/swordfish/src/tesseract/utils.rs
index 10798a5..eda0ed4 100644
--- a/swordfish/src/tesseract/utils.rs
+++ b/swordfish/src/tesseract/utils.rs
@@ -263,11 +263,15 @@ pub fn regexify_text(text: &String) -> String {
             if !p_word_unicode[0..3]
                 .iter()
                 .any(|c: &&str| REGEX_STRINGS.contains(&c))
-                && !p_word_unicode[word.len() - 2..word.len()]
+                && !p_word_unicode[p_word_unicode.len() - 2..p_word_unicode.len()]
                     .iter()
                     .any(|c: &&str| REGEX_STRINGS.contains(&c))
             {
-                regex.push_str(p_word_unicode[2..word.len() - 2].concat().as_str());
+                regex.push_str(
+                    p_word_unicode[2..p_word_unicode.len() - 2]
+                        .concat()
+                        .as_str(),
+                );
             } else {
                 regex.push_str(&processed_word.as_str());
             }
@@ -277,7 +281,7 @@ pub fn regexify_text(text: &String) -> String {
             if processed_word.chars().all(|c| c.is_ascii_alphanumeric()) {
                 regex.push_str(format!("\\b{}\\b", &processed_word.as_str()).as_str());
             } else {
-                regex.push_str(format!("{}", &processed_word.as_str()).as_str());
+                regex.push_str(&processed_word.as_str());
             }
         }
         regex.push_str(")");