chore(katana): move regex building to swordfish

Welp, I can add my dirty workaround now
2024-01-10 12:31:29 +07:00 · 2024-01-10 12:31:29 +07:00 · 952467d4b1
commit 952467d4b1
parent 5accadf277
2 changed files with 41 additions and 40 deletions
--- a/swordfish-common/src/database/katana.rs
+++ b/swordfish-common/src/database/katana.rs
@ -41,43 +41,13 @@ pub async fn query_character(name: &String, series: &String) -> Option<Character
 }

 pub async fn query_character_regex(name: &String, series: &String) -> Option<Character> {
-    let mut name_regex = String::new();
-    let mut ascii_name = String::new();
-    for c in name.chars() {
-        if c.is_ascii_alphanumeric() {
-            ascii_name.push(c);
-        } else {
-            ascii_name.push(' ');
-        }
-    }
-    ascii_name.split_whitespace().for_each(|word| {
-        name_regex.push_str("(?=.*\\b");
-        name_regex.push_str(word.to_lowercase().as_str());
-        name_regex.push_str("\\b)");
-    });
-    name_regex.push_str(".+");
-    let mut series_regex = String::new();
-    let mut ascii_series = String::new();
-    for c in series.chars() {
-        if c.is_ascii_alphanumeric() {
-            ascii_series.push(c);
-        } else {
-            ascii_series.push(' ');
-        }
-    }
-    ascii_series.split_whitespace().for_each(|word| {
-        series_regex.push_str("(?=.*\\b");
-        series_regex.push_str(word.to_lowercase().as_str());
-        series_regex.push_str("\\b)");
-    });
-    series_regex.push_str(".+");
    KATANA
        .get()
        .unwrap()
        .find_one(
            mongodb::bson::doc! {
-                "name": {"$regex": name_regex, "$options" : "i"},
-                "series": {"$regex": series_regex, "$options" : "i"}
+                "name": {"$regex": name, "$options" : "i"},
+                "series": {"$regex": series, "$options" : "i"}
            },
            None,
        )
--- a/swordfish/src/katana.rs
+++ b/swordfish/src/katana.rs
@ -13,7 +13,7 @@ use swordfish_common::{error, trace, warn};
 use tokio::task;
 use tokio::time::Instant;

-const ALLOWED_CHARS: [char; 10] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\''];
+const ALLOWED_CHARS: [char; 11] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'', '@']; // NOTE(review): '\'' is listed twice — confirm whether another character was intended
 const CARD_NAME_X_OFFSET: u32 = 22;
 const CARD_NAME_Y_OFFSET: u32 = 28;
 const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET;
@ -71,9 +71,7 @@ fn fix_tesseract_string(text: &mut String) {
    // This is usually the left bottom corner of the card
    trace!("Text: {}", text);
    if text.ends_with(r##"“NO"##) {
-        for _ in 0..3 {
-            text.pop();
-        }
+        text.drain(text.len() - 4..text.len());
    }
    // Workaround for "\n." (and others in the future)
    let text_clone = text.clone();
@ -101,8 +99,7 @@ fn fix_tesseract_string(text: &mut String) {
            trace!("Prev prev char: {}", prev_prev_char);
            if prev_prev_char == 'o' {
                rm_prev = -1;
-                text.remove(i - 2);
-                text.remove(i - 2);
+                text.drain(i - 3..i - 1);
                text.insert_str(i - 2, "yo!")
            }
        }
@ -167,6 +164,30 @@ fn fix_tesseract_string(text: &mut String) {
    trace!("Text (final): {}", text);
 }

+/// Builds a regex with one `(?=.*\bword\b)` lookahead per ASCII alphanumeric word of
+/// `text` (lowercased), ending in `.+`. Takes `&str`; `&String` callers still coerce.
+fn regexify_text(text: &str) -> String {
+    let mut words = String::with_capacity(text.len());
+    for c in text.chars() {
+        match c {
+            // Workaround: "0" is sometimes used in place of "O" in names, so accept both.
+            '0' | 'O' => words.push_str("[0o]"),
+            // Lowercase here so no per-word lowercase copy is needed further down.
+            _ if c.is_ascii_alphanumeric() => words.push(c.to_ascii_lowercase()),
+            _ => words.push(' '),
+        }
+    }
+    let mut regex = String::new();
+    for word in words.split_whitespace() {
+        regex.push_str("(?=.*\\b");
+        regex.push_str(word);
+        regex.push_str("\\b)");
+    }
+    regex.push_str(".+");
+    trace!("Regex: {}", regex);
+    regex
+}
+
 fn save_image_if_trace(img: &DynamicImage, path: &str) {
    let log_lvl = CONFIG.get().unwrap().log.level.as_str();
    if log_lvl == "trace" {
@ -282,7 +303,12 @@ pub async fn analyze_card_libtesseract(card: image::DynamicImage, count: u32) ->
        Some(c) => {
            character = c;
        }
-        None => match db::query_character_regex(&character.name, &character.series).await {
+        None => match db::query_character_regex(
+            &regexify_text(&character.name),
+            &regexify_text(&character.series),
+        )
+        .await
+        {
            Some(c) => {
                character = c;
            }
@ -349,7 +375,12 @@ pub async fn analyze_card_subprocess(card: image::DynamicImage, count: u32) -> D
        Some(c) => {
            character = c;
        }
-        None => match db::query_character_regex(&character.name, &character.series).await {
+        None => match db::query_character_regex(
+            &regexify_text(&character.name),
+            &regexify_text(&character.series),
+        )
+        .await
+        {
            Some(c) => {
                character = c;
            }