diff --git a/swordfish-common/src/database/katana.rs b/swordfish-common/src/database/katana.rs index 51d494e..39b502b 100644 --- a/swordfish-common/src/database/katana.rs +++ b/swordfish-common/src/database/katana.rs @@ -41,43 +41,13 @@ pub async fn query_character(name: &String, series: &String) -> Option Option { - let mut name_regex = String::new(); - let mut ascii_name = String::new(); - for c in name.chars() { - if c.is_ascii_alphanumeric() { - ascii_name.push(c); - } else { - ascii_name.push(' '); - } - } - ascii_name.split_whitespace().for_each(|word| { - name_regex.push_str("(?=.*\\b"); - name_regex.push_str(word.to_lowercase().as_str()); - name_regex.push_str("\\b)"); - }); - name_regex.push_str(".+"); - let mut series_regex = String::new(); - let mut ascii_series = String::new(); - for c in series.chars() { - if c.is_ascii_alphanumeric() { - ascii_series.push(c); - } else { - ascii_series.push(' '); - } - } - ascii_series.split_whitespace().for_each(|word| { - series_regex.push_str("(?=.*\\b"); - series_regex.push_str(word.to_lowercase().as_str()); - series_regex.push_str("\\b)"); - }); - series_regex.push_str(".+"); KATANA .get() .unwrap() .find_one( mongodb::bson::doc! 
{ - "name": {"$regex": name_regex, "$options" : "i"}, - "series": {"$regex": series_regex, "$options" : "i"} + "name": {"$regex": name, "$options" : "i"}, + "series": {"$regex": series, "$options" : "i"} }, None, ) diff --git a/swordfish/src/katana.rs b/swordfish/src/katana.rs index fb61af9..f2a2d7d 100644 --- a/swordfish/src/katana.rs +++ b/swordfish/src/katana.rs @@ -13,7 +13,7 @@ use swordfish_common::{error, trace, warn}; use tokio::task; use tokio::time::Instant; -const ALLOWED_CHARS: [char; 10] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'']; +const ALLOWED_CHARS: [char; 11] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'', '@']; const CARD_NAME_X_OFFSET: u32 = 22; const CARD_NAME_Y_OFFSET: u32 = 28; const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET; @@ -71,9 +71,7 @@ fn fix_tesseract_string(text: &mut String) { // This is usually the left bottom corner of the card trace!("Text: {}", text); if text.ends_with(r##"“NO"##) { - for _ in 0..3 { - text.pop(); - } + text.drain(text.len() - 4..text.len()); } // Workaround for "\n." (and others in the future) let text_clone = text.clone(); @@ -101,8 +99,7 @@ fn fix_tesseract_string(text: &mut String) { trace!("Prev prev char: {}", prev_prev_char); if prev_prev_char == 'o' { rm_prev = -1; - text.remove(i - 2); - text.remove(i - 2); + text.drain(i - 3..i - 1); text.insert_str(i - 2, "yo!") } } @@ -167,6 +164,30 @@ fn fix_tesseract_string(text: &mut String) { trace!("Text (final): {}", text); } +fn regexify_text(text: &String) -> String { + let mut regex = String::new(); + let mut ascii_text = String::new(); + for c in text.chars() { + // Here comes the workaround... 
+ // The character "0" is sometimes used in place of "O" in names + if ['0', 'O'].contains(&c) { + ascii_text.push_str("[0O]"); + } else if c.is_ascii_alphanumeric() { + ascii_text.push(c); + } else { + ascii_text.push(' '); + } + } + ascii_text.split_whitespace().for_each(|word| { + regex.push_str("(?=.*\\b"); + regex.push_str(word.to_lowercase().as_str()); + regex.push_str("\\b)"); + }); + regex.push_str(".+"); + trace!("Regex: {}", regex); + regex +} + fn save_image_if_trace(img: &DynamicImage, path: &str) { let log_lvl = CONFIG.get().unwrap().log.level.as_str(); if log_lvl == "trace" { @@ -282,7 +303,12 @@ pub async fn analyze_card_libtesseract(card: image::DynamicImage, count: u32) -> Some(c) => { character = c; } - None => match db::query_character_regex(&character.name, &character.series).await { + None => match db::query_character_regex( + &regexify_text(&character.name), + &regexify_text(&character.series), + ) + .await + { Some(c) => { character = c; } @@ -349,7 +375,12 @@ pub async fn analyze_card_subprocess(card: image::DynamicImage, count: u32) -> D Some(c) => { character = c; } - None => match db::query_character_regex(&character.name, &character.series).await { + None => match db::query_character_regex( + &regexify_text(&character.name), + &regexify_text(&character.series), + ) + .await + { Some(c) => { character = c; }