chore(katana): move regex building to swordfish

Welp, I can add my dirty workaround now
This commit is contained in:
tretrauit 2024-01-10 12:31:29 +07:00
parent 5accadf277
commit 952467d4b1
2 changed files with 41 additions and 40 deletions

View File

@ -41,43 +41,13 @@ pub async fn query_character(name: &String, series: &String) -> Option<Character
} }
pub async fn query_character_regex(name: &String, series: &String) -> Option<Character> { pub async fn query_character_regex(name: &String, series: &String) -> Option<Character> {
let mut name_regex = String::new();
let mut ascii_name = String::new();
for c in name.chars() {
if c.is_ascii_alphanumeric() {
ascii_name.push(c);
} else {
ascii_name.push(' ');
}
}
ascii_name.split_whitespace().for_each(|word| {
name_regex.push_str("(?=.*\\b");
name_regex.push_str(word.to_lowercase().as_str());
name_regex.push_str("\\b)");
});
name_regex.push_str(".+");
let mut series_regex = String::new();
let mut ascii_series = String::new();
for c in series.chars() {
if c.is_ascii_alphanumeric() {
ascii_series.push(c);
} else {
ascii_series.push(' ');
}
}
ascii_series.split_whitespace().for_each(|word| {
series_regex.push_str("(?=.*\\b");
series_regex.push_str(word.to_lowercase().as_str());
series_regex.push_str("\\b)");
});
series_regex.push_str(".+");
KATANA KATANA
.get() .get()
.unwrap() .unwrap()
.find_one( .find_one(
mongodb::bson::doc! { mongodb::bson::doc! {
"name": {"$regex": name_regex, "$options" : "i"}, "name": {"$regex": name, "$options" : "i"},
"series": {"$regex": series_regex, "$options" : "i"} "series": {"$regex": series, "$options" : "i"}
}, },
None, None,
) )

View File

@ -13,7 +13,7 @@ use swordfish_common::{error, trace, warn};
use tokio::task; use tokio::task;
use tokio::time::Instant; use tokio::time::Instant;
const ALLOWED_CHARS: [char; 10] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'']; const ALLOWED_CHARS: [char; 11] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'', '@'];
const CARD_NAME_X_OFFSET: u32 = 22; const CARD_NAME_X_OFFSET: u32 = 22;
const CARD_NAME_Y_OFFSET: u32 = 28; const CARD_NAME_Y_OFFSET: u32 = 28;
const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET; const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET;
@ -71,9 +71,7 @@ fn fix_tesseract_string(text: &mut String) {
// This is usually the left bottom corner of the card // This is usually the left bottom corner of the card
trace!("Text: {}", text); trace!("Text: {}", text);
if text.ends_with(r##"“NO"##) { if text.ends_with(r##"“NO"##) {
for _ in 0..3 { text.drain(text.len() - 4..text.len());
text.pop();
}
} }
// Workaround for "\n." (and others in the future) // Workaround for "\n." (and others in the future)
let text_clone = text.clone(); let text_clone = text.clone();
@ -101,8 +99,7 @@ fn fix_tesseract_string(text: &mut String) {
trace!("Prev prev char: {}", prev_prev_char); trace!("Prev prev char: {}", prev_prev_char);
if prev_prev_char == 'o' { if prev_prev_char == 'o' {
rm_prev = -1; rm_prev = -1;
text.remove(i - 2); text.drain(i - 3..i - 1);
text.remove(i - 2);
text.insert_str(i - 2, "yo!") text.insert_str(i - 2, "yo!")
} }
} }
@ -167,6 +164,30 @@ fn fix_tesseract_string(text: &mut String) {
trace!("Text (final): {}", text); trace!("Text (final): {}", text);
} }
/// Builds a lookahead-based regex pattern from OCR'd text.
///
/// The text is split into words on every character that is not ASCII
/// alphanumeric. Each word is lowercased and wrapped in its own
/// `(?=.*\b<word>\b)` lookahead, so the resulting pattern requires every word
/// to be present as a whole word, in any order. The pattern always ends with
/// `.+`; text without any ASCII alphanumeric character yields just `.+`.
///
/// Workaround: the digit `0` and the capital letter `O` are both emitted as
/// the character class `[0o]`, because "0" is sometimes used in place of "O"
/// in names. The class is lowercase like the rest of the pattern, so it only
/// covers a capital `O` when the pattern is evaluated case-insensitively.
///
/// Example: `"ROB0T-Girl"` becomes `(?=.*\br[0o]b[0o]t\b)(?=.*\bgirl\b).+`.
fn regexify_text(text: &str) -> String {
    let mut regex = String::new();
    // Anything outside [0-9A-Za-z] is a separator; runs of separators produce
    // empty pieces, which are dropped.
    let words = text
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty());
    for word in words {
        regex.push_str("(?=.*\\b");
        for c in word.chars() {
            match c {
                // Here comes the workaround...
                // The character "0" is sometimes used in place of "O" in names
                '0' | 'O' => regex.push_str("[0o]"),
                // Only ASCII alphanumerics reach this point, so ASCII
                // lowercasing is a complete lowercase conversion.
                _ => regex.push(c.to_ascii_lowercase()),
            }
        }
        regex.push_str("\\b)");
    }
    regex.push_str(".+");
    trace!("Regex: {}", regex);
    regex
}
fn save_image_if_trace(img: &DynamicImage, path: &str) { fn save_image_if_trace(img: &DynamicImage, path: &str) {
let log_lvl = CONFIG.get().unwrap().log.level.as_str(); let log_lvl = CONFIG.get().unwrap().log.level.as_str();
if log_lvl == "trace" { if log_lvl == "trace" {
@ -282,7 +303,12 @@ pub async fn analyze_card_libtesseract(card: image::DynamicImage, count: u32) ->
Some(c) => { Some(c) => {
character = c; character = c;
} }
None => match db::query_character_regex(&character.name, &character.series).await { None => match db::query_character_regex(
&regexify_text(&character.name),
&regexify_text(&character.series),
)
.await
{
Some(c) => { Some(c) => {
character = c; character = c;
} }
@ -349,7 +375,12 @@ pub async fn analyze_card_subprocess(card: image::DynamicImage, count: u32) -> D
Some(c) => { Some(c) => {
character = c; character = c;
} }
None => match db::query_character_regex(&character.name, &character.series).await { None => match db::query_character_regex(
&regexify_text(&character.name),
&regexify_text(&character.series),
)
.await
{
Some(c) => { Some(c) => {
character = c; character = c;
} }