chore(katana): move regex building to swordfish
Welp, I can add my dirty workaround now
This commit is contained in:
parent
5accadf277
commit
952467d4b1
@ -41,43 +41,13 @@ pub async fn query_character(name: &String, series: &String) -> Option<Character
|
||||
}
|
||||
|
||||
pub async fn query_character_regex(name: &String, series: &String) -> Option<Character> {
|
||||
let mut name_regex = String::new();
|
||||
let mut ascii_name = String::new();
|
||||
for c in name.chars() {
|
||||
if c.is_ascii_alphanumeric() {
|
||||
ascii_name.push(c);
|
||||
} else {
|
||||
ascii_name.push(' ');
|
||||
}
|
||||
}
|
||||
ascii_name.split_whitespace().for_each(|word| {
|
||||
name_regex.push_str("(?=.*\\b");
|
||||
name_regex.push_str(word.to_lowercase().as_str());
|
||||
name_regex.push_str("\\b)");
|
||||
});
|
||||
name_regex.push_str(".+");
|
||||
let mut series_regex = String::new();
|
||||
let mut ascii_series = String::new();
|
||||
for c in series.chars() {
|
||||
if c.is_ascii_alphanumeric() {
|
||||
ascii_series.push(c);
|
||||
} else {
|
||||
ascii_series.push(' ');
|
||||
}
|
||||
}
|
||||
ascii_series.split_whitespace().for_each(|word| {
|
||||
series_regex.push_str("(?=.*\\b");
|
||||
series_regex.push_str(word.to_lowercase().as_str());
|
||||
series_regex.push_str("\\b)");
|
||||
});
|
||||
series_regex.push_str(".+");
|
||||
KATANA
|
||||
.get()
|
||||
.unwrap()
|
||||
.find_one(
|
||||
mongodb::bson::doc! {
|
||||
"name": {"$regex": name_regex, "$options" : "i"},
|
||||
"series": {"$regex": series_regex, "$options" : "i"}
|
||||
"name": {"$regex": name, "$options" : "i"},
|
||||
"series": {"$regex": series, "$options" : "i"}
|
||||
},
|
||||
None,
|
||||
)
|
||||
|
@ -13,7 +13,7 @@ use swordfish_common::{error, trace, warn};
|
||||
use tokio::task;
|
||||
use tokio::time::Instant;
|
||||
|
||||
const ALLOWED_CHARS: [char; 10] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\''];
|
||||
const ALLOWED_CHARS: [char; 11] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'', '@'];
|
||||
const CARD_NAME_X_OFFSET: u32 = 22;
|
||||
const CARD_NAME_Y_OFFSET: u32 = 28;
|
||||
const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET;
|
||||
@ -71,9 +71,7 @@ fn fix_tesseract_string(text: &mut String) {
|
||||
// This is usually the left bottom corner of the card
|
||||
trace!("Text: {}", text);
|
||||
if text.ends_with(r##"“NO"##) {
|
||||
for _ in 0..3 {
|
||||
text.pop();
|
||||
}
|
||||
text.drain(text.len() - 4..text.len());
|
||||
}
|
||||
// Workaround for "\n." (and others in the future)
|
||||
let text_clone = text.clone();
|
||||
@ -101,8 +99,7 @@ fn fix_tesseract_string(text: &mut String) {
|
||||
trace!("Prev prev char: {}", prev_prev_char);
|
||||
if prev_prev_char == 'o' {
|
||||
rm_prev = -1;
|
||||
text.remove(i - 2);
|
||||
text.remove(i - 2);
|
||||
text.drain(i - 3..i - 1);
|
||||
text.insert_str(i - 2, "yo!")
|
||||
}
|
||||
}
|
||||
@ -167,6 +164,30 @@ fn fix_tesseract_string(text: &mut String) {
|
||||
trace!("Text (final): {}", text);
|
||||
}
|
||||
|
||||
fn regexify_text(text: &String) -> String {
|
||||
let mut regex = String::new();
|
||||
let mut ascii_text = String::new();
|
||||
for c in text.chars() {
|
||||
// Here comes the workaround...
|
||||
// The character "0" is sometimes used in place of "O" in names
|
||||
if ['0', 'O'].contains(&c) {
|
||||
ascii_text.push_str("[0O]");
|
||||
} else if c.is_ascii_alphanumeric() {
|
||||
ascii_text.push(c);
|
||||
} else {
|
||||
ascii_text.push(' ');
|
||||
}
|
||||
}
|
||||
ascii_text.split_whitespace().for_each(|word| {
|
||||
regex.push_str("(?=.*\\b");
|
||||
regex.push_str(word.to_lowercase().as_str());
|
||||
regex.push_str("\\b)");
|
||||
});
|
||||
regex.push_str(".+");
|
||||
trace!("Regex: {}", regex);
|
||||
regex
|
||||
}
|
||||
|
||||
fn save_image_if_trace(img: &DynamicImage, path: &str) {
|
||||
let log_lvl = CONFIG.get().unwrap().log.level.as_str();
|
||||
if log_lvl == "trace" {
|
||||
@ -282,7 +303,12 @@ pub async fn analyze_card_libtesseract(card: image::DynamicImage, count: u32) ->
|
||||
Some(c) => {
|
||||
character = c;
|
||||
}
|
||||
None => match db::query_character_regex(&character.name, &character.series).await {
|
||||
None => match db::query_character_regex(
|
||||
&regexify_text(&character.name),
|
||||
&regexify_text(&character.series),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Some(c) => {
|
||||
character = c;
|
||||
}
|
||||
@ -349,7 +375,12 @@ pub async fn analyze_card_subprocess(card: image::DynamicImage, count: u32) -> D
|
||||
Some(c) => {
|
||||
character = c;
|
||||
}
|
||||
None => match db::query_character_regex(&character.name, &character.series).await {
|
||||
None => match db::query_character_regex(
|
||||
&regexify_text(&character.name),
|
||||
&regexify_text(&character.series),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Some(c) => {
|
||||
character = c;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user