chore(katana): move regex building to swordfish
Welp, I can add my dirty workaround now
This commit is contained in:
parent
5accadf277
commit
952467d4b1
@ -41,43 +41,13 @@ pub async fn query_character(name: &String, series: &String) -> Option<Character
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn query_character_regex(name: &String, series: &String) -> Option<Character> {
|
pub async fn query_character_regex(name: &String, series: &String) -> Option<Character> {
|
||||||
let mut name_regex = String::new();
|
|
||||||
let mut ascii_name = String::new();
|
|
||||||
for c in name.chars() {
|
|
||||||
if c.is_ascii_alphanumeric() {
|
|
||||||
ascii_name.push(c);
|
|
||||||
} else {
|
|
||||||
ascii_name.push(' ');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ascii_name.split_whitespace().for_each(|word| {
|
|
||||||
name_regex.push_str("(?=.*\\b");
|
|
||||||
name_regex.push_str(word.to_lowercase().as_str());
|
|
||||||
name_regex.push_str("\\b)");
|
|
||||||
});
|
|
||||||
name_regex.push_str(".+");
|
|
||||||
let mut series_regex = String::new();
|
|
||||||
let mut ascii_series = String::new();
|
|
||||||
for c in series.chars() {
|
|
||||||
if c.is_ascii_alphanumeric() {
|
|
||||||
ascii_series.push(c);
|
|
||||||
} else {
|
|
||||||
ascii_series.push(' ');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ascii_series.split_whitespace().for_each(|word| {
|
|
||||||
series_regex.push_str("(?=.*\\b");
|
|
||||||
series_regex.push_str(word.to_lowercase().as_str());
|
|
||||||
series_regex.push_str("\\b)");
|
|
||||||
});
|
|
||||||
series_regex.push_str(".+");
|
|
||||||
KATANA
|
KATANA
|
||||||
.get()
|
.get()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.find_one(
|
.find_one(
|
||||||
mongodb::bson::doc! {
|
mongodb::bson::doc! {
|
||||||
"name": {"$regex": name_regex, "$options" : "i"},
|
"name": {"$regex": name, "$options" : "i"},
|
||||||
"series": {"$regex": series_regex, "$options" : "i"}
|
"series": {"$regex": series, "$options" : "i"}
|
||||||
},
|
},
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
@ -13,7 +13,7 @@ use swordfish_common::{error, trace, warn};
|
|||||||
use tokio::task;
|
use tokio::task;
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
|
|
||||||
const ALLOWED_CHARS: [char; 10] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\''];
|
// Non-alphanumeric characters listed as allowed; the code consuming this list is outside this view.
// NOTE(review): '\'' appears twice in this array — possibly a different character was intended
// for one of the entries; confirm before changing, since the length is part of the type.
const ALLOWED_CHARS: [char; 11] = [' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'', '@'];
|
||||||
const CARD_NAME_X_OFFSET: u32 = 22;
|
const CARD_NAME_X_OFFSET: u32 = 22;
|
||||||
const CARD_NAME_Y_OFFSET: u32 = 28;
|
const CARD_NAME_Y_OFFSET: u32 = 28;
|
||||||
const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET;
|
const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET;
|
||||||
@ -71,9 +71,7 @@ fn fix_tesseract_string(text: &mut String) {
|
|||||||
// This is usually the left bottom corner of the card
|
// This is usually the left bottom corner of the card
|
||||||
trace!("Text: {}", text);
|
trace!("Text: {}", text);
|
||||||
if text.ends_with(r##"“NO"##) {
|
if text.ends_with(r##"“NO"##) {
|
||||||
for _ in 0..3 {
|
text.drain(text.len() - 4..text.len());
|
||||||
text.pop();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Workaround for "\n." (and others in the future)
|
// Workaround for "\n." (and others in the future)
|
||||||
let text_clone = text.clone();
|
let text_clone = text.clone();
|
||||||
@ -101,8 +99,7 @@ fn fix_tesseract_string(text: &mut String) {
|
|||||||
trace!("Prev prev char: {}", prev_prev_char);
|
trace!("Prev prev char: {}", prev_prev_char);
|
||||||
if prev_prev_char == 'o' {
|
if prev_prev_char == 'o' {
|
||||||
rm_prev = -1;
|
rm_prev = -1;
|
||||||
text.remove(i - 2);
|
text.drain(i - 3..i - 1);
|
||||||
text.remove(i - 2);
|
|
||||||
text.insert_str(i - 2, "yo!")
|
text.insert_str(i - 2, "yo!")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -167,6 +164,30 @@ fn fix_tesseract_string(text: &mut String) {
|
|||||||
trace!("Text (final): {}", text);
|
trace!("Text (final): {}", text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn regexify_text(text: &String) -> String {
|
||||||
|
let mut regex = String::new();
|
||||||
|
let mut ascii_text = String::new();
|
||||||
|
for c in text.chars() {
|
||||||
|
// Here comes the workaround...
|
||||||
|
// The character "0" is sometimes used in place of "O" in names
|
||||||
|
if ['0', 'O'].contains(&c) {
|
||||||
|
ascii_text.push_str("[0O]");
|
||||||
|
} else if c.is_ascii_alphanumeric() {
|
||||||
|
ascii_text.push(c);
|
||||||
|
} else {
|
||||||
|
ascii_text.push(' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ascii_text.split_whitespace().for_each(|word| {
|
||||||
|
regex.push_str("(?=.*\\b");
|
||||||
|
regex.push_str(word.to_lowercase().as_str());
|
||||||
|
regex.push_str("\\b)");
|
||||||
|
});
|
||||||
|
regex.push_str(".+");
|
||||||
|
trace!("Regex: {}", regex);
|
||||||
|
regex
|
||||||
|
}
|
||||||
|
|
||||||
fn save_image_if_trace(img: &DynamicImage, path: &str) {
|
fn save_image_if_trace(img: &DynamicImage, path: &str) {
|
||||||
let log_lvl = CONFIG.get().unwrap().log.level.as_str();
|
let log_lvl = CONFIG.get().unwrap().log.level.as_str();
|
||||||
if log_lvl == "trace" {
|
if log_lvl == "trace" {
|
||||||
@ -282,7 +303,12 @@ pub async fn analyze_card_libtesseract(card: image::DynamicImage, count: u32) ->
|
|||||||
Some(c) => {
|
Some(c) => {
|
||||||
character = c;
|
character = c;
|
||||||
}
|
}
|
||||||
None => match db::query_character_regex(&character.name, &character.series).await {
|
None => match db::query_character_regex(
|
||||||
|
&regexify_text(&character.name),
|
||||||
|
&regexify_text(&character.series),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
character = c;
|
character = c;
|
||||||
}
|
}
|
||||||
@ -349,7 +375,12 @@ pub async fn analyze_card_subprocess(card: image::DynamicImage, count: u32) -> D
|
|||||||
Some(c) => {
|
Some(c) => {
|
||||||
character = c;
|
character = c;
|
||||||
}
|
}
|
||||||
None => match db::query_character_regex(&character.name, &character.series).await {
|
None => match db::query_character_regex(
|
||||||
|
&regexify_text(&character.name),
|
||||||
|
&regexify_text(&character.series),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
character = c;
|
character = c;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user