fix(katana): implement regex for short strings
Pls work
This commit is contained in:
parent
a3b247bcb9
commit
389efeac0d
13
Cargo.toml
13
Cargo.toml
@ -5,3 +5,16 @@ members = [
|
|||||||
"swordfish"
|
"swordfish"
|
||||||
, "swordfish-user"]
|
, "swordfish-user"]
|
||||||
default-members = ["swordfish"]
|
default-members = ["swordfish"]
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
opt-level = 3
|
||||||
|
debug = false
|
||||||
|
split-debuginfo = '...' # Platform-specific.
|
||||||
|
strip = "symbols"
|
||||||
|
debug-assertions = false
|
||||||
|
overflow-checks = false
|
||||||
|
lto = "thin"
|
||||||
|
panic = 'unwind'
|
||||||
|
incremental = false
|
||||||
|
codegen-units = 16
|
||||||
|
rpath = false
|
||||||
|
@ -13,8 +13,8 @@ use swordfish_common::{error, trace, warn};
|
|||||||
use tokio::task;
|
use tokio::task;
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
|
|
||||||
const ALLOWED_CHARS: [char; 13] = [
|
const ALLOWED_CHARS: [char; 14] = [
|
||||||
' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'', '@', '&', '_',
|
' ', '-', '.', '!', ':', '(', ')', '\'', '/', '\'', '@', '&', '_', 'é',
|
||||||
];
|
];
|
||||||
const CARD_NAME_X_OFFSET: u32 = 22;
|
const CARD_NAME_X_OFFSET: u32 = 22;
|
||||||
const CARD_NAME_Y_OFFSET: u32 = 28;
|
const CARD_NAME_Y_OFFSET: u32 = 28;
|
||||||
@ -168,10 +168,14 @@ fn fix_tesseract_string(text: &mut String) {
|
|||||||
|
|
||||||
fn regexify_text(text: &String) -> String {
|
fn regexify_text(text: &String) -> String {
|
||||||
let partial_match: bool;
|
let partial_match: bool;
|
||||||
|
let short_text = text.len() < 6;
|
||||||
if text.len() > 23 {
|
if text.len() > 23 {
|
||||||
partial_match = true;
|
partial_match = true;
|
||||||
} else {
|
} else {
|
||||||
partial_match = false;
|
partial_match = false;
|
||||||
|
}
|
||||||
|
if short_text {
|
||||||
|
|
||||||
}
|
}
|
||||||
let mut regex = String::new();
|
let mut regex = String::new();
|
||||||
let mut ascii_text = String::new();
|
let mut ascii_text = String::new();
|
||||||
@ -204,6 +208,8 @@ fn regexify_text(text: &String) -> String {
|
|||||||
ascii_text.push(' ');
|
ascii_text.push(' ');
|
||||||
} else if ['R'].contains(&c) {
|
} else if ['R'].contains(&c) {
|
||||||
ascii_text.push_str("[Rk]");
|
ascii_text.push_str("[Rk]");
|
||||||
|
} else if ['m'].contains(&c) {
|
||||||
|
ascii_text.push_str("(m|ra)");
|
||||||
} else if c.is_ascii_alphanumeric() {
|
} else if c.is_ascii_alphanumeric() {
|
||||||
ascii_text.push(c);
|
ascii_text.push(c);
|
||||||
} else {
|
} else {
|
||||||
@ -211,6 +217,43 @@ fn regexify_text(text: &String) -> String {
|
|||||||
}
|
}
|
||||||
prev_chars.push(c);
|
prev_chars.push(c);
|
||||||
}
|
}
|
||||||
|
// Filter for short string.
|
||||||
|
if short_text && !ascii_text.contains(|c: char| c.is_whitespace()) {
|
||||||
|
regex.push_str("^");
|
||||||
|
let mut request_quantifier: bool = false;
|
||||||
|
let mut regex_any: bool = false;
|
||||||
|
let mut regex_any_from: usize = 0;
|
||||||
|
for (i, char) in ascii_text.chars().enumerate() {
|
||||||
|
trace!("Char: {}", char);
|
||||||
|
if char == '[' {
|
||||||
|
regex_any = true;
|
||||||
|
regex_any_from = i;
|
||||||
|
if i == 0 {
|
||||||
|
request_quantifier = true;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
} else if i == ascii_text.len() - 1 {
|
||||||
|
regex.push_str(".*");
|
||||||
|
}
|
||||||
|
if regex_any {
|
||||||
|
if char == ']' {
|
||||||
|
regex_any = false;
|
||||||
|
regex.push_str(&ascii_text[regex_any_from..i + 1]);
|
||||||
|
if request_quantifier {
|
||||||
|
regex.push_str(".*");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
regex.push(char);
|
||||||
|
if i == 0 {
|
||||||
|
regex.push_str(".*");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
regex.push_str("$");
|
||||||
|
trace!("Regex (short string): {}", regex);
|
||||||
|
return regex;
|
||||||
|
}
|
||||||
let split = ascii_text.split_whitespace();
|
let split = ascii_text.split_whitespace();
|
||||||
let len = split.clone().count();
|
let len = split.clone().count();
|
||||||
trace!("Partial match: {}", partial_match);
|
trace!("Partial match: {}", partial_match);
|
||||||
@ -220,21 +263,21 @@ fn regexify_text(text: &String) -> String {
|
|||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
regex.push_str("(?=.*\\b");
|
regex.push_str("(?=.*");
|
||||||
let processed_word = word.to_lowercase();
|
let processed_word = word.to_lowercase();
|
||||||
if partial_match && processed_word.len() > 4 {
|
if partial_match && processed_word.len() > 4 {
|
||||||
if !processed_word[0..3].contains(|c: char| ['[', ']'].contains(&c))
|
if !processed_word[0..3].contains(|c: char| ['[', ']'].contains(&c))
|
||||||
&& !processed_word[word.len() - 2..word.len()]
|
&& !processed_word[word.len() - 2..word.len()]
|
||||||
.contains(|c: char| ['[', ']'].contains(&c))
|
.contains(|c: char| ['[', ']'].contains(&c))
|
||||||
{
|
{
|
||||||
regex.push_str(format!("[a-z0-9][a-z0-9]{}[a-z0-9][a-z0-9]", &processed_word[2..word.len() - 2]).as_str());
|
regex.push_str(&processed_word[2..word.len() - 2]);
|
||||||
} else {
|
} else {
|
||||||
regex.push_str(&processed_word.as_str());
|
regex.push_str(&processed_word.as_str());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
regex.push_str(&processed_word.as_str());
|
regex.push_str(format!("\\b{}\\b", &processed_word.as_str()).as_str());
|
||||||
}
|
}
|
||||||
regex.push_str("\\b)");
|
regex.push_str(")");
|
||||||
}
|
}
|
||||||
regex.push_str(".+");
|
regex.push_str(".+");
|
||||||
trace!("Regex: {}", regex);
|
trace!("Regex: {}", regex);
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
pub use leptess::{LepTess, Variable};
|
pub use leptess::{LepTess, Variable};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::{
|
||||||
|
sync::{Arc, Mutex},
|
||||||
|
thread,
|
||||||
|
};
|
||||||
use tokio::task;
|
use tokio::task;
|
||||||
|
|
||||||
static mut TESSERACT_VEC: Vec<Arc<Mutex<LepTess>>> = Vec::new();
|
static mut TESSERACT_VEC: Vec<Arc<Mutex<LepTess>>> = Vec::new();
|
||||||
@ -66,19 +69,17 @@ pub fn create_tesseract(numeric_only: bool) -> Result<LepTess, String> {
|
|||||||
/// Because this function creates a new thread, it should only be called once.
|
/// Because this function creates a new thread, it should only be called once.
|
||||||
///
|
///
|
||||||
pub async fn init() {
|
pub async fn init() {
|
||||||
task::spawn(async {
|
task::spawn_blocking(|| loop {
|
||||||
loop {
|
unsafe {
|
||||||
unsafe {
|
if TESSERACT_VEC.len() < 9 {
|
||||||
if TESSERACT_VEC.len() < 9 {
|
let ocr = create_tesseract(false).unwrap();
|
||||||
let ocr = create_tesseract(false).unwrap();
|
TESSERACT_VEC.push(Arc::new(Mutex::new(ocr)));
|
||||||
TESSERACT_VEC.push(Arc::new(Mutex::new(ocr)));
|
}
|
||||||
}
|
if TESSERACT_NUMERIC_VEC.len() < 9 {
|
||||||
if TESSERACT_NUMERIC_VEC.len() < 9 {
|
let ocr = create_tesseract(true).unwrap();
|
||||||
let ocr = create_tesseract(true).unwrap();
|
TESSERACT_NUMERIC_VEC.push(Arc::new(Mutex::new(ocr)));
|
||||||
TESSERACT_NUMERIC_VEC.push(Arc::new(Mutex::new(ocr)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
|
||||||
}
|
}
|
||||||
|
thread::sleep(tokio::time::Duration::from_millis(500));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user