From b7519ac75a6b0dc6f89c98ad51bbaeac9e3b0442 Mon Sep 17 00:00:00 2001 From: tretrauit Date: Sun, 7 Jan 2024 23:52:49 +0700 Subject: [PATCH] feat: multithreading Multithreading in tokio::main and also a new queue system for LepTess (libtesseract) --- swordfish-common/src/utils/katana.rs | 2 +- swordfish/src/katana.rs | 63 +++++++++++++++++++++-- swordfish/src/main.rs | 67 ++++++------------------- swordfish/src/tesseract/libtesseract.rs | 47 +++++++++++------ 4 files changed, 106 insertions(+), 73 deletions(-) diff --git a/swordfish-common/src/utils/katana.rs b/swordfish-common/src/utils/katana.rs index 8d71866..eef2748 100644 --- a/swordfish-common/src/utils/katana.rs +++ b/swordfish-common/src/utils/katana.rs @@ -224,7 +224,7 @@ pub fn parse_cards_from_katana_klu_lookup(content: &String) -> Option { None => { error!("Failed to parse wishlist number: {}", content); return None; - }, + } } } let wishlist = match line_split?.nth(1) { diff --git a/swordfish/src/katana.rs b/swordfish/src/katana.rs index fb420f5..0115e6b 100644 --- a/swordfish/src/katana.rs +++ b/swordfish/src/katana.rs @@ -1,18 +1,21 @@ +use crate::helper; use crate::tesseract::{libtesseract, subprocess}; use crate::CONFIG; use image::imageops::colorops::contrast_in_place; use image::io::Reader as ImageReader; use image::{DynamicImage, ImageFormat}; use regex::Regex; +use serenity::all::Context; use serenity::model::channel::Message; use std::io::Cursor; use std::sync::LazyLock; use swordfish_common::database::katana as db; use swordfish_common::structs::Card; -use swordfish_common::{trace, warn}; +use swordfish_common::{error, trace, warn}; use tokio::task; +use tokio::time::Instant; -static TEXT_NUM_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"[A-Za-z0-9]").unwrap()); +static ALLOWED_CHARS_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"[\-!.': ]").unwrap()); const CARD_NAME_X_OFFSET: u32 = 22; const CARD_NAME_Y_OFFSET: u32 = 28; const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET; @@ -57,7 +60,7 @@ fn fix_tesseract_string(text: &mut String) { text.remove(0); } // Remove the first character if it is not alphanumeric - if !TEXT_NUM_REGEX.is_match(text.clone().chars().nth(0).unwrap().to_string().as_str()) { + if !text.clone().chars().nth(0).unwrap().is_ascii_alphanumeric() { text.remove(0); } // Workaround IR -> Ik @@ -122,7 +125,7 @@ fn fix_tesseract_string(text: &mut String) { } // Remove all non-alphanumeric characters trace!("Text: {}", text); - text.retain(|c| TEXT_NUM_REGEX.is_match(&c.to_string()) || c.is_ascii_alphanumeric()); + text.retain(|c| ALLOWED_CHARS_REGEX.is_match(&c.to_string()) || c.is_ascii_alphanumeric()); // Fix "mn" -> "III" trace!("Text: {}", text); if text.ends_with("mn") { @@ -386,3 +389,55 @@ pub async fn analyze_drop_message(message: &Message) -> Result, String } Ok(cards) } + +pub async fn handle_drop_message(ctx: &Context, msg: &Message) { + let start = Instant::now(); + match analyze_drop_message(msg).await { + Ok(cards) => { + let duration = start.elapsed(); + let mut reply_str = String::new(); + for card in cards { + // reply_str.push_str(&format!("{:?}\n", card)); + let wishlist_str: String = match card.wishlist { + Some(wishlist) => { + let mut out_str = wishlist.to_string(); + while out_str.len() < 5 { + out_str.push(' '); + } + out_str + } + None => "None ".to_string(), + }; + let last_update_ts_str = match card.last_update_ts { + 0 => "`Never`".to_string(), + ts => { + format!("", ts.to_string()) + } + }; + reply_str.push_str( + format!( + ":heart: `{}` • `{}` • **{}** • {} • {}\n", + wishlist_str, card.print, card.name, card.series, last_update_ts_str + ) + .as_str(), + ) + } + reply_str.push_str(&format!("Time taken (to analyze): `{:?}`", duration)); + match msg.reply(ctx, reply_str).await { + Ok(_) => {} + Err(why) => { + error!("Failed to reply to message: {:?}", why); + } + }; + } + Err(why) => { + helper::error_message( + ctx, + msg, + format!("Failed to analyze drop: `{:?}`", why), + None, + ) + .await; + } + }; +} diff --git a/swordfish/src/main.rs b/swordfish/src/main.rs index 6e8a807..5971ba6 100644 --- a/swordfish/src/main.rs +++ b/swordfish/src/main.rs @@ -10,9 +10,9 @@ use std::env; use std::path::Path; use std::sync::OnceLock; use swordfish_common::*; -use tokio::time::Instant; use crate::config::Config; +use crate::tesseract::libtesseract; mod config; mod debug; @@ -138,55 +138,7 @@ async fn parse_katana(ctx: &Context, msg: &Message) -> Result<(), String> { if !helper::message_in_whitelist(msg, &config.features.katana_drop_analysis.whitelist) { return Ok(()); } - let start = Instant::now(); - match katana::analyze_drop_message(msg).await { - Ok(cards) => { - let duration = start.elapsed(); - let mut reply_str = String::new(); - for card in cards { - // reply_str.push_str(&format!("{:?}\n", card)); - let wishlist_str: String = match card.wishlist { - Some(wishlist) => { - let mut out_str = wishlist.to_string(); - while out_str.len() < 5 { - out_str.push(' '); - } - out_str - } - None => "None ".to_string(), - }; - let last_update_ts_str = match card.last_update_ts { - 0 => "`Never`".to_string(), - ts => { - format!("", ts.to_string()) - } - }; - reply_str.push_str( - format!( - ":heart: `{}` • `{}` • **{}** • {} • {}\n", - wishlist_str, card.print, card.name, card.series, last_update_ts_str - ) - .as_str(), - ) - } - reply_str.push_str(&format!("Time taken (to analyze): `{:?}`", duration)); - match msg.reply(ctx, reply_str).await { - Ok(_) => {} - Err(why) => { - error!("Failed to reply to message: {:?}", why); - } - }; - } - Err(why) => { - helper::error_message( - ctx, - msg, - format!("Failed to analyze drop: `{:?}`", why), - None, - ) - .await; - } - }; + katana::handle_drop_message(ctx, msg).await; } else { if msg.embeds.len() == 0 { return Ok(()); @@ -270,7 +222,7 @@ async fn parse_katana_embed(embed: &Embed) { }; } -#[tokio::main] +#[tokio::main(flavor = "multi_thread", worker_threads = 32)] async fn main() { match dotenv() { Ok(_) => {} @@ -294,11 +246,22 @@ async fn main() { setup_logger(&log_level).expect("Failed to setup logger"); info!("Swordfish v{} - {}", env!("CARGO_PKG_VERSION"), GITHUB_URL); info!("Log level: {}", log_level); + let config = CONFIG.get().unwrap(); + if config.log.file.enabled { + info!("Logging to file: {}", CONFIG.get().unwrap().log.file.path); + } + if config.tesseract.backend == "libtesseract" { + info!("Using libtesseract as Tesseract backend"); + info!("Initializing libtesseract..."); + libtesseract::init().await; + } else { + info!("Using subprocess as Tesseract backend"); + } info!("Initializing database..."); swordfish_common::database::init().await; info!("Initializing Discord client..."); let framework = StandardFramework::new().group(&GENERAL_GROUP); - framework.configure(Configuration::new().prefix(CONFIG.get().unwrap().general.prefix.clone())); + framework.configure(Configuration::new().prefix(config.general.prefix.clone())); // Login with a bot token from the environment let intents = GatewayIntents::non_privileged() | GatewayIntents::MESSAGE_CONTENT; diff --git a/swordfish/src/tesseract/libtesseract.rs b/swordfish/src/tesseract/libtesseract.rs index bafacd8..b76f0b5 100644 --- a/swordfish/src/tesseract/libtesseract.rs +++ b/swordfish/src/tesseract/libtesseract.rs @@ -8,19 +8,9 @@ static mut TESSERACT_NUMERIC_VEC: Vec>> = Vec::new(); pub unsafe fn get_tesseract() -> Arc> { let lep_tess: Arc>; if TESSERACT_VEC.len() == 0 { - for _ in 0..3 { - task::spawn(async move { - let ocr = init_tesseract(false).unwrap(); - TESSERACT_VEC.push(Arc::new(Mutex::new(ocr))); - }); - } - lep_tess = Arc::new(Mutex::new(init_tesseract(false).unwrap())); + lep_tess = Arc::new(Mutex::new(create_tesseract(false).unwrap())); } else { lep_tess = TESSERACT_VEC.pop().unwrap(); - task::spawn(async move { - let ocr = init_tesseract(false).unwrap(); - TESSERACT_VEC.push(Arc::new(Mutex::new(ocr))); - }); } lep_tess } @@ -30,22 +20,22 @@ pub unsafe fn get_tesseract_numeric() -> Arc> { if TESSERACT_NUMERIC_VEC.len() == 0 { for _ in 0..3 { task::spawn(async move { - let ocr = init_tesseract(false).unwrap(); + let ocr = create_tesseract(true).unwrap(); TESSERACT_NUMERIC_VEC.push(Arc::new(Mutex::new(ocr))); }); } - lep_tess = Arc::new(Mutex::new(init_tesseract(false).unwrap())); + lep_tess = Arc::new(Mutex::new(create_tesseract(true).unwrap())); } else { lep_tess = TESSERACT_NUMERIC_VEC.pop().unwrap(); task::spawn(async move { - let ocr = init_tesseract(false).unwrap(); + let ocr = create_tesseract(true).unwrap(); TESSERACT_NUMERIC_VEC.push(Arc::new(Mutex::new(ocr))); }); } lep_tess } -pub fn init_tesseract(numeric_only: bool) -> Result { +pub fn create_tesseract(numeric_only: bool) -> Result { let mut lep_tess = match LepTess::new(None, "eng") { Ok(lep_tess) => lep_tess, Err(why) => return Err(format!("Failed to initialize Tesseract: {:?}", why)), @@ -58,7 +48,9 @@ pub fn init_tesseract(numeric_only: bool) -> Result { .set_variable(Variable::TesseditOcrEngineMode, "1") .unwrap(); // Set 70 as DPI - lep_tess.set_variable(Variable::UserDefinedDpi, "70").unwrap(); + lep_tess + .set_variable(Variable::UserDefinedDpi, "70") + .unwrap(); if numeric_only { match lep_tess.set_variable(Variable::TesseditCharWhitelist, "0123456789") { Ok(_) => (), @@ -67,3 +59,26 @@ pub fn init_tesseract(numeric_only: bool) -> Result { } Ok(lep_tess) } + +/// +/// Initialize the Tesseract OCR engine. +/// +/// Because this function creates a new thread, it should only be called once. +/// +pub async fn init() { + task::spawn(async { + loop { + unsafe { + if TESSERACT_VEC.len() < 9 { + let ocr = create_tesseract(false).unwrap(); + TESSERACT_VEC.push(Arc::new(Mutex::new(ocr))); + } + if TESSERACT_NUMERIC_VEC.len() < 9 { + let ocr = create_tesseract(true).unwrap(); + TESSERACT_NUMERIC_VEC.push(Arc::new(Mutex::new(ocr))); + } + } + tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; + } + }); +}