feat: multithreading

Multithreading in tokio::main and also a new queue system for LepTess (libtesseract)
This commit is contained in:
tretrauit 2024-01-07 23:52:49 +07:00
parent 077c3bfd2a
commit b7519ac75a
4 changed files with 106 additions and 73 deletions

View File

@ -224,7 +224,7 @@ pub fn parse_cards_from_katana_klu_lookup(content: &String) -> Option<Card> {
None => {
error!("Failed to parse wishlist number: {}", content);
return None;
},
}
}
}
let wishlist = match line_split?.nth(1) {

View File

@ -1,18 +1,21 @@
use crate::helper;
use crate::tesseract::{libtesseract, subprocess};
use crate::CONFIG;
use image::imageops::colorops::contrast_in_place;
use image::io::Reader as ImageReader;
use image::{DynamicImage, ImageFormat};
use regex::Regex;
use serenity::all::Context;
use serenity::model::channel::Message;
use std::io::Cursor;
use std::sync::LazyLock;
use swordfish_common::database::katana as db;
use swordfish_common::structs::Card;
use swordfish_common::{trace, warn};
use swordfish_common::{error, trace, warn};
use tokio::task;
use tokio::time::Instant;
static TEXT_NUM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[A-Za-z0-9]").unwrap());
static ALLOWED_CHARS_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\-!.': ]").unwrap());
const CARD_NAME_X_OFFSET: u32 = 22;
const CARD_NAME_Y_OFFSET: u32 = 28;
const CARD_NAME_WIDTH: u32 = 202 - CARD_NAME_X_OFFSET;
@ -57,7 +60,7 @@ fn fix_tesseract_string(text: &mut String) {
text.remove(0);
}
// Remove the first character if it is not alphanumeric
if !TEXT_NUM_REGEX.is_match(text.clone().chars().nth(0).unwrap().to_string().as_str()) {
if !text.clone().chars().nth(0).unwrap().is_ascii_alphanumeric() {
text.remove(0);
}
// Workaround IR -> Ik
@ -122,7 +125,7 @@ fn fix_tesseract_string(text: &mut String) {
}
// Remove all non-alphanumeric characters
trace!("Text: {}", text);
text.retain(|c| TEXT_NUM_REGEX.is_match(&c.to_string()) || c.is_ascii_alphanumeric());
text.retain(|c| ALLOWED_CHARS_REGEX.is_match(&c.to_string()) || c.is_ascii_alphanumeric());
// Fix "mn" -> "III"
trace!("Text: {}", text);
if text.ends_with("mn") {
@ -386,3 +389,55 @@ pub async fn analyze_drop_message(message: &Message) -> Result<Vec<Card>, String
}
Ok(cards)
}
pub async fn handle_drop_message(ctx: &Context, msg: &Message) {
let start = Instant::now();
match analyze_drop_message(msg).await {
Ok(cards) => {
let duration = start.elapsed();
let mut reply_str = String::new();
for card in cards {
// reply_str.push_str(&format!("{:?}\n", card));
let wishlist_str: String = match card.wishlist {
Some(wishlist) => {
let mut out_str = wishlist.to_string();
while out_str.len() < 5 {
out_str.push(' ');
}
out_str
}
None => "None ".to_string(),
};
let last_update_ts_str = match card.last_update_ts {
0 => "`Never`".to_string(),
ts => {
format!("<t:{}:R>", ts.to_string())
}
};
reply_str.push_str(
format!(
":heart: `{}` • `{}` • **{}** • {} • {}\n",
wishlist_str, card.print, card.name, card.series, last_update_ts_str
)
.as_str(),
)
}
reply_str.push_str(&format!("Time taken (to analyze): `{:?}`", duration));
match msg.reply(ctx, reply_str).await {
Ok(_) => {}
Err(why) => {
error!("Failed to reply to message: {:?}", why);
}
};
}
Err(why) => {
helper::error_message(
ctx,
msg,
format!("Failed to analyze drop: `{:?}`", why),
None,
)
.await;
}
};
}

View File

@ -10,9 +10,9 @@ use std::env;
use std::path::Path;
use std::sync::OnceLock;
use swordfish_common::*;
use tokio::time::Instant;
use crate::config::Config;
use crate::tesseract::libtesseract;
mod config;
mod debug;
@ -138,55 +138,7 @@ async fn parse_katana(ctx: &Context, msg: &Message) -> Result<(), String> {
if !helper::message_in_whitelist(msg, &config.features.katana_drop_analysis.whitelist) {
return Ok(());
}
let start = Instant::now();
match katana::analyze_drop_message(msg).await {
Ok(cards) => {
let duration = start.elapsed();
let mut reply_str = String::new();
for card in cards {
// reply_str.push_str(&format!("{:?}\n", card));
let wishlist_str: String = match card.wishlist {
Some(wishlist) => {
let mut out_str = wishlist.to_string();
while out_str.len() < 5 {
out_str.push(' ');
}
out_str
}
None => "None ".to_string(),
};
let last_update_ts_str = match card.last_update_ts {
0 => "`Never`".to_string(),
ts => {
format!("<t:{}:R>", ts.to_string())
}
};
reply_str.push_str(
format!(
":heart: `{}` • `{}` • **{}** • {} • {}\n",
wishlist_str, card.print, card.name, card.series, last_update_ts_str
)
.as_str(),
)
}
reply_str.push_str(&format!("Time taken (to analyze): `{:?}`", duration));
match msg.reply(ctx, reply_str).await {
Ok(_) => {}
Err(why) => {
error!("Failed to reply to message: {:?}", why);
}
};
}
Err(why) => {
helper::error_message(
ctx,
msg,
format!("Failed to analyze drop: `{:?}`", why),
None,
)
.await;
}
};
katana::handle_drop_message(ctx, msg).await;
} else {
if msg.embeds.len() == 0 {
return Ok(());
@ -270,7 +222,7 @@ async fn parse_katana_embed(embed: &Embed) {
};
}
#[tokio::main]
#[tokio::main(flavor = "multi_thread", worker_threads = 32)]
async fn main() {
match dotenv() {
Ok(_) => {}
@ -294,11 +246,22 @@ async fn main() {
setup_logger(&log_level).expect("Failed to setup logger");
info!("Swordfish v{} - {}", env!("CARGO_PKG_VERSION"), GITHUB_URL);
info!("Log level: {}", log_level);
let config = CONFIG.get().unwrap();
if config.log.file.enabled {
info!("Logging to file: {}", CONFIG.get().unwrap().log.file.path);
}
if config.tesseract.backend == "libtesseract" {
info!("Using libtesseract as Tesseract backend");
info!("Initializing libtesseract...");
libtesseract::init().await;
} else {
info!("Using subprocess as Tesseract backend");
}
info!("Initializing database...");
swordfish_common::database::init().await;
info!("Initializing Discord client...");
let framework = StandardFramework::new().group(&GENERAL_GROUP);
framework.configure(Configuration::new().prefix(CONFIG.get().unwrap().general.prefix.clone()));
framework.configure(Configuration::new().prefix(config.general.prefix.clone()));
// Login with a bot token from the environment
let intents = GatewayIntents::non_privileged() | GatewayIntents::MESSAGE_CONTENT;

View File

@ -8,19 +8,9 @@ static mut TESSERACT_NUMERIC_VEC: Vec<Arc<Mutex<LepTess>>> = Vec::new();
pub unsafe fn get_tesseract() -> Arc<Mutex<LepTess>> {
let lep_tess: Arc<Mutex<LepTess>>;
if TESSERACT_VEC.len() == 0 {
for _ in 0..3 {
task::spawn(async move {
let ocr = init_tesseract(false).unwrap();
TESSERACT_VEC.push(Arc::new(Mutex::new(ocr)));
});
}
lep_tess = Arc::new(Mutex::new(init_tesseract(false).unwrap()));
lep_tess = Arc::new(Mutex::new(create_tesseract(false).unwrap()));
} else {
lep_tess = TESSERACT_VEC.pop().unwrap();
task::spawn(async move {
let ocr = init_tesseract(false).unwrap();
TESSERACT_VEC.push(Arc::new(Mutex::new(ocr)));
});
}
lep_tess
}
@ -30,22 +20,22 @@ pub unsafe fn get_tesseract_numeric() -> Arc<Mutex<LepTess>> {
if TESSERACT_NUMERIC_VEC.len() == 0 {
for _ in 0..3 {
task::spawn(async move {
let ocr = init_tesseract(false).unwrap();
let ocr = create_tesseract(true).unwrap();
TESSERACT_NUMERIC_VEC.push(Arc::new(Mutex::new(ocr)));
});
}
lep_tess = Arc::new(Mutex::new(init_tesseract(false).unwrap()));
lep_tess = Arc::new(Mutex::new(create_tesseract(true).unwrap()));
} else {
lep_tess = TESSERACT_NUMERIC_VEC.pop().unwrap();
task::spawn(async move {
let ocr = init_tesseract(false).unwrap();
let ocr = create_tesseract(true).unwrap();
TESSERACT_NUMERIC_VEC.push(Arc::new(Mutex::new(ocr)));
});
}
lep_tess
}
pub fn init_tesseract(numeric_only: bool) -> Result<LepTess, String> {
pub fn create_tesseract(numeric_only: bool) -> Result<LepTess, String> {
let mut lep_tess = match LepTess::new(None, "eng") {
Ok(lep_tess) => lep_tess,
Err(why) => return Err(format!("Failed to initialize Tesseract: {:?}", why)),
@ -58,7 +48,9 @@ pub fn init_tesseract(numeric_only: bool) -> Result<LepTess, String> {
.set_variable(Variable::TesseditOcrEngineMode, "1")
.unwrap();
// Set 70 as DPI
lep_tess.set_variable(Variable::UserDefinedDpi, "70").unwrap();
lep_tess
.set_variable(Variable::UserDefinedDpi, "70")
.unwrap();
if numeric_only {
match lep_tess.set_variable(Variable::TesseditCharWhitelist, "0123456789") {
Ok(_) => (),
@ -67,3 +59,26 @@ pub fn init_tesseract(numeric_only: bool) -> Result<LepTess, String> {
}
Ok(lep_tess)
}
///
/// Initialize the Tesseract OCR engine.
///
/// Because this function creates a new thread, it should only be called once.
///
pub async fn init() {
task::spawn(async {
loop {
unsafe {
if TESSERACT_VEC.len() < 9 {
let ocr = create_tesseract(false).unwrap();
TESSERACT_VEC.push(Arc::new(Mutex::new(ocr)));
}
if TESSERACT_NUMERIC_VEC.len() < 9 {
let ocr = create_tesseract(true).unwrap();
TESSERACT_NUMERIC_VEC.push(Arc::new(Mutex::new(ocr)));
}
}
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
}
});
}