From d1cee333ed28f79dae014b87f9c787f276c8cbae Mon Sep 17 00:00:00 2001 From: tretrauit Date: Fri, 5 Jan 2024 00:40:57 +0700 Subject: [PATCH] feat(katana): proper name & series reading --- swordfish-common/src/lib.rs | 1 + swordfish-common/src/structs.rs | 7 + swordfish-common/src/tesseract.rs | 3 + swordfish/src/katana.rs | 259 ++++++++++++++++++++++-------- swordfish/src/main.rs | 79 ++++----- 5 files changed, 245 insertions(+), 104 deletions(-) create mode 100644 swordfish-common/src/structs.rs diff --git a/swordfish-common/src/lib.rs b/swordfish-common/src/lib.rs index fd1bc2c..f1f5d4e 100644 --- a/swordfish-common/src/lib.rs +++ b/swordfish-common/src/lib.rs @@ -2,6 +2,7 @@ pub use log; pub use tracing::{debug, error, info, trace, warn}; use tracing_subscriber::{self, fmt, EnvFilter}; pub mod constants; +pub mod structs; pub mod tesseract; pub fn setup_logger(level: &str) -> Result<(), fern::InitError> { diff --git a/swordfish-common/src/structs.rs b/swordfish-common/src/structs.rs new file mode 100644 index 0000000..0abf3aa --- /dev/null +++ b/swordfish-common/src/structs.rs @@ -0,0 +1,7 @@ +#[derive(Debug)] +pub struct Card { + pub wishlist: Option, + pub name: String, + pub series: String, + pub print: i32, +} diff --git a/swordfish-common/src/tesseract.rs b/swordfish-common/src/tesseract.rs index ab23995..dac0b4a 100644 --- a/swordfish-common/src/tesseract.rs +++ b/swordfish-common/src/tesseract.rs @@ -5,6 +5,9 @@ pub fn init_tesseract(numeric_only: bool) -> Result { Ok(lep_tess) => lep_tess, Err(why) => return Err(format!("Failed to initialize Tesseract: {:?}", why)), }; + lep_tess.set_variable(Variable::TesseditPagesegMode, "6").unwrap(); + // Use LSTM only. + lep_tess.set_variable(Variable::TesseditOcrEngineMode, "1").unwrap(); if numeric_only { match lep_tess.set_variable(Variable::TesseditCharWhitelist, "0123456789") { Ok(_) => (), diff --git a/swordfish/src/katana.rs b/swordfish/src/katana.rs index 0d824af..37659d9 100644 --- a/swordfish/src/katana.rs +++ b/swordfish/src/katana.rs @@ -1,57 +1,194 @@ +use image::imageops::colorops::contrast_in_place; use image::io::Reader as ImageReader; +use image::ImageFormat; use once_cell::sync::Lazy; use regex::Regex; use serenity::model::channel::Message; -use serenity::prelude::*; use std::io::Cursor; -use std::sync::{Arc, Mutex}; -use std::thread; +use std::{env, thread}; +use swordfish_common::structs::Card; use swordfish_common::tesseract; -use swordfish_common::{debug, error, info, trace, warn}; +use swordfish_common::{trace, warn}; -static TEXT_REGEX: Lazy = Lazy::new(|| { - Regex::new(r"[A-Za-z ]").unwrap() -}); +static TEXT_NUM_REGEX: Lazy = Lazy::new(|| Regex::new(r"[A-Za-z0-9]").unwrap()); +static ALLOWED_CHARS_REGEX: Lazy = Lazy::new(|| Regex::new(r"['-: ]").unwrap()); -pub struct Card { - wishlist: Option, - name: String, - series: String, - print: i32, +fn replace_string(text: &mut String, from: &str, to: &str) -> bool { + match text.find(from) { + Some(i) => { + text.replace_range(i..i + from.len(), to); + true + } + None => false, + } } -pub fn analyze_card(card: image::DynamicImage) { +fn fix_tesseract_string(text: &mut String) { + // Remove the \n + trace!("Text: {}", text); + if text.ends_with("\n") { + text.pop(); + } + // Workaround for a bug the text + // e.g. "We Never Learn\nN" -> "We Never Learn" + trace!("Text: {}", text); + if text.ends_with("\nN") { + for _ in 0..2 { + text.pop(); + } + } + // Replace first (to prevent "byte index 13 is not a char boundary; it is inside '—' (bytes 11..14)") + while replace_string(text, "—", "-") { + trace!("Replacing '—' with '-'"); + } + // Workaround for a bug the text + trace!("Text: {}", text); + if text.starts_with("- ") || text.starts_with("-.") { + text.remove(0); + text.remove(0); + } + // Workaround IR -> Ik + // Maybe it only occurs if Ik is in the start of the string? + // e.g. "IReda" -> "Ikeda" + trace!("Text: {}", text); + replace_string(text, "IR", "Ik"); + // Workaround for "A\n" + // This is usually the corner of the card + trace!("Text: {}", text); + replace_string(text, "A\n", ""); + // Workaround for "\n." (and others in the future) + for (i, c) in text.clone().chars().enumerate() { + if c != '\n' { + continue; + } + let prev_char = match text.chars().nth(i - 1) { + Some(c) => c, + None => continue, + }; + let next_char = match text.chars().nth(i + 1) { + Some(c) => c, + None => break, + }; + let mut rm_prev: bool = false; + trace!("Prev char: {}", prev_char); + if ['-'].contains(&prev_char) { + rm_prev = true; + text.remove(i - 1); + } + trace!("Next char: {}", next_char); + if ['.'].contains(&next_char) { + if rm_prev { + text.remove(i); + } else { + text.remove(i + 1); + } + } + } + // Replace "\n" with " " + trace!("Text: {}", text); + replace_string(text, "\n", " "); + // Remove all non-alphanumeric characters + trace!("Text: {}", text); + text.retain(|c| TEXT_NUM_REGEX.is_match(&c.to_string()) || ALLOWED_CHARS_REGEX.is_match(&c.to_string())); + // Fix "mn" -> "III" + trace!("Text: {}", text); + if text.ends_with("mn") { + text.pop(); + text.pop(); + text.push_str("III"); + } + // Fix "1ll" -> "III" + trace!("Text: {}", text); + replace_string(text, "1ll", "III"); + // Replace multiple spaces with one space + trace!("Text: {}", text); + while replace_string(text, " ", " ") { + trace!("Removing multiple spaces"); + } + // Workaround if the first character is a space + trace!("Text: {}", text); + while text.starts_with(" ") { + trace!("Removing leading space"); + text.remove(0); + } + trace!("Text (final): {}", text); +} + +fn save_image_if_trace(img: &image::DynamicImage, path: &str) { + let log_lvl = match env::var("LOG_LEVEL") { + Ok(log_lvl) => log_lvl, + Err(_) => return, + }; + if log_lvl == "trace" { + match img.save(path) { + Ok(_) => { + trace!("Saved image to {}", path); + } + Err(why) => { + warn!("{}", format!("Failed to save image: {:?}", why)) + } + }; + } +} + +pub fn analyze_card(card: image::DynamicImage, count: u32) -> Card { trace!("Spawning threads for analyzing card..."); // Read the name and the series let card_clone = card.clone(); let name_thread = thread::spawn(move || { let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract"); - let name_img = card_clone.crop_imm(22, 26, 202 - 22, 70 - 26); - name_img.save("debug/4-name.png").unwrap(); - leptess.set_image_from_mem(&name_img.as_bytes()).unwrap(); - leptess.get_utf8_text().expect("Failed to read name") + // let binding = tesseract::init_tesseract_quick(false); + // let mut leptess = binding.lock().unwrap(); + let name_img = card_clone.crop_imm(22, 26, 204 - 22, 70 - 26); + let mut buffer: Cursor> = Cursor::new(Vec::new()); + match name_img.write_to(&mut buffer, ImageFormat::Png) { + Ok(_) => {} + Err(why) => { + panic!("{}", format!("Failed to write image: {:?}", why)); + } + }; + save_image_if_trace(&name_img, format!("debug/4-{}-name.png", count).as_str()); + leptess.set_image_from_mem(&buffer.get_mut()).unwrap(); + let mut name_str = leptess.get_utf8_text().expect("Failed to read name"); + fix_tesseract_string(&mut name_str); + name_str }); let card_clone = card.clone(); let series_thread = thread::spawn(move || { let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract"); - let series_img = card_clone.crop_imm(22, 276, 202 - 22, 330 - 276); - series_img.save("debug/4-series.png").unwrap(); - leptess.set_image_from_mem(&series_img.as_bytes()).unwrap(); - let series = leptess.get_utf8_text().unwrap(); + // let binding = tesseract::init_tesseract_quick(false); + // let mut leptess = binding.lock().unwrap(); + let series_img = card_clone.crop_imm(22, 276, 204 - 22, 330 - 276); + let mut buffer: Cursor> = Cursor::new(Vec::new()); + match series_img.write_to(&mut buffer, ImageFormat::Png) { + Ok(_) => {} + Err(why) => { + panic!("{}", format!("Failed to write image: {:?}", why)); + } + }; + save_image_if_trace(&series_img, format!("debug/4-{}-series.png", count).as_str()); + leptess.set_image_from_mem(&buffer.get_mut()).unwrap(); + let mut series_str = leptess.get_utf8_text().expect("Failed to read name"); + fix_tesseract_string(&mut series_str); + series_str }); let name = name_thread.join().unwrap(); trace!("Name: {}", name); let series = series_thread.join().unwrap(); - trace!("Series: {}", name); - // Read the print number + trace!("Series: {}", series); + // TODO: Read the print number + // TODO: Read the wishlist number (from our database) + return Card { + wishlist: None, + name, + series, + print: 0, + }; } -pub async fn analyze_drop_message( - leptess_arc: &Arc>, - message: &Message, -) -> Result<(), String> { +pub async fn analyze_drop_message(message: &Message) -> Result, String> { if message.attachments.len() < 1 { return Err("No attachments found".to_string()); }; @@ -62,49 +199,39 @@ pub async fn analyze_drop_message( Err(why) => return Err(format!("Failed to download attachment: {:?}", why)), }; // Pre-process the image - let mut img = match ImageReader::new(Cursor::new(image_bytes)).with_guessed_format() { - Ok(reader) => match reader.decode() { + let mut img = + match ImageReader::with_format(Cursor::new(image_bytes), ImageFormat::Png).decode() { Ok(img) => img, Err(why) => return Err(format!("Failed to decode image: {:?}", why)), - }, - Err(why) => return Err(format!("Failed to read image: {:?}", why)), - }; + }; trace!("Grayscaling image..."); img = img.grayscale(); - img.save("debug/1-grayscale.png").unwrap(); + save_image_if_trace(&img, "debug/1-grayscale.png"); trace!("Increasing contrast of the image..."); - img = img.adjust_contrast(1.0); - img.save("debug/2-contrast.png").unwrap(); + contrast_in_place(&mut img, 127.0); + save_image_if_trace(&img, "debug/2-contrast.png"); // Cropping cards let distance = 257 - 29 + 305 - 259; let cards_count = img.width() / distance; trace!("Cropping {} cards...", cards_count); let mut jobs: Vec<_> = Vec::new(); - for i_real in 0..cards_count { - let i = i_real.clone(); - let leptess_mutex = leptess_arc.clone(); - let img = img.clone(); + let mut cards: Vec = Vec::with_capacity(cards_count.try_into().unwrap()); + for index in 0..cards_count { + let i = index.clone(); + let x = 29 + distance * i; + let y = 34; + let width = 257 + distance * i - x; + let height = 387 - y; + trace!("Cropping card {} ({}, {}, {}, {})", i, x, y, width, height); + let card_img = img.crop_imm(x, y, width, height); + save_image_if_trace(&card_img, &format!("debug/3-cropped-{}.png", i)); let job = move || { - Ok({ - let x = 29 + distance * i; - let y = 34; - let width = 257 + distance * i - x; - let height = 387 - y; - trace!("Cropping card {} ({}, {}, {}, {})", i, x, y, width, height); - let card_img = img.crop_imm(x, y, width, height); - match card_img.save(format!("debug/3-cropped-{}.png", i)) { - Ok(_) => { - trace!("Saved cropped card {}", i); - let leptess = leptess_mutex.lock().unwrap(); - analyze_card(card_img); - } - Err(why) => return Err(format!("Failed to save image: {:?}", why)), - }; - }) + trace!("Analyzing card {}", i); + Ok((i, analyze_card(card_img, i))) }; jobs.push(job); } - let mut tasks: Vec>> = Vec::new(); + let mut tasks: Vec>> = Vec::new(); for job in jobs { let task = thread::spawn(job); tasks.push(task); @@ -112,15 +239,17 @@ pub async fn analyze_drop_message( for task in tasks { let result = task.join(); match result { - Ok(_) => (), - Err(why) => return Err(format!("Failed to crop card: {:?}", why)), + Ok(result) => { + match result { + Ok((i, card)) => { + trace!("Finished analyzing card {}", i); + cards.push(card); + } + Err(why) => return Err(format!("Failed to analyze card: {}", why)), + }; + } + Err(why) => return Err(format!("Failed to analyze card: {:?}", why)), }; } - let leptess_mutex = leptess_arc.clone(); - let mut leptess = leptess_mutex.lock().unwrap(); - match leptess.set_image_from_mem(&img.as_bytes()) { - Ok(_) => (), - Err(why) => return Err(format!("Failed to set image: {:?}", why)), - }; - Ok(()) + Ok(cards) } diff --git a/swordfish/src/main.rs b/swordfish/src/main.rs index a959ea9..bf19e41 100644 --- a/swordfish/src/main.rs +++ b/swordfish/src/main.rs @@ -1,5 +1,5 @@ use dotenvy::dotenv; -use once_cell::unsync::Lazy; + use serenity::async_trait; use serenity::framework::standard::macros::{command, group}; use serenity::framework::standard::{CommandResult, Configuration, StandardFramework}; @@ -10,27 +10,17 @@ use serenity::model::{ use serenity::prelude::*; use std::env; use std::path::Path; -use std::sync::{Arc, Mutex}; +use std::time::Instant; use swordfish_common::*; +use crate::config::Config; + mod config; mod helper; mod katana; mod template; const GITHUB_URL: &str = "https://github.com/teppyboy/swordfish"; -static mut LEPTESS_ARC: Lazy>> = Lazy::new(|| { - println!("Initializing Tesseract..."); - Arc::new(Mutex::new( - tesseract::init_tesseract(false).expect("Failed to initialize Tesseract"), - )) -}); -static mut LEPTESS_NUMERIC_ARC: Lazy>> = Lazy::new(|| { - println!("Initializing Tesseract (numeric filter)..."); - Arc::new(Mutex::new( - tesseract::init_tesseract(true).expect("Failed to initialize Tesseract (numeric filter)"), - )) -}); #[group] #[commands(ping, kdropanalyze)] @@ -55,24 +45,22 @@ impl EventHandler for Handler { } } -async fn parse_katana(ctx: &Context, msg: &Message) -> Result<(), String> { +async fn parse_katana(_ctx: &Context, msg: &Message) -> Result<(), String> { if msg.content.contains("is dropping 3 cards!") || msg .content .contains("I'm dropping 3 cards since this server is currently active!") { - trace!("Card drop detected, executing drop analyzer..."); - unsafe { - match katana::analyze_drop_message(&LEPTESS_ARC, msg).await { - Ok(_) => { - // msg.reply(ctx, "Drop analysis complete").await?; - } - Err(why) => { - trace!("Failed to analyze drop: `{:?}`", why); - // helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)).await; - } - }; - } + // trace!("Card drop detected, executing drop analyzer..."); + // match katana::analyze_drop_message(&LEPTESS_ARC, msg).await { + // Ok(_) => { + // // msg.reply(ctx, "Drop analysis complete").await?; + // } + // Err(why) => { + // trace!("Failed to analyze drop: `{:?}`", why); + // // helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)).await; + // } + // }; } Ok(()) } @@ -81,14 +69,15 @@ async fn parse_katana(ctx: &Context, msg: &Message) -> Result<(), String> { async fn main() { dotenv().unwrap(); let token = env::var("DISCORD_TOKEN").expect("Token not found"); - let config: config::Config; + let config: Config; if Path::new("./config.toml").exists() { config = config::Config::load("./config.toml"); } else { config = config::Config::new(); config.save("./config.toml"); } - let log_level = env::var("LOG_LEVEL").unwrap_or(config.log.level); + let level_str = config.log.level; + let log_level = env::var("LOG_LEVEL").unwrap_or(level_str); setup_logger(&log_level).expect("Failed to setup logger"); info!("Swordfish v{} - {}", env!("CARGO_PKG_VERSION"), GITHUB_URL); info!("Log level: {}", log_level); @@ -174,16 +163,28 @@ async fn kdropanalyze(ctx: &Context, msg: &Message) -> CommandResult { return Ok(()); } }; - unsafe { - match katana::analyze_drop_message(&LEPTESS_ARC, &target_msg).await { - Ok(_) => { - msg.reply(ctx, "Drop analysis complete").await?; + let start = Instant::now(); + match katana::analyze_drop_message(&target_msg).await { + Ok(cards) => { + let duration = start.elapsed(); + let mut reply_str = String::new(); + for card in cards { + // reply_str.push_str(&format!("{:?}\n", card)); + reply_str.push_str( + format!( + ":heart: `{:?}` • `{}` • **{}** • {}\n", + card.wishlist, card.print, card.name, card.series + ) + .as_str(), + ) } - Err(why) => { - helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)) - .await; - } - }; - } + reply_str.push_str(&format!("Time taken (to analyze): `{:?}`", duration)); + msg.reply(ctx, reply_str).await?; + } + Err(why) => { + helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)) + .await; + } + }; Ok(()) }