feat(katana): proper name & series reading

This commit is contained in:
tretrauit 2024-01-05 00:40:57 +07:00
parent 65911d3706
commit d1cee333ed
5 changed files with 245 additions and 104 deletions

View File

@ -2,6 +2,7 @@ pub use log;
pub use tracing::{debug, error, info, trace, warn}; pub use tracing::{debug, error, info, trace, warn};
use tracing_subscriber::{self, fmt, EnvFilter}; use tracing_subscriber::{self, fmt, EnvFilter};
pub mod constants; pub mod constants;
pub mod structs;
pub mod tesseract; pub mod tesseract;
pub fn setup_logger(level: &str) -> Result<(), fern::InitError> { pub fn setup_logger(level: &str) -> Result<(), fern::InitError> {

View File

@ -0,0 +1,7 @@
/// A card described by its name, series, print number and wishlist count.
///
/// `Clone`, `PartialEq` and `Eq` are derived in addition to `Debug` so that
/// cards can be copied and compared (e.g. in tests) without hand-written code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Card {
    /// Wishlist count, or `None` when it is not known.
    pub wishlist: Option<i32>,
    /// Name of the card.
    pub name: String,
    /// Series the card belongs to.
    pub series: String,
    /// Print number of the card.
    pub print: i32,
}

View File

@ -5,6 +5,9 @@ pub fn init_tesseract(numeric_only: bool) -> Result<LepTess, String> {
Ok(lep_tess) => lep_tess, Ok(lep_tess) => lep_tess,
Err(why) => return Err(format!("Failed to initialize Tesseract: {:?}", why)), Err(why) => return Err(format!("Failed to initialize Tesseract: {:?}", why)),
}; };
lep_tess.set_variable(Variable::TesseditPagesegMode, "6").unwrap();
// Use LSTM only.
lep_tess.set_variable(Variable::TesseditOcrEngineMode, "1").unwrap();
if numeric_only { if numeric_only {
match lep_tess.set_variable(Variable::TesseditCharWhitelist, "0123456789") { match lep_tess.set_variable(Variable::TesseditCharWhitelist, "0123456789") {
Ok(_) => (), Ok(_) => (),

View File

@ -1,57 +1,194 @@
use image::imageops::colorops::contrast_in_place;
use image::io::Reader as ImageReader; use image::io::Reader as ImageReader;
use image::ImageFormat;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
use serenity::model::channel::Message; use serenity::model::channel::Message;
use serenity::prelude::*;
use std::io::Cursor; use std::io::Cursor;
use std::sync::{Arc, Mutex}; use std::{env, thread};
use std::thread; use swordfish_common::structs::Card;
use swordfish_common::tesseract; use swordfish_common::tesseract;
use swordfish_common::{debug, error, info, trace, warn}; use swordfish_common::{trace, warn};
/// Matches a single ASCII letter or a space.
// NOTE(review): not referenced anywhere in the visible part of this file.
static TEXT_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"[A-Za-z ]").unwrap()
});
/// Matches a single ASCII letter or digit.
static TEXT_NUM_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[A-Za-z0-9]").unwrap());
/// Extra characters that are kept in OCR output besides letters and digits.
// NOTE(review): inside a character class `'-:` is a RANGE from `'` (0x27) to
// `:` (0x3A), so besides `'`, `-`, `:` and the space this also matches
// `( ) * + , . /` and the digits. If only the three literal characters were
// intended the hyphen must be escaped (`['\-: ]`) - confirm before changing,
// because e.g. `.` currently survives the filtering only thanks to the range.
static ALLOWED_CHARS_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"['-: ]").unwrap());
/// Replaces the first occurrence of `from` in `text` with `to`, in place.
///
/// Returns `true` when `text` was changed and `false` otherwise, which makes
/// `while replace_string(..) {}` a "replace all" loop.
///
/// Two degenerate patterns are rejected up front and leave `text` untouched:
/// * an empty `from` (it "matches" at index 0 of every string, so a `while`
///   loop would keep inserting `to` forever), and
/// * `from == to` (the replacement would be a no-op that still reports
///   success, again spinning a `while` loop forever).
fn replace_string(text: &mut String, from: &str, to: &str) -> bool {
    if from.is_empty() || from == to {
        return false;
    }
    match text.find(from) {
        Some(start) => {
            // `find` returns a byte offset on a char boundary and `from.len()`
            // is the byte length of the match, so the range is always valid.
            text.replace_range(start..start + from.len(), to);
            true
        }
        None => false,
    }
}
pub fn analyze_card(card: image::DynamicImage) { fn fix_tesseract_string(text: &mut String) {
// Remove the \n
trace!("Text: {}", text);
if text.ends_with("\n") {
text.pop();
}
// Workaround for a bug the text
// e.g. "We Never Learn\nN" -> "We Never Learn"
trace!("Text: {}", text);
if text.ends_with("\nN") {
for _ in 0..2 {
text.pop();
}
}
// Replace first (to prevent "byte index 13 is not a char boundary; it is inside '—' (bytes 11..14)")
while replace_string(text, "", "-") {
trace!("Replacing '—' with '-'");
}
// Workaround for a bug the text
trace!("Text: {}", text);
if text.starts_with("- ") || text.starts_with("-.") {
text.remove(0);
text.remove(0);
}
// Workaround IR -> Ik
// Maybe it only occurs if Ik is in the start of the string?
// e.g. "IReda" -> "Ikeda"
trace!("Text: {}", text);
replace_string(text, "IR", "Ik");
// Workaround for "A\n"
// This is usually the corner of the card
trace!("Text: {}", text);
replace_string(text, "A\n", "");
// Workaround for "\n." (and others in the future)
for (i, c) in text.clone().chars().enumerate() {
if c != '\n' {
continue;
}
let prev_char = match text.chars().nth(i - 1) {
Some(c) => c,
None => continue,
};
let next_char = match text.chars().nth(i + 1) {
Some(c) => c,
None => break,
};
let mut rm_prev: bool = false;
trace!("Prev char: {}", prev_char);
if ['-'].contains(&prev_char) {
rm_prev = true;
text.remove(i - 1);
}
trace!("Next char: {}", next_char);
if ['.'].contains(&next_char) {
if rm_prev {
text.remove(i);
} else {
text.remove(i + 1);
}
}
}
// Replace "\n" with " "
trace!("Text: {}", text);
replace_string(text, "\n", " ");
// Remove all non-alphanumeric characters
trace!("Text: {}", text);
text.retain(|c| TEXT_NUM_REGEX.is_match(&c.to_string()) || ALLOWED_CHARS_REGEX.is_match(&c.to_string()));
// Fix "mn" -> "III"
trace!("Text: {}", text);
if text.ends_with("mn") {
text.pop();
text.pop();
text.push_str("III");
}
// Fix "1ll" -> "III"
trace!("Text: {}", text);
replace_string(text, "1ll", "III");
// Replace multiple spaces with one space
trace!("Text: {}", text);
while replace_string(text, " ", " ") {
trace!("Removing multiple spaces");
}
// Workaround if the first character is a space
trace!("Text: {}", text);
while text.starts_with(" ") {
trace!("Removing leading space");
text.remove(0);
}
trace!("Text (final): {}", text);
}
/// Saves `img` to `path` as a debugging aid.
///
/// Nothing is written unless the `LOG_LEVEL` environment variable is set to
/// exactly `trace`. A failed save is reported with a warning and otherwise
/// ignored.
fn save_image_if_trace(img: &image::DynamicImage, path: &str) {
    let trace_enabled = match env::var("LOG_LEVEL") {
        Ok(level) => level == "trace",
        Err(_) => false,
    };
    if !trace_enabled {
        return;
    }
    if let Err(why) = img.save(path) {
        warn!("{}", format!("Failed to save image: {:?}", why));
        return;
    }
    trace!("Saved image to {}", path);
}
pub fn analyze_card(card: image::DynamicImage, count: u32) -> Card {
trace!("Spawning threads for analyzing card..."); trace!("Spawning threads for analyzing card...");
// Read the name and the series // Read the name and the series
let card_clone = card.clone(); let card_clone = card.clone();
let name_thread = thread::spawn(move || { let name_thread = thread::spawn(move || {
let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract"); let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract");
let name_img = card_clone.crop_imm(22, 26, 202 - 22, 70 - 26); // let binding = tesseract::init_tesseract_quick(false);
name_img.save("debug/4-name.png").unwrap(); // let mut leptess = binding.lock().unwrap();
leptess.set_image_from_mem(&name_img.as_bytes()).unwrap(); let name_img = card_clone.crop_imm(22, 26, 204 - 22, 70 - 26);
leptess.get_utf8_text().expect("Failed to read name") let mut buffer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
match name_img.write_to(&mut buffer, ImageFormat::Png) {
Ok(_) => {}
Err(why) => {
panic!("{}", format!("Failed to write image: {:?}", why));
}
};
save_image_if_trace(&name_img, format!("debug/4-{}-name.png", count).as_str());
leptess.set_image_from_mem(&buffer.get_mut()).unwrap();
let mut name_str = leptess.get_utf8_text().expect("Failed to read name");
fix_tesseract_string(&mut name_str);
name_str
}); });
let card_clone = card.clone(); let card_clone = card.clone();
let series_thread = thread::spawn(move || { let series_thread = thread::spawn(move || {
let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract"); let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract");
let series_img = card_clone.crop_imm(22, 276, 202 - 22, 330 - 276); // let binding = tesseract::init_tesseract_quick(false);
series_img.save("debug/4-series.png").unwrap(); // let mut leptess = binding.lock().unwrap();
leptess.set_image_from_mem(&series_img.as_bytes()).unwrap(); let series_img = card_clone.crop_imm(22, 276, 204 - 22, 330 - 276);
let series = leptess.get_utf8_text().unwrap(); let mut buffer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
match series_img.write_to(&mut buffer, ImageFormat::Png) {
Ok(_) => {}
Err(why) => {
panic!("{}", format!("Failed to write image: {:?}", why));
}
};
save_image_if_trace(&series_img, format!("debug/4-{}-series.png", count).as_str());
leptess.set_image_from_mem(&buffer.get_mut()).unwrap();
let mut series_str = leptess.get_utf8_text().expect("Failed to read name");
fix_tesseract_string(&mut series_str);
series_str
}); });
let name = name_thread.join().unwrap(); let name = name_thread.join().unwrap();
trace!("Name: {}", name); trace!("Name: {}", name);
let series = series_thread.join().unwrap(); let series = series_thread.join().unwrap();
trace!("Series: {}", name); trace!("Series: {}", series);
// Read the print number // TODO: Read the print number
// TODO: Read the wishlist number (from our database)
return Card {
wishlist: None,
name,
series,
print: 0,
};
} }
pub async fn analyze_drop_message( pub async fn analyze_drop_message(message: &Message) -> Result<Vec<Card>, String> {
leptess_arc: &Arc<Mutex<tesseract::LepTess>>,
message: &Message,
) -> Result<(), String> {
if message.attachments.len() < 1 { if message.attachments.len() < 1 {
return Err("No attachments found".to_string()); return Err("No attachments found".to_string());
}; };
@ -62,49 +199,39 @@ pub async fn analyze_drop_message(
Err(why) => return Err(format!("Failed to download attachment: {:?}", why)), Err(why) => return Err(format!("Failed to download attachment: {:?}", why)),
}; };
// Pre-process the image // Pre-process the image
let mut img = match ImageReader::new(Cursor::new(image_bytes)).with_guessed_format() { let mut img =
Ok(reader) => match reader.decode() { match ImageReader::with_format(Cursor::new(image_bytes), ImageFormat::Png).decode() {
Ok(img) => img, Ok(img) => img,
Err(why) => return Err(format!("Failed to decode image: {:?}", why)), Err(why) => return Err(format!("Failed to decode image: {:?}", why)),
}, };
Err(why) => return Err(format!("Failed to read image: {:?}", why)),
};
trace!("Grayscaling image..."); trace!("Grayscaling image...");
img = img.grayscale(); img = img.grayscale();
img.save("debug/1-grayscale.png").unwrap(); save_image_if_trace(&img, "debug/1-grayscale.png");
trace!("Increasing contrast of the image..."); trace!("Increasing contrast of the image...");
img = img.adjust_contrast(1.0); contrast_in_place(&mut img, 127.0);
img.save("debug/2-contrast.png").unwrap(); save_image_if_trace(&img, "debug/2-contrast.png");
// Cropping cards // Cropping cards
let distance = 257 - 29 + 305 - 259; let distance = 257 - 29 + 305 - 259;
let cards_count = img.width() / distance; let cards_count = img.width() / distance;
trace!("Cropping {} cards...", cards_count); trace!("Cropping {} cards...", cards_count);
let mut jobs: Vec<_> = Vec::new(); let mut jobs: Vec<_> = Vec::new();
for i_real in 0..cards_count { let mut cards: Vec<Card> = Vec::with_capacity(cards_count.try_into().unwrap());
let i = i_real.clone(); for index in 0..cards_count {
let leptess_mutex = leptess_arc.clone(); let i = index.clone();
let img = img.clone(); let x = 29 + distance * i;
let y = 34;
let width = 257 + distance * i - x;
let height = 387 - y;
trace!("Cropping card {} ({}, {}, {}, {})", i, x, y, width, height);
let card_img = img.crop_imm(x, y, width, height);
save_image_if_trace(&card_img, &format!("debug/3-cropped-{}.png", i));
let job = move || { let job = move || {
Ok({ trace!("Analyzing card {}", i);
let x = 29 + distance * i; Ok((i, analyze_card(card_img, i)))
let y = 34;
let width = 257 + distance * i - x;
let height = 387 - y;
trace!("Cropping card {} ({}, {}, {}, {})", i, x, y, width, height);
let card_img = img.crop_imm(x, y, width, height);
match card_img.save(format!("debug/3-cropped-{}.png", i)) {
Ok(_) => {
trace!("Saved cropped card {}", i);
let leptess = leptess_mutex.lock().unwrap();
analyze_card(card_img);
}
Err(why) => return Err(format!("Failed to save image: {:?}", why)),
};
})
}; };
jobs.push(job); jobs.push(job);
} }
let mut tasks: Vec<thread::JoinHandle<Result<(), String>>> = Vec::new(); let mut tasks: Vec<thread::JoinHandle<Result<(u32, Card), String>>> = Vec::new();
for job in jobs { for job in jobs {
let task = thread::spawn(job); let task = thread::spawn(job);
tasks.push(task); tasks.push(task);
@ -112,15 +239,17 @@ pub async fn analyze_drop_message(
for task in tasks { for task in tasks {
let result = task.join(); let result = task.join();
match result { match result {
Ok(_) => (), Ok(result) => {
Err(why) => return Err(format!("Failed to crop card: {:?}", why)), match result {
Ok((i, card)) => {
trace!("Finished analyzing card {}", i);
cards.push(card);
}
Err(why) => return Err(format!("Failed to analyze card: {}", why)),
};
}
Err(why) => return Err(format!("Failed to analyze card: {:?}", why)),
}; };
} }
let leptess_mutex = leptess_arc.clone(); Ok(cards)
let mut leptess = leptess_mutex.lock().unwrap();
match leptess.set_image_from_mem(&img.as_bytes()) {
Ok(_) => (),
Err(why) => return Err(format!("Failed to set image: {:?}", why)),
};
Ok(())
} }

View File

@ -1,5 +1,5 @@
use dotenvy::dotenv; use dotenvy::dotenv;
use once_cell::unsync::Lazy;
use serenity::async_trait; use serenity::async_trait;
use serenity::framework::standard::macros::{command, group}; use serenity::framework::standard::macros::{command, group};
use serenity::framework::standard::{CommandResult, Configuration, StandardFramework}; use serenity::framework::standard::{CommandResult, Configuration, StandardFramework};
@ -10,27 +10,17 @@ use serenity::model::{
use serenity::prelude::*; use serenity::prelude::*;
use std::env; use std::env;
use std::path::Path; use std::path::Path;
use std::sync::{Arc, Mutex}; use std::time::Instant;
use swordfish_common::*; use swordfish_common::*;
use crate::config::Config;
mod config; mod config;
mod helper; mod helper;
mod katana; mod katana;
mod template; mod template;
const GITHUB_URL: &str = "https://github.com/teppyboy/swordfish"; const GITHUB_URL: &str = "https://github.com/teppyboy/swordfish";
static mut LEPTESS_ARC: Lazy<Arc<Mutex<tesseract::LepTess>>> = Lazy::new(|| {
println!("Initializing Tesseract...");
Arc::new(Mutex::new(
tesseract::init_tesseract(false).expect("Failed to initialize Tesseract"),
))
});
static mut LEPTESS_NUMERIC_ARC: Lazy<Arc<Mutex<tesseract::LepTess>>> = Lazy::new(|| {
println!("Initializing Tesseract (numeric filter)...");
Arc::new(Mutex::new(
tesseract::init_tesseract(true).expect("Failed to initialize Tesseract (numeric filter)"),
))
});
#[group] #[group]
#[commands(ping, kdropanalyze)] #[commands(ping, kdropanalyze)]
@ -55,24 +45,22 @@ impl EventHandler for Handler {
} }
} }
async fn parse_katana(ctx: &Context, msg: &Message) -> Result<(), String> { async fn parse_katana(_ctx: &Context, msg: &Message) -> Result<(), String> {
if msg.content.contains("is dropping 3 cards!") if msg.content.contains("is dropping 3 cards!")
|| msg || msg
.content .content
.contains("I'm dropping 3 cards since this server is currently active!") .contains("I'm dropping 3 cards since this server is currently active!")
{ {
trace!("Card drop detected, executing drop analyzer..."); // trace!("Card drop detected, executing drop analyzer...");
unsafe { // match katana::analyze_drop_message(&LEPTESS_ARC, msg).await {
match katana::analyze_drop_message(&LEPTESS_ARC, msg).await { // Ok(_) => {
Ok(_) => { // // msg.reply(ctx, "Drop analysis complete").await?;
// msg.reply(ctx, "Drop analysis complete").await?; // }
} // Err(why) => {
Err(why) => { // trace!("Failed to analyze drop: `{:?}`", why);
trace!("Failed to analyze drop: `{:?}`", why); // // helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)).await;
// helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)).await; // }
} // };
};
}
} }
Ok(()) Ok(())
} }
@ -81,14 +69,15 @@ async fn parse_katana(ctx: &Context, msg: &Message) -> Result<(), String> {
async fn main() { async fn main() {
dotenv().unwrap(); dotenv().unwrap();
let token = env::var("DISCORD_TOKEN").expect("Token not found"); let token = env::var("DISCORD_TOKEN").expect("Token not found");
let config: config::Config; let config: Config;
if Path::new("./config.toml").exists() { if Path::new("./config.toml").exists() {
config = config::Config::load("./config.toml"); config = config::Config::load("./config.toml");
} else { } else {
config = config::Config::new(); config = config::Config::new();
config.save("./config.toml"); config.save("./config.toml");
} }
let log_level = env::var("LOG_LEVEL").unwrap_or(config.log.level); let level_str = config.log.level;
let log_level = env::var("LOG_LEVEL").unwrap_or(level_str);
setup_logger(&log_level).expect("Failed to setup logger"); setup_logger(&log_level).expect("Failed to setup logger");
info!("Swordfish v{} - {}", env!("CARGO_PKG_VERSION"), GITHUB_URL); info!("Swordfish v{} - {}", env!("CARGO_PKG_VERSION"), GITHUB_URL);
info!("Log level: {}", log_level); info!("Log level: {}", log_level);
@ -174,16 +163,28 @@ async fn kdropanalyze(ctx: &Context, msg: &Message) -> CommandResult {
return Ok(()); return Ok(());
} }
}; };
unsafe { let start = Instant::now();
match katana::analyze_drop_message(&LEPTESS_ARC, &target_msg).await { match katana::analyze_drop_message(&target_msg).await {
Ok(_) => { Ok(cards) => {
msg.reply(ctx, "Drop analysis complete").await?; let duration = start.elapsed();
let mut reply_str = String::new();
for card in cards {
// reply_str.push_str(&format!("{:?}\n", card));
reply_str.push_str(
format!(
":heart: `{:?}` • `{}` • **{}** • {}\n",
card.wishlist, card.print, card.name, card.series
)
.as_str(),
)
} }
Err(why) => { reply_str.push_str(&format!("Time taken (to analyze): `{:?}`", duration));
helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)) msg.reply(ctx, reply_str).await?;
.await; }
} Err(why) => {
}; helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why))
} .await;
}
};
Ok(()) Ok(())
} }