feat(katana): proper name & series reading

This commit is contained in:
tretrauit 2024-01-05 00:40:57 +07:00
parent 65911d3706
commit d1cee333ed
5 changed files with 245 additions and 104 deletions

View File

@ -2,6 +2,7 @@ pub use log;
pub use tracing::{debug, error, info, trace, warn};
use tracing_subscriber::{self, fmt, EnvFilter};
pub mod constants;
pub mod structs;
pub mod tesseract;
pub fn setup_logger(level: &str) -> Result<(), fern::InitError> {

View File

@ -0,0 +1,7 @@
/// A single card, described by the values read from (or looked up for) it.
///
/// `Clone`, `PartialEq` and `Eq` are derived in addition to `Debug` so that
/// cards can be duplicated and compared directly (e.g. in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Card {
    /// Wishlist count, or `None` when it is not known.
    pub wishlist: Option<i32>,
    /// Name printed on the card.
    pub name: String,
    /// Series the card belongs to.
    pub series: String,
    /// Print number of the card.
    pub print: i32,
}

View File

@ -5,6 +5,9 @@ pub fn init_tesseract(numeric_only: bool) -> Result<LepTess, String> {
Ok(lep_tess) => lep_tess,
Err(why) => return Err(format!("Failed to initialize Tesseract: {:?}", why)),
};
lep_tess.set_variable(Variable::TesseditPagesegMode, "6").unwrap();
// Use LSTM only.
lep_tess.set_variable(Variable::TesseditOcrEngineMode, "1").unwrap();
if numeric_only {
match lep_tess.set_variable(Variable::TesseditCharWhitelist, "0123456789") {
Ok(_) => (),

View File

@ -1,57 +1,194 @@
use image::imageops::colorops::contrast_in_place;
use image::io::Reader as ImageReader;
use image::ImageFormat;
use once_cell::sync::Lazy;
use regex::Regex;
use serenity::model::channel::Message;
use serenity::prelude::*;
use std::io::Cursor;
use std::sync::{Arc, Mutex};
use std::thread;
use std::{env, thread};
use swordfish_common::structs::Card;
use swordfish_common::tesseract;
use swordfish_common::{debug, error, info, trace, warn};
use swordfish_common::{trace, warn};
// Matches a single ASCII letter or a space.
static TEXT_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"[A-Za-z ]").unwrap()
});
// Matches a single ASCII letter or digit.
static TEXT_NUM_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[A-Za-z0-9]").unwrap());
// Extra characters allowed in addition to TEXT_NUM_REGEX.
// NOTE(review): inside the class, `'-:` is a *range* (U+0027 ..= U+003A), so this
// matches ' ( ) * + , - . / 0-9 : and the space — not just the three literals
// ' - : and space. Confirm that keeping '.', ',' etc. is intended.
static ALLOWED_CHARS_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"['-: ]").unwrap());
pub struct Card {
wishlist: Option<i32>,
name: String,
series: String,
print: i32,
/// Replaces the first occurrence of `from` in `text` with `to`, in place.
///
/// Returns `true` if a replacement was made and `false` if `from` does not
/// occur in `text`.
///
/// An empty `from` is treated as "not found": `str::find("")` always succeeds
/// at offset 0, which would insert `to` at the front and report success every
/// time, so a `while replace_string(..)` loop would never terminate.
fn replace_string(text: &mut String, from: &str, to: &str) -> bool {
    if from.is_empty() {
        return false;
    }
    match text.find(from) {
        Some(start) => {
            // `find` returns a byte offset on a char boundary, and so is
            // `start + from.len()`, so this range is always valid.
            text.replace_range(start..start + from.len(), to);
            true
        }
        None => false,
    }
}
pub fn analyze_card(card: image::DynamicImage) {
/// Cleans up raw Tesseract output for a card name or series, in place.
///
/// The steps are OCR workarounds and run in a fixed order:
///
/// 1. drop the trailing newline, and a trailing `"\nN"` artefact;
/// 2. turn every em dash (`—`) into an ASCII `-`;
/// 3. drop a leading `"- "` / `"-."`;
/// 4. fix the first `"IR"` -> `"Ik"` and remove the first `"A\n"`;
/// 5. around each inner newline, drop a `-` right before it and a `.` right
///    after it;
/// 6. turn every newline into a space;
/// 7. keep only characters matched by `TEXT_NUM_REGEX` or
///    `ALLOWED_CHARS_REGEX`;
/// 8. fix a trailing `"mn"` and the first `"1ll"` to `"III"`;
/// 9. collapse runs of spaces and strip leading spaces.
///
/// All edits work on whole characters, so multi-byte characters in the OCR
/// output cannot cause "not a char boundary" panics.
fn fix_tesseract_string(text: &mut String) {
    // Remove the trailing "\n" Tesseract appends.
    trace!("Text: {}", text);
    if text.ends_with('\n') {
        text.pop();
    }

    // Workaround for a stray trailing line,
    // e.g. "We Never Learn\nN" -> "We Never Learn"
    trace!("Text: {}", text);
    if text.ends_with("\nN") {
        text.truncate(text.len() - "\nN".len());
    }

    // Normalise em dashes first so the ASCII-only checks below see a plain '-'.
    if text.contains('—') {
        trace!("Replacing '—' with '-'");
        *text = text.replace('—', "-");
    }

    // Workaround for a leading "- " / "-." artefact (both are two ASCII bytes).
    trace!("Text: {}", text);
    if text.starts_with("- ") || text.starts_with("-.") {
        text.replace_range(..2, "");
    }

    // Workaround IR -> Ik
    // Maybe it only occurs if Ik is in the start of the string?
    // e.g. "IReda" -> "Ikeda"
    trace!("Text: {}", text);
    replace_string(text, "IR", "Ik");

    // Workaround for "A\n"
    // This is usually the corner of the card
    trace!("Text: {}", text);
    replace_string(text, "A\n", "");

    // Workaround for "-\n" and "\n." (and others in the future).
    // The string is rebuilt in a single pass instead of being edited by index,
    // so removals cannot shift later positions or hit a non-char-boundary.
    trace!("Text: {}", text);
    let mut cleaned = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\n' {
            cleaned.push(c);
            continue;
        }
        // A newline at either end of the text has only one neighbour; leave
        // it (and that neighbour) untouched.
        let at_edge = cleaned.is_empty() || chars.peek().is_none();
        if !at_edge && cleaned.ends_with('-') {
            trace!("Removing '-' before newline");
            cleaned.pop();
        }
        cleaned.push('\n');
        if !at_edge && chars.peek() == Some(&'.') {
            trace!("Removing '.' after newline");
            chars.next();
        }
    }
    *text = cleaned;

    // Replace every "\n" with " ". Doing this for all newlines matters: any
    // newline left over would be dropped by the filter below and glue two
    // words together.
    trace!("Text: {}", text);
    if text.contains('\n') {
        *text = text.replace('\n', " ");
    }

    // Remove every character that is neither alphanumeric nor explicitly allowed.
    trace!("Text: {}", text);
    text.retain(|c| {
        // Encode into a stack buffer rather than allocating a String per char.
        let mut buf = [0u8; 4];
        let s: &str = c.encode_utf8(&mut buf);
        TEXT_NUM_REGEX.is_match(s) || ALLOWED_CHARS_REGEX.is_match(s)
    });

    // Fix "mn" -> "III"
    trace!("Text: {}", text);
    if text.ends_with("mn") {
        text.truncate(text.len() - "mn".len());
        text.push_str("III");
    }

    // Fix "1ll" -> "III"
    trace!("Text: {}", text);
    replace_string(text, "1ll", "III");

    // Collapse each run of spaces into a single space.
    trace!("Text: {}", text);
    let mut prev_was_space = false;
    text.retain(|c| {
        let is_space = c == ' ';
        let keep = !(is_space && prev_was_space);
        prev_was_space = is_space;
        keep
    });

    // Workaround if the first character is a space.
    trace!("Text: {}", text);
    let leading_spaces = text.len() - text.trim_start_matches(' ').len();
    if leading_spaces > 0 {
        trace!("Removing leading space");
        text.replace_range(..leading_spaces, "");
    }

    trace!("Text (final): {}", text);
}
/// Writes `img` to `path`, but only when the `LOG_LEVEL` environment variable
/// is set to exactly `"trace"`.
///
/// This is a best-effort debugging aid: a failed save is logged as a warning
/// and otherwise ignored.
fn save_image_if_trace(img: &image::DynamicImage, path: &str) {
    // An unset (or unreadable) LOG_LEVEL counts as "not tracing".
    let tracing_enabled = env::var("LOG_LEVEL").map_or(false, |level| level == "trace");
    if !tracing_enabled {
        return;
    }

    if let Err(why) = img.save(path) {
        warn!("Failed to save image: {:?}", why);
    } else {
        trace!("Saved image to {}", path);
    }
}
/// Runs OCR on one cropped card image and returns the recognised `Card`.
///
/// Two threads are spawned, one for the name region and one for the series
/// region. Each crops its region, encodes it as PNG into an in-memory buffer,
/// feeds that buffer to its own Tesseract instance and cleans the recognised
/// text with `fix_tesseract_string`. `count` is only used to build the file
/// names of the debug images.
///
/// The returned card always has `wishlist: None` and `print: 0`; reading those
/// values is still a TODO below.
///
/// # Panics
/// Panics if Tesseract cannot be initialised, if a region cannot be encoded
/// or recognised, or if either worker thread panics (`join().unwrap()`).
///
/// NOTE(review): this listing comes from a commit diff view and appears to
/// interleave removed and added lines (e.g. two `name_img` / `series_img`
/// bindings per closure and two "Series" traces) — confirm against the real file.
pub fn analyze_card(card: image::DynamicImage, count: u32) -> Card {
trace!("Spawning threads for analyzing card...");
// Read the name and the series
// Each thread gets its own clone of the image because the closure is `move`.
let card_clone = card.clone();
let name_thread = thread::spawn(move || {
let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract");
let name_img = card_clone.crop_imm(22, 26, 202 - 22, 70 - 26);
name_img.save("debug/4-name.png").unwrap();
leptess.set_image_from_mem(&name_img.as_bytes()).unwrap();
leptess.get_utf8_text().expect("Failed to read name")
// let binding = tesseract::init_tesseract_quick(false);
// let mut leptess = binding.lock().unwrap();
// Name region: x = 22..204, y = 26..70 within the card image.
let name_img = card_clone.crop_imm(22, 26, 204 - 22, 70 - 26);
// Encode the crop as PNG in memory; the encoded bytes are what Tesseract is given.
let mut buffer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
match name_img.write_to(&mut buffer, ImageFormat::Png) {
Ok(_) => {}
Err(why) => {
panic!("{}", format!("Failed to write image: {:?}", why));
}
};
save_image_if_trace(&name_img, format!("debug/4-{}-name.png", count).as_str());
leptess.set_image_from_mem(&buffer.get_mut()).unwrap();
let mut name_str = leptess.get_utf8_text().expect("Failed to read name");
fix_tesseract_string(&mut name_str);
name_str
});
let card_clone = card.clone();
let series_thread = thread::spawn(move || {
let mut leptess = tesseract::init_tesseract(false).expect("Failed to initialize Tesseract");
let series_img = card_clone.crop_imm(22, 276, 202 - 22, 330 - 276);
series_img.save("debug/4-series.png").unwrap();
leptess.set_image_from_mem(&series_img.as_bytes()).unwrap();
let series = leptess.get_utf8_text().unwrap();
// let binding = tesseract::init_tesseract_quick(false);
// let mut leptess = binding.lock().unwrap();
// Series region: x = 22..204, y = 276..330 within the card image.
let series_img = card_clone.crop_imm(22, 276, 204 - 22, 330 - 276);
let mut buffer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
match series_img.write_to(&mut buffer, ImageFormat::Png) {
Ok(_) => {}
Err(why) => {
panic!("{}", format!("Failed to write image: {:?}", why));
}
};
save_image_if_trace(&series_img, format!("debug/4-{}-series.png", count).as_str());
leptess.set_image_from_mem(&buffer.get_mut()).unwrap();
// NOTE(review): the panic message says "name" although this reads the series.
let mut series_str = leptess.get_utf8_text().expect("Failed to read name");
fix_tesseract_string(&mut series_str);
series_str
});
// Wait for both OCR threads; a panic inside either one is re-raised here by `unwrap`.
let name = name_thread.join().unwrap();
trace!("Name: {}", name);
let series = series_thread.join().unwrap();
// NOTE(review): the next trace is labelled "Series" but prints `name`.
trace!("Series: {}", name);
// Read the print number
trace!("Series: {}", series);
// TODO: Read the print number
// TODO: Read the wishlist number (from our database)
return Card {
wishlist: None,
name,
series,
print: 0,
};
}
pub async fn analyze_drop_message(
leptess_arc: &Arc<Mutex<tesseract::LepTess>>,
message: &Message,
) -> Result<(), String> {
pub async fn analyze_drop_message(message: &Message) -> Result<Vec<Card>, String> {
if message.attachments.len() < 1 {
return Err("No attachments found".to_string());
};
@ -62,49 +199,39 @@ pub async fn analyze_drop_message(
Err(why) => return Err(format!("Failed to download attachment: {:?}", why)),
};
// Pre-process the image
let mut img = match ImageReader::new(Cursor::new(image_bytes)).with_guessed_format() {
Ok(reader) => match reader.decode() {
let mut img =
match ImageReader::with_format(Cursor::new(image_bytes), ImageFormat::Png).decode() {
Ok(img) => img,
Err(why) => return Err(format!("Failed to decode image: {:?}", why)),
},
Err(why) => return Err(format!("Failed to read image: {:?}", why)),
};
};
trace!("Grayscaling image...");
img = img.grayscale();
img.save("debug/1-grayscale.png").unwrap();
save_image_if_trace(&img, "debug/1-grayscale.png");
trace!("Increasing contrast of the image...");
img = img.adjust_contrast(1.0);
img.save("debug/2-contrast.png").unwrap();
contrast_in_place(&mut img, 127.0);
save_image_if_trace(&img, "debug/2-contrast.png");
// Cropping cards
let distance = 257 - 29 + 305 - 259;
let cards_count = img.width() / distance;
trace!("Cropping {} cards...", cards_count);
let mut jobs: Vec<_> = Vec::new();
for i_real in 0..cards_count {
let i = i_real.clone();
let leptess_mutex = leptess_arc.clone();
let img = img.clone();
let mut cards: Vec<Card> = Vec::with_capacity(cards_count.try_into().unwrap());
for index in 0..cards_count {
let i = index.clone();
let x = 29 + distance * i;
let y = 34;
let width = 257 + distance * i - x;
let height = 387 - y;
trace!("Cropping card {} ({}, {}, {}, {})", i, x, y, width, height);
let card_img = img.crop_imm(x, y, width, height);
save_image_if_trace(&card_img, &format!("debug/3-cropped-{}.png", i));
let job = move || {
Ok({
let x = 29 + distance * i;
let y = 34;
let width = 257 + distance * i - x;
let height = 387 - y;
trace!("Cropping card {} ({}, {}, {}, {})", i, x, y, width, height);
let card_img = img.crop_imm(x, y, width, height);
match card_img.save(format!("debug/3-cropped-{}.png", i)) {
Ok(_) => {
trace!("Saved cropped card {}", i);
let leptess = leptess_mutex.lock().unwrap();
analyze_card(card_img);
}
Err(why) => return Err(format!("Failed to save image: {:?}", why)),
};
})
trace!("Analyzing card {}", i);
Ok((i, analyze_card(card_img, i)))
};
jobs.push(job);
}
let mut tasks: Vec<thread::JoinHandle<Result<(), String>>> = Vec::new();
let mut tasks: Vec<thread::JoinHandle<Result<(u32, Card), String>>> = Vec::new();
for job in jobs {
let task = thread::spawn(job);
tasks.push(task);
@ -112,15 +239,17 @@ pub async fn analyze_drop_message(
for task in tasks {
let result = task.join();
match result {
Ok(_) => (),
Err(why) => return Err(format!("Failed to crop card: {:?}", why)),
Ok(result) => {
match result {
Ok((i, card)) => {
trace!("Finished analyzing card {}", i);
cards.push(card);
}
Err(why) => return Err(format!("Failed to analyze card: {}", why)),
};
}
Err(why) => return Err(format!("Failed to analyze card: {:?}", why)),
};
}
let leptess_mutex = leptess_arc.clone();
let mut leptess = leptess_mutex.lock().unwrap();
match leptess.set_image_from_mem(&img.as_bytes()) {
Ok(_) => (),
Err(why) => return Err(format!("Failed to set image: {:?}", why)),
};
Ok(())
Ok(cards)
}

View File

@ -1,5 +1,5 @@
use dotenvy::dotenv;
use once_cell::unsync::Lazy;
use serenity::async_trait;
use serenity::framework::standard::macros::{command, group};
use serenity::framework::standard::{CommandResult, Configuration, StandardFramework};
@ -10,27 +10,17 @@ use serenity::model::{
use serenity::prelude::*;
use std::env;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use swordfish_common::*;
use crate::config::Config;
mod config;
mod helper;
mod katana;
mod template;
const GITHUB_URL: &str = "https://github.com/teppyboy/swordfish";
static mut LEPTESS_ARC: Lazy<Arc<Mutex<tesseract::LepTess>>> = Lazy::new(|| {
println!("Initializing Tesseract...");
Arc::new(Mutex::new(
tesseract::init_tesseract(false).expect("Failed to initialize Tesseract"),
))
});
static mut LEPTESS_NUMERIC_ARC: Lazy<Arc<Mutex<tesseract::LepTess>>> = Lazy::new(|| {
println!("Initializing Tesseract (numeric filter)...");
Arc::new(Mutex::new(
tesseract::init_tesseract(true).expect("Failed to initialize Tesseract (numeric filter)"),
))
});
#[group]
#[commands(ping, kdropanalyze)]
@ -55,24 +45,22 @@ impl EventHandler for Handler {
}
}
/// Checks whether `msg` announces a card drop, i.e. whether its content
/// contains one of the two "dropping 3 cards" phrases below.
///
/// Always returns `Ok(())`; a failed drop analysis is only traced.
///
/// NOTE(review): this listing comes from a commit diff view and appears to
/// interleave removed and added lines (two signatures, an `unsafe` analyzer
/// call next to a commented-out copy of it) — confirm against the real file.
async fn parse_katana(ctx: &Context, msg: &Message) -> Result<(), String> {
async fn parse_katana(_ctx: &Context, msg: &Message) -> Result<(), String> {
// Either phrase marks the message as a three-card drop.
if msg.content.contains("is dropping 3 cards!")
|| msg
.content
.contains("I'm dropping 3 cards since this server is currently active!")
{
trace!("Card drop detected, executing drop analyzer...");
unsafe {
match katana::analyze_drop_message(&LEPTESS_ARC, msg).await {
Ok(_) => {
// msg.reply(ctx, "Drop analysis complete").await?;
}
Err(why) => {
trace!("Failed to analyze drop: `{:?}`", why);
// helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)).await;
}
};
}
// trace!("Card drop detected, executing drop analyzer...");
// match katana::analyze_drop_message(&LEPTESS_ARC, msg).await {
// Ok(_) => {
// // msg.reply(ctx, "Drop analysis complete").await?;
// }
// Err(why) => {
// trace!("Failed to analyze drop: `{:?}`", why);
// // helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why)).await;
// }
// };
}
Ok(())
}
@ -81,14 +69,15 @@ async fn parse_katana(ctx: &Context, msg: &Message) -> Result<(), String> {
async fn main() {
dotenv().unwrap();
let token = env::var("DISCORD_TOKEN").expect("Token not found");
let config: config::Config;
let config: Config;
if Path::new("./config.toml").exists() {
config = config::Config::load("./config.toml");
} else {
config = config::Config::new();
config.save("./config.toml");
}
let log_level = env::var("LOG_LEVEL").unwrap_or(config.log.level);
let level_str = config.log.level;
let log_level = env::var("LOG_LEVEL").unwrap_or(level_str);
setup_logger(&log_level).expect("Failed to setup logger");
info!("Swordfish v{} - {}", env!("CARGO_PKG_VERSION"), GITHUB_URL);
info!("Log level: {}", log_level);
@ -174,16 +163,28 @@ async fn kdropanalyze(ctx: &Context, msg: &Message) -> CommandResult {
return Ok(());
}
};
unsafe {
match katana::analyze_drop_message(&LEPTESS_ARC, &target_msg).await {
Ok(_) => {
msg.reply(ctx, "Drop analysis complete").await?;
let start = Instant::now();
match katana::analyze_drop_message(&target_msg).await {
Ok(cards) => {
let duration = start.elapsed();
let mut reply_str = String::new();
for card in cards {
// reply_str.push_str(&format!("{:?}\n", card));
reply_str.push_str(
format!(
":heart: `{:?}` • `{}` • **{}** • {}\n",
card.wishlist, card.print, card.name, card.series
)
.as_str(),
)
}
Err(why) => {
helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why))
.await;
}
};
}
reply_str.push_str(&format!("Time taken (to analyze): `{:?}`", duration));
msg.reply(ctx, reply_str).await?;
}
Err(why) => {
helper::error_message(ctx, msg, format!("Failed to analyze drop: `{:?}`", why))
.await;
}
};
Ok(())
}