From ebafd93110133bdd03c4d64f6b901c3dc6035535 Mon Sep 17 00:00:00 2001 From: tretrauit Date: Sun, 7 Jan 2024 18:15:51 +0700 Subject: [PATCH] fix(katana): add workaround for text parsing --- swordfish/src/katana.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/swordfish/src/katana.rs b/swordfish/src/katana.rs index 718111b..6efce8e 100644 --- a/swordfish/src/katana.rs +++ b/swordfish/src/katana.rs @@ -57,6 +57,10 @@ fn fix_tesseract_string(text: &mut String) { text.remove(0); text.remove(0); } + // Remove the first character if it is not alphanumeric + if !TEXT_NUM_REGEX.is_match(text.clone().chars().nth(0).unwrap().to_string().as_str()) { + text.remove(0); + } // Workaround IR -> Ik // Maybe it only occurs if Ik is in the start of the string? // e.g. "IReda" -> "Ikeda"