From 5ae36d7f2af7a76c377a2a0b88040b1764507de2 Mon Sep 17 00:00:00 2001 From: tretrauit Date: Thu, 1 Feb 2024 00:08:32 +0700 Subject: [PATCH] =?UTF-8?q?fix(tesseract/regex):=20add=20workaround=20for?= =?UTF-8?q?=20=C3=A9,=20=C3=A1=20and=20d?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- swordfish/src/tesseract/utils.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/swordfish/src/tesseract/utils.rs b/swordfish/src/tesseract/utils.rs index 657e9e5..5c030fc 100644 --- a/swordfish/src/tesseract/utils.rs +++ b/swordfish/src/tesseract/utils.rs @@ -170,6 +170,8 @@ pub fn regexify_text(text: &String) -> String { ascii_text.push_str("[Il!1i]"); } else if ['o', 'c'].contains(&c) { ascii_text.push_str("[oc]"); + } else if ['é', 'á', 'd'].contains(&c) { + ascii_text.push_str("[éád]"); } else if ['.'].contains(&c) { if prev_chars.len() > 3 { let prev_char = prev_chars[prev_chars.len() - 1];