From 06d2c2de916bdbc3aa612de8ea33b5a172cca706 Mon Sep 17 00:00:00 2001 From: Inga Date: Sun, 14 Jan 2024 16:39:19 +0000 Subject: [PATCH] implemented basic english spellchecker regex --- .../viteThesaurusRegexPlugin.test.ts | 43 +++++++++++++++++++ src/build-scripts/viteThesaurusRegexPlugin.ts | 25 +++++++++++ 2 files changed, 68 insertions(+) create mode 100644 src/build-scripts/viteThesaurusRegexPlugin.test.ts create mode 100644 src/build-scripts/viteThesaurusRegexPlugin.ts diff --git a/src/build-scripts/viteThesaurusRegexPlugin.test.ts b/src/build-scripts/viteThesaurusRegexPlugin.test.ts new file mode 100644 index 0000000..ec2952b --- /dev/null +++ b/src/build-scripts/viteThesaurusRegexPlugin.test.ts @@ -0,0 +1,43 @@ +import t from 'tap'; + +import { getRegexStringForThesaurusPath } from './viteThesaurusRegexPlugin.js'; + +void t.test('extractWordsFromFile', async (t) => { + const regexString = await getRegexStringForThesaurusPath( + new URL('../../build-resources/th-en-x-basic.dat', import.meta.url), + ); + const regex = new RegExp(regexString, 'gi'); + + const getMatches = (str: string) => + [...str.matchAll(regex)].map((match) => match[0]); + + t.strictSame( + getMatches( + 'afterthought airplane another anybody anyhow anyone anything anywhere', + ), + [], + ); + + t.strictSame( + getMatches( + 'afterthought airplane another xxx anybody anyhow anyone anything anywhere', + ), + ['xxx'], + ); + + t.strictSame( + getMatches( + 'xx afterthought airplane another yy anybody anyhow anyone anything anywhere zzzz', + ), + ['xx', 'yy', 'zzzz'], + ); + + t.strictSame(getMatches('xx yy zzzz'), ['xx', 'yy', 'zzzz']); + + t.strictSame( + getMatches( + "Thing to help people escape really fast if there's a problem and everything is on fire so they decide not to go to space", + ), + [], + ); +}); diff --git a/src/build-scripts/viteThesaurusRegexPlugin.ts b/src/build-scripts/viteThesaurusRegexPlugin.ts new file mode 100644 index 0000000..15491b5 --- /dev/null +++ b/src/build-scripts/viteThesaurusRegexPlugin.ts @@ -0,0 +1,25 @@ +import type { PathLike } from 'node:fs'; +import { extractWordsFromFile } from './oooThesaurusParser.js'; + +/*type ViteThesaurusRegexPluginOptions = { + inputOooThesaurusPath: PathLike; + outputRegexPath: string; +};*/ + +const WORD_CHARACTER_REGEX_FRAGMENT = "[a-zA-Z']"; + +export const getRegexStringForThesaurusPath = async ( + inputOooThesaurusPath: PathLike, +) => { + const words = await extractWordsFromFile(inputOooThesaurusPath); + return `(? { +}; +*/