You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
1.2 KiB
41 lines
1.2 KiB
import type { PathLike } from 'node:fs';
|
|
import fs from 'node:fs/promises';
|
|
import _ from 'lodash';
|
|
|
|
/**
 * Splits `list` into consecutive chunks, cutting at every element for which
 * `isSeparator` returns true. Separator elements themselves are dropped.
 *
 * Works like `String.prototype.split`: the result always contains at least one
 * chunk, and leading, trailing or adjacent separators produce empty chunks.
 *
 * @param list - Elements to partition; it is not mutated.
 * @param isSeparator - Predicate marking the elements that end the current chunk.
 * @returns The chunks in their original order.
 */
const splitList = <T>(
  list: readonly T[],
  isSeparator: (element: T) => boolean,
): T[][] => {
  // Seed the result with an open chunk so that a list which does not begin
  // with a separator has somewhere to put its first element.
  let current: T[] = [];
  const result: T[][] = [current];

  for (const element of list) {
    if (isSeparator(element)) {
      // Close the current chunk and open a fresh one.
      current = [];
      result.push(current);
    } else {
      current.push(element);
    }
  }

  return result;
};
|
|
|
|
/**
 * Matches a string made up solely of ASCII letters and apostrophes.
 *
 * Intentionally has no `g` flag: a global regex keeps `lastIndex` between
 * `test()` calls, which combined with the `^` anchor makes every call that
 * follows a successful match fail. The `i` flag is unnecessary because the
 * character class already lists both cases.
 */
const WORD_REGEX = /^[a-zA-Z']+$/;
|
|
|
|
/**
 * Extracts the entry words from thesaurus text.
 *
 * The first line is skipped (presumably a header; confirm against the data
 * files). The remaining lines are grouped into chunks separated by blank
 * lines. For each non-empty chunk, the text before the first `|` on its first
 * line is taken as the candidate word, and only candidates accepted by
 * `WORD_REGEX` are returned.
 *
 * @param thesaurus - Full text of the thesaurus.
 * @returns The accepted words, in order of appearance.
 */
const extractWordsFromThesaurus = (thesaurus: string): string[] => {
  const lines = thesaurus.split('\n').slice(1);
  const chunks = splitList(lines, (line) => line.trim().length === 0);

  // An empty chunk has no first line and yields `undefined`; `_.compact`
  // removes those along with empty strings.
  const candidates = _.compact(
    chunks.map((chunk) => chunk[0]?.split('|')[0]),
  );

  return candidates.filter((word) => {
    // `RegExp.prototype.test` on a global or sticky regex resumes from
    // `lastIndex`, which would wrongly reject a word following a match.
    // Rewinding first keeps each check independent; it is a no-op otherwise.
    WORD_REGEX.lastIndex = 0;
    return WORD_REGEX.test(word);
  });
};
|
|
|
|
/**
 * Reads the file at `filePath` as ASCII text and returns the words that
 * `extractWordsFromThesaurus` finds in it.
 *
 * @param filePath - Location of the thesaurus file.
 * @returns A promise resolving to the extracted words; it rejects if the file
 *   cannot be read.
 */
export const extractWordsFromFile = async (
  filePath: PathLike,
): Promise<string[]> => {
  const contents = await fs.readFile(filePath, { encoding: 'ascii' });
  return extractWordsFromThesaurus(contents);
};
|
|
|