You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
test-assignment-overleaf/src/build-scripts/oooThesaurusParser.ts

41 lines
1.2 KiB

import type { PathLike } from 'node:fs';
import fs from 'node:fs/promises';
import _ from 'lodash';
/**
 * Splits `list` into consecutive chunks, using every element for which
 * `isSeparator` returns `true` as a delimiter.
 *
 * - Separator elements are never included in the output.
 * - Every separator opens a new chunk, so consecutive or trailing separators
 *   produce empty chunks.
 * - Elements that appear before the first separator form the first chunk
 *   (no empty chunk is emitted in front of a leading separator).
 * - An empty `list` yields an empty array.
 *
 * @param list - The elements to partition; it is not modified.
 * @param isSeparator - Predicate that marks an element as a chunk delimiter.
 * @returns The chunks, in their original order.
 */
const splitList = <T>(
  list: T[],
  isSeparator: (element: T) => boolean,
): T[][] => {
  const result: T[][] = [];
  // The chunk currently being filled; undefined until the first chunk is opened.
  let current: T[] | undefined;
  for (const element of list) {
    if (isSeparator(element)) {
      current = [];
      result.push(current);
      continue;
    }
    if (current === undefined) {
      // The list does not start with a separator: open the first chunk lazily
      // instead of dereferencing a chunk that does not exist yet.
      current = [];
      result.push(current);
    }
    current.push(element);
  }
  return result;
};
/**
 * Matches a string that consists solely of ASCII letters and apostrophes.
 *
 * Deliberately declared without the `g` flag: a global regex keeps its
 * `lastIndex` between `test` calls, so reusing one instance to filter a list
 * makes the call that follows every successful match fail. The `i` flag is
 * omitted as well because the character class already lists both cases.
 */
const WORD_REGEX = /^[a-zA-Z']+$/;

/**
 * Extracts the head words from the text of a thesaurus file.
 *
 * The first line of the text is skipped, and the remaining lines are grouped
 * into chunks delimited by blank (whitespace-only) lines. For each non-empty
 * chunk, the text of its first line up to the first `|` is taken as the
 * candidate word. Candidates are kept only if they are non-empty and consist
 * solely of ASCII letters and apostrophes.
 *
 * @param thesaurus - Full text of the thesaurus file.
 * @returns The accepted words, in the order in which they appear in the text.
 */
const extractWordsFromThesaurus = (thesaurus: string): string[] => {
  // Accept both LF and CRLF line endings so that a stray '\r' never ends up
  // attached to a word; `slice(1)` drops the first line of the file.
  const lines = thesaurus.split(/\r?\n/).slice(1);
  const chunks = splitList(lines, (line) => line.trim().length === 0);
  // An empty chunk has no first line and yields `undefined`, which
  // `_.compact` removes together with empty strings.
  const headWords = chunks.map((chunk) => chunk[0]?.split('|')[0]);
  return _.compact(headWords).filter((word) => WORD_REGEX.test(word));
};
/**
 * Reads the thesaurus file at `filePath` and returns the words that
 * `extractWordsFromThesaurus` extracts from its contents.
 *
 * The file is decoded as `latin1`, which maps every byte to exactly one
 * character. Node's `ascii` decoding clears the high bit of each byte, which
 * turns non-ASCII bytes into unrelated ASCII characters that could then pass
 * the ASCII-only word filter as bogus words. With `latin1` such bytes stay
 * non-ASCII and the affected words are rejected instead; pure-ASCII files
 * decode identically under both encodings.
 *
 * @param filePath - Location of the thesaurus file.
 * @returns The extracted words.
 * @throws Rejects with the error raised by `fs.readFile` when the file cannot
 *   be read.
 */
export const extractWordsFromFile = async (
  filePath: PathLike,
): Promise<string[]> => {
  const contents = await fs.readFile(filePath, { encoding: 'latin1' });
  return extractWordsFromThesaurus(contents);
};