added sitemap-parser

main
Inga 🏳‍🌈 6 months ago
parent 393f2dedd9
commit def5a1851e
  1. 20
      sitemap-parser/.eslintrc.js
  2. 132
      sitemap-parser/.gitignore
  3. 5
      sitemap-parser/.prettierrc
  4. 60
      sitemap-parser/README.md
  5. 6012
      sitemap-parser/package-lock.json
  6. 51
      sitemap-parser/package.json
  7. 73
      sitemap-parser/src/createSitemap.spec.ts
  8. 40
      sitemap-parser/src/createSitemap.ts
  9. 36
      sitemap-parser/src/resolveByPath.spec.ts
  10. 41
      sitemap-parser/src/resolveByPath.ts
  11. 13
      sitemap-parser/src/types.ts
  12. 9
      sitemap-parser/test/jest-e2e.json
  13. 4
      sitemap-parser/tsconfig.build.json
  14. 18
      sitemap-parser/tsconfig.json

@ -0,0 +1,20 @@
// ESLint configuration for the sitemap-parser package.
module.exports = {
// Parse sources with the TypeScript-aware parser.
parser: '@typescript-eslint/parser',
parserOptions: {
// Type-aware rules need a TypeScript project: use `tsconfig.json`,
// resolved relative to the directory containing this file.
project: 'tsconfig.json',
tsconfigRootDir: __dirname,
sourceType: 'module',
},
plugins: ['@typescript-eslint/eslint-plugin'],
// Rule sets: ESLint's core recommendations, the typescript-eslint strict
// type-checked preset, and the Prettier integration preset.
extends: [
'eslint:recommended',
'plugin:@typescript-eslint/strict-type-checked',
'plugin:prettier/recommended',
],
// NOTE(review): `root: true` should stop ESLint from merging configs found in
// parent directories — confirm against the ESLint version in use.
root: true,
// Make Node.js and Jest globals known to the linter.
env: {
node: true,
jest: true,
},
// Do not lint this configuration file itself.
// NOTE(review): presumably needed because it is not part of the
// TypeScript project referenced above — confirm.
ignorePatterns: ['.eslintrc.js'],
};

@ -0,0 +1,132 @@
# ---> Node
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
.cache
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

@ -0,0 +1,5 @@
{
"tabWidth": 4,
"singleQuote": true,
"trailingComma": "all"
}

@ -0,0 +1,60 @@
# Assignment
## Challenge: Create a Sitemap Tree from a List of URLs
### Context
You are connecting a headless CMS to a frontend and want the editors to specify how the pages are structured from within the CMS. The CMS, however, does not provide sitemap functionality out of the box; instead, it only gives you the option to request a list of all pages.
An example request would return this payload:
```ts
[
{ "id": 1, "slug": "root", "parent": null },
{ "id": 2, "slug": "contact", "parent": 1 },
{ "id": 3, "slug": "email", "parent": 2 },
{ "id": 4, "slug": "phone", "parent": 2 },
{ "id": 5, "slug": "products", "parent": 1 },
{ "id": 6, "slug": "electronics", "parent": 5 },
{ "id": 7, "slug": "cameras", "parent": 6 },
{ "id": 8, "slug": "books", "parent": 1 },
{ "id": 9, "slug": "fiction", "parent": 8 },
{ "id": 10, "slug": "children", "parent": 8 }
]
```
### Tasks:
**Task 1:** Given a list of URLs, create a tree-like sitemap structure. Each node in the tree should represent a path segment in a URL, with its nested segments held in a `children` field. This function should be typed to return a `Tree` type, and the output should look similar to the following; you may, however, use a different output type if you can justify it:
```ts
{
"name": "root",
"id": 1,
"children": [
{
"name": "contact",
"id": 2,
"children": [
{
"name": "email",
"id": 3,
"children": []
},
{
"name": "phone",
"id": 4,
"children": []
}
]
},
/* ... (Other branches for 'products', and 'books', each with the associated IDs) ... */
]
}
```
**Task 2:** Write a `resolveByPath` function that takes a `path` string and returns the id of the matching page, or `null` if no page is found. A few test cases:
- `/` => 1
- `/contact` => 2
- `/contact/email` => 3
- `/contact/whatsapp` => null
- `/products/electronics/cameras` => 7

File diff suppressed because it is too large Load Diff

@ -0,0 +1,51 @@
{
"name": "sitemap-parser",
"version": "0.0.1",
"description": "",
"author": "",
"private": true,
"license": "UNLICENSED",
"scripts": {
"typecheck": "tsc --noEmit",
"lint": "eslint \"{src,apps,libs,test}/**/*.ts\"",
"test": "jest",
"test:watch": "jest --watch",
"test:cov": "jest --coverage",
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
"test:e2e": "jest --config ./test/jest-e2e.json"
},
"devDependencies": {
"@tsconfig/strictest": "^2.0.2",
"@types/jest": "^29.5.9",
"@types/node": "^20.3.1",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.42.0",
"eslint-config-prettier": "^9.0.0",
"eslint-plugin-prettier": "^5.0.0",
"jest": "^29.5.0",
"prettier": "^3.0.0",
"ts-jest": "^29.1.0",
"ts-loader": "^9.4.3",
"ts-node": "^10.9.1",
"tsconfig-paths": "^4.2.0",
"typescript": "^5.1.3"
},
"jest": {
"moduleFileExtensions": [
"js",
"json",
"ts"
],
"rootDir": "src",
"testRegex": ".*\\.spec\\.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
},
"collectCoverageFrom": [
"**/*.(t|j)s"
],
"coverageDirectory": "../coverage",
"testEnvironment": "node"
}
}

@ -0,0 +1,73 @@
import { createSitemap } from './createSitemap';
import { ApiResponse, Sitemap } from './types';
describe('createSitemap', () => {
    it('creates correct sitemap for sample data', () => {
        // Flat page list, in the same shape as the README's example payload.
        const pages = [
            { id: 1, slug: 'root', parent: null },
            { id: 2, slug: 'contact', parent: 1 },
            { id: 3, slug: 'email', parent: 2 },
            { id: 4, slug: 'phone', parent: 2 },
            { id: 5, slug: 'products', parent: 1 },
            { id: 6, slug: 'electronics', parent: 5 },
            { id: 7, slug: 'cameras', parent: 6 },
            { id: 8, slug: 'books', parent: 1 },
            { id: 9, slug: 'fiction', parent: 8 },
            { id: 10, slug: 'children', parent: 8 },
        ] as const satisfies ApiResponse;

        // The expected tree is assembled branch by branch so that each
        // top-level section can be read on its own.
        const contactBranch = {
            name: 'contact',
            id: 2,
            children: [
                { name: 'email', id: 3, children: [] },
                { name: 'phone', id: 4, children: [] },
            ],
        } as const;

        const productsBranch = {
            name: 'products',
            id: 5,
            children: [
                {
                    name: 'electronics',
                    id: 6,
                    children: [{ name: 'cameras', id: 7, children: [] }],
                },
            ],
        } as const;

        const booksBranch = {
            name: 'books',
            id: 8,
            children: [
                { name: 'fiction', id: 9, children: [] },
                { name: 'children', id: 10, children: [] },
            ],
        } as const;

        const expectedSitemap = {
            name: 'root',
            id: 1,
            children: [contactBranch, productsBranch, booksBranch],
        } as const satisfies Sitemap;

        // Ideally (given more time) `typeof createSitemap(pages)` would be
        // exactly `typeof expectedSitemap` rather than the general `Sitemap`
        // type, so the comparison would also be checked at the type level.
        const actualSitemap = createSitemap(pages);
        expect(actualSitemap).toEqual(expectedSitemap);
    });
});

@ -0,0 +1,40 @@
import { ApiResponse, Sitemap } from './types';
/**
 * Mutable tree node used only while the sitemap is being assembled.
 * It is structurally compatible with the read-only `Sitemap` type that
 * `createSitemap` returns.
 */
type Subtree = {
    name: string;
    id: number;
    children: Subtree[];
};

// Alternatively, for the full TS experience (given more time), the parsing
// could also be implemented at the type level, so the declaration would be:
//   export const createSitemap = <TResponse extends ApiResponse>(apiResponse: TResponse): Sitemap<TResponse> => {
// and calling it with a `const` sample payload would return the exact literal
// sitemap type rather than the general `Sitemap`.

/**
 * Builds a sitemap tree from the flat page list returned by the CMS.
 *
 * Entries may appear in any order; a child may be listed before its parent.
 * Entries whose `parent` id does not exist in the list are left out of the
 * tree.
 *
 * @param apiResponse - Flat list of pages, each pointing at its parent by id
 *   (`null` for the root page).
 * @returns The root node of the tree, or `null` when the list contains no
 *   entry with `parent === null`.
 * @throws Error when more than one entry has `parent === null`.
 * @throws Error when two entries share the same `id`; previously the later
 *   entry silently replaced the earlier one and was attached to its parent
 *   more than once.
 */
export const createSitemap = (apiResponse: ApiResponse): Sitemap | null => {
    // First pass: create one (still childless) node per page, keyed by id.
    const subtrees = new Map<number, Subtree>();
    for (const { id, slug } of apiResponse) {
        if (subtrees.has(id)) {
            throw new Error(`Duplicate node id found (${id})`);
        }
        subtrees.set(id, { id, name: slug, children: [] });
    }

    // Second pass: link every node to its parent and pick out the root.
    let root: Subtree | null = null;
    for (const { id, parent } of apiResponse) {
        const node = subtrees.get(id);
        if (node === undefined) {
            // Cannot happen: the first pass registered every id. The guard
            // only exists so that no non-null assertion is needed.
            continue;
        }

        if (parent === null) {
            if (root !== null) {
                throw new Error(
                    `Multiple root nodes found (${root.id} and ${id})`,
                );
            }
            root = node;
            continue;
        }

        // An unknown parent id yields `undefined` here, so orphaned entries
        // are skipped rather than attached anywhere.
        subtrees.get(parent)?.children.push(node);
    }

    return root;
};

@ -0,0 +1,36 @@
import { createSitemap } from './createSitemap';
import { createPathResolver } from './resolveByPath';
describe('resolveByPath', () => {
    it('resolves sample paths correctly', () => {
        // Flat page list, in the same shape as the README's example payload.
        const pages = [
            { id: 1, slug: 'root', parent: null },
            { id: 2, slug: 'contact', parent: 1 },
            { id: 3, slug: 'email', parent: 2 },
            { id: 4, slug: 'phone', parent: 2 },
            { id: 5, slug: 'products', parent: 1 },
            { id: 6, slug: 'electronics', parent: 5 },
            { id: 7, slug: 'cameras', parent: 6 },
            { id: 8, slug: 'books', parent: 1 },
            { id: 9, slug: 'fiction', parent: 8 },
            { id: 10, slug: 'children', parent: 8 },
        ];

        // Ideally (given more time) the resolver's declaration would enable
        // auto-complete for `path` here, and e.g. `resolveByPath('/contact')`
        // would be typed as the literal `2` rather than `number | null`.
        const resolveByPath = createPathResolver(createSitemap(pages));

        // [path, expected id] pairs, checked in the listed order.
        const expectations: [string, number | null][] = [
            ['/', 1],
            ['/contact', 2],
            ['/contact/email', 3],
            ['/contact/whatsapp', null],
            ['/products/electronics/cameras', 7],
            // It is not clear whether a path with repeated/trailing slashes
            // and no leading slash should resolve to 7 or to null; the
            // current behaviour (null) is what is pinned here.
            //['products///electronics///cameras///', 7],
            ['products///electronics///cameras///', null],
        ];

        for (const [path, expectedId] of expectations) {
            expect(resolveByPath(path)).toBe(expectedId);
        }
    });
});

@ -0,0 +1,41 @@
import { Sitemap } from './types';
// First implementation, parsing paths and walking through the entire tree every time
/*export const createPathResolver =
(sitemap: Sitemap | null) => (path: string) => {
// TODO: use something to parse paths rather than just splitting it here by `'/'` (which might cause all kinds of problems for complicated paths).
// TODO2: Should probably also handle urlencode somehow, e.g. for `path === '/root/page%2fwith%2fslash/etc'`
const parts = path.split('/').filter((part) => part.length);
return (
parts.reduce<Sitemap | null | undefined>(
(node, part) =>
node?.children.find(({ name }) => name === part),
sitemap,
)?.id ?? null
);
};*/
/**
 * Lists every descendant of `sitemap` as a `[relativePath, id]` pair.
 *
 * Paths are relative to `sitemap` itself: they are built from descendant
 * names joined with `/`, have no leading slash, and do not include the name
 * of `sitemap`. Pairs are emitted in pre-order (a node before its own
 * descendants, siblings in their original order).
 *
 * @param sitemap - Node whose descendants are listed; `null`/`undefined`
 *   yields an empty list.
 * @returns The `[relativePath, id]` pairs for all descendants.
 */
const createPaths = (
    sitemap: Sitemap | null | undefined,
): [string, number][] => {
    if (!sitemap) {
        return [];
    }

    const entries: [string, number][] = [];
    for (const child of sitemap.children) {
        // The child itself comes first...
        entries.push([child.name, child.id]);
        // ...followed by everything below it, prefixed with the child's name.
        for (const [suffix, id] of createPaths(child)) {
            entries.push([`${child.name}/${suffix}`, id]);
        }
    }
    return entries;
};
// Second, improved implementation: instead of walking the tree on every
// lookup, build a table from complete paths to ids once and use it as a
// single lookup table afterwards.
/**
 * Creates a function that resolves an absolute path to a page id.
 *
 * @param sitemap - Root of the sitemap tree, or `null` when there is none.
 * @returns A resolver that maps `'/'` to the root's id and
 *   `'/<name>/<name>/...'` to the id of the matching descendant; any path
 *   without an exact match in the table resolves to `null`.
 */
export const createPathResolver = (sitemap: Sitemap | null) => {
    const idsByPath = new Map<string, number>();
    for (const [relativePath, id] of createPaths(sitemap)) {
        // Stored keys are absolute, i.e. they carry a leading slash.
        idsByPath.set(`/${relativePath}`, id);
    }

    return (path: string) => {
        // The root is not part of the table, so it is handled separately.
        const id = path === '/' ? sitemap?.id : idsByPath.get(path);
        return id ?? null;
    };
};

@ -0,0 +1,13 @@
/**
 * Flat, read-only list of pages as returned by the CMS.
 *
 * Each entry carries its own `id`, its `slug`, and the id of its `parent`
 * page; `parent` is `null` for the root page.
 */
export type ApiResponse = ReadonlyArray<{
    id: number;
    slug: string;
    parent: number | null;
}>;
/**
 * Read-only node of the sitemap tree: the page's `name` and `id`, plus the
 * nodes nested directly below it.
 */
export type Sitemap = {
    readonly name: string;
    readonly id: number;
    readonly children: ReadonlyArray<Sitemap>;
};

@ -0,0 +1,9 @@
{
"moduleFileExtensions": ["js", "json", "ts"],
"rootDir": ".",
"testEnvironment": "node",
"testRegex": ".e2e-spec.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
}
}

@ -0,0 +1,4 @@
{
"extends": "./tsconfig.json",
"exclude": ["node_modules", "test", "dist", "**/*.spec.ts"]
}

@ -0,0 +1,18 @@
{
"extends": "@tsconfig/strictest/tsconfig.json",
"compilerOptions": {
"module": "commonjs",
"declaration": true,
"removeComments": true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"allowSyntheticDefaultImports": true,
"target": "ES2021",
"sourceMap": true,
"outDir": "./dist",
"baseUrl": "./",
"incremental": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": false
}
}
Loading…
Cancel
Save