diff --git a/.vscode/launch.json b/.vscode/launch.json index d5d185cec54649405ca688b1466d8f2e4447fd09..3f4e3e8924d914961606e288e0e64727d4980fac 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -26,19 +26,15 @@ "args": [ "inspect", "https://dbogatov.org", - "-r", - "-t", - "100", - "--ignore-prefixes", - "m,l", - "--accept-codes", - "444,555" + "-r" ], "console": "integratedTerminal", "internalConsoleOptions": "neverOpen", "protocol": "inspector", "preLaunchTask": "build", - "outFiles": ["${workspaceFolder}/dist/**/*.js"] + "outFiles": [ + "${workspaceFolder}/dist/**/*.js" + ] } ] } diff --git a/package-lock.json b/package-lock.json index a498a5b219efbb4a2f13a544ce3c28807cc8a5bb..bbb42b3a12006638a7a0d17234494769be70c2cf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { - "name": "@dbogatov/broken-links-inspector", - "version": "0.1.2", + "name": "broken-links-inspector", + "version": "1.0.0", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -365,6 +365,11 @@ "resolved": "https://registry.npmjs.org/@types/color-name/-/color-name-1.1.1.tgz", "integrity": "sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ==" }, + "@types/intercept-stdout": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/@types/intercept-stdout/-/intercept-stdout-0.1.0.tgz", + "integrity": "sha512-b4+N4+pHcUWaK75k4GDavB5ZS6aHdlsyxKaU82JEq7mzY0+kziTiT6nKm+OCkMlGvL6RNk2rera2InJKVP5OiQ==" + }, "@types/mocha": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-7.0.2.tgz", @@ -1103,6 +1108,14 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "dev": true }, + "intercept-stdout": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/intercept-stdout/-/intercept-stdout-0.1.2.tgz", + "integrity": "sha1-Emq/H65sUJpCipjGGmMVWQQq6f0=", + "requires": { + "lodash.toarray": "^3.0.0" + } + }, "is-arguments": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.0.4.tgz", @@ -1395,6 +1408,21 @@ "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==", "dev": true }, + "lodash._arraycopy": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/lodash._arraycopy/-/lodash._arraycopy-3.0.0.tgz", + "integrity": "sha1-due3wfH7klRzdIeKVi7Qaj5Q9uE=" + }, + "lodash._basevalues": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/lodash._basevalues/-/lodash._basevalues-3.0.0.tgz", + "integrity": "sha1-W3dXYoAr3j0yl1A+JjAIIP32Ybc=" + }, + "lodash._getnative": { + "version": "3.9.1", + "resolved": "https://registry.npmjs.org/lodash._getnative/-/lodash._getnative-3.9.1.tgz", + "integrity": "sha1-VwvH3t5G1hzc3mh9ZdPuy6o6r/U=" + }, "lodash.flattendeep": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/lodash.flattendeep/-/lodash.flattendeep-4.4.0.tgz", @@ -1406,6 +1434,36 @@ "resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz", "integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk=" }, + "lodash.isarguments": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz", + "integrity": "sha1-L1c9hcaiQon/AGY7SRwdM4/zRYo=" + }, + "lodash.isarray": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/lodash.isarray/-/lodash.isarray-3.0.4.tgz", + "integrity": "sha1-eeTriMNqgSKvhvhEqpvNhRtfu1U=" + }, + "lodash.keys": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/lodash.keys/-/lodash.keys-3.1.2.tgz", + "integrity": "sha1-TbwEcrFWvlCgsoaFXRvQsMZWCYo=", + "requires": { + "lodash._getnative": "^3.0.0", + "lodash.isarguments": "^3.0.0", + "lodash.isarray": "^3.0.0" + } + }, + "lodash.toarray": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/lodash.toarray/-/lodash.toarray-3.0.2.tgz", + "integrity": "sha1-KyBPD6T1HChcbwDIHRzqWiMEEXk=", + "requires": { + "lodash._arraycopy": "^3.0.0", + "lodash._basevalues": "^3.0.0", + "lodash.keys": "^3.0.0" + } + }, "log-symbols": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-3.0.0.tgz", diff --git a/package.json b/package.json index 10d851f751eb0f6a9fb1255409965e7b605be0e9..51f3536d660687dc520b85e468b5f3814f2d49da 100644 --- a/package.json +++ b/package.json @@ -45,14 +45,15 @@ "author": "Dmytro Bogatov", "license": "MIT", "dependencies": { + "@types/intercept-stdout": "^0.1.0", "@types/node": "^14.0.13", "axios": "^0.19.2", "chalk": "^4.1.0", "commander": "^5.1.0", "htmlparser2": "^4.1.0", + "intercept-stdout": "^0.1.2", "js2xmlparser": "^4.0.1", - "matcher": "^3.0.0", - "sinon": "^9.0.2" + "matcher": "^3.0.0" }, "devDependencies": { "@types/chai": "^4.2.11", diff --git a/src/index.ts b/src/index.ts index 726d179322ef60a960170240a45635f23ad7e941..b2ff7f55b9c1572d191cfed4df534a0e19ba46c4 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,7 +2,7 @@ import commander from "commander" import chalk from "chalk" -import { Inspector, URLsMatchingSet, Config } from "./inspector" +import { Inspector, URLsMatchingSet } from "./inspector" import { ConsoleReporter, JUnitReporter } from "./report" commander @@ -37,7 +37,8 @@ commander skipURLs: inspectObj.skip as string[], verbose: inspectObj.verbose as boolean, get: inspectObj.get as boolean, - ignoreSkipped: inspectObj.ignoreSkipped as boolean + ignoreSkipped: inspectObj.ignoreSkipped as boolean, + disablePrint: false }) let result = await inspector.processURL(new URL(url), inspectObj.recursive as boolean) diff --git a/src/inspector.ts b/src/inspector.ts index 313f5a6b45cdb7efb40c00e65de71cdd26a90203..cc08fc0bb984d1a7103316dc124526a691ee007b 100644 --- a/src/inspector.ts +++ b/src/inspector.ts @@ -3,16 +3,27 @@ import axios, { AxiosError } from "axios" import { Result, CheckStatus } from "./result"; import { isMatch } from "matcher" -export class Inspector { +export interface IHttpClient { + request(get: boolean, url: string): Promise<string> +} +export class HttpClientFailure { constructor( - private readonly matcher: URLsMatchingSet, - private readonly config: Config + readonly timeout: boolean, + readonly code: number + ) { } +} + +export class AxiosHttpClient implements IHttpClient { + + constructor( + readonly timeout: number, + readonly acceptedCodes: number[] ) { } - async timeout<T>(timeoutMs: number, promise: () => Promise<T>, failureMessage: string = "timeout"): Promise<T> { + private async timeoutWrapper<T>(timeoutMs: number, promise: () => Promise<T>, failureMessage: string = "timeout"): Promise<T> { let timeoutHandle: NodeJS.Timeout | undefined - const timeoutPromise = new Promise<never>((resolve, reject) => { + const timeoutPromise = new Promise<never>((_, reject) => { timeoutHandle = setTimeout(() => reject(new Error(failureMessage)), timeoutMs) }) @@ -24,16 +35,53 @@ export class Inspector { return result; } + async request(get: boolean, url: string): Promise<string> { + + const instance = axios.create() + + try { + return (await this.timeoutWrapper(this.timeout, () => get ? instance.get(url) : instance.head(url))).data as string + } catch (exception) { + + const error: AxiosError = exception; + + if ((exception.message as string).includes("timeout")) { + throw new HttpClientFailure(true, -1) + } else if (!error.response) { + throw new HttpClientFailure(false, -1) + } else { + if (this.acceptedCodes.some(code => code == error.response?.status)) { + return "" + } else { + throw new HttpClientFailure(false, error.response.status) + } + } + } + } +} + +export class Inspector { + + constructor( + private readonly matcher: URLsMatchingSet, + private readonly config: Config, + private readonly httpClient: IHttpClient = new AxiosHttpClient(config.timeout, config.acceptedCodes) + ) { } + async processURL(originalUrl: URL, recursive: boolean): Promise<Result> { - let result = new Result(this.config.ignoreSkipped); + let result = new Result(this.config.ignoreSkipped, this.config.disablePrint); // [url, GET, parent?] let urlsToCheck: [string, boolean, string?][] = [[originalUrl.href, true, undefined]] let processingRoutine = async (url: string, useGet: boolean, parent?: string) => { try { - url = parent ? new URL(url, parent).href : new URL(url).href + try { + url = new URL(url).href + } catch (_) { + url = new URL(url, parent).href + } if (url.includes("#")) { url = url.split("#")[0] } @@ -48,12 +96,7 @@ export class Inspector { } else { let urlToCheck = parent ? new URL(url, parent).href : url - const instance = axios.create() - const response = useGet || shouldParse ? - await this.timeout(this.config.timeout, () => instance.get(urlToCheck)) : - await this.timeout(this.config.timeout, () => instance.head(urlToCheck)) - - let html = response.data as string + let html = await this.httpClient.request(useGet || shouldParse, urlToCheck) if (shouldParse) { @@ -68,27 +111,21 @@ export class Inspector { } } catch (exception) { - const error: AxiosError = exception; + const error: HttpClientFailure = exception; // if HEAD was used, retry with GET if (!useGet) { urlsToCheck.push([url, true, parent]) } else { - if ((exception.message as string).includes("timeout")) { + if (error.timeout) { result.add({ url: url, status: CheckStatus.Timeout }, parent) - } else if (!error.response) { - result.add({ url: url, status: CheckStatus.GenericError }, parent) + } else if (error.code > -1) { + result.add({ url: url, status: CheckStatus.NonSuccessCode, message: `${error.code}` }, parent) } else { - if (this.config.acceptedCodes.some(code => code == error.response?.status)) { - result.add({ url: url, status: CheckStatus.OK }, parent) - } else { - result.add({ url: url, status: CheckStatus.NonSuccessCode, message: `${error.response.status}` }, parent) - } + result.add({ url: url, status: CheckStatus.GenericError }, parent) } } - } - } let promises: Promise<void>[] = [] @@ -123,7 +160,7 @@ export class Inspector { } }, { decodeEntities: true } - ); + ) parserInstance.write(html) parserInstance.end() @@ -140,6 +177,7 @@ export class Config { verbose: boolean = false get: boolean = false ignoreSkipped: boolean = false + disablePrint: boolean = false } export enum URLMatchingRule { diff --git a/src/report.ts b/src/report.ts index b95bb6f1dc7461cf5ca8859d215ab13631df00e2..41dab69495526acd4ee596115e57722ef7cf5102 100644 --- a/src/report.ts +++ b/src/report.ts @@ -1,11 +1,10 @@ import { ResultItem, CheckStatus } from "./result" import chalk from "chalk" import { parse } from "js2xmlparser" - import fs from "fs"; export interface IReporter { - process(pages: Map<string, ResultItem[]>): void + process(pages: Map<string, ResultItem[]>): any } /** diff --git a/src/result.ts b/src/result.ts index 1123e5bbf6f241f0e8883bfd1f7e8b8706cda663..46621597d2268513dbae58324ca8a90aa26def2a 100644 --- a/src/result.ts +++ b/src/result.ts @@ -6,18 +6,20 @@ export class Result { private addedCount = 0 private atLeastOneBroken = false - constructor(readonly ignoreSkipped: boolean) { } + constructor(readonly ignoreSkipped: boolean, readonly disablePrint: boolean) { } public add(completedCheck: ResultItem, parent: string = "original request") { if (completedCheck.status == CheckStatus.Skipped && this.ignoreSkipped) { return } - if (this.addedCount > 0 && this.addedCount % 80 == 0) { - process.stdout.write("\n") + if (!this.disablePrint) { + if (this.addedCount > 0 && this.addedCount % 80 == 0) { + process.stdout.write("\n") + } + process.stdout.write(completedCheck.status == CheckStatus.OK || completedCheck.status == CheckStatus.Skipped ? "." : "x") + this.addedCount++ } - process.stdout.write(completedCheck.status == CheckStatus.OK || completedCheck.status == CheckStatus.Skipped ? "." : "x") - this.addedCount++ if (this.pages.has(parent)) { this.pages.get(parent)?.push(completedCheck) @@ -47,13 +49,17 @@ export class Result { return count } - public report<ReporterT extends IReporter>(reporter: ReporterT): void { - reporter.process(this.pages) + public report<ReporterT extends IReporter>(reporter: ReporterT): any { + return reporter.process(this.pages) } public success() { return !this.atLeastOneBroken } + + public set(pages: Map<string, ResultItem[]>) { + this.pages = pages + } } export class ResultItem { diff --git a/test/process-url.ts b/test/process-url.ts index c397ff443ea1503e0a0cbf9bb12d3fcabfa58e11..ebf9ace4fdde6289f427cb5a619caf24c44c676f 100644 --- a/test/process-url.ts +++ b/test/process-url.ts @@ -1,37 +1,281 @@ -import { Inspector, URLsMatchingSet, URLMatchingRule, Config } from "../src/inspector" -import { expect, assert } from "chai"; +import { Inspector, URLsMatchingSet, Config, IHttpClient, HttpClientFailure, AxiosHttpClient } from "../src/inspector" +import { assert } from "chai"; import "mocha"; -import { ConsoleReporter, JUnitReporter } from "../src/report"; -import sinon from "sinon" +import { ConsoleReporter, JUnitReporter, IReporter } from "../src/report"; +import { ResultItem, CheckStatus, Result } from "../src/result"; +import intercept from "intercept-stdout"; -describe("processURL", function () { +class MockHttpClient implements IHttpClient { - this.timeout(50_000); + // Map<url, [response, timeout, failure, code]> + constructor(readonly map: Map<string, [string[], boolean, boolean, number]>) { } - const validURL = new URL("https://dbogatov.org") + async request(get: boolean, url: string): Promise<string> { + const [urls, timeout, failure, code] = this.map.get(url)! + if (timeout) { + throw new HttpClientFailure(true, -1) + } else if (failure) { + throw new HttpClientFailure(false, -1) + } else if (code != -1) { + throw new HttpClientFailure(false, code) + } else if (!get) { + return "" + } else { + return "<html>" + urls.map(url => `<a href="https://${url}">link</a>`).join("") + "</html>" + } + } +} - before(function () { - sinon.stub(console, "log") - sinon.stub(process.stdout, "write") - }); +class MockReporter implements IReporter { + process(pages: Map<string, ResultItem[]>): any { + return pages + } +} - it("processes non-recursive", async () => { - const result = await new Inspector(new URLsMatchingSet(), new Config()).processURL(validURL, false) +function toURL(url: string, path: string = "") { + return new URL(`https://${url}/${path}`).href +} - assert(result.count() > 1) - // assert(result[0].url === validURL.href) - // assert(result[0].status == CheckStatus.OK) - }); +function stripEffects(text: string) { + return text.replace(/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, "") +} - it("processes recursive", async () => { - const result = await new Inspector(new URLsMatchingSet(), new Config()).processURL(validURL, true) +function assertEqualResults(expected: Map<string, ResultItem[]>, actual: Map<string, ResultItem[]>) { - result.report(new ConsoleReporter()) - result.report(new JUnitReporter(false)) + for (const [expectedURL, expectedChecks] of expected) { + assert(actual.has(expectedURL)) + let actualChecks = actual.get(expectedURL)! + assert(expectedChecks.length == actualChecks.length) + for (const expectedCheck of expectedChecks) { + let actualCheck = actualChecks.find(c => c.url === expectedCheck.url) + assert(actualCheck) + assert(expectedCheck.status == actualCheck!.status) + assert(expectedCheck.message == actualCheck!.message) + } + } - // assert(result.length == 1) - // assert(result[0].url === validURL.href) - // assert(result[0].status == CheckStatus.OK) - }); +} -}); +const original = "original.com" +const external = "external.com" + +const map = new Map<string, [string[], boolean, boolean, number]>([ + [toURL(original), [ + [ + `${original}/success`, + `${original}/not-found#anchor`, + `${original}/timeout`, + `${original}/failure`, + `${external}/1`, + `${external}/to-skip`, + original + ], false, false, -1] + ], + [toURL(original, "success"), [[`${original}/recursive`, `${external}/2`], false, false, -1]], + [toURL(original, "not-found"), [[], false, false, 404]], + [toURL(original, "timeout"), [[], true, false, -1]], + [toURL(original, "failure"), [[], false, true, -1]], + [toURL(original, "recursive"), [[], false, false, -1]], + [toURL(external, "1"), [[], false, false, -1]], + [toURL(external, "2"), [[], false, false, -1]] +]) +const expectedNonRecursive = new Map<string, ResultItem[]>([ + ["original request", [{ url: toURL(original), status: CheckStatus.OK }]], + [toURL(original), [ + { url: toURL(original, "success"), status: CheckStatus.OK }, + { url: toURL(original, "not-found"), status: CheckStatus.NonSuccessCode, message: `${404}` }, + { url: toURL(original, "timeout"), status: CheckStatus.Timeout }, + { url: toURL(original, "failure"), status: CheckStatus.GenericError }, + { url: toURL(original), status: CheckStatus.Skipped }, + { url: toURL(external, "1"), status: CheckStatus.OK }, + { url: toURL(external, "to-skip"), status: CheckStatus.Skipped } + ]] +]) + +describe("Axios web server", async () => { + + it("OK", async () => { + await new AxiosHttpClient(5000, []).request(false, "https://dbogatov.org") + }) + + it("timeout", async () => { + try { + await new AxiosHttpClient(5, []).request(false, "https://dbogatov.org") + } catch (exception) { + const error: HttpClientFailure = exception; + assert(error.timeout) + } + }) + + it("404", async () => { + try { + await new AxiosHttpClient(2000, []).request(false, "https://dbogatov.org/not-found-123") + } catch (exception) { + const error: HttpClientFailure = exception; + assert(error.code == 404) + } + }) + + it("generic", async () => { + try { + await new AxiosHttpClient(1000, []).request(false, "bad-url") + } catch (exception) { + const error: HttpClientFailure = exception; + assert(!error.timeout) + assert(error.code == -1) + } + }) + +}) + +describe("process mock URL", function () { + + const httpClient = new MockHttpClient(map); + + ([true, false] as boolean[]).forEach(recursive => { + + it(`processes ${recursive ? "" : "non-"}recursive`, async () => { + let config = new Config() + config.disablePrint = true + config.skipURLs = ["to-skip"] + const inspector = new Inspector( + new URLsMatchingSet(), + config, + httpClient + ) + let unhook_intercept = intercept(_ => { return "" }); + + const result = await inspector.processURL(new URL("https://original.com"), recursive) + + unhook_intercept(); + + const actual = result.report(new MockReporter()) as Map<string, ResultItem[]> + let expected = new Map(expectedNonRecursive) + + if (recursive) { + expected.set( + toURL(original, "success"), + [ + { url: toURL(original, "recursive"), status: CheckStatus.OK }, + { url: toURL(external, "2"), status: CheckStatus.OK } + ] + ) + } + + assertEqualResults(expected, actual) + assert(!result.success()) + }); + }) + + describe("reporters", function () { + + it("console", () => { + + let log: string = "" + let unhook_intercept = intercept(line => { + log += stripEffects(line) + return "" + }); + + let result = new Result(true, true) + result.set(expectedNonRecursive) + result.report(new ConsoleReporter()) + + unhook_intercept(); + + let lines = log.split(/\r?\n/) + + for (const [expectedURL, expectedChecks] of expectedNonRecursive) { + assert(lines.find(l => l.startsWith(expectedURL))) + + for (const expectedCheck of expectedChecks) { + if (expectedCheck.status == CheckStatus.Skipped) { + continue + } + let check = lines.find(l => l.includes("\t") && l.includes(expectedCheck.url + " ")) + assert(check, `${expectedCheck.url} not found`) + assert( + check!.includes(expectedCheck.status == CheckStatus.NonSuccessCode || expectedCheck.status == CheckStatus.GenericError ? "BROKEN" : expectedCheck.status), + `${expectedCheck.url}: status (${expectedCheck.status}) not found in "${check}"` + ) + if (expectedCheck.message) { + assert(check!.includes(expectedCheck.message)) + } + } + } + }) + + it("junit", () => { + + let log: string = "" + let unhook_intercept = intercept(line => { + log += line + return "" + }); + + let result = new Result(true, true) + result.set(expectedNonRecursive) + result.report(new JUnitReporter(false)) + + unhook_intercept(); + + result.report(new JUnitReporter(true)) + + let lines = log.split(/\r?\n/) + + for (const [expectedURL, expectedChecks] of expectedNonRecursive) { + assert(lines.find(l => l.includes("testsuite") && l.includes(expectedURL))) + + for (const expectedCheck of expectedChecks) { + assert(lines.find(l => l.includes("testcase") && l.includes(expectedURL) && l.includes(expectedCheck.url))) + } + } + }) + }) +}) + +describe("process real URL", async () => { + let config = new Config() + config.disablePrint = true + const inspector = new Inspector( + new URLsMatchingSet(), + config, + new AxiosHttpClient(config.timeout, config.acceptedCodes) + ) + await inspector.processURL(new URL("https://dbogatov.org"), false) +}) + +describe("result", () => { + + it("ignores skipped", () => { + let result = new Result(true, true) + result.add({ status: CheckStatus.Skipped, url: "skip" }) + result.add(new ResultItem()) + assert(result.count() == 1) + }) + + it("print progress", () => { + let result = new Result(true, false) + + let log: string = "" + let unhook_intercept = intercept(line => { + log += line + return "" + }); + + result.add({ status: CheckStatus.GenericError, url: "" }) + for (let index = 0; index < 120; index++) { + result.add({ status: CheckStatus.OK, url: `${index}` }) + } + + unhook_intercept(); + + let lines = log.split(/\r?\n/) + + assert(result.count() == 121) + assert(lines.length == 2) + assert(lines[0].startsWith("x")) + assert(lines[0].length == 80) + }) + +})