diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..9aa60965a6df3d444151e1724515ecff3af867c7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,10 @@ +{ + "cSpell.words": [ + "classname", + "junit", + "testcase", + "testsuite", + "testsuites", + "xmlparser" + ] +} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index f9cdf9342d39623177bb4aeaa7e0669495a5f34f..a9b26c1f1576c680cbed451503d1e04b39944285 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "broken-links-inspector", - "version": "1.1.2", + "version": "1.1.5", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -353,6 +353,11 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.13.tgz", "integrity": "sha512-rouEWBImiRaSJsVA+ITTFM6ZxibuAlTuNOCyxVbwreu6k6+ujs7DfnU9o+PShFhET78pMBl3eH+AGSI5eOTkPA==" }, + "@types/pluralize": { + "version": "0.0.29", + "resolved": "https://registry.npmjs.org/@types/pluralize/-/pluralize-0.0.29.tgz", + "integrity": "sha512-BYOID+l2Aco2nBik+iYS4SZX0Lf20KPILP5RGmM1IgzdwNdTs0eebiFriOPcej1sX9mLnSoiNte5zcFxssgpGA==" + }, "@types/sinon": { "version": "9.0.4", "resolved": "https://registry.npmjs.org/@types/sinon/-/sinon-9.0.4.tgz", @@ -2350,6 +2355,11 @@ "find-up": "^4.0.0" } }, + "pluralize": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/pluralize/-/pluralize-8.0.0.tgz", + "integrity": "sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==" + }, "prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", diff --git a/package.json b/package.json index 98d641b0c7f98d53a7da842ab34cdc8c51f46d1d..f2409188491ea614713ce4e2be0e7db84b917427 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "broken-links-inspector", - "version": "1.1.5", + "version": "1.2.0", "description": "Extract and recursively check all URLs reporting broken ones", "main": "dist/inspector.js", "types": "dist/inspector.d.ts", @@ -48,13 +48,15 @@ "dependencies": { "@types/intercept-stdout": "^0.1.0", "@types/node": "^14.0.13", + "@types/pluralize": "0.0.29", "axios": "^0.19.2", "chalk": "^4.1.0", "commander": "^5.1.0", "htmlparser2": "^4.1.0", "intercept-stdout": "^0.1.2", "js2xmlparser": "^4.0.1", - "matcher": "^3.0.0" + "matcher": "^3.0.0", + "pluralize": "^8.0.0" }, "devDependencies": { "@types/chai": "^4.2.11", diff --git a/src/index.ts b/src/index.ts index 640d76a9063bd082f2756a5cc3a67b3ecdd80f6c..1b6df0a938071e23192ea1435b213dfa3bc0529f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,7 +6,7 @@ import { Inspector, URLsMatchingSet } from "./inspector" import { ConsoleReporter, JUnitReporter } from "./report" commander - .version("1.1.5") + .version("1.2.0") .description("Extract and recursively check all URLs reporting broken ones\n\nDedicated to Daria Bogatova \u2665") commander @@ -17,6 +17,7 @@ commander .option("-g, --get", "use GET request instead of HEAD", false) .option("-s, --skip <globs>", "URLs to skip defined by globs, like '*linkedin*'", (value: string, previous: string[]) => previous.concat([value]), []) .option("--reporters <coma-separated-strings>", "Reporters to use in processing the results (junit, console)", (value: string, _) => value.split(","), ["console"]) + .option("--retries <number>", "The number of times to retry TIMEOUT URLs", (value: string, _) => parseInt(value), 3) .option("--ignore-prefixes <coma-separated-strings>", "prefix(es) to ignore (without ':'), like mailto: and tel:", (value: string, _) => value.split(","), ["javascript", "data", "mailto", "sms", "tel", "geo"]) .option("--accept-codes <coma-separated-numbers>", "HTTP response code(s) (beyond 200-299) to accept, like 999 for linkedin", (value: string, _) => value.split(",").map(code => parseInt(code)), [999]) .option("--ignore-skipped", "Do not report skipped URLs", false) @@ -38,7 +39,8 @@ commander verbose: inspectObj.verbose as boolean, get: inspectObj.get as boolean, ignoreSkipped: inspectObj.ignoreSkipped as boolean, - disablePrint: false + disablePrint: false, + retries: inspectObj.retries as number }) const result = await inspector.processURL(new URL(url), inspectObj.recursive as boolean) diff --git a/src/inspector.ts b/src/inspector.ts index 44eca94f871c2d2b40a4f8eb0db01e797f2e7fdf..8d2b293d2e7966a503bd1b9ae35b0a7168ef3780 100644 --- a/src/inspector.ts +++ b/src/inspector.ts @@ -2,6 +2,7 @@ import * as parser from "htmlparser2" import axios, { AxiosError } from "axios" import { Result, CheckStatus } from "./result" import { isMatch } from "matcher" +import pluralize from "pluralize" export interface IHttpClient { request(get: boolean, url: string): Promise<string> @@ -73,10 +74,10 @@ export class Inspector { async processURL(originalUrl: URL, recursive: boolean): Promise<Result> { const result = new Result(this.config.ignoreSkipped, this.config.disablePrint) - // [url, GET, parent?] - const urlsToCheck: [string, boolean, string?][] = [[originalUrl.href, true, undefined]] + // [url, GET, attempts, parent?] + const urlsToCheck: [string, boolean, number, string?][] = [[originalUrl.href, true, 0, undefined]] - const processingRoutine = async (url: string, useGet: boolean, parent?: string) => { + const processingRoutine = async (url: string, useGet: boolean, attempts: number, parent?: string) => { try { try { @@ -105,11 +106,15 @@ export class Inspector { const discoveredURLs = this.extractURLs(html) for (const discovered of discoveredURLs) { - urlsToCheck.push([discovered, this.config.get, url]) + urlsToCheck.push([discovered, this.config.get, 0, url]) } } - result.add({ url: url, status: CheckStatus.OK }, parent) + if (attempts == 0) { + result.add({ url: url, status: CheckStatus.OK }, parent) + } else { + result.add({ url: url, status: CheckStatus.Retried, message: `${attempts} ${pluralize("retry", attempts)}` }, parent) + } } } catch (exception) { @@ -117,10 +122,15 @@ export class Inspector { // if HEAD was used, retry with GET if (!useGet) { - urlsToCheck.push([url, true, parent]) + urlsToCheck.push([url, true, attempts, parent]) } else { if (error.timeout) { - result.add({ url: url, status: CheckStatus.Timeout }, parent) + // retry if attempts left + if (attempts < this.config.retries) { + urlsToCheck.push([url, useGet, attempts + 1, parent]) + } else { + result.add({ url: url, status: CheckStatus.Timeout }, parent) + } } else if (error.code > -1) { result.add({ url: url, status: CheckStatus.NonSuccessCode, message: `${error.code}` }, parent) } else { @@ -135,9 +145,9 @@ export class Inspector { while (urlsToCheck.length > 0) { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const [url, useGet, parent] = urlsToCheck.pop()! + const [url, useGet, attempts, parent] = urlsToCheck.pop()! - promises.push(processingRoutine(url, useGet, parent)) + promises.push(processingRoutine(url, useGet, attempts, parent)) if (urlsToCheck.length == 0) { await Promise.all(promises) @@ -181,6 +191,7 @@ export class Config { get = false ignoreSkipped = false disablePrint = false + retries = 3 } export enum URLMatchingRule { diff --git a/src/report.ts b/src/report.ts index f786327ddd4e78a295bf41767a83fa5dedc9e6b4..4d8732cb3a1a5c6e2c8901068e23e6a2ec83a448 100644 --- a/src/report.ts +++ b/src/report.ts @@ -114,6 +114,7 @@ export class JUnitReporter implements IReporter { skipped++ break case CheckStatus.OK: + case CheckStatus.Retried: oks++ break } @@ -156,6 +157,9 @@ export class ConsoleReporter implements IReporter { case CheckStatus.OK: statusLabel = chalk.green("OK".padEnd(labelWidth)) break + case CheckStatus.Retried: + statusLabel = chalk.magenta("RETRIED".padEnd(labelWidth)) + break case CheckStatus.Skipped: statusLabel = chalk.gray("SKIP".padEnd(labelWidth)) break @@ -189,6 +193,7 @@ export class ConsoleReporter implements IReporter { for (const check of page[1]) { switch (check.status) { case CheckStatus.OK: + case CheckStatus.Retried: oks++ break case CheckStatus.NonSuccessCode: diff --git a/src/result.ts b/src/result.ts index e7ed813c4ba41c9e2466920970b3a17ea44cd7d8..fb19e77a4859d95782484b6d6276781ccd410d54 100644 --- a/src/result.ts +++ b/src/result.ts @@ -17,7 +17,7 @@ export class Result { if (this.addedCount > 0 && this.addedCount % 80 == 0) { process.stdout.write("\n") } - process.stdout.write(completedCheck.status == CheckStatus.OK || completedCheck.status == CheckStatus.Skipped ? "." : "x") + process.stdout.write(isStatusFailure(completedCheck.status) ? "x" : ".") this.addedCount++ } @@ -29,11 +29,7 @@ export class Result { } this.checkedUrls.add(completedCheck.url) - if ( - completedCheck.status == CheckStatus.GenericError || - completedCheck.status == CheckStatus.Timeout || - completedCheck.status == CheckStatus.NonSuccessCode - ) { + if (isStatusFailure(completedCheck.status)) { this.atLeastOneBroken = true } } @@ -72,7 +68,15 @@ export class ResultItem { export enum CheckStatus { OK = "OK", Skipped = "SKIP", + Retried = "RETRIED", Timeout = "TIMEOUT", NonSuccessCode = "ERROR CODE", GenericError = "UNKNOWN" } + +export function isStatusFailure(status: CheckStatus): boolean { + return false || + status == CheckStatus.GenericError || + status == CheckStatus.Timeout || + status == CheckStatus.NonSuccessCode +} diff --git a/test/process-url.ts b/test/process-url.ts index 617e0aa557aaaa655e45c31488fa64e751fd6314..82a35c3a61fe6b2cd3019bad4981034b529acc34 100644 --- a/test/process-url.ts +++ b/test/process-url.ts @@ -7,12 +7,29 @@ import intercept from "intercept-stdout" class MockHttpClient implements IHttpClient { - // Map<url, [response, timeout, failure, code]> - constructor(readonly map: Map<string, [string[], boolean, boolean, number]>) { } + private attemptsMap = new Map<string, number>() + + // Map<url, [response, timeout, failure, code, retries]> + constructor(readonly map: Map<string, [string[], boolean, boolean, number, number]>) { } async request(get: boolean, url: string): Promise<string> { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const [urls, timeout, failure, code] = this.map.get(url)! + const [urls, timeout, failure, code, retries] = this.map.get(url)! + + if (retries > 0) { + if (this.attemptsMap.has(url)) { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + if (this.attemptsMap.get(url)! < retries) { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + this.attemptsMap.set(url, this.attemptsMap.get(url)! + 1) + throw new HttpClientFailure(true, -1) + } + } else { + this.attemptsMap.set(url, 0) + throw new HttpClientFailure(true, -1) + } + } + if (timeout) { throw new HttpClientFailure(true, -1) } else if (failure) { @@ -55,7 +72,7 @@ function assertEqualResults(expected: Map<string, ResultItem[]>, actual: Map<str // eslint-disable-next-line @typescript-eslint/no-non-null-assertion assert(expectedCheck.status == actualCheck!.status) // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - assert(expectedCheck.message == actualCheck!.message) + assert(actualCheck!.message ? actualCheck!.message!.includes(expectedCheck.message!) : !expectedCheck.message) } } @@ -64,25 +81,28 @@ function assertEqualResults(expected: Map<string, ResultItem[]>, actual: Map<str const original = "original.com" const external = "external.com" -const map = new Map<string, [string[], boolean, boolean, number]>([ +// url -> [urls, timeout, failure, code, retries] +const map = new Map<string, [string[], boolean, boolean, number, number]>([ [toURL(original), [ [ `${original}/success`, `${original}/not-found#anchor`, `${original}/timeout`, `${original}/failure`, + `${original}/retried`, `${external}/1`, `${external}/to-skip`, original - ], false, false, -1] + ], false, false, -1, 0] ], - [toURL(original, "success"), [[`${original}/recursive`, `${external}/2`], false, false, -1]], - [toURL(original, "not-found"), [[], false, false, 404]], - [toURL(original, "timeout"), [[], true, false, -1]], - [toURL(original, "failure"), [[], false, true, -1]], - [toURL(original, "recursive"), [[], false, false, -1]], - [toURL(external, "1"), [[], false, false, -1]], - [toURL(external, "2"), [[], false, false, -1]] + [toURL(original, "success"), [[`${original}/recursive`, `${external}/2`], false, false, -1, 0]], + [toURL(original, "not-found"), [[], false, false, 404, 0]], + [toURL(original, "timeout"), [[], true, false, -1, 0]], + [toURL(original, "failure"), [[], false, true, -1, 0]], + [toURL(original, "retried"), [[], false, false, -1, 2]], + [toURL(original, "recursive"), [[], false, false, -1, 0]], + [toURL(external, "1"), [[], false, false, -1, 0]], + [toURL(external, "2"), [[], false, false, -1, 0]] ]) const expectedNonRecursive = new Map<string, ResultItem[]>([ ["original request", [{ url: toURL(original), status: CheckStatus.OK }]], @@ -91,6 +111,7 @@ const expectedNonRecursive = new Map<string, ResultItem[]>([ { url: toURL(original, "not-found"), status: CheckStatus.NonSuccessCode, message: `${404}` }, { url: toURL(original, "timeout"), status: CheckStatus.Timeout }, { url: toURL(original, "failure"), status: CheckStatus.GenericError }, + { url: toURL(original, "retried"), status: CheckStatus.Retried, message: `${2}` }, { url: toURL(original), status: CheckStatus.Skipped }, { url: toURL(external, "1"), status: CheckStatus.OK }, { url: toURL(external, "to-skip"), status: CheckStatus.Skipped } @@ -123,7 +144,7 @@ describe("Axios web server", async () => { it("generic", async () => { try { - await new AxiosHttpClient(1000, []).request(false, "bad-url") + await new AxiosHttpClient(1000, []).request(true, "ftp://bad-url-54234534.com") } catch (exception) { const error: HttpClientFailure = exception assert(!error.timeout) @@ -135,10 +156,10 @@ describe("Axios web server", async () => { describe("process mock URL", function () { - const httpClient = new MockHttpClient(map); - ([true, false] as boolean[]).forEach(recursive => { + const httpClient = new MockHttpClient(map) + it(`processes ${recursive ? "" : "non-"}recursive`, async () => { const config = new Config() config.disablePrint = true