1
0
mirror of https://github.com/S2-/minifyfromhtml.git synced 2025-08-02 20:00:05 +02:00

update packages to latest version

This commit is contained in:
s2
2022-08-20 18:51:33 +02:00
parent 09663a35a5
commit 806ebf9a57
4513 changed files with 366205 additions and 92512 deletions

View File

@@ -1,4 +1,4 @@
Copyright © 2016–2020 Domenic Denicola <d@domenic.me>
Copyright © Domenic Denicola <d@domenic.me>
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

View File

@@ -6,15 +6,17 @@ This package implements the HTML Standard's [encoding sniffing algorithm](https:
const htmlEncodingSniffer = require("html-encoding-sniffer");
const fs = require("fs");
const htmlBuffer = fs.readFileSync("./html-page.html");
const sniffedEncoding = htmlEncodingSniffer(htmlBuffer);
const htmlBytes = fs.readFileSync("./html-page.html");
const sniffedEncoding = htmlEncodingSniffer(htmlBytes);
```
The passed bytes are given as a `Uint8Array`; the Node.js `Buffer` subclass of `Uint8Array` will also work, as shown above.
The returned value will be a canonical [encoding name](https://encoding.spec.whatwg.org/#names-and-labels) (not a label). You might then combine this with the [whatwg-encoding](https://github.com/jsdom/whatwg-encoding) package to decode the result:
```js
const whatwgEncoding = require("whatwg-encoding");
const htmlString = whatwgEncoding.decode(htmlBuffer, sniffedEncoding);
const htmlString = whatwgEncoding.decode(htmlBytes, sniffedEncoding);
```
## Options
@@ -22,7 +24,7 @@ const htmlString = whatwgEncoding.decode(htmlBuffer, sniffedEncoding);
You can pass two potential options to `htmlEncodingSniffer`:
```js
const sniffedEncoding = htmlEncodingSniffer(htmlBuffer, {
const sniffedEncoding = htmlEncodingSniffer(htmlBytes, {
transportLayerEncodingLabel,
defaultEncoding
});

View File

@@ -2,15 +2,15 @@
const whatwgEncoding = require("whatwg-encoding");
// https://html.spec.whatwg.org/#encoding-sniffing-algorithm
module.exports = (buffer, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => {
let encoding = whatwgEncoding.getBOMEncoding(buffer); // see https://github.com/whatwg/html/issues/1910
module.exports = (uint8Array, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => {
let encoding = whatwgEncoding.getBOMEncoding(uint8Array);
if (encoding === null && transportLayerEncodingLabel !== undefined) {
encoding = whatwgEncoding.labelToName(transportLayerEncodingLabel);
}
if (encoding === null) {
encoding = prescanMetaCharset(buffer);
encoding = prescanMetaCharset(uint8Array);
}
if (encoding === null) {
@@ -21,24 +21,24 @@ module.exports = (buffer, { transportLayerEncodingLabel, defaultEncoding = "wind
};
// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
function prescanMetaCharset(buffer) {
const l = Math.min(buffer.length, 1024);
function prescanMetaCharset(uint8Array) {
const l = Math.min(uint8Array.byteLength, 1024);
for (let i = 0; i < l; i++) {
let c = buffer[i];
let c = uint8Array[i];
if (c === 0x3C) {
// "<"
const c1 = buffer[i + 1];
const c2 = buffer[i + 2];
const c3 = buffer[i + 3];
const c4 = buffer[i + 4];
const c5 = buffer[i + 5];
const c1 = uint8Array[i + 1];
const c2 = uint8Array[i + 2];
const c3 = uint8Array[i + 3];
const c4 = uint8Array[i + 4];
const c5 = uint8Array[i + 5];
// !-- (comment start)
if (c1 === 0x21 && c2 === 0x2D && c3 === 0x2D) {
i += 4;
for (; i < l; i++) {
c = buffer[i];
const cMinus1 = buffer[i - 1];
const cMinus2 = buffer[i - 2];
c = uint8Array[i];
const cMinus1 = uint8Array[i - 1];
const cMinus2 = uint8Array[i - 2];
// --> (comment end)
if (c === 0x3E && cMinus1 === 0x2D && cMinus2 === 0x2D) {
break;
@@ -58,7 +58,7 @@ function prescanMetaCharset(buffer) {
let attrRes;
do {
attrRes = getAttribute(buffer, i, l);
attrRes = getAttribute(uint8Array, i, l);
if (attrRes.attr && !attributeList.has(attrRes.attr.name)) {
attributeList.add(attrRes.attr.name);
if (attrRes.attr.name === "http-equiv") {
@@ -97,7 +97,7 @@ function prescanMetaCharset(buffer) {
} else if ((c1 >= 0x41 && c1 <= 0x5A) || (c1 >= 0x61 && c1 <= 0x7A)) {
// a-z or A-Z
for (i += 2; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// space or >
if (isSpaceCharacter(c) || c === 0x3E) {
break;
@@ -105,13 +105,13 @@ function prescanMetaCharset(buffer) {
}
let attrRes;
do {
attrRes = getAttribute(buffer, i, l);
attrRes = getAttribute(uint8Array, i, l);
i = attrRes.i;
} while (attrRes.attr);
} else if (c1 === 0x21 || c1 === 0x2F || c1 === 0x3F) {
// ! or / or ?
for (i += 2; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// >
if (c === 0x3E) {
break;
@@ -124,9 +124,9 @@ function prescanMetaCharset(buffer) {
}
// https://html.spec.whatwg.org/multipage/syntax.html#concept-get-attributes-when-sniffing
function getAttribute(buffer, i, l) {
function getAttribute(uint8Array, i, l) {
for (; i < l; i++) {
let c = buffer[i];
let c = uint8Array[i];
// space or /
if (isSpaceCharacter(c) || c === 0x2F) {
continue;
@@ -138,7 +138,7 @@ function getAttribute(buffer, i, l) {
let name = "";
let value = "";
nameLoop:for (; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// "="
if (c === 0x3D && name !== "") {
i++;
@@ -147,7 +147,7 @@ function getAttribute(buffer, i, l) {
// space
if (isSpaceCharacter(c)) {
for (i++; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// space
if (isSpaceCharacter(c)) {
continue;
@@ -173,11 +173,11 @@ function getAttribute(buffer, i, l) {
name += String.fromCharCode(c);
}
}
c = buffer[i];
c = uint8Array[i];
// space
if (isSpaceCharacter(c)) {
for (i++; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// space
if (isSpaceCharacter(c)) {
continue;
@@ -190,7 +190,7 @@ function getAttribute(buffer, i, l) {
if (c === 0x22 || c === 0x27) {
const quote = c;
for (i++; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
if (c === quote) {
i++;
@@ -219,7 +219,7 @@ function getAttribute(buffer, i, l) {
}
for (i++; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// space or >
if (isSpaceCharacter(c) || c === 0x3E) {
@@ -241,7 +241,7 @@ function extractCharacterEncodingFromMeta(string) {
let position = 0;
while (true) {
const indexOfCharset = string.substring(position).search(/charset/i);
const indexOfCharset = string.substring(position).search(/charset/ui);
if (indexOfCharset === -1) {
return null;
@@ -282,7 +282,7 @@ function extractCharacterEncodingFromMeta(string) {
return null;
}
const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/);
const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/u);
const end = indexOfASCIIWhitespaceOrSemicolon === -1 ?
string.length :
position + indexOfASCIIWhitespaceOrSemicolon + 1;

View File

@@ -1,26 +1,26 @@
{
"_from": "html-encoding-sniffer@^2.0.1",
"_id": "html-encoding-sniffer@2.0.1",
"_from": "html-encoding-sniffer@^3.0.0",
"_id": "html-encoding-sniffer@3.0.0",
"_inBundle": false,
"_integrity": "sha512-D5JbOMBIR/TVZkubHT+OyT2705QvogUW4IBn6nHd756OwieSF9aDYFj4dv6HHEVGYbHaLETa3WggZYWWMyy3ZQ==",
"_integrity": "sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==",
"_location": "/html-encoding-sniffer",
"_phantomChildren": {},
"_requested": {
"type": "range",
"registry": true,
"raw": "html-encoding-sniffer@^2.0.1",
"raw": "html-encoding-sniffer@^3.0.0",
"name": "html-encoding-sniffer",
"escapedName": "html-encoding-sniffer",
"rawSpec": "^2.0.1",
"rawSpec": "^3.0.0",
"saveSpec": null,
"fetchSpec": "^2.0.1"
"fetchSpec": "^3.0.0"
},
"_requiredBy": [
"/jsdom"
],
"_resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz",
"_shasum": "42a6dc4fd33f00281176e8b23759ca4e4fa185f3",
"_spec": "html-encoding-sniffer@^2.0.1",
"_resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-3.0.0.tgz",
"_shasum": "2cb1a8cf0db52414776e5b2a7a04d5dd98158de9",
"_spec": "html-encoding-sniffer@^3.0.0",
"_where": "D:\\Projects\\minifyfromhtml\\node_modules\\jsdom",
"author": {
"name": "Domenic Denicola",
@@ -32,16 +32,17 @@
},
"bundleDependencies": false,
"dependencies": {
"whatwg-encoding": "^1.0.5"
"whatwg-encoding": "^2.0.0"
},
"deprecated": false,
"description": "Sniff the encoding from a HTML byte stream",
"devDependencies": {
"eslint": "^6.8.0",
"mocha": "^7.0.0"
"@domenic/eslint-config": "^1.4.0",
"eslint": "^7.32.0",
"mocha": "^9.1.1"
},
"engines": {
"node": ">=10"
"node": ">=12"
},
"files": [
"lib/"
@@ -62,5 +63,5 @@
"lint": "eslint .",
"test": "mocha"
},
"version": "2.0.1"
"version": "3.0.0"
}