1
0
mirror of https://github.com/S2-/minifyfromhtml.git synced 2025-08-03 04:10:04 +02:00

update modules

This commit is contained in:
s2
2020-07-20 16:16:07 +02:00
parent 783511ce12
commit 2b23424b86
785 changed files with 91905 additions and 56057 deletions

View File

@@ -2,27 +2,19 @@
const whatwgEncoding = require("whatwg-encoding");
// https://html.spec.whatwg.org/#encoding-sniffing-algorithm
module.exports = function sniffHTMLEncoding(buffer, options) {
module.exports = (buffer, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => {
let encoding = whatwgEncoding.getBOMEncoding(buffer); // see https://github.com/whatwg/html/issues/1910
if (options === undefined) {
options = {};
}
if (encoding === null && options.transportLayerEncodingLabel !== undefined) {
encoding = whatwgEncoding.labelToName(options.transportLayerEncodingLabel);
if (encoding === null && transportLayerEncodingLabel !== undefined) {
encoding = whatwgEncoding.labelToName(transportLayerEncodingLabel);
}
if (encoding === null) {
encoding = prescanMetaCharset(buffer);
}
if (encoding === null && options.defaultEncoding !== undefined) {
encoding = options.defaultEncoding;
}
if (encoding === null) {
encoding = "windows-1252";
encoding = defaultEncoding;
}
return encoding;
@@ -35,8 +27,8 @@ function prescanMetaCharset(buffer) {
let c = buffer[i];
if (c === 0x3C) {
// "<"
let c1 = buffer[i + 1];
let c2 = buffer[i + 2];
const c1 = buffer[i + 1];
const c2 = buffer[i + 2];
const c3 = buffer[i + 3];
const c4 = buffer[i + 4];
const c5 = buffer[i + 5];
@@ -45,11 +37,10 @@ function prescanMetaCharset(buffer) {
i += 4;
for (; i < l; i++) {
c = buffer[i];
c1 = buffer[i + 1];
c2 = buffer[i + 2];
const cMinus1 = buffer[i - 1];
const cMinus2 = buffer[i - 2];
// --> (comment end)
if (c === 0x2D && c1 === 0x2D && c2 === 0x3E) {
i += 2;
if (c === 0x3E && cMinus1 === 0x2D && cMinus2 === 0x2D) {
break;
}
}
@@ -60,6 +51,7 @@ function prescanMetaCharset(buffer) {
(isSpaceCharacter(c5) || c5 === 0x2F)) {
// "meta" + space or /
i += 6;
const attributeList = new Set();
let gotPragma = false;
let needPragma = null;
let charset = null;
@@ -67,7 +59,8 @@ function prescanMetaCharset(buffer) {
let attrRes;
do {
attrRes = getAttribute(buffer, i, l);
if (attrRes.attr) {
if (attrRes.attr && !attributeList.has(attrRes.attr.name)) {
attributeList.add(attrRes.attr.name);
if (attrRes.attr.name === "http-equiv") {
gotPragma = attrRes.attr.value === "content-type";
} else if (attrRes.attr.name === "content" && !charset) {
@@ -140,7 +133,6 @@ function getAttribute(buffer, i, l) {
}
// ">"
if (c === 0x3E) {
i++;
break;
}
let name = "";
@@ -249,12 +241,12 @@ function extractCharacterEncodingFromMeta(string) {
let position = 0;
while (true) {
let subPosition = string.substring(position).search(/charset/i);
const indexOfCharset = string.substring(position).search(/charset/i);
if (subPosition === -1) {
if (indexOfCharset === -1) {
return null;
}
subPosition += "charset".length;
let subPosition = position + indexOfCharset + "charset".length;
while (isSpaceCharacter(string[subPosition].charCodeAt(0))) {
++subPosition;
@@ -290,10 +282,11 @@ function extractCharacterEncodingFromMeta(string) {
return null;
}
let end = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/);
if (end === -1) {
end = string.length;
}
const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/);
const end = indexOfASCIIWhitespaceOrSemicolon === -1 ?
string.length :
position + indexOfASCIIWhitespaceOrSemicolon + 1;
return whatwgEncoding.labelToName(string.substring(position, end));
}