mirror of
https://github.com/S2-/minifyfromhtml.git
synced 2025-08-03 12:20:04 +02:00
update node modules
This commit is contained in:
234
node_modules/parse5/lib/tokenizer/preprocessor.js
generated
vendored
234
node_modules/parse5/lib/tokenizer/preprocessor.js
generated
vendored
@@ -1,147 +1,159 @@
|
||||
'use strict';
|
||||
|
||||
var UNICODE = require('../common/unicode');
|
||||
const unicode = require('../common/unicode');
|
||||
const ERR = require('../common/error-codes');
|
||||
|
||||
//Aliases
|
||||
var $ = UNICODE.CODE_POINTS;
|
||||
|
||||
//Utils
|
||||
|
||||
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
|
||||
//this functions if they will be situated in another module due to context switch.
|
||||
//Always perform inlining check before modifying this functions ('node --trace-inlining').
|
||||
function isSurrogatePair(cp1, cp2) {
|
||||
return cp1 >= 0xD800 && cp1 <= 0xDBFF && cp2 >= 0xDC00 && cp2 <= 0xDFFF;
|
||||
}
|
||||
|
||||
function getSurrogatePairCodePoint(cp1, cp2) {
|
||||
return (cp1 - 0xD800) * 0x400 + 0x2400 + cp2;
|
||||
}
|
||||
|
||||
const $ = unicode.CODE_POINTS;
|
||||
|
||||
//Const
|
||||
var DEFAULT_BUFFER_WATERLINE = 1 << 16;
|
||||
|
||||
const DEFAULT_BUFFER_WATERLINE = 1 << 16;
|
||||
|
||||
//Preprocessor
|
||||
//NOTE: HTML input preprocessing
|
||||
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
|
||||
var Preprocessor = module.exports = function () {
|
||||
this.html = null;
|
||||
class Preprocessor {
|
||||
constructor() {
|
||||
this.html = null;
|
||||
|
||||
this.pos = -1;
|
||||
this.lastGapPos = -1;
|
||||
this.lastCharPos = -1;
|
||||
|
||||
this.gapStack = [];
|
||||
|
||||
this.skipNextNewLine = false;
|
||||
|
||||
this.lastChunkWritten = false;
|
||||
this.endOfChunkHit = false;
|
||||
this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
|
||||
};
|
||||
|
||||
Preprocessor.prototype.dropParsedChunk = function () {
|
||||
if (this.pos > this.bufferWaterline) {
|
||||
this.lastCharPos -= this.pos;
|
||||
this.html = this.html.substring(this.pos);
|
||||
this.pos = 0;
|
||||
this.pos = -1;
|
||||
this.lastGapPos = -1;
|
||||
this.lastCharPos = -1;
|
||||
|
||||
this.gapStack = [];
|
||||
|
||||
this.skipNextNewLine = false;
|
||||
|
||||
this.lastChunkWritten = false;
|
||||
this.endOfChunkHit = false;
|
||||
this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
|
||||
}
|
||||
};
|
||||
|
||||
Preprocessor.prototype._addGap = function () {
|
||||
this.gapStack.push(this.lastGapPos);
|
||||
this.lastGapPos = this.pos;
|
||||
};
|
||||
_err() {
|
||||
// NOTE: err reporting is noop by default. Enabled by mixin.
|
||||
}
|
||||
|
||||
Preprocessor.prototype._processHighRangeCodePoint = function (cp) {
|
||||
//NOTE: try to peek a surrogate pair
|
||||
if (this.pos !== this.lastCharPos) {
|
||||
var nextCp = this.html.charCodeAt(this.pos + 1);
|
||||
_addGap() {
|
||||
this.gapStack.push(this.lastGapPos);
|
||||
this.lastGapPos = this.pos;
|
||||
}
|
||||
|
||||
if (isSurrogatePair(cp, nextCp)) {
|
||||
//NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
|
||||
this.pos++;
|
||||
cp = getSurrogatePairCodePoint(cp, nextCp);
|
||||
_processSurrogate(cp) {
|
||||
//NOTE: try to peek a surrogate pair
|
||||
if (this.pos !== this.lastCharPos) {
|
||||
const nextCp = this.html.charCodeAt(this.pos + 1);
|
||||
|
||||
//NOTE: add gap that should be avoided during retreat
|
||||
this._addGap();
|
||||
if (unicode.isSurrogatePair(nextCp)) {
|
||||
//NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
|
||||
this.pos++;
|
||||
|
||||
//NOTE: add gap that should be avoided during retreat
|
||||
this._addGap();
|
||||
|
||||
return unicode.getSurrogatePairCodePoint(cp, nextCp);
|
||||
}
|
||||
}
|
||||
|
||||
//NOTE: we are at the end of a chunk, therefore we can't infer surrogate pair yet.
|
||||
else if (!this.lastChunkWritten) {
|
||||
this.endOfChunkHit = true;
|
||||
return $.EOF;
|
||||
}
|
||||
|
||||
//NOTE: isolated surrogate
|
||||
this._err(ERR.surrogateInInputStream);
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
||||
dropParsedChunk() {
|
||||
if (this.pos > this.bufferWaterline) {
|
||||
this.lastCharPos -= this.pos;
|
||||
this.html = this.html.substring(this.pos);
|
||||
this.pos = 0;
|
||||
this.lastGapPos = -1;
|
||||
this.gapStack = [];
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: we've hit the end of chunk, stop processing at this point
|
||||
else if (!this.lastChunkWritten) {
|
||||
this.endOfChunkHit = true;
|
||||
return $.EOF;
|
||||
write(chunk, isLastChunk) {
|
||||
if (this.html) {
|
||||
this.html += chunk;
|
||||
} else {
|
||||
this.html = chunk;
|
||||
}
|
||||
|
||||
this.lastCharPos = this.html.length - 1;
|
||||
this.endOfChunkHit = false;
|
||||
this.lastChunkWritten = isLastChunk;
|
||||
}
|
||||
|
||||
return cp;
|
||||
};
|
||||
insertHtmlAtCurrentPos(chunk) {
|
||||
this.html = this.html.substring(0, this.pos + 1) + chunk + this.html.substring(this.pos + 1, this.html.length);
|
||||
|
||||
Preprocessor.prototype.write = function (chunk, isLastChunk) {
|
||||
if (this.html)
|
||||
this.html += chunk;
|
||||
|
||||
else
|
||||
this.html = chunk;
|
||||
|
||||
this.lastCharPos = this.html.length - 1;
|
||||
this.endOfChunkHit = false;
|
||||
this.lastChunkWritten = isLastChunk;
|
||||
};
|
||||
|
||||
Preprocessor.prototype.insertHtmlAtCurrentPos = function (chunk) {
|
||||
this.html = this.html.substring(0, this.pos + 1) +
|
||||
chunk +
|
||||
this.html.substring(this.pos + 1, this.html.length);
|
||||
|
||||
this.lastCharPos = this.html.length - 1;
|
||||
this.endOfChunkHit = false;
|
||||
};
|
||||
|
||||
|
||||
Preprocessor.prototype.advance = function () {
|
||||
this.pos++;
|
||||
|
||||
if (this.pos > this.lastCharPos) {
|
||||
if (!this.lastChunkWritten)
|
||||
this.endOfChunkHit = true;
|
||||
|
||||
return $.EOF;
|
||||
this.lastCharPos = this.html.length - 1;
|
||||
this.endOfChunkHit = false;
|
||||
}
|
||||
|
||||
var cp = this.html.charCodeAt(this.pos);
|
||||
advance() {
|
||||
this.pos++;
|
||||
|
||||
if (this.pos > this.lastCharPos) {
|
||||
this.endOfChunkHit = !this.lastChunkWritten;
|
||||
return $.EOF;
|
||||
}
|
||||
|
||||
let cp = this.html.charCodeAt(this.pos);
|
||||
|
||||
//NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
|
||||
//must be ignored.
|
||||
if (this.skipNextNewLine && cp === $.LINE_FEED) {
|
||||
this.skipNextNewLine = false;
|
||||
this._addGap();
|
||||
return this.advance();
|
||||
}
|
||||
|
||||
//NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
|
||||
if (cp === $.CARRIAGE_RETURN) {
|
||||
this.skipNextNewLine = true;
|
||||
return $.LINE_FEED;
|
||||
}
|
||||
|
||||
//NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
|
||||
//must be ignored.
|
||||
if (this.skipNextNewLine && cp === $.LINE_FEED) {
|
||||
this.skipNextNewLine = false;
|
||||
this._addGap();
|
||||
return this.advance();
|
||||
|
||||
if (unicode.isSurrogate(cp)) {
|
||||
cp = this._processSurrogate(cp);
|
||||
}
|
||||
|
||||
//OPTIMIZATION: first check if code point is in the common allowed
|
||||
//range (ASCII alphanumeric, whitespaces, big chunk of BMP)
|
||||
//before going into detailed performance cost validation.
|
||||
const isCommonValidRange =
|
||||
(cp > 0x1f && cp < 0x7f) || cp === $.LINE_FEED || cp === $.CARRIAGE_RETURN || (cp > 0x9f && cp < 0xfdd0);
|
||||
|
||||
if (!isCommonValidRange) {
|
||||
this._checkForProblematicCharacters(cp);
|
||||
}
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
||||
//NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
|
||||
if (cp === $.CARRIAGE_RETURN) {
|
||||
this.skipNextNewLine = true;
|
||||
return $.LINE_FEED;
|
||||
_checkForProblematicCharacters(cp) {
|
||||
if (unicode.isControlCodePoint(cp)) {
|
||||
this._err(ERR.controlCharacterInInputStream);
|
||||
} else if (unicode.isUndefinedCodePoint(cp)) {
|
||||
this._err(ERR.noncharacterInInputStream);
|
||||
}
|
||||
}
|
||||
|
||||
this.skipNextNewLine = false;
|
||||
retreat() {
|
||||
if (this.pos === this.lastGapPos) {
|
||||
this.lastGapPos = this.gapStack.pop();
|
||||
this.pos--;
|
||||
}
|
||||
|
||||
//OPTIMIZATION: first perform check if the code point in the allowed range that covers most common
|
||||
//HTML input (e.g. ASCII codes) to avoid performance-cost operations for high-range code points.
|
||||
return cp >= 0xD800 ? this._processHighRangeCodePoint(cp) : cp;
|
||||
};
|
||||
|
||||
Preprocessor.prototype.retreat = function () {
|
||||
if (this.pos === this.lastGapPos) {
|
||||
this.lastGapPos = this.gapStack.pop();
|
||||
this.pos--;
|
||||
}
|
||||
}
|
||||
|
||||
this.pos--;
|
||||
};
|
||||
|
||||
module.exports = Preprocessor;
|
||||
|
Reference in New Issue
Block a user