https://project.mdnd-it.cc/work_packages/94
This commit is contained in:
2025-08-23 04:25:28 +02:00
parent 725516ad6c
commit 19cfa031d0
25823 changed files with 1095587 additions and 2801760 deletions
+91 -70
View File
@@ -71,6 +71,12 @@
parser.ns = Object.create(rootNS)
}
// disallow unquoted attribute values if not otherwise configured
// and strict mode is true
if (parser.opt.unquotedAttributeValues === undefined) {
parser.opt.unquotedAttributeValues = !strict;
}
// mostly just for error reporting
parser.trackPosition = parser.opt.position !== false
if (parser.trackPosition) {
@@ -164,6 +170,7 @@
} catch (ex) {
Stream = function () {}
}
if (!Stream) Stream = function () {}
var streamWraps = sax.EVENTS.filter(function (ev) {
return ev !== 'error' && ev !== 'end'
@@ -262,28 +269,14 @@
return Stream.prototype.on.call(me, ev, handler)
}
// character classes and tokens
var whitespace = '\r\n\t '
// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.
var number = '0124356789'
var letter = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
// (Letter | "_" | ":")
var quote = '\'"'
var attribEnd = whitespace + '>'
var CDATA = '[CDATA['
var DOCTYPE = 'DOCTYPE'
var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
// turn all the string character sets into character class objects.
whitespace = charClass(whitespace)
number = charClass(number)
letter = charClass(letter)
// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
// This implementation works on strings, a single character at a time
// as such, it cannot ever support astral-plane characters (10000-EFFFF)
@@ -292,31 +285,29 @@
// is left as an exercise for the reader.
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
quote = charClass(quote)
attribEnd = charClass(attribEnd)
function charClass (str) {
return str.split('').reduce(function (s, c) {
s[c] = true
return s
}, {})
function isWhitespace (c) {
return c === ' ' || c === '\n' || c === '\r' || c === '\t'
}
function isRegExp (c) {
return Object.prototype.toString.call(c) === '[object RegExp]'
function isQuote (c) {
return c === '"' || c === '\''
}
function is (charclass, c) {
return isRegExp(charclass) ? !!c.match(charclass) : charclass[c]
function isAttribEnd (c) {
return c === '>' || isWhitespace(c)
}
function not (charclass, c) {
return !is(charclass, c)
function isMatch (regex, c) {
return regex.test(c)
}
function notMatch (regex, c) {
return !isMatch(regex, c)
}
var S = 0
@@ -949,7 +940,7 @@
}
}
entity = entity.replace(/^0+/, '')
if (numStr.toLowerCase() !== entity) {
if (isNaN(num) || numStr.toLowerCase() !== entity) {
strictFail(parser, 'Invalid character entity')
return '&' + parser.entity + ';'
}
@@ -961,7 +952,7 @@
if (c === '<') {
parser.state = S.OPEN_WAKA
parser.startTagPosition = parser.position
} else if (not(whitespace, c)) {
} else if (!isWhitespace(c)) {
// have to process this as a text node.
// weird, but happens.
strictFail(parser, 'Non-whitespace before first tag.')
@@ -998,9 +989,11 @@
while (true) {
c = charAt(chunk, i++)
parser.c = c
if (!c) {
break
}
if (parser.trackPosition) {
parser.position++
if (c === '\n') {
@@ -1010,6 +1003,7 @@
parser.column++
}
}
switch (parser.state) {
case S.BEGIN:
parser.state = S.BEGIN_WHITESPACE
@@ -1044,7 +1038,7 @@
parser.state = S.OPEN_WAKA
parser.startTagPosition = parser.position
} else {
if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) {
if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
strictFail(parser, 'Text data outside of root node.')
}
if (c === '&') {
@@ -1078,9 +1072,9 @@
if (c === '!') {
parser.state = S.SGML_DECL
parser.sgmlDecl = ''
} else if (is(whitespace, c)) {
} else if (isWhitespace(c)) {
// wait for it...
} else if (is(nameStart, c)) {
} else if (isMatch(nameStart, c)) {
parser.state = S.OPEN_TAG
parser.tagName = c
} else if (c === '/') {
@@ -1102,15 +1096,22 @@
continue
case S.SGML_DECL:
if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
if (parser.sgmlDecl + c === '--') {
parser.state = S.COMMENT
parser.comment = ''
parser.sgmlDecl = ''
continue;
}
if (parser.doctype && parser.doctype !== true && parser.sgmlDecl) {
parser.state = S.DOCTYPE_DTD
parser.doctype += '<!' + parser.sgmlDecl + c
parser.sgmlDecl = ''
} else if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
emitNode(parser, 'onopencdata')
parser.state = S.CDATA
parser.sgmlDecl = ''
parser.cdata = ''
} else if (parser.sgmlDecl + c === '--') {
parser.state = S.COMMENT
parser.comment = ''
parser.sgmlDecl = ''
} else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
parser.state = S.DOCTYPE
if (parser.doctype || parser.sawRoot) {
@@ -1123,7 +1124,7 @@
emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
parser.sgmlDecl = ''
parser.state = S.TEXT
} else if (is(quote, c)) {
} else if (isQuote(c)) {
parser.state = S.SGML_DECL_QUOTED
parser.sgmlDecl += c
} else {
@@ -1148,7 +1149,7 @@
parser.doctype += c
if (c === '[') {
parser.state = S.DOCTYPE_DTD
} else if (is(quote, c)) {
} else if (isQuote(c)) {
parser.state = S.DOCTYPE_QUOTED
parser.q = c
}
@@ -1164,12 +1165,18 @@
continue
case S.DOCTYPE_DTD:
parser.doctype += c
if (c === ']') {
parser.doctype += c
parser.state = S.DOCTYPE
} else if (is(quote, c)) {
} else if (c === '<') {
parser.state = S.OPEN_WAKA
parser.startTagPosition = parser.position
} else if (isQuote(c)) {
parser.doctype += c
parser.state = S.DOCTYPE_DTD_QUOTED
parser.q = c
} else {
parser.doctype += c
}
continue
@@ -1210,6 +1217,8 @@
// which is a comment of " blah -- bloo "
parser.comment += '--' + c
parser.state = S.COMMENT
} else if (parser.doctype && parser.doctype !== true) {
parser.state = S.DOCTYPE_DTD
} else {
parser.state = S.TEXT
}
@@ -1251,7 +1260,7 @@
case S.PROC_INST:
if (c === '?') {
parser.state = S.PROC_INST_ENDING
} else if (is(whitespace, c)) {
} else if (isWhitespace(c)) {
parser.state = S.PROC_INST_BODY
} else {
parser.procInstName += c
@@ -1259,7 +1268,7 @@
continue
case S.PROC_INST_BODY:
if (!parser.procInstBody && is(whitespace, c)) {
if (!parser.procInstBody && isWhitespace(c)) {
continue
} else if (c === '?') {
parser.state = S.PROC_INST_ENDING
@@ -1283,7 +1292,7 @@
continue
case S.OPEN_TAG:
if (is(nameBody, c)) {
if (isMatch(nameBody, c)) {
parser.tagName += c
} else {
newTag(parser)
@@ -1292,7 +1301,7 @@
} else if (c === '/') {
parser.state = S.OPEN_TAG_SLASH
} else {
if (not(whitespace, c)) {
if (!isWhitespace(c)) {
strictFail(parser, 'Invalid character in tag name')
}
parser.state = S.ATTRIB
@@ -1312,13 +1321,13 @@
case S.ATTRIB:
// haven't read the attribute name yet.
if (is(whitespace, c)) {
if (isWhitespace(c)) {
continue
} else if (c === '>') {
openTag(parser)
} else if (c === '/') {
parser.state = S.OPEN_TAG_SLASH
} else if (is(nameStart, c)) {
} else if (isMatch(nameStart, c)) {
parser.attribName = c
parser.attribValue = ''
parser.state = S.ATTRIB_NAME
@@ -1335,9 +1344,9 @@
parser.attribValue = parser.attribName
attrib(parser)
openTag(parser)
} else if (is(whitespace, c)) {
} else if (isWhitespace(c)) {
parser.state = S.ATTRIB_NAME_SAW_WHITE
} else if (is(nameBody, c)) {
} else if (isMatch(nameBody, c)) {
parser.attribName += c
} else {
strictFail(parser, 'Invalid attribute name')
@@ -1347,7 +1356,7 @@
case S.ATTRIB_NAME_SAW_WHITE:
if (c === '=') {
parser.state = S.ATTRIB_VALUE
} else if (is(whitespace, c)) {
} else if (isWhitespace(c)) {
continue
} else {
strictFail(parser, 'Attribute without value')
@@ -1360,7 +1369,7 @@
parser.attribName = ''
if (c === '>') {
openTag(parser)
} else if (is(nameStart, c)) {
} else if (isMatch(nameStart, c)) {
parser.attribName = c
parser.state = S.ATTRIB_NAME
} else {
@@ -1371,13 +1380,15 @@
continue
case S.ATTRIB_VALUE:
if (is(whitespace, c)) {
if (isWhitespace(c)) {
continue
} else if (is(quote, c)) {
} else if (isQuote(c)) {
parser.q = c
parser.state = S.ATTRIB_VALUE_QUOTED
} else {
strictFail(parser, 'Unquoted attribute value')
if (!parser.opt.unquotedAttributeValues) {
error(parser, 'Unquoted attribute value')
}
parser.state = S.ATTRIB_VALUE_UNQUOTED
parser.attribValue = c
}
@@ -1398,13 +1409,13 @@
continue
case S.ATTRIB_VALUE_CLOSED:
if (is(whitespace, c)) {
if (isWhitespace(c)) {
parser.state = S.ATTRIB
} else if (c === '>') {
openTag(parser)
} else if (c === '/') {
parser.state = S.OPEN_TAG_SLASH
} else if (is(nameStart, c)) {
} else if (isMatch(nameStart, c)) {
strictFail(parser, 'No whitespace between attributes')
parser.attribName = c
parser.attribValue = ''
@@ -1415,7 +1426,7 @@
continue
case S.ATTRIB_VALUE_UNQUOTED:
if (not(attribEnd, c)) {
if (!isAttribEnd(c)) {
if (c === '&') {
parser.state = S.ATTRIB_VALUE_ENTITY_U
} else {
@@ -1433,9 +1444,9 @@
case S.CLOSE_TAG:
if (!parser.tagName) {
if (is(whitespace, c)) {
if (isWhitespace(c)) {
continue
} else if (not(nameStart, c)) {
} else if (notMatch(nameStart, c)) {
if (parser.script) {
parser.script += '</' + c
parser.state = S.SCRIPT
@@ -1447,14 +1458,14 @@
}
} else if (c === '>') {
closeTag(parser)
} else if (is(nameBody, c)) {
} else if (isMatch(nameBody, c)) {
parser.tagName += c
} else if (parser.script) {
parser.script += '</' + parser.tagName
parser.tagName = ''
parser.state = S.SCRIPT
} else {
if (not(whitespace, c)) {
if (!isWhitespace(c)) {
strictFail(parser, 'Invalid tagname in closing tag')
}
parser.state = S.CLOSE_TAG_SAW_WHITE
@@ -1462,7 +1473,7 @@
continue
case S.CLOSE_TAG_SAW_WHITE:
if (is(whitespace, c)) {
if (isWhitespace(c)) {
continue
}
if (c === '>') {
@@ -1495,10 +1506,17 @@
}
if (c === ';') {
parser[buffer] += parseEntity(parser)
parser.entity = ''
parser.state = returnState
} else if (is(parser.entity.length ? entityBody : entityStart, c)) {
var parsedEntity = parseEntity(parser)
if (parser.opt.unparsedEntities && !Object.values(sax.XML_ENTITIES).includes(parsedEntity)) {
parser.entity = ''
parser.state = returnState
parser.write(parsedEntity)
} else {
parser[buffer] += parsedEntity
parser.entity = ''
parser.state = returnState
}
} else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
parser.entity += c
} else {
strictFail(parser, 'Invalid character in entity name')
@@ -1509,8 +1527,9 @@
continue
default:
default: /* istanbul ignore next */ {
throw new Error(parser, 'Unknown state: ' + parser.state)
}
}
} // while
@@ -1521,6 +1540,7 @@
}
/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
/* istanbul ignore next */
if (!String.fromCodePoint) {
(function () {
var stringFromCharCode = String.fromCharCode
@@ -1562,6 +1582,7 @@
}
return result
}
/* istanbul ignore next */
if (Object.defineProperty) {
Object.defineProperty(String, 'fromCodePoint', {
value: fromCodePoint,