https://project.mdnd-it.cc/work_packages/94
This commit is contained in:
2025-08-23 04:25:28 +02:00
parent 725516ad6c
commit 19cfa031d0
25823 changed files with 1095587 additions and 2801760 deletions
@@ -0,0 +1,25 @@
import { Stream } from "../../common/Stream";
/**
 * Incremental UTF-8 decoder.
 *
 * Bytes are pushed in one at a time through {@link UTF8Decoder.handler}; the
 * instance keeps the state of the multi-byte sequence currently being read
 * in its `utf8_*` fields.
 */
export declare class UTF8Decoder {
    /** Error-mode flag taken from the constructor options. */
    readonly fatal: boolean;
    /** Code point value accumulated so far for the sequence in progress. */
    utf8_code_point: number;
    /** Number of continuation bytes already consumed for the sequence in progress. */
    utf8_bytes_seen: number;
    /** Number of continuation bytes the current lead byte requires (0 when idle). */
    utf8_bytes_needed: number;
    /** Smallest value accepted for the next continuation byte. */
    utf8_lower_boundary: number;
    /** Largest value accepted for the next continuation byte. */
    utf8_upper_boundary: number;
    /**
     * @param options - Decoder settings; `fatal` selects the error mode.
     */
    constructor(options: { fatal: boolean });
    /**
     * Consumes one byte of input.
     *
     * @param stream - The stream of bytes being decoded.
     * @param bite - The next byte read from the stream.
     * @returns The next code point(s) decoded, or `null` if not enough data
     * exists in the input stream to decode a complete code point.
     */
    handler(stream: Stream, bite: number): number | number[] | null;
}
@@ -0,0 +1,126 @@
import { decoderError } from "../../encoding/encodings";
import { finished } from "../../encoding/finished";
import { end_of_stream } from "../../encoding/terminology";
import { inRange } from "../../encoding/utilities";
/**
* @constructor
* @implements {Decoder}
* @param {{fatal: boolean}} options
*/
var UTF8Decoder = /** @class */ (function () {
function UTF8Decoder(options) {
this.fatal = options.fatal;
// utf-8's decoder's has an associated utf-8 code point, utf-8
// bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
// lower boundary (initially 0x80), and a utf-8 upper boundary
// (initially 0xBF).
/** @type {number} */ this.utf8_code_point = 0,
/** @type {number} */ this.utf8_bytes_seen = 0,
/** @type {number} */ this.utf8_bytes_needed = 0,
/** @type {number} */ this.utf8_lower_boundary = 0x80,
/** @type {number} */ this.utf8_upper_boundary = 0xBF;
}
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
* @return {?(number|!Array.<number>)} The next code point(s)
* decoded, or null if not enough data exists in the input
* stream to decode a complete code point.
*/
UTF8Decoder.prototype.handler = function (stream, bite) {
// 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
// set utf-8 bytes needed to 0 and return error.
if (bite === end_of_stream && this.utf8_bytes_needed !== 0) {
this.utf8_bytes_needed = 0;
return decoderError(this.fatal);
}
// 2. If byte is end-of-stream, return finished.
if (bite === end_of_stream)
return finished;
// 3. If utf-8 bytes needed is 0, based on byte:
if (this.utf8_bytes_needed === 0) {
// 0x00 to 0x7F
if (inRange(bite, 0x00, 0x7F)) {
// Return a code point whose value is byte.
return bite;
}
// 0xC2 to 0xDF
else if (inRange(bite, 0xC2, 0xDF)) {
// 1. Set utf-8 bytes needed to 1.
this.utf8_bytes_needed = 1;
// 2. Set UTF-8 code point to byte & 0x1F.
this.utf8_code_point = bite & 0x1F;
}
// 0xE0 to 0xEF
else if (inRange(bite, 0xE0, 0xEF)) {
// 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0.
if (bite === 0xE0)
this.utf8_lower_boundary = 0xA0;
// 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
if (bite === 0xED)
this.utf8_upper_boundary = 0x9F;
// 3. Set utf-8 bytes needed to 2.
this.utf8_bytes_needed = 2;
// 4. Set UTF-8 code point to byte & 0xF.
this.utf8_code_point = bite & 0xF;
}
// 0xF0 to 0xF4
else if (inRange(bite, 0xF0, 0xF4)) {
// 1. If byte is 0xF0, set utf-8 lower boundary to 0x90.
if (bite === 0xF0)
this.utf8_lower_boundary = 0x90;
// 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
if (bite === 0xF4)
this.utf8_upper_boundary = 0x8F;
// 3. Set utf-8 bytes needed to 3.
this.utf8_bytes_needed = 3;
// 4. Set UTF-8 code point to byte & 0x7.
this.utf8_code_point = bite & 0x7;
}
// Otherwise
else {
// Return error.
return decoderError(this.fatal);
}
// Return continue.
return null;
}
// 4. If byte is not in the range utf-8 lower boundary to utf-8
// upper boundary, inclusive, run these substeps:
if (!inRange(bite, this.utf8_lower_boundary, this.utf8_upper_boundary)) {
// 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
// bytes seen to 0, set utf-8 lower boundary to 0x80, and set
// utf-8 upper boundary to 0xBF.
this.utf8_code_point = this.utf8_bytes_needed = this.utf8_bytes_seen = 0;
this.utf8_lower_boundary = 0x80;
this.utf8_upper_boundary = 0xBF;
// 2. Prepend byte to stream.
stream.prepend(bite);
// 3. Return error.
return decoderError(this.fatal);
}
// 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
// to 0xBF.
this.utf8_lower_boundary = 0x80;
this.utf8_upper_boundary = 0xBF;
// 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
// 0x3F)
this.utf8_code_point = (this.utf8_code_point << 6) | (bite & 0x3F);
// 7. Increase utf-8 bytes seen by one.
this.utf8_bytes_seen += 1;
// 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
// continue.
if (this.utf8_bytes_seen !== this.utf8_bytes_needed)
return null;
// 9. Let code point be utf-8 code point.
var code_point = this.utf8_code_point;
// 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
// seen to 0.
this.utf8_code_point = this.utf8_bytes_needed = this.utf8_bytes_seen = 0;
// 11. Return a code point whose value is code point.
return code_point;
};
return UTF8Decoder;
}());
export { UTF8Decoder };
//# sourceMappingURL=UTF8Decoder.js.map
@@ -0,0 +1 @@
{"version":3,"file":"UTF8Decoder.js","sourceRoot":"","sources":["../../../../src/coders/utf-8/UTF8Decoder.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAEnD;;;;GAIG;AACH;IAUE,qBAAY,OAA4B;QACtC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAE3B,8DAA8D;QAC9D,gEAAgE;QAChE,8DAA8D;QAC9D,oBAAoB;QACpB,qBAAqB,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC;YAC9C,qBAAqB,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC;YAC9C,qBAAqB,CAAC,IAAI,CAAC,iBAAiB,GAAG,CAAC;YAChD,qBAAqB,CAAC,IAAI,CAAC,mBAAmB,GAAG,IAAI;YACrD,qBAAqB,CAAC,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;IACxD,CAAC;IAED;;;;;;OAMG;IACH,6BAAO,GAAP,UAAQ,MAAc,EAAE,IAAY;QAClC,+DAA+D;QAC/D,gDAAgD;QAChD,IAAI,IAAI,KAAK,aAAa,IAAI,IAAI,CAAC,iBAAiB,KAAK,CAAC,EAAE;YAC1D,IAAI,CAAC,iBAAiB,GAAG,CAAC,CAAC;YAC3B,OAAO,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACjC;QAED,gDAAgD;QAChD,IAAI,IAAI,KAAK,aAAa;YACxB,OAAO,QAAQ,CAAC;QAElB,gDAAgD;QAChD,IAAI,IAAI,CAAC,iBAAiB,KAAK,CAAC,EAAE;YAEhC,eAAe;YACf,IAAI,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE;gBAC7B,2CAA2C;gBAC3C,OAAO,IAAI,CAAC;aACb;YAED,eAAe;iBACV,IAAI,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE;gBAClC,kCAAkC;gBAClC,IAAI,CAAC,iBAAiB,GAAG,CAAC,CAAC;gBAE3B,0CAA0C;gBAC1C,IAAI,CAAC,eAAe,GAAG,IAAI,GAAG,IAAI,CAAC;aACpC;YAED,eAAe;iBACV,IAAI,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE;gBAClC,wDAAwD;gBACxD,IAAI,IAAI,KAAK,IAAI;oBACf,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;gBAClC,wDAAwD;gBACxD,IAAI,IAAI,KAAK,IAAI;oBACf,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;gBAClC,kCAAkC;gBAClC,IAAI,CAAC,iBAAiB,GAAG,CAAC,CAAC;gBAC3B,yCAAyC;gBACzC,IAAI,CAAC,eAAe,GAAG,IAAI,GAAG,GAAG,CAAC;aACnC;YAED,eAAe;iBACV,IAAI,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE;gBAClC,wDAAwD;gBACxD,IAAI,IAAI,KAAK,IAAI;oBACf,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;gBAClC,wDAAwD;gBACxD,IAAI,IAAI,KAAK,IAAI;oBACf,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;gBAClC,kCAAkC;gBAClC,IAAI,CAAC,iBAAiB,GAAG,CAAC,CAAC;gBAC3B,yCAAyC;gBACzC,IAAI,CAAC,eAAe,GAAG,IAAI,GAAG,GAAG,CAAC;aACn
C;YAED,YAAY;iBACP;gBACH,gBAAgB;gBAChB,OAAO,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACjC;YAED,mBAAmB;YACnB,OAAO,IAAI,CAAC;SACb;QAED,+DAA+D;QAC/D,iDAAiD;QACjD,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,CAAC,EAAE;YAEtE,yDAAyD;YACzD,6DAA6D;YAC7D,gCAAgC;YAChC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;YACzE,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;YAChC,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;YAEhC,6BAA6B;YAC7B,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAErB,mBAAmB;YACnB,OAAO,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACjC;QAED,+DAA+D;QAC/D,WAAW;QACX,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;QAChC,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;QAEhC,+DAA+D;QAC/D,QAAQ;QACR,IAAI,CAAC,eAAe,GAAG,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAEnE,uCAAuC;QACvC,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC;QAE1B,6DAA6D;QAC7D,YAAY;QACZ,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI,CAAC,iBAAiB;YACjD,OAAO,IAAI,CAAC;QAEd,yCAAyC;QACzC,IAAM,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC;QAExC,gEAAgE;QAChE,aAAa;QACb,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;QAEzE,qDAAqD;QACrD,OAAO,UAAU,CAAC;IACpB,CAAC;IACH,kBAAC;AAAD,CAAC,AAhJD,IAgJC"}
@@ -0,0 +1,18 @@
import { Stream } from "../../common/Stream";
/**
 * UTF-8 encoder: turns one code point at a time into its UTF-8 byte(s)
 * through {@link UTF8Encoder.handler}.
 */
export declare class UTF8Encoder {
    /** Error-mode flag taken from the constructor options. */
    readonly fatal: boolean;
    /**
     * @param options - Encoder settings; `fatal` is stored on the instance.
     */
    constructor(options: { fatal: boolean });
    /**
     * Encodes a single code point.
     *
     * @param stream - Input stream.
     * @param code_point - Next code point read from the stream.
     * @returns Byte(s) to emit.
     */
    handler(stream: Stream, code_point: number): number | number[];
}
@@ -0,0 +1,64 @@
import { finished } from "../../encoding/finished";
import { end_of_stream, isASCIICodePoint } from "../../encoding/terminology";
import { inRange } from "../../encoding/utilities";
/**
* @constructor
* @implements {Encoder}
* @param {{fatal: boolean}} options
*/
var UTF8Encoder = /** @class */ (function () {
function UTF8Encoder(options) {
this.fatal = options.fatal;
}
/**
* @param {Stream} stream Input stream.
* @param {number} code_point Next code point read from the stream.
* @return {(number|!Array.<number>)} Byte(s) to emit.
*/
UTF8Encoder.prototype.handler = function (stream, code_point) {
// 1. If code point is end-of-stream, return finished.
if (code_point === end_of_stream)
return finished;
// 2. If code point is an ASCII code point, return a byte whose
// value is code point.
if (isASCIICodePoint(code_point))
return code_point;
// 3. Set count and offset based on the range code point is in:
var count, offset;
// U+0080 to U+07FF, inclusive:
if (inRange(code_point, 0x0080, 0x07FF)) {
// 1 and 0xC0
count = 1;
offset = 0xC0;
}
// U+0800 to U+FFFF, inclusive:
else if (inRange(code_point, 0x0800, 0xFFFF)) {
// 2 and 0xE0
count = 2;
offset = 0xE0;
}
// U+10000 to U+10FFFF, inclusive:
else if (inRange(code_point, 0x10000, 0x10FFFF)) {
// 3 and 0xF0
count = 3;
offset = 0xF0;
}
// 4. Let bytes be a byte sequence whose first byte is (code
// point >> (6 × count)) + offset.
var bytes = [(code_point >> (6 * count)) + offset];
// 5. Run these substeps while count is greater than 0:
while (count > 0) {
// 1. Set temp to code point >> (6 × (count 1)).
var temp = code_point >> (6 * (count - 1));
// 2. Append to bytes 0x80 | (temp & 0x3F).
bytes.push(0x80 | (temp & 0x3F));
// 3. Decrease count by one.
count -= 1;
}
// 6. Return bytes bytes, in order.
return bytes;
};
return UTF8Encoder;
}());
export { UTF8Encoder };
//# sourceMappingURL=UTF8Encoder.js.map
@@ -0,0 +1 @@
{"version":3,"file":"UTF8Encoder.js","sourceRoot":"","sources":["../../../../src/coders/utf-8/UTF8Encoder.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAC7E,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAEnD;;;;GAIG;AACH;IAIE,qBAAY,OAA4B;QACtC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAC7B,CAAC;IAED;;;;OAIG;IACH,6BAAO,GAAP,UAAQ,MAAc,EAAE,UAAkB;QACxC,sDAAsD;QACtD,IAAI,UAAU,KAAK,aAAa;YAC9B,OAAO,QAAQ,CAAC;QAElB,+DAA+D;QAC/D,uBAAuB;QACvB,IAAI,gBAAgB,CAAC,UAAU,CAAC;YAC9B,OAAO,UAAU,CAAC;QAEpB,+DAA+D;QAC/D,IAAI,KAAa,EAAE,MAAc,CAAC;QAClC,+BAA+B;QAC/B,IAAI,OAAO,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE;YACvC,aAAa;YACb,KAAK,GAAG,CAAC,CAAC;YACV,MAAM,GAAG,IAAI,CAAC;SACf;QACD,+BAA+B;aAC1B,IAAI,OAAO,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE;YAC5C,aAAa;YACb,KAAK,GAAG,CAAC,CAAC;YACV,MAAM,GAAG,IAAI,CAAC;SACf;QACD,kCAAkC;aAC7B,IAAI,OAAO,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE;YAC/C,aAAa;YACb,KAAK,GAAG,CAAC,CAAC;YACV,MAAM,GAAG,IAAI,CAAC;SACf;QAED,4DAA4D;QAC5D,kCAAkC;QAClC,IAAM,KAAK,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC;QAErD,uDAAuD;QACvD,OAAO,KAAK,GAAG,CAAC,EAAE;YAEhB,kDAAkD;YAClD,IAAM,IAAI,GAAG,UAAU,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;YAE7C,2CAA2C;YAC3C,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;YAEjC,4BAA4B;YAC5B,KAAK,IAAI,CAAC,CAAC;SACZ;QAED,mCAAmC;QACnC,OAAO,KAAK,CAAC;IACf,CAAC;IACH,kBAAC;AAAD,CAAC,AAhED,IAgEC"}
@@ -0,0 +1,2 @@
export * from './UTF8Decoder';
export * from './UTF8Encoder';
@@ -0,0 +1,3 @@
export * from './UTF8Decoder';
export * from './UTF8Encoder';
//# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/coders/utf-8/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC"}