You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					126 lines
				
				5.5 KiB
			
		
		
			
		
	
	
					126 lines
				
				5.5 KiB
			| 
											10 months ago
										 | import { decoderError } from "../../encoding/encodings"; | ||
|  | import { finished } from "../../encoding/finished"; | ||
|  | import { end_of_stream } from "../../encoding/terminology"; | ||
|  | import { inRange } from "../../encoding/utilities"; | ||
/**
 * Incremental UTF-8 decoder: consumes one byte per `handler` call and
 * emits a code point once a complete, well-formed sequence has been seen.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is forwarded to `decoderError`
 *     whenever malformed input is encountered.
 */
var UTF8Decoder = /** @class */ (function () {
    /**
     * Puts every piece of per-sequence state back to its initial value.
     * Private to this closure; shared by the constructor and by every path
     * in `handler` that ends (or abandons) a sequence.
     *
     * @param {UTF8Decoder} decoder The instance to reset.
     */
    function resetState(decoder) {
        decoder.utf8_code_point = 0;
        decoder.utf8_bytes_seen = 0;
        decoder.utf8_bytes_needed = 0;
        decoder.utf8_lower_boundary = 0x80;
        decoder.utf8_upper_boundary = 0xBF;
    }

    function UTF8Decoder(options) {
        this.fatal = options.fatal;
        // utf-8's decoder's has an associated utf-8 code point, utf-8
        // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
        // lower boundary (initially 0x80), and a utf-8 upper boundary
        // (initially 0xBF).
        /** @type {number} */ this.utf8_code_point = 0;
        /** @type {number} */ this.utf8_bytes_seen = 0;
        /** @type {number} */ this.utf8_bytes_needed = 0;
        /** @type {number} */ this.utf8_lower_boundary = 0x80;
        /** @type {number} */ this.utf8_upper_boundary = 0xBF;
    }

    /**
     * @param {Stream} stream The stream of bytes being decoded. Only
     *     `stream.prepend(byte)` is used, to push back a byte that turned
     *     out not to belong to the sequence in progress.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point. On malformed input the
     *     result of `decoderError(this.fatal)` is returned; on a clean
     *     end-of-stream, `finished` is returned.
     */
    UTF8Decoder.prototype.handler = function (stream, bite) {
        // 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
        // set utf-8 bytes needed to 0 and return error.
        if (bite === end_of_stream && this.utf8_bytes_needed !== 0) {
            // Reset *all* state, not just bytes needed: the partial code
            // point, the bytes-seen counter and any narrowed boundary left
            // by a 0xE0/0xED/0xF0/0xF4 lead byte would otherwise leak into
            // the next sequence if this instance is used again.
            resetState(this);
            return decoderError(this.fatal);
        }
        // 2. If byte is end-of-stream, return finished.
        if (bite === end_of_stream) {
            return finished;
        }
        // 3. If utf-8 bytes needed is 0, based on byte:
        if (this.utf8_bytes_needed === 0) {
            // 0x00 to 0x7F
            if (inRange(bite, 0x00, 0x7F)) {
                // Return a code point whose value is byte.
                return bite;
            }
            // 0xC2 to 0xDF
            else if (inRange(bite, 0xC2, 0xDF)) {
                // 1. Set utf-8 bytes needed to 1.
                this.utf8_bytes_needed = 1;
                // 2. Set UTF-8 code point to byte & 0x1F.
                this.utf8_code_point = bite & 0x1F;
            }
            // 0xE0 to 0xEF
            else if (inRange(bite, 0xE0, 0xEF)) {
                // 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0.
                if (bite === 0xE0) {
                    this.utf8_lower_boundary = 0xA0;
                }
                // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
                if (bite === 0xED) {
                    this.utf8_upper_boundary = 0x9F;
                }
                // 3. Set utf-8 bytes needed to 2.
                this.utf8_bytes_needed = 2;
                // 4. Set UTF-8 code point to byte & 0xF.
                this.utf8_code_point = bite & 0xF;
            }
            // 0xF0 to 0xF4
            else if (inRange(bite, 0xF0, 0xF4)) {
                // 1. If byte is 0xF0, set utf-8 lower boundary to 0x90.
                if (bite === 0xF0) {
                    this.utf8_lower_boundary = 0x90;
                }
                // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
                if (bite === 0xF4) {
                    this.utf8_upper_boundary = 0x8F;
                }
                // 3. Set utf-8 bytes needed to 3.
                this.utf8_bytes_needed = 3;
                // 4. Set UTF-8 code point to byte & 0x7.
                this.utf8_code_point = bite & 0x7;
            }
            // Otherwise
            else {
                // Return error.
                return decoderError(this.fatal);
            }
            // Return continue.
            return null;
        }
        // 4. If byte is not in the range utf-8 lower boundary to utf-8
        // upper boundary, inclusive, run these substeps:
        if (!inRange(bite, this.utf8_lower_boundary, this.utf8_upper_boundary)) {
            // 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
            // bytes seen to 0, set utf-8 lower boundary to 0x80, and set
            // utf-8 upper boundary to 0xBF.
            resetState(this);
            // 2. Prepend byte to stream.
            stream.prepend(bite);
            // 3. Return error.
            return decoderError(this.fatal);
        }
        // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
        // to 0xBF.
        this.utf8_lower_boundary = 0x80;
        this.utf8_upper_boundary = 0xBF;
        // 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
        // 0x3F)
        this.utf8_code_point = (this.utf8_code_point << 6) | (bite & 0x3F);
        // 7. Increase utf-8 bytes seen by one.
        this.utf8_bytes_seen += 1;
        // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
        // continue.
        if (this.utf8_bytes_seen !== this.utf8_bytes_needed) {
            return null;
        }
        // 9. Let code point be utf-8 code point.
        var code_point = this.utf8_code_point;
        // 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
        // seen to 0. (The boundaries were already restored in step 5, so
        // the full reset is equivalent here.)
        resetState(this);
        // 11. Return a code point whose value is code point.
        return code_point;
    };
    return UTF8Decoder;
}());
|  | export { UTF8Decoder }; | ||
|  | //# sourceMappingURL=UTF8Decoder.js.map
 |