Script: Implement TextEncoderStream (#38466)

This implements the `TextEncoderStream` WebIDL interface. 

Testing: Existing WPT tests should be sufficient
Fixes: #37724

---------

Signed-off-by: minghuaw <wuminghua7@huawei.com>
This commit is contained in:
minghuaw 2025-08-15 15:57:00 +08:00 committed by GitHub
parent f24f225db8
commit d409137e4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 476 additions and 228 deletions

View file

@ -599,6 +599,7 @@ pub(crate) mod textdecoder;
pub(crate) mod textdecodercommon;
pub(crate) mod textdecoderstream;
pub(crate) mod textencoder;
pub(crate) mod textencoderstream;
pub(crate) mod textmetrics;
pub(crate) mod texttrack;
pub(crate) mod texttrackcue;

View file

@ -0,0 +1,390 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::cell::Cell;
use std::num::{NonZero, NonZeroU16};
use std::ptr::{self, NonNull};
use dom_struct::dom_struct;
use js::conversions::latin1_to_string;
use js::jsapi::{
JS_DeprecatedStringHasLatin1Chars, JS_GetTwoByteStringCharsAndLength, JS_IsExceptionPending,
JSObject, JSType, ToPrimitive,
};
use js::jsval::UndefinedValue;
use js::rust::{
HandleObject as SafeHandleObject, HandleValue as SafeHandleValue,
MutableHandleValue as SafeMutableHandleValue, ToString,
};
use js::typedarray::Uint8Array;
use script_bindings::conversions::SafeToJSValConvertible;
use crate::dom::bindings::buffer_source::create_buffer_source;
use crate::dom::bindings::codegen::Bindings::TextEncoderStreamBinding::TextEncoderStreamMethods;
use crate::dom::bindings::error::{Error, Fallible, throw_dom_exception};
use crate::dom::bindings::reflector::{Reflector, reflect_dom_object_with_proto};
use crate::dom::bindings::root::{Dom, DomRoot};
use crate::dom::bindings::str::DOMString;
use crate::dom::transformstreamdefaultcontroller::TransformerType;
use crate::dom::types::{GlobalScope, TransformStream, TransformStreamDefaultController};
use crate::script_runtime::{CanGc, JSContext as SafeJSContext};
use crate::{DomTypeHolder, DomTypes};
/// String converted from an input JS Value
enum ConvertedInput<'a> {
String(String),
CodeUnits(&'a [u16]),
}
/// Converts a JS value to primitive type so that it can be used with
/// `ToString`.
///
/// Set `rval` to `chunk` if `chunk` is a primitive JS value. Otherwise, convert
/// `chunk` into a primitive JS value and then set `rval` to the converted
/// primitive. This follows the `ToString` procedure with the exception that it
/// does not convert the value to string.
///
/// See below for the `ToString` procedure in spec:
/// <https://tc39.es/ecma262/multipage/abstract-operations.html#sec-tostring>
#[allow(unsafe_code)]
fn jsval_to_primitive(
cx: SafeJSContext,
global: &GlobalScope,
chunk: SafeHandleValue,
mut rval: SafeMutableHandleValue,
can_gc: CanGc,
) -> Fallible<()> {
// Step 1. If argument is a String, return argument.
// Step 2. If argument is a Symbol, throw a TypeError exception.
// Step 3. If argument is undefined, return "undefined".
// Step 4. If argument is null, return "null".
// Step 5. If argument is true, return "true".
// Step 6. If argument is false, return "false".
// Step 7. If argument is a Number, return Number::toString(argument, 10).
// Step 8. If argument is a BigInt, return BigInt::toString(argument, 10).
if chunk.is_primitive() {
rval.set(chunk.get());
return Ok(());
}
// Step 9. Assert: argument is an Object.
assert!(chunk.is_object());
// Step 10. Let primValue be ? ToPrimitive(argument, string).
rooted!(in(*cx) let obj = chunk.to_object());
let is_success =
unsafe { ToPrimitive(*cx, obj.handle().into(), JSType::JSTYPE_STRING, rval.into()) };
log::debug!("ToPrimitive is_success={:?}", is_success);
if !is_success {
unsafe {
if !JS_IsExceptionPending(*cx) {
throw_dom_exception(
cx,
global,
Error::Type("Cannot convert JSObject to primitive".to_owned()),
can_gc,
);
}
}
return Err(Error::JSFailed);
}
Ok(())
}
/// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
#[derive(Default, JSTraceable, MallocSizeOf)]
pub(crate) struct Encoder {
/// <https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate>
leading_surrogate: Cell<Option<NonZeroU16>>,
}
impl Encoder {
fn encode(&self, maybe_ill_formed: ConvertedInput<'_>) -> String {
match maybe_ill_formed {
ConvertedInput::String(s) => {
// Rust String is already UTF-8 encoded and cannot contain
// surrogate
if !s.is_empty() && self.leading_surrogate.take().is_some() {
let mut output = String::with_capacity(1 + s.len());
output.push('\u{FFFD}');
output.push_str(&s);
return output;
}
s
},
ConvertedInput::CodeUnits(code_units) => self.encode_from_code_units(code_units),
}
}
/// Encode an input slice of code unit into unicode scalar values
#[allow(unsafe_code)]
fn encode_from_code_units(&self, input: &[u16]) -> String {
// <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
//
// Step 3. Let output be the I/O queue of bytes « end-of-queue ».
let mut output = String::with_capacity(input.len());
// Step 4. While true:
// Step 4.1 Let item be the result of reading from input.
for result in char::decode_utf16(input.iter().cloned()) {
// Step 4.3 Let result be the result of executing the convert code unit
// to scalar value algorithm with encoder, item and input.
// <https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value>
match result {
Ok(c) => {
// Step 1. If encoders leading surrogate is non-null:
// Step 1.1 Let leadingSurrogate be encoders leading surrogate.
// Step 1.2 Set encoders leading surrogate to null.
if self.leading_surrogate.take().is_some() {
// Step 1.5 Return U+FFFD (<28>).
output.push('\u{FFFD}');
}
// Step 1.4 Restore item to input.
// Note: pushing item to output is equivalent to restoring item to input
// and rerun the convert-code-unit-to-scalar-value algo
output.push(c);
},
Err(error) => {
let unpaired_surrogate = error.unpaired_surrogate();
match code_point_type(unpaired_surrogate) {
CodePointType::LeadingSurrogate => {
// Step 1.1 If encoders leading surrogate is non-null:
// Step 1.2 Set encoders leading surrogate to null.
if self.leading_surrogate.take().is_some() {
output.push('\u{FFFD}');
}
// Step 1.4 Restore item to input.
// Note: Replacing encoder's leading_surrogate is equivalent
// to restore item back to input and rerun the convert-
// code-unit-to-scalar-value algo.
// Step 2. If item is a leading surrogate, then set encoders
// leading surrogate to item and return continue.
self.leading_surrogate
.replace(NonZero::new(unpaired_surrogate));
},
CodePointType::TrailingSurrogate => match self.leading_surrogate.take() {
// Step 1.1 If encoders leading surrogate is non-null:
// Step 1.2 Set encoders leading surrogate to null.
Some(leading_surrogate) => {
// Step 1.3 If item is a trailing surrogate, then return a scalar
// value from surrogates given leadingSurrogate and item.
let c = char::decode_utf16([
leading_surrogate.get(),
unpaired_surrogate,
])
.next()
.expect("A pair of surrogate is supplied")
.expect("Decoding a pair of surrogate cannot fail");
output.push(c);
},
// Step 3. If item is a trailing surrogate, then return U+FFFD (<28>).
None => output.push('\u{FFFD}'),
},
CodePointType::ScalarValue => unreachable!("Scalar Value won't fail"),
}
},
}
}
output
}
}
enum CodePointType {
ScalarValue,
LeadingSurrogate,
TrailingSurrogate,
}
fn code_point_type(value: u16) -> CodePointType {
match value {
0xD800..=0xDBFF => CodePointType::LeadingSurrogate,
0xDC00..=0xDFFF => CodePointType::TrailingSurrogate,
_ => CodePointType::ScalarValue,
}
}
/// <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
#[allow(unsafe_code)]
pub(crate) fn encode_and_enqueue_a_chunk(
cx: SafeJSContext,
global: &GlobalScope,
chunk: SafeHandleValue,
encoder: &Encoder,
controller: &TransformStreamDefaultController,
can_gc: CanGc,
) -> Fallible<()> {
// Step 1. Let input be the result of converting chunk to a DOMString.
// Step 2. Convert input to an I/O queue of code units.
rooted!(in(*cx) let mut rval = UndefinedValue());
jsval_to_primitive(cx, global, chunk, rval.handle_mut(), can_gc)?;
assert!(!rval.is_object());
rooted!(in(*cx) let jsstr = unsafe { ToString(*cx, rval.handle()) });
if jsstr.is_null() {
unsafe {
if !JS_IsExceptionPending(*cx) {
throw_dom_exception(
cx,
global,
Error::Type("Cannot convert JS primitive to string".to_owned()),
can_gc,
);
}
}
return Err(Error::JSFailed);
}
let input = unsafe {
if JS_DeprecatedStringHasLatin1Chars(*jsstr) {
let s = NonNull::new(*jsstr).expect("jsstr cannot be null");
ConvertedInput::String(latin1_to_string(*cx, s))
} else {
let mut len = 0;
let data = JS_GetTwoByteStringCharsAndLength(*cx, std::ptr::null(), *jsstr, &mut len);
let maybe_ill_formed_code_units = std::slice::from_raw_parts(data, len);
ConvertedInput::CodeUnits(maybe_ill_formed_code_units)
}
};
// Step 3. Let output be the I/O queue of bytes « end-of-queue ».
// Step 4. While true:
// Step 4.1 Let item be the result of reading from input.
// Step 4.3 Let result be the result of executing the convert code unit
// to scalar value algorithm with encoder, item and input.
// Step 4.4 If result is not continue, then process an item with result,
// encoders encoder, input, output, and "fatal".
let output = encoder.encode(input);
// Step 4.2 If item is end-of-queue:
// Step 4.2.1 Convert output into a byte sequence.
let output = output.as_bytes();
// Step 4.2.2 If output is not empty:
if output.is_empty() {
// Step 4.2.3
return Ok(());
}
// Step 4.2.2.1 Let chunk be the result of creating a Uint8Array object
// given output and encoders relevant realm.
rooted!(in(*cx) let mut js_object = ptr::null_mut::<JSObject>());
let chunk: Uint8Array = create_buffer_source(cx, output, js_object.handle_mut(), can_gc)
.map_err(|_| Error::Type("Cannot convert byte sequence to Uint8Array".to_owned()))?;
rooted!(in(*cx) let mut rval = UndefinedValue());
chunk.safe_to_jsval(cx, rval.handle_mut());
// Step 4.2.2.2 Enqueue chunk into encoders transform.
controller.enqueue(cx, global, rval.handle(), can_gc)?;
Ok(())
}
/// <https://encoding.spec.whatwg.org/#encode-and-flush>
#[allow(unsafe_code)]
pub(crate) fn encode_and_flush(
cx: SafeJSContext,
global: &GlobalScope,
encoder: &Encoder,
controller: &TransformStreamDefaultController,
can_gc: CanGc,
) -> Fallible<()> {
// Step 1. If encoders leading surrogate is non-null:
if encoder.leading_surrogate.get().is_some() {
// Step 1.1 Let chunk be the result of creating a Uint8Array object
// given « 0xEF, 0xBF, 0xBD » and encoders relevant realm.
rooted!(in(*cx) let mut js_object = ptr::null_mut::<JSObject>());
let chunk: Uint8Array =
create_buffer_source(cx, &[0xEF_u8, 0xBF, 0xBD], js_object.handle_mut(), can_gc)
.map_err(|_| {
Error::Type("Cannot convert byte sequence to Uint8Array".to_owned())
})?;
rooted!(in(*cx) let mut rval = UndefinedValue());
chunk.safe_to_jsval(cx, rval.handle_mut());
// Step 1.2 Enqueue chunk into encoders transform.
return controller.enqueue(cx, global, rval.handle(), can_gc);
}
Ok(())
}
/// <https://encoding.spec.whatwg.org/#textencoderstream>
#[dom_struct]
pub(crate) struct TextEncoderStream {
reflector_: Reflector,
/// <https://streams.spec.whatwg.org/#generictransformstream>
transform: Dom<TransformStream>,
}
impl TextEncoderStream {
fn new_inherited(transform: &TransformStream) -> TextEncoderStream {
Self {
reflector_: Reflector::new(),
transform: Dom::from_ref(transform),
}
}
/// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
fn new_with_proto(
cx: SafeJSContext,
global: &GlobalScope,
proto: Option<SafeHandleObject>,
can_gc: CanGc,
) -> Fallible<DomRoot<TextEncoderStream>> {
// Step 1. Set thiss encoder to an instance of the UTF-8 encoder.
let encoder = Encoder::default();
// Step 2. Let transformAlgorithm be an algorithm which takes a chunk argument
// and runs the encode and enqueue a chunk algorithm with this and chunk.
// Step 3. Let flushAlgorithm be an algorithm which runs the encode and flush
// algorithm with this.
let transformer_type = TransformerType::Encoder(encoder);
// Step 4. Let transformStream be a new TransformStream.
let transform = TransformStream::new_with_proto(global, None, can_gc);
// Step 5. Set up transformStream with transformAlgorithm set to transformAlgorithm
// and flushAlgorithm set to flushAlgorithm.
transform.set_up(cx, global, transformer_type, can_gc)?;
// Step 6. Set thiss transform to transformStream.
Ok(reflect_dom_object_with_proto(
Box::new(TextEncoderStream::new_inherited(&transform)),
global,
proto,
can_gc,
))
}
}
#[allow(non_snake_case)]
impl TextEncoderStreamMethods<DomTypeHolder> for TextEncoderStream {
/// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
fn Constructor(
global: &<DomTypeHolder as DomTypes>::GlobalScope,
proto: Option<SafeHandleObject>,
can_gc: CanGc,
) -> Fallible<DomRoot<<DomTypeHolder as DomTypes>::TextEncoderStream>> {
TextEncoderStream::new_with_proto(GlobalScope::get_cx(), global, proto, can_gc)
}
/// <https://encoding.spec.whatwg.org/#dom-textencoder-encoding>
fn Encoding(&self) -> DOMString {
// Returns "utf-8".
DOMString::from("utf-8")
}
/// <https://streams.spec.whatwg.org/#dom-generictransformstream-readable>
fn Readable(&self) -> DomRoot<<DomTypeHolder as DomTypes>::ReadableStream> {
self.transform.get_readable()
}
/// <https://streams.spec.whatwg.org/#dom-generictransformstream-writable>
fn Writable(&self) -> DomRoot<<DomTypeHolder as DomTypes>::WritableStream> {
self.transform.get_writable()
}
}

View file

@ -28,6 +28,7 @@ use crate::dom::promise::Promise;
use crate::dom::promisenativehandler::{Callback, PromiseNativeHandler};
use crate::dom::textdecodercommon::TextDecoderCommon;
use crate::dom::textdecoderstream::{decode_and_enqueue_a_chunk, flush_and_enqueue};
use crate::dom::textencoderstream::{Encoder, encode_and_enqueue_a_chunk, encode_and_flush};
use crate::realms::{InRealm, enter_realm};
use crate::script_runtime::{CanGc, JSContext as SafeJSContext};
@ -75,6 +76,10 @@ pub(crate) enum TransformerType {
///
/// <https://encoding.spec.whatwg.org/#textdecodercommon>
Decoder(Rc<TextDecoderCommon>),
/// Algorithms supporting `TextEncoderStream` are implemented in Rust
///
/// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
Encoder(Encoder),
}
impl TransformerType {
@ -256,6 +261,28 @@ impl TransformStreamDefaultController {
p
})
},
TransformerType::Encoder(encoder) => {
// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
// Step 2. Let transformAlgorithm be an algorithm which takes a chunk argument and runs the encode
// and enqueue a chunk algorithm with this and chunk.
encode_and_enqueue_a_chunk(cx, global, chunk, encoder, self, can_gc)
// <https://streams.spec.whatwg.org/#transformstream-set-up>
// Step 5. Let transformAlgorithmWrapper be an algorithm that runs these steps given a value chunk:
// Step 5.1 Let result be the result of running transformAlgorithm given chunk.
// Step 5.2 If result is a Promise, then return result.
// Note: not applicable, the spec does NOT require encode_and_enqueue_a_chunk() to return a Promise
// Step 5.3 Return a promise resolved with undefined.
.map(|_| Promise::new_resolved(global, cx, (), can_gc))
.unwrap_or_else(|e| {
// <https://streams.spec.whatwg.org/#transformstream-set-up>
// Step 5.1 If this throws an exception e,
let realm = enter_realm(self);
let p = Promise::new_in_current_realm((&realm).into(), can_gc);
// return a promise rejected with e.
p.reject_error(e, can_gc);
p
})
},
};
Ok(result)
@ -311,6 +338,17 @@ impl TransformStreamDefaultController {
// Step 7.3 Return a promise resolved with undefined.
Promise::new_resolved(global, cx, (), can_gc)
},
TransformerType::Encoder(_) => {
// <https://streams.spec.whatwg.org/#transformstream-set-up>
// Step 7. Let cancelAlgorithmWrapper be an algorithm that runs these steps given a value reason:
// Step 7.1 Let result be the result of running cancelAlgorithm given reason,
// if cancelAlgorithm was given, or null otherwise
// Note: `TextDecoderStream` does NOT specify a cancel algorithm.
// Step 7.2 If result is a Promise, then return result.
// Note: Not applicable.
// Step 7.3 Return a promise resolved with undefined.
Promise::new_resolved(global, cx, (), can_gc)
},
};
Ok(result)
@ -376,6 +414,28 @@ impl TransformStreamDefaultController {
p
})
},
TransformerType::Encoder(encoder) => {
// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
// Step 3. Let flushAlgorithm be an algorithm which runs the encode and flush algorithm with this.
encode_and_flush(cx, global, encoder, self, can_gc)
// <https://streams.spec.whatwg.org/#transformstream-set-up>
// Step 6. Let flushAlgorithmWrapper be an algorithm that runs these steps:
// Step 6.1 Let result be the result of running flushAlgorithm,
// if flushAlgorithm was given, or null otherwise.
// Step 6.2 If result is a Promise, then return result.
// Note: Not applicable. The spec does NOT require encode_and_flush algo to return a Promise
// Step 6.3 Return a promise resolved with undefined.
.map(|_| Promise::new_resolved(global, cx, (), can_gc))
.unwrap_or_else(|e| {
// <https://streams.spec.whatwg.org/#transformstream-set-up>
// Step 6.1 If this throws an exception e,
let realm = enter_realm(self);
let p = Promise::new_in_current_realm((&realm).into(), can_gc);
// return a promise rejected with e.
p.reject_error(e, can_gc);
p
})
},
};
Ok(result)