diff --git a/Cargo.lock b/Cargo.lock index e18f8adef97..806ba90969a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6224,9 +6224,21 @@ dependencies = [ name = "script_bindings" version = "0.0.1" dependencies = [ + "cssparser", + "html5ever", + "jstraceable_derive", + "libc", + "log", + "malloc_size_of_derive", + "mozjs", + "num-traits", "phf_codegen", "phf_shared 0.11.2", + "regex", "serde_json", + "servo_atoms", + "servo_config", + "servo_malloc_size_of", "style", ] diff --git a/Cargo.toml b/Cargo.toml index e2cfc05be2d..4ecfb2a1173 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,6 +75,7 @@ hyper-rustls = { version = "0.27", default-features = false, features = ["http1" hyper_serde = { path = "components/hyper_serde" } hyper-util = { version = "0.1", features = ["client", "client-legacy", "http2", "tokio"] } icu_segmenter = "1.5.0" +js = { package = "mozjs", git = "https://github.com/servo/mozjs" } image = "0.24" imsz = "0.2" indexmap = { version = "2.7.1", features = ["std"] } diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml index 9b2cf1519d5..1bf4a276787 100644 --- a/components/script/Cargo.toml +++ b/components/script/Cargo.toml @@ -71,7 +71,7 @@ image = { workspace = true } indexmap = { workspace = true } ipc-channel = { workspace = true } itertools = { workspace = true } -js = { package = "mozjs", git = "https://github.com/servo/mozjs" } +js = { workspace = true } jstraceable_derive = { path = "../jstraceable_derive" } keyboard-types = { workspace = true } libc = { workspace = true } diff --git a/components/script/dom/bindings/conversions.rs b/components/script/dom/bindings/conversions.rs index 33707ae75fb..82a3b8555f4 100644 --- a/components/script/dom/bindings/conversions.rs +++ b/components/script/dom/bindings/conversions.rs @@ -32,34 +32,29 @@ //! | sequences | `Vec` | | //! | union types | `T` | | -use std::{char, ffi, ptr, slice}; +use std::{ffi, ptr}; -use js::conversions::latin1_to_string; pub(crate) use js::conversions::{ ConversionBehavior, ConversionResult, FromJSValConvertible, ToJSValConvertible, }; use js::error::throw_type_error; use js::glue::{GetProxyReservedSlot, IsWrapper, JS_GetReservedSlot, UnwrapObjectDynamic}; -use js::jsapi::{ - Heap, IsWindowProxy, JSContext, JSObject, JSString, JS_DeprecatedStringHasLatin1Chars, - JS_GetLatin1StringCharsAndLength, JS_GetTwoByteStringCharsAndLength, JS_IsExceptionPending, - JS_NewStringCopyN, -}; -use js::jsval::{ObjectValue, StringValue, UndefinedValue}; +use js::jsapi::{Heap, IsWindowProxy, JSContext, JSObject, JS_IsExceptionPending}; +use js::jsval::{ObjectValue, UndefinedValue}; use js::rust::wrappers::{IsArrayObject, JS_GetProperty, JS_HasProperty}; use js::rust::{ get_object_class, is_dom_class, is_dom_object, maybe_wrap_value, HandleId, HandleObject, - HandleValue, MutableHandleValue, ToString, + HandleValue, MutableHandleValue, }; use num_traits::Float; -use servo_config::opts; +pub(crate) use script_bindings::conversions::*; use crate::dom::bindings::error::{Error, Fallible}; use crate::dom::bindings::inheritance::Castable; use crate::dom::bindings::num::Finite; use crate::dom::bindings::reflector::{DomObject, Reflector}; use crate::dom::bindings::root::DomRoot; -use crate::dom::bindings::str::{ByteString, DOMString, USVString}; +use crate::dom::bindings::str::DOMString; use crate::dom::bindings::trace::{JSTraceable, RootedTraceableBox}; use crate::dom::bindings::utils::DOMClass; use crate::dom::filelist::FileList; @@ -174,177 +169,6 @@ pub(crate) unsafe fn jsid_to_string(cx: *mut JSContext, id: HandleId) -> Option< None } -// http://heycam.github.io/webidl/#es-USVString -impl ToJSValConvertible for USVString { - unsafe fn to_jsval(&self, cx: *mut JSContext, rval: MutableHandleValue) { - self.0.to_jsval(cx, rval); - } -} - -/// Behavior for stringification of `JSVal`s. -#[derive(Clone, PartialEq)] -pub enum StringificationBehavior { - /// Convert `null` to the string `"null"`. - Default, - /// Convert `null` to the empty string. - Empty, -} - -// https://heycam.github.io/webidl/#es-DOMString -impl ToJSValConvertible for DOMString { - unsafe fn to_jsval(&self, cx: *mut JSContext, rval: MutableHandleValue) { - (**self).to_jsval(cx, rval); - } -} - -// https://heycam.github.io/webidl/#es-DOMString -impl FromJSValConvertible for DOMString { - type Config = StringificationBehavior; - unsafe fn from_jsval( - cx: *mut JSContext, - value: HandleValue, - null_behavior: StringificationBehavior, - ) -> Result, ()> { - if null_behavior == StringificationBehavior::Empty && value.get().is_null() { - Ok(ConversionResult::Success(DOMString::new())) - } else { - match ptr::NonNull::new(ToString(cx, value)) { - Some(jsstr) => Ok(ConversionResult::Success(jsstring_to_str(cx, jsstr))), - None => { - debug!("ToString failed"); - Err(()) - }, - } - } - } -} - -/// Convert the given `JSString` to a `DOMString`. Fails if the string does not -/// contain valid UTF-16. -pub(crate) unsafe fn jsstring_to_str(cx: *mut JSContext, s: ptr::NonNull) -> DOMString { - let latin1 = JS_DeprecatedStringHasLatin1Chars(s.as_ptr()); - DOMString::from_string(if latin1 { - latin1_to_string(cx, s.as_ptr()) - } else { - let mut length = 0; - let chars = JS_GetTwoByteStringCharsAndLength(cx, ptr::null(), s.as_ptr(), &mut length); - assert!(!chars.is_null()); - let potentially_ill_formed_utf16 = slice::from_raw_parts(chars, length); - let mut s = String::with_capacity(length); - for item in char::decode_utf16(potentially_ill_formed_utf16.iter().cloned()) { - match item { - Ok(c) => s.push(c), - Err(_) => { - // FIXME: Add more info like document URL in the message? - macro_rules! message { - () => { - "Found an unpaired surrogate in a DOM string. \ - If you see this in real web content, \ - please comment on https://github.com/servo/servo/issues/6564" - }; - } - if opts::get().debug.replace_surrogates { - error!(message!()); - s.push('\u{FFFD}'); - } else { - panic!(concat!( - message!(), - " Use `-Z replace-surrogates` \ - on the command line to make this non-fatal." - )); - } - }, - } - } - s - }) -} - -// http://heycam.github.io/webidl/#es-USVString -impl FromJSValConvertible for USVString { - type Config = (); - unsafe fn from_jsval( - cx: *mut JSContext, - value: HandleValue, - _: (), - ) -> Result, ()> { - let Some(jsstr) = ptr::NonNull::new(ToString(cx, value)) else { - debug!("ToString failed"); - return Err(()); - }; - let latin1 = JS_DeprecatedStringHasLatin1Chars(jsstr.as_ptr()); - if latin1 { - // FIXME(ajeffrey): Convert directly from DOMString to USVString - return Ok(ConversionResult::Success(USVString(String::from( - jsstring_to_str(cx, jsstr), - )))); - } - let mut length = 0; - let chars = JS_GetTwoByteStringCharsAndLength(cx, ptr::null(), jsstr.as_ptr(), &mut length); - assert!(!chars.is_null()); - let char_vec = slice::from_raw_parts(chars, length); - Ok(ConversionResult::Success(USVString( - String::from_utf16_lossy(char_vec), - ))) - } -} - -// http://heycam.github.io/webidl/#es-ByteString -impl ToJSValConvertible for ByteString { - unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) { - let jsstr = JS_NewStringCopyN( - cx, - self.as_ptr() as *const libc::c_char, - self.len() as libc::size_t, - ); - if jsstr.is_null() { - panic!("JS_NewStringCopyN failed"); - } - rval.set(StringValue(&*jsstr)); - } -} - -// http://heycam.github.io/webidl/#es-ByteString -impl FromJSValConvertible for ByteString { - type Config = (); - unsafe fn from_jsval( - cx: *mut JSContext, - value: HandleValue, - _option: (), - ) -> Result, ()> { - let string = ToString(cx, value); - if string.is_null() { - debug!("ToString failed"); - return Err(()); - } - - let latin1 = JS_DeprecatedStringHasLatin1Chars(string); - if latin1 { - let mut length = 0; - let chars = JS_GetLatin1StringCharsAndLength(cx, ptr::null(), string, &mut length); - assert!(!chars.is_null()); - - let char_slice = slice::from_raw_parts(chars as *mut u8, length); - return Ok(ConversionResult::Success(ByteString::new( - char_slice.to_vec(), - ))); - } - - let mut length = 0; - let chars = JS_GetTwoByteStringCharsAndLength(cx, ptr::null(), string, &mut length); - let char_vec = slice::from_raw_parts(chars, length); - - if char_vec.iter().any(|&c| c > 0xFF) { - throw_type_error(cx, "Invalid ByteString"); - Err(()) - } else { - Ok(ConversionResult::Success(ByteString::new( - char_vec.iter().map(|&c| c as u8).collect(), - ))) - } - } -} - impl ToJSValConvertible for Reflector { unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) { let obj = self.get_jsobject().get(); diff --git a/components/script/dom/bindings/mod.rs b/components/script/dom/bindings/mod.rs index 3512b59724a..65629fda46b 100644 --- a/components/script/dom/bindings/mod.rs +++ b/components/script/dom/bindings/mod.rs @@ -161,7 +161,6 @@ pub(crate) mod reflector; pub(crate) mod root; pub(crate) mod serializable; pub(crate) mod settings_stack; -#[allow(dead_code)] pub(crate) mod str; pub(crate) mod structuredclone; pub(crate) mod trace; diff --git a/components/script/dom/bindings/str.rs b/components/script/dom/bindings/str.rs index 64f180951cf..626b0b01e73 100644 --- a/components/script/dom/bindings/str.rs +++ b/components/script/dom/bindings/str.rs @@ -2,426 +2,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -//! The `ByteString` struct. -use std::borrow::{Borrow, Cow, ToOwned}; -use std::default::Default; -use std::hash::{Hash, Hasher}; -use std::marker::PhantomData; -use std::ops::{Deref, DerefMut}; -use std::str::FromStr; use std::sync::LazyLock; -use std::{fmt, ops, str}; -use cssparser::CowRcStr; -use html5ever::{LocalName, Namespace}; use num_traits::Zero; use regex::Regex; -use servo_atoms::Atom; +pub use script_bindings::str::*; use time_03::{Date, Month, OffsetDateTime, Time, Weekday}; -/// Encapsulates the IDL `ByteString` type. -#[derive(Clone, Debug, Default, Eq, JSTraceable, MallocSizeOf, PartialEq)] -pub struct ByteString(Vec); - -impl ByteString { - /// Creates a new `ByteString`. - pub fn new(value: Vec) -> ByteString { - ByteString(value) - } - - /// Returns `self` as a string, if it encodes valid UTF-8, and `None` - /// otherwise. - pub(crate) fn as_str(&self) -> Option<&str> { - str::from_utf8(&self.0).ok() - } - - /// Returns the length. - pub(crate) fn len(&self) -> usize { - self.0.len() - } - - /// Checks if the ByteString is empty. - pub(crate) fn is_empty(&self) -> bool { - self.0.is_empty() - } - - /// Returns `self` with A–Z replaced by a–z. - pub(crate) fn to_lower(&self) -> ByteString { - ByteString::new(self.0.to_ascii_lowercase()) - } -} - -impl From for Vec { - fn from(byte_string: ByteString) -> Vec { - byte_string.0 - } -} - -impl Hash for ByteString { - fn hash(&self, state: &mut H) { - self.0.hash(state); - } -} - -impl FromStr for ByteString { - type Err = (); - fn from_str(s: &str) -> Result { - Ok(ByteString::new(s.to_owned().into_bytes())) - } -} - -impl ops::Deref for ByteString { - type Target = [u8]; - fn deref(&self) -> &[u8] { - &self.0 - } -} - -/// A string that is constructed from a UCS-2 buffer by replacing invalid code -/// points with the replacement character. -#[derive(Clone, Default, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)] -pub(crate) struct USVString(pub(crate) String); - -impl Borrow for USVString { - #[inline] - fn borrow(&self) -> &str { - &self.0 - } -} - -impl Deref for USVString { - type Target = str; - - #[inline] - fn deref(&self) -> &str { - &self.0 - } -} - -impl DerefMut for USVString { - #[inline] - fn deref_mut(&mut self) -> &mut str { - &mut self.0 - } -} - -impl AsRef for USVString { - fn as_ref(&self) -> &str { - &self.0 - } -} - -impl fmt::Display for USVString { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&**self, f) - } -} - -impl PartialEq for USVString { - fn eq(&self, other: &str) -> bool { - &**self == other - } -} - -impl<'a> PartialEq<&'a str> for USVString { - fn eq(&self, other: &&'a str) -> bool { - &**self == *other - } -} - -impl From for USVString { - fn from(contents: String) -> USVString { - USVString(contents) - } -} - -/// Returns whether `s` is a `token`, as defined by -/// [RFC 2616](http://tools.ietf.org/html/rfc2616#page-17). -pub(crate) fn is_token(s: &[u8]) -> bool { - if s.is_empty() { - return false; // A token must be at least a single character - } - s.iter().all(|&x| { - // http://tools.ietf.org/html/rfc2616#section-2.2 - match x { - 0..=31 | 127 => false, // CTLs - 40 | 41 | 60 | 62 | 64 | 44 | 59 | 58 | 92 | 34 | 47 | 91 | 93 | 63 | 61 | 123 | - 125 | 32 => false, // separators - x if x > 127 => false, // non-CHARs - _ => true, - } - }) -} - -/// A DOMString. -/// -/// This type corresponds to the [`DOMString`] type in WebIDL. -/// -/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString -/// -/// Conceptually, a DOMString has the same value space as a JavaScript String, -/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with -/// unpaired surrogates present (also sometimes called WTF-16). -/// -/// Currently, this type stores a Rust `String`, in order to avoid issues when -/// integrating with the rest of the Rust ecosystem and even the rest of the -/// browser itself. -/// -/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have -/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values -/// can not be represented as a Rust `String`). This introduces the question of -/// what to do with values being passed from JavaScript to Rust that contain -/// unpaired surrogates. -/// -/// The hypothesis is that it does not matter much how exactly those values are -/// transformed, because passing unpaired surrogates into the DOM is very rare. -/// In order to test this hypothesis, Servo will panic when encountering any -/// unpaired surrogates on conversion to `DOMString` by default. (The command -/// line option `-Z replace-surrogates` instead causes Servo to replace the -/// unpaired surrogate by a U+FFFD replacement character.) -/// -/// Currently, the lack of crash reports about this issue provides some -/// evidence to support the hypothesis. This evidence will hopefully be used to -/// convince other browser vendors that it would be safe to replace unpaired -/// surrogates at the boundary between JavaScript and native code. (This would -/// unify the `DOMString` and `USVString` types, both in the WebIDL standard -/// and in Servo.) -/// -/// This type is currently `!Send`, in order to help with an independent -/// experiment to store `JSString`s rather than Rust `String`s. -#[derive(Clone, Debug, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)] -pub struct DOMString(String, PhantomData<*const ()>); - -impl DOMString { - /// Creates a new `DOMString`. - pub(crate) fn new() -> DOMString { - DOMString(String::new(), PhantomData) - } - - /// Creates a new `DOMString` from a `String`. - pub(crate) fn from_string(s: String) -> DOMString { - DOMString(s, PhantomData) - } - - /// Get the internal `&str` value of this [`DOMString`]. - pub(crate) fn str(&self) -> &str { - &self.0 - } - - /// Appends a given string slice onto the end of this String. - pub(crate) fn push_str(&mut self, string: &str) { - self.0.push_str(string) - } - - /// Clears this `DOMString`, removing all contents. - pub(crate) fn clear(&mut self) { - self.0.clear() - } - - /// Shortens this String to the specified length. - pub(crate) fn truncate(&mut self, new_len: usize) { - self.0.truncate(new_len); - } - - /// Removes newline characters according to . - pub(crate) fn strip_newlines(&mut self) { - self.0.retain(|c| c != '\r' && c != '\n'); - } - - /// Removes leading and trailing ASCII whitespaces according to - /// . - pub(crate) fn strip_leading_and_trailing_ascii_whitespace(&mut self) { - if self.0.is_empty() { - return; - } - - let trailing_whitespace_len = self - .0 - .trim_end_matches(|ref c| char::is_ascii_whitespace(c)) - .len(); - self.0.truncate(trailing_whitespace_len); - if self.0.is_empty() { - return; - } - - let first_non_whitespace = self.0.find(|ref c| !char::is_ascii_whitespace(c)).unwrap(); - self.0.replace_range(0..first_non_whitespace, ""); - } - - /// - pub(crate) fn is_valid_floating_point_number_string(&self) -> bool { - static RE: LazyLock = LazyLock::new(|| { - Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap() - }); - - RE.is_match(&self.0) && self.parse_floating_point_number().is_some() - } - - /// - pub(crate) fn parse_floating_point_number(&self) -> Option { - // Steps 15-16 are telling us things about IEEE rounding modes - // for floating-point significands; this code assumes the Rust - // compiler already matches them in any cases where - // that actually matters. They are not - // related to f64::round(), which is for rounding to integers. - let input = &self.0; - if let Ok(val) = input.trim().parse::() { - if !( - // A valid number is the same as what rust considers to be valid, - // except for +1., NaN, and Infinity. - val.is_infinite() || val.is_nan() || input.ends_with('.') || input.starts_with('+') - ) { - return Some(val); - } - } - None - } - - /// Applies the same processing as `parse_floating_point_number` with some additional handling - /// according to ECMA's string conversion steps. - /// - /// Used for specific elements when handling floating point values, namely the `number` and - /// `range` inputs, as well as `meter` and `progress` elements. - /// - /// - /// - pub(crate) fn set_best_representation_of_the_floating_point_number(&mut self) { - if let Some(val) = self.parse_floating_point_number() { - // [tc39] Step 2: If x is either +0 or -0, return "0". - let parsed_value = if val.is_zero() { 0.0_f64 } else { val }; - - self.0 = parsed_value.to_string() - } - } -} - -impl Borrow for DOMString { - #[inline] - fn borrow(&self) -> &str { - &self.0 - } -} - -impl Default for DOMString { - fn default() -> Self { - DOMString(String::new(), PhantomData) - } -} - -impl Deref for DOMString { - type Target = str; - - #[inline] - fn deref(&self) -> &str { - &self.0 - } -} - -impl DerefMut for DOMString { - #[inline] - fn deref_mut(&mut self) -> &mut str { - &mut self.0 - } -} - -impl AsRef for DOMString { - fn as_ref(&self) -> &str { - &self.0 - } -} - -impl fmt::Display for DOMString { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&**self, f) - } -} - -impl PartialEq for DOMString { - fn eq(&self, other: &str) -> bool { - &**self == other - } -} - -impl<'a> PartialEq<&'a str> for DOMString { - fn eq(&self, other: &&'a str) -> bool { - &**self == *other - } -} - -impl From for DOMString { - fn from(contents: String) -> DOMString { - DOMString(contents, PhantomData) - } -} - -impl From<&str> for DOMString { - fn from(contents: &str) -> DOMString { - DOMString::from(String::from(contents)) - } -} - -impl<'a> From> for DOMString { - fn from(contents: Cow<'a, str>) -> DOMString { - match contents { - Cow::Owned(s) => DOMString::from(s), - Cow::Borrowed(s) => DOMString::from(s), - } - } -} - -impl From for LocalName { - fn from(contents: DOMString) -> LocalName { - LocalName::from(contents.0) - } -} - -impl From for Namespace { - fn from(contents: DOMString) -> Namespace { - Namespace::from(contents.0) - } -} - -impl From for Atom { - fn from(contents: DOMString) -> Atom { - Atom::from(contents.0) - } -} - -impl From for String { - fn from(contents: DOMString) -> String { - contents.0 - } -} - -impl From for Vec { - fn from(contents: DOMString) -> Vec { - contents.0.into() - } -} - -impl<'a> From for Cow<'a, str> { - fn from(contents: DOMString) -> Cow<'a, str> { - contents.0.into() - } -} - -impl<'a> From for CowRcStr<'a> { - fn from(contents: DOMString) -> CowRcStr<'a> { - contents.0.into() - } -} - -impl Extend for DOMString { - fn extend(&mut self, iterable: I) - where - I: IntoIterator, - { - self.0.extend(iterable) - } -} - /// fn parse_month_component(value: &str) -> Option<(i32, u32)> { // Step 3 diff --git a/components/script/dom/bindings/trace.rs b/components/script/dom/bindings/trace.rs index 926bb49109f..7a3592eeaf3 100644 --- a/components/script/dom/bindings/trace.rs +++ b/components/script/dom/bindings/trace.rs @@ -62,7 +62,6 @@ use crate::dom::bindings::cell::DomRefCell; use crate::dom::bindings::error::Error; use crate::dom::bindings::refcounted::{Trusted, TrustedPromise}; use crate::dom::bindings::reflector::{DomObject, Reflector}; -use crate::dom::bindings::str::{DOMString, USVString}; use crate::dom::htmlimageelement::SourceSet; use crate::dom::htmlmediaelement::HTMLMediaElementFetchContext; use crate::dom::windowproxy::WindowProxyHandler; @@ -377,8 +376,6 @@ unsafe_no_jsmanaged_fields!(Error); unsafe_no_jsmanaged_fields!(TrustedPromise); unsafe_no_jsmanaged_fields!(WindowProxyHandler); -unsafe_no_jsmanaged_fields!(DOMString); -unsafe_no_jsmanaged_fields!(USVString); unsafe_no_jsmanaged_fields!(SourceSet); unsafe_no_jsmanaged_fields!(HTMLMediaElementFetchContext); unsafe_no_jsmanaged_fields!(StreamConsumer); diff --git a/components/script_bindings/Cargo.toml b/components/script_bindings/Cargo.toml index a9f5e75ae02..92b27ccfcd6 100644 --- a/components/script_bindings/Cargo.toml +++ b/components/script_bindings/Cargo.toml @@ -21,6 +21,18 @@ phf_shared = "0.11" serde_json = { workspace = true } [dependencies] +cssparser = { workspace = true } +html5ever = { workspace = true } +js = { workspace = true } +jstraceable_derive = { path = "../jstraceable_derive" } +libc = { workspace = true } +log = { workspace = true } +malloc_size_of = { workspace = true } +malloc_size_of_derive = { workspace = true } +num-traits = { workspace = true } +regex = { workspace = true } +servo_atoms = { workspace = true } +servo_config = { path = "../config" } style = { workspace = true } [features] diff --git a/components/script_bindings/conversions.rs b/components/script_bindings/conversions.rs new file mode 100644 index 00000000000..d9befd3c208 --- /dev/null +++ b/components/script_bindings/conversions.rs @@ -0,0 +1,193 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use std::{ptr, slice}; + +use js::conversions::{ + latin1_to_string, ConversionResult, FromJSValConvertible, ToJSValConvertible, +}; +use js::error::throw_type_error; +use js::jsapi::{ + JSContext, JSString, JS_DeprecatedStringHasLatin1Chars, JS_GetLatin1StringCharsAndLength, + JS_GetTwoByteStringCharsAndLength, JS_NewStringCopyN, +}; +use js::jsval::StringValue; +use js::rust::{HandleValue, MutableHandleValue, ToString}; +use servo_config::opts; + +use crate::str::{ByteString, DOMString, USVString}; + +// http://heycam.github.io/webidl/#es-USVString +impl ToJSValConvertible for USVString { + unsafe fn to_jsval(&self, cx: *mut JSContext, rval: MutableHandleValue) { + self.0.to_jsval(cx, rval); + } +} + +/// Behavior for stringification of `JSVal`s. +#[derive(Clone, PartialEq)] +pub enum StringificationBehavior { + /// Convert `null` to the string `"null"`. + Default, + /// Convert `null` to the empty string. + Empty, +} + +// https://heycam.github.io/webidl/#es-DOMString +impl ToJSValConvertible for DOMString { + unsafe fn to_jsval(&self, cx: *mut JSContext, rval: MutableHandleValue) { + (**self).to_jsval(cx, rval); + } +} + +// https://heycam.github.io/webidl/#es-DOMString +impl FromJSValConvertible for DOMString { + type Config = StringificationBehavior; + unsafe fn from_jsval( + cx: *mut JSContext, + value: HandleValue, + null_behavior: StringificationBehavior, + ) -> Result, ()> { + if null_behavior == StringificationBehavior::Empty && value.get().is_null() { + Ok(ConversionResult::Success(DOMString::new())) + } else { + match ptr::NonNull::new(ToString(cx, value)) { + Some(jsstr) => Ok(ConversionResult::Success(jsstring_to_str(cx, jsstr))), + None => { + debug!("ToString failed"); + Err(()) + }, + } + } + } +} + +/// Convert the given `JSString` to a `DOMString`. Fails if the string does not +/// contain valid UTF-16. +/// +/// # Safety +/// cx and s must point to valid values. +pub unsafe fn jsstring_to_str(cx: *mut JSContext, s: ptr::NonNull) -> DOMString { + let latin1 = JS_DeprecatedStringHasLatin1Chars(s.as_ptr()); + DOMString::from_string(if latin1 { + latin1_to_string(cx, s.as_ptr()) + } else { + let mut length = 0; + let chars = JS_GetTwoByteStringCharsAndLength(cx, ptr::null(), s.as_ptr(), &mut length); + assert!(!chars.is_null()); + let potentially_ill_formed_utf16 = slice::from_raw_parts(chars, length); + let mut s = String::with_capacity(length); + for item in char::decode_utf16(potentially_ill_formed_utf16.iter().cloned()) { + match item { + Ok(c) => s.push(c), + Err(_) => { + // FIXME: Add more info like document URL in the message? + macro_rules! message { + () => { + "Found an unpaired surrogate in a DOM string. \ + If you see this in real web content, \ + please comment on https://github.com/servo/servo/issues/6564" + }; + } + if opts::get().debug.replace_surrogates { + error!(message!()); + s.push('\u{FFFD}'); + } else { + panic!(concat!( + message!(), + " Use `-Z replace-surrogates` \ + on the command line to make this non-fatal." + )); + } + }, + } + } + s + }) +} + +// http://heycam.github.io/webidl/#es-USVString +impl FromJSValConvertible for USVString { + type Config = (); + unsafe fn from_jsval( + cx: *mut JSContext, + value: HandleValue, + _: (), + ) -> Result, ()> { + let Some(jsstr) = ptr::NonNull::new(ToString(cx, value)) else { + debug!("ToString failed"); + return Err(()); + }; + let latin1 = JS_DeprecatedStringHasLatin1Chars(jsstr.as_ptr()); + if latin1 { + // FIXME(ajeffrey): Convert directly from DOMString to USVString + return Ok(ConversionResult::Success(USVString(String::from( + jsstring_to_str(cx, jsstr), + )))); + } + let mut length = 0; + let chars = JS_GetTwoByteStringCharsAndLength(cx, ptr::null(), jsstr.as_ptr(), &mut length); + assert!(!chars.is_null()); + let char_vec = slice::from_raw_parts(chars, length); + Ok(ConversionResult::Success(USVString( + String::from_utf16_lossy(char_vec), + ))) + } +} + +// http://heycam.github.io/webidl/#es-ByteString +impl ToJSValConvertible for ByteString { + unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) { + let jsstr = JS_NewStringCopyN( + cx, + self.as_ptr() as *const libc::c_char, + self.len() as libc::size_t, + ); + if jsstr.is_null() { + panic!("JS_NewStringCopyN failed"); + } + rval.set(StringValue(&*jsstr)); + } +} + +// http://heycam.github.io/webidl/#es-ByteString +impl FromJSValConvertible for ByteString { + type Config = (); + unsafe fn from_jsval( + cx: *mut JSContext, + value: HandleValue, + _option: (), + ) -> Result, ()> { + let string = ToString(cx, value); + if string.is_null() { + debug!("ToString failed"); + return Err(()); + } + + let latin1 = JS_DeprecatedStringHasLatin1Chars(string); + if latin1 { + let mut length = 0; + let chars = JS_GetLatin1StringCharsAndLength(cx, ptr::null(), string, &mut length); + assert!(!chars.is_null()); + + let char_slice = slice::from_raw_parts(chars as *mut u8, length); + return Ok(ConversionResult::Success(ByteString::new( + char_slice.to_vec(), + ))); + } + + let mut length = 0; + let chars = JS_GetTwoByteStringCharsAndLength(cx, ptr::null(), string, &mut length); + let char_vec = slice::from_raw_parts(chars, length); + + if char_vec.iter().any(|&c| c > 0xFF) { + throw_type_error(cx, "Invalid ByteString"); + Err(()) + } else { + Ok(ConversionResult::Success(ByteString::new( + char_vec.iter().map(|&c| c as u8).collect(), + ))) + } + } +} diff --git a/components/script_bindings/lib.rs b/components/script_bindings/lib.rs index daa3e8897c2..835999bab88 100644 --- a/components/script_bindings/lib.rs +++ b/components/script_bindings/lib.rs @@ -1,3 +1,19 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#[macro_use] +extern crate jstraceable_derive; +#[macro_use] +extern crate log; +#[macro_use] +extern crate malloc_size_of_derive; + +pub mod conversions; +pub mod str; +mod trace; + +// These trait exports are public, because they are used in the DOM bindings. +// Since they are used in derive macros, +// it is useful that they are accessible at the root of the crate. +pub(crate) use js::gc::Traceable as JSTraceable; diff --git a/components/script_bindings/str.rs b/components/script_bindings/str.rs new file mode 100644 index 00000000000..d7968e74523 --- /dev/null +++ b/components/script_bindings/str.rs @@ -0,0 +1,422 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! The `ByteString` struct. +use std::borrow::{Borrow, Cow, ToOwned}; +use std::default::Default; +use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; +use std::ops::{Deref, DerefMut}; +use std::str::FromStr; +use std::sync::LazyLock; +use std::{fmt, ops, str}; + +use cssparser::CowRcStr; +use html5ever::{LocalName, Namespace}; +use num_traits::Zero; +use regex::Regex; +use servo_atoms::Atom; + +/// Encapsulates the IDL `ByteString` type. +#[derive(Clone, Debug, Default, Eq, JSTraceable, MallocSizeOf, PartialEq)] +pub struct ByteString(Vec); + +impl ByteString { + /// Creates a new `ByteString`. + pub fn new(value: Vec) -> ByteString { + ByteString(value) + } + + /// Returns `self` as a string, if it encodes valid UTF-8, and `None` + /// otherwise. + pub fn as_str(&self) -> Option<&str> { + str::from_utf8(&self.0).ok() + } + + /// Returns the length. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Checks if the ByteString is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns `self` with A–Z replaced by a–z. + pub fn to_lower(&self) -> ByteString { + ByteString::new(self.0.to_ascii_lowercase()) + } +} + +impl From for Vec { + fn from(byte_string: ByteString) -> Vec { + byte_string.0 + } +} + +impl Hash for ByteString { + fn hash(&self, state: &mut H) { + self.0.hash(state); + } +} + +impl FromStr for ByteString { + type Err = (); + fn from_str(s: &str) -> Result { + Ok(ByteString::new(s.to_owned().into_bytes())) + } +} + +impl ops::Deref for ByteString { + type Target = [u8]; + fn deref(&self) -> &[u8] { + &self.0 + } +} + +/// A string that is constructed from a UCS-2 buffer by replacing invalid code +/// points with the replacement character. +#[derive(Clone, Default, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)] +pub struct USVString(pub String); + +impl Borrow for USVString { + #[inline] + fn borrow(&self) -> &str { + &self.0 + } +} + +impl Deref for USVString { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + &self.0 + } +} + +impl DerefMut for USVString { + #[inline] + fn deref_mut(&mut self) -> &mut str { + &mut self.0 + } +} + +impl AsRef for USVString { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for USVString { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +impl PartialEq for USVString { + fn eq(&self, other: &str) -> bool { + &**self == other + } +} + +impl<'a> PartialEq<&'a str> for USVString { + fn eq(&self, other: &&'a str) -> bool { + &**self == *other + } +} + +impl From for USVString { + fn from(contents: String) -> USVString { + USVString(contents) + } +} + +/// Returns whether `s` is a `token`, as defined by +/// [RFC 2616](http://tools.ietf.org/html/rfc2616#page-17). +pub fn is_token(s: &[u8]) -> bool { + if s.is_empty() { + return false; // A token must be at least a single character + } + s.iter().all(|&x| { + // http://tools.ietf.org/html/rfc2616#section-2.2 + match x { + 0..=31 | 127 => false, // CTLs + 40 | 41 | 60 | 62 | 64 | 44 | 59 | 58 | 92 | 34 | 47 | 91 | 93 | 63 | 61 | 123 | + 125 | 32 => false, // separators + x if x > 127 => false, // non-CHARs + _ => true, + } + }) +} + +/// A DOMString. +/// +/// This type corresponds to the [`DOMString`] type in WebIDL. +/// +/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString +/// +/// Conceptually, a DOMString has the same value space as a JavaScript String, +/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with +/// unpaired surrogates present (also sometimes called WTF-16). +/// +/// Currently, this type stores a Rust `String`, in order to avoid issues when +/// integrating with the rest of the Rust ecosystem and even the rest of the +/// browser itself. +/// +/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have +/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values +/// can not be represented as a Rust `String`). This introduces the question of +/// what to do with values being passed from JavaScript to Rust that contain +/// unpaired surrogates. +/// +/// The hypothesis is that it does not matter much how exactly those values are +/// transformed, because passing unpaired surrogates into the DOM is very rare. +/// In order to test this hypothesis, Servo will panic when encountering any +/// unpaired surrogates on conversion to `DOMString` by default. (The command +/// line option `-Z replace-surrogates` instead causes Servo to replace the +/// unpaired surrogate by a U+FFFD replacement character.) +/// +/// Currently, the lack of crash reports about this issue provides some +/// evidence to support the hypothesis. This evidence will hopefully be used to +/// convince other browser vendors that it would be safe to replace unpaired +/// surrogates at the boundary between JavaScript and native code. (This would +/// unify the `DOMString` and `USVString` types, both in the WebIDL standard +/// and in Servo.) +/// +/// This type is currently `!Send`, in order to help with an independent +/// experiment to store `JSString`s rather than Rust `String`s. +#[derive(Clone, Debug, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)] +pub struct DOMString(String, PhantomData<*const ()>); + +impl DOMString { + /// Creates a new `DOMString`. + pub fn new() -> DOMString { + DOMString(String::new(), PhantomData) + } + + /// Creates a new `DOMString` from a `String`. + pub fn from_string(s: String) -> DOMString { + DOMString(s, PhantomData) + } + + /// Get the internal `&str` value of this [`DOMString`]. + pub fn str(&self) -> &str { + &self.0 + } + + /// Appends a given string slice onto the end of this String. + pub fn push_str(&mut self, string: &str) { + self.0.push_str(string) + } + + /// Clears this `DOMString`, removing all contents. + pub fn clear(&mut self) { + self.0.clear() + } + + /// Shortens this String to the specified length. + pub fn truncate(&mut self, new_len: usize) { + self.0.truncate(new_len); + } + + /// Removes newline characters according to . + pub fn strip_newlines(&mut self) { + self.0.retain(|c| c != '\r' && c != '\n'); + } + + /// Removes leading and trailing ASCII whitespaces according to + /// . + pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) { + if self.0.is_empty() { + return; + } + + let trailing_whitespace_len = self + .0 + .trim_end_matches(|ref c| char::is_ascii_whitespace(c)) + .len(); + self.0.truncate(trailing_whitespace_len); + if self.0.is_empty() { + return; + } + + let first_non_whitespace = self.0.find(|ref c| !char::is_ascii_whitespace(c)).unwrap(); + self.0.replace_range(0..first_non_whitespace, ""); + } + + /// + pub fn is_valid_floating_point_number_string(&self) -> bool { + static RE: LazyLock = LazyLock::new(|| { + Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap() + }); + + RE.is_match(&self.0) && self.parse_floating_point_number().is_some() + } + + /// + pub fn parse_floating_point_number(&self) -> Option { + // Steps 15-16 are telling us things about IEEE rounding modes + // for floating-point significands; this code assumes the Rust + // compiler already matches them in any cases where + // that actually matters. They are not + // related to f64::round(), which is for rounding to integers. + let input = &self.0; + if let Ok(val) = input.trim().parse::() { + if !( + // A valid number is the same as what rust considers to be valid, + // except for +1., NaN, and Infinity. + val.is_infinite() || val.is_nan() || input.ends_with('.') || input.starts_with('+') + ) { + return Some(val); + } + } + None + } + + /// Applies the same processing as `parse_floating_point_number` with some additional handling + /// according to ECMA's string conversion steps. + /// + /// Used for specific elements when handling floating point values, namely the `number` and + /// `range` inputs, as well as `meter` and `progress` elements. + /// + /// + /// + pub fn set_best_representation_of_the_floating_point_number(&mut self) { + if let Some(val) = self.parse_floating_point_number() { + // [tc39] Step 2: If x is either +0 or -0, return "0". + let parsed_value = if val.is_zero() { 0.0_f64 } else { val }; + + self.0 = parsed_value.to_string() + } + } +} + +impl Borrow for DOMString { + #[inline] + fn borrow(&self) -> &str { + &self.0 + } +} + +impl Default for DOMString { + fn default() -> Self { + DOMString(String::new(), PhantomData) + } +} + +impl Deref for DOMString { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + &self.0 + } +} + +impl DerefMut for DOMString { + #[inline] + fn deref_mut(&mut self) -> &mut str { + &mut self.0 + } +} + +impl AsRef for DOMString { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for DOMString { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +impl PartialEq for DOMString { + fn eq(&self, other: &str) -> bool { + &**self == other + } +} + +impl<'a> PartialEq<&'a str> for DOMString { + fn eq(&self, other: &&'a str) -> bool { + &**self == *other + } +} + +impl From for DOMString { + fn from(contents: String) -> DOMString { + DOMString(contents, PhantomData) + } +} + +impl From<&str> for DOMString { + fn from(contents: &str) -> DOMString { + DOMString::from(String::from(contents)) + } +} + +impl<'a> From> for DOMString { + fn from(contents: Cow<'a, str>) -> DOMString { + match contents { + Cow::Owned(s) => DOMString::from(s), + Cow::Borrowed(s) => DOMString::from(s), + } + } +} + +impl From for LocalName { + fn from(contents: DOMString) -> LocalName { + LocalName::from(contents.0) + } +} + +impl From for Namespace { + fn from(contents: DOMString) -> Namespace { + Namespace::from(contents.0) + } +} + +impl From for Atom { + fn from(contents: DOMString) -> Atom { + Atom::from(contents.0) + } +} + +impl From for String { + fn from(contents: DOMString) -> String { + contents.0 + } +} + +impl From for Vec { + fn from(contents: DOMString) -> Vec { + contents.0.into() + } +} + +impl<'a> From for Cow<'a, str> { + fn from(contents: DOMString) -> Cow<'a, str> { + contents.0.into() + } +} + +impl<'a> From for CowRcStr<'a> { + fn from(contents: DOMString) -> CowRcStr<'a> { + contents.0.into() + } +} + +impl Extend for DOMString { + fn extend(&mut self, iterable: I) + where + I: IntoIterator, + { + self.0.extend(iterable) + } +} diff --git a/components/script_bindings/trace.rs b/components/script_bindings/trace.rs new file mode 100644 index 00000000000..29317e7fda6 --- /dev/null +++ b/components/script_bindings/trace.rs @@ -0,0 +1,24 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use crate::str::{DOMString, USVString}; + +/// For use on non-jsmanaged types +/// Use #[derive(JSTraceable)] on JS managed types +macro_rules! unsafe_no_jsmanaged_fields( + ($($ty:ty),+) => ( + $( + #[allow(unsafe_code)] + unsafe impl crate::JSTraceable for $ty { + #[inline] + unsafe fn trace(&self, _: *mut ::js::jsapi::JSTracer) { + // Do nothing + } + } + )+ + ); +); + +unsafe_no_jsmanaged_fields!(DOMString); +unsafe_no_jsmanaged_fields!(USVString);