From 279ae53b7bb05b0098fe9b27dff429b888f222a8 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 15 Oct 2013 18:14:15 -0700 Subject: [PATCH 1/6] Pass scripts as ~str to support non-ASCII string literals etc. Of course scripts on the Web are not always encoded as UTF-8 and we will have to deal with that at some point. --- src/components/script/html/hubbub_html_parser.rs | 7 ++++--- src/components/script/script_task.rs | 3 ++- src/support/spidermonkey/rust-mozjs | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/components/script/html/hubbub_html_parser.rs b/src/components/script/html/hubbub_html_parser.rs index e5ccb6f2e83..b9892ac86a3 100644 --- a/src/components/script/html/hubbub_html_parser.rs +++ b/src/components/script/html/hubbub_html_parser.rs @@ -19,6 +19,7 @@ use std::cast; use std::cell::Cell; use std::comm; use std::comm::{Port, SharedChan}; +use std::str; use std::str::eq_slice; use std::task; use std::from_str::FromStr; @@ -102,7 +103,7 @@ macro_rules! handle_element_base( pub struct JSFile { - data: ~[u8], + data: ~str, url: Url } @@ -222,11 +223,11 @@ fn js_script_listener(to_parent: SharedChan, let bytes = result_port.recv(); if bytes.is_some() { - result_vec.push(JSFile { data: bytes.unwrap(), url: url_clone }); + result_vec.push(JSFile { data: str::from_utf8(bytes.unwrap()), url: url_clone }); } } JSTaskNewInlineScript(data, url) => { - result_vec.push(JSFile { data: data.into_bytes(), url: url }); + result_vec.push(JSFile { data: data, url: url }); } JSTaskExit => { break; diff --git a/src/components/script/script_task.rs b/src/components/script/script_task.rs index 31a0bd6eb9d..c96c2f626d3 100644 --- a/src/components/script/script_task.rs +++ b/src/components/script/script_task.rs @@ -30,6 +30,7 @@ use std::comm; use std::comm::{Port, SharedChan}; use std::io::read_whole_file; use std::ptr; +use std::str; use std::task::{spawn_sched, SingleThreaded}; use std::util::replace; use dom::window::TimerData; @@ -578,7 +579,7 @@ impl ScriptTask { Ok(bytes) => { compartment.define_functions(debug_fns); cx.evaluate_script(compartment.global_obj, - bytes, + str::from_utf8(bytes), url.path.clone(), 1); } diff --git a/src/support/spidermonkey/rust-mozjs b/src/support/spidermonkey/rust-mozjs index 1a9a5c3c146..7372f6b7ed1 160000 --- a/src/support/spidermonkey/rust-mozjs +++ b/src/support/spidermonkey/rust-mozjs @@ -1 +1 @@ -Subproject commit 1a9a5c3c1462bd3b3e7905ee2834a5ba906f0214 +Subproject commit 7372f6b7ed1cfa66f0883667d4b08901487c9ab2 From 2c302a462790db241fe1d265583633d622910293 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 15 Oct 2013 19:07:43 -0700 Subject: [PATCH 2/6] Convert between JS and Rust strings in a way that handles Unicode This doesn't handle invalid UTF-16 and may not be efficient. It's just a small change to make things better until we decide what we're doing about strings in the long run. Fixes #1068. --- src/components/script/dom/bindings/utils.rs | 26 +++++++++------------ 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/components/script/dom/bindings/utils.rs b/src/components/script/dom/bindings/utils.rs index 0a49bf4b37c..c43f510b620 100644 --- a/src/components/script/dom/bindings/utils.rs +++ b/src/components/script/dom/bindings/utils.rs @@ -14,17 +14,17 @@ use std::libc; use std::ptr; use std::ptr::{null, to_unsafe_ptr}; use std::str; +use std::vec; use std::unstable::raw::Box; use js::glue::*; use js::glue::{DefineFunctionWithReserved, GetObjectJSClass, RUST_OBJECT_TO_JSVAL}; use js::glue::{js_IsObjectProxyClass, js_IsFunctionProxyClass, IsProxyHandlerFamily}; use js::jsapi::{JS_AlreadyHasOwnProperty, JS_NewObject, JS_NewFunction, JS_GetGlobalObject}; use js::jsapi::{JS_DefineProperties, JS_WrapValue, JS_ForwardGetPropertyTo}; -use js::jsapi::{JS_EncodeString, JS_free, JS_GetStringCharsAndLength}; -use js::jsapi::{JS_GetClass, JS_LinkConstructorAndPrototype}; +use js::jsapi::{JS_GetClass, JS_LinkConstructorAndPrototype, JS_GetStringCharsAndLength}; use js::jsapi::{JS_GetFunctionPrototype, JS_InternString, JS_GetFunctionObject}; use js::jsapi::{JS_HasPropertyById, JS_GetPrototype, JS_GetGlobalForObject}; -use js::jsapi::{JS_NewStringCopyN, JS_DefineFunctions, JS_DefineProperty}; +use js::jsapi::{JS_NewUCStringCopyN, JS_DefineFunctions, JS_DefineProperty}; use js::jsapi::{JS_ValueToString, JS_GetReservedSlot, JS_SetReservedSlot}; use js::jsapi::{JSContext, JSObject, JSBool, jsid, JSClass, JSNative, JSTracer}; use js::jsapi::{JSFunctionSpec, JSPropertySpec, JSVal, JSPropertyDescriptor}; @@ -205,25 +205,21 @@ pub fn jsval_to_str(cx: *JSContext, v: JSVal) -> Result<~str, ()> { } } - let strbuf = JS_EncodeString(cx, jsstr); - let buf = str::raw::from_c_str(strbuf); - JS_free(cx, strbuf as *libc::c_void); - Ok(buf) + let length = 0; + let chars = JS_GetStringCharsAndLength(cx, jsstr, &length); + do vec::raw::buf_as_slice(chars, length as uint) |char_vec| { + Ok(str::from_utf16(char_vec)) + } } } #[fixed_stack_segment] pub unsafe fn domstring_to_jsval(cx: *JSContext, string: &DOMString) -> JSVal { match string { - &None => { - JSVAL_NULL - } - &Some(ref s) => { - do s.as_imm_buf |buf, len| { - let cbuf = cast::transmute(buf); - RUST_STRING_TO_JSVAL(JS_NewStringCopyN(cx, cbuf, len as libc::size_t)) + &None => JSVAL_NULL, + &Some(ref s) => do s.to_utf16().as_imm_buf |buf, len| { + RUST_STRING_TO_JSVAL(JS_NewUCStringCopyN(cx, buf, len as libc::size_t)) } - } } } From 591ded06cb65fbb5643efc2fefccca97e3898e98 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 16 Oct 2013 14:46:35 -0700 Subject: [PATCH 3/6] Test non-ASCII tag names --- src/test/html/content/test_collections.html | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/test/html/content/test_collections.html b/src/test/html/content/test_collections.html index 93837eb0c86..e92c19013a9 100644 --- a/src/test/html/content/test_collections.html +++ b/src/test/html/content/test_collections.html @@ -24,8 +24,10 @@ function check_collection(obj, num, classes, name) { } } -function check_tag(tagname, num, classes) { - check_collection(document.getElementsByTagName(tagname), num, classes, tagname.toUpperCase()); +function check_tag(tagname, num, classes, tagname_upper) { + if (tagname_upper === undefined) + tagname_upper = tagname.toUpperCase(); + check_collection(document.getElementsByTagName(tagname), num, classes, tagname_upper); } check_collection(document.links, 1, [HTMLAnchorElement], "A"); @@ -73,8 +75,8 @@ check_tag("track", 1, [HTMLTrackElement]); check_tag("audio", 1, [HTMLMediaElement, HTMLAudioElement]); check_tag("video", 1, [HTMLMediaElement, HTMLVideoElement]); -// FIXME: Test non-ASCII tag names -check_tag("foo", 1, [HTMLUnknownElement]); +// Test non-ASCII tag names. The ASCII-only uppercasing matches Firefox's behavior. +check_tag("foo-á", 1, [HTMLUnknownElement], "FOO-á"); finish(); @@ -134,7 +136,7 @@ finish(); -hi +hi From 5f14ee617b2c1b05391dfe2c6deecfb4bbcd2928 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 17 Oct 2013 12:59:05 -0700 Subject: [PATCH 4/6] Check for allocation failure in domstring_to_jsval --- src/components/script/dom/bindings/utils.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/components/script/dom/bindings/utils.rs b/src/components/script/dom/bindings/utils.rs index c43f510b620..d5ae6319c91 100644 --- a/src/components/script/dom/bindings/utils.rs +++ b/src/components/script/dom/bindings/utils.rs @@ -218,7 +218,13 @@ pub unsafe fn domstring_to_jsval(cx: *JSContext, string: &DOMString) -> JSVal { match string { &None => JSVAL_NULL, &Some(ref s) => do s.to_utf16().as_imm_buf |buf, len| { - RUST_STRING_TO_JSVAL(JS_NewUCStringCopyN(cx, buf, len as libc::size_t)) + let jsstr = JS_NewUCStringCopyN(cx, buf, len as libc::size_t); + if jsstr.is_null() { + // FIXME: is there something else we should do on failure? + JSVAL_NULL + } else { + RUST_STRING_TO_JSVAL(jsstr) + } } } } From f3533ecfc135b2fa2971eb6696e87a065e9bd85b Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 17 Oct 2013 13:04:34 -0700 Subject: [PATCH 5/6] Update comment with spec link --- src/test/html/content/test_collections.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/html/content/test_collections.html b/src/test/html/content/test_collections.html index e92c19013a9..f3dc6d33e59 100644 --- a/src/test/html/content/test_collections.html +++ b/src/test/html/content/test_collections.html @@ -75,7 +75,8 @@ check_tag("track", 1, [HTMLTrackElement]); check_tag("audio", 1, [HTMLMediaElement, HTMLAudioElement]); check_tag("video", 1, [HTMLMediaElement, HTMLVideoElement]); -// Test non-ASCII tag names. The ASCII-only uppercasing matches Firefox's behavior. +// Test non-ASCII tag names. Uppercasing is ASCII-only per spec: +// http://dom.spec.whatwg.org/#dom-element-tagname check_tag("foo-á", 1, [HTMLUnknownElement], "FOO-á"); finish(); From ff24707771d28c9c58398fef310ff2b456034a4a Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 17 Oct 2013 15:42:40 -0700 Subject: [PATCH 6/6] Fix broken testcase in rust-mozjs --- src/support/spidermonkey/rust-mozjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/support/spidermonkey/rust-mozjs b/src/support/spidermonkey/rust-mozjs index 7372f6b7ed1..dc50fb79583 160000 --- a/src/support/spidermonkey/rust-mozjs +++ b/src/support/spidermonkey/rust-mozjs @@ -1 +1 @@ -Subproject commit 7372f6b7ed1cfa66f0883667d4b08901487c9ab2 +Subproject commit dc50fb7958312e8ee5b4a03db516bcafd6df3d51