mirror of
https://github.com/servo/servo.git
synced 2025-08-02 12:10:29 +01:00
Fix CharacterDataMethods to count UTF-16 code units, not code points.
This commit is contained in:
parent
95a252a650
commit
006dd1002f
2 changed files with 67 additions and 28 deletions
|
@ -17,7 +17,7 @@ use dom::element::Element;
|
||||||
use dom::eventtarget::{EventTarget, EventTargetTypeId};
|
use dom::eventtarget::{EventTarget, EventTargetTypeId};
|
||||||
use dom::node::{Node, NodeTypeId};
|
use dom::node::{Node, NodeTypeId};
|
||||||
|
|
||||||
use util::str::{DOMString, slice_chars};
|
use util::str::DOMString;
|
||||||
|
|
||||||
use std::borrow::ToOwned;
|
use std::borrow::ToOwned;
|
||||||
use std::cell::Ref;
|
use std::cell::Ref;
|
||||||
|
@ -60,21 +60,25 @@ impl CharacterDataMethods for CharacterData {
|
||||||
|
|
||||||
// https://dom.spec.whatwg.org/#dom-characterdata-length
|
// https://dom.spec.whatwg.org/#dom-characterdata-length
|
||||||
fn Length(&self) -> u32 {
|
fn Length(&self) -> u32 {
|
||||||
self.data.borrow().chars().count() as u32
|
self.data.borrow().chars().map(|c| c.len_utf16()).sum::<usize>() as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://dom.spec.whatwg.org/#dom-characterdata-substringdataoffset-count
|
// https://dom.spec.whatwg.org/#dom-characterdata-substringdata
|
||||||
fn SubstringData(&self, offset: u32, count: u32) -> Fallible<DOMString> {
|
fn SubstringData(&self, offset: u32, count: u32) -> Fallible<DOMString> {
|
||||||
let data = self.data.borrow();
|
let data = self.data.borrow();
|
||||||
// Step 1.
|
// Step 1.
|
||||||
let length = data.chars().count() as u32;
|
let data_from_offset = match find_utf16_code_unit_offset(&data, offset) {
|
||||||
if offset > length {
|
Some(offset_bytes) => &data[offset_bytes..],
|
||||||
// Step 2.
|
// Step 2.
|
||||||
return Err(IndexSize);
|
None => return Err(IndexSize)
|
||||||
}
|
};
|
||||||
// Steps 3-4.
|
let substring = match find_utf16_code_unit_offset(data_from_offset, count) {
|
||||||
let end = if length - offset < count { length } else { offset + count };
|
// Step 3.
|
||||||
Ok(slice_chars(&*data, offset as usize, end as usize).to_owned())
|
None => data_from_offset,
|
||||||
|
// Step 4.
|
||||||
|
Some(count_bytes) => &data_from_offset[..count_bytes],
|
||||||
|
};
|
||||||
|
Ok(substring.to_owned())
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://dom.spec.whatwg.org/#dom-characterdata-appenddatadata
|
// https://dom.spec.whatwg.org/#dom-characterdata-appenddatadata
|
||||||
|
@ -92,26 +96,30 @@ impl CharacterDataMethods for CharacterData {
|
||||||
self.ReplaceData(offset, count, "".to_owned())
|
self.ReplaceData(offset, count, "".to_owned())
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://dom.spec.whatwg.org/#dom-characterdata-replacedataoffset-count-data
|
// https://dom.spec.whatwg.org/#dom-characterdata-replacedata
|
||||||
fn ReplaceData(&self, offset: u32, count: u32, arg: DOMString) -> ErrorResult {
|
fn ReplaceData(&self, offset: u32, count: u32, arg: DOMString) -> ErrorResult {
|
||||||
// Step 1.
|
let new_data = {
|
||||||
let length = self.data.borrow().chars().count() as u32;
|
let data = self.data.borrow();
|
||||||
if offset > length {
|
let (prefix, data_from_offset) = match find_utf16_code_unit_offset(&data, offset) {
|
||||||
// Step 2.
|
Some(offset_bytes) => data.split_at(offset_bytes),
|
||||||
return Err(IndexSize);
|
// Step 2.
|
||||||
}
|
None => return Err(IndexSize)
|
||||||
// Step 3.
|
};
|
||||||
let count = match length - offset {
|
let suffix = match find_utf16_code_unit_offset(data_from_offset, count) {
|
||||||
diff if diff < count => diff,
|
// Step 3.
|
||||||
_ => count,
|
None => "",
|
||||||
|
Some(count_bytes) => &data_from_offset[count_bytes..],
|
||||||
|
};
|
||||||
|
// Step 4: Mutation observers.
|
||||||
|
// Steps 5 to 7.
|
||||||
|
let mut new_data = String::with_capacity(prefix.len() + arg.len() + suffix.len());
|
||||||
|
new_data.push_str(prefix);
|
||||||
|
new_data.push_str(&arg);
|
||||||
|
new_data.push_str(suffix);
|
||||||
|
new_data
|
||||||
};
|
};
|
||||||
// Step 4: Mutation observers.
|
*self.data.borrow_mut() = new_data;
|
||||||
// Step 5.
|
// FIXME: Once we have `Range`, we should implement step 8 to step 11
|
||||||
let mut data = slice_chars(&*self.data.borrow(), 0, offset as usize).to_owned();
|
|
||||||
data.push_str(&arg);
|
|
||||||
data.push_str(slice_chars(&*self.data.borrow(), (offset + count) as usize, length as usize));
|
|
||||||
*self.data.borrow_mut() = data;
|
|
||||||
// FIXME: Once we have `Range`, we should implement step7 to step11
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,3 +189,32 @@ impl LayoutCharacterDataHelpers for LayoutJS<CharacterData> {
|
||||||
&(*self.unsafe_get()).data.borrow_for_layout()
|
&(*self.unsafe_get()).data.borrow_for_layout()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given a number of UTF-16 code units from the start of the given string,
/// return the corresponding number of UTF-8 bytes.
///
/// In pseudo-code, for a successful lookup:
///
/// s[find_utf16_code_unit_offset(s, o).unwrap()..] == s.to_utf16()[o..].to_utf8()
///
/// Returns `Some(s.len())` when `offset` is exactly the length of `s` in
/// UTF-16 code units, and `None` when `offset` is greater than that length.
///
/// # Panics
///
/// Panics if `offset` falls between the two code units of a surrogate pair,
/// i.e. in the middle of a character above U+FFFF: there is no UTF-8 byte
/// offset that corresponds to such a position.
fn find_utf16_code_unit_offset(s: &str, offset: u32) -> Option<usize> {
    // Number of UTF-16 code units covered by the characters walked so far.
    let mut code_units: u32 = 0;
    for (byte_index, c) in s.char_indices() {
        if code_units == offset {
            return Some(byte_index);
        }
        // Past the check above `code_units < offset` always holds, so the
        // subtraction below cannot underflow and the addition cannot overflow.
        let width = c.len_utf16() as u32;
        if width == 2 && offset - code_units == 1 {
            panic!("\n\n\
                Would split a surrogate pair in CharacterData API.\n\
                If you see this in real content, please comment with the URL\n\
                on https://github.com/servo/servo/issues/6873\n\
            \n");
        }
        code_units += width;
    }
    // `offset` may still point one past the last character.
    if code_units == offset {
        Some(s.len())
    } else {
        None
    }
}
|
||||||
|
|
|
@ -18,12 +18,14 @@
|
||||||
#![feature(drain)]
|
#![feature(drain)]
|
||||||
#![feature(fnbox)]
|
#![feature(fnbox)]
|
||||||
#![feature(hashmap_hasher)]
|
#![feature(hashmap_hasher)]
|
||||||
|
#![feature(iter_arith)]
|
||||||
#![feature(mpsc_select)]
|
#![feature(mpsc_select)]
|
||||||
#![feature(nonzero)]
|
#![feature(nonzero)]
|
||||||
#![feature(plugin)]
|
#![feature(plugin)]
|
||||||
#![feature(ref_slice)]
|
#![feature(ref_slice)]
|
||||||
#![feature(rc_unique)]
|
#![feature(rc_unique)]
|
||||||
#![feature(slice_patterns)]
|
#![feature(slice_patterns)]
|
||||||
|
#![feature(str_split_at)]
|
||||||
#![feature(str_utf16)]
|
#![feature(str_utf16)]
|
||||||
#![feature(unicode)]
|
#![feature(unicode)]
|
||||||
#![feature(vec_push_all)]
|
#![feature(vec_push_all)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue