Auto merge of #6854 - servo:slice_chars, r=jdm+Ms2ger

Remove usage of slice_chars in script

It’s deprecated in the #6850 rustup.

The first commit changes some behavior which was previously incorrect: the spec says indices in DOM strings are UTF-16 code units, not `char` code points.

The second commit should not change behavior, unless I made a mistake.

r? @jdm

<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/6854)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2015-08-28 05:16:03 -06:00
commit 2ca48ca404
12 changed files with 293 additions and 60 deletions

View file

@ -17,7 +17,7 @@ use dom::element::Element;
use dom::eventtarget::{EventTarget, EventTargetTypeId};
use dom::node::{Node, NodeTypeId};
use util::str::{DOMString, slice_chars};
use util::str::DOMString;
use std::borrow::ToOwned;
use std::cell::Ref;
@ -60,21 +60,25 @@ impl CharacterDataMethods for CharacterData {
// https://dom.spec.whatwg.org/#dom-characterdata-length
fn Length(&self) -> u32 {
self.data.borrow().chars().count() as u32
self.data.borrow().chars().map(|c| c.len_utf16()).sum::<usize>() as u32
}
// https://dom.spec.whatwg.org/#dom-characterdata-substringdataoffset-count
// https://dom.spec.whatwg.org/#dom-characterdata-substringdata
fn SubstringData(&self, offset: u32, count: u32) -> Fallible<DOMString> {
let data = self.data.borrow();
// Step 1.
let length = data.chars().count() as u32;
if offset > length {
let data_from_offset = match find_utf16_code_unit_offset(&data, offset) {
Some(offset_bytes) => &data[offset_bytes..],
// Step 2.
return Err(IndexSize);
}
// Steps 3-4.
let end = if length - offset < count { length } else { offset + count };
Ok(slice_chars(&*data, offset as usize, end as usize).to_owned())
None => return Err(IndexSize)
};
let substring = match find_utf16_code_unit_offset(data_from_offset, count) {
// Steps 3.
None => data_from_offset,
// Steps 4.
Some(count_bytes) => &data_from_offset[..count_bytes],
};
Ok(substring.to_owned())
}
// https://dom.spec.whatwg.org/#dom-characterdata-appenddatadata
@ -92,26 +96,30 @@ impl CharacterDataMethods for CharacterData {
self.ReplaceData(offset, count, "".to_owned())
}
// https://dom.spec.whatwg.org/#dom-characterdata-replacedataoffset-count-data
// https://dom.spec.whatwg.org/#dom-characterdata-replacedata
fn ReplaceData(&self, offset: u32, count: u32, arg: DOMString) -> ErrorResult {
// Step 1.
let length = self.data.borrow().chars().count() as u32;
if offset > length {
// Step 2.
return Err(IndexSize);
}
// Step 3.
let count = match length - offset {
diff if diff < count => diff,
_ => count,
let new_data = {
let data = self.data.borrow();
let (prefix, data_from_offset) = match find_utf16_code_unit_offset(&data, offset) {
Some(offset_bytes) => data.split_at(offset_bytes),
// Step 2.
None => return Err(IndexSize)
};
let suffix = match find_utf16_code_unit_offset(data_from_offset, count) {
// Steps 3.
None => "",
Some(count_bytes) => &data_from_offset[count_bytes..],
};
// Step 4: Mutation observers.
// Step 5 to 7.
let mut new_data = String::with_capacity(prefix.len() + arg.len() + suffix.len());
new_data.push_str(prefix);
new_data.push_str(&arg);
new_data.push_str(suffix);
new_data
};
// Step 4: Mutation observers.
// Step 5.
let mut data = slice_chars(&*self.data.borrow(), 0, offset as usize).to_owned();
data.push_str(&arg);
data.push_str(slice_chars(&*self.data.borrow(), (offset + count) as usize, length as usize));
*self.data.borrow_mut() = data;
// FIXME: Once we have `Range`, we should implement step7 to step11
*self.data.borrow_mut() = new_data;
// FIXME: Once we have `Range`, we should implement step 8 to step 11
Ok(())
}
@ -181,3 +189,32 @@ impl LayoutCharacterDataHelpers for LayoutJS<CharacterData> {
&(*self.unsafe_get()).data.borrow_for_layout()
}
}
/// Given a number of UTF-16 code units from the start of the given string,
/// return the corresponding number of UTF-8 bytes.
///
/// s[find_utf16_code_unit_offset(s, o).unwrap()..] == s.to_utf16()[o..].to_utf8()
fn find_utf16_code_unit_offset(s: &str, offset: u32) -> Option<usize> {
let mut code_units = 0;
for (i, c) in s.char_indices() {
if code_units == offset {
return Some(i)
}
code_units += 1;
if c > '\u{FFFF}' {
if code_units == offset {
panic!("\n\n\
Would split a surrogate pair in CharacterData API.\n\
If you see this in real content, please comment with the URL\n\
on https://github.com/servo/servo/issues/6873\n\
\n");
}
code_units += 1;
}
}
if code_units == offset {
Some(s.len())
} else {
None
}
}

View file

@ -18,12 +18,14 @@
#![feature(drain)]
#![feature(fnbox)]
#![feature(hashmap_hasher)]
#![feature(iter_arith)]
#![feature(mpsc_select)]
#![feature(nonzero)]
#![feature(plugin)]
#![feature(ref_slice)]
#![feature(rc_unique)]
#![feature(slice_patterns)]
#![feature(str_split_at)]
#![feature(str_utf16)]
#![feature(unicode)]
#![feature(vec_push_all)]

View file

@ -9,7 +9,7 @@ use dom::keyboardevent::{KeyboardEvent, key_value};
use msg::constellation_msg::{Key, KeyModifiers};
use msg::constellation_msg::{SHIFT, CONTROL, ALT, SUPER};
use util::mem::HeapSizeOf;
use util::str::{DOMString, slice_chars};
use util::str::DOMString;
use std::borrow::ToOwned;
use std::cmp::{min, max};
@ -26,7 +26,7 @@ pub enum Selection {
pub struct TextPoint {
/// 0-based line number
pub line: usize,
/// 0-based column number
/// 0-based column number in UTF-8 bytes
pub index: usize,
}
@ -62,15 +62,15 @@ impl Default for TextPoint {
}
/// Control whether this control should allow multiple lines.
#[derive(PartialEq)]
#[derive(PartialEq, Eq)]
pub enum Lines {
Single,
Multiple,
}
/// The direction in which to delete a character.
#[derive(PartialEq)]
pub enum DeleteDir {
#[derive(PartialEq, Eq, Copy, Clone)]
pub enum Direction {
Forward,
Backward
}
@ -121,13 +121,9 @@ impl<T: ClipboardProvider> TextInput<T> {
}
/// Remove a character at the current editing point
pub fn delete_char(&mut self, dir: DeleteDir) {
pub fn delete_char(&mut self, dir: Direction) {
if self.selection_begin.is_none() {
self.adjust_horizontal(if dir == DeleteDir::Forward {
1
} else {
-1
}, Selection::Selected);
self.adjust_horizontal_by_one(dir, Selection::Selected);
}
self.replace_selection("".to_owned());
}
@ -161,16 +157,16 @@ impl<T: ClipboardProvider> TextInput<T> {
self.get_sorted_selection().map(|(begin, end)| {
if begin.line != end.line {
let mut s = String::new();
s.push_str(slice_chars(&self.lines[begin.line], begin.index, self.lines[begin.line].len()));
s.push_str(&self.lines[begin.line][begin.index..]);
for (_, line) in self.lines.iter().enumerate().filter(|&(i,_)| begin.line < i && i < end.line) {
s.push_str("\n");
s.push_str(line);
}
s.push_str("\n");
s.push_str(slice_chars(&self.lines[end.line], 0, end.index));
s.push_str(&self.lines[end.line][..end.index]);
s
} else {
slice_chars(&self.lines[begin.line], begin.index, end.index).to_owned()
self.lines[begin.line][begin.index..end.index].to_owned()
}
})
}
@ -180,8 +176,8 @@ impl<T: ClipboardProvider> TextInput<T> {
self.clear_selection();
let new_lines = {
let prefix = slice_chars(&self.lines[begin.line], 0, begin.index);
let suffix = slice_chars(&self.lines[end.line], end.index, self.lines[end.line].chars().count());
let prefix = &self.lines[begin.line][..begin.index];
let suffix = &self.lines[end.line][end.index..];
let lines_prefix = &self.lines[..begin.line];
let lines_suffix = &self.lines[end.line + 1..];
@ -196,7 +192,7 @@ impl<T: ClipboardProvider> TextInput<T> {
insert_lines[0] = new_line;
let last_insert_lines_index = insert_lines.len() - 1;
self.edit_point.index = insert_lines[last_insert_lines_index].chars().count();
self.edit_point.index = insert_lines[last_insert_lines_index].len();
self.edit_point.line = begin.line + last_insert_lines_index;
insert_lines[last_insert_lines_index].push_str(suffix);
@ -212,9 +208,9 @@ impl<T: ClipboardProvider> TextInput<T> {
}
}
/// Return the length of the current line under the editing point.
/// Return the length in UTF-8 bytes of the current line under the editing point.
pub fn current_line_length(&self) -> usize {
self.lines[self.edit_point.line].chars().count()
self.lines[self.edit_point.line].len()
}
/// Adjust the editing point position by a given of lines. The resulting column is
@ -254,18 +250,60 @@ impl<T: ClipboardProvider> TextInput<T> {
/// requested is larger than is available in the current line, the editing point is
/// adjusted vertically and the process repeats with the remaining adjustment requested.
pub fn adjust_horizontal(&mut self, adjust: isize, select: Selection) {
let direction = if adjust >= 0 { Direction::Forward } else { Direction::Backward };
if self.adjust_selection_for_horizontal_change(direction, select) {
return
}
self.perform_horizontal_adjustment(adjust, select);
}
pub fn adjust_horizontal_by_one(&mut self, direction: Direction, select: Selection) {
if self.adjust_selection_for_horizontal_change(direction, select) {
return
}
let adjust = {
let current_line = &self.lines[self.edit_point.line];
// FIXME: We adjust by one code point, but it proably should be one grapheme cluster
// https://github.com/unicode-rs/unicode-segmentation
match direction {
Direction::Forward => {
match current_line[self.edit_point.index..].chars().next() {
Some(c) => c.len_utf8() as isize,
None => 1, // Going to the next line is a "one byte" offset
}
}
Direction::Backward => {
match current_line[..self.edit_point.index].chars().next_back() {
Some(c) => -(c.len_utf8() as isize),
None => -1, // Going to the previous line is a "one byte" offset
}
}
}
};
self.perform_horizontal_adjustment(adjust, select);
}
// Return whether to cancel the caret move
fn adjust_selection_for_horizontal_change(&mut self, adjust: Direction, select: Selection)
-> bool {
if select == Selection::Selected {
if self.selection_begin.is_none() {
self.selection_begin = Some(self.edit_point);
}
} else {
if let Some((begin, end)) = self.get_sorted_selection() {
self.edit_point = if adjust < 0 {begin} else {end};
self.edit_point = match adjust {
Direction::Backward => begin,
Direction::Forward => end,
};
self.clear_selection();
return
return true
}
}
false
}
fn perform_horizontal_adjustment(&mut self, adjust: isize, select: Selection) {
if adjust < 0 {
let remaining = self.edit_point.index;
if adjust.abs() as usize > remaining && self.edit_point.line > 0 {
@ -307,7 +345,7 @@ impl<T: ClipboardProvider> TextInput<T> {
});
let last_line = self.lines.len() - 1;
self.edit_point.line = last_line;
self.edit_point.index = self.lines[last_line].chars().count();
self.edit_point.index = self.lines[last_line].len();
}
/// Remove the current selection.
@ -350,19 +388,19 @@ impl<T: ClipboardProvider> TextInput<T> {
KeyReaction::DispatchInput
}
Key::Delete => {
self.delete_char(DeleteDir::Forward);
self.delete_char(Direction::Forward);
KeyReaction::DispatchInput
}
Key::Backspace => {
self.delete_char(DeleteDir::Backward);
self.delete_char(Direction::Backward);
KeyReaction::DispatchInput
}
Key::Left => {
self.adjust_horizontal(-1, maybe_select);
self.adjust_horizontal_by_one(Direction::Backward, maybe_select);
KeyReaction::Nothing
}
Key::Right => {
self.adjust_horizontal(1, maybe_select);
self.adjust_horizontal_by_one(Direction::Forward, maybe_select);
KeyReaction::Nothing
}
Key::Up => {

View file

@ -15,22 +15,29 @@ use msg::constellation_msg::SUPER;
use msg::constellation_msg::CONTROL;
use script::clipboard_provider::DummyClipboardContext;
use script::textinput::{TextInput, Selection, Lines, DeleteDir};
use script::textinput::{TextInput, Selection, Lines, Direction};
use std::borrow::ToOwned;
#[test]
fn test_textinput_delete_char() {
let mut textinput = TextInput::new(Lines::Single, "abcdefg".to_owned(), DummyClipboardContext::new(""));
textinput.adjust_horizontal(2, Selection::NotSelected);
textinput.delete_char(DeleteDir::Backward);
textinput.delete_char(Direction::Backward);
assert_eq!(textinput.get_content(), "acdefg");
textinput.delete_char(DeleteDir::Forward);
textinput.delete_char(Direction::Forward);
assert_eq!(textinput.get_content(), "adefg");
textinput.adjust_horizontal(2, Selection::Selected);
textinput.delete_char(DeleteDir::Forward);
textinput.delete_char(Direction::Forward);
assert_eq!(textinput.get_content(), "afg");
let mut textinput = TextInput::new(Lines::Single, "a🌠b".to_owned(), DummyClipboardContext::new(""));
// Same as "Right" key
textinput.adjust_horizontal_by_one(Direction::Forward, Selection::NotSelected);
textinput.delete_char(Direction::Forward);
// Not splitting surrogate pairs.
assert_eq!(textinput.get_content(), "ab");
}
#[test]
@ -43,6 +50,14 @@ fn test_textinput_insert_char() {
textinput.adjust_horizontal(2, Selection::Selected);
textinput.insert_char('b');
assert_eq!(textinput.get_content(), "ababefg");
let mut textinput = TextInput::new(Lines::Single, "a🌠c".to_owned(), DummyClipboardContext::new(""));
// Same as "Right" key
textinput.adjust_horizontal_by_one(Direction::Forward, Selection::NotSelected);
textinput.adjust_horizontal_by_one(Direction::Forward, Selection::NotSelected);
textinput.insert_char('b');
// Not splitting surrogate pairs.
assert_eq!(textinput.get_content(), "a🌠bc");
}
#[test]

View file

@ -29105,8 +29105,24 @@
]
},
"local_changes": {
"deleted": [],
"items": {},
"deleted": [
"shadow-dom/shadow-trees/hosting-multiple-shadow-trees-002.html",
"shadow-dom/shadow-trees/hosting-multiple-shadow-trees-006.html",
"shadow-dom/shadow-trees/hosting-multiple-shadow-trees-004.html",
"shadow-dom/shadow-trees/hosting-multiple-shadow-trees-003.html",
"2dcontext/transformations/canvas_transformations_reset_001.htm",
"shadow-dom/shadow-trees/hosting-multiple-shadow-trees-005.html"
],
"items": {
"testharness": {
"dom/nodes/CharacterData-surrogates.html": [
{
"path": "dom/nodes/CharacterData-surrogates.html",
"url": "/dom/nodes/CharacterData-surrogates.html"
}
]
}
},
"reftest_nodes": {}
},
"reftest_nodes": {

View file

@ -0,0 +1,3 @@
[CharacterData-surrogates.html]
type: testharness
expected: CRASH

View file

@ -66,6 +66,15 @@ function testNode(create, type) {
assert_equals(node.data, "資料")
assert_equals(node.length, 2)
}, type + ".data = '資料'")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST";
assert_equals(node.data, "🌠 test 🌠 TEST")
assert_equals(node.length, 15) // Counting UTF-16 code units
}, type + ".data = '🌠 test 🌠 TEST'")
}
testNode(function() { return document.createTextNode("test") }, "Text")

View file

@ -78,6 +78,16 @@ function testNode(create, type) {
node.deleteData(45, 2);
assert_equals(node.data, "This is the character data test, append 資料,更多資料");
}, type + ".deleteData() with non-ascii data")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.deleteData(5, 8); // Counting UTF-16 code units
assert_equals(node.data, "🌠 teST");
}, type + ".deleteData() with non-BMP data")
}
testNode(function() { return document.createTextNode("test") }, "Text")

View file

@ -73,6 +73,16 @@ function testNode(create, type) {
node.insertData(48, "更多");
assert_equals(node.data, "This is the character data test, append more 資料,更多測試資料");
}, type + ".insertData() with non-ascii data")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.insertData(5, "--"); // Counting UTF-16 code units
assert_equals(node.data, "🌠 te--st 🌠 TEST");
}, type + ".insertData() with non-BMP data")
}
testNode(function() { return document.createTextNode("test") }, "Text")

View file

@ -146,6 +146,16 @@ function testNode(create, type) {
node.replaceData(44, 2, "文字");
assert_equals(node.data, "This is the character data test, other 資料,更多文字");
}, type + ".replaceData() with non-ASCII data")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.replaceData(5, 8, "--"); // Counting UTF-16 code units
assert_equals(node.data, "🌠 te--ST");
}, type + ".replaceData() with non-BMP data")
}
testNode(function() { return document.createTextNode("test") }, "Text")

View file

@ -121,6 +121,15 @@ function testNode(create, type) {
assert_equals(node.substringData(12, 4), "char")
assert_equals(node.substringData(39, 2), "資料")
}, type + ".substringData() with non-ASCII data")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
assert_equals(node.substringData(5, 8), "st 🌠 TE") // Counting UTF-16 code units
}, type + ".substringData() with non-BMP data")
}
testNode(function() { return document.createTextNode("test") }, "Text")

View file

@ -0,0 +1,74 @@
<!DOCTYPE html>
<meta charset=utf-8>
<title>Splitting and joining surrogate pairs in CharacterData methods</title>
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-substringdata">
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-replacedata">
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-insertdata">
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-deletedata">
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-data">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<div id="log"></div>
<script>
function testNode(create, type) {
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
assert_equals(node.substringData(1, 8), "\uDF20 test \uD83C")
}, type + ".substringData() splitting surrogate pairs")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.replaceData(1, 4, "--");
assert_equals(node.data, "\uD83C--st 🌠 TEST");
node.replaceData(1, 2, "\uDF1F ");
assert_equals(node.data, "🌟 st 🌠 TEST");
node.replaceData(5, 2, "---");
assert_equals(node.data, "🌟 st---\uDF20 TEST");
node.replaceData(6, 2, " \uD83D");
assert_equals(node.data, "🌟 st- 🜠 TEST");
}, type + ".replaceData() splitting and creating surrogate pairs")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.deleteData(1, 4);
assert_equals(node.data, "\uD83Cst 🌠 TEST");
node.deleteData(1, 4);
assert_equals(node.data, "🌠 TEST");
}, type + ".deleteData() splitting and creating surrogate pairs")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.insertData(1, "--");
assert_equals(node.data, "\uD83C--\uDF20 test 🌠 TEST");
node.insertData(1, "\uDF1F ");
assert_equals(node.data, "🌟 --\uDF20 test 🌠 TEST");
node.insertData(5, " \uD83D");
assert_equals(node.data, "🌟 -- 🜠 test 🌠 TEST");
}, type + ".insertData() splitting and creating surrogate pairs")
}
testNode(function() { return document.createTextNode("test") }, "Text")
testNode(function() { return document.createComment("test") }, "Comment")
</script>