From d7e6f8b0f15326aad413eb1c59fc57e913e309b4 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Fri, 1 Apr 2016 12:24:49 -0700 Subject: [PATCH 1/4] Use correct byte indices in replace_selection --- components/script/textinput.rs | 16 +++++++++++++--- tests/unit/script/textinput.rs | 9 +++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/components/script/textinput.rs b/components/script/textinput.rs index cc5936d42b0..ec4af672178 100644 --- a/components/script/textinput.rs +++ b/components/script/textinput.rs @@ -107,6 +107,16 @@ fn is_printable_key(key: Key) -> bool { } } +/// The length in bytes of the first n characters in a UTF-8 string. +/// +/// If the string has fewer than n characters, returns the length of the whole string. +fn len_of_first_n_chars(text: &str, n: usize) -> usize { + match text.char_indices().take(n).last() { + Some((index, ch)) => index + ch.len_utf8(), + None => 0 + } +} + impl TextInput { /// Instantiate a new text input control pub fn new(lines: Lines, initial: DOMString, clipboard_provider: T, max_length: Option) -> TextInput { @@ -213,8 +223,8 @@ impl TextInput { usize::MAX }; - let last_char_to_insert = min(allowed_to_insert_count, insert.chars().count()); - let chars_to_insert = (&insert[0 .. last_char_to_insert]).to_owned(); + let last_char_index = len_of_first_n_chars(&*insert, allowed_to_insert_count); + let chars_to_insert = &insert[..last_char_index]; self.clear_selection(); @@ -225,7 +235,7 @@ impl TextInput { let lines_suffix = &self.lines[end.line + 1..]; let mut insert_lines = if self.multiline { - chars_to_insert.split('\n').map(|s| DOMString::from(s.to_owned())).collect() + chars_to_insert.split('\n').map(|s| DOMString::from(s)).collect() } else { vec!(DOMString::from(chars_to_insert)) }; diff --git a/tests/unit/script/textinput.rs b/tests/unit/script/textinput.rs index 2613c0f640a..ee40c9ff50c 100644 --- a/tests/unit/script/textinput.rs +++ b/tests/unit/script/textinput.rs @@ -206,6 +206,15 @@ fn test_textinput_replace_selection() { assert_eq!(textinput.get_content(), "abxyzefg"); } +#[test] +fn test_textinput_replace_selection_multibyte_char() { + let mut textinput = text_input(Lines::Single, "é"); + textinput.adjust_horizontal_by_one(Direction::Forward, Selection::Selected); + + textinput.replace_selection(DOMString::from("e")); + assert_eq!(textinput.get_content(), "e"); +} + #[test] fn test_textinput_current_line_length() { let mut textinput = text_input(Lines::Multiple, "abc\nde\nf"); From e17ed2e6b0572fd201c126dbe38ea949ca226fe8 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Fri, 1 Apr 2016 12:25:21 -0700 Subject: [PATCH 2/4] Add/correct documentation comments in textinput --- components/script/textinput.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/components/script/textinput.rs b/components/script/textinput.rs index ec4af672178..1f0cb0c53d8 100644 --- a/components/script/textinput.rs +++ b/components/script/textinput.rs @@ -165,6 +165,9 @@ impl TextInput { }) } + /// Return the selection range as UTF-8 byte offsets from the start of the content. + /// + /// If there is no selection, returns an empty range at the insertion point. pub fn get_absolute_selection_range(&self) -> Range { match self.get_sorted_selection() { Some((begin, _end)) => @@ -302,7 +305,7 @@ impl TextInput { self.edit_point.index = min(self.current_line_length(), self.edit_point.index); } - /// Adjust the editing point position by a given number of columns. If the adjustment + /// Adjust the editing point position by a given number of bytes. If the adjustment /// requested is larger than is available in the current line, the editing point is /// adjusted vertically and the process repeats with the remaining adjustment requested. pub fn adjust_horizontal(&mut self, adjust: isize, select: Selection) { @@ -339,7 +342,7 @@ impl TextInput { self.perform_horizontal_adjustment(adjust, select); } - // Return whether to cancel the caret move + /// Return whether to cancel the caret move fn adjust_selection_for_horizontal_change(&mut self, adjust: Direction, select: Selection) -> bool { if select == Selection::Selected { @@ -489,6 +492,7 @@ impl TextInput { } } + /// The length of the content in bytes. pub fn len(&self) -> usize { self.lines.iter().fold(0, |m, l| { m + l.len() + 1 @@ -520,10 +524,12 @@ impl TextInput { self.selection_begin = None; } + /// Get the insertion point as a byte offset from the start of the content. pub fn get_absolute_insertion_point(&self) -> usize { self.get_absolute_point_for_text_point(&self.edit_point) } + /// Convert a TextPoint into a byte offset from the start of the content. pub fn get_absolute_point_for_text_point(&self, text_point: &TextPoint) -> usize { self.lines.iter().enumerate().fold(0, |acc, (i, val)| { if i < text_point.line { @@ -534,6 +540,7 @@ impl TextInput { }) + text_point.index } + /// Convert a byte offset from the start of the content into a TextPoint. pub fn get_text_point_for_absolute_point(&self, abs_point: usize) -> TextPoint { let mut index = abs_point; let mut line = 0; From 29fb3f11502e1d69ec1f01e5da87abf212a16264 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Fri, 1 Apr 2016 12:36:25 -0700 Subject: [PATCH 3/4] Find the correct column index in adjust_vertical --- components/script/textinput.rs | 5 ++++- tests/unit/script/textinput.rs | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/components/script/textinput.rs b/components/script/textinput.rs index 1f0cb0c53d8..4992cffb3d4 100644 --- a/components/script/textinput.rs +++ b/components/script/textinput.rs @@ -301,8 +301,11 @@ impl TextInput { return; } + + let col = self.lines[self.edit_point.line][..self.edit_point.index].chars().count(); + self.edit_point.line = target_line as usize; - self.edit_point.index = min(self.current_line_length(), self.edit_point.index); + self.edit_point.index = len_of_first_n_chars(&self.lines[self.edit_point.line], col); } /// Adjust the editing point position by a given number of bytes. If the adjustment diff --git a/tests/unit/script/textinput.rs b/tests/unit/script/textinput.rs index ee40c9ff50c..f5ee2feeb35 100644 --- a/tests/unit/script/textinput.rs +++ b/tests/unit/script/textinput.rs @@ -244,6 +244,19 @@ fn test_textinput_adjust_vertical() { assert_eq!(textinput.edit_point.index, 1); } +#[test] +fn test_textinput_adjust_vertical_multibyte() { + let mut textinput = text_input(Lines::Multiple, "áé\nae"); + + textinput.adjust_horizontal_by_one(Direction::Forward, Selection::NotSelected); + assert_eq!(textinput.edit_point.line, 0); + assert_eq!(textinput.edit_point.index, 2); + + textinput.adjust_vertical(1, Selection::NotSelected); + assert_eq!(textinput.edit_point.line, 1); + assert_eq!(textinput.edit_point.index, 1); +} + #[test] fn test_textinput_adjust_horizontal() { let mut textinput = text_input(Lines::Multiple, "abc\nde\nf"); From deca979967a61e36b7ff8b5799e413b80fd8524c Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Fri, 1 Apr 2016 14:49:14 -0700 Subject: [PATCH 4/4] TextInput::max_length should be in code units, not bytes --- components/script/lib.rs | 1 + components/script/textinput.rs | 100 +++++++++++++++++++++++---------- tests/unit/script/textinput.rs | 49 ++++++++++++++++ 3 files changed, 120 insertions(+), 30 deletions(-) diff --git a/components/script/lib.rs b/components/script/lib.rs index fa6dc8a385e..3ab51fc954e 100644 --- a/components/script/lib.rs +++ b/components/script/lib.rs @@ -11,6 +11,7 @@ #![feature(custom_attribute)] #![feature(custom_derive)] #![feature(fnbox)] +#![feature(iter_arith)] #![feature(mpsc_select)] #![feature(nonzero)] #![feature(on_unimplemented)] diff --git a/components/script/textinput.rs b/components/script/textinput.rs index 4992cffb3d4..1be7962003c 100644 --- a/components/script/textinput.rs +++ b/components/script/textinput.rs @@ -42,6 +42,9 @@ pub struct TextInput { multiline: bool, #[ignore_heap_size_of = "Can't easily measure this generic type"] clipboard_provider: T, + /// The maximum number of UTF-16 code units this text input is allowed to hold. + /// + /// https://html.spec.whatwg.org/multipage/#attr-fe-maxlength pub max_length: Option } @@ -117,6 +120,22 @@ fn len_of_first_n_chars(text: &str, n: usize) -> usize { } } +/// The length in bytes of the first n code units a string when encoded in UTF-16. +/// +/// If the string is fewer than n code units, returns the length of the whole string. +fn len_of_first_n_code_units(text: &str, n: usize) -> usize { + let mut utf8_len = 0; + let mut utf16_len = 0; + for c in text.chars() { + utf16_len += c.len_utf16(); + if utf16_len > n { + break; + } + utf8_len += c.len_utf8(); + } + utf8_len +} + impl TextInput { /// Instantiate a new text input control pub fn new(lines: Lines, initial: DOMString, clipboard_provider: T, max_length: Option) -> TextInput { @@ -178,44 +197,51 @@ impl TextInput { } pub fn get_selection_text(&self) -> Option { - self.get_sorted_selection().map(|(begin, end)| { - if begin.line != end.line { - let mut s = String::new(); - s.push_str(&self.lines[begin.line][begin.index..]); - for (_, line) in self.lines.iter().enumerate().filter(|&(i, _)| begin.line < i && i < end.line) { - s.push_str("\n"); - s.push_str(line); - } - s.push_str("\n"); - s.push_str(&self.lines[end.line][..end.index]); - s - } else { - self.lines[begin.line][begin.index..end.index].to_owned() - } - }) + let text = self.fold_selection_slices(String::new(), |s, slice| s.push_str(slice)); + if text.is_empty() { + return None + } + Some(text) } + /// The length of the selected text in UTF-8 bytes. fn selection_len(&self) -> usize { - if let Some((begin, end)) = self.get_sorted_selection() { - let prefix = &self.lines[begin.line][0..begin.index]; - let suffix = &self.lines[end.line][end.index..]; - let lines_prefix = &self.lines[..begin.line]; - let lines_suffix = &self.lines[end.line + 1..]; + self.fold_selection_slices(0, |len, slice| *len += slice.len()) + } - self.len() - (prefix.chars().count() + - suffix.chars().count() + - lines_prefix.iter().fold(0, |m, i| m + i.chars().count() + 1) + - lines_suffix.iter().fold(0, |m, i| m + i.chars().count() + 1)) - } else { - 0 + /// The length of the selected text in UTF-16 code units. + fn selection_utf16_len(&self) -> usize { + self.fold_selection_slices(0usize, + |len, slice| *len += slice.chars().map(char::len_utf16).sum()) + } + + /// Run the callback on a series of slices that, concatenated, make up the selected text. + /// + /// The accumulator `acc` can be mutated by the callback, and will be returned at the end. + fn fold_selection_slices(&self, mut acc: B, mut f: F) -> B { + match self.get_sorted_selection() { + Some((begin, end)) if begin.line == end.line => { + f(&mut acc, &self.lines[begin.line][begin.index..end.index]) + } + Some((begin, end)) => { + f(&mut acc, &self.lines[begin.line][begin.index..]); + for line in &self.lines[begin.line + 1 .. end.line] { + f(&mut acc, "\n"); + f(&mut acc, line); + } + f(&mut acc, "\n"); + f(&mut acc, &self.lines[end.line][..end.index]) + } + None => {} } + acc } pub fn replace_selection(&mut self, insert: DOMString) { if let Some((begin, end)) = self.get_sorted_selection() { let allowed_to_insert_count = if let Some(max_length) = self.max_length { - let len_after_selection_replaced = self.len() - self.selection_len(); - if len_after_selection_replaced > max_length { + let len_after_selection_replaced = self.utf16_len() - self.selection_utf16_len(); + if len_after_selection_replaced >= max_length { // If, after deleting the selection, the len is still greater than the max // length, then don't delete/insert anything return @@ -226,7 +252,7 @@ impl TextInput { usize::MAX }; - let last_char_index = len_of_first_n_chars(&*insert, allowed_to_insert_count); + let last_char_index = len_of_first_n_code_units(&*insert, allowed_to_insert_count); let chars_to_insert = &insert[..last_char_index]; self.clear_selection(); @@ -498,7 +524,21 @@ impl TextInput { /// The length of the content in bytes. pub fn len(&self) -> usize { self.lines.iter().fold(0, |m, l| { - m + l.len() + 1 + m + l.len() + 1 // + 1 for the '\n' + }) - 1 + } + + /// The length of the content in bytes. + pub fn utf16_len(&self) -> usize { + self.lines.iter().fold(0, |m, l| { + m + l.chars().map(char::len_utf16).sum::() + 1 // + 1 for the '\n' + }) - 1 + } + + /// The length of the content in chars. + pub fn char_count(&self) -> usize { + self.lines.iter().fold(0, |m, l| { + m + l.chars().count() + 1 // + 1 for the '\n' }) - 1 } diff --git a/tests/unit/script/textinput.rs b/tests/unit/script/textinput.rs index f5ee2feeb35..318612e64af 100644 --- a/tests/unit/script/textinput.rs +++ b/tests/unit/script/textinput.rs @@ -117,6 +117,55 @@ fn test_single_line_textinput_with_max_length_doesnt_allow_appending_characters_ assert_eq!(textinput.get_content(), "atooe"); } +#[test] +fn test_single_line_textinput_with_max_length_multibyte() { + let mut textinput = TextInput::new( + Lines::Single, + DOMString::from(""), + DummyClipboardContext::new(""), + Some(2) + ); + + textinput.insert_char('á'); + assert_eq!(textinput.get_content(), "á"); + textinput.insert_char('é'); + assert_eq!(textinput.get_content(), "áé"); + textinput.insert_char('i'); + assert_eq!(textinput.get_content(), "áé"); +} + +#[test] +fn test_single_line_textinput_with_max_length_multi_code_unit() { + let mut textinput = TextInput::new( + Lines::Single, + DOMString::from(""), + DummyClipboardContext::new(""), + Some(3) + ); + + textinput.insert_char('\u{10437}'); + assert_eq!(textinput.get_content(), "\u{10437}"); + textinput.insert_char('\u{10437}'); + assert_eq!(textinput.get_content(), "\u{10437}"); + textinput.insert_char('x'); + assert_eq!(textinput.get_content(), "\u{10437}x"); + textinput.insert_char('x'); + assert_eq!(textinput.get_content(), "\u{10437}x"); +} + +#[test] +fn test_single_line_textinput_with_max_length_inside_char() { + let mut textinput = TextInput::new( + Lines::Single, + DOMString::from("\u{10437}"), + DummyClipboardContext::new(""), + Some(1) + ); + + textinput.insert_char('x'); + assert_eq!(textinput.get_content(), "\u{10437}"); +} + #[test] fn test_single_line_textinput_with_max_length_doesnt_allow_appending_characters_after_max_length_is_reached() { let mut textinput = TextInput::new(