layout: Add initial support for bidirectional text (BiDi) (#33148)

This adds supports for right-to-left text assigning bidi levels to all
line items when necessary. This includes support for the `dir` attribute
as well as corresponding CSS properties like `unicode-bidi`. It only
implements right-to-left rendering for inline layout at the moment and
doesn't include support for `dir=auto`. Because of missing features,
this causes quite a few tests to start failing, as references become
incorrect due to right-to-left rendering being active in some cases,
but not others (before it didn't exist at all).

Analysis of most of the new failures:

```
- /css/css-flexbox/gap-001-rtl.html
  /css/css-flexbox/gap-004-rtl.html
 - Require implementing BiDi in Flexbox, because the start and
   end inline margins are opposite the order of items.

- /css/CSS2/bidi-text/direction-applies-to-*.xht
  /css/CSS2/bidi-text/direction-applies-to-002.xht
  /css/CSS2/bidi-text/direction-applies-to-003.xht
  /css/CSS2/bidi-text/direction-applies-to-004.xht
  - Broken due to a bug in tables, not allocating the
    right amount of width for a column.

- /css/css-lists/inline-list.html
  - This fails because we wrongly insert a soft wrap opportunity between the
    start of an inline box and its first content.

- /css/css-text/bidi/bidi-lines-001.html
  /css/css-text/bidi/bidi-lines-002.html
  /css/CSS2/text/bidi-flag-emoji.html
  - We do not fully support unicode-bidi: plaintext

- /css/css-text/text-align/text-align-end-010.html
  /css/css-text/text-align/text-align-justify-006.html
  /css/css-text/text-align/text-align-start-010.html
  /html/dom/elements/global-attributes/*
  - We do not support dir=auto yet.

- /css/css-text/white-space/tab-bidi-001.html
  - Servo doesn't support tab stops

- /css/CSS2/positioning/abspos-block-level-001.html
  /css/css-text/word-break/word-break-normal-ar-000.html
  - Do not yet support RTL layout in block

- /css/css-text/white-space/pre-wrap-018.html
  - Even in RTL contexts, spaces at the end of the line must hang and
    not be reordered

- /css/css-text/white-space/trailing-space-and-text-alignment-rtl-002.html
  - We are letting spaces hang with white-space: pre, but they shouldn't
    hang.
```

Signed-off-by: Martin Robinson <mrobinson@igalia.com>
Co-authored-by: Rakhi Sharma <atbrakhi@igalia.com>
This commit is contained in:
Martin Robinson 2024-08-21 07:28:54 -07:00 committed by GitHub
parent 65bd5a3b99
commit 56280c6242
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 547 additions and 762 deletions

View file

@ -6,6 +6,7 @@ use std::mem;
use std::ops::Range;
use app_units::Au;
use base::text::is_bidi_control;
use fonts::{
FontCacheThread, FontContext, FontRef, GlyphRun, ShapingFlags, ShapingOptions,
LAST_RESORT_GLYPH_ADVANCE,
@ -21,6 +22,7 @@ use style::computed_values::word_break::T as WordBreak;
use style::properties::ComputedValues;
use style::str::char_is_whitespace;
use style::values::computed::OverflowWrap;
use unicode_bidi::{BidiInfo, Level};
use unicode_script::Script;
use xi_unicode::linebreak_property;
@ -73,6 +75,9 @@ pub(crate) struct TextRunSegment {
#[serde(skip_serializing)]
pub script: Script,
/// The bidi Level of this segment.
pub bidi_level: Level,
/// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
pub range: Range<usize>,
@ -85,10 +90,11 @@ pub(crate) struct TextRunSegment {
}
impl TextRunSegment {
fn new(font_index: usize, script: Script, start_offset: usize) -> Self {
fn new(font_index: usize, script: Script, bidi_level: Level, start_offset: usize) -> Self {
Self {
script,
font_index,
script,
bidi_level,
range: start_offset..start_offset,
runs: Vec::new(),
break_at_start: false,
@ -102,12 +108,17 @@ impl TextRunSegment {
&mut self,
new_font: &FontRef,
script: Script,
bidi_level: Level,
fonts: &[FontKeyAndMetrics],
) -> bool {
fn is_specific(script: Script) -> bool {
script != Script::Common && script != Script::Inherited
}
if bidi_level != self.bidi_level {
return false;
}
let current_font_key_and_metrics = &fonts[self.font_index];
if new_font.font_key != current_font_key_and_metrics.key ||
new_font.descriptor.pt_size != current_font_key_and_metrics.pt_size
@ -151,6 +162,7 @@ impl TextRunSegment {
run.glyph_store.clone(),
text_run,
self.font_index,
self.bidi_level,
);
}
}
@ -198,7 +210,7 @@ impl TextRunSegment {
text_style.overflow_wrap == OverflowWrap::Anywhere ||
text_style.overflow_wrap == OverflowWrap::BreakWord;
let mut last_slice_end = self.range.start;
let mut last_slice = self.range.start..self.range.start;
for break_index in linebreak_iter {
if *break_index == self.range.start {
self.break_at_start = true;
@ -206,12 +218,13 @@ impl TextRunSegment {
}
// Extend the slice to the next UAX#14 line break opportunity.
let mut slice = last_slice_end..*break_index;
let mut slice = last_slice.end..*break_index;
let word = &formatting_context_text[slice.clone()];
// Split off any trailing whitespace into a separate glyph run.
let mut whitespace = slice.end..slice.end;
let mut rev_char_indices = word.char_indices().rev().peekable();
let ends_with_newline = rev_char_indices
.peek()
.map_or(false, |&(_, character)| character == '\n');
@ -250,8 +263,8 @@ impl TextRunSegment {
continue;
}
// Only advance the last_slice_end if we are not going to try to expand the slice.
last_slice_end = *break_index;
// Only advance the last slice if we are not going to try to expand the slice.
last_slice = slice.start..*break_index;
// Push the non-whitespace part of the range.
if !slice.is_empty() {
@ -328,6 +341,7 @@ impl TextRun {
font_context: &FontContext<FontCacheThread>,
linebreaker: &mut LineBreaker,
font_cache: &mut Vec<FontKeyAndMetrics>,
bidi_info: &BidiInfo,
) {
let inherited_text_style = self.parent_style.get_inherited_text().clone();
let letter_spacing = if inherited_text_style.letter_spacing.0.px() != 0. {
@ -349,7 +363,7 @@ impl TextRun {
let style_word_spacing: Option<Au> = specified_word_spacing.to_length().map(|l| l.into());
let segments = self
.segment_text_by_font(formatting_context_text, font_context, font_cache)
.segment_text_by_font(formatting_context_text, font_context, font_cache, bidi_info)
.into_iter()
.map(|(mut segment, font)| {
let word_spacing = style_word_spacing.unwrap_or_else(|| {
@ -360,6 +374,10 @@ impl TextRun {
specified_word_spacing.to_used_value(Au::from_f64_px(space_width))
});
let mut flags = flags.clone();
if segment.bidi_level.is_rtl() {
flags.insert(ShapingFlags::RTL_FLAG);
}
let shaping_options = ShapingOptions {
letter_spacing,
word_spacing,
@ -390,6 +408,7 @@ impl TextRun {
formatting_context_text: &str,
font_context: &FontContext<FontCacheThread>,
font_cache: &mut Vec<FontKeyAndMetrics>,
bidi_info: &BidiInfo,
) -> Vec<(TextRunSegment, FontRef)> {
let font_group = font_context.font_group(self.parent_style.clone_font());
let mut current: Option<(TextRunSegment, FontRef)> = None;
@ -416,8 +435,12 @@ impl TextRun {
// If the existing segment is compatible with the character, keep going.
let script = Script::from(character);
let bidi_level = bidi_info.levels[current_byte_index];
if let Some(current) = current.as_mut() {
if current.0.update_if_compatible(&font, script, font_cache) {
if current
.0
.update_if_compatible(&font, script, bidi_level, font_cache)
{
continue;
}
}
@ -433,7 +456,7 @@ impl TextRun {
None => self.text_range.start,
};
let new = (
TextRunSegment::new(font_index, script, start_byte_index),
TextRunSegment::new(font_index, script, bidi_level, start_byte_index),
font,
);
if let Some(mut finished) = current.replace(new) {
@ -449,7 +472,12 @@ impl TextRun {
current = font_group.write().first(font_context).map(|font| {
let font_index = add_or_get_font(&font, font_cache);
(
TextRunSegment::new(font_index, Script::Common, self.text_range.start),
TextRunSegment::new(
font_index,
Script::Common,
Level::ltr(),
self.text_range.start,
),
font,
)
})
@ -496,6 +524,10 @@ fn char_does_not_change_font(character: char) -> bool {
if character == '\u{00A0}' {
return true;
}
if is_bidi_control(character) {
return false;
}
let class = linebreak_property(character);
class == XI_LINE_BREAKING_CLASS_CM ||
class == XI_LINE_BREAKING_CLASS_GL ||