layout: Make all word separators justification opportunities (#30866)

This change adapts both layout and legacy layout to follow the
specification, which lists the word separators that should be used as
justification opportunities.
This commit is contained in:
Martin Robinson 2023-12-21 23:49:24 +01:00 committed by GitHub
parent 8e31daeb6b
commit 709d00583f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 150 additions and 37 deletions

View file

@ -71,7 +71,7 @@ pub type GlyphId = u32;
// TODO: make this more type-safe. // TODO: make this more type-safe.
const FLAG_CHAR_IS_SPACE: u32 = 0x40000000; const FLAG_CHAR_IS_WORD_SEPARATOR: u32 = 0x40000000;
const FLAG_IS_SIMPLE_GLYPH: u32 = 0x80000000; const FLAG_IS_SIMPLE_GLYPH: u32 = 0x80000000;
// glyph advance; in Au's. // glyph advance; in Au's.
@ -112,15 +112,19 @@ impl GlyphEntry {
self.value & GLYPH_ID_MASK self.value & GLYPH_ID_MASK
} }
/// True if original char was normal (U+0020) space. Other chars may /// True if the original character was a word separator. These include spaces
/// map to space glyph, but this does not account for them. /// (U+0020), non-breaking spaces (U+00A0), and a few other characters
fn char_is_space(&self) -> bool { /// non-exhaustively listed in the specification. Other characters may map to the same
self.has_flag(FLAG_CHAR_IS_SPACE) /// glyphs, but this function does not take mapping into account.
///
/// See https://drafts.csswg.org/css-text/#word-separator.
fn char_is_word_separator(&self) -> bool {
self.has_flag(FLAG_CHAR_IS_WORD_SEPARATOR)
} }
#[inline(always)] #[inline(always)]
fn set_char_is_space(&mut self) { fn set_char_is_word_separator(&mut self) {
self.value |= FLAG_CHAR_IS_SPACE; self.value |= FLAG_CHAR_IS_WORD_SEPARATOR;
} }
fn glyph_count(&self) -> u16 { fn glyph_count(&self) -> u16 {
@ -384,13 +388,13 @@ impl<'a> GlyphInfo<'a> {
} }
} }
pub fn char_is_space(self) -> bool { pub fn char_is_word_separator(self) -> bool {
let (store, entry_i) = match self { let (store, entry_i) = match self {
GlyphInfo::Simple(store, entry_i) => (store, entry_i), GlyphInfo::Simple(store, entry_i) => (store, entry_i),
GlyphInfo::Detail(store, entry_i, _) => (store, entry_i), GlyphInfo::Detail(store, entry_i, _) => (store, entry_i),
}; };
store.char_is_space(entry_i) store.char_is_word_separator(entry_i)
} }
} }
@ -427,8 +431,10 @@ pub struct GlyphStore {
/// A cache of the advance of the entire glyph store. /// A cache of the advance of the entire glyph store.
total_advance: Au, total_advance: Au,
/// A cache of the number of spaces in the entire glyph store.
total_spaces: i32, /// A cache of the number of word separators in the entire glyph store.
/// See https://drafts.csswg.org/css-text/#word-separator.
total_word_separators: i32,
/// Used to check if fast path should be used in glyph iteration. /// Used to check if fast path should be used in glyph iteration.
has_detailed_glyphs: bool, has_detailed_glyphs: bool,
@ -447,7 +453,7 @@ impl<'a> GlyphStore {
entry_buffer: vec![GlyphEntry::initial(); length], entry_buffer: vec![GlyphEntry::initial(); length],
detail_store: DetailedGlyphStore::new(), detail_store: DetailedGlyphStore::new(),
total_advance: Au(0), total_advance: Au(0),
total_spaces: 0, total_word_separators: 0,
has_detailed_glyphs: false, has_detailed_glyphs: false,
is_whitespace: is_whitespace, is_whitespace: is_whitespace,
is_rtl: is_rtl, is_rtl: is_rtl,
@ -469,23 +475,28 @@ impl<'a> GlyphStore {
self.is_whitespace self.is_whitespace
} }
#[inline]
pub fn total_word_separators(&self) -> i32 {
self.total_word_separators
}
pub fn finalize_changes(&mut self) { pub fn finalize_changes(&mut self) {
self.detail_store.ensure_sorted(); self.detail_store.ensure_sorted();
self.cache_total_advance_and_spaces() self.cache_total_advance_and_word_seperators()
} }
#[inline(never)] #[inline(never)]
fn cache_total_advance_and_spaces(&mut self) { fn cache_total_advance_and_word_seperators(&mut self) {
let mut total_advance = Au(0); let mut total_advance = Au(0);
let mut total_spaces = 0; let mut total_word_separators = 0;
for glyph in self.iter_glyphs_for_byte_range(&Range::new(ByteIndex(0), self.len())) { for glyph in self.iter_glyphs_for_byte_range(&Range::new(ByteIndex(0), self.len())) {
total_advance = total_advance + glyph.advance(); total_advance = total_advance + glyph.advance();
if glyph.char_is_space() { if glyph.char_is_word_separator() {
total_spaces += 1; total_word_separators += 1;
} }
} }
self.total_advance = total_advance; self.total_advance = total_advance;
self.total_spaces = total_spaces; self.total_word_separators = total_word_separators;
} }
/// Adds a single glyph. /// Adds a single glyph.
@ -507,8 +518,20 @@ impl<'a> GlyphStore {
GlyphEntry::complex(data.cluster_start, data.ligature_start, 1) GlyphEntry::complex(data.cluster_start, data.ligature_start, 1)
}; };
if character == ' ' { // This list is taken from the non-exhaustive list of word separator characters in
entry.set_char_is_space() // the CSS Text Module Level 3 Spec:
// See https://drafts.csswg.org/css-text/#word-separator
if matches!(
character,
' ' |
'\u{00A0}' | // non-breaking space
'\u{1361}' | // Ethiopic word space
'\u{10100}' | // Aegean word separator
'\u{10101}' | // Aegean word separator
'\u{1039F}' | // Ugaritic word divider
'\u{1091F}' // Phoenician word separator
) {
entry.set_char_is_word_separator();
} }
self.entry_buffer[i.to_usize()] = entry; self.entry_buffer[i.to_usize()] = entry;
@ -583,7 +606,7 @@ impl<'a> GlyphStore {
let mut index = 0; let mut index = 0;
let mut current_advance = Au(0); let mut current_advance = Au(0);
for glyph in self.iter_glyphs_for_byte_range(range) { for glyph in self.iter_glyphs_for_byte_range(range) {
if glyph.char_is_space() { if glyph.char_is_word_separator() {
current_advance += glyph.advance() + extra_word_spacing current_advance += glyph.advance() + extra_word_spacing
} else { } else {
current_advance += glyph.advance() current_advance += glyph.advance()
@ -599,7 +622,7 @@ impl<'a> GlyphStore {
#[inline] #[inline]
pub fn advance_for_byte_range(&self, range: &Range<ByteIndex>, extra_word_spacing: Au) -> Au { pub fn advance_for_byte_range(&self, range: &Range<ByteIndex>, extra_word_spacing: Au) -> Au {
if range.begin() == ByteIndex(0) && range.end() == self.len() { if range.begin() == ByteIndex(0) && range.end() == self.len() {
self.total_advance + extra_word_spacing * self.total_spaces self.total_advance + extra_word_spacing * self.total_word_separators
} else if !self.has_detailed_glyphs { } else if !self.has_detailed_glyphs {
self.advance_for_byte_range_simple_glyphs(range, extra_word_spacing) self.advance_for_byte_range_simple_glyphs(range, extra_word_spacing)
} else { } else {
@ -615,7 +638,7 @@ impl<'a> GlyphStore {
) -> Au { ) -> Au {
self.iter_glyphs_for_byte_range(range) self.iter_glyphs_for_byte_range(range)
.fold(Au(0), |advance, glyph| { .fold(Au(0), |advance, glyph| {
if glyph.char_is_space() { if glyph.char_is_word_separator() {
advance + glyph.advance() + extra_word_spacing advance + glyph.advance() + extra_word_spacing
} else { } else {
advance + glyph.advance() advance + glyph.advance()
@ -623,15 +646,15 @@ impl<'a> GlyphStore {
}) })
} }
pub fn char_is_space(&self, i: ByteIndex) -> bool { pub fn char_is_word_separator(&self, i: ByteIndex) -> bool {
assert!(i < self.len()); assert!(i < self.len());
self.entry_buffer[i.to_usize()].char_is_space() self.entry_buffer[i.to_usize()].char_is_word_separator()
} }
pub fn space_count_in_range(&self, range: &Range<ByteIndex>) -> u32 { pub fn word_separator_count_in_range(&self, range: &Range<ByteIndex>) -> u32 {
let mut spaces = 0; let mut spaces = 0;
for index in range.each_index() { for index in range.each_index() {
if self.char_is_space(index) { if self.char_is_word_separator(index) {
spaces += 1 spaces += 1
} }
} }

View file

@ -3036,7 +3036,7 @@ fn convert_text_run_to_glyphs(
for slice in text_run.natural_word_slices_in_visual_order(&range) { for slice in text_run.natural_word_slices_in_visual_order(&range) {
for glyph in slice.glyphs.iter_glyphs_for_byte_range(&slice.range) { for glyph in slice.glyphs.iter_glyphs_for_byte_range(&slice.range) {
let glyph_advance = if glyph.char_is_space() { let glyph_advance = if glyph.char_is_word_separator() {
glyph.advance() + text_run.extra_word_spacing glyph.advance() + text_run.extra_word_spacing
} else { } else {
glyph.advance() glyph.advance()

View file

@ -1156,7 +1156,7 @@ impl InlineFlow {
.run .run
.character_slices_in_range(&fragment_range) .character_slices_in_range(&fragment_range)
{ {
expansion_opportunities += slice.glyphs.space_count_in_range(&slice.range) expansion_opportunities += slice.glyphs.word_separator_count_in_range(&slice.range)
} }
} }

View file

@ -783,7 +783,9 @@ fn glyphs(
point, point,
}; };
glyphs.push(glyph); glyphs.push(glyph);
} else { }
if glyph.char_is_word_separator() {
origin.x += justification_adjustment; origin.x += justification_adjustment;
} }
origin.x += Length::from(glyph.advance()); origin.x += Length::from(glyph.advance());

View file

@ -976,10 +976,8 @@ impl<'a, 'b> InlineFormattingContextState<'a, 'b> {
if is_non_preserved_whitespace { if is_non_preserved_whitespace {
self.current_line_segment.trailing_whitespace_size = inline_advance; self.current_line_segment.trailing_whitespace_size = inline_advance;
} }
self.current_line_segment.justification_opportunities +=
if glyph_store.is_whitespace() { glyph_store.total_word_separators() as usize;
self.current_line_segment.justification_opportunities += 1;
}
match self.current_line_segment.line_items.last_mut() { match self.current_line_segment.line_items.last_mut() {
Some(LineItem::TextRun(text_run)) => { Some(LineItem::TextRun(text_run)) => {
@ -2124,9 +2122,7 @@ impl TextRunLineItem {
.text .text
.iter() .iter()
.map(|glyph_store| { .map(|glyph_store| {
if glyph_store.is_whitespace() { number_of_justification_opportunities += glyph_store.total_word_separators();
number_of_justification_opportunities += 1
}
Length::from(glyph_store.total_advance()) Length::from(glyph_store.total_advance())
}) })
.sum(); .sum();

View file

@ -250748,6 +250748,19 @@
], ],
{} {}
] ]
],
"text-justify-word-separators.html": [
"028b69e40e0c6761187bbb1435873fb5955ff67e",
[
null,
[
[
"/css/css-text/text-justify/text-justify-word-separators-ref.html",
"=="
]
],
{}
]
] ]
}, },
"text-spacing-trim": { "text-spacing-trim": {
@ -418199,6 +418212,10 @@
"text-justify-none-001-ref.html": [ "text-justify-none-001-ref.html": [
"c8500ac9f38a2a6c843e4bfbb383f6d20c77f8c3", "c8500ac9f38a2a6c843e4bfbb383f6d20c77f8c3",
[] []
],
"text-justify-word-separators-ref.html": [
"99154cff3923b181a080c218b6b399dbbc558761",
[]
] ]
}, },
"text-spacing-trim": { "text-spacing-trim": {

View file

@ -0,0 +1,28 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="author" title="Martin Robinson" href="mailto:mrobinson@igalia.com">
<link rel="help" href="https://drafts.csswg.org/css-text/#word-separator">
<meta name="assert" content="text-justify:inter-word should adjust spacing at all word separators.">
<link rel="stylesheet" type="text/css" href="/fonts/ahem.css" />
<!-- Every block shares one rule: 10px Ahem text, justified with
     text-justify: inter-word inside a 120px-wide bordered box.
     NOTE(review): with Ahem each glyph is presumably 10px wide, so the
     14-character line should overflow the box and wrap, leaving a
     justified first line; confirm against the Ahem documentation. -->
<style>
.justified {
font: 10px/1 Ahem;
text-align: justify;
text-justify: inter-word;
width: 120px;
border: solid 1px black;
}
</style>
</head>
<body>
<!-- Reference rendering: seven identical justified blocks whose words are
     separated only by ordinary spaces. There is one block per case in the
     test that references this file. -->
<div class="justified">XXXX XXXX XXXX</div>
<div class="justified">XXXX XXXX XXXX</div>
<div class="justified">XXXX XXXX XXXX</div>
<div class="justified">XXXX XXXX XXXX</div>
<div class="justified">XXXX XXXX XXXX</div>
<div class="justified">XXXX XXXX XXXX</div>
<div class="justified">XXXX XXXX XXXX</div>
</body>
</html>

View file

@ -0,0 +1,47 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>CSS Text 6.4. Justification Method: text-justify: inter-word</title>
<link rel="author" title="Martin Robinson" href="mailto:mrobinson@igalia.com">
<link rel="help" href="https://drafts.csswg.org/css-text/#word-separator">
<!-- Reftest: this page must render the same as the reference linked below. -->
<link rel='match' href='text-justify-word-separators-ref.html'>
<meta name="assert" content="text-justify:inter-word should adjust spacing at all word separators.">
<link rel="stylesheet" type="text/css" href="/fonts/ahem.css" />
<style>
.justified {
font: 10px/1 Ahem;
text-align: justify;
text-justify: inter-word;
width: 120px;
border: solid 1px black;
}
/* Hide the word separators, in case the system doesn't
have an appropriate font installed and shows tofu.
Justification should still work in this case. */
.hidden {
color: transparent;
}
</style>
</head>
<body>
<!-- Each block after the first replaces the gap between the first two words
     with a different word separator, wrapped in a transparent span. The gap
     between the last two words is always an ordinary space. -->
<!-- A normal space -->
<div class="justified">XXXX XXXX XXXX</div>
<!-- Non-breaking space -->
<div class="justified">XXXX<span class="hidden">&nbsp;</span>XXXX XXXX</div>
<!-- Ethiopic word space -->
<div class="justified">XXXX<span class="hidden">&#x1361;</span>XXXX XXXX</div>
<!-- Aegean word separators -->
<div class="justified">XXXX<span class="hidden">&#x10100;</span>XXXX XXXX</div>
<div class="justified">XXXX<span class="hidden">&#x10101;</span>XXXX XXXX</div>
<!-- Ugaritic word divider -->
<div class="justified">XXXX<span class="hidden">&#x1039F;</span>XXXX XXXX</div>
<!-- Phoenician word separator -->
<div class="justified">XXXX<span class="hidden">&#x1091F;</span>XXXX XXXX</div>
</body>
</html>