Expand the 'irregular' command to cover more cases

This commit is contained in:
mtkennerly 2023-12-09 14:48:44 +08:00
parent c0906c16a4
commit 716a2a1dfe
No known key found for this signature in database
GPG key ID: E764BE00BE6E6408
2 changed files with 34 additions and 9 deletions

View file

@ -33,6 +33,27 @@ impl State {
} }
} }
/// How well a piece of data conforms to the expected form.
///
/// The variants are declared from least to most severe. Because `PartialOrd`
/// and `Ord` are derived, that declaration order is also the comparison order
/// (`Regular < Semiregular < Irregular`), so reordering the variants changes
/// the result of every comparison on this type.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
pub enum Regularity {
    /// Normal and may be included in the data set
    // This is the value produced by `Regularity::default()`.
    #[default]
    Regular,
    /// Somewhat irregular, but still usable for the data set
    Semiregular,
    /// Fully irregular and should be excluded from the data set
    Irregular,
}
impl Regularity {
    /// Returns whichever of `self` and `other` is the more severe.
    ///
    /// Severity is the derived ordering of the enum, so the result is the
    /// greater of the two values (`Regular < Semiregular < Irregular`).
    /// When both are equal, that shared value is returned.
    pub fn worst(&self, other: Self) -> Self {
        // `Regularity` is `Copy`, so dereferencing hands `Ord::max` an owned value.
        Ord::max(*self, other)
    }
}
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub enum Error { pub enum Error {
#[error("Wiki client: {0}")] #[error("Wiki client: {0}")]

View file

@ -7,7 +7,7 @@ use wikitext_parser::{Attribute, TextPiece};
use crate::{ use crate::{
manifest::{placeholder, Os, Store, Tag}, manifest::{placeholder, Os, Store, Tag},
resource::ResourceFile, resource::ResourceFile,
Error, State, Error, Regularity, State,
}; };
const SAVE_INTERVAL: u32 = 100; const SAVE_INTERVAL: u32 = 100;
@ -462,7 +462,7 @@ impl WikiCacheEntry {
pub fn any_irregular_paths(&self, article: String) -> bool { pub fn any_irregular_paths(&self, article: String) -> bool {
for path in self.parse_all_paths(article) { for path in self.parse_all_paths(article) {
if path.irregular() { if path.irregular() || path.semiregular() {
return true; return true;
} }
} }
@ -479,7 +479,7 @@ pub enum PathKind {
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct WikiPath { pub struct WikiPath {
pub composite: String, pub composite: String,
pub irregular: bool, pub regularity: Regularity,
pub kind: Option<PathKind>, pub kind: Option<PathKind>,
pub store: Option<Store>, pub store: Option<Store>,
pub os: Option<Os>, pub os: Option<Os>,
@ -488,9 +488,7 @@ pub struct WikiPath {
impl WikiPath { impl WikiPath {
fn incorporate(&mut self, other: Self) { fn incorporate(&mut self, other: Self) {
if other.irregular { self.regularity = self.regularity.worst(other.regularity);
self.irregular = true;
}
if other.kind.is_some() { if other.kind.is_some() {
self.kind = other.kind; self.kind = other.kind;
@ -507,7 +505,7 @@ impl WikiPath {
pub fn incorporate_text(&mut self, text: &str) { pub fn incorporate_text(&mut self, text: &str) {
if text.contains(['<', '>']) { if text.contains(['<', '>']) {
self.irregular = true; self.regularity = Regularity::Irregular;
} else { } else {
self.composite += text; self.composite += text;
} }
@ -682,7 +680,11 @@ impl WikiPath {
} }
fn irregular(&self) -> bool { fn irregular(&self) -> bool {
self.irregular || self.composite.contains("{{") self.regularity == Regularity::Irregular || self.composite.contains("{{")
}
fn semiregular(&self) -> bool {
self.regularity == Regularity::Semiregular
} }
pub fn usable(&self) -> bool { pub fn usable(&self) -> bool {
@ -714,6 +716,8 @@ pub fn flatten_path(attribute: &Attribute) -> WikiPath {
} }
} }
"code" | "file" => { "code" | "file" => {
// These could be used for a path segment or for a note, but we assume path segment.
out.regularity = Regularity::Semiregular;
out.composite += "*"; out.composite += "*";
} }
"localizedpath" => { "localizedpath" => {
@ -726,7 +730,7 @@ pub fn flatten_path(attribute: &Attribute) -> WikiPath {
// Ignored. // Ignored.
} }
_ => { _ => {
out.irregular = true; out.regularity = Regularity::Irregular;
} }
}, },
TextPiece::InternalLink { .. } => {} TextPiece::InternalLink { .. } => {}