Track articles with malformed wikitext
This commit is contained in:
parent
f055532220
commit
2870f7686e
3 changed files with 23 additions and 8 deletions
|
@ -113351,6 +113351,7 @@ Moonlight thief:
|
||||||
pageId: 130474
|
pageId: 130474
|
||||||
steam: 1006830
|
steam: 1006830
|
||||||
Moonlighter:
|
Moonlighter:
|
||||||
|
malformed: true
|
||||||
pageId: 61691
|
pageId: 61691
|
||||||
Moonlit Mayhem:
|
Moonlit Mayhem:
|
||||||
pageId: 51342
|
pageId: 51342
|
||||||
|
|
|
@ -67,7 +67,7 @@ impl SteamCache {
|
||||||
i += 1;
|
i += 1;
|
||||||
if i % SAVE_INTERVAL == 0 {
|
if i % SAVE_INTERVAL == 0 {
|
||||||
self.save();
|
self.save();
|
||||||
println!("\n:: saved\n");
|
println!("\n:: saved ({i})\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
28
src/wiki.rs
28
src/wiki.rs
|
@ -12,7 +12,6 @@ use crate::{
|
||||||
};
|
};
|
||||||
|
|
||||||
const SAVE_INTERVAL: u32 = 100;
|
const SAVE_INTERVAL: u32 = 100;
|
||||||
const NAMESPACES: &[&str] = &["Company:", "File:", "Series:", "Topic:"];
|
|
||||||
|
|
||||||
async fn make_client() -> Result<mediawiki::api::Api, Error> {
|
async fn make_client() -> Result<mediawiki::api::Api, Error> {
|
||||||
mediawiki::api::Api::new("https://www.pcgamingwiki.com/w/api.php")
|
mediawiki::api::Api::new("https://www.pcgamingwiki.com/w/api.php")
|
||||||
|
@ -296,12 +295,17 @@ impl WikiCache {
|
||||||
if let Some(new_title) = latest.new_title.take() {
|
if let Some(new_title) = latest.new_title.take() {
|
||||||
println!(" page {} redirected to '{}'", cached.page_id, &new_title);
|
println!(" page {} redirected to '{}'", cached.page_id, &new_title);
|
||||||
|
|
||||||
for namespace in NAMESPACES {
|
match is_game_article(&new_title).await {
|
||||||
if new_title.starts_with(namespace) {
|
Ok(true) => {}
|
||||||
|
Ok(false) => {
|
||||||
println!(" page is no longer a game");
|
println!(" page is no longer a game");
|
||||||
self.0.remove(title);
|
self.0.remove(title);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!(" unable to check if still a game: {e}");
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let cached = self.0.get(&new_title).cloned().unwrap_or_default();
|
let cached = self.0.get(&new_title).cloned().unwrap_or_default();
|
||||||
|
@ -327,12 +331,17 @@ impl WikiCache {
|
||||||
|
|
||||||
println!(" page {} renamed to '{}'", cached.page_id, &new_title);
|
println!(" page {} renamed to '{}'", cached.page_id, &new_title);
|
||||||
|
|
||||||
for namespace in NAMESPACES {
|
match is_game_article(&new_title).await {
|
||||||
if new_title.starts_with(namespace) {
|
Ok(true) => {}
|
||||||
|
Ok(false) => {
|
||||||
println!(" page is no longer a game");
|
println!(" page is no longer a game");
|
||||||
self.0.remove(title);
|
self.0.remove(title);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!(" unable to check if still a game: {e}");
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut latest = match WikiCacheEntry::fetch_from_page(new_title.clone()).await {
|
let mut latest = match WikiCacheEntry::fetch_from_page(new_title.clone()).await {
|
||||||
|
@ -365,7 +374,7 @@ impl WikiCache {
|
||||||
i += 1;
|
i += 1;
|
||||||
if i % SAVE_INTERVAL == 0 {
|
if i % SAVE_INTERVAL == 0 {
|
||||||
self.save();
|
self.save();
|
||||||
println!("\n:: saved\n");
|
println!("\n:: saved ({i})\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -384,6 +393,8 @@ pub struct WikiCacheEntry {
|
||||||
pub gog_side: BTreeSet<u64>,
|
pub gog_side: BTreeSet<u64>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub lutris: Option<String>,
|
pub lutris: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
||||||
|
pub malformed: bool,
|
||||||
pub page_id: u64,
|
pub page_id: u64,
|
||||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
pub renamed_from: Vec<String>,
|
pub renamed_from: Vec<String>,
|
||||||
|
@ -434,7 +445,10 @@ impl WikiCacheEntry {
|
||||||
.as_str()
|
.as_str()
|
||||||
.ok_or(Error::WikiData("parse.wikitext"))?;
|
.ok_or(Error::WikiData("parse.wikitext"))?;
|
||||||
|
|
||||||
let wikitext = wikitext_parser::parse_wikitext(raw_wikitext, article, |e| println!(" Error: {}", e));
|
let wikitext = wikitext_parser::parse_wikitext(raw_wikitext, article, |e| {
|
||||||
|
out.malformed = true;
|
||||||
|
println!(" Error: {}", e);
|
||||||
|
});
|
||||||
|
|
||||||
for template in wikitext.list_double_brace_expressions() {
|
for template in wikitext.list_double_brace_expressions() {
|
||||||
if let TextPiece::DoubleBraceExpression { tag, attributes } = &template {
|
if let TextPiece::DoubleBraceExpression { tag, attributes } = &template {
|
||||||
|
|
Reference in a new issue