// -*- coding: utf-8 -*- // // Simple CMS // // Copyright (C) 2011-2024 Michael Büsch // // Licensed under the Apache License version 2.0 // or the MIT license, at your option. // SPDX-License-Identifier: Apache-2.0 OR MIT use crate::{ comm::{CmsComm, CommGetPage, CommPage, CommSubPages}, config::CmsConfig, }; use anyhow as ah; use async_recursion::async_recursion; use cms_ident::{CheckedIdent, UrlComp}; use std::{fmt::Write as _, sync::Arc, write as wr, writeln as ln}; const MAX_DEPTH: usize = 64; const DEFAULT_ELEMS_ALLOC: usize = 256; const DEFAULT_HTML_ALLOC: usize = 1024 * 16; fn xml_escape(mut s: String) -> String { if !s.is_empty() { if s.contains('&') { s = s.replace('&', "&"); } if s.contains('\'') { s = s.replace('\'', "'"); } if s.contains('"') { s = s.replace('"', """); } if s.contains('>') { s = s.replace('>', ">"); } if s.contains('<') { s = s.replace('<', "<"); } } s } pub struct SiteMapContext<'a> { pub comm: &'a mut CmsComm, pub config: Arc, pub root: &'a CheckedIdent, pub protocol: &'a str, } struct SiteMapElem { loc: String, lastmod: String, changefreq: String, priority: String, } #[async_recursion] async fn build_elems( ctx: &mut SiteMapContext<'_>, elems: &mut Vec, ident: &CheckedIdent, depth: usize, ) -> ah::Result<()> { if depth >= MAX_DEPTH { return Ok(()); } let Ok(CommPage { nav_stop, stamp, .. }) = ctx .comm .get_db_page(CommGetPage { path: ident.clone(), get_nav_stop: true, get_stamp: true, ..Default::default() }) .await else { return Ok(()); }; if nav_stop.unwrap_or(true) { return Ok(()); } let stamp = stamp.unwrap_or_default().format("%Y-%m-%dT%H:%M:%SZ"); let Ok(CommSubPages { mut names, .. }) = ctx.comm.get_db_sub_pages(ident).await else { return Ok(()); }; let loc = ident.url(UrlComp { protocol: Some(ctx.protocol), domain: Some(ctx.config.domain()), base: Some(ctx.config.url_base()), }); let lastmod; let changefreq; let priority; if depth == 1 { // Main groups lastmod = String::new(); changefreq = "monthly".to_string(); priority = "0.3".to_string(); } else { // Pages, main page and sub groups lastmod = stamp.to_string(); changefreq = String::new(); priority = "0.7".to_string(); } elems.push(SiteMapElem { loc, lastmod, changefreq, priority, }); names.sort_unstable(); for name in &names { let sub_ident = ident.clone_append(name).into_checked()?; build_elems(ctx, elems, &sub_ident, depth + 1).await?; } Ok(()) } async fn build_user_elems( ctx: &mut SiteMapContext<'_>, elems: &mut Vec, ) -> ah::Result<()> { let user_site_map = ctx.comm.get_db_string("site-map").await?; for line in user_site_map.lines() { let line = line.trim(); if line.is_empty() || line.starts_with('#') { continue; } let mut line = line.split_whitespace(); let Some(loc) = line.next() else { continue; }; let loc = format!("{}://{}/{}", ctx.protocol, ctx.config.domain(), loc); let priority = line.next().unwrap_or("0.7"); let changefreq = line.next().unwrap_or("always"); elems.push(SiteMapElem { loc, lastmod: String::new(), changefreq: changefreq.to_string(), priority: priority.to_string(), }); } Ok(()) } /// Site map generator. /// Specification: https://www.sitemaps.org/protocol.html pub struct SiteMap { elems: Vec, } impl SiteMap { pub async fn build(mut ctx: SiteMapContext<'_>) -> ah::Result { let mut elems = Vec::with_capacity(DEFAULT_ELEMS_ALLOC); let root = ctx.root.clone(); build_elems(&mut ctx, &mut elems, &root, 0).await?; build_user_elems(&mut ctx, &mut elems).await?; Ok(Self { elems }) } #[rustfmt::skip] pub fn get_xml(&self) -> ah::Result { let mut b = String::with_capacity(DEFAULT_HTML_ALLOC); ln!(b, r#""#)?; wr!(b, r#""#)?; for elem in &self.elems { let loc = xml_escape(elem.loc.clone()); let lastmod = xml_escape(elem.lastmod.clone()); let changefreq = xml_escape(elem.changefreq.clone()); let priority = xml_escape(elem.priority.clone()); ln!(b, r#""#)?; if !loc.is_empty() { ln!(b, r#"{loc}"#)?; } if !lastmod.is_empty() { ln!(b, r#"{lastmod}"#)?; } if !changefreq.is_empty() { ln!(b, r#"{changefreq}"#)?; } if !priority.is_empty() { ln!(b, r#"{priority}"#)?; } ln!(b, r#""#)?; } wr!(b, r#""#)?; Ok(b) } } // vim: ts=4 sw=4 expandtab