From 8cd0bd3371a4a179972209e9e27d152b4eda289e Mon Sep 17 00:00:00 2001
From: fundor333
Date: Mon, 16 Jun 2025 22:36:47 +0200
Subject: [PATCH] Adds syndication adder script

Adds a script to automatically add syndication links to posts'
frontmatter based on RSS feeds from Mastodon, Bluesky and Medium.

It also updates the Makefile to include the new script, the `.gitignore`
file to ignore the log file, and the syndication partial to correctly
render syndication links.

It uses the source URL inside the content to find the local file.
---
 .gitignore                                    |   2 +
 action_script/syndication-adder.py            | 249 ++++++++++++++++++
 .../{temp.py => syndication-correction.py}    |   0
 layouts/partials/syndication.html             |  15 +-
 makefile                                      |   1 +
 poetry.lock                                   |   4 +-
 pyproject.toml                                |   1 +
 7 files changed, 257 insertions(+), 15 deletions(-)
 create mode 100644 action_script/syndication-adder.py
 rename action_script/{temp.py => syndication-correction.py} (100%)

diff --git a/.gitignore b/.gitignore
index 3cb3bbf3..1dfc15b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -185,3 +185,5 @@ OME/
 build
 
 .env
+
+log_syndication.csv
diff --git a/action_script/syndication-adder.py b/action_script/syndication-adder.py
new file mode 100644
index 00000000..b46ffdb8
--- /dev/null
+++ b/action_script/syndication-adder.py
@@ -0,0 +1,249 @@
+"""Add syndication links to the frontmatter of local posts.
+
+Reads the Mastodon, Bluesky and Medium RSS feeds, looks inside every item for
+a link back to the site, maps that link to a Markdown file under CARTELLA_POST
+and adds the syndicated URL to the "syndication" list in the file's YAML
+frontmatter. Every change is recorded in CSV_LOG.
+"""
+
+import csv
+import os
+import re
+from urllib.parse import urlparse
+
+import requests
+import yaml
+from bs4 import BeautifulSoup
+
+CARTELLA_POST = "content"
+# Same name that .gitignore excludes, so the log is never committed by mistake.
+CSV_LOG = "log_syndication.csv"
+SITE_DOMAIN = "fundor333.com"
+REQUEST_TIMEOUT = 30  # seconds; requests never times out unless told to
+MASTODON_FEED = "https://mastodon.social/users/fundor333.rss"
+BSKY_FEED = "https://bsky.app/profile/did:plc:u7piwonv4s27ysugjaa6im2q/rss"  # optional, if available
+MEDIUM_FEED = "https://medium.com/feed/@fundor333"
+
+# A delimiter is a line containing only "---". Splitting on the substring "---"
+# would also cut at horizontal rules and tables in the post body.
+DELIMITATORE_FRONTMATTER = re.compile(r"^---[ \t\r]*$", re.MULTILINE)
+
+
+def _testo_tag(item, nome):
+    """Return the stripped text of child tag *nome*, or "" when it is missing."""
+    tag = item.find(nome)
+    return tag.text.strip() if tag is not None else ""
+
+
+def _html_descrizione(item):
+    """Return the raw text of the item's description, or "" when it is missing."""
+    descrizione = item.find("description")
+    return descrizione.text if descrizione is not None else ""
+
+
+def _html_medium(item):
+    """Return the item's content:encoded text, falling back to the description."""
+    encoded = item.find("content:encoded")
+    return encoded.text if encoded is not None else _html_descrizione(item)
+
+
+def _appartiene_al_sito(url):
+    """Tell whether *url* is hosted on SITE_DOMAIN or one of its subdomains."""
+    host = (urlparse(url).hostname or "").lower()
+    return host == SITE_DOMAIN or host.endswith("." + SITE_DOMAIN)
+
+
+def _scarica_items(feed_url, fonte):
+    """Download *feed_url* and return its item tags; [] when the request fails."""
+    try:
+        resp = requests.get(feed_url, timeout=REQUEST_TIMEOUT)
+        resp.raise_for_status()
+    except requests.RequestException as exc:
+        # One unreachable feed must not stop the other feeds from being processed.
+        print(f"[!] Feed {fonte} non disponibile: {exc}")
+        return []
+    return BeautifulSoup(resp.content, "xml").find_all("item")
+
+
+def _processa_items(feed_url, fonte, estrai_link, estrai_html):
+    """Add each feed item's syndication link to the post it points back to.
+
+    Args:
+        feed_url: URL of the RSS feed to download.
+        fonte: Feed label used in console messages and in the log rows.
+        estrai_link: Callable returning the syndicated URL of an item.
+        estrai_html: Callable returning the HTML in which to look for the source link.
+
+    Returns:
+        A list of dicts (keys: file, source, syndication, feed), one per updated post.
+    """
+    aggiornamenti = []
+
+    for item in _scarica_items(feed_url, fonte):
+        link = estrai_link(item)
+        if not link:
+            continue
+
+        # Look for the canonical (source) link inside the item content
+        soup_descr = BeautifulSoup(estrai_html(item), "html.parser")
+        source_links = [
+            a["href"]
+            for a in soup_descr.find_all("a", href=True)
+            if _appartiene_al_sito(a["href"])
+        ]
+        if not source_links:
+            continue
+
+        source_url = source_links[0]
+        post_path = trova_file_post_da_source(source_url)
+        if not post_path:
+            continue
+
+        nuovi = aggiungi_syndication_a_post(post_path, [link])
+        if nuovi:
+            print(f"[✓] Aggiornato {post_path} da {fonte}")
+            aggiornamenti.append(
+                {
+                    "file": post_path,
+                    "source": source_url,
+                    "syndication": " | ".join(nuovi),
+                    "feed": fonte,
+                }
+            )
+
+    return aggiornamenti
+
+
+def process_feed_medium(feed_url, fonte):
+    """Process a Medium feed, where the syndicated URL is the item's link tag."""
+    return _processa_items(
+        feed_url, fonte, lambda item: _testo_tag(item, "link"), _html_medium
+    )
+
+
+def trova_file_post_da_source(source_url):
+    """Map a URL of the site to the local Markdown file of that post.
+
+    Returns the path of SLUG/index.md or SLUG.md under CARTELLA_POST, or None
+    when the URL has no path or neither file exists.
+    """
+    parsed = urlparse(source_url)
+    if parsed.scheme not in ("http", "https") or not parsed.netloc:
+        return None
+
+    # Only the path is used: a query string or fragment (e.g. tracking
+    # parameters) is not part of the slug.
+    slug_path = parsed.path.strip("/")
+    # The URL comes from feed content: never follow ".." out of CARTELLA_POST.
+    if not slug_path or ".." in slug_path.split("/"):
+        return None
+
+    file_index = os.path.join(CARTELLA_POST, slug_path, "index.md")
+    if os.path.exists(file_index):
+        return file_index
+
+    file_slug = os.path.join(CARTELLA_POST, slug_path + ".md")
+    if os.path.exists(file_slug):
+        return file_slug
+
+    return None
+
+
+def normalizza_url(url):
+    """Return *url* without trailing slashes, so equivalent links compare equal."""
+    return url.rstrip("/")
+
+
+def aggiungi_syndication_a_post(percorso_file, nuovi_link):
+    """Add *nuovi_link* to the "syndication" list in a post's YAML frontmatter.
+
+    The file is rewritten only when at least one link is not already listed.
+    Posts with TOML, missing or malformed frontmatter are reported and skipped.
+
+    Returns the sorted list of links that were actually added ([] if none).
+    """
+    with open(percorso_file, encoding="utf-8") as f:
+        content = f.read()
+
+    if content.startswith("+++"):
+        # Skipped, not raised: one TOML post must not abort the whole run.
+        print(f"Supporto TOML non ancora gestito: {percorso_file}")
+        return []
+    if not content.startswith("---"):
+        print(f"Formato frontmatter sconosciuto: {percorso_file}")
+        return []
+
+    # maxsplit=2 keeps everything after the closing delimiter in parts[2], even
+    # when the body contains further "---" lines.
+    parts = DELIMITATORE_FRONTMATTER.split(content, maxsplit=2)
+    if len(parts) < 3 or parts[0]:
+        print(f"Frontmatter non valido in {percorso_file}")
+        return []
+
+    try:
+        # An empty frontmatter loads as None
+        frontmatter = yaml.safe_load(parts[1]) or {}
+    except yaml.YAMLError:
+        frontmatter = None
+    if not isinstance(frontmatter, dict):
+        print(f"Frontmatter non valido in {percorso_file}")
+        return []
+
+    # "syndication:" with no value loads as None; a single link loads as a string
+    esistenti = frontmatter.get("syndication") or []
+    if not isinstance(esistenti, list):
+        esistenti = [esistenti]
+
+    esistenti_norm = {normalizza_url(str(link)) for link in esistenti}
+    nuovi_norm = {normalizza_url(link) for link in nuovi_link if link}
+    da_aggiungere = sorted(nuovi_norm - esistenti_norm)
+    if not da_aggiungere:
+        return []
+
+    frontmatter["syndication"] = sorted(esistenti_norm | nuovi_norm)
+    # NOTE(review): yaml.dump re-serializes the whole frontmatter, so comments and
+    # the original quoting/date formatting are not preserved - confirm Hugo is fine.
+    nuovo_frontmatter = yaml.dump(frontmatter, sort_keys=False, allow_unicode=True)
+    with open(percorso_file, "w", encoding="utf-8") as f:
+        f.write(f"---\n{nuovo_frontmatter}---{parts[2]}")
+    return da_aggiungere
+
+
+def process_feed(feed_url, fonte):
+    """Process a Mastodon or Bluesky feed, using the item's guid as the link."""
+    # NOTE(review): assumes the guid holds the public web URL of the post - confirm
+    # this for the Bluesky feed.
+    return _processa_items(
+        feed_url, fonte, lambda item: _testo_tag(item, "guid"), _html_descrizione
+    )
+
+
+def main():
+    """Process every configured feed and write the CSV log of the changes."""
+    log = []
+
+    print(">> Processando Mastodon")
+    log += process_feed(MASTODON_FEED, "mastodon")
+
+    if BSKY_FEED:
+        print(">> Processando Bluesky")
+        log += process_feed(BSKY_FEED, "bsky")
+
+    if MEDIUM_FEED:
+        print(">> Processando Medium")
+        log += process_feed_medium(MEDIUM_FEED, "medium")
+
+    if log:
+        with open(CSV_LOG, "w", encoding="utf-8", newline="") as f:
+            writer = csv.DictWriter(
+                f, fieldnames=["file", "source", "syndication", "feed"]
+            )
+            writer.writeheader()
+            writer.writerows(log)
+        print(f"\n[✓] Log salvato in {CSV_LOG}")
+    else:
+        print("\n[=] Nessuna modifica effettuata.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/action_script/temp.py b/action_script/syndication-correction.py
similarity index 100%
rename from action_script/temp.py
rename to action_script/syndication-correction.py
diff --git a/layouts/partials/syndication.html b/layouts/partials/syndication.html
index e5197caa..50c64e25 100644
--- a/layouts/partials/syndication.html
+++ b/layouts/partials/syndication.html
@@ -1,17 +1,11 @@
-{{ $urlized := (replace (absURL .RelPermalink) "http://localhost:1313" "https://fundor333.com") | md5 }}
-
-{{ if index .Site.Data.syndication $urlized }}
+{{with .Params.syndication}}

Syndication

This post was also syndicated to: - {{ $data:= index .Site.Data.syndication $urlized }} - - {{ $data:= $data.syndication }} - - {{ range $index, $url := $data}} + {{ range $index, $url := .}} {{- $parsed_url := urls.Parse $url -}} {{- if $index }}, {{- end }} {{ $parsed_url.Host }} @@ -20,8 +14,3 @@
{{ end }} -{{ if .Param "mp-syndicate-to" }} - {{ range .Param "mp-syndicate-to" }} - - {{ end }} -{{ end }} diff --git a/makefile b/makefile index 83a37874..f7373734 100644 --- a/makefile +++ b/makefile @@ -61,6 +61,7 @@ build: clean ## Build for dev .PHONY: syndication syndication: ## Syndication script @poetry run python action_script/syndication-collector.py + @poetry run python action_script/syndication-adder.py .PHONY: webmention webmention: ## Webmention script diff --git a/poetry.lock b/poetry.lock index 96aef48e..376ba3c6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -504,7 +504,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -698,4 +698,4 @@ rich = ">=13.9.4,<14.0.0" [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "870daf95268e9e14e55695e20903ed392d6313f8b71277b2ba9bb5241e703586" +content-hash = "7341842eda6fc17a7cc42cabe72c6bd8563429f468d5099977aa5ff0d7812463" diff --git a/pyproject.toml b/pyproject.toml index 1903fed8..13fa9b1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ python-dotenv = "*" weeknotebot = "*" pillow = "^11.2.1" beautifulsoup4 = "^4.13.4" +pyyaml = "^6.0.2" [tool.poetry.group.dev.dependencies]