Adds syndication adder script

Adds a script to automatically add syndication links to posts' frontmatter based on RSS feeds from Mastodon, Bluesky and Medium.

It also updates the Makefile to include the new script, the `.gitignore` file to ignore the log file, and the syndication partial to correctly render syndication links.

The script locates each local post file by searching the syndicated content for the original source URL pointing back to the site.
This commit is contained in:
fundor333
2025-06-16 22:36:47 +02:00
committed by Fundor333
parent 90214c22e7
commit 8cd0bd3371
7 changed files with 193 additions and 15 deletions

2
.gitignore vendored
View File

@@ -185,3 +185,5 @@ OME/
build build
.env .env
log_syndication.csv

View File

@@ -0,0 +1,185 @@
import requests
from bs4 import BeautifulSoup
import os
import re
import yaml
import csv
# Root of the Hugo content tree; post files are resolved relative to it.
CARTELLA_POST = "content"
# CSV written at the end of a run with one row per updated post.
CSV_LOG = "log_feed.csv"
# RSS feeds scanned for syndicated copies of the site's posts.
MASTODON_FEED = "https://mastodon.social/users/fundor333.rss"
BSKY_FEED = "https://bsky.app/profile/did:plc:u7piwonv4s27ysugjaa6im2q/rss"  # optional, if available
MEDIUM_FEED = "https://medium.com/feed/@fundor333"
def process_feed_medium(feed_url, fonte):
    """Scan a Medium RSS feed and record each entry as syndication of a post.

    For every feed item, the original post on fundor333.com is located by
    searching the item's HTML content for a link back to the site; the
    Medium permalink is then merged into that post's ``syndication``
    frontmatter list.

    Args:
        feed_url: URL of the Medium RSS feed.
        fonte: Label for the feed source, stored in the returned log rows.

    Returns:
        A list of dicts with keys ``file``, ``source``, ``syndication``,
        ``feed`` — one per post that was actually updated.
    """
    aggiornamenti = []
    # A request without a timeout can hang the whole run; bound it, and
    # fail loudly on HTTP errors instead of silently parsing an error page.
    resp = requests.get(feed_url, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, "xml")
    for item in soup.find_all("item"):
        link_tag = item.find("link")
        if link_tag is None:
            continue  # malformed item: skip instead of crashing on .text
        link_medium = link_tag.text.strip()
        # Prefer content:encoded (full body), fall back to description.
        encoded = item.find("content:encoded")
        descrizione = item.find("description")
        content_html = (
            encoded.text if encoded else (descrizione.text if descrizione else "")
        )
        soup_descr = BeautifulSoup(content_html, "html.parser")
        source_links = [
            a["href"]
            for a in soup_descr.find_all("a", href=True)
            if "fundor333.com" in a["href"]
        ]
        if not source_links:
            continue
        source_url = source_links[0]
        post_path = trova_file_post_da_source(source_url)
        if not post_path:
            continue
        nuovi = aggiungi_syndication_a_post(post_path, [link_medium])
        if nuovi:
            print(f"[✓] Aggiornato {post_path} da {fonte}")
            aggiornamenti.append(
                {
                    "file": post_path,
                    "source": source_url,
                    "syndication": " | ".join(nuovi),
                    "feed": fonte,
                }
            )
    return aggiornamenti
def trova_file_post_da_source(source_url):
    """Map a public post URL to its local content file.

    Extracts the slug path from ``source_url`` and probes the two layouts
    Hugo uses: a page bundle (``<slug>/index.md``) or a flat file
    (``<slug>.md``). Returns the first existing path, or None when the URL
    does not parse or no file is found.
    """
    match = re.search(r"https?://[^/]+/(.+?)/?$", source_url)
    if match is None:
        return None
    slug = match.group(1).rstrip("/")
    # Page bundles live at <content>/<slug>/index.md ...
    candidato = os.path.join(CARTELLA_POST, slug, "index.md")
    if os.path.exists(candidato):
        return candidato
    # ... plain posts at <content>/<slug>.md
    candidato = os.path.join(CARTELLA_POST, f"{slug}.md")
    return candidato if os.path.exists(candidato) else None
def normalizza_url(url):
    """Canonicalize a URL for comparison by dropping all trailing slashes."""
    while url.endswith("/"):
        url = url[:-1]
    return url
def aggiungi_syndication_a_post(percorso_file, nuovi_link):
    """Merge ``nuovi_link`` into a post's ``syndication`` frontmatter list.

    URLs are compared with trailing slashes stripped so the same link is
    never added twice. The file is rewritten only when at least one new
    link is added.

    Args:
        percorso_file: Path to the markdown post file.
        nuovi_link: Candidate syndication URLs.

    Returns:
        The list of links that were actually added (empty when nothing
        changed or the frontmatter could not be parsed).

    Raises:
        NotImplementedError: For TOML (``+++``) frontmatter.
    """
    with open(percorso_file, encoding="utf-8") as f:
        content = f.read()
    if content.startswith("+++"):
        raise NotImplementedError("Supporto TOML non ancora gestito.")
    elif content.startswith("---"):
        # Split at most twice: a literal "---" later in the body (e.g. a
        # markdown horizontal rule) must remain inside parts[2]; an
        # unbounded split would make the rewrite below truncate the post.
        parts = content.split("---", 2)
        if len(parts) < 3:
            print(f"Frontmatter non valido in {percorso_file}")
            return []
        # Empty frontmatter parses to None; treat it as an empty mapping.
        frontmatter = yaml.safe_load(parts[1]) or {}
        esistenti = frontmatter.get("syndication", [])
        esistenti_norm = set(map(normalizza_url, esistenti))
        nuovi_norm = set(map(normalizza_url, nuovi_link))
        da_aggiungere = list(nuovi_norm - esistenti_norm)
        if da_aggiungere:
            frontmatter["syndication"] = sorted(esistenti_norm.union(nuovi_norm))
            nuovo_frontmatter = yaml.dump(
                frontmatter, sort_keys=False, allow_unicode=True
            )
            nuovo_content = f"---\n{nuovo_frontmatter}---{parts[2]}"
            with open(percorso_file, "w", encoding="utf-8") as f:
                f.write(nuovo_content)
            return da_aggiungere
        else:
            return []
    else:
        print(f"Formato frontmatter sconosciuto: {percorso_file}")
        return []
def process_feed(feed_url, fonte):
    """Scan a generic RSS feed (Mastodon/Bluesky) and update matching posts.

    Each item's ``<guid>`` is used as the syndication URL; the original
    post is found via a fundor333.com link inside the item's
    ``<description>`` HTML.

    Args:
        feed_url: URL of the RSS feed.
        fonte: Label for the feed source, stored in the returned log rows.

    Returns:
        A list of dicts with keys ``file``, ``source``, ``syndication``,
        ``feed`` — one per post that was actually updated.
    """
    aggiornamenti = []
    # Bound the request so a slow feed cannot hang the whole run, and
    # fail loudly on HTTP errors instead of silently parsing an error page.
    resp = requests.get(feed_url, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, "xml")
    for item in soup.find_all("item"):
        guid_tag = item.find("guid")
        descrizione = item.find("description")
        if guid_tag is None or descrizione is None:
            continue  # malformed item: skip instead of crashing on .text
        guid = guid_tag.text.strip()
        # Look for the canonical (source) link inside the content.
        soup_descr = BeautifulSoup(descrizione.text, "html.parser")
        source_links = [
            a["href"]
            for a in soup_descr.find_all("a", href=True)
            if "fundor333.com" in a["href"]
        ]
        if not source_links:
            continue
        source_url = source_links[0]
        post_path = trova_file_post_da_source(source_url)
        if not post_path:
            continue
        nuovi = aggiungi_syndication_a_post(post_path, [guid])
        if nuovi:
            print(f"[✓] Aggiornato {post_path} da {fonte}")
            aggiornamenti.append(
                {
                    "file": post_path,
                    "source": source_url,
                    "syndication": " | ".join(nuovi),
                    "feed": fonte,
                }
            )
    return aggiornamenti
def main():
    """Process every configured feed, then write a CSV log of the updates."""
    log = []
    print(">> Processando Mastodon")
    log.extend(process_feed(MASTODON_FEED, "mastodon"))
    if BSKY_FEED:
        print(">> Processando Bluesky")
        log.extend(process_feed(BSKY_FEED, "bsky"))
    if MEDIUM_FEED:
        print(">> Processando Medium")
        log.extend(process_feed_medium(MEDIUM_FEED, "medium"))
    # Guard clause: nothing changed, so skip writing the log file.
    if not log:
        print("\n[=] Nessuna modifica effettuata.")
        return
    with open(CSV_LOG, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(
            f, fieldnames=["file", "source", "syndication", "feed"]
        )
        writer.writeheader()
        writer.writerows(log)
    print(f"\n[✓] Log salvato in {CSV_LOG}")


if __name__ == "__main__":
    main()

View File

@@ -1,17 +1,11 @@
{{ $urlized := (replace (absURL .RelPermalink) "http://localhost:1313" "https://fundor333.com") | md5 }} {{with .Params.syndication}}
{{ if index .Site.Data.syndication $urlized }}
<hr> <hr>
<div class="syndication"> <div class="syndication">
<h3>Syndication</h3> <h3>Syndication</h3>
<i class="fas fa-link"></i> <i class="fas fa-link"></i>
This post was also syndicated to: This post was also syndicated to:
{{ $data:= index .Site.Data.syndication $urlized }} {{ range $index, $url := .}}
{{ $data:= $data.syndication }}
{{ range $index, $url := $data}}
{{- $parsed_url := urls.Parse $url -}} {{- $parsed_url := urls.Parse $url -}}
{{- if $index }}, {{- end }} {{- if $index }}, {{- end }}
<a target="_blank" class="u-syndication" href="{{ $url }}" rel="syndication">{{ $parsed_url.Host }}</a> <a target="_blank" class="u-syndication" href="{{ $url }}" rel="syndication">{{ $parsed_url.Host }}</a>
@@ -20,8 +14,3 @@
<br> <br>
{{ end }} {{ end }}
{{ if .Param "mp-syndicate-to" }}
{{ range .Param "mp-syndicate-to" }}
<a class="u-syndication" href="{{ . }}"></a>
{{ end }}
{{ end }}

View File

@@ -61,6 +61,7 @@ build: clean ## Build for dev
.PHONY: syndication .PHONY: syndication
syndication: ## Syndication script syndication: ## Syndication script
@poetry run python action_script/syndication-collector.py @poetry run python action_script/syndication-collector.py
@poetry run python action_script/syndication-adder.py
.PHONY: webmention .PHONY: webmention
webmention: ## Webmention script webmention: ## Webmention script

4
poetry.lock generated
View File

@@ -504,7 +504,7 @@ version = "6.0.2"
description = "YAML parser and emitter for Python" description = "YAML parser and emitter for Python"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["dev"] groups = ["main", "dev"]
files = [ files = [
{file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
{file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
@@ -698,4 +698,4 @@ rich = ">=13.9.4,<14.0.0"
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "870daf95268e9e14e55695e20903ed392d6313f8b71277b2ba9bb5241e703586" content-hash = "7341842eda6fc17a7cc42cabe72c6bd8563429f468d5099977aa5ff0d7812463"

View File

@@ -14,6 +14,7 @@ python-dotenv = "*"
weeknotebot = "*" weeknotebot = "*"
pillow = "^11.2.1" pillow = "^11.2.1"
beautifulsoup4 = "^4.13.4" beautifulsoup4 = "^4.13.4"
pyyaml = "^6.0.2"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]