Adds a script to automatically add syndication links to posts' frontmatter based on RSS feeds from Mastodon, Bluesky and Medium. It also updates the Makefile to include the new script, the `.gitignore` file to ignore the log file, and the syndication partial to correctly render syndication links. It uses the source URL inside the content to find the local file.
186 lines · 5.4 KiB · Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
import os
|
|
import re
|
|
import yaml
|
|
import csv
|
|
|
|
# Root folder of the site's content tree holding the Markdown posts.
CARTELLA_POST = "content"

# CSV file where a summary of every applied frontmatter update is written.
CSV_LOG = "log_feed.csv"

# RSS feeds scanned for syndicated copies of the site's posts.
MASTODON_FEED = "https://mastodon.social/users/fundor333.rss"
BSKY_FEED = "https://bsky.app/profile/did:plc:u7piwonv4s27ysugjaa6im2q/rss"  # optional, if available
MEDIUM_FEED = "https://medium.com/feed/@fundor333"
|
|
|
|
|
|
def process_feed_medium(feed_url, fonte):
    """Scan a Medium RSS feed and record syndication links on local posts.

    For each feed item, the item's HTML content is searched for a link back
    to fundor333.com (the canonical source). When a matching local post file
    exists, the Medium item URL is appended to that post's ``syndication``
    frontmatter list.

    Args:
        feed_url: URL of the Medium RSS feed.
        fonte: Label recorded in the returned log entries (e.g. "medium").

    Returns:
        A list of dicts with keys ``file``, ``source``, ``syndication`` and
        ``feed`` — one entry per post that was actually updated.
    """
    aggiornamenti = []
    # Timeout so the script cannot hang forever on an unresponsive feed.
    resp = requests.get(feed_url, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, "xml")

    for item in soup.find_all("item"):
        link_tag = item.find("link")
        if link_tag is None:
            continue  # malformed item: skip instead of crashing
        link_medium = link_tag.text.strip()

        # Prefer content:encoded, fall back to description.
        # NOTE(review): some XML parsers expose the namespaced tag by its
        # local name "encoded" only, so try both spellings.
        encoded = item.find("content:encoded") or item.find("encoded")
        descrizione = item.find("description")

        content_html = (
            encoded.text if encoded else (descrizione.text if descrizione else "")
        )
        soup_descr = BeautifulSoup(content_html, "html.parser")
        source_links = [
            a["href"]
            for a in soup_descr.find_all("a", href=True)
            if "fundor333.com" in a["href"]
        ]

        if not source_links:
            continue

        source_url = source_links[0]
        post_path = trova_file_post_da_source(source_url)
        if not post_path:
            continue

        nuovi = aggiungi_syndication_a_post(post_path, [link_medium])
        if nuovi:
            print(f"[✓] Aggiornato {post_path} da {fonte}")
            aggiornamenti.append(
                {
                    "file": post_path,
                    "source": source_url,
                    "syndication": " | ".join(nuovi),
                    "feed": fonte,
                }
            )

    return aggiornamenti
|
|
|
|
|
|
def trova_file_post_da_source(source_url):
    """Map a canonical site URL to the local Markdown file of that post.

    The path portion of the URL is taken as the content slug, then
    ``content/<slug>/index.md`` is tried first and ``content/<slug>.md``
    second. Returns the first existing path, or ``None`` when the URL has
    no usable path or no matching file exists.
    """
    match = re.search(r"https?://[^/]+/(.+?)/?$", source_url)
    if match is None:
        return None

    slug = match.group(1).rstrip("/")
    candidati = (
        os.path.join(CARTELLA_POST, slug, "index.md"),
        os.path.join(CARTELLA_POST, slug + ".md"),
    )
    for percorso in candidati:
        if os.path.exists(percorso):
            return percorso
    return None
|
|
|
|
|
|
def normalizza_url(url):
    """Return *url* with all trailing slashes removed, for deduplication."""
    while url.endswith("/"):
        url = url[:-1]
    return url
|
|
|
|
|
|
def aggiungi_syndication_a_post(percorso_file, nuovi_link):
    """Merge new syndication URLs into a post's YAML frontmatter.

    URLs are compared after trailing-slash normalization so the same link
    is never added twice. The file is rewritten only when something new
    is actually added.

    Args:
        percorso_file: Path to the Markdown post file.
        nuovi_link: Candidate syndication URLs to add.

    Returns:
        The list of normalized URLs that were added (empty if unchanged
        or the frontmatter could not be parsed).

    Raises:
        NotImplementedError: If the file uses TOML (``+++``) frontmatter.
    """
    with open(percorso_file, encoding="utf-8") as f:
        content = f.read()

    if content.startswith("+++"):
        raise NotImplementedError("Supporto TOML non ancora gestito.")
    if not content.startswith("---"):
        print(f"Formato frontmatter sconosciuto: {percorso_file}")
        return []

    # Split on the first two delimiters only: the post body may itself
    # contain "---" (e.g. a Markdown horizontal rule) and must not be
    # truncated when the file is rewritten below.
    parts = content.split("---", 2)
    if len(parts) < 3:
        print(f"Frontmatter non valido in {percorso_file}")
        return []

    # An empty frontmatter section parses to None — treat it as empty dict.
    frontmatter = yaml.safe_load(parts[1]) or {}
    esistenti = frontmatter.get("syndication", []) or []

    esistenti_norm = set(map(normalizza_url, esistenti))
    nuovi_norm = set(map(normalizza_url, nuovi_link))

    da_aggiungere = list(nuovi_norm - esistenti_norm)
    if not da_aggiungere:
        return []

    frontmatter["syndication"] = sorted(esistenti_norm.union(nuovi_norm))
    nuovo_frontmatter = yaml.dump(frontmatter, sort_keys=False, allow_unicode=True)
    nuovo_content = f"---\n{nuovo_frontmatter}---{parts[2]}"
    with open(percorso_file, "w", encoding="utf-8") as f:
        f.write(nuovo_content)
    return da_aggiungere
|
|
|
|
|
|
def process_feed(feed_url, fonte):
    """Scan a Mastodon/Bluesky-style RSS feed and syndicate items to posts.

    Each item's ``guid`` is used as the syndication URL. The item's
    ``description`` HTML is searched for a link back to fundor333.com to
    identify the canonical local post; when found, the guid is appended to
    that post's ``syndication`` frontmatter list.

    Args:
        feed_url: URL of the RSS feed.
        fonte: Label recorded in the returned log entries (e.g. "mastodon").

    Returns:
        A list of dicts with keys ``file``, ``source``, ``syndication`` and
        ``feed`` — one entry per post that was actually updated.
    """
    aggiornamenti = []

    # Timeout so the script cannot hang forever on an unresponsive feed.
    resp = requests.get(feed_url, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, "xml")

    for item in soup.find_all("item"):
        guid_tag = item.find("guid")
        descrizione = item.find("description")
        if guid_tag is None or descrizione is None:
            continue  # malformed item: skip instead of crashing
        guid = guid_tag.text.strip()
        content_html = descrizione.text

        # Look for the canonical (source) link inside the item content.
        soup_descr = BeautifulSoup(content_html, "html.parser")
        source_links = [
            a["href"]
            for a in soup_descr.find_all("a", href=True)
            if "fundor333.com" in a["href"]
        ]

        if not source_links:
            continue

        source_url = source_links[0]
        post_path = trova_file_post_da_source(source_url)
        if not post_path:
            continue

        nuovi = aggiungi_syndication_a_post(post_path, [guid])
        if nuovi:
            print(f"[✓] Aggiornato {post_path} da {fonte}")
            aggiornamenti.append(
                {
                    "file": post_path,
                    "source": source_url,
                    "syndication": " | ".join(nuovi),
                    "feed": fonte,
                }
            )

    return aggiornamenti
|
|
|
|
|
|
def main():
    """Process every configured feed and write a CSV log of applied updates."""
    aggiornamenti = []

    print(">> Processando Mastodon")
    aggiornamenti.extend(process_feed(MASTODON_FEED, "mastodon"))

    if BSKY_FEED:
        print(">> Processando Bluesky")
        aggiornamenti.extend(process_feed(BSKY_FEED, "bsky"))

    if MEDIUM_FEED:
        print(">> Processando Medium")
        aggiornamenti.extend(process_feed_medium(MEDIUM_FEED, "medium"))

    if not aggiornamenti:
        print("\n[=] Nessuna modifica effettuata.")
        return

    with open(CSV_LOG, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(
            f, fieldnames=["file", "source", "syndication", "feed"]
        )
        writer.writeheader()
        writer.writerows(aggiornamenti)
    print(f"\n[✓] Log salvato in {CSV_LOG}")
|
|
|
|
|
|
# Run the feed sync only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|