Enhances Mastodon link handling and previews

Adds functionality to identify Mastodon links in Hugo markdown files, extract relevant information like instance and ID, and generate previews.

This allows Mastodon toots to be embedded directly into the generated Hugo site and provides a preview text extracted from the toot content, improving the user experience and content integration. It also adds a fallback for the case where a toot disappears, to avoid broken content.

Also adds `bleach` as a dependency to sanitize HTML content.
This commit is contained in:
fundor333
2025-07-27 19:35:30 +02:00
parent f0d39ddb48
commit dbda00405b
5 changed files with 179 additions and 2 deletions

View File

@@ -3,10 +3,75 @@ import re
import requests
import frontmatter
from bs4 import BeautifulSoup
from urllib.parse import urlparse
# --- Helper functions ---
# NOTE(review): presumably the Hugo content directory scanned for markdown
# files -- confirm against the caller.
HUGO_CONTENT_PATH = "content"
# Default maximum preview length; used as the default ``max_length`` of
# extract_preview_from_html. The misspelled name is kept because other code
# refers to it.
MAX_LENGHT = 800
def get_instance_and_id(url):
    """
    Extract the instance (network location) and a candidate ID from a URL,
    based on common Mastodon URL patterns.

    Recognised path shapes (checked in this order):
        /@user/<id>                   -> id
        /@user/<anything else>        -> "@user" (the handle)
        /@user/<segment>/<id>[/...]   -> id (covers /@user/statuses/<id>)
        /web/statuses/<id>[/...]      -> id
        /users/<name>/statuses/<id>   -> id
        <any path ending in digits>   -> the last segment
        /@user                        -> "@user" (the handle)

    Args:
        url (str): The URL string to analyse.

    Returns:
        tuple: ``(instance, id)``.
            ``(None, None)`` if the URL is not a string, is malformed, or has
            no network location; ``(instance, None)`` if no ID-like segment
            was found.
    """
    # Non-string input cannot be split into path segments below, so treat it
    # as "not a URL" rather than crashing.
    if not isinstance(url, str):
        return None, None

    # urlparse raises ValueError on malformed input (e.g. an unbalanced IPv6
    # bracket); the documented contract is to answer (None, None) instead.
    try:
        parsed_url = urlparse(url)
    except ValueError:
        return None, None

    instance = parsed_url.netloc
    if not instance:
        return None, None

    # str.split always yields at least one element, so the list is never empty.
    path_segments = parsed_url.path.strip("/").split("/")

    if len(path_segments) >= 2 and path_segments[0].startswith("@"):
        if len(path_segments) == 2:
            if path_segments[1].isdigit():
                return instance, path_segments[1]
            # Second segment is not numeric: fall back to the handle itself.
            return instance, path_segments[0]
        # More than two segments: a numeric third segment is the ID. This
        # also covers the explicit /@user/statuses/<id> form.
        if path_segments[2].isdigit():
            return instance, path_segments[2]
    elif (
        len(path_segments) >= 3
        and path_segments[0] == "web"
        and path_segments[1] == "statuses"
        and path_segments[2].isdigit()
    ):
        return instance, path_segments[2]
    elif (
        len(path_segments) >= 4
        and path_segments[0] == "users"
        and path_segments[2] == "statuses"
        and path_segments[3].isdigit()
    ):
        return instance, path_segments[3]

    # Generic fallbacks for anything the specific patterns did not match.
    if path_segments[-1].isdigit():
        return instance, path_segments[-1]
    if len(path_segments) == 1 and path_segments[0].startswith("@"):
        return instance, path_segments[0]

    # No specific ID found (bare instance URL or an unrecognised path).
    return instance, None
def get_page_content(url):
@@ -23,7 +88,7 @@ def get_page_content(url):
return None
def extract_preview_from_html(html_content, max_length=200):
def extract_preview_from_html(html_content, max_length=MAX_LENGHT):
"""
Estrae una porzione di testo pulita dal contenuto HTML per una preview.
Prioritizza l'estrazione da:
@@ -146,6 +211,10 @@ def process_hugo_markdown_files(root_dir):
if is_mastodon_link(reply_url):
if post.metadata.get("mastodon_reply") is not True:
post.metadata["mastodon_reply"] = True
(
post.metadata["mastodon_instance"],
post.metadata["mastodon_id"],
) = get_instance_and_id(reply_url)
modified = True
print(
f" Flag 'mastodon_reply: true' aggiunto/aggiornato per {reply_url}"
@@ -153,6 +222,8 @@ def process_hugo_markdown_files(root_dir):
elif post.metadata.get("mastodon_reply") is True:
# Se non è più un link Mastodon ma il flag era presente, rimuovilo
del post.metadata["mastodon_reply"]
del post.metadata["mastodon_instance"]
del post.metadata["mastodon_id"]
modified = True
print(
f" Flag 'mastodon_reply' rimosso per {reply_url} (non più Mastodon)."

View File

@@ -13,3 +13,15 @@
{{with .Params.Rspv}}
<p><a href="{{.}}" class="u-rsvp"><i class="fa-regular fa-calendar-heart"></i> RSPV of <span class="url-title">{{.}}</span></a></p>
{{end}}
{{with .Params.Preview_text_from_reply}}
<blockquote class="u-in-reply-to in-reply-to">
<p>{{.}}</p>
</blockquote>
{{end}}
{{ if .Params.mastodon_reply}}
{{ partial "toot" . }}
{{end}}

View File

@@ -0,0 +1,63 @@
{{/*
  Renders a Mastodon status ("toot") as a static card.

  Reads .Params.mastodon_instance and .Params.mastodon_id from the page,
  requests https://<instance>/api/v1/statuses/<id> with resources.GetRemote
  during the site build, and renders the author, content, image attachments
  and a dated link back to the status. When the remote resource is not
  available, a placeholder card is rendered instead.
*/}}
{{ $masIns := .Params.mastodon_instance }}
{{ $id := .Params.mastodon_id }}
{{ $tootLink := "" }}
{{ $handleInst := "" }}
{{ $urlToGet := print "https://" $masIns "/api/v1/statuses/" $id }}
{{ with resources.GetRemote $urlToGet }}
  {{ $json := .Content | unmarshal }}
  {{ if isset $json "account" }}
    {{/* Public status page: https://<instance>/@<acct>/<id>. The slash before
         "@" is required; without it the instance is parsed as URL userinfo. */}}
    {{ $tootLink = print "https://" $masIns "/@" $json.account.acct "/" $id }}
    {{ $handleInst = print "@" $json.account.acct "@" $masIns }}
  {{ end }}
  {{ if isset $json "content" }}
    <div class="toot">
      <div class="toot-header">
        <a class="toot-profile" href="https://{{ $masIns }}/@{{ $json.account.acct }}" rel="noopener">
          <img src="{{ $json.account.avatar }}"
               alt="Avatar for {{ $handleInst }}"
               loading="lazy">
        </a>
        <div class="toot-author">
          <a class="toot-author-name"
             href="https://{{ $masIns }}/@{{ $json.account.acct }}"
             rel="noopener">{{ $json.account.display_name }}</a>
          <a class="toot-author-handle"
             href="https://{{ $masIns }}/@{{ $json.account.acct }}"
             rel="noopener">{{ $handleInst }}</a>
        </div>
      </div>
      {{/* The status content is HTML supplied by the remote instance and is
           emitted unescaped via safeHTML. */}}
      <div class="toot-content">{{ $json.content | safeHTML }}</div>
      {{ with $json.media_attachments }}
        {{ $count := len . }}
        <div class="toot-media-grid" data-count="{{ $count }}">
          {{ range . }}
            {{/* Only attachments of type "image" are rendered. */}}
            {{ if eq .type "image" }}
              <div class="toot-media-item">
                <img src="{{ .url }}"
                     alt=""
                     loading="lazy">
              </div>
            {{ end }}
          {{ end }}
        </div>
      {{ end }}
      <div class="toot-footer">
        <a href="{{ $tootLink }}"
           class="toot-date"
           rel="noopener">{{ dateFormat "3:04 PM · Jan 2, 2006" $json.created_at }}</a>
      </div>
    </div>
  {{ end }}
{{ else }}
  {{/* Fallback card when the remote status could not be retrieved. */}}
  <div class="toot">
    <p style="text-align: center; color: var(--secondary); margin: 0;">
      [Source not online at time of site build.]
    </p>
  </div>
{{ end }}

32
poetry.lock generated
View File

@@ -23,6 +23,24 @@ charset-normalizer = ["charset-normalizer"]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "bleach"
version = "6.2.0"
description = "An easy safelist-based HTML-sanitizing tool."
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"},
{file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"},
]
[package.dependencies]
webencodings = "*"
[package.extras]
css = ["tinycss2 (>=1.1.0,<1.5)"]
[[package]]
name = "certifi"
version = "2025.7.14"
@@ -860,6 +878,18 @@ platformdirs = ">=3.9.1,<5"
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""]
[[package]]
name = "webencodings"
version = "0.5.1"
description = "Character encoding aliases for legacy web content"
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
{file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
]
[[package]]
name = "weeknotebot"
version = "1.7.0"
@@ -882,4 +912,4 @@ rich = ">=13.9.4,<14.0.0"
[metadata]
lock-version = "2.1"
python-versions = "^3.12"
content-hash = "c00284e308116f498c84c3a39944727cee6c2d8f4f14439efcd8d781329329da"
content-hash = "99647e0cf2079c1607e915fffc58c2acdd357b0ad6b0c167d8ace8a943625524"

View File

@@ -18,6 +18,7 @@ pyyaml = "*"
lxml = "*"
typer = "*"
python-frontmatter = "^1.1.0"
bleach = "^6.2.0"
[tool.poetry.group.dev.dependencies]