Enhances Mastodon link handling and previews
Adds functionality to identify Mastodon links in Hugo markdown files, extract relevant information such as the instance and status ID, and generate previews. This allows Mastodon toots to be embedded directly into the generated Hugo site, with a preview text extracted from the toot content, improving the user experience and content integration. It also adds a fallback for when a toot disappears, to avoid broken content, and adds `bleach` as a dependency to sanitize HTML content.
This commit is contained in:
@@ -3,10 +3,75 @@ import re
|
||||
import requests
|
||||
import frontmatter
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# --- Funzioni di Supporto ---
|
||||
|
||||
HUGO_CONTENT_PATH = "content"
|
||||
MAX_LENGHT = 800
|
||||
|
||||
|
||||
def get_instance_and_id(url):
    """
    Extract the instance (hostname) and a potential status ID from a URL,
    based on common Mastodon path patterns.

    Args:
        url (str): The URL string to analyse.

    Returns:
        tuple: A ``(instance, id)`` pair. ``(None, None)`` when the URL is
        malformed or no hostname can be extracted; ``(instance, None)``
        when no specific ID is found (base or generic URLs). For profile
        links the ``@handle`` itself is returned in place of an ID.
    """
    parts = urlparse(url)

    host = parts.netloc or None
    if host is None:
        return None, None

    segments = parts.path.strip("/").split("/")
    count = len(segments)

    # Pattern group 1: links that start with an @handle.
    if count >= 2 and segments[0].startswith("@"):
        if count == 2:
            # /@user/<id> -> the numeric id; otherwise fall back to the handle.
            if segments[1].isdigit():
                return host, segments[1]
            return host, segments[0]
        if segments[1] == "statuses" and segments[2].isdigit():
            # /@user/statuses/<id>
            return host, segments[2]
        if segments[2].isdigit():
            # /@user/<anything>/<id>
            return host, segments[2]
    elif (
        count >= 3
        and segments[0] == "web"
        and segments[1] == "statuses"
        and segments[2].isdigit()
    ):
        # Old web UI: /web/statuses/<id>
        return host, segments[2]
    elif (
        count >= 4
        and segments[0] == "users"
        and segments[2] == "statuses"
        and segments[3].isdigit()
    ):
        # ActivityPub style: /users/<user>/statuses/<id>
        return host, segments[3]

    # Generic fallbacks: a trailing numeric segment, or a lone @handle.
    if segments:
        if segments[-1].isdigit():
            return host, segments[-1]
        if count == 1 and segments[0].startswith("@"):
            return host, segments[0]

    return host, None  # No specific ID found for base or generic URLs.
|
||||
|
||||
|
||||
def get_page_content(url):
|
||||
@@ -23,7 +88,7 @@ def get_page_content(url):
|
||||
return None
|
||||
|
||||
|
||||
def extract_preview_from_html(html_content, max_length=200):
|
||||
def extract_preview_from_html(html_content, max_length=MAX_LENGHT):
|
||||
"""
|
||||
Estrae una porzione di testo pulita dal contenuto HTML per una preview.
|
||||
Prioritizza l'estrazione da:
|
||||
@@ -146,6 +211,10 @@ def process_hugo_markdown_files(root_dir):
|
||||
if is_mastodon_link(reply_url):
|
||||
if post.metadata.get("mastodon_reply") is not True:
|
||||
post.metadata["mastodon_reply"] = True
|
||||
(
|
||||
post.metadata["mastodon_instance"],
|
||||
post.metadata["mastodon_id"],
|
||||
) = get_instance_and_id(reply_url)
|
||||
modified = True
|
||||
print(
|
||||
f" Flag 'mastodon_reply: true' aggiunto/aggiornato per {reply_url}"
|
||||
@@ -153,6 +222,8 @@ def process_hugo_markdown_files(root_dir):
|
||||
elif post.metadata.get("mastodon_reply") is True:
|
||||
# Se non è più un link Mastodon ma il flag era presente, rimuovilo
|
||||
del post.metadata["mastodon_reply"]
|
||||
del post.metadata["mastodon_instance"]
|
||||
del post.metadata["mastodon_id"]
|
||||
modified = True
|
||||
print(
|
||||
f" Flag 'mastodon_reply' rimosso per {reply_url} (non più Mastodon)."
|
||||
|
||||
@@ -13,3 +13,15 @@
|
||||
{{with .Params.Rspv}}
|
||||
<p><a href="{{.}}" class="u-rsvp"><i class="fa-regular fa-calendar-heart"></i> RSPV of <span class="url-title">{{.}}</span></a></p>
|
||||
{{end}}
|
||||
|
||||
{{with .Params.Preview_text_from_reply}}
|
||||
<blockquote class="u-in-reply-to in-reply-to">
|
||||
<p>{{.}}</p>
|
||||
</blockquote>
|
||||
{{end}}
|
||||
|
||||
{{ if .Params.mastodon_reply}}
|
||||
|
||||
{{ partial "toot" . }}
|
||||
|
||||
{{end}}
|
||||
|
||||
63
layouts/partials/toot.html
Normal file
63
layouts/partials/toot.html
Normal file
@@ -0,0 +1,63 @@
|
||||
|
||||
{{ $masIns := .Params.mastodon_instance }}
|
||||
{{ $id := .Params.mastodon_id }}
|
||||
{{ $tootLink := "" }}
|
||||
{{ $handleInst := "" }}
|
||||
{{ $urlToGet := print "https://" $masIns "/api/v1/statuses/" $id }}
|
||||
|
||||
{{ with resources.GetRemote $urlToGet }}
|
||||
{{ $json := .Content | unmarshal }}
|
||||
{{ if isset $json "account" }}
|
||||
{{ $tootLink = print "https://" $masIns "@" $json.account.acct "/status/" $id }}
|
||||
{{ $handleInst = print "@" $json.account.acct "@" $masIns }}
|
||||
{{ end }}
|
||||
|
||||
{{ if isset $json "content" }}
|
||||
<div class="toot">
|
||||
<div class="toot-header">
|
||||
<a class="toot-profile" href="https://{{ $masIns }}/@{{ $json.account.acct }}" rel="noopener">
|
||||
<img src="{{ $json.account.avatar }}"
|
||||
alt="Avatar for {{ $handleInst }}"
|
||||
loading="lazy">
|
||||
</a>
|
||||
<div class="toot-author">
|
||||
<a class="toot-author-name"
|
||||
href="https://{{ $masIns }}/@{{ $json.account.acct }}"
|
||||
rel="noopener">{{ $json.account.display_name }}</a>
|
||||
<a class="toot-author-handle"
|
||||
href="https://{{ $masIns }}/@{{ $json.account.acct }}"
|
||||
rel="noopener">{{ $handleInst }}</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="toot-content">{{ $json.content | safeHTML }}</div>
|
||||
|
||||
{{ with $json.media_attachments }}
|
||||
{{ $count := len . }}
|
||||
<div class="toot-media-grid" data-count="{{ $count }}">
|
||||
{{ range . }}
|
||||
{{ if eq .type "image" }}
|
||||
<div class="toot-media-item">
|
||||
<img src="{{ .url }}"
|
||||
alt=""
|
||||
loading="lazy">
|
||||
</div>
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
<div class="toot-footer">
|
||||
<a href="{{ $tootLink }}"
|
||||
class="toot-date"
|
||||
rel="noopener">{{ dateFormat "3:04 PM · Jan 2, 2006" $json.created_at }}</a>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
{{ else }}
|
||||
<div class="toot">
|
||||
<p style="text-align: center; color: var(--secondary); margin: 0;">
|
||||
[Source not online at time of site build.]
|
||||
</p>
|
||||
</div>
|
||||
{{ end }}
|
||||
32
poetry.lock
generated
32
poetry.lock
generated
@@ -23,6 +23,24 @@ charset-normalizer = ["charset-normalizer"]
|
||||
html5lib = ["html5lib"]
|
||||
lxml = ["lxml"]
|
||||
|
||||
[[package]]
|
||||
name = "bleach"
|
||||
version = "6.2.0"
|
||||
description = "An easy safelist-based HTML-sanitizing tool."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"},
|
||||
{file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
webencodings = "*"
|
||||
|
||||
[package.extras]
|
||||
css = ["tinycss2 (>=1.1.0,<1.5)"]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2025.7.14"
|
||||
@@ -860,6 +878,18 @@ platformdirs = ">=3.9.1,<5"
|
||||
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
|
||||
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""]
|
||||
|
||||
[[package]]
|
||||
name = "webencodings"
|
||||
version = "0.5.1"
|
||||
description = "Character encoding aliases for legacy web content"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
|
||||
{file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "weeknotebot"
|
||||
version = "1.7.0"
|
||||
@@ -882,4 +912,4 @@ rich = ">=13.9.4,<14.0.0"
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "c00284e308116f498c84c3a39944727cee6c2d8f4f14439efcd8d781329329da"
|
||||
content-hash = "99647e0cf2079c1607e915fffc58c2acdd357b0ad6b0c167d8ace8a943625524"
|
||||
|
||||
@@ -18,6 +18,7 @@ pyyaml = "*"
|
||||
lxml = "*"
|
||||
typer = "*"
|
||||
python-frontmatter = "^1.1.0"
|
||||
bleach = "^6.2.0"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
||||
Reference in New Issue
Block a user