Enhances Mastodon link handling and previews

Adds functionality to identify Mastodon links in Hugo markdown files, extract relevant information such as the instance and the ID, and generate previews. This allows Mastodon toots to be embedded directly into the generated Hugo site and provides a preview text extracted from the toot content, improving the user experience and content integration. It also adds a fallback, used when a toot disappears, to avoid broken content. Finally, it adds `bleach` as a dependency to sanitize HTML content.
2025-07-27 19:35:30 +02:00
parent f0d39ddb48
commit dbda00405b
5 changed files with 179 additions and 2 deletions
--- a/action_script/replay-getter.py
+++ b/action_script/replay-getter.py
@@ -3,10 +3,75 @@ import re
 import requests
 import frontmatter
 from bs4 import BeautifulSoup
+from urllib.parse import urlparse

 # --- Funzioni di Supporto ---

 HUGO_CONTENT_PATH = "content"
+MAX_LENGHT = 800  # Default max_length of extract_preview_from_html (identifier spelling kept: it is referenced elsewhere)
+
+
+def get_instance_and_id(url):
+    """
+    Extract the instance (hostname) and a candidate ID from a URL,
+    based on common Mastodon URL patterns.
+
+    Args:
+        url (str): The URL string to analyse.
+
+    Returns:
+        tuple: (instance, id). id is an all-digit path segment when one
+               matches, the "@handle" segment for profile-style URLs, or None.
+               (None, None) is returned when the URL has no network location.
+    """
+    parsed_url = urlparse(url)
+
+    instance = parsed_url.netloc if parsed_url.netloc else None
+
+    if not instance:
+        return None, None
+
+    path_segments = parsed_url.path.strip("/").split("/")
+
+    # Look for the ID according to the known Mastodon URL patterns
+    if len(path_segments) >= 2 and path_segments[0].startswith("@"):  # /@user/...
+        if len(path_segments) == 2:
+            if path_segments[1].isdigit():  # /@user/<id>
+                return instance, path_segments[1]
+            else:  # /@user/<non-numeric>: fall back to the handle segment
+                return instance, path_segments[0]
+        elif (  # /@user/statuses/<id>
+            len(path_segments) > 2
+            and path_segments[1] == "statuses"
+            and path_segments[2].isdigit()
+        ):
+            return instance, path_segments[2]
+        elif len(path_segments) > 2 and path_segments[2].isdigit():  # /@user/<any>/<id>
+            return instance, path_segments[2]
+
+    elif (  # /web/statuses/<id>
+        len(path_segments) >= 3
+        and path_segments[0] == "web"
+        and path_segments[1] == "statuses"
+        and path_segments[2].isdigit()
+    ):
+        return instance, path_segments[2]
+
+    elif (  # /users/<name>/statuses/<id>
+        len(path_segments) >= 4
+        and path_segments[0] == "users"
+        and path_segments[2] == "statuses"
+        and path_segments[3].isdigit()
+    ):
+        return instance, path_segments[3]
+
+    if path_segments:  # Generic fallback when no pattern above returned
+        if path_segments[-1].isdigit():  # trailing all-digit segment
+            return instance, path_segments[-1]
+        elif path_segments[0].startswith("@") and len(path_segments) == 1:  # /@user
+            return instance, path_segments[0]
+
+    return instance, None  # No specific ID found (base or generic URL)


 def get_page_content(url):
@@ -23,7 +88,7 @@ def get_page_content(url):
        return None


-def extract_preview_from_html(html_content, max_length=200):
+def extract_preview_from_html(html_content, max_length=MAX_LENGHT):
    """
    Estrae una porzione di testo pulita dal contenuto HTML per una preview.
    Prioritizza l'estrazione da:
@@ -146,6 +211,10 @@ def process_hugo_markdown_files(root_dir):
                        if is_mastodon_link(reply_url):
                            if post.metadata.get("mastodon_reply") is not True:
                                post.metadata["mastodon_reply"] = True
+                                (
+                                    post.metadata["mastodon_instance"],
+                                    post.metadata["mastodon_id"],
+                                ) = get_instance_and_id(reply_url)
                                modified = True
                                print(
                                    f"  Flag 'mastodon_reply: true' aggiunto/aggiornato per {reply_url}"
@@ -153,6 +222,8 @@ def process_hugo_markdown_files(root_dir):
                        elif post.metadata.get("mastodon_reply") is True:
                            # Se non è più un link Mastodon ma il flag era presente, rimuovilo
                            del post.metadata["mastodon_reply"]
+                            del post.metadata["mastodon_instance"]
+                            del post.metadata["mastodon_id"]
                            modified = True
                            print(
                                f"  Flag 'mastodon_reply' rimosso per {reply_url} (non più Mastodon)."
--- a/layouts/partials/micro.html
+++ b/layouts/partials/micro.html
@@ -13,3 +13,15 @@
 {{with .Params.Rspv}}
 <p><a href="{{.}}" class="u-rsvp"><i class="fa-regular fa-calendar-heart"></i> RSPV of <span class="url-title">{{.}}</span></a></p>
 {{end}}
+
+{{with .Params.Preview_text_from_reply}}
+<blockquote class="u-in-reply-to in-reply-to">
+<p>{{.}}</p>
+</blockquote>
+{{end}}
+
+{{ if .Params.mastodon_reply}}
+
+{{ partial "toot"  . }}
+
+{{end}}
--- a/layouts/partials/toot.html
+++ b/layouts/partials/toot.html
@@ -0,0 +1,63 @@
+
+{{ $masIns := .Params.mastodon_instance }}
+{{ $id :=  .Params.mastodon_id }}
+{{ $tootLink := "" }}
+{{ $handleInst := "" }}
+{{ $urlToGet := print "https://" $masIns "/api/v1/statuses/" $id }}
+{{/* Fetch the status JSON from the instance API at build time; the else branch at the bottom renders a placeholder when nothing is returned. */}}
+{{ with resources.GetRemote $urlToGet }}
+    {{ $json := .Content | unmarshal }}
+    {{ if isset $json "account" }}
+        {{ $tootLink = print "https://" $masIns "/@" $json.account.acct "/" $id }}{{/* web URL of a status: https://<instance>/@<acct>/<id> */}}
+        {{ $handleInst = cond (in $json.account.acct "@") (print "@" $json.account.acct) (print "@" $json.account.acct "@" $masIns) }}{{/* an acct containing "@" already carries its own domain */}}
+    {{ end }}
+    {{/* Render the toot card only when the response actually carries a content field. */}}
+    {{ if isset $json "content" }}
+        <div class="toot">
+            <div class="toot-header">
+                <a class="toot-profile" href="https://{{ $masIns }}/@{{ $json.account.acct }}" rel="noopener">
+                    <img src="{{ $json.account.avatar }}"
+                         alt="Avatar for {{ $handleInst }}"
+                         loading="lazy">
+                </a>
+                <div class="toot-author">
+                    <a class="toot-author-name"
+                       href="https://{{ $masIns }}/@{{ $json.account.acct }}"
+                       rel="noopener">{{ $json.account.display_name }}</a>
+                    <a class="toot-author-handle"
+                       href="https://{{ $masIns }}/@{{ $json.account.acct }}"
+                       rel="noopener">{{ $handleInst }}</a>
+                </div>
+            </div>
+
+            <div class="toot-content">{{ $json.content | safeHTML }}</div>
+
+            {{ with $json.media_attachments }}
+                {{ $count := len . }}
+                <div class="toot-media-grid" data-count="{{ $count }}">
+                    {{ range . }}
+                        {{ if eq .type "image" }}
+                            <div class="toot-media-item">
+                                <img src="{{ .url }}"
+                                     alt=""
+                                     loading="lazy">
+                            </div>
+                        {{ end }}
+                    {{ end }}
+                </div>
+            {{ end }}
+
+            <div class="toot-footer">
+                <a href="{{ $tootLink }}"
+                   class="toot-date"
+                   rel="noopener">{{ dateFormat "3:04 PM · Jan 2, 2006" $json.created_at }}</a>
+            </div>
+        </div>
+    {{ end }}
+{{ else }}
+    <div class="toot">
+        <p style="text-align: center; color: var(--secondary); margin: 0;">
+            [Source not online at time of site build.]
+        </p>
+    </div>
+{{ end }}
--- a/poetry.lock
+++ b/poetry.lock
@@ -23,6 +23,24 @@ charset-normalizer = ["charset-normalizer"]
 html5lib = ["html5lib"]
 lxml = ["lxml"]

+[[package]]
+name = "bleach"
+version = "6.2.0"
+description = "An easy safelist-based HTML-sanitizing tool."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"},
+    {file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"},
+]
+
+[package.dependencies]
+webencodings = "*"
+
+[package.extras]
+css = ["tinycss2 (>=1.1.0,<1.5)"]
+
 [[package]]
 name = "certifi"
 version = "2025.7.14"
@@ -860,6 +878,18 @@ platformdirs = ">=3.9.1,<5"
 docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
 test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""]

+[[package]]
+name = "webencodings"
+version = "0.5.1"
+description = "Character encoding aliases for legacy web content"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
+    {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
+]
+
 [[package]]
 name = "weeknotebot"
 version = "1.7.0"
@@ -882,4 +912,4 @@ rich = ">=13.9.4,<14.0.0"
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "c00284e308116f498c84c3a39944727cee6c2d8f4f14439efcd8d781329329da"
+content-hash = "99647e0cf2079c1607e915fffc58c2acdd357b0ad6b0c167d8ace8a943625524"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ pyyaml = "*"
 lxml = "*"
 typer = "*"
 python-frontmatter = "^1.1.0"
+bleach = "^6.2.0"


 [tool.poetry.group.dev.dependencies]