From dbda00405b6ed55511b549b51b9a0f65314941f5 Mon Sep 17 00:00:00 2001
From: fundor333 <github@fundor333.com>
Date: Sun, 27 Jul 2025 19:35:30 +0200
Subject: [PATCH] Enhances Mastodon link handling and previews

Adds functionality to identify Mastodon links in Hugo markdown files, extract relevant information like instance and ID, and generate previews.

This allows Mastodon toots to be embedded directly into the generated Hugo site and provides preview text extracted from the toot content, improving the user experience and content integration. It also adds a fallback that is shown when a toot is no longer available, to avoid broken content.

Also adds `bleach` as a dependency to sanitize HTML content.
---
 action_script/replay-getter.py | 73 +++++++++++++++++++++++++++++++++-
 layouts/partials/micro.html    | 12 ++++++
 layouts/partials/toot.html     | 63 +++++++++++++++++++++++++++++
 poetry.lock                    | 32 ++++++++++++++-
 pyproject.toml                 |  1 +
 5 files changed, 179 insertions(+), 2 deletions(-)
 create mode 100644 layouts/partials/toot.html

diff --git a/action_script/replay-getter.py b/action_script/replay-getter.py
index aae7ba81..a07dc821 100644
--- a/action_script/replay-getter.py
+++ b/action_script/replay-getter.py
@@ -3,10 +3,75 @@ import re
 import requests
 import frontmatter
 from bs4 import BeautifulSoup
+from urllib.parse import urlparse
 
 # --- Funzioni di Supporto ---
 
 HUGO_CONTENT_PATH = "content"
+MAX_LENGHT = 800  # default for extract_preview_from_html's max_length (name spelling kept as-is)
+
+
+def get_instance_and_id(url):
+    """
+    Extract the instance (hostname) and a potential ID from a URL,
+    based on common Mastodon URL patterns.
+
+    Args:
+        url (str): The URL string to analyze.
+
+    Returns:
+        tuple: A tuple containing (instance, id); id may be an "@..." segment.
+               Returns (None, None) if the URL has no network location,
+               and (instance, None) if no known pattern yields an ID.
+    """
+    parsed_url = urlparse(url)
+
+    instance = parsed_url.netloc if parsed_url.netloc else None
+
+    if not instance:
+        return None, None
+
+    path_segments = parsed_url.path.strip("/").split("/")
+
+    # Logic to find the ID based on Mastodon URL patterns
+    if len(path_segments) >= 2 and path_segments[0].startswith("@"):
+        if len(path_segments) == 2:
+            if path_segments[1].isdigit():
+                return instance, path_segments[1]  # /@user/<id>
+            else:
+                return instance, path_segments[0]  # /@user/<other>: fall back to the "@user" segment
+        elif (
+            len(path_segments) > 2
+            and path_segments[1] == "statuses"
+            and path_segments[2].isdigit()
+        ):
+            return instance, path_segments[2]  # /@user/statuses/<id>
+        elif len(path_segments) > 2 and path_segments[2].isdigit():
+            return instance, path_segments[2]  # /@user/<segment>/<id>
+
+    elif (
+        len(path_segments) >= 3
+        and path_segments[0] == "web"
+        and path_segments[1] == "statuses"
+        and path_segments[2].isdigit()
+    ):
+        return instance, path_segments[2]  # /web/statuses/<id>
+
+    elif (
+        len(path_segments) >= 4
+        and path_segments[0] == "users"
+        and path_segments[2] == "statuses"
+        and path_segments[3].isdigit()
+    ):
+        return instance, path_segments[3]  # /users/<name>/statuses/<id>
+
+    if path_segments:  # NOTE: always true, str.split returns at least one element
+        if path_segments[-1].isdigit():
+            return instance, path_segments[-1]  # generic fallback: numeric last segment
+        elif path_segments[0].startswith("@") and len(path_segments) == 1:
+            return instance, path_segments[0]  # path is a single "@..." segment
+
+    return instance, None  # No specific ID found for base or generic URLs
 
 
 def get_page_content(url):
@@ -23,7 +88,7 @@ def get_page_content(url):
         return None
 
 
-def extract_preview_from_html(html_content, max_length=200):
+def extract_preview_from_html(html_content, max_length=MAX_LENGHT):
     """
     Estrae una porzione di testo pulita dal contenuto HTML per una preview.
     Prioritizza l'estrazione da:
@@ -146,6 +211,10 @@ def process_hugo_markdown_files(root_dir):
                         if is_mastodon_link(reply_url):
                             if post.metadata.get("mastodon_reply") is not True:
                                 post.metadata["mastodon_reply"] = True
+                                (
+                                    post.metadata["mastodon_instance"],
+                                    post.metadata["mastodon_id"],
+                                ) = get_instance_and_id(reply_url)
                                 modified = True
                                 print(
                                     f"  Flag 'mastodon_reply: true' aggiunto/aggiornato per {reply_url}"
@@ -153,6 +222,8 @@ def process_hugo_markdown_files(root_dir):
                         elif post.metadata.get("mastodon_reply") is True:
                             # Se non è più un link Mastodon ma il flag era presente, rimuovilo
                             del post.metadata["mastodon_reply"]
+                            del post.metadata["mastodon_instance"]
+                            del post.metadata["mastodon_id"]
                             modified = True
                             print(
                                 f"  Flag 'mastodon_reply' rimosso per {reply_url} (non più Mastodon)."
diff --git a/layouts/partials/micro.html b/layouts/partials/micro.html
index 52307498..69376ea9 100644
--- a/layouts/partials/micro.html
+++ b/layouts/partials/micro.html
@@ -13,3 +13,15 @@
 {{with .Params.Rspv}}
 <p><a href="{{.}}" class="u-rsvp"><i class="fa-regular fa-calendar-heart"></i> RSPV of <span class="url-title">{{.}}</span></a></p>
 {{end}}
+{{/* Quote the page's Preview_text_from_reply param, when it is set. */}}
+{{with .Params.Preview_text_from_reply}}
+<blockquote class="u-in-reply-to in-reply-to">
+<p>{{.}}</p>
+</blockquote>
+{{end}}
+{{/* Render the "toot" partial for pages whose mastodon_reply param is truthy. */}}
+{{ if .Params.mastodon_reply}}
+
+{{ partial "toot"  . }}
+
+{{end}}
diff --git a/layouts/partials/toot.html b/layouts/partials/toot.html
new file mode 100644
index 00000000..a080be95
--- /dev/null
+++ b/layouts/partials/toot.html
@@ -0,0 +1,63 @@
+{{/* Renders a Mastodon status fetched at build time from https://<mastodon_instance>/api/v1/statuses/<mastodon_id> (page params); prints a fallback notice when nothing is fetched. */}}
+{{ $masIns := .Params.mastodon_instance }}
+{{ $id :=  .Params.mastodon_id }}
+{{ $tootLink := "" }}
+{{ $handleInst := "" }}
+{{ $urlToGet := print "https://" $masIns "/api/v1/statuses/" $id }}
+
+{{ with resources.GetRemote $urlToGet }}
+    {{ $json := .Content | unmarshal }}
+    {{ if isset $json "account" }}
+        {{/* Status permalink, same shape as the profile links below: https://<instance>/@<acct>/<id> */}}{{ $tootLink = print "https://" $masIns "/@" $json.account.acct "/" $id }}
+        {{/* acct already carries "@domain" for remote users; only append the instance for local ones */}}{{ $handleInst = cond (in $json.account.acct "@") (print "@" $json.account.acct) (print "@" $json.account.acct "@" $masIns) }}
+    {{ end }}
+
+    {{ if isset $json "content" }}
+        <div class="toot">
+            <div class="toot-header">
+                <a class="toot-profile" href="https://{{ $masIns }}/@{{ $json.account.acct }}" rel="noopener">
+                    <img src="{{ $json.account.avatar }}"
+                         alt="Avatar for {{ $handleInst }}"
+                         loading="lazy">
+                </a>
+                <div class="toot-author">
+                    <a class="toot-author-name"
+                       href="https://{{ $masIns }}/@{{ $json.account.acct }}"
+                       rel="noopener">{{ $json.account.display_name }}</a>
+                    <a class="toot-author-handle"
+                       href="https://{{ $masIns }}/@{{ $json.account.acct }}"
+                       rel="noopener">{{ $handleInst }}</a>
+                </div>
+            </div>
+
+            <div class="toot-content">{{ $json.content | safeHTML }}</div>
+
+            {{ with $json.media_attachments }}
+                {{ $count := len . }}
+                <div class="toot-media-grid" data-count="{{ $count }}">
+                    {{ range . }}
+                        {{ if eq .type "image" }}
+                            <div class="toot-media-item">
+                                <img src="{{ .url }}"
+                                     alt=""
+                                     loading="lazy">
+                            </div>
+                        {{ end }}
+                    {{ end }}
+                </div>
+            {{ end }}
+
+            <div class="toot-footer">
+                <a href="{{ $tootLink }}"
+                   class="toot-date"
+                   rel="noopener">{{ dateFormat "3:04 PM · Jan 2, 2006" $json.created_at }}</a>
+            </div>
+        </div>
+    {{ end }}
+{{ else }}
+    <div class="toot">
+        <p style="text-align: center; color: var(--secondary); margin: 0;">
+            [Source not online at time of site build.]
+        </p>
+    </div>
+{{ end }}
diff --git a/poetry.lock b/poetry.lock
index cf3534dd..90c8c04f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -23,6 +23,24 @@ charset-normalizer = ["charset-normalizer"]
 html5lib = ["html5lib"]
 lxml = ["lxml"]
 
+[[package]]
+name = "bleach"
+version = "6.2.0"
+description = "An easy safelist-based HTML-sanitizing tool."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"},
+    {file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"},
+]
+
+[package.dependencies]
+webencodings = "*"
+
+[package.extras]
+css = ["tinycss2 (>=1.1.0,<1.5)"]
+
 [[package]]
 name = "certifi"
 version = "2025.7.14"
@@ -860,6 +878,18 @@ platformdirs = ">=3.9.1,<5"
 docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
 test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""]
 
+[[package]]
+name = "webencodings"
+version = "0.5.1"
+description = "Character encoding aliases for legacy web content"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
+    {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
+]
+
 [[package]]
 name = "weeknotebot"
 version = "1.7.0"
@@ -882,4 +912,4 @@ rich = ">=13.9.4,<14.0.0"
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "c00284e308116f498c84c3a39944727cee6c2d8f4f14439efcd8d781329329da"
+content-hash = "99647e0cf2079c1607e915fffc58c2acdd357b0ad6b0c167d8ace8a943625524"
diff --git a/pyproject.toml b/pyproject.toml
index 67476a1c..32849e31 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ pyyaml = "*"
 lxml = "*"
 typer = "*"
 python-frontmatter = "^1.1.0"
+bleach = "^6.2.0"
 
 
 [tool.poetry.group.dev.dependencies]