diff --git a/action_script/replay-getter.py b/action_script/replay-getter.py index aae7ba81..a07dc821 100644 --- a/action_script/replay-getter.py +++ b/action_script/replay-getter.py @@ -3,10 +3,75 @@ import re import requests import frontmatter from bs4 import BeautifulSoup +from urllib.parse import urlparse # --- Funzioni di Supporto --- HUGO_CONTENT_PATH = "content" +MAX_LENGTH = 800 + + +def get_instance_and_id(url): + """ + Estrae l'istanza (hostname) e un potenziale ID da un URL, + basandosi su pattern comuni di Mastodon. + + Args: + url (str): La stringa URL da analizzare. + + Returns: + tuple: Una tupla contenente (istanza, id). + Restituisce (None, None) se l'URL non è ben formato + o se non è possibile estrarre un'istanza. + """ + parsed_url = urlparse(url) + + instance = parsed_url.netloc if parsed_url.netloc else None + + if not instance: + return None, None + + path_segments = parsed_url.path.strip("/").split("/") + + # Logica per trovare l'ID basandosi sui pattern di Mastodon + if len(path_segments) >= 2 and path_segments[0].startswith("@"): + if len(path_segments) == 2: + if path_segments[1].isdigit(): + return instance, path_segments[1] + else: + return instance, path_segments[0] + elif ( + len(path_segments) > 2 + and path_segments[1] == "statuses" + and path_segments[2].isdigit() + ): + return instance, path_segments[2] + elif len(path_segments) > 2 and path_segments[2].isdigit(): + return instance, path_segments[2] + + elif ( + len(path_segments) >= 3 + and path_segments[0] == "web" + and path_segments[1] == "statuses" + and path_segments[2].isdigit() + ): + return instance, path_segments[2] + + elif ( + len(path_segments) >= 4 + and path_segments[0] == "users" + and path_segments[2] == "statuses" + and path_segments[3].isdigit() + ): + return instance, path_segments[3] + + if path_segments: + if path_segments[-1].isdigit(): + return instance, path_segments[-1] + elif path_segments[0].startswith("@") and len(path_segments) == 1: + return 
instance, path_segments[0] + + return instance, None # Nessun ID specifico trovato per URL di base o generici def get_page_content(url): @@ -23,7 +88,7 @@ def get_page_content(url): return None -def extract_preview_from_html(html_content, max_length=200): +def extract_preview_from_html(html_content, max_length=MAX_LENGTH): """ Estrae una porzione di testo pulita dal contenuto HTML per una preview. Prioritizza l'estrazione da: @@ -146,6 +211,10 @@ def process_hugo_markdown_files(root_dir): if is_mastodon_link(reply_url): if post.metadata.get("mastodon_reply") is not True: post.metadata["mastodon_reply"] = True + ( + post.metadata["mastodon_instance"], + post.metadata["mastodon_id"], + ) = get_instance_and_id(reply_url) modified = True print( f" Flag 'mastodon_reply: true' aggiunto/aggiornato per {reply_url}" @@ -153,6 +222,8 @@ elif post.metadata.get("mastodon_reply") is True: # Se non è più un link Mastodon ma il flag era presente, rimuovilo del post.metadata["mastodon_reply"] + post.metadata.pop("mastodon_instance", None) + post.metadata.pop("mastodon_id", None) modified = True print( f" Flag 'mastodon_reply' rimosso per {reply_url} (non più Mastodon). diff --git a/layouts/partials/micro.html index 52307498..69376ea9 100644 --- a/layouts/partials/micro.html +++ b/layouts/partials/micro.html @@ -13,3 +13,15 @@ {{with .Params.Rspv}}

RSPV of {{.}}

{{end}} + +{{with .Params.Preview_text_from_reply}} +
+

{{.}}

+
+{{end}} + +{{ if .Params.mastodon_reply}} + +{{ partial "toot" . }} + +{{end}} diff --git a/layouts/partials/toot.html b/layouts/partials/toot.html new file mode 100644 index 00000000..a080be95 --- /dev/null +++ b/layouts/partials/toot.html @@ -0,0 +1,63 @@ + +{{ $masIns := .Params.mastodon_instance }} +{{ $id := .Params.mastodon_id }} +{{ $tootLink := "" }} +{{ $handleInst := "" }} +{{ $urlToGet := print "https://" $masIns "/api/v1/statuses/" $id }} + +{{ with resources.GetRemote $urlToGet }} + {{ $json := .Content | unmarshal }} + {{ if isset $json "account" }} + {{ $tootLink = print "https://" $masIns "@" $json.account.acct "/status/" $id }} + {{ $handleInst = print "@" $json.account.acct "@" $masIns }} + {{ end }} + + {{ if isset $json "content" }} +
+
+ + Avatar for {{ $handleInst }} + + +
+ +
{{ $json.content | safeHTML }}
+ + {{ with $json.media_attachments }} + {{ $count := len . }} +
+ {{ range . }} + {{ if eq .type "image" }} +
+ +
+ {{ end }} + {{ end }} +
+ {{ end }} + + +
+ {{ end }} +{{ else }} +
+

+ [Source not online at time of site build.] +

+
+{{ end }} diff --git a/poetry.lock b/poetry.lock index cf3534dd..90c8c04f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -23,6 +23,24 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "bleach" +version = "6.2.0" +description = "An easy safelist-based HTML-sanitizing tool." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"}, + {file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"}, +] + +[package.dependencies] +webencodings = "*" + +[package.extras] +css = ["tinycss2 (>=1.1.0,<1.5)"] + [[package]] name = "certifi" version = "2025.7.14" @@ -860,6 +878,18 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = 
"sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] + [[package]] name = "weeknotebot" version = "1.7.0" @@ -882,4 +912,4 @@ rich = ">=13.9.4,<14.0.0" [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "c00284e308116f498c84c3a39944727cee6c2d8f4f14439efcd8d781329329da" +content-hash = "99647e0cf2079c1607e915fffc58c2acdd357b0ad6b0c167d8ace8a943625524" diff --git a/pyproject.toml b/pyproject.toml index 67476a1c..32849e31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ pyyaml = "*" lxml = "*" typer = "*" python-frontmatter = "^1.1.0" +bleach = "^6.2.0" [tool.poetry.group.dev.dependencies]