90 lines
2.6 KiB
Python
90 lines
2.6 KiB
Python
import requests
|
|
import os
|
|
import datetime
|
|
import json
|
|
from pathlib import Path
|
|
import hashlib
|
|
|
|
|
|
http_domain = "https://fundor333.com"
|
|
domain = "fundor333.com"
|
|
token = os.getenv("WEBMENTIONS_TOKEN")
|
|
since_days = 30
|
|
|
|
|
|
class WebmentionFinder:
|
|
def __init__(self, http_domain: str, token: str, domain: str, since_days: int):
|
|
self.now = datetime.datetime.now()
|
|
self.since_data = self.now - datetime.timedelta(hours=24 * since_days)
|
|
self.all_hash = []
|
|
self.http_domain = http_domain
|
|
self.domain = domain
|
|
self.token = token
|
|
self.url = f"https://webmention.io/api/mentions.jf2?domain={self.domain}"
|
|
"&token={self.token}&since={self.since_data.isoformat()}&per-page=999"
|
|
|
|
def __clean_slug(self, slug: str):
|
|
return hashlib.md5(
|
|
(slug.split("?")[0]).encode("utf-8"), usedforsecurity=False
|
|
).hexdigest()
|
|
|
|
def __get_data(self):
|
|
r = requests.get(self.url)
|
|
data = r.json()
|
|
self.output = {}
|
|
|
|
for webmention in data["children"]:
|
|
|
|
label = self.__clean_slug(webmention["wm-target"])
|
|
self.all_hash.append(label)
|
|
label += "/" + str(webmention["wm-id"])
|
|
|
|
if self.output.get(label, False):
|
|
self.output[label].append(webmention)
|
|
else:
|
|
self.output[label] = [webmention]
|
|
|
|
def __generate_files(self):
|
|
|
|
for key in self.output.keys():
|
|
original_path = key
|
|
path_list = original_path.split("/")
|
|
|
|
path_list = [x for x in path_list if x.strip()]
|
|
if path_list != []:
|
|
filename = path_list.pop()
|
|
|
|
path_folder = os.path.join("data", "webmentions", *path_list)
|
|
|
|
Path(path_folder).mkdir(parents=True, exist_ok=True)
|
|
path_file = os.path.join(path_folder, filename + ".json")
|
|
|
|
with open(path_file, "w") as fp:
|
|
json.dump(self.output[key], fp)
|
|
|
|
def __clean_files(self):
|
|
|
|
for folder in self.all_hash:
|
|
path_f = os.path.join("data", "webmentions", folder)
|
|
|
|
files = os.listdir(path_f)
|
|
|
|
out_dict = []
|
|
for e in files:
|
|
with open(os.path.join(path_f, e)) as file:
|
|
out_dict.append(json.load(file))
|
|
|
|
with open(os.path.join("data", "webmentions", folder + ".json"), "w") as fp:
|
|
json.dump(out_dict, fp)
|
|
|
|
def run(self):
|
|
self.__get_data()
|
|
self.__generate_files()
|
|
self.__clean_files()
|
|
|
|
|
|
wb = WebmentionFinder(
|
|
http_domain=http_domain, token=token, since_days=since_days, domain=domain
|
|
)
|
|
wb.run()
|