Files
fundor333.com/action_script/syndication-collector.py
fundor333 3ddfaeec37 Fix path
2025-01-20 23:46:34 +01:00

68 lines
2.0 KiB
Python

import feedparser
from pathlib import Path
import os
import json
# Site URL prefix: only links that contain this string are collected, and it
# is cut off the collected URLs to derive the output file paths.
domain = 'https://fundor333.com'
# RSS feed of the Mastodon account whose posts are scanned for site links.
rss_url_mastodon = 'https://mastodon.social/@fundor333.rss'
class MastodonFinder:
    """Find feed entries whose description links back to a given site.

    The RSS feed is parsed with ``feedparser``. For every entry, the URLs
    found in its ``description`` are matched against the site domain, and
    the entry's own ``link`` is recorded under each matching URL.
    """

    def find_urls(self, string):
        """Return the URL-looking fragments of *string*.

        The text is split on double quotes, so only URLs that start right
        after a double quote (such as an ``href="..."`` value) are found.

        Args:
            string: Text to scan; ``None`` or an empty string yields ``[]``.

        Returns:
            The fragments starting with ``https:`` or ``http:``, in order
            of appearance.
        """
        if not string:
            # Entries without a description would otherwise crash on split().
            return []
        return [
            fragment
            for fragment in string.split('"')
            if fragment.startswith(("https:", "http:"))
        ]

    def _record_entries(self, entries, domain: str, output: dict):
        """Add each entry's link to *output* under every site URL it mentions."""
        for entry in entries:
            link = entry.get('link')
            if not link:
                # No post URL to record for this entry.
                continue
            # Strip once so first and later insertions are stored identically.
            link = link.strip()
            for url in self.find_urls(entry.get('description')):
                if domain not in url:
                    continue
                links = output.setdefault(url, [])
                # Avoid listing the same post twice for one page.
                if link not in links:
                    links.append(link)

    def run(self, rss_url: str, domain: str, output: dict):
        """Parse the feed at *rss_url* and fill *output* in place.

        Args:
            rss_url: URL of the RSS feed to read.
            domain: Only URLs containing this string are recorded.
            output: Mapping ``site URL -> list of entry links``; updated
                in place.

        Returns:
            None. On a non-200 (or missing) HTTP status a message is
            printed and *output* is left untouched.
        """
        feed = feedparser.parse(rss_url)
        # ``status`` is absent when no HTTP response was obtained, so read it
        # defensively instead of letting the attribute lookup raise.
        status = getattr(feed, 'status', None)
        if status == 200:
            self._record_entries(feed.entries, domain, output)
        else:
            print("Failed to get RSS feed. Status code:", status)
class WriterSyndication:
    """Gather syndication links and write them out as JSON data files.

    For every collected site URL, a file ``data/syndication/<url path>.json``
    (relative to the current working directory) is written, containing
    ``{"syndication": [<links>]}``.
    """

    def __init__(self, rss_url_mastodon: str, domain: str):
        """Store the feed URL and site domain; start with no collected links.

        Args:
            rss_url_mastodon: URL of the Mastodon RSS feed to read.
            domain: Site URL prefix; it is matched against the links found
                and removed from them to build the output paths.
        """
        self.output = {}
        self.rss_url_mastodon = rss_url_mastodon
        self.domain = domain

    def data_gathering(self):
        """Fill ``self.output`` from the Mastodon feed."""
        finder = MastodonFinder()
        finder.run(self.rss_url_mastodon, self.domain, self.output)

    def write(self):
        """Write one JSON file per collected URL under ``data/syndication``.

        The part of each URL after the domain is split on ``/``; the last
        non-empty segment becomes the file name (plus ``.json``) and the
        preceding segments become nested folders, created as needed.
        URLs with no path segment (the site root) are skipped, since they
        have no segment to name a file after.
        """
        for key, links in self.output.items():
            original_path = key.split(self.domain)[1]
            segments = [part for part in original_path.split('/') if part.strip()]
            if not segments:
                # Site root: popping a file name from an empty list would
                # raise IndexError and abort all remaining writes.
                print("Skipping syndication for URL without a path:", key)
                continue
            filename = segments.pop()
            folder = Path('data', "syndication", *segments)
            folder.mkdir(parents=True, exist_ok=True)
            path_file = folder / (filename + ".json")
            with open(path_file, 'w', encoding='utf-8') as fp:
                json.dump({"syndication": links}, fp)

    def run(self):
        """Collect the links, then write the data files."""
        self.data_gathering()
        self.write()
# Script entry: collect the syndication links from the Mastodon feed and
# write the JSON data files.
w = WriterSyndication(rss_url_mastodon=rss_url_mastodon, domain=domain)
w.run()