import json
import os
from pathlib import Path

import feedparser

domain = 'https://fundor333.com'

rss_url_mastodon = 'https://mastodon.social/@fundor333.rss'


class MastodonFinder:
    """Collects Mastodon post links that reference pages on a given domain."""

    def find_urls(self, string):
        """Return every http(s) URL found in *string*.

        The input is split on double quotes, so this finds URLs that appear
        as quoted attribute values (e.g. href="...") in an HTML fragment.
        Returns an empty list for None/empty input instead of raising.
        """
        if not string:
            return []
        return [
            part
            for part in string.split('"')
            if part.startswith(("https:", "http:"))
        ]

    def run(self, rss_url: str, domain: str, output: dict):
        """Parse the RSS feed at *rss_url* and map domain URLs to post links.

        For every feed entry, each URL in the entry description that contains
        *domain* becomes a key in *output*, accumulating that entry's own
        link in a list. *output* is mutated in place.
        """
        feed = feedparser.parse(rss_url)
        # feedparser only sets .status for HTTP(S) fetches; treat a missing
        # status as a failure rather than raising AttributeError.
        status = getattr(feed, "status", None)
        if status == 200:
            for entry in feed.entries:
                # Strip once, up front, so appended and newly-created entries
                # are consistent (the original stripped only the first link
                # stored under a key, never the appended ones).
                link = (entry.get('link') or '').strip()
                for url in self.find_urls(entry.get('description')):
                    if domain in url:
                        # setdefault also avoids clobbering an existing
                        # (possibly empty) list, unlike `get(url, False)`.
                        output.setdefault(url, []).append(link)
        else:
            print("Failed to get RSS feed. Status code:", status)


class WriterSyndication:
    """Writes per-page syndication JSON files from gathered Mastodon links."""

    def __init__(self, rss_url_mastodon: str, domain: str):
        """Store the feed URL and site domain; start with an empty mapping."""
        # Maps a page URL on *domain* -> list of Mastodon post links.
        self.output = {}
        self.rss_url_mastodon = rss_url_mastodon
        self.domain = domain

    def data_gathering(self):
        """Populate self.output from the Mastodon RSS feed."""
        finder = MastodonFinder()
        finder.run(self.rss_url_mastodon, self.domain, self.output)

    def write(self):
        """Write one JSON file per collected page URL.

        The page path after the domain becomes the directory layout under
        ../data/syndication/, with the last path segment as the file name.
        """
        base = Path("..") / "data" / "syndication"
        for url, links in self.output.items():
            # Path of the page relative to the site root, e.g. "/blog/post/".
            relative = url.split(self.domain)[1]
            segments = [part for part in relative.split('/') if part.strip()]
            if not segments:
                # Bare domain URL: nothing to name a file after, so skip it
                # (the previous code raised IndexError on pop() here).
                continue
            filename = segments.pop()
            folder = base.joinpath(*segments)
            folder.mkdir(parents=True, exist_ok=True)
            # Explicit encoding keeps the output stable across platforms.
            with open(folder / (filename + ".json"), 'w', encoding="utf-8") as fp:
                json.dump({"syndication": links}, fp)

    def run(self):
        """Gather data from the feed, then write the JSON files."""
        self.data_gathering()
        self.write()


if __name__ == "__main__":
    # Run the full fetch-and-write pipeline only when executed as a script,
    # not as a side effect of importing this module.
    w = WriterSyndication(rss_url_mastodon, domain)
    w.run()