import json
import os
from pathlib import Path

import feedparser

domain = 'https://fundor333.com'

rss_url_mastodon = 'https://mastodon.social/@fundor333.rss'


class MastodonFinder:
    """Collects Mastodon post links that reference pages on a given domain."""

    def find_urls(self, string):
        """Return every http(s) URL found in *string*.

        The input is split on double quotes, so this finds URLs that appear
        as quoted attribute values (e.g. href="...") in an HTML fragment.
        Returns an empty list for None/empty input instead of raising.
        """
        if not string:
            return []
        return [
            part
            for part in string.split('"')
            if part.startswith(("https:", "http:"))
        ]

    def run(self, rss_url: str, domain: str, output: dict):
        """Parse the RSS feed at *rss_url* and map domain URLs to post links.

        For every feed entry, each URL in the entry description that contains
        *domain* becomes a key in *output*, accumulating that entry's own
        link in a list. *output* is mutated in place.
        """
        feed = feedparser.parse(rss_url)
        # feedparser only sets .status for HTTP(S) fetches; treat a missing
        # status as a failure rather than raising AttributeError.
        status = getattr(feed, "status", None)
        if status == 200:
            for entry in feed.entries:
                # Strip once, up front, so appended and newly-created entries
                # are consistent (the original stripped only the first link
                # stored under a key, never the appended ones).
                link = (entry.get('link') or '').strip()
                for url in self.find_urls(entry.get('description')):
                    if domain in url:
                        # setdefault also avoids clobbering an existing
                        # (possibly empty) list, unlike `get(url, False)`.
                        output.setdefault(url, []).append(link)
        else:
            print("Failed to get RSS feed. Status code:", status)


class WriterSyndication:
    """Writes per-page syndication JSON files from gathered Mastodon links."""

    def __init__(self, rss_url_mastodon: str, domain: str):
        """Store the feed URL and site domain; start with an empty mapping."""
        # Maps a page URL on *domain* -> list of Mastodon post links.
        self.output = {}
        self.rss_url_mastodon = rss_url_mastodon
        self.domain = domain

    def data_gathering(self):
        """Populate self.output from the Mastodon RSS feed."""
        finder = MastodonFinder()
        finder.run(self.rss_url_mastodon, self.domain, self.output)

    def write(self):
        """Write one JSON file per collected page URL.

        The page path after the domain becomes the directory layout under
        ../data/syndication/, with the last path segment as the file name.
        """
        base = Path("..") / "data" / "syndication"
        for url, links in self.output.items():
            # Path of the page relative to the site root, e.g. "/blog/post/".
            relative = url.split(self.domain)[1]
            segments = [part for part in relative.split('/') if part.strip()]
            if not segments:
                # Bare domain URL: nothing to name a file after, so skip it
                # (the previous code raised IndexError on pop() here).
                continue
            filename = segments.pop()
            folder = base.joinpath(*segments)
            folder.mkdir(parents=True, exist_ok=True)
            # Explicit encoding keeps the output stable across platforms.
            with open(folder / (filename + ".json"), 'w', encoding="utf-8") as fp:
                json.dump({"syndication": links}, fp)

    def run(self):
        """Gather data from the feed, then write the JSON files."""
        self.data_gathering()
        self.write()


if __name__ == "__main__":
    # Run the full fetch-and-write pipeline only when executed as a script,
    # not as a side effect of importing this module.
    w = WriterSyndication(rss_url_mastodon, domain)
    w.run()