Update internal link extraction to filter by class 'interlink-script'
This commit is contained in:
@@ -49,7 +49,9 @@ def get_internal_links_pointing_to_pages(sitemap_url):
             page_response.raise_for_status()
             page_soup = BeautifulSoup(page_response.content, "html.parser")
 
-            for link in page_soup.find_all("a", href=True):
+            for link in page_soup.find_all(
+                "a", {"class": "interlink-script"}, href=True
+            ):
                 href = link.get("href")
                 absolute_href = urljoin(page_url, href)
                 parsed_absolute_href = urlparse(absolute_href)
Reference in New Issue
Block a user