From 118c60015e08ed33d80f4d88b99ccc24ca043e55 Mon Sep 17 00:00:00 2001
From: lza_menace <lza_menace@protonmail.com>
Date: Mon, 3 Apr 2023 21:04:27 -0700
Subject: [PATCH] add mirror to monero infodump site

---
 .gitignore |  1 +
 scrape.py  | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100755 scrape.py

diff --git a/.gitignore b/.gitignore
index d2bd107..24438e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,3 +130,4 @@ dmypy.json
 
 # nodes
 data
+infodump
\ No newline at end of file
diff --git a/scrape.py b/scrape.py
new file mode 100755
index 0000000..53d6487
--- /dev/null
+++ b/scrape.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""Mirror the Monero infodump page and its images into ./infodump."""
+
+import os
+import subprocess
+from urllib.parse import urljoin
+
+import requests
+import bs4
+
+OUT_DIR = 'infodump'
+url = 'https://moneroinfodump.neocities.org/'
+
+
+def fetch(src, rel_path):
+    """Download ``src`` to ``OUT_DIR/rel_path`` with wget.
+
+    Returns True when a download was attempted, False when ``rel_path`` was
+    rejected because it is empty or would resolve outside ``OUT_DIR``.
+    """
+    dest = os.path.normpath(f'{OUT_DIR}/{rel_path}')
+    # Scraped paths are untrusted: refuse anything that escapes OUT_DIR.
+    if not dest.startswith(OUT_DIR + os.sep):
+        return False
+    os.makedirs(os.path.dirname(dest), exist_ok=True)
+    # Pass an argument list (no shell) so characters in a scraped URL can
+    # never be interpreted as shell syntax.
+    # Best-effort: a failed download does not abort the rest of the mirror.
+    subprocess.run(['wget', '-q', '--no-clobber', '-O', dest, src], check=False)
+    return True
+
+
+os.makedirs(f'{OUT_DIR}/thumbs', exist_ok=True)
+response = requests.get(url, timeout=15)
+# Fail loudly instead of mirroring an HTTP error page.
+response.raise_for_status()
+soup = bs4.BeautifulSoup(response.content, 'html.parser')
+images = soup.find_all('img')
+links = soup.find_all('a')
+
+for image in images:
+    img = image.get('src')
+    if not img or img.startswith('data:'):
+        # No src attribute, or an inline data URI: nothing to download.
+        continue
+    if img.startswith('http'):
+        # Remote image: store it flat in OUT_DIR and point the tag at the copy.
+        name = os.path.basename(img)
+        if fetch(img, name):
+            image['src'] = name
+    else:
+        # Site-relative image: resolve against the page URL for the download
+        # and keep the same relative path locally, so src needs no rewrite.
+        fetch(urljoin(url, img), img)
+
+for link in links:
+    href = link.get('href')
+    if href and href.startswith('https://i.imgur.com'):
+        link['href'] = os.path.basename(href)
+
+with open(f'{OUT_DIR}/index.html', 'w', encoding='utf-8') as f:
+    f.write(str(soup))
\ No newline at end of file