From a85198c9ed672db33a341c89e31d271cf146bc5c Mon Sep 17 00:00:00 2001 From: lza_menace Date: Sat, 15 Apr 2023 12:25:48 -0700 Subject: [PATCH] update peer crawling to be recursive --- crontab | 2 +- xmrnodes/cli.py | 66 ++++++++++++++++++++----------------- xmrnodes/config.py | 1 + xmrnodes/templates/map.html | 7 ++-- 4 files changed, 42 insertions(+), 34 deletions(-) diff --git a/crontab b/crontab index 4c554f2..814bee7 100644 --- a/crontab +++ b/crontab @@ -1,3 +1,3 @@ */3 * * * * sh -c "cd ~/git/monero.fail; ./bin/cmd validate" 30 */4 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd check" -0 */2 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd get_peers" +0 */16 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd get_peers" diff --git a/xmrnodes/cli.py b/xmrnodes/cli.py index 3ea9a1f..cfa2e53 100644 --- a/xmrnodes/cli.py +++ b/xmrnodes/cli.py @@ -1,5 +1,6 @@ import logging from datetime import datetime, timedelta +from time import sleep import geoip2.database import arrow @@ -76,16 +77,27 @@ def check(): @bp.cli.command("get_peers") def get_peers(): + """ + This command requests peers from the configured upstream node and fans out + to recursively scrape all other peers on the network. This will take + several hours to run. + """ + # keep track of all peers all_peers = [] print("[+] Preparing to crawl Monero p2p network") print(f"[.] 
Retrieving initial peers from {config.NODE_HOST}:{config.NODE_PORT}") - initial_peers = retrieve_peers(config.NODE_HOST, config.NODE_PORT) - with geoip2.database.Reader("./data/GeoLite2-City.mmdb") as reader: - for peer in initial_peers: - if peer not in all_peers: - all_peers.append(peer) + + # start initial list of peers to scan + peers_to_scan = retrieve_peers(config.NODE_HOST, config.NODE_PORT) + print(f"[+] Found {len(peers_to_scan)} initial peers to begin scraping.") + sleep(3) + + # helper function to add a new peer to the db or update an existing one + def save_peer(peer): + with geoip2.database.Reader("./data/GeoLite2-City.mmdb") as reader: _url = urlparse(peer) url = f"{_url.scheme}://{_url.netloc}".lower() + # add new peer if not in db if not Peer.select().where(Peer.url == peer).exists(): response = reader.city(_url.hostname) p = Peer( @@ -98,44 +110,38 @@ def get_peers(): ) p.save() print(f"{peer} - saving new peer") + # or update if it does else: p = Peer.select().where(Peer.url == peer).first() p.datetime = datetime.now() p.save() - + return _url + + # iterate over the whole list until all peers have been scanned + # add new peers to the list + # skip the peer if we've seen it already + try: + while peers_to_scan: + _peer = peers_to_scan[0] + peers_to_scan.pop(0) + if _peer in all_peers: + print(f'already found {_peer}') + continue + all_peers.append(_peer) try: - print(f"[.] 
Retrieving crawled peers from {_url.netloc}") - new_peers = retrieve_peers(_url.hostname, _url.port) - for peer in new_peers: - if peer not in all_peers: - all_peers.append(peer) - _url = urlparse(peer) - url = f"{_url.scheme}://{_url.netloc}".lower() - if not Peer.select().where(Peer.url == peer).exists(): - response = reader.city(_url.hostname) - p = Peer( - url=peer, - country=response.country.name, - city=response.city.name, - postal=response.postal.code, - lat=response.location.latitude, - lon=response.location.longitude, - ) - p.save() - print(f"{peer} - saving new peer") - else: - p = Peer.select().where(Peer.url == peer).first() - p.datetime = datetime.now() - p.save() + peer = save_peer(_peer) + peers_to_scan += retrieve_peers(peer.hostname, peer.port) except: pass + except KeyboardInterrupt: + print('Stopped.') print( f"[+] Found {len(all_peers)} peers from {config.NODE_HOST}:{config.NODE_PORT}" ) print("[+] Deleting old Monero p2p peers") for p in Peer.select(): - if p.hours_elapsed() > 24: + if p.hours_elapsed() > config.PEER_LIFETIME: print(f"[.] Deleting {p.url}") p.delete_instance() rw_cache("map_peers", list(Peer.select().execute())) diff --git a/xmrnodes/config.py b/xmrnodes/config.py index ce3cafa..b9fd613 100644 --- a/xmrnodes/config.py +++ b/xmrnodes/config.py @@ -13,3 +13,4 @@ TOR_PORT = environ.get("TOR_PORT", 9050) NODE_HOST = environ.get("NODE_HOST", "singapore.node.xmr.pm") NODE_PORT = environ.get("NODE_PORT", 18080) HEALTHY_BLOCK_DIFF = int(environ.get("HEALTHY_BLOCK_DIFF", 500)) +PEER_LIFETIME = int(environ.get("PEER_LIFETIME", 96)) diff --git a/xmrnodes/templates/map.html b/xmrnodes/templates/map.html index 4e07e2d..9ee7d72 100644 --- a/xmrnodes/templates/map.html +++ b/xmrnodes/templates/map.html @@ -60,9 +60,10 @@

Source Node: {{ source_node }}

This is not a full representation of the entire Monero network, - just a look into the peers being crawled from the source node ({{ source_node }}). - New peers are searched for on a recurring interval throughout the day. - Older peers are shown as more transparent and will be removed if not seen again after 24 hours. + just a look into the peers being recursively crawled from the source node ({{ source_node }}). + New peers are searched for every 16 hours. + Older peers are shown as more transparent and will be removed + if not seen again after {{ config.PEER_LIFETIME }} hours.


Go home