update peer crawling to be recursive

pull/26/head
lza_menace 2 years ago
parent 574761ec8e
commit a85198c9ed

@@ -1,3 +1,3 @@
 */3 * * * * sh -c "cd ~/git/monero.fail; ./bin/cmd validate"
 30 */4 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd check"
-0 */2 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd get_peers"
+0 */16 * * * sh -c "cd ~/git/monero.fail; ./bin/cmd get_peers"
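The schedule for the get_peers job changes from every second hour to hours divisible by 16. A quick way to sanity-check when a cron expression like this fires, sketched with the third-party croniter package (an assumption; it is not a dependency of monero.fail):

    from datetime import datetime
    from croniter import croniter  # third-party helper, not part of this repo

    # print the next few firing times of the new get_peers schedule
    schedule = croniter("0 */16 * * *", datetime(2023, 1, 1))
    for _ in range(4):
        print(schedule.get_next(datetime))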

@@ -1,5 +1,6 @@
 import logging
 from datetime import datetime, timedelta
+from time import sleep
 import geoip2.database
 import arrow
@@ -76,16 +77,27 @@ def check():
 @bp.cli.command("get_peers")
 def get_peers():
+    """
+    This command requests peers from the configured upstream node and fans out
+    to recursively scrape all other peers on the network. This will take
+    several hours to run.
+    """
+    # keep track of all peers
     all_peers = []
     print("[+] Preparing to crawl Monero p2p network")
     print(f"[.] Retrieving initial peers from {config.NODE_HOST}:{config.NODE_PORT}")
-    initial_peers = retrieve_peers(config.NODE_HOST, config.NODE_PORT)
-    with geoip2.database.Reader("./data/GeoLite2-City.mmdb") as reader:
-        for peer in initial_peers:
-            if peer not in all_peers:
-                all_peers.append(peer)
+
+    # start initial list of peers to scan
+    peers_to_scan = retrieve_peers(config.NODE_HOST, config.NODE_PORT)
+    print(f"[+] Found {len(peers_to_scan)} initial peers to begin scraping.")
+    sleep(3)
+
+    # helper function to add a new peer to the db or update an existing one
+    def save_peer(peer):
+        with geoip2.database.Reader("./data/GeoLite2-City.mmdb") as reader:
             _url = urlparse(peer)
             url = f"{_url.scheme}://{_url.netloc}".lower()
+            # add new peer if not in db
             if not Peer.select().where(Peer.url == peer).exists():
                 response = reader.city(_url.hostname)
                 p = Peer(
@@ -98,44 +110,38 @@ def get_peers():
                 )
                 p.save()
                 print(f"{peer} - saving new peer")
+            # or update if it does
             else:
                 p = Peer.select().where(Peer.url == peer).first()
                 p.datetime = datetime.now()
                 p.save()
+            return _url
+
+    # iterate over the whole list until all peers have been scanned
+    # add new peers to the list
+    # skip the peer if we've seen it already
+    try:
+        while peers_to_scan:
+            _peer = peers_to_scan[0]
+            peers_to_scan.pop(0)
+            if _peer in all_peers:
+                print(f'already found {_peer}')
+                continue
+            all_peers.append(_peer)
             try:
-                print(f"[.] Retrieving crawled peers from {_url.netloc}")
-                new_peers = retrieve_peers(_url.hostname, _url.port)
-                for peer in new_peers:
-                    if peer not in all_peers:
-                        all_peers.append(peer)
-                        _url = urlparse(peer)
-                        url = f"{_url.scheme}://{_url.netloc}".lower()
-                        if not Peer.select().where(Peer.url == peer).exists():
-                            response = reader.city(_url.hostname)
-                            p = Peer(
-                                url=peer,
-                                country=response.country.name,
-                                city=response.city.name,
-                                postal=response.postal.code,
-                                lat=response.location.latitude,
-                                lon=response.location.longitude,
-                            )
-                            p.save()
-                            print(f"{peer} - saving new peer")
-                        else:
-                            p = Peer.select().where(Peer.url == peer).first()
-                            p.datetime = datetime.now()
-                            p.save()
+                peer = save_peer(_peer)
+                peers_to_scan += retrieve_peers(peer.hostname, peer.port)
             except:
                 pass
+    except KeyboardInterrupt:
+        print('Stopped.')
     print(
         f"[+] Found {len(all_peers)} peers from {config.NODE_HOST}:{config.NODE_PORT}"
     )
     print("[+] Deleting old Monero p2p peers")
     for p in Peer.select():
-        if p.hours_elapsed() > 24:
+        if p.hours_elapsed() > config.PEER_LIFETIME:
             print(f"[.] Deleting {p.url}")
             p.delete_instance()
     rw_cache("map_peers", list(Peer.select().execute()))

@@ -13,3 +13,4 @@ TOR_PORT = environ.get("TOR_PORT", 9050)
 NODE_HOST = environ.get("NODE_HOST", "singapore.node.xmr.pm")
 NODE_PORT = environ.get("NODE_PORT", 18080)
 HEALTHY_BLOCK_DIFF = int(environ.get("HEALTHY_BLOCK_DIFF", 500))
+PEER_LIFETIME = int(environ.get("PEER_LIFETIME", 96))
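PEER_LIFETIME follows the same environment-variable-with-default pattern as the other settings, so the retention window used by the pruning loop can be tuned per deployment. Roughly, the hours_elapsed() check in the hunk above amounts to a cutoff like the following (the arithmetic here is illustrative, not the project's code):

    from datetime import datetime, timedelta
    from os import environ

    PEER_LIFETIME = int(environ.get("PEER_LIFETIME", 96))  # hours, as in the config

    # a peer last seen before this cutoff would be deleted by the cleanup loop
    cutoff = datetime.now() - timedelta(hours=PEER_LIFETIME)
    print(f"peers not seen since {cutoff} will be pruned")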

@@ -60,9 +60,10 @@
     <p>Source Node: {{ source_node }}</p>
     <p>
       This is not a full representation of the entire Monero network,
-      just a look into the peers being crawled from the source node ({{ source_node }}).
-      New peers are searched for on a recurring interval throughout the day.
-      Older peers are shown as more transparent and will be removed if not seen again after 24 hours.
+      just a look into the peers being recursively crawled from the source node ({{ source_node }}).
+      New peers are searched for once per day.
+      Older peers are shown as more transparent and will be removed
+      if not seen again after {{ config.PEER_LIFETIME }} hours.
     </p>
     <br>
     <a href="/">Go home</a>
