performance improvements on get_peers script

main
lza_menace 5 months ago
parent 4dc78b3be8
commit 5bc00debc3

@ -88,78 +88,75 @@ def check_node(_node):
_hc.delete_instance() _hc.delete_instance()
node.delete_instance() node.delete_instance()
def upsert_peer(peer):
    """Insert a new peer URL with GeoIP metadata, or refresh its timestamp.

    If *peer* is already in the db, only its ``datetime`` is bumped so it
    moves to the back of the crawl queue. Otherwise the URL is normalized,
    geolocated, and saved. Lookup/parse failures are best-effort: the peer
    is skipped, but the error is now reported instead of silently dropped.
    """
    exists = Peer.select().where(Peer.url == peer).first()
    if exists:
        # already known - refresh the datestamp (UTC, matching the model default)
        exists.datetime = datetime.utcnow()
        exists.save()
        return
    with geoip2.database.Reader("./data/GeoLite2-City.mmdb") as geodb:
        try:
            u = urlparse(peer)
            _url = f"{u.scheme}://{u.netloc}".lower()
            geodata = geodb.city(u.hostname)
            p = Peer(
                url=_url,
                country=geodata.country.name,
                city=geodata.city.name,
                postal=geodata.postal.code,
                lat=geodata.location.latitude,
                lon=geodata.location.longitude,
            )
            p.save()
        except Exception as e:
            # best-effort: bogus URLs or missing GeoIP records are expected;
            # log instead of `pass` so discarded peers are visible
            print(f"--- Could not save peer {peer}: {e}")
def _get_peers():
    """Crawl the oldest known peers and scan them for more peers.

    Unresponsive peers are deleted. Responsive peers get their datestamp
    refreshed so they move to the back of the crawl queue. Only a small
    subset (20) of peers is crawled per run; the command is meant to run
    on an interval and prunes dead peers over time. If the database holds
    no peers, an initial list is bootstrapped from the configured
    upstream node.
    """
    # crawl the 20 stalest peers first
    peers = Peer.select().order_by(Peer.datetime.asc()).limit(20)
    for peer in peers:
        try:
            new_peers = retrieve_peers(peer.hostname, peer.port)
            if not new_peers:
                # no peer list returned - treat as dead and fall through
                raise Exception("dead node")
            # count only previously-unseen peers, for reporting
            new = [
                candidate
                for candidate in new_peers
                if not Peer.select().where(Peer.url == candidate).first()
            ]
            print(f"+++ Found {len(new)} more peers from {peer.url}")
            upsert_peer(peer.url)
            for new_peer in new_peers:
                upsert_peer(new_peer)
        except Exception:
            # unreachable or errored - drop it from the db
            print(f"--- Dead peer {peer.url}")
            peer.delete_instance()
    # if no peers are in the database, bootstrap from the upstream node
    if not peers:
        print(f"[.] Retrieving peers from {config.NODE_HOST}:{config.NODE_PORT}")
        # retrieve_peers returns None on connection failure - guard len()/iteration
        peers_to_scan = retrieve_peers(config.NODE_HOST, config.NODE_PORT) or []
        print(f"[+] Found {len(peers_to_scan)} initial peers to begin scraping.")
        for peer in peers_to_scan:
            upsert_peer(peer)
# helper function to add a new peer to the db or update an existing one
def save_peer(peer):
    """Add *peer* to the db with GeoIP metadata, or refresh its timestamp.

    Returns the ``urlparse`` result for *peer* so the caller can reach
    ``.hostname`` / ``.port`` for the next crawl hop.
    """
    with geoip2.database.Reader("./data/GeoLite2-City.mmdb") as reader:
        _url = urlparse(peer)
        # add new peer if not in db
        if not Peer.select().where(Peer.url == peer).exists():
            response = reader.city(_url.hostname)
            p = Peer(
                url=peer,
                country=response.country.name,
                city=response.city.name,
                postal=response.postal.code,
                lat=response.location.latitude,
                lon=response.location.longitude,
            )
            p.save()
            print(f"{peer} - saving new peer")
        # or update if it does
        else:
            p = Peer.select().where(Peer.url == peer).first()
            # use utcnow() to match the Peer.datetime default and hours_elapsed(),
            # which both work in UTC - naive local now() skews lifetime pruning
            p.datetime = datetime.utcnow()
            p.save()
    return _url
# iterate over the whole list until all peers have been scanned # rw_cache("map_peers", list(Peer.select().execute()))
# add new peers to the list
# skip the peer if we've seen it already @bp.cli.command("get_peers")
try: def get_peers():
while peers_to_scan:
_peer = peers_to_scan[0]
peers_to_scan.pop(0)
if _peer in all_peers:
print(f'already found {_peer}')
continue
all_peers.append(_peer)
try: try:
peer = save_peer(_peer) _get_peers()
peers_to_scan += retrieve_peers(peer.hostname, peer.port)
except:
pass
except KeyboardInterrupt: except KeyboardInterrupt:
print('Stopped.') print("Stopped")
except Exception as e:
print( print(f"Error: {e}")
f"[+] Found {len(all_peers)} peers from {config.NODE_HOST}:{config.NODE_PORT}"
)
print("[+] Deleting old Monero p2p peers")
for p in Peer.select():
if p.hours_elapsed() > config.PEER_LIFETIME:
print(f"[.] Deleting {p.url}")
p.delete_instance()
rw_cache("map_peers", list(Peer.select().execute()))
@bp.cli.command("validate") @bp.cli.command("validate")
def validate(): def validate():

@ -13,7 +13,6 @@ TOR_PORT = environ.get("TOR_PORT", 9050)
NODE_HOST = environ.get("NODE_HOST", "singapore.node.xmr.pm") NODE_HOST = environ.get("NODE_HOST", "singapore.node.xmr.pm")
NODE_PORT = environ.get("NODE_PORT", 18080) NODE_PORT = environ.get("NODE_PORT", 18080)
HEALTHY_BLOCK_DIFF = int(environ.get("HEALTHY_BLOCK_DIFF", 500)) HEALTHY_BLOCK_DIFF = int(environ.get("HEALTHY_BLOCK_DIFF", 500))
PEER_LIFETIME = int(environ.get("PEER_LIFETIME", 96))
I2P_HOST = environ.get("I2P_HOST", "127.0.0.1") I2P_HOST = environ.get("I2P_HOST", "127.0.0.1")
I2P_PORT = environ.get("I2P_PORT", 4444) I2P_PORT = environ.get("I2P_PORT", 4444)
DONATE_ADDRESS = environ.get("DONATE_ADDRESS", "878ca636oEHcjZ3Zimuwx4AZTbeMtqY11eVihramcBgVciC254jpUm9AxbwAd57nxv1HRE9AGG1cXBkvmRzfsFXh1L6f2CU") DONATE_ADDRESS = environ.get("DONATE_ADDRESS", "878ca636oEHcjZ3Zimuwx4AZTbeMtqY11eVihramcBgVciC254jpUm9AxbwAd57nxv1HRE9AGG1cXBkvmRzfsFXh1L6f2CU")

@ -102,13 +102,11 @@ def rw_cache(key_name, data=None):
def retrieve_peers(host, port): def retrieve_peers(host, port):
try: try:
print(f"[.] Connecting to {host}:{port}")
sock = socket.socket() sock = socket.socket()
sock.settimeout(5) sock.settimeout(5)
sock.connect((host, int(port))) sock.connect((host, int(port)))
except: except Exception as e:
sys.stderr.write("unable to connect to %s:%d\n" % (host, int([port]))) return None
sys.exit()
bucket = Bucket.create_handshake_request() bucket = Bucket.create_handshake_request()
@ -121,11 +119,9 @@ def retrieve_peers(host, port):
while 1: while 1:
buffer = sock.recv(8) buffer = sock.recv(8)
if not buffer: if not buffer:
sys.stderr.write("Invalid response; exiting\n")
break break
if not buffer.startswith(bytes(LEVIN_SIGNATURE)): if not buffer.startswith(bytes(LEVIN_SIGNATURE)):
sys.stderr.write("Invalid response; exiting\n")
break break
bucket = Bucket.from_buffer(signature=buffer, sock=sock) bucket = Bucket.from_buffer(signature=buffer, sock=sock)

@ -61,6 +61,14 @@ class Peer(Model):
lon = FloatField(null=True) lon = FloatField(null=True)
datetime = DateTimeField(default=datetime.utcnow) datetime = DateTimeField(default=datetime.utcnow)
@property
def port(self):
    """TCP port parsed from this peer's stored URL (None if absent)."""
    parsed = urlparse(self.url)
    return parsed.port
@property
def hostname(self):
    """Host portion parsed from this peer's stored URL."""
    parsed = urlparse(self.url)
    return parsed.hostname
def hours_elapsed(self): def hours_elapsed(self):
now = datetime.utcnow() now = datetime.utcnow()
diff = now - self.datetime diff = now - self.datetime

Loading…
Cancel
Save