You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
196 lines
7.0 KiB
Python
196 lines
7.0 KiB
Python
#!/usr/bin/env python
|
|
|
|
"""
|
|
The Pirate Bay scraper - This is the client
|
|
|
|
Uses 3 external libraries for scraping HTML elements from ThePirateBay and interacting with transmission-daemon.
|
|
Asks user for a search selection, offers a list of choices, and grabs the magnet link for the selection in a menu style fashion.
|
|
|
|
"""
|
|
|
|
__author__ = 'LANCE - https://github.com/lalanza808'
|
|
|
|
##################################################
|
|
# Libraries
|
|
|
|
# Built-in libraries
|
|
from os import path
|
|
import argparse
|
|
|
|
# 3rd party libraries
|
|
import requests
|
|
import bs4
|
|
import transmissionrpc
|
|
|
|
##################################################
|
|
# Variables
|
|
|
|
# Current/working PirateBay base URL; search and torrent page URLs are built
# on top of it.
tpb = "https://thepiratebay.org"

# Transmission RPC host list. The script only ever talks to rpcserver[0]; the
# host can be any machine running transmission-daemon whose firewall allows
# inbound TCP/9091 (transmissionrpc). When left empty the host has to be
# given on the command line with --server/-s.
rpcserver = []

# State shared between the search/menu functions
tpb_torrent_links = []        # torrent page paths scraped from the search page
tpb_search_results = {}       # menu number -> torrent page path
user_torrent_selection = ""

# Pages are fetched with verify=False, so silence the insecure-HTTPS warnings
requests.packages.urllib3.disable_warnings()
|
|
|
|
##################################################
|
|
# Parsing and Arguments
|
|
|
|
# Command-line interface. Every option is optional; with no options at all
# the script prompts interactively for a search string.
parser = argparse.ArgumentParser(
    description='Scrape The Pirate Bay for torrents.')

# What to search for
parser.add_argument(
    '--query', '-q',
    dest='arg_search_string',
    help='The query string to search for on TPB')

# Skip the selection menu and take the first (most seeded) result
parser.add_argument(
    '--top', '-t',
    dest='arg_take_top',
    action='store_true',
    help='Automatically grab the torrent with most seeds')

# Bypass searching entirely: queue a magnet link / torrent file directly
parser.add_argument(
    '--file', '-f',
    dest='arg_magnet_link',
    help='Direct link to magnet link or torrent file')

# Bypass searching entirely: scrape the magnet link from a known torrent page
parser.add_argument(
    '--url', '-u',
    dest='arg_torrent_page',
    help='URL of the torrent on TPB')

# Which Transmission daemon to send the torrent to
parser.add_argument(
    '--server', '-s',
    dest='arg_server_host',
    help='IP/hostname of the Transmission server')

args = parser.parse_args()
|
|
|
|
##################################################
|
|
# Functions
|
|
|
|
#1
|
|
def Check_Transmission_Listener():
    """
    Check that transmission-daemon is reachable, then start the search flow.

    The server host comes from --server/-s when supplied, otherwise from the
    first entry of the module-level ``rpcserver`` list.

    Exit statuses:
        9 - no server configured and none given on the command line
        2 - transmissionrpc raised TransmissionError (reported as the daemon
            not listening)
        1 - the user pressed Ctrl-C anywhere in the flow started from here
    """
    if args.arg_server_host:
        # Every RPC call in this script uses rpcserver[0]. Inserting at the
        # front (instead of appending) makes an explicit --server win over
        # any host already configured in the list.
        rpcserver.insert(0, args.arg_server_host)
    elif not rpcserver:
        print("No Transmission server specified! Quitting!")
        exit(9)

    try:
        # Creating the client doubles as the reachability check: it is
        # expected to raise TransmissionError when the daemon does not answer.
        transmissionrpc.Client(rpcserver[0], port=9091)
        # The rest of the script runs inside this try so that Ctrl-C at any
        # prompt ends the program cleanly.
        Get_Search_URL()
    except KeyboardInterrupt:
        print("\n\nBye.")
        exit(1)
    except transmissionrpc.error.TransmissionError:
        print("[!] Transmission-daemon not listening on {}!".format(rpcserver[0]))
        exit(2)
|
|
|
|
#2
|
|
def Get_Search_URL():
    """
    Work out what the user wants and fetch the TPB search page if needed.

    Command-line arguments are checked in this order:
      --file/-f   add the given magnet link/torrent file to Transmission
                  directly and exit with status 0
      --url/-u    pass the torrent page URL to Download_Torrent_From_URL and
                  exit with status 0
      --query/-q  use the given string as the search term
      (none)      prompt the user for a search term

    The search term is formatted into a TPB search URL, the page is fetched
    and its HTML source is handed to Get_Torrent_Links.
    """
    # Imported here so the block brings its own dependency; the fallback
    # covers the module's location on Python 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    # If magnet link supplied, directly add to queue, exit script
    if args.arg_magnet_link:
        transmissionrpc.Client(rpcserver[0]).add_torrent(args.arg_magnet_link)
        exit(0)
    # If URL supplied, skip to Download_Torrent_From_URL function
    elif args.arg_torrent_page:
        Download_Torrent_From_URL(args.arg_torrent_page)
        exit(0)
    # If search string provided, use it for Get_Torrent_Links function
    elif args.arg_search_string:
        tpb_search_string = args.arg_search_string
    # If nothing supplied, ask user for search string
    else:
        tpb_search_string = raw_input("[+] What would you like to search?\n>>> ")

    # The term becomes a single path segment, so percent-encode everything
    # (safe='' also encodes '/'). Otherwise characters such as '/', '?', '#'
    # or '%' in the query would change the structure of the URL.
    encoded_search_string = quote(tpb_search_string, safe='')

    # /0/7/0 tells TPB to sort descending by seeds
    tpb_search_url = "{}/search/{}/0/7/0".format(tpb, encoded_search_string)

    # Fetch the page source for bs4 parsing
    tpb_torrent_page_source = requests.get(tpb_search_url, verify=False).text

    Get_Torrent_Links(tpb_torrent_page_source)
|
|
|
|
#3
|
|
def Get_Torrent_Links(source):
    """
    Parse a TPB search page and present (or auto-select) the results.

    source -- HTML source of the search page. The search URL asks TPB to
              sort by seeders, so links are kept in page order.

    Every anchor whose href starts with '/torrent' is appended to the
    module-level ``tpb_torrent_links`` list. Then either:
      * --top/-t was given and there is at least one result: the first link
        is passed to Download_Torrent_From_URL, or
      * the links are numbered from 0, stored in ``tpb_search_results`` and
        printed as a menu, followed by Get_User_Selection, or
      * nothing was found: the shared state is reset and Get_Search_URL is
        run again so the user can enter a new search.
    """
    global tpb_torrent_links, tpb_search_results

    print("\n")

    # Update the tpb_torrent_links array with the returned torrents
    tpb_torrent_page_soup = bs4.BeautifulSoup(source, "html.parser")
    for link in tpb_torrent_page_soup.find_all('a'):
        # Anchors without an href attribute give None; treat that as an empty
        # string so they are skipped instead of raising AttributeError.
        href = link.get('href') or ''
        # Only links under /torrent are torrent detail pages
        if href.startswith('/torrent'):
            tpb_torrent_links.append(href)

    # If -t is supplied, bypass the menu and go on to download the top torrent
    if args.arg_take_top and tpb_torrent_links:
        # The stored paths already start with '/', so join without adding
        # another slash.
        Download_Torrent_From_URL("{}{}".format(tpb, tpb_torrent_links[0]))
    # Print links in numeric order to the user
    else:
        for number, link in enumerate(tpb_torrent_links):
            tpb_search_results[number] = link
            print("({}) {}".format(number, path.basename(link)))

        if tpb_search_results:
            print("\n(98) Search again")
            print("(99) Exit")
            Get_User_Selection()
        else:
            # No results: clear state and the stored query so the next pass
            # through Get_Search_URL prompts for a new search string.
            print("\nNo results found. Try again.")
            tpb_search_results = {}
            tpb_torrent_links = []
            args.arg_search_string = ''
            Get_Search_URL()
|
|
|
|
#4
|
|
def Get_User_Selection():
    """
    Ask the user which numbered search result to download.

    Accepted input:
      98              reset the search state and start a new search
      99              exit the script
      a listed number pass that torrent's page URL to
                      Download_Torrent_From_URL
    Anything else (an unlisted number or non-numeric text) prints a message
    and asks again.
    """
    global tpb_torrent_links, tpb_search_results

    # Loop until a usable answer is given
    while True:
        # Only the int() conversion is guarded. A ValueError raised further
        # down (for example by the HTTP layer while downloading) must not be
        # mistaken for bad menu input.
        try:
            selection = int(raw_input("\n[+] Enter the number of the torrent to download.\n>>> "))
        except ValueError:
            print("\nThat is not a digit.")
            continue

        # Zeroize variables, loop back to script start
        if selection == 98:
            print("\nStarting over")
            tpb_search_results = {}
            tpb_torrent_links = []
            args.arg_search_string = ''
            Get_Search_URL()
            return

        # Exit script
        if selection == 99:
            print("\nBye.\n")
            exit()

        # If valid number, move to next function to add to queue
        if selection in tpb_search_results:
            # Stored paths already start with '/', so join without adding
            # another slash.
            Download_Torrent_From_URL("{}{}".format(tpb, tpb_search_results[selection]))
            return

        # Anything other than 98, 99 or a listed number: ask again
        print("\nNot a valid number")
|
|
|
|
#5
|
|
def Download_Torrent_From_URL(tpb_torrent_url):
    """
    Queue the first magnet link found on a torrent page.

    tpb_torrent_url -- full URL of the torrent's page.

    The page is fetched and scanned for the first anchor whose href starts
    with 'magnet:?xt'. That link is added to the Transmission queue on
    rpcserver[0] and the script exits with status 0. If the page contains no
    magnet link, a message is printed and the script exits with status 3.
    """
    tpb_torrent_page = requests.get(tpb_torrent_url, verify=False)
    tpb_torrent_page_soup = bs4.BeautifulSoup(tpb_torrent_page.content, "html.parser")

    # Only the first magnet link is needed, so stop at the first match.
    tpb_magnet_link = None
    for link in tpb_torrent_page_soup.find_all('a'):
        href = link.get('href')
        # href is None for anchors without the attribute. Testing it directly
        # (rather than through str()) also avoids encoding errors on
        # non-ASCII hrefs under Python 2.
        if href and href.startswith('magnet:?xt'):
            tpb_magnet_link = href
            break

    # A wrong URL or an unexpected page would otherwise surface as an
    # IndexError traceback; report it instead.
    if tpb_magnet_link is None:
        print("\n[!] No magnet link found on:\n\n{}".format(tpb_torrent_url))
        exit(3)

    print("\n[+] Adding magnet link for torrent:\n\n{}".format(tpb_torrent_url))
    transmissionrpc.Client(rpcserver[0]).add_torrent(tpb_magnet_link)
    print("\n[.] Done!\n")
    exit(0)
|
|
|
|
# Run only when executed as a script: verify the Transmission server first,
# which in turn starts the search/selection flow.
if __name__ == "__main__":
    Check_Transmission_Listener()
|