You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

196 lines
7.0 KiB
Python

#!/usr/bin/env python
"""
The Pirate Bay scraper - This is the client
Uses 3 external libraries for scraping HTML elements from ThePirateBay and interacting with transmission-daemon.
Asks user for a search selection, offers a list of choices, and grabs the magnet link for the selection in a menu style fashion.
"""
__author__ = 'LANCE - https://github.com/lalanza808'
##################################################
# Libraries
# Built-in libraries
from os import path
import argparse
# 3rd party libraries
import requests
import bs4
import transmissionrpc
##################################################
# Variables
# Transmission RPC host(s). Leave empty to require --server/-s on the command
# line; RPC calls in this script are made against the first entry. The host can
# be any machine running transmission-daemon with a firewall that allows
# inbound TCP/9091 (transmissionrpc).
rpcserver = []
# Search state shared by the functions below:
#   tpb_search_results - menu number shown to the user -> torrent page link
#   tpb_torrent_links  - "/torrent..." hrefs collected from the results page
tpb_search_results = {}
tpb_torrent_links = []
# Module-level default for the chosen link; no function in this file declares
# it global, so it is never rebound from here.
user_torrent_selection = ""
# Current/working PirateBay URL (no trailing slash)
tpb = "https://thepiratebay.org"
# Squelch HTTPS insecure warnings; page requests are made with verify=False
requests.packages.urllib3.disable_warnings()
##################################################
# Parsing and Arguments
# Command-line interface. Every option is optional; with no options at all the
# script prompts for a search string interactively.
parser = argparse.ArgumentParser(
    description='Scrape The Pirate Bay for torrents.'
)
# Search term; skips the interactive prompt.
parser.add_argument(
    '--query', '-q',
    dest='arg_search_string',
    help='The query string to search for on TPB',
)
# Flag: take the first (most seeded) result without showing the menu.
parser.add_argument(
    '--top', '-t',
    dest='arg_take_top',
    action='store_true',
    help='Automatically grab the torrent with most seeds',
)
# Magnet link / torrent file that is queued directly, bypassing any search.
parser.add_argument(
    '--file', '-f',
    dest='arg_magnet_link',
    help='Direct link to magnet link or torrent file',
)
# Torrent page whose magnet link should be queued, bypassing any search.
parser.add_argument(
    '--url', '-u',
    dest='arg_torrent_page',
    help='URL of the torrent on TPB',
)
# Transmission host to talk to.
parser.add_argument(
    '--server', '-s',
    dest='arg_server_host',
    help='IP/hostname of the Transmission server',
)
args = parser.parse_args()
##################################################
# Functions
#1
def Check_Transmission_Listener():
    """
    Checks that transmission-daemon is reachable, then starts the search flow.

    The host comes from --server/-s when given, otherwise from the first entry
    of the module-level rpcserver list. Exits with status 9 when neither is
    set, 2 when the RPC connection fails and 1 on Ctrl-C.
    """
    if args.arg_server_host:
        # Put the command-line host first: every RPC call in this script reads
        # rpcserver[0], so appending would silently ignore --server whenever a
        # default host is already configured in the list.
        rpcserver.insert(0, args.arg_server_host)
    elif not rpcserver:
        print("No Transmission server specified! Quitting!")
        exit(9)

    try:
        # Connecting is the reachability probe; the client object is not kept.
        transmissionrpc.Client(rpcserver[0], port=9091)
        # Get_Search_URL drives the rest of the program, so the handlers below
        # also cover Ctrl-C and Transmission errors raised further down.
        Get_Search_URL()
    except KeyboardInterrupt:
        print("\n\nBye.")
        exit(1)
    except transmissionrpc.error.TransmissionError:
        print("[!] Transmission-daemon not listening on {}!".format(rpcserver[0]))
        exit(2)
#2
def Get_Search_URL():
    """
    Dispatches on the command-line arguments and, when searching, fetches the
    TPB results page.

    --file : adds the given magnet link/torrent file to Transmission and exits.
    --url  : hands the torrent page URL to Download_Torrent_From_URL and exits.
    --query, or an interactive prompt when no query was supplied: builds the
    search URL, downloads the page and passes its HTML to Get_Torrent_Links.
    """
    # quote() lives in urllib on Python 2 and urllib.parse on Python 3;
    # import whichever one exists.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    # If magnet link supplied, directly add to queue, exit script
    if args.arg_magnet_link:
        transmissionrpc.Client(rpcserver[0]).add_torrent(args.arg_magnet_link)
        exit(0)

    # If URL supplied, skip to Download_Torrent_From_URL function
    if args.arg_torrent_page:
        Download_Torrent_From_URL(args.arg_torrent_page)
        exit(0)

    # Use the supplied search string, or ask for one when it is missing/empty
    tpb_search_string = args.arg_search_string
    if not tpb_search_string:
        tpb_search_string = raw_input("[+] What would you like to search?\n>>> ")

    # Percent-encode the query (including "/") so characters such as "/", "?",
    # "#" or "%" stay inside the search path segment instead of altering the URL.
    # /0/7/0 tells TPB to sort descending by seeds
    tpb_search_url = "{}/search/{}/0/7/0".format(
        tpb, quote(tpb_search_string, safe="")
    )
    # Fetch the page source and hand it over for BS4 parsing
    tpb_torrent_page_source = requests.get(tpb_search_url, verify=False).text
    Get_Torrent_Links(tpb_torrent_page_source)
#3
def Get_Torrent_Links(source):
    """
    Parses a TPB search results page and offers the torrents found on it.

    source is the HTML of the results page. Every anchor whose href starts
    with /torrent is appended to the module-level tpb_torrent_links list.
    With --top and at least one result, the first link is downloaded right
    away. Otherwise the links are numbered, stored in tpb_search_results and
    printed as a menu before asking for a selection. When nothing was found
    the stored results and query are cleared and the search starts over.
    """
    global tpb_torrent_links, tpb_search_results
    print("\n")

    tpb_torrent_page_soup = bs4.BeautifulSoup(source, "html.parser")
    for link in tpb_torrent_page_soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href give None; skip them instead of crashing
        # on None.startswith.
        if href and href.startswith('/torrent'):
            tpb_torrent_links.append(href)

    # If -t is supplied, bypass the menu and download the top torrent
    if args.arg_take_top and tpb_torrent_links:
        # The stored links begin with "/", so strip it before joining to
        # avoid a double slash after the host name.
        Download_Torrent_From_URL(
            "{}/{}".format(tpb, tpb_torrent_links[0].lstrip("/"))
        )
        return

    # Print links in numeric order to the user, numbering from 0
    for number, link in enumerate(tpb_torrent_links):
        tpb_search_results[number] = link
        print("({}) {}".format(number, path.basename(link)))

    if tpb_search_results:
        print("\n(98) Search again")
        print("(99) Exit")
        Get_User_Selection()
    else:
        # No results from search: reset state and loop back to script start
        print("\nNo results found. Try again.")
        tpb_search_results = {}
        tpb_torrent_links = []
        args.arg_search_string = ''
        Get_Search_URL()
#4
def Get_User_Selection():
    """
    Asks for the number of the torrent to download until a usable choice is
    entered.

    98 clears the stored results and query and starts a new search, 99 exits
    the script, and any key of tpb_search_results downloads that torrent.
    Any other input prints a message and asks again.
    """
    global tpb_torrent_links, tpb_search_results
    # Re-prompt in a loop rather than by calling this function again, so
    # repeated bad input cannot grow the call stack.
    while True:
        # Only the int() conversion is guarded: a ValueError raised later by
        # the search/download calls must not be reported as bad menu input.
        try:
            selection = int(raw_input("\n[+] Enter the number of the torrent to download.\n>>> "))
        except ValueError:
            print("\nThat is not a digit.")
            continue

        # Zeroize variables, loop back to script start
        if selection == 98:
            print("\nStarting over")
            tpb_search_results = {}
            tpb_torrent_links = []
            args.arg_search_string = ''
            Get_Search_URL()
            return

        # Exit script
        if selection == 99:
            print("\nBye.\n")
            exit()

        # Valid menu number: add the matching torrent to the queue
        if selection in tpb_search_results:
            chosen_link = tpb_search_results[selection]
            # Stored links begin with "/"; strip it so the joined URL does not
            # contain a double slash after the host name.
            Download_Torrent_From_URL(
                "{}/{}".format(tpb, chosen_link.lstrip("/"))
            )
            return

        # Anything other than 98, 99 or a listed number: ask again
        print("\nNot a valid number")
#5
def Download_Torrent_From_URL(tpb_torrent_url):
    """
    Grabs the first magnet link on a torrent page and adds it to the queue via
    RPC to rpcserver.

    tpb_torrent_url is the address of the torrent's page. The script exits
    with status 0 once the link has been queued, or with status 3 when the
    page contains no magnet link.
    """
    tpb_torrent_page = requests.get(tpb_torrent_url, verify=False)
    tpb_torrent_page_soup = bs4.BeautifulSoup(tpb_torrent_page.content, "html.parser")

    # Stop at the first matching anchor; only that link is ever used.
    tpb_magnet_link = None
    for link in tpb_torrent_page_soup.find_all('a'):
        href = link.get('href')
        # Test the href directly (None-safe) rather than through str(), which
        # under Python 2 raises on links containing non-ASCII characters.
        if href and href.startswith('magnet:?xt'):
            tpb_magnet_link = href
            break

    # A page without a magnet link used to crash with IndexError.
    if tpb_magnet_link is None:
        print("\n[!] No magnet link found on:\n\n{}".format(tpb_torrent_url))
        exit(3)

    print("\n[+] Adding magnet link for torrent:\n\n{}".format(tpb_torrent_url))
    transmissionrpc.Client(rpcserver[0]).add_torrent(tpb_magnet_link)
    print("\n[.] Done!\n")
    exit(0)
# Script entry point: only start the interactive flow when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    Check_Transmission_Listener()