You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

160 lines
4.4 KiB
Python

#!/usr/bin/env python
"""
The Pirate Bay scraper -
Uses 3 external libraries for scraping HTML elements from ThePirateBay and interacting with transmission-daemon.
Asks user for a search selection, offers a list of choices, and grabs the magnet link for the selection.
"""
__author__ = 'LANCE - https://github.com/lalanza808'
##################################################
# Libraries
# Built-in libraries
from os import path
# 3rd party libraries
import requests
import bs4
import transmissionrpc
##################################################
# Configuration and shared state
# Base URL of the PirateBay site; search and torrent paths are appended to it
tpb = "https://thepiratebay.se"
# IP of the machine running transmission-daemon. Probably localhost
transmissionServer = ''
# Search state shared between the functions below via `global`:
# `links` is the list of scraped torrent hrefs, `results` maps menu number -> href
links = []
results = {}
# Empty default; no function visible in this file declares it global
choice = ""
# Page fetches are made with verify=False, so silence the HTTPS insecure warnings
requests.packages.urllib3.disable_warnings()
##################################################
# Functions
def checkTransmission():
    """
    Entry point of the interactive session.
    Opens an RPC client against transmissionServer on port 9091 as a liveness
    check, then hands control to getSearchURL().
    Exits with status 2 if a TransmissionError is raised and with status 1
    if the user interrupts with Ctrl-C; both handlers cover the whole
    session, not only the initial connection.
    """
    try:
        # The client object is discarded; only a successful connect matters here
        transmissionrpc.Client(transmissionServer, port=9091)
        getSearchURL()
    except transmissionrpc.error.TransmissionError:
        print("[!] Transmission-daemon not running!")
        exit(2)
    except KeyboardInterrupt:
        print("\n\nLater bro.")
        exit(1)
def getSearchURL():
    """
    Takes input string to search for on TPB.
    Percent-encodes the string and formats it into the search URL.
    Gets HTML source of the search page and passes it to analyzeURL().
    """
    # Imported locally so this function carries its own dependency;
    # urllib.quote is the Python 2 location, urllib.parse.quote the Python 3 one
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    searchString = raw_input("[+] What would you like to search?\n>>> ")
    # safe='' also escapes '/', so characters such as '/', '?', '#' or '%'
    # in the search term cannot change the path or query of the request
    encodedSearch = quote(searchString, safe='')
    # /0/7/0 tells TPB to sort descending by seeds
    searchURL = "{}/search/{}/0/7/0".format(tpb, encodedSearch)
    # verify=False skips TLS certificate verification for this request
    pageSource = requests.get(searchURL, verify=False).text
    analyzeURL(pageSource)
def analyzeURL(source):
    """
    Takes the page source and parses it with BeautifulSoup.
    Collects the href of every anchor that points at /torrent..., in page
    order (the search URL asks for results pre-sorted by seeders).
    Stores them in the global list 'links' and the global dict 'results'
    (menu number -> href), prints the numbered menu, then calls
    chooseTorrent(); if nothing matched, clears the state and calls
    getSearchURL() again.
    """
    global links, results
    print("\n")
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed
    pageSoup = bs4.BeautifulSoup(source, "html.parser")
    # href=True skips anchors without an href attribute, which would
    # otherwise make .startswith() fail on None.
    # The list is rebuilt rather than appended to, so entries left over
    # from an earlier search can never leak into this one.
    links = [
        anchor['href']
        for anchor in pageSoup.find_all('a', href=True)
        if anchor['href'].startswith('/torrent')
    ]
    results = dict(enumerate(links))  # Menu numbers start at 0
    for number, link in enumerate(links):
        print("({}) {}".format(number, path.basename(link)))
    if results:  # At least one torrent found, offer the menu
        print("\n(98) Search again")
        print("(99) Exit")
        chooseTorrent()
    else:  # No results from search, reset and ask for a new search
        print("\nNo results found. Try again.")
        results = {}
        links = []
        getSearchURL()
def chooseTorrent():
    """
    Asks for selection of torrent, and prepares for the download.
    98 clears the stored results and starts a new search, 99 exits the
    script, and any key present in the 'results' dict is handed to
    downloadTorrent(). Anything else re-prompts.
    """
    global links, results
    # Loop instead of recursing, so repeated bad input does not grow the stack
    while True:
        # Only the int() conversion is guarded: a ValueError raised later by
        # the search or download code must not be reported as bad user input
        try:
            selection = int(raw_input("\n[+] Enter the digit of the torrent to download.\n>>> "))
        except ValueError:
            print("\nThat is not a digit.")
            continue
        if selection == 98:
            print("\nStarting over")
            results = {}
            links = []
            getSearchURL()  # Loop back to start
            return
        if selection == 99:
            print("\nBye.\n")
            exit()  # Quit script
        if selection in results:
            downloadTorrent(results[selection])
            return
        # Anything other than 98, 99, or a listed number: ask again
        print("\nNot a valid number")
def downloadTorrent(torrent):
    """
    Grabs the first magnet link from the torrent's page and initiates the
    download using the transmissionrpc python library.
    'torrent' is the site-relative href of the torrent page (the values
    stored by analyzeURL start with '/torrent').
    Prints a message and returns without adding anything if the page has
    no magnet link; exits with status 2 if transmission rejects the add.
    """
    # Join base URL and path with exactly one slash, whether or not the
    # href carries its own leading '/'
    torrentURL = "{}/{}".format(tpb.rstrip('/'), torrent.lstrip('/'))
    torrentPage = requests.get(torrentURL, verify=False)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed
    torrentPageSoup = bs4.BeautifulSoup(torrentPage.content, "html.parser")
    magnetLink = None
    # href=True skips anchors without an href; stop at the first magnet link
    for link in torrentPageSoup.find_all('a', href=True):
        if link['href'].startswith('magnet:?xt'):
            magnetLink = link['href']
            break
    if magnetLink is None:
        print("\n[!] No magnet link found for torrent:\n\n{}".format(torrent))
        return
    print("\n[+] Adding magnet link for torrent:\n\n{}".format(torrent))
    try:
        # Same port as the liveness check in checkTransmission()
        transmissionrpc.Client(transmissionServer, port=9091).add_torrent(magnetLink)
    except transmissionrpc.error.TransmissionError as err:
        # Report the real failure here instead of letting it surface as a
        # generic "daemon not running" message further up the call chain
        print("\n[!] Transmission could not add the torrent: {}".format(err))
        exit(2)
    print("\n[.] Done!\n")
# Start the interactive session only when run as a script, not when imported
if __name__ == "__main__":
    checkTransmission()