From 0d7da8ac1b1013203daccf6b5c50d07f5e9ed72a Mon Sep 17 00:00:00 2001
From: lalanza808 <catacombkid1@gmail.com>
Date: Fri, 27 Mar 2015 13:42:20 -0700
Subject: [PATCH] first push

---
 pirate.py | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 readme.md |  24 +++++++++
 2 files changed, 168 insertions(+)
 create mode 100755 pirate.py
 create mode 100644 readme.md

diff --git a/pirate.py b/pirate.py
new file mode 100755
index 0000000..f463a65
--- /dev/null
+++ b/pirate.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python
+
+
+"""
+The Pirate Bay scraper -
+
+Uses 2 external libraries for scraping HTML elements from ThePirateBay.
+Asks user for a search selection, offers a list of choices, and grabs the magnet link for the selection.
+
+"""
+
+__author__ = 'LANCE'
+
+
+# Built-in libraries
+from platform import system as operatingSystem
+from os import path, system
+from urllib import urlretrieve
+from re import search
+from time import sleep
+
+# 3rd party libraries
+import requests
+import bs4
+
+
+results = {} #Menu number -> torrent href, filled by analyzeURL
+links = [] #Torrent hrefs collected from the search results page
+choice = "" #Not read by any function in this file; chooseTorrent assigns a local of the same name
+tpb = "https://thepiratebay.se" #Base URL prepended to search and torrent paths
+	
+def getSearchURL():
+	"""
+	Takes input string to search for on TPB.
+	Formats string into proper url
+	"""
+	searchString = raw_input("[+] What would you like to search?\n>")
+	searchURL = "{}/search/{}/0/7/0".format(tpb, searchString) #/0/7/0 tells TPB to sort descending by seeds
+	pageSource = requests.get(searchURL, verify=False).text #Use requests lib to fetch page source for bs4 parsing
+	analyzeURL(pageSource) #Run analyzeURL function, passing page source
+	
+
+def analyzeURL(source):
+	"""
+	Takes the page source and parses it with BeautifulSoup.
+	Finds all anchor elements on the page, pre-sorted by seeders
+	Enumerates list of elements, and adds them to results dictionary
+	"""
+	print "\n"
+	global links
+	global results
+
+	pageSoup = bs4.BeautifulSoup(source) #Create Beautiful Soup object
+	for link in pageSoup.find_all('a'): #Find all anchor elements in page source
+		if link.get('href').startswith('/torrent'): #Filter items that don't start with /torrent
+			links.append(link.get('href')) #Set the initial results to array 'links'
+
+	for number,link in enumerate(links): #Enumerate the array so the numbers start at 0
+		results.update({number:link}) #Append results to results dictionary
+		print "({}) {}".format(number, path.basename(link))
+
+	if results: #If dict is not empty, continue with script
+		print "\n(98) Search again"
+		print "(99) Exit"
+		chooseTorrent()
+	else: #If dict is empty (no results from search) re-run script
+		print "\nNo results found. Try again."
+		results = {}
+		links = []
+		getSearchURL() #Loop back to script start
+
+	
+def chooseTorrent():
+	"""
+	Asks for selection of torrent, and prepares for the download
+	"""
+	global links, results
+	try:
+		selection = int(raw_input("\n*** Enter the digit of the torrent to download.\n>"))
+		if selection == 98:
+			print "\nStarting over"
+			results = {}
+			links = []
+			getSearchURL() #Loop back to start
+		elif selection == 99:
+			print "\nBye.\n"
+			exit() #Quit script
+		elif selection in results: #If selection exists, set value to 'choice' variable
+			choice = results[selection] #Updates variable based on key provided above, matches it with results dict
+			downloadTorrent(choice)
+		else: #If anything other than 98, 99, or valid key number entered, loop back to selection input
+			print "\nNot a valid number"
+			chooseTorrent()
+	except ValueError:
+		print "\nThat is not a digit."
+		chooseTorrent()
+	
+
+def downloadTorrent(torrent):
+	"""
+	Grabs the first magnet link and initiates the download
+	"""
+	# TPB no longer uses torrents as subdomain. Changing script to direct add magnet links
+	#torrentName = search("/torrent/(.*)", torrent) #Strip out first portion of string (/torrent/)
+	#torrentURL = "https://torrents.thepiratebay.se/{}.torrent".format(torrentName.group(1)) #TPB uses subdomain 'torrents' to host .torrent files
+
+	magnetLinks = []
+	torrentPage = requests.get("{}/{}".format(tpb, torrent), verify=False)
+	torrentPageSoup = bs4.BeautifulSoup(torrentPage.content)
+	for link in torrentPageSoup.find_all('a'):
+		if str(link.get('href')).startswith('magnet:?xt'):
+			magnetLinks.append(link.get('href'))
+	torrentURL = magnetLinks[0]
+	print "\n*** Adding magnet link:\n\n{}".format(torrentURL)
+	#urlretrieve(torrentURL, path.basename(torrentURL)) #Save torrent file as same name
+	checkOS(path.basename(torrentURL)) #Check host operating system for proper torrent client
+
+
+def checkOS(torrentDownload):
+	"""
+	Checks host operating system and determines how to start the torrent transfer
+	"""
+	if operatingSystem() == "Windows": #Windows is finished at this point
+		openCode = system("start {}".format(torrentDownload)) #Simply open it, default torrent client opens
+		if openCode == 0:
+			exit(0) #Clean exit
+		else:
+			print "\nYou need a torrent client installed.\n"
+			exit(1)
+	else:
+		checkTransmission(torrentDownload) #For linux systems running transmission-cli
+		
+def checkTransmission(torrentDownload):
+	"""
+	Checks for the existence of transmission-remote, necessary for starting torrents
+	"""
+	whichCode = system("which transmission-remote")
+	print "\n"
+	if whichCode == 0:
+		system("transmission-remote localhost:9091 -a {}".format(torrentDownload))
+		
+	
+if __name__ == "__main__": #Only start the prompt when run as a script, not on import
+	getSearchURL()
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..ff46cd5
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,24 @@
+# Pirate
+
+Command line HTML parser/scraper used for grabbing torrents from [ThePirateBay](https://thepiratebay.se).
+
+Initially made for the older version with torrents hosted from a separate subdomain, but now modified for grabbing magnet links instead.
+
+----
+
+## Usage
+
+Place the script somewhere in your executable path. I like ~/bin
+
+```
+$ mkdir ~/bin
+$ echo 'PATH=$PATH:~/bin' >> ~/.bashrc
+```
+
+Then just run it
+
+```
+$ pirate.py
+```
+
+The rest is self-explanatory.