def _download_torrent_file(self, desired_item_name, tracker_results): downloaded = False for tracker_name, tracker_url in tracker_results.items(): if not downloaded: torrent_file_name = '%s.torrent' % \ desired_item_name.replace(' ', '') file_path = os.path.join(os.getcwd(), torrent_file_name) print 'Downloading torrent file from %s...' % \ tracker_name tracker_file = "%s.py" % tracker_name tracker_path = os.path.join(os.path.dirname(__file__), 'trackers', tracker_file) tracker = self._get_tracker_object(tracker_path) #url is the actual torrent's url on the tracker's site url = tracker.extract_download_url(tracker_url) if url: try: FIVE_SECONDS = 5 web_file = urllib2.urlopen(url, None, FIVE_SECONDS) data = web_file.read() web_file.close() if self._valid_torrent_file(data): try: utils.write_file(file_path, data) downloaded = True except IOError: downloaded = False except urllib2.URLError: downloaded = False if downloaded: return file_path else: raise DownloaderError('Unable to download from any tracker')
def _find_trackers(self, secondary_results_link, attempts=0):
    '''Identifies trackers that have the file that we can download from.

    secondary_results_link -- url of the torrentz 'choose a tracker' page.
    attempts -- internal retry counter; fetching is retried up to 3 times
        on connection errors.
    Returns a dict mapping known tracker name -> that tracker's url.
    Raises DownloaderError if no known tracker is found (or all fetch
    attempts fail).
    '''
    found_trackers = {}
    if attempts < 3:
        try:
            sock = urllib2.urlopen(secondary_results_link)
            html = sock.read()
            sock.close()
        except urllib2.URLError:
            # Retry and return the recursive result directly.  The old
            # code fell through after the retry and hit
            # BeautifulSoup(html) with `html` unbound, raising NameError
            # whenever the first attempt failed but a retry succeeded.
            return self._find_trackers(secondary_results_link, attempts + 1)
        soup = BeautifulSoup(html)
        # all possible links on the page
        possible_trackers = soup.findAll('a')
        for possible_tracker in possible_trackers:
            tracker = remove_html_tags(str(possible_tracker)).split()
            if tracker:
                # tracker[0] is the name of the tracker
                stripped_tracker = tracker[0].replace('.com', '').replace(
                    '.org', '').replace('.se', '')
                if stripped_tracker in self._trackers:
                    # link is 'href="http://whatever.com'
                    link = str(possible_tracker).split()[1]
                    first_quote = link.index('"') + 1
                    second_quote = link.index('"', first_quote)
                    tracker_url = link[first_quote:second_quote]
                    found_trackers[stripped_tracker] = tracker_url
    if found_trackers == {}:
        raise DownloaderError('No known trackers')
    return found_trackers
def _general_result_link(self, parsed_general_results, desired_item_name): """Returns a url for the 'choose a tracker' page on torrentz.com""" for name, link in parsed_general_results.items(): if desired_item_name.lower() in name.lower(): return link raise DownloaderError('Desired item not in search results')
def _parse_general_search(self, search_results):
    '''Finds the item to download from meta-search results'''
    bad_file_types = ['flac', 'wma']
    parsed_results = {}
    for item in search_results('item'):
        # The item description embeds a comma-grouped seed count seven
        # characters after the word 'Seeds', terminated by a space.
        description = str(item.description)
        after_seeds = description[description.index('Seeds') + 7:]
        seed_text = after_seeds[:after_seeds.index(' ')]
        seeds = int(seed_text.replace(",", ""))
        file_type = str(item.category).lower()
        # Keep only well-seeded results in acceptable formats.
        if file_type not in bad_file_types and seeds >= 5:
            title = remove_entities(remove_html_tags(str(item.title)))
            # guid is the url of the 'choose a tracker'
            # page on torrentz
            guid = remove_html_tags(str(item.guid))
            parsed_results[title] = guid
    if parsed_results == {}:
        raise DownloaderError('No valid results for search term')
    return parsed_results
def extract_download_url(self, url):
    """Build the direct .torrent download url from a pirate bay page url.

    Raises DownloaderError when the tracker page cannot be parsed for a
    title.
    """
    # The numeric torrent id follows 'torrent/' in the page url.
    num_start = url.index('torrent') + 8
    pirate_num = url[num_start:]
    pirate_title = self._pirate_title(url)
    if not pirate_title:
        raise DownloaderError('Unable to parse tracker site')
    return ('http://torrents.thepiratebay.se/' +
            '%s/%s.%s.TPB.torrent' % (pirate_num, pirate_title, pirate_num))
def _pirate_title(self, url):
    """Scrape the torrent's title from its pirate bay page.

    Returns the title with commas and spaces replaced by underscores
    (the form used in direct download urls), or None when the page has
    no div with id 'title'.
    Raises DownloaderError when the page cannot be fetched.
    """
    try:
        sock = urllib2.urlopen(url)
        html = sock.read()
        sock.close()
    except urllib2.URLError:
        # Was a bare `except URLError`; the sibling code consistently
        # qualifies it as urllib2.URLError, and urllib2 is plainly in
        # scope here (urlopen above), so the bare name would itself be a
        # NameError on a real connection failure unless a
        # `from urllib2 import URLError` exists at module top — using the
        # qualified form is correct in either case.
        raise DownloaderError('Connection issue')
    soup = BeautifulSoup(html)
    title = soup.find('div', {'id': 'title'})
    pirate_title = None
    if title:
        formatted_title = str(title)
        formatted_title = remove_html_tags(formatted_title)
        formatted_title = formatted_title.strip()
        formatted_title = formatted_title.replace(',', '_')
        pirate_title = formatted_title.replace(' ', '_')
    return pirate_title
class DownloaderTest(unittest.TestCase):
    """Unit tests for downloader.Downloader.

    All network access is mocked via patched urllib2.urlopen fakes;
    fixtures (search_results, Fake*UrlOpen, fake_tracker_giver, Tracker)
    are presumably defined earlier in this test module — not visible here.
    """

    def setUp(self):
        # Fake directory listing: one tracker plugin file 'test.py'.
        self._mock_listdir = Mock(return_value=['test.py'])
        # Canned torrentz search-results page parsed into soup.
        self._fake_soup = BeautifulSoup(search_results)
        self._correct_general_results = \
            {'The Beatles Greatest Hits Remastered/2009/MP3 Bubanee':
             'http://www.torrentz.com/0898a4b562c1098eb69b9b801c61a51d788df0f5'}
        self._correct_link = 'http://www.torrentz.com/0898a4b562c109' + \
            '8eb69b9b801c61a51d788df0f5'
        # Expected tracker-name -> tracker-page-url map parsed from the
        # canned 'choose a tracker' page.
        self._correct_trackers = {
            'torrenthound': 'http://www.torrenthound.com/hash/0898a4b5' +
                '62c1098eb69b9b801c61a51d788df0f5/torrent-info/The-Beatl' +
                'es-2009-Greatest-Hits-CDRip-Remastered-Bubanee-',
            'btmon': 'http://www.btmon.com/Audio/Unsorted/The_Beatles_' +
                '2009_Remastered_Greatest_Hits_41_Songs_CDRips_Bubanee.t' +
                'orrent.html',
            'btjunkie': 'http://btjunkie.org/torrent/The-Beatles-Great' +
                'est-Hits-Remastered-2009-MP3-Bubanee/43580898a4b562c109' +
                '8eb69b9b801c61a51d788df0f5',
            'thepiratebay': 'http://thepiratebay.org/torrent/5079924',
            'fenopy': 'http://fenopy.com/torrent/The+Beatles+2009+Grea' +
                'test+Hits+41+Songs+CDRip+Remastered+/MzYzODQxMA'}
        torrent_file_name = 'GreatestHits.torrent'
        self._downloads_folder = os.getcwd()
        self._correct_file_path = os.path.join(self._downloads_folder,
                                               torrent_file_name)
        self._downloader_ut = downloader.Downloader()

    def test_get_trackers(self):
        # _get_trackers lists the trackers/ dir; the mocked listdir yields
        # 'test.py', so the extension should be stripped off.
        with patch('os.listdir', self._mock_listdir):
            trackers = self._downloader_ut._get_trackers()
        self.assertEqual(['test'], trackers)

    @patch.object(urllib2, 'urlopen', FakeSearchUrlOpen)
    def test_torrentz_search(self):
        # Search term casing should not matter for the returned soup.
        search_term = 'TeSt QuERY'
        result = self._downloader_ut._torrentz_search(search_term)
        self.assertEquals(self._fake_soup, result)

    def test_parse_general_search_results__good_results(self):
        parsed_results = \
            self._downloader_ut._parse_general_search(self._fake_soup)
        self.assertEquals(self._correct_general_results, parsed_results)

    def test_parse_general_search_results__bad_results(self):
        # A page with no <item> elements must raise, not return {}.
        empty_soup = BeautifulSoup('<html></html>')
        self.assertRaises(DownloaderError,
                          self._downloader_ut._parse_general_search,
                          empty_soup)

    def test_general_results_link__good_result(self):
        # Lookup is case-insensitive on the desired item name.
        parsed_results = \
            self._downloader_ut._parse_general_search(self._fake_soup)
        link = \
            self._downloader_ut._general_result_link(parsed_results,
                                                     'tHe bEatlEs')
        self.assertEquals(self._correct_link, link)

    def test_general_results_link__bad_result(self):
        parsed_results = \
            self._downloader_ut._parse_general_search(self._fake_soup)
        self.assertRaises(DownloaderError,
                          self._downloader_ut._general_result_link,
                          parsed_results, 'huh?')

    @patch.object(urllib2, 'urlopen', FakeTrackerUrlOpen)
    def test_find_trackers__good_results(self):
        found_trackers = \
            self._downloader_ut._find_trackers(self._correct_link)
        self.assertDictEqual(self._correct_trackers, found_trackers)

    @patch.object(urllib2, 'urlopen', FakeEmptyUrlOpen)
    def test_find_trackers__bad_results(self):
        # An empty page contains no known tracker links.
        with patch('utils.write_file', Mock):
            self.assertRaises(DownloaderError,
                              self._downloader_ut._find_trackers,
                              'http://nada.com')

    @patch.object(urllib2, 'urlopen', FakeUrlOpenWithError)
    def test_find_trackers__URLError(self):
        # Every fetch attempt errors, so the retry budget is exhausted.
        self.assertRaises(DownloaderError,
                          self._downloader_ut._find_trackers,
                          self._correct_link)

    @patch.object(urllib2, 'urlopen', FakeEmptyUrlOpen)
    @patch.object(downloader.Downloader, '_get_tracker_object',
                  fake_tracker_giver)
    def test_download_torrent_file__work_first_time(self):
        with patch('utils.write_file', Mock):
            result_file_path = \
                self._downloader_ut._download_torrent_file(
                    'Greatest Hits', self._correct_trackers)
        self.assertEquals(self._correct_file_path, result_file_path)
        # The very first tracker in the dict should have been used.
        # NOTE(review): relies on dict iteration order of
        # self._correct_trackers — confirm this is stable on the target
        # Python version.
        first_tracker, first_tracker_url = \
            self._correct_trackers.items()[0]
        download_url = FakeEmptyUrlOpen.call_args[0][0]
        self.assertEquals(download_url, first_tracker_url)

    @patch.object(urllib2, 'urlopen', fake_changing_url_open.call)
    @patch.object(downloader.Downloader, '_get_tracker_object',
                  fake_tracker_giver)
    def test_download_torrent_file__work_second_time(self):
        # fake_changing_url_open presumably fails the first call and
        # succeeds on the second, so the second tracker (index 1) wins.
        with patch('utils.write_file', Mock):
            result_file_path = \
                self._downloader_ut._download_torrent_file(
                    'Greatest Hits', self._correct_trackers)
        self.assertEquals(self._correct_file_path, result_file_path)
        first_tracker, first_tracker_url = \
            self._correct_trackers.items()[1]
        download_url = fake_changing_url_open.args[0]
        self.assertEquals(download_url, first_tracker_url)

    @patch.object(urllib2, 'urlopen', FakeUrlOpenWithError)
    @patch.object(downloader.Downloader, '_get_tracker_object',
                  fake_tracker_giver)
    def test_download_torrent_file__doesnt_work(self):
        # Every tracker download errors -> DownloaderError.
        self.assertRaises(DownloaderError,
                          self._downloader_ut._download_torrent_file,
                          'Greatest Hits', self._correct_trackers)

    @patch.object(downloader.Downloader, '_torrentz_search', Mock())
    @patch.object(downloader.Downloader, '_parse_general_search', Mock())
    @patch.object(downloader.Downloader, '_general_result_link', Mock())
    @patch.object(downloader.Downloader, '_find_trackers', Mock())
    @patch.object(downloader.Downloader, '_download_torrent_file', Mock())
    @patch.object(downloader.Downloader, '_open_torrent', Mock())
    def test_download__success(self):
        # NOTE(review): download() is called twice with identical args and
        # `result` is never asserted — presumably exercising repeated
        # downloads; confirm intent.
        with patch('utils.write_file', Mock):
            result = self._downloader_ut.download('Greatest Hits',
                                                  'Greatest Hits')
            result = self._downloader_ut.download('Greatest Hits',
                                                  'Greatest Hits')

    @patch.object(downloader.Downloader, '_torrentz_search', Mock())
    @patch.object(downloader.Downloader, '_parse_general_search',
                  Mock(side_effect=DownloaderError('')))
    @patch.object(downloader.Downloader, '_general_result_link', Mock())
    @patch.object(downloader.Downloader, '_find_trackers', Mock())
    @patch.object(downloader.Downloader, '_download_torrent_file', Mock())
    @patch.object(downloader.Downloader, '_open_torrent', Mock())
    def test_download__no_success(self):
        # A DownloaderError inside the pipeline makes download() return 1.
        with patch('utils.write_file', Mock):
            result = self._downloader_ut.download('Greatest Hits',
                                                  'Greatest Hits')
        self.assertEquals(result, 1)

    def test_get_tracker_object(self):
        # The dynamically loaded fake tracker must behave like the
        # reference Tracker class.
        correct_url = Tracker().extract_download_url('test_url')
        tracker_path = os.path.join(os.path.dirname(__file__),
                                    'test_resources/fake_tracker.py')
        result_tracker = \
            self._downloader_ut._get_tracker_object(tracker_path)
        result_tracker_url = \
            result_tracker.extract_download_url('test_url')
        self.assertEquals(correct_url, result_tracker_url)