def get_show_url(title):
    """Look up the epguides.com URL for a show by its title.

    Runs a Google ``allintitle:`` search restricted to epguides.com and
    returns the first result that is hosted on epguides.com and whose
    result name contains ``title``.

    Args:
        title: Show title. It is matched literally and case-sensitively
            against the name of each search result.

    Returns:
        The URL of the first matching result, or ``None`` when no result
        matches.
    """
    # Accept both schemes: results served over https would otherwise never
    # match the hard-coded http:// prefix.
    epguides_prefixes = ('http://epguides.com', 'https://epguides.com')
    query = 'allintitle: site:epguides.com %s' % title
    for name, url, _desc in google.find(query, 1):
        if not url.startswith(epguides_prefixes):
            continue
        # Plain substring test instead of re.search(title, name): the title
        # is user text, not a pattern, so characters such as "*", "(" or "?"
        # must be matched literally rather than interpreted as regex syntax.
        if title in name:
            return url
    return None
# Read the keyphrases, one per line, from the file "output".
# The context manager closes the file even if reading fails.
with open("output", "r") as f:
    keyphrases = f.readlines()

#Just an example hash
#input_file_hash = "c52ddbefea727246a1566f669974b144"
# NOTE(review): the value below looks like the MD5 digest of empty input;
# confirm that this placeholder is intended.
input_file_hash = "d41d8cd98f00b204e9800998ecf8427e"
input_text = ""
stopwords = set()
lemmatizer = None

# Build the search query as "+(phrase)" terms, one per keyphrase.
# Only the trailing newline is stripped (not blindly the last character), so
# a final line without a newline keeps its last letter; blank lines are
# skipped so they do not contribute empty "+()" terms.
phrases = (line.rstrip("\n") for line in keyphrases)
query = "".join("+({0})".format(phrase) for phrase in phrases if phrase)

# NOTE(review): assumes google.find yields URL strings when called with
# filetype="pdf"; confirm against the google module in use.
urls = google.find(query, filetype="pdf")

files = []
number = 0  # stays 0 when the search returns no URLs
for number, url in enumerate(urls, start=1):
    try:
        print("\n{0})".format(number), end="")
        path = download(url, directory="./TMP")
        record = file(url, path)
        if path == '':
            # An empty path is treated as a failed download: the record is
            # kept in `files` but flagged with processed = -1.
            record.processed = -1
        files.append(record)
    except Exception:
        # Best effort: report the failure and continue with the next URL.
        # No record exists for this URL at this point, so nothing is flagged
        # here; flagging a leftover object from a previous iteration would
        # mark the wrong download as failed.
        print("Download\n\"{0}\"\nfailed\n".format(url))