Exemplo n.º 1
0
def get_show_url(title):
    """Return the epguides.com URL for the show named *title*, or None.

    Runs a Google query restricted to page titles on epguides.com and
    returns the URL of the first hit that both starts with
    ``http://epguides.com`` and whose result name contains *title*.

    Args:
        title: Show title to look for; treated as literal text.

    Returns:
        The matching URL, or None when no search hit qualifies.
    """
    query = 'allintitle: site:epguides.com %s' % title
    for (name, url, _desc) in google.find(query, 1):
        if not url.startswith('http://epguides.com'):
            continue
        # Literal containment check: feeding the raw title to re.search()
        # treated characters such as "+", "(", "?" or "." as regex syntax,
        # which either raised re.error or silently missed valid matches.
        if title in name:
            return url
    return None
Exemplo n.º 2
0
        
# Load the key phrases (one per line) from the "output" file; the context
# manager guarantees the handle is closed even if reading fails.
with open("output", "r") as f:
    keyphrases = f.readlines()
#Just an example hash
#input_file_hash = "c52ddbefea727246a1566f669974b144"
input_file_hash = "d41d8cd98f00b204e9800998ecf8427e"
input_text = ""
stopwords = set()
lemmatizer = None

# Build the search query as a sequence of "+(<phrase>)" terms.
# rstrip("\n") removes only the line terminator; slicing off the last
# character unconditionally would truncate the final phrase whenever the
# file does not end with a newline.
query = "".join("+(" + tmp.rstrip("\n") + ")" for tmp in keyphrases)

# Restrict the search to PDF results.
urls = google.find(query, filetype = "pdf")

# Download every search hit into ./TMP and collect one record per URL that
# could be fetched. `file` is presumably a project-defined record class
# (Python 3 has no such builtin) -- confirm against its definition.
files = []
number = 0  # stays 0 when there are no URLs at all
for number, url in enumerate(urls, 1):
    print("\n{0})".format(number), end = "")
    try:
        path = download(url, directory = "./TMP")
        f = file(url, path)
    except Exception:
        # No record exists for this URL when we get here. The previous
        # handler set `processed = -1` on whatever `f` was left over from
        # earlier (the prior iteration's record or an unrelated object),
        # wrongly flagging it as failed; just report and move on.
        print("Download\n\"{0}\"\nfailed\n".format(url))
        continue
    if path == '':
        # An empty path from download() marks the record as not processable.
        f.processed = -1
    files.append(f)