def download_files(i, links):
    """
    Download every non-blank URL in ``links`` into ``<cwd>/emi_files``.

    The stored name is ``<i>_<basename>``. ``basename`` comes from the
    ``filename=`` parameter of the response's ``Content-Disposition``
    header when one is present and non-empty, otherwise from the last
    path component of the URL. A link whose target file already exists
    is not downloaded again, but is still reported in the result.

    @param i: prefix placed in front of every stored filename
    @param links: iterable of URL strings; blank entries are skipped
    @return: list of ``(stored_path, basename)`` tuples, in input order
    """
    download_path = os.path.join(os.getcwd(), 'emi_files')
    if not os.path.exists(download_path):
        os.makedirs(download_path)

    files = []
    for link in links:
        if not link.strip():
            continue
        # Single-argument parenthesised print behaves the same as the
        # Python 2 print statement.
        print("Downloading %s" % link)

        # Determine filename. Only the headers are needed from this first
        # request, so release the handle even if reading them fails.
        from_file = open_url(link)
        try:
            content_disp = from_file.info().getheader('Content-Disposition')
        finally:
            from_file.close()

        basename = None
        if content_disp:
            for item in content_disp.split(';'):
                item = item.strip()
                if item.startswith('filename='):
                    # The value may or may not be quoted; strip quotes only
                    # when they are there instead of blindly dropping the
                    # first and last character.
                    value = item[len('filename='):].strip().strip('"')
                    # The header is server-supplied: keep only the final
                    # path component so it cannot escape download_path.
                    basename = os.path.basename(value)
        if not basename:
            # No usable header filename (missing or empty): use the URL.
            basename = os.path.basename(link)
        print("Filename %s" % basename)

        new_path = os.path.join(download_path, "%s_%s" % (i, basename))
        if not os.path.exists(new_path):
            tmp_path = download_external_url(link)
            os.rename(tmp_path, new_path)
        files.append((new_path, basename))

    return files
def APS_connect(from_param, until_param=None, page=1, perpage=100):
    """
    Manages connection to APS site and return connector.

    Builds the harvesting URL from the given parameters and opens it,
    making up to five attempts when the failure looks like a
    connectivity problem.

    @param from_param: start of the date range, sent as ``from=``
    @param until_param: end of the date range, sent as ``until=``;
        left out of the query entirely when not given
    @param page: page number to request
    @param perpage: number of results per page, sent as ``per_page=``
    @return: whatever ``open_url`` returns for the built URL
    @raise StandardError: the error from ``open_url``, re-raised at once
        when it is not a connectivity failure, or after the fifth
        failed attempt when it is
    """
    host = 'http://harvest.aps.org'
    function = '/content/journals/articles'

    params = "?from=" + str(from_param)
    if until_param:
        # Only send the upper bound when the caller supplied one; the
        # default None would otherwise go out literally as "until=None".
        params += "&until=" + str(until_param)
    params += "&page=" + str(page) + "&per_page=" + str(perpage)
    # use the published date instead of metadata date
    params += "&date=published"

    url_to_open = host + function + params
    retries = 0
    while retries < 5:
        retries += 1
        try:
            write_message("Tries to open URL: %s" % (url_to_open,), verbose=5)
            conn = open_url(url_to_open)
            write_message("Success!", verbose=5)
            return conn
        except StandardError as e:
            reason = str(e)
            if 'urlopen' in reason or 'URL could not be opened' in reason:
                write_message("Error: APS could not be reached")
                if retries < 5:
                    write_message("Retrying...")
                    continue
            # Not a connectivity failure, or out of attempts: propagate.
            raise
def APS_connect(from_param, until_param=None, page=1, perpage=100):
    """
    Open the APS harvesting endpoint and hand back the connection.

    The query always carries ``from``, ``page`` and ``per_page``;
    ``until`` is added only when ``until_param`` is truthy. Up to five
    attempts are made when the failure message indicates the site could
    not be reached; any other error, or the fifth such failure, is
    re-raised to the caller.
    """
    query = ['from=' + str(from_param)]
    if until_param:
        query.append('until=' + str(until_param))
    query.append("page=" + str(page))
    query.append("per_page=" + str(perpage))

    url_to_open = ('http://harvest.aps.org' + '/content/journals/articles'
                   + "?" + "&".join(query))

    for attempt in range(1, 6):
        try:
            write_message("Tries to open URL: %s" % (url_to_open,), verbose=5)
            connection = open_url(url_to_open)
            write_message("Success!", verbose=5)
            return connection
        except StandardError as err:
            text = str(err)
            unreachable = ('urlopen' in text
                           or 'URL could not be opened' in text)
            if not unreachable:
                # Unrelated failure: do not retry.
                raise
            write_message("Error: APS could not be reached")
            if attempt >= 5:
                # Attempts exhausted: give the error to the caller.
                raise
            write_message("Retrying...")