Пример #1
0
def download_files(i, links):
    """
    Download every non-blank URL in ``links`` into ``<cwd>/emi_files``.

    The local name of each file is ``"<i>_<basename>"``, where ``basename``
    comes from the ``filename`` parameter of the response's
    Content-Disposition header when there is one, and from the last path
    component of the URL otherwise. A URL whose target file already exists
    is not downloaded again.

    @param i: prefix put in front of every stored file name
    @param links: iterable of URL strings; blank entries are skipped
    @return: list of ``(local_path, basename)`` tuples, one per processed URL
    """
    download_path = os.path.join(os.getcwd(), 'emi_files')
    if not os.path.exists(download_path):
        os.makedirs(download_path)

    files = []
    for link in links:
        if not link.strip():
            continue
        print("Downloading %s" % link)

        # Determine filename. The connection is only needed for its headers,
        # so release it even when reading them fails.
        from_file = open_url(link)
        try:
            content_disp = from_file.info().getheader('Content-Disposition')
        finally:
            from_file.close()

        basename = None
        if content_disp:
            for item in content_disp.split(';'):
                item = item.strip()
                if not item.startswith('filename='):
                    continue
                value = item[len('filename='):].strip()
                # The value may be a quoted string or a bare token; only
                # strip the quotes when they are really there.
                if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
                    value = value[1:-1]
                # The header is controlled by the remote server: keep the
                # file name only, never a directory part.
                value = os.path.basename(value)
                if value:
                    basename = value
        if not basename:
            basename = os.path.basename(link)
        print("Filename %s" % basename)

        new_path = os.path.join(download_path, "%s_%s" % (i, basename))
        if not os.path.exists(new_path):
            tmp_path = download_external_url(link)
            os.rename(tmp_path, new_path)
        files.append((new_path, basename))
    return files
Пример #2
0
def APS_connect(from_param, until_param=None, page=1, perpage=100):
    """
    Manages connection to APS site and return connector.

    Builds the harvesting URL (filtering on the published date) and tries to
    open it with open_url, at most five times.

    @param from_param: value sent as the ``from`` query parameter
    @param until_param: value sent as the ``until`` query parameter; the
        parameter is left out of the URL when this is None or empty
    @param page: value sent as the ``page`` query parameter
    @param perpage: value sent as the ``per_page`` query parameter
    @return: whatever open_url returns on success, or None when all five
        attempts failed with a connection error
    @raise StandardError: any error from open_url whose message is not
        recognised as a connection problem is re-raised unchanged
    """
    host = 'http://harvest.aps.org'
    function = '/content/journals/articles'

    params = "?" + 'from=' + str(from_param)
    # Only send "until" when the caller supplied one; otherwise the default
    # would end up in the URL as the literal text "until=None".
    if until_param:
        params += "&" + 'until=' + str(until_param)

    params += "&page=" + str(page) + "&per_page=" + str(perpage)
    # use the published date instead of metadata date
    params += "&date=published"
    url_to_open = host + function + params

    retries = 0
    while retries < 5:
        retries += 1
        try:
            write_message("Tries to open URL: %s" % (url_to_open,), verbose=5)
            conn = open_url(url_to_open)
            write_message("Success!", verbose=5)
            return conn
        except StandardError as e:
            # Only connection problems are retried; they are recognised by
            # their message text. Everything else goes to the caller.
            if 'urlopen' in str(e) or 'URL could not be opened' in str(e):
                write_message("Error: APS could not be reached")
                if retries < 5:
                    write_message("Retrying...")
                continue
            raise
    # Every attempt failed with a connection error.
    return None
Пример #3
0
def APS_connect(from_param, until_param=None, page=1, perpage=100):
    """
    Open the APS harvesting URL and hand back the connection.

    The query string always carries ``from``, ``page`` and ``per_page``;
    ``until`` is added only when until_param is truthy. Opening the URL is
    attempted up to five times. An error whose message mentions 'urlopen' or
    'URL could not be opened' triggers another attempt, any other error is
    re-raised. When all attempts fail the function returns None.
    """
    endpoint = 'http://harvest.aps.org' + '/content/journals/articles'

    query = "?" + 'from=' + str(from_param)
    if until_param:
        query = query + "&" + 'until=' + str(until_param)
    query = query + "&page=" + str(page) + "&per_page=" + str(perpage)

    url_to_open = endpoint + query

    for attempt in range(1, 6):
        try:
            write_message("Tries to open URL: %s" % (url_to_open,), verbose=5)
            connection = open_url(url_to_open)
            write_message("Success!", verbose=5)
            return connection
        except StandardError as err:
            reason = str(err)
            # Anything that does not look like a connection failure is not
            # ours to handle.
            if 'urlopen' not in reason and 'URL could not be opened' not in reason:
                raise
            write_message("Error: APS could not be reached")
            if attempt < 5:
                write_message("Retrying...")
    return None
Пример #4
0
def download_files(i, links):
    """
    Download every non-blank URL in ``links`` into ``<cwd>/emi_files``.

    The local name of each file is ``"<i>_<basename>"``, where ``basename``
    comes from the ``filename`` parameter of the response's
    Content-Disposition header when there is one, and from the last path
    component of the URL otherwise. A URL whose target file already exists
    is not downloaded again.

    @param i: prefix put in front of every stored file name
    @param links: iterable of URL strings; blank entries are skipped
    @return: list of ``(local_path, basename)`` tuples, one per processed URL
    """
    download_path = os.path.join(os.getcwd(), 'emi_files')
    if not os.path.exists(download_path):
        os.makedirs(download_path)

    files = []
    for link in links:
        if not link.strip():
            continue
        print("Downloading %s" % link)

        # Determine filename. The connection is only needed for its headers,
        # so release it even when reading them fails.
        from_file = open_url(link)
        try:
            content_disp = from_file.info().getheader('Content-Disposition')
        finally:
            from_file.close()

        basename = None
        if content_disp:
            for item in content_disp.split(';'):
                item = item.strip()
                if not item.startswith('filename='):
                    continue
                value = item[len('filename='):].strip()
                # The value may be a quoted string or a bare token; only
                # strip the quotes when they are really there.
                if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
                    value = value[1:-1]
                # The header is controlled by the remote server: keep the
                # file name only, never a directory part.
                value = os.path.basename(value)
                if value:
                    basename = value
        if not basename:
            basename = os.path.basename(link)
        print("Filename %s" % basename)

        new_path = os.path.join(download_path, "%s_%s" % (i, basename))
        if not os.path.exists(new_path):
            tmp_path = download_external_url(link)
            os.rename(tmp_path, new_path)
        files.append((new_path, basename))
    return files