Exemplo n.º 1
0
def load(file, cat):
    """Fetch one ReDIF file and summarise each of its records.

    The path ``settings.repec_ftp + file`` is handed to ``ftp_get``; the
    result is decoded and parsed by the ``redif`` module.

    Returns a list with one ``(file, cat, handle, url)`` tuple per parsed
    record. ``url`` is the record's 'url' value normalised to end with
    exactly one '/', or ``None`` when the record has no 'url' field.

    A record without a 'handle' field raises ``KeyError``.
    """
    raw = ftp_get(settings.repec_ftp + file)
    records = redif.load(redif.decode(raw))

    rows = []
    for record in records:
        # Each record is turned into a mapping via dict(); presumably it is
        # an iterable of (field, value) pairs -- confirm against redif.load.
        fields = dict(record)
        url = None
        if 'url' in fields:
            # Drop any trailing slashes, then add back a single one.
            url = fields['url'].rstrip('/') + '/'
        rows.append((file, cat, fields['handle'], url))
    return rows
Exemplo n.º 2
0
def load(url):
    """Download and validate the ReDIF papers found at ``url``.

    The URL scheme selects the downloader: 'ftp' uses ``load_ftp``, while
    'http' and 'https' use ``load_http``. The downloaded data is then parsed
    with ``redif.load``.

    Returns the parsed papers.

    Raises ``RuntimeError`` when the scheme is not supported, when any paper
    lacks a 'handle' or 'template-type' field, or when no papers were parsed.
    """
    scheme = urlparse(url).scheme
    if scheme == 'ftp':
        raw = load_ftp(url)
    elif scheme in ('http', 'https'):
        raw = load_http(url)
    else:
        raise RuntimeError('Unknown scheme {}'.format(scheme))

    papers = redif.load(raw)

    required = ('handle', 'template-type')
    for paper in papers:
        # Each paper is iterated as (field, value) pairs; only the field
        # names matter for this check.
        present = {name for name, _ in paper}
        for name in required:
            if name not in present:
                raise RuntimeError('{} is missing'.format(name))

    # The emptiness check deliberately comes after per-paper validation,
    # which is a no-op for an empty series.
    if len(papers) == 0:
        raise RuntimeError('Empty series')
    return papers
Exemplo n.º 3
0
def collect_names(files):
    """Download each file and build a handle -> name mapping.

    For every entry of ``files`` a progress line is printed, the path
    ``settings.repec_ftp + file`` is fetched with ``ftp_get``, and the
    decoded content is parsed with ``redif``. Each record is passed through
    ``collect``; records with a 'name' field contribute their first name
    under their lowercased first handle.

    The first name seen for a handle wins. A later, different name for the
    same handle is reported on stdout and otherwise ignored.

    Any exception while processing a file prints a notice and moves on to
    the next file; associations gathered before the failure are kept.

    Returns the dict mapping lowercased handle to name.
    """
    names_by_handle = {}
    for position, file in enumerate(files, start=1):
        print(f'[{position}/{len(files)}] {file}...')
        try:
            raw = ftp_get(settings.repec_ftp + file)
            for entry in redif.load(redif.decode(raw)):
                fields = collect(entry)
                if 'name' not in fields:
                    continue
                # Handles are lowercased because capitalization is
                # inconsistent across records.
                key = fields['handle'][0].lower()
                candidate = fields['name'][0]
                # setdefault stores the candidate only for an unseen handle
                # and always returns the name currently on record.
                known = names_by_handle.setdefault(key, candidate)
                if candidate != known:
                    print(f'Conflicting names: "{known}" vs. "{candidate}"'
                          f' in {key}')
        except Exception:
            # Best effort: one bad file must not abort the whole run.
            print(f'Skipping {file} due to errors')
    return names_by_handle