def load(file, cat):
    """Fetch one series or archive file and describe each of its records.

    The file is retrieved with ``ftp_get`` from ``settings.repec_ftp + file``,
    then decoded and parsed by ``redif``.

    Returns a list with one ``(file, cat, handle, url)`` tuple per parsed
    record. ``url`` is the record's 'url' value rewritten to end in exactly
    one '/', or ``None`` when the record has no 'url' entry.
    """
    raw = ftp_get(settings.repec_ftp + file)
    records = redif.load(redif.decode(raw))

    rows = []
    for record in records:
        # Records are converted with dict(); presumably they arrive as
        # (field, value) pairs -- verify against redif.load.
        fields = dict(record)
        if 'url' in fields:
            # Strip every trailing slash first so exactly one remains.
            url = fields['url'].rstrip('/') + '/'
        else:
            url = None
        rows.append((file, cat, fields['handle'], url))
    return rows
def load(url):
    """Download the ReDIF papers found at ``url`` and validate them.

    The URL scheme selects the downloader: ``load_ftp`` for 'ftp',
    ``load_http`` for 'http' and 'https'. The downloaded content is parsed
    with ``redif.load``.

    Returns the parsed papers.

    Raises RuntimeError when the scheme is not one of the above, when any
    paper lacks a 'handle' or 'template-type' field, or when no papers
    were parsed at all.
    """
    scheme = urlparse(url)[0]
    if scheme not in ('ftp', 'http', 'https'):
        raise RuntimeError('Unknown scheme {}'.format(scheme))

    fetch = load_ftp if scheme == 'ftp' else load_http
    papers = redif.load(fetch(url))

    required = ('handle', 'template-type')
    for paper in papers:
        # Each paper is iterated as (field, value) pairs; only the field
        # names matter for this check.
        present = {name for name, _ in paper}
        for field in required:
            if field not in present:
                raise RuntimeError('{} is missing'.format(field))

    if len(papers) == 0:
        raise RuntimeError('Empty series')
    return papers
def collect_names(files):
    """Download each file and build a mapping of handle -> name.

    A progress line is printed for every file. Each file is fetched with
    ``ftp_get`` from ``settings.repec_ftp + file`` and parsed by ``redif``;
    every record is passed through ``collect`` and, when it has a 'name'
    entry, its first name is stored under its lowercased first handle.

    The first name seen for a handle is kept; a later, different name for
    the same handle is reported on stdout and otherwise ignored. Any
    exception raised while processing a file is caught: a notice is
    printed and processing moves on to the next file.

    Returns the dict of lowercased handle -> name.
    """
    names = {}
    for position, file in enumerate(files, start=1):
        print(f'[{position}/{len(files)}] {file}...')
        try:
            raw = ftp_get(settings.repec_ftp + file)
            parsed = redif.load(redif.decode(raw))
            for record in parsed:
                # collect() output is indexed as field -> sequence of values
                # below; presumably it groups repeated fields -- verify.
                record = collect(record)
                if 'name' not in record:
                    continue
                # Handles are lowercased because capitalization is
                # inconsistent across records.
                handle = record['handle'][0].lower()
                newname = record['name'][0]
                oldname = names.setdefault(handle, newname)
                if oldname != newname:
                    print(f'Conflicting names: "{oldname}" vs. "{newname}"'
                          f' in {handle}')
        except Exception:
            # Best effort: a failing file must not abort the whole run.
            print(f'Skipping {file} due to errors')
    return names