Example #1
File: upgrade.py — Project: GaretJax/irco
def main():
    """Entry point for the ``irco-upgrade`` command.

    Parses the target database URL from the command line, records the
    invocation in the Sentry context, and runs the Alembic migrations
    bundled with the ``irco`` package up to the latest revision.
    """
    log = get_logger()

    # BUG FIX: the prog name was 'irco-import', copy-pasted from the
    # import command; this is the upgrade entry point (see the Sentry
    # 'command' tag below), so --help/usage must say 'irco-upgrade'.
    argparser = argparse.ArgumentParser('irco-upgrade')
    argparser.add_argument('-v', '--verbose', action='store_true')
    argparser.add_argument('database')

    args = argparser.parse_args()

    sentry.context.merge({
        'tags': {
            'command': 'irco-upgrade',
        },
        'extra': {
            'parsed_arguments': args.__dict__,
        }
    })

    log.info('arguments_parsed', args=args)

    # Point Alembic at the package-shipped migration scripts and at the
    # user-supplied database URL, then migrate to the newest revision.
    config = Config()
    config.set_main_option('script_location', 'irco:migrations')
    config.set_main_option('sqlalchemy.url', args.database)

    # sql=False runs the migration directly instead of emitting SQL text.
    command.upgrade(config, 'head', sql=False, tag=None)
Example #2
File: scrape.py — Project: GaretJax/irco
def main():
    """Entry point for the ``irco-scrape`` command.

    Downloads saved records page by page (``MAX_RECORDS`` per page) into
    numbered ``savedrecs-NNNNN.csv`` files inside ``args.output``, and
    stops when the downloader signals the end of the result set by
    raising ``AbortDownload``, discarding the partial file of the
    aborted page.
    """
    log = get_logger()

    argparser = argparse.ArgumentParser("irco-scrape")
    argparser.add_argument("search_id")
    argparser.add_argument("output")
    argparser.add_argument("count", type=int, nargs="?", help="Deprecated")
    args = argparser.parse_args()

    # NOTE(review): the Sentry tag says 'irco-init' but this is the
    # scrape command — looks like a copy-paste slip; confirm before
    # changing, as dashboards may filter on the existing value.
    sentry.context.merge({"tags": {"command": "irco-init"}, "extra": {"parsed_arguments": args.__dict__}})

    log.info("arguments_parsed", args=args)

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    digits = 5  # column width for the progress print-out below

    for i, start in iterpages(MAX_RECORDS):
        dest = os.path.join(args.output, "savedrecs-{:05d}.csv".format(i))
        end = start + MAX_RECORDS
        print("{:{}d} - {:{}d} => {}".format(start + 1, digits, end, digits, dest))
        aborted = False
        with open(dest, "wb") as fh:
            try:
                download(args.search_id, start + 1, end, fh)
            except AbortDownload:
                aborted = True
        if aborted:
            # BUG FIX: os.remove(dest) previously ran unconditionally
            # after the loop, which deleted the last *complete* file on
            # normal exit and raised NameError when the loop never ran.
            # Remove only the partial file of the aborted page, after
            # the handle has been closed by the with-block.
            os.remove(dest)
            break
Example #3
def main():
    """Entry point for the ``irco-import`` command.

    Parses records from one or more source files with the pipeline
    matching the requested input format, imports them into the given
    database, and prints a report including import statistics.
    """
    log = get_logger()

    # Supported input formats, each mapped to its pipeline factory.
    pipelines = {
        'compendex': compendex.pipeline,
        'scopus': scopus.pipeline,
        'wos': wos.pipeline,
    }

    parser = argparse.ArgumentParser('irco-import')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-i', '--input-format', choices=pipelines, required=True)
    parser.add_argument('-e', '--encoding', default='utf8')
    parser.add_argument('-a', '--include-ambiguous-affiliations',
                        action='store_true', dest='ambiguous')
    parser.add_argument('source', nargs='+')
    parser.add_argument('database')
    args = parser.parse_args()

    context = {
        'tags': {
            'command': 'irco-import',
            'input_format': args.input_format,
        },
        'extra': {
            'parsed_arguments': args.__dict__,
        }
    }
    sentry.context.merge(context)

    log.info('arguments_parsed', args=args)

    pipeline = pipelines[args.input_format](
        encoding=args.encoding, include_ambiguous=args.ambiguous)
    engine = create_engine(args.database, echo=args.verbose)
    session_factory = sessionmaker(bind=engine)

    # Publication counts before/after so the report can show the delta.
    count_before = session_factory().query(models.Publication).count()
    records = get_records(args.source, pipeline)
    imported, ignored = import_records(engine, records)
    count_after = session_factory().query(models.Publication).count()

    metrics = [
        ('imported_records', 'Records added to the database', imported),
        ('ignored_records', 'Ignored records (already imported)', ignored),
        ('before_import', 'Records count before import', count_before),
        ('after_import', 'Records count after import', count_after),
    ]
    for metric_name, description, value in metrics:
        pipeline.add_metric(metric_name, description, value)

    print()
    print(pipeline.report())
示例#4
0
import itertools
import collections

import networkx as nx


from irco import logging

# Module-level logger shared by all functions in this module; obtained
# from the project's irco.logging wrapper (imported above).
log = logging.get_logger()


def get_institutions(publication):
    """Return the set of institution names affiliated with *publication*.

    Duplicate affiliations to the same institution collapse into a
    single entry, since the result is a set of names.
    """
    return {
        affiliation.institution.name
        for affiliation in publication.affiliations
    }


def create(session, publications):
    g = nx.Graph()

    papers_count = collections.Counter()
    collaborations_count = collections.Counter()

    for publication in publications:
        institutions = get_institutions(publication)
        g.add_nodes_from(institutions)
        papers_count.update(institutions)