def org_top_categories(orgs):
    """Build and sync the InCites top-category graph for the given organizations.

    For every organization name in ``orgs`` the 'categories-by-year' InCites
    JSON file is loaded.  Each (category, year) entry found there becomes one
    ``WOS.InCitesTopCategory`` resource carrying a label, the count, the year
    and ``VIVO.relates`` links to both the category and the organization.
    Organizations whose file is empty are skipped with a warning.  The
    assembled graph is synced to ``settings.INCITES_TOP_CATEGORIES``.

    Args:
        orgs: Iterable of organization names.

    Returns:
        Always ``True``.
    """
    g = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        ln = local_name(org_uri)
        top_cat = load_incites_json_file(org_name, 'categories-by-year')
        if len(top_cat) == 0:
            logger.warning("{} file is empty.".format(org_name))
            continue
        for item in top_cat:
            cat = item['category']
            # Everything below depends only on the category, so compute it
            # once per category instead of once per year.
            category_uri = get_category_uri(cat)
            label = Literal("{} - {}".format(org_name, cat))
            # NOTE(review): there is no separator between the organization
            # local name and the category slug; kept as-is because changing
            # it would change the identity of the generated resources.
            uri_base = D['topcategory-'] + ln + slugify(cat)
            for tc_yr in item['counts']:
                count = tc_yr['count']
                year = tc_yr['year']
                curi = uri_base + '-{}'.format(year)
                g.add((curi, RDF.type, WOS.InCitesTopCategory))
                g.add((curi, RDFS.label, label))
                g.add((curi, WOS.number, Literal(count)))
                g.add((curi, WOS.year, Literal(year)))
                g.add((curi, VIVO.relates, category_uri))
                g.add((curi, VIVO.relates, org_uri))
    ng = settings.INCITES_TOP_CATEGORIES
    backend.sync_updates(ng, g)
    return True
示例#2
0
def map_categories():
    """Rebuild the journal-to-category graph and sync it to ``CATEGORY_NG``.

    Returns:
        The ``(added, removed)`` pair produced by ``backend.sync_updates``.
    """
    journals = get_journals()
    categories = read_categories(CATEGORY_FILE)

    graph = Graph()
    # Journal -> category mappings.
    graph += map_journals_to_categories(journals, categories)

    added, removed = backend.sync_updates(CATEGORY_NG, graph)
    return added, removed
def org_total_counts(orgs):
    """Build and sync the per-year publication count graph for organizations.

    The 'total' InCites JSON file is loaded for each organization name in
    ``orgs``.  Every entry (read via its 'year' and 'count' keys) becomes one
    ``WOS.InCitesPubPerYear`` resource that the organization relates to.
    Organizations whose file is empty are skipped with a warning.  The
    assembled graph is synced to ``settings.INCITES_PUB_YEAR_COUNTS``.

    Args:
        orgs: Iterable of organization names.

    Returns:
        Always ``True``.
    """
    graph = Graph()
    for name in orgs:
        organization = waan_uri(name)
        local = local_name(organization)
        yearly = load_incites_json_file(name, 'total')
        if len(yearly) == 0:
            logger.warning("{} file is empty.".format(name))
            continue
        for entry in yearly:
            year = entry['year']
            count = entry['count']
            node = D[''.join(['pubcount-', local, '-', str(year)])]
            triples = (
                (node, RDF.type, WOS.InCitesPubPerYear),
                (node, RDFS.label, Literal("{} - {}".format(year, count))),
                (node, WOS.number, Literal(count)),
                (node, WOS.year, Literal(year)),
                (organization, VIVO.relates, node),
            )
            for triple in triples:
                graph.add(triple)
    backend.sync_updates(settings.INCITES_PUB_YEAR_COUNTS, graph)
    return True
示例#4
0
def process(triple_files, format="nt", dry=False, sync=False, sleep=10, size=DEFAULT_BATCH_SIZE):
    """Load triple files and push each one to its own named graph.

    The named graph for a file is ``NG_BASE`` plus the file's base name up to
    its first dot.

    Args:
        triple_files: Iterable of "/"-separated file paths to parse.
        format: Serialization format handed to ``Graph.parse``.
        dry: When exactly ``True``, only log what would be processed.
        sync: When exactly ``True``, sync the named graph via
            ``backend.sync_updates``; otherwise post the triples with the
            store's ``bulk_add``.
        sleep: Seconds to pause after a file that caused changes; values
            of 0 or less disable the pause.
        size: Batch size passed through to the backend calls.

    Returns:
        Always ``True``.
    """
    store = backend.get_store()
    for path in triple_files:
        graph = Graph()
        graph.parse(source=path, format=format)

        file_name = path.rpartition("/")[2]
        named_graph = NG_BASE + file_name.partition(".")[0]
        logger.info("Processing updates with {} triples to {} and batch size {}.".format(len(graph), named_graph, size))

        if dry is True:
            logger.info("Dry run. No changes made.")
            continue

        if sync is True:
            logger.info("Syncing graph to {}.".format(named_graph))
            added, removed = backend.sync_updates(named_graph, graph, size=size)
        else:
            logger.info("Posting graph as updates to {}.".format(named_graph))
            added = store.bulk_add(named_graph, graph, size=size)
            removed = 0

        changed = (added > 0) or (removed > 0)
        if not changed:
            logger.info("No changes made to {}.".format(named_graph))
        elif sleep > 0:
            # Pause after a file that modified the store.
            logger.info("Sleeping for {} seconds between files.".format(sleep))
            time.sleep(sleep)
    return True