def org_top_categories(orgs):
    """Build and sync InCites top-category-per-year counts for each org.

    For every organization name in *orgs*, loads the 'categories-by-year'
    InCites JSON file and mints one ``WOS.InCitesTopCategory`` resource per
    (category, year) pair, carrying the count and relating it to both the
    category URI and the organization URI. The accumulated graph is synced
    to the ``INCITES_TOP_CATEGORIES`` named graph.

    Organizations whose data file is empty are skipped with a warning.

    Returns:
        True on completion.
    """
    g = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        ln = local_name(org_uri)
        top_cat = load_incites_json_file(org_name, 'categories-by-year')
        if not top_cat:
            logger.warning("{} file is empty.".format(org_name))
            continue
        for item in top_cat:
            cat = item['category']
            for tc_yr in item['counts']:
                count = tc_yr['count']
                year = tc_yr['year']
                category_uri = get_category_uri(cat)
                # FIX: mint the identifier inside the namespace lookup (as
                # org_total_counts does) so curi is always a URIRef rather
                # than the result of concatenating onto a namespace term,
                # and add the '-' separator between the org local name and
                # the category slug for consistency with the
                # 'pubcount-{ln}-{year}' pattern. NOTE(review): this changes
                # minted URIs relative to the old code — confirm downstream
                # data does not depend on the unseparated form.
                curi = D['topcategory-' + ln + '-' + slugify(cat) +
                         '-{}'.format(year)]
                g.add((curi, RDF.type, WOS.InCitesTopCategory))
                g.add((curi, RDFS.label,
                       Literal("{} - {}".format(org_name, cat))))
                g.add((curi, WOS.number, Literal(count)))
                g.add((curi, WOS.year, Literal(year)))
                g.add((curi, VIVO.relates, category_uri))
                g.add((curi, VIVO.relates, org_uri))
    ng = settings.INCITES_TOP_CATEGORIES
    backend.sync_updates(ng, g)
    return True
def map_categories():
    """Link journals to their WoS categories and sync the resulting graph.

    Builds the journal-to-category mapping triples and syncs them to the
    CATEGORY_NG named graph.

    Returns:
        (added, removed) triple counts reported by the backend sync.
    """
    journal_index = get_journals()
    category_index = read_categories(CATEGORY_FILE)
    # Journal -> category mapping triples.
    updates = Graph()
    updates += map_journals_to_categories(journal_index, category_index)
    return backend.sync_updates(CATEGORY_NG, updates)
def org_total_counts(orgs):
    """Build and sync per-year total publication counts for each org.

    For every organization name in *orgs*, loads the 'total' InCites JSON
    file and mints one ``WOS.InCitesPubPerYear`` resource per year, holding
    the publication count and related back to the organization URI. The
    accumulated graph is synced to the ``INCITES_PUB_YEAR_COUNTS`` named
    graph. Organizations whose data file is empty are skipped with a warning.

    Returns:
        True on completion.
    """
    graph = Graph()
    for name in orgs:
        uri = waan_uri(name)
        short_name = local_name(uri)
        yearly_counts = load_incites_json_file(name, 'total')
        if len(yearly_counts) == 0:
            logger.warning("{} file is empty.".format(name))
            continue
        for entry in yearly_counts:
            count_uri = D['pubcount-' + short_name + '-' + str(entry['year'])]
            label = Literal("{} - {}".format(entry['year'], entry['count']))
            graph.add((count_uri, RDF.type, WOS.InCitesPubPerYear))
            graph.add((count_uri, RDFS.label, label))
            graph.add((count_uri, WOS.number, Literal(entry['count'])))
            graph.add((count_uri, WOS.year, Literal(entry['year'])))
            graph.add((uri, VIVO.relates, count_uri))
    backend.sync_updates(settings.INCITES_PUB_YEAR_COUNTS, graph)
    return True
def process(triple_files, format="nt", dry=False, sync=False, sleep=10,
            size=DEFAULT_BATCH_SIZE):
    """Parse each triple file and push its contents to the triple store.

    The target named graph for a file is derived from its base filename
    (NG_BASE + name without extension). With ``sync=True`` the graph is
    synced (additions and removals); otherwise triples are bulk-added only.
    ``dry=True`` parses and logs but makes no changes. After any file that
    produced changes, sleeps ``sleep`` seconds before the next one.

    Returns:
        True on completion.
    """
    vstore = backend.get_store()
    for fpath in triple_files:
        incoming = Graph()
        incoming.parse(source=fpath, format=format)
        # Named graph comes from the file's base name, extension stripped.
        named_graph = NG_BASE + fpath.split("/")[-1].split(".")[0]
        logger.info(
            "Processing updates with {} triples to {} and batch size {}.".format(
                len(incoming), named_graph, size))
        if dry is True:
            logger.info("Dry run. No changes made.")
            continue
        if sync is True:
            logger.info("Syncing graph to {}.".format(named_graph))
            added, removed = backend.sync_updates(named_graph, incoming, size=size)
        else:
            logger.info("Posting graph as updates to {}.".format(named_graph))
            added = vstore.bulk_add(named_graph, incoming, size=size)
            removed = 0
        if (added > 0) or (removed > 0):
            # Throttle between files that actually changed the store.
            if sleep > 0:
                logger.info("Sleeping for {} seconds between files.".format(sleep))
                time.sleep(sleep)
        else:
            logger.info("No changes made to {}.".format(named_graph))
    return True