Example #1
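# Build the shared machinery for a Crossref import run: a ResourceFinder
# backed by the triplestore, an optional ORCIDFinder (created only when a
# configuration file is given), a CrossrefProcessor, and a
# CrossrefDataHandler bound to the processor's graph set. Configuration
# values such as triplestore_url, base_dir and base_iri are module-level
# globals defined elsewhere in the script.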
def create_resources(json=None):
    rf = ResourceFinder(ts_url=triplestore_url,
                        base_dir=base_dir,
                        base_iri=base_iri,
                        tmp_dir=temp_dir_for_rdf_loading,
                        context_map={context_path: context_file_path},
                        default_dir=default_dir,
                        dir_split=dir_split_number,
                        n_file_item=items_per_file)
    of = None
    if orcid_conf_path is not None:
        of = ORCIDFinder(orcid_conf_path)
    cp = CrossrefProcessor(base_iri, context_path, info_dir, json, rf, of,
                           items_per_file, "")
    cdh = CrossrefDataHandler(graph_set=cp.graph_set(), resource_finder=rf)

    return rf, of, cp, cdh
Example #2
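# Generate provenance for the given graph set, serialise both the data and
# its provenance to disk, and refresh the dataset description in the
# triplestore.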
def store_all(gs):
    prov = ProvSet(
        gs, base_iri, context_path, default_dir, full_info_dir,
        ResourceFinder(base_dir=base_dir,
                       base_iri=base_iri,
                       tmp_dir=temp_dir_for_rdf_loading,
                       context_map={context_path: context_file_path},
                       dir_split=dir_split_number,
                       n_file_item=items_per_file,
                       default_dir=default_dir), dir_split_number,
        items_per_file, "")  # Empty supplier prefix, so that provenance data gets none
    prov.generate_provenance()

    print("Storing the data for %s entities." % entity_count)
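    # Two storers with the same layout settings: one for the bibliographic
    # data, one for its provenance.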
    res_storer = Storer(gs,
                        context_map={context_path: context_file_path},
                        dir_split=dir_split_number,
                        n_file_item=items_per_file,
                        default_dir=default_dir)

    prov_storer = Storer(prov,
                         context_map={context_path: context_file_path},
                         dir_split=dir_split_number,
                         n_file_item=items_per_file,
                         default_dir=default_dir)

    res_storer.store_all(base_dir, base_iri, context_path,
                         temp_dir_for_rdf_loading)

    prov_storer.store_all(base_dir, base_iri, context_path,
                          temp_dir_for_rdf_loading)

    print("Updating the dataset description.")
    dset_handler = DatasetHandler(triplestore_url_real, context_path,
                                  context_file_path, base_iri, base_dir,
                                  full_info_dir, dataset_home,
                                  temp_dir_for_rdf_loading)
    dset_handler.update_dataset_info(gs)
Example #3
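# Counterpart of store_all() for updates: provenance is generated in
# removal mode (do_insert=False), and the data itself is stored with
# remove_data=True (see below).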
def update_all(g_set, remove_entity, full_info_dir):
    prov = ProvSet(
        g_set, base_iri, context_path, default_dir, full_info_dir,
        ResourceFinder(base_dir=base_dir,
                       base_iri=base_iri,
                       tmp_dir=temp_dir_for_rdf_loading,
                       context_map={context_path: context_file_path},
                       dir_split=dir_split_number,
                       n_file_item=items_per_file,
                       default_dir=default_dir), dir_split_number,
        items_per_file, "")
    prov.generate_provenance(do_insert=False, remove_entity=remove_entity)

    res_storer = Storer(g_set,
                        context_map={context_path: context_file_path},
                        dir_split=dir_split_number,
                        n_file_item=items_per_file,
                        default_dir=default_dir)

    prov_storer = Storer(prov,
                         context_map={context_path: context_file_path},
                         dir_split=dir_split_number,
                         n_file_item=items_per_file,
                         default_dir=default_dir)

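    # remove_data=True stores the triples as removals rather than
    # additions, matching the provenance generated above with
    # do_insert=False.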
    res_storer.store_all(base_dir,
                         base_iri,
                         context_path,
                         temp_dir_for_rdf_loading,
                         remove_data=True)

    prov_storer.store_all(base_dir, base_iri, context_path,
                          temp_dir_for_rdf_loading)

    dset_handler = DatasetHandler(triplestore_url_real, context_path,
                                  context_file_path, base_iri, base_dir, "",
                                  dataset_home, temp_dir_for_rdf_loading)
    dset_handler.update_dataset_info(g_set)
Example #4
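    # Lower-case every DOI literal value. Offending triples are collected
    # first and swapped in a second pass, since mutating the graph while
    # iterating over g.triples() is unsafe.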
    doi_to_remove = []
    doi_to_add = []
    for s, p, o in g.triples((None, LITERAL.hasLiteralValue, None)):
        o_str = str(o)
        lower_o_str = o_str.lower()
        if o_str != lower_o_str:
            doi_to_remove.append((s, p, o))
            doi_to_add.append((s, p, Literal(lower_o_str)))
    for s, p, o in doi_to_remove:
        g.remove((s, p, o))
    for s, p, o in doi_to_add:
        g.add((s, p, o))

    if not args.avoid:
        print("Checking additional mappings in the oc/ccc triplestore")
        rf = ResourceFinder(ts_url=triplestore_url, default_dir=default_dir)
        with open(args.table, "a") as f:
            for s, p, o in g.triples((None, DATACITE.hasIdentifier, None)):
                if str(s) not in mapping_table:
                    is_doi = False
                    is_isbn = False
                    id_string = None
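                    # Inspect the identifier entity: record whether its
                    # scheme is DOI or ISBN, and capture its literal value.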
                    for s1, p2, o2 in g.triples((o, None, None)):
                        if p2 == DATACITE.usesIdentifierScheme:
                            if o2 == DATACITE.doi:
                                is_doi = True
                            elif o2 == DATACITE.isbn:
                                is_isbn = True
                        elif p2 == LITERAL.hasLiteralValue:
                            id_string = str(o2).strip()
Example #5
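 # Iterate over the JSON files in sorted order, for as long as
 # s.can_proceed() allows: parse each file, run it through a
 # CrossrefProcessor and, when a graph set is produced, build the
 # corresponding ProvSet.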
 for cur_file in sorted(cur_files):
     if s.can_proceed():
         if cur_file.endswith(".json"):
             cur_file_path = cur_dir + sep + cur_file
             cur_local_dir_path = re.sub(
                 "^([0-9]+-[0-9]+-[0-9]+-[0-9]+).+$", "\\1",
                 cur_file)
             with open(cur_file_path) as fp:
                 last_file = cur_file_path
                 last_local_dir = cur_local_dir_path
                 print("\n\nProcessing file '%s'\n" % cur_file_path)
                 json_object = json.load(fp)
                 crp = CrossrefProcessor(
                     base_iri, context_path, full_info_dir,
                     json_object,
                     ResourceFinder(ts_url=triplestore_url,
                                    default_dir=default_dir),
                     ORCIDFinder(orcid_conf_path), items_per_file,
                     supplier_prefix)
                 result = crp.process()
                 if result is not None:
                     prov = ProvSet(
                         result, base_iri, context_path,
                         default_dir, full_info_dir,
                         ResourceFinder(
                             base_dir=base_dir,
                             base_iri=base_iri,
                             tmp_dir=temp_dir_for_rdf_loading,
                             context_map={
                                 context_path: context_file_path
                             },
                             dir_split=dir_split_number,