Пример #1
0
def diff_meta(specs):
    """
    :param specs: is of type dictionary.
    For this, it needs the following keys:
        lens_name: the name of this lens
        lens: the URI of the lens about to be created
        lens_target_triples: predicate object for each graph directly involved in the lens
        triples: The number of triples in this graph
        expectedTriples: Because of possible triple removal, this provides the sum of all correspondences
        from all direct target graphs
        removedDuplicates: The number of removed triples in case of duplicates
        insert_query: the insert query that let to the creation of the current lens.
    :return:
    """

    specs[St.triples] = Qry.get_namedgraph_size(specs[St.lens],
                                                isdistinct=False)

    metadata = """
    ##################################################################
    ### METADATA
    ### for the lens: {0}
    ##################################################################
    PREFIX rdfs:        <{1}>
    PREFIX alivocab:    <{2}>
    PREFIX void:        <{3}>
    PREFIX bdb:         <{4}>
    PREFIX lensOp:      <{5}>
    PREFIX specific:    <{12}>

    INSERT DATA
    {{
        ### RESOURCE
        <{0}>
            a                                   bdb:Lens ;
            rdfs:label                          "{10}" ;
            alivocab:operator                   lensOp:difference ;
            void:triples                        {6} ;
            void:subjectsTarget                 <{7}> ;
            void:objectsTarget                  <{8}> ;
            alivocab:singletonGraph             specific:{10} ;
            bdb:assertionMethod                 <{9}{10}> .

        ### ASSERTION METHOD"
        <{9}{10}>
            alivocab:sparql           \"\"\"{11}\"\"\" .
    }}""".format(specs[St.lens], Ns.rdfs, Ns.alivocab, Ns.void, Ns.bdb,
                 Ns.lensOp, specs[St.triples], specs[St.subjectsTarget],
                 specs[St.objectsTarget], Ns.method, specs[St.lens_name],
                 specs[St.insert_query], Ns.singletons)

    # print metadata
    return metadata
Пример #2
0
def linkset_metadata(specs, display=False):

    extra = ""
    if St.reducer in specs[St.source] and len(
            specs[St.source][St.reducer]) > 0:
        extra += "\n        alivocab:subjectsReducer    <{}> ;".format(
            specs[St.source][St.reducer])

    if St.reducer in specs[St.target] and len(
            specs[St.target][St.reducer]) > 0:
        extra += "\n        alivocab:objectsReducer     <{}> ;".format(
            specs[St.target][St.reducer])

    if St.intermediate_graph in specs and len(
            specs[St.intermediate_graph]) > 0:
        extra += "\n        alivocab:intermediate       <{}> ;".format(
            specs[St.intermediate_graph])

    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        extra += "\n        alivocab:threshold          {} ;".format(
            str(specs[St.threshold]))

    if St.delta in specs and len(str(specs[St.delta])) > 0:
        extra += "\n        alivocab:delta              {} ;".format(
            str(specs[St.delta]))

    source = specs[St.source]
    target = specs[St.target]

    src_aligns = Ls.format_aligns(source[St.aligns])
    trg_aligns = Ls.format_aligns(target[St.aligns])

    # cCROSS CHECK INFORMATION IS USED IN CASE THE ALIGN PROPERTY APPEARS MEANINGLESS
    src_cross_check = Ls.format_aligns(
        source[St.crossCheck]) if St.crossCheck in source else None
    trg_cross_check = Ls.format_aligns(
        target[St.crossCheck]) if St.crossCheck in target else None

    # CROSS CHECK FOR THE WHERE CLAUSE
    cross_check_where = ''
    cross_check_where += "\n    BIND(iri({}) AS ?src_crossCheck)".format(
        src_cross_check) if src_cross_check is not None else ''
    cross_check_where += "\n    BIND(iri({}) AS ?trg_crossCheck)".format(
        trg_cross_check) if trg_cross_check is not None else ''

    # CROSS CHECK FOR THE INSERT CLAUSE
    cross_check_insert = ''
    cross_check_insert += "\n        alivocab:crossCheckSubject        ?src_crossCheck ;" \
        if src_cross_check is not None else ''
    cross_check_insert += "\n        alivocab:crossCheckObject         ?trg_crossCheck ;" \
        if trg_cross_check is not None else ''

    # specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.linkset_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "exactStrSim",
                                     specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method,
                                               specs[St.linkset_name])
    specs[St.justification] = "{}{}".format(Ns.justification,
                                            specs[St.linkset_name])
    specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \
                             "linking approach based on the <{}{}> mechanism.". \
        format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    if str(specs[St.mechanism]).lower() == "intermediate":
        specs[
            St.link_name] = "Exact String Similarity via intermediate dataset"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "The method MATCH VIA INTERMEDIATE DATASET is used to align the" \
                                          " source and the target by using properties that present different " \
                                          "descriptions of a same entity, such as country name and country code. " \
                                          "This is possible by providing an intermediate dataset that binds the " \
                                          "two alternative descriptions to the very same identifier."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    if str(specs[St.mechanism]).lower() == "exactstrsim":
        specs[St.link_name] = "Exact String Similarity"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "We assume that entities with the aligned predicates sharing the " \
                                          "exact same content are the same. This assumption applies when dealing " \
                                          "with entities such as Organisation."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "identity":
        specs[St.link_name] = "Same URI"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[
            St.
            justification_comment] = "We assume that entities with the same URI are identical."
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their identical URI using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "approxstrsim":
        specs[St.link_name] = "Approximate String Similarity"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities with a string similarity in the interval [{} 1[.".\
            format(specs[St.threshold])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate string similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "nearbygeosim":
        specs[St.link_name] = "Near by Geo-Similarity"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities near each other by at most {} <{}>.". \
            format(specs[St.unit_value], specs[St.unit_value])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their nearby Geo-Similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset],
                                                isdistinct=False)
    print "\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples])

    query = "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}". \
        format("##################################################################",
               "### METADATA FOR {}".format(specs[St.linkset]),
               "##################################################################",
               "PREFIX prov:        <{}>".format(Ns.prov),
               "PREFIX alivocab:    <{}>".format(Ns.alivocab),
               "PREFIX rdfs:        <{}>".format(Ns.rdfs),
               "PREFIX void:        <{}>".format(Ns.void),
               "PREFIX bdb:         <{}>".format(Ns.bdb),

               "INSERT",
               "{",
               "    <{}>".format(specs[St.linkset]),
               "        rdfs:label                  \"{}\" ; ".format(specs[St.linkset_name]),
               "        a                           void:Linkset ;",
               "        void:triples                {} ;".format(specs[St.triples]),
               "        alivocab:sameAsCount        {} ;".format(specs[St.sameAsCount]),
               "        alivocab:alignsMechanism    <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
               "        void:subjectsTarget         <{}> ;".format(source[St.graph]),
               "        void:objectsTarget          <{}> ;".format(target[St.graph]),
               "        void:linkPredicate          <{}> ;".format(specs[St.link]),
               "        bdb:subjectsDatatype        <{}> ;".format(source[St.entity_datatype]),
               "        bdb:objectsDatatype         <{}> ;".format(target[St.entity_datatype]),
               "        alivocab:singletonGraph     <{}> ;".format(specs[St.singleton]),
               "        bdb:assertionMethod         <{}> ;".format(specs[St.assertion_method]),
               "        bdb:linksetJustification    <{}> ;{}".format(specs[St.justification], extra),
               "        alivocab:alignsSubjects     ?src_aligns ;",
               "        alivocab:alignsObjects      ?trg_aligns ;{}".format(cross_check_insert),
               "        rdfs:comment                \"\"\"{}\"\"\" .".format(specs[St.linkset_comment]),

               "\n    ### METADATA ABOUT THE LINKTYPE",
               "      <{}>".format(specs[St.link]),
               "        rdfs:comment                \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
               "        rdfs:label                  \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
               "        rdfs:subPropertyOf          <{}> .".format(specs[St.link_subpropertyof]),

               "\n    ### METADATA ABOUT THE LINKSET JUSTIFICATION",
               "    <{}>".format(specs[St.justification]),
               "        rdfs:comment              \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),

               "\n    ### ASSERTION METHOD",
               "    <{}>".format(specs[St.assertion_method]),
               "        alivocab:sparql           \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
               "}",

               "WHERE",
               "{",
               "    BIND(iri({}) AS ?src_aligns)".format(src_aligns),
               "    BIND(iri({}) AS ?trg_aligns){}".format(trg_aligns, cross_check_where),
               "}")
    # print query
    if display is True:
        print query
    return query
Пример #3
0
def lens_refine_geo_metadata(specs, display=False):

    extra = ""
    if St.reducer in specs[St.source] and len(
            specs[St.source][St.reducer]) > 0:
        extra += "\n        ll:subjectsReducer      <{}> ;".format(
            specs[St.source][St.reducer])

    if St.reducer in specs[St.target] and len(
            specs[St.target][St.reducer]) > 0:
        extra += "\n        ll:objectsReducer       <{}> ;".format(
            specs[St.target][St.reducer])

    if St.intermediate_graph in specs and len(
            specs[St.intermediate_graph]) > 0:
        extra += "\n        ll:intermediate         <{}> ;".format(
            specs[St.intermediate_graph])

    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        extra += "\n        ll:threshold            {} ;".format(
            str(specs[St.threshold]))

    if St.delta in specs and len(str(specs[St.delta])) > 0:
        extra += "\n        ll:delta                {} ;".format(
            str(specs[St.delta]))

    source = specs[St.source]
    target = specs[St.target]

    src_cross_check = Ls.format_aligns(source[St.crossCheck])
    src_long = Ls.format_aligns(source[St.longitude])
    src_lat = Ls.format_aligns(source[St.latitude])

    trg_cross_check = Ls.format_aligns(target[St.crossCheck])
    trg_long = Ls.format_aligns(target[St.longitude])
    trg_lat = Ls.format_aligns(target[St.latitude])

    # specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.lens_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "nearbyGeoSim",
                                     specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.lens_name])
    specs[St.justification] = "{}{}".format(Ns.justification,
                                            specs[St.lens_name])
    specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \
                             "linking approach based on the <{}{}> mechanism.". \
        format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    if str(specs[St.mechanism]).lower() == "nearbygeosim":
        specs[St.link_name] = "Near by Geo-Similarity"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities near each other by at most {} <{}>.". \
            format(specs[St.unit_value], specs[St.unit])
        specs[St.lens_comment] = "Linking <{}> to <{}> based on their nearby Geo-Similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    specs[St.triples] = Qry.get_namedgraph_size(specs[St.lens],
                                                isdistinct=False)
    print "\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples])

    query = "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}". \
        format("##################################################################",
               "### METADATA FOR {}".format(specs[St.lens]),
               "##################################################################",
               "PREFIX prov:        <{}>".format(Ns.prov),
               "PREFIX ll:          <{}>".format(Ns.alivocab),
               "PREFIX rdfs:        <{}>".format(Ns.rdfs),
               "PREFIX void:        <{}>".format(Ns.void),
               "PREFIX bdb:         <{}>".format(Ns.bdb),

               "INSERT",
               "{",
               "    <{}>".format(specs[St.lens]),
               "        rdfs:label                  \"{}\" ; ".format(specs[St.lens_name]),
               "        a                           bdb:Lens ;",
               "        void:triples                {} ;".format(specs[St.triples]),
               "        ll:sameAsCount              {} ;".format(specs[St.sameAsCount]),
               "        ll:alignsMechanism          <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
               "        void:subjectsTarget         <{}> ;".format(source[St.graph]),
               "        void:objectsTarget          <{}> ;".format(target[St.graph]),
               "        void:linkPredicate          <{}> ;".format(specs[St.link]),
               "        bdb:subjectsDatatype        <{}> ;".format(source[St.entity_datatype]),
               "        bdb:objectsDatatype         <{}> ;".format(target[St.entity_datatype]),
               "        ll:singletonGraph           <{}> ;".format(specs[St.singleton]),
               "        bdb:assertionMethod         <{}> ;".format(specs[St.assertion_method]),
               "        bdb:linksetJustification    <{}> ;{}".format(specs[St.justification], extra),
               "        ll:crossCheckSubject        ?src_crossCheck ;",
               "        ll:crossCheckObject         ?trg_crossCheck ;",

               "        ll:unit                     <{}> ;".format(specs[St.unit]),
               "        ll:unitValue                {} ;".format(specs[St.unit_value]),

               "        ll:alignsSubjects           ( ?src_long ?src_lat ) ;",
               "        ll:alignsObjects            ( ?trg_long ?trg_lat ) ;",

               "        rdfs:comment                \"\"\"{}\"\"\" .".format(specs[St.lens_comment]),

               "\n    ### METADATA ABOUT THE LINKTYPE",
               "      <{}>".format(specs[St.link]),
               "        rdfs:comment                \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
               "        rdfs:label                  \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
               "        rdfs:subPropertyOf          <{}> .".format(specs[St.link_subpropertyof]),

               "\n    ### METADATA ABOUT THE LINKSET JUSTIFICATION",
               "    <{}>".format(specs[St.justification]),
               "        rdfs:comment              \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),

               "\n    ### ASSERTION METHOD",
               "    <{}>".format(specs[St.assertion_method]),
               "        ll:sparql                   \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
               "}",

               "WHERE",
               "{",
               "    BIND(iri({}) AS ?src_crossCheck)".format(src_cross_check),
               "    BIND(iri({}) AS ?trg_crossCheck)".format(trg_cross_check),

               "    BIND(iri({}) AS ?src_long)".format(src_long),
               "    BIND(iri({}) AS ?src_lat)".format(src_lat),

               "    BIND(iri({}) AS ?trg_long)".format(trg_long),
               "    BIND(iri({}) AS ?trg_lat)".format(trg_lat),

               "}")
    # print query
    if display is True:
        print query
    return query
Пример #4
0
def spa_linkset_subset(specs, activated=False):

    if activated is True:

        check = Ls.run_checks(specs, check_type="subset")
        if check[St.result] != "GOOD TO GO":
            return check

        # THE LINKSET DOES NOT EXIT, LETS CREATE IT NOW
        print Ls.linkset_info(specs, specs[St.sameAsCount])

        ##########################################################
        """ 1. GENERATE SUBSET LINKSET INSERT QUERY            """
        ##########################################################
        insert_query = spa_subset_insert(specs)
        # print insert_query

        #############################################################
        """ 2. EXECUTING INSERT SUBSET LINKSET QUERY AT ENDPOINT  """
        #############################################################
        Qry.endpoint(insert_query)

        #############################################################
        """ 3. LINKSET SIZE (NUMBER OF TRIPLES)                   """
        #############################################################
        # LINKSET SIZE (NUMBER OF TRIPLES)
        specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset])
        print "\t>>> {} TRIPLES INSERTED".format(specs[St.triples])

        # NO MATCH FOUND
        if specs[St.triples] == "0":

            # logger.warning("WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED.")
            print "WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED."
            specs[St.insert_query] = insert_query
            # metadata = spa_subset_metadata(source, target, data, size)

            explain_q = "ask {{ GRAPH <{}> {{ ?s <{}> ?o }} }}".format(
                specs[St.linkset], specs[St.source][St.link_old])
            response = Qry.boolean_endpoint_response(explain_q)
            explain = True if response == "true" else False
            # print explain
            if explain is False:
                # logger.warning("{} DOES NOT EXIST IS {}.".format(data[St.link_old], source[St.graph]))
                print "{} DOES NOT EXIST IS {}.".format(
                    specs[St.source][St.link_old], specs[St.source][St.graph])

                message = "{} DOES NOT EXIST IS {}.".format(
                    specs[St.source][St.link_old], specs[St.source][St.graph])

                return {St.message: message, St.error_code: 1, St.result: None}

        # SOME MATCHES WHERE FOUND
        construct_query = "\n{}\n{}\n{}\n".format(
            "PREFIX predicate: <{}>".format(Ns.alivocab),
            "construct { ?x ?y ?z }",
            "where     {{ graph <{}> {{ ?x ?y ?z }} }}".format(
                specs[St.linkset]),
        )
        # print construct_query
        construct_response = Qry.endpointconstruct(construct_query)
        if construct_response is not None:
            construct_response = construct_response.replace(
                '{', "<{}>\n{{".format(specs[St.linkset]), 1)

        # GENERATE LINKSET SINGLETON METADATA QUERY
        singleton_metadata_query = "\n{}\n{}\n{}\n{}\n{}\n{}\n\n".format(
            "PREFIX singMetadata:   <{}>".format(Ns.singletons),
            "PREFIX predicate:      <{}>".format(Ns.alivocab),
            "PREFIX prov:           <{}>".format(Ns.prov),
            "PREFIX rdf:            <{}>".format(Ns.rdf),
            "construct { ?x ?y ?z }",
            "where     {{ graph <{}{}> {{ ?x ?y ?z }} }}".format(
                Ns.singletons, specs[St.linkset_name]),
        )
        # GET THE SINGLETON METADATA USING THE CONSTRUCT QUERY
        singleton_construct = Qry.endpointconstruct(singleton_metadata_query)
        if singleton_construct is not None:
            singleton_construct = singleton_construct.replace(
                '{', "singMetadata:{}\n{{".format(specs[St.linkset_name]), 1)

        #############################################################
        """ 4. LINKSET METADATA                                   """
        #############################################################
        # METADATA
        specs[St.insert_query] = insert_query
        metadata = Gn.spa_subset_metadata(specs)

        ###############################################################
        """ 5. EXECUTING INSERT LINKSET METADATA QUERY AT ENDPOINT  """
        ###############################################################
        # EXECUTING METADATA QUERY AT ENDPOINT
        Qry.endpoint(metadata)

        print "\t>>> WRITING TO FILE"
        write_to_file(graph_name=specs[St.linkset_name],
                      metadata=metadata.replace("INSERT DATA", ""),
                      correspondences=construct_response,
                      singletons=singleton_construct,
                      directory=DIRECTORY)

        print "\tLinkset created as [SUBSET]: ", specs[St.linkset]
        print "\t*** JOB DONE! ***"

        message = "The linkset was created as [{}] with {} triples found!".format(
            specs[St.linkset], specs[St.triples])

        return {
            St.message: message,
            St.error_code: 0,
            St.result: specs[St.linkset]
        }
Пример #5
0
def linkset_refined_metadata(specs, display=False):

    # CONDITIONAL METADATA TO APPEND TO THE REFINED LINKSET

    extra = ""

    if St.extended_graph in specs[St.source] and len(
            specs[St.source][St.extended_graph]) > 0:
        extra += "\n        alivocab:subjectsExtended    <{}> ;".format(
            specs[St.source][St.extended_graph])

    if St.extended_graph in specs[St.target] and len(
            specs[St.target][St.extended_graph]) > 0:
        extra += "\n        alivocab:objectsExtended     <{}> ;".format(
            specs[St.target][St.extended_graph])

    if St.reducer in specs[St.source] and len(
            specs[St.source][St.reducer]) > 0:
        extra += "\n        alivocab:subjectsReducer     <{}> ;".format(
            specs[St.source][St.reducer])

    if St.reducer in specs[St.target] and len(
            specs[St.target][St.reducer]) > 0:
        extra += "\n        alivocab:objectsReducer      <{}> ;".format(
            specs[St.target][St.reducer])

    if St.intermediate_graph in specs and len(
            specs[St.intermediate_graph]) > 0:
        extra += "\n        alivocab:intermediatesTarget <{}> ;".format(
            specs[St.intermediate_graph])

    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        extra += "\n        alivocab:threshold           {} ;".format(
            str(specs[St.threshold]))

    if St.delta in specs and str(specs[St.delta]) != "0":
        converted = convert_to_float(str(specs[St.delta]))
        if math.isnan(converted) is False:
            extra += "\n        alivocab:delta               {} ;".format(
                converted)

    source = specs[St.source]
    target = specs[St.target]
    src_aligns = Ls.format_aligns(source[St.aligns])
    trg_aligns = Ls.format_aligns(target[St.aligns])

    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.refined_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "exactStrSim",
                                     specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method,
                                               specs[St.refined_name])
    specs[St.justification] = "{}{}".format(Ns.justification,
                                            specs[St.refined_name])
    specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \
                             "linking approach based on the <{}{}> mechanism.". \
        format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    if str(specs[St.mechanism]).lower() == "exactstrsim":
        specs[St.link_name] = "Exact String Similarity"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "We assume that entities with the aligned predicates sharing the " \
                                          "exact same content are same. This assumption applies when dealing " \
                                          "with entities such as Organisation."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "identity":
        specs[St.link_name] = "Same URI"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[
            St.
            justification_comment] = "We assume that entities with the same URI are identical."
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their identical URI using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "approxnbrsim":
        specs[St.link_name] = "Approximate Number Similarity"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities with an approximate number similarity" \
                                          " in the interval [0 {}].".format(specs[St.delta])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate number similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "approxstrsim":
        specs[St.link_name] = "Approximate String Similarity"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities with a string similarity in the interval [{} 1[.".\
            format(specs[St.threshold])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate string similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "intermediate":
        specs[St.link_name] = "Exact String Similarity"
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.mechanism])
        specs[St.justification_comment] = "This is an implementation of the Exact String Similarity Mechanism over " \
                                          "the aligned predicates."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    # CHECKING WHETHER THE REFINED HAS SOME TRIPLES INSERTED
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.refined],
                                                isdistinct=False)

    triples = Qry.get_namedgraph_size(specs[St.linkset], isdistinct=False)
    print "\t>>> {} CORRESPONDENCES IN THE SOURCE".format(triples)
    print "\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples])
    print "\t>>> {} CORRESPONDENCES DO NOT COMPLY WITH THE NEW CONDITION".format(
        str(int(triples) - int(specs[St.triples])))

    message = "{}<br/>{}<br/>{}".format(
        "{} CORRESPONDENCES IN THE SOURCE".format(triples),
        "{} CORRESPONDENCES INSERTED".format(specs[St.triples]),
        "{} CORRESPONDENCES DO NOT COMPLY WITH THE NEW CONDITION".format(
            str(int(triples) - int(specs[St.triples]))))

    if int(specs[St.triples]) > 0:
        derived_from = specs[St.derivedfrom] if St.derivedfrom in specs else ""
        intermediate = "\n        alivocab:intermediatesTarget    <{}> ;".format(specs[St.intermediate_graph]) \
            if str(specs[St.mechanism]).lower() == "intermediate" else ""

        query = "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                "\n{}\n{}\n{}\n{}\n{}" \
                "\n{}\n{}\n{}" \
                "\n{}\n{}\n{}\n{}\n{}" \
                "\n{}\n{}\n{}\n{}\n{}". \
            format("##################################################################",
                   "### METADATA FOR {}".format(specs[St.refined]),
                   "##################################################################",
                   "PREFIX prov:        <{}>".format(Ns.prov),
                   "PREFIX alivocab:    <{}>".format(Ns.alivocab),
                   "PREFIX rdfs:        <{}>".format(Ns.rdfs),
                   "PREFIX void:        <{}>".format(Ns.void),
                   "PREFIX bdb:         <{}>".format(Ns.bdb),

                   "INSERT",
                   "{",
                   "    <{}>".format(specs[St.refined]),
                   "        a                               void:Linkset ;\n{}".format(derived_from),
                   "        rdfs:label                      \"{}\" ; ".format(specs[St.refined_name]),
                   "        void:triples                    {} ;".format(specs[St.triples]),
                   "        alivocab:sameAsCount            {} ;".format(specs[St.sameAsCount]),
                   "        alivocab:alignsMechanism        <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
                   "        void:subjectsTarget             <{}> ;{}".format(source[St.graph], intermediate),
                   "        void:objectsTarget              <{}> ;".format(target[St.graph]),
                   "        void:linkPredicate              <{}> ;".format(specs[St.link]),
                   "        bdb:subjectsDatatype            <{}> ;".format(source[St.entity_datatype]),
                   "        bdb:objectsDatatype             <{}> ;".format(target[St.entity_datatype]),
                   "        alivocab:singletonGraph         <{}> ;".format(specs[St.singleton]),
                   "        bdb:assertionMethod             <{}> ;".format(specs[St.assertion_method]),
                   "        bdb:linksetJustification        <{}> ;{}".format(specs[St.justification], extra),
                   "        alivocab:alignsSubjects         ?src_aligns ;",
                   "        alivocab:alignsObjects          ?trg_aligns ;",
                   "        rdfs:comment                    \"\"\"{}\"\"\" .".format(specs[St.linkset_comment]),

                   "\n    ### METADATA ABOUT THE LINKTYPE",
                   "      <{}>".format(specs[St.link]),
                   "        rdfs:comment                \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
                   "        rdfs:label                  \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
                   "        rdfs:subPropertyOf          <{}> .".format(specs[St.link_subpropertyof]),

                   "\n    ### METADATA ABOUT THE LINKSET JUSTIFICATION",
                   "    <{}>".format(specs[St.justification]),
                   "        rdfs:comment              \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),

                   "\n    ### ASSERTION METHOD",
                   "    <{}>".format(specs[St.assertion_method]),
                   "        alivocab:sparql           \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
                   "}",

                   "WHERE",
                   "{",
                   "    BIND(iri({}) AS ?src_aligns)".format(src_aligns),
                   "    BIND(iri({}) AS ?trg_aligns)".format(trg_aligns),
                   "}")

        if display is True:
            print query
        print "\t>>> Done generating the metadata"
        return {"query": query, "message": message}
    else:
        return {"query": None, "message": message}
Пример #6
0
def union(specs, activated=False):

    if activated is False:
        # logger.warning("THE FUNCTION IS NOT ACTIVATED")
        print("THE FUNCTION IS NOT ACTIVATED")
        return {
            St.message: "THE FUNCTION IS NOT ACTIVATED.",
            St.error_code: 1,
            St.result: None
        }

    print "\nEXECUTING UNION SPECS" \
          "\n======================================================" \
          "========================================================"
    """
    THE generate_lens_name FUNCTION RETURNS THE NAME OF THE UNION AND A
    QUERY THAT ALLOWS TO ASk WHETHER THE LENS TO BE CREATED EXIST BY CHECKING
    WHETHER THERE EXISTS A LENS WITH THE SAME COMPOSITION IN TERMS GRAPHS USED FOR THE UNION
    """

    # SET THE NAME OF THE UNION-LENS
    print "1. DATASETS:", len(specs[St.datasets])
    for ds in specs[St.datasets]:
        print "\t- {}".format(ds)
    info = Lu.generate_lens_name(specs[St.datasets])

    specs[St.lens] = "{}{}".format(Ns.lens, info["name"])
    print "\n2. LENS: ", info["name"]

    # CHECK WHETHER THE LENS EXISTS
    check = run_checks(specs, info["query"])
    if check[St.result] != "GOOD TO GO":
        if check[St.message].__contains__("ALREADY EXISTS"):
            Urq.register_lens(specs, is_created=False)
        return check
    # print "AFTER CHECK"

    # PREPARATION FOR THE CREATION OF THE LENS
    specs[St.lens_target_triples] = ""
    specs[St.expectedTriples] = 0
    specs[St.insert_query] = ""
    lens = specs[St.lens]
    source = "{}{}".format(Ns.tmpgraph, "load00")
    message_2 = Ec.ERROR_CODE_8.replace("#", specs[St.lens])
    count = -1
    insert_ans = False

    try:

        # GO THROUGH THE LINKSETS/LENSES IN THE LENS
        #   1-SUM UP THE EXPECTED NUMBER OF TRIPLES
        #   2-GENERATE THE TRIPLES REPRESENTATION OF GHE GRAPHS COMPOSING THIS LENS
        #   3-GENERATE THE INSERT QUERY FOR MOVING BOTH LINKSET AND SINGLETON GRAPHS TO THE UNION GRAPH
        total_size = 0

        # LOAD ALL GRAPHS IN LOAD00
        specs[St.insert_query] += "DROP SILENT GRAPH <{}{}> ;\n".format(
            Ns.tmpgraph, "load00")

        # ITERATE THROUGH THE PROVIDED GRAPHS
        for linkset in specs[St.datasets]:

            # print "TARGET: ", linkset
            count += 1

            # GET THE TOTAL NUMBER OF CORRESPONDENCE TRIPLES INSERTED
            curr_triples = Qry.get_triples(linkset)
            # PROBABLY THE LINKSET HAS NO SUCH PROPERTY " void:triples  ?triples ."

            if curr_triples is None:
                curr_triples = Qry.get_triples_count(linkset)

            total_size += int(curr_triples)
            print "{} Contains {} triples".format(linkset, curr_triples)

            if curr_triples is not None:
                specs[St.expectedTriples] += int(curr_triples)
            else:
                # THE IS A PROBLEM WITH THE GRAPH FOR SEVERAL POSSIBLE REASONS
                return {
                    St.message: message_2.replace("\n", "<br/>"),
                    St.error_code: 1,
                    St.result: None
                }

            # GENERATE TRIPLES OUT OF THE TARGETS
            specs[
                St.
                lens_target_triples] += "\n\t        void:target                         <{}> ;".format(
                    linkset)

            # GET THE INSERT QUERY
            # BOTH THE LINKSET AND THE SINGLETONS ARE MOVED TO A SINGLE GRAPH
            partial_query = Qry.q_copy_graph(source, source, linkset)
            if count == 0:
                specs[St.insert_query] += partial_query
            else:
                specs[St.insert_query] += " ;\n{}".format(partial_query)

        # INTERSECTION MANIPULATION OVER THE UNION (SOURCE)
        insert_query = union_insert_q(lens, source, specs[St.lens_name])
        # print "manipulation:", manipulation
        specs[St.insert_query] += " ;\n{}".format(insert_query)

        # GENERATE THE LENS UNION
        if activated is True:

            # print specs[St.insert_query]
            insert_ans = Qry.boolean_endpoint_response(specs[St.insert_query])

            specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
            if specs[St.triples] == "0":
                message = Ec.ERROR_CODE_9
                print message
                # return None
                return {
                    St.message: message.replace("\n", "<br/>"),
                    St.error_code: 1,
                    St.result: None
                }

            # CHECK WHETHER THE RESULT CONTAINS DUPLICATES
            contains_duplicated = Qry.contains_duplicates(lens)
            print "Contains Opposite Direction Duplicated:", contains_duplicated

            # IF IT DOES, REMOVE THE DUPLICATES
            if contains_duplicated is True:
                # logger.warning("THE LENS CONTAINS DUPLICATES.")
                print "THE LENS CONTAINS DUPLICATES."
                Qry.remove_duplicates(lens)
                # logger.warning("THE DUPLICATES ARE NOW REMOVED.")
                print "THE DUPLICATES ARE NOW REMOVED."

            print "Number of triples loaded              : {}".format(
                total_size)

            specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
            print "\t>>> INSERTED:  {}\n\t>>> INSERTED TRIPLES: {}".format(
                insert_ans, specs[St.triples])

            print "Inserted : {}".format(specs[St.triples])
            print "Removed  : {}".format(total_size - int(specs[St.triples]))

            # LOAD THE METADATA
            # NOT GOOD AS THE LENS ALSO HAS A SINGLETON GRAPH
            # inserted_correspondences = int(Qry.get_union_triples(lens))
            inserted_correspondences = int(specs[St.triples])
            # print "inserted_correspondences:", inserted_correspondences
            specs[St.removedDuplicates] = specs[
                St.expectedTriples] - inserted_correspondences
            metadata = Gn.union_meta(specs)
            # print "METADATA:", metadata
            meta_ans = Qry.boolean_endpoint_response(metadata)
            print "\t>>> IS THE METADATA GENERATED AND INSERTED?  {}".format(
                meta_ans)

        construct_response = Qry.get_constructed_graph(specs[St.lens])
        if construct_response is not None:
            print "\t>>> WRITING TO FILE"
            construct_response = construct_response.replace(
                '{', "<{}>\n{{".format(specs[St.lens]), 1)
            write_to_file(graph_name=specs[St.lens_name],
                          metadata=None,
                          correspondences=construct_response,
                          directory=DIRECTORY)
        print "\tLens created as : ", specs[St.lens]

        # REGISTER THE LINKSET
        Urq.register_lens(specs, is_created=True)

        # return specs[St.lens]
        message = "THE LENS WAS CREATED as {}. " \
                  "With initially {} triples loaded, {} duplicated triples were found and removed.".\
            format(specs[St.lens], total_size, total_size - int(specs[St.triples]))

        print "\t*** JOB DONE! ***"
        return {
            St.message: message,
            St.error_code: 0,
            St.result: specs[St.lens]
        }

    except Exception as err:
        # logger.warning(err)
        if insert_ans == "true":
            "DROP THE INSERTED UNION"
            drop_linkset(lens, activated=True)

        print "ERROR IN UNION LENS CREATION:", err
        return {St.message: ERROR_CODE_11, St.error_code: 11, St.result: None}
Пример #7
0
def lens_transitive(specs, activated=False):

    # CHECK BOTH DATASETS FOR SAME MECHANISM
    print "GENERATE THE LENS NAME"
    Lu.composition_lens_name(specs)

    print "GET THE SAME AS COUNT"
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.lens_operation])
    # print same_as_count

    # GENERATE THE INSERT QUERY FOR TRANSITIVITY
    # transitive_analyses = lens_transitive_query(specs)
    # if transitive_analyses is None:
    #     return
    # specs[St.insert_query] = transitive_analyses[1]
    # print insert_query
    # exit(0)
    # specs['is_transitive_by'] = transitive_analyses[0]
    ln = get_uri_local_name(specs[St.lens])
    sg = specs[St.subjectsTarget]
    tg = specs[St.objectsTarget]
    ssg = "{}{}".format(Ns.singletons, get_uri_local_name(sg))
    tsg = "{}{}".format(Ns.singletons, get_uri_local_name(tg))

    print "SOURCE: {}".format(sg)
    print "TARGET: {}".format(tg)
    print "1. GENERATING THE INSERT QUERY"
    specs[St.insert_query] = transitive_insert_query(ln, sg, tg, ssg, tsg)

    if activated is True:

        # RUN THE QUERY AT THE END POINT
        print "2. RUNNING THE INSERT QUERY"
        Qry.boolean_endpoint_response(specs[St.insert_query])

        # GET THE SIZE OF THE LENS JUST CREATED ABOVE
        print "3. ETTING THE SIZE OF THE LENS JUST INSERTED"
        size = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False)

        # IF ACTIVATED, INSERT THE METADATA
        if size > 0:

            # GENERATE THE METADATA ABOUT THE LENS JUST CREATED
            print "4. SOME {} TRANSITIVE TRIPLES WERE FOUND".format(size)
            metadata = transitive_metadata(specs, size)
            # print metadata

            print "5. INSERTING THE METADATA"
            Qry.boolean_endpoint_response(metadata)

            print "6. REGISTER THE LENS"
            Urq.register_lens(specs, is_created=True)

            # RUN A CORRESPONDENCE CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC
            print "7. GENERATE THE CONSTRUCT FOR FILE DUMP"
            construct_correspondence = Qry.endpointconstruct(
                Qry.construct_namedgraph(specs[St.lens]))

            if construct_correspondence is not None:
                construct_correspondence = construct_correspondence.replace(
                    '{', "<{}>\n{{".format(specs[St.lens]), 1)

            # RUN A SINGLETON METADATA CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC
            construct_singletons = Qry.endpointconstruct(
                Qry.construct_namedgraph("{}{}".format(Ns.singletons,
                                                       specs[St.lens_name])))

            if construct_singletons is not None:
                construct_singletons = construct_singletons. \
                    replace('{', "<{}{}>\n{{".format(Ns.singletons, specs[St.lens_name]), 1)

            # WRITE TO FILE
            print "WRITING TO FILE"
            write_to_file(graph_name=ln,
                          metadata=metadata,
                          directory=DIRECTORY,
                          correspondences=construct_correspondence,
                          singletons=construct_singletons)

            # return specs[St.lens]
            message = "THE LENS WAS CREATED!<br/>URI = {}".format(
                specs[St.lens])
            print message
            print "\t*** JOB DONE! ***"
            return {
                St.message: message,
                St.error_code: 0,
                St.result: specs[St.lens]
            }

    if activated is False:
        logger.warning(
            "THE FUNCTION IS NOT ACTIVATED BUT THE METADATA THAT IS "
            "SUPPOSED TO BE ENTERED IS WRITEN TO THE CONSOLE.")
Пример #8
0
def enrich(specs, directory, endpoint):

    # TODO RUN IT IF THERE IS NOT GRAPH ENRICHED WITH THE SAME NAME

    # specs[St.graph] = "http://grid.ac/20170712"
    print "ENRICHING DATA/GRAPH FROM EXPORT-ALIGNMENT"
    print "GRAPH:", specs[St.graph]
    print "ENTITY TYPE:", specs[St.entity_datatype]
    print "LAT PREDICATE:", specs[St.long_predicate]
    print "LONG PREDICATE:", specs[St.lat_predicate]
    print "FILE DIRECTORY:", directory
    name = Ut.get_uri_local_name(specs[St.graph])

    print endpoint
    data_1 = Qry.virtuoso_request(
        "ask {{ GRAPH <{}> {{ ?x ?y ?z . }} }}".format(specs[St.graph]),
        endpoint)
    data_1 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_1["result"])
    if len(data_1) > 0:
        data_1 = data_1[0].strip() == "true"
        if data_1 is False:
            print "GRAPH: {} {}".format(
                specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.")

    # CHECKING WHETHER BOTH DATASETS ARE AT THE VIRTUOSO TRIPLE STORE
    data_2 = Qry.virtuoso_request(
        "ask {GRAPH <http://geo.risis.eu/gadm>{ ?x ?y ?z . }}", endpoint)
    data_2 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_2["result"])
    if len(data_2) > 0:
        data_2 = data_2[0].strip() == "true"
        if data_2 is False:
            print "GRAPH: {} {}".format(
                specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.")

    if data_1 is False or data_2 is False:
        message = "BECAUSE BOTH DATASETS NEED TO BE PRESENT AT OUR TRIPLES STORE, WE ARE UNABLE TO EXECUTE THE REQUEST."
        return {
            St.message:
            message,
            St.result:
            'The dataset {} '
            'cannot be enriched with GADM boundary  at the moment.'.format(
                specs[St.graph])
        }

    total = 0
    limit = 20000
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    f_path = "{0}{1}{1}{2}_enriched_{3}.ttl".format(directory, os.path.sep,
                                                    name, date)
    b_path = "{0}{1}{1}{2}_enriched_{3}{4}".format(directory, os.path.sep,
                                                   name, date,
                                                   Ut.batch_extension())

    # MAKE SURE THE FOLDER EXISTS
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError as err:
        print "\n\t[utility_LOAD_TRIPLE_STORE:]", err
        return

    print "\n1. GETTING THE TOTAL NUMBER OF TRIPLES."
    count_query = enrich_query(specs, limit=0, offset=0, is_count=True)
    print count_query
    count_res = Qry.virtuoso_request(count_query, endpoint)
    result = count_res['result']

    # GET THE TOTAL NUMBER OF TRIPLES
    if result is None:
        print "NO RESULT FOR THIS ENRICHMENT."
        return count_res

    g = rdflib.Graph()
    g.parse(data=result, format="turtle")
    attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value")
    for subject, predicate, obj in g.triples((None, attribute, None)):
        total = int(obj)

    # NUMBER OF REQUEST NEEDED
    iterations = total / limit if total % limit == 0 else total / limit + 1
    print "\n2. TOTAL TRIPLES TO RETREIVE  : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format(
        total, iterations)

    writer = codecs.open(f_path, "wb", "utf-8")
    batch_writer = codecs.open(b_path, "wb", "utf-8")

    print "3. GENERATING THE BATCH FILE TEXT"
    enriched_graph = "{}_enriched".format(specs[St.graph])
    stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[
        St.stardog_path]

    load_text = """echo "Loading data"
            {}stardog data add {} -g {} "{}"
            """.format(stardog_path, Svr.settings[St.database], enriched_graph,
                       f_path)

    batch_writer.write(to_unicode(load_text))
    batch_writer.close()

    # RUN THE ITERATIONS
    for i in range(0, iterations):

        offset = i * 20000 + 1
        print "\tROUND: {} OFFSET: {}".format(i + 1, offset)

        # print "\t\t1. GENERATING THE ENRICHMENT QUERY"
        virtuoso = enrich_query(specs,
                                limit=limit,
                                offset=offset,
                                is_count=False)
        # print virtuoso
        # exit(0)
        # print Qry.virtuoso(virtuoso)["result"]

        # print "\t\t2. RUNNING THE QUERY + WRITE THE RESULT TO FILE"
        writer.write(Qry.virtuoso_request(virtuoso, endpoint)["result"])

    writer.close()
    print "\n4. RUNNING THE BATCH FILE"
    print "\tTHE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \
        else "\tTHE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH."
    # os.system(b_path)

    # RUN THE BATCH FILE
    print "\tFILE: {}".format(f_path)
    print "\tBATCH: {}\n".format(b_path)
    os.chmod(b_path, 0o777)
    Ut.batch_load(b_path)
    if os.path.exists(b_path) is True:
        os.remove(b_path)

    # TODO 1. REGISTER THE DATASET TO BE ENRICHED IF NOT YET REGISTER
    # TODO 2. ADD THE ENRICHED DATASET TO THE RESEARCH QUESTION (REGISTER).
    # TODO 3. MAYBE, CREATE THE LINKSET BETWEEN THE SOURCE AND THE RESULTING

    size = Qry.get_namedgraph_size(enriched_graph)

    print "JOB DONE...!!!!!!"

    return {
        St.message:
        "The select dataset was enriched with the GADM boundary as {}. "
        "{} triples were created.".format(enriched_graph, size),
        St.result:
        enriched_graph
    }
Пример #9
0
def intersecting(specs, activated=False):

    if activated is False:
        print("THE FUNCTION [intersecting] IS NOT ACTIVATED")
        return {
            St.message: "THE FUNCTION [intersecting] IS NOT ACTIVATED.",
            St.error_code: 1,
            St.result: None
        }

    print Ut.headings("EXECUTING INTERSECTION SPECS...")

    # 1. GENERATE THE LENS NAME
    lens_name = generate_lens_name(specs['datasets'], operator="intersection")
    specs[St.lens] = "{}{}".format(Ns.lens, lens_name['name'])
    Ut.update_specification(specs)

    # **********************************
    # 3. GOOD TO GO CHECK
    # **********************************
    query = """
            SELECT *
            {{
                <{}> ?predicate ?object .
            }}
                """.format(specs[St.lens])
    check = Lens_Union.run_checks(specs, query, operator="intersection")

    # NOT GOOD TO GO, IT ALREADY EXISTS
    if check[St.message].__contains__("ALREADY EXISTS"):
        return {
            St.message: check[St.message],
            St.error_code: 71,
            St.result: specs[St.lens]
        }

    # **********************************
    # GOOD TO GO
    # **********************************
    else:

        try:
            specs[St.lens_target_triples] = ""

            # DOCUMENTING START TIME
            lens_start = time.time()

            print "\n4. GENERATE THE INSERT QUERY"
            specs[St.insert_query] = intersection_extended(
                specs, lens_name=specs[St.lens_name])
            print specs[St.insert_query]

            print "\n5. >>> LOOKING FOR INTERSECTING LINKS"
            print "\t", Qry.boolean_endpoint_response(specs[St.insert_query])

            print "\n6. EXTRACTING THE NUMBER OF TRIPLES"
            specs[St.triples] = Qry.get_namedgraph_size("{0}{1}".format(
                Ns.lens, specs[St.lens_name]))

            lens_end = time.time()
            diff = lens_end - lens_start
            print " \n>>> Executed so far in    : {:<14}".format(
                str(datetime.timedelta(seconds=diff)))

            if int(specs[St.triples]) > 0:

                for linkset in specs[St.datasets]:
                    specs[St.lens_target_triples] += \
                        "\n\t        void:target                         <{}> ;".format(linkset)

                print "\n7. INSERTING THE GENERIC METADATA"
                metadata = Gn.intersection_meta(specs)
                # print metadata
                Qry.boolean_endpoint_response(metadata)

                # print "\n8. WRITING TO FILE"

                server_message = "Linksets created as: {}".format(
                    specs[St.lens])
                message = "The linkset was created as [{}] with {} triples found!".format(
                    specs[St.lens], specs[St.triples])

                print "\n\t", server_message

                Urq.register_lens(specs, is_created=True)

                ls_end_2 = time.time()
                diff = ls_end_2 - lens_end
                print ">>> Executed in    : {:<14}".format(
                    str(datetime.timedelta(seconds=diff)))
                print "\t*** JOB DONE! ***"
                return {
                    St.message: message,
                    St.error_code: 0,
                    St.result: specs[St.lens]
                }

            else:
                print "The linkset was not generated as no match could be found"
                print "\t*** JOB DONE! ***"
                return {
                    St.message:
                    "The linkset was not generated as no match could be found",
                    St.error_code: 4,
                    St.result: None
                }

        except Exception as err:
            traceback.print_exc()
            return {
                St.message: Ec.ERROR_CODE_1,
                St.error_code: 5,
                St.result: None
            }


# specs = {
#     'lens_operation': u'intersection',
#     'datasets': [u'http://risis.eu/linkset/grid_20170712_eter_2014_approxStrSim_Organization_altLabel_P1079405301',
#                  u'http://risis.eu/linkset/grid_20170712_eter_2014_approxStrSim_Organization_altLabel_P1661430032',
#                  u'http://risis.eu/linkset/grid_20170712_eter_2014_approxStrSim_Organization_label_N1860664105'],
#     'researchQ_URI': u'http://risis.eu/activity/idea_67a6ce'}

# specs_2 = {'lens_operation': u'intersection',
#     'datasets': [u'http://risis.eu/lens/union_Grid_20170712_Eter_2014_N291690309',
#               u'http://risis.eu/lens/union_Orgreg_20170718_Eter_2014_P1061032980',
#               u'http://risis.eu/lens/union_Orgreg_20170718_Grid_20170712_N1966224323'],
#     'researchQ_URI': u'http://risis.eu/activity/idea_67a6ce'}
#
# specs_3 = {'lens_operation': u'intersection',
#            'datasets': [
#                u'http://risis.eu/linkset/orgreg_20170718_grid_20170712_approxStrSim_University_Entity_current_name_English_N682223883',
#                u'http://risis.eu/linkset/orgreg_20170718_grid_20170712_approxStrSim_University_Entity_current_name_English_P2117262605',
#                u'http://risis.eu/lens/union_Grid_20170712_Eter_2014_N291690309',
#                u'http://risis.eu/lens/union_Orgreg_20170718_Eter_2014_P1061032980',
#                u'http://risis.eu/lens/union_Orgreg_20170718_Grid_20170712_N1966224323'],
#            'researchQ_URI': u'http://risis.eu/activity/idea_67a6ce'}
#
#
#
# print intersection_extended(specs_3, "lens_name", display=False)

# import Alignments.Manage.AdminGraphs as adm
# adm.drop_a_lens("http://risis.eu/lens/intersection_Grid_20170712_Eter_2014_P1326988364", display=True, activated=True)
# print intersecting(specs, activated=True)
Пример #10
0
def refine_lens(specs, activated=False, check_file=False):

    try:

        message = Ec.ERROR_CODE_0.replace('\n', "<br/>")
        if activated is False:
            print Ut.headings("THE FUNCTION [refine_lens] IS NOT ACTIVATED")
            return {St.message: message, St.error_code: 4, St.result: None}

        # 1. UPDATING THE SPECS BY CHANGING LINKSET TO TENS
        specs[St.refined] = specs['linkset']
        specs.pop('linkset')
        Ut.update_specification(specs)

        # CHECKING WHETHER THE LENS IS REFINENABLE
        # Refine.is_refinable(specs[St.refined])

        # PRINTING THE SPECIFICATIONS
        # lensUt.print_specs(specs)

        # ASSIGN THE SAME AS COUNT
        specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])

        message = Ec.ERROR_CODE_4.replace('\n', "<br/>")
        if specs[St.sameAsCount]:

            source = specs[St.source]
            target = specs[St.target]

            # 2. SET THE LENS NAME
            # *******************************
            print "\n2. SET THE LENS NAME"
            # *******************************
            lensUt.lens_refine_name(specs, 'refine')

            #*******************************
            # GOOD TO GO CHECK
            # *******************************
            query = """
        SELECT *
        {{
            <{}> ?predicate ?object .
        }}
            """.format(specs[St.lens])
            check = Lens_Union.run_checks(specs, query, operator="refine")

            # NOT GOOD TO GO, IT ALREADY EXISTS
            if check[St.message].__contains__("ALREADY EXISTS"):
                return {
                    St.message: check[St.message],
                    St.error_code: 71,
                    St.result: specs[St.lens]
                }

            # *******************************
            # GOOD TO GO
            # *******************************
            else:

                lens_start = time.time()
                # UPDATE THE SPECIFICATION
                Ut.update_specification(specs[St.source])
                Ut.update_specification(specs[St.target])

                # PRINTING THE SPECIFICATIONS
                lensUt.print_specs(specs)

                ########################################################################
                print """\n4. EXECUTING THE GEO-MATCH                                """
                ########################################################################
                geo_match(specs)

                ########################################################################
                print """\n5. EXTRACT THE NUMBER OF TRIPLES                          """
                ########################################################################
                specs[St.triples] = Qry.get_namedgraph_size("{0}{1}".format(
                    Ns.lens, specs[St.lens_name]))

                ########################################################################
                print """\n6. ASSIGN THE SPARQL INSERT QUERY                         """
                ########################################################################
                specs[St.insert_query] = "{} ;\n{};\n{}".format(
                    geo_load_query(specs, True), geo_load_query(specs, False),
                    geo_match_query(specs))

                lens_end = time.time()
                diff = lens_end - lens_start
                print "\n\t>>> Executed so far in    : {:<14}".format(
                    str(datetime.timedelta(seconds=diff)))

                if int(specs[St.triples]) > 0:

                    ########################################################################
                    print """\n4. INSERTING THE GENERIC METADATA                         """
                    ########################################################################
                    metadata = Gn.lens_refine_geo_metadata(specs)
                    Qry.boolean_endpoint_response(metadata)

                    ########################################################################
                    print """\n5. WRITING TO FILE                                        """
                    ########################################################################
                    src = [source[St.graph_name], "", source[St.entity_ns]]
                    trg = [target[St.graph_name], "", target[St.entity_ns]]

                    # linkset_path = "D:\datasets\Linksets\ExactName"
                    linkset_path = DIRECTORY
                    writelinkset(src,
                                 trg,
                                 specs[St.lens_name],
                                 linkset_path,
                                 metadata,
                                 check_file=check_file)
                    server_message = "Linksets created as: {}".format(
                        specs[St.lens])
                    message = "The linkset was created as [{}] with {} triples found!".format(
                        specs[St.lens], specs[St.triples])

                    print "\n\t", server_message

                    Urq.register_lens(specs, is_created=True)

                    ls_end_2 = time.time()
                    diff = ls_end_2 - lens_end
                    print ">>> Executed in    : {:<14}".format(
                        str(datetime.timedelta(seconds=diff)))

                    print "\t*** JOB DONE! ***"

                    return {
                        St.message: message,
                        St.error_code: 0,
                        St.result: specs[St.lens]
                    }

                else:
                    print "\tThe linkset was not generated as no match could be found"
                    print "\t*** JOB DONE! ***"
                    return {
                        St.message: message,
                        St.error_code: 4,
                        St.result: None
                    }

    except Exception as err:
        traceback.print_exc()
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
        # print geo_load_query(specs, is_source=True)
        # print geo_load_query(specs, is_source=False)
        # geo_match_query(specs)

        # traceback.print_exception()


# import Alignments.Manage.AdminGraphs as adm
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Grid_20170712_Eter_2014_N291690309", display=True, activated=True)
# refine_lens(specs_example, activated=True, check_file=False)
#
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Orgreg_20170718_Eter_2014_P1061032980", display=True, activated=True)
# refine_lens(specs_example_2, activated=True, check_file=False)
#
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Orgreg_20170718_Grid_20170712_N1966224323", display=True, activated=True)
# refine_lens(specs_example_3, activated=True, check_file=False)