Exemplo n.º 1
0
def spa_subset_insert(specs):
    """Build the SPARQL INSERT query that creates a subset linkset.

    The generated query copies, from the source graph, every
    ``?subject <old link> ?object`` statement whose subject has the source
    entity datatype into the linkset graph, replacing the predicate by a
    freshly minted singleton property that is described in the singleton
    graph of the linkset.

    :param specs: specification dictionary keyed by ``St`` constants. The
        entries read here are the linkset URI and name, the sameAs count,
        the mechanism and, under the source description, the graph, its
        name, the entity datatype and the old link predicate.
    :return: the INSERT query as a string.
    """
    source = specs[St.source]
    old_link = source[St.link_old]

    # The predicate is used untouched when Ls.nt_format accepts it
    # (presumably meaning it is already a formatted term -- confirm in Ls);
    # otherwise it is wrapped in angle brackets.
    if Ls.nt_format(old_link):
        src_aligns = old_link
    else:
        src_aligns = "<{}>".format(old_link)

    template = """
    ###### INSERT SUBSET LINKSET
    PREFIX rdf:        <{}>
    PREFIX singleton:   <{}>
    PREFIX alivocab:    <{}>

    INSERT
    {{
        GRAPH <{}>
        {{
            ?subject    ?singPre    ?object .
        }}

        GRAPH singleton:{}
        {{
            ?singPre    rdf:singletonPropertyOf     alivocab:exactStrSim{} .
            ?singPre    alivocab:hasStrength        1 .
            ?singPre    alivocab:hasEvidence        "Aligned by {} ." .
        }}
    }}
    WHERE
    {{
        GRAPH <{}>
        {{
            ?subject a <{}> ;
                {}  ?object .
        }}

        ### Create A SINGLETON URI
        BIND( replace("{}{}{}_#", "#", STRAFTER(str(UUID()),"uuid:")) as ?pre )
        BIND(iri(?pre) as ?singPre)
    }}
    """

    # Values are listed in the order in which the placeholders appear above.
    values = (
        # prefixes
        Ns.rdf,
        Ns.singletons,
        Ns.alivocab,
        # INSERT clause
        specs[St.linkset],
        specs[St.linkset_name],
        specs[St.sameAsCount],
        source[St.graph_name],
        # WHERE clause
        source[St.graph],
        source[St.entity_datatype],
        src_aligns,
        # pieces of the singleton URI pattern
        Ns.alivocab,
        specs[St.mechanism],
        specs[St.sameAsCount],
    )
    return template.format(*values)
Exemplo n.º 2
0
def linkset_metadata(specs, display=False):
    """Build the SPARQL INSERT query describing the metadata of a linkset.

    Besides returning the query, the function stores several derived values
    back into ``specs``: the singleton graph, the link predicate, the
    assertion method, the justification, the link comment, the number of
    triples of the linkset and, for the mechanisms handled below, the link
    name, its super property and the justification/linkset comments.

    :param specs: specification dictionary keyed by ``St`` constants. It is
        read for the source and target descriptions, the linkset URI and
        name, the mechanism, the sameAs count and the insert query; it is
        updated in place as described above.
    :param display: when True the generated query is also printed.
    :return: the metadata INSERT query as a string.
    """
    source = specs[St.source]
    target = specs[St.target]

    # OPTIONAL STATEMENTS APPENDED AFTER THE LINKSET JUSTIFICATION
    extra = ""
    if St.reducer in source and len(source[St.reducer]) > 0:
        extra += "\n        alivocab:subjectsReducer    <{}> ;".format(
            source[St.reducer])

    if St.reducer in target and len(target[St.reducer]) > 0:
        extra += "\n        alivocab:objectsReducer     <{}> ;".format(
            target[St.reducer])

    if St.intermediate_graph in specs and len(
            specs[St.intermediate_graph]) > 0:
        extra += "\n        alivocab:intermediate       <{}> ;".format(
            specs[St.intermediate_graph])

    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        extra += "\n        alivocab:threshold          {} ;".format(
            str(specs[St.threshold]))

    if St.delta in specs and len(str(specs[St.delta])) > 0:
        extra += "\n        alivocab:delta              {} ;".format(
            str(specs[St.delta]))

    src_aligns = Ls.format_aligns(source[St.aligns])
    trg_aligns = Ls.format_aligns(target[St.aligns])

    # CROSS CHECK INFORMATION IS USED IN CASE THE ALIGN PROPERTY APPEARS MEANINGLESS
    src_cross_check = Ls.format_aligns(
        source[St.crossCheck]) if St.crossCheck in source else None
    trg_cross_check = Ls.format_aligns(
        target[St.crossCheck]) if St.crossCheck in target else None

    # CROSS CHECK FOR THE WHERE AND THE INSERT CLAUSES
    cross_check_where = ''
    cross_check_insert = ''
    if src_cross_check is not None:
        cross_check_where += "\n    BIND(iri({}) AS ?src_crossCheck)".format(
            src_cross_check)
        cross_check_insert += "\n        alivocab:crossCheckSubject        ?src_crossCheck ;"
    if trg_cross_check is not None:
        cross_check_where += "\n    BIND(iri({}) AS ?trg_crossCheck)".format(
            trg_cross_check)
        cross_check_insert += "\n        alivocab:crossCheckObject         ?trg_crossCheck ;"

    # RESOURCES DERIVED FROM THE LINKSET NAME
    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.linkset_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "exactStrSim",
                                     specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method,
                                               specs[St.linkset_name])
    specs[St.justification] = "{}{}".format(Ns.justification,
                                            specs[St.linkset_name])
    specs[St.link_comment] = (
        "The predicate <{}> used in this linkset is a property that reflects an entity "
        "linking approach based on the <{}{}> mechanism."
    ).format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    # MECHANISM-SPECIFIC DESCRIPTIONS. The branches are mutually exclusive,
    # so they form a single if/elif chain. An unrecognised mechanism leaves
    # the corresponding entries of specs untouched.
    mechanism = str(specs[St.mechanism]).lower()
    subproperty = "http://risis.eu/linkset/predicate/{}".format(
        specs[St.mechanism])

    if mechanism == "intermediate":
        specs[St.link_name] = "Exact String Similarity via intermediate dataset"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = "The method MATCH VIA INTERMEDIATE DATASET is used to align the" \
                                          " source and the target by using properties that present different " \
                                          "descriptions of a same entity, such as country name and country code. " \
                                          "This is possible by providing an intermediate dataset that binds the " \
                                          "two alternative descriptions to the very same identifier."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif mechanism == "exactstrsim":
        specs[St.link_name] = "Exact String Similarity"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = "We assume that entities with the aligned predicates sharing the " \
                                          "exact same content are the same. This assumption applies when dealing " \
                                          "with entities such as Organisation."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif mechanism == "identity":
        specs[St.link_name] = "Same URI"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = "We assume that entities with the same URI are identical."
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their identical URI using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "approxstrsim":
        specs[St.link_name] = "Approximate String Similarity"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = "This includes entities with a string similarity in the interval [{} 1[.".\
            format(specs[St.threshold])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate string similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "nearbygeosim":
        specs[St.link_name] = "Near by Geo-Similarity"
        specs[St.link_subpropertyof] = subproperty
        # The second placeholder is the unit, not the value a second time
        # (same wording as in lens_refine_geo_metadata).
        specs[St.justification_comment] = "This includes entities near each other by at most {} <{}>.". \
            format(specs[St.unit_value], specs[St.unit])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their nearby Geo-Similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    # NUMBER OF TRIPLES REPORTED BY Qry FOR THE LINKSET GRAPH
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset],
                                                isdistinct=False)
    print("\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples]))

    # ONE ENTRY PER LINE OF THE QUERY
    lines = [
        "##################################################################",
        "### METADATA FOR {}".format(specs[St.linkset]),
        "##################################################################",
        "PREFIX prov:        <{}>".format(Ns.prov),
        "PREFIX alivocab:    <{}>".format(Ns.alivocab),
        "PREFIX rdfs:        <{}>".format(Ns.rdfs),
        "PREFIX void:        <{}>".format(Ns.void),
        "PREFIX bdb:         <{}>".format(Ns.bdb),

        "INSERT",
        "{",
        "    <{}>".format(specs[St.linkset]),
        "        rdfs:label                  \"{}\" ; ".format(specs[St.linkset_name]),
        "        a                           void:Linkset ;",
        "        void:triples                {} ;".format(specs[St.triples]),
        "        alivocab:sameAsCount        {} ;".format(specs[St.sameAsCount]),
        "        alivocab:alignsMechanism    <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        "        void:subjectsTarget         <{}> ;".format(source[St.graph]),
        "        void:objectsTarget          <{}> ;".format(target[St.graph]),
        "        void:linkPredicate          <{}> ;".format(specs[St.link]),
        "        bdb:subjectsDatatype        <{}> ;".format(source[St.entity_datatype]),
        "        bdb:objectsDatatype         <{}> ;".format(target[St.entity_datatype]),
        "        alivocab:singletonGraph     <{}> ;".format(specs[St.singleton]),
        "        bdb:assertionMethod         <{}> ;".format(specs[St.assertion_method]),
        "        bdb:linksetJustification    <{}> ;{}".format(specs[St.justification], extra),
        "        alivocab:alignsSubjects     ?src_aligns ;",
        "        alivocab:alignsObjects      ?trg_aligns ;{}".format(cross_check_insert),
        "        rdfs:comment                \"\"\"{}\"\"\" .".format(specs[St.linkset_comment]),

        "\n    ### METADATA ABOUT THE LINKTYPE",
        "      <{}>".format(specs[St.link]),
        "        rdfs:comment                \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
        "        rdfs:label                  \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
        "        rdfs:subPropertyOf          <{}> .".format(specs[St.link_subpropertyof]),

        "\n    ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        "    <{}>".format(specs[St.justification]),
        "        rdfs:comment              \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),

        "\n    ### ASSERTION METHOD",
        "    <{}>".format(specs[St.assertion_method]),
        "        alivocab:sparql           \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
        "}",

        "WHERE",
        "{",
        "    BIND(iri({}) AS ?src_aligns)".format(src_aligns),
        "    BIND(iri({}) AS ?trg_aligns){}".format(trg_aligns, cross_check_where),
        "}",
    ]
    # Every line, including the first, is preceded by a newline.
    query = "\n" + "\n".join(lines)

    if display is True:
        print(query)
    return query
Exemplo n.º 3
0
def spa_subset_metadata(specs):
    """Build the SPARQL INSERT query describing a subset linkset.

    Unlike the other metadata builders, nothing is computed and stored in
    ``specs`` here: the link, assertion method, justification, comments,
    link name, super property, number of triples and insert query are all
    read from ``specs`` and must already be present.

    :param specs: specification dictionary keyed by ``St`` constants.
    :return: the metadata INSERT query as a string.
    """
    source = specs[St.source]
    target = specs[St.target]
    src_aligns = Ls.format_aligns(source[St.link_old])

    # CROSS CHECK INFORMATION IS USED IN CASE THE ALIGN PROPERTY APPEARS MEANINGLESS
    src_cross_check = None
    if St.crossCheck in source:
        src_cross_check = Ls.format_aligns(source[St.crossCheck])
    trg_cross_check = None
    if St.crossCheck in target:
        trg_cross_check = Ls.format_aligns(target[St.crossCheck])

    # CROSS CHECK FRAGMENTS FOR THE WHERE AND THE INSERT CLAUSES
    where_parts = []
    insert_parts = []
    if src_cross_check is not None:
        where_parts.append(
            "\n    BIND(iri({}) AS ?src_crossCheck)".format(src_cross_check))
        insert_parts.append(
            "\n        alivocab:crossCheckSubject        ?src_crossCheck ;")
    if trg_cross_check is not None:
        where_parts.append(
            "\n    BIND(iri({}) AS ?trg_crossCheck)".format(trg_cross_check))
        insert_parts.append(
            "\n        alivocab:crossCheckObject         ?trg_crossCheck ;")
    cross_check_where = ''.join(where_parts)
    cross_check_insert = ''.join(insert_parts)

    # ONE ENTRY PER LINE OF THE QUERY
    rows = [
        "\t###### METADATA",
        "\tPREFIX prov:        <{}>".format(Ns.prov),
        "\tPREFIX rdfs:      <{}>".format(Ns.rdfs),
        "\tPREFIX void:      <{}>".format(Ns.void),
        "\tPREFIX alivocab:  <{}>".format(Ns.alivocab),
        "\tPREFIX bdb:       <{}>".format(Ns.bdb),

        "\tINSERT",
        "\t{",
        "\t     ### [SUBSET of {}]".format(source[St.graph]),
        "\t     ### METADATA ABOUT THE SUBSET LINKSET",
        "\t     <{}>".format(specs[St.linkset]),
        "\t       a                         void:Linkset ;",
        "\t       rdfs:label                \"{}\" ; ".format(specs[St.linkset_name]),
        "\t       alivocab:alignsMechanism  <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        "\t       alivocab:sameAsCount      {} ;".format(specs[St.sameAsCount]),
        "\t       void:subset               <{}> ;".format(source[St.graph]),
        "\t       void:subjectsTarget       <{}> ;".format(source[St.graph]),
        "\t       void:objectsTarget        <{}> ;".format(target[St.graph]),
        "\t       void:triples              {} ;".format(specs[St.triples]),
        "\t       void:linkPredicate        <{}{}> ;".format(specs[St.link], specs[St.sameAsCount]),
        "\t       bdb:subjectsDatatype      <{}> ;".format(source[St.entity_datatype]),
        "\t       bdb:objectsDatatype       <{}> ;".format(target[St.entity_datatype]),
        "\t       alivocab:singletonGraph   <{}{}> ;".format(Ns.singletons, specs[St.linkset_name]),
        "\t       bdb:assertionMethod       <{}> ;".format(specs[St.assertion_method]),
        "\t       bdb:linksetJustification  <{}> ;".format(specs[St.justification]),
        "\t       alivocab:alignsSubjects   ?src_aligns ;",
        "\t       alivocab:alignsObjects   <{}> ;{}".format(Ns.rsrId, cross_check_insert),
        "\t       rdfs:comment              \"\"\"{}\"\"\" .".format(specs[St.linkset_comment]),

        "\n\t     ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        "\t     <{}>".format(specs[St.justification]),
        "\t       rdfs:comment              \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),

        "\n\t     ### METADATA ABOUT THE LINKTYPE",
        "\t     <{}{}>".format(specs[St.link], specs[St.sameAsCount]),
        "\t       rdfs:comment              \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
        "\t       rdfs:label                \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
        "\t       rdfs:subPropertyOf        <{}> .".format(specs[St.link_subpropertyof]),

        "\n\t     ### ASSERTION METHOD",
        "\t     <{}>".format(specs[St.assertion_method]),
        "\t       alivocab:sparql           \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
        "\t}",

        "\tWHERE",
        "\t{",
        "\t      BIND(iri({}) AS ?src_aligns){}".format(src_aligns, cross_check_where),
        "\t}",
    ]

    # Every line, including the first, is preceded by a newline.
    return "\n" + "\n".join(rows)
Exemplo n.º 4
0
def lens_refine_geo_metadata(specs, display=False):
    """Build the SPARQL INSERT query describing a geo-similarity lens.

    As a side effect the function stores in ``specs`` the singleton graph,
    the link predicate, the assertion method, the justification, the link
    comment and the number of triples of the lens. The link name, the super
    property and the justification/lens comments are only stored when the
    mechanism is "nearbyGeoSim" (compared case-insensitively).

    :param specs: specification dictionary keyed by ``St`` constants. The
        source and target descriptions must provide the cross-check,
        longitude and latitude properties, which are read unconditionally.
    :param display: when True the generated query is also printed.
    :return: the metadata INSERT query as a string.
    """
    source = specs[St.source]
    target = specs[St.target]

    # OPTIONAL STATEMENTS APPENDED AFTER THE LINKSET JUSTIFICATION
    # (holder, key, pattern): emitted when the key is present and non-empty.
    optional_uris = (
        (source, St.reducer, "\n        ll:subjectsReducer      <{}> ;"),
        (target, St.reducer, "\n        ll:objectsReducer       <{}> ;"),
        (specs, St.intermediate_graph, "\n        ll:intermediate         <{}> ;"),
    )
    # (key, pattern): emitted when the textual form of the value is non-empty.
    optional_values = (
        (St.threshold, "\n        ll:threshold            {} ;"),
        (St.delta, "\n        ll:delta                {} ;"),
    )

    fragments = []
    for holder, key, pattern in optional_uris:
        if key in holder and len(holder[key]) > 0:
            fragments.append(pattern.format(holder[key]))
    for key, pattern in optional_values:
        if key in specs and len(str(specs[key])) > 0:
            fragments.append(pattern.format(str(specs[key])))
    extra = "".join(fragments)

    # PROPERTIES BOUND IN THE WHERE CLAUSE
    src_cross_check = Ls.format_aligns(source[St.crossCheck])
    src_long = Ls.format_aligns(source[St.longitude])
    src_lat = Ls.format_aligns(source[St.latitude])

    trg_cross_check = Ls.format_aligns(target[St.crossCheck])
    trg_long = Ls.format_aligns(target[St.longitude])
    trg_lat = Ls.format_aligns(target[St.latitude])

    # RESOURCES DERIVED FROM THE LENS NAME
    lens_name = specs[St.lens_name]
    specs[St.singleton] = "{}{}".format(Ns.singletons, lens_name)
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "nearbyGeoSim",
                                     specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, lens_name)
    specs[St.justification] = "{}{}".format(Ns.justification, lens_name)
    specs[St.link_comment] = (
        "The predicate <{}> used in this linkset is a property that reflects an entity "
        "linking approach based on the <{}{}> mechanism."
    ).format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    if str(specs[St.mechanism]).lower() == "nearbygeosim":
        specs[St.link_name] = "Near by Geo-Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])
        specs[St.justification_comment] = (
            "This includes entities near each other by at most {} <{}>."
        ).format(specs[St.unit_value], specs[St.unit])
        specs[St.lens_comment] = (
            "Linking <{}> to <{}> based on their nearby Geo-Similarity"
            " using the mechanism: {}"
        ).format(source[St.graph], target[St.graph], specs[St.mechanism])

    # NUMBER OF TRIPLES REPORTED BY Qry FOR THE LENS GRAPH
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.lens],
                                                isdistinct=False)
    print("\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples]))

    # ONE ENTRY PER LINE OF THE QUERY
    rows = [
        "##################################################################",
        "### METADATA FOR {}".format(specs[St.lens]),
        "##################################################################",
        "PREFIX prov:        <{}>".format(Ns.prov),
        "PREFIX ll:          <{}>".format(Ns.alivocab),
        "PREFIX rdfs:        <{}>".format(Ns.rdfs),
        "PREFIX void:        <{}>".format(Ns.void),
        "PREFIX bdb:         <{}>".format(Ns.bdb),

        "INSERT",
        "{",
        "    <{}>".format(specs[St.lens]),
        "        rdfs:label                  \"{}\" ; ".format(specs[St.lens_name]),
        "        a                           bdb:Lens ;",
        "        void:triples                {} ;".format(specs[St.triples]),
        "        ll:sameAsCount              {} ;".format(specs[St.sameAsCount]),
        "        ll:alignsMechanism          <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        "        void:subjectsTarget         <{}> ;".format(source[St.graph]),
        "        void:objectsTarget          <{}> ;".format(target[St.graph]),
        "        void:linkPredicate          <{}> ;".format(specs[St.link]),
        "        bdb:subjectsDatatype        <{}> ;".format(source[St.entity_datatype]),
        "        bdb:objectsDatatype         <{}> ;".format(target[St.entity_datatype]),
        "        ll:singletonGraph           <{}> ;".format(specs[St.singleton]),
        "        bdb:assertionMethod         <{}> ;".format(specs[St.assertion_method]),
        "        bdb:linksetJustification    <{}> ;{}".format(specs[St.justification], extra),
        "        ll:crossCheckSubject        ?src_crossCheck ;",
        "        ll:crossCheckObject         ?trg_crossCheck ;",

        "        ll:unit                     <{}> ;".format(specs[St.unit]),
        "        ll:unitValue                {} ;".format(specs[St.unit_value]),

        "        ll:alignsSubjects           ( ?src_long ?src_lat ) ;",
        "        ll:alignsObjects            ( ?trg_long ?trg_lat ) ;",

        "        rdfs:comment                \"\"\"{}\"\"\" .".format(specs[St.lens_comment]),

        "\n    ### METADATA ABOUT THE LINKTYPE",
        "      <{}>".format(specs[St.link]),
        "        rdfs:comment                \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
        "        rdfs:label                  \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
        "        rdfs:subPropertyOf          <{}> .".format(specs[St.link_subpropertyof]),

        "\n    ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        "    <{}>".format(specs[St.justification]),
        "        rdfs:comment              \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),

        "\n    ### ASSERTION METHOD",
        "    <{}>".format(specs[St.assertion_method]),
        "        ll:sparql                   \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
        "}",

        "WHERE",
        "{",
        "    BIND(iri({}) AS ?src_crossCheck)".format(src_cross_check),
        "    BIND(iri({}) AS ?trg_crossCheck)".format(trg_cross_check),

        "    BIND(iri({}) AS ?src_long)".format(src_long),
        "    BIND(iri({}) AS ?src_lat)".format(src_lat),

        "    BIND(iri({}) AS ?trg_long)".format(trg_long),
        "    BIND(iri({}) AS ?trg_lat)".format(trg_lat),

        "}",
    ]
    # Every line, including the first, is preceded by a newline.
    query = "\n" + "\n".join(rows)

    if display is True:
        print(query)
    return query
Exemplo n.º 5
0
def linkset_refined_metadata(specs, display=False):
    """Build the SPARQL INSERT query describing a refined linkset.

    As a side effect the function stores in ``specs`` the singleton graph,
    the link predicate, the assertion method, the justification, the link
    comment, the number of triples of the refined linkset and, for the
    mechanisms handled below, the link name, its super property and the
    justification/linkset comments.

    :param specs: specification dictionary keyed by ``St`` constants. It is
        read for the source and target descriptions, the refined linkset URI
        and name, the original linkset URI, the mechanism, the sameAs count
        and the insert query.
    :param display: when True the generated query is also printed.
    :return: a dictionary with two entries: "query", the metadata INSERT
        query or None when the refined linkset holds no triple, and
        "message", an HTML summary of the correspondence counts.
    """
    source = specs[St.source]
    target = specs[St.target]

    # CONDITIONAL METADATA TO APPEND TO THE REFINED LINKSET
    # (holder, key, pattern): emitted when the key is present and non-empty.
    optional_uris = (
        (source, St.extended_graph, "\n        alivocab:subjectsExtended    <{}> ;"),
        (target, St.extended_graph, "\n        alivocab:objectsExtended     <{}> ;"),
        (source, St.reducer, "\n        alivocab:subjectsReducer     <{}> ;"),
        (target, St.reducer, "\n        alivocab:objectsReducer      <{}> ;"),
        (specs, St.intermediate_graph, "\n        alivocab:intermediatesTarget <{}> ;"),
    )
    fragments = []
    for holder, key, pattern in optional_uris:
        if key in holder and len(holder[key]) > 0:
            fragments.append(pattern.format(holder[key]))

    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        fragments.append("\n        alivocab:threshold           {} ;".format(
            str(specs[St.threshold])))

    # The delta is skipped when it is "0" or when its converted value is NaN.
    if St.delta in specs and str(specs[St.delta]) != "0":
        delta = convert_to_float(str(specs[St.delta]))
        if not math.isnan(delta):
            fragments.append(
                "\n        alivocab:delta               {} ;".format(delta))
    extra = "".join(fragments)

    src_aligns = Ls.format_aligns(source[St.aligns])
    trg_aligns = Ls.format_aligns(target[St.aligns])

    # RESOURCES DERIVED FROM THE REFINED LINKSET NAME
    refined_name = specs[St.refined_name]
    specs[St.singleton] = "{}{}".format(Ns.singletons, refined_name)
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "exactStrSim",
                                     specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, refined_name)
    specs[St.justification] = "{}{}".format(Ns.justification, refined_name)
    specs[St.link_comment] = (
        "The predicate <{}> used in this linkset is a property that reflects an entity "
        "linking approach based on the <{}{}> mechanism."
    ).format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    # MECHANISM-SPECIFIC DESCRIPTIONS (an unrecognised mechanism leaves the
    # corresponding entries of specs untouched)
    mechanism = str(specs[St.mechanism]).lower()
    subproperty = "http://risis.eu/linkset/predicate/{}".format(
        specs[St.mechanism])

    if mechanism == "exactstrsim":
        specs[St.link_name] = "Exact String Similarity"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = (
            "We assume that entities with the aligned predicates sharing the "
            "exact same content are same. This assumption applies when dealing "
            "with entities such as Organisation."
        )
        specs[St.linkset_comment] = (
            "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}"
        ).format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif mechanism == "identity":
        specs[St.link_name] = "Same URI"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = "We assume that entities with the same URI are identical."
        specs[St.linkset_comment] = (
            "Linking <{}> to <{}> based on their identical URI using the mechanism: {}"
        ).format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "approxnbrsim":
        specs[St.link_name] = "Approximate Number Similarity"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = (
            "This includes entities with an approximate number similarity"
            " in the interval [0 {}]."
        ).format(specs[St.delta])
        specs[St.linkset_comment] = (
            "Linking <{}> to <{}> based on their approximate number similarity"
            " using the mechanism: {}"
        ).format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "approxstrsim":
        specs[St.link_name] = "Approximate String Similarity"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = (
            "This includes entities with a string similarity in the interval [{} 1[."
        ).format(specs[St.threshold])
        specs[St.linkset_comment] = (
            "Linking <{}> to <{}> based on their approximate string similarity"
            " using the mechanism: {}"
        ).format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "intermediate":
        specs[St.link_name] = "Exact String Similarity"
        specs[St.link_subpropertyof] = subproperty
        specs[St.justification_comment] = (
            "This is an implementation of the Exact String Similarity Mechanism over "
            "the aligned predicates."
        )
        specs[St.linkset_comment] = (
            "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}"
        ).format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    # CHECKING WHETHER THE REFINED HAS SOME TRIPLES INSERTED
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.refined],
                                                isdistinct=False)
    source_size = Qry.get_namedgraph_size(specs[St.linkset], isdistinct=False)

    # The same three statements are printed and then joined into the message.
    in_source = "{} CORRESPONDENCES IN THE SOURCE".format(source_size)
    inserted = "{} CORRESPONDENCES INSERTED".format(specs[St.triples])
    print("\t>>> " + in_source)
    print("\t>>> " + inserted)
    rejected = "{} CORRESPONDENCES DO NOT COMPLY WITH THE NEW CONDITION".format(
        int(source_size) - int(specs[St.triples]))
    print("\t>>> " + rejected)
    message = "<br/>".join((in_source, inserted, rejected))

    # NO METADATA QUERY IS GENERATED FOR AN EMPTY REFINED LINKSET
    if int(specs[St.triples]) <= 0:
        return {"query": None, "message": message}

    derived_from = specs[St.derivedfrom] if St.derivedfrom in specs else ""
    intermediate = ""
    if mechanism == "intermediate":
        intermediate = "\n        alivocab:intermediatesTarget    <{}> ;".format(
            specs[St.intermediate_graph])

    # ONE ENTRY PER LINE OF THE QUERY
    rows = [
        "##################################################################",
        "### METADATA FOR {}".format(specs[St.refined]),
        "##################################################################",
        "PREFIX prov:        <{}>".format(Ns.prov),
        "PREFIX alivocab:    <{}>".format(Ns.alivocab),
        "PREFIX rdfs:        <{}>".format(Ns.rdfs),
        "PREFIX void:        <{}>".format(Ns.void),
        "PREFIX bdb:         <{}>".format(Ns.bdb),

        "INSERT",
        "{",
        "    <{}>".format(specs[St.refined]),
        "        a                               void:Linkset ;\n{}".format(derived_from),
        "        rdfs:label                      \"{}\" ; ".format(specs[St.refined_name]),
        "        void:triples                    {} ;".format(specs[St.triples]),
        "        alivocab:sameAsCount            {} ;".format(specs[St.sameAsCount]),
        "        alivocab:alignsMechanism        <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        "        void:subjectsTarget             <{}> ;{}".format(source[St.graph], intermediate),
        "        void:objectsTarget              <{}> ;".format(target[St.graph]),
        "        void:linkPredicate              <{}> ;".format(specs[St.link]),
        "        bdb:subjectsDatatype            <{}> ;".format(source[St.entity_datatype]),
        "        bdb:objectsDatatype             <{}> ;".format(target[St.entity_datatype]),
        "        alivocab:singletonGraph         <{}> ;".format(specs[St.singleton]),
        "        bdb:assertionMethod             <{}> ;".format(specs[St.assertion_method]),
        "        bdb:linksetJustification        <{}> ;{}".format(specs[St.justification], extra),
        "        alivocab:alignsSubjects         ?src_aligns ;",
        "        alivocab:alignsObjects          ?trg_aligns ;",
        "        rdfs:comment                    \"\"\"{}\"\"\" .".format(specs[St.linkset_comment]),

        "\n    ### METADATA ABOUT THE LINKTYPE",
        "      <{}>".format(specs[St.link]),
        "        rdfs:comment                \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
        "        rdfs:label                  \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
        "        rdfs:subPropertyOf          <{}> .".format(specs[St.link_subpropertyof]),

        "\n    ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        "    <{}>".format(specs[St.justification]),
        "        rdfs:comment              \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),

        "\n    ### ASSERTION METHOD",
        "    <{}>".format(specs[St.assertion_method]),
        "        alivocab:sparql           \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
        "}",

        "WHERE",
        "{",
        "    BIND(iri({}) AS ?src_aligns)".format(src_aligns),
        "    BIND(iri({}) AS ?trg_aligns)".format(trg_aligns),
        "}",
    ]
    # Every line, including the first, is preceded by a newline.
    query = "\n" + "\n".join(rows)

    if display is True:
        print(query)
    print("\t>>> Done generating the metadata")
    return {"query": query, "message": message}
Exemplo n.º 6
0
def spa_linkset_subset(specs, activated=False):

    if activated is True:

        check = Ls.run_checks(specs, check_type="subset")
        if check[St.result] != "GOOD TO GO":
            return check

        # THE LINKSET DOES NOT EXIT, LETS CREATE IT NOW
        print Ls.linkset_info(specs, specs[St.sameAsCount])

        ##########################################################
        """ 1. GENERATE SUBSET LINKSET INSERT QUERY            """
        ##########################################################
        insert_query = spa_subset_insert(specs)
        # print insert_query

        #############################################################
        """ 2. EXECUTING INSERT SUBSET LINKSET QUERY AT ENDPOINT  """
        #############################################################
        Qry.endpoint(insert_query)

        #############################################################
        """ 3. LINKSET SIZE (NUMBER OF TRIPLES)                   """
        #############################################################
        # LINKSET SIZE (NUMBER OF TRIPLES)
        specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset])
        print "\t>>> {} TRIPLES INSERTED".format(specs[St.triples])

        # NO MATCH FOUND
        if specs[St.triples] == "0":

            # logger.warning("WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED.")
            print "WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED."
            specs[St.insert_query] = insert_query
            # metadata = spa_subset_metadata(source, target, data, size)

            explain_q = "ask {{ GRAPH <{}> {{ ?s <{}> ?o }} }}".format(
                specs[St.linkset], specs[St.source][St.link_old])
            response = Qry.boolean_endpoint_response(explain_q)
            explain = True if response == "true" else False
            # print explain
            if explain is False:
                # logger.warning("{} DOES NOT EXIST IS {}.".format(data[St.link_old], source[St.graph]))
                print "{} DOES NOT EXIST IS {}.".format(
                    specs[St.source][St.link_old], specs[St.source][St.graph])

                message = "{} DOES NOT EXIST IS {}.".format(
                    specs[St.source][St.link_old], specs[St.source][St.graph])

                return {St.message: message, St.error_code: 1, St.result: None}

        # SOME MATCHES WHERE FOUND
        construct_query = "\n{}\n{}\n{}\n".format(
            "PREFIX predicate: <{}>".format(Ns.alivocab),
            "construct { ?x ?y ?z }",
            "where     {{ graph <{}> {{ ?x ?y ?z }} }}".format(
                specs[St.linkset]),
        )
        # print construct_query
        construct_response = Qry.endpointconstruct(construct_query)
        if construct_response is not None:
            construct_response = construct_response.replace(
                '{', "<{}>\n{{".format(specs[St.linkset]), 1)

        # GENERATE LINKSET SINGLETON METADATA QUERY
        singleton_metadata_query = "\n{}\n{}\n{}\n{}\n{}\n{}\n\n".format(
            "PREFIX singMetadata:   <{}>".format(Ns.singletons),
            "PREFIX predicate:      <{}>".format(Ns.alivocab),
            "PREFIX prov:           <{}>".format(Ns.prov),
            "PREFIX rdf:            <{}>".format(Ns.rdf),
            "construct { ?x ?y ?z }",
            "where     {{ graph <{}{}> {{ ?x ?y ?z }} }}".format(
                Ns.singletons, specs[St.linkset_name]),
        )
        # GET THE SINGLETON METADATA USING THE CONSTRUCT QUERY
        singleton_construct = Qry.endpointconstruct(singleton_metadata_query)
        if singleton_construct is not None:
            singleton_construct = singleton_construct.replace(
                '{', "singMetadata:{}\n{{".format(specs[St.linkset_name]), 1)

        #############################################################
        """ 4. LINKSET METADATA                                   """
        #############################################################
        # METADATA
        specs[St.insert_query] = insert_query
        metadata = Gn.spa_subset_metadata(specs)

        ###############################################################
        """ 5. EXECUTING INSERT LINKSET METADATA QUERY AT ENDPOINT  """
        ###############################################################
        # EXECUTING METADATA QUERY AT ENDPOINT
        Qry.endpoint(metadata)

        print "\t>>> WRITING TO FILE"
        write_to_file(graph_name=specs[St.linkset_name],
                      metadata=metadata.replace("INSERT DATA", ""),
                      correspondences=construct_response,
                      singletons=singleton_construct,
                      directory=DIRECTORY)

        print "\tLinkset created as [SUBSET]: ", specs[St.linkset]
        print "\t*** JOB DONE! ***"

        message = "The linkset was created as [{}] with {} triples found!".format(
            specs[St.linkset], specs[St.triples])

        return {
            St.message: message,
            St.error_code: 0,
            St.result: specs[St.linkset]
        }
Exemplo n.º 7
0
def specification_2_linkset_subset(specs, activated=False):

    if activated is True:
        print Ut.headings("EXECUTING LINKSET SUBSET SPECS...")
    else:
        print Ut.headings(
            "THE FUNCTION [specification_2_linkset_subset] IS NOT ACTIVATED")
        return {St.message: Ec.ERROR_CODE_0, St.error_code: 0, St.result: None}

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])

    # UPDATE THE QUERY THAT IS GOING TO BE EXECUTED
    if specs[St.sameAsCount]:

        source = specs[St.source]
        target = specs[St.target]

        # UPDATE THE SPECS OF SOURCE AND TARGETS
        update_specification(source)
        update_specification(target)

        # GENERATE THE NAME OF THE LINKSET
        Ls.set_subset_name(specs)

        # SETTING SOME GENERIC METADATA INFO
        specs[St.link_name] = "same"
        specs[St.linkset_name] = specs[St.linkset_name]
        specs[St.link] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.link_name])
        specs[
            St.
            link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
                specs[St.link_name])
        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
        specs[St.assertion_method] = "{}{}".format(Ns.method,
                                                   specs[St.linkset_name])
        specs[St.justification] = "{}{}".format(Ns.justification,
                                                specs[St.linkset_name])

        # COMMENT ON THE LINK PREDICATE
        specs[St.link_comment] = "The predicate <{}> is used in replacement of the linktype <{}> used in the " \
                                 "original <{}> dataset.".format(
            specs[St.link], specs[St.source][St.link_old], specs[St.source][St.graph])

        # COMMENT ON THE JUSTIFICATION FOR THIS LINKSET
        specs[St.justification_comment] = "In OrgRef's a set of entities are linked to GRID. The linking method " \
                                          "used by OrgRef is unknown. Here we assume that it is a curated work " \
                                          "and extracted it as a linkset.",

        # COMMENT ON THE LINKSET ITSELF
        specs[St.linkset_comment] = "The current linkset is a subset of the <{0}> dataset that links <{0}> to " \
                                    "<{1}>. The methodology used by <{0}> to generate this builtin linkset in " \
                                    "unknown.".format(specs[St.source][St.graph], specs[St.target][St.graph])

        source[St.entity_ns] = str(source[St.entity_datatype]).replace(
            source[St.entity_name], '')
        target[St.entity_ns] = str(target[St.entity_datatype]).replace(
            target[St.entity_name], '')

        # GENERATE THE LINKSET
        inserted_linkset = spa_linkset_subset(specs, activated)
        # print "LINKSET SUBSET RESULT:", inserted_linkset

        if inserted_linkset[St.message].__contains__("ALREADY EXISTS"):
            return inserted_linkset

        if specs[St.triples] > "0":

            # REGISTER THE ALIGNMENT
            if inserted_linkset[St.message].__contains__("ALREADY EXISTS"):
                Urq.register_alignment_mapping(specs, created=False)
            else:
                Urq.register_alignment_mapping(specs, created=True)

        return inserted_linkset

    else:
        print Ec.ERROR_CODE_1
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
Exemplo n.º 8
0
def load_temp_query(specs, is_source, is_expand=True):
    """Build the SPARQL INSERT query that loads aligned values into a temporary graph.

    For every resource of the requested type, the value of the aligned
    property is lower-cased, trimmed and stored with alivocab:hasProperty in
    the graph <Ns.tmpgraph + "load" + suffix>, where the suffix is
    "_<linkset name>_1" for the source and "_<linkset name>_2" for the target.

    :param specs: the linkset specification (St.source, St.target,
        St.linkset_name and St.expanded_name are read).
    :param is_source: True to build the query for the source dataset,
        anything else for the target dataset.
    :param is_expand: when False, the graph pattern restricting the resources
        to the linkset to expand is commented out in the generated query.
    :return: the query as a string.
    """

    # COMMENT OUT THE "LINKSET TO EXPAND" PATTERN WHEN NO EXPANSION IS REQUESTED
    if is_expand is False:
        comment_exp = "# "
    else:
        comment_exp = ""

    # SELECT THE DATASET DESCRIPTION AND THE SIDE OF THE LINK IT STANDS ON
    if is_source is True:
        info = specs[St.source]
        load = "_{}_1".format(specs[St.linkset_name])
        linkset_triple = "\t\t\t?{}  ?predicate ?target".format(info[St.graph_name])
    else:
        info = specs[St.target]
        load = "_{}_2".format(specs[St.linkset_name])
        linkset_triple = "\t\t\t?source  ?predicate ?{}".format(info[St.graph_name])

    # REPLACE RDF TYPE "a" IN CASE ANOTHER TYPE IS PROVIDED
    if St.rdf_predicate in info and info[St.rdf_predicate] is not None:
        rdf_pred = info[St.rdf_predicate] \
            if Ls.nt_format(info[St.rdf_predicate]) else "<{}>".format(info[St.rdf_predicate])
    else:
        rdf_pred = "a"

    # FORMATTING THE ALIGNS PROPERTY
    aligns = info[St.aligns] \
        if Ls.nt_format(info[St.aligns]) else "<{}>".format(info[St.aligns])

    name = info[St.graph_name]
    uri = info[St.graph]

    # ADD THE REDUCER IF SET. A REDUCER THAT IS PRESENT BUT EMPTY (OR NONE) IS
    # TREATED AS NOT SET, OTHERWISE THE FILTER WOULD TARGET THE GRAPH <>
    reducer = info[St.reducer] if St.reducer in info else None
    if reducer:
        reducer_comment = ""
    else:
        reducer_comment = "#"
        reducer = ""

    # EXTRACTION QUERY
    query = """
    INSERT
    {{
        GRAPH <{0}load{8}>
        {{
            ?{5}  alivocab:hasProperty  ?trimmed .
        }}
    }}
    WHERE
    {{

        # THE LINKSET TO EXPAND
        {12}GRAPH <{9}{10}>
        {12}{{
        {12}    {11} .
        {12}}}

        GRAPH <{1}>
        {{
            # RESOURCE IS OF A CERTAIN TYPE
            ?{5}  {2}  <{7}> .

            # EXTRACT THE PROPERTY-VALUE TO ALIGN
            ?{5}  {3}  ?object .

            # LOWER CASE OF THE VALUE
            BIND(lcase(str(?object)) as ?label)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?label, ?regexp, '$1$2') AS ?trimmed)
        }}

        {6}FILTER NOT EXISTS
        {6}{{
        {6}    GRAPH <{4}>
        {6}    {{
        {6}        {{ ?{5}   ?pred   ?obj . }}
        {6}        UNION
        {6}        {{ ?obj   ?pred   ?{5}. }}
        {6}    }}
        {6}}}
    }}
    """.format(
        # 0          1    2         3       4        5     6                7                         8
        Ns.tmpgraph, uri, rdf_pred, aligns, reducer, name, reducer_comment, info[St.entity_datatype], load,
        # 9         10                        11             12
        Ns.linkset, specs[St.expanded_name], linkset_triple, comment_exp
    )
    return query
Exemplo n.º 9
0
def refining(specs, insert_query, activated=False):

    refined = {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
    diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 1, St.result: None}

    # UPDATE THE SPECS VARIABLE
    # print "UPDATE THE SPECS VARIABLE"
    update_specification(specs)
    update_specification(specs[St.source])
    update_specification(specs[St.target])

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])
    # print "sameAsCount:", specs[St.sameAsCount]

    if specs[St.sameAsCount] is None:
        return {'refined': refined, 'difference': diff}

    # GENERATE THE NAME OF THE LINKSET
    Ls.set_refined_name(specs)
    # print "\nREFINED NAME:", specs[St.refined]
    # print "LINKSET TO REFINE BEFORE CHECK:", specs[St.linkset]

    # CHECK WHETHER OR NOT THE LINKSET WAS ALREADY CREATED
    check = Ls.run_checks(specs, check_type="refine")
    # print "\nREFINED NAME:", specs[St.refined]
    # print "LINKSET TO REFINE:", specs[St.linkset]

    if check[St.message] == "NOT GOOD TO GO":
        # refined = check[St.refined]
        # difference = check["difference"]
        return check

    # print "\nREFINED:", specs[St.refined]
    # print "LINKSET TO REFINE:", specs[St.linkset]
    # print "CHECK:", check

    # THE LINKSET DOES NOT EXIT, LETS CREATE IT NOW
    print Ls.refined_info(specs, specs[St.sameAsCount])

    # POINT TO THE LINKSET THE CURRENT LINKSET WAS DERIVED FROM
    print "1. wasDerivedFrom {}".format(specs[St.linkset])
    specs[St.derivedfrom] = "\t\tprov:wasDerivedFrom\t\t\t<{}> ;".format(
        specs[St.linkset])

    # print "REFINED NAME:",  specs[St.refined_name]
    # print "REFINED:", specs[St.refined]
    # print "LINKSET TO BE REFINED:", specs[St.linkset]

    print "\n2. RETRIEVING THE METADATA ABOUT THE GRAPH TO REFINE"
    # metadata_q = Qry.q_linkset_metadata(specs[St.linkset])
    metadata_q = """
    prefix ll:    <{}>
    SELECT DISTINCT ?type ?singletonGraph
    {{
        # LINKSET METADATA
        <{}>
            a                       ?type ;
            ll:singletonGraph		?singletonGraph .
    }}
    """.format(Ns.alivocab, specs[St.linkset])
    print "QUERY:", metadata_q
    matrix = Qry.sparql_xml_to_matrix(metadata_q)
    # print "\nMETA DATA: ", matrix

    if matrix:

        if matrix[St.message] == "NO RESPONSE":
            print Ec.ERROR_CODE_1
            print matrix[St.message]
            return {'refined': refined, 'difference': diff}

        elif matrix[St.result] is None:
            print matrix[St.message]
            returned = {
                St.message: matrix[St.message],
                St.error_code: 666,
                St.result: None
            }
            return {'refined': returned, 'difference': diff}

    else:
        print Ec.ERROR_CODE_1
        return {'refined': refined, 'difference': diff}

    # GET THE SINGLETON GRAPH OF THE LINKSET TO BE REFINED
    print "\n3. GETTING THE SINGLETON GRAPH OF THE GRAPH TO REFINE"
    specs[St.singletonGraph] = matrix[St.result][1][1]
    # print matrix[St.result][1][0]

    specs[St.insert_query] = insert_query(specs)
    print specs[St.insert_query]

    if type(specs[St.insert_query]) == str:
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query])

    else:
        print "\n4. RUNNING THE EXTRACTION QUERY"
        print specs[St.insert_query][0]
        # is_run = Qry.boolean_endpoint_response(specs[St.insert_query][0])
        Qry.boolean_endpoint_response(specs[St.insert_query][0])

        print "\n5. RUNNING THE FINDING QUERY"
        print specs[St.insert_query][1]
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query][1])

    print "\n>>> RUN SUCCESSFULLY:", is_run.upper()

    # NO INSERTION HAPPENED
    if is_run == "true" or is_run == Ec.ERROR_STARDOG_1:

        # GENERATE THE
        #   (1) LINKSET METADATA
        #   (2) LINKSET OF CORRESPONDENCES
        #   (3) SINGLETON METADATA
        # AND WRITE THEM ALL TO FILE

        print "GENERATING THE METADATA"
        pro_message = refine_metadata(specs)

        # SET THE RESULT ASSUMING IT WENT WRONG
        refined = {
            St.message: Ec.ERROR_CODE_4,
            St.error_code: 4,
            St.result: None
        }
        diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 4, St.result: None}

        server_message = "Linksets created as: [{}]".format(specs[St.refined])
        message = "The linkset was created as [{}]. <br/>{}".format(
            specs[St.refined], pro_message)

        # MESSAGE ABOUT THE INSERTION STATISTICS
        print "\t", server_message

        if int(specs[St.triples]) > 0:

            # UPDATE THE REFINED VARIABLE AS THE INSERTION WAS SUCCESSFUL
            refined = {
                St.message: message,
                St.error_code: 0,
                St.result: specs[St.linkset]
            }

            print "REGISTERING THE ALIGNMENT"
            if refined[St.message].__contains__("ALREADY EXISTS"):
                register_alignment_mapping(specs, created=False)
            else:
                register_alignment_mapping(specs, created=True)

            try:
                print "\nCOMPUTE THE DIFFERENCE AND DOCUMENT IT"
                diff_lens_specs = {
                    St.researchQ_URI: specs[St.researchQ_URI],
                    St.subjectsTarget: specs[St.linkset],
                    St.objectsTarget: specs[St.refined]
                }
                diff = Df.difference(diff_lens_specs, activated=activated)
                message_2 = "\t>>> {} CORRESPONDENCES INSERTED AS THE DIFFERENCE".format(
                    diff_lens_specs[St.triples])
                print message_2
            except Exception as err:
                print "THE DIFFERENCE FAILED: ", str(err.message)

            print "\tLinkset created as: ", specs[St.refined]
            print "\t*** JOB DONE! ***"

            return {'refined': refined, 'difference': diff}

        else:
            print ">>> NO TRIPLE WAS INSERTED BECAUSE NO MATCH COULD BE FOUND"
            return {'refined': refined, 'difference': diff}

    else:
        print "NO MATCH COULD BE FOUND."
Exemplo n.º 10
0
def refine_numeric_query(specs):
    """Build the two SPARQL queries that refine a linkset on a numeric (or year) delta.

    The first query drops the temporary and refined graphs and loads the
    lower-cased, trimmed aligned values of the linked source and target
    resources into tempG:load01. The second query inserts into the refined
    linkset every link whose values differ by at most specs[St.delta], and
    records the evidence in the singleton graph.

    :param specs: the refinement specification (St.source, St.target,
        St.linkset, St.refined, St.refined_name, St.mechanism, St.sameAsCount,
        St.delta and St.numeric_approx_type are read).
    :return: the list [extraction query, finding query].
    """

    if specs[St.numeric_approx_type].lower() == "date":
        # DATE CHECK: DIFFERENCE IN YEARS (THE XSD DATATYPE IS SPELLED dateTime)
        delta_check = "BIND( (YEAR(xsd:dateTime(STR(?x))) - YEAR(xsd:dateTime(STR(?y))) ) as ?DELTA )"
    else:
        # PLAIN NUMBER CHECK: SOURCE VALUE (?x) AGAINST TARGET VALUE (?y)
        delta_check = "BIND(ABS(xsd:decimal(?x) - xsd:decimal(?y)) AS ?DELTA)"

    source = specs[St.source]
    target = specs[St.target]

    # FORMATTING THE ALIGNS PROPERTY
    src_aligns = source[St.aligns] \
        if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns])

    trg_aligns = target[St.aligns] \
        if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns])

    # THE EXTENDED GRAPH, WHEN PROVIDED, REPLACES THE DATASET GRAPH
    src_name = specs[St.source][St.graph_name]
    src_uri = source[St.graph] if St.extended_graph not in source else source[
        St.extended_graph]

    trg_name = specs[St.target][St.graph_name]
    trg_uri = target[St.graph] if St.extended_graph not in target else target[
        St.extended_graph]

    extract = """
    PREFIX ll:    <{0}>
    PREFIX prov:  <{1}>
    PREFIX tempG: <{2}>

    DROP SILENT GRAPH tempG:load01 ;
    DROP SILENT GRAPH tempG:load02 ;
    DROP SILENT GRAPH <{3}> ;
    DROP SILENT GRAPH <{4}{5}> ;

    ### 1. LOADING SOURCE AND TARGET TO A TEMPORARY GRAPH
    INSERT
    {{
        GRAPH tempG:load01
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{8}_1 ll:relatesTo1 ?srcTrimmed .
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{9}_2 ll:relatesTo3 ?trgTrimmed .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{7}>
        {{
            ?{8}_1 ?pred  ?{9}_2 .
        }}
        ### SOURCE DATASET
        graph <{10}>
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{8}_1 {12} ?value_1 .
            bind (lcase(str(?value_1)) as ?src_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_value, ?regexp, '$1$2') AS ?srcTrimmed)
        }}
        ### TARGET DATASET
        graph <{11}>
        {{
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{9}_2 {13} ?value_2 .
            bind (lcase(str(?value_2)) as ?trg_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?trg_value, ?regexp, '$1$2') AS ?trgTrimmed)
        }}
    }} """.format(
        # 0          1         2           3                  4              5
        Ns.alivocab,
        Ns.prov,
        Ns.tmpgraph,
        specs[St.refined],
        Ns.singletons,
        specs[St.refined_name],
        # 6           7                  8         9         10       11       12          13
        Ns.tmpvocab,
        specs[St.linkset],
        src_name,
        trg_name,
        src_uri,
        trg_uri,
        src_aligns,
        trg_aligns)

    # NOTE(review): THE rdf: AND xsd: PREFIXES USED BELOW ARE NOT DECLARED IN THIS QUERY;
    # PRESUMABLY THE ENDPOINT PROVIDES THEM - CONFIRM
    find = """
    ### 2. FINDING CANDIDATE MATCH BETWEEN THE SOURCE AND TARGET
    PREFIX ll:    <{0}>
    PREFIX prov:  <{1}>
    PREFIX tempG: <{2}>
    INSERT
    {{
        ### MATCH FOUND
        GRAPH <{10}>
        {{
            ?{3}_1 ?newSingletons ?{4}_2 .
        }}
        # METADATA OF MATCH FOUND
        GRAPH <{11}{12}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf     ll:{8}{9} ;
                prov:wasDerivedFrom         ?pred ;
                ll:hasEvidence              ?evidence .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{5}>
        {{
            ?{3}_1 ?pred  ?{4}_2 .
            bind( iri(replace("{0}{8}{9}_#", "#",  strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}
        ### SOURCE AND TARGET LOADED TO A TEMPORARY GRAPH
        GRAPH tempG:load01
        {{
            ?{3}_1 ll:relatesTo1 ?x .
            ?{4}_2 ll:relatesTo3 ?y .
        }}

        # DELTA APPROX CHECK
        {6}

        FILTER( ABS(?DELTA) <= {7} )

        BIND(concat("The DELTA of [", ?x, "] and [", ?y, "] is [", STR(ABS(?DELTA)),
        "] which passed the threshold of [", STR({7}), "]" ) AS ?evidence)
    }}""".format(
        # 0          1        2            3         4         5                  6            7
        Ns.alivocab,
        Ns.prov,
        Ns.tmpgraph,
        src_name,
        trg_name,
        specs[St.linkset],
        delta_check,
        specs[St.delta],
        # 8                  9                      10                 11             12
        specs[St.mechanism],
        specs[St.sameAsCount],
        specs[St.refined],
        Ns.singletons,
        specs[St.refined_name])

    return [extract, find]
Exemplo n.º 11
0
def refine_intermediate_query(specs):
    """Build the SPARQL update that refines a linkset through an intermediate dataset.

    A link of the linkset to refine is kept when the trimmed, lower-cased
    aligned values of its source and target resources are both found as
    values of a single resource of the intermediate graph. The link is
    inserted in the refined linkset with a new singleton predicate, and the
    evidence is recorded in the singleton graph.

    :param specs: the refinement specification (St.source, St.target,
        St.linkset, St.refined, St.refined_name, St.mechanism, St.sameAsCount
        and St.intermediate_graph are read).
    :return: the update request as a string.
    """

    source = specs[St.source]
    target = specs[St.target]

    # FORMATTING THE ALIGNS PROPERTY
    src_aligns = source[St.aligns] \
        if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns])

    trg_aligns = target[St.aligns] \
        if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns])

    # THE EXTENDED GRAPH, WHEN PROVIDED, REPLACES THE DATASET GRAPH
    src_name = specs[St.source][St.graph_name]
    src_uri = source[St.graph] if St.extended_graph not in source else source[
        St.extended_graph]

    trg_name = specs[St.target][St.graph_name]
    trg_uri = target[St.graph] if St.extended_graph not in target else target[
        St.extended_graph]

    # THE REQUEST IS A SEQUENCE OF OPERATIONS SEPARATED BY ";":
    # FOUR DROPS, THE INSERT AND TWO FINAL DROPS OF THE TEMPORARY GRAPHS
    insert = """
    PREFIX alivocab:    <{16}>
    PREFIX prov:        <{17}>

    DROP SILENT GRAPH <{0}load01> ;
    DROP SILENT GRAPH <{0}load02> ;
    DROP SILENT GRAPH <{10}> ;
    DROP SILENT GRAPH <{14}{15}> ;

    INSERT
    {{
        GRAPH <{10}>
        {{
            ?{1} ?newSingletons  ?{3} .
        }}
        ### SINGLETONS' METADATA
        GRAPH <{14}{15}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf     alivocab:{12}{13} ;
                prov:wasDerivedFrom         ?pred ;
                alivocab:hasEvidence        ?evidence .
        }}
    }}

    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{5}>
        {{
            ?{1} ?pred  ?{3} .
            bind( iri(replace("{11}{12}{13}_#", "#",  strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}

        ### SOURCE DATASET
        graph <{6}>
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{1} {2} ?value_1 .
            bind (lcase(str(?value_1)) as ?src_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_value, ?regexp, '$1$2') AS ?src_trimmed)
        }}

        ### TARGET DATASET
        graph <{7}>
        {{
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{3} {4} ?value_2 .
            bind (lcase(str(?value_2)) as ?trg_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?trg_value, ?regexp, '$1$2') AS ?trg_trimmed)
        }}

        ### INTERMEDIATE DATASET
        graph <{9}>
        {{
            ?intermediate_uri
                ?intPred_1 ?value_3 ;
                ?intPred_2 ?value_4 .

            ### VALUES TO LOWER CASE
            bind (lcase(str(?value_3)) as ?src_val)
            bind (lcase(str(?value_4)) as ?trg_val)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_val, ?regexp, '$1$2') AS ?src_trimmed)
            BIND(REPLACE(?trg_val, ?regexp, '$1$2') AS ?trg_trimmed)
            BIND(concat("[", ?src_trimmed, "] aligns with [", ?trg_trimmed, "]") AS ?evidence)
        }}
    }} ;

    DROP SILENT GRAPH <{0}load01> ;
    DROP SILENT GRAPH <{0}load02>
    """.format(
        # 0          1         2           3         4
        Ns.tmpgraph,
        src_name,
        src_aligns,
        trg_name,
        trg_aligns,
        # 5                6        7        8            9
        specs[St.linkset],
        src_uri,
        trg_uri,
        Ns.tmpvocab,
        specs[St.intermediate_graph],
        # 10               11           12                  13
        specs[St.refined],
        Ns.alivocab,
        specs[St.mechanism],
        specs[St.sameAsCount],
        # 14           15                      16           17
        Ns.singletons,
        specs[St.refined_name],
        Ns.alivocab,
        Ns.prov)

    return insert
Exemplo n.º 12
0
def refine_exact_query(specs):

    source = specs[St.source]
    target = specs[St.target]
    src_graph = source[
        St.graph] if St.extended_graph not in source else source[
            St.extended_graph]
    trg_graph = target[
        St.graph] if St.extended_graph not in target else target[
            St.extended_graph]
    print "src_graph:", src_graph
    print "trg_graph:", trg_graph

    # FORMATTING THE ALIGNS PROPERTY
    src_aligns = source[St.aligns] \
        if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns])

    trg_aligns = target[St.aligns] \
        if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns])

    # GENERATE THE INSERT QUERY
    insert_query = """
    PREFIX prov:        <{}>
    PREFIX rdf:         <{}>
    PREFIX alivocab:    <{}>
    INSERT
    {{
        ### REFINED LINKSET
        GRAPH <{}>
        {{
            ?subject ?newSingletons ?object .
        }}

        ### SINGLETONS' METADATA
        GRAPH <{}{}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf     alivocab:{}{} ;

                ## THIS IS THE TRAIL
                prov:wasDerivedFrom         ?singleton ;

                ## BUT THIS IS ADDED FOR QUERY SIMPLICITY AND EFFICIENCY
                ?sP ?sO ;

                ## THIS IS ITS OWN EVIDENCE
                alivocab:hasEvidence        ?trimmed .
        }}
    }}
    WHERE
    {{
        ### LINKSET
        GRAPH <{}>
        {{
            ?subject ?singleton ?object .
             bind( iri(replace("{}{}{}_#", "#",  strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}

        ### METADATA
        graph <{}>
        {{
            ?singleton ?sP ?sO .
        }}

        ### SOURCE DATASET
        GRAPH <{}>
        {{
            ?subject
                a   <{}> ;
                {} 	?s_label .
            BIND(lcase(str(?s_label)) as ?label1)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?label1, ?regexp, '$1$2') AS ?trimmed)
        }}

        ### TARGET DATASET
        GRAPH <{}>
        {{
            ?object
                a   <{}> ;
                {} 	?o_label .
            BIND(lcase(str(?o_label)) as ?label2)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?label2, ?regexp, '$1$2') AS ?trimmed)
        }}
    }}
    """.format(Ns.prov, Ns.rdf, Ns.alivocab, specs[St.refined], Ns.singletons,
               specs[St.refined_name], specs[St.mechanism],
               specs[St.sameAsCount], specs[St.linkset], Ns.alivocab,
               specs[St.mechanism], specs[St.sameAsCount],
               specs[St.singletonGraph], src_graph, source[St.entity_datatype],
               src_aligns, trg_graph, target[St.entity_datatype], trg_aligns)
    # print insert_query
    return insert_query
Exemplo n.º 13
0
def geo_load_query(specs, is_source):
    """Build the SPARQL INSERT query that copies one side's coordinates to a temp graph.

    The generated query selects every ``?resource`` that
      * takes part in a link stored in the graph ``specs[St.refined]`` (as the
        subject of the link when loading the source, as its object otherwise),
      * is typed, in its dataset graph, with the side's entity datatype, and
      * carries values for the configured longitude and latitude properties,
    then writes ``wgs:long`` / ``wgs:lat`` triples for it into the graph
    ``<{Ns.tmpgraph}load_{lens_name}_1>`` (source) or ``..._2>`` (target).
    Coordinate values have ``,`` replaced by ``.`` and are typed as
    ``xsd:float``; the FILTERs keep only values that compare against 0.

    :param specs: dictionary of specifications. Keys read here: ``St.source``
        or ``St.target`` (depending on ``is_source``), ``St.lens_name`` and
        ``St.refined``. The selected side dictionary must provide
        ``St.longitude``, ``St.latitude``, ``St.graph`` and
        ``St.entity_datatype``; ``St.rdf_predicate`` is optional.
    :param is_source: ``True`` to build the query for the source dataset; any
        other value builds the query for the target dataset.
    :return: the SPARQL update query as a string.
    :raises KeyError: when one of the required keys is missing.
    """

    def as_term(value):
        # Values that Ls.nt_format accepts are used verbatim; anything else is
        # treated as a bare URI and wrapped in angle brackets.
        return value if Ls.nt_format(value) else "<{}>".format(value)

    # Identity test kept on purpose: callers only get the source branch when
    # they pass the literal True, exactly as before.
    if is_source is True:
        info = specs[St.source]
        load = "_{}_1".format(specs[St.lens_name])
        links = "?resource   ?singPre    ?target ."
        side = "SOURCE"
    else:
        info = specs[St.target]
        load = "_{}_2".format(specs[St.lens_name])
        links = "?source   ?singPre    ?resource ."
        side = "TARGET"

    # USE "a" (rdf:type) UNLESS ANOTHER TYPING PREDICATE IS PROVIDED
    custom_type_pred = info.get(St.rdf_predicate)
    rdf_pred = "a" if custom_type_pred is None else as_term(custom_type_pred)

    # BANNER PLACED (AS A SPARQL COMMENT) AT THE TOP OF THE QUERY
    bar = "######################################################################"
    banner = "{0}\n    ### INSERTING DATA FROM THE {1}\n    {0}".format(bar, side)

    # NOTE: ?name / ?location are bound at the end of the WHERE clause but are
    # not used by the INSERT template; they are kept so the query text stays
    # as close as possible to what the endpoint has been receiving.
    query = """
    {banner}
    PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
    PREFIX wgs:  <http://www.w3.org/2003/01/geo/wgs84_pos#>
    PREFIX xsd:  <http://www.w3.org/2001/XMLSchema#>
    INSERT
    {{
        GRAPH <{tmpgraph}load{load}>
        {{
            ?resource  wgs:long  ?longitude .
            ?resource  wgs:lat   ?latitude .
        }}
    }}
    WHERE
    {{
        GRAPH <{links_graph}>
        {{
            {links}
        }}

        GRAPH <{data_graph}>
        {{
            ### LOCATION COORDINATES
            ?resource  {rdf_pred}  <{datatype}> .
            ?resource  {longitude}  ?long .
            ?resource  {latitude}  ?lat .

            ### MAKING SURE THE COORDINATES ARE WELL FORMATTED
            BIND( STRDT(REPLACE(STR(?long), ",", "."), xsd:float)  as ?longitude )
            BIND( STRDT(REPLACE(STR(?lat), ",", "."), xsd:float)  as ?latitude )

            ### MAKING SURE THE COORDINATES AT DIGITS AND NOT LITERALS
            Filter (?longitude >= 0 || ?longitude <= 0 )
            Filter (?latitude  >= 0 || ?latitude  <= 0 )

            ### GENERATE A LOCATION URI
            BIND( replace("http://risis.eu/#","#", STRAFTER(str(UUID()),"uuid:")) as ?name )
            BIND(iri(?name) as ?location)
        }}
    }}
    """.format(
        banner=banner,
        tmpgraph=Ns.tmpgraph,
        load=load,
        links_graph=specs[St.refined],
        links=links,
        data_graph=info[St.graph],
        rdf_pred=rdf_pred,
        datatype=info[St.entity_datatype],
        longitude=as_term(info[St.longitude]),
        latitude=as_term(info[St.latitude]))
    # print query
    return query