示例#1
0
def spa_subset_insert(specs):
    """Build the SPARQL INSERT query that creates a subset linkset.

    For every ``?subject`` of the source entity type that has an ``?object``
    through the source's ``St.link_old`` predicate, the query mints a
    singleton predicate IRI (namespace + mechanism + sameAsCount + UUID) and
    writes the link into the linkset graph plus its metadata into the
    singleton graph.

    :param specs: mapping read with the ``St`` keys ``linkset``,
        ``linkset_name``, ``sameAsCount``, ``mechanism`` and ``source``
        (itself read with ``link_old``, ``graph_name``, ``graph`` and
        ``entity_datatype``).
    :return: the SPARQL update as a string.
    """
    source = specs[St.source]

    # The linking predicate is used verbatim when Ls.nt_format accepts it,
    # otherwise it is wrapped in angle brackets.
    # NOTE(review): Ls.nt_format presumably tests for an already-bracketed
    # term -- confirm against its definition.
    link_predicate = source[St.link_old]
    if not Ls.nt_format(link_predicate):
        link_predicate = "<{}>".format(link_predicate)

    template = """
    ###### INSERT SUBSET LINKSET
    PREFIX rdf:        <{}>
    PREFIX singleton:   <{}>
    PREFIX alivocab:    <{}>

    INSERT
    {{
        GRAPH <{}>
        {{
            ?subject    ?singPre    ?object .
        }}

        GRAPH singleton:{}
        {{
            ?singPre    rdf:singletonPropertyOf     alivocab:exactStrSim{} .
            ?singPre    alivocab:hasStrength        1 .
            ?singPre    alivocab:hasEvidence        "Aligned by {} ." .
        }}
    }}
    WHERE
    {{
        GRAPH <{}>
        {{
            ?subject a <{}> ;
                {}  ?object .
        }}

        ### Create A SINGLETON URI
        BIND( replace("{}{}{}_#", "#", STRAFTER(str(UUID()),"uuid:")) as ?pre )
        BIND(iri(?pre) as ?singPre)
    }}
    """

    # Values are consumed positionally, top to bottom of the template.
    values = (
        # the three PREFIX declarations
        Ns.rdf,
        Ns.singletons,
        Ns.alivocab,
        # INSERT clause: linkset graph, singleton graph and its metadata
        specs[St.linkset],
        specs[St.linkset_name],
        specs[St.sameAsCount],
        source[St.graph_name],
        # WHERE clause: source graph, entity type and linking predicate
        source[St.graph],
        source[St.entity_datatype],
        link_predicate,
        # the three parts of the singleton IRI prefix
        Ns.alivocab,
        specs[St.mechanism],
        specs[St.sameAsCount],
    )
    return template.format(*values)
示例#2
0
def load_temp_query(specs, is_source, is_expand=True):
    """Build the SPARQL INSERT that loads one side of an alignment into a temp graph.

    The query copies, for each resource of the configured entity type, the
    lower-cased and whitespace-trimmed value of its aligned property into
    ``<Ns.tmpgraph>load_<linkset_name>_1`` (source) or ``..._2`` (target).

    :param specs: mapping read with the ``St`` keys ``source``/``target``,
        ``linkset_name`` and ``expanded_name``.
    :param is_source: ``True`` (identity-tested) selects the source side;
        anything else selects the target side.
    :param is_expand: when exactly ``False`` the "linkset to expand" pattern
        is emitted commented out.
    :return: the SPARQL update as a string.
    """
    # A "# " prefix turns every line of the linkset pattern into a comment.
    comment_exp = "# " if is_expand is False else ""

    # Pick the dataset description and the side-specific text fragments.
    if is_source is True:
        info = specs[St.source]
        load_pattern = "_{}_1"
        triple_pattern = "\t\t\t?{}  ?predicate ?target"
    else:
        info = specs[St.target]
        load_pattern = "_{}_2"
        triple_pattern = "\t\t\t?source  ?predicate ?{}"
    load = load_pattern.format(specs[St.linkset_name])
    linkset_triple = triple_pattern.format(info[St.graph_name])

    # "a" (rdf:type) is the typing predicate unless another one is supplied.
    rdf_pred = "a"
    if St.rdf_predicate in info and info[St.rdf_predicate] is not None:
        rdf_pred = info[St.rdf_predicate]
        if not Ls.nt_format(rdf_pred):
            rdf_pred = "<{}>".format(rdf_pred)

    # The aligned property is bracketed unless Ls.nt_format accepts it as is.
    aligns = info[St.aligns]
    if not Ls.nt_format(aligns):
        aligns = "<{}>".format(aligns)

    name = info[St.graph_name]
    uri = info[St.graph]

    # Without a reducer the FILTER NOT EXISTS section is commented out.
    has_reducer = St.reducer in info
    reducer_comment = "" if has_reducer else "#"
    reducer = info[St.reducer] if has_reducer else ""

    template = """
    INSERT
    {{
        GRAPH <{0}load{8}>
        {{
            ?{5}  alivocab:hasProperty  ?trimmed .
        }}
    }}
    WHERE
    {{

        # THE LINKSET TO EXPAND
        {12}GRAPH <{9}{10}>
        {12}{{
        {12}    {11} .
        {12}}}

        GRAPH <{1}>
        {{
            # RESOURCE IS OF A CERTAIN TYPE
            ?{5}  {2}  <{7}> .

            # EXTRACT THE PROPERTY-VALUE TO ALIGN
            ?{5}  {3}  ?object .

            # LOWER CASE OF THE VALUE
            BIND(lcase(str(?object)) as ?label)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?label, ?regexp, '$1$2') AS ?trimmed)
        }}

        {6}FILTER NOT EXISTS
        {6}{{
        {6}    GRAPH <{4}>
        {6}    {{
        {6}        {{ ?{5}   ?pred   ?obj . }}
        {6}        UNION
        {6}        {{ ?obj   ?pred   ?{5}. }}
        {6}    }}
        {6}}}
    }}
    """

    fields = (
        Ns.tmpgraph,               # 0
        uri,                       # 1
        rdf_pred,                  # 2
        aligns,                    # 3
        reducer,                   # 4
        name,                      # 5
        reducer_comment,           # 6
        info[St.entity_datatype],  # 7
        load,                      # 8
        Ns.linkset,                # 9
        specs[St.expanded_name],   # 10
        linkset_triple,            # 11
        comment_exp,               # 12
    )
    return template.format(*fields)
示例#3
0
def refine_numeric_query(specs):
    """Build the SPARQL updates that refine a linkset by numeric closeness.

    Two update strings are produced:

    1. ``extract`` drops the temporary/refined/singleton graphs and loads, for
       every link of ``specs[St.linkset]``, the lower-cased and trimmed value
       of the source and target aligned properties into ``tempG:load01``.
    2. ``find`` keeps the links whose values differ by at most
       ``specs[St.delta]`` and writes them, with fresh singleton predicates
       and an evidence string, into the refined and singleton graphs.

    :param specs: mapping read with the ``St`` keys ``numeric_approx_type``,
        ``source``, ``target``, ``linkset``, ``refined``, ``refined_name``,
        ``delta``, ``mechanism`` and ``sameAsCount``.
    :return: ``[extract, find]``, both SPARQL update strings.
    """

    # PLAIN NUMBER CHECK: distance between the source value (?x) and the
    # target value (?y). Subtracting ?x from itself would always give 0 and
    # let every link through the threshold filter.
    delta_check = "BIND(ABS(xsd:decimal(?x) - xsd:decimal(?y)) AS ?DELTA)"

    # DATE CHECK: compare the years of the two values instead.
    if specs[St.numeric_approx_type].lower() == "date":
        # NOTE(review): the XSD datatype is spelled xsd:dateTime; confirm that
        # the target triple store accepts the lower-case form used here.
        delta_check = "BIND( (YEAR(xsd:datetime(STR(?x))) - YEAR(xsd:datetime(STR(?y))) ) as ?DELTA )"

    source = specs[St.source]
    target = specs[St.target]

    # FORMATTING THE ALIGNS PROPERTY: used verbatim when Ls.nt_format accepts
    # it, otherwise wrapped in angle brackets.
    src_aligns = source[St.aligns] \
        if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns])

    trg_aligns = target[St.aligns] \
        if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns])

    # The extended graph, when present, takes precedence over the plain graph.
    src_name = specs[St.source][St.graph_name]
    src_uri = source[St.graph] if St.extended_graph not in source else source[
        St.extended_graph]

    trg_name = specs[St.target][St.graph_name]
    trg_uri = target[St.graph] if St.extended_graph not in target else target[
        St.extended_graph]

    extract = """
    PREFIX ll:    <{0}>
    PREFIX prov:  <{1}>
    PREFIX tempG: <{2}>

    DROP SILENT GRAPH tempG:load01 ;
    DROP SILENT GRAPH tempG:load02 ;
    DROP SILENT GRAPH <{3}> ;
    DROP SILENT GRAPH <{4}{5}> ;

    ### 1. LOADING SOURCE AND TARGET TO A TEMPORARY GRAPH
    INSERT
    {{
        GRAPH tempG:load01
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{8}_1 ll:relatesTo1 ?srcTrimmed .
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{9}_2 ll:relatesTo3 ?trgTrimmed .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{7}>
        {{
            ?{8}_1 ?pred  ?{9}_2 .
        }}
        ### SOURCE DATASET
        graph <{10}>
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{8}_1 {12} ?value_1 .
            bind (lcase(str(?value_1)) as ?src_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_value, ?regexp, '$1$2') AS ?srcTrimmed)
        }}
        ### TARGET DATASET
        graph <{11}>
        {{
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{9}_2 {13} ?value_2 .
            bind (lcase(str(?value_2)) as ?trg_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?trg_value, ?regexp, '$1$2') AS ?trgTrimmed)
        }}
    }} """.format(
        # 0 .. 5
        Ns.alivocab,
        Ns.prov,
        Ns.tmpgraph,
        specs[St.refined],
        Ns.singletons,
        specs[St.refined_name],
        # 6 .. 13 (index 6 is passed but not referenced by the template)
        Ns.tmpvocab,
        specs[St.linkset],
        src_name,
        trg_name,
        src_uri,
        trg_uri,
        src_aligns,
        trg_aligns)

    find = """
    ### 2. FINDING CANDIDATE MATCH BETWEEN THE SOURCE AND TARGET
    PREFIX ll:    <{0}>
    PREFIX prov:  <{1}>
    PREFIX tempG: <{2}>
    INSERT
    {{
        ### MATCH FOUND
        GRAPH <{10}>
        {{
            ?{3}_1 ?newSingletons ?{4}_2 .
        }}
        # METADATA OF MATCH FOUND
        GRAPH <{11}{12}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf     ll:{8}{9} ;
                prov:wasDerivedFrom         ?pred ;
                ll:hasEvidence              ?evidence .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{5}>
        {{
            ?{3}_1 ?pred  ?{4}_2 .
            bind( iri(replace("{0}{8}{9}_#", "#",  strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}
        ### SOURCE AND TARGET LOADED TO A TEMPORARY GRAPH
        GRAPH tempG:load01
        {{
            ?{3}_1 ll:relatesTo1 ?x .
            ?{4}_2 ll:relatesTo3 ?y .
        }}

        # DELTA APPROX CHECK
        {6}

        FILTER( ABS(?DELTA) <= {7} )

        BIND(concat("The DELTA of [", ?x, "] and [", ?y, "] is [", STR(ABS(?DELTA)),
        "] which passed the threshold of [", STR({7}), "]" ) AS ?evidence)
    }}""".format(
        # 0 .. 7
        Ns.alivocab,
        Ns.prov,
        Ns.tmpgraph,
        src_name,
        trg_name,
        specs[St.linkset],
        delta_check,
        specs[St.delta],
        # 8 .. 12
        specs[St.mechanism],
        specs[St.sameAsCount],
        specs[St.refined],
        Ns.singletons,
        specs[St.refined_name])

    return [extract, find]
示例#4
0
def refine_intermediate_query(specs):
    """Build the SPARQL update that refines a linkset through an intermediate dataset.

    The update drops the temporary, refined and singleton graphs, then keeps a
    link of ``specs[St.linkset]`` when the normalised (lower-cased, trimmed)
    source value and target value are both found as values of one resource
    of the intermediate graph. Each kept link gets a fresh singleton
    predicate, a ``prov:wasDerivedFrom`` pointer to the original predicate and
    an evidence string. The temporary graphs are dropped again at the end.

    :param specs: mapping read with the ``St`` keys ``source``, ``target``,
        ``linkset``, ``intermediate_graph``, ``refined``, ``refined_name``,
        ``mechanism`` and ``sameAsCount``.
    :return: the SPARQL update as a string.
    """

    source = specs[St.source]
    target = specs[St.target]

    # FORMATTING THE ALIGNS PROPERTY: used verbatim when Ls.nt_format accepts
    # it, otherwise wrapped in angle brackets.
    src_aligns = source[St.aligns] \
        if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns])

    trg_aligns = target[St.aligns] \
        if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns])

    # The extended graph, when present, takes precedence over the plain graph.
    src_name = specs[St.source][St.graph_name]
    src_uri = source[St.graph] if St.extended_graph not in source else source[
        St.extended_graph]

    trg_name = specs[St.target][St.graph_name]
    trg_uri = target[St.graph] if St.extended_graph not in target else target[
        St.extended_graph]

    # The tail of the template must close BIND(...), then the intermediate
    # GRAPH group, then the WHERE clause, before the ';' that separates the
    # INSERT from the trailing DROP operations.
    insert = """
    PREFIX alivocab:    <{16}>
    PREFIX prov:        <{17}>

    DROP SILENT GRAPH <{0}load01> ;
    DROP SILENT GRAPH <{0}load02> ;
    DROP SILENT GRAPH <{10}> ;
    DROP SILENT GRAPH <{14}{15}> ;

    INSERT
    {{
        GRAPH <{10}>
        {{
            ?{1} ?newSingletons  ?{3} .
        }}
        ### SINGLETONS' METADATA
        GRAPH <{14}{15}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf     alivocab:{12}{13} ;
                prov:wasDerivedFrom         ?pred ;
                alivocab:hasEvidence        ?evidence .
        }}
    }}

    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{5}>
        {{
            ?{1} ?pred  ?{3} .
            bind( iri(replace("{11}{12}{13}_#", "#",  strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}

        ### SOURCE DATASET
        graph <{6}>
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{1} {2} ?value_1 .
            bind (lcase(str(?value_1)) as ?src_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_value, ?regexp, '$1$2') AS ?src_trimmed)
        }}

        ### TARGET DATASET
        graph <{7}>
        {{
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{3} {4} ?value_2 .
            bind (lcase(str(?value_2)) as ?trg_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?trg_value, ?regexp, '$1$2') AS ?trg_trimmed)
        }}

        ### INTERMEDIATE DATASET
        graph <{9}>
        {{
            ?intermediate_uri
                ?intPred_1 ?value_3 ;
                ?intPred_2 ?value_4 .

            ### VALUES TO LOWER CASE
            bind (lcase(str(?value_3)) as ?src_val)
            bind (lcase(str(?value_4)) as ?trg_val)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_val, ?regexp, '$1$2') AS ?src_trimmed)
            BIND(REPLACE(?trg_val, ?regexp, '$1$2') AS ?trg_trimmed)
            BIND(concat("[", ?src_trimmed, "] aligns with [", ?trg_trimmed, "]") AS ?evidence)
        }}
    }} ;

    DROP SILENT GRAPH <{0}load01> ;
    DROP SILENT GRAPH <{0}load02>
    """.format(
        # 0 .. 4
        Ns.tmpgraph,
        src_name,
        src_aligns,
        trg_name,
        trg_aligns,
        # 5 .. 9 (index 8 is passed but not referenced by the template)
        specs[St.linkset],
        src_uri,
        trg_uri,
        Ns.tmpvocab,
        specs[St.intermediate_graph],
        # 10 .. 13
        specs[St.refined],
        Ns.alivocab,
        specs[St.mechanism],
        specs[St.sameAsCount],
        # 14 .. 17
        Ns.singletons,
        specs[St.refined_name],
        Ns.alivocab,
        Ns.prov)

    return insert
示例#5
0
def refine_exact_query(specs):
    """Build the SPARQL INSERT that refines a linkset by exact value match.

    For each link of ``specs[St.linkset]`` the query normalises (lower-cases
    and trims) the aligned value of the subject in the source graph and of
    the object in the target graph; both are bound to the same ``?trimmed``
    variable. Matching links are written to the refined graph with a fresh
    singleton predicate whose metadata copies the original singleton's
    metadata, points back to it via ``prov:wasDerivedFrom`` and records
    ``?trimmed`` as evidence.

    The selected source and target graph IRIs are printed to stdout.

    :param specs: mapping read with the ``St`` keys ``source``, ``target``,
        ``refined``, ``refined_name``, ``mechanism``, ``sameAsCount``,
        ``linkset`` and ``singletonGraph``.
    :return: the SPARQL update as a string.
    """

    source = specs[St.source]
    target = specs[St.target]

    # The extended graph, when present, takes precedence over the plain graph.
    src_graph = source[St.extended_graph] \
        if St.extended_graph in source else source[St.graph]
    trg_graph = target[St.extended_graph] \
        if St.extended_graph in target else target[St.graph]

    # Single-argument parenthesised print: same output as the former print
    # statements on Python 2, and valid syntax on Python 3. %-formatting is
    # used so unicode graph names are not forced through the ASCII codec.
    print("src_graph: %s" % (src_graph,))
    print("trg_graph: %s" % (trg_graph,))

    # FORMATTING THE ALIGNS PROPERTY: used verbatim when Ls.nt_format accepts
    # it, otherwise wrapped in angle brackets.
    src_aligns = source[St.aligns] \
        if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns])

    trg_aligns = target[St.aligns] \
        if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns])

    # GENERATE THE INSERT QUERY (placeholders are filled positionally)
    insert_query = """
    PREFIX prov:        <{}>
    PREFIX rdf:         <{}>
    PREFIX alivocab:    <{}>
    INSERT
    {{
        ### REFINED LINKSET
        GRAPH <{}>
        {{
            ?subject ?newSingletons ?object .
        }}

        ### SINGLETONS' METADATA
        GRAPH <{}{}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf     alivocab:{}{} ;

                ## THIS IS THE TRAIL
                prov:wasDerivedFrom         ?singleton ;

                ## BUT THIS IS ADDED FOR QUERY SIMPLICITY AND EFFICIENCY
                ?sP ?sO ;

                ## THIS IS ITS OWN EVIDENCE
                alivocab:hasEvidence        ?trimmed .
        }}
    }}
    WHERE
    {{
        ### LINKSET
        GRAPH <{}>
        {{
            ?subject ?singleton ?object .
             bind( iri(replace("{}{}{}_#", "#",  strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}

        ### METADATA
        graph <{}>
        {{
            ?singleton ?sP ?sO .
        }}

        ### SOURCE DATASET
        GRAPH <{}>
        {{
            ?subject
                a   <{}> ;
                {} 	?s_label .
            BIND(lcase(str(?s_label)) as ?label1)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?label1, ?regexp, '$1$2') AS ?trimmed)
        }}

        ### TARGET DATASET
        GRAPH <{}>
        {{
            ?object
                a   <{}> ;
                {} 	?o_label .
            BIND(lcase(str(?o_label)) as ?label2)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?label2, ?regexp, '$1$2') AS ?trimmed)
        }}
    }}
    """.format(Ns.prov, Ns.rdf, Ns.alivocab, specs[St.refined], Ns.singletons,
               specs[St.refined_name], specs[St.mechanism],
               specs[St.sameAsCount], specs[St.linkset], Ns.alivocab,
               specs[St.mechanism], specs[St.sameAsCount],
               specs[St.singletonGraph], src_graph, source[St.entity_datatype],
               src_aligns, trg_graph, target[St.entity_datatype], trg_aligns)
    return insert_query
示例#6
0
def geo_load_query(specs, is_source):
    """Build the SPARQL INSERT that loads coordinates of linked resources into a temp graph.

    For resources that take part in a link of ``specs[St.refined]`` and have
    the configured entity type, the query reads the longitude and latitude
    properties, replaces "," by "." and casts to ``xsd:float``, and stores
    them as ``wgs:long`` / ``wgs:lat`` in
    ``<Ns.tmpgraph>load_<lens_name>_1`` (source) or ``..._2`` (target).

    :param specs: mapping read with the ``St`` keys ``source``/``target``,
        ``lens_name`` and ``refined``.
    :param is_source: ``True`` (identity-tested) selects the source side;
        anything else selects the target side.
    :return: the SPARQL update as a string.
    """
    # Everything that depends on the side is chosen in one place.
    if is_source is True:
        info = specs[St.source]
        load = "_{}_1".format(specs[St.lens_name])
        links = "?resource   ?singPre    ?target ."
        message = """######################################################################
    ### INSERTING DATA FROM THE SOURCE
    ######################################################################"""
    else:
        info = specs[St.target]
        load = "_{}_2".format(specs[St.lens_name])
        links = "?source   ?singPre    ?resource ."
        message = """######################################################################
    ### INSERTING MESSAGE FROM THE TARGET
    ######################################################################"""

    # "a" (rdf:type) is the typing predicate unless another one is supplied.
    rdf_pred = "a"
    if St.rdf_predicate in info and info[St.rdf_predicate] is not None:
        rdf_pred = info[St.rdf_predicate]
        if not Ls.nt_format(rdf_pred):
            rdf_pred = "<{}>".format(rdf_pred)

    # Coordinate predicates are bracketed unless Ls.nt_format accepts them.
    longitude = info[St.longitude]
    if not Ls.nt_format(longitude):
        longitude = "<{}>".format(longitude)

    latitude = info[St.latitude]
    if not Ls.nt_format(latitude):
        latitude = "<{}>".format(latitude)

    # Graph holding the resources whose coordinates are read.
    uri = info[St.graph]

    template = """
    {5}
    PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
    PREFIX wgs:  <http://www.w3.org/2003/01/geo/wgs84_pos#>
    INSERT
    {{
        GRAPH <{0}load{1}>
        {{
            ?resource  wgs:long  ?longitude .
            ?resource  wgs:lat   ?latitude .
        }}
    }}
    WHERE
    {{
        GRAPH <{8}>
        {{
            {9}
        }}

        GRAPH <{2}>
        {{
            ### LOCATION COORDINATES
            ?resource  {6}  <{7}> .
            ?resource  {3}  ?long .
            ?resource  {4}  ?lat .

            ### MAKING SURE THE COORDINATES ARE WELL FORMATTED
            BIND( STRDT(REPLACE(STR(?long), ",", "."), xsd:float)  as ?longitude )
            BIND( STRDT(REPLACE(STR(?lat), ",", "."), xsd:float)  as ?latitude )

            ### MAKING SURE THE COORDINATES AT DIGITS AND NOT LITERALS
            Filter (?longitude >= 0 || ?longitude <= 0 )
            Filter (?latitude  >= 0 || ?latitude  <= 0 )

            ### GENERATE A LOCATION URI
            BIND( replace("http://risis.eu/#","#", STRAFTER(str(UUID()),"uuid:")) as ?name )
            BIND(iri(?name) as ?location)
        }}
    }}
    """

    fields = (
        Ns.tmpgraph,               # 0
        load,                      # 1
        uri,                       # 2
        longitude,                 # 3
        latitude,                  # 4
        message,                   # 5
        rdf_pred,                  # 6
        info[St.entity_datatype],  # 7
        specs[St.refined],         # 8
        links,                     # 9
    )
    return template.format(*fields)