Пример #1
0
def main_alignment(alignment):
    """Return the main (non-singleton) graph for the given alignment.

    The local name of ``alignment`` fills the ``graphs_search`` query
    template.  The first result row (row 0 is skipped, presumably a header
    row) whose first cell does not contain "singletons" is returned.

    When the search yields nothing usable, an alignment whose string form
    contains ``Ns.singletons`` is returned with that part replaced by
    ``Ns.linkset``; any other alignment is returned unchanged.

    A ``ValueError`` raised along the way is printed with its traceback and
    the input alignment is returned as is.
    """
    try:
        # Local name of the graph, used to fill the search query template.
        local_name = Ut.get_uri_local_name_plus(alignment)
        print("{:12} : {}".format("LOCAL NAME", local_name))

        search = std_queries["graphs_search"].format(local_name)
        matrix = Qry.sparql_xml_to_matrix(search)["result"]

        if matrix is not None:
            for row in matrix[1:]:
                if "singletons" not in row[0]:
                    return row[0]

        # Fallback: derive the linkset URI from a singleton URI.
        as_text = str(alignment)
        if Ns.singletons in as_text:
            return as_text.replace(Ns.singletons, Ns.linkset)
        return alignment

    except ValueError:
        traceback.print_exc()
        return alignment
Пример #2
0
def main_alignment(alignment):
    """Resolve an alignment URI to its main (non-singleton) graph.

    Runs the ``graphs_search`` query with the alignment's local name and
    returns the first matching graph (result rows after row 0) whose value
    does not contain "singletons".  If no such graph exists, a URI containing
    ``Ns.singletons`` is rewritten to use ``Ns.linkset``; otherwise the
    alignment is handed back untouched.
    """
    # Local name of the graph.
    graph_name = Ut.get_uri_local_name_plus(alignment)
    print("{:12} : {}".format("LOCAL NAME", graph_name))

    rows = Qry.sparql_xml_to_matrix(
        std_queries["graphs_search"].format(graph_name))["result"]

    if rows is not None:
        # Lazily scan the rows so that evaluation stops at the first match.
        main_graph = next(
            (row[0] for row in rows[1:] if "singletons" not in row[0]),
            None)
        if main_graph is not None:
            return main_graph

    alignment_text = str(alignment)
    if Ns.singletons in alignment_text:
        return alignment_text.replace(Ns.singletons, Ns.linkset)
    return alignment
Пример #3
0
def _write_export_file(directory, file_name, content):
    """Write ``content`` to ``file_name`` in ``directory`` (None -> empty file)."""
    with open(os.path.join(directory, file_name), "wb") as handle:
        handle.write(content or "")


def export_alignment_all(alignment, directory=None, limit=5000):
    """Export an alignment's metadata, links and singletons, then zip them.

    Three CONSTRUCT queries are run against the triple store and their
    results are written into ``directory``:

    * ``metadata.ttl``    - triples describing the alignment itself,
    * ``linkset.trig``    - the triples found in the alignment graph,
    * ``singletons.trig`` - triples about the link predicates, taken from
      the singleton graph given by ``Ut.from_alignment2singleton``.

    The directory is then zipped next to itself (in its parent directory)
    under the alignment's local name.

    :param alignment: URI of the alignment; wrapped in ``<>`` unless
        ``Ut.is_nt_format`` reports it is already in that form.
    :param directory: folder receiving the exported files; created (with any
        missing parents) when it does not exist.
    :param limit: maximum number of rows for the link and singleton queries;
        a falsy value comments the LIMIT clause out of both queries.
    :return: ``{'result': <value returned by Ut.zip_folder>,
        'message': <summary text>}``.
    :raises ValueError: if ``directory`` is None.
    """
    if directory is None:
        # The default used to fail deep inside os.path.join with an
        # unrelated error message; fail early and clearly instead.
        raise ValueError(
            "A target directory is required to export the alignment.")

    # Normalise to a path that ends with a separator.
    directory = os.path.join(directory, "")
    print(directory)
    target_folder = os.path.dirname(directory)
    if not os.path.isdir(target_folder):
        print("CREATING THE DIRECTORY")
        # makedirs also creates missing parent folders.
        os.makedirs(target_folder)

    original_alignment = alignment
    row_alignment = str(alignment).strip()
    alignment = row_alignment if Ut.is_nt_format(
        row_alignment) is True else "<{}>".format(row_alignment)

    # A falsy limit turns "LIMIT n" into a SPARQL comment.
    comment = "" if limit else "#"

    # ****************************************************
    # 1. GET THE METADATA OF THE ALIGNMENT
    # ****************************************************
    # NOTE(review): the "?O ?Q ?R" OPTIONAL is not used by the CONSTRUCT
    # template; confirm whether it can be dropped from the query.
    meta = """
    PREFIX ll: <{0}>
    CONSTRUCT {{ {1} ?y ?z. ?z ?p ?o . }}
    WHERE
    {{
        {1} ?y ?z .
        OPTIONAL{{ ?z ?p ?o . }}
        OPTIONAL{{ ?O ?Q ?R . }}
    }} order by ?y
    """.format(Ns.alivocab, alignment)

    meta_construct = Qry.endpointconstruct(meta, clean=False)
    # Braces are stripped so the result can be stored and parsed as Turtle.
    meta_construct = (meta_construct or "").replace("{", "").replace("}", "")
    _write_export_file(directory, "metadata.ttl", meta_construct)

    # ****************************************************
    # 2. GET THE CORRESPONDENCES OF THE LINKSET
    # ****************************************************
    query = """
        PREFIX ll: <{}>
        CONSTRUCT {{ ?x ?y ?z }}
        WHERE
        {{
            GRAPH {}
            {{
                ?x ?y ?z
            }}
        }} order by ?x {}LIMIT {}
        """.format(Ns.alivocab, alignment, comment, limit)

    alignment_construct = Qry.endpointconstruct(query, clean=False)
    if alignment_construct:
        # Put the graph URI in front of every opening brace.
        alignment_construct = alignment_construct.replace(
            "{", "{}\n{{".format(alignment))
    _write_export_file(directory, "linkset.trig", alignment_construct)

    # ****************************************************
    # 3. GET THE METADATA OF THE CORRESPONDENCES' PREDICATES
    # ****************************************************
    singleton_graph_uri = Ut.from_alignment2singleton(alignment)
    singleton_query = """
    PREFIX ll: <{0}>
    PREFIX singletons: <{1}>
    CONSTRUCT {{ ?predicate ?x  ?y }}
    WHERE
    {{
        {{
            SELECT ?predicate
            {{
                GRAPH {2}
                {{
                    ?subject ?predicate ?object
                }}
            }} order by ?x {3}LIMIT {4}
        }}
        GRAPH {5}
        {{
            ?predicate ?x  ?y
        }}
    }}
    """.format(Ns.alivocab, Ns.singletons, alignment, comment, limit,
               singleton_graph_uri)

    singleton_construct = Qry.endpointconstruct(singleton_query, clean=False)
    if singleton_construct:
        # Put the singleton graph URI in front of every opening brace.
        singleton_construct = singleton_construct.replace(
            "{", "{}\n{{".format(singleton_graph_uri))
    _write_export_file(directory, "singletons.trig", singleton_construct)

    # ****************************************************
    # 4. READ THE TRIPLE COUNT BACK FROM THE METADATA
    # ****************************************************
    sg = rdflib.Graph()
    if meta_construct:
        sg.parse(data=meta_construct, format="turtle")

    # NOTE(review): the subject is built from the alignment exactly as it
    # was passed in; confirm callers never pass it already wrapped in "<>".
    sbj = rdflib.URIRef(original_alignment)
    triples_uri = rdflib.URIRef("http://rdfs.org/ns/void#triples")

    triples = ""
    for item in sg.objects(sbj, triples_uri):
        triples = item
        print("TRIPLES:  {}".format(triples))

    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)

    # ****************************************************
    # 5. ZIP THE EXPORT FOLDER INTO ITS PARENT DIRECTORY
    # ****************************************************
    local_name = Ut.get_uri_local_name_plus(alignment)
    file_at_parent_directory = os.path.join(
        os.path.abspath(os.path.join(directory, os.pardir)),
        "{}.zip".format(local_name))

    zipped_file = Ut.zip_folder(directory,
                                output_file_path=file_at_parent_directory)
    print("\t>>> THE ZIPPED FILE IS LOCATED AT:\n\t\t- {}".format(zipped_file))
    print("Done with graph: {}".format(alignment))

    return {'result': zipped_file, 'message': message}
Пример #4
0
def intersection_extended(specs, lens_name, display=False):
    """Build the SPARQL INSERT query of an extended intersection lens.

    For every graph in ``specs[St.datasets]`` a SELECT query retrieves its
    subject/object targets and their entity types.  Depending on the number
    of rows in the result matrix (row 0 is skipped, presumably a header):

    * more than two rows: one ``{ ... } UNION`` pattern per row is added to
      the WHERE clause, wrapped in the graph;
    * exactly two rows: the graph pattern plus its singleton graph pattern
      are added to the WHERE clause, and matching triple patterns are added
      to the INSERT templates for links and metadata.

    :param specs: specification holding the graphs under ``St.datasets``.
    :param lens_name: name appended to ``Ns.lens`` / ``Ns.singletons`` to
        form the two target graphs of the INSERT.
    :param display: when true, print each generated SELECT query.
    :return: the INSERT query as a string.
    """
    links_buffer = Buffer.StringIO()
    metadata_buffer = Buffer.StringIO()
    where_sections = []

    union_template = """
    ### ABOUT {0}
    GRAPH <{0}>
    {{
        {1}
    }}
    """
    pair_template = """
    ### {2}. ABOUT {0}
    GRAPH <{0}>
    {{
        ?{1}    ?pred_{2}   ?{3} .
    }}
    ### SINGLETONS
    GRAPH <{4}>
    {{
        ?pred_{2}   ?x_{2}   ?y_{2} .
    }}"""

    def variable_names(row):
        """Return the (source, target) SPARQL variable names for a row."""
        source = Ut.get_uri_local_name(row[0])
        target = Ut.get_uri_local_name(row[1])

        # Names starting with a digit get a "D" prefix.
        if source[0].isdigit():
            source = "D{}".format(source)
        if target[0].isdigit():
            target = "D{}".format(target)

        source_type = Ut.get_uri_local_name(row[2])
        target_type = Ut.get_uri_local_name(row[3])

        # "short" is not defined in this function; presumably a
        # module-level slice offset.
        source_variable = "{}_{}_1".format(source, source_type[short:])

        # Same dataset and same type on both sides: use suffix 2 so the two
        # variables stay distinct.
        same_side = source == target and source_type == target_type
        target_variable = "{}_{}_{}".format(
            target, target_type[short:], 2 if same_side else 1)
        return source_variable, target_variable

    for graph_number, graph in enumerate(specs[St.datasets], 1):

        query = """
    PREFIX void: <{}>
    PREFIX bdb: <{}>
    SELECT distinct ?subTarget ?objTarget ?subjectEntityType ?objectEntityType
    {{
        <{}>
            #void:target*/(void:subjectsTarget|void:objectsTarget)* ?x ;
            void:target*/(void:subjectsTarget|void:objectsTarget)* ?x .

        ?x
            void:subjectsTarget     ?subTarget ;
            void:objectsTarget      ?objTarget ;
            bdb:subjectsDatatype    ?subjectEntityType ;
            bdb:objectsDatatype     ?objectEntityType .

        FILTER NOT EXISTS {{ ?subTarget a void:Linkset }}
        FILTER NOT EXISTS {{ ?objTarget a void:Linkset }}
    }}""".format(Ns.void, Ns.bdb, graph)
        response = sparql_xml_to_matrix(query)

        if display:
            print("INTERSECTION QUERY: {}".format(query))

        if not response:
            continue

        targets = response[St.result]
        if targets is None:
            continue

        row_count = len(targets)

        # SEVERAL TARGETS: ONE UNION MEMBER PER ROW
        if row_count > 2:
            last_index = row_count - 1
            members = []
            for index in range(1, row_count):
                source_variable, target_variable = variable_names(
                    targets[index])
                keyword = "UNION" if index < last_index else ""
                members.append(
                    "\n\t\t{0}{{ ?{1}  ?predicate_{2}  ?{3} . }} {4}".format(
                        "", source_variable, index, target_variable, keyword))
            where_sections.append(
                union_template.format(graph, "".join(members)))

        # ONLY TWO ROWS: A SINGLE TARGET PAIR
        elif row_count == 2:
            source_variable, target_variable = variable_names(targets[1])
            singleton_graph = "{}{}".format(
                Ns.singletons, Ut.get_uri_local_name_plus(graph))

            where_sections.append(pair_template.format(
                graph, source_variable, graph_number, target_variable,
                singleton_graph))

            links_buffer.write("\t\t?{}    ?pred_{}   ?{} .\n".format(
                source_variable, graph_number, target_variable))
            metadata_buffer.write(
                "\t\t?pred_{0}   ?x_{0}     ?y_{0}.\n".format(graph_number))

    insert_query = """INSERT\n{{
    ### LINKS
    GRAPH <{5}{4}>
    {{\n{1}\t}}

    ### METADATA
    GRAPH <{6}{4}>
    {{\n{3}\t}}\n}}\nWHERE\n{{{2}\n}}
    """.format("", links_buffer.getvalue(), "".join(where_sections),
               metadata_buffer.getvalue(), lens_name, Ns.lens, Ns.singletons)
    return insert_query