Example #1
def main_alignment(alignment):

    # ****************************************************************************
    # GIVEN AN ALIGNMENT, RETURN THE MAIN ONE
    # ****************************************************************************

    try:
        # LOCAL NAME OF THE GRAPH
        name = Ut.get_uri_local_name_plus(alignment)
        print "{:12} : {}".format("LOCAL NAME", name)
        query_search = std_queries["graphs_search"].format(name)
        response = Qry.sparql_xml_to_matrix(query_search)
        results = response["result"]
        if results is not None:
            for i in range(1, len(results)):
                if "singletons" not in results[i][0]:
                    return results[i][0]

        if Ns.singletons in str(alignment):
            return str(alignment).replace(Ns.singletons, Ns.linkset)

        else:
            return alignment

    except ValueError:
        traceback.print_exc()
        return alignment
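
The helpers these examples lean on (Qry.sparql_xml_to_matrix, Ut.get_uri_local_name_plus) are not shown here. Below is a minimal sketch of the matrix helper, assuming the standard SPARQL XML results format and nothing beyond the standard library; the real function also reports connection errors through its "message" field:

import xml.etree.ElementTree as ET

SPARQL_NS = "{http://www.w3.org/2005/sparql-results#}"

def xml_to_matrix(xml_text):
    # PARSE A SPARQL XML RESULT INTO A MATRIX: ROW 0 HOLDS THE VARIABLE
    # NAMES AND EVERY FOLLOWING ROW HOLDS ONE SOLUTION, HENCE range(1, ...)
    root = ET.fromstring(xml_text)
    variables = [v.get("name") for v in root.iter(SPARQL_NS + "variable")]
    matrix = [variables]
    for solution in root.iter(SPARQL_NS + "result"):
        row = dict((b.get("name"), "".join(b.itertext()).strip())
                   for b in solution.iter(SPARQL_NS + "binding"))
        matrix.append([row.get(var, "") for var in variables])
    return {"result": matrix if len(matrix) > 1 else None,
            "message": "OK" if len(matrix) > 1 else "NO RESPONSE"}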
Example #2
def added(early_version, late_version, stat=False, display=True, activated=False):

    if activated is False:
        print "\nTHE FUNCTION [added] IS NOT ACTIVATED"
        return {St.subject: None, St.predicate: None, St.triples: None}

    if stat is False:

        subj_added = subject(late_version, early_version, count=stat)
        prop_added = predicate(late_version, early_version, count=stat)

        # RESPONSE FOR TRIPLES ADDED
        resp_subj_added = Qr.sparql_xml_to_matrix(subj_added)
        resp_prop_added = Qr.sparql_xml_to_matrix(prop_added)

        status = (resp_subj_added[St.result] is not None and len(resp_subj_added[St.result]) > 1) or \
                 (resp_prop_added[St.result] is not None and len(resp_prop_added[St.result]) > 1)

        if display is True:

            # DISPLAY THE RESULTS FOR SUBJECT ADDED
            print "\n>>> DISPLAY THE RESULTS FOR SUBJECT ADDED"
            Qr.display_matrix(resp_subj_added, limit=10, is_activated=True)

            # DISPLAY THE RESULTS FOR PREDICATE ADDED
            print "\n>>> DISPLAY THE RESULTS FOR PREDICATE ADDED"
            Qr.display_matrix(resp_prop_added, limit=10, is_activated=True)

        return {"status": status,
                St.subject: resp_subj_added[St.result],
                St.predicate: resp_prop_added[St.result]}

    else:

        subj_added = subject(late_version, early_version, count=stat)
        prop_added = predicate(late_version, early_version, count=stat)
        resp_subj_added = Qr.sparql_xml_to_matrix(subj_added)
        resp_prop_added = Qr.sparql_xml_to_matrix(prop_added)

        status = (resp_subj_added[St.result] is not None and int(resp_subj_added[St.result][1][0]) > 0)\
            or (resp_prop_added[St.result] is not None and int(resp_prop_added[St.result][1][0]) > 0)

        return {"status": status,
                St.subject: resp_subj_added[St.result][1][0],
                St.predicate: resp_prop_added[St.result][1][0]}
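
The two "status" computations above differ only in the shape of the matrix: a plain SELECT carries a header row plus data rows, while a COUNT query carries a single number in cell [1][0]. A hypothetical helper condensing both tests:

def added_status(matrix, count_query):
    # matrix[0] IS THE HEADER ROW; DATA STARTS AT matrix[1]
    if matrix is None:
        return False
    if count_query:
        return int(matrix[1][0]) > 0   # ONE DATA ROW HOLDING A COUNT
    return len(matrix) > 1             # AT LEAST ONE DATA ROW

# added_status([["count"], ["0"]], count_query=True)  -> False
# added_status([["s"], ["http://ex.org/a"]], False)   -> True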
Example #3
def linkset_wasderivedfrom(refined_linkset_uri):
    query = """
    select *
    {{
        <{}>
            <http://www.w3.org/ns/prov#wasDerivedFrom> ?wasDerivedFrom .
    }}
    """.format(refined_linkset_uri)
    # print query
    dictionary_result = Qry.sparql_xml_to_matrix(query)
    # print dictionary_result
    if dictionary_result:
        if dictionary_result[St.result]:
            return dictionary_result[St.result][1][0]
    return None
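
The [St.result][1][0] indexing recurs throughout these examples: row 0 of the matrix is the header, so the first value of the first solution lives at [1][0]. A small hedged helper for that convention:

def first_value(response, key="result"):
    # FIRST CELL OF THE FIRST DATA ROW, OR None IF THE RESPONSE IS
    # MISSING, EMPTY, OR HEADER-ONLY
    if response and response.get(key) and len(response[key]) > 1:
        return response[key][1][0]
    return None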
Example #4
    def check_constraint():

        text = constraint_text.lower()
        text = text.split(",")

        # CONSTRAINT BUILDER
        c_builder = Buffer.StringIO()
        if constraint_targets is not None:
            for dictionary in constraint_targets:
                graph = dictionary[St.graph]
                data_list = dictionary[St.data]
                properties = data_list[0][St.properties]
                prop = properties[0] if Ut.is_nt_format(properties[0]) else "<{}>".format(properties[0])

                # WRITING THE CONSTRAINT ON THE GRAPH
                graph_q = """
       {{
           GRAPH <{0}>
           {{
               ?lookup {1} ?constraint .
           }}
       }}
       """.format(graph, prop)
                if len(c_builder.getvalue()) == 0:
                    c_builder.write(graph_q)
                else:
                    c_builder.write("UNION {}".format(graph_q))

            # WRITING THE FILTER
            if len(c_builder.getvalue()) > 0:
                for i in range(0, len(text)):
                    if i == 0:
                        c_builder.write("""
       FILTER (LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                    else:
                        c_builder.write("""
       || LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                c_builder.write(")")


        # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
        query = Qry.cluster_rsc_strengths_query(resources, linkset)
        query = query.replace("# CONSTRAINTS IF ANY", c_builder.getvalue())
        # print query
        response = Qry.sparql_xml_to_matrix(query)
        if response[St.result] is None:
            return False
        return True
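
To see what check_constraint injects into the query, here is a trimmed, standalone rerun of its builder logic on made-up graph/property pairs (Buffer.StringIO elsewhere appears to be the StringIO module under an alias):

from StringIO import StringIO

def build_constraint(graph_props, constraint_text):
    c_builder = StringIO()
    # ONE GRAPH PATTERN PER TARGET, CHAINED WITH UNION
    for graph, prop in graph_props:
        graph_q = "\n{{ GRAPH <{0}> {{ ?lookup <{1}> ?constraint . }} }}".format(graph, prop)
        if len(c_builder.getvalue()) == 0:
            c_builder.write(graph_q)
        else:
            c_builder.write("UNION" + graph_q)
    # ONE CASE-INSENSITIVE EQUALITY PER COMMA-SEPARATED TERM
    if len(c_builder.getvalue()) > 0:
        for i, term in enumerate(constraint_text.lower().split(",")):
            joiner = "FILTER (" if i == 0 else "|| "
            c_builder.write('\n{}LCASE(STR(?constraint)) = "{}" '.format(joiner, term.strip()))
        c_builder.write(")")
    return c_builder.getvalue()

print build_constraint([("http://ex.org/graphA", "http://ex.org/city")], "Paris, London")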
Example #5
def main_alignment(alignment):

    # LOCAL NAME OF THE GRAPH
    name = Ut.get_uri_local_name_plus(alignment)
    print "{:12} : {}".format("LOCAL NAME", name)
    query = std_queries["graphs_search"].format(name)
    response = Qry.sparql_xml_to_matrix(query)
    results = response["result"]
    if results is not None:
        for i in range(1, len(results)):
            if "singletons" not in results[i][0]:
                return results[i][0]

    if Ns.singletons in str(alignment):
        return str(alignment).replace(Ns.singletons, Ns.linkset)

    else:
        return alignment
Example #6
def get_corr_reducer(graph):
    query = """
    SELECT ?uri1 ?uri2
    {{
        GRAPH <{}>
        {{
            ?uri1    ?p    ?uri2 .
        }}
    }}""".format(graph)
    alignment = Qry.sparql_xml_to_matrix(query)
    table_matrix = alignment[St.result]
    reducer_dict = {}
    # GUARD: THE RESULT CAN BE None WHEN THE QUERY RETURNS NOTHING
    if table_matrix is not None and len(table_matrix) > 0:
        for row in table_matrix[1:]:
            src_uri = row[0].strip()
            trg_uri = row[1].strip()
            if len(row) == 2 and (src_uri, trg_uri) not in reducer_dict:
                reducer_dict[(src_uri, trg_uri)] = 1
    return reducer_dict
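
A short sketch of how such a reducer dictionary is consumed downstream: pairs that already appear in a previous alignment are skipped, so similarity is only computed for genuinely new pairs (fake URIs, hypothetical helper name):

def keep_unmatched(candidate_pairs, reducer_dict):
    # DROP SOURCE/TARGET PAIRS THAT WERE ALREADY MATCHED
    return [pair for pair in candidate_pairs if pair not in reducer_dict]

reducer = {("http://ex.org/a1", "http://ex.org/b1"): 1}
pairs = [("http://ex.org/a1", "http://ex.org/b1"),
         ("http://ex.org/a2", "http://ex.org/b2")]
print keep_unmatched(pairs, reducer)   # ONLY THE a2/b2 PAIR SURVIVES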
Example #7
def get_table(dataset_specs, reducer=None):

    # ADD THE REDUCER IF SET. THE REDUCER (OR DATASET REDUCER) HELPS AVOID
    # COMPUTING SIMILARITY FOR INSTANCES THAT WERE ALREADY MATCHED
    print "\nLOADING: {} {}".format(dataset_specs[St.graph],
                                    dataset_specs[St.entity_datatype])
    if reducer is None:
        reducer_comment = "#"
        reducer = ""
    else:
        reducer_comment = ""
    aligns = dataset_specs[St.aligns] if Ut.is_nt_format(dataset_specs[St.aligns]) \
        else "<{}>".format(dataset_specs[St.aligns])
    query = """
    SELECT DISTINCT *
    {{
        GRAPH <{0}>
        {{
            ?subject
                a       <{1}> ;
                {2}    ?object .
        }}
        {4}FILTER NOT EXISTS
        {4}{{
        {4}    GRAPH <{3}>
        {4}    {{
        {4}        {{ ?subject   ?pred   ?obj . }}
        {4}        UNION
        {4}        {{ ?obj   ?pred   ?subject. }}
        {4}    }}
        {4}}}
    }} {5}
    """.format(dataset_specs[St.graph], dataset_specs[St.entity_datatype],
               aligns, reducer, reducer_comment, LIMIT)
    table_matrix = Qry.sparql_xml_to_matrix(query)
    # Qry.display_matrix(table_matrix, is_activated=True)
    # print table_matrix
    # print query
    if table_matrix[St.result]:
        print "\tINPUT SIZE: {}".format(str(len(table_matrix[St.result]) - 1))
    return table_matrix[St.result]
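
The {4} placeholder is the interesting trick here: the same template yields two different queries because every reducer line is prefixed with either an empty string or a "#" comment marker. Demonstrated in isolation:

TEMPLATE = """SELECT DISTINCT *
{{
    ?subject a <{0}> .
    {2}FILTER NOT EXISTS {{ GRAPH <{1}> {{ ?subject ?p ?o . }} }}
}}"""

# WITH A REDUCER GRAPH THE FILTER LINE IS ACTIVE ...
print TEMPLATE.format("http://ex.org/Type", "http://ex.org/reducer", "")
# ... WITHOUT ONE, THE SAME LINE IS COMMENTED OUT
print TEMPLATE.format("http://ex.org/Type", "", "#")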
Example #8
def get_table(dataset_specs, reducer=None):
    # ADD THE REDUCER IF SET
    if reducer is None:
        reducer_comment = "#"
        reducer = ""
    else:
        reducer_comment = ""
    aligns = dataset_specs[St.aligns] if Ut.is_nt_format(dataset_specs[St.aligns]) \
        else "<{}>".format(dataset_specs[St.aligns])
    query = """
    SELECT DISTINCT *
    {{
        GRAPH <{0}>
        {{
            ?subject
                a       <{1}> ;
                {2}    ?object .
        }}
        {4}FILTER NOT EXISTS
        {4}{{
        {4}    GRAPH <{3}>
        {4}    {{
        {4}        {{ ?subject   ?pred   ?obj . }}
        {4}        UNION
        {4}        {{ ?obj   ?pred   ?subject. }}
        {4}    }}
        {4}}}
    }} {5}
    """.format(
        dataset_specs[St.graph], dataset_specs[St.entity_datatype], aligns,
        reducer, reducer_comment, LIMIT)
    table_matrix = Qry.sparql_xml_to_matrix(query)
    # Qry.display_matrix(table_matrix, is_activated=True)
    # print table_matrix
    # print query
    return table_matrix[St.result]
Example #9
def lens_targets_unique(unique_list, graph):
    def get_targets(graph_uri):
        target_query = PREFIX + """
        ### GET LINKSET METADATA
        SELECT DISTINCT ?g
        WHERE
        {{
            {{
                <{0}> void:subjectsTarget  ?g .
            }}
            UNION
            {{
                <{0}> void:objectsTarget  ?g .
            }}
            UNION
            {{
                <{0}> void:target  ?g .
            }}
            UNION
            {{
                <{0}> alivocab:hasAlignmentTarget  ?alignmentTarget  .
                ?alignmentTarget   alivocab:hasTarget    ?g .
            }}
        }}
        """.format(graph_uri)

        return target_query

    def get_lens_union_targets(lens):
        u_query = PREFIX + """
        select *
        {{
            {{ <{0}> void:target ?target . }} UNION
            {{
                <{0}> alivocab:hasAlignmentTarget  ?alignmentTarget  .
                ?alignmentTarget   alivocab:hasTarget    ?target .
            }}
        }}
        """.format(lens)

        return u_query

    # THIS FUNCTION TAKES AS INPUT A LENS AND FILLS IN THE DICTIONARY
    # ARGUMENT WITH UNIQUE DATASETS INVOLVED IN THE LENS

    # GET THE TYPE OF THE GRAPH: e.g.: http://rdfs.org/ns/void#Linkset
    type_matrix = Qry.get_graph_type(graph)
    # print type_matrix

    if type_matrix[St.message] != "NO RESPONSE":

        if type_matrix[St.result]:

            # THIS IS THE BASE OF THE RECURSION
            if type_matrix[St.result][1][0] == "http://rdfs.org/ns/void#Linkset":

                # QUERY FOR THE GRAPHS/DATASETS
                query = get_targets(graph)
                result = Qry.sparql_xml_to_matrix(query)
                # print query
                # print "\n\nRESULT:", result

                # SAVE THE GRAPHS AND MAKE SURE THEY ARE UNIQUE
                if result[St.result]:
                    for i in range(1, len(result[St.result])):
                        if result[St.result][i][0] not in unique_list:
                            unique_list.append(result[St.result][i][0])
                            # print result[i]
                return

            if type_matrix[St.result][1][0] == "http://vocabularies.bridgedb.org/ops#Lens":
                # print "I am Keanu Reeves"
                # GET THE OPERATOR
                # alivocab:operator	 http://risis.eu/lens/operator/union
                operator = Qry.get_lens_operator(graph)
                print "\nOPERATOR:", operator

                if operator == "http://risis.eu/lens/operator/union":
                    # GET THE LIST OF TARGETS
                    target_matrix = Qry.sparql_xml_to_matrix(
                        get_lens_union_targets(graph))
                    if target_matrix[St.result]:
                        for i in range(1, len(target_matrix[St.result])):
                            lens_targets_unique(unique_list,
                                                target_matrix[St.result][i][0])
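
lens_targets_unique is a recursion over a graph-of-graphs: linksets are the leaves, and union lenses recurse into their targets. The shape of that walk, reduced to plain dictionaries with stand-in data:

def collect_leaves(unique_list, node, children_of):
    # LINKSETS (NO CHILDREN) ARE LEAVES; LENSES RECURSE INTO THEIR TARGETS
    kids = children_of.get(node)
    if not kids:
        if node not in unique_list:
            unique_list.append(node)
        return
    for kid in kids:
        collect_leaves(unique_list, kid, children_of)

children_of = {"lensA": ["linkset1", "lensB"], "lensB": ["linkset2", "linkset1"]}
leaves = []
collect_leaves(leaves, "lensA", children_of)
print leaves   # ['linkset1', 'linkset2']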
Example #10
def run_checks(specs, query):

    print "\n3. RUNNING GOOD TO GO CHECK"
    # print "QUERY FOR CHECK:", query
    # CHECK-1: CHECK WHETHER THE LENS EXISTS BY ASKING ITS METADATA WHETHER IT IS COMPOSED OF THE SAME GRAPHS
    print "QUERY:", query
    ask = Qry.sparql_xml_to_matrix(query)
    print "\t3.1 ANSWER 1:", ask['message']

    # ASK IS NOT SUPPOSED TO BE NONE
    # CHECK-1-RESULT: PROBLEM CONNECTING WITH THE SERVER
    if ask is None:
        # print "IN 1"
        print Ec.ERROR_CODE_1
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 1, St.result: None}

    # CHECK-1-RESULT: ASK HAS A RESULT, MEANING THE LENS EXISTS UNDER THE SAME COMPOSITION OF GRAPHS
    elif ask[St.message] != "NO RESPONSE":
        print "\tFOUND"
        if ask[St.result]:
            for i in range(1, len(ask[St.result])):
                print "\t\t- {}".format(ask[St.result][i][0])
        # IF THERE IS RESULT WITH THE SAME NUMBER OF TARGETS THEN THE LENS ALREADY EXISTS
        if ask[St.result] and len(ask[St.result]) - 1 == len(
                specs[St.datasets]):

            message = Ec.ERROR_CODE_7.replace("#", specs[St.lens]).replace(
                "@", ask[St.result][1][0])
            print message
            return {
                St.message: message.replace("\n", "<br/>"),
                St.error_code: 1,
                St.result: specs[St.lens]
            }
        print "\tCHECK 1: THERE IS NO METADATA FOR TIS LENS"
        # ELSE
        # WITH THE UNSTATED ELSE, WE GET OUT AND PROCEED TO THE CREATION OF A NEW LENS
    else:
        print "IN 3"
        print Ec.ERROR_CODE_1
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 1, St.result: None}

    # print "GOT OUT!!!"
    update_specification(specs)

    # print "CHECK 2: CHECK WHETHER THE ACTUAL LENS EXISTS UNDER THIS NAME"
    check_02 = "\nASK {{ graph <{}> {{ ?S ?p ?o . }} }}".format(specs[St.lens])
    ask = Qry.boolean_endpoint_response(check_02)
    # print specs
    # print check_02
    # print ask

    if ask is None:
        # PROBLEM CONNECTING WITH THE SERVER
        print " CHECK 2: PROBLEM CONNECTING WITH THE SERVER"
        print Ec.ERROR_CODE_1
        return {
            St.message: Ec.ERROR_CODE_1,
            St.error_code: 1,
            St.result: specs[St.lens]
        }

    if ask == "true":
        print " CHECK 2: THE LINKSET ALREADY EXISTS"
        message = Ec.ERROR_CODE_6.replace("#", specs[St.lens])
        print message
        return {
            St.message: message.replace("\n", "<br/>"),
            St.error_code: 1,
            St.result: specs[St.lens]
        }

    print "\n\tDIAGNOSTICS: GOOD TO GO\n"
    return {
        St.message: "GOOD TO GO",
        St.error_code: 0,
        St.result: "GOOD TO GO"
    }
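
CHECK 2 above is the generic "does this named graph hold any triple at all" probe. The query construction on its own, assuming an endpoint wrapper that, like Qry.boolean_endpoint_response, returns the literal strings "true" or "false":

def graph_exists_query(graph_uri):
    # ASK WHETHER THE NAMED GRAPH CONTAINS AT LEAST ONE TRIPLE
    return "ASK {{ GRAPH <{}> {{ ?s ?p ?o . }} }}".format(graph_uri)

print graph_exists_query("http://risis.eu/lens/example")
# ASK { GRAPH <http://risis.eu/lens/example> { ?s ?p ?o . } }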
Example #11
def reconstruct(linkset, gr_type, predicate):

    print "RECONSTRUCTING"
    # pattern = re.compile('[^a-zA-Z]')
    graph_format = "\t{:40} {}"
    sub_obj = None
    source = ""
    target = ""
    correspondence = ""
    singleton = ""
    singleton_triple = "\n\t\t?{:50} ?{:20} ?{} .".format(
        "subject", "sing_predicate", "object")

    singleton_matrix = Qry.sparql_xml_to_matrix(
        Qry.get_singleton_graph(linkset))
    # print "Singleton graph of {}".format(linkset), singleton_matrix
    # exit(0)
    # SINGLETON EXAMPLE
    # GRAPH <http://risis.eu/lens/singletonMetadata/transitive_C000_ExactName>
    # {
    # 	?subject            sing_predicate          ?object .
    # }
    if singleton_matrix is not None and singleton_matrix[
            St.result] is not None:
        singleton_graph = singleton_matrix[St.result][1][0]
        if singleton_graph is not None:
            singleton = "\n{}\n{}\n{}\n{}\n" \
                .format("\tGRAPH <{}>".format(singleton_graph),
                        "\t{",
                        "\t\t?{:50} ?{:20} ?{} .".format("subject", "sing_predicate", "object"),
                        "\t}")
            # print  "\t", singleton

    # print str(graph_type).upper()
    # print str(graph_type).upper() == "LINKSET"

    # ABOUT LINKSET UNION
    if str(gr_type).upper() == "LINKSET":

        print "\nRECONSTRUCTING CASE: Linkset"

        datatype_matrix = Qry.get_linkset_datatypes(linkset)
        # print datatype_matrix

        if datatype_matrix is not None and datatype_matrix[St.result]:
            sub_obj = datatype_matrix[St.result][1][4:6]
            # source = pattern.sub("", str(datatype_matrix [St.result][1][4]))
            source = get_uri_local_name(str(datatype_matrix[St.result][1][4]))
            # target = pattern.sub("", str(datatype_matrix [St.result][1][5]))
            target = get_uri_local_name(str(datatype_matrix[St.result][1][5]))

            # CORRESPONDENCE EXAMPLE
            # GRAPH <http://risis.eu/lens/transitive_C000_ExactName>
            # {
            # 	?leidenRanking ?singPre ?eter .
            # }
            correspondence = "{}\n{}\n{}\n{}".\
                format("\tGRAPH <{}>".format(linkset),
                       "\t{",
                       "\t\t?{:50} ?{:20} ?{} .".format(source, predicate, target),
                       "\t}")

    # DETERMINING WHETHER A LENS IS STEMMED FROM THE SAME subjectsTarget & objectsTarget
    elif str(gr_type).upper() == "LENS":
        print "\nRECONSTRUCTING CASE: Lens"
        # TODO: USE A PROPERTY PATH
        query = """
        PREFIX bdb: <http://vocabularies.bridgedb.org/ops#>
        PREFIX void: <http://rdfs.org/ns/void#>
        SELECT ?target ?subjectsTarget ?objectsTarget
        {{
          <{}> void:target ?target .
          ?target
            void:subjectsTarget     ?subjectsTarget ;
            void:objectsTarget      ?objectsTarget .
        }}
        """.format(linkset)
        # print query
        evaluation = False

        datatype_matrix = Qry.sparql_xml_to_matrix(query)
        # print "DATATYPE: ", datatype_matrix
        # print len(datatype_matrix)

        if datatype_matrix is None:
            print "THERE IS NO METADATA FOR THIS DATASET. "
            print "\nNO POSSIBLE RECONSTRUCTION FOR {}: {}".format(
                gr_type, linkset)
            print "ARE YOU SURE THE GRAPH IS OF TYPE [{}]?".format(gr_type)
            return None

        elif datatype_matrix[St.result] is not None and len(datatype_matrix[St.result]) > 1:
            element = datatype_matrix[St.result][1][1:]
            # print element
            # ITERATE OVER THE RESULT ROWS (NOT THE RESPONSE DICTIONARY)
            for i in range(1, len(datatype_matrix[St.result])):
                check = datatype_matrix[St.result][i][1:]
                evaluation = element == check
                # print check
                # print "result: ", evaluation
                if evaluation is not True:
                    evaluation = False
                    break
                else:
                    evaluation = True

            if evaluation is True:

                # singleton_matrix = sparql_xml_to_matrix(singleton_graph_query, database_name, host)

                sub_obj = element
                # source = pattern.sub("", str(element[0]))
                source = get_uri_local_name(str(element[0]))
                # target = pattern.sub("", str(element[1]))
                target = get_uri_local_name(str(element[1]))

                correspondence = "{}\n{}\n{}\n{}" \
                    .format("\tGRAPH <{}>".format(linkset),
                            "\t{",
                            "\t\t?{:50} ?{:20} ?{} .".format(source, predicate, target),
                            "\t}")

                print graph_format.format(sub_obj[0], sub_obj[1])

            else:
                return None

    # TEMPORARY GRAPH EXAMPLE
    # INSERT
    # {
    #   GRAPH temp:load001
    #   {
    #       ?leidenRanking  ?singPre                ?eter .
    #       ?subject        ?sing_predicate         ?object .
    #   }
    # }
    # WHERE
    # {
    #   GRAPH <http://risis.eu/lens/transitive_C000_ExactName>
    #   {
    # 	    ?leidenRanking  ?singPre                ?eter .
    #   }
    #   GRAPH <http://risis.eu/lens/singletonMetadata/transitive_C000_ExactName>
    #   {
    # 	    ?subject        ?sing_predicate          ?object .
    #   }
    # }
    insert_q = "{}\n{}\n{}\n{}\n{}{}\n{}\n{}" \
               "\n{}\n{}\n{}\n{}{}". \
        format("INSERT",
               "{",
               "   GRAPH tmpgraph:{}".format(predicate),
               "   {",
               "\t\t?{:50} ?{:20} ?{} .".format(source, predicate, target),
               "{}".format(singleton_triple),
               "    }",
               "}",

               "WHERE",
               "{",
               "{}".format(correspondence),
               "{}".format(singleton),
               "}")

    if singleton:
        correspondence += singleton

    if sub_obj is not None:
        print graph_format.format(sub_obj[0], sub_obj[1])

    return [sub_obj, insert_q]
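
The string assembly in reconstruct is hard to follow; rendering a simplified version of the final INSERT with dummy names shows the intended shape (the tmpgraph: prefix and variable naming follow the commented example above):

def build_insert(predicate, source, target, linkset, singleton_graph):
    # THE WHERE CLAUSE JOINS THE LINK GRAPH WITH ITS SINGLETON GRAPH
    correspondence = "\tGRAPH <{0}>\n\t{{\n\t\t?{1} ?{2} ?{3} .\n\t}}".format(
        linkset, source, predicate, target)
    singleton = "\tGRAPH <{0}>\n\t{{\n\t\t?subject ?sing_predicate ?object .\n\t}}".format(
        singleton_graph)
    return ("INSERT\n{{\n\tGRAPH tmpgraph:{0}\n\t{{\n\t\t?{1} ?{0} ?{2} .\n"
            "\t\t?subject ?sing_predicate ?object .\n\t}}\n}}\n"
            "WHERE\n{{\n{3}\n{4}\n}}").format(predicate, source, target,
                                              correspondence, singleton)

print build_insert("singPre", "leidenRanking", "eter",
                   "http://risis.eu/lens/transitive_C000_ExactName",
                   "http://risis.eu/lens/singletonMetadata/transitive_C000_ExactName")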
Example #12
def is_refinable(graph):

    # x = "http://risis.eu/lens/union_Grid_20170712_H2020_P1626350579"
    description = Buffer.StringIO()

    query = """
    PREFIX bdb:         <{}>
    PREFIX void:        <{}>
    PREFIX ll:          <{}>
    SELECT DISTINCT ?subjectsTarget  ?objectsTarget ?subjectsDatatype ?objectsDatatype
    {{
        <{}>
            void:target|void:subjectsTarget|void:objectsTarget ?linkset .

        ?linkset
            void:objectsTarget		?objectsTarget ;
            void:subjectsTarget		?subjectsTarget ;
            bdb:objectsDatatype		?objectsDatatype ;
            bdb:subjectsDatatype	?subjectsDatatype .
    }}""".format(Ns.bdb, Ns.void, Ns.alivocab, graph)

    response = Qry.sparql_xml_to_matrix(query)
    # print response

    if response:
        result = response[St.result]
        if result is not None and len(result) == 2:
            description.write(
                "\n{}\nIS REFINABLE AS ALL LINKSETS INVOLVED IN "
                "THE LENS SHARE THE SAME SPECIFICATION DESCRIBED BELOW ...".
                format(graph))
            for i in range(1, len(result)):
                description.write("\n\n\t{:17}: {}".format(
                    result[0][0], result[i][0]))
                description.write("\n\t{:17}: {}".format(
                    result[0][1], result[i][1]))
                description.write("\n\t{:17}: {}".format(
                    result[0][2], result[i][2]))
                description.write("\n\t{:17}: {}\n".format(
                    result[0][3], result[i][3]))

            print description.getvalue()
            return {
                St.message: True,
                St.result: result,
                'description': description
            }

        description.write("\n{}\nIS NOT REFINABLE...".format(graph))
        if result is not None:
            result = response[St.result]
            description.write(
                " AS THE LINKSETS INVOLVED IN "
                "THE LENS DO NOT SHARE THE SAME SPECIFICATIONS AS DESCRIBED BELOW ..."
            )
            for i in range(1, len(result)):
                description.write("\n\n{:17}: {}".format(
                    result[0][0], result[i][0]))
                description.write("\n{:17}: {}".format(result[0][1],
                                                       result[i][1]))
                description.write("\n{:17}: {}".format(result[0][2],
                                                       result[i][2]))
                description.write("\n{:17}: {}\n".format(
                    result[0][3], result[i][3]))

            print description.getvalue()

        description.write(" {}".format(response[St.message]))
        print description.getvalue()
        return {
            St.message: False,
            St.result: response,
            'description': description
        }

    description.write(" {}".format(response[St.message]))
    description.write("\n{}\nIS NOT REFINABLE...".format(graph))
    print description.getvalue()
    return {St.message: False, St.result: response, 'description': description}
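
The refinability test boils down to one observation: the SELECT is DISTINCT, so if every linkset behind the lens reports the same (subjectsTarget, objectsTarget, subjectsDatatype, objectsDatatype) quadruple, the matrix collapses to a header plus exactly one data row, hence len(result) == 2. In isolation:

def shares_single_spec(matrix):
    # matrix[0] IS THE HEADER; SELECT DISTINCT COLLAPSES IDENTICAL ROWS,
    # SO EXACTLY ONE DATA ROW MEANS ALL LINKSETS SHARE ONE SPECIFICATION
    return matrix is not None and len(matrix) == 2

print shares_single_spec([["sbj", "obj"], ["eter", "grid"]])                   # True
print shares_single_spec([["sbj", "obj"], ["eter", "grid"], ["orgref", "x"]])  # False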
Example #13
def university_connected_geo(file_path, merged_lens, country_constraint, activated=False):

        if activated is False:
            print "THE FUNCTION [university_connected] IS NOT ACTIVATED."
            return None

        # VARIABLES
        grid = "<http://www.grid.ac/ontology/hasAddress>/<http://www.grid.ac/ontology/countryCode>"
        eter = "<http://risis.eu/eter_2014/ontology/predicate/Country_Code>"
        country_predicates = [eter, grid]
        graphs = [Data.eter_GRAPH, Data.grid_GRAPH]
        names = [">>> ETER", ">>> GRID"]
        results = [{"result": None}, {"result": None}]
        size = 0
        excel = Builder.StringIO()

        # 1. THE QUERY CONSTRAINT FILTER
        query_filter = ""
        for i in range(0, len(country_constraint)):
            query_filter += "ucase(?country) = ucase(\"{}\")".format(country_constraint[i]) if i == 0 \
                else " || ucase(?country) = ucase(\"{}\")".format(country_constraint[i])

        # 2. MAIN QUERY
        query = """
        PREFIX lens:<http://risis.eu/lens/>
        PREFIX dataset:<http://risis.eu/dataset/>
        PREFIX property:<http://risis.eu/orgreg_20170718/ontology/predicate/>
        PREFIX rsc:<http://risis.eu/orgreg_20170718/ontology/class/>
        SELECT DISTINCT ?entity ?university ?name
        {{
            {{
                SELECT  DISTINCT  ?entity ?university ?name
                {{

                    # UNIVERSITIES IN ORGREG
                    GRAPH <{4}>
                    {{
                        ?orgreg_entity a rsc:University .
                        ?orgreg_entity  property:Entity_current_name_English ?name .
                    }}

                    # UNIVERSITIES CONNECTED
                    GRAPH <{1}>
                    {{
                        ?entity a ?type .
                    }}

                    # ALL UNIVERSITIES CONNECTED IN THE LENS
                    BIND(<{0}> AS ?lens)
                    {{ GRAPH ?lens {{ ?entity ?pred ?orgreg_entity . }}}}
                    UNION
                    {{ GRAPH ?lens {{ ?orgreg_entity ?pred ?entity . }}}}
                }}
            }}

            VALUES ?name_pred
            {{
                <http://www.w3.org/2000/01/rdf-schema#label>
                <http://risis.eu/eter_2014/ontology/predicate/Institution_Name>
                <http://risis.eu/orgreg_20170718/ontology/predicate/Entity_current_name_English>
                <http://risis.eu/orgref_20170703/ontology/predicate/Name>
                <http://risis.eu/leidenRanking_2015/ontology/predicate/actor>
                <http://xmlns.com/foaf/0.1/name>
            }}

            # FETCH ORGANIZATION URI AND NAME
            GRAPH <{1}>
            {{
                ?entity ?name_pred ?university .
                OPTIONAL {{ ?entity {2} ?_country . }}
                BIND (IF(bound(?_country), ?_country , "NONE") AS ?country)
                FILTER ({3})
            }}
        }} ORDER BY ?university
        """

        # QUERY LOOP
        for i in range(0, 2):

            start = time.time()
            cur_query = query.format(merged_lens, graphs[i], country_predicates[i], query_filter, Data.orgreg_GRAPH)
            # print cur_query
            results[i] = Qr.sparql_xml_to_matrix(cur_query)
            # Qr.display_result(query=cur_query, spacing=50, limit=5, is_activated=True)
            temp_size = dict(results[i])["result"].__len__() - 1
            elapsed = str(datetime.timedelta(seconds=time.time() - start))
            sofar = str(datetime.timedelta(seconds=time.time() - begining))
            print "\n {} in {} and so far in [{}]".format(names[i], elapsed, sofar)
            if temp_size > size:
                size = temp_size
                # print cur_query
                # exit(0)

        print "\n >>> MAX SIZE {}".format(size)

        # GOING THROUGH THE SIX DATASETS
        for row in range(1, size + 1):

            excel.write(str(row) + "\t")

            # GO THROUGH EACH RESULT
            for i in range(0, 2):

                query_results = dict(results[i])["result"]
                if query_results is not None and row < len(query_results):
                    elt = "{}\t{}\t{}".format(query_results[row][0],
                                              query_results[row][1].replace("\t", ""),
                                              query_results[row][2].replace("\t", ""))
                    excel.write(elt + "\t") if i < 1 else excel.write(elt + "\n")
                else:
                    excel.write("\t\t\t") if i < 1 else excel.write("\t\t\n")

            # SAMPLE
            if row == 100:
                print "\n", excel.getvalue()
                #          break

        # SAMPLE
        # print "\n", excel.getvalue()

        with open(file_path, "wb") as writer:
            writer.write(excel.getvalue())
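
The export loop pads each dataset's three columns so rows stay aligned even when the result sizes differ. The row-stitching logic on its own, with two fake matrices (header row included, as everywhere else in these examples):

def stitch_rows(matrices, columns=3):
    # WALK ROW INDICES UP TO THE LARGEST MATRIX; PAD MISSING ROWS WITH TABS
    sizes = [len(m) - 1 for m in matrices if m]
    lines = []
    for row in range(1, (max(sizes) if sizes else 0) + 1):
        cells = []
        for m in matrices:
            if m is not None and row < len(m):
                cells.extend(c.replace("\t", "") for c in m[row][:columns])
            else:
                cells.extend([""] * columns)
        lines.append("\t".join(cells))
    return "\n".join(lines)

a = [["uri", "univ", "name"], ["u1", "Univ A", "A"]]
b = [["uri", "univ", "name"], ["u2", "Univ B", "B"], ["u3", "Univ C", "C"]]
print stitch_rows([a, b])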
Example #14
def register_research_question(question):

    print "REGISTERING A RESEARCH QUESTION." \
          "\n======================================================" \
          "========================================================"

    if True:
        # CHECK WHETHER THE RESEARCH QUESTION ALREADY EXISTS
        question = to_bytes(to_unicode(question, "utf-8"))
        existence_query = check_rq_existence(question)
        check = Qry.boolean_endpoint_response(existence_query)

        # LOOK FOR A RESEARCH QUESTION WITH THE SAME NAME
        find_query = find_rq(question)

        # AN INTERNAL PROBLEM OCCURRED
        if check is None:
            return check

        # THE RESEARCH QUESTION WAS ALREADY REGISTERED
        elif check == "true":

            find = Qry.sparql_xml_to_matrix(find_query)
            # print find
            if find:
                if find[St.result]:
                    message = MESSAGE_1.replace("@", find[St.result][1][0])
                    print message
                    return {
                        St.message: message.replace("@", "<br/>"),
                        St.result: find[St.result][1][0]
                    }
                return find
            else:
                return find

        # REGISTERING YOUR RESEARCH QUESTION
        else:
            print "REGISTERING THE RESEARCH QUESTION"
            ins_rq = research_question(question)
            # print ins_rq
            inserted = Qry.boolean_endpoint_response(ins_rq)
            print "INSERTED RESULT:", inserted

            #  THE REGISTRATION WAS NOT SUCCESSFUL
            if inserted is None:
                print "THE RESEARCH QUESTION WAS REGISTERED"
                print MESSAGE_3

            # THE REGISTRATION WAS SUCCESSFUL. RETRIEVE THE URI
            if inserted == "true" or inserted == STARDOG_BOOLEAN_BUG_MESSAGE:
                print "THE RESEARCH QUESTION IS REGISTERED"
                find = Qry.sparql_xml_to_matrix(find_query)
                if find:
                    if find[St.result]:
                        message = MESSAGE_2.replace("@", find[St.result][1][0])
                        print message
                        return {
                            St.message: message.replace("@", "<br/>"),
                            St.result: find[St.result][1][0]
                        }

                    return {
                        St.message: MESSAGE_4.replace("@", "<br/>"),
                        St.result: None
                    }
                else:
                    return find

            print {
                St.message: MESSAGE_3.replace("@", "<br/>"),
                St.result: None
            }
Example #15
def cluster_d_test_statss(linkset, network_size=3, targets=None, directory=None,
                          greater_equal=True, print_it=False, limit=None, activated=False):
    network = []
    print "LINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    count_1 = 0
    count_2 = 0
    sheet_builder = Buffer.StringIO()
    analysis_builder = Buffer.StringIO()
    sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                        "		M. NETWORK QUALITY		REFERENCE\n")
    linkset = linkset.strip()
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    for i_cluster in clusters_0.items():

        # network = []
        resources = ""
        uri_size = 0
        count_1 += 1
        children = i_cluster[1][St.children]
        cluster_size = len(children)
        # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
        #     continue

        check = cluster_size >= network_size if greater_equal else cluster_size == network_size

        # NETWORK OF A PARTICULAR SIZE
        if check:
            count_2 += 1
            # file_name = i_cluster[0]

            # 2: FETCHING THE CORRESPONDENTS
            smallest_hash = float('inf')
            child_list = ""
            for child in children:
                hashed = hash(child)
                if hashed <= smallest_hash:
                    smallest_hash = hashed

                # GENERAL INFO 1: RESOURCES INVOLVED
                child_list += "\t{}\n".format(child)

                use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                resources += "\n\t\t\t\t{}".format(use)
                if len(child) > uri_size:
                    uri_size = len(child)

            if directory:
                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                # QUERY FOR FETCHING ALL LINKED RESOURCES FROM THE LINKSET
                query = """
                PREFIX prov: <{3}>
                PREFIX ll: <{4}>
                SELECT DISTINCT ?lookup ?object ?Strength ?Evidence
                {{
                    VALUES ?lookup{{ {0} }}

                    {{
                        GRAPH <{1}>
                        {{ ?lookup ?predicate ?object .}}
                    }} UNION
                    {{
                        GRAPH <{1}>
                        {{?object ?predicate ?lookup . }}
                    }}

                    GRAPH <{2}>
                    {{
                        ?predicate  prov:wasDerivedFrom  ?DerivedFrom  .
                        OPTIONAL {{ ?DerivedFrom  ll:hasStrength  ?Strength . }}
                        OPTIONAL {{ ?DerivedFrom  ll:hasEvidence  ?Evidence . }}
                    }}
                }}
                            """.format(resources, linkset, linkset.replace("lens", "singletons"),
                                       Ns.prov, Ns.alivocab)
                # print query

                # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                response = Qry.sparql_xml_to_matrix(query)

                # A DICTIONARY OF KEY: (SUBJECT-OBJECT) VALUE:STRENGTH
                response_dic = dict()
                result = response[St.result]
                if result:
                    for i in range(1, len(result)):
                        key = (result[i][0], result[i][1])
                        if key not in response_dic:
                            response_dic[key] = result[i][2]

                # print response_dic

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))
                print "{:>5} {}".format(count_2, info2)

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    analysis_builder.write(Cls.disambiguate_network_2(children, targets))

                position = i_cluster[1][St.row]
                if St.annotate in i_cluster[1]:
                    analysis_builder.write("\n\nANNOTATED CLUSTER PROCESS")
                    analysis_builder.write(i_cluster[1][St.annotate])

                # THE CLUSTER
                # print "POSITION: {}".format(position)
                # print "\nMATRIX DISPLAY\n"
                # for i in range(0, position):
                #     resource = (i_cluster[1][St.matrix])[i]
                #     print "\t{}".format(resource[:position])
                    # print "\t{}".format(resource)

                # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                for i in range(1, position):
                    for j in range(1, position):
                        if (i, j) in (i_cluster[1][St.matrix_d]) and (i_cluster[1][St.matrix_d])[(i, j)] != 0:
                            r = (i_cluster[1][St.matrix_d])[(i, 0)]
                            c = (i_cluster[1][St.matrix_d])[(0, j)]
                            r_name = "{}:{}".format(i, Ut.get_uri_local_name(r))
                            c_name = "{}:{}".format(j, Ut.get_uri_local_name(c))
                            network += [(r_name, c_name)]
                            # network += [(r_smart, c_smart)]
                # print "\tNETWORK", network

            if print_it:
                print ""
                print analysis_builder.getvalue()

            # SETTING THE DIRECTORY
            if directory:
                # linkset_name = Ut.get_uri_local_name(linkset)
                # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                    network_size, date, linkset_name, cluster_size, file_name))
                if not os.path.exists(temp_directory):
                    os.makedirs(temp_directory)

                """""""""""""  PLOTTING """""""""""""
                # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                analysis_builder.write(
                    draw_graph(graph=network,
                               file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                               show_image=False)
                )

                """""""""""""  WRITING TO DISC """""""""""""
                # WRITE TO DISC
                Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                data=analysis_builder.getvalue(), extension="txt")
                analysis_builder = Buffer.StringIO()

        if directory:

            if network:
                automated_decision = metric(network)["AUTOMATED_DECISION"]
                eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                           sheet_builder, linkset, children, automated_decision)
            else:
                print network

        if directory:
            # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
            if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                    network_size, date, linkset_name))

                """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                print "\nWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                data=sheet_builder.getvalue(), extension="txt")

        # if count_2 == 2:
        #     break

    print ">>> FOUND: {}".format(count_2)

    if directory is None:
        return "{}\t{}".format(network_size, count_2)
Example #16
def cluster_d_test(linkset, network_size=3, network_size_max=3, targets=None, constraint_targets=None,
                   constraint_text="", directory=None, greater_equal=True, print_it=False, limit=None,
                   only_good=False, activated=False):

    # FOR CONSTRAINTS TO WORK, IT SHOULD NOT BE NONE

    network = []
    print "\nLINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""

    elif network_size > network_size_max and greater_equal is False:
        print "\t[network_size] SHOULD BE SMALLER THAN [network_size_max]"
        return ""

    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    linkset = linkset.strip()

    if network_size_max - network_size == 0:
        greater_equal = False

    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    if greater_equal is True:
        temp_size = 0
        for cluster, cluster_val in clusters_0.items():
            new_size = len(list(cluster_val["nodes"]))
            if new_size > temp_size:
                temp_size = new_size
        network_size_max = temp_size
        print "THE BIGGEST NETWORK'S: {}".format(network_size_max)

    def check_constraint():

        text = constraint_text.lower()
        text = text.split(",")

        # CONSTRAINT BUILDER
        c_builder = Buffer.StringIO()
        if constraint_targets is not None:
            for dictionary in constraint_targets:
                graph = dictionary[St.graph]
                data_list = dictionary[St.data]
                properties = data_list[0][St.properties]
                prop = properties[0] if Ut.is_nt_format(properties[0]) else "<{}>".format(properties[0])

                # WRITING THE CONSTRAINT ON THE GRAPH
                graph_q = """
       {{
           GRAPH <{0}>
           {{
               ?lookup {1} ?constraint .
           }}
       }}
       """.format(graph, prop)
                if len(c_builder.getvalue()) == 0:
                    c_builder.write(graph_q)
                else:
                    c_builder.write("UNION {}".format(graph_q))

            # WRITING THE FILTER
            if len(c_builder.getvalue()) > 0:
                for i in range(0, len(text)):
                    if i == 0:
                        c_builder.write("""
       FILTER (LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                    else:
                        c_builder.write("""
       || LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                c_builder.write(")")


        # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
        query = Qry.cluster_rsc_strengths_query(resources, linkset)
        query = query.replace("# CONSTRAINTS IF ANY", c_builder.getvalue())
        # print query
        response = Qry.sparql_xml_to_matrix(query)
        if response[St.result] is None:
            return False
        return True

    for index in range(network_size, network_size_max + 1):

        count_1 = 0
        count_2 = 0
        curr_network_size = index
        print "\nCLUSTERS OF SIZE {}".format(index)
        sheet_builder = Buffer.StringIO()
        analysis_builder = Buffer.StringIO()
        sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                            "		M. NETWORK QUALITY		REFERENCE\n")

        for cluster, cluster_val in clusters_0.items():

            # network = []
            resources = ""
            uri_size = 0
            count_1 += 1
            children = list(cluster_val["nodes"])
            strengths = cluster_val["strengths"]
            cluster_size = len(children)
            # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
            #     continue

            check = cluster_size >= curr_network_size if greater_equal else cluster_size == curr_network_size

            # NETWORK OF A PARTICULAR SIZE
            if check:

                # file_name = i_cluster[0]

                # 2: FETCHING THE CORRESPONDENTS
                smallest_hash = float('inf')
                child_list = ""
                for child in children:

                    # CREATE THE HASHED ID AS THE CLUSTER NAME
                    hashed = hash(child)
                    if hashed <= smallest_hash:
                        smallest_hash = hashed

                    # GENERAL INFO 1: RESOURCES INVOLVED
                    child_list += "\t{}\n".format(child)

                    # LIST OF RESOURCES IN THE CLUTER
                    use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                    resources += "\n\t\t\t\t{}".format(use)
                    if len(child) > uri_size:
                        uri_size = len(child)

                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                if constraint_targets is not None and check_constraint() is False:
                    continue

                count_2 += 1

                # # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                query = Qry.cluster_rsc_strengths_query(resources, linkset)
                response = Qry.sparql_xml_to_matrix(query)

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))


                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    report = Cls.disambiguate_network_2(children, targets)
                    if report is not None:
                        analysis_builder.write(report)

                # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                link_count = 0
                for link in cluster_val["links"]:
                    link_count += 1
                    name_1 = "{}-{}".format(Ut.hash_it(link[0]), Ut.get_uri_local_name(link[0]))
                    name_2 = "{}-{}".format(Ut.hash_it(link[1]), Ut.get_uri_local_name(link[1]))
                    network += [(name_1, name_2)]

                #  GET THE AUTOMATED FLAG


                if print_it:
                    print ""
                    print analysis_builder.getvalue()

                # SETTING THE DIRECTORY
                if directory:

                    if network:
                        automated_decision = metric(network)["AUTOMATED_DECISION"]
                        if only_good is True and automated_decision.startswith("GOOD") is not True:
                            count_2 -= 1
                            continue

                        print "{:>5} {}".format(count_2, info2)

                        eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                                   sheet_builder, linkset, children, automated_decision)
                    else:
                        print network


                    # linkset_name = Ut.get_uri_local_name(linkset)
                    # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                    temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                        curr_network_size, date, linkset_name, cluster_size, file_name))
                    if not os.path.exists(temp_directory):
                        os.makedirs(temp_directory)

                    """""""""""""  PLOTTING """""""""""""
                    # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                    analysis_builder.write(
                        draw_graph(graph=network,
                                   file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                                   show_image=False)
                    )

                    """""""""""""  WRITING TO DISC """""""""""""
                    # WRITE TO DISC
                    Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                    data=analysis_builder.getvalue(), extension="txt")
                    analysis_builder = Buffer.StringIO()

            if directory:
                # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
                if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                    tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                        curr_network_size, date, linkset_name))

                    """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                    print "\n\tWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                    Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                    data=sheet_builder.getvalue(), extension="txt")

            # if count_2 == 2:
            #     break

        if greater_equal is True:
            # no need to continue, as we already did all networks greater than or equal to the "network-size" input
            break

        print "\t>>> FOUND: {} CLUSTERS OF SIZE {}".format(count_2, curr_network_size)

        if directory is None:
            return "{}\t{}".format(curr_network_size, count_2)
Example #17
def cluster_d_test_stats(linkset, network_size=3, targets=None, directory=None,
                         greater_equal=True, print_it=False, limit=None, activated=False):
    network = []
    print "LINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    count_1 = 0
    count_2 = 0
    sheet_builder = Buffer.StringIO()
    analysis_builder = Buffer.StringIO()
    sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                        "		M. NETWORK QUALITY		REFERENCE\n")
    linkset = linkset.strip()
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    for cluster, cluster_val in clusters_0.items():

        # network = []
        resources = ""
        uri_size = 0
        count_1 += 1
        children = list(cluster_val["nodes"])
        strengths = cluster_val["strengths"]
        cluster_size = len(children)
        # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
        #     continue

        check = cluster_size >= network_size if greater_equal else cluster_size == network_size

        # NETWORK OF A PARTICULAR SIZE
        if check:
            count_2 += 1
            # file_name = i_cluster[0]

            # 2: FETCHING THE CORRESPONDENTS
            smallest_hash = float('inf')
            child_list = ""
            for child in children:
                hashed = hash(child)
                if hashed <= smallest_hash:
                    smallest_hash = hashed

                # GENERAL INFO 1: RESOURCES INVOLVED
                child_list += "\t{}\n".format(child)

                use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                resources += "\n\t\t\t\t{}".format(use)
                if len(child) > uri_size:
                    uri_size = len(child)

            if directory:
                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)


                # # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                query = Qry.cluster_rsc_strengths_query(resources, linkset)
                response = Qry.sparql_xml_to_matrix(query)

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))
                print "{:>5} {}".format(count_2, info2)

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    analysis_builder.write(Cls.disambiguate_network_2(children, targets))


                # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                link_count = 0
                for link in cluster_val["links"]:
                    link_count += 1
                    name_1 = "{}".format(Ut.get_uri_local_name(link[0]))
                    name_2 = "{}".format(Ut.get_uri_local_name(link[1]))
                    network += [(name_1, name_2)]


            if print_it:
                print ""
                print analysis_builder.getvalue()

            # SETTING THE DIRECTORY
            if directory:
                # linkset_name = Ut.get_uri_local_name(linkset)
                # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                    network_size, date, linkset_name, cluster_size, file_name))
                if not os.path.exists(temp_directory):
                    os.makedirs(temp_directory)

                """""""""""""  PLOTTING """""""""""""
                # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                analysis_builder.write(
                    draw_graph(graph=network,
                               file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                               show_image=False)
                )

                """""""""""""  WRITING TO DISC """""""""""""
                # WRITE TO DISC
                Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                data=analysis_builder.getvalue(), extension="txt")
                analysis_builder = Buffer.StringIO()

                if network:
                    automated_decision = metric(network)["AUTOMATED_DECISION"]
                    eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                               sheet_builder, linkset, children, automated_decision)
                else:
                    print network

        if directory:
            # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
            if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                    network_size, date, linkset_name))

                """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                print "\nWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                data=sheet_builder.getvalue(), extension="txt")

                # if count_2 == 2:
                #     break

    print ">>> FOUND: {}".format(count_2)

    if directory is None:
        return "{}\t{}".format(network_size, count_2)
Example #18
0
def register_alignment_mapping(alignment_mapping, created):

    print "\nREGISTERING AN [ALIGNMENT-MAPPING]"
    question_uri = alignment_mapping[St.researchQ_URI]

    # MAKE SURE THE RIGHT URI IS USED WHEN REGISTERING A REFINED LINKSET
    linkset_uri = alignment_mapping[
        St.refined] if St.refined in alignment_mapping else alignment_mapping[
            St.linkset]
    print "\tLINKSET TO REGISTER:", linkset_uri

    # LINKSET EXISTS
    if linkset_uri:

        # 1 CHECK WHETHER THE ALIGNMENT WAS REGISTERED
        ask_query = linkset_composition(alignment_mapping,
                                        request_ask_select_or_insert="ask")
        # print ask_query

        if ask_query is None:
            return

        ask = Qry.boolean_endpoint_response(ask_query)
        # print ask_query
        print "\t>>> ASK WHETHER THE [ALIGNMENT] WAS REGISTERED:", ask

        # 2 THE ALIGNMENT WAS NOT REGISTERED
        if ask == "false":

            # REGISTER THE ALIGNMENT-MAPPING
            insert_alignment_query = linkset_composition(
                alignment_mapping, request_ask_select_or_insert="insert")
            insert_alignment = Qry.boolean_endpoint_response(
                insert_alignment_query)
            # print insert_alignment_query
            print "\t>>> IS THE [ALIGNMENT] NOW INSERTED?:", insert_alignment

            # 2.1 RETRIEVE THE ALIGNMENT-MAPPING URI
            alignment_uri = None
            alignment_uri_query = ask_query.replace(
                "ASK", "SELECT ?alignmentMapping")
            alignment_uri_resp = Qry.sparql_xml_to_matrix(alignment_uri_query)
            if alignment_uri_resp:
                if alignment_uri_resp[St.result]:
                    alignment_uri = alignment_uri_resp[St.result][1][0]
            print "\t>>> ALIGNMENT REGISTERED AS:", alignment_uri

            if alignment_uri:

                # IF WE ARE DEALING WITH A REFINED LINKSET, REGISTER ITS EVOLUTION
                if St.refined in alignment_mapping:
                    print "REGISTERING THE EVOLUTION OF THIS REFINED LINKSET TO\n\t{}".format(
                        alignment_uri)
                    evolution_str = linkset_evolution(question_uri,
                                                      linkset_uri)
                    register_evolution(question_uri, alignment_uri,
                                       evolution_str)

                # 2.2 ADD THE LINKSET TO THE ALIGNMENT
                assign_ls_query = linkset_createdorused(question_uri,
                                                        alignment_uri,
                                                        alignment_mapping,
                                                        is_created=created)

                is_linkset_registered = Qry.boolean_endpoint_response(
                    assign_ls_query)
                print ">>> IS THE [LINKSET] REGISTERED?:", is_linkset_registered

        # 3 THE ALIGNMENT WAS REGISTERED
        else:

            # CHECK IF THE LINKSET WAS REGISTERED
            # is_linkset_registered_query = ask_query.replace("> .", "> ;\n\t\t?pred\t<{}> .".format(linkset_uri))
            # is_linkset_registered_query = is_linkset_registered_query.replace(">\" .", ">\" ;\n\t\t?pred\t<{}> .".format(linkset_uri))

            is_linkset_registered_query = ask_query.replace(
                "###@SLOT",
                "\n\t\t\t?alignmentMapping ?pred\t<{}> .".format(linkset_uri))

            # print "CHECKING WHETHER THE LINKSET WAS TRULY REGISTERED QUERY:", is_linkset_registered_query
            is_linkset_registered = Qry.boolean_endpoint_response(
                is_linkset_registered_query)
            # print is_linkset_registered_query
            print "\t>>> ASK WHETHER [LINKSET] WAS REGISTERED?:", is_linkset_registered

            if is_linkset_registered == "false":

                # RETRIEVE THE ALIGNMENT-MAPPING URI
                alignment_uri = None
                alignment_uri_query = ask_query.replace(
                    "ASK", "SELECT ?alignmentMapping")
                # print "alignment_uri_query:", alignment_uri_query
                alignment_uri_resp = Qry.sparql_xml_to_matrix(
                    alignment_uri_query)
                if alignment_uri_resp:
                    if alignment_uri_resp[St.result]:
                        alignment_uri = alignment_uri_resp[St.result][1][0]

                if alignment_uri:

                    # IF WE ARE DEALING WITH A REFINED LINKSET,
                    # REGISTER ITS EVOLUTION IF NOT REGISTERED YET
                    if St.refined in alignment_mapping:
                        print "REGISTERING THE EVOLUTION OF THIS REFINED LINKSET"
                        evolution_str = linkset_evolution(
                            question_uri, linkset_uri)
                        register_evolution(question_uri, alignment_uri,
                                           evolution_str)

                    # 2.3 ADD THE LINKSET TO THE ALIGNMENT
                    assign_ls_query = linkset_createdorused(question_uri,
                                                            alignment_uri,
                                                            alignment_mapping,
                                                            is_created=created)

                    is_linkset_registered = Qry.boolean_endpoint_response(
                        assign_ls_query)
                    print "\t>>> IS LINKSET NOW REGISTERED?:", is_linkset_registered
Example #19
0
def modified(early_version, late_version, stat=False, display=True, activated=False):

    if activated is False:
        print "\nTHE FUNCTION [removed] IS NOT ACTIVATED"
        return {St.subject: None, St.predicate: None, St.triples: None}

    if stat is False:

        # TRIPLES REMOVED
        mod_pred = modified_predicate(early_version, late_version, count=stat)

        # EXECUTING THE PREDICATE MODIFICATION QUERY
        start = time.time()
        resp_mod_pred = Qr.sparql_xml_to_matrix(mod_pred)
        matched_time_1 = str(datetime.timedelta(seconds=time.time() - start))
        print " \t{:50} [{}]".format("... predicate matched in", matched_time_1)

        # DROPPING THE TEMP GRAPH USED FOR THE QUERY
        # print "DROPPING TEMPS..."
        drop = """
        DROP SILENT GRAPH <{}_TEMP>;
        DROP SILENT GRAPH <{}_TEMP> """.format(early_version, late_version)
        start = time.time()
        Qr.endpoint(drop)
        dropped_time_1 = str(datetime.timedelta(seconds=time.time() - start))
        print "\t{:50} [{}]".format("... predicate temp graph dropped in", dropped_time_1)
        print "\t{:50} [{}]".format("... elapse time", str(datetime.timedelta(seconds=time.time() - start)))

        mod_val = modified_value(early_version, late_version, count=stat)

        # EXECUTING THE VALUE MODIFICATION QUERY
        start_2 = time.time()
        resp_mod_val = Qr.sparql_xml_to_matrix(mod_val)
        matched_time_2 = str(datetime.timedelta(seconds=time.time() - start_2))
        print "\t{:50} [{}]".format("... value matched in", matched_time_2)

        # DROPPING THE TEMP GRAPH USED FOR THE QUERY
        # print "DROPPING TEMPS..."
        drop = """
        DROP SILENT GRAPH <{}_TEMP>;
        DROP SILENT GRAPH <{}_TEMP> """.format(early_version, late_version)
        start_2 = time.time()
        Qr.endpoint(drop)
        dropped_time_2 = str(datetime.timedelta(seconds=time.time() - start_2))
        print "\t{:50} [{}]".format("... value temp graph dropped in", dropped_time_2)
        print "\t{:50} [{}]".format("... elapse time", str(datetime.timedelta(seconds=time.time() - start)))

        status = (resp_mod_val[St.result] is not None and len(resp_mod_val[St.result]) > 1) or \
                 (resp_mod_pred[St.result] is not None and len(resp_mod_pred[St.result]) > 1)

        if display is True:

            # DISPLAY THE RESULTS FOR VALUE REMOVED
            print "\n>>> DISPLAY THE RESULTS FOR VALUE MODIFIED"
            Qr.display_matrix(resp_mod_val, spacing=90, limit=10, is_activated=True)

            # DISPLAY THE RESULTS FOR VALUE REMOVED
            print "\n>>> DISPLAY THE RESULTS FOR PREDICATE MODIFIED"
            Qr.display_matrix(resp_mod_pred, spacing=90, limit=10, is_activated=True)

        return {"status": status,
                St.predicate: resp_mod_pred[St.result],
                St.triples: resp_mod_val[St.result]}

    else:

        mod_pred = modified_predicate(early_version, late_version, count=stat)
        mod_val = modified_value(early_version, late_version, count=stat)

        resp_mod_pred = Qr.sparql_xml_to_matrix(mod_pred)
        resp_mod_val = Qr.sparql_xml_to_matrix(mod_val)

        # resp_mod_val = {St.result: None}

        resp_mod_pred_bool = resp_mod_pred[St.result] is None
        resp_mod_val_bool = resp_mod_val[St.result] is None

        status = (resp_mod_pred_bool is not True and int(resp_mod_pred[St.result][1][0]) > 0) or \
                 (resp_mod_val_bool is not True and int(resp_mod_val[St.result][1][0]) > 0)

        return {"status": status,
                St.predicate: resp_mod_pred[St.result][1][0] if resp_mod_pred_bool is not True else None,
                St.triples: resp_mod_val[St.result][1][0] if resp_mod_val_bool is not True else None}
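A hedged usage sketch for modified, with hypothetical graph URIs: stat=True returns the two counts, stat=False the full result matrices.

# HYPOTHETICAL GRAPH URIS FOR TWO VERSIONS OF THE SAME DATASET
early = "http://risis.eu/dataset/eter_2014"
late = "http://risis.eu/dataset/eter_2016"

# COUNTS ONLY: "status" IS TRUE AS SOON AS A PREDICATE OR A VALUE WAS MODIFIED
stats = modified(early, late, stat=True, display=False, activated=True)
print stats["status"], stats[St.predicate], stats[St.triples]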
Example #20
0
def refining(specs, insert_query, activated=False):

    refined = {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
    diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 1, St.result: None}

    # UPDATE THE SPECS VARIABLE
    # print "UPDATE THE SPECS VARIABLE"
    update_specification(specs)
    update_specification(specs[St.source])
    update_specification(specs[St.target])

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])
    # print "sameAsCount:", specs[St.sameAsCount]

    if specs[St.sameAsCount] is None:
        return {'refined': refined, 'difference': diff}

    # GENERATE THE NAME OF THE LINKSET
    Ls.set_refined_name(specs)
    # print "\nREFINED NAME:", specs[St.refined]
    # print "LINKSET TO REFINE BEFORE CHECK:", specs[St.linkset]

    # CHECK WHETHER OR NOT THE LINKSET WAS ALREADY CREATED
    check = Ls.run_checks(specs, check_type="refine")
    # print "\nREFINED NAME:", specs[St.refined]
    # print "LINKSET TO REFINE:", specs[St.linkset]

    if check[St.message] == "NOT GOOD TO GO":
        # refined = check[St.refined]
        # difference = check["difference"]
        return check

    # print "\nREFINED:", specs[St.refined]
    # print "LINKSET TO REFINE:", specs[St.linkset]
    # print "CHECK:", check

    # THE LINKSET DOES NOT EXIST, LET'S CREATE IT NOW
    print Ls.refined_info(specs, specs[St.sameAsCount])

    # POINT TO THE LINKSET THE CURRENT LINKSET WAS DERIVED FROM
    print "1. wasDerivedFrom {}".format(specs[St.linkset])
    specs[St.derivedfrom] = "\t\tprov:wasDerivedFrom\t\t\t<{}> ;".format(
        specs[St.linkset])

    # print "REFINED NAME:",  specs[St.refined_name]
    # print "REFINED:", specs[St.refined]
    # print "LINKSET TO BE REFINED:", specs[St.linkset]

    print "\n2. RETRIEVING THE METADATA ABOUT THE GRAPH TO REFINE"
    # metadata_q = Qry.q_linkset_metadata(specs[St.linkset])
    metadata_q = """
    prefix ll:    <{}>
    SELECT DISTINCT ?type ?singletonGraph
    {{
        # LINKSET METADATA
        <{}>
            a                       ?type ;
            ll:singletonGraph		?singletonGraph .
    }}
    """.format(Ns.alivocab, specs[St.linkset])
    print "QUERY:", metadata_q
    matrix = Qry.sparql_xml_to_matrix(metadata_q)
    # print "\nMETA DATA: ", matrix

    if matrix:

        if matrix[St.message] == "NO RESPONSE":
            print Ec.ERROR_CODE_1
            print matrix[St.message]
            return {'refined': refined, 'difference': diff}

        elif matrix[St.result] is None:
            print matrix[St.message]
            returned = {
                St.message: matrix[St.message],
                St.error_code: 666,
                St.result: None
            }
            return {'refined': returned, 'difference': diff}

    else:
        print Ec.ERROR_CODE_1
        return {'refined': refined, 'difference': diff}

    # GET THE SINGLETON GRAPH OF THE LINKSET TO BE REFINED
    print "\n3. GETTING THE SINGLETON GRAPH OF THE GRAPH TO REFINE"
    specs[St.singletonGraph] = matrix[St.result][1][1]
    # print matrix[St.result][1][0]

    specs[St.insert_query] = insert_query(specs)
    print specs[St.insert_query]

    if type(specs[St.insert_query]) == str:
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query])

    else:
        print "\n4. RUNNING THE EXTRACTION QUERY"
        print specs[St.insert_query][0]
        # is_run = Qry.boolean_endpoint_response(specs[St.insert_query][0])
        Qry.boolean_endpoint_response(specs[St.insert_query][0])

        print "\n5. RUNNING THE FINDING QUERY"
        print specs[St.insert_query][1]
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query][1])

    print "\n>>> RUN SUCCESSFULLY:", is_run.upper()

    # PROCEED IF THE QUERY RAN (EVEN WHEN NOTHING NEW WAS INSERTED)
    if is_run == "true" or is_run == Ec.ERROR_STARDOG_1:

        # GENERATE THE
        #   (1) LINKSET METADATA
        #   (2) LINKSET OF CORRESPONDENCES
        #   (3) SINGLETON METADATA
        # AND WRITE THEM ALL TO FILE

        print "GENERATING THE METADATA"
        pro_message = refine_metadata(specs)

        # SET THE RESULT ASSUMING IT WENT WRONG
        refined = {
            St.message: Ec.ERROR_CODE_4,
            St.error_code: 4,
            St.result: None
        }
        diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 4, St.result: None}

        server_message = "Linksets created as: [{}]".format(specs[St.refined])
        message = "The linkset was created as [{}]. <br/>{}".format(
            specs[St.refined], pro_message)

        # MESSAGE ABOUT THE INSERTION STATISTICS
        print "\t", server_message

        if int(specs[St.triples]) > 0:

            # UPDATE THE REFINED VARIABLE AS THE INSERTION WAS SUCCESSFUL
            refined = {
                St.message: message,
                St.error_code: 0,
                St.result: specs[St.linkset]
            }

            print "REGISTERING THE ALIGNMENT"
            if refined[St.message].__contains__("ALREADY EXISTS"):
                register_alignment_mapping(specs, created=False)
            else:
                register_alignment_mapping(specs, created=True)

            try:
                print "\nCOMPUTE THE DIFFERENCE AND DOCUMENT IT"
                diff_lens_specs = {
                    St.researchQ_URI: specs[St.researchQ_URI],
                    St.subjectsTarget: specs[St.linkset],
                    St.objectsTarget: specs[St.refined]
                }
                diff = Df.difference(diff_lens_specs, activated=activated)
                message_2 = "\t>>> {} CORRESPONDENCES INSERTED AS THE DIFFERENCE".format(
                    diff_lens_specs[St.triples])
                print message_2
            except Exception as err:
                print "THE DIFFERENCE FAILED: ", str(err.message)

            print "\tLinkset created as: ", specs[St.refined]
            print "\t*** JOB DONE! ***"

            return {'refined': refined, 'difference': diff}

        else:
            print ">>> NO TRIPLE WAS INSERTED BECAUSE NO MATCH COULD BE FOUND"
            return {'refined': refined, 'difference': diff}

    else:
        print "NO MATCH COULD BE FOUND."
Example #21
0
def universities_in(file_path, country, activated=False):

    if activated is False:
        print "THE FUNCTION [universities_in] IS NOT ACTIVATED."
        return None

    query_filter = ""
    for i in range(0, len(country)):
        query_filter += "ucase(?country) = ucase(\"{}\")".format(country[i]) if i == 0 \
            else "|| ucase(?country) = ucase(\"{}\")".format(country[i])

    query_template = """
    SELECT DISTINCT ?subj ?university
    {{
        VALUES ?name_pred
        {{
            <http://www.w3.org/2000/01/rdf-schema#label>
            <http://risis.eu/eter_2014/ontology/predicate/Institution_Name>
            <http://risis.eu/orgreg_20170718/ontology/predicate/Entity_current_name_English>
            <http://risis.eu/orgref_20170703/ontology/predicate/Name>
            <http://risis.eu/leidenRanking_2015/ontology/predicate/actor>
            <http://xmlns.com/foaf/0.1/name>
        }}

        GRAPH <{}>
        {{
            ?subj ?name_pred ?university .
            ?subj {} ?country .
            FILTER ({})
        }}
    }} ORDER BY ?university
    """

    # VARIABLES
    grid = "<http://www.grid.ac/ontology/hasAddress>/<http://www.grid.ac/ontology/countryCode>"
    eter = "<http://risis.eu/eter_2014/ontology/predicate/Country_Code>"
    orgreg = "<http://risis.eu/orgreg_20170718/ontology/predicate/characteristicsOf>" \
             "/<http://risis.eu/orgreg_20170718/ontology/predicate/Country_of_establishment>"
    orgref = "<http://risis.eu/orgref_20170703/ontology/predicate/Country>"
    leiden = "<http://risis.eu/leidenRanking_2015/ontology/predicate/country>"
    h2020 = "<http://risis.eu/cordisH2020/vocab/country>"

    countries = [leiden, eter, orgreg, orgref, h2020, grid]
    graphs = [Data.leiden_GRAPH, Data.eter_GRAPH,
              Data.orgreg_GRAPH, Data.orgref_GRAPH, Data.h2020_GRAPH, Data.grid_GRAPH]
    names = ["\n>>> LEIDEN", "\n>>> ETER", "\n>>> ORGREG", "\n>>> ORGREG", "\n>>> H2020", "\n>>> GRID"]
    results = ["", "", "", "", "", ""]
    size = 0
    excel = Builder.StringIO()

    # QUERY LOOP
    for i in range(0, 6):
        start = time.time()
        query = query_template.format(graphs[i], countries[i], query_filter)
        # print query
        # Qr.display_result(query=query, spacing=50, limit=5, is_activated=True)
        results[i] = Qr.sparql_xml_to_matrix(query)
        temp_size = len(results[i]["result"]) - 1
        elapsed = str(datetime.timedelta(seconds=time.time() - start))
        sofar = str(datetime.timedelta(seconds=time.time() - begining))
        print "{} {} in {} and so far in [{}]".format(names[i], temp_size, elapsed, sofar)

        if temp_size > size:
            size = temp_size

    print "\n>>> MAX SIZE {}".format(size)

    for row in range(1, size + 1):
        excel.write(str(row) + "\t")

        # GO THROUGH EACH RESULT
        for i in range(0, 6):
            query_results = results[i]["result"]
            if row < len(query_results):
                elt = "{}\t{}".format(query_results[row][0], (query_results[row][1]).replace("\t", ""))
                excel.write(elt + "\t") if i < 5 else excel.write(elt + "\n")
            else:
                excel.write("\t\t") if i < 5 else excel.write("\t\t\n")

        # SAMPLE
        if row == 100:
            print "\n", excel.getvalue()
        #     break

    with open(file_path, "wb") as writer:
        writer.write(excel.getvalue())
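A hedged invocation sketch; the output path and the country list are example values:

# WRITES A TAB-SEPARATED SHEET WITH ONE (URI, NAME) COLUMN PAIR PER DATASET
universities_in(file_path="C:\\Temp\\universities_nl_be.txt",  # HYPOTHETICAL PATH
                country=["Netherlands", "Belgium"],
                activated=True)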