Example #1
from networkx import algorithms


def filter_ud(graph, blacklist):
    """Remove subtrees headed by blacklisted case markers or negation words."""
    not_words = ["no", "not", "nicht", "kein"]
    edges = list(graph.edges(data=True))
    cond_nodes = []
    for in_node, out_node, t in edges:
        # A "case" edge to a blacklisted word marks the head of its "nmod"
        # relation for removal.
        if t["color"] == "case" and out_node.split("_")[0] in blacklist:
            for in_, out_, t_ in edges:
                if t_["color"] == "nmod" and (in_ == in_node
                                              or out_ == in_node):
                    cond_nodes.append(in_node)
        # Any edge touching a negation word marks its head as well.
        if in_node.split("_")[0] in not_words or out_node.split(
                "_")[0] in not_words:
            cond_nodes.append(in_node)

    # Delete every node still reachable from a marked node, the marked
    # node included.
    to_delete = []
    for cond_node in cond_nodes:
        for node in graph.nodes():
            if cond_node in graph and node in graph:
                if algorithms.has_path(graph, cond_node, node):
                    to_delete.append(node)

    for node in to_delete:
        if node in graph:
            graph.remove_node(node)
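A minimal usage sketch for filter_ud, assuming nodes are named "lemma_position" and the dependency label is stored in the "color" edge attribute, as the code implies; the sentence graph and blacklist below are invented:

import networkx as nx

g = nx.DiGraph()
g.add_edge("sit_2", "cat_1", color="nsubj")
g.add_edge("sit_2", "mat_4", color="nmod")
g.add_edge("mat_4", "on_3", color="case")

filter_ud(g, blacklist=["on"])
print(list(g.nodes()))  # ['sit_2', 'cat_1'] -- the "on the mat" subtree is gone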
Example #2
from networkx import algorithms


def get_contigs(G, start_node_list, end_node_list):
    """Get all paths from input to output nodes; returns a list of (contig, size) tuples."""
    contigs = []
    for source in start_node_list:
        for target in end_node_list:
            if algorithms.has_path(G, source, target):
                path = algorithms.shortest_path(G, source, target)
                # Start from the first k-mer, then append the last character
                # of every following node to rebuild the sequence.
                contig = path[0]
                for i in range(len(path) - 1):
                    contig += path[i + 1][-1]
                contigs.append((contig, len(contig)))
    return contigs
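A hedged usage sketch for the snippet above, on a tiny de Bruijn-style graph where each node is a k-mer and every edge advances the sequence by one character, matching the path[i + 1][-1] reconstruction; the graph below is invented:

import networkx as nx

G = nx.DiGraph()
G.add_edges_from([("AT", "TG"), ("TG", "GC"), ("GC", "CA")])

print(get_contigs(G, ["AT"], ["CA"]))  # [('ATGCA', 5)]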
Example #3
from networkx import algorithms


def get_contigs(graph, list_start_node, list_end_node):
    '''Takes a graph, a list of entry nodes and a list of exit nodes,
    and returns a list of tuples (contig, contig_size).
    '''
    contigs = []
    for source in list_start_node:
        for target in list_end_node:
            if algorithms.has_path(graph, source, target):
                path = algorithms.shortest_path(graph, source, target)
                contig = path[0]
                for i in range(len(path) - 1):
                    contig += path[i + 1][-1]
                contigs.append((contig, len(contig)))
    return contigs
Example #4
    def asim_jac_nodes_with_backup(self, graph_premise, graph_hypothesis):
        """
        Asymmetric Jaccard similarity between the nodes of the definition graphs, if the score is not 1 it calculates
        the asymmetric Jaccard similarity between the edges without the hypothesis root node
        :param graph_premise: the definition graph of the premise
        :param graph_hypothesis: the definition graph of the hypothesis
        :return: the ratio of overlapping nodes per the length of the hypothesis definition
        """
        node_score = self.asim_jac_nodes(graph_premise, graph_hypothesis)
        edge_score = 0
        if 0.0 < node_score < 1.0:
            root = graph_hypothesis.d_clean(
                graph_hypothesis.root).split("_")[0]
            if root in graph_premise.get_nodes():
                root_id = [
                    node for node in graph_premise.G.nodes()
                    if self.clear_node(node) == root
                ][0]
                graph_premise_only_zero = copy.deepcopy(graph_premise)

                # Strip every edge with a truthy "color" attribute so that
                # only zero edges remain for the reachability test below.
                delete_list = []
                for edge in graph_premise_only_zero.G.adj.items():
                    for output_node in edge[1].items():
                        inner_delete_list = []
                        for edge_type in output_node[1].items():
                            if edge_type[1]["color"]:
                                inner_delete_list.append(edge_type[0])
                        for inner_del in inner_delete_list:
                            del output_node[1]._atlas[inner_del]
                        if len(output_node[1]) < 1:
                            delete_list.append(output_node[0])
                    for to_del in delete_list:
                        if to_del in edge[1]._atlas:
                            del edge[1]._atlas[to_del]
                try:
                    if algorithms.has_path(graph_premise_only_zero.G,
                                           graph_premise.root, root_id):
                        return 1.0
                except Exception as e:
                    print("Error occurred:", e)
            graph_hypothesis_wo_root = copy.deepcopy(graph_hypothesis)
            graph_hypothesis_wo_root.G.remove_node(
                graph_hypothesis_wo_root.root)
            #edge_score = self.asim_jac_edges(graph_premise, graph_hypothesis_wo_root)
            return self.asim_jac_edges(graph_premise, graph_hypothesis_wo_root)
        #return max([node_score, edge_score])
        return node_score
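The _atlas surgery above keeps only edges whose "color" attribute is falsy before the reachability test. A rough standalone sketch of that step on a plain networkx MultiDiGraph, without touching private structures; keep_zero_edges and the example graph are invented for illustration:

import networkx as nx
from networkx import algorithms

def keep_zero_edges(G):
    # Copy only the edges whose "color" attribute is falsy ("zero" edges).
    H = nx.MultiDiGraph()
    H.add_nodes_from(G.nodes(data=True))
    H.add_edges_from((u, v, k, d)
                     for u, v, k, d in G.edges(keys=True, data=True)
                     if not d.get("color"))
    return H

G = nx.MultiDiGraph()
G.add_edge("root", "dog_1", color=0)
G.add_edge("dog_1", "animal_2", color=2)
H = keep_zero_edges(G)
print(algorithms.has_path(H, "root", "dog_1"))     # True
print(algorithms.has_path(H, "root", "animal_2"))  # False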
Example #5
    def filter_graph(self, condition):
        # Collect the nodes whose cleaned lemma matches the condition.
        nodes = list(self.G.nodes())
        cond_nodes = []
        to_delete = []
        for node in nodes:
            cl = self.d_clean(node)
            if condition == cl.split("_")[0]:
                cond_nodes.append(node)

        # Mark everything reachable from a matching node.
        for cond_node in cond_nodes:
            for node in nodes:
                if cond_node in self.G and node in self.G:
                    if algorithms.has_path(self.G, cond_node, node):
                        to_delete.append(node)

        for node in to_delete:
            if node in self.G:
                self.G.remove_node(node)
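filter_graph deletes every node reachable from a node whose cleaned lemma matches the condition. The same reachability set can be collected with nx.descendants instead of one has_path call per node pair; a sketch assuming a plain DiGraph with "lemma_position" node names (filter_by_prefix and the identity clean function are invented):

import networkx as nx

def filter_by_prefix(G, condition, clean=lambda n: n):
    # Nodes whose cleaned lemma matches the condition.
    cond_nodes = [n for n in G if clean(n).split("_")[0] == condition]
    to_delete = set()
    for c in cond_nodes:
        # nx.descendants yields everything reachable from c (c excluded).
        to_delete |= {c} | nx.descendants(G, c)
    G.remove_nodes_from(to_delete)

g = nx.DiGraph([("not_3", "go_4"), ("go_4", "home_5"), ("stay_1", "go_4")])
filter_by_prefix(g, "not")
print(sorted(g.nodes()))  # ['stay_1']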
Example #6
    def blacklisting(self, graph):
        one_two_blacklist = ["A", "a", "b", "B"]
        for adj in graph.G.adj.values():
            for a in adj.items():
                # An edge whose attribute dict is exactly {'color': 2} marks
                # its target node and everything reachable from it.
                if {'color': 2} in a[1].values():
                    new_blacklist_item = a[0]
                    for node in graph.G.nodes:
                        if algorithms.has_path(graph.G, new_blacklist_item,
                                               node):
                            blacklist_node = graph.d_clean(node)
                            if blacklist_node != graph.root:
                                one_two_blacklist.append(
                                    blacklist_node.split('_')[0])
                    new_blacklist_item = graph.d_clean(new_blacklist_item)
                    if new_blacklist_item != graph.root:
                        one_two_blacklist.append(
                            new_blacklist_item.split('_')[0])
        return one_two_blacklist
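The adjacency walk above looks for an edge whose attribute dict is exactly {'color': 2} and then blacklists everything reachable from its target. Iterating edges(data=True) expresses the same check without walking adjacency dicts by hand; a sketch with an invented graph:

import networkx as nx
from networkx import algorithms

g = nx.MultiDiGraph()
g.add_edge("root_0", "good_1", color=0)
g.add_edge("good_1", "bad_2", color=2)
g.add_edge("bad_2", "worse_3", color=0)

blacklist = []
for u, v, d in g.edges(data=True):
    if d == {"color": 2}:
        # Blacklist the lemma of every node reachable from the edge target.
        for node in g.nodes:
            if algorithms.has_path(g, v, node):
                blacklist.append(node.split("_")[0])
print(blacklist)  # ['bad', 'worse']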
Example #7
    def whitelisting(self, graph):
        whitelist = []
        zero_graph = copy.deepcopy(graph)

        # Strip every edge with a truthy "color" attribute so that only
        # zero edges remain.
        delete_list = []
        for edge in zero_graph.G.adj.items():
            for output_node in edge[1].items():
                inner_delete_list = []
                for edge_type in output_node[1].items():
                    if edge_type[1]["color"]:
                        inner_delete_list.append(edge_type[0])
                for inner_del in inner_delete_list:
                    del output_node[1]._atlas[inner_del]
                if len(output_node[1]) < 1:
                    delete_list.append(output_node[0])
            for to_del in delete_list:
                if to_del in edge[1]._atlas:
                    del edge[1]._atlas[to_del]

        # Whitelist the root and everything still reachable from it over
        # zero edges; has_path(root, root) is True, so the root is included.
        for node in zero_graph.G.nodes():
            if algorithms.has_path(zero_graph.G, graph.root, node):
                whitelist.append(node)
        return whitelist
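An equivalent whitelist can be sketched with nx.subgraph_view, filtering out non-zero "color" edges on the fly instead of deep-copying and deleting; this assumes a plain MultiDiGraph with the same attribute layout (whitelist_nodes is an invented name):

import networkx as nx

def whitelist_nodes(G, root):
    # View of G that keeps only edges whose "color" attribute is falsy.
    zero = nx.subgraph_view(
        G, filter_edge=lambda u, v, k: not G[u][v][k].get("color"))
    return [n for n in zero.nodes() if nx.has_path(zero, root, n)]

g = nx.MultiDiGraph()
g.add_edge("root", "a_1", color=0)
g.add_edge("a_1", "b_2", color=1)
print(whitelist_nodes(g, "root"))  # ['root', 'a_1']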
Example #8
    def find_complexSV(self):
        # Enumerate candidate paths between every ordered node pair.
        pool = []
        nodes = self.graph.nodes()
        for n1 in nodes:
            for n2 in nodes:
                if n1 == n2:
                    continue
                if has_path(self.graph, n1, n2):
                    # do not consider edge weight
                    paths = list(all_shortest_paths(self.graph, n1, n2, weight=None))
                    for p in paths:
                        if not self._containloop(p):
                            pool.append((len(p), p))
        pool.sort(reverse=True)

        # So far, the candidate paths contain no self-loops but are still
        # redundant; check distance decay for each pair of regions.
        queue = [(self.clr, self._change_format(p[1]), self.span, self.balance_type, p[1], self.protocol) for p in pool]
        log.info('Filtering {0} redundant candidates ...'.format(len(queue)))
        jobs = Parallel(n_jobs=self.n_jobs, verbose=10)(delayed(filterAssembly)(*i) for i in queue)
        pre_alleles = []
        for ck, p in jobs:
            if ck:
                pre_alleles.append(p)

        # These assemblies should exist within the same allele.
        alleles = []
        for p in pre_alleles:
            for v in alleles:
                if self._issubset(p, v) or self._issubset(p, self._getreverse(v)):
                    break
            else:
                alleles.append(p)

        self.alleles = alleles
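The candidate enumeration above boils down to: for every ordered node pair with a connecting path, collect all unweighted shortest paths and rank the longest first. A standalone sketch of just that core; the loop filtering, Parallel jobs, and allele grouping depend on the original class and are omitted:

import networkx as nx
from networkx.algorithms import has_path, all_shortest_paths

def candidate_paths(G):
    pool = []
    for n1 in G:
        for n2 in G:
            if n1 != n2 and has_path(G, n1, n2):
                # Unweighted shortest paths only, as in the snippet above.
                for p in all_shortest_paths(G, n1, n2, weight=None):
                    pool.append((len(p), p))
    pool.sort(reverse=True)  # longest candidates first
    return pool

g = nx.Graph([("a", "b"), ("b", "c")])
print(candidate_paths(g)[0])  # (3, ['c', 'b', 'a'])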
Example #9
def answer_quest(q, talker):
    '''
    Given question q, interacts with talker and returns
    its best answers.
    '''
    max_answers = talker.params.max_answers
    db = talker.db
    sent_data, l2occ = db

    unknowns = []
    q_lemmas = []
    if talker.params.with_answerer:
        answerer = Talker(from_text=q)
        q_sent_data, q_l2occ = answerer.db
        for j, q_lemma in enumerate(q_sent_data[0][LEMMA]):
            q_tag = q_sent_data[0][TAG][j]
            if q_tag[0] not in "NVJ": continue  # ppp(q_lemma,q_tag)
            q_lemmas.append((q_lemma, wn_tag(q_tag)))
    else:
        answerer = None
        from nltk.tokenize import word_tokenize
        from nltk.stem import WordNetLemmatizer
        wnl = WordNetLemmatizer()
        toks = word_tokenize(q)
        for t in toks:
            # Guess the POS: try noun, then verb, then adjective
            # lemmatization, falling back while the token is unchanged.
            tag = 'n'
            l = wnl.lemmatize(t, tag)
            if l == t:
                tag = 'v'
                l = wnl.lemmatize(t, tag)
            if l == t:
                tag = 'a'
                l = wnl.lemmatize(t, tag)
            l = l.lower()
            q_lemmas.append((l, tag))

    matches = []
    nears = []
    sharesDict = defaultdict(set)
    count = defaultdict(int)

    for q_lemma, wn_q_tag in q_lemmas:
        if not good_word(q_lemma) or q_lemma in ".?": continue

        #  actual QA starts here
        ys = l2occ.get(q_lemma)

        if ys:
            matches.append(q_lemma)
            for sent, _pos in ys:
                sharesDict[sent].add(q_lemma)
                count[q_lemma] += 1
        else:
            if talker.params.expand_query > 0:
                related = wn_all(talker.params.expand_query, 3, q_lemma,
                                 wn_q_tag)
                for r_lemma in related:
                    if not good_word(r_lemma): continue
                    zs = l2occ.get(r_lemma)
                    if not zs:
                        tprint("UNKNOWNS:", q_lemma, '\n')
                        continue
                    nears.append(r_lemma)
                    tprint('EXPANDED:', q_lemma, '-->', r_lemma)
                    # Credit every sentence where the expanded lemma occurs.
                    for sent, _pos in zs:
                        sharesDict[sent].add(r_lemma)
                    count[r_lemma] += 1

    print('count:', count)

    ignored = []
    for lemma in count:
        if (count[lemma] > 3):
            ignored.append(lemma)

    print('ignored:', ignored)

    lavg = talker.avg_len

    best = []
    for id in sharesDict:
        sent = sent_data[id][SENT]
        lsent = len(sent)
        if lsent > 2 * lavg:
            sharedNum = len(sharesDict[id])
            if sharedNum == 1:
                shares = list(sharesDict[id])
                if shares[0] in ignored:
                    continue
        r = 0
        for key in matches:
            if (key in ignored):
                if (key in sharesDict[id]):
                    r += 1.0
                continue

            if (nxAlg.has_path(talker.g, key, id)):
                nodes = nxAlg.shortest_path(talker.g, key, id)
                if (len(nodes) < 6):
                    n = math.pow(2, len(nodes) - 1)
                    r += 16.0 / n

        for key in nears:
            if (key in ignored):
                if (key in sharesDict[id]):
                    r += 0.5
                continue

            if (nxAlg.has_path(talker.g, key, id)):
                nodes = nxAlg.shortest_path(talker.g, key, id)
                print('****************nears, key:id=', key, ':', id,
                      ', get nodes, length:', len(nodes), 'nodes:', nodes)
                if (len(nodes) < 6):
                    n = math.pow(2, len(nodes) - 1)
                    r += 8.0 / n
        best.append((r, id, sharesDict[id], sent))

    best.sort(reverse=True)

    answers = []
    last_rank = 0
    for i, b in enumerate(best):
        if i >= max_answers: break
        #ppp(i,b)
        rank, id, shared, sent = b
        if last_rank != 0:
            if rank / last_rank < 0.70: break
        last_rank = rank
        answers.append((id, sent, round(rank, 4), shared))
    return answers, answerer
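The per-keyword ranking above decays with graph distance: a match contributes 16 / 2**(len(path) - 1) when the shortest path from keyword to sentence node has fewer than 6 nodes (8 / 2**... for expanded terms). A minimal sketch of that scoring rule on an invented graph linking lemmas to sentence ids:

import math
import networkx as nx
from networkx import algorithms as nxAlg

def proximity_score(g, key, sent_id):
    # No connection between keyword and sentence: no contribution.
    if not nxAlg.has_path(g, key, sent_id):
        return 0.0
    nodes = nxAlg.shortest_path(g, key, sent_id)
    if len(nodes) >= 6:
        return 0.0
    # Halve the contribution for every extra hop on the shortest path.
    return 16.0 / math.pow(2, len(nodes) - 1)

g = nx.Graph([("cat", 0), ("cat", "pet"), ("pet", 1)])
print(proximity_score(g, "cat", 0))  # 8.0: two-node path cat -> 0
print(proximity_score(g, "cat", 1))  # 4.0: three-node path cat -> pet -> 1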
Example #10
    def hasPath(self, nodA, nodB):
        ''' See if a path exists between two nodes. '''
        return alg.has_path(self.graph, nodA, nodB)
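Note that networkx's has_path raises NodeNotFound when either endpoint is missing from the graph, so callers of a thin wrapper like this may want a membership guard; a minimal sketch (has_path_safe is an invented name):

import networkx as nx

def has_path_safe(graph, nodA, nodB):
    # Guard against NodeNotFound for absent endpoints.
    return nodA in graph and nodB in graph and nx.has_path(graph, nodA, nodB)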