Exemplos de BioBayesGraph.get_node_by_name em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: BioBayesGraph.BioBayesGraph

Classe / Tipo: BioBayesGraph

Método / Função: get_node_by_name

Exemplos em hotexamples.com: 2

BioBayesGraph.get_node_by_name em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de BioBayesGraph.BioBayesGraph.BioBayesGraph.get_node_by_name em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

populate_from_phyloxml(5)

set_node_variable_domains(3)

add_prob_dist(2)

set_node_probability_dist(2)

populate_from_newick(2)

populate_from_go_obo_xml(2)

import_from_graphml(2)

get_node_by_name(2)

get_node_variable_count(1)

get_node_variable_domain(1)

get_name_by_node(1)

inference_query(1)

inference_query_leave_one_out(1)

iterinternalnodes(1)

iterleafnodes(1)

export_as_graphml(1)

create_inference_representation(1)

clear_all_evidence(1)

set_node_auxiliary_information(1)

add_virtual_evidence(1)

add_hard_evidence(1)

Métodos Frequentes

populate_from_phyloxml (5)

set_node_variable_domains (3)

add_prob_dist (2)

set_node_probability_dist (2)

populate_from_newick (2)

populate_from_go_obo_xml (2)

import_from_graphml (2)

get_node_by_name (2)

get_node_variable_count (1)

get_node_variable_domain (1)

Métodos Frequentes

get_name_by_node (1)

inference_query (1)

inference_query_leave_one_out (1)

iterinternalnodes (1)

iterleafnodes (1)

export_as_graphml (1)

create_inference_representation (1)

clear_all_evidence (1)

set_node_auxiliary_information (1)

add_virtual_evidence (1)

add_hard_evidence (1)

Métodos Frequentes

add_hard_evidence (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_phylogeny_inference.py Projeto: midnighteuler/BioBayesGraph

class Test_inference(unittest.TestCase):
    """
    Tests inference on a graphical model scaffold built from a phylogeny file.

    setUp loads a phyloXML tree into a BioBayesGraph, registers a trivial
    probability distribution class and gives every node two variables.
    The tests then add one hard and one virtual observation and check the
    queried marginals.
    """

    # Node names used as evidence / query targets in the example phylogeny.
    HARD_EVIDENCE_NODE = "C8SHB6_9RHIZ/82-171"
    VIRTUAL_EVIDENCE_NODE = "C7X6P2_9PORP/206-299"
    UNOBSERVED_NODE = "C7PIL1_CHIPD/40-136"

    def setUp(self):
        """
        Loads the example phylogeny and configures every node's variables
        and probability distribution.
        """
        phylo_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "example_data",
            "Asp_protease_2.xml")

        self.phylo_graph = BioBayesGraph()
        self.graph = self.phylo_graph.populate_from_phyloxml(phylo_file)

        # Incorporates the code for the ProbDist1 class into the graph
        class ProbDist1(object):
            def __init__(self, graph, node, node_to_name_map):
                # graph, node are respectively:
                # http://projects.skewed.de/graph-tool/doc/graph_tool.html#graph_tool.Graph
                # http://projects.skewed.de/graph-tool/doc/graph_tool.html#graph_tool.Vertex
                # node_to_name_map is a python dictionary in which
                # any named node's index (can get by int(node_of_interest))
                # will map to the phylogenetic name associated. (If exists)
                self.graph = graph
                self.node = node
                # NOTE(review): the attribute is called name_to_node_map but
                # stores node_to_name_map; kept as-is in case it is read
                # elsewhere.
                self.name_to_node_map = node_to_name_map

            def compute_virtual_likelihood(self, vals, auxiliary_info):
                # "vals" is vector of the particular values this node
                # is taking.
                #
                # "auxiliary_info" is the custom information provided
                # when the virtual evidence was specified.
                return 1

            def compute_pd(self, vals):
                # Returns the conditional probability for this node at vals.
                # Get parent node(s):
                parents = []
                for p_node in self.node.in_neighbours():
                    parents.append(int(p_node))

                # Note that you shape this depending on node location and
                # other properties in the graph.
                # Also, you can store computations into class-wide variables
                # (e.g. ClassName.var_to_store) to cache computations. You
                # could also declare the variable being stored to as global.
                return 1

        self.phylo_graph.add_prob_dist(prob_dist_class=ProbDist1)

        # Sets all nodes to have two variables:
        # the first with 3 values, the second with two values.
        for node in self.graph.vertices():
            node_index = int(node)

            # Each node has v1, v2
            self.phylo_graph.set_node_variable_count(node_index=node_index,
                                                     num_vars=2)
            # v1 \in {0,1,2}, v2 \in {0,1}
            self.phylo_graph.set_node_variable_domains(
                node_index=node_index,
                var_domains=[(0, 1, 2), (0, 1)])
            # Use the same probability dist (defined in the class above)
            self.phylo_graph.set_node_probability_dist(
                node_index=node_index,
                prob_dist_class="ProbDist1")

    def _add_example_evidence(self):
        """
        Clears all evidence, then adds one "hard" and one "virtual"
        observation.
        """
        node_of = self.phylo_graph.get_node_by_name
        self.phylo_graph.clear_all_evidence()
        self.phylo_graph.add_hard_evidence(
            node_index=node_of(self.HARD_EVIDENCE_NODE),
            observed_value=(0, 1)  # v1 = 0, v2 = 1
        )
        self.phylo_graph.add_virtual_evidence(
            node_index=node_of(self.VIRTUAL_EVIDENCE_NODE),
            observed_value=(2, 0),  # v1 = 2, v2 = 0
            auxiliary_info={"custom_info"},  # info provided to likelihood function
        )

    def _query_and_check_marginals(self):
        """
        Queries the three example nodes and asserts that, for each node,
        the marginal of the expected variable assignment has the expected
        value. Marginals are also printed as diagnostic output.
        """
        node_of = self.phylo_graph.get_node_by_name
        query_nodes = [
            node_of(self.UNOBSERVED_NODE),        # Some other node
            node_of(self.VIRTUAL_EVIDENCE_NODE),  # Set as virtual observation
            node_of(self.HARD_EVIDENCE_NODE),     # Set as hard observation
        ]
        q_results = self.phylo_graph.inference_query(query_nodes=query_nodes)

        # node -> (variable assignment, expected marginal of that assignment)
        expected = {
            node_of(self.VIRTUAL_EVIDENCE_NODE): ((0, 0), 0.166666666667),
            node_of(self.HARD_EVIDENCE_NODE): ((0, 1), 1.0),
            node_of(self.UNOBSERVED_NODE): ((0, 1), 0.166666666667),
        }

        for qn, marginals in q_results.items():
            print("For node %s" % (self.phylo_graph.get_name_by_node(qn),))
            expected_vals, expected_marginal = expected[qn]
            for var_val, marg_val in marginals:
                print("%s : %s" % (var_val, marg_val))
                if var_val == expected_vals:
                    self.assertAlmostEqual(marg_val, expected_marginal)

    def _print_leave_one_out(self):
        """
        Runs the leave-one-out query and pretty-prints the result for each
        node (diagnostic output only; nothing is asserted).
        """
        q_results = self.phylo_graph.inference_query_leave_one_out()
        for qn, left_out_results in q_results.items():
            print("For node %s" % (self.phylo_graph.get_name_by_node(qn),))
            pprint(left_out_results)
            print("------\n")

    def testInference(self):
        """
        Runs a query using libdai.
        """
        self._add_example_evidence()
        self.phylo_graph.create_inference_representation()
        self._query_and_check_marginals()

    def testLeaveOneOut(self):
        """
        Tests leave-one-out inference looping.

        The regular query is run between two leave-one-out passes and must
        still yield the expected marginals.
        """
        self._add_example_evidence()

        self._print_leave_one_out()

        self._query_and_check_marginals()
        print("------\n")

        self._print_leave_one_out()

Exemplo n.º 2

0

Exibir arquivo

Arquivo: Sifter2.py Projeto: midnighteuler/BioBayesGraph

class EvidenceProcessor(object):
    """
    This is the SIFTER 2.0 evidence handling method.

    A gene ontology is loaded into a BioBayesGraph (self.evidence_ontology).
    Parsed per-protein annotations are then turned into a likelihood for
    every leaf of the ontology sub-DAG spanned by the annotated terms.

    Edge direction convention used throughout this class: 'is_a' edges are
    followed via out_edges() towards the ontology root, so the root is the
    vertex with out_degree() == 0 and leaves are vertices with
    in_degree() == 0.
    """

    def __init__(self, processor_settings):
        """
        For SIFTER 2.0, the molecular function gene ontology is loaded
        into a graph.

        processor_settings must provide the keys 'go_file' and 'go_format',
        which are forwarded to _load_go_ontology.
        """
        self.evidence_ontology = BioBayesGraph()
        self._load_go_ontology(go_file=processor_settings['go_file'],
                               go_format=processor_settings['go_format'])

    def parse_evidence(self, evidence_file, evidence_format, evidence_constraints):
        """
        Routing function to parse evidence from different format sources.
        Doesn't process the evidence; only parses the file.

        Only evidence_format == 'pli' is handled (delegated to
        pli_parser.parser); any other value raises Exception.
        """
        if evidence_format == 'pli':
            return pli_parser.parser(evidence_file=evidence_file,
                                     evidence_constraints=evidence_constraints)
        raise Exception("Evidence format requested isn't supported.")

    def process_evidence(self, evidence_set, evidence_constraints):
        """
        Using the parsed evidence, this places the evidence set and
        modifies the gene ontology graph in the SIFTER 2.0 way.

        evidence_set maps a protein key to a record whose 'evidence_set'
        entry is JSON text decoding to a sequence of (go_term, method)
        pairs. evidence_constraints is indexed by method and supplies the
        value combined into the term's likelihood.

        Returns a dict mapping each protein key to the
        {go_id: likelihood} dict produced by
        _distribute_evidence_to_subdag_leaves.

        Raises Exception when an annotated GO term is not found in the
        ontology.
        """
        # Decode every protein's annotations once and collect the set of
        # GO terms that carry any annotation.
        parsed_evidence = {}
        go_terms = set()
        for pid_json, annot_json in evidence_set.items():
            p_ev_set = json.loads(annot_json['evidence_set'])
            parsed_evidence[pid_json] = p_ev_set
            for go_term, _method in p_ev_set:
                go_terms.add(go_term)

        annotated_term_nodes = {}
        for go_term in go_terms:
            g_node = self.evidence_ontology.get_node_by_name(go_term)
            if g_node is None:
                raise Exception(
                    "GO term, %s doesn't seem to be named in your ontology." % go_term)
            annotated_term_nodes[go_term] = g_node

        go_subdag = self._get_ontology_subdag(annotated_term_nodes=annotated_term_nodes)

        # Now for each protein, add the graphical model evidence
        processed_ev_set = {}
        for pid_json, p_ev_set in parsed_evidence.items():
            processed_ev_set[pid_json] = self._distribute_evidence_to_subdag_leaves(
                sub_dag=go_subdag,
                evidence_constraints=evidence_constraints,
                protein_evidence_set=p_ev_set)
        return processed_ev_set

    def _get_ontology_subdag(self, annotated_term_nodes):
        """
        Given the annotated term nodes, returns a filtered view of
        self.evidence_ontology.g that only contains those nodes or their
        ancestors.

        annotated_term_nodes maps GO term -> vertex identifier accepted by
        self.evidence_ontology.g.vertex().
        """
        # For each annotated node, traverse to the root node of the
        # ontology to include all its less-specific terms.
        all_term_nodes = set()
        for annot_term in annotated_term_nodes.values():
            start = self.evidence_ontology.g.vertex(annot_term)
            all_term_nodes.update(self._trace_to_ontology_root(start))

        return graph_tool.GraphView(self.evidence_ontology.g,
                                    vfilt=lambda v: v in all_term_nodes)

    def _trace_to_ontology_root(self, cur_node):
        """
        Generator to recursively visit all nodes on each path from a node
        up to the root node, following only 'is_a' out-edges.

        A node reachable over several paths is yielded once per path.
        """
        yield cur_node
        edge_types = self.evidence_ontology.g.edge_properties['edge_type']
        for edge_in in cur_node.out_edges():
            if edge_types[edge_in] == 'is_a':
                for n in self._trace_to_ontology_root(edge_in.target()):
                    yield n

    def _get_top_node(self, sub_dag):
        """
        Gives the root node of the sub dag: the first vertex with no
        out-edges, or None when there is none.
        """
        for c in sub_dag.vertices():
            if c.out_degree() == 0:
                return c
        return None

    def _get_leaves_from_node(self, sub_dag, top_node):
        """
        Returns the set of leaf vertices (in_degree() == 0) reachable from
        top_node by repeatedly following in-neighbours. top_node itself is
        never included, so the result is empty when it has no in-neighbours.
        """
        descendant_leaves = set()
        for c in top_node.in_neighbours():
            if c.in_degree() != 0:
                descendant_leaves |= self._get_leaves_from_node(sub_dag, c)
            else:
                descendant_leaves.add(c)
        return descendant_leaves

    def _visualize_ontology_subdag(self, sub_dag, output_file):
        """
        Draws sub-dag to file.

        The layout is computed with graphviz_draw and then passed to
        graph_draw, which writes to output_file.
        """
        # http://projects.skewed.de/graph-tool/doc/search_module.html?highlight=leaf
        # NOTE(review): "/dev/null/tmp.pdf" is not a creatable path;
        # presumably the intent is to discard graphviz's own rendering and
        # keep only the positions -- confirm against graph_tool's behaviour.
        pos = graph_tool.draw.graphviz_draw(
            sub_dag,
            size=(30, 30),
            ratio="fill",
            layout="dot",
            vprops={'label': sub_dag.vertex_properties['go_id'], },
            output="/dev/null/tmp.pdf")

        return graph_tool.draw.graph_draw(
            sub_dag,
            pos=pos,
            vertex_text=sub_dag.vertex_properties['go_id'],
            vertex_font_size=8,
            nodesfirst=True,
            vertex_fill_color="#729fcf",
            vertex_pen_width=3,
            output=output_file)

    @staticmethod
    def _prob_or(p1, p2):
        """Returns 1 - (1 - p1) * (1 - p2)."""
        return 1.0 - (1.0 - p1) * (1 - p2)

    @staticmethod
    def _binomial(n, k):
        """
        Binomial coefficient C(n, k) via in-place Pascal rows.

        For k > n this returns 1 (not 0). _synchronize_likelihoods relies
        on that value, so the hand-rolled form is kept on purpose.
        """
        bc = [1 for _ in range(0, k + 1)]
        for _ in range(1, n - k + 1):
            for i in range(1, k + 1):
                bc[i] = bc[i - 1] + bc[i]
        return bc[k]

    @staticmethod
    def _probability_of_observing_k_nodes(r_value, k):
        """
        Returns 1 / r_value for k == 0, otherwise
        sum_{i=1..k} C(k, i) / r_value ** i.
        """
        if k == 0:
            return 1.0 / r_value
        prob = 0
        for i in range(1, k + 1):
            prob = prob + EvidenceProcessor._binomial(k, i) * 1 / (r_value ** i)
        return prob

    @staticmethod
    def _calculate_r_value(total_num_leaves):
        """
        Returns 1 / (2 ** (1 / total_num_leaves) - 1).

        Raises ZeroDivisionError when total_num_leaves is 0.
        """
        return 1.0 / (2 ** (1.0 / total_num_leaves) - 1)

    @staticmethod
    def _synchronize_likelihoods(leaf_go_nums, r_value, evidence_nodes):
        """
        Updates evidence_nodes[leaf]['likelihood'] in place for every leaf.

        This step is performed in the Java code (synchronizeLikelihoods()
        in PFunGODAG.java) and has the effect of making all likelihoods
        non-zero, though the underlying reason for doing this is unknown.
        """
        # Calculate probability of observing subset of power set having
        # size of leaf set. Note, this isn't equivalent to
        # _probability_of_observing_k_nodes(r_value, len(leaf_go_nums)).
        num_leaves = len(leaf_go_nums)
        leaf_subset_prior = 0
        for i in range(1, num_leaves + 1):
            leaf_subset_prior = leaf_subset_prior \
                + EvidenceProcessor._binomial(num_leaves - 1, i) * 1 / (r_value ** i)

        # Calculate likelihood of ANY leaf
        likelihood_of_any_leaf = 0.0
        for leaf_go_num in leaf_go_nums:
            likelihood_of_any_leaf = EvidenceProcessor._prob_or(
                likelihood_of_any_leaf,
                evidence_nodes[leaf_go_num]['likelihood'])

        not_in_a_subset_prior = (1.0 - likelihood_of_any_leaf) * leaf_subset_prior
        for leaf_go_num in leaf_go_nums:
            evidence_nodes[leaf_go_num]['likelihood'] = EvidenceProcessor._prob_or(
                evidence_nodes[leaf_go_num]['likelihood'],
                not_in_a_subset_prior)

    @staticmethod
    def _a_priori_evidence(leaf_go_nums, r_value, evidence_nodes):
        """
        Replaces every non-positive leaf likelihood in evidence_nodes (in
        place) with a common fill value derived from r_value and the
        product of the positive likelihoods.

        Again, this step is performed in the Java code and makes all
        likelihoods non-zero, though the underlying reason is unknown.
        """
        total = 1.0
        count_of_unlikely_leaves = 0
        for leaf_go_num in leaf_go_nums:
            leaf_likelihood = evidence_nodes[leaf_go_num]['likelihood']
            if leaf_likelihood > 0:
                total = total * leaf_likelihood
            else:
                count_of_unlikely_leaves = count_of_unlikely_leaves + 1

        if count_of_unlikely_leaves == 0:
            return

        rest = (1.0 / (r_value ** len(leaf_go_nums))) / total
        fill_value = rest ** (1.0 / count_of_unlikely_leaves)
        for leaf_go_num in leaf_go_nums:
            # For each zero likelihood, we want to fudge factor a bit.
            if evidence_nodes[leaf_go_num]['likelihood'] <= 0:
                evidence_nodes[leaf_go_num]['likelihood'] = fill_value

    def _distribute_evidence_to_subdag_leaves(self, sub_dag, protein_evidence_set, evidence_constraints):
        """
        Propagates the evidence in protein_evidence_set over sub_dag and
        returns a dictionary of {go_term: probability} by distributing
        the evidence in the SIFTER 2.0 way.

        sub_dag must expose vertex_properties['go_id'], vertices() and
        vertex(index). protein_evidence_set is a sequence of
        (go_term, method) pairs whose go_term values are go_ids present in
        sub_dag; evidence_constraints is indexed by method.

        The returned dict has one entry per leaf reachable from the root
        of sub_dag.
        """
        go_ids = sub_dag.vertex_properties['go_id']

        # Candidate function set = leaves starting from the root.
        root_node = self._get_top_node(sub_dag)
        candidate_fcns = [go_ids[leaf]
                          for leaf in self._get_leaves_from_node(sub_dag, root_node)]

        # Set initial probabilities in DAG for evidence provided by this
        # protein.
        go_term_likelihoods = {
            go_ids[v]: {'likelihood': 0, 'dag_vertex_id': int(v)}
            for v in sub_dag.vertices()
        }
        for go_term, ev_method in protein_evidence_set:
            term_info = go_term_likelihoods[go_term]
            term_info['likelihood'] = self._prob_or(term_info['likelihood'],
                                                    evidence_constraints[ev_method])

        # Now for any that are ancestral, propagate the probabilities down
        # to their descendant leaves.
        r_value = self._calculate_r_value(len(candidate_fcns))
        for go_term, ev_method in protein_evidence_set:
            dag_node = sub_dag.vertex(go_term_likelihoods[go_term]['dag_vertex_id'])

            # Skip if is leaf
            # NOTE(review): by this class's own conventions out_degree() == 0
            # identifies the root, not a leaf (leaves have in_degree() == 0),
            # so this skips evidence placed on the root. Confirm intent
            # before changing; behaviour is kept as-is.
            if dag_node.out_degree() == 0:
                continue

            descendant_leaf_set = self._get_leaves_from_node(sub_dag, dag_node)

            # Propagate evidence to leaf nodes
            parent_prob = go_term_likelihoods[go_term]['likelihood']
            transmission_coeff = \
                self._probability_of_observing_k_nodes(r_value, 0) \
                / self._probability_of_observing_k_nodes(r_value, len(descendant_leaf_set))

            for leaf_node in descendant_leaf_set:
                # Each descendant leaf gets its own update, keyed by that
                # leaf's go_id.
                leaf_info = go_term_likelihoods[go_ids[leaf_node]]
                leaf_info['likelihood'] = self._prob_or(
                    leaf_info['likelihood'],
                    parent_prob * transmission_coeff)

        self._synchronize_likelihoods(candidate_fcns, r_value, go_term_likelihoods)
        self._a_priori_evidence(candidate_fcns, r_value, go_term_likelihoods)

        return {k: go_term_likelihoods[k]['likelihood'] for k in candidate_fcns}

    def _load_go_ontology(self, go_file, go_format='oboxml'):
        """
        Loads the ontology at go_file into self.evidence_ontology.

        go_format == 'oboxml': go_file is opened with gzip and parsed as
        OBO XML, keeping the 'molecular_function' aspect.
        go_format == 'biobayesgraph': go_file is imported as GraphML.
        Any other go_format loads nothing.
        """
        if go_format == 'oboxml':
            # Ontology aspect can be one of:
            # [u'molecular_function', u'cellular_component', u'biological_process']
            # The with-block closes the gzip handle even if parsing raises.
            with gzip.open(go_file, 'rb') as obo_from_gzip:
                self.evidence_ontology.populate_from_go_obo_xml(
                    obo_file_buffer=obo_from_gzip,
                    ontology_aspect='molecular_function')
        elif go_format == 'biobayesgraph':
            self.evidence_ontology.import_from_graphml(go_file)
        # NOTE(review): an unrecognised go_format is silently ignored and
        # leaves the ontology empty; parse_evidence raises in the analogous
        # case -- consider doing the same here.