def testPhylogenyFromNewick(self):
     """
     Tries to load a newick tree from file.
     """
     phylo_file = os.path.dirname(os.path.realpath(__file__)) + '/example_data/Asp_protease_2.nhx'
     phylo_graph = BioBayesGraph()
     graph = phylo_graph.populate_from_newick(phylo_file)
     
     self.assertTrue(("name" in graph.vertex_properties),
                     msg="Clade names not imported correctly.")
     
     self.assertTrue(("branch_length" in graph.edge_properties),
                     msg="Branch lengths not imported correctly.")
     
     bl_sum = 0.0
     for e in graph.edges():
         bl_sum += float(graph.edge_properties['branch_length'][e])
     
     self.assertTrue(abs(bl_sum - 168.58699) < 1e-6,
                     msg="Branch lengths not imported correctly "
                         "(sum is wrong)")
     
     self.assertEqual(graph.num_vertices(), 608,
                      "Didn't get expected number of nodes from phylogeny.")
     self.assertEqual(graph.num_edges(), 607,
                      "Didn't get expected number of edges from phylogeny.")
 def testGraphLoad(self):
     """
     Loads a graph from a GraphML file and checks its vertex properties.
     """
     obo_graph = BioBayesGraph()
     obo_graph.import_from_graphml(os.path.dirname(os.path.realpath(__file__)) + "/example_data/MF.bbg")
     
     for k in self.node_prop_types.iterkeys():
         self.assertTrue((k in obo_graph.g.vertex_properties),
                         msg="%s not imported correctly."%k)
 def testExportFormats(self):
     """
     Tries to create a graph from a phylogeny and export it to GraphML.
     """
     phylo_graph = BioBayesGraph()
     phylo_file = os.path.dirname(os.path.realpath(__file__)) + '/example_data/Asp_protease_2.xml'
     tmp_file = StringIO()
     try:
         phylo_graph.populate_from_phyloxml(phylo_file)
         phylo_graph.export_as_graphml(tmp_file) 
     finally:
         tmp_file.close()
 def setUp(self):
     """
     Loads a phylogeny. 
     """
     phylo_file = os.path.dirname(os.path.realpath(__file__)) + "/example_data/Asp_protease_2.xml"
     self.phylo_graph = BioBayesGraph()
     self.graph = self.phylo_graph.populate_from_phyloxml(phylo_file)
 def testOntologyFromGOOBO(self):
     """
     Tries to load a GO OBO ontology from file.
     """
     obo_graph = BioBayesGraph()
     
     obo_file = os.path.dirname(os.path.realpath(__file__)) + '/example_data/go_daily-termdb.obo-xml.gz'
     obo_from_gzip = gzip.open(obo_file, 'rb')
     
     # Ontology aspect can be one of:
     # [u'molecular_function', u'cellular_component', u'biological_process']
     graph = obo_graph.populate_from_go_obo_xml(obo_file_buffer=obo_from_gzip,
                                                ontology_aspect='molecular_function')
     obo_from_gzip.close()
     
     for k in self.node_prop_types.iterkeys():
         self.assertTrue((k in graph.vertex_properties),
                         msg="%s not imported correctly."%k)
class Test_inference(unittest.TestCase):
    """
    Test class for running inference queries on graphical models built
    from phylogeny files
    """

    def setUp(self):
        """
        Loads a phylogeny. 
        """
        phylo_file = os.path.dirname(os.path.realpath(__file__)) + "/example_data/Asp_protease_2.xml"
        self.phylo_graph = BioBayesGraph()
        self.graph = self.phylo_graph.populate_from_phyloxml(phylo_file)

        # Defines the ProbDist1 class below and registers it with the graph
        class ProbDist1(object):
            def __init__(self, graph, node, node_to_name_map):
                # graph and node are, respectively:
                #   http://projects.skewed.de/graph-tool/doc/graph_tool.html#graph_tool.Graph
                #   http://projects.skewed.de/graph-tool/doc/graph_tool.html#graph_tool.Vertex
                # node_to_name_map is a Python dictionary mapping a named
                # node's index (obtainable via int(node_of_interest)) to the
                # phylogenetic name associated with it, if one exists.
                self.graph = graph
                self.node = node
                self.node_to_name_map = node_to_name_map

            def compute_virtual_likelihood(self, vals, auxiliary_info):
                # "vals" is a vector of the particular values this node
                # is taking.
                #
                # "auxiliary_info" is the custom information provided
                # when the virtual evidence was specified.
                return 1

            def compute_pd(self, vals):
                # Returns the conditional probability for this node at vals.

                # Get parent node(s):
                parents = []
                for p_node in self.node.in_neighbours():
                    parents.append(int(p_node))

                # Note that you can shape this depending on the node's
                # location and other properties in the graph.
                # You can also store computations in class-wide variables
                # (e.g. ClassName.var_to_store) to cache results, or declare
                # the variable being stored to as global.
                return 1
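
            # A minimal caching sketch (hypothetical helper, not part of the
            # original API), following the comment above: memoize compute_pd
            # results in a class-wide dict keyed by node index and values.
            _pd_cache = {}

            def compute_pd_cached(self, vals):
                key = (int(self.node), tuple(vals))
                if key not in ProbDist1._pd_cache:
                    ProbDist1._pd_cache[key] = self.compute_pd(vals)
                return ProbDist1._pd_cache[key]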

        self.phylo_graph.add_prob_dist(prob_dist_class=ProbDist1)

        # Sets every node to have two variables:
        # the first with three values, the second with two.
        for node in self.graph.vertices():
            node_index = int(node)
            # Each node has v1, v2
            self.phylo_graph.set_node_variable_count(node_index=node_index, num_vars=2)
            # v1 \in {0,1,2}, v2 \in {0,1}
            self.phylo_graph.set_node_variable_domains(node_index=node_index, var_domains=[(0, 1, 2), (0, 1)])
            # Use the same probability dist (defined in the class above)
            self.phylo_graph.set_node_probability_dist(node_index=node_index, prob_dist_class="ProbDist1")
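        # Each node's joint state space is therefore the Cartesian product
        # (0, 1, 2) x (0, 1), i.e. six states per node.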

    def testInference(self):
        """
        Runs a query using libDAI.
        """
        # Creates one "hard" observation, and one "virtual" observation
        self.phylo_graph.clear_all_evidence()

        self.phylo_graph.add_hard_evidence(
            node_index=self.phylo_graph.get_node_by_name("C8SHB6_9RHIZ/82-171"), observed_value=(0, 1)  # v1 = 0, v2 = 1
        )

        self.phylo_graph.add_virtual_evidence(
            node_index=self.phylo_graph.get_node_by_name("C7X6P2_9PORP/206-299"),
            observed_value=(2, 0),  # v1 = 2, v2 = 0
            auxiliary_info={"custom_info"},  # info provided to likelihood function
        )

        # phylo_graph.remove_evidence_at_node(node_index=phylo_graph.get_node_by_name("C7X6P2_9PORP/206-299"))
        self.phylo_graph.create_inference_representation()

        query_nodes = [
            self.phylo_graph.get_node_by_name("C7PIL1_CHIPD/40-136"),  # Some other node
            self.phylo_graph.get_node_by_name("C7X6P2_9PORP/206-299"),  # Set as virtual observation above
            self.phylo_graph.get_node_by_name("C8SHB6_9RHIZ/82-171"),
        ]  # Set as hard observation above

        q_results = self.phylo_graph.inference_query(query_nodes=query_nodes)
        expected = {
            self.phylo_graph.get_node_by_name("C7X6P2_9PORP/206-299"): ((0, 0), 0.166666666667),
            self.phylo_graph.get_node_by_name("C8SHB6_9RHIZ/82-171"): ((0, 1), 1.0),
            self.phylo_graph.get_node_by_name("C7PIL1_CHIPD/40-136"): ((0, 1), 0.166666666667),
        }
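        # With compute_pd returning 1 everywhere, each unobserved node's
        # posterior is uniform over its 3 x 2 = 6 joint states, hence
        # 1/6 = 0.166666666667; the hard-evidence node is pinned to (0, 1)
        # with probability 1.0.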

        for qn, marginals in q_results.iteritems():
            print "For node", self.phylo_graph.get_name_by_node(qn)
            for var_val, marg_val in marginals:
                print var_val, ":", marg_val
                if var_val == expected[qn][0]:
                    self.assertAlmostEqual(marg_val, expected[qn][1])

    def testLeaveOneOut(self):
        """
        Tests leave-one-out inference looping
        """
        # Creates one "hard" observation, and one "virtual" observation
        self.phylo_graph.clear_all_evidence()

        self.phylo_graph.add_hard_evidence(
            node_index=self.phylo_graph.get_node_by_name("C8SHB6_9RHIZ/82-171"), observed_value=(0, 1)  # v1 = 0, v2 = 1
        )

        self.phylo_graph.add_virtual_evidence(
            node_index=self.phylo_graph.get_node_by_name("C7X6P2_9PORP/206-299"),
            observed_value=(2, 0),  # v1 = 2, v2 = 0
            auxiliary_info={"custom_info"},  # info provided to likelihood function
        )

        q_results = self.phylo_graph.inference_query_leave_one_out()
        for qn, left_out_results in q_results.iteritems():
            print "For node", self.phylo_graph.get_name_by_node(qn)
            pprint(left_out_results)
        print "------\n"
        query_nodes = [
            self.phylo_graph.get_node_by_name("C7PIL1_CHIPD/40-136"),  # Some other node
            self.phylo_graph.get_node_by_name("C7X6P2_9PORP/206-299"),  # Set as virtual observation above
            self.phylo_graph.get_node_by_name("C8SHB6_9RHIZ/82-171"),
        ]  # Set as hard observation above

        q_results = self.phylo_graph.inference_query(query_nodes=query_nodes)
        expected = {
            self.phylo_graph.get_node_by_name("C7X6P2_9PORP/206-299"): ((0, 0), 0.166666666667),
            self.phylo_graph.get_node_by_name("C8SHB6_9RHIZ/82-171"): ((0, 1), 1.0),
            self.phylo_graph.get_node_by_name("C7PIL1_CHIPD/40-136"): ((0, 1), 0.166666666667),
        }

        for qn, marginals in q_results.iteritems():
            print "For node", self.phylo_graph.get_name_by_node(qn)
            for var_val, marg_val in marginals:
                print var_val, ":", marg_val
                if var_val == expected[qn][0]:
                    self.assertAlmostEqual(marg_val, expected[qn][1])
        print "------\n"
        q_results = self.phylo_graph.inference_query_leave_one_out()
        for qn, left_out_results in q_results.iteritems():
            print "For node", self.phylo_graph.get_name_by_node(qn)
            pprint(left_out_results)
        print "------\n"

Example #10
class SIFTER(object):
    def __init__(self):
        """
        Constructor
        """
        self.phylo_graph = BioBayesGraph()
        self.evidence_processors = {}
        
    def load_phylogeny(self, phylo_file, phylo_format='phyloxml'):
        """
        Populates the internal graph from a phylogeny file.
        """
        if phylo_format == 'phyloxml':
            self.phylo_graph.populate_from_phyloxml(phylo_file)
        elif phylo_format == 'newick':
            self.phylo_graph.populate_from_newick(phylo_file)
        else:
            raise Exception("Phylo format requested isn't supported.")
    
    
    def load_evidence_processor(self, evidence_type,
                        evidence_processor_class, processor_settings):
        '''
        Loads evidence processor to internal reference
        '''
        self.evidence_processors[evidence_type] = evidence_processor_class(processor_settings)
        
    def parse_evidence(self, evidence_type, evidence_file,
                            evidence_constraints, evidence_format):
        """
        Parses an evidence file using the processor registered for the
        given evidence type.
        """
        if evidence_type not in self.evidence_processors:
            raise Exception("Evidence type requested doesn't have a handler.")
        
        return self.evidence_processors[evidence_type].parse_evidence(\
                        evidence_file=evidence_file,
                        evidence_format=evidence_format,
                        evidence_constraints=evidence_constraints)
                        
    def setup_nodes(self, node_to_fcn_model_map):
        '''
        node_to_fcn_model_map is a function mapping
        "vertex_id" to {
            'auxiliary_info':{'num_functions':num_fcns,
                              'max_num_simul':3},
            'prob_dist_class':prob_dist_class
        }
        E.g. 
        node_to_fcn_model_map = \
            lambda v: { \
            'auxiliary_info':{'num_functions':num_fcns,
                              'max_num_simul':3},
            'prob_dist_class':'FunctionModels.Sifter2.FunctionModel'
            }
        '''
        dist_fcn_classes = {}
        
        for n in self.phylo_graph.g.vertices():
            node_index = int(n)
            fcn_model_info = node_to_fcn_model_map(node_index)
            
            # Store auxiliary info by node
            self.phylo_graph.set_node_auxiliary_information(node_index=node_index,
                            auxiliary_info=fcn_model_info['auxiliary_info'])
            
            # Make an instance of the custom prob dist function
            dist_model = fcn_model_info['prob_dist_class']
            if dist_model.__name__ not in dist_fcn_classes:
                self.phylo_graph.add_prob_dist(prob_dist_class=dist_model)
                dist_fcn_classes[dist_model.__name__] = dist_model(None,None,None,None)
            dist_inst = dist_fcn_classes[dist_model.__name__]
            
            # Query the number of variables from the custom function
            self.phylo_graph.set_node_variable_count(node_index=node_index,
                                                     num_vars=1)
            # Query the domain of each variable from the custom function
            protein_states = [f for f in dist_inst.possible_protein_states(\
                    fcn_variants_cnt=fcn_model_info['auxiliary_info']['num_functions'],
                    max_fcn_cnt=fcn_model_info['auxiliary_info']['max_num_simul'])]
            
            self.phylo_graph.set_node_variable_domains(node_index=node_index,
                                                       var_domains=[protein_states])
            
            # Store the distribution function in the graph for the node.
            self.phylo_graph.set_node_probability_dist(node_index=node_index,
                            prob_dist_class=fcn_model_info['prob_dist_class'].__name__)
            
            
            
    def process_evidence(self, evidence_type, evidence_set, evidence_constraints):
        '''
        Incorporates evidence into the graph using the
        appropriate processor
        '''
        if evidence_type not in self.evidence_processors:
            raise Exception("Evidence type requested doesn't have a handler.")
        
        return self.evidence_processors[evidence_type].process_evidence(\
                        evidence_set=evidence_set,
                        evidence_constraints=evidence_constraints)
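
# A minimal usage sketch for the SIFTER driver above. The evidence-type
# label, file names, and constraint weights below are hypothetical;
# EvidenceProcessor is the SIFTER 2.0 processor defined later in this
# listing.
def example_sifter_run():
    sifter = SIFTER()
    sifter.load_phylogeny('example_data/Asp_protease_2.xml',
                          phylo_format='phyloxml')
    sifter.load_evidence_processor(
        evidence_type='go_annotation',
        evidence_processor_class=EvidenceProcessor,
        processor_settings={'go_file': 'example_data/go_daily-termdb.obo-xml.gz',
                            'go_format': 'oboxml'})
    ev_constraints = {'IDA': 0.9, 'IEA': 0.3}  # hypothetical per-method weights
    ev_set = sifter.parse_evidence(evidence_type='go_annotation',
                                   evidence_file='annotations.pli',
                                   evidence_constraints=ev_constraints,
                                   evidence_format='pli')
    return sifter.process_evidence(evidence_type='go_annotation',
                                   evidence_set=ev_set,
                                   evidence_constraints=ev_constraints)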
class Test_nodedistributionsetup(unittest.TestCase):
    """
    Test class for creating graphical model scaffolds from phylogeny files
    """

    def setUp(self):
        """
        Loads a phylogeny. 
        """
        phylo_file = os.path.dirname(os.path.realpath(__file__)) + "/example_data/Asp_protease_2.xml"
        self.phylo_graph = BioBayesGraph()
        self.graph = self.phylo_graph.populate_from_phyloxml(phylo_file)

    def testSetLeafNodeVariableInitialization(self):
        """
        Tries to set up graphical model leaf node variables properly. 
        """
        # Sets all nodes to have 3 variables each.
        # And defines the domain of each explicitly
        card_sum1 = 0.0
        for node_index in self.phylo_graph.iterleafnodes():
            # print graph.vertex_properties["name"][graph.vertex(node_index)]
            self.phylo_graph.set_node_variable_count(node_index=node_index, num_vars=3)
            # So var1 can take values in 0 or 1,
            # var2 can take values in {0,1,2,3}
            # and var3 can take values in {'a','b','c'}
            self.phylo_graph.set_node_variable_domains(
                node_index=node_index, var_domains=[(0, 1), (0, 1, 2, 3), ("a", "b", "c")]
            )
            for v_ind in range(self.phylo_graph.get_node_variable_count(node_index)):
                card_sum1 += len(self.phylo_graph.get_node_variable_domain(node_index, v_ind))
        self.assertEqual(card_sum1, 2745.0, "Leaf node variable cardinality sum check didn't pass")  # 305*(2+4+3)

    def testSetInternalNodeVariableInitialization(self):
        """
        Tries to set up graphical model internal node variables properly. 
        """
        card_sum2 = 0.0
        for node_index in self.phylo_graph.iterinternalnodes():
            # print graph.vertex_properties["name"][graph.vertex(node_index)]
            self.phylo_graph.set_node_variable_count(node_index=node_index, num_vars=3)
            # All three variables take values in {0, 1}
            self.phylo_graph.set_node_variable_domains(node_index=node_index, var_domains=[(0, 1), (0, 1), (0, 1)])
            for v_ind in range(self.phylo_graph.get_node_variable_count(node_index)):
                card_sum2 += len(self.phylo_graph.get_node_variable_domain(node_index, v_ind))
        self.assertEqual(card_sum2, 1818.0, "Internal node variable cardinality sum check didn't pass")  # 303*(2+2+2)

    def testProbabilityDistInitialization(self):
        """
        Tries to set probability distributions for leaf nodes.
        """
        self.testSetLeafNodeVariableInitialization()

        card_sum1 = 0.0
        for node_index in self.phylo_graph.iterleafnodes():
            # print graph.vertex_properties["name"][graph.vertex(node_index)]
            self.phylo_graph.set_node_variable_count(node_index=node_index, num_vars=3)

            for v_ind in range(self.phylo_graph.get_node_variable_count(node_index)):
                card_sum1 += len(self.phylo_graph.get_node_variable_domain(node_index, v_ind))
        self.assertEqual(card_sum1, 2745.0, "Leaf node variable cardinality sum check didn't pass")  # 305*(2+4+3)
Example #12
class EvidenceProcessor(object):
    '''
    This is the SIFTER 2.0 evidence handling
    method.
    '''
    def __init__(self, processor_settings):
        '''
        For SIFTER 2.0, the molecular function gene ontology is
        loaded into a graph.
        '''
        self.evidence_ontology = BioBayesGraph()
        self._load_go_ontology(go_file=processor_settings['go_file'],
                               go_format=processor_settings['go_format'])
    
    def parse_evidence(self, evidence_file, evidence_format, evidence_constraints):
        '''
        Routing function to parse evidence from different format sources.
        Doesn't process the evidence; only parses the file.
        '''
        if evidence_format == 'pli':
            go_ev_set = pli_parser.parser(
                evidence_file=evidence_file,
                evidence_constraints=evidence_constraints)
            
            return go_ev_set
        else:
            raise Exception("Evidence format requested isn't supported.")
    
    def process_evidence(self, evidence_set, evidence_constraints):
        '''
        Using the parsed evidence, this places the evidence set
        and modifies the gene ontology graph in the SIFTER 2.0 way.
        '''
        # For each protein in the evidence set, store the annotation
        # into the evidence graph            
        go_terms = set([])
        for pid_json, annot_json in evidence_set.iteritems():
            p_ev_set = json.loads(annot_json['evidence_set'])
            for go_term, moc in p_ev_set:
                go_terms.add(go_term)
        
        annotated_term_nodes = {}
        for go_term in go_terms:
            g_node = self.evidence_ontology.get_node_by_name(go_term)
            if g_node is None:
                raise Exception("GO term %s doesn't seem to be named in your ontology." % go_term)
            annotated_term_nodes[go_term] = g_node
        
        go_subdag = self._get_ontology_subdag(annotated_term_nodes=annotated_term_nodes)
        
        #self._visualize_ontology_subdag(go_subdag, "./sub_dag.pdf")
        
        processed_ev_set = {}
        # Now for each protein, add the graphical model evidence
        for pid_json, annot_json in evidence_set.iteritems():
            p_ev_set = json.loads(annot_json['evidence_set'])
        
            
            processed_ev_set[pid_json] = self._distribute_evidence_to_subdag_leaves(\
                                                sub_dag=go_subdag,
                                                evidence_constraints=evidence_constraints,
                                                protein_evidence_set=p_ev_set)
        return processed_ev_set
                                           
    def _get_ontology_subdag(self, annotated_term_nodes):
        """
        Given evidence_set, returns a filtered subgraph of self.evidence_ontology
        that only contains those nodes or their ancestors.
        """
        # For each annotated node, traverse to the root node of the ontology
        # to include all its less-specific terms
        all_term_nodes = set([])
        for go_term, annot_term in annotated_term_nodes.iteritems():
            #print "Tracing:", annot_term, "which is", go_term
            for generic_term in self._trace_to_ontology_root(self.evidence_ontology.g.vertex(annot_term)):
                #print "is_a", self.evidence_ontology.g.vertex_properties['go_id'][generic_term],\
                #      "i.e.", self.evidence_ontology.g.vertex_properties['go_name'][generic_term]
                all_term_nodes.add(generic_term)
        
        sub_dag = graph_tool.GraphView(self.evidence_ontology.g, vfilt=lambda v: v in all_term_nodes)
        
        return sub_dag
    
    def _trace_to_ontology_root(self, cur_node):
        """
        Generator to recursively visit all nodes on each path from a node
        up to the root. Ontology edges point from child term to parent
        term, so following out-edges walks toward the root.
        """
        #print "Graph node:", cur_node
        yield cur_node
        for out_edge in cur_node.out_edges():
            if self.evidence_ontology.g.edge_properties['edge_type'][out_edge] == 'is_a':
                for n in self._trace_to_ontology_root(out_edge.target()):
                    yield n
    
    def _get_top_node(self, sub_dag):
        """
        Gives the root node of the sub dag.
        """
        for c in sub_dag.vertices():
            if c.out_degree() == 0:
                return c
        return None
    
    def _get_leaves_from_node(self, sub_dag, top_node):
        descendant_leaves = set()
        
        #print "Top node is: %s"%str(top_node)
        #print "Successors: %s"%str(godag.successors(top_node))
        
        for c in top_node.in_neighbours():
            #print "Out degree is: %i"%godag.out_degree(c)
            if c.in_degree() != 0:
                descendant_leaves = descendant_leaves.union(self._get_leaves_from_node(sub_dag, c))
            else:
                descendant_leaves.add(c)
        return descendant_leaves
    
    def _visualize_ontology_subdag(self, sub_dag, output_file):
        """
        Draws sub-dag to file.
        """
        #http://projects.skewed.de/graph-tool/doc/search_module.html?highlight=leaf
        #gprops={'forcelabels':'true'},
        #vprops={'label':sub_dag.vertex_properties['go_id'],},
        #'xlabel':sub_dag.vertex_properties['go_name']},
        #vcolor='#00FF00'
        pos = graph_tool.draw.graphviz_draw(sub_dag,
                                      size=(30,30),
                                      ratio="fill",
                                      layout="dot",
                                      vprops={'label':sub_dag.vertex_properties['go_id'],},
                                              #'xlabel':sub_dag.vertex_properties['go_name']},
                                      # discard the rendering; only the positions are needed
                                      output=os.devnull,
                                      output_format="pdf")
        return graph_tool.draw.graph_draw(sub_dag,
                                          pos=pos,
                                          vertex_text=sub_dag.vertex_properties['go_id'],
                                          vertex_font_size=8,
                                          nodesfirst=True,
                                          #vertex_shape="double_circle",
                                          vertex_fill_color="#729fcf",
                                          vertex_pen_width=3,
                                          output=output_file)
    
    def _distribute_evidence_to_subdag_leaves(self, sub_dag, protein_evidence_set,
                                              evidence_constraints):
        """
        Propagates the evidence in protein_evidence_set over sub_dag
        and returns a dictionary of {go_term: probability} by distributing
        the evidence in the SIFTER 2.0 way. 
        """
        def prob_or(p1, p2):
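            # Noisy-OR combination: the probability that at least one of two
            # independent events with probabilities p1 and p2 occurs.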
            return 1.0 - (1.0 - p1) * (1.0 - p2)
        
        def binomial(n, k):
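            # Builds the binomial coefficient C(n, k) in place via repeated
            # Pascal's-rule row updates, e.g. binomial(4, 2) == 6.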
            bc = [1 for i in range(0, k + 1)]
            for j in range(1, n - k + 1):
                for i in range(1, k + 1):
                    bc[i] = bc[i - 1] + bc[i]
            return bc[k]
        
        def probability_of_observing_k_nodes(r_value, k):
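            # For k >= 1 this sums C(k, i) / r_value**i over i = 1..k,
            # i.e. (1 + 1/r_value)**k - 1; k == 0 is special-cased to the
            # single-leaf prior 1/r_value.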
            if (k == 0):
                return 1.0 / r_value
            prob = 0
            for i in range(1, k + 1):
                prob = prob + binomial(k, i) * 1 / (r_value ** (i))
            return prob
        
        def calculate_R_value(total_num_leaves):
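            # r is chosen so that (1 + 1/r) ** total_num_leaves == 2, making
            # probability_of_observing_k_nodes(r_value, total_num_leaves) == 1;
            # e.g. with 4 leaves, r = 1 / (2 ** 0.25 - 1) ~= 5.285.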
            r_value = 1.0 / (2 ** (1.0 / total_num_leaves) - 1)
            #error_logger.debug("r_value: %f" % r_value)
            return r_value
                
        #print protein_evidence_set
        
        # Candidate function set = leaves starting from the root.
        root_node = self._get_top_node(sub_dag)
        candidate_fcns = [sub_dag.vertex_properties['go_id'][k] \
                          for k in self._get_leaves_from_node(sub_dag, root_node)]
        
        # Set initial probabilities in DAG for evidence provided by this protein
        go_term_likelihoods = {sub_dag.vertex_properties['go_id'][k]: \
                               {'likelihood':0,
                                'dag_vertex_id':int(k)} for k in sub_dag.vertices()}
        for go_term, ev_method in protein_evidence_set:
            dag_node = self.evidence_ontology.get_node_by_name(go_term)
            go_term_likelihoods[go_term]['likelihood'] = \
                    prob_or(go_term_likelihoods[go_term]['likelihood'],
                            evidence_constraints[ev_method])
            #error_logger.debug("Used %i piece(s) of evidence (%s) to set initial belief to %f for %s" % (len(ev_methods), str(ev_methods), dag_node_descriptor.likelihood, go_term))
        
        # Now for any that are ancestral, propagate the probabilities down in a wonky way
        r_value = calculate_R_value(len(candidate_fcns))
        for go_term, ev_method in protein_evidence_set:
            dag_node = sub_dag.vertex(go_term_likelihoods[go_term]['dag_vertex_id'])
            
            # Skip leaves: in this child-to-parent DAG, a leaf has no in-edges
            if dag_node.in_degree() == 0:
                continue
             
            descendant_leaf_set = self._get_leaves_from_node(sub_dag, dag_node)
            #error_logger.debug("     For: %s leaves descendant from this node: %s" % (go_num, descendant_leaf_set))
            
            # Propagate evidence to leaf nodes
            parent_prob = go_term_likelihoods[go_term]['likelihood']
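            # Scale the parent's probability by the ratio of the prior of
            # observing zero leaves to that of observing every descendant leaf.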
            transmission_coeff = probability_of_observing_k_nodes(r_value, 0) \
                               / probability_of_observing_k_nodes(r_value, len(descendant_leaf_set))
            for leaf_node in descendant_leaf_set:
                old_likelihood = go_term_likelihoods[sub_dag.vertex_properties['go_id'][leaf_node]]['likelihood']
                new_likelihood = prob_or(old_likelihood, parent_prob * transmission_coeff)
                # Store update
                go_term_likelihoods[sub_dag.vertex_properties['go_id'][leaf_node]]['likelihood'] = new_likelihood
                #error_logger.debug("     Distributed prob to: %s. Child's likelihood went from: %s to %s" % (leaf_node.goid, old_likelihood, evidence_go_num_dict[leaf_node.goid].likelihood))
        
        # This step is performed in the Java code, and has the effect of making
        # all likelihoods non-zero, though the underlying reason for it is unknown.
        # error_logger.debug("Leaf Likelihoods before synchronizing: ")
        # for leaf_go_num in candidate_functions:
        #     error_logger.debug("Fcn: %s, Likelihood: %.16f" % (leaf_go_num, evidence_go_num_dict[leaf_go_num].likelihood))
        def synchronize_likelihoods(leaf_go_nums, r_value, evidence_nodes):
            prob = 0.0
            
            # Calculate probability of observing subset of power set having size of leaf set
            # Note, this isn't equivalent to: leaf_subset_prior =
            # probability_of_observing_k_nodes(len(leaf_go_nums)) 
            num_leaves = len(leaf_go_nums)
            leaf_subset_prior = 0
            for i in range(1, num_leaves + 1):
                leaf_subset_prior = leaf_subset_prior + binomial(num_leaves - 1, i) * 1 / (r_value ** (i))
            
            # Calculate likelihood of ANY leaf
            likelihood_of_any_leaf = 0.0
            for leaf_go_num in leaf_go_nums:
                likelihood_of_any_leaf = prob_or(likelihood_of_any_leaf, evidence_nodes[leaf_go_num]['likelihood'])
            
            # Not entirely sure what's going on here:
            # Translated from synchronizeLikelihoods() in PFunGODAG.java.
            not_in_a_subset_prior = (1.0 - likelihood_of_any_leaf) * leaf_subset_prior
            for leaf_go_num in leaf_go_nums:
                current_likelihood = evidence_nodes[leaf_go_num]['likelihood']
                new_likelihood = prob_or(current_likelihood, not_in_a_subset_prior)
                evidence_nodes[leaf_go_num]['likelihood'] = new_likelihood
        synchronize_likelihoods(candidate_fcns, r_value, go_term_likelihoods)
        #error_logger.debug("Leaf Likelihoods after synchronizing: ")
        #for leaf_go_num in candidate_fcns:
        #    error_logger.debug("Fcn: %s, Likelihood: %.16f" % (leaf_go_num, evidence_go_num_dict[leaf_go_num].likelihood))
        
        # Again, this step is performed in the Java code and makes all
        # likelihoods non-zero, though the underlying reason for it is unknown.
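        # Each zero-likelihood leaf receives the same value a, chosen so the
        # product of all leaf likelihoods equals the prior
        # 1 / r_value ** total_num_leaves (total * a ** count == total * rest).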
        def a_priori_evidence(leaf_go_nums, r_value, evidence_nodes):
            total = 1.0
            count_of_unlikely_leaves = 0
            total_num_leaves = len(leaf_go_nums)
            
            for leaf_go_num in leaf_go_nums:
                leaf_likelihood = evidence_nodes[leaf_go_num]['likelihood']
                if (leaf_likelihood > 0):
                    total = total * leaf_likelihood
                else:
                    count_of_unlikely_leaves = count_of_unlikely_leaves + 1
            
            if (count_of_unlikely_leaves > 0):
                rest = (1.0 / (r_value ** total_num_leaves)) / total
                a = rest ** (1.0 / count_of_unlikely_leaves)
            else:
                a = 0.0  # never used in this case; no leaf likelihood is zero
            
            for leaf_go_num in leaf_go_nums:
                leaf_likelihood = evidence_nodes[leaf_go_num]['likelihood']
                
                # For each zero likelihood, we want to fudge factor a bit.
                if (leaf_likelihood <= 0):
                    leaf_likelihood = a
                evidence_nodes[leaf_go_num]['likelihood'] = leaf_likelihood
        a_priori_evidence(candidate_fcns, r_value, go_term_likelihoods)
        #error_logger.debug("Leaf Likelihoods after a_priori_evidence: ")
        #for leaf_go_num in candidate_functions:
        #    error_logger.debug("Fcn: %s, Likelihood: %.16f" % (leaf_go_num, evidence_go_num_dict[leaf_go_num].likelihood))
        #error_logger.debug("------------- Done computing leaf likelihoods")
        
        return {k:go_term_likelihoods[k]['likelihood'] for k in candidate_fcns}
    
    def _load_go_ontology(self, go_file, go_format='oboxml'):
        """
        Loads the GO ontology into the evidence graph.
        """
        if go_format == 'oboxml':
            obo_from_gzip = gzip.open(go_file, 'rb')
            
            # Ontology aspect can be one of:
            # [u'molecular_function', u'cellular_component', u'biological_process']
            graph = self.evidence_ontology.populate_from_go_obo_xml(\
                                obo_file_buffer=obo_from_gzip,
                                ontology_aspect='molecular_function')
            obo_from_gzip.close()
        elif go_format == 'biobayesgraph':
            self.evidence_ontology.import_from_graphml(go_file)