def testExportFormats(self):
     """
     Builds a graph from the example PhyloXML phylogeny and exports it
     as GraphML into a StringIO buffer.

     The test passes as long as loading and exporting raise no error;
     the exported content itself is not inspected.
     """
     test_dir = os.path.dirname(os.path.realpath(__file__))
     example_path = test_dir + '/example_data/Asp_protease_2.xml'
     graph_model = BioBayesGraph()
     out_buffer = StringIO()
     try:
         graph_model.populate_from_phyloxml(example_path)
         graph_model.export_as_graphml(out_buffer)
     finally:
         # Close the buffer even when loading or exporting fails.
         out_buffer.close()
 def testPhylogenyFromPhyloXML(self):
     """
     Loads the example PhyloXML tree from file and checks the result.

     Verifies that:
       * the graph has a "name" vertex property,
       * the graph has a "branch_length" edge property,
       * the branch lengths sum to the expected total,
       * the graph has the expected number of vertices and edges.
     """
     phylo_file = os.path.dirname(os.path.realpath(__file__)) + '/example_data/Asp_protease_2.xml'
     phylo_graph = BioBayesGraph()
     graph = phylo_graph.populate_from_phyloxml(phylo_file)

     self.assertIn("name", graph.vertex_properties,
                   msg="Clade names not imported correctly.")

     self.assertIn("branch_length", graph.edge_properties,
                   msg="Branch lengths not imported correctly.")

     # Stored values are converted with float() before summing.
     branch_lengths = graph.edge_properties['branch_length']
     bl_sum = sum(float(branch_lengths[e]) for e in graph.edges())

     self.assertAlmostEqual(bl_sum, 168.58699, delta=1e-6,
                            msg="Branch lengths not imported correctly "
                                "(sum is wrong)")

     self.assertEqual(graph.num_vertices(), 608,
                      "Didn't get expected number of nodes from phylogeny.")
     self.assertEqual(graph.num_edges(), 607,
                      "Didn't get expected number of edges from phylogeny.")
class Test_inference(unittest.TestCase):
    """
    Test class for running inference queries on a graphical model built
    from a phylogeny file.
    """

    # Names of the phylogeny nodes used as evidence and as query targets.
    HARD_EVIDENCE_NODE = "C8SHB6_9RHIZ/82-171"
    VIRTUAL_EVIDENCE_NODE = "C7X6P2_9PORP/206-299"
    UNOBSERVED_NODE = "C7PIL1_CHIPD/40-136"

    def setUp(self):
        """
        Loads a phylogeny and gives every node two variables that share
        one (constant) probability distribution class.
        """
        phylo_file = os.path.dirname(os.path.realpath(__file__)) + "/example_data/Asp_protease_2.xml"
        self.phylo_graph = BioBayesGraph()
        self.graph = self.phylo_graph.populate_from_phyloxml(phylo_file)

        # Incorporates the code for the ProbDist1 class into the graph
        class ProbDist1(object):
            def __init__(self, graph, node, node_to_name_map):
                # graph, node are respectively:
                #   http://projects.skewed.de/graph-tool/doc/graph_tool.html#graph_tool.Graph
                #   http://projects.skewed.de/graph-tool/doc/graph_tool.html#graph_tool.Vertex
                # node_to_name_map is a python dictionary in which
                # any named node's index (can get by int(node_of_interest))
                # will map to the phylogenetic name associated. (If exists)
                self.graph = graph
                self.node = node
                # NOTE(review): the attribute name is the reverse of the
                # parameter name; kept as-is in case it is read elsewhere.
                self.name_to_node_map = node_to_name_map

            def compute_virtual_likelihood(self, vals, auxiliary_info):
                # "vals" is vector of the particular values this node
                # is taking.
                #
                # "auxiliary_info" is the custom information provided
                # when the virtual evidence was specified.
                return 1

            def compute_pd(self, vals):
                # Returns the conditional probability for this node at vals.

                # Get parent node(s). Not used by this constant
                # distribution; kept to show how parents are reached.
                parents = [int(p_node) for p_node in self.node.in_neighbours()]

                # Note that you shape this depending on node location and
                # other properties in the graph.
                # Also, you can store computations into class-wide variables
                # (e.g. ClassName.var_to_store) to cache computations. You
                # could also declare the variable being stored to as global.
                return 1

        self.phylo_graph.add_prob_dist(prob_dist_class=ProbDist1)

        # Sets all nodes to have two variables:
        # the first with three values, the second with two values.
        for node in self.graph.vertices():
            node_index = int(node)
            # Each node has v1, v2
            self.phylo_graph.set_node_variable_count(node_index=node_index, num_vars=2)
            # v1 \in {0,1,2}, v2 \in {0,1}
            self.phylo_graph.set_node_variable_domains(node_index=node_index, var_domains=[(0, 1, 2), (0, 1)])
            # Use the same probability dist (defined in the class above)
            self.phylo_graph.set_node_probability_dist(node_index=node_index, prob_dist_class="ProbDist1")

    def _set_evidence(self):
        """
        Clears all evidence, then adds one "hard" observation and one
        "virtual" observation.
        """
        self.phylo_graph.clear_all_evidence()

        self.phylo_graph.add_hard_evidence(
            node_index=self.phylo_graph.get_node_by_name(self.HARD_EVIDENCE_NODE),
            observed_value=(0, 1),  # v1 = 0, v2 = 1
        )

        self.phylo_graph.add_virtual_evidence(
            node_index=self.phylo_graph.get_node_by_name(self.VIRTUAL_EVIDENCE_NODE),
            observed_value=(2, 0),  # v1 = 2, v2 = 0
            # Info provided to the likelihood function (a one-element set).
            auxiliary_info={"custom_info"},
        )

    def _query_and_check_marginals(self):
        """
        Queries the three nodes of interest, prints every marginal and
        checks the one expected marginal per node.

        Only the marginal whose variable assignment equals the expected
        assignment is checked; all other marginals are just printed.
        """
        get_node = self.phylo_graph.get_node_by_name
        query_nodes = [
            get_node(self.UNOBSERVED_NODE),  # Some other node
            get_node(self.VIRTUAL_EVIDENCE_NODE),  # Set as virtual observation
            get_node(self.HARD_EVIDENCE_NODE),  # Set as hard observation
        ]

        q_results = self.phylo_graph.inference_query(query_nodes=query_nodes)

        # node -> (variable assignment, expected marginal probability)
        expected = {
            get_node(self.VIRTUAL_EVIDENCE_NODE): ((0, 0), 0.166666666667),
            get_node(self.HARD_EVIDENCE_NODE): ((0, 1), 1.0),
            get_node(self.UNOBSERVED_NODE): ((0, 1), 0.166666666667),
        }

        for qn, marginals in q_results.items():
            expected_vals, expected_prob = expected[qn]
            print("For node %s" % (self.phylo_graph.get_name_by_node(qn),))
            for var_val, marg_val in marginals:
                print("%s : %s" % (var_val, marg_val))
                if var_val == expected_vals:
                    self.assertAlmostEqual(marg_val, expected_prob)

    def _print_leave_one_out(self):
        """
        Runs the leave-one-out query and prints the results per node,
        followed by a separator line.
        """
        q_results = self.phylo_graph.inference_query_leave_one_out()
        for qn, left_out_results in q_results.items():
            print("For node %s" % (self.phylo_graph.get_name_by_node(qn),))
            pprint(left_out_results)
        print("------\n")

    def testInference(self):
        """
        Runs a query using libdai.
        """
        self._set_evidence()
        self.phylo_graph.create_inference_representation()
        self._query_and_check_marginals()

    def testLeaveOneOut(self):
        """
        Tests leave-one-out inference looping.

        Runs the leave-one-out query before and after a regular
        inference query, and checks the regular query's marginals.
        """
        self._set_evidence()

        self._print_leave_one_out()
        self._query_and_check_marginals()
        print("------\n")
        self._print_leave_one_out()
# Example no. 4
# 0
class SIFTER(object):
    """
    Ties a phylogeny graph (a BioBayesGraph) to a set of pluggable
    evidence processors, keyed by evidence type.
    """

    def __init__(self):
        """
        Constructor. Creates an empty graph and an empty processor registry.
        """
        self.phylo_graph = BioBayesGraph()
        # Maps evidence type -> evidence processor instance.
        self.evidence_processors = {}

    def load_phylogeny(self, phylo_file, phylo_format='phyloxml'):
        """
        Populates the internal graph from a phylogeny file.

        phylo_file   -- passed through unchanged to the graph loader.
        phylo_format -- 'phyloxml' (default) or 'newick'.

        Raises ValueError for any other format.
        """
        if phylo_format == 'phyloxml':
            self.phylo_graph.populate_from_phyloxml(phylo_file)
        elif phylo_format == 'newick':
            self.phylo_graph.populate_from_newick(phylo_file)
        else:
            raise ValueError("Phylo format requested isn't supported.")

    def load_evidence_processor(self, evidence_type,
                                evidence_processor_class, processor_settings):
        """
        Instantiates evidence_processor_class(processor_settings) and
        registers the instance under evidence_type, replacing any
        processor previously registered for that type.
        """
        self.evidence_processors[evidence_type] = evidence_processor_class(processor_settings)

    def _get_evidence_processor(self, evidence_type):
        """
        Returns the processor registered for evidence_type.

        Raises ValueError if no processor has been loaded for that type.
        """
        if evidence_type not in self.evidence_processors:
            raise ValueError("Evidence type requested doesn't have a handler.")
        return self.evidence_processors[evidence_type]

    def parse_evidence(self, evidence_type, evidence_file,
                       evidence_constraints, evidence_format):
        """
        Delegates parsing of evidence_file to the processor registered
        for evidence_type and returns whatever that processor returns.

        Raises ValueError if no processor is registered for evidence_type.
        """
        processor = self._get_evidence_processor(evidence_type)
        return processor.parse_evidence(
            evidence_file=evidence_file,
            evidence_format=evidence_format,
            evidence_constraints=evidence_constraints)

    def setup_nodes(self, node_to_fcn_model_map):
        """
        Configures every vertex of the graph with its auxiliary
        information, a single variable, that variable's domain and its
        probability distribution.

        node_to_fcn_model_map is a function mapping a vertex index to
        {
            'auxiliary_info': {'num_functions': num_fcns,
                               'max_num_simul': 3},
            'prob_dist_class': prob_dist_class
        }
        'prob_dist_class' must be the class object itself, not its name:
        it is instantiated here and its __name__ is used as the key
        under which the distribution is stored in the graph.

        E.g.
        node_to_fcn_model_map = \\
            lambda v: {
                'auxiliary_info': {'num_functions': num_fcns,
                                   'max_num_simul': 3},
                'prob_dist_class': FunctionModels.Sifter2.FunctionModel
            }
        """
        # One helper instance per distribution class name, shared by all
        # nodes that use that class.
        dist_fcn_classes = {}

        for n in self.phylo_graph.g.vertices():
            node_index = int(n)
            fcn_model_info = node_to_fcn_model_map(node_index)
            aux_info = fcn_model_info['auxiliary_info']

            # Store auxiliary info by node
            self.phylo_graph.set_node_auxiliary_information(
                node_index=node_index,
                auxiliary_info=aux_info)

            # Register each distribution class with the graph once, and
            # build one instance used only to query the protein states.
            dist_model = fcn_model_info['prob_dist_class']
            dist_name = dist_model.__name__
            if dist_name not in dist_fcn_classes:
                self.phylo_graph.add_prob_dist(prob_dist_class=dist_model)
                dist_fcn_classes[dist_name] = dist_model(None, None, None, None)
            dist_inst = dist_fcn_classes[dist_name]

            # Every node carries exactly one variable.
            self.phylo_graph.set_node_variable_count(node_index=node_index,
                                                     num_vars=1)

            # Query the domain of that variable from the custom function
            protein_states = list(dist_inst.possible_protein_states(
                fcn_variants_cnt=aux_info['num_functions'],
                max_fcn_cnt=aux_info['max_num_simul']))

            self.phylo_graph.set_node_variable_domains(
                node_index=node_index,
                var_domains=[protein_states])

            # Store the distribution function in the graph for the node.
            self.phylo_graph.set_node_probability_dist(
                node_index=node_index,
                prob_dist_class=dist_name)

    def process_evidence(self, evidence_type, evidence_set, evidence_constraints):
        """
        Incorporates evidence into the graph using the appropriate
        processor and returns whatever that processor returns.

        Raises ValueError if no processor is registered for evidence_type.
        """
        processor = self._get_evidence_processor(evidence_type)
        return processor.process_evidence(
            evidence_set=evidence_set,
            evidence_constraints=evidence_constraints)
class Test_nodedistributionsetup(unittest.TestCase):
    """
    Tests for assigning variables and variable domains to the nodes of a
    graph built from a phylogeny file.
    """

    def setUp(self):
        """
        Builds a fresh graph from the example PhyloXML phylogeny.
        """
        base_dir = os.path.dirname(os.path.realpath(__file__))
        example_path = base_dir + "/example_data/Asp_protease_2.xml"
        self.phylo_graph = BioBayesGraph()
        self.graph = self.phylo_graph.populate_from_phyloxml(example_path)

    def _node_domain_size_total(self, node_index):
        """
        Returns the summed domain sizes of all variables at node_index.
        """
        var_total = self.phylo_graph.get_node_variable_count(node_index)
        return sum(
            len(self.phylo_graph.get_node_variable_domain(node_index, var_pos))
            for var_pos in range(var_total)
        )

    def testSetLeafNodeVariableInitialization(self):
        """
        Gives every leaf node three variables with explicit domains and
        checks the total domain cardinality over all leaves.
        """
        leaf_total = 0.0
        for leaf_index in self.phylo_graph.iterleafnodes():
            self.phylo_graph.set_node_variable_count(node_index=leaf_index, num_vars=3)
            # var1 takes values in {0,1}, var2 in {0,1,2,3}
            # and var3 in {'a','b','c'}
            self.phylo_graph.set_node_variable_domains(
                node_index=leaf_index,
                var_domains=[(0, 1), (0, 1, 2, 3), ("a", "b", "c")],
            )
            leaf_total += self._node_domain_size_total(leaf_index)
        # 305*(2+4+3)
        self.assertEqual(leaf_total, 2745.0, msg="Leaf node variable cardinality sum check didn't pass")

    def testSetInternalNodeVariableInitialization(self):
        """
        Gives every internal node three binary variables and checks the
        total domain cardinality over all internal nodes.
        """
        internal_total = 0.0
        for inner_index in self.phylo_graph.iterinternalnodes():
            self.phylo_graph.set_node_variable_count(node_index=inner_index, num_vars=3)
            # All three variables take values in {0,1}
            self.phylo_graph.set_node_variable_domains(
                node_index=inner_index,
                var_domains=[(0, 1), (0, 1), (0, 1)],
            )
            internal_total += self._node_domain_size_total(inner_index)
        # 303*(2+2+2)
        self.assertEqual(internal_total, 1818.0, msg="Internal node variable cardinality sum check didn't pass")

    def testProbabilityDistInitialization(self):
        """
        Re-runs the leaf variable setup, sets the variable count of every
        leaf to three again and checks that the total domain cardinality
        is unchanged.
        """
        self.testSetLeafNodeVariableInitialization()

        leaf_total = 0.0
        for leaf_index in self.phylo_graph.iterleafnodes():
            self.phylo_graph.set_node_variable_count(node_index=leaf_index, num_vars=3)
            leaf_total += self._node_domain_size_total(leaf_index)
        # 305*(2+4+3)
        self.assertEqual(leaf_total, 2745.0, msg="Leaf node variable cardinality sum check didn't pass")