示例#1
0
文件: pacfm_cl.py 项目: ecotox/pacfm
    def init_project(self):
        """
            Creates the database selector for self.db_name and the input
            builder that is handed that selector, self.input_file_path
            (as fam_path) and self.calculation_type.
        """
        selector = Selector(self.db_name)
        self.biodb_selector = selector

        builder_options = dict(
            biodb_selector=selector,
            fam_path=self.input_file_path,
            calculation_type=self.calculation_type,
        )
        self.input_builder = InputBuilder(**builder_options)
示例#2
0
文件: pacfm_cl.py 项目: ecotox/pacfm
class PacfmCL(PacfmBase):
    """
        Commandline interface for PACFM.
    """
    def __init__(self, pw_length=False, pw_sequence=False, pw_minpath=False,
                 pw_key_enzyme=-1, n_association=1000, input_file_path="",
                 output_figure_path="pacfm.png", output_table_path="pacfm.tsv",
                 output_type=0, info_types=None,
                 abbreviations_path="abbreviations.txt", links_path="links.txt",
                 key_enzymes_path="key_enzymes.txt", calculation_type="sum",
                 db_name="kegg_orthology", colorbar_title="Relative abundance"):
        """
            Forwards every option to PacfmBase.__init__ with normalized=False.

            pw_length, pw_sequence, pw_minpath: flags that switch on the
                corresponding step in normalize_pathways.
            pw_key_enzyme: key enzyme filter type; negative disables it.
            n_association: passed to the filter when pw_key_enzyme is 3.
            input_file_path: input file handed to the input builder.
            output_figure_path, output_table_path: where save_plot and
                save_data write.
            output_type: 0 saves the plot, 1 the table, 2 both.
            info_types: collection of info names checked by save_info
                ("abbreviations", "links", "key_enzymes"); defaults to an
                empty list.
            abbreviations_path, links_path, key_enzymes_path: targets of
                the save_* info methods.
            calculation_type, db_name, colorbar_title: passed through to
                the input builder, the selector and the drawer.
        """
        # A literal [] default would be one list object shared by every
        # instance created without info_types; build a fresh list instead.
        if info_types is None:
            info_types = []

        PacfmBase.__init__(
            self,
            normalized=False,
            pw_length=pw_length,
            pw_sequence=pw_sequence,
            pw_minpath=pw_minpath,
            pw_key_enzyme=pw_key_enzyme,
            n_association=n_association,
            input_file_path=input_file_path,
            output_figure_path=output_figure_path,
            output_table_path=output_table_path,
            output_type=output_type,
            info_types=info_types,
            abbreviations_path=abbreviations_path,
            links_path=links_path,
            key_enzymes_path=key_enzymes_path,
            calculation_type=calculation_type,
            db_name=db_name,
            colorbar_title=colorbar_title,
        )

        


    def init_variables(self, **kwargs):
        """
            Overridden method: copies each expected keyword argument onto
            the instance as an attribute of the same name.

            A missing keyword raises KeyError; keywords that are not listed
            below are ignored.
        """
        expected_names = (
            # pathway analysis switches
            'pw_length',
            'pw_sequence',
            'pw_minpath',
            'pw_key_enzyme',
            # association criterion
            'n_association',
            # input and output locations
            'input_file_path',
            'output_figure_path',
            'output_table_path',
            # what to write
            'output_type',
            # optional info files
            'info_types',
            'abbreviations_path',
            'links_path',
            'key_enzymes_path',
            # calculation, database and plot settings
            'calculation_type',
            'db_name',
            'colorbar_title',
        )
        for attribute in expected_names:
            setattr(self, attribute, kwargs[attribute])


### COMMANDLINE SPECIFIC ###
    
    def init_project(self):
        """
            Creates the database selector for self.db_name and the input
            builder that is handed that selector, self.input_file_path
            (as fam_path) and self.calculation_type.
        """
        selector = Selector(self.db_name)
        self.biodb_selector = selector

        builder_options = dict(
            biodb_selector=selector,
            fam_path=self.input_file_path,
            calculation_type=self.calculation_type,
        )
        self.input_builder = InputBuilder(**builder_options)
    
    
    def normalize_pathways(self):
        """
            Runs the selected pathway analysis options and stores the
            assembled result in self.data_frame.

            Without an input builder only a hint is printed.
        """
        if not self.input_builder:
            print("Select an input file first")
            return

        analyzer = PathwayAnalyzer(self.biodb_selector, self.input_builder)
        self.pw_analyzer = analyzer

        # normalizations, each one switched on by its own flag
        if self.pw_length:
            print("normalizing by sequence length")
            analyzer.normalize_by_pathway('sequence_length')

        if self.pw_sequence:
            print("normalizing by the number of proteins/enzymes in the pathway")
            analyzer.normalize_by_pathway('n_protein')

        if self.pw_minpath:
            print("normalizing by the minpath algorithm")
            analyzer.normalize_by_algorithm('minpath')

        # key enzyme filtering; a negative type switches it off
        filter_type = self.pw_key_enzyme
        if filter_type >= 0:
            if filter_type in (0, 1):
                analyzer.filter_pathways_by_key_leaf_features(filter_type)

            ### key_enzyme_type 2 is disabled in the commandline version.
            elif filter_type == 3:
                assert self.n_association >= 0, "You have not set the criteria for the numnber of pathway associations."
                print("Running the pathway association check. Please wait...")
                analyzer.filter_pathways_by_key_leaf_features(3, self.n_association)

        self.data_frame = self.input_builder.assembler.to_data_frame()

        print("OK")

    
    def plot_circos(self):
        """
            Draws the final plot and keeps it in self.result_image.

            Nothing happens unless self.output_type is 0 or 2.
        """
        if self.output_type not in (0, 2):
            return

        highlights = HighlightContainer()
        highlights.load()

        links = LinkContainer()
        links.load()

        plots = PlotContainer()
        plots.load()

        # give each plot the value range of the ideogram on its level
        for ideogram in self.input_builder.assembler.ideograms:
            lowest = ideogram.get_min_value()
            plots.get_by_index(ideogram.level - 1).min_value = lowest
            highest = ideogram.get_max_value()
            plots.get_by_index(ideogram.level - 1).max_value = highest

        self.input_builder.build_circos_inputs_and_run(
            plots=plots, links=links, highlights=highlights)

        drawer = Drawer(links, plots, self.colorbar_title)
        self.result_image = drawer.get_output_image()


### info ###
    def save_abbreviations(self):
        """
            Writes the abbreviations of levels 1 to 3 into
            self.abbreviations_path.

            The ideograms come from the input builder's assembler unless
            self.normalized is set, in which case the pathway analyzer's
            ideograms are used. Abbreviations are cached per level in
            self.level_abbreviations under the level number as a string.
        """
        if not self.normalized:
            ideograms = self.input_builder.assembler.ideograms
        else:
            ideograms = self.pw_analyzer.ideograms

        for ide in ideograms:
            # Entries are stored and read back under str(level), so the
            # membership test has to use the same string key; testing the
            # raw ide.level never matched a stored entry.
            level_key = str(ide.level)
            if level_key not in self.level_abbreviations:
                self.level_abbreviations[level_key] = ide.abbreviations

        with open(self.abbreviations_path, 'w') as outfile:
            for level in xrange(1, 4):
                outfile.write("##### LEVEL %s #####\n" % level)
                # NOTE(review): presumably every level 1-3 has an entry by
                # now; a missing one fails on this lookup.
                for k, v in self.level_abbreviations[str(level)].iteritems():
                    outfile.write("%s: %s\n" % (k, v))

    def save_key_enzyme_info(self):
        """
            Saves the key enzymes into a file.

            For every chromosome of the last ideogram the names of its
            non-null features are recorded in self.key_enzymes, which is
            then written to self.key_enzymes_path, one entry per line.
        """
        source = self.pw_analyzer if self.normalized else self.input_builder.assembler
        last_ideogram = source.ideograms[-1]

        for chromosome in last_ideogram.chromosomes:
            features = chromosome.get_non_null_features()
            self.key_enzymes[chromosome.name] = [feature.name for feature in features]

        with open(self.key_enzymes_path, 'w') as outfile:
            for name, enzymes in self.key_enzymes.iteritems():
                outfile.write("%s: %s\n" % (name, ', '.join(enzymes)))

    def save_link_info(self):
        """
            Saves the pathway association info into a file.

            Each link coordinate of the last ideogram is resolved to its
            feature; the names of the linked coordinates at the ideogram's
            level are stored in self.enzyme_pathway_link under the feature
            name and then written to self.links_path.
        """
        source = self.pw_analyzer if self.normalized else self.input_builder.assembler
        last_ideogram = source.ideograms[-1]

        for feature_id, link in last_ideogram.link_coordinates.iteritems():
            feature = self.biodb_selector.getFeatureByID(feature_id)
            pathway_names = [
                coordinate.get_name_by_level(last_ideogram.level)
                for coordinate in link.coordinates
            ]
            self.enzyme_pathway_link[feature.name] = pathway_names

        with open(self.links_path, 'w') as outfile:
            for name, pathways in self.enzyme_pathway_link.iteritems():
                outfile.write("%s: %s\n" % (name, ', '.join(pathways)))
 
    def save_info(self):
        """
            Saves selected info types into files.

            Each name found in self.info_types triggers its save method,
            always in the order abbreviations, links, key enzymes.
        """
        savers = (
            ("abbreviations", self.save_abbreviations),
            ("links", self.save_link_info),
            ("key_enzymes", self.save_key_enzyme_info),
        )
        for info_type, saver in savers:
            if info_type in self.info_types:
                saver()
### info ###       

### output ###
    def save_data(self):
        """
            Writes self.data_frame to self.output_table_path as a
            tab-separated table whose index column is labelled "Index".
        """
        table = self.data_frame
        table.to_csv(
            self.output_table_path,
            sep="\t",
            index_label="Index",
        )
        
    def save_plot(self):
        """
            Saves self.result_image to self.output_figure_path and closes
            the image afterwards.

            The image is closed even when saving fails; the error raised
            by save() still propagates to the caller.
        """
        image = self.result_image
        try:
            image.save(self.output_figure_path)
        finally:
            # release the image even if save() raised
            image.close()
   
    def save_output(self):
        """
            Writes the requested outputs: output_type 0 saves the plot,
            1 saves the table, 2 saves the plot and then the table. Any
            other value writes nothing.
        """
        mode = self.output_type
        if mode in (0, 2):
            self.save_plot()
        if mode in (1, 2):
            self.save_data()
示例#3
0
 def setUp(self):
     """
     stores the "kegg_orthology_metabolism" database through an Updater
     and then opens a Selector on the same database file.
     """
     db_name = "kegg_orthology_metabolism"
     db_location = '../' + sql_db_path

     self.updater = Updater(db_name, db_location)
     self.updater.store_database()
     self.selector = Selector(db_name, db_location)
示例#4
0
class BioDBTests(unittest.TestCase):
   
    #def test_foo(self):
    #    self.failUnless(False)
    def setUp(self):
        """
        stores the "kegg_orthology_metabolism" database through an Updater
        and then opens a Selector on the same database file.
        """
        db_name = "kegg_orthology_metabolism"
        db_location = '../' + sql_db_path

        self.updater = Updater(db_name, db_location)
        self.updater.store_database()
        self.selector = Selector(db_name, db_location)

    def test_biodb(self):
        """
        checks that the biodb table is listed in SQLITE_MASTER.
        """
        print("testing biodb table")
        table_list = [row[0] for row in self.updater.store.execute('select tbl_name from SQLITE_MASTER')]
        # assertIn replaces the deprecated failIf alias and names the
        # missing table in the failure message.
        self.assertIn(self.updater.biodb_table, table_list)


    def test_hierarchies(self):
        """
        checks that the hierarchies table is listed in SQLITE_MASTER.
        """
        print("testing hierarchy table")
        table_list = [row[0] for row in self.updater.store.execute('select tbl_name from SQLITE_MASTER')]
        # assertIn replaces the deprecated failIf alias and names the
        # missing table in the failure message.
        self.assertIn(self.updater.hier_table, table_list)
        

    def test_names_acessions(self):
        """
        checks the uniqueness of names
        """
        print "testing uniqueness of names in each level."
        levelCount = self.selector.getLevelCount()
        for i in range(1, (levelCount+1)): 
            children= self.selector.getFeaturesByLevel(i)
            names= [feature.name for feature in children]
            name_counter= Counter(names)
            for k,v in name_counter.iteritems():
                if v != 1:
                    print k, v
            self.assertEqual(len(names), len(set(names)))

    def test_levels(self):
        """
        1. checks if all leaf nodes have the same level of hierarchies
        2. checks if each feature and its parent have max 1 level in between

        Every non-empty lineage of a leaf feature must be at most
        (level count - 1) long, start exactly one level above the leaf and
        run through the levels level count - 1, ..., 1 in that order.
        """
        print("testing number of hierarchy levels for each leaf node")
        print("testing if a parent is exactly one level higher than the child in the database hierarchy.")

        # The level count does not change inside the loops, so query it
        # once instead of once per lineage.
        n_levels = self.selector.getLevelCount()
        expected_levels = list(range(n_levels - 1, 0, -1))

        for feature in self.selector.getFeaturesByLevel(n_levels):
            lineages = self.selector.getLineages2(feature)
            for group in lineages.values():
                for lin in group:
                    if len(lin) < 1:
                        continue
                    self.assertLessEqual(len(lin), n_levels - 1)
                    self.assertEqual(feature.level - lin[0].level, 1)
                    self.assertEqual([node.level for node in lin], expected_levels)


    def test_print_stats(self):
        """
        prints a header and then calls the selector's getLevelStats().
        """
        print("Database stats:")
        selector = self.selector
        selector.getLevelStats()
    
    def compare_new_with_current_version(self):
        """
        only prints a notice; currently this task is done by checking
        database log files from the database source provider.
        """
        notice = (
            "You already have this database in fantom db. "
            "If you still want to update the existing database, "
            "insert a different database name (e.g kegg_orthology_2) "
            "and try again!"
        )
        print(notice)


    def no_test_insertions(self):
        """
        prints every feature on levels 2 .. level count - 1 whose parent in
        the hierarchy table is more than one level above it.

        The name does not start with "test", so the default unittest
        discovery does not collect this method.
        """
        ### eliminate the hierarchy groups which don't have appropriate number of levels
        ### 1. check if there is one level between the child and parent hierarchy objects
        for level in range(2, self.selector.getLevelCount()):
            # The test case has no getFeaturesByLevel or store of its own;
            # go through the selector and the updater's store, as the
            # other tests in this class do.
            for feature in self.selector.getFeaturesByLevel(level):
                results = self.updater.store.find(
                    Hierarchy,
                    BioDB.id == Hierarchy.parentID,
                    Hierarchy.childID == feature.id)
                for result in results:
                    parent = result.parent
                    if (feature.level - parent.level) > 1:
                        print("%s %s" % (feature.name, parent.name))
                        print("%s %s" % (feature.level, parent.level))
                        print("")
示例#5
0
from biodb.sqling.selector import Selector

# Open a selector on the "ncbi" database.
s= Selector("ncbi")

# NOTE(review): getLineages2 is called without arguments and its return
# value is discarded -- confirm the signature allows this call.
s.getLineages2()