Пример #1
0
 def run( self, args ):
     """
     Attach the counts reader and results writer, then delegate to ModelRunner.run.

     args must provide cncnt_file_name (passed to ConanCountsReader) and cnsm_file_name (passed to
     ConanSnvMixWriter); the same args object is forwarded unchanged to ModelRunner.run.
     """
     counts_path = args.cncnt_file_name
     self.reader = ConanCountsReader( counts_path )

     output_path = args.cnsm_file_name
     self.writer = ConanSnvMixWriter( output_path )

     ModelRunner.run( self, args )
Пример #2
0
class ConanModelRunner( ModelRunner ):
    """
    Model runner that trains and classifies separately for each copy number state.

    Counts are read through a ConanCountsReader and results are written through a ConanSnvMixWriter; both
    are created in run(). Fitted parameters and the priors used to fit them are stored per copy number
    state in self.parameters and self.priors.

    self.model_class, self._get_priors and self._write_priors are used but not defined in this class; they
    are expected to be supplied elsewhere (presumably by ModelRunner or a subclass - confirm).
    """
    # Maximum number of rows classified and written per batch in _classify_chromosome.
    # Subclasses may override it to trade memory for fewer classify/write calls.
    _classify_chunk_size = int( 1e5 )

    def __init__( self ):
        ModelRunner.__init__( self )

        self.data_class = JointData

        # Both dictionaries are keyed by copy number state and filled in by _train_cn_state.
        self.parameters = {}
        self.priors = {}

    def run( self, args ):
        """
        Attach the counts reader and results writer, then delegate to ModelRunner.run.

        args must provide cncnt_file_name (reader input) and cnsm_file_name (writer output); the same args
        object is forwarded unchanged to ModelRunner.run.
        """
        self.reader = ConanCountsReader( args.cncnt_file_name )
        self.writer = ConanSnvMixWriter( args.cnsm_file_name )

        ModelRunner.run( self, args )

    def _classify( self, args ):
        """
        Classify every copy number state reported by the reader, in sorted order.

        args is accepted for interface compatibility but is not used here.
        """
        for cn_state in sorted( self.reader.get_cn_states() ):
            self._classify_cn_state( cn_state )

    def _classify_cn_state( self, cn_state ):
        """Classify each chromosome the reader lists for cn_state, in sorted order."""
        for chr_name in sorted( self.reader.get_chr_list( cn_state ) ):
            self._classify_chromosome( cn_state, chr_name )

    def _train( self, args ):
        """Train one model per copy number state (in sorted order), then write out the priors."""
        for cn_state in sorted( self.reader.get_cn_states() ):
            self._train_cn_state( cn_state, args )

        self._write_priors()

    def _train_cn_state( self, cn_state, args ):
        """
        Fit the model for a single copy number state and record its parameters and priors.

        When args.subsample_size is positive the model is trained on a subsample drawn by _subsample;
        otherwise it is trained on whatever self.reader.get_counts() returns. args.max_iters and
        args.convergence_threshold are passed through to model.train.
        """
        if args.subsample_size > 0:
            counts = self._subsample( cn_state, args.subsample_size )
        else:
            # NOTE(review): every other get_counts call in this class passes cn_state, this one passes
            # nothing - confirm the reader returns only the counts belonging to cn_state here.
            counts = self.reader.get_counts()

        nclass = self._get_nclass( cn_state )
        priors = self._get_priors( nclass )
        model = self.model_class( nclass )
        data = self.data_class( counts )

        self.parameters[cn_state] = model.train( 
                                                data,
                                                priors,
                                                args.max_iters,
                                                args.convergence_threshold
                                                )
        self.priors[cn_state] = priors

    def _get_nclass( self, cn_state ):
        """
        Return the class counts used to build a model for cn_state.

        The result maps 'normal' to 3 and 'tumour' to constants.cn_state_map[cn_state]; a KeyError is
        raised if cn_state is missing from that map.
        """
        return {
            'normal' : 3,
            'tumour' : constants.cn_state_map[cn_state]
        }

    def _classify_chromosome( self, cn_state, chr_name ):
        """
        Classify one chromosome of one copy number state and write the results.

        Rows are processed in consecutive batches of at most self._classify_chunk_size so that only one
        batch of data and responsibilities is built at a time. Each batch is classified with the
        parameters stored for cn_state and handed to the writer together with its matching rows.
        """
        model = self.model_class( self._get_nclass( cn_state ) )

        counts = self.reader.get_counts( cn_state, chr_name )
        jcnt_rows = self.reader.get_rows( cn_state, chr_name )
        end = self.reader.get_chr_size( cn_state, chr_name )

        chunk_size = self._classify_chunk_size

        for start in range( 0, end, chunk_size ):
            # The final batch is clipped so it never runs past the chromosome size.
            stop = min( start + chunk_size, end )

            data = self.data_class( counts[start:stop] )

            # Parameters are looked up per batch, so an empty chromosome never touches self.parameters.
            resp = model.classify( data, self.parameters[cn_state] )

            self.writer.write_data( cn_state, chr_name, jcnt_rows[start:stop], resp )

    def _subsample( self, cn_state, sample_size ):
        """
        Draw a random subsample of count rows for cn_state, stratified by chromosome.

        Each chromosome contributes floor( chr_size / nrows * sample_size ) rows (capped at chr_size),
        chosen without replacement, where nrows is the reader's data set size for cn_state. Because every
        share is rounded down, the total returned can be smaller than sample_size.

        Returns the per-chromosome samples stacked with np.vstack, in the order the reader lists the
        chromosomes.
        """
        total_rows = self.reader.get_data_set_size( cn_state )

        chr_samples = []

        for chr_name in self.reader.get_chr_list( cn_state ):
            chr_size = self.reader.get_chr_size( cn_state, chr_name )

            # float() forces true division under Python 2 integer semantics.
            share = float( chr_size ) / total_rows * sample_size
            n_draw = min( chr_size, int( math.floor( share ) ) )

            row_indices = random.sample( xrange( chr_size ), n_draw )

            chr_counts = self.reader.get_counts( cn_state, chr_name )

            # Assumes chr_counts supports indexing by a list of row indices (e.g. a numpy array) -
            # TODO confirm against the reader.
            chr_samples.append( chr_counts[row_indices] )

        return np.vstack( chr_samples )