Пример #1
0
class Genome_2bit:
    """Wrapper around a genome stored in a 2bit file.

    On construction the length of every sequence in the file is cached in
    ``chr_len``.  The remaining methods extract sub-sequences, estimate the
    a/c/g/t background frequencies and write small summary files.
    """

    def __init__(self, genome_2bit_file, verbose=False):
        """Open ``genome_2bit_file`` and cache the length of each sequence.

        genome_2bit_file -- path of the 2bit file to open.
        verbose          -- when true, progress messages go to stdout.
        """
        # The handle is intentionally not closed here: it is handed to
        # TwoBitFile and later methods keep reading through self.genome.
        self.genome = TwoBitFile(open(genome_2bit_file, 'rb'))
        self.chr_len = dict()
        self.verbose = verbose

        for chr_id in self.genome.keys():
            self.chr_len[chr_id] = len(self.genome[chr_id])
        if verbose:
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and is also valid Python 3.
            print('Genome initializated')

    def extract_sequence(self, coordinate, mask_repetitive=False):
        """Return the lowercase sequence covered by ``coordinate``.

        ``coordinate`` must expose ``chr_id``, ``bpstart``, ``bpend`` and
        ``strand``.  The slice taken is ``[bpstart-1:bpend]``.  When
        ``mask_repetitive`` is true every character is first passed through
        the module-level ``mask`` helper.  For strand ``'-'`` the result of
        ``Sequence.reverse_complement`` is returned instead.
        """
        seq = self.genome[coordinate.chr_id][coordinate.bpstart - 1:coordinate.bpend]
        if mask_repetitive:
            seq = ''.join([mask(c) for c in seq])
        seq = seq.lower()

        if coordinate.strand == '-':
            return Sequence.reverse_complement(seq)
        return seq

    def estimate_background(self):
        """Return the relative frequency of a, c, g and t over all sequences.

        Counting is case-insensitive and characters other than a/c/g/t are
        not counted.  The result is a dict keyed by lowercase nucleotide
        whose float values sum to 1.

        Raises ZeroDivisionError when no a/c/g/t character is found at all.
        """
        counting = {'a': .0, 'c': .0, 'g': .0, 't': .0}
        total = 0.0

        for chr_id in self.genome.keys():
            if self.verbose:
                start_time = time.time()
                print('Counting on: %s' % (chr_id,))

            # Fetch and lowercase the sequence once per chromosome instead of
            # once per nucleotide.
            chr_seq = self.genome[chr_id][:].lower()
            for nt in counting:
                count_nt = chr_seq.count(nt)
                counting[nt] += count_nt
                total += count_nt

            # start_time only exists in verbose mode, so the timing report
            # must be guarded as well (it used to raise NameError otherwise).
            if self.verbose:
                print('elapsed: %s' % (time.time() - start_time,))

        if self.verbose:
            print(counting)

        for nt in counting:
            counting[nt] /= total

        return counting

    def write_meme_background(self, filename):
        """Write the estimated background to ``filename``.

        One ``<nucleotide>\\t<frequency>`` line is written per nucleotide,
        with the frequency formatted as ``%2.4f``.
        """
        counting = self.estimate_background()
        with open(filename, 'w+') as outfile:
            for nt in counting:
                outfile.write('%s\t%2.4f\n' % (nt, counting[nt]))

    def write_chr_len(self, filename):
        """Write one ``<sequence id>\\t<length>`` line per sequence to ``filename``."""
        with open(filename, 'w+') as outfile:
            for chr_id in self.genome.keys():
                outfile.write('%s\t%s\n' % (chr_id, self.chr_len[chr_id]))
Пример #2
0
    def __init__(self, genome_2bit_file, verbose=False):
        """Open the 2bit file and remember how long each of its sequences is.

        genome_2bit_file -- path of the 2bit file to open.
        verbose          -- when true, a confirmation line is printed.
        """
        self.verbose = verbose
        # The handle stays open on purpose: it is owned by the TwoBitFile
        # object stored on the instance.
        handle = open(genome_2bit_file, 'rb')
        self.genome = TwoBitFile(handle)

        # Map every sequence id to its length.
        self.chr_len = dict(
            (name, len(self.genome[name])) for name in self.genome.keys()
        )

        if verbose:
            print('Genome initializated')
Пример #3
0
class Genome_2bit:
    """Wrapper around a genome stored in a 2bit file.

    The length of every sequence is cached in ``chr_len`` on construction;
    the other methods estimate a/c/g/t background frequencies and write
    summary files.
    """

    def __init__(self, genome_2bit_file, verbose=False):
        """Open ``genome_2bit_file`` and cache the length of each sequence.

        genome_2bit_file -- path of the 2bit file to open.
        verbose          -- when true, progress messages go to stdout.
        """
        # Not closed here: the handle is handed to TwoBitFile and later
        # methods keep reading through self.genome.
        self.genome = TwoBitFile(open(genome_2bit_file, 'rb'))
        self.chr_len = dict()
        self.verbose = verbose

        for chr_id in self.genome.keys():
            self.chr_len[chr_id] = len(self.genome[chr_id])
        if verbose:
            # print(...) with one argument is equivalent to the print
            # statement on Python 2 and also valid on Python 3.
            print('Genome initializated')

    def estimate_background(self):
        """Return the relative frequency of a, c, g and t over all sequences.

        Matching is case-insensitive; any other character is left out of
        both the counts and the denominator.  Returns a dict keyed by
        lowercase nucleotide with float fractions.

        Raises ZeroDivisionError when no a/c/g/t character is found at all.
        """
        counting = {'a': .0, 'c': .0, 'g': .0, 't': .0}
        total = 0.0

        for chr_id in self.genome.keys():
            if self.verbose:
                start_time = time.time()
                print('Counting on: %s' % (chr_id,))

            # Read and lowercase the sequence a single time per chromosome;
            # it does not depend on the nucleotide being counted.
            chr_seq = self.genome[chr_id][:].lower()
            for nt in counting:
                count_nt = chr_seq.count(nt)
                counting[nt] += count_nt
                total += count_nt

            # The timer is only started in verbose mode, so only report it
            # in verbose mode (an unguarded print raised NameError).
            if self.verbose:
                print('elapsed: %s' % (time.time() - start_time,))

        if self.verbose:
            print(counting)

        for nt in counting:
            counting[nt] /= total

        return counting

    def write_meme_background(self, filename):
        """Write the estimated background to ``filename``.

        One ``<NUCLEOTIDE>\\t<frequency>`` line is written per nucleotide;
        the letter is upper-cased and the frequency formatted as ``%2.4f``.
        """
        counting = self.estimate_background()
        with open(filename, 'w+') as outfile:
            for nt in counting:
                outfile.write('%s\t%2.4f\n' % (nt.upper(), counting[nt]))

    def write_chr_len(self, filename):
        """Write one ``<sequence id>\\t<length>`` line per sequence to ``filename``."""
        with open(filename, 'w+') as outfile:
            for chr_id in self.genome.keys():
                outfile.write('%s\t%s\n' % (chr_id, self.chr_len[chr_id]))
Пример #4
0
class Genome_2bit:
    """Wrapper around a genome stored in a 2bit file.

    The length of every sequence is cached in ``chr_len`` on construction;
    the other methods estimate a/c/g/t background frequencies and write
    summary files.
    """

    def __init__(self, genome_2bit_file, verbose=False):
        """Open ``genome_2bit_file`` and cache the length of each sequence.

        genome_2bit_file -- path of the 2bit file to open.
        verbose          -- when true, progress messages go to stdout.
        """
        # Not closed here: the handle is handed to TwoBitFile and later
        # methods keep reading through self.genome.
        self.genome = TwoBitFile(open(genome_2bit_file, 'rb'))
        self.chr_len = dict()
        self.verbose = verbose

        for chr_id in self.genome.keys():
            self.chr_len[chr_id] = len(self.genome[chr_id])
        if verbose:
            # print(...) with one argument is equivalent to the print
            # statement on Python 2 and also valid on Python 3.
            print('Genome initializated')

    def estimate_background(self):
        """Return the relative frequency of a, c, g and t over all sequences.

        Matching is case-insensitive; any other character is left out of
        both the counts and the denominator.  Returns a dict keyed by
        lowercase nucleotide with float fractions.

        Raises ZeroDivisionError when no a/c/g/t character is found at all.
        """
        counting = {'a': .0, 'c': .0, 'g': .0, 't': .0}
        # Named "total" so the builtin all() is not shadowed.
        total = 0.0

        for chr_id in self.genome.keys():
            if self.verbose:
                print('Counting on: %s' % (chr_id,))

            # Read and lowercase the sequence once per chromosome; doing it
            # inside the nucleotide loop repeated the same work four times.
            chr_seq = self.genome[chr_id][:].lower()
            for nt in counting:
                count_nt = chr_seq.count(nt)
                counting[nt] += count_nt
                total += count_nt

        if self.verbose:
            print(counting)

        for nt in counting:
            counting[nt] /= total

        return counting

    def write_meme_background(self, filename):
        """Write the estimated background to ``filename``.

        One ``<NUCLEOTIDE>\\t<frequency>`` line is written per nucleotide;
        the letter is upper-cased and the frequency formatted as ``%2.4f``.
        """
        counting = self.estimate_background()
        with open(filename, 'w+') as outfile:
            for nt in counting:
                outfile.write('%s\t%2.4f\n' % (nt.upper(), counting[nt]))

    def write_chr_len(self, filename):
        """Write one ``<sequence id>\\t<length>`` line per sequence to ``filename``."""
        with open(filename, 'w+') as outfile:
            for chr_id in self.genome.keys():
                outfile.write('%s\t%s\n' % (chr_id, self.chr_len[chr_id]))
Пример #5
0
 def _get_reference_data(twobit_file_name, chrom, low, high):
     """Read a region of reference sequence from a 2bit file.

     Opens ``twobit_file_name`` in binary mode and, if ``chrom`` is present
     in it, returns a ``GenomeRegion`` for ``[low, high)`` as passed to the
     sequence's ``get``; ``low``/``high`` are converted with ``int()`` for
     the lookup but stored unchanged on the result.

     Returns ``None`` when ``chrom`` is not in the file.

     Raises ``OSError`` when the file cannot be opened or read.  The former
     handler did ``raise e()``, which calls the exception instance and so
     replaced every OSError with a TypeError; the error now propagates
     untouched.
     """
     with open(twobit_file_name, 'rb') as f:
         twobit = TwoBitFile(f)
         if chrom not in twobit:
             return None
         seq_data = twobit[chrom].get(int(low), int(high))
         return GenomeRegion(chrom=chrom, start=low, end=high, sequence=seq_data)
Пример #6
0
    def reference(self, trans, dbkey, chrom, low, high):
        """
        Return reference data for a build.

        ``dbkey`` is decoded with ``decode_dbkey`` into an optional owner and
        the key itself; without an owner the current ``trans.user`` is used.
        The 2bit file is either the built-in one registered in
        ``self.genomes`` or the 'twobit' conversion of the user's fasta
        dataset.

        Returns:
          * ``None`` if ``has_reference_data`` is false, if the 2bit file
            cannot be read (IOError), or if ``chrom`` is not in the file;
          * whatever truthy value ``convert_dataset`` returns, when it
            returns one (presumably a conversion status message -- confirm
            against ``convert_dataset``);
          * otherwise a ``GenomeRegion`` carrying the requested sequence.
        """
        self.check_and_reload()
        # If there is no dbkey owner, default to current user.
        dbkey_owner, dbkey = decode_dbkey(dbkey)
        if dbkey_owner:
            dbkey_user = trans.sa_session.query(
                trans.app.model.User).filter_by(username=dbkey_owner).first()
        else:
            dbkey_user = trans.user

        if not self.has_reference_data(dbkey, dbkey_user):
            return None

        #
        # Get twobit file with reference data.
        #
        twobit_file_name = None
        if dbkey in self.genomes:
            # Built-in twobit.
            twobit_file_name = self.genomes[dbkey].twobit_file
        else:
            # User-defined build: use the twobit conversion of its fasta.
            user_keys = loads(dbkey_user.preferences['dbkeys'])
            dbkey_attributes = user_keys[dbkey]
            fasta_dataset = trans.sa_session.query(
                trans.app.model.HistoryDatasetAssociation).get(
                    dbkey_attributes['fasta'])
            msg = fasta_dataset.convert_dataset(trans, 'twobit')
            if msg:
                return msg
            twobit_dataset = fasta_dataset.get_converted_dataset(
                trans, 'twobit')
            twobit_file_name = twobit_dataset.file_name

        # Read and return reference data.
        try:
            # 2bit is a binary format, so open in 'rb'; the with-block makes
            # sure the handle is released once the region has been read.
            # NOTE(review): assumes .get() returns the sequence eagerly, so
            # nothing reads from the file after it is closed -- confirm.
            with open(twobit_file_name, 'rb') as twobit_fh:
                twobit = TwoBitFile(twobit_fh)
                if chrom in twobit:
                    seq_data = twobit[chrom].get(int(low), int(high))
                    return GenomeRegion(chrom=chrom,
                                        start=low,
                                        end=high,
                                        sequence=seq_data)
        except IOError:
            return None
        # chrom is not present in the twobit file.
        return None