class Genome_2bit:
    """Thin wrapper around a ``TwoBitFile`` genome.

    Attributes:
        genome: the ``TwoBitFile`` built from the opened file.
        chr_len: dict mapping every key of ``genome`` to ``len(genome[key])``.
        verbose: when true, progress messages are printed to stdout.
    """

    def __init__(self, genome_2bit_file, verbose=False):
        """Open *genome_2bit_file* and cache the length of each sequence.

        Args:
            genome_2bit_file: path of the 2bit file, opened in binary mode.
            verbose: enable progress messages on stdout.
        """
        # NOTE(review): the handle is handed to TwoBitFile and never closed
        # here; presumably TwoBitFile reads from it on demand -- confirm
        # before adding an explicit close.
        self.genome = TwoBitFile(open(genome_2bit_file, 'rb'))
        self.chr_len = dict()
        self.verbose = verbose
        for chr_id in self.genome.keys():
            self.chr_len[chr_id] = len(self.genome[chr_id])
        if verbose:
            # Single-argument print(...) produces the same output whether it
            # is parsed as a print statement or as the print function.
            print('Genome initializated')

    def extract_sequence(self, coordinate, mask_repetitive=False):
        """Return the lower-cased sequence addressed by *coordinate*.

        Args:
            coordinate: object exposing ``chr_id``, ``bpstart``, ``bpend``
                and ``strand``; the slice taken is ``[bpstart - 1:bpend]``.
            mask_repetitive: when true, every character is passed through
                ``mask()`` before the result is lower-cased.

        Returns:
            The lower-cased sequence, passed through
            ``Sequence.reverse_complement`` when ``coordinate.strand`` is
            ``'-'``.
        """
        region = self.genome[coordinate.chr_id][
            coordinate.bpstart - 1:coordinate.bpend]
        if mask_repetitive:
            seq = ''.join([mask(c) for c in region]).lower()
        else:
            seq = region.lower()
        if coordinate.strand == '-':
            return Sequence.reverse_complement(seq)
        return seq

    def estimate_background(self):
        """Return the relative frequency of a, c, g and t over the genome.

        Every sequence is lower-cased before counting, so upper- and
        lower-case letters are counted together. Characters other than
        a/c/g/t are ignored.

        Returns:
            dict with keys ``'a'``, ``'c'``, ``'g'``, ``'t'`` whose values
            are the counts divided by the total number of counted letters.

        Raises:
            ZeroDivisionError: if no a/c/g/t character is found at all.
        """
        counting = {'a': .0, 'c': .0, 'g': .0, 't': .0}
        total = 0.0  # named `total` to avoid shadowing the builtin all()
        for chr_id in self.genome.keys():
            if self.verbose:
                start_time = time.time()
                print('Counting on: %s' % (chr_id,))
            # Fetch and lower-case the sequence once per chromosome rather
            # than once per nucleotide.
            seq = self.genome[chr_id][:].lower()
            for nt in counting:
                count_nt = seq.count(nt)
                counting[nt] += count_nt
                total += count_nt
            # start_time only exists in verbose mode, so the timing report
            # must be guarded as well (it used to raise NameError otherwise).
            if self.verbose:
                print('elapsed: %s' % (time.time() - start_time,))
        if self.verbose:
            print(counting)
        for nt in counting:
            counting[nt] /= total
        return counting

    def write_meme_background(self, filename):
        """Write the estimated background to *filename*.

        One line per nucleotide: the letter exactly as keyed by
        ``estimate_background`` (lower case), a tab, and the frequency
        formatted with ``%2.4f``.
        """
        counting = self.estimate_background()
        with open(filename, 'w+') as outfile:
            for nt, freq in counting.items():
                outfile.write('%s\t%2.4f\n' % (nt, freq))

    def write_chr_len(self, filename):
        """Write ``<chr_id><TAB><length>`` lines for every genome key."""
        with open(filename, 'w+') as outfile:
            for chr_id in self.genome.keys():
                outfile.write('%s\t%s\n' % (chr_id, self.chr_len[chr_id]))
def __init__(self, genome_2bit_file, verbose=False):
    """Open the 2bit genome and record the length of every sequence in it.

    Args:
        genome_2bit_file: path of the 2bit file; opened in binary mode and
            handed to ``TwoBitFile``.
        verbose: stored on the instance; when true a confirmation message
            is printed once the lengths have been collected.
    """
    handle = open(genome_2bit_file, 'rb')
    self.genome = TwoBitFile(handle)
    self.verbose = verbose
    # Map each sequence name to its length.
    self.chr_len = dict(
        (name, len(self.genome[name])) for name in self.genome.keys()
    )
    if verbose:
        # Single-argument form prints the same text as the print statement.
        print('Genome initializated')
class Genome_2bit:
    """Thin wrapper around a ``TwoBitFile`` genome.

    Attributes:
        genome: the ``TwoBitFile`` built from the opened file.
        chr_len: dict mapping every key of ``genome`` to ``len(genome[key])``.
        verbose: when true, progress messages are printed to stdout.
    """

    def __init__(self, genome_2bit_file, verbose=False):
        """Open *genome_2bit_file* and cache the length of each sequence.

        Args:
            genome_2bit_file: path of the 2bit file, opened in binary mode.
            verbose: enable progress messages on stdout.
        """
        # NOTE(review): the handle is never closed here; presumably
        # TwoBitFile keeps reading from it on demand -- confirm.
        self.genome = TwoBitFile(open(genome_2bit_file, 'rb'))
        self.chr_len = dict()
        self.verbose = verbose
        for chr_id in self.genome.keys():
            self.chr_len[chr_id] = len(self.genome[chr_id])
        if verbose:
            # Single-argument print(...) gives the same output as the
            # print statement.
            print('Genome initializated')

    def estimate_background(self):
        """Return the relative frequency of a, c, g and t over the genome.

        Sequences are lower-cased before counting; characters other than
        a/c/g/t do not contribute to the totals.

        Returns:
            dict keyed by ``'a'``, ``'c'``, ``'g'``, ``'t'`` holding each
            count divided by the sum of all four counts.

        Raises:
            ZeroDivisionError: if no a/c/g/t character is found at all.
        """
        counting = {'a': .0, 'c': .0, 'g': .0, 't': .0}
        total = 0.0  # avoids shadowing the builtin all()
        for chr_id in self.genome.keys():
            if self.verbose:
                start_time = time.time()
                print('Counting on: %s' % (chr_id,))
            # Read and lower-case the chromosome once, not once per letter.
            seq = self.genome[chr_id][:].lower()
            for nt in counting:
                count_nt = seq.count(nt)
                counting[nt] += count_nt
                total += count_nt
            # Guarded: start_time is only defined in verbose mode, and the
            # unguarded report raised NameError for non-verbose instances.
            if self.verbose:
                print('elapsed: %s' % (time.time() - start_time,))
        if self.verbose:
            print(counting)
        for nt in counting:
            counting[nt] /= total
        return counting

    def write_meme_background(self, filename):
        """Write the estimated background to *filename*.

        One line per nucleotide: the upper-cased letter, a tab, and the
        frequency formatted with ``%2.4f``.
        """
        counting = self.estimate_background()
        with open(filename, 'w+') as outfile:
            for nt, freq in counting.items():
                outfile.write('%s\t%2.4f\n' % (nt.upper(), freq))

    def write_chr_len(self, filename):
        """Write ``<chr_id><TAB><length>`` lines for every genome key."""
        with open(filename, 'w+') as outfile:
            for chr_id in self.genome.keys():
                outfile.write('%s\t%s\n' % (chr_id, self.chr_len[chr_id]))
class Genome_2bit:
    """Thin wrapper around a ``TwoBitFile`` genome.

    Attributes:
        genome: the ``TwoBitFile`` built from the opened file.
        chr_len: dict mapping every key of ``genome`` to ``len(genome[key])``.
        verbose: when true, progress messages are printed to stdout.
    """

    def __init__(self, genome_2bit_file, verbose=False):
        """Open *genome_2bit_file* and cache the length of each sequence.

        Args:
            genome_2bit_file: path of the 2bit file, opened in binary mode.
            verbose: enable progress messages on stdout.
        """
        # NOTE(review): the handle is never closed here; presumably
        # TwoBitFile keeps reading from it on demand -- confirm.
        self.genome = TwoBitFile(open(genome_2bit_file, 'rb'))
        self.chr_len = dict()
        self.verbose = verbose
        for chr_id in self.genome.keys():
            self.chr_len[chr_id] = len(self.genome[chr_id])
        if verbose:
            # Single-argument print(...) gives the same output as the
            # print statement.
            print('Genome initializated')

    def estimate_background(self):
        """Return the relative frequency of a, c, g and t over the genome.

        Sequences are lower-cased before counting; characters other than
        a/c/g/t do not contribute to the totals.

        Returns:
            dict keyed by ``'a'``, ``'c'``, ``'g'``, ``'t'`` holding each
            count divided by the sum of all four counts.

        Raises:
            ZeroDivisionError: if no a/c/g/t character is found at all.
        """
        counting = {'a': .0, 'c': .0, 'g': .0, 't': .0}
        total = 0.0  # avoids shadowing the builtin all()
        for chr_id in self.genome.keys():
            if self.verbose:
                # The start timestamp that used to be taken here was never
                # read again, so it has been dropped.
                print('Counting on: %s' % (chr_id,))
            # Read and lower-case the chromosome once, not once per letter.
            seq = self.genome[chr_id][:].lower()
            for nt in counting:
                count_nt = seq.count(nt)
                counting[nt] += count_nt
                total += count_nt
        if self.verbose:
            print(counting)
        for nt in counting:
            counting[nt] /= total
        return counting

    def write_meme_background(self, filename):
        """Write the estimated background to *filename*.

        One line per nucleotide: the upper-cased letter, a tab, and the
        frequency formatted with ``%2.4f``.
        """
        counting = self.estimate_background()
        with open(filename, 'w+') as outfile:
            for nt, freq in counting.items():
                outfile.write('%s\t%2.4f\n' % (nt.upper(), freq))

    def write_chr_len(self, filename):
        """Write ``<chr_id><TAB><length>`` lines for every genome key."""
        with open(filename, 'w+') as outfile:
            for chr_id in self.genome.keys():
                outfile.write('%s\t%s\n' % (chr_id, self.chr_len[chr_id]))
def _get_reference_data(twobit_file_name, chrom, low, high):
    """Read and return reference data for one region of a 2bit file.

    Args:
        twobit_file_name: path of the 2bit file; opened in binary mode.
        chrom: name of the sequence to look up in the file.
        low, high: region bounds; converted with ``int()`` before being
            passed to the sequence's ``get()``, and stored unconverted on
            the returned region.

    Returns:
        A ``GenomeRegion`` carrying the sequence data, or ``None`` when
        *chrom* is not present in the file.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # OSError is deliberately left to propagate. The previous handler did
    # ``raise e()``, which *calls* the exception instance and therefore
    # surfaced every I/O failure as a TypeError instead.
    with open(twobit_file_name, 'rb') as f:
        twobit = TwoBitFile(f)
        if chrom not in twobit:
            return None
        # Read while the file is still open.
        seq_data = twobit[chrom].get(int(low), int(high))
    return GenomeRegion(chrom=chrom, start=low, end=high, sequence=seq_data)
def reference(self, trans, dbkey, chrom, low, high):
    """
    Return reference data for a build.

    The dbkey is decoded into an owner and a key. When an owner is given,
    the matching user is looked up by username; otherwise the current user
    (``trans.user``) is used. The 2bit file comes either from the built-in
    genomes or from the user's custom build, whose FASTA dataset is
    converted to twobit.

    Returns:
        * a ``GenomeRegion`` with the sequence for ``chrom`` between
          ``int(low)`` and ``int(high)``;
        * ``None`` when there is no reference data for the dbkey, when
          ``chrom`` is not in the 2bit file, or when the file cannot be
          read (``IOError``);
        * whatever truthy value ``convert_dataset`` returned when the
          FASTA -> twobit conversion did not yield a dataset
          (NOTE(review): presumably a status message -- confirm).
    """
    self.check_and_reload()

    # If there is no dbkey owner, default to current user.
    dbkey_owner, dbkey = decode_dbkey(dbkey)
    if dbkey_owner:
        dbkey_user = trans.sa_session.query(
            trans.app.model.User).filter_by(username=dbkey_owner).first()
    else:
        dbkey_user = trans.user

    if not self.has_reference_data(dbkey, dbkey_user):
        return None

    #
    # Get twobit file with reference data.
    #
    if dbkey in self.genomes:
        # Built-in twobit.
        twobit_file_name = self.genomes[dbkey].twobit_file
    else:
        user_keys = loads(dbkey_user.preferences['dbkeys'])
        dbkey_attributes = user_keys[dbkey]
        fasta_dataset = trans.sa_session.query(
            trans.app.model.HistoryDatasetAssociation).get(
                dbkey_attributes['fasta'])
        msg = fasta_dataset.convert_dataset(trans, 'twobit')
        if msg:
            return msg
        twobit_dataset = fasta_dataset.get_converted_dataset(trans, 'twobit')
        twobit_file_name = twobit_dataset.file_name

    # Read and return reference data.
    try:
        # 2bit is a binary format, so open in 'rb'; the context manager
        # closes the handle, which was previously leaked on every call.
        with open(twobit_file_name, 'rb') as f:
            twobit = TwoBitFile(f)
            if chrom in twobit:
                seq_data = twobit[chrom].get(int(low), int(high))
                return GenomeRegion(chrom=chrom, start=low, end=high,
                                    sequence=seq_data)
    except IOError:
        return None
    # chrom is not present in the 2bit file.
    return None