def trim_additional_merged_contigs(original_contig, merged_contig):
    """Trim the start of the contig in *original_contig* using the merged contig header.

    The header of the first record in *merged_contig* is split on ``_``. The
    second-to-last field, with leading/trailing ``o`` and ``s`` characters
    stripped, is parsed as the initial trim length. The counts of every ``M``
    and ``X`` operation found in the last field (read as a CIGAR string) are
    added to it.

    When the resulting trim is greater than 50, the first record of
    *original_contig* is read, ``trim_<n>`` is appended to its header, its
    first *n* characters are dropped and the file is overwritten. The record
    is written back only if more than 100 characters remain; otherwise the
    file is left empty.

    :param original_contig: path of the FASTA file to trim in place.
    :param merged_contig: path of the FASTA file whose header carries the
        offset and CIGAR information.
    """
    with open(merged_contig) as open_merged:
        header, sequence = FastaReader(open_merged).next()
    sp_header = header.split('_')
    trim = int(sp_header[-2].strip('os'))
    cigar = sp_header[-1]
    # Only M and X operations add to the trim; D and I are matched by the
    # pattern but ignored.
    for count, operation in re.findall(r'(\d+)([MXDI])', cigar):
        if operation in ("M", "X"):
            trim += int(count)
    if trim <= 50:
        return

    logging.info("trim %s of %s", trim, original_contig)
    with open(original_contig) as open_contig:
        # The record to trim is the one in the original contig file, not the
        # merged record read above.
        header, sequence = FastaReader(open_contig).next()
    header = header + "trim_%s" % trim
    sequence = sequence[trim:]
    # Opening in 'w' mode truncates the file even when nothing is written.
    with open(original_contig, 'w') as open_contig:
        if len(sequence) > 100:
            open_contig.write(">%s\n%s\n" % (header, sequence))
def trim_additional_merged_contigs(original_contig, merged_contig):
    """Trim the start of the contig in *original_contig* using the merged contig header.

    The header of the first record in *merged_contig* is split on ``_``. The
    second-to-last field, with leading/trailing ``o`` and ``s`` characters
    stripped, is parsed as the initial trim length. The counts of every ``M``
    and ``X`` operation found in the last field (read as a CIGAR string) are
    added to it.

    When the resulting trim is greater than 50, the first record of
    *original_contig* is read, ``trim_<n>`` is appended to its header, its
    first *n* characters are dropped and the file is overwritten. The record
    is written back only if more than 100 characters remain; otherwise the
    file is left empty.

    :param original_contig: path of the FASTA file to trim in place.
    :param merged_contig: path of the FASTA file whose header carries the
        offset and CIGAR information.
    """
    with open(merged_contig) as open_merged:
        header, sequence = FastaReader(open_merged).next()
    sp_header = header.split('_')
    trim = int(sp_header[-2].strip('os'))
    cigar = sp_header[-1]
    # Only M and X operations add to the trim; D and I are matched by the
    # pattern but ignored.
    for count, operation in re.findall(r'(\d+)([MXDI])', cigar):
        if operation in ("M", "X"):
            trim += int(count)
    if trim <= 50:
        return

    logging.info("trim %s of %s", trim, original_contig)
    with open(original_contig) as open_contig:
        # The record to trim is the one in the original contig file, not the
        # merged record read above.
        header, sequence = FastaReader(open_contig).next()
    header = header + "trim_%s" % trim
    sequence = sequence[trim:]
    # Opening in 'w' mode truncates the file even when nothing is written.
    with open(original_contig, 'w') as open_contig:
        if len(sequence) > 100:
            open_contig.write(">%s\n%s\n" % (header, sequence))
def get_fasta_length(fasta_file):
    """Return the sequence length of the first record read from *fasta_file*.

    :param fasta_file: path of the FASTA file to open.
    :returns: ``len()`` of the sequence of the first record.
    """
    with open(fasta_file) as handle:
        first_record = FastaReader(handle).next()
        _name, first_sequence = first_record
        size = len(first_sequence)
    return size
def get_fasta_length(fasta_file):
    """Return the sequence length of the first record read from *fasta_file*.

    :param fasta_file: path of the FASTA file to open.
    :returns: ``len()`` of the sequence of the first record.
    """
    with open(fasta_file) as handle:
        first_record = FastaReader(handle).next()
        _name, first_sequence = first_record
        size = len(first_sequence)
    return size
def force_merge_consensus(read1_consensus, read2_consensus, output_merge_file):
    """Concatenate read1 and read2 consensus sequences into one FASTA record.

    The first record of *read1_consensus* starts the merged sequence. Every
    record of *read2_consensus* is then appended, each preceded by a spacer
    of 100 ``N`` characters. The merged header is
    ``<read1 name>_forced_merged`` followed by ``_<read2 name>`` for each
    read2 record.

    :param read1_consensus: path of the FASTA file holding the read1 consensus.
    :param read2_consensus: path of the FASTA file holding the read2 consensus.
    :param output_merge_file: path of the FASTA file to write (overwritten).
    """
    spacer = "N" * 100
    # Context managers make sure every handle is closed even if reading or
    # unpacking a record raises. The output file is opened first, as before,
    # so it is truncated before any input is read.
    with open(output_merge_file, 'w') as open_output:
        with open(read1_consensus) as open_read1:
            read1_name, read1_sequence = FastaReader(open_read1).next()
        name_parts = ["%s_forced_merged" % read1_name]
        sequence_parts = [read1_sequence]
        with open(read2_consensus) as open_read2:
            for read2_name, read2_sequence in FastaReader(open_read2):
                name_parts.append("%s" % (read2_name,))
                sequence_parts.append(spacer)
                sequence_parts.append(read2_sequence)
        open_output.write(
            ">%s\n%s\n" % ("_".join(name_parts), ''.join(sequence_parts)))
def force_merge_consensus(read1_consensus, read2_consensus, output_merge_file):
    """Concatenate read1 and read2 consensus sequences into one FASTA record.

    The first record of *read1_consensus* starts the merged sequence. Every
    record of *read2_consensus* is then appended, each preceded by a spacer
    of 100 ``N`` characters. The merged header is
    ``<read1 name>_forced_merged`` followed by ``_<read2 name>`` for each
    read2 record.

    :param read1_consensus: path of the FASTA file holding the read1 consensus.
    :param read2_consensus: path of the FASTA file holding the read2 consensus.
    :param output_merge_file: path of the FASTA file to write (overwritten).
    """
    spacer = "N" * 100
    # Context managers make sure every handle is closed even if reading or
    # unpacking a record raises. The output file is opened first, as before,
    # so it is truncated before any input is read.
    with open(output_merge_file, 'w') as open_output:
        with open(read1_consensus) as open_read1:
            read1_name, read1_sequence = FastaReader(open_read1).next()
        name_parts = ["%s_forced_merged" % read1_name]
        sequence_parts = [read1_sequence]
        with open(read2_consensus) as open_read2:
            for read2_name, read2_sequence in FastaReader(open_read2):
                name_parts.append("%s" % (read2_name,))
                sequence_parts.append(spacer)
                sequence_parts.append(read2_sequence)
        open_output.write(
            ">%s\n%s\n" % ("_".join(name_parts), ''.join(sequence_parts)))
class GenomeLoader:
    """Load chromosomes on demand from a FASTA file.

    The file is read sequentially through a ``FastaReader``; records that
    have been read can be cached in ``all_chr`` (name -> ``(header, seq)``):

    * ``keep_in_memory=True`` keeps every record passed over while searching
      for another chromosome, until that record is itself requested.
    * ``keep_until_done=True`` keeps every record read until the loader is
      closed.

    ``prefix`` is prepended to chromosome names that do not already start
    with it, both for requested names and for names read from the file.
    """

    def __init__(self, genome_file, keep_in_memory=True, keep_until_done=False, prefix=''):
        """Open *genome_file* and prepare an empty chromosome cache."""
        self.open_genome_file = open_input_file(genome_file)
        self.reader = FastaReader(self.open_genome_file)
        self.keep_in_memory = keep_in_memory
        self.keep_until_done = keep_until_done
        self.prefix = prefix
        self.all_chr = {}

    def _with_prefix(self, name):
        """Return *name* with ``self.prefix`` prepended unless already present."""
        if name.startswith(self.prefix):
            return name
        return self.prefix + name

    def load_all(self):
        """Read the remaining records and return the cached chromosome names.

        Records are only retained if ``keep_in_memory`` or
        ``keep_until_done`` is set.
        """
        # Asking for a name that is not expected to match any header makes
        # get_chr read until the reader is exhausted.
        self.get_chr('***********************************************')
        return self.all_chr.keys()

    def get_chr(self, chr):
        """Return the ``(header, sequence)`` record for chromosome *chr*.

        The cache is consulted first; otherwise records are read from the
        file until the name (first whitespace-separated token of the header,
        with the prefix applied) matches. Returns the falsy value produced by
        the reader when the file is exhausted without a match.
        """
        chr = self._with_prefix(chr)
        if self.keep_until_done:
            # Return if loaded already.
            fasta_seq = self.all_chr.get(chr)
        else:
            # Remove if loaded already.
            fasta_seq = self.all_chr.pop(chr, None)
        if fasta_seq:
            logging.debug('return %s' % fasta_seq[0])
            return fasta_seq

        curr_chr = ''
        while curr_chr != chr:
            fasta_seq = self.reader.next()
            if not fasta_seq:
                break
            header = fasta_seq[0]
            logging.debug('load %s' % header)
            curr_chr = self._with_prefix(header.split()[0])
            if (self.keep_in_memory and curr_chr != chr) or self.keep_until_done:
                logging.debug('keep %s' % header)
                self.all_chr[curr_chr] = fasta_seq
        return fasta_seq

    def next(self):
        """Return the next ``(header, sequence)`` record, or None when done.

        Cached records are served before reading further from the file.
        """
        # NOTE(review): with keep_until_done=True a cached record is never
        # removed, so repeated calls keep returning the same cached entry --
        # confirm the intended iteration behaviour for that mode.
        fasta_seq = None
        if self.all_chr:
            # ``next`` here is the builtin: take an arbitrary cached name.
            cached_name = next(iter(self.all_chr))
            if self.keep_until_done:
                # Return if loaded already.
                fasta_seq = self.all_chr.get(cached_name)
            else:
                # Remove if loaded already.
                fasta_seq = self.all_chr.pop(cached_name, None)
        if not fasta_seq:
            fasta_seq = self.reader.next()
            if fasta_seq:
                header = fasta_seq[0]
                # Cache under the prefixed name so get_chr can find it later.
                curr_chr = self._with_prefix(header.split()[0])
                logging.debug('load %s' % header)
                if self.keep_until_done:
                    logging.debug('keep %s' % header)
                    self.all_chr[curr_chr] = fasta_seq
        if fasta_seq:
            # Take the header from the record itself: it may come from the
            # cache, in which case no local ``header`` was bound above.
            logging.debug('return %s' % fasta_seq[0])
            return fasta_seq
        return None

    def __iter__(self):
        """Iterate over records by calling ``next`` until it returns None."""
        return iter(self.next, None)

    def __del__(self):
        """Release the file handle and the cache when the object is collected."""
        self.close()

    def close(self):
        """Close the genome file (if it was opened) and drop the cache."""
        # __init__ may have failed before the attribute was set.
        handle = getattr(self, 'open_genome_file', None)
        if handle is not None:
            handle.close()
        self.all_chr = None
class GenomeLoader:
    """Load chromosomes on demand from a FASTA file.

    The file is read sequentially through a ``FastaReader``; records that
    have been read can be cached in ``all_chr`` (name -> ``(header, seq)``):

    * ``keep_in_memory=True`` keeps every record passed over while searching
      for another chromosome, until that record is itself requested.
    * ``keep_until_done=True`` keeps every record read until the loader is
      closed.

    ``prefix`` is prepended to chromosome names that do not already start
    with it, both for requested names and for names read from the file.
    """

    def __init__(self, genome_file, keep_in_memory=True, keep_until_done=False, prefix=''):
        """Open *genome_file* and prepare an empty chromosome cache."""
        self.open_genome_file = open_input_file(genome_file)
        self.reader = FastaReader(self.open_genome_file)
        self.keep_in_memory = keep_in_memory
        self.keep_until_done = keep_until_done
        self.prefix = prefix
        self.all_chr = {}

    def _with_prefix(self, name):
        """Return *name* with ``self.prefix`` prepended unless already present."""
        if name.startswith(self.prefix):
            return name
        return self.prefix + name

    def load_all(self):
        """Read the remaining records and return the cached chromosome names.

        Records are only retained if ``keep_in_memory`` or
        ``keep_until_done`` is set.
        """
        # Asking for a name that is not expected to match any header makes
        # get_chr read until the reader is exhausted.
        self.get_chr('***********************************************')
        return self.all_chr.keys()

    def get_chr(self, chr):
        """Return the ``(header, sequence)`` record for chromosome *chr*.

        The cache is consulted first; otherwise records are read from the
        file until the name (first whitespace-separated token of the header,
        with the prefix applied) matches. Returns the falsy value produced by
        the reader when the file is exhausted without a match.
        """
        chr = self._with_prefix(chr)
        if self.keep_until_done:
            # Return if loaded already.
            fasta_seq = self.all_chr.get(chr)
        else:
            # Remove if loaded already.
            fasta_seq = self.all_chr.pop(chr, None)
        if fasta_seq:
            logging.debug('return %s' % fasta_seq[0])
            return fasta_seq

        curr_chr = ''
        while curr_chr != chr:
            fasta_seq = self.reader.next()
            if not fasta_seq:
                break
            header = fasta_seq[0]
            logging.debug('load %s' % header)
            curr_chr = self._with_prefix(header.split()[0])
            if (self.keep_in_memory and curr_chr != chr) or self.keep_until_done:
                logging.debug('keep %s' % header)
                self.all_chr[curr_chr] = fasta_seq
        return fasta_seq

    def next(self):
        """Return the next ``(header, sequence)`` record, or None when done.

        Cached records are served before reading further from the file.
        """
        # NOTE(review): with keep_until_done=True a cached record is never
        # removed, so repeated calls keep returning the same cached entry --
        # confirm the intended iteration behaviour for that mode.
        fasta_seq = None
        if self.all_chr:
            # ``next`` here is the builtin: take an arbitrary cached name.
            cached_name = next(iter(self.all_chr))
            if self.keep_until_done:
                # Return if loaded already.
                fasta_seq = self.all_chr.get(cached_name)
            else:
                # Remove if loaded already.
                fasta_seq = self.all_chr.pop(cached_name, None)
        if not fasta_seq:
            fasta_seq = self.reader.next()
            if fasta_seq:
                header = fasta_seq[0]
                # Cache under the prefixed name so get_chr can find it later.
                curr_chr = self._with_prefix(header.split()[0])
                logging.debug('load %s' % header)
                if self.keep_until_done:
                    logging.debug('keep %s' % header)
                    self.all_chr[curr_chr] = fasta_seq
        if fasta_seq:
            # Take the header from the record itself: it may come from the
            # cache, in which case no local ``header`` was bound above.
            logging.debug('return %s' % fasta_seq[0])
            return fasta_seq
        return None

    def __iter__(self):
        """Iterate over records by calling ``next`` until it returns None."""
        return iter(self.next, None)

    def __del__(self):
        """Release the file handle and the cache when the object is collected."""
        self.close()

    def close(self):
        """Close the genome file (if it was opened) and drop the cache."""
        # __init__ may have failed before the attribute was set.
        handle = getattr(self, 'open_genome_file', None)
        if handle is not None:
            handle.close()
        self.all_chr = None