def writeClassifiedFastas(classType, Dirr, resultsDir, df):
    """Split FASTA records into one output file per class.

    Every FASTA file returned by ``Get_Dirr_All_Fasta(classType, Dirr)`` is
    read; each record whose id is present in ``df.index`` is appended to
    ``<resultsDir>\\<class name>.fasta``, where the class name is ``df[id]``
    stripped down to letters, digits and spaces.

    :param classType: forwarded to ``Get_Dirr_All_Fasta``
    :param Dirr: forwarded to ``Get_Dirr_All_Fasta``
    :param resultsDir: directory prefix for the per-class output files
    :param df: mapping from record id to class name (indexed via ``df[id]``
        and ``df.index``)
    """
    fasta_files_dict = Get_Dirr_All_Fasta(classType, Dirr)
    classDict = {}   # sanitised class name -> open output handle
    writerDict = {}  # sanitised class name -> FastaWriter on that handle
    try:
        for filename in fasta_files_dict:
            with open(filename) as fasta:
                for record in FastaIterator(fasta):
                    if record.id not in df.index:
                        continue
                    raw_name = df[record.id]
                    # Sanitise BEFORE the lookup: the dictionaries are keyed
                    # by the sanitised name, so looking up the raw name
                    # re-opened (and truncated) the file for every record
                    # whose class name contained other characters.
                    classname = "".join(
                        c for c in raw_name
                        if c.isalpha() or c.isdigit() or c == ' ').rstrip()
                    if classname not in writerDict:
                        file = resultsDir + '\\' + classname + '.fasta'
                        classHandle = open(file, "w")
                        classDict[classname] = classHandle
                        myWriter = FastaWriter(classHandle)
                        myWriter.write_header()
                        writerDict[classname] = myWriter
                    writerDict[classname].write_record(record)
        for classname in classDict:
            writerDict[classname].write_footer()
    finally:
        # Close every output file even when reading or writing failed.
        for classHandle in classDict.values():
            classHandle.close()
def create_proteins_for_each_peptide(input_path, fasta_input, output_path,
                                     final_peptides,
                                     allow_change_in_cleavage_sites=False):
    """Write the native protein of every sequence plus one variant per edited peptide.

    :param input_path: directory prefix of the input FASTA file
    :param fasta_input: input FASTA file name (also used in the output name)
    :param output_path: directory prefix of the output FASTA file
    :param final_peptides: peptide table; the columns read here are
        ``edited``, ``seq_id``, ``N_terminus``, ``C_terminus``,
        ``cancelled_cs_in_pep``, ``permutation_coor_base0``,
        ``biological_extended_peptide`` and
        ``editing_combinations_relative_to_coding_seq_base0``
    :param allow_change_in_cleavage_sites: when False, peptides whose
        termini changed or that cancel a cleavage site are skipped
    """
    final_edited_peptides = final_peptides[final_peptides['edited']]

    # Build a seq-id -> sequence dictionary from the input FASTA file.
    sequences_dict = {}
    with open(input_path + fasta_input, "r") as fasta_handle:
        for record in SeqIO.parse(fasta_handle, "fasta"):
            sequences_dict[record.id] = record.seq

    out_name = output_path + 'proteins_per_peptide_from_' + fasta_input
    with open(out_name, 'w') as out_handle:
        writer = FastaWriter(out_handle, wrap=None)
        writer.write_header()

        for key, mrna_sequence in sequences_dict.items():
            # First write the native protein.
            comb_id = key + '|original'
            protein = mrna_sequence.translate()
            writer.write_record(SeqRecord(protein, id=comb_id, description=''))

            edited_peptides = final_edited_peptides[
                final_edited_peptides['seq_id'] == key]
            n = 1
            for index, row in edited_peptides.iterrows():
                # The original looked these values up through the undefined
                # name ``final_peps_df``; the current row carries them.
                if not allow_change_in_cleavage_sites and (
                        row['N_terminus'] != 'no_change'
                        or row['C_terminus'] != 'no_change'
                        or row['cancelled_cs_in_pep']):
                    continue

                permutation_coor = tuple(
                    int(x) for x in row['permutation_coor_base0'].split('_')
                    if x != '')
                # NOTE(review): the tail after the peptide is appended as
                # untranslated mRNA, exactly as in the original -- confirm
                # whether it should be translated as well.
                protein = (mrna_sequence[:permutation_coor[0]].translate()
                           + row['biological_extended_peptide']
                           + mrna_sequence[permutation_coor[1] + 1:])
                comb_id = key + '|edited_' + str(n) + '\t' + str(
                    row['editing_combinations_relative_to_coding_seq_base0'])
                writer.write_record(
                    SeqRecord(protein, id=comb_id, description=''))
                n += 1

        writer.write_footer()
def __init__(self, input, output=None, diags=None, maxhours=None,
             maxmb=None, clwstrict=False, quiet=True, clw=False):
    """Store the run options and resolve the input and output file paths.

    :param input: path of an existing FASTA file, or a list of records that
        is written to a temporary FASTA file
    :param output: output path; when None a temporary file is created
    :param diags: accepted for interface compatibility; not stored here
    :param maxhours: stored as ``self.maxhours``
    :param maxmb: stored as ``self.maxmb``
    :param clwstrict: stored as ``self.clwstrict``
    :param quiet: stored as ``self.quiet``
    :param clw: stored as ``self.clw``
    :raises AssertionError: if ``input`` is a path that does not exist
    :raises Exception: if ``input`` is neither a str nor a list
    """
    # Imported up front: the output branch below needs it even when the
    # input is a plain path (previously that combination hit a NameError).
    from tempfile import NamedTemporaryFile

    self._itemp = None
    self._otemp = None
    self.maxhours = maxhours
    self.clwstrict = clwstrict
    self.clw = clw
    self.quiet = quiet
    self.maxmb = maxmb

    if isinstance(input, str):
        assert os.path.exists(input), "Can't find file %s" % input
        self.input = input
    elif isinstance(input, list):
        from Bio.SeqIO.FastaIO import FastaWriter
        # Text mode: FastaWriter writes str, not bytes.
        self._itemp = NamedTemporaryFile(mode="w+")
        self.input = self._itemp.name
        writer = FastaWriter(self._itemp, wrap=0)
        writer.write_records(input)
        # Flush so the records are on disk before the path is handed on.
        self._itemp.flush()
    else:
        raise Exception("Unknown input type", input)

    if isinstance(output, str):
        self.output = output
    elif output is None:
        # Keep the handle on self so the temporary file stays alive.
        self._otemp = NamedTemporaryFile()
        self.output = self._otemp.name
def create_fully_edited_proteins_fasta(input_path, fasta_input, output_path):
    """Write a native and, if it has editing sites, a fully edited protein per record.

    For each record of ``input_path + fasta_input`` the per-mismatch site
    lists are parsed out of the record description, the native translation
    is written, and when at least one site exists the sequence edited at
    all sites is translated and written too.

    :param input_path: directory prefix of the input FASTA file
    :param fasta_input: input FASTA file name (also used in the output name)
    :param output_path: directory prefix of the output FASTA file
    """
    # One compiled pattern per mismatch type, capturing the list that
    # follows "<mm>_base0: " in the header.
    mm_headers = {
        mm: re.compile(r'(?<=' + mm + r'_base0:\s).*?]') for mm in all_mm
    }

    out_name = (output_path + 'fully_edited_and_native_proteins_from_'
                + fasta_input)
    with open(input_path + fasta_input, "r") as fasta_handle, \
            open(out_name, 'w') as out_handle:
        writer = FastaWriter(out_handle, wrap=None)
        writer.write_header()

        for record in SeqIO.parse(fasta_handle, "fasta"):
            # SECURITY NOTE(review): eval() runs text taken from the FASTA
            # header. Only safe for trusted files; consider
            # ast.literal_eval if the headers only ever hold list literals.
            sites_dict = {
                mm: sorted(eval(find_by_regex_in_header(
                    record.description, mm_headers[mm])))
                for mm in all_mm
            }
            sites_number = sum(len(sites_dict[mm]) for mm in all_mm)
            length = len(record.seq)
            comb = tuple([sites_dict[mm] for mm in all_mm])
            protein_basic_description = ''

            # Translate and write the native protein.
            seq_id = record.id + '_original'
            protein = record.seq.translate()
            writer.write_record(
                SeqRecord(protein, id=seq_id,
                          description=protein_basic_description))

            if not sites_number:
                continue

            seq_id = record.id + '_fully_edited'
            protein_description = (
                protein_basic_description
                + '| editing_combinations_base0_wrt_to_coding_sequence: '
                + str(comb))
            edited_seq = Seq(
                edit_rna_as_peptide(str(record.seq), (0, length - 1), comb),
                generic_dna)
            protein = edited_seq.translate()
            writer.write_record(
                SeqRecord(protein, id=seq_id,
                          description=protein_description))

            # Diagnostic output for edited sequences that are not a whole
            # number of codons.
            if len(edited_seq) % 3:
                print(record.id)
                print(len(record.seq))
                print(len(edited_seq))

        writer.write_footer()
def write(f):
    """Write ``sequences`` (taken from the enclosing scope) to ``f`` as FASTA.

    Records whose id is the placeholder ``"<unknown id>"`` are given a
    running number as id before writing.

    :param f: writable file handle; flushed before returning
    """
    # NOTE(review): the counter is assumed to advance only for records that
    # were renamed -- confirm against the original indentation.
    next_id = 0
    for rec in sequences:
        if rec.id != "<unknown id>":
            continue
        rec.id = str(next_id)
        next_id += 1

    FastaWriter(f).write_file(sequences)
    f.flush()  # IMPORTANT
def write_fasta(sequence, file_handle, wrap=60):
    """
    Log the destination, then write ``sequence`` to it in FASTA format.

    :param sequence: sequence to write in the file
    :type sequence: :class:`Bio.SeqRecord.SeqRecord` object
    :param file_handle: output file handle; its ``name`` is logged
    :param wrap: line width passed on to ``FastaWriter``
    """
    destination = file_handle.name
    _LOGGER.info("".join(("Writing output to ", destination, "...")))
    fasta_out = FastaWriter(file_handle, wrap=wrap)
    fasta_out.write_file(sequence)
def handle_noargs(self, **options):
    """Dump every ``Protein`` record to the FASTA file named by ``options['outfile']``.

    :param options: command options; only ``'outfile'`` is read
    """
    outfilename = options['outfile']
    # The context manager closes the file even if fetching or writing fails.
    with open(outfilename, 'w') as outfileh:
        print("Fetching records.")
        records = Protein.objects.all()
        seqs = self._records_to_seqs(records)
        print("Writing records to %s" % outfilename)
        # Use only the record id as the FASTA title line.
        writer = FastaWriter(outfileh, record2title=lambda x: x.id)
        writer.write_file(seqs)
    print("Done.")
def _write(self, file, value):
    """
    Write records to a FASTA file without line wrapping.

    :param file: path of the output file (overwritten if it exists)
    :param value: records passed to ``FastaWriter.write_file``
    :return: None
    """
    # ``with`` guarantees the handle is closed if writing raises.
    with open(file, "w") as handle:
        FastaWriter(handle, wrap=None).write_file(value)
def split_files(fasta_file):
    """Rewrite ``fasta_file`` without line wraps and split it into chunks.

    The unwrapped copy is written to ``nowrap.fasta`` in the current
    directory so that the line-based ``split`` call cannot cut a sequence
    in the middle; ``split -l 200000`` is then run on that copy.

    :param fasta_file: path of the FASTA file to split
    :raises subprocess.CalledProcessError: if ``split`` exits non-zero
    """
    from Bio.SeqIO.FastaIO import FastaWriter

    with open(fasta_file) as input_handle, \
            open("nowrap.fasta", "w") as output_handle:
        writer = FastaWriter(output_handle, wrap=0)
        # Stream the records straight through instead of buffering the
        # whole file in a list first.
        writer.write_file(SeqIO.parse(input_handle, "fasta"))

    # The number of lines per chunk could be made a parameter.
    subprocess.check_call(["split", "-l", "200000", "nowrap.fasta"])
def cut_fasta_by_len(fa_file, len_cutoff, outdir, prefix, suffix):
    """Write the records of ``fa_file`` that are at least ``len_cutoff`` long.

    The output goes to ``<outdir>/<prefix>.ge<len_cutoff><suffix>``. If that
    file already exists and is non-empty it is reused and nothing is read.

    :param fa_file: input FASTA path; opened with gzip when it ends in ``.gz``
    :param len_cutoff: minimum record length to keep
    :param outdir: output directory, created if missing
    :param prefix: output file name prefix
    :param suffix: output file name suffix
    :return: path of the filtered FASTA file
    :raises OSError: if ``outdir`` cannot be created
    """
    # Create-then-ignore-EEXIST avoids the race between an existence check
    # and mkdir when another thread or process creates the path.
    try:
        os.makedirs(outdir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    cut_fa_file = os.path.join(outdir,
                               prefix + ".ge" + str(len_cutoff) + suffix)
    if os.path.exists(cut_fa_file) and (os.path.getsize(cut_fa_file) > 0):
        return cut_fa_file

    if fa_file.endswith(".gz"):
        in_h = gzip.open(fa_file, 'rt')
    else:
        in_h = open(fa_file, 'r')

    # try/finally so the input handle is released even if parsing or
    # writing raises.
    try:
        with open(cut_fa_file, 'w') as out_h:
            writer = FastaWriter(out_h)
            writer.write_header()
            for rec in FastaIterator(in_h):
                if len(rec) >= len_cutoff:
                    writer.write_record(rec)
            writer.write_footer()
    finally:
        in_h.close()

    return cut_fa_file
def create_peptides_fasta(input_path, fasta_input, peps_df, extention=15):
    """Write each peptide of ``peps_df`` flanked by its native protein context.

    For every record of the input FASTA the sequence is translated, and for
    each row of ``peps_df`` with a matching ``seq_id`` the peptide
    (``biological_peptide``) is written with up to ``extention`` residues
    of the translated protein on both sides.

    :param input_path: directory prefix of the input AND output files
    :param fasta_input: input FASTA file name (also used in the output name)
    :param peps_df: peptide table; the columns read here are ``seq_id``,
        ``in_frame_coordinates_base0``, ``biological_peptide``, ``edited``
        and ``permutation_coor_base0``
    :param extention: number of flanking residues on each side
    """
    out_name = (input_path + 'peptides_extanded_by' + str(extention)
                + '_from' + fasta_input)
    with open(input_path + fasta_input, "r") as fasta_handle, \
            open(out_name, 'w') as out_handle:
        writer = FastaWriter(out_handle, wrap=None)
        writer.write_header()

        for record in SeqIO.parse(fasta_handle, "fasta"):
            prot = record.seq.translate()
            for i, row in peps_df[peps_df['seq_id'] == record.id].iterrows():
                rna_pep_coor = row['in_frame_coordinates_base0'].split('_')
                # Floor division: nucleotide -> codon index must stay an
                # int, otherwise slicing fails on Python 3.
                pep_start = int(rna_pep_coor[1]) // 3
                pep_end = int(rna_pep_coor[2]) // 3
                seq_start = max(0, pep_start - extention)
                seq_end = min(pep_end + extention, len(prot))
                extented_pep = (prot[seq_start:pep_start]
                                + row['biological_peptide']
                                + prot[min(pep_end + 1, len(prot)):seq_end])

                if not row['edited']:
                    seq_id = (record.id + '_original_' + str(seq_start * 3)
                              + '_' + str(seq_end * 3) + '_pep_id_' + str(i))
                else:
                    seq_id = (record.id + '_' + str(seq_start * 3) + '_'
                              + str(seq_end * 3) + '_editing_range'
                              + row['permutation_coor_base0'] + '_pep_id_'
                              + str(i))

                writer.write_record(
                    SeqRecord(extented_pep, id=seq_id, description=''))

        writer.write_footer()
def write_by_og(self, output_folder):
    '''
    Write the mapped sequences of every OG to its own FASTA file.

    One file ``mapped_<key>.fa`` is written per entry of ``self.og_records``.

    :param output_folder: folder where files should be stored (created if
        it does not exist)
    '''
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    for key, value in tqdm(self.og_records.items(),
                           desc="Writing DNA seq sorted by OG", unit=" OG"):
        out_path = os.path.join(output_folder, 'mapped_' + key + '.fa')
        # ``with`` closes the file even if writing one OG fails.
        with open(out_path, "w") as handle:
            FastaWriter(handle, wrap=None).write_file(value)
def trim(barcode, length, in_handle, out_handle):
    """
    Trim input sequences, write to out_handle

    Trims barcodes from FASTA-formatted sequences in in_handle, truncates
    sequences at provided length, writes to out_handle.

    :param barcode: barcode expected at the start of each read; leading
        ``G`` characters are ignored
    :param length: end position (in the untrimmed read) at which every
        record is cut
    :param in_handle: handle of the input FASTA file
    :param out_handle: handle the trimmed records are written to
    """
    def inner(sequences, pattern):
        "Does the trimming"
        for sequence in sequences:
            m = pattern.match(str(sequence.seq))
            if m:
                yield sequence[m.end():length]
            else:
                # Explicit write instead of the Python-2-only
                # ``print >> sys.stderr`` form; the output is identical.
                sys.stderr.write("No match: %s\n" % sequence.id)
                yield sequence[:length]

    # Records are read TCAG, we trim the first 4 *flows*, which won't
    # detect duplicate G's.
    barcode = barcode.lstrip('G')
    pattern = re.compile('^{0}'.format(barcode))
    sequences = SeqIO.parse(in_handle, 'fasta')
    trimmed = inner(sequences, pattern)
    FastaWriter(out_handle, wrap=None).write_file(trimmed)
def write_select_og_dna(self):
    '''
    Write the DNA sequences of every entry of ``self.ogs`` to its own FASTA file.

    Files go to ``<self.args.output_path>/reference_ogs_dna/<key>.fa``. They
    are only written when that folder does not exist yet; if it exists and
    already holds as many ``*.fa`` files as ``self.ogs_dna_by_species`` has
    entries, a notice is printed instead.
    '''
    output_folder = os.path.join(self.args.output_path, "reference_ogs_dna")
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        for key, value in tqdm(self.ogs.items(),
                               desc="Writing OGs sorted by species",
                               unit=" species"):
            # ``with`` closes the file even if writing one entry fails.
            with open(os.path.join(output_folder, key + '.fa'), "w") as handle:
                FastaWriter(handle, wrap=None).write_file(value.dna)
    elif len(self.ogs_dna_by_species) == len(
            glob.glob(os.path.join(output_folder, '*.fa'))):
        print('Folder with files already exists and will not be overwritten.')
def writeFasta(fb, seqList):
    """Write every record of ``seqList`` to the handle ``fb`` in FASTA format.

    :param fb: writable file handle
    :param seqList: sized collection of records
    :raises ValueError: if ``seqList`` is empty
    """
    if len(seqList) < 1:
        raise ValueError("No data to Persist.")

    fasta_out = FastaWriter(fb)
    fasta_out.write_header()
    for entry in seqList:
        fasta_out.write_record(entry)
    fasta_out.write_footer()
def convert_ill_fasta(self, event):
    """Let the user pick an Illumina FASTQ file and convert it to FASTA.

    The chosen path is stored in ``self.illumina``; the output is written
    to ``<file name>.fasta`` in the current directory and its name stored in
    ``self.filename``. Nothing happens if the dialog is cancelled.

    :param event: the triggering GUI event (unused)
    """
    filters = 'Text files (*.txt)|*.txt'
    dialog = wx.FileDialog(None, style=wx.OPEN, wildcard=filters)
    # NOTE(review): the conversion is assumed to belong to the "OK" branch;
    # bail out on cancel rather than converting a stale or missing path.
    if dialog.ShowModal() != wx.ID_OK:
        return

    self.illumina = dialog.GetPath()
    self.filename = self.illumina.split('/')[-1] + '.fasta'

    # Both files are closed even when parsing or writing fails.
    with open(self.illumina) as fastq_handle, \
            open(self.filename, "w") as handle:
        records = SeqIO.parse(fastq_handle, "fastq-illumina")
        count = FastaWriter(handle, wrap=80).write_file(records)
    print("Converted %i records" % count)
def reheader_fasta(fa_in, fa_out, header_function, in_gz, gz):
    """Copy a FASTA file while rewriting every header.

    :param fa_in: input FASTA path
    :param fa_out: output FASTA path
    :param header_function: passed to ``FastaIterator`` as ``title2ids``
    :param in_gz: when true the input is opened with ``gzip`` in text mode
    :param gz: when true the output is written through ``bgzf.BgzfWriter``
    """
    in_h = gzip.open(fa_in, 'rt') if in_gz else open(fa_in, 'r')
    try:
        out_h = bgzf.BgzfWriter(fa_out, 'wb') if gz else open(fa_out, 'w')
        # Nested try/finally: both handles are released even when parsing
        # or writing raises part-way through.
        try:
            writer = FastaWriter(out_h)
            writer.write_header()
            for rec in FastaIterator(in_h, title2ids=header_function):
                writer.write_record(rec)
            writer.write_footer()
        finally:
            out_h.close()
    finally:
        in_h.close()
def __init__(self, input, output=None, identity=0.8, length=0.8):
    """Store the thresholds and resolve the input and output file paths.

    :param input: path of an existing FASTA file, or a list of records that
        is written to a temporary FASTA file
    :param output: output path; when None a temporary file is created
    :param identity: stored as ``self.identity``
    :param length: stored as ``self.length``
    :raises AssertionError: if ``input`` is a path that does not exist
    :raises Exception: if ``input`` is neither a str nor a list
    """
    # Imported up front: the output branch below needs it even when the
    # input is a plain path (previously that combination hit a NameError).
    from tempfile import NamedTemporaryFile

    self._itemp = None
    self._otemp = None
    self.identity = identity
    self.length = length

    if isinstance(input, str):
        assert(os.path.exists(input))
        self.input = input
    elif isinstance(input, list):
        from Bio.SeqIO.FastaIO import FastaWriter
        # Text mode: FastaWriter writes str, not bytes.
        self._itemp = NamedTemporaryFile(mode="w+")
        self.input = self._itemp.name
        writer = FastaWriter(self._itemp, wrap=0)
        writer.write_records(input)
        # Flush so the records are on disk before the path is handed on.
        self._itemp.flush()
    else:
        raise Exception("Unknown input type", input)

    if isinstance(output, str):
        self.output = output
    elif output is None:
        # Keep the handle on self so the temporary file stays alive, and
        # expose its path (the original assigned ``self`` here).
        self._otemp = NamedTemporaryFile()
        self.output = self._otemp.name
def split(fasta_file, parts):
    """Split ``fasta_file`` into ``parts`` files of roughly equal record count.

    The parts are written next to the input as ``<fasta_file>.0``,
    ``<fasta_file>.1``, ... The record count is obtained by counting the
    lines that contain ``>`` with ``grep``.

    :param fasta_file: path of the FASTA file to split
    :param parts: number of part files to aim for
    """
    # Argument list instead of a shell string, so file names containing
    # spaces or shell metacharacters are handled safely.
    out, err = subprocess.Popen(["grep", "-c", ">", fasta_file],
                                stdout=subprocess.PIPE).communicate()
    count = int(out.strip())
    part_size = int(math.ceil(float(count) / float(parts)))
    runtime().debug("Part size", part_size)

    writer = None
    handle = None
    try:
        with open(fasta_file) as fasta:
            for i, record in enumerate(SeqIO.parse(fasta, "fasta")):
                if i % part_size == 0:
                    part = i // part_size
                    newfile = fasta_file + ".%i" % part
                    runtime().debug(i, i % part_size, newfile)
                    # Finish the previous part before starting a new one:
                    # footer first, then close its handle.
                    if writer:
                        writer.write_footer()
                    if handle:
                        handle.close()
                    handle = open(newfile, "w")
                    writer = FastaWriter(handle)
                    writer.write_header()
                writer.write_record(record)
        if writer:
            writer.write_footer()
    finally:
        # The last part (or the current one on error) must be closed too.
        if handle:
            handle.close()
def main(args):
    """Write every record of ``args.fasta`` to its own ``<record id>.fasta`` file.

    :param args: object whose ``fasta`` attribute is passed to
        ``SeqIO.parse`` as the input
    """
    for fasta in SeqIO.parse(args.fasta, "fasta"):
        # ``with`` closes each output file even if writing fails.
        with open(fasta.id + ".fasta", "w") as out:
            fasta_out = FastaWriter(out, wrap=70)
            fasta_out.write_header()
            fasta_out.write_record(fasta)
            fasta_out.write_footer()
def trierFastaByDomain(tgtDomain, fastaDict, step2List, writeFileName,
                       formatFunc):
    """Write the FASTA records whose step-2 entry belongs to ``tgtDomain``.

    :param tgtDomain: domain value to select
    :param fastaDict: mapping from gene id to FASTA record
    :param step2List: iterable of step-2 entries
    :param writeFileName: path of the FASTA file to write
    :param formatFunc: callable turning one entry into the 10-tuple
        ``(score, gName, domain, gID, ARC, RF, reverse, begin, end, desc)``
    """
    with open(writeFileName, 'w') as fb:
        writer = FastaWriter(fb)
        writer.write_header()
        for record in step2List:
            (score, gName, domain, gID, ARC, RF,
             reverse, begin, end, desc) = formatFunc(record)
            if domain != tgtDomain:
                continue
            # Identity test against None: ``<>`` is Python-2-only and an
            # equality comparison should not be applied to records.
            fasta_record = fastaDict.get(gID)
            if fasta_record is not None:
                writer.write_record(fasta_record)
            # Ids missing from fastaDict are skipped silently.
        writer.write_footer()
def make_qiime_output(self):
    """Write the barcoded reads to ``self.qiime_fasta.path``.

    Each read is renamed to ``<sample>_<running number> <original id>``,
    its description set to the ``orig_bc``/``new_bc``/``bc_diffs`` triple
    built from the first ``self.pool.bar_len`` bases, and the read is
    trimmed by ``self.trim_fwd`` / ``self.trim_rev`` before writing.
    """
    # Per-sample running read number.
    counter = defaultdict(int)
    # ``x[a:-0]`` would be empty, so use an open-ended slice when nothing
    # is trimmed from the end.
    stop = -self.trim_rev if self.trim_rev else None

    with open(self.qiime_fasta.path, 'w') as handle:
        writer = FastaWriter(handle, wrap=0)
        writer.write_header()
        for r in self.only_used.parse_barcodes():
            sample_name = r.first.sample.short_name
            counter[sample_name] += 1
            r.read.id = '%s_%i %s' % (sample_name, counter[sample_name],
                                      r.read.id)
            bar_seq = r.read.seq[0:self.pool.bar_len]
            r.read.description = "orig_bc=%s new_bc=%s bc_diffs=0" % (
                bar_seq, bar_seq)
            writer.write_record(r.read[self.trim_fwd:stop])
        writer.write_footer()
def cleanUpFasta(fname, fastaDict, step2List, Step2FormatSepFunc, seuil=1e-3):
    """Write the sub-sequences described by ``step2List`` to ``fname``.

    Entries whose score exceeds ``seuil`` are skipped. For the others the
    sequence of ``fastaDict[gName]`` is taken (reversed, not complemented,
    when ``reverse`` is set), sliced to ``[begin:end]`` and written as a
    new record.

    :param fname: path of the FASTA file to write
    :param fastaDict: mapping from gene name to a record with a ``seq``
    :param step2List: iterable of step-2 entries
    :param Step2FormatSepFunc: callable turning one entry into the 10-tuple
        ``(score, gName, domain, gId, ARC, RF, reverse, begin, end, desc)``
    :param seuil: score threshold; entries scoring above it are dropped
    """
    with open(fname, 'w') as fb:
        writer = FastaWriter(fb)
        writer.write_header()
        for line in step2List:
            try:
                (score, gName, domain, gId, ARC, RF,
                 reverse, begin, end, desc) = Step2FormatSepFunc(line)
                if score > seuil:
                    continue
                # str() replaces Seq.tostring(), which newer Biopython
                # releases no longer provide.
                code = str(fastaDict[gName].seq)
                if reverse:
                    code = code[::-1]
                record = SeqRecord(Seq(code[begin:end], generic_dna),
                                   name=gName, id=gId, description=desc)
                writer.write_record(record)
            except KeyError:
                print("[%s] not exists in fasta dictionary.\n" % gName)
                continue
        writer.write_footer()
def write_dna(self, species, output_folder):
    """Write ``self.dna`` to ``<output_folder>/<species>_OGs.fa``.

    :param species: name used as the file name prefix
    :param output_folder: existing folder the file is written to
    """
    out_path = os.path.join(output_folder, species + '_OGs.fa')
    # ``with`` closes the file even if writing fails.
    with open(out_path, "w") as handle:
        FastaWriter(handle, wrap=None).write_file(self.dna)
def select_from_small_file(args):
    """Write a random sample of ``num`` records from a FASTA file.

    :param args: tuple ``(inp_file, db_inp_file, db_out_file, out_file,
        num)``; only ``inp_file``, ``out_file`` and ``num`` are used here
    """
    inp_file, db_inp_file, db_out_file, out_file, num = args
    with open(inp_file) as in_handle:
        inp = list(SeqIO.parse(in_handle, 'fasta'))
    shuffle(inp)
    # Closing the output explicitly guarantees the data is flushed to disk
    # instead of relying on garbage collection.
    with open(out_file, 'w') as out_handle:
        FastaWriter(out_handle, wrap=0).write_file(inp[:num])
# Python Script to Trim Based on designated start and end
###############################################################################
# Written by Mario Muscarella
# Last Update 10 May 2013
# Directions:
from Bio import SeqIO
import sys
import glob
from Bio.SeqIO.FastaIO import FastaWriter

# change these numbers
start = 1130
end = 42988


def trim_positions(records, start, end):
    """Yield every record sliced to ``[start:end]``."""
    for record in records:
        yield record[start:end]


#files = glob.glob("*.align")
file = "HMWF.align"

original_seqs = SeqIO.parse(file, "fasta")
trimmed_seqs = trim_positions(original_seqs, start, end)

# ``with`` closes the output even if trimming or writing fails.
with open(file + ".trim.fasta", "w") as output_handle:
    count = FastaWriter(output_handle, wrap=0).write_file(trimmed_seqs)

print("Trimmed %i reads" % count)
def create_edited_proteins_all_represented_combinations(
        input_path, fasta_input, output_path, final_peps_df,
        max_edits_per_pep=None, allow_change_in_cleavage_sites=False):
    """Write the native protein and one protein per represented editing combination.

    For each sequence id the editing combinations of all its peptides are
    collected; every distinct combination that passes the filters is
    applied to the coding sequence, translated and written once.

    :param input_path: directory prefix of the input FASTA file
    :param fasta_input: input FASTA file name (also used in the output name)
    :param output_path: directory prefix of the output FASTA file
    :param final_peps_df: peptide table; the columns read here are
        ``seq_id``, ``editing_combinations_relative_to_coding_seq_base0``,
        ``N_terminus``, ``C_terminus`` and ``cancelled_cs_in_pep``
    :param max_edits_per_pep: when given, combinations with more sites
        than this are skipped
    :param allow_change_in_cleavage_sites: when False, sequences whose
        first peptide row reports a changed terminus or a cancelled
        cleavage site are skipped entirely
    """
    # The all-empty combination stands for the unedited sequence.
    no_edits = ([], [], [], [], [], [], [], [], [], [], [], [])

    # Build a seq-id -> sequence dictionary from the input FASTA file.
    sequences_dict = {}
    with open(input_path + fasta_input, "r") as fasta_handle:
        for record in SeqIO.parse(fasta_handle, "fasta"):
            sequences_dict[record.id] = record.seq

    # All editing combinations per sequence id, as a list of lists.
    comps_editing_combs = final_peps_df.groupby('seq_id')[
        'editing_combinations_relative_to_coding_seq_base0'].aggregate(
            lambda x: list(x))

    # Only seq_id-level data is needed from here on: keep one row per id.
    final_peps_df = final_peps_df.drop_duplicates(
        subset='seq_id', keep='first').set_index('seq_id')

    out_name = output_path + 'proteins_per_combination_from_' + fasta_input
    with open(out_name, 'w') as out_handle:
        writer = FastaWriter(out_handle, wrap=None)
        writer.write_header()

        # .items() replaces Series.iteritems(), which recent pandas
        # releases no longer provide.
        for index, combs_nested_list in comps_editing_combs.items():
            written_combs = []
            n = 1
            protein_basic_description = ''
            length = len(sequences_dict[index])
            flattened_comb_list = [c for l in combs_nested_list for c in l]

            # The cleavage-site filter depends only on the sequence id, so
            # evaluate it once per sequence instead of once per combination.
            cleavage_ok = allow_change_in_cleavage_sites or not (
                final_peps_df.loc[index, 'N_terminus'] != 'no_change'
                or final_peps_df.loc[index, 'C_terminus'] != 'no_change'
                or final_peps_df.loc[index, 'cancelled_cs_in_pep'])

            for comb in flattened_comb_list:
                if max_edits_per_pep is not None:
                    n_sites = len([site for edit_type in comb
                                   for site in edit_type])
                    if n_sites > max_edits_per_pep:
                        continue
                if not cleavage_ok or comb in written_combs:
                    continue

                if comb == no_edits:
                    comb_id = index + '_original'
                    protein = sequences_dict[index].translate()
                    protein_description = protein_basic_description
                else:
                    comb_id = index + '_edited_' + str(n)
                    protein_description = (
                        protein_basic_description
                        + '| editing_combinations_base0_wrt_to_coding_sequence: '
                        + str(comb))
                    protein = Seq(
                        edit_rna_as_peptide(str(sequences_dict[index]),
                                            (0, length - 1), comb),
                        generic_dna).translate()
                    n += 1

                written_combs.append(comb)
                writer.write_record(
                    SeqRecord(protein, id=comb_id,
                              description=protein_description))

        writer.write_footer()
def create_in_frame_rna_file_from_anovar_results_and_coding_mrna_seqs_final_sites_dfs(fasta_file,output_name,out_path,mm_df_dict,stop_as_bad_records,met_as_good_records,last_is_stop,variants_to_use = None):
    """Annotate coding sequences with their editing sites and sort them into good/bad files.

    input  - coding sequences as a FASTA file, and one sites dataframe per
             mismatch type (sites relative to the coding sequence, indexed
             by record id, with a ``position_base1`` column).
    output - ``<out_path><output_name>.fasta`` with the accepted sequences
             (upper-cased, header listing the base-0 sites per mismatch
             type) and ``<out_path>bad_seqs_<output_name>.fasta`` with the
             rejected records as read. Sequences are not trimmed apart
             from the optional stop codon and any trailing partial codon.

    :param fasta_file: path of the coding-sequence FASTA file
    :param output_name: base name of the output files
    :param out_path: directory prefix of the output files
    :param mm_df_dict: mismatch type -> sites dataframe, or None
    :param stop_as_bad_records: reject sequences with an internal stop
    :param met_as_good_records: reject sequences not starting with Met
    :param last_is_stop: the last codon must be a stop and is removed
    :param variants_to_use: optional collection of record ids to keep;
        empty or None keeps every record
    """
    # A non-empty selection is turned into a set once for O(1) lookups.
    if variants_to_use is not None and len(variants_to_use):
        allowed_variants = set(variants_to_use)
    else:
        allowed_variants = None

    n_bad = 0
    n_good = 0
    sites_good = 0
    sites_bad = 0

    with open(out_path + output_name + '.fasta', 'w') as good_handle, \
            open(out_path + 'bad_seqs_' + output_name + '.fasta', 'w') as bad_handle, \
            open(fasta_file, "r") as fasta_handle:
        writer = FastaWriter(good_handle, wrap=None)
        writer_bad = FastaWriter(bad_handle, wrap=None)
        writer.write_header()
        writer_bad.write_header()

        for record in SeqIO.parse(fasta_handle, "fasta"):
            split_header = record.id.split(';')
            rec_id = split_header[0] + ';' + split_header[1]
            if allowed_variants is not None and rec_id not in allowed_variants:
                continue

            # Base-0 site positions per mismatch type for this record.
            mm_loc_dict = {}
            for mm in all_mm:
                sites = mm_df_dict[mm]
                if sites is None:
                    mm_list = []
                else:
                    try:
                        mm_list = [int(k) - 1 for k in
                                   sites.loc[[rec_id]]['position_base1']]
                    except KeyError:
                        # No sites of this type for this record.
                        mm_list = []
                mm_loc_dict[mm] = mm_list
            n_sites = sum(len(mm_loc_dict[mm]) for mm in all_mm)

            description_str = ''.join(
                '| ' + mm + '_base0: ' + str(mm_loc_dict[mm])
                for mm in mm_loc_dict)

            coding_part = record.seq[0:-3] if last_is_stop else record.seq
            final_sequence = str(coding_part).replace('a','A').replace('g','G').replace('t','T').replace('c','C')

            good_record = True
            if stop_as_bad_records:
                if '*' in Seq(str(final_sequence), generic_dna).translate():
                    good_record = False
            if met_as_good_records:
                if Seq(str(record.seq[0:3]), generic_dna).translate() != 'M':
                    good_record = False
            if last_is_stop:
                if Seq(str(record.seq[-3:len(record.seq)]), generic_dna).translate() != '*':
                    good_record = False

            if not good_record:
                writer_bad.write_record(record)
                n_bad += 1
                sites_bad += n_sites
            else:
                # Drop a trailing partial codon. The original slice
                # ``[0:-len(s)%3]`` parsed as ``(-len(s)) % 3`` and cut the
                # wrong number of bases.
                remainder = len(final_sequence) % 3
                if remainder:
                    final_sequence = final_sequence[:len(final_sequence) - remainder]
                current_record = SeqRecord(Seq(final_sequence, generic_dna),
                                           id = rec_id,
                                           description = description_str)
                writer.write_record(current_record)
                n_good += 1
                sites_good += n_sites

        writer.write_footer()
        if n_bad:
            writer_bad.write_footer()

    print(str(n_good) + ' good sequence with ' + str(sites_good) + 'sites')
    print(str(n_bad) + ' bad sequence with ' + str(sites_bad) + 'sites')
"""
Remove unpaired reads from a fasta file.

Useful when reads were dropped from one file of a paired-end FASTA pair
(e.g. during quality trimming) and the partner reads have to be removed
from the other file as well.
"""
import argparse
from Bio import SeqIO
from Bio.SeqIO.FastaIO import FastaWriter

parser = argparse.ArgumentParser()
for positional in ("fasta_file_to_filter", "reference_fasta_file"):
    parser.add_argument(positional)
parser.add_argument("--output_fasta", default="output.fa")
args = parser.parse_args()

# Collect the first word of every header of the reference file.
reference_headers = {
    seq_record.id.split()[0]: 1
    for seq_record in SeqIO.parse(args.reference_fasta_file, "fasta")
}

# Keep only the reads whose header also occurs in the reference file.
with open(args.output_fasta, 'w') as output_fh:
    paired_reads = (
        seq_record
        for seq_record in SeqIO.parse(args.fasta_file_to_filter, "fasta")
        if seq_record.id.split()[0] in reference_headers
    )
    writer = FastaWriter(output_fh, wrap=0)
    writer.write_file(paired_reads)
# Copy the records of a FASTA file whose name appears in the header row of
# a tab-separated OTU table into a new FASTA file.
# Usage: <script> original.fasta otu_table.tsv project.fasta
import sys
import itertools
import csv
from Bio import SeqIO
from Bio.SeqIO.FastaIO import FastaWriter

original_file = sys.argv[1]
otu_table = sys.argv[2]
project_file = sys.argv[3]

# The csv module wants binary files on Python 2 and text files on
# Python 3; the "U" flag only exists (and is only needed) on Python 2.
if sys.version_info[0] == 2:
    csv_mode, fasta_mode = 'rb', 'rU'
else:
    csv_mode, fasta_mode = 'r', 'r'

## read in the csv file and get header names
with open(otu_table, csv_mode) as table_normalized_otus:
    reader = csv.reader(table_normalized_otus, delimiter="\t")
    headers = next(reader)
print(headers)

# Set for O(1) membership tests while streaming the FASTA file.
wanted_names = set(headers)

# Opening with 'w' truncates any previous output, so the original
# open/close/re-open-for-append sequence is unnecessary.
with open(original_file, fasta_mode) as fasta_in, \
        open(project_file, 'w') as project_fasta:
    total_fasta = SeqIO.parse(fasta_in, "fasta")
    writer = FastaWriter(project_fasta, wrap=None)
    writer.write_header()
    for records in total_fasta:
        if records.name in wanted_names:
            writer.write_record(records)
    writer.write_footer()
def write_fasta_output(fasta_output_file, filtered_seqs):
    """Write ``filtered_seqs`` to ``fasta_output_file`` in FASTA format.

    :param fasta_output_file: path of the file to write (overwritten)
    :param filtered_seqs: records passed to ``FastaWriter.write_file``
    """
    # ``with`` closes the file even if writing fails.
    with open(fasta_output_file, "w") as handle:
        FastaWriter(handle).write_file(filtered_seqs)
# Map the full description of the first record mentioning each code to the
# new name stored for that code in df2.
for code in codes:
    matches = [rec.description for rec in fa if code in rec.description]
    if matches:
        master_dict[matches[0]] = df2[code]

# Rename the mapped records and clear their description.
for rec in fa:
    if rec.description in master_dict:
        rec.id = master_dict[rec.description]
        rec.description = ""

## Write temporary file, read it back and print properly formatted fasta
try:
    with open('temp.fa', "w") as handle:
        FastaWriter(handle, wrap=0).write_file(fa)
    with open("temp.fa", "r") as temp_handle:
        fasta_text = temp_handle.read()
finally:
    # Remove the temporary file even when writing or reading failed.
    if os.path.exists("temp.fa"):
        os.remove("temp.fa")

# Drop a single trailing newline (print adds one back); endswith also
# copes with an empty file, where indexing [-1] raised IndexError.
if fasta_text.endswith('\n'):
    fasta_text = fasta_text[:-1]
print(fasta_text)