def write(f):
    """Replace placeholder record ids with numbers, then write the records to ``f`` as FASTA.

    ``sequences`` is not a parameter: it is read from an enclosing scope that
    is not visible in this block. Records whose id equals ``"<unknown id>"``
    are modified in place.

    :param f: writable file-like object; it is flushed before returning
    """
    # NOTE(review): the original indentation of this block was lost. ``i+=1``
    # is laid out at loop level here, so ``i`` is the record's position in
    # ``sequences``. Confirm it was not meant to sit inside the ``if`` (which
    # would number only the placeholder records consecutively).
    i = 0
    for seqrecord in sequences:
        # Only records still carrying the placeholder id are renamed.
        if seqrecord.id == "<unknown id>":
            seqrecord.id = str(i)
        i+=1
    writer = FastaWriter(f)
    writer.write_file(sequences)
    # Push buffered output to the underlying file before returning
    # (presumably because something else reads it right away - TODO confirm).
    f.flush() #IMPORTANT
def write_fasta(sequence, file_handle, wrap=60):
    """
    Log the destination, then write ``sequence`` to it in FASTA format.

    :param sequence: sequence data to write; handed unchanged to
        ``FastaWriter.write_file``
    :type sequence: documented upstream as a
        :class:`Bio.SeqRecord.SeqRecord` object (TODO confirm whether a
        single record or an iterable of records is expected)
    :param file_handle: open, writable output handle; its ``name``
        attribute is used in the log message
    :param wrap: line-wrap setting forwarded to ``FastaWriter``
        (60 by default)
    """
    destination = file_handle.name
    _LOGGER.info("Writing output to " + destination + "...")
    FastaWriter(file_handle, wrap=wrap).write_file(sequence)
def handle_noargs(self, **options):
    """Write every ``Protein`` record to a FASTA file.

    The records are fetched with ``Protein.objects.all()``, converted with
    ``self._records_to_seqs`` and written with each record's ``id`` as the
    FASTA title line. Progress messages are printed to standard output.

    :param options: command options; ``options['outfile']`` is the path of
        the file to write (opened in ``'w'`` mode)
    :raises KeyError: if ``'outfile'`` is missing from ``options``
    """
    outfilename = options['outfile']
    # The context manager closes the file even if fetching, converting or
    # writing raises; the original leaked the handle in that case.
    with open(outfilename, 'w') as outfileh:
        # Single-argument parenthesised prints give identical output under
        # both the Python 2 print statement and the print function.
        print("Fetching records.")
        records = Protein.objects.all()
        seqs = self._records_to_seqs(records)
        print("Writing records to %s" % outfilename)
        writer = FastaWriter(outfileh, record2title=lambda x: x.id)
        writer.write_file(seqs)
    print("Done.")
def _write(self, file, value):
    """
    Write output to a fasta file without wrapping the sequence lines.

    :param file: path of the output file; it is opened in ``"w"`` mode
    :param value: records handed to ``FastaWriter.write_file``
    :return: None
    """
    # ``with`` guarantees the handle is closed (and flushed) even when
    # writing fails part-way through.
    with open(file, "w") as handle:
        writer = FastaWriter(handle, wrap=None)
        writer.write_file(value)
def split_files(fasta_file):
    """Rewrite ``fasta_file`` without line wraps and split the result into chunks.

    The unwrapped copy is written to ``nowrap.fasta`` in the current working
    directory, so a fixed-size line split cannot cut through the middle of a
    sequence. The copy is then split into pieces of 200000 lines each by the
    external ``split`` utility, which writes its pieces to the current
    working directory under its default names.

    :param fasta_file: path of the FASTA file to read
    :raises subprocess.CalledProcessError: if ``split`` exits with a
        non-zero status
    """
    from Bio.SeqIO.FastaIO import FastaWriter

    # Both handles are closed even if parsing or writing fails. The output is
    # opened first, as before, and records are streamed straight from the
    # parser to the writer instead of being collected in a list.
    with open("nowrap.fasta", "w") as output_handle:
        writer = FastaWriter(output_handle, wrap=0)
        with open(fasta_file) as input_handle:
            writer.write_file(SeqIO.parse(input_handle, "fasta"))

    # The number of lines per chunk could be made a parameter.
    # An argument list avoids going through a shell for a constant command.
    subprocess.check_call(["split", "-l", "200000", "nowrap.fasta"])
def split_files(fasta_file):
    """Rewrite ``fasta_file`` without line wraps and split the result into chunks.

    The unwrapped copy is written to ``nowrap.fasta`` in the current working
    directory, so a fixed-size line split cannot cut through the middle of a
    sequence. The copy is then split into pieces of 200000 lines each by the
    external ``split`` utility, which writes its pieces to the current
    working directory under its default names.

    :param fasta_file: path of the FASTA file to read
    :raises subprocess.CalledProcessError: if ``split`` exits with a
        non-zero status
    """
    from Bio.SeqIO.FastaIO import FastaWriter

    # Both handles are closed even if parsing or writing fails. The output is
    # opened first, as before, and records are streamed straight from the
    # parser to the writer instead of being collected in a list.
    with open("nowrap.fasta", "w") as output_handle:
        writer = FastaWriter(output_handle, wrap=0)
        with open(fasta_file) as input_handle:
            writer.write_file(SeqIO.parse(input_handle, "fasta"))

    # The number of lines per chunk could be made a parameter.
    # An argument list avoids going through a shell for a constant command.
    subprocess.check_call(["split", "-l", "200000", "nowrap.fasta"])
def write_by_og(self, output_folder):
    '''
    Write, for each OG, all the mapped sequences into a separate fasta file
    in the specified folder.

    One file named ``mapped_<key>.fa`` is written per entry of
    ``self.og_records``; sequence lines are not wrapped.

    :param output_folder: folder where files should be stored; it is created
        if it does not exist
    '''
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for key, value in tqdm(self.og_records.items(), desc="Writing DNA seq sorted by OG", unit=" OG"):
        out_path = os.path.join(output_folder, 'mapped_' + key + '.fa')
        # Close each file even if writing it fails, so a failure part-way
        # through the loop does not leak an open handle.
        with open(out_path, "w") as handle:
            writer = FastaWriter(handle, wrap=None)
            writer.write_file(value)
def write_select_og_dna(self):
    '''
    Write the DNA sequences of each entry in ``self.ogs`` into separate
    fasta files.

    The files go to ``<self.args.output_path>/reference_ogs_dna``, one
    ``<key>.fa`` per entry, holding that entry's ``dna`` attribute with
    unwrapped sequence lines. Files are only written when that folder does
    not exist yet; an existing folder is never overwritten.
    '''
    output_folder = os.path.join(self.args.output_path, "reference_ogs_dna")
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        for key, value in tqdm(self.ogs.items(), desc="Writing OGs sorted by species", unit=" species"):
            # Close each file even if writing it fails.
            with open(os.path.join(output_folder, key + '.fa'), "w") as handle:
                writer = FastaWriter(handle, wrap=None)
                writer.write_file(value.dna)
    # NOTE(review): when the folder exists but its number of .fa files does
    # not match, nothing is written and nothing is reported - confirm this
    # is intended.
    elif len(self.ogs_dna_by_species) == len(glob.glob(os.path.join(output_folder, '*.fa'))):
        print('Folder with files already exists and will not be overwritten.')
""" Remove unpaired reads from a fasta file. This script can be used for the case that unpaired reads (e.g. as reads were removed during quality trimming) in a pair of fasta files from paired-end sequencing need to be removed. """ import argparse from Bio import SeqIO from Bio.SeqIO.FastaIO import FastaWriter parser = argparse.ArgumentParser() parser.add_argument("fasta_file_to_filter") parser.add_argument("reference_fasta_file") parser.add_argument("--output_fasta", default="output.fa") args = parser.parse_args() # Read reference file header reference_headers = {} for seq_record in SeqIO.parse(args.reference_fasta_file, "fasta"): reference_headers[seq_record.id.split()[0]] = 1 # Read fasta file to filter and write output with open(args.output_fasta, 'w') as output_fh: writer = FastaWriter(output_fh, wrap=0) writer.write_file( filter(lambda seq_record: seq_record.id.split()[0] in reference_headers, SeqIO.parse(args.fasta_file_to_filter, "fasta")))
def write_fasta_output(fasta_output_file, filtered_seqs):
    """Write ``filtered_seqs`` to ``fasta_output_file`` in FASTA format.

    :param fasta_output_file: path of the file to write; opened in ``"w"``
        mode
    :param filtered_seqs: records handed to ``FastaWriter.write_file``
    """
    # ``with`` closes the handle even if writing raises; the original only
    # closed it on success.
    with open(fasta_output_file, "w") as handle:
        writer = FastaWriter(handle)
        writer.write_file(filtered_seqs)
def write_dna(self, species, output_folder):
    """Write ``self.dna`` to ``<output_folder>/<species>_OGs.fa`` as FASTA.

    Sequence lines are not wrapped.

    :param species: name used as the prefix of the output file name
    :param output_folder: folder in which the file is written; this method
        does not create it
    """
    out_path = os.path.join(output_folder, species + '_OGs.fa')
    # ``with`` closes the handle even if writing raises.
    with open(out_path, "w") as handle:
        writer = FastaWriter(handle, wrap=None)
        writer.write_file(self.dna)
z=[x.description for x in fa if i in x.description] if len(z)>0: new_name=df2[i] full_name=z[0] master_dict.update({full_name : new_name}) for i in fa: if i.description in master_dict.keys(): i.id=master_dict[i.description] i.description="" ## Write temporary file handle = open('temp.fa', "w") writer = FastaWriter(handle, wrap=0) writer.write_file(fa) handle.close() ## Read in temporary file and print properly formatted fasta x = open("temp.fa", "r") y=x.readlines() z=''.join(y) if z[-1]=='\n': z=z[:-1] print (z) os.remove("temp.fa") ########## BIN
def select_from_small_file(args):
    """Write a shuffled selection of records from one FASTA file to another.

    All records of the input file are loaded, shuffled in place with
    ``shuffle``, and the first ``num`` of them are written without line
    wrapping.

    :param args: sequence of exactly five items,
        ``(inp_file, db_inp_file, db_out_file, out_file, num)``, packed into
        one argument. ``db_inp_file`` and ``db_out_file`` are unpacked but
        not used here.
    :raises ValueError: if ``args`` does not unpack into five items
    """
    inp_file, _db_inp_file, _db_out_file, out_file, num = args

    # Read everything while the input handle is open, then release it; the
    # original never closed this handle.
    with open(inp_file) as in_handle:
        inp = list(SeqIO.parse(in_handle, 'fasta'))
    shuffle(inp)

    # Closing the output handle guarantees the data is flushed to disk; the
    # original relied on garbage collection to do so.
    with open(out_file, 'w') as out_handle:
        writer = FastaWriter(out_handle, wrap=0)
        writer.write_file(inp[:num])