Пример #1
0
	# Concatenate, entry by entry, the sequences of every file listed in
	# options.append_fasta onto the sequences already loaded.
	# new_seqs is the same list object as seqs, so seqs is extended in place.
	new_headers = headers
	new_seqs = seqs
	for append_fname in options.append_fasta:
		if not os.path.isfile(append_fname):
			raise IOError("# Error: file {} does not exist".format(append_fname))
		with open(append_fname,'r') as inf:
			# Read a FASTA file?
			(app_headers, app_seqs) = biofile.readFASTA(inf)
		n_appended = len(app_seqs)
		info_outs.write("# Read {:d} sequences\n".format(n_appended))
		# Each appended file must supply exactly one sequence per existing entry.
		assert n_appended == len(new_seqs)
		if options.check_headers:
			# Headers are compared positionally against the original headers.
			for header_pair in zip(headers, app_headers):
				assert header_pair[0]==header_pair[1], "# Error: headers do not match:\n\t{}\n\t{}".format(*header_pair)
		# Extend each existing sequence with the sequence at the same position.
		for idx, _ in enumerate(new_seqs):
			new_seqs[idx] += app_seqs[idx]

	# Write the concatenated sequences under the original headers
	biofile.writeFASTA(new_seqs, fasta_outs, headers=new_headers)
	n_written = len(new_seqs)

	# Record the time the run ended
	data_outs.write("# Run finished {}\n".format(util.timestamp()))

	# Report and close the output file only when options.fasta_out_fname is set
	if options.fasta_out_fname is not None:
		info_outs.write("# Wrote {} entries to {}\n".format(n_written, options.fasta_out_fname))
		outf.close()

	
 
Пример #2
0
    # With no local file name given, make up a random temporary one.
    if local_fname is None:
        # randint expects integer bounds; 10**20 is exactly the integer that
        # the float literal 1e20 stood for.
        local_fname = "tmp{:d}".format(random.randint(0, 10**20))
    # Fetch file from OrthoDB
    if options.orthodb_id is not None:
        remote_fname = "http://cegg.unige.ch/orthodb7/fasta.fasta?ogs={:s}".format(
            options.orthodb_id)
        urllib.urlretrieve(remote_fname, local_fname)
        # Announce the download on stdout as well as in the info stream.
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement.
        print("# Downloaded {} to {}".format(remote_fname, local_fname))
        info_outs.write("# Downloaded {} to {}\n".format(
            remote_fname, local_fname))

    # Read input
    if not os.path.isfile(local_fname):
        raise IOError("# Error: file {} does not exist".format(local_fname))
    with open(local_fname, 'r') as inf:
        # Read a FASTA file?
        (headers, seqs) = biofile.readFASTA(inf)
        # NOTE(review): the sequences are only re-written to fasta_outs when no
        # output file name is set -- confirm this is the intended condition.
        if options.fasta_out_fname is None:
            # Write data
            biofile.writeFASTA(seqs, fasta_outs, headers=headers)

    # Write out stopping time
    info_outs.write("# Run finished {}\n".format(util.timestamp()))

    # Shut down output: report and close only when options.fasta_out_fname is set
    if options.fasta_out_fname is not None:
        info_outs.write("# Fetched {} sequences to {}\n".format(
            len(headers), options.fasta_out_fname))
        outf.close()
Пример #3
0
    # Concatenate, entry by entry, the sequences of every file listed in
    # options.append_fasta onto the sequences already loaded.
    # new_seqs is the same list object as seqs, so seqs is extended in place.
    new_headers = headers
    new_seqs = seqs
    for append_fname in options.append_fasta:
        if not os.path.isfile(append_fname):
            raise IOError(
                "# Error: file {} does not exist".format(append_fname))
        with open(append_fname, 'r') as inf:
            # Read a FASTA file?
            (app_headers, app_seqs) = biofile.readFASTA(inf)
        n_appended = len(app_seqs)
        info_outs.write("# Read {:d} sequences\n".format(n_appended))
        # Each appended file must supply exactly one sequence per existing entry.
        assert n_appended == len(new_seqs)
        if options.check_headers:
            # Headers are compared positionally against the original headers.
            for header_pair in zip(headers, app_headers):
                assert header_pair[0] == header_pair[1], "# Error: headers do not match:\n\t{}\n\t{}".format(
                    *header_pair)
        # Extend each existing sequence with the sequence at the same position.
        for idx, _ in enumerate(new_seqs):
            new_seqs[idx] += app_seqs[idx]

    # Write the concatenated sequences under the original headers
    biofile.writeFASTA(new_seqs, fasta_outs, headers=new_headers)
    n_written = len(new_seqs)

    # Record the time the run ended
    data_outs.write("# Run finished {}\n".format(util.timestamp()))

    # Report and close the output file only when options.fasta_out_fname is set
    if options.fasta_out_fname is not None:
        info_outs.write("# Wrote {} entries to {}\n".format(
            n_written, options.fasta_out_fname))
        outf.close()
Пример #4
0
                opt_seq = ''
                for (aai, aa) in enumerate(prot_seq):
                    #opt_seq += opt_codon_dict[aa] #random.choice(codons[aa])
                    codons_to_choose_from = codons[aa]
                    # If avoiding codons and we have a choice, eliminate the avoided codon.
                    if options.avoid_sequence and len(
                            codons_to_choose_from) > 1:
                        try:
                            codons_to_choose_from.remove(orig_codons[aai])
                        except ValueError:  # codon to be avoided not among codon choices anyway
                            pass
                    opt_seq += random.choice(codons_to_choose_from)
                assert translate.translate(opt_seq) == prot_seq
                header_line = "{0} Fop = {1:.4f}, CAI = {2:.4f}, GC = {3:.2f}".format(
                    id, cai.getFop(opt_seq, opt_codons), cai_fxn(opt_seq),
                    cai.getGC(opt_seq))
                info_outs.write("# Optimized {}\n".format(header_line))
                opt_headers.append(header_line)
                opt_seqs.append(opt_seq)
        out_seqs = opt_seqs
        out_headers = opt_headers
        biofile.writeFASTA(out_seqs, data_outs, headers=out_headers)
    elif options.reverse_translate:  # Write out sequences but don't optimize
        (out_headers, out_seqs) = zip(*seqs)
        biofile.writeFASTA(out_seqs, data_outs, headers=out_headers)

    if not options.out_fname is None:
        info_outs.write("# Wrote {0} optimized sequences to {1}\n".format(
            len(opt_seqs), options.out_fname))
        outf.close()
Пример #5
0
    # Iterate over tree and write out FASTA in tree-sorted order
    n_written = 0
    sorted_headers = []
    sorted_seqs = []
    for indiv in tree.get_terminals():
        spec = indiv.name.strip()
        # str.replace returns a new string; keep the result so backslashes are
        # actually removed before the name is used as a lookup key.
        spec = spec.replace('\\', '')
        if spec in header_dict:
            hdr = header_dict[spec]
            if options.generate_short_ids:
                # Prefix the header with the short species name.
                hdr = "{} {}".format(short_species_names[spec], hdr)
            seq = seq_dict[spec]
            sorted_headers.append(hdr)
            sorted_seqs.append(seq)
        else:
            # Tree leaf without a matching sequence: report it and move on.
            info_outs.write("# Can't find {}\n".format(spec))

    # Write the sorted sequences only when options.fasta_out_fname is set.
    # NOTE(review): the file opened here is not closed in the visible code --
    # confirm it is closed (or flushed) elsewhere.
    if options.fasta_out_fname is not None:
        fasta_outs = util.OutStreams(
            open(os.path.expanduser(options.fasta_out_fname), 'w'))
        biofile.writeFASTA(sorted_seqs, fasta_outs, headers=sorted_headers)
        info_outs.write("# Wrote {} entries to {}\n".format(
            len(sorted_seqs), options.fasta_out_fname))

    # Write out stopping time
    data_outs.write("# Run finished {}\n".format(util.timestamp()))
Пример #6
0
		info_outs.write("# No starting position or sequence given; nothing to do. Exiting\n")

	new_headers = []
	new_seqs = []
	for (h,seq) in zip(headers,seqs):
		if not options.exclude:
			ex_seq = seq[start_index:end_index]
		else: # Exclude the sequence
			#assert options.end_aa < len(seq)
			#assert options.begin_aa < options.end_aa
			ex_seq = seq[0:start_index] + seq[end_index:]
		#degapped_seq = seq.replace(gap,"")
		new_seqs.append(ex_seq)
		new_headers.append(h)
	seqs = new_seqs
	headers = new_headers

	# Write output
	biofile.writeFASTA(seqs, fasta_outs, headers=headers)
	n_written = len(seqs)

	# Write out stopping time
	data_outs.write("# Run finished {}\n".format(util.timestamp()))

	# Shut down output
	if not options.fasta_out_fname is None:
		info_outs.write("# Wrote {} entries to {}\n".format(n_written, options.fasta_out_fname))
		outf.close()

	
Пример #7
0
					codons[aa] = [c for c in translate.getCodonsForAA(aa, rna=False) if relad_dict[c] >= options.min_rel_adapt]
				opt_seq = ''
				for (aai, aa) in enumerate(prot_seq):
					#opt_seq += opt_codon_dict[aa] #random.choice(codons[aa])
					codons_to_choose_from = codons[aa]
					# If avoiding codons and we have a choice, eliminate the avoided codon.
					if options.avoid_sequence and len(codons_to_choose_from)>1:
						try:
							codons_to_choose_from.remove(orig_codons[aai])
						except ValueError: # codon to be avoided not among codon choices anyway
							pass
					opt_seq += random.choice(codons_to_choose_from)
				assert translate.translate(opt_seq) == prot_seq
				header_line = "{0} Fop = {1:.4f}, CAI = {2:.4f}, GC = {3:.2f}".format(id, cai.getFop(opt_seq, opt_codons), cai_fxn(opt_seq), cai.getGC(opt_seq))
				info_outs.write("# Optimized {}\n".format(header_line))
				opt_headers.append(header_line)
				opt_seqs.append(opt_seq)
		out_seqs = opt_seqs
		out_headers = opt_headers
		biofile.writeFASTA(out_seqs, data_outs, headers=out_headers)
	elif options.reverse_translate: # Write out sequences but don't optimize
		(out_headers,out_seqs) = zip(*seqs)
		biofile.writeFASTA(out_seqs, data_outs, headers=out_headers)


	if not options.out_fname is None:
		info_outs.write("# Wrote {0} optimized sequences to {1}\n".format(len(opt_seqs), options.out_fname))
		outf.close()