# Whitespace-collapsed fragment (Python 2): the original line breaks and
# indentation were lost, and the `if` that opens this chain is not visible.
# What the visible tokens show:
#   * GenBank branch: load_genbank(seq_dir+filename); an IOError and any other
#     Exception each print a message and `break`; the try's `else` prints
#     "..." (trailing comma, so no newline) and seq_format is set to 'gbk'.
#   * Fasta branch (taken when ".fas" is found in filename at an index > 0):
#     load_fasta(...) is tried first; on a non-IOError Exception the code
#     retries with load_multifasta(...)[0] (first record only); failures
#     print a message and `break`; seq_format is set to 'fas'.
# NOTE(review): the `break` statements imply an enclosing loop that is not
# shown here - confirm against the full file.
# NOTE(review): with the indentation gone, the exact nesting of the final
# `print "...",` / `seq_format = ...` statements cannot be determined here.
# NOTE(review): as collapsed, everything after the inline `#` is read as a
# comment; the line breaks must be restored before this can run.
try: record = load_genbank(seq_dir+filename) except IOError: print "failed to load Genbank file" break except Exception: print "failed to handle Genbank file" break else: print "...", seq_format = 'gbk' elif filename.find(".fas") > 0: # process fasta (for mfas, load first record) try: record = load_fasta(seq_dir+filename) except IOError: print "failed to load Fasta file as single-record file" break except Exception: try: record = load_multifasta(seq_dir+filename)[0] except IOError: print "failed to load Fasta file as multi-record file" break except Exception: print "failed to handle Fasta file" break print "...", seq_format = 'fas'
# Whitespace-collapsed fragment (Python 2): the original line breaks and
# indentation were lost, and the `if` that opens this chain is not visible.
# What the visible tokens show:
#   * GenBank branch: load_genbank(seq_dir + filename); an IOError and any
#     other Exception each print a message and `break`; the try's `else`
#     prints "..." (trailing comma, so no newline) and seq_format is set
#     to 'gbk'.
#   * Fasta branch (taken when ".fas" is found in filename at an index > 0):
#     load_fasta(...) is tried first; on a non-IOError Exception the code
#     retries with load_multifasta(...)[0] (first record only); failures
#     print a message and `break`; seq_format is set to 'fas'.
# NOTE(review): the `break` statements imply an enclosing loop that is not
# shown here - confirm against the full file.
# NOTE(review): with the indentation gone, the exact nesting of the final
# `print "...",` / `seq_format = ...` statements cannot be determined here.
# NOTE(review): as collapsed, everything after the inline `#` is read as a
# comment; the line breaks must be restored before this can run.
try: record = load_genbank(seq_dir + filename) except IOError: print "failed to load Genbank file" break except Exception: print "failed to handle Genbank file" break else: print "...", seq_format = 'gbk' elif filename.find(".fas") > 0: # process fasta (for mfas, load first record) try: record = load_fasta(seq_dir + filename) except IOError: print "failed to load Fasta file as single-record file" break except Exception: try: record = load_multifasta(seq_dir + filename)[0] except IOError: print "failed to load Fasta file as multi-record file" break except Exception: print "failed to handle Fasta file" break print "...", seq_format = 'fas'
## script to combine several fasta sequences into a single one in a specific order
## arguments read below: argv[1] = subdirectory of data/, argv[2] = output
## name (without extension), argv[3] = base name shared by the contig files

from sys import argv

from libs.common import load_fasta, write_genbank
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.Alphabet import generic_dna

# contigs are read from data/<argv[1]>/ and the .gbk output path is built
# inside that same directory
origin_dir = "data/" + argv[1] + "/"
destin_file = origin_dir + argv[2] + ".gbk"
base_name = argv[3]

# adapt this part
# each entry is (contig number, flag); a truthy flag means the contig is
# reverse-complemented before use
order = [
    (7, 0), (17, 1), (15, 0), (16, 1),
    (11, 0), (6, 0), (5, 0), (1, 0),
    (2, 0), (3, 0), (9, 0), (13, 0),
    (10, 0), (4, 0), (8, 0), (12, 0),
]

# load the first contig of the ordering
filename = "".join([origin_dir, base_name, str(order[0][0]), ".fas"])
record = load_fasta(filename)

# '_RC' suffix marks a reverse-complemented contig in its feature label
c_note = '_RC' if order[0][1] else ''
if c_note:
    record = record.reverse_complement()

# annotate the full length of this contig as a 'contig' feature
quals = {'locus_tag': 'ctg_' + str(order[0][0]) + c_note}
space_loc = FeatureLocation(0, len(record.seq))
feature = SeqFeature(
    location=space_loc,
    type='contig',
    id=quals['locus_tag'],
    qualifiers=quals,
)
record.features.append(feature)

# reset the suffix once the first contig is done
c_note = ''
# Extract the sequence context around one hit (`line`) from its subject
# contig and save it as a fasta record.
query, subject = line[0], line[1]
print(subject)

# fields 8 and 9 hold the hit coordinates; when field 8 is not smaller than
# field 9 the hit is treated as reversed and the coordinates are swapped
rev_flag = not (line[8] < line[9])
if rev_flag:
    q_start, q_stop = line[9] - 1, line[8]
else:
    q_start, q_stop = line[8] - 1, line[9]

# widen the window by capture_span on both sides, never below position 0
c_start = max(q_start - capture_span, 0)
c_stop = q_stop + capture_span

master_seq = load_fasta("data/contigs_fas/" + subject + ".fas")

# ... and never past the end of the contig
c_stop = min(c_stop, len(master_seq.seq))

seq_bit = master_seq[c_start:c_stop]
if rev_flag:
    seq_bit = seq_bit.reverse_complement()

# keep the record in the running list and also write it to its own file
record = SeqRecord(id=subject, seq=seq_bit.seq, description=descript)
records.append(record)
rec_file = "".join([ctx_dir, subject, "_", query, "_ctxt.fas"])
write_fasta(rec_file, record)
## script to combine several fasta files into a single one
## arguments read below: argv[1] = subdirectory of data/, argv[2] = output
## name (without extension), argv[3] = extension of the files to combine

import re
from sys import argv

from libs.common import from_dir, load_fasta, load_genbank, write_fasta

# the combined file is always written as .fas inside the source directory
origin_dir = "data/" + argv[1]
destin_file = "".join([origin_dir, "/", argv[2], ".fas"])
file_ext = argv[3]

# loader for each supported extension; with any other extension the files
# are still listed but nothing is loaded
loaders = {'fas': load_fasta, 'gbk': load_genbank}
load_record = loaders.get(file_ext)

records = []
for filename in from_dir(origin_dir, re.compile(r'.*\.' + file_ext)):
    # load record
    if load_record is not None:
        records.append(load_record(origin_dir + "/" + filename))
    print(filename)

write_fasta(destin_file, records)
## script to convert sets of fasta files to genbank format
## arguments read below: argv[1] = source subdirectory of data/,
## argv[2] = destination subdirectory of data/

import re
from sys import argv

from libs.common import from_dir, ensure_dir, load_fasta, write_genbank
from Bio.Alphabet import generic_dna

origin_dir = "data/" + argv[1]
destin_dir = "data/" + argv[2] + "/"
ensure_dir([destin_dir])

fas_pattern = re.compile(r'.*\.fas.*')
for filename in from_dir(origin_dir, fas_pattern):
    # record name = file name up to (not including) the first '.fas'
    ext_at = filename.find('.fas')
    rec_name = filename[:ext_at]

    record = load_fasta(origin_dir + "/" + filename)

    # make a genbank file of the contig
    record.name = record.id = rec_name
    record.seq.alphabet = generic_dna
    gbk_file = destin_dir + rec_name + ".gbk"
    write_genbank(gbk_file, record)
## script to combine several fasta files into a single one
## arguments read below: argv[1] = subdirectory of data/, argv[2] = output
## name (without extension), argv[3] = extension of the files to combine

import re
from sys import argv

from libs.common import from_dir, load_fasta, load_genbank, write_fasta

# the combined file is always written as .fas inside the source directory
origin_dir = "data/" + argv[1]
destin_file = "%s/%s.fas" % (origin_dir, argv[2])
file_ext = argv[3]

filenames = from_dir(origin_dir, re.compile(r'.*\.' + file_ext))

# pick the loader once; an unsupported extension means no record is loaded
if file_ext == 'fas':
    load_record = load_fasta
elif file_ext == 'gbk':
    load_record = load_genbank
else:
    load_record = None

records = []
for filename in filenames:
    # load record
    if load_record is not None:
        records.append(load_record(origin_dir + "/" + filename))
    print(filename)

write_fasta(destin_file, records)
# Cut out the region around one hit (`line`) from its subject contig and
# write it to its own fasta file.
query = line[0]
subject = line[1]
print(subject)

# fields 8 and 9 are the hit coordinates; put them in ascending order and
# remember whether they had to be swapped
if line[8] < line[9]:
    rev_flag, low, high = False, line[8], line[9]
else:
    rev_flag, low, high = True, line[9], line[8]
q_start, q_stop = low - 1, high

# capture window: the hit plus capture_span on either side
c_start = q_start - capture_span
c_stop = q_stop + capture_span

master_seq = load_fasta("".join(["data/contigs_fas/", subject, ".fas"]))

# keep the window inside the contig
c_start = max(c_start, 0)
c_stop = min(c_stop, len(master_seq.seq))

seq_bit = master_seq[c_start:c_stop]
if rev_flag:
    seq_bit = seq_bit.reverse_complement()

record = SeqRecord(id=subject, seq=seq_bit.seq, description=descript)
records.append(record)

rec_file = ctx_dir + subject + "_" + query + "_ctxt.fas"
write_fasta(rec_file, record)
# Whitespace-collapsed fragment (Python 2): the original line breaks and
# indentation were lost.
# Dispatches on genome['input'] to fill `records` from origin_file:
#   'cgbk' -> prints "ignoring cgbk file" and breaks
#   'gbk'  -> records = [load_genbank(origin_file)]
#   'fas'  -> records = [load_fasta(origin_file)]
#   'mfas' -> records = load_multifasta(origin_file)
#   other  -> prints "input not recognized" and breaks
# An IOError from any of the loaders prints "failed to load file" and breaks.
# NOTE(review): no visible path leaves the loop after a successful load;
# presumably the loop body continues past this fragment (`while True` used
# as a single-pass block that is exited via `break`) - confirm against the
# full file.
while True: if genome['input'] == 'cgbk': print "ignoring cgbk file" break elif genome['input'] == 'gbk': try: records = [load_genbank(origin_file)] except IOError: print "failed to load file" break elif genome['input'] == 'fas': try: records = [load_fasta(origin_file)] except IOError: print "failed to load file" break elif genome['input'] == 'mfas': try: records = load_multifasta(origin_file) except IOError: print "failed to load file" break else: print "input not recognized" break
## script to combine several fasta sequences into a single one in a specific
## order, with a run of N characters between consecutive contigs
## arguments read below: argv[1] = subdirectory of data/, argv[2] = output
## name (also used as the record id), argv[3] = base name of the contig files

from sys import argv

from libs.common import load_fasta, write_fasta

# contigs are read from data/<argv[1]>/ and the combined .fas file is
# written into that same directory
origin_dir = "data/" + argv[1] + "/"
destin_file = origin_dir + argv[2] + ".fas"
base_name = argv[3]

# adapt this part
# each entry is (contig number, flag); a truthy flag means the contig is
# reverse-complemented before being appended
order = [
    (22, 0), (4, 0), (57, 1), (43, 1), (64, 0), (18, 0), (54, 0), (36, 1),
    (20, 1), (2, 1), (40, 1), (17, 1), (35, 1), (38, 1), (37, 1), (55, 1),
    (19, 1), (47, 1), (11, 0), (46, 0), (61, 0), (41, 1), (15, 0), (1, 1),
    (5, 1), (6, 0), (13, 1), (8, 0), (23, 0), (16, 1), (10, 0), (60, 0),
    (14, 0), (42, 0), (39, 0), (48, 0), (9, 1), (21, 0), (3, 1), (58, 1),
    (32, 0),
]

# spacer inserted at every junction between two contigs
spacer = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"

# the first contig starts the combined record
filename = origin_dir + base_name + str(order[0][0]) + ".fas"
record = load_fasta(filename)
if order[0][1]:
    record = record.reverse_complement()

for index in order[1:]:
    filename = origin_dir + base_name + str(index[0]) + ".fas"
    new_rec = load_fasta(filename)
    if index[1]:
        new_rec = new_rec.reverse_complement()
    # The spacer goes BEFORE each appended contig. Appending it after the
    # contig (as this script did previously) left the first junction without
    # a spacer and ended the combined sequence with a dangling run of Ns.
    record += spacer
    record += new_rec

record.id = argv[2]
write_fasta(destin_file, record)