Пример #1
0
            # Genbank input: load the file as a single record. Either kind of
            # failure is reported and then `break` leaves the enclosing loop
            # (the loop header and the opening `if` are outside this excerpt).
            try:
                record = load_genbank(seq_dir+filename)
            except IOError:
                print "failed to load Genbank file"
                break
            except Exception:
                # any exception other than IOError raised by the loader
                print "failed to handle Genbank file"
                break
            else:
                # success path; the trailing comma keeps the cursor on the
                # same output line (Python 2 print statement)
                print "...",
                seq_format = 'gbk'

        # find() > 0 is true when the first ".fas" is at index 1 or later; it
        # is false when ".fas" is absent (-1) or starts the name (0)
        elif filename.find(".fas") > 0:
            # process fasta (for mfas, load first record)
            try:
                record = load_fasta(seq_dir+filename)
            except IOError:
                print "failed to load Fasta file as single-record file"
                break
            except Exception:
                # the single-record load failed for a non-IO reason: retry as
                # a multi-record file and keep only its first record
                try:
                    record = load_multifasta(seq_dir+filename)[0]
                except IOError:
                    print "failed to load Fasta file as multi-record file"
                    break
                except Exception:
                    print "failed to handle Fasta file"
                    break
            # reached only when no handler above executed `break`, i.e. one of
            # the two loaders returned a record
            print "...",
            seq_format = 'fas'
Пример #2
0
            # Genbank input: load the file as a single record. Either kind of
            # failure is reported and then `break` leaves the enclosing loop
            # (the loop header and the opening `if` are outside this excerpt).
            try:
                record = load_genbank(seq_dir + filename)
            except IOError:
                print "failed to load Genbank file"
                break
            except Exception:
                # any exception other than IOError raised by the loader
                print "failed to handle Genbank file"
                break
            else:
                # success path; the trailing comma keeps the cursor on the
                # same output line (Python 2 print statement)
                print "...",
                seq_format = 'gbk'

        # find() > 0 is true when the first ".fas" is at index 1 or later; it
        # is false when ".fas" is absent (-1) or starts the name (0)
        elif filename.find(".fas") > 0:
            # process fasta (for mfas, load first record)
            try:
                record = load_fasta(seq_dir + filename)
            except IOError:
                print "failed to load Fasta file as single-record file"
                break
            except Exception:
                # the single-record load failed for a non-IO reason: retry as
                # a multi-record file and keep only its first record
                try:
                    record = load_multifasta(seq_dir + filename)[0]
                except IOError:
                    print "failed to load Fasta file as multi-record file"
                    break
                except Exception:
                    print "failed to handle Fasta file"
                    break
            # reached only when no handler above executed `break`, i.e. one of
            # the two loaders returned a record
            print "...",
            seq_format = 'fas'
Пример #3
0
## script to combine several fasta sequences into a single one in a specific order

from sys import argv
from libs.common import load_fasta, write_genbank
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.Alphabet import generic_dna

# Command-line arguments:
#   argv[1] -- sub-directory of data/ that holds the input fasta files
#   argv[2] -- base name of the Genbank output file (written into that folder)
#   argv[3] -- filename prefix; input files are <prefix><number>.fas
origin_dir = "data/"+argv[1]+"/"
destin_file = origin_dir+argv[2]+".gbk"
base_name = argv[3]

# adapt this part: list of (contig number, flag) pairs in the desired order;
# a truthy flag means the contig is reverse-complemented before use
order = [(7,0), (17,1), (15,0), (16,1), (11,0), (6,0), (5,0), (1,0), (2,0), (3,0), (9,0), (13,0), (10,0), (4,0), (8,0), (12,0)]

# the first contig in the list seeds the combined record
filename = origin_dir+base_name+str(order[0][0])+".fas"
record = load_fasta(filename)

# c_note is the suffix appended to the feature id/locus tag for flipped contigs
if order[0][1]:
    record = record.reverse_complement()
    c_note = '_RC'
else:
    c_note = ''

# annotate the full length of the first contig as a 'contig' feature
space_loc = FeatureLocation(0, len(record.seq))
quals = {'locus_tag': 'ctg_'+str(order[0][0])+c_note}
feature = SeqFeature(location=space_loc, type='contig',
                     id='ctg_'+str(order[0][0])+c_note, qualifiers=quals)
record.features.append(feature)

# reset the suffix; NOTE(review): the code that handles the remaining entries
# of `order` is not visible in this excerpt
c_note = ''
Пример #4
0
    # Body of a per-`line` loop (header not visible in this excerpt).
    # Fields 0 and 1 of `line` name the query and the subject sequence;
    # presumably `line` is one row of a tabular hit report -- TODO confirm.
    query = line[0]
    subject = line[1]

    print subject

    # Orientation: fields 8 and 9 are the two hit coordinates. The smaller one
    # becomes the start (minus 1) and the larger the stop; rev_flag records
    # that they arrived in descending order. The assignment just below is
    # redundant because both branches set rev_flag again.
    # NOTE(review): the arithmetic requires numeric fields -- confirm the
    # caller converts them before this point.
    rev_flag = False
    if line[8] < line[9]:
        q_start, q_stop = line[8] - 1, line[9]
        rev_flag = False
    else:
        q_start, q_stop = line[9] - 1, line[8]
        rev_flag = True

    # widen the window by capture_span (defined outside this excerpt) each side
    c_start, c_stop = q_start - capture_span, q_stop + capture_span

    master_seq = load_fasta("data/contigs_fas/" + subject + ".fas")

    # clamp the window to the bounds of the loaded sequence
    if c_start < 0:
        c_start = 0
    if c_stop > len(master_seq.seq):
        c_stop = len(master_seq.seq)

    seq_bit = master_seq[c_start:c_stop]

    # hits found in descending coordinate order are flipped
    if rev_flag:
        seq_bit = seq_bit.reverse_complement()
    # `descript` is defined outside this excerpt
    record = SeqRecord(id=subject, seq=seq_bit.seq, description=descript)
    records.append(record)

    # each captured segment is also written to its own file under ctx_dir
    rec_file = ctx_dir + subject + "_" + query + "_ctxt.fas"
    write_fasta(rec_file, record)
Пример #5
0
## script to combine several fasta files into a single one

import re
from sys import argv
from libs.common import from_dir, load_fasta, load_genbank, write_fasta

origin_dir = "data/" + argv[1]
destin_file = origin_dir + "/" + argv[2] + ".fas"
file_ext = argv[3]

filenames = from_dir(origin_dir, re.compile(r'.*\.' + file_ext))

records = []

for filename in filenames:
    # load record
    if file_ext == 'fas':
        records.append(load_fasta(origin_dir + "/" + filename))
    elif file_ext == 'gbk':
        records.append(load_genbank(origin_dir + "/" + filename))

    print filename

write_fasta(destin_file, records)
Пример #6
0
## script to convert sets of fasta files to genbank format

import re
from sys import argv
from libs.common import from_dir, ensure_dir, load_fasta, write_genbank
from Bio.Alphabet import generic_dna

# argv[1]: input folder under data/ ; argv[2]: output folder under data/
origin_dir = "data/" + argv[1]
destin_dir = "data/" + argv[2] + "/"

# make sure the output folder is there before anything is written
ensure_dir([destin_dir])

fasta_pattern = re.compile(r'.*\.fas.*')

for filename in from_dir(origin_dir, fasta_pattern):
    # the record name is everything in front of the first '.fas'
    cut = filename.find('.fas')
    rec_name = filename[:cut]

    record = load_fasta(origin_dir + "/" + filename)

    # name the record after its file and mark the sequence as DNA
    record.name = rec_name
    record.id = rec_name
    record.seq.alphabet = generic_dna

    # write the contig out as <destin_dir><rec_name>.gbk
    out_path = destin_dir + rec_name + ".gbk"
    write_genbank(out_path, record)

Пример #7
0
## script to combine several fasta files into a single one

import re
from sys import argv
from libs.common import from_dir, load_fasta, load_genbank, write_fasta

origin_dir = "data/"+argv[1]
destin_file = origin_dir+"/"+argv[2]+".fas"
file_ext = argv[3]

filenames = from_dir(origin_dir, re.compile(r'.*\.'+file_ext))

records = []

for filename in filenames:
    # load record
    if file_ext == 'fas':
        records.append(load_fasta(origin_dir+"/"+filename))
    elif file_ext == 'gbk':
        records.append(load_genbank(origin_dir+"/"+filename))

    print filename

write_fasta(destin_file, records)
Пример #8
0
    # Body of a per-`line` loop (header not visible in this excerpt).
    # Fields 0 and 1 of `line` name the query and the subject sequence;
    # presumably `line` is one row of a tabular hit report -- TODO confirm.
    query = line[0]
    subject = line[1]

    print subject

    # Orientation: fields 8 and 9 are the two hit coordinates. The smaller one
    # becomes the start (minus 1) and the larger the stop; rev_flag records
    # that they arrived in descending order. The assignment just below is
    # redundant because both branches set rev_flag again.
    # NOTE(review): the arithmetic requires numeric fields -- confirm the
    # caller converts them before this point.
    rev_flag = False
    if line[8] < line[9]:
        q_start, q_stop = line[8]-1, line[9]
        rev_flag = False
    else:
        q_start, q_stop = line[9]-1, line[8]
        rev_flag = True

    # widen the window by capture_span (defined outside this excerpt) each side
    c_start, c_stop = q_start-capture_span, q_stop+capture_span

    master_seq = load_fasta("data/contigs_fas/"+subject+".fas")

    # clamp the window to the bounds of the loaded sequence
    if c_start < 0:
        c_start = 0
    if c_stop > len(master_seq.seq):
        c_stop = len(master_seq.seq)

    seq_bit = master_seq[c_start:c_stop]

    # hits found in descending coordinate order are flipped
    if rev_flag:
        seq_bit = seq_bit.reverse_complement()
    # `descript` is defined outside this excerpt
    record = SeqRecord(id=subject, seq=seq_bit.seq, description=descript)
    records.append(record)

    # each captured segment is also written to its own file under ctx_dir
    rec_file = ctx_dir+subject+"_"+query+"_ctxt.fas"
    write_fasta(rec_file, record)
Пример #9
0
    # Dispatch on genome['input'] to load `records` from origin_file.
    # Every failure path prints a message and uses `break` to leave the loop.
    # NOTE(review): the rest of the loop body is not visible here; the
    # `while True` looks like a breakable single-pass block -- confirm.
    while True:

        # 'cgbk' inputs are deliberately not processed
        if genome['input'] == 'cgbk':
            print "ignoring cgbk file"
            break

        # single-record Genbank file, wrapped in a list
        elif genome['input'] == 'gbk':
            try:
                records = [load_genbank(origin_file)]
            except IOError:
                print "failed to load file"
                break

        # single-record fasta file, wrapped in a list
        elif genome['input'] == 'fas':
            try:
                records = [load_fasta(origin_file)]
            except IOError:
                print "failed to load file"
                break

        # multi-record fasta file; the loader's return value is used as-is
        elif genome['input'] == 'mfas':
            try:
                records = load_multifasta(origin_file)
            except IOError:
                print "failed to load file"
                break

        # only IOError is caught above; other loader exceptions propagate
        else:
            print "input not recognized"
            break
Пример #10
0
from sys import argv
from libs.common import load_fasta, write_fasta

# Command-line arguments:
#   argv[1] -- sub-directory of data/ that holds the contig fasta files
#   argv[2] -- base name of the output file; also used as the record id
#   argv[3] -- filename prefix; contig files are <prefix><number>.fas
origin_dir = "data/" + argv[1] + "/"
destin_file = origin_dir + argv[2] + ".fas"
base_name = argv[3]

# Run of Ns placed between consecutive contigs to mark the junctions.
spacer = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"

# adapt this part: (contig number, flag) pairs in the desired order; a truthy
# flag means the contig is reverse-complemented before being joined
order = [(22, 0), (4, 0), (57, 1), (43, 1), (64, 0), (18, 0), (54, 0), (36, 1),
         (20, 1), (2, 1), (40, 1), (17, 1), (35, 1), (38, 1), (37, 1), (55, 1),
         (19, 1), (47, 1), (11, 0), (46, 0), (61, 0), (41, 1), (15, 0), (1, 1),
         (5, 1), (6, 0), (13, 1), (8, 0), (23, 0), (16, 1), (10, 0), (60, 0),
         (14, 0), (42, 0), (39, 0), (48, 0), (9, 1), (21, 0), (3, 1), (58, 1),
         (32, 0)]

# the first contig seeds the combined record
filename = origin_dir + base_name + str(order[0][0]) + ".fas"
record = load_fasta(filename)
if order[0][1]:
    record = record.reverse_complement()

for contig_num, is_reversed in order[1:]:
    filename = origin_dir + base_name + str(contig_num) + ".fas"
    new_rec = load_fasta(filename)
    if is_reversed:
        new_rec = new_rec.reverse_complement()
    # Spacer goes BEFORE each added contig so that every junction (including
    # the one between the first and second contig) is separated, and the
    # combined sequence does not end with a dangling run of Ns.
    record += spacer
    record += new_rec

record.id = argv[2]

write_fasta(destin_file, record)