Пример #1
0
# Parse the "key=value" configuration file named by `conf` into `opt`.
# Lines starting with "//" are comments; blank lines are skipped.
opt = dict()
with open(conf) as handle:
    # Iterate the file object directly instead of loading every line up front.
    for line in handle:
        # Compare by value/truthiness: the original `is not ""` tested object
        # identity, which only works by accident of string caching.
        if line.startswith("//") or not line.rstrip():
            continue
        # Split on the first "=" only, so a value may itself contain "=".
        # A line with no "=" at all still raises IndexError, as before.
        val = line.split("=", 1)
        opt[val[0]] = val[1].rstrip()

# Names of the intermediate files, each formed by appending a fixed suffix
# to the configured base name.
saileft, sairight, samfile, bamclean, bamfile, bamindex = (
    opt[base] + suffix
    for suffix in (
        ".left.sai",
        ".right.sai",
        ".sam",
        ".clean.bam",
        ".bam",
        ".bai",
    )
)
# Create the helper objects from the utility libraries.
seq_obj = seq_util.SplitFile(opt[numseq])
bwa_obj = align_util.BWA(opt[prefix])
st_obj = samtool_util.Use()

# Split each large fastq file into multiple smaller fastq files.
left_files = seq_obj.split_fastq(opt[seqleft])
right_files = seq_obj.split_fastq(opt[seqright])

# Do the alignments. The read-group header is a tab-separated "@RG" line
# assembled from the rg* configuration options.
rg = "".join([
    "@RG\tID:", opt[rgid],
    "\tSM:", opt[rgsm],
    "\tLB:", opt[rglb],
    "\tPL:", opt[rgpl],
    "\tPU:", opt[rgpu],
])
samfiles = bwa_obj.multifastq_call(left_files, right_files, rg, opt[base])

# Groom the sam files.
cmd = "groom_sam.py"

bamfiles, jobids = [], []
for eachfile in samfiles:
Пример #2
0
#!/usr/bin/env python

import seq_util
import sys

# Read the two required positional arguments. A missing command-line argument
# raises IndexError (not IOError), so that is what has to be caught to show
# the usage text. Exit right away: without both values there is nothing to
# split, and carrying on would only fail later with a NameError.
try:
    fastq = sys.argv[1]
    num_seq = sys.argv[2]
except IndexError:
    # sys.exit(str) writes the message to stderr and exits with status 1.
    sys.exit("usage: break_fastq.py fastq_file number_of_seqs_per_file")

# Hand the requested sequences-per-file count to the splitter and split the
# input fastq file.
seq_obj = seq_util.SplitFile(num_seq)
files = seq_obj.split_fastq(fastq)

# Report every item returned by the splitter, one per line.
for produce in files:
    print(produce)
Пример #3
0
import seq_util
import sys

# Lines per sequence record, passed through to SplitFile
# (a standard FASTQ record spans four lines).
lineperseq = 4

# Read the two required positional arguments. A missing command-line argument
# raises IndexError (not IOError), so that is what has to be caught to show
# the usage text. Exit right away: without both values there is nothing to
# split, and carrying on would only fail later with a NameError.
try:
    fastq = sys.argv[1]
    num_seq = sys.argv[2]
except IndexError:
    # sys.exit(str) writes the message to stderr and exits with status 1.
    sys.exit("usage: break_fastq.py fastq_file number_of_seqs_per_file buffer_length_bytes clobber_file (optional) ")

# buffer length, default 100Mb; a missing or non-integer third argument
# falls back to the default
try:
    buff_len = int(sys.argv[3])
except (IndexError, ValueError):
    buff_len = 100000000

# option to clobber file or leave there and skip; any non-empty fourth
# argument translates to True, an absent one to False
try:
    noclobber = bool(sys.argv[4])
except IndexError:
    noclobber = False

# Split the input fastq file using the buffered splitter.
seq_obj = seq_util.SplitFile(num_seq, lineperseq)
files = seq_obj.split_fastq_buffer(fastq, buff_len, noclobber)

# Report every item returned by the splitter, one per line.
for produce in files:
    print(produce)