Пример #1
0
def start_ab(args, logger):
   '''Run the alignment stage followed by the bam processing stage.

   All settings are read from attributes of ``args``; ``logger`` is handed
   on to the stage functions. Progress is printed to stdout and nothing is
   returned.
   '''

   import os
   import subprocess

   rule = "--------------------------------------"
   out_bam = args.outbam

   # build the library description from the given arguments
   library = genobox_modules.initialize_library(
      args.libfile, args.se, args.pe1, args.pe2,
      args.sample, args.mapq, args.libs, args.pl)

   # banner: the sample name is only shown when one was given
   if args.sample:
      print(rule)
      print("Processing sample: %s" % args.sample)
   print(rule)

   print("Starting alignment")
   (bamfiles, library) = start_alignment(args, logger)

   print("Starting bam processing")
   unique_bams = genobox_modules.unique(bamfiles.values())
   out_bam = start_bamprocess(
      library, unique_bams, args.mapq, args.libs, args.tmpdir,
      args.queue, out_bam, args.realignment, args.known, args.fa,
      args.sample, args.partition, logger)

   # clean up the files written by the queuing system
   queue_leftovers = ['run_genobox_*', 'semaphores.*']
   genobox_modules.rm_files(queue_leftovers)

   print("Done")
   print(rule)
Пример #2
0
def start_ab(args, logger):
    '''Align the reads, then process the resulting bam files.

    Settings come from attributes of ``args``; ``logger`` is passed through
    to ``start_alignment`` and ``start_bamprocess``. Progress messages go to
    stdout. The function returns nothing.
    '''

    import os
    import subprocess

    divider = "--------------------------------------"
    target_bam = args.outbam

    # library description built from the command line values
    library = genobox_modules.initialize_library(
        args.libfile, args.se, args.pe1, args.pe2,
        args.sample, args.mapq, args.libs, args.pl,
    )

    # opening banner (sample line only when a sample name is set)
    if args.sample:
        print(divider)
        print("Processing sample: %s" % args.sample)
    print(divider)

    print("Starting alignment")
    (bamfiles, library) = start_alignment(args, logger)

    print("Starting bam processing")
    bams = genobox_modules.unique(bamfiles.values())
    target_bam = start_bamprocess(
        library, bams, args.mapq, args.libs, args.tmpdir, args.queue,
        target_bam, args.realignment, args.known, args.fa, args.sample,
        args.partition, logger,
    )

    # the queuing system leaves these files behind; remove them
    genobox_modules.rm_files(['run_genobox_*', 'semaphores.*'])

    print("Done")
    print(divider)
Пример #3
0
def clean(f):
   '''Remove the temporary files that belong to a raw vcf file.

   The file name of ``f`` (directory part dropped, '.raw.vcf.gz' removed)
   is prefixed with 'tmp/tmp.' and combined with a fixed list of suffixes;
   the resulting paths are handed to ``genobox_modules.rm_files``.
   '''

   import genobox_modules
   import os

   # suffixes of every intermediate file to remove, in deletion order
   suffixes = (
      '.header.vcf',
      '.indels.pass.vcf',
      '.indels.pass.vcf.idx',
      '.raw.vcf.gz.indels.vcf',
      '.raw.vcf.gz.indels.vcf.idx',
      '.raw.vcf.gz.ref.vcf',
      '.raw.vcf.gz.ref.vcf.idx',
      '.raw.vcf.gz.snps.vcf',
      '.raw.vcf.gz.snps.vcf.idx',
      '.ref.pass.vcf',
      '.ref.pass.vcf.idx',
      '.snps.pass.vcf',
      '.snps.pass.vcf.idx',
   )

   # common stem shared by all temporary files of this input
   stem = 'tmp/tmp.' + os.path.basename(f).replace('.raw.vcf.gz', '')

   genobox_modules.rm_files([stem + suffix for suffix in suffixes])
Пример #4
0
def start_gv(args, logger):
    '''Run genotyping and vcf filtering with the caller named in args.caller.

    For 'samtools' the steps are genotyping, vcf filtering, the dbsnp step
    and bcf2ref; for 'gatk' they are genotyping and vcf filtering. Any other
    caller value goes straight to the cleanup. Progress is printed to stdout
    and nothing is returned.
    '''

    import os
    import subprocess

    rule = "--------------------------------------"

    genobox_modules.check_genome(args.genome)
    final_bcf = 'genotyping/%s.all.bcf' % args.sample

    # banner: the sample name is only shown when one was given
    if args.sample:
        print(rule)
        print("Processing sample: %s" % args.sample)
    print(rule)

    if args.caller == 'samtools':
        print("Starting genotyping (samtools)")
        final_bcf = start_genotyping(
            args.bam, args.genome, args.fa, args.prior, args.pp,
            args.queue, final_bcf, args.sample, args.partition, logger)

        print("Starting vcffiltering")
        final_vcf = start_vcffilter(
            final_bcf, args.genome, args.caller, args.Q, args.ex,
            args.rmsk, args.ab, args.prune, args.ovar, args.queue,
            args.sample, args.partition, logger)

        print("Start dbsnp")
        final_dbsnp_vcf = start_dbsnp(
            final_vcf, args.ex, args.dbsnp, args.ovar, args.queue,
            args.partition, logger)

        print("Start bcf2ref")
        indel_vcf = 'genotyping/indels_for_filtering.vcf'
        start_bcf2ref(
            final_bcf, args.genome, args.Q, args.ex, args.dbsnp,
            args.rmsk, indel_vcf, args.oref, args.queue, args.sample,
            args.partition, logger)

    elif args.caller == 'gatk':
        print("Start genotyping (gatk)")
        vcffiles = start_genotyping_gatk(
            args.bam, args.genome, args.fa, args.dbsnp, args.call_conf,
            args.call_emit, args.output_mode, args.queue, args.sample,
            args.partition, logger)

        print("Start vcffiltering (gatk)")
        final_vcfs = start_vcffilter_gatk(
            vcffiles, args.genome, args.fa, args.Q, args.rmsk, args.ab,
            args.prune, args.queue, args.sample, args.partition, logger)

    # clean up the files written by the queuing system
    genobox_modules.rm_files(['run_genobox_*', 'semaphores.*'])

    print("Done")
    print(rule)
Пример #5
0
def start_genotyping(bam, chr, fa, prior, pp, queue, o, sample, partition, logger):
   '''Submit the samtools genotyping jobs for a bam file and wait for them.

   Calls are built for bam indexing, mpileup, bcf combining, bcf indexing
   and consensus. The first four are submitted through ``Moab`` as a
   dependency chain; the consensus calls are built but not submitted. The
   function blocks on a ``Semaphore`` for the bcf index jobs, removes the
   per-mpileup bcf files and returns ``o``, the output bcf.
   '''

   import subprocess
   import genobox_modules
   from genobox_classes import Moab
   from genobox_classes import Semaphore
   import os

   outdir = 'genotyping'
   if not os.path.exists(outdir):
      os.makedirs(outdir)

   # queueing setup and resource requests
   paths = genobox_modules.setSystem()
   home = os.getcwd()
   cpuA = 'nodes=1:ppn=1,mem=512mb,walltime=172800'
   cpuC = 'nodes=1:ppn=1,mem=2gb,walltime=172800'
   cpuE = 'nodes=1:ppn=1,mem=5gb,walltime=172800'
   cpuF = 'nodes=1:ppn=2,mem=2gb,walltime=172800'
   cpuB = 'nodes=1:ppn=16,mem=10gb,walltime=172800'

   def submit(calls, runname, cpu, **dependency):
      # every submission shares logger, queue and partition
      return Moab(calls, logfile=logger, runname=runname, queue=queue,
                  cpu=cpu, partition=partition, **dependency)

   # command lines for each stage
   bamindex_calls = bam_index(bam)
   (mpileup_calls, bcffiles) = mpileup(bam, chr, fa, prior, pp)
   bcfcombine_calls = bcf_combine(bcffiles, o)
   bcfindex_calls = bcf_index(o)
   consensus_calls = consensus(o, sample)

   # submit the stages, each one depending on the previous
   print("Submitting jobs")
   bamindex_moab = submit(bamindex_calls, 'run_genobox_bamindex', cpuC)
   mpileup_moab = submit(
      mpileup_calls, 'run_genobox_mpileup', cpuF,
      depend=True, depend_type='expand',
      depend_val=[len(mpileup_calls)], depend_ids=bamindex_moab.ids)
   bcfcombine_moab = submit(
      bcfcombine_calls, 'run_genobox_bcfcombine', cpuC,
      depend=True, depend_type='conc',
      depend_val=[len(mpileup_calls)], depend_ids=mpileup_moab.ids)
   bcfindex_moab = submit(
      bcfindex_calls, 'run_genobox_bcfindex', cpuC,
      depend=True, depend_type='one2one',
      depend_val=[1], depend_ids=bcfcombine_moab.ids)
   # NOTE: consensus_calls is not submitted (that submission was disabled)

   # no explicit release calls are made here; only the message is printed
   print("Releasing jobs")

   # block until the bcf index jobs are done (consensus is not waited for)
   print("Waiting for jobs to finish ...")
   waiter = Semaphore(bcfindex_moab.ids, home, outdir, queue, 20, 2 * 86400)
   waiter.wait()
   print("--------------------------------------")

   # the per-mpileup bcf files are no longer needed
   genobox_modules.rm_files(bcffiles)

   return o
Пример #6
0
def start_gv(args, logger):
   '''Genotype a bam file and filter the calls, using args.caller.

   'samtools' runs genotyping, vcf filtering, the dbsnp step and bcf2ref;
   'gatk' runs genotyping and vcf filtering. With any other caller only the
   cleanup of queuing system files happens. Messages are printed to stdout;
   there is no return value.
   '''

   import os
   import subprocess

   divider = "--------------------------------------"

   genobox_modules.check_genome(args.genome)
   final_bcf = 'genotyping/%s.all.bcf' % args.sample

   # opening banner (sample line only when a sample name is set)
   if args.sample:
      print(divider)
      print("Processing sample: %s" % args.sample)
   print(divider)

   if args.caller == 'samtools':
      print("Starting genotyping (samtools)")
      final_bcf = start_genotyping(
         args.bam, args.genome, args.fa, args.prior, args.pp,
         args.queue, final_bcf, args.sample, args.partition, logger)

      print("Starting vcffiltering")
      final_vcf = start_vcffilter(
         final_bcf, args.genome, args.caller, args.Q, args.ex,
         args.rmsk, args.ab, args.prune, args.ovar, args.queue,
         args.sample, args.partition, logger)

      print("Start dbsnp")
      final_dbsnp_vcf = start_dbsnp(
         final_vcf, args.ex, args.dbsnp, args.ovar, args.queue,
         args.partition, logger)

      print("Start bcf2ref")
      start_bcf2ref(
         final_bcf, args.genome, args.Q, args.ex, args.dbsnp,
         args.rmsk, 'genotyping/indels_for_filtering.vcf', args.oref,
         args.queue, args.sample, args.partition, logger)

   elif args.caller == 'gatk':
      print("Start genotyping (gatk)")
      vcffiles = start_genotyping_gatk(
         args.bam, args.genome, args.fa, args.dbsnp, args.call_conf,
         args.call_emit, args.output_mode, args.queue, args.sample,
         args.partition, logger)

      print("Start vcffiltering (gatk)")
      final_vcfs = start_vcffilter_gatk(
         vcffiles, args.genome, args.fa, args.Q, args.rmsk, args.ab,
         args.prune, args.queue, args.sample, args.partition, logger)

   # the queuing system leaves these files behind; remove them
   genobox_modules.rm_files(['run_genobox_*', 'semaphores.*'])

   print("Done")
   print(divider)
Пример #7
0
# Per-chromosome filtering: every intermediate file is registered in
# ``files`` so that all of them can be removed at the end.
flt_prefix = 'genotyping/tmp.all.bcf.%s.flt' % args.chr
files['filterAll'] = flt_prefix + '.vcf.gz'
files['filterAll_tbi'] = flt_prefix + '.vcf.gz.tbi'
files['dbsnp_ann'] = flt_prefix + '.ann.vcf.gz'
files['rmsk'] = flt_prefix + '.ann.nr.vcf.gz'
files['indel_filt'] = 'genotyping/tmp.indel_filtered.%s.vcf' % args.chr

# step 1: filter the bcf into a vcf
vcf_filterAll(args.bcf, args.chr_id, args.d, args.D, args.Q, args.ex,
              files['filterAll'])

# step 2: tabix on the filtered vcf
vcf_tabix(files['filterAll'])

# step 3: dbsnp annotation
vcf_annotate_dbsnp(files['filterAll'], args.dbsnp, files['dbsnp_ann'])

# step 4: rmsk filtering
if 'MT' in args.chr:
   # chromosome name contains MT (e.g. chrMT or MT): use the manual filter
   manual_rmsk_filter(files['dbsnp_ann'], args.chr, args.rmsk,
                      files['rmsk'])
else:
   # every other chromosome is filtered with BEDtools
   vcf_filter_rmsk(files['dbsnp_ann'], args.rmsk, files['rmsk'])

# step 5: indel filter, writing the final output to args.o
vcf_filter_indels(files['rmsk'], args.chr, args.indels,
                  files['indel_filt'], args.o)

# drop all intermediate files
genobox_modules.rm_files(files.values())


Пример #8
0
def start_abgv(args, logger):
    '''Run the whole pipeline: alignment, bam processing, genotyping and filtering.

    Stages, in order: alignment, bam processing, bam stats (started with
    wait=False), then genotyping and vcf filtering with the caller named in
    ``args.caller``. For 'samtools' the dbsnp step and bcf2ref follow; for
    'gatk' only genotyping and vcf filtering are run.

    Settings are read from attributes of ``args``; ``logger`` is passed on
    to every stage. Progress is printed to stdout; nothing is returned.
    '''

    import os
    import subprocess

    # check genome file
    genobox_modules.check_genome(args.genome)

    final_bam = 'alignment/%s.flt.sort.rmdup.bam' % args.sample
    final_bcf = 'genotyping/%s.all.bcf' % args.sample

    # initialize library file from given arguments
    library = genobox_modules.initialize_library(args.libfile, args.se,
                                                 args.pe1, args.pe2,
                                                 args.sample, args.mapq,
                                                 args.libs, args.pl)

    # start run
    if args.sample:
        print("--------------------------------------")
        print("Processing sample: %s" % args.sample)
    print("--------------------------------------")

    # trimming is currently disabled:
    #if args.no_trim == False:
    #   print "Starting trimming"
    #   (se_files, pe1_files, pe2_files) = start_trim(args, logger)
    #   library.update(Trim=se_files+pe1_files+pe2_files)

    print("Starting alignment")
    (bamfiles, library) = start_alignment(args, logger)

    print("Starting bam processing")
    final_bam = start_bamprocess(library,
                                 genobox_modules.unique(bamfiles.values()),
                                 args.mapq, args.libs, args.tmpdir, args.queue,
                                 final_bam, args.realignment, args.known,
                                 args.fa, args.sample, args.partition, logger)

    print("Starting bam stats")
    start_bamstats(args, final_bam, args.partition, logger, wait=False)

    print("Starting genotyping")
    if args.caller == 'samtools':
        final_bcf = start_genotyping(final_bam, args.genome, args.fa,
                                     args.prior, args.pp, args.queue,
                                     final_bcf, args.sample, args.partition,
                                     logger)
        print("Starting vcffiltering")
        final_vcf = start_vcffilter(final_bcf, args.genome, args.caller,
                                    args.Q, args.ex, args.rmsk, args.ab,
                                    args.prune, args.ovar, args.queue,
                                    args.sample, args.partition, logger)
        print("Start dbsnp")
        final_dbsnp_vcf = start_dbsnp(final_vcf, args.ex, args.dbsnp,
                                      args.ovar, args.queue, args.partition,
                                      logger)
        print("Start bcf2ref")
        start_bcf2ref(final_bcf, args.genome, args.Q, args.ex, args.dbsnp,
                      args.rmsk, 'genotyping/indels_for_filtering.vcf',
                      args.oref, args.queue, args.sample, args.partition,
                      logger)
    elif args.caller == 'gatk':
        print("Start genotyping (gatk)")
        # fixed: was args.args.call_emit, which is not an attribute of args
        vcffiles = start_genotyping_gatk(final_bam, args.genome, args.fa,
                                         args.dbsnp, args.call_conf,
                                         args.call_emit, args.output_mode,
                                         args.queue, args.sample,
                                         args.partition, logger)
        print("Start vcffiltering (gatk)")
        # fixed: the logger is the function parameter, not args.logger
        final_vcfs = start_vcffilter_gatk(vcffiles, args.genome, args.fa,
                                          args.Q, args.rmsk, args.ab,
                                          args.prune, args.queue, args.sample,
                                          args.partition, logger)

    # remove queuing system outfiles
    genobox_modules.rm_files(['run_genobox_*', 'semaphores.*'])

    print("Done")
    print("--------------------------------------")
    # NOTE(review): final_bcf is 'genotyping/<sample>.all.bcf', so the path
    # in the message below may be out of date - confirm before changing it.
    print("Raw genotyping is written in genotyping/all.bcf")
    print("High confidence variants: %s" % args.ovar)
    print("High confidence reference: %s" % args.oref)
    print("--------------------------------------")
Пример #9
0
def start_abgv(args, logger):
    """Run the whole pipeline: alignment, bam processing, genotyping and filtering.

    Stages, in order: alignment, bam processing, bam stats (started with
    wait=False), then genotyping and vcf filtering with the caller named in
    ``args.caller``. For "samtools" the dbsnp step and bcf2ref follow; for
    "gatk" only genotyping and vcf filtering are run.

    Settings are read from attributes of ``args``; ``logger`` is passed on
    to every stage. Progress is printed to stdout; nothing is returned.
    """

    import os
    import subprocess

    # check genome file
    genobox_modules.check_genome(args.genome)

    final_bam = "alignment/%s.flt.sort.rmdup.bam" % args.sample
    final_bcf = "genotyping/%s.all.bcf" % args.sample

    # initialize library file from given arguments
    library = genobox_modules.initialize_library(
        args.libfile, args.se, args.pe1, args.pe2, args.sample, args.mapq, args.libs, args.pl
    )

    # start run
    if args.sample:
        print("--------------------------------------")
        print("Processing sample: %s" % args.sample)
    print("--------------------------------------")

    # trimming is currently disabled:
    # if args.no_trim == False:
    #   print "Starting trimming"
    #   (se_files, pe1_files, pe2_files) = start_trim(args, logger)
    #   library.update(Trim=se_files+pe1_files+pe2_files)

    print("Starting alignment")
    (bamfiles, library) = start_alignment(args, logger)

    print("Starting bam processing")
    final_bam = start_bamprocess(
        library,
        genobox_modules.unique(bamfiles.values()),
        args.mapq,
        args.libs,
        args.tmpdir,
        args.queue,
        final_bam,
        args.realignment,
        args.known,
        args.fa,
        args.sample,
        args.partition,
        logger,
    )

    print("Starting bam stats")
    start_bamstats(args, final_bam, args.partition, logger, wait=False)

    print("Starting genotyping")
    if args.caller == "samtools":
        final_bcf = start_genotyping(
            final_bam,
            args.genome,
            args.fa,
            args.prior,
            args.pp,
            args.queue,
            final_bcf,
            args.sample,
            args.partition,
            logger,
        )
        print("Starting vcffiltering")
        final_vcf = start_vcffilter(
            final_bcf,
            args.genome,
            args.caller,
            args.Q,
            args.ex,
            args.rmsk,
            args.ab,
            args.prune,
            args.ovar,
            args.queue,
            args.sample,
            args.partition,
            logger,
        )
        print("Start dbsnp")
        final_dbsnp_vcf = start_dbsnp(final_vcf, args.ex, args.dbsnp, args.ovar, args.queue, args.partition, logger)
        print("Start bcf2ref")
        start_bcf2ref(
            final_bcf,
            args.genome,
            args.Q,
            args.ex,
            args.dbsnp,
            args.rmsk,
            "genotyping/indels_for_filtering.vcf",
            args.oref,
            args.queue,
            args.sample,
            args.partition,
            logger,
        )
    elif args.caller == "gatk":
        print("Start genotyping (gatk)")
        # fixed: was args.args.call_emit, which is not an attribute of args
        vcffiles = start_genotyping_gatk(
            final_bam,
            args.genome,
            args.fa,
            args.dbsnp,
            args.call_conf,
            args.call_emit,
            args.output_mode,
            args.queue,
            args.sample,
            args.partition,
            logger,
        )
        print("Start vcffiltering (gatk)")
        # fixed: the logger is the function parameter, not args.logger
        final_vcfs = start_vcffilter_gatk(
            vcffiles,
            args.genome,
            args.fa,
            args.Q,
            args.rmsk,
            args.ab,
            args.prune,
            args.queue,
            args.sample,
            args.partition,
            logger,
        )

    # remove queuing system outfiles
    genobox_modules.rm_files(["run_genobox_*", "semaphores.*"])

    print("Done")
    print("--------------------------------------")
    # NOTE(review): final_bcf is "genotyping/<sample>.all.bcf", so the path
    # in the message below may be out of date - confirm before changing it.
    print("Raw genotyping is written in genotyping/all.bcf")
    print("High confidence variants: %s" % args.ovar)
    print("High confidence reference: %s" % args.oref)
    print("--------------------------------------")
Пример #10
0
# Variant filtering script body: each step reads the file written by the
# previous one; the last step writes the final result to args.o.
paths = genobox_modules.setSystem()
home = os.getcwd()

# get genome file
genome = get_genome(args.genome)

# names of the intermediate files, one per filtering step
tmp_prefix = 'genotyping/tmp.flt.'
combined_vcf = tmp_prefix + 'all.vcf'
rmsk_vcf = tmp_prefix + 'all.rmsk.vcf'
hetfilt_vcf = tmp_prefix + 'all.rmsk.hetfilt.vcf'
abfilt_vcf = tmp_prefix + 'all.rmsk.hetfilt.abfilt.vcf'

# varfilter on the bcf gives the filtered vcf files
vcf_files = bcf2varfilter(args.bcf, genome, args.Q, tmp_prefix)

# merge them into a single vcf
cat_vcfs(vcf_files, combined_vcf)

# drop calls in annotated repeats (rmsk)
vcf_filter_rmsk(combined_vcf, args.rmsk, rmsk_vcf)

# drop heterozygote calls on haploid chromosomes
vcf_filter_haploid(rmsk_vcf, genome, hetfilt_vcf)

# allelic balance filter
vcf_filter_allelic_balance(hetfilt_vcf, args.ab, args.caller, abfilt_vcf)

# prune nearby calls; this writes the final output
vcf_filter_prune(abfilt_vcf, args.prune, args.o)

# indels used later when filtering reference calls
write_indels_for_filtering(args.o, args.ex)

# clean up the intermediate files
genobox_modules.rm_files(['genotyping/tmp.flt*'])

Пример #11
0
# Per-chromosome filtering. Every intermediate file is listed in ``files``
# so that all of them can be removed in one go at the end.
flt_prefix = "genotyping/tmp.all.bcf.%s.flt" % args.chr
files = {
    "filterAll": flt_prefix + ".vcf.gz",
    "filterAll_tbi": flt_prefix + ".vcf.gz.tbi",
    "dbsnp_ann": flt_prefix + ".ann.vcf.gz",
    "rmsk": flt_prefix + ".ann.nr.vcf.gz",
    "indel_filt": "genotyping/tmp.indel_filtered.%s.vcf" % args.chr,
}

# step 1: filter the bcf into a vcf
vcf_filterAll(args.bcf, args.chr_id, args.d, args.D, args.Q, args.ex, files["filterAll"])

# step 2: tabix on the filtered vcf
vcf_tabix(files["filterAll"])

# step 3: dbsnp annotation
vcf_annotate_dbsnp(files["filterAll"], args.dbsnp, files["dbsnp_ann"])

# step 4: rmsk filtering
if "MT" in args.chr:
    # chromosome name contains MT (e.g. chrMT or MT): use the manual filter
    manual_rmsk_filter(files["dbsnp_ann"], args.chr, args.rmsk, files["rmsk"])
else:
    # every other chromosome is filtered with BEDtools
    vcf_filter_rmsk(files["dbsnp_ann"], args.rmsk, files["rmsk"])

# step 5: indel filter, writing the final output to args.o
vcf_filter_indels(files["rmsk"], args.chr, args.indels, files["indel_filt"], args.o)

# drop all intermediate files
genobox_modules.rm_files(files.values())
Пример #12
0
# Variant filtering: a chain of steps in which each one consumes the file
# produced by the step before it; the final result goes to args.o.
genome = get_genome(args.genome)

# intermediate file names, all under the same temporary prefix
flt_base = 'genotyping/tmp.flt.'
all_vcf = flt_base + 'all.vcf'
no_repeat_vcf = flt_base + 'all.rmsk.vcf'
no_het_vcf = flt_base + 'all.rmsk.hetfilt.vcf'
balanced_vcf = flt_base + 'all.rmsk.hetfilt.abfilt.vcf'

# varfilter on the bcf gives the filtered vcf files
vcf_files = bcf2varfilter(args.bcf, genome, args.Q, flt_base)

# merge them into a single vcf
cat_vcfs(vcf_files, all_vcf)

# drop calls in annotated repeats (rmsk)
vcf_filter_rmsk(all_vcf, args.rmsk, no_repeat_vcf)

# drop heterozygote calls on haploid chromosomes
vcf_filter_haploid(no_repeat_vcf, genome, no_het_vcf)

# allelic balance filter
vcf_filter_allelic_balance(no_het_vcf, args.ab, args.caller, balanced_vcf)

# prune nearby calls; this writes the final output
vcf_filter_prune(balanced_vcf, args.prune, args.o)

# indels used later when filtering reference calls
write_indels_for_filtering(args.o, args.ex)

# clean up the intermediate files
genobox_modules.rm_files(['genotyping/tmp.flt*'])
Пример #13
0
#!/panvol1/simon/bin/python2.7

from genobox_modules import rm_files
import subprocess
import os

# Patterns handed to rm_files (presumably shell-style globs resolved
# against the current directory - confirm in genobox_modules).
leftover_patterns = [
    'run_genobox_velveth.*',
    'run_genobox_velvetg.*',
    'run_genobox_interleave.*',
    '*.interleaved',
    'pbsjob.tmp*',
    'run_genobox_velvetaccept.*',
    'run_mlst_trim.*',
]
rm_files(leftover_patterns)

# remove the 'trimmed' directory tree when it is present
trimmed_present = os.path.exists('trimmed')
if trimmed_present:
    subprocess.call('rm -r trimmed/', shell=True)
Пример #14
0
#!/panvol1/simon/bin/python2.7

from genobox_modules import rm_files
import subprocess
import os

# Patterns handed to rm_files (presumably shell-style globs resolved
# against the current directory - confirm in genobox_modules).
stale_patterns = [
   'run_genobox_velveth.*',
   'run_genobox_velvetg.*',
   'run_genobox_interleave.*',
   '*.interleaved',
   'pbsjob.tmp*',
   'run_genobox_velvetaccept.*',
   'run_mlst_trim.*',
]
rm_files(stale_patterns)

# remove the 'trimmed' directory tree when it is present
has_trimmed = os.path.exists('trimmed')
if has_trimmed:
   subprocess.call('rm -r trimmed/', shell=True)
Пример #15
0
def start_genotyping(bam, chr, fa, prior, pp, queue, o, sample, partition,
                     logger):
    '''Submit the samtools genotyping jobs for a bam file and wait for them.

    Calls are built for bam indexing, mpileup, bcf combining, bcf indexing
    and consensus. The first four are submitted through ``Moab`` as a
    dependency chain; the consensus calls are built but not submitted. The
    function blocks on a ``Semaphore`` for the bcf index jobs, removes the
    per-mpileup bcf files and returns ``o``, the output bcf.
    '''

    import subprocess
    import genobox_modules
    from genobox_classes import Moab
    from genobox_classes import Semaphore
    import os

    workdir = 'genotyping'
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    # queueing setup and resource requests
    paths = genobox_modules.setSystem()
    home = os.getcwd()
    cpuA = 'nodes=1:ppn=1,mem=512mb,walltime=172800'
    cpuC = 'nodes=1:ppn=1,mem=2gb,walltime=172800'
    cpuE = 'nodes=1:ppn=1,mem=5gb,walltime=172800'
    cpuF = 'nodes=1:ppn=2,mem=2gb,walltime=172800'
    cpuB = 'nodes=1:ppn=16,mem=10gb,walltime=172800'

    # keyword arguments shared by every Moab submission below
    shared = dict(logfile=logger, queue=queue, partition=partition)

    # command lines for each stage
    bamindex_calls = bam_index(bam)
    (mpileup_calls, bcffiles) = mpileup(bam, chr, fa, prior, pp)
    bcfcombine_calls = bcf_combine(bcffiles, o)
    bcfindex_calls = bcf_index(o)
    consensus_calls = consensus(o, sample)

    # submit the stages, each one depending on the previous
    print("Submitting jobs")
    bamindex_moab = Moab(bamindex_calls, runname='run_genobox_bamindex',
                         cpu=cpuC, **shared)
    mpileup_moab = Moab(mpileup_calls, runname='run_genobox_mpileup',
                        cpu=cpuF, depend=True, depend_type='expand',
                        depend_val=[len(mpileup_calls)],
                        depend_ids=bamindex_moab.ids, **shared)
    bcfcombine_moab = Moab(bcfcombine_calls, runname='run_genobox_bcfcombine',
                           cpu=cpuC, depend=True, depend_type='conc',
                           depend_val=[len(mpileup_calls)],
                           depend_ids=mpileup_moab.ids, **shared)
    bcfindex_moab = Moab(bcfindex_calls, runname='run_genobox_bcfindex',
                         cpu=cpuC, depend=True, depend_type='one2one',
                         depend_val=[1], depend_ids=bcfcombine_moab.ids,
                         **shared)
    # NOTE: consensus_calls is not submitted (that submission was disabled)

    # no explicit release calls are made here; only the message is printed
    print("Releasing jobs")

    # block until the bcf index jobs are done (consensus is not waited for)
    print("Waiting for jobs to finish ...")
    gate = Semaphore(bcfindex_moab.ids, home, workdir, queue, 20, 2 * 86400)
    gate.wait()
    print("--------------------------------------")

    # the per-mpileup bcf files are no longer needed
    genobox_modules.rm_files(bcffiles)

    return o