Exemplo n.º 1
0
    def __init__(self, job_name, out_sh, directory, submit,
                 samtools_sort_args=''):
        """Build a SAM -> BAM -> sorted-BAM job for each SAM file found.

        Every path matching ``<directory>*.sam`` gets two shell commands:
        ``samtools view -bS`` to convert it to BAM, followed by
        ``samtools sort`` on that BAM. The two commands are handed to a
        ``qtools.Submitter`` and ``write_sh`` is called once per SAM file.

        Parameters
        ----------
        job_name : str
            Job name forwarded to the submitter.
        out_sh : str
            Shell script filename forwarded to the submitter.
        directory : str
            Directory to search for ``*.sam`` files. The empty string means
            the current directory.
        submit : bool
            Forwarded to ``Submitter.write_sh``.
        samtools_sort_args : str, optional
            Extra arguments inserted into the ``samtools sort`` command line.
        """
        # Add a trailing slash to any non-empty directory so it can be glued
        # straight onto the glob pattern. This may misbehave when the search
        # directory is the filesystem root '/'.
        if directory != '' and not directory.endswith('/'):
            directory += '/'

        sam_suffix = '.sam'
        for sam in glob('%s*.sam' % directory):
            # The glob pattern guarantees the '.sam' suffix, so strip it by
            # position. str.replace would also rewrite a '.sam' that appears
            # earlier in the path (e.g. inside a 'run.samples/' directory).
            stem = sam[:-len(sam_suffix)]
            bam = stem + '.bam'
            sorted_prefix = stem + '.sorted'

            qsub_commands = [
                # Samtools view flags:
                # '-b': output is BAM
                # '-S': input is SAM
                'samtools view -bS %s > %s' % (sam, bam),
                'samtools sort %s %s %s' % (samtools_sort_args, bam,
                                            sorted_prefix),
            ]

            # NOTE(review): every iteration passes the same out_sh, so each
            # SAM file's script presumably replaces the previous one --
            # confirm this is intended when several SAM files are present.
            sub = qtools.Submitter(queue_type='PBS',
                                   sh_filename=out_sh,
                                   commands=qsub_commands,
                                   job_name=job_name, nodes=1, ppn=16,
                                   queue='home')
            sub.write_sh(submit=submit)
Exemplo n.º 2
0
    def __init__(self,
                 gtf,
                 gff,
                 index_dir,
                 job_name,
                 out_sh,
                 submit,
                 index_gff_py='index_gff.py'):
        """Write (and optionally submit) a job that indexes a GFF file.

        When ``gtf`` is given, a ``gtf2gff3.pl`` conversion command is queued
        first and its output path replaces ``gff``. The indexing command
        ``<index_gff_py> --index <gff> <index_dir>`` is always queued last.

        Parameters
        ----------
        gtf : str or None
            GTF file to convert before indexing, or None to index ``gff``
            directly.
        gff : str or None
            GFF file to index. Ignored when ``gtf`` is not None.
        index_dir : str
            Passed as the final argument of the indexing command.
        job_name : str
            Job name forwarded to the submitter.
        out_sh : str
            Shell script filename forwarded to the submitter.
        submit : bool
            Forwarded to ``Submitter.write_sh``.
        index_gff_py : str, optional
            Indexing executable to invoke.
        """
        commands = []

        if gtf is not None:
            gtf_suffix = '.gtf'
            if gtf.endswith(gtf_suffix):
                # Swap only the extension. A blanket str.replace would also
                # rewrite 'gtf' inside directory names and point the output
                # at a different location.
                gff = gtf[:-len(gtf_suffix)] + '.gff'
            else:
                # Unconventional name: append the extension so the shell
                # redirect below can never target the input file itself.
                gff = gtf + '.gff'
            commands.append('gtf2gff3.pl %s > %s' % (gtf, gff))

        commands.append('%s --index %s %s' % (index_gff_py, gff, index_dir))

        qs = qtools.Submitter(queue_type='PBS',
                              sh_filename=out_sh,
                              commands=commands,
                              job_name=job_name,
                              nodes=1,
                              ppn=1,
                              queue='home',
                              walltime='0:30:00')
        qs.write_sh(submit=submit)
Exemplo n.º 3
0
    def __init__(self,
                 sample_info_file,
                 submit=True,
                 out_sh=None,
                 queue_type=None):
        """Queue ``oldsplice.py`` runs for every row of a sample sheet.

        The sheet is loaded with ``pandas.read_table``. Each row must provide
        'Sample ID' and 'Bam File'. 'Species' is optional and defaults to
        "hg19". 'Strand' is optional and may be 'sense', 'flip' or 'both';
        a missing or unrecognised value is treated as 'both'.

        Parameters
        ----------
        sample_info_file : str
            Path of the sample sheet.
        submit : bool, optional
            Forwarded to ``Submitter.job``.
        out_sh : str or None, optional
            Shell script filename; 'runOldsplice.sh' is used when None.
        queue_type : optional
            Forwarded unchanged to ``qtools.Submitter``.
        """
        sample_info = pd.read_table(sample_info_file)
        valid_strands = ('flip', 'sense', 'both')

        cmds = []
        for _, dat in sample_info.iterrows():
            sample_id = dat['Sample ID']
            bam = dat['Bam File']

            # Optional columns: fall back to defaults instead of hiding
            # arbitrary errors behind a bare ``except``.
            species = dat.get('Species', "hg19")
            strand = dat.get('Strand', 'both')
            # Explicit check rather than ``assert``: asserts are stripped
            # under ``python -O``, which would let an unknown strand through
            # and silently queue no command for the sample.
            if strand not in valid_strands:
                strand = 'both'

            if strand in ('sense', 'both'):
                cmds.append(
                    "oldsplice.py -b %s -s %s -o %s --splice_type SE --splice_type MXE --processors 16" % (
                        bam, species, sample_id + ".splices"))
            if strand in ('flip', 'both'):
                # Build the flipped output name directly so a sample ID that
                # itself contains ".splices" is left untouched.
                cmds.append(
                    "oldsplice.py -f -b %s -s %s -o %s --splice_type SE --splice_type MXE --processors 16" % (
                        bam, species, sample_id + ".flip.splices"))

        sh_filename = 'runOldsplice.sh' if out_sh is None else out_sh
        sub = qtools.Submitter(array=True,
                               queue="home",
                               nodes=1,
                               commands=cmds,
                               sh_filename=sh_filename,
                               job_name="oldsplice",
                               max_running=1000,
                               ppn=16,
                               queue_type=queue_type)
        sub.job(submit=submit)
Exemplo n.º 4
0
from gscripts import qtools

import pandas as pd

import sys


# Load the sample sheet named by the first command-line argument. The loop
# below reads the 'Sample ID', 'Bam File' and 'Species' columns from each row
# and uses a 'gff' column when one is present.
sInfo = pd.read_table(sys.argv[1])

# Starts out empty.
cmds = []

# Submitter created with no arguments, before any row is processed.
Sub = qtools.Submitter()
for row, dat in sInfo.iterrows():
    id = dat['Sample ID']
    bam = dat['Bam File']
    try:
        species = dat['Species']
        if species == "hg19":
            if "gff" in dat:
                gff = dat['gff']
            else:
                gff = "/home/yeo-lab/genomes/hg19/miso_annotations/SE.hg19.gff3"
        elif species == "panTro4":
            if 'gff' in dat:
                gff = dat['gff']
            else:
                gff = '/oasis/tscc/scratch/mlovci/apeSeq/genomes/all_genomes/annotations/chimp/events/commonshortest/SE.panTro4.gff3'
'

        else:
            raise ValueError("I don't know where species %s's gff file is" % (dat['Species']))