示例#1
0
def runQsubCommand(job, input_parameters, bash_config, debug):
    """Write the bash script for one job and submit it with ``qsub``.

    The script text comes from ``makeBashScript(bash_config)`` and is filled
    in with ``input_parameters`` via ``%``-formatting. It is saved as
    ``<qsub_dir_path>/<job>.sh``, submitted as ``qsub -N <job> <script>`` and
    removed again unless ``save_bash_call`` is ``'save'`` or ``'true'``
    (case-insensitive).

    Args:
        job: Job name; used for the script file name, the ``.qout`` file
            name and the ``qsub -N`` job name.
        input_parameters: Parameter dict. ``qsub_dir_path`` and
            ``save_bash_call`` are read; ``qs_out_name`` is stored in it
            before the template is rendered.
        bash_config: Configuration passed on to ``makeBashScript``.
        debug: If true, collect debug rows, print them with ``printDebug``
            and pause via ``userCall``.
    """
    # imported locally so this function does not rely on the module header
    import subprocess

    # initialize debug
    debug_list = []
    fmt = '{:<17}'
    if debug:
        debug_list.append(['def: runQsubCommand():'])
        debug_list.append(['-'])

    # initialize qsub parameters
    qsub_dir = input_parameters['qsub_dir_path']
    script_path = os.path.join(qsub_dir, '%s.sh' % job)
    # stored in the parameter dict before the template below is rendered
    input_parameters['qs_out_name'] = os.path.join(qsub_dir, '%s.qout' % job)

    # generate bash script
    BASH = makeBashScript(bash_config)
    call = BASH % input_parameters
    if debug:
        debug_list.append(['BASH SCRIPT:\n%s' % call])

    # save bash script
    with open(script_path, 'w') as tmp_script:
        tmp_script.write(call)

    # submit the script; an argument list (no shell) keeps job names and
    # paths containing spaces or shell metacharacters intact
    subprocess.call(['qsub', '-N', '%s' % job, script_path])

    # remove bash script unless the user asked to keep it
    if input_parameters['save_bash_call'].lower() not in ['save', 'true']:
        os.remove(script_path)

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)
示例#2
0
def getQuantCombinations(job_requests, fastq_ref, input_parameters, debug):
    """Combine every requested aligner with every requested fastq combination.

    ``input_parameters['quant_comb_list']`` is a ``;``-separated list of
    ``,``-separated fastq references. Each combination is sorted and turned
    into a key of the form ``'_fq1_fq2_fq3'`` whose value is the list of
    fastq names taken from ``fastq_ref[<reference>][0]``. Every job in
    ``job_requests`` ending in ``_align`` is then prefixed (without that
    suffix) to every combination key.

    Args:
        job_requests: Mapping whose keys are job names.
        fastq_ref: Mapping ``reference -> [ref_name, num_reads]``.
        input_parameters: Parameter dict; ``quant_comb_list`` is read.
        debug: If true, collect debug rows, print them and pause.

    Returns:
        dict mapping ``'<ALIGNER>_fq1_fq2'`` to the list of fastq names.
    """
    # debug bookkeeping
    fmt = '{:<15}'
    debug_list = []
    if debug:
        debug_list.extend([['def: getQuantCombinations():'], ['-'],
                           ['combo', 'list'], ['-']])

    # aligner names (suffix stripped) and quantification jobs among the requests
    align_jobs = [
        re.sub(r'_align$', '', name) for name in job_requests.keys()
        if re.search(r'_align$', name)
    ]
    # collected like in the original implementation; not used further here
    quant_jobs = [
        name for name in job_requests.keys() if re.search(r'_quant$', name)
    ]

    # fastq combinations: '_fq1_fq2_fq3' -> ['fq_name_1', 'fq_name_2', 'fq_name_3']
    print("%s Status: Make fastq combinations ..." % (getTime()))
    fastq_combos = {}
    for combi_str in input_parameters['quant_comb_list'].split(';'):
        if not combi_str:
            continue
        members = sorted(entry for entry in combi_str.split(',') if entry)
        combo_key = ''.join('_%s' % entry for entry in members)
        # names are stored in the same (sorted) order as the key parts
        combo_names = [fastq_ref[entry][0] for entry in members]
        if debug:
            debug_list.append([combo_key, combo_names])
        fastq_combos[combo_key] = combo_names
    print("%s Combinations: %s" % (getTime(), fastq_combos.keys()))

    # aligner + fastq combination: 'ALIGNTOOL1_fq1_fq2_fq3' -> fastq name list
    if debug:
        debug_list.append(['-'])
    print("%s Status: Make aligner-fastq combinations ..." % (getTime()))
    align_fq_comb = {}
    for aligner in align_jobs:
        for combo_key, combo_names in fastq_combos.items():
            aln_fq = '%s%s' % (aligner, combo_key)
            if debug:
                debug_list.append([aln_fq, combo_names])
            align_fq_comb[aln_fq] = combo_names
    print("%s Combinations: %s" % (getTime(), align_fq_comb.keys()))

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    return align_fq_comb
示例#3
0
def makeBashConfig(job_list, input_parameters, job_config_data, debug):
    """Assemble the bash script pieces for every job that has to be run.

    Only jobs whose state in ``job_list`` is ``'request'``, ``'ask'`` or
    ``'new'`` are handled. For each of them the pre and seq scripts are
    copied from the job's configuration, and the main script is built by
    walking ``script_order``: every item contributes ``job_config[item]``
    followed by a space, except the item ``'options'``, which contributes
    ``job_config[option]`` for each entry of ``script_options`` whose value in
    ``input_parameters`` is truthy.

    Args:
        job_list: Mapping ``job -> state``.
        input_parameters: Parameter dict consulted for the optional parts.
        job_config_data: Mapping ``job -> job configuration dict``.
        debug: If true, collect debug rows, print them and pause.

    Returns:
        dict ``job -> {'bash_pre_script', 'bash_seq_script',
        'bash_main_script'}``.
    """
    # debug bookkeeping
    fmt = '{:<17}'
    debug_list = []
    if debug:
        debug_list.extend([['def: makeBashConfig():'], ['-'],
                           ['job', 'script'], ['-']])

    bash_config = {}
    for job, state in job_list.items():
        if state not in ('request', 'ask', 'new'):
            continue

        job_config = job_config_data[job]
        entry = {}

        print("%s Status: Create \'bash_pre_script\' for \'%s\' ..." %
              (getTime(), job))
        entry['bash_pre_script'] = job_config['bash_pre_script']

        print("%s Status: Create \'bash_seq_script\' for \'%s\' ..." %
              (getTime(), job))
        entry['bash_seq_script'] = job_config['bash_seq_script']

        print("%s Status: Create \'bash_main_script\' for \'%s\' ..." %
              (getTime(), job))
        pieces = []
        for order_item in job_config['script_order']:
            if order_item == 'options':
                # optional parts are only added when their parameter is set
                for option in job_config['script_options']:
                    if input_parameters[option]:
                        pieces.append(job_config[option] + ' ')
            else:
                pieces.append(job_config[order_item] + ' ')
        script = ''.join(pieces)
        print('%s Script: %s' % (getTime(), script))
        if debug:
            debug_list.append([job, script])

        entry['bash_main_script'] = script
        bash_config[job] = entry

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    return bash_config
示例#4
0
def getJobRequests(possible_jobs, input_parameters, debug):
    """Translate the requested tools into concrete job names.

    The processing stages are ``index``, ``align`` and ``quant``.
    ``input_parameters['only_make_job']`` (case-insensitive) selects the last
    stage to create; any other value means ``quant``. For every non-empty
    entry of the ``,``-separated ``arq_job_list`` and every stage up to and
    including that last stage, the job ``'<TOOL>_<stage>'`` (tool name in
    upper case) is requested if it is a key of ``possible_jobs``. Afterwards
    ``getDependency`` is called once per requested job.

    Args:
        possible_jobs: Mapping whose keys are the supported job names.
        input_parameters: Parameter dict; ``only_make_job`` and
            ``arq_job_list`` are read.
        debug: If true, collect debug rows, print them and pause.

    Returns:
        The job request mapping as returned by the last ``getDependency``
        call (or the initial ``{job: ''}`` dict if nothing was requested).
    """
    # initialize debug
    debug_list = []
    fmt = '{:<17}'
    if debug:
        debug_list.append(['def: getJobRequests():'])
        debug_list.append(['-'])

    # job dict
    job_requests = {}

    # stages to create: everything up to and including the requested one
    stages = ('index', 'align', 'quant')
    requested_base = input_parameters['only_make_job'].lower()
    job_base = requested_base if requested_base in stages else 'quant'
    job_types = stages[:stages.index(job_base) + 1]

    # get aligner and quantifier
    if debug:
        debug_list.append(['job combi', 'job possible'])
        debug_list.append(['-'])
    for job_request in filter(None,
                              input_parameters['arq_job_list'].split(',')):
        for job_type in job_types:
            job_ext = '%s_%s' % (job_request.upper(), job_type)
            job_possible = job_ext in possible_jobs
            if debug:
                debug_list.append([job_ext, '%s' % job_possible])
            if job_possible:
                job_requests[job_ext] = ''

    # get job dependencies
    if debug:
        debug_list.append(['-'])
        debug_list.append(['job', 'job_requests'])
        debug_list.append(['-'])
    # iterate over a snapshot: job_requests is replaced (and presumably
    # extended) by getDependency while this loop runs
    for job_key in list(job_requests.keys()):
        job_requests = getDependency(possible_jobs, job_requests, job_key, [])
        if debug:
            debug_list.append([job_key, list(job_requests.keys())])

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    # return all job requests
    return job_requests
示例#5
0
def getArgsFastq(fastq_links, input_parameters, debug):
    """Read a fastq links file and check that every listed fastq file exists.

    Two tab-separated layouts are supported, selected by
    ``input_parameters['seq_style_fq']`` (``'seq'`` or ``'true'``,
    case-insensitive, selects the second one):

    * plain: ``ref_name<TAB>fastq_path``, no header line;
    * seq style: a header line followed by eight columns
      ``ref_name, ref_tube, bc_name, bs_seq, lane_id, num_reads, fq_path,
      fq_name``; the fastq path is ``fq_path + fq_name``. If
      ``input_parameters['test_call']`` is truthy, ``fq_path`` is replaced by
      ``'<arq_path>/test_files/'``.

    Args:
        fastq_links: Path of the links file.
        input_parameters: Parameter dict (see above for the keys read).
        debug: If true, collect debug rows, print them and pause.

    Returns:
        Tuple ``(fastq_files, fastq_ref)`` where ``fastq_files`` maps
        ``ref_name -> fastq_path`` and ``fastq_ref`` maps the data line
        number (as a string, header not counted) to ``[ref_name, num_reads]``.
        ``num_reads`` is ``''`` in the plain layout.

    Raises:
        SystemExit: A line does not have the expected number of columns or a
            listed fastq file does not exist (an error message is printed).
    """
    # initialize parameters
    debug_list = []
    fmt = '{:<17}'
    if debug:
        debug_list.append(['def: getArgsFastq():'])
        debug_list.append(['-'])
        debug_list.append(['ref_name', 'fastq_path', 'fastq_num'])
        debug_list.append(['-'])
    seq_style = input_parameters['seq_style_fq'].lower() in ('seq', 'true')

    # open fastq links file and save links to {<ref_name>:<fastq_path>} dictionary
    fastq_files = {}
    fastq_ref = {}
    skip_header = seq_style
    num_reads = ''
    with open(fastq_links, 'r') as links:
        for i, line in enumerate(links, 1):
            # only the seq style layout has a header line
            if skip_header:
                skip_header = False
                continue
            line = line.strip()
            # only the column split is guarded: a wrong column count raises
            # ValueError, anything else (e.g. a missing parameter key) must
            # not be reported as a broken line
            try:
                if seq_style:
                    (ref_name, _ref_tube, _bc_name, _bs_seq, _lane_id,
                     num_reads, fq_path, fq_name) = line.split('\t')
                else:
                    ref_name, fastq_path = line.split('\t')
            except ValueError:
                # print debug if an error occurs
                if debug:
                    printDebug(debug_list, fmt)
                print(
                    "Error: Fastq links file \'%s\' is invalid in line \'%s\'! Can't read \'%s\'."
                    % (fastq_links, i, line))
                sys.exit()
            if seq_style:
                if input_parameters['test_call']:
                    fq_path = '%s/test_files/' % input_parameters['arq_path']
                # fq_path is expected to end with a path separator
                fastq_path = '%s%s' % (fq_path, fq_name)
            # data line number; the header is not counted
            j = '%s' % (i - 1 if seq_style else i)
            if debug:
                debug_list.append([ref_name, fastq_path, j])
            # check if fastq file exists
            if not os.path.isfile("%s" % (fastq_path)):
                if debug:
                    printDebug(debug_list, fmt)
                print(
                    "Error: No fastq file named \'%s\' in \'%s\' line \'%s\'!"
                    % (fastq_path, fastq_links, i))
                sys.exit()
            # save ref name and fastq link to dictionary
            fastq_files[ref_name] = fastq_path
            fastq_ref[j] = [ref_name, num_reads]

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    # return dictionary with fastq links and list with fastq data
    return fastq_files, fastq_ref
示例#6
0
def checkArgsProject(project, debug):
    """Split the project argument into a directory prefix and a project name.

    Args:
        project: Project name, optionally preceded by a directory path.
        debug: If true, collect debug rows, print them and pause.

    Returns:
        Tuple ``(proj_dir_prefix, proj_base)``. The prefix is the directory
        part of ``project`` if there is one (the user is asked to confirm via
        ``userCall``), otherwise ``''``.

    Raises:
        SystemExit: ``project`` has no base name, or its directory part is
            not an existing directory (an error message is printed).
    """
    # debug bookkeeping
    fmt = '{:<17}'
    debug_list = []
    if debug:
        debug_list.append(['def: checkArgsProject():'])
        debug_list.append(['-'])

    # directory part and base name of the project argument
    proj_path, proj_base = os.path.split(project)
    if debug:
        debug_list.append(['proj_path', proj_path])
        debug_list.append(['proj_base', proj_base])

    proj_dir_prefix = ''
    if not proj_base:
        print("Error: Project name not defined properly!")
        sys.exit()
    if proj_path:
        if not os.path.isdir(proj_path):
            print("Error: \'%s\' has a path, but \'%s\' is not a directory!" %
                  (proj_base, proj_path))
            sys.exit()
        # the directory exists: warn and let the user confirm its use as prefix
        print("%s Warning: \'%s\' has the path \'%s\'!" %
              (getTime(), proj_base, proj_path))
        print(
            "%s Warning: This will overwrite all other \'PROJ_DIR_PREFIX\' variables."
            % (getTime()))
        userCall("Continue? y[es] / n[o]", True)
        proj_dir_prefix = proj_path

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    return proj_dir_prefix, proj_base
示例#7
0
def getArgsConfig(config, input_parameters, debug):
    """Fill empty entries of ``input_parameters`` from a config file.

    The config file holds one ``parameter<TAB>value`` pair per line; blank
    lines and lines starting with ``#`` are skipped. The parameter name is
    matched in lower case against the keys of ``input_parameters``. A value
    from the file is only stored when the current value is empty (falsy), so
    values that are already set are kept.

    Args:
        config: Path of the config file.
        input_parameters: Parameter dict; updated in place.
        debug: If true, collect debug rows, print them and pause.

    Returns:
        The same ``input_parameters`` dict.

    Raises:
        SystemExit: A line is not a single ``parameter<TAB>value`` pair, or
            the parameter is not a key of ``input_parameters`` (an error
            message is printed).
    """
    # initialize parameters
    debug_list = []
    fmt = '{:<17}'
    if debug:
        debug_list.append(['def: getArgsConfig():'])
        debug_list.append(['-'])
        debug_list.append(
            ['config param', 'vaiable name', 'config data', 'terminal data'])
        debug_list.append(['-'])

    # read config file and validate parameters
    with open(config, 'r') as config_file:
        for i, line in enumerate(config_file, 1):
            line = line.strip()
            # skip blank and comment lines
            if not line or line.startswith('#'):
                continue
            # a wrong number of tab separated fields raises ValueError
            try:
                conf_param, conf_data = line.split('\t')
            except ValueError:
                # print debug if an error occurs
                if debug:
                    printDebug(debug_list, fmt)
                print(
                    "Error: Config file line \'%s\' is invalid! Can't read \'%s\'."
                    % (i, line))
                sys.exit()
            # only parameters that are already known are accepted
            param = conf_param.lower()
            if param not in input_parameters:
                # print debug if an error occurs
                if debug:
                    printDebug(debug_list, fmt)
                print(
                    "Error: Parameter \'%s\' in your config file is not supported!"
                    % (conf_param))
                sys.exit()
            data = input_parameters[param]
            if debug:
                debug_list.append([conf_param, param, conf_data, data])
            # only save conf_data if data is empty
            if not data:
                input_parameters[param] = conf_data

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    # return parameters
    return input_parameters
示例#8
0
def checkArguments(input_parameters, debug):
    """Check that the file and tool directory parameters point to real paths.

    The parameters ``fasta_gen_file``, ``gtf_index_file``, ``fastq1_links``
    and ``fastq2_links`` must name existing files; every parameter whose name
    starts with ``tool_dir_`` must name an existing directory. All other
    parameters are currently not checked.

    Args:
        input_parameters: Parameter dict ``name -> value``.
        debug: If true, collect debug rows, print them and pause.

    Raises:
        SystemExit: A checked file or directory does not exist (an error
            message is printed).
    """
    # initialize parameters
    debug_list = []
    fmt = '{:<17}'
    if debug:
        debug_list.append(['def: checkArguments():'])
        debug_list.append(['-'])
        debug_list.append(['param', 'data'])
        debug_list.append(['-'])
    file_params = ('fasta_gen_file', 'gtf_index_file', 'fastq1_links',
                   'fastq2_links')

    # check all parsed arguments in the input_parameters dictionary
    # (items() instead of iteritems() so this runs on Python 2 and 3)
    for param, data in input_parameters.items():
        # save data if debug option is active
        if debug:
            debug_list.append([param, data])
        # check for files
        if param in file_params:
            if not os.path.isfile("%s" % (data,)):
                if debug:
                    printDebug(debug_list, fmt)
                print("Error: No \'%s\' file named \'%s\'!" % (param, data))
                sys.exit()
        # check for tool directories
        elif param and re.match(r'tool_dir_', param):
            if not os.path.isdir("%s" % (data,)):
                if debug:
                    printDebug(debug_list, fmt)
                print("Error: No \'%s\' directory named \'%s\'!" %
                      (param, data))
                sys.exit()
        # TODO: check other arguments

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)
示例#9
0
def makeMainDirectory(input_parameters, debug):
    """Create the project folder layout and report jobs that already exist.

    The data folder paths are derived from ``proj_dir_path`` (and
    ``data_set_name`` for alignments, quantifications and combined data) and
    stored in ``input_parameters`` under ``<type>_dir_path``. Missing folders
    are created, an existing ``qsub.error`` file in the qsub folder is
    removed, and - if the project folder already existed - every
    sub-directory of the index, alignment and quantification folders is
    recorded as finished work.

    Args:
        input_parameters: Parameter dict; ``proj_dir_path``,
            ``data_set_name`` and ``p_name_suffix`` are read, the
            ``*_dir_path`` entries are written.
        debug: If true, collect debug rows, print them and pause.

    Returns:
        Tuple ``(input_parameters, progress_data)`` where ``progress_data``
        is the list of sub-directory names found.
    """
    # initialize parameters
    fmt = '{:<17}'
    debug_list = []
    progress_data = []
    project_directory = input_parameters['proj_dir_path']
    if debug:
        debug_list.append(['def: makeMainDirectory():'])
        debug_list.append(['-'])

    # folder layout: (key, path template, template also needs the data set name)
    layout = (
        ('index', '%s/indexes', False),
        ('align', '%s/alignments/%s', True),
        ('quant', '%s/quantifications/%s', True),
        ('qsub', '%s/qsub_data', False),
        ('fastq', '%s/fastq_data', False),
        ('comb', '%s/comb_data/%s', True),
    )
    for dir_key, template, per_data_set in layout:
        if per_data_set:
            dir_path = template % (project_directory,
                                   input_parameters['data_set_name'])
        else:
            dir_path = template % (project_directory)
        input_parameters['%s_dir_path' % dir_key] = dir_path

    # main project folder
    new_project = not os.path.isdir(project_directory)
    if new_project:
        print("%s Status: Create \'%s\' main folder ..." %
              (getTime(), input_parameters['p_name_suffix']))
        os.mkdir(project_directory)
    else:
        print("%s Warning: \'%s\' already exists! Checking progress ..." %
              (getTime(), project_directory))

    # data folders
    if debug:
        debug_list.append(['dir key', 'dir path'])
        debug_list.append(['-'])
    for data_type in [entry[0] for entry in layout]:
        path_key = '%s_dir_path' % data_type
        if os.path.isdir(input_parameters[path_key]):
            print("%s Warning: \'%s\' folder already exists ..." %
                  (getTime(), data_type))
            continue
        print("%s Status: Create \'%s\' folder ..." % (getTime(), data_type))
        if debug:
            debug_list.append([path_key, input_parameters[path_key]])
        os.makedirs('%s' % (input_parameters[path_key]))

    # remove qsub error file if exists
    qsub_error = '%s/qsub.error' % (input_parameters['qsub_dir_path'])
    if os.path.isfile(qsub_error):
        print("%s Status: Remove \'qsub.error\' file ..." % (getTime()))
        os.remove(qsub_error)

    # check tool progress (only meaningful for an already existing project)
    if not new_project:
        print("%s Status: Check tool progress ..." % (getTime()))
        main_dirs = [
            input_parameters[key]
            for key in ('index_dir_path', 'align_dir_path', 'quant_dir_path')
        ]
        for main_dir in main_dirs:
            if debug:
                debug_list.append(['mainDirs', main_dir])
            # every sub-directory counts as a job that was already run
            for job_name in os.listdir(main_dir):
                sub_dir = '%s/%s' % (main_dir, job_name)
                if debug:
                    debug_list.append(['subDirs', sub_dir])
                if not os.path.isdir(sub_dir):
                    continue
                quoted_name = '{:<15}'.format("\'%s\'" % (job_name))
                print("%s Warning: Job " % getTime() + quoted_name +
                      " ... is already done!")
                progress_data.append(job_name)
                if debug:
                    debug_list.append([job_name, job_name in progress_data])

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    return input_parameters, progress_data
示例#10
0
def makeToolDirectories(job_requests, input_parameters, job_type, debug):
    """Prepare the job folders for all requested jobs of one type.

    Jobs are selected by the suffix ``_<job_type>``. For each of them the
    folder ``<{job_type}_dir_path>/<job>`` is stored in ``input_parameters``
    under ``<job>_dir``; what happens next depends on the job state:

    * ``'request'``, ``'new'``: the folder is created if it is missing; an
      existing folder is an error for ``'request'``.
    * ``'overwrite'``: for ``align``/``quant`` jobs the user decides whether
      everything is removed (state stays ``'overwrite'``) or each part is
      decided later (state becomes ``'ask'``). With ``'overwrite'`` the folder
      is removed through ``removeJob`` and, if it is then missing, created
      again; the state becomes ``'request'``.
    * ``'required'``: the folder must already exist.
    * ``'skip'``: the state becomes ``'skipped'``.

    All other states are left untouched.

    Args:
        job_requests: Mapping ``job -> state``.
        input_parameters: Parameter dict; ``<job_type>_dir_path`` is read and
            ``<job>_dir`` entries are written.
        job_type: Job type suffix used for selection (``'align'`` and
            ``'quant'`` get the extra overwrite question).
        debug: If true, collect debug rows, print them and pause.

    Returns:
        Tuple ``(job_list, input_parameters)`` where ``job_list`` maps the
        selected jobs to their updated state.

    Raises:
        SystemExit: A required folder is missing or a folder exists that must
            not exist (an error message is printed).
    """
    # debug bookkeeping
    fmt = '{:<17}'
    debug_list = []
    if debug:
        debug_list.extend([['def: makeToolDirectories():'], ['-'],
                           ['job', 'setting'], ['-']])

    # select all jobs of the requested type
    type_suffix = re.compile(r'_%s$' % job_type)
    job_list = {}
    for job in job_requests.keys():
        if type_suffix.search(job):
            job_list[job] = job_requests[job]
            if debug:
                debug_list.append([job, job_list[job]])

    # job options: 'request', 'overwrite', 'skip', 'skipped', 'finished', 'required', 'ask', 'new'
    if debug:
        debug_list.append(['-'])
    for job in job_list.keys():
        job_folder = '%s/%s' % (input_parameters['%s_dir_path' % job_type],
                                job)
        input_parameters['%s_dir' % job] = job_folder

        if job_list[job] == 'skip':
            job_list[job] = 'skipped'
            if debug:
                debug_list.append([job, job_list[job]])
            continue
        if job_list[job] not in ('request', 'overwrite', 'required', 'new'):
            continue

        # alignment and quantification jobs: ask whether everything is redone
        if job_list[job] == 'overwrite' and job_type in ('align', 'quant'):
            print("%s Warning: You requested \'overwrite\' for job \'%s\'!" %
                  (getTime(), job))
            overwrite_all = userCall(
                "Type: y[es] to remove all / n[o] to decide for each job part",
                False)
            job_list[job] = 'overwrite' if overwrite_all else 'ask'
        # remove the old folder when overwriting
        if job_list[job] == 'overwrite':
            removeJob(job_folder, [''], debug)

        state = job_list[job]
        if not os.path.isdir(job_folder):
            if state in ('request', 'overwrite', 'new'):
                print("%s Status: Create \'%s\' folder ..." % (getTime(), job))
                os.mkdir(job_folder)
                if state == 'overwrite':
                    job_list[job] = 'request'
                if debug:
                    debug_list.append([job, job_list[job]])
            elif state == 'required':
                if debug:
                    printDebug(debug_list, fmt)
                print("Error: \'%s\' is required but doesn't exist!" %
                      (job_folder))
                sys.exit()
            else:
                if debug:
                    printDebug(debug_list, fmt)
                print("Error: \'%s\' already exists!" % (job_folder))
                sys.exit()
        elif state not in ('required', 'ask', 'new'):
            # an existing folder is only accepted for the states above
            if debug:
                printDebug(debug_list, fmt)
            print("Error: \'%s\' already exists!" % (job_folder))
            sys.exit()

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    return job_list, input_parameters
示例#11
0
def runQsubJobs(job_list, input_parameters, bash_config, align_files,
                fq1_files, fq2_files, job_type, debug):
    """Submit the requested jobs of one type with qsub and wait for them.

    A queue of jobs is built from ``job_list`` and worked off in a polling
    loop: each pass submits what can be submitted through
    ``runQsubCommand``, sleeps for ``sleep_time_qs`` seconds and then reads
    the output of ``qstat``. The loop ends when the queue is empty and
    ``qstat`` prints nothing.

    * ``index`` jobs: ``job_list`` maps ``job -> state``; jobs in state
      ``'request'`` are submitted. ``TOPHAT_index`` is put back into the
      queue as long as a ``BOWTIE_index`` job is queued or shows up in
      ``qstat``.
    * ``align`` / ``quant`` jobs: ``job_list`` maps
      ``job -> {file_name: state}``; files in state ``'overwrite'``,
      ``'request'`` or ``'new'`` are submitted. For ``quant`` jobs the
      alignments listed in ``align_files[file_name]`` are combined first
      (``makeCombAlign``) when there is more than one, and a reference file
      is written with ``writeQuantRef``.

    If ``single_tool_job`` is ``'single'`` or ``'true'`` (case-insensitive),
    a tool is not given a second submission while ``qstat`` still lists it.

    Args:
        job_list: Job states, shaped as described above; updated in place.
        input_parameters: Parameter dict; many per-submission entries are
            written into it before the bash template is rendered.
        bash_config: Mapping ``job -> bash configuration``.
        align_files: Mapping ``file_name -> list of alignment names``
            (used for ``quant`` jobs).
        fq1_files: Mapping of names to first-read fastq files.
        fq2_files: Mapping of names to second-read fastq files.
        job_type: ``'index'``, ``'align'`` or ``'quant'``.
        debug: If true, collect debug rows, print them and pause.

    Returns:
        The updated ``job_list``.
    """
    # initialize debug
    debug_list = []
    fmt = '{:<17}'
    if debug:
        debug_list.append(['def: runQsubJobs():'])
        debug_list.append(['-'])
        if job_type == 'align':
            debug_list.append(['job', 'fastq', 'mode'])
        if job_type == 'index':
            debug_list.append(['job', 'mode'])
        if job_type == 'quant':
            debug_list.append(['job', 'combination', 'mode'])
        debug_list.append(['-'])

    # initialize parameters
    file_status = {}
    job_queue = []
    q_running = False
    bowtie_running = 'BOWTIE_index' in job_list
    TOOLS_running = []

    # make job queues
    print("%s Status: Create queue ..." % (getTime()))
    for job in job_list.keys():
        queue = []
        # save jobs directly when index is requested
        if job_type == 'index':
            if job_list[job] == 'request':
                job_queue.append(job)
        # save fastq jobs when align is requested or combi align jobs when quant is requested
        elif job_type in ['align', 'quant']:
            for file_name in job_list[job].keys():
                if job_list[job][file_name] in ['overwrite', 'request', 'new']:
                    queue.append(file_name)
            file_status[job] = queue
            job_queue.append(job)

    # run jobs
    print("%s Status: Run jobs ..." % (getTime()))
    while job_queue or q_running:
        # set user naming
        user_naming = ''
        # keep polling until qstat reports nothing (see end of the loop)
        q_running = True
        # only one running submission per tool when this option is active
        single_tool_job = (input_parameters['single_tool_job'].lower()
                           in ['single', 'true'])
        # print tool queue
        print('%s Queue: %s' % (getTime(), job_queue))
        # take first job from queue
        job = job_queue.pop(0) if job_queue else ''
        # start qsub for index jobs
        if job_type == 'index' and job:
            # append TOPHAT_index to queue if bowtie isn't done yet
            if job == 'TOPHAT_index' and bowtie_running:
                job_queue.append(job)
            # start index job
            else:
                if debug:
                    debug_list.append([job, job_list[job]])
                runQsubCommand(job, input_parameters, bash_config[job], debug)
                job_list[job] = 'running'
                if debug:
                    debug_list.append([job, job_list[job]])
                if single_tool_job:
                    TOOLS_running.append(job)
        elif job_type in ['align', 'quant'] and job:
            queue = file_status.pop(job) if file_status else []
            # print queue
            print('%s %s: %s' %
                  (getTime(), re.match(r'[A-Z]+', job).group(0), queue))
            # submit quantification or alignment jobs
            while queue and not (job in TOOLS_running):
                file_name = queue.pop(0)
                if debug:
                    debug_list.append(
                        [job, file_name, job_list[job][file_name]])
                if job_type == 'align':
                    input_parameters['fastq1_file'] = fq1_files[file_name]
                    input_parameters['fastq2_file'] = fq2_files[file_name]
                if job_type == 'quant':
                    # TODO: get to work with combi alignments (kallisto)
                    input_parameters['fastq1_file'] = fq1_files[
                        align_files[file_name][0]]
                    input_parameters['fastq2_file'] = fq2_files[
                        align_files[file_name][0]]
                    # the aligner name is the part of file_name before the first '_'
                    input_parameters['ALIGNER_align_dir'] = '%s/%s_align' % (
                        input_parameters['align_dir_path'],
                        file_name.partition('_')[0])
                    input_parameters['alignment_prefix'] = align_files[
                        file_name][0]
                    # space separated list of all alignment files of this combination
                    combi_string = ''
                    for align_name in align_files[file_name]:
                        combi_string = '%s%s/%s.bam ' % (
                            combi_string,
                            input_parameters['ALIGNER_align_dir'], align_name)
                    combi_string = combi_string.strip()
                    input_parameters['combi_align_list'] = combi_string
                    input_parameters['comb_out_prefix'] = file_name
                    if job in input_parameters['quant_name_list'].keys():
                        user_naming = '_%s' % input_parameters[
                            'quant_name_list'][job]
                    else:
                        user_naming = '_default'
                    input_parameters['quant_ref_file'] = '%s/%s/%s%s.ref' % (
                        input_parameters['quant_dir_path'], job, file_name,
                        user_naming)
                input_parameters['%s_out_prefix' %
                                 job_type] = '%s%s' % (file_name, user_naming)
                input_parameters['%s_out_dir' % job_type] = '%s/%s/%s%s' % (
                    input_parameters['%s_dir_path' % job_type], job, file_name,
                    user_naming)
                # start qsub for alignment and quantification jobs
                if job_list[job][file_name] in ['overwrite', 'request', 'new']:
                    if job_type == 'quant':
                        if len(align_files[file_name]) > 1:
                            makeCombAlign(input_parameters)
                            input_parameters['comb_in_file'] = '%s/%s.bam' % (
                                input_parameters['comb_dir_path'],
                                input_parameters['comb_out_prefix'])
                        else:
                            input_parameters['comb_in_file'] = '%s/%s.bam' % (
                                input_parameters['ALIGNER_align_dir'],
                                input_parameters['alignment_prefix'])
                        # create quantification parameter ref file
                        writeQuantRef(input_parameters, bash_config[job])
                    qsub_name = '%s-%s' % (
                        job, input_parameters['%s_out_prefix' % job_type])
                    runQsubCommand(qsub_name, input_parameters,
                                   bash_config[job], debug)
                    job_list[job][file_name] = 'running'
                    if debug:
                        debug_list.append(
                            [job, file_name, job_list[job][file_name]])
                # force only one job per tool (this ends the inner loop)
                if single_tool_job:
                    TOOLS_running.append(job)
            # save queue if it isn't empty (for when single_tool_job option is active)
            if queue:
                file_status[job] = queue
                job_queue.append(job)
        # print tool status
        if single_tool_job:
            print('%s Running: %s' % (getTime(), TOOLS_running))
        print('%s Qstat: ... working ...' % getTime())
        # wait before polling qstat, unless jobs are queued and nothing
        # (neither bowtie nor a single-mode tool) blocks their submission
        if not job_queue or bowtie_running or TOOLS_running:
            time.sleep(int(input_parameters['sleep_time_qs']))
        # get qstat job info (as text, so the regex searches below also work
        # on Python 3)
        out, _ = subprocess.Popen('qstat',
                                  stdout=subprocess.PIPE,
                                  universal_newlines=True).communicate()
        # set bowtie running to false if BOWTIE job is not in the queue and not running anymore
        if ('BOWTIE_index' not in job_queue) and not re.search('BOWTIE', out):
            bowtie_running = False
        # keep only the tools qstat still lists; the list is rebuilt because
        # removing items while iterating over it skips entries
        TOOLS_running = [
            active for active in TOOLS_running
            if re.search(active[:8], out)
        ]
        # check if any job is running
        if not out:
            q_running = False

    # there was an error with qsub ... exiting
    # TODO: make specific error detection (make new file inside bash script)
    # and only dismiss dependent jobs
    if os.path.isfile('%s/qsub.error' % (input_parameters['qsub_dir_path'])):
        print(
            "%s Warning: There was an error with qsub! Check qsub_data/qsub.error for more information."
            % getTime())
        if input_parameters['ignore_qs_error'].lower() not in [
                'ignore', 'true'
        ]:
            userCall("Continue? y[es] / n[o]", True)

    # update job info
    # NOTE(review): 'quant' entries are not switched from 'running' to
    # 'finished' here - confirm whether that is intended
    if debug:
        debug_list.append(['-'])
    for job in job_list.keys():
        if job_type == 'index':
            if job_list[job] == 'running':
                job_list[job] = 'finished'
            if debug:
                debug_list.append([job, job_list[job]])
        elif job_type == 'align':
            for fq_name in job_list[job].keys():
                if job_list[job][fq_name] == 'running':
                    job_list[job][fq_name] = 'finished'
                if debug:
                    debug_list.append([job, job_list[job]])
                    debug_list.append([job, fq_name, job_list[job][fq_name]])

    # print debug
    if debug:
        printDebug(debug_list, fmt)
        userCall("Continue? y[es] / n[o]", True)

    return job_list
示例#12
0
def checkProgress(job_list, job_parts, input_parameters, job_type, debug):
    """Work out, per tool job, which job parts still have to be processed.

    The state stored in ``job_list[job]`` decides what happens to a job:

    * ``'ask'`` / ``'new'``: the tool folder
      ``input_parameters['<job_type>_dir_path']/<job>`` is listed to find
      parts that already exist.  Missing parts are marked ``'request'`` and
      get a new folder.  Existing parts are marked ``'skipped'`` for
      ``'new'``; for ``'ask'`` the user decides between ``'overwrite'``
      (old data removed through ``removeJob``, folder recreated) and
      ``'skip'``.
    * ``'overwrite'`` / ``'request'``: a folder is created for every
      eligible part and the part inherits the job state.
    * ``'skipped'``: the job is left out of the result.
    * any other state: the job is returned with an empty part dict.

    For ``job_type == 'quant'`` a part is only eligible when the string
    ``'<job prefix>_<part prefix>'`` (text before the first underscore of
    each name) is contained in ``input_parameters['quant_align_combi_list']``,
    and part folders carry a name tag: ``'_<name>'`` taken from
    ``input_parameters['quant_name_list'][job]`` or ``'_default'``.

    Args:
        job_list: dict mapping job name -> requested state.
        job_parts: dict whose keys are the job part names.
        input_parameters: dict with the settings described above.
        job_type: ``'index'``, ``'align'`` or ``'quant'``.
        debug: when true, a debug table is printed and the user is asked
            whether to continue.

    Returns:
        dict mapping job name -> {part name: status}.
    """

    # debug table; rows are only collected in debug mode
    debug_list = []
    col_format = '{:<15}'

    def note(*cells):
        # add one row to the debug table
        if debug:
            debug_list.append(list(cells))

    note('def: checkProgress():')
    note('-')
    note('job', 'fastq', 'status', 'loop')
    note('-')

    todo_jobs = {}
    quant_align_combi_list = input_parameters['quant_align_combi_list']

    def part_allowed(tool, part_name):
        # index/align jobs take every part, quant jobs only the listed
        # tool/aligner pairs
        if job_type in ('index', 'align'):
            return True
        if job_type != 'quant':
            return False
        pair = '%s_%s' % (tool.partition('_')[0],
                          part_name.partition('_')[0])
        return pair in quant_align_combi_list

    def make_part_dir(tool, part_name, name_tag, target):
        # announce and create the folder of a single job part
        print("%s Status: Create \'%s%s\' folder for \'%s\' ..." %
              (getTime(), part_name, name_tag, tool))
        os.mkdir(target)

    for job in job_list.keys():
        state = job_list[job]

        # names under which existing results of this job are recognised
        known_parts = job_parts.keys()
        user_naming = ''
        if job_type == 'quant':
            # quantification folders are tagged, so compare tagged names
            known_parts = []
            for raw_name in job_parts.keys():
                if job in input_parameters['quant_name_list'].keys():
                    # user defined name tag
                    user_naming = '_%s' % input_parameters[
                        'quant_name_list'][job]
                else:
                    # no user defined tag was given for this job
                    user_naming = '_default'
                tagged_name = '%s%s' % (raw_name, user_naming)
                pair = '%s_%s' % (job.partition('_')[0],
                                  tagged_name.partition('_')[0])
                if pair in quant_align_combi_list:
                    known_parts.append(tagged_name)

        job_requests = {}

        if state in ('ask', 'new'):
            # collect the parts that already exist on disk
            job_tool_dir = '%s/%s' % (
                input_parameters['%s_dir_path' % job_type], job)
            for entry in os.listdir(job_tool_dir):
                stem = os.path.basename(os.path.splitext(entry)[0])
                note(job, entry, stem, 'basename')
                if stem not in known_parts:
                    continue
                if job_type == 'quant':
                    # drop the name tag again to get back the part name
                    stem = re.sub(r'\_[a-zA-Z0-9]+$', '', stem)
                job_requests[stem] = ''
                note(job, entry, stem, 'bname in keys')
            note('-')

            # decide what to do with every requested part
            for part_name in job_parts.keys():
                if not part_allowed(job, part_name):
                    continue
                job_string = '%s/%s%s' % (job_tool_dir, part_name,
                                          user_naming)
                if part_name not in job_requests:
                    # nothing on disk yet -> plain request
                    job_requests[part_name] = 'request'
                    make_part_dir(job, part_name, user_naming, job_string)
                    note(job, part_name, job_requests[part_name], 'new job')
                elif state == 'ask':
                    # part exists -> let the user decide
                    print(
                        "%s Warning: Job part \'%s%s\' for \'%s\' already exists!"
                        % (getTime(), part_name, user_naming, job))
                    overwrite = userCall(
                        "Type: y[es] to overwrite / n[o] to skip this job part",
                        False)
                    if overwrite:
                        removeJob(job_string, ['', '.sam', '.bam', '.ref'],
                                  debug)
                        job_requests[part_name] = 'overwrite'
                        make_part_dir(job, part_name, user_naming,
                                      job_string)
                    else:
                        job_requests[part_name] = 'skip'
                    note(job, part_name, job_requests[part_name],
                         'job exists')
                else:
                    # 'new' was requested and the part exists already
                    job_requests[part_name] = 'skipped'
                    note(job, part_name, job_requests[part_name], 'else')
            note('-')

        elif state in ('overwrite', 'request'):
            # every eligible part is (re)done without asking
            for part_name in job_parts.keys():
                if not part_allowed(job, part_name):
                    continue
                job_string = '%s/%s/%s%s' % (
                    input_parameters['%s_dir_path' % job_type], job,
                    part_name, user_naming)
                make_part_dir(job, part_name, user_naming, job_string)
                job_requests[part_name] = state
                note(job, part_name, job_requests[part_name],
                     'overwrite or request')

        # jobs flagged 'skipped' do not show up in the result
        if state != 'skipped':
            todo_jobs[job] = job_requests
        note('-')

    # add todo_jobs debug
    note('-')
    if debug:
        for job, parts in todo_jobs.items():
            for part in parts:
                debug_list.append([job, part, parts[part], 'todo_jobs'])

    # print debug
    if debug:
        printDebug(debug_list, col_format)
        userCall("Continue? y[es] / n[o]", True)

    return todo_jobs
示例#13
0
def checkJobRequests(progress_data, job_requests, input_parameters, debug):
    """Decide for every requested job whether it is run, skipped or redone.

    A job that is not contained in ``progress_data`` is set to ``'request'``.
    A job that is contained is treated as already done:

    * ``'AFREE_align'`` is always set to ``'skip'``;
    * otherwise the decision comes, in this order, from the config lists
      ``input_parameters['user_set_over' / 'user_set_skip' / 'user_set_new']``
      (comma separated job names), from an earlier "... all" answer for the
      same job type (text after the last underscore of the job name), or
      from asking the user.  Accepted answers (case insensitive) are
      ``s[kip]``, ``o[verwrite]`` and ``n[ew]``, each optionally followed by
      `` all``; ``a[bort]`` ends the script through ``sys.exit()``.

    Args:
        progress_data: container of the jobs that were already done.
        job_requests: dict keyed by job name; its values are overwritten
            in place.
        input_parameters: dict holding the three ``user_set_*`` strings.
        debug: when true, a debug table is printed and the user is asked
            whether to continue.

    Returns:
        The updated ``job_requests`` dict (same object that was passed in).
    """

    # initialize debug
    debug_list = []
    col_format = '{:<15}'
    if debug:
        debug_list.extend([
            ['def: checkJobRequests():'],
            ['-'],
            ['job', 'requests[job]', 'progress[job]', 'skip all', 'over all'],
            ['-'],
        ])

    # remembered "... all" answers, one slot per job type
    do_for_all = dict.fromkeys(
        ['for_all_index', 'for_all_align', 'for_all_quant', 'for_all_new'],
        '')

    # every accepted spelling of an answer mapped to its action
    # skip      -> skip all <TOOL> jobs
    # overwrite -> overwrite all <TOOL> jobs (alignment/quantification jobs
    #              are still asked for individually)
    # new       -> only make jobs which aren't done yet
    user_options = {}
    for action in ('skip', 'overwrite', 'new'):
        for spelling in (action[0], action, '%s all' % action[0],
                         '%s all' % action):
            user_options[spelling] = action
    apply_to_all = [alias for alias in user_options if alias.endswith(' all')]

    # answers preset in the config (user_set_over / _skip / _new)
    conf_settings = {}
    for opt in ('over', 'skip', 'new'):
        for name in input_parameters['user_set_%s' % opt].split(','):
            if name:
                conf_settings[name] = opt[0]

    def announce(name):
        # report the decision taken for one job
        print("%s Status: Setting for job " % getTime() +
              '{:<15}'.format("\'%s\'" % (name)) + " -> \'%s\'!" %
              (job_requests[name]))

    for job in job_requests.keys():
        job_done = job in progress_data

        # job not yet done
        if not job_done:
            job_requests[job] = 'request'
            announce(job)
            if debug:
                debug_list.append([job, job_requests[job], job_done, '', ''])
            continue

        # finished AFREE alignments are never redone
        if job == 'AFREE_align':
            job_requests[job] = 'skip'
            continue

        # first answer: config setting, remembered answer or user prompt
        all_job = 'for_all_%s' % job.rpartition('_')[2]
        if job in conf_settings:
            user_input = conf_settings[job]
        elif do_for_all[all_job]:
            user_input = do_for_all[all_job]
        else:
            user_input = raw_input(
                "%s Warning: What to do with existing \'%s\' job \'%s\'? a[bort] / s[kip] / o[verwrite] / n[ew] [all]: "
                % (getTime(), job[-5:], job))

        # keep asking until the answer is understood
        while True:
            answer = user_input.lower()
            # remember "... all" answers for the following jobs of this type
            if answer in apply_to_all:
                do_for_all[all_job] = user_options[answer]
            if answer in user_options:
                job_requests[job] = user_options[answer]
                announce(job)
                if debug:
                    debug_list.append([
                        job, job_requests[job], job_done, user_input,
                        do_for_all[all_job]
                    ])
                break
            if answer in ('a', 'abort'):
                if debug:
                    printDebug(debug_list, col_format)
                print("%s Warning: Script interrupted by user ... " %
                      getTime())
                sys.exit()
            user_input = raw_input(
                "%s Warning: Wrong input! Use a[bort] / s[kip] / o[verwrite] / n[ew] [all]: "
                % getTime())

    # print debug
    if debug:
        printDebug(debug_list, col_format)
        userCall("Continue? y[es] / n[o]", True)

    # return updated job_requests
    return job_requests
示例#14
0
def getArguments(input_files, input_options, input_parameters, debug):
    """Collect, complete and validate the arguments of a run.

    The steps are, in order:

    1. split the project option into directory prefix and project name
       (``checkArgsProject``);
    2. copy every existing command line input file except the config file
       into ``input_parameters``; a given file that does not exist ends the
       script;
    3. use the command line prefix option when the project option did not
       supply a prefix; a prefix that is not a directory ends the script;
    4. merge the config file (``getArgsConfig``) and, if
       ``input_parameters['test_call']`` is set, point prefix, genome, gtf
       and fastq lists to the bundled test files below
       ``input_parameters['arq_path']``;
    5. validate everything (``checkArguments``) and derive
       ``index_base_name`` from the genome fasta name if it is not set;
    6. load the fastq link lists (``getArgsFastq``) and, for paired data,
       warn when the two mates of a sample differ in more than their
       ``_1``/``_2`` (``.fq.gz``) or ``-1``/``-2`` (``.fastq.gz``) suffix;
    7. store the project folder as ``proj_dir_path``.

    Args:
        input_files: dict mapping file type -> path given on the command
            line (may be empty/None); must contain the key ``'config'``.
        input_options: dict with at least ``'project'`` and ``'prefix'``.
        input_parameters: dict of parameters; updated in place and also
            replaced by the return value of ``getArgsConfig``.
        debug: when true, a debug table is printed and the user is asked
            whether to continue.

    Returns:
        Tuple ``(input_parameters, fastq1_files, fastq2_files, fastq_ref)``.
        ``fastq2_files`` is an empty dict when no ``'fastq2_links'`` list is
        configured (single end data).
    """

    # initialize debug
    debug_list = []
    debug_format = '{:<17}'
    if debug: debug_list.append(['def: getArguments():'])
    if debug: debug_list.append(['-'])
    if debug: debug_list.append(['key', 'value'])
    if debug: debug_list.append(['-'])

    # check project argument and save new project name if project had a path
    input_parameters['proj_dir_prefix'], input_parameters[
        'p_name_suffix'] = checkArgsProject(input_options['project'], debug)
    if debug:
        debug_list.append(['p_name_suffix', input_parameters['p_name_suffix']])

    # check for command line input files and save if valid
    # TODO: do for input_options
    # .items() instead of .iteritems() so the loop runs on Python 2 and 3
    for file_type, file_path in input_files.items():
        if not file_path:
            continue
        if not os.path.isfile("%s" % (file_path)):
            print("Error: No \'%s\' file named \'%s\'!" %
                  (file_type, file_path))
            sys.exit()
        # the config file is read separately further down
        if file_type != 'config':
            input_parameters[file_type] = file_path
            if debug: debug_list.append([file_type, file_path])

    # check command line prefix option and save if not already declared
    prefix = input_options['prefix']
    if prefix and not os.path.isdir(prefix):
        print("Error: \'%s\' is not a directory!" % (prefix))
        sys.exit()
    elif prefix and not input_parameters['proj_dir_prefix']:
        input_parameters['proj_dir_prefix'] = prefix

    # get config arguments
    print("%s Status: Save config parameters ..." % (getTime()))
    input_parameters = getArgsConfig(input_files['config'], input_parameters,
                                     debug)
    if debug:
        debug_list.append(
            ['proj_dir_prefix', input_parameters['proj_dir_prefix']])

    # configure test data if test was requested
    if input_parameters['test_call']:
        test_dir = '%s/test_files' % input_parameters['arq_path']
        input_parameters['proj_dir_prefix'] = test_dir
        input_parameters['fasta_gen_file'] = '%s/EF204940.fa' % test_dir
        input_parameters['gtf_index_file'] = '%s/ef204940.gtf' % test_dir
        input_parameters[
            'fastq1_links'] = '%s/arq_proj_test.fq1.info' % test_dir
        input_parameters[
            'fastq2_links'] = '%s/arq_proj_test.fq2.info' % test_dir

    # define default parameters
    # TODO: make definition

    # validate arguments
    checkArguments(input_parameters, debug)

    # define index base name without path
    if 'index_base_name' not in input_parameters:
        input_parameters['index_base_name'] = os.path.basename(
            os.path.splitext(input_parameters['fasta_gen_file'])[0])
    if debug:
        debug_list.append(
            ['index_base_name', input_parameters['index_base_name']])

    # define tool directories
    # TODO: enable global tool command

    # save and validate fastq files
    print("%s Status: Save fastq files ..." % (getTime()))
    # TODO: make option for automatic fastq2 file read and enable
    #       'paired end' option as input parameter
    fastq1_files, fastq_ref = getArgsFastq(input_parameters['fastq1_links'],
                                           input_parameters, debug)
    # stays empty for single end data; without this default the return
    # statement below failed with an UnboundLocalError
    fastq2_files = {}
    if input_parameters['fastq2_links']:
        fastq2_files, _unused_ref = getArgsFastq(
            input_parameters['fastq2_links'], input_parameters, debug)
        for fq_name in fastq1_files.keys():
            mate1 = fastq1_files[fq_name]
            mate2 = fastq2_files[fq_name]
            # both mates must be identical once the read number is removed
            # (dots escaped so that they only match a literal '.')
            same_fq = (re.sub(r'_1\.fq\.gz$', '', mate1) == re.sub(
                r'_2\.fq\.gz$', '', mate2))
            same_fastq = (re.sub(r'-1\.fastq\.gz$', '', mate1) == re.sub(
                r'-2\.fastq\.gz$', '', mate2))
            if not (same_fq or same_fastq):
                print(
                    "%s Warning: The links of \'%s\' are not equal after substituting the enumeration!"
                    % (getTime(), fq_name))
                # TODO: DO TEST
                userCall("Continue? y[es] / n[o]", True)

    # declare project folder
    input_parameters['proj_dir_path'] = '%s/%s' % (
        input_parameters['proj_dir_prefix'], input_parameters['p_name_suffix'])
    if debug:
        debug_list.append(['proj_dir_path', input_parameters['proj_dir_path']])

    # print debug
    if debug: printDebug(debug_list, debug_format)
    if debug: userCall("Continue? y[es] / n[o]", True)

    # return arguments
    return input_parameters, fastq1_files, fastq2_files, fastq_ref