Пример #1
0
def main(argv, errorlogger = None, runcommand = None, runstatslogger = None):
    """Run the bowtie2 step and post-process the SAM file it writes.

    The options are parsed from ``argv`` with the module-level ``parser``.
    After a successful ``_execute_bowtie2`` call the SAM file named by
    ``options.samout`` is converted to BAM and passed to
    ``sample_down_transform_sam`` together with the derived ``.bed``,
    ``.sample_down.sam`` and ``.sample_down.bed`` file names.

    Parameters:
        argv: list of command line arguments handed to ``parser.parse_args``.
        errorlogger: optional logger; when given, the failure message is also
            written through its ``printf`` method.
        runcommand, runstatslogger: accepted for interface compatibility,
            not used in this function.

    Returns:
        0 on success, otherwise the non-zero code returned by
        ``_execute_bowtie2``.
    """
    options, args = parser.parse_args(argv)

    (code, message) = _execute_bowtie2(options, logger=errorlogger)

    # Stop right here on failure: the steps below read options.samout and
    # would otherwise run on a missing or truncated alignment file.
    if code != 0:
        outputStr = '\nERROR\tCannot successfully execute\n'

        eprintf(outputStr + "\n")

        if errorlogger:
            errorlogger.printf(outputStr + "\n")
        return code

    # The dot is escaped so that only a literal ".sam" suffix is removed.
    nameprefix = re.sub(r'\.sam$', '', options.samout)

    _convert_to_bam(options.samout, nameprefix + ".bam")

    bedout = nameprefix + ".bed"
    bedout_sampledown = nameprefix + ".sample_down.bed"
    samout_sampledown = nameprefix + ".sample_down.sam"

    # NOTE(review): 5000000 is presumably the number of reads kept by the
    # down-sampling -- confirm against sample_down_transform_sam.
    sample_down_transform_sam(options.samout, bedout, samout_sampledown,
                              bedout_sampledown, 5000000, options.q30filter)

    return 0
Пример #2
0
def halt_on_invalid_input(input_output_list, filetypes, sample_subset):
    """Tell whether every selected sample has a recognised file type.

    Parameters:
        input_output_list: mapping from a sample input path to another path
            whose base name is used as the sample name.
        filetypes: mapping from the same input path to a sequence whose first
            element is the detected type; 'UNKNOWN' marks a bad input.
        sample_subset: container of sample names that should be checked.

    Returns:
        False (after reporting the offending path through eprintf) for the
        first selected sample of type 'UNKNOWN', True otherwise.
    """
    for sample_path, mapped_path in input_output_list.items():
        # Samples outside the selected list are not validated at all.
        if path.basename(mapped_path) not in sample_subset:
            continue

        if filetypes[sample_path][0] != 'UNKNOWN':
            continue

        eprintf(
            "ERROR\tIncorrect input sample %s. Check for bad characters or format\n!",
            sample_path)
        return False

    return True
Пример #3
0
def parse_multiseq_parameters(filename):
    """Parse a parameter file into a two-level dictionary.

    Blank lines and lines starting with '#' are ignored.  Every other line
    is split on whitespace; the first field must have the form
    ``script_id:parameter_id`` and the remaining fields are joined with
    commas to form the value.  Lines whose first field does not split into
    exactly two parts on ':' are reported through eprintf and skipped.

    Parameters:
        filename: path of the parameter file to read.

    Returns:
        dict such that ``result[script_id][parameter_id]`` is the value
        string.  An empty dict is returned if the file cannot be opened and
        ``exit_process`` hands control back to the caller.
    """
    try:
        filep = open(filename, 'r')
    except (IOError, OSError):
        eprintf("ERROR: cannot open the parameter file " + sQuote(filename) ) 
        exit_process("ERROR: cannot open the parameter file " + sQuote(filename), errorCode = 0 ) 
        # Only reached when exit_process returns instead of terminating.
        return {}

    # The context manager closes the handle even if reading fails.
    with filep:
        lines = filep.readlines()

    result = {}
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue

        fields = line.split()
        try:
            script_id, parameter_id = fields[0].split(':')
        except ValueError:
            eprintf("ERROR\tInvalid line %s in file %s\n", line, filename)
            continue

        # split() already removed all surrounding whitespace from the fields
        value = ','.join(fields[1:])
        value = re.sub(',,', ',', value)
        # NOTE(review): the exit code is set to 1 after every successfully
        # parsed line, exactly as before -- confirm this is intended and
        # not meant for the invalid-line branch.
        globalcodes.exit_code = 1

        result.setdefault(script_id, {})[parameter_id] = value

    return result
Пример #4
0
def _run_step_and_log(s, c, verbose, success_note, no_input_status, failure_text):
    """Run step ``c`` if its inputs are available and log the outcome.

    Writes RUNNING followed by SUCCESS or FAILED to ``s.stepslogger`` when
    the step is executed, or ``no_input_status`` when inputs are missing.
    """
    if not c.isInputAvailable(errorlogger=s.errorlogger):
        eprintf('..... Skipping [NO INPUT]!\n')
        if verbose:
            missingList = c.getMissingList(errorlogger=s.errorlogger)
            printList('MISSING INPUT LIST', missingList)

        s.stepslogger.write('%s\t%s\n' % (c.name, no_input_status))
        return

    s.stepslogger.write('%s\t%s\n' % (c.name, "RUNNING"))

    # Default result; its message is what gets logged if execute() raises.
    result = [0, failure_text]
    try:
        result = execute(s, c)
    except Exception:
        # Only ordinary errors mark the step as failed; SystemExit and
        # KeyboardInterrupt propagate so a termination request is honoured.
        s.errorlogger.printf("ERROR\t%s\n", result[1])
        result[0] = 1

    if result[0] == 0:
        eprintf(success_note)
        s.stepslogger.write('%s\t%s\n' % (c.name, "SUCCESS"))
    else:
        eprintf('..... Failed!\n')
        s.stepslogger.write('%s\t%s\n' % (c.name, "FAILED"))


def execute_tasks(s, verbose=0, block=0):
    """Run the steps of one context block, one after another.

    Each step is handled according to its ``status``:
      * 'stop' -- log STOPPED and return immediately;
      * 'redo' -- remove the step's output, then run it if inputs exist;
      * 'yes'  -- run it only when its output is not already available;
      * 'skip' -- log SKIPPED.
    Steps with any other status are announced but otherwise ignored.

    Parameters:
        s: object providing getContextBlocks(), stepslogger and errorlogger.
        verbose: at >= 0 the step name and commands are printed, at >= 1 the
            input and output lists as well.
        block: index into the list returned by ``s.getContextBlocks()``.

    Returns:
        ``(0, '')`` when a 'stop' step is reached, otherwise None once all
        steps of the block have been processed.
    """
    contextBlock = s.getContextBlocks()[block]

    for c in contextBlock:
        if c.status == 'stop':
            print("Stopping!")
            s.stepslogger.write('%s\t%s\n' % (c.name, "STOPPED"))
            return (0, '')

        if verbose >= 0:
            eprintf("\n\n\nEXECUTING STEP : %s [%s]\n", c.name, c.status)
            eprintf("EXECUTING COMMAND : %s\n", ', '.join(c.commands))

        if verbose >= 1:
            printList('INPUT LIST', c.getInputList())
            printList('OUTPUT LIST', c.getOutputList())

        eprintf("%s" % (c.message))

        if c.status == 'redo':
            # A redo always starts from scratch, so old output goes first.
            c.removeOutput(s)
            _run_step_and_log(s, c, verbose,
                              '..... Redo Success!\n',
                              "MISSING_INPUT",
                              'Error while executing step ' + c.name)

        elif c.status == 'yes':
            if c.isOutputAvailable():
                eprintf('..... Already Computed!\n')
                s.stepslogger.write('%s\t%s\n' % (c.name, "ALREADY_COMPUTED"))
            else:
                _run_step_and_log(s, c, verbose,
                                  '..... Success!\n',
                                  "SKIPPED",
                                  'Error while executing  step ' + c.name)

        elif c.status == 'skip':
            eprintf('..... Skipping!\n')
            s.stepslogger.write('%s\t%s\n' % (c.name, "SKIPPED"))
Пример #5
0
def printList(listName, missingList):
    """Print a titled list through eprintf.

    The title ``listName`` is written first, followed by a colon, then
    every element of ``missingList`` on its own indented line.
    """
    eprintf("%s:\n", listName)
    for entry in missingList:
        eprintf("     %s\n", entry)
Пример #6
0
def main(argv, errorlogger=None, runcommand=None, runstatslogger=None):
    """Intersect the aligned reads with the gene annotations and build matrices.

    Steps, in order:
      1. ``_execute_bedtools_intersect`` is called once per annotation
         feature (symbol, cds, 3utr, 5utr, TTSdis) on the sample's ``.bed``
         file from ``options.alignment_dir``.
      2. ``combine_reads`` merges the per-feature results into
         ``<sample>.combined.bed``.
      3. The combined file is sorted with the shell ``sort`` command.
      4. ``generate_matrix`` writes the qcmatrix, qcmatrix_full and
         expmatrix text files.

    Parameters:
        argv: list of command line arguments handed to ``parser.parse_args``.
        errorlogger: optional logger; when given, failure messages are also
            written through its ``printf`` method.
        runcommand, runstatslogger: accepted for interface compatibility,
            not used in this function.

    Returns:
        0 on success, or the non-zero status of the first intersect or sort
        command that failed.  The process exits with status 3 if
        ``combine_reads`` or ``generate_matrix`` raises.
    """
    options, args = parser.parse_args(argv)

    def report_failure(status):
        """Print (and optionally log) the failure message, return status."""
        outputStr = '\nERROR\tCannot successfully execute\n'
        eprintf(outputStr + "\n")
        if errorlogger:
            errorlogger.printf(outputStr + "\n")
        return status

    reads_bed = options.alignment_dir + PATHDELIM + options.sample_name + ".bed"
    annotation_prefix = options.preprocess_dir + PATHDELIM + options.sample_name
    intersect_prefix = options.expression_mat_dir + PATHDELIM + options.sample_name

    # (feature, extra arguments appended to the bedtools call); the argument
    # strings, including their whitespace, are kept as in the original calls.
    intersections = [
        ("symbol", "-wo | sort -k 4,4 - "),
        ("cds", "-c | sort -k 4,4 -  "),
        ("3utr", "-c  | sort -k 4,4 -"),
        ("5utr", "-c | sort -k 4,4 - "),
        ("TTSdis", "-c | sort -k 4,4 -"),
    ]

    gene_on = {}
    for feature, extra_params in intersections:
        gene_on[feature] = intersect_prefix + ".gene_on_" + feature + ".bed"
        (code, message) = _execute_bedtools_intersect(
            reads_bed,
            annotation_prefix + ".gene_anno_" + feature + ".bed",
            gene_on[feature],
            additional_params=extra_params)
        # Check every call: previously only the last status was looked at,
        # and only after all downstream work had already been done.
        if code != 0:
            return report_failure(code)

    # NOTE(review): the paths below use pathDelim() while the ones above use
    # PATHDELIM, as in the original; presumably both give the same separator.
    output_prefix = options.expression_mat_dir + pathDelim() + options.sample_name

    combined_hits_bed = output_prefix + ".combined.bed"
    try:
        combine_reads(options.extracted_barcodes, gene_on["cds"],
                      gene_on["3utr"], gene_on["5utr"], gene_on["symbol"],
                      gene_on["TTSdis"], combined_hits_bed, 2)
    except Exception:
        # print_exc() writes the traceback itself and returns None.
        traceback.print_exc(10)
        sys.exit(3)

    combined_hits_bed_sorted = output_prefix + ".combined.sorted.bed"
    cmd = "sort -k 7,7 -k 5,5 %s > %s" % (combined_hits_bed,
                                          combined_hits_bed_sorted)
    result = getstatusoutput(cmd)
    # NOTE(review): assumes getstatusoutput returns (status, output) like
    # the standard library function of that name -- confirm.
    if result[0] != 0:
        return report_failure(result[0])

    qcmatrix = output_prefix + ".qcmatrix.txt"
    expmatrix = output_prefix + ".expmatrix.txt"
    qcmatrix_full = output_prefix + ".qcmatrix_full.txt"

    try:
        generate_matrix(options.gene_annotation_file, combined_hits_bed_sorted,
                        True, qcmatrix_full, qcmatrix, expmatrix, 2, True)
    except Exception:
        traceback.print_exc(10)
        sys.exit(3)

    return 0
Пример #7
0
def main(argv):
    """Entry point of the pipeline: validate the inputs and run the sample.

    The function parses ``argv`` with the module-level ``parser``, resolves
    the configuration and parameter files (command line value first, then a
    file in the current folder, then the one under ``cmd_folder``), creates
    either file when it is missing, checks that all input files and folders
    exist, and finally hands a populated ``SampleData`` object to
    ``run_multiseq``.

    Parameters:
        argv: list of command line arguments (without the program name,
            which is taken from ``sys.argv[0]`` for the COMMAND line).

    The function finishes by calling ``halt_process``; any exception raised
    while preparing or running the sample is passed, as a formatted
    traceback, to ``exit_process``.
    """
    # Parse the arguments we were given instead of silently falling back
    # to sys.argv.
    (opts, args) = parser.parse_args(argv)
    if not valid_arguments(opts, args):
        print(usage)
        sys.exit(0)

    eprintf("COMMAND : %s\n", sys.argv[0] + ' ' + ' '.join(argv))

    # initialize the input directory or file
    barcodes = opts.barcodes
    reads = opts.reads
    refgeneanot = opts.geneannot
    refgenome = opts.refgenome
    refindex_dir = opts.refindex_dir
    refindex_name = opts.refindex_name
    sample_name = opts.sample

    output_dir = path.abspath(opts.output_dir)
    verbose = opts.verbose
    globalerrorlogger = WorkflowLogger(generate_log_fp(
        output_dir, basefile_name='global_errors_warnings'),
                                       open_mode='w')

    if opts.config_file:  # if provided with command line
        config_file = opts.config_file
    elif path.exists(multiseq_config):  # if the file exists in the current folder
        config_file = multiseq_config
    else:  # otherwise get it from config/ folder
        config_file = cmd_folder + PATHDELIM + multiseq_config

    # Locate the parameter file.  Nothing here opens the file, so the former
    # "except IOError" wrapper (which raised a tuple) was dead code.
    if opts.parameter_fp:  # if provided with command line
        parameter_fp = opts.parameter_fp
    elif path.exists(multiseq_param_file):  # if it exists in current folder
        parameter_fp = multiseq_param_file
    else:  # otherwise get it from config/ folder
        # NOTE(review): this branch uses multiseq_param while the branch
        # above uses multiseq_param_file -- confirm both names are defined
        # and that the difference is intended.
        parameter_fp = cmd_folder + PATHDELIM + multiseq_param

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    command_line_params = {}
    command_line_params['verbose'] = opts.verbose

    if not path.exists(parameter_fp):
        eprintf("%-10s: No parameters file %s found!\n" %
                ('WARNING', parameter_fp))
        eprintf("%-10s: Creating a parameters file %s!\n" %
                ('INFO', parameter_fp))
        create_multiseq_parameters(parameter_fp, cmd_folder)
    params = parse_multiseq_parameters(parameter_fp)
    # NOTE(review): both objects are unused below; the constructor calls are
    # kept in case they have side effects -- confirm and remove if not.
    parameter = Parameters()
    paramobj = Params(params)

    # Every input file must exist before the run is attempted.
    for required_file in (barcodes, reads, refgeneanot, refgenome):
        if not path.isfile(required_file):
            eprintf("%-10s: File %s does not exist!\n" %
                    ('WARNING', required_file))
            halt_process(0)
    if not os.path.isdir(refindex_dir):
        eprintf("%-10s: Folder %s does not exist!\n" %
                ('WARNING', refindex_dir))
        halt_process(0)
    if not path.exists(opts.output_dir):
        # The format string has two placeholders; the label was missing.
        eprintf("%-10s: Folder %s does not exist!\n" %
                ('WARNING', opts.output_dir))
        halt_process(1)

    # check the pipeline configuration
    if not path.exists(config_file):
        eprintf("%-10s: No config file %s found!\n" % ('WARNING', config_file))
        eprintf("%-10s: Creating a config file %s!\n" % ('INFO', config_file))
        if not environment_variables_defined("MULTISEQ_PATH"):
            eprintf(
                "%-10s: shell variable %s not defined to generate config file %s!\n"
                % ('INFO', 'MULTISEQ_PATH', config_file))
            sys.exit(0)

        status = create_multiseq_configuration(config_file, cmd_folder)
        if status[0] == 1:
            eprintf("%s", status[1])
            halt_process(0)
    config_settings = read_pipeline_configuration(config_file,
                                                  globalerrorlogger)

    try:
        # load the sample information
        print("RUNNING Multiseq version 1.0")

        s = SampleData()
        s.setParameter('PROTOCOL_NAME', "DROP-SEQ")
        s.setParameter('SEQ_TYPE', "cDNA")
        s.setParameter('ref_genome_sequences', refgenome)
        s.setParameter('ref_gene_annotations', refgeneanot)
        s.setParameter('refindex_dir', refindex_dir)
        s.setParameter('refindex_name', refindex_name)
        s.setParameter('sample_name', sample_name)
        s.setInputOutput(inputFiles=[barcodes, reads], output_dir=output_dir)
        s.prepareToRun()

        run_multiseq(s,
                     globallogger=globalerrorlogger,
                     command_line_params=command_line_params,
                     params=params,
                     config_settings=config_settings,
                     status_update_callback=status_update_callback)
    except Exception:
        # Last-resort handler: log the traceback and terminate.  A SystemExit
        # raised on purpose inside the run is no longer reported as an error.
        exit_process(str(traceback.format_exc(10)), logger=globalerrorlogger)

    eprintf("            ***********                \n")
    eprintf("INFO : FINISHED PROCESSING THE SAMPLES \n")
    eprintf("             THE END                   \n")
    eprintf("            ***********                \n")
    halt_process(0)
Пример #8
0
def sigint_handler(signum, frame):
    """Signal handler: announce the termination signal, then call exit_process().

    ``signum`` and ``frame`` are the arguments the signal machinery passes
    to a handler; neither is used here.
    """
    eprintf("Received TERMINATION signal\n")
    exit_process()