Exemplo n.º 1
0
def run_gzip_process(cluster_name, dataset_type, log, function=None):
    '''
    Run a gzip process.
    '''

    # initialize the control variable
    OK = True

    # get the gzip code and name
    gzip_code = xlib.get_gzip_code()
    gzip_name = xlib.get_gzip_name()

    # get the gzip option dictionary
    gzip_option_dict = xlib.get_option_dict(get_gzip_config_file(dataset_type))

    # get the experiment identification
    experiment_id = gzip_option_dict['identification']['experiment_id']

    # get the gzip process script path in the local computer
    gzip_process_script = get_gzip_process_script(dataset_type)

    # get the gzip process starter path in the local computer
    gzip_process_starter = get_gzip_process_starter(dataset_type)

    # warn that the log window does not have to be closed
    if not isinstance(log, xlib.DevStdOut):
        log.write('This process might take several minutes. Do not close this window, please wait!\n')

    # check the gzip config file
    log.write(f'{xlib.get_separator()}\n')
    log.write('Checking the {0} config file ...\n'.format(gzip_name))
    (OK, error_list) = check_gzip_config_file(dataset_type, strict=True)
    if OK:
        log.write('The file is OK.\n')
    else:
        log.write('*** ERROR: The config file is not valid.\n')
        log.write('Please correct this file or recreate the config files.\n')

    # create the SSH client connection
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Connecting the SSH client ...\n')
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name)
        if OK:
            log.write('The SSH client is connected.\n')
        else:
            for error in error_list:
                log.write(f'{error}\n')

    # create the SSH transport connection
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Connecting the SSH transport ...\n')
        (OK, error_list, ssh_transport) = xssh.create_ssh_transport_connection(cluster_name)
        if OK:
            log.write('The SSH transport is connected.\n')
        else:
            for error in error_list:
                log.write(f'{error}\n')

    # create the SFTP client 
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Connecting the SFTP client ...\n')
        sftp_client = xssh.create_sftp_client(ssh_transport)
        log.write('The SFTP client is connected.\n')

    # warn that the requirements are being verified 
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Checking process requirements ...\n')

    # check the master is running
    if OK:
        (master_state_code, master_state_name) = xec2.get_node_state(cluster_name)
        if master_state_code != 16:
            log.write(f'*** ERROR: The cluster {cluster_name} is not running. Its state is {master_state_code} ({master_state_name}).\n')
            OK = False

    # warn that the requirements are OK 
    if OK:
        log.write('Process requirements are OK.\n')

    # determine the run directory in the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Determining the run directory in the cluster ...\n')
        if dataset_type == 'reference':
            current_run_dir = xlib.get_cluster_current_run_dir('reference', gzip_code)
        elif dataset_type == 'database':
            current_run_dir = xlib.get_cluster_current_run_dir('database', gzip_code)
        else:
            current_run_dir = xlib.get_cluster_current_run_dir(experiment_id, gzip_code)
        command = f'mkdir --parents {current_run_dir}'
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            log.write(f'The directory path is {current_run_dir}.\n')
        else:
            log.write(f'*** ERROR: Wrong command ---> {command}\n')

    # build the gzip process script
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Building the process script {0} ...\n'.format(gzip_process_script))
        (OK, error_list) = build_gzip_process_script(cluster_name, dataset_type, current_run_dir)
        if OK:
            log.write('The file is built.\n')
        else:
            log.write('*** ERROR: The file could not be built.\n')

    # upload the gzip process script to the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Uploading the process script {0} to the directory {1} ...\n'.format(gzip_process_script, current_run_dir))
        cluster_path = '{0}/{1}'.format(current_run_dir, os.path.basename(gzip_process_script))
        (OK, error_list) = xssh.put_file(sftp_client, gzip_process_script, cluster_path)
        if OK:
            log.write('The file is uploaded.\n')
        else:
            for error in error_list:
                log.write(f'{error}\n')

    # set run permision to the gzip process script in the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Setting on the run permision of {0}/{1} ...\n'.format(current_run_dir, os.path.basename(gzip_process_script)))
        command = 'chmod u+x {0}/{1}'.format(current_run_dir, os.path.basename(gzip_process_script))
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            log.write('The run permision is set.\n')
        else:
            log.write(f'*** ERROR: Wrong command ---> {command}\n')

    # build the gzip process starter
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Building the process starter {0} ...\n'.format(gzip_process_starter))
        (OK, error_list) = build_gzip_process_starter(dataset_type, current_run_dir)
        if OK:
            log.write('The file is built.\n')
        else:
            log.write('***ERROR: The file could not be built.\n')

    # upload the gzip process starter to the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Uploading the process starter {0} to the directory {1} ...\n'.format(gzip_process_starter, current_run_dir))
        cluster_path = '{0}/{1}'.format(current_run_dir, os.path.basename(gzip_process_starter))
        (OK, error_list) = xssh.put_file(sftp_client, gzip_process_starter, cluster_path)
        if OK:
            log.write('The file is uploaded.\n')
        else:
            for error in error_list:
                log.write(f'{error}\n')

    # set run permision to the gzip process starter in the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Setting on the run permision of {0}/{1} ...\n'.format(current_run_dir, os.path.basename(gzip_process_starter)))
        command = 'chmod u+x {0}/{1}'.format(current_run_dir, os.path.basename(gzip_process_starter))
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            log.write('The run permision is set.\n')
        else:
            log.write(f'*** ERROR: Wrong command ---> {command}\n')

    # submit the gzip process
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Submitting the process script {0}/{1} ...\n'.format(current_run_dir, os.path.basename(gzip_process_starter)))
        OK = xssh.submit_script(cluster_name, ssh_client, current_run_dir, os.path.basename(gzip_process_starter), log)

    # close the SSH transport connection
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Closing the SSH transport connection ...\n')
        xssh.close_ssh_transport_connection(ssh_transport)
        log.write('The connection is closed.\n')

    # close the SSH client connection
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Closing the SSH client connection ...\n')
        xssh.close_ssh_client_connection(ssh_client)
        log.write('The connection is closed.\n')

    # warn that the log window can be closed
    if not isinstance(log, xlib.DevStdOut):
        log.write(f'{xlib.get_separator()}\n')
        log.write('You can close this window now.\n')

    # execute final function
    if function is not None:
        function()

    # return the control variable
    return OK
Exemplo n.º 2
0
def check_gzip_config_file(dataset_type, strict):
    '''
    Check the gzip config file of a run.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # intitialize variable used when value is not found
    not_found = '***NOTFOUND***'.upper()

    # get the option dictionary
    try:
        gzip_option_dict = xlib.get_option_dict(get_gzip_config_file(dataset_type))
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append('*** ERROR: The option dictionary could not be built from the config file')
        OK = False
    else:

        # get the sections list
        sections_list = []
        for section in gzip_option_dict.keys():
            sections_list.append(section)
        sections_list.sort()

        # check section "identification"
        if 'identification' not in sections_list:
            error_list.append('*** ERROR: the section "identification" is not found.')
            OK = False
        else:

            # check section "identification" - key "experiment_id"
            experiment_id = gzip_option_dict.get('identification', {}).get('experiment_id', not_found)
            if experiment_id == not_found:
                error_list.append('*** ERROR: the key "experiment_id" is not found in the section "identification".')
                OK = False
            elif dataset_type == 'reference' and experiment_id.upper() != 'NONE':
                error_list.append('*** ERROR: the key "experiment_id" has to be always NONE')
                OK = False

            # check section "identification" - key "dataset_type"
            dataset_type_2 = gzip_option_dict.get('identification', {}).get('dataset_type', not_found)
            if dataset_type_2 == not_found:
                error_list.append('*** ERROR: the key "dataset_type" is not found in the section "identification".')
                OK = False
            else:
                if dataset_type in ['reference', 'read']:
                    if dataset_type_2.lower() != dataset_type:
                        error_list.append('*** ERROR: the key "dataset_type" has to be {0}.'.format(dataset_type))
                        OK = False
                elif dataset_type == 'result':
                    if dataset_type_2.lower() not in ['result', 'whole-result']:
                        error_list.append('*** ERROR: the key "dataset_type" has to be result or whole-result.')
                        OK = False

            # check section "identification" - key "dataset_id"
            dataset_id = gzip_option_dict.get('identification', {}).get('dataset_id', not_found)
            if dataset_id == not_found:
                error_list.append('*** ERROR: the key "dataset_id" is not found in the section "identification".')
                OK = False

        # check section "gzip parameters"
        if 'gzip parameters' not in sections_list:
            error_list.append('*** ERROR: the section "gzip parameters" is not found.')
            OK = False
        else:

            # check section "gzip parameters" - key "action"
            action = gzip_option_dict.get('gzip parameters', {}).get('action', not_found)
            if action == not_found:
                error_list.append('*** ERROR: the key "action" is not found in the section "gzip parameters".')
                OK = False
            else:
                if action.lower() not in ['compress', 'decompress']:
                    error_list.append('*** ERROR: the key "action" has to be compress or decompress.')
                    OK = False

        # check section "file-1"
        if dataset_type_2.lower() in ['reference', 'database', 'read', 'result']:
            if 'file-1' not in sections_list:
                error_list.append('*** ERROR: the section "file-1" is not found.')
                OK = False

        # check all sections "file-n"
        if dataset_type_2.lower() in ['reference', 'database', 'read', 'result']:
            for section in sections_list:

                if section not in ['identification', 'gzip parameters']:

                    # check than the section identification is like file-n 
                    if not re.match('^file-[0-9]+$', section):
                        error_list.append(f'*** ERROR: the section "{section}" has a wrong identification.')
                        OK = False

                    else:

                        # check section "file-n" - key "dataset_subdirectory"
                        dataset_subdirectory = gzip_option_dict.get(section, {}).get('dataset_subdirectory', not_found)
                        if dataset_subdirectory == not_found:
                            error_list.append('*** ERROR: the key "dataset_subdirectory" is not found in the section "{0}".'.format(section))
                            OK = False
                        elif not xlib.is_valid_path(dataset_subdirectory, 'linux'):
                            error_list.append('*** ERROR: the file {0} in the key "dataset_subdirectory" of the section "{1}" has a non valid file name.'.format(dataset_subdirectory, section))
                            OK = False

                        # check section "file-n" - key "file_name"
                        file_name = gzip_option_dict.get(section, {}).get('file_name', not_found)
                        if file_name == not_found:
                            error_list.append('*** ERROR: the key "file_name" is not found in the section "{0}".'.format(section))
                            OK = False
                        elif not xlib.is_valid_path(file_name, 'linux'):
                            error_list.append('*** ERROR: the file {0} in the key "file_name" of the section "{1}" has a non valid file name.'.format(file_name, section))
                            OK = False

    # warn that the results config file is not valid if there are any errors
    if not OK:
        error_list.append('\nThe {0} config file is not valid. Please, correct this file or recreate it.'.format(xlib.get_gzip_name()))

    # return the control variable and the error list
    return (OK, error_list)
Exemplo n.º 3
0
def get_result_dataset_dict(cluster_name, experiment_id, status, passed_connection, ssh_client):
    '''
    Get a dictionary with the result datasets of an experiment in the cluster.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the result directory in the cluster
    cluster_result_dir = xlib.get_cluster_result_dir()

    # initialize the dictionary of the result datasets
    result_dataset_dict = {}

    # create the SSH client connection
    if not passed_connection:
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name, 'master')

    # verify the result directory is created
    if OK:
        command = '[ -d {0} ] && echo RC=0 || echo RC=1'.format(cluster_result_dir)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if stdout[len(stdout) - 1] != 'RC=0':
            error_list.append('*** ERROR: There is not any volume mounted in the result directory.\n')
            error_list.append('You must link a volume in the mounting point {0} for the template {1}.\n'.format(cluster_result_dir, cluster_name))
            OK = False

    # get the dictionary of the result datasets
    if OK:
        if status == 'uncompressed':
            command = 'cd  {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(cluster_result_dir, experiment_id)
        elif status == 'compressed':
            command = 'cd {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^d > /dev/null && echo $list; done;'.format(cluster_result_dir, experiment_id)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            if status == 'uncompressed':
                input_pattern = '{0}-(.+)-(.+)'
                output_pattern = '{0} ({1} {2})'
            elif status == 'compressed':
                input_pattern = '{0}-(.+)-(.+).tar.gz'
                output_pattern = '{0} ({1} {2}) [compressed]'
            for line in stdout:
                line = line.rstrip('\n')
                if line != 'lost+found':
                    result_dataset_id = line
                    if result_dataset_id.startswith(xlib.get_cd_hit_est_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_cd_hit_est_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_cd_hit_est_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_fastqc_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_fastqc_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_fastqc_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_gzip_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_gzip_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_gzip_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_insilico_read_normalization_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_insilico_read_normalization_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_insilico_read_normalization_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_quast_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_quast_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_quast_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_ref_eval_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_ref_eval_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_ref_eval_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_rnaquast_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_rnaquast_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_rnaquast_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_rsem_eval_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_rsem_eval_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_rsem_eval_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_soapdenovotrans_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_soapdenovotrans_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_soapdenovotrans_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_star_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_star_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_star_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_transabyss_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_transabyss_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_transabyss_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_transcript_filter_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_transcript_filter_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_transcript_filter_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_transcriptome_blastx_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_transcriptome_blastx_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_transcriptome_blastx_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_transrate_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_transrate_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_transrate_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_trimmomatic_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_trimmomatic_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_trimmomatic_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_trinity_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_trinity_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_trinity_name(), date, time)
                    else:
                        result_dataset_name = result_dataset_id
                    result_dataset_dict[result_dataset_id] = {'result_dataset_id': result_dataset_id, 'result_dataset_name': result_dataset_name}

    # close the SSH client connection
    if OK and not passed_connection:
        xssh.close_ssh_client_connection(ssh_client)

    # return the control variable, error list and dictionary of the result datasets
    return (OK, error_list, result_dataset_dict)
Exemplo n.º 4
0
def form_list_cluster_experiment_processes():
    '''
    List the processes of an experiment in the cluster.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - List experiment processes in the cluster')

    # get the cluster name
    print(xlib.get_separator())
    if xec2.get_running_cluster_list(volume_creator_included=False) != []:
        cluster_name = cinputs.input_cluster_name(volume_creator_included=False, help=True)
    else:
        print('WARNING: There is not any running cluster.')
        OK = False

    # create the SSH client connection
    if OK:
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name, 'master')
        for error in error_list:
            log.write('{0}\n'.format(error))

    # get experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id(ssh_client, help=True)
        if experiment_id == '':
            print('WARNING: The cluster {0} has not experiment data.'.format(cluster_name))
            OK = False

    # get the result dataset list of the experiment
    if OK:
        command = 'cd  {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(xlib.get_cluster_result_dir(), experiment_id)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            result_dataset_id_list = []
            for line in stdout:
                line = line.rstrip('\n')
                if line != 'lost+found':
                    result_dataset_id_list.append(line)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print('*** WARNING: There is not any result dataset of the experiment {0}.'.format(experiment_id))
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 25
            bioinfo_app_width = 25
            # set line template
            line_template = '{0:' + str(result_dataset_width) + '}   {1:' + str(bioinfo_app_width) + '}'
            # print header
            print(line_template.format('Result dataset', 'Bioinfo app / Utility'))
            print(line_template.format('=' * result_dataset_width, '=' * bioinfo_app_width))
            # print detail lines
            for result_dataset_id in result_dataset_id_list:
                if result_dataset_id.startswith(xlib.get_bedtools_code()+'-'):
                    bioinfo_app_name = xlib.get_bedtools_name()
                elif result_dataset_id.startswith(xlib.get_blastplus_code()+'-'):
                    bioinfo_app_name = xlib.get_blastplus_name()
                elif result_dataset_id.startswith(xlib.get_bowtie2_code()+'-'):
                    bioinfo_app_name = xlib.get_bowtie2_name()
                elif result_dataset_id.startswith(xlib.get_busco_code()+'-'):
                    bioinfo_app_name = xlib.get_busco_name()
                elif result_dataset_id.startswith(xlib.get_cd_hit_code()+'-'):
                    bioinfo_app_name = xlib.get_cd_hit_est_name()
                elif result_dataset_id.startswith(xlib.get_cd_hit_code()+'-'):
                    bioinfo_app_name = xlib.get_cd_hit_est_name()
                elif result_dataset_id.startswith(xlib.get_detonate_code()+'-'):
                    bioinfo_app_name = xlib.get_detonate_name()
                elif result_dataset_id.startswith(xlib.get_emboss_code()+'-'):
                    bioinfo_app_name = xlib.get_emboss_name()
                elif result_dataset_id.startswith(xlib.get_fastqc_code()+'-'):
                    bioinfo_app_name = xlib.get_fastqc_name()
                elif result_dataset_id.startswith(xlib.get_gmap_code()+'-'):
                    bioinfo_app_name = xlib.get_gmap_name()
                elif result_dataset_id.startswith(xlib.get_gmap_gsnap_code()+'-'):
                    bioinfo_app_name = xlib.get_gmap_gsnap_name()
                elif result_dataset_id.startswith(xlib.get_gzip_code()+'-'):
                    bioinfo_app_name = xlib.get_gzip_name()
                elif result_dataset_id.startswith(xlib.get_insilico_read_normalization_code()+'-'):
                    bioinfo_app_name = xlib.get_insilico_read_normalization_name()
                elif result_dataset_id.startswith(xlib.get_miniconda3_code()+'-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()
                elif result_dataset_id.startswith(xlib.get_ngshelper_code()+'-'):
                    bioinfo_app_name = xlib.get_ngshelper_name()
                elif result_dataset_id.startswith(xlib.get_quast_code()+'-'):
                    bioinfo_app_name = xlib.get_quast_name()
                elif result_dataset_id.startswith(xlib.get_r_code()+'-'):
                    bioinfo_app_name = xlib.get_r_name()
                elif result_dataset_id.startswith(xlib.get_ref_eval_code()+'-'):
                    bioinfo_app_name = xlib.get_ref_eval_name()
                elif result_dataset_id.startswith(xlib.get_rnaquast_code()+'-'):
                    bioinfo_app_name = xlib.get_rnaquast_name()
                elif result_dataset_id.startswith(xlib.get_rsem_code()+'-'):
                    bioinfo_app_name = xlib.get_rsem_name()
                elif result_dataset_id.startswith(xlib.get_rsem_eval_code()+'-'):
                    bioinfo_app_name = xlib.get_rsem_eval_name()
                elif result_dataset_id.startswith(xlib.get_samtools_code()+'-'):
                    bioinfo_app_name = xlib.get_samtools_name()
                elif result_dataset_id.startswith(xlib.get_soapdenovotrans_code()+'-'):
                    bioinfo_app_name = xlib.get_soapdenovotrans_name()
                elif result_dataset_id.startswith(xlib.get_star_code()+'-'):
                    bioinfo_app_name = xlib.get_star_name()
                elif result_dataset_id.startswith(xlib.get_transabyss_code()+'-'):
                    bioinfo_app_name = xlib.get_transabyss_name()
                elif result_dataset_id.startswith(xlib.get_transcript_filter_code()+'-'):
                    bioinfo_app_name = xlib.get_transcript_filter_name()
                elif result_dataset_id.startswith(xlib.get_transcriptome_blastx_code()+'-'):
                    bioinfo_app_name = xlib.get_transcriptome_blastx_name()
                elif result_dataset_id.startswith(xlib.get_transrate_code()+'-'):
                    bioinfo_app_name = xlib.get_transrate_name()
                elif result_dataset_id.startswith(xlib.get_trimmomatic_code()+'-'):
                    bioinfo_app_name = xlib.get_trimmomatic_name()
                elif result_dataset_id.startswith(xlib.get_trinity_code()+'-'):
                    bioinfo_app_name = xlib.get_trinity_name()
                else:
                    bioinfo_app_name = 'xxx'
                print(line_template.format(result_dataset_id, bioinfo_app_name))

    # close the SSH client connection
    if OK:
        xssh.close_ssh_client_connection(ssh_client)

    # show continuation message 
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Exemplo n.º 5
0
    def execute(self, event=None):
        '''
        Execute the list the result logs in the cluster.
        '''

        # validate inputs
        OK = self.validate_inputs()
        if not OK:
            message = 'Some input values are not OK.'
            tkinter.messagebox.showerror('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

        # get the run dictionary of the experiment
        if OK:
            # -- command = 'ls {0}/{1}'.format(xlib.get_cluster_result_dir(), self.wrapper_experiment_id.get())
            command = 'cd  {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(xlib.get_cluster_result_dir(), self.wrapper_experiment_id.get())
            (OK, stdout, stderr) = xssh.execute_cluster_command(self.ssh_client, command)
            if OK:
                result_dataset_dict = {}
                for line in stdout:
                    line = line.rstrip('\n')
                    if line != 'lost+found':
                        result_dataset_id = line
                        try:
                            pattern = r'^(.+)\-(.+)\-(.+)$'
                            mo = re.search(pattern, result_dataset_id)
                            bioinfo_app_code = mo.group(1).strip()
                            yymmdd = mo.group(2)
                            hhmmss = mo.group(3)
                            date = '20{0}-{1}-{2}'.format(yymmdd[:2], yymmdd[2:4], yymmdd[4:])
                            time = '{0}:{1}:{2}'.format(hhmmss[:2], hhmmss[2:4], hhmmss[4:])
                        except:
                            bioinfo_app_code = 'xxx'
                            date = '0000-00-00'
                            time = '00:00:00'
                        if result_dataset_id.startswith(xlib.get_bedtools_code()+'-'):
                            bioinfo_app_name = xlib.get_bedtools_name()
                        elif result_dataset_id.startswith(xlib.get_blastplus_code()+'-'):
                            bioinfo_app_name = xlib.get_blastplus_name()
                        elif result_dataset_id.startswith(xlib.get_bowtie2_code()+'-'):
                            bioinfo_app_name = xlib.get_bowtie2_name()
                        elif result_dataset_id.startswith(xlib.get_busco_code()+'-'):
                            bioinfo_app_name = xlib.get_busco_name()
                        elif result_dataset_id.startswith(xlib.get_cd_hit_code()+'-'):
                            bioinfo_app_name = xlib.get_cd_hit_name()
                        elif result_dataset_id.startswith(xlib.get_cd_hit_est_code()+'-'):
                            bioinfo_app_name = xlib.get_cd_hit_est_name()
                        elif result_dataset_id.startswith(xlib.get_detonate_code()+'-'):
                            bioinfo_app_name = xlib.get_detonate_name()
                        elif result_dataset_id.startswith(xlib.get_emboss_code()+'-'):
                            bioinfo_app_name = xlib.get_emboss_name()
                        elif result_dataset_id.startswith(xlib.get_fastqc_code()+'-'):
                            bioinfo_app_name = xlib.get_fastqc_name()
                        elif result_dataset_id.startswith(xlib.get_gmap_code()+'-'):
                            bioinfo_app_name = xlib.get_gmap_name()
                        elif result_dataset_id.startswith(xlib.get_gmap_gsnap_code()+'-'):
                            bioinfo_app_name = xlib.get_gmap_gsnap_name()
                        elif result_dataset_id.startswith(xlib.get_gzip_code()+'-'):
                            bioinfo_app_name = xlib.get_gzip_name()
                        elif result_dataset_id.startswith(xlib.get_insilico_read_normalization_code()+'-'):
                            bioinfo_app_name = xlib.get_insilico_read_normalization_name()
                        elif result_dataset_id.startswith(xlib.get_miniconda3_code()+'-'):
                            bioinfo_app_name = xlib.get_miniconda3_name()
                        elif result_dataset_id.startswith(xlib.get_ngshelper_code()+'-'):
                            bioinfo_app_name = xlib.get_ngshelper_name()
                        elif result_dataset_id.startswith(xlib.get_quast_code()+'-'):
                            bioinfo_app_name = xlib.get_quast_name()
                        elif result_dataset_id.startswith(xlib.get_r_code()+'-'):
                            bioinfo_app_name = xlib.get_r_name()
                        elif result_dataset_id.startswith(xlib.get_ref_eval_code()+'-'):
                            bioinfo_app_name = xlib.get_ref_eval_name()
                        elif result_dataset_id.startswith(xlib.get_rnaquast_code()+'-'):
                            bioinfo_app_name = xlib.get_rnaquast_name()
                        elif result_dataset_id.startswith(xlib.get_rsem_code()+'-'):
                            bioinfo_app_name = xlib.get_rsem_name()
                        elif result_dataset_id.startswith(xlib.get_rsem_eval_code()+'-'):
                            bioinfo_app_name = xlib.get_rsem_eval_name()
                        elif result_dataset_id.startswith(xlib.get_samtools_code()+'-'):
                            bioinfo_app_name = xlib.get_samtools_name()
                        elif result_dataset_id.startswith(xlib.get_soapdenovotrans_code()+'-'):
                            bioinfo_app_name = xlib.get_soapdenovotrans_name()
                        elif result_dataset_id.startswith(xlib.get_star_code()+'-'):
                            bioinfo_app_name = xlib.get_star_name()
                        elif result_dataset_id.startswith(xlib.get_transabyss_code()+'-'):
                            bioinfo_app_name = xlib.get_transabyss_name()
                        elif result_dataset_id.startswith(xlib.get_transcript_filter_code()+'-'):
                            bioinfo_app_name = xlib.get_transcript_filter_name()
                        elif result_dataset_id.startswith(xlib.get_transcriptome_blastx_code()+'-'):
                            bioinfo_app_name = xlib.get_transcriptome_blastx_name()
                        elif result_dataset_id.startswith(xlib.get_transrate_code()+'-'):
                            bioinfo_app_name = xlib.get_transrate_name()
                        elif result_dataset_id.startswith(xlib.get_trimmomatic_code()+'-'):
                            bioinfo_app_name = xlib.get_trimmomatic_name()
                        elif result_dataset_id.startswith(xlib.get_trinity_code()+'-'):
                            bioinfo_app_name = xlib.get_trinity_name()
                        else:
                            bioinfo_app_name = 'xxx'
                        result_dataset_dict[result_dataset_id] = {'experiment_id': self.wrapper_experiment_id.get(), 'result_dataset_id': result_dataset_id, 'bioinfo_app': bioinfo_app_name, 'date': date, 'time': time}

        # verify if there are any nodes running
        if OK:
            if result_dataset_dict == {}:
                message = 'There is not any run.'
                tkinter.messagebox.showwarning('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

        # build the data list
        if OK:
            data_list = ['experiment_id', 'result_dataset_id', 'bioinfo_app', 'date', 'time']

        # build the data dictionary
        if OK:
            data_dict = {}
            data_dict['experiment_id']= {'text': 'Experiment id. / Process', 'width': 200, 'aligment': 'left'}
            data_dict['result_dataset_id'] = {'text': 'Result dataset', 'width': 200, 'aligment': 'left'}
            data_dict['bioinfo_app'] = {'text': 'Bioinfo app / Utility', 'width': 200, 'aligment': 'left'}
            data_dict['date'] = {'text': 'Date', 'width': 80, 'aligment': 'right'}
            data_dict['time'] = {'text': 'Time', 'width': 80, 'aligment': 'right'}

        # create the dialog Table to show the nodes running
        if OK:
            dialog_table = gdialogs.DialogTable(self, 'Experiment runs in {0}/{1}'.format(xlib.get_cluster_result_dir(), self.wrapper_experiment_id.get()), 400, 900, data_list, data_dict, result_dataset_dict, 'view_result_logs', [self.wrapper_cluster_name.get()])
            self.wait_window(dialog_table)

        # close the form
        if OK:
            self.close()
Exemplo n.º 6
0
def form_list_cluster_experiment_processes():
    '''
    List the processes of an experiment in the cluster.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(
        'Logs - List experiment processes in the cluster')

    # get the cluster name
    print(xlib.get_separator())
    if xec2.get_running_cluster_list(only_environment_cluster=True,
                                     volume_creator_included=False) != []:
        cluster_name = cinputs.input_cluster_name(
            volume_creator_included=False, help=True)
    else:
        print('WARNING: There is not any running cluster.')
        OK = False

    # create the SSH client connection
    if OK:
        (OK, error_list,
         ssh_client) = xssh.create_ssh_client_connection(cluster_name)
        for error in error_list:
            print(error)

    # get experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id(ssh_client, help=True)
        if experiment_id == '':
            print(
                f'WARNING: The cluster {cluster_name} does not have experiment data.'
            )
            OK = False

    # get the result dataset list of the experiment
    if OK:
        command = f'cd  {xlib.get_cluster_result_dir()}/{experiment_id}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'
        (OK, stdout, _) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            result_dataset_id_list = []
            for line in stdout:
                line = line.rstrip('\n')
                if line != 'lost+found':
                    result_dataset_id_list.append(line)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print(
                f'*** WARNING: There is not any result dataset of the experiment {experiment_id}.'
            )
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 30
            bioinfo_app_width = 25
            # set line
            line = '{0:' + str(result_dataset_width) + '}   {1:' + str(
                bioinfo_app_width) + '}'
            # print header
            print(line.format('Result dataset', 'Bioinfo app / Utility'))
            print(
                line.format('=' * result_dataset_width,
                            '=' * bioinfo_app_width))
            # print detail lines
            for result_dataset_id in result_dataset_id_list:

                if result_dataset_id.startswith(xlib.get_bedtools_code() +
                                                '-'):
                    bioinfo_app_name = xlib.get_bedtools_name()

                elif result_dataset_id.startswith(xlib.get_blastplus_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_blastplus_name()

                elif result_dataset_id.startswith(xlib.get_bcftools_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_bcftools_name()

                elif result_dataset_id.startswith(xlib.get_bowtie2_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_bowtie2_name()

                elif result_dataset_id.startswith(xlib.get_busco_code() + '-'):
                    bioinfo_app_name = xlib.get_busco_name()

                elif result_dataset_id.startswith(xlib.get_cd_hit_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cd_hit_name()

                elif result_dataset_id.startswith(xlib.get_cd_hit_est_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cd_hit_est_name()

                elif result_dataset_id.startswith(xlib.get_cuffdiff_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cuffdiff_name()

                elif result_dataset_id.startswith(xlib.get_cufflinks_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cufflinks_name()

                elif result_dataset_id.startswith(
                        xlib.get_cufflinks_cuffmerge_code() + '-'):
                    bioinfo_app_name = xlib.get_cufflinks_cuffmerge_name()

                elif result_dataset_id.startswith(xlib.get_cuffnorm_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cuffnorm_name()

                elif result_dataset_id.startswith(xlib.get_cuffquant_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cuffquant_name()

                elif result_dataset_id.startswith(xlib.get_cutadapt_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cutadapt_name()

                elif result_dataset_id.startswith(
                        xlib.get_ddradseq_simulation_code() + '-'):
                    bioinfo_app_name = xlib.get_ddradseq_simulation_name()

                elif result_dataset_id.startswith(
                        xlib.get_ddradseqtools_code() + '-'):
                    bioinfo_app_name = xlib.get_ddradseqtools_name()

                elif result_dataset_id.startswith(xlib.get_detonate_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_detonate_name()

                elif result_dataset_id.startswith(xlib.get_diamond_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_diamond_name()

                elif result_dataset_id.startswith(xlib.get_emboss_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_emboss_name()

                elif result_dataset_id.startswith(
                        xlib.get_entrez_direct_code() + '-'):
                    bioinfo_app_name = xlib.get_entrez_direct_name()

                elif result_dataset_id.startswith(xlib.get_express_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_express_name()

                elif result_dataset_id.startswith(xlib.get_fastqc_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_fastqc_name()

                elif result_dataset_id.startswith(xlib.get_ggtrinity_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_ggtrinity_name()

                elif result_dataset_id.startswith(xlib.get_gmap_gsnap_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_gmap_gsnap_name()

                elif result_dataset_id.startswith(xlib.get_gmap_code() + '-'):
                    bioinfo_app_name = xlib.get_gmap_name()

                elif result_dataset_id.startswith(xlib.get_gsnap_code() + '-'):
                    bioinfo_app_name = xlib.get_gsnap_name()

                elif result_dataset_id.startswith(xlib.get_gzip_code() + '-'):
                    bioinfo_app_name = xlib.get_gzip_name()

                elif result_dataset_id.startswith(xlib.get_hisat2_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_hisat2_name()

                elif result_dataset_id.startswith(xlib.get_htseq_code() + '-'):
                    bioinfo_app_name = xlib.get_htseq_name()

                elif result_dataset_id.startswith(xlib.get_htseq_count_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_htseq_count_name()

                elif result_dataset_id.startswith(
                        xlib.get_insilico_read_normalization_code() + '-'):
                    bioinfo_app_name = xlib.get_insilico_read_normalization_name(
                    )

                elif result_dataset_id.startswith(xlib.get_ipyrad_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_ipyrad_name()

                elif result_dataset_id.startswith(xlib.get_kallisto_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_kallisto_name()

                elif result_dataset_id.startswith(xlib.get_miniconda3_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()

                elif result_dataset_id.startswith(xlib.get_ngshelper_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_ngshelper_name()

                elif result_dataset_id.startswith(xlib.get_quast_code() + '-'):
                    bioinfo_app_name = xlib.get_quast_name()

                elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                    bioinfo_app_name = xlib.get_r_name()

                elif result_dataset_id.startswith(xlib.get_raddesigner_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_raddesigner_name()

                elif result_dataset_id.startswith(xlib.get_ref_eval_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_ref_eval_name()

                elif result_dataset_id.startswith(xlib.get_rnaquast_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_rnaquast_name()

                elif result_dataset_id.startswith(xlib.get_rsem_code() + '-'):
                    bioinfo_app_name = xlib.get_rsem_name()

                elif result_dataset_id.startswith(xlib.get_rsem_eval_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_rsem_eval_name()

                elif result_dataset_id.startswith(xlib.get_rsitesearch_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_rsitesearch_name()

                elif result_dataset_id.startswith(xlib.get_samtools_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_samtools_name()

                elif result_dataset_id.startswith(xlib.get_soapdenovo2_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_soapdenovo2_name()

                elif result_dataset_id.startswith(
                        xlib.get_soapdenovotrans_code() + '-'):
                    bioinfo_app_name = xlib.get_soapdenovotrans_name()

                elif result_dataset_id.startswith(xlib.get_star_code() + '-'):
                    bioinfo_app_name = xlib.get_star_name()

                elif result_dataset_id.startswith(xlib.get_starcode_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_starcode_name()

                elif result_dataset_id.startswith(xlib.get_toa_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gene_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_taxonomy_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_nucleotide_gi_code(
                        ) + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_protein_gi_code()
                        + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gene_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_merge_annotations_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_merge_annotations_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_diamond_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_refseq_plant_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_rebuild_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_get_toa_process_rebuild_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_recreate_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_get_toa_process_recreate_toa_database_name(
                    )

                elif result_dataset_id.startswith(xlib.get_tophat_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_tophat_name()

                elif result_dataset_id.startswith(xlib.get_transabyss_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_transabyss_name()

                elif result_dataset_id.startswith(
                        xlib.get_transcript_filter_code() + '-'):
                    bioinfo_app_name = xlib.get_transcript_filter_name()

                elif result_dataset_id.startswith(
                        xlib.get_transcriptome_blastx_code() + '-'):
                    bioinfo_app_name = xlib.get_transcriptome_blastx_name()

                elif result_dataset_id.startswith(
                        xlib.get_transdecoder_code() + '-'):
                    bioinfo_app_name = xlib.get_transdecoder_name()

                elif result_dataset_id.startswith(xlib.get_transrate_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_transrate_name()

                elif result_dataset_id.startswith(xlib.get_trimmomatic_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_trimmomatic_name()

                elif result_dataset_id.startswith(xlib.get_trinity_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_trinity_name()

                elif result_dataset_id.startswith(
                        xlib.get_variant_calling_code() + '-'):
                    bioinfo_app_name = xlib.get_variant_calling_name()

                elif result_dataset_id.startswith(xlib.get_vcftools_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_vcftools_name()

                elif result_dataset_id.startswith(
                        xlib.get_vcftools_perl_libraries_code() + '-'):
                    bioinfo_app_name = xlib.get_vcftools_perl_libraries_name()

                elif result_dataset_id.startswith(xlib.get_vsearch_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_vsearch_name()

                else:
                    bioinfo_app_name = 'xxx'

                print(line.format(result_dataset_id, bioinfo_app_name))

    # close the SSH client connection
    if OK:
        xssh.close_ssh_client_connection(ssh_client)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Exemplo n.º 7
0
def build_gzip_process_script(cluster_name, dataset_type, current_run_dir):
    '''
    Build the current gzip process script.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the gzip option dictionary
    gzip_option_dict = xlib.get_option_dict(get_gzip_config_file(dataset_type))

    # get the options
    experiment_id = gzip_option_dict['identification']['experiment_id']
    dataset_type_2 = gzip_option_dict['identification']['dataset_type']
    dataset_id = gzip_option_dict['identification']['dataset_id']
    action = gzip_option_dict['gzip parameters']['action']

    # get the sections list
    sections_list = []
    for section in gzip_option_dict.keys():
        sections_list.append(section)
    sections_list.sort()

    # build the dataset subdirectory and file name lists
    dataset_subdirectory_list = []
    file_name_list = []
    for section in sections_list:
        # if the section identification is like library-n
        if re.match('^file-[0-9]+$', section):
            dataset_subdirectory = gzip_option_dict[section][
                'dataset_subdirectory']
            dataset_subdirectory_list.append(dataset_subdirectory)
            file_name = gzip_option_dict[section]['file_name']
            file_name_list.append(file_name)

    # get the dataset directory
    if dataset_type_2 == 'reference':
        dataset_dir = xlib.get_cluster_reference_dataset_dir(dataset_id)
    elif dataset_type_2 == 'database':
        dataset_dir = xlib.get_cluster_database_dataset_dir(dataset_id)
    elif dataset_type_2 == 'read':
        dataset_dir = xlib.get_cluster_experiment_read_dataset_dir(
            experiment_id, dataset_id)
    elif dataset_type_2 == 'result':
        dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(
            experiment_id, dataset_id)
    elif dataset_type_2 == 'whole-result':
        dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(
            experiment_id, dataset_id)

    # write the gzip process script
    try:
        if not os.path.exists(
                os.path.dirname(get_gzip_process_script(dataset_type_2))):
            os.makedirs(
                os.path.dirname(get_gzip_process_script(dataset_type_2)))
        with open(get_gzip_process_script(dataset_type_2),
                  mode='w',
                  encoding='utf8',
                  newline='\n') as file_id:
            file_id.write('{0}\n'.format('#!/bin/bash'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format(
                'SEP="#########################################"'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function init'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    INIT_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format(
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            ))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format(
                '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'
                .format(cluster_name)))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function run_gzip_process'))
            file_id.write('{0}\n'.format('{'))
            if dataset_type_2 in ['reference', 'database', 'read', 'result']:
                file_id.write('{0}\n'.format(
                    '    cd {0}'.format(current_run_dir)))
                for i in range(len(dataset_subdirectory_list)):
                    file_id.write('{0}\n'.format('    echo "$SEP"'))
                    file_id.write('{0}\n'.format(
                        '    echo "Compressing/decompressing {0}/{1}/{2} ..."'.
                        format(dataset_dir, dataset_subdirectory_list[i],
                               file_name_list[i])))
                    file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                    file_id.write('{0}\n'.format(
                        '        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
                    ))
                    if action == 'compress':
                        file_id.write('{0}\n'.format(
                            '        gzip {0}/{1}/{2}'.format(
                                dataset_dir, dataset_subdirectory_list[i],
                                file_name_list[i])))
                    elif action == 'decompress':
                        file_id.write('{0}\n'.format(
                            '        gzip --decompress {0}/{1}/{2}'.format(
                                dataset_dir, dataset_subdirectory_list[i],
                                file_name_list[i])))
                    file_id.write('{0}\n'.format('    RC=$?'))
                    file_id.write('{0}\n'.format(
                        '    if [ $RC -ne 0 ]; then manage_error gzip $RC; fi')
                                  )
            elif dataset_type_2 == 'whole-result':
                file_id.write('{0}\n'.format(
                    '    cd {0}'.format(current_run_dir)))
                file_id.write('{0}\n'.format('    echo "$SEP"'))
                file_id.write('{0}\n'.format(
                    '    echo "Compressing/decompressing {0} ..."'.format(
                        dataset_dir)))
                file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                file_id.write('{0}\n'.format(
                    '        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
                ))
                if action == 'compress':
                    file_id.write('{0}\n'.format(
                        '        tar --create --gzip --verbose --file={0}.tar.gz {0}'
                        .format(dataset_dir)))
                elif action == 'decompress':
                    file_id.write('{0}\n'.format(
                        '        tar --extract --gzip --verbose --file={0} --directory=/'
                        .format(dataset_dir)))
                file_id.write('{0}\n'.format('    RC=$?'))
                file_id.write('{0}\n'.format(
                    '    if [ $RC -ne 0 ]; then manage_error tar $RC; fi'))
                file_id.write('{0}\n'.format('    echo "$SEP"'))
                file_id.write('{0}\n'.format(
                    '    echo "Removing {0} ..."'.format(dataset_dir)))
                file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                file_id.write('{0}\n'.format(
                    '        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
                ))
                file_id.write('{0}\n'.format(
                    '        rm -rf {0}'.format(dataset_dir)))
                file_id.write('{0}\n'.format('    RC=$?'))
                file_id.write('{0}\n'.format(
                    '    if [ $RC -ne 0 ]; then manage_error rm $RC; fi'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function end'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            ))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format(
                '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'
            ))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(
                xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format(
                '    SUBJECT="{0}: {1} process"'.format(
                    xlib.get_project_name(), xlib.get_gzip_name())))
            file_id.write('{0}\n'.format(
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {0} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(xlib.get_gzip_name(), cluster_name)))
            file_id.write('{0}\n'.format(
                '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'
            ))
            file_id.write('{0}\n'.format('    exit 0'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function manage_error'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            ))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write(
                '{0}\n'.format('    echo "ERROR: $1 returned error $2"'))
            file_id.write('{0}\n'.format(
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'
            ))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(
                xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format(
                '    SUBJECT="{0}: {1} process"'.format(
                    xlib.get_project_name(), xlib.get_gzip_name())))
            file_id.write('{0}\n'.format(
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {0} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(xlib.get_gzip_name(), cluster_name)))
            file_id.write('{0}\n'.format(
                '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'
            ))
            file_id.write('{0}\n'.format('    exit 3'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function calculate_duration'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format(
                '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`'))
            file_id.write('{0}\n'.format('    HH=`expr $DURATION / 3600`'))
            file_id.write(
                '{0}\n'.format('    MM=`expr $DURATION % 3600 / 60`'))
            file_id.write('{0}\n'.format('    SS=`expr $DURATION % 60`'))
            file_id.write('{0}\n'.format(
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`'
            ))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('init'))
            file_id.write('{0}\n'.format('run_gzip_process'))
            file_id.write('{0}\n'.format('end'))
    except:
        error_list.append('*** ERROR: The file {0} can not be created'.format(
            get_gzip_process_script(dataset_type_2)))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)