def run_gzip_process(cluster_name, dataset_type, log, function=None):
    '''
    Build, upload and submit a gzip process in a cluster.

    Parameters:
        cluster_name: name of the cluster where the process is submitted.
        dataset_type: 'reference', 'database' or another value (experiment
            datasets); it selects the config file and the run directory.
        log: object with a write() method where progress is reported.
        function: optional callable executed after the process is submitted.

    Returns the control variable (True when every step succeeded).
    '''

    # initialize the control variable
    OK = True

    # initialize the connection objects so they can be closed safely even
    # when an intermediate step fails (previously they leaked on failure)
    ssh_client = None
    ssh_transport = None

    # get the gzip code and name
    gzip_code = xlib.get_gzip_code()
    gzip_name = xlib.get_gzip_name()

    # get the gzip option dictionary
    gzip_option_dict = xlib.get_option_dict(get_gzip_config_file(dataset_type))

    # get the experiment identification
    experiment_id = gzip_option_dict['identification']['experiment_id']

    # get the gzip process script path in the local computer
    gzip_process_script = get_gzip_process_script(dataset_type)

    # get the gzip process starter path in the local computer
    gzip_process_starter = get_gzip_process_starter(dataset_type)

    # warn that the log window does not have to be closed
    if not isinstance(log, xlib.DevStdOut):
        log.write('This process might take several minutes. Do not close this window, please wait!\n')

    # check the gzip config file
    log.write(f'{xlib.get_separator()}\n')
    log.write('Checking the {0} config file ...\n'.format(gzip_name))
    (OK, error_list) = check_gzip_config_file(dataset_type, strict=True)
    if OK:
        log.write('The file is OK.\n')
    else:
        log.write('*** ERROR: The config file is not valid.\n')
        log.write('Please correct this file or recreate the config files.\n')

    # create the SSH client connection
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Connecting the SSH client ...\n')
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name)
        if OK:
            log.write('The SSH client is connected.\n')
        else:
            for error in error_list:
                log.write(f'{error}\n')

    # create the SSH transport connection
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Connecting the SSH transport ...\n')
        (OK, error_list, ssh_transport) = xssh.create_ssh_transport_connection(cluster_name)
        if OK:
            log.write('The SSH transport is connected.\n')
        else:
            for error in error_list:
                log.write(f'{error}\n')

    # create the SFTP client
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Connecting the SFTP client ...\n')
        sftp_client = xssh.create_sftp_client(ssh_transport)
        log.write('The SFTP client is connected.\n')

    # warn that the requirements are being verified
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Checking process requirements ...\n')

    # check the master is running (EC2 state code 16 means "running")
    if OK:
        (master_state_code, master_state_name) = xec2.get_node_state(cluster_name)
        if master_state_code != 16:
            log.write(f'*** ERROR: The cluster {cluster_name} is not running. Its state is {master_state_code} ({master_state_name}).\n')
            OK = False

    # warn that the requirements are OK
    if OK:
        log.write('Process requirements are OK.\n')

    # determine the run directory in the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Determining the run directory in the cluster ...\n')
        if dataset_type == 'reference':
            current_run_dir = xlib.get_cluster_current_run_dir('reference', gzip_code)
        elif dataset_type == 'database':
            current_run_dir = xlib.get_cluster_current_run_dir('database', gzip_code)
        else:
            current_run_dir = xlib.get_cluster_current_run_dir(experiment_id, gzip_code)
        command = f'mkdir --parents {current_run_dir}'
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            log.write(f'The directory path is {current_run_dir}.\n')
        else:
            log.write(f'*** ERROR: Wrong command ---> {command}\n')

    # build the gzip process script
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Building the process script {0} ...\n'.format(gzip_process_script))
        (OK, error_list) = build_gzip_process_script(cluster_name, dataset_type, current_run_dir)
        if OK:
            log.write('The file is built.\n')
        else:
            log.write('*** ERROR: The file could not be built.\n')

    # upload the gzip process script to the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Uploading the process script {0} to the directory {1} ...\n'.format(gzip_process_script, current_run_dir))
        cluster_path = '{0}/{1}'.format(current_run_dir, os.path.basename(gzip_process_script))
        (OK, error_list) = xssh.put_file(sftp_client, gzip_process_script, cluster_path)
        if OK:
            log.write('The file is uploaded.\n')
        else:
            for error in error_list:
                log.write(f'{error}\n')

    # set run permision to the gzip process script in the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Setting on the run permision of {0}/{1} ...\n'.format(current_run_dir, os.path.basename(gzip_process_script)))
        command = 'chmod u+x {0}/{1}'.format(current_run_dir, os.path.basename(gzip_process_script))
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            log.write('The run permision is set.\n')
        else:
            log.write(f'*** ERROR: Wrong command ---> {command}\n')

    # build the gzip process starter
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Building the process starter {0} ...\n'.format(gzip_process_starter))
        (OK, error_list) = build_gzip_process_starter(dataset_type, current_run_dir)
        if OK:
            log.write('The file is built.\n')
        else:
            # message fixed for consistency: it was '***ERROR:' (missing space)
            log.write('*** ERROR: The file could not be built.\n')

    # upload the gzip process starter to the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Uploading the process starter {0} to the directory {1} ...\n'.format(gzip_process_starter, current_run_dir))
        cluster_path = '{0}/{1}'.format(current_run_dir, os.path.basename(gzip_process_starter))
        (OK, error_list) = xssh.put_file(sftp_client, gzip_process_starter, cluster_path)
        if OK:
            log.write('The file is uploaded.\n')
        else:
            for error in error_list:
                log.write(f'{error}\n')

    # set run permision to the gzip process starter in the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Setting on the run permision of {0}/{1} ...\n'.format(current_run_dir, os.path.basename(gzip_process_starter)))
        command = 'chmod u+x {0}/{1}'.format(current_run_dir, os.path.basename(gzip_process_starter))
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            log.write('The run permision is set.\n')
        else:
            log.write(f'*** ERROR: Wrong command ---> {command}\n')

    # submit the gzip process
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Submitting the process script {0}/{1} ...\n'.format(current_run_dir, os.path.basename(gzip_process_starter)))
        OK = xssh.submit_script(cluster_name, ssh_client, current_run_dir, os.path.basename(gzip_process_starter), log)

    # close the SSH transport connection; this is done whenever the connection
    # was opened, even if a later step failed, to avoid leaking it
    # (assumes the create function returns None on failure — TODO confirm)
    if ssh_transport is not None:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Closing the SSH transport connection ...\n')
        xssh.close_ssh_transport_connection(ssh_transport)
        log.write('The connection is closed.\n')

    # close the SSH client connection, with the same leak-avoidance rule
    if ssh_client is not None:
        log.write(f'{xlib.get_separator()}\n')
        log.write('Closing the SSH client connection ...\n')
        xssh.close_ssh_client_connection(ssh_client)
        log.write('The connection is closed.\n')

    # warn that the log window can be closed
    if not isinstance(log, xlib.DevStdOut):
        log.write(f'{xlib.get_separator()}\n')
        log.write('You can close this window now.\n')

    # execute final function
    if function is not None:
        function()

    # return the control variable
    return OK
def check_gzip_config_file(dataset_type, strict):
    '''
    Check the gzip config file of a run.

    Parameters:
        dataset_type: 'reference', 'database', 'read' or 'result'; it selects
            the config file to check and drives the per-key validations.
        strict: kept for interface compatibility with the other check
            functions (currently not used by any validation here).

    Returns (OK, error_list) where OK is False when any check failed and
    error_list holds one message per problem found.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # intitialize variable used when value is not found
    not_found = '***NOTFOUND***'

    # default for the dataset type read from the config file; this prevents a
    # NameError in the "file-n" checks below when the "identification"
    # section is missing (previously the name was only bound inside the else)
    dataset_type_2 = not_found

    # get the option dictionary
    try:
        gzip_option_dict = xlib.get_option_dict(get_gzip_config_file(dataset_type))
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append('*** ERROR: The option dictionary could not be built from the config file')
        OK = False
    else:

        # get the sections list
        sections_list = sorted(gzip_option_dict.keys())

        # check section "identification"
        if 'identification' not in sections_list:
            error_list.append('*** ERROR: the section "identification" is not found.')
            OK = False
        else:

            # check section "identification" - key "experiment_id"
            experiment_id = gzip_option_dict.get('identification', {}).get('experiment_id', not_found)
            if experiment_id == not_found:
                error_list.append('*** ERROR: the key "experiment_id" is not found in the section "identification".')
                OK = False
            elif dataset_type == 'reference' and experiment_id.upper() != 'NONE':
                # reference datasets do not belong to any experiment
                error_list.append('*** ERROR: the key "experiment_id" has to be always NONE')
                OK = False

            # check section "identification" - key "dataset_type"
            dataset_type_2 = gzip_option_dict.get('identification', {}).get('dataset_type', not_found)
            if dataset_type_2 == not_found:
                error_list.append('*** ERROR: the key "dataset_type" is not found in the section "identification".')
                OK = False
            else:
                if dataset_type in ['reference', 'read']:
                    if dataset_type_2.lower() != dataset_type:
                        error_list.append('*** ERROR: the key "dataset_type" has to be {0}.'.format(dataset_type))
                        OK = False
                elif dataset_type == 'result':
                    if dataset_type_2.lower() not in ['result', 'whole-result']:
                        error_list.append('*** ERROR: the key "dataset_type" has to be result or whole-result.')
                        OK = False

            # check section "identification" - key "dataset_id"
            dataset_id = gzip_option_dict.get('identification', {}).get('dataset_id', not_found)
            if dataset_id == not_found:
                error_list.append('*** ERROR: the key "dataset_id" is not found in the section "identification".')
                OK = False

        # check section "gzip parameters"
        if 'gzip parameters' not in sections_list:
            error_list.append('*** ERROR: the section "gzip parameters" is not found.')
            OK = False
        else:

            # check section "gzip parameters" - key "action"
            action = gzip_option_dict.get('gzip parameters', {}).get('action', not_found)
            if action == not_found:
                error_list.append('*** ERROR: the key "action" is not found in the section "gzip parameters".')
                OK = False
            else:
                if action.lower() not in ['compress', 'decompress']:
                    error_list.append('*** ERROR: the key "action" has to be compress or decompress.')
                    OK = False

        # check section "file-1"; skipped when dataset_type_2 could not be
        # read (it keeps the not_found default in that case)
        if dataset_type_2.lower() in ['reference', 'database', 'read', 'result']:
            if 'file-1' not in sections_list:
                error_list.append('*** ERROR: the section "file-1" is not found.')
                OK = False

        # check all sections "file-n"
        if dataset_type_2.lower() in ['reference', 'database', 'read', 'result']:
            for section in sections_list:
                if section not in ['identification', 'gzip parameters']:

                    # check than the section identification is like file-n
                    if not re.match('^file-[0-9]+$', section):
                        error_list.append(f'*** ERROR: the section "{section}" has a wrong identification.')
                        OK = False
                    else:

                        # check section "file-n" - key "dataset_subdirectory"
                        dataset_subdirectory = gzip_option_dict.get(section, {}).get('dataset_subdirectory', not_found)
                        if dataset_subdirectory == not_found:
                            error_list.append('*** ERROR: the key "dataset_subdirectory" is not found in the section "{0}".'.format(section))
                            OK = False
                        elif not xlib.is_valid_path(dataset_subdirectory, 'linux'):
                            error_list.append('*** ERROR: the file {0} in the key "dataset_subdirectory" of the section "{1}" has a non valid file name.'.format(dataset_subdirectory, section))
                            OK = False

                        # check section "file-n" - key "file_name"
                        file_name = gzip_option_dict.get(section, {}).get('file_name', not_found)
                        if file_name == not_found:
                            error_list.append('*** ERROR: the key "file_name" is not found in the section "{0}".'.format(section))
                            OK = False
                        elif not xlib.is_valid_path(file_name, 'linux'):
                            error_list.append('*** ERROR: the file {0} in the key "file_name" of the section "{1}" has a non valid file name.'.format(file_name, section))
                            OK = False

    # warn that the results config file is not valid if there are any errors
    if not OK:
        error_list.append('\nThe {0} config file is not valid. Please, correct this file or recreate it.'.format(xlib.get_gzip_name()))

    # return the control variable and the error list
    return (OK, error_list)
def get_result_dataset_dict(cluster_name, experiment_id, status, passed_connection, ssh_client):
    '''
    Get a dictionary with the result datasets of an experiment in the cluster.

    Parameters:
        cluster_name: cluster where the result datasets are located.
        experiment_id: identification of the experiment.
        status: 'uncompressed' or 'compressed'; selects which datasets are
            listed and how their display names are built.
        passed_connection: True when ssh_client is an already open connection.
        ssh_client: SSH client used when passed_connection is True; otherwise
            a new connection is created (and closed) here.

    Returns (OK, error_list, result_dataset_dict) where each dictionary entry
    maps a result dataset id to its id and display name.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the result directory in the cluster
    cluster_result_dir = xlib.get_cluster_result_dir()

    # initialize the dictionary of the result datasets
    result_dataset_dict = {}

    # create the SSH client connection
    if not passed_connection:
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name, 'master')

    # verify the result directory is created
    if OK:
        command = '[ -d {0} ] && echo RC=0 || echo RC=1'.format(cluster_result_dir)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        # an empty stdout (failed command) is treated as a missing directory
        # instead of raising an IndexError on stdout[-1]
        if not stdout or stdout[-1] != 'RC=0':
            error_list.append('*** ERROR: There is not any volume mounted in the result directory.\n')
            error_list.append('You must link a volume in the mounting point {0} for the template {1}.\n'.format(cluster_result_dir, cluster_name))
            OK = False

    # get the dictionary of the result datasets
    if OK:

        # list directories (uncompressed) or regular files (compressed) in the
        # experiment result directory
        if status == 'uncompressed':
            command = 'cd {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(cluster_result_dir, experiment_id)
        elif status == 'compressed':
            command = 'cd {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^d > /dev/null && echo $list; done;'.format(cluster_result_dir, experiment_id)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:

            # patterns to split "<code>-<date>-<time>[.tar.gz]" dataset ids
            if status == 'uncompressed':
                input_pattern = '{0}-(.+)-(.+)'
                output_pattern = '{0} ({1} {2})'
            elif status == 'compressed':
                input_pattern = '{0}-(.+)-(.+).tar.gz'
                output_pattern = '{0} ({1} {2}) [compressed]'

            # (code, name) pairs of the applications recognized here; this
            # replaces the former 16-branch copy-paste elif chain
            app_code_name_list = [
                (xlib.get_cd_hit_est_code(), xlib.get_cd_hit_est_name()),
                (xlib.get_fastqc_code(), xlib.get_fastqc_name()),
                (xlib.get_gzip_code(), xlib.get_gzip_name()),
                (xlib.get_insilico_read_normalization_code(), xlib.get_insilico_read_normalization_name()),
                (xlib.get_quast_code(), xlib.get_quast_name()),
                (xlib.get_ref_eval_code(), xlib.get_ref_eval_name()),
                (xlib.get_rnaquast_code(), xlib.get_rnaquast_name()),
                (xlib.get_rsem_eval_code(), xlib.get_rsem_eval_name()),
                (xlib.get_soapdenovotrans_code(), xlib.get_soapdenovotrans_name()),
                (xlib.get_star_code(), xlib.get_star_name()),
                (xlib.get_transabyss_code(), xlib.get_transabyss_name()),
                (xlib.get_transcript_filter_code(), xlib.get_transcript_filter_name()),
                (xlib.get_transcriptome_blastx_code(), xlib.get_transcriptome_blastx_name()),
                (xlib.get_transrate_code(), xlib.get_transrate_name()),
                (xlib.get_trimmomatic_code(), xlib.get_trimmomatic_name()),
                (xlib.get_trinity_code(), xlib.get_trinity_name()),
            ]

            for line in stdout:
                line = line.rstrip('\n')
                if line == 'lost+found':
                    continue
                result_dataset_id = line
                # the raw dataset id is used when no known code matches
                result_dataset_name = result_dataset_id
                for app_code, app_name in app_code_name_list:
                    if result_dataset_id.startswith(app_code + '-'):
                        mo = re.match(input_pattern.format(app_code), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(app_name, date, time)
                        break
                result_dataset_dict[result_dataset_id] = {'result_dataset_id': result_dataset_id, 'result_dataset_name': result_dataset_name}

    # close the SSH client connection
    if OK and not passed_connection:
        xssh.close_ssh_client_connection(ssh_client)

    # return the control variable, error list and dictionary of the result datasets
    return (OK, error_list, result_dataset_dict)
def form_list_cluster_experiment_processes():
    '''
    List the processes of an experiment in the cluster.

    Interactive console form: asks for a cluster and an experiment, lists the
    result datasets of that experiment with the corresponding application
    name, and waits for the user before returning.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - List experiment processes in the cluster')

    # get the cluster name
    print(xlib.get_separator())
    if xec2.get_running_cluster_list(volume_creator_included=False) != []:
        cluster_name = cinputs.input_cluster_name(volume_creator_included=False, help=True)
    else:
        print('WARNING: There is not any running cluster.')
        OK = False

    # create the SSH client connection
    if OK:
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name, 'master')
        # fixed: the errors were written to an undefined name "log" (NameError);
        # they are printed like in the rest of the console forms
        for error in error_list:
            print(error)

    # get experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id(ssh_client, help=True)
        if experiment_id == '':
            print('WARNING: The cluster {0} has not experiment data.'.format(cluster_name))
            OK = False

    # get the result dataset list of the experiment (directories only)
    if OK:
        command = 'cd {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(xlib.get_cluster_result_dir(), experiment_id)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            result_dataset_id_list = []
            for line in stdout:
                line = line.rstrip('\n')
                if line != 'lost+found':
                    result_dataset_id_list.append(line)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print('*** WARNING: There is not any result dataset of the experiment {0}.'.format(experiment_id))
        else:
            result_dataset_id_list.sort()

            # set data width
            result_dataset_width = 25
            bioinfo_app_width = 25

            # set line template
            line_template = '{0:' + str(result_dataset_width) + '} {1:' + str(bioinfo_app_width) + '}'

            # print header
            print(line_template.format('Result dataset', 'Bioinfo app / Utility'))
            print(line_template.format('=' * result_dataset_width, '=' * bioinfo_app_width))

            # (code, name) pairs of the recognized applications; this replaces
            # the long elif chain, which had xlib.get_cd_hit_code() duplicated
            # (second branch unreachable) and mapped the CD-HIT code to the
            # CD-HIT-EST name — both fixed to match the chains elsewhere in
            # this file (cd_hit_code -> cd_hit_name, cd_hit_est_code ->
            # cd_hit_est_name)
            app_code_name_list = [
                (xlib.get_bedtools_code(), xlib.get_bedtools_name()),
                (xlib.get_blastplus_code(), xlib.get_blastplus_name()),
                (xlib.get_bowtie2_code(), xlib.get_bowtie2_name()),
                (xlib.get_busco_code(), xlib.get_busco_name()),
                (xlib.get_cd_hit_code(), xlib.get_cd_hit_name()),
                (xlib.get_cd_hit_est_code(), xlib.get_cd_hit_est_name()),
                (xlib.get_detonate_code(), xlib.get_detonate_name()),
                (xlib.get_emboss_code(), xlib.get_emboss_name()),
                (xlib.get_fastqc_code(), xlib.get_fastqc_name()),
                (xlib.get_gmap_code(), xlib.get_gmap_name()),
                (xlib.get_gmap_gsnap_code(), xlib.get_gmap_gsnap_name()),
                (xlib.get_gzip_code(), xlib.get_gzip_name()),
                (xlib.get_insilico_read_normalization_code(), xlib.get_insilico_read_normalization_name()),
                (xlib.get_miniconda3_code(), xlib.get_miniconda3_name()),
                (xlib.get_ngshelper_code(), xlib.get_ngshelper_name()),
                (xlib.get_quast_code(), xlib.get_quast_name()),
                (xlib.get_r_code(), xlib.get_r_name()),
                (xlib.get_ref_eval_code(), xlib.get_ref_eval_name()),
                (xlib.get_rnaquast_code(), xlib.get_rnaquast_name()),
                (xlib.get_rsem_code(), xlib.get_rsem_name()),
                (xlib.get_rsem_eval_code(), xlib.get_rsem_eval_name()),
                (xlib.get_samtools_code(), xlib.get_samtools_name()),
                (xlib.get_soapdenovotrans_code(), xlib.get_soapdenovotrans_name()),
                (xlib.get_star_code(), xlib.get_star_name()),
                (xlib.get_transabyss_code(), xlib.get_transabyss_name()),
                (xlib.get_transcript_filter_code(), xlib.get_transcript_filter_name()),
                (xlib.get_transcriptome_blastx_code(), xlib.get_transcriptome_blastx_name()),
                (xlib.get_transrate_code(), xlib.get_transrate_name()),
                (xlib.get_trimmomatic_code(), xlib.get_trimmomatic_name()),
                (xlib.get_trinity_code(), xlib.get_trinity_name()),
            ]

            # print detail lines
            for result_dataset_id in result_dataset_id_list:
                bioinfo_app_name = 'xxx'
                for app_code, app_name in app_code_name_list:
                    if result_dataset_id.startswith(app_code + '-'):
                        bioinfo_app_name = app_name
                        break
                print(line_template.format(result_dataset_id, bioinfo_app_name))

    # close the SSH client connection
    if OK:
        xssh.close_ssh_client_connection(ssh_client)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
def execute(self, event=None):
    '''
    Execute the list the result logs in the cluster.

    Validates the form inputs, lists the result datasets (directories) of the
    selected experiment through the already open SSH client, derives each
    dataset's application name, date and time from its identification, and
    shows the result in a DialogTable.

    event: optional tkinter event (the method can be bound to a widget event).
    '''

    # validate inputs
    OK = self.validate_inputs()
    if not OK:
        message = 'Some input values are not OK.'
        tkinter.messagebox.showerror('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

    # get the run dictionary of the experiment
    if OK:
        # the command keeps only the directories of the experiment result dir
        # ("grep -v ^-" filters out regular files)
        # -- command = 'ls {0}/{1}'.format(xlib.get_cluster_result_dir(), self.wrapper_experiment_id.get())
        command = 'cd {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(xlib.get_cluster_result_dir(), self.wrapper_experiment_id.get())
        (OK, stdout, stderr) = xssh.execute_cluster_command(self.ssh_client, command)
        if OK:
            result_dataset_dict = {}
            for line in stdout:
                line = line.rstrip('\n')
                if line != 'lost+found':
                    result_dataset_id = line
                    # split "<code>-<yymmdd>-<hhmmss>" into its parts; any id
                    # that does not match falls back to placeholder values
                    try:
                        pattern = r'^(.+)\-(.+)\-(.+)$'
                        mo = re.search(pattern, result_dataset_id)
                        bioinfo_app_code = mo.group(1).strip()
                        yymmdd = mo.group(2)
                        hhmmss = mo.group(3)
                        date = '20{0}-{1}-{2}'.format(yymmdd[:2], yymmdd[2:4], yymmdd[4:])
                        time = '{0}:{1}:{2}'.format(hhmmss[:2], hhmmss[2:4], hhmmss[4:])
                    except:
                        bioinfo_app_code = 'xxx'
                        date = '0000-00-00'
                        time = '00:00:00'
                    # map the dataset identification prefix to the
                    # corresponding application name
                    if result_dataset_id.startswith(xlib.get_bedtools_code()+'-'):
                        bioinfo_app_name = xlib.get_bedtools_name()
                    elif result_dataset_id.startswith(xlib.get_blastplus_code()+'-'):
                        bioinfo_app_name = xlib.get_blastplus_name()
                    elif result_dataset_id.startswith(xlib.get_bowtie2_code()+'-'):
                        bioinfo_app_name = xlib.get_bowtie2_name()
                    elif result_dataset_id.startswith(xlib.get_busco_code()+'-'):
                        bioinfo_app_name = xlib.get_busco_name()
                    elif result_dataset_id.startswith(xlib.get_cd_hit_code()+'-'):
                        bioinfo_app_name = xlib.get_cd_hit_name()
                    elif result_dataset_id.startswith(xlib.get_cd_hit_est_code()+'-'):
                        bioinfo_app_name = xlib.get_cd_hit_est_name()
                    elif result_dataset_id.startswith(xlib.get_detonate_code()+'-'):
                        bioinfo_app_name = xlib.get_detonate_name()
                    elif result_dataset_id.startswith(xlib.get_emboss_code()+'-'):
                        bioinfo_app_name = xlib.get_emboss_name()
                    elif result_dataset_id.startswith(xlib.get_fastqc_code()+'-'):
                        bioinfo_app_name = xlib.get_fastqc_name()
                    elif result_dataset_id.startswith(xlib.get_gmap_code()+'-'):
                        bioinfo_app_name = xlib.get_gmap_name()
                    elif result_dataset_id.startswith(xlib.get_gmap_gsnap_code()+'-'):
                        bioinfo_app_name = xlib.get_gmap_gsnap_name()
                    elif result_dataset_id.startswith(xlib.get_gzip_code()+'-'):
                        bioinfo_app_name = xlib.get_gzip_name()
                    elif result_dataset_id.startswith(xlib.get_insilico_read_normalization_code()+'-'):
                        bioinfo_app_name = xlib.get_insilico_read_normalization_name()
                    elif result_dataset_id.startswith(xlib.get_miniconda3_code()+'-'):
                        bioinfo_app_name = xlib.get_miniconda3_name()
                    elif result_dataset_id.startswith(xlib.get_ngshelper_code()+'-'):
                        bioinfo_app_name = xlib.get_ngshelper_name()
                    elif result_dataset_id.startswith(xlib.get_quast_code()+'-'):
                        bioinfo_app_name = xlib.get_quast_name()
                    elif result_dataset_id.startswith(xlib.get_r_code()+'-'):
                        bioinfo_app_name = xlib.get_r_name()
                    elif result_dataset_id.startswith(xlib.get_ref_eval_code()+'-'):
                        bioinfo_app_name = xlib.get_ref_eval_name()
                    elif result_dataset_id.startswith(xlib.get_rnaquast_code()+'-'):
                        bioinfo_app_name = xlib.get_rnaquast_name()
                    elif result_dataset_id.startswith(xlib.get_rsem_code()+'-'):
                        bioinfo_app_name = xlib.get_rsem_name()
                    elif result_dataset_id.startswith(xlib.get_rsem_eval_code()+'-'):
                        bioinfo_app_name = xlib.get_rsem_eval_name()
                    elif result_dataset_id.startswith(xlib.get_samtools_code()+'-'):
                        bioinfo_app_name = xlib.get_samtools_name()
                    elif result_dataset_id.startswith(xlib.get_soapdenovotrans_code()+'-'):
                        bioinfo_app_name = xlib.get_soapdenovotrans_name()
                    elif result_dataset_id.startswith(xlib.get_star_code()+'-'):
                        bioinfo_app_name = xlib.get_star_name()
                    elif result_dataset_id.startswith(xlib.get_transabyss_code()+'-'):
                        bioinfo_app_name = xlib.get_transabyss_name()
                    elif result_dataset_id.startswith(xlib.get_transcript_filter_code()+'-'):
                        bioinfo_app_name = xlib.get_transcript_filter_name()
                    elif result_dataset_id.startswith(xlib.get_transcriptome_blastx_code()+'-'):
                        bioinfo_app_name = xlib.get_transcriptome_blastx_name()
                    elif result_dataset_id.startswith(xlib.get_transrate_code()+'-'):
                        bioinfo_app_name = xlib.get_transrate_name()
                    elif result_dataset_id.startswith(xlib.get_trimmomatic_code()+'-'):
                        bioinfo_app_name = xlib.get_trimmomatic_name()
                    elif result_dataset_id.startswith(xlib.get_trinity_code()+'-'):
                        bioinfo_app_name = xlib.get_trinity_name()
                    else:
                        bioinfo_app_name = 'xxx'
                    result_dataset_dict[result_dataset_id] = {'experiment_id': self.wrapper_experiment_id.get(), 'result_dataset_id': result_dataset_id, 'bioinfo_app': bioinfo_app_name, 'date': date, 'time': time}

    # verify if there are any nodes running
    if OK:
        if result_dataset_dict == {}:
            message = 'There is not any run.'
            tkinter.messagebox.showwarning('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

    # build the data list (column order of the dialog table)
    if OK:
        data_list = ['experiment_id', 'result_dataset_id', 'bioinfo_app', 'date', 'time']

    # build the data dictionary (per-column header text, width and alignment)
    if OK:
        data_dict = {}
        data_dict['experiment_id']= {'text': 'Experiment id. / Process', 'width': 200, 'aligment': 'left'}
        data_dict['result_dataset_id'] = {'text': 'Result dataset', 'width': 200, 'aligment': 'left'}
        data_dict['bioinfo_app'] = {'text': 'Bioinfo app / Utility', 'width': 200, 'aligment': 'left'}
        data_dict['date'] = {'text': 'Date', 'width': 80, 'aligment': 'right'}
        data_dict['time'] = {'text': 'Time', 'width': 80, 'aligment': 'right'}

    # create the dialog Table to show the nodes running
    # (400/900 are presumably the dialog height/width — TODO confirm against
    # gdialogs.DialogTable)
    if OK:
        dialog_table = gdialogs.DialogTable(self, 'Experiment runs in {0}/{1}'.format(xlib.get_cluster_result_dir(), self.wrapper_experiment_id.get()), 400, 900, data_list, data_dict, result_dataset_dict, 'view_result_logs', [self.wrapper_cluster_name.get()])
        self.wait_window(dialog_table)

    # close the form
    if OK:
        self.close()
def form_list_cluster_experiment_processes():
    '''
    List the processes of an experiment in the cluster.

    Interactively asks for a running cluster and an experiment identification,
    then prints every result dataset of that experiment together with the
    bioinformatic application or utility that produced it.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - List experiment processes in the cluster')

    # get the cluster name
    print(xlib.get_separator())
    if xec2.get_running_cluster_list(only_environment_cluster=True, volume_creator_included=False) != []:
        cluster_name = cinputs.input_cluster_name(volume_creator_included=False, help=True)
    else:
        print('WARNING: There is not any running cluster.')
        OK = False

    # create the SSH client connection
    if OK:
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name)
        for error in error_list:
            print(error)

    # get the experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id(ssh_client, help=True)
        if experiment_id == '':
            print(f'WARNING: The cluster {cluster_name} does not have experiment data.')
            OK = False

    # get the result dataset list of the experiment (directories only,
    # i.e. entries whose long listing does not start with "-")
    if OK:
        command = f'cd {xlib.get_cluster_result_dir()}/{experiment_id}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'
        (OK, stdout, _) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            result_dataset_id_list = []
            for line in stdout:
                line = line.rstrip('\n')
                if line != 'lost+found':
                    result_dataset_id_list.append(line)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print(f'*** WARNING: There is not any result dataset of the experiment {experiment_id}.')
        else:
            result_dataset_id_list.sort()

            # set data width
            result_dataset_width = 30
            bioinfo_app_width = 25

            # set the line template
            line = '{0:' + str(result_dataset_width) + '} {1:' + str(bioinfo_app_width) + '}'

            # print header
            print(line.format('Result dataset', 'Bioinfo app / Utility'))
            print(line.format('=' * result_dataset_width, '=' * bioinfo_app_width))

            # ordered (code getter, name getter) pairs used to identify the
            # application from the result dataset id prefix ("<code>-...").
            # The order is significant and reproduces the original elif chain:
            # some codes are prefixes of others (e.g. GMAP-GSNAP is checked
            # before GMAP); getters are stored uncalled so only the getters up
            # to the first match are evaluated, as in the original code.
            # NOTE(review): the two "get_get_toa_process_..." getter names are
            # reproduced verbatim from the original chain -- confirm that xlib
            # really defines them with the doubled "get_get_" prefix.
            code_name_getters = [
                (xlib.get_bedtools_code, xlib.get_bedtools_name),
                (xlib.get_blastplus_code, xlib.get_blastplus_name),
                (xlib.get_bcftools_code, xlib.get_bcftools_name),
                (xlib.get_bowtie2_code, xlib.get_bowtie2_name),
                (xlib.get_busco_code, xlib.get_busco_name),
                (xlib.get_cd_hit_code, xlib.get_cd_hit_name),
                (xlib.get_cd_hit_est_code, xlib.get_cd_hit_est_name),
                (xlib.get_cuffdiff_code, xlib.get_cuffdiff_name),
                (xlib.get_cufflinks_code, xlib.get_cufflinks_name),
                (xlib.get_cufflinks_cuffmerge_code, xlib.get_cufflinks_cuffmerge_name),
                (xlib.get_cuffnorm_code, xlib.get_cuffnorm_name),
                (xlib.get_cuffquant_code, xlib.get_cuffquant_name),
                (xlib.get_cutadapt_code, xlib.get_cutadapt_name),
                (xlib.get_ddradseq_simulation_code, xlib.get_ddradseq_simulation_name),
                (xlib.get_ddradseqtools_code, xlib.get_ddradseqtools_name),
                (xlib.get_detonate_code, xlib.get_detonate_name),
                (xlib.get_diamond_code, xlib.get_diamond_name),
                (xlib.get_emboss_code, xlib.get_emboss_name),
                (xlib.get_entrez_direct_code, xlib.get_entrez_direct_name),
                (xlib.get_express_code, xlib.get_express_name),
                (xlib.get_fastqc_code, xlib.get_fastqc_name),
                (xlib.get_ggtrinity_code, xlib.get_ggtrinity_name),
                (xlib.get_gmap_gsnap_code, xlib.get_gmap_gsnap_name),
                (xlib.get_gmap_code, xlib.get_gmap_name),
                (xlib.get_gsnap_code, xlib.get_gsnap_name),
                (xlib.get_gzip_code, xlib.get_gzip_name),
                (xlib.get_hisat2_code, xlib.get_hisat2_name),
                (xlib.get_htseq_code, xlib.get_htseq_name),
                (xlib.get_htseq_count_code, xlib.get_htseq_count_name),
                (xlib.get_insilico_read_normalization_code, xlib.get_insilico_read_normalization_name),
                (xlib.get_ipyrad_code, xlib.get_ipyrad_name),
                (xlib.get_kallisto_code, xlib.get_kallisto_name),
                (xlib.get_miniconda3_code, xlib.get_miniconda3_name),
                (xlib.get_ngshelper_code, xlib.get_ngshelper_name),
                (xlib.get_quast_code, xlib.get_quast_name),
                (xlib.get_r_code, xlib.get_r_name),
                (xlib.get_raddesigner_code, xlib.get_raddesigner_name),
                (xlib.get_ref_eval_code, xlib.get_ref_eval_name),
                (xlib.get_rnaquast_code, xlib.get_rnaquast_name),
                (xlib.get_rsem_code, xlib.get_rsem_name),
                (xlib.get_rsem_eval_code, xlib.get_rsem_eval_name),
                (xlib.get_rsitesearch_code, xlib.get_rsitesearch_name),
                (xlib.get_samtools_code, xlib.get_samtools_name),
                (xlib.get_soapdenovo2_code, xlib.get_soapdenovo2_name),
                (xlib.get_soapdenovotrans_code, xlib.get_soapdenovotrans_name),
                (xlib.get_star_code, xlib.get_star_name),
                (xlib.get_starcode_code, xlib.get_starcode_name),
                (xlib.get_toa_code, xlib.get_toa_name),
                (xlib.get_toa_process_download_basic_data_code, xlib.get_toa_process_download_basic_data_name),
                (xlib.get_toa_process_download_dicots_04_code, xlib.get_toa_process_download_dicots_04_name),
                (xlib.get_toa_process_download_gene_code, xlib.get_toa_process_download_gene_name),
                (xlib.get_toa_process_download_go_code, xlib.get_toa_process_download_go_name),
                (xlib.get_toa_process_download_gymno_01_code, xlib.get_toa_process_download_gymno_01_name),
                (xlib.get_toa_process_download_interpro_code, xlib.get_toa_process_download_interpro_name),
                (xlib.get_toa_process_download_monocots_04_code, xlib.get_toa_process_download_monocots_04_name),
                (xlib.get_toa_process_download_taxonomy_code, xlib.get_toa_process_download_taxonomy_name),
                (xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_code, xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name),
                (xlib.get_toa_process_gilist_viridiplantae_protein_gi_code, xlib.get_toa_process_gilist_viridiplantae_protein_gi_name),
                (xlib.get_toa_process_load_basic_data_code, xlib.get_toa_process_load_basic_data_name),
                (xlib.get_toa_process_load_dicots_04_code, xlib.get_toa_process_load_dicots_04_name),
                (xlib.get_toa_process_load_gene_code, xlib.get_toa_process_load_gene_name),
                (xlib.get_toa_process_load_go_code, xlib.get_toa_process_load_go_name),
                (xlib.get_toa_process_load_gymno_01_code, xlib.get_toa_process_load_gymno_01_name),
                (xlib.get_toa_process_load_interpro_code, xlib.get_toa_process_load_interpro_name),
                (xlib.get_toa_process_load_monocots_04_code, xlib.get_toa_process_load_monocots_04_name),
                (xlib.get_toa_process_merge_annotations_code, xlib.get_toa_process_merge_annotations_name),
                (xlib.get_toa_process_nr_blastplus_db_code, xlib.get_toa_process_nr_blastplus_db_name),
                (xlib.get_toa_process_nr_diamond_db_code, xlib.get_toa_process_nr_diamond_db_name),
                (xlib.get_toa_process_nt_blastplus_db_code, xlib.get_toa_process_nt_blastplus_db_name),
                (xlib.get_toa_process_pipeline_aminoacid_code, xlib.get_toa_process_pipeline_aminoacid_name),
                (xlib.get_toa_process_pipeline_nucleotide_code, xlib.get_toa_process_pipeline_nucleotide_name),
                (xlib.get_toa_process_proteome_dicots_04_code, xlib.get_toa_process_proteome_dicots_04_name),
                (xlib.get_toa_process_proteome_gymno_01_code, xlib.get_toa_process_proteome_gymno_01_name),
                (xlib.get_toa_process_proteome_monocots_04_code, xlib.get_toa_process_proteome_monocots_04_name),
                (xlib.get_toa_process_proteome_refseq_plant_code, xlib.get_toa_process_proteome_refseq_plant_name),
                (xlib.get_toa_process_rebuild_toa_database_code, xlib.get_get_toa_process_rebuild_toa_database_name),
                (xlib.get_toa_process_recreate_toa_database_code, xlib.get_get_toa_process_recreate_toa_database_name),
                (xlib.get_tophat_code, xlib.get_tophat_name),
                (xlib.get_transabyss_code, xlib.get_transabyss_name),
                (xlib.get_transcript_filter_code, xlib.get_transcript_filter_name),
                (xlib.get_transcriptome_blastx_code, xlib.get_transcriptome_blastx_name),
                (xlib.get_transdecoder_code, xlib.get_transdecoder_name),
                (xlib.get_transrate_code, xlib.get_transrate_name),
                (xlib.get_trimmomatic_code, xlib.get_trimmomatic_name),
                (xlib.get_trinity_code, xlib.get_trinity_name),
                (xlib.get_variant_calling_code, xlib.get_variant_calling_name),
                (xlib.get_vcftools_code, xlib.get_vcftools_name),
                (xlib.get_vcftools_perl_libraries_code, xlib.get_vcftools_perl_libraries_name),
                (xlib.get_vsearch_code, xlib.get_vsearch_name),
            ]

            # print detail lines: first matching code prefix wins; unknown
            # prefixes keep the original placeholder 'xxx'
            for result_dataset_id in result_dataset_id_list:
                bioinfo_app_name = 'xxx'
                for get_code, get_name in code_name_getters:
                    if result_dataset_id.startswith(get_code() + '-'):
                        bioinfo_app_name = get_name()
                        break
                print(line.format(result_dataset_id, bioinfo_app_name))

    # close the SSH client connection
    if OK:
        xssh.close_ssh_client_connection(ssh_client)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
def build_gzip_process_script(cluster_name, dataset_type, current_run_dir):
    '''
    Build the current gzip process script.

    Writes a Bash script (in the local path returned by
    get_gzip_process_script) that compresses or decompresses the files of the
    dataset described in the gzip config file, and e-mails the contact address
    when it ends OK or wrong.

    Returns (OK, error_list): OK is False and error_list holds a message when
    the script file can not be created.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the gzip option dictionary
    gzip_option_dict = xlib.get_option_dict(get_gzip_config_file(dataset_type))

    # get the options
    experiment_id = gzip_option_dict['identification']['experiment_id']
    dataset_type_2 = gzip_option_dict['identification']['dataset_type']
    dataset_id = gzip_option_dict['identification']['dataset_id']
    action = gzip_option_dict['gzip parameters']['action']

    # get the sections list
    sections_list = sorted(gzip_option_dict.keys())

    # build the dataset subdirectory and file name lists
    dataset_subdirectory_list = []
    file_name_list = []
    for section in sections_list:
        # if the section identification is like file-n
        if re.match('^file-[0-9]+$', section):
            dataset_subdirectory = gzip_option_dict[section]['dataset_subdirectory']
            dataset_subdirectory_list.append(dataset_subdirectory)
            file_name = gzip_option_dict[section]['file_name']
            file_name_list.append(file_name)

    # get the dataset directory
    if dataset_type_2 == 'reference':
        dataset_dir = xlib.get_cluster_reference_dataset_dir(dataset_id)
    elif dataset_type_2 == 'database':
        dataset_dir = xlib.get_cluster_database_dataset_dir(dataset_id)
    elif dataset_type_2 == 'read':
        dataset_dir = xlib.get_cluster_experiment_read_dataset_dir(experiment_id, dataset_id)
    elif dataset_type_2 in ('result', 'whole-result'):
        dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(experiment_id, dataset_id)

    # write the gzip process script
    try:
        if not os.path.exists(os.path.dirname(get_gzip_process_script(dataset_type_2))):
            os.makedirs(os.path.dirname(get_gzip_process_script(dataset_type_2)))
        with open(get_gzip_process_script(dataset_type_2), mode='w', encoding='utf8', newline='\n') as file_id:
            file_id.write('{0}\n'.format('#!/bin/bash'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('SEP="#########################################"'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function init'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    INIT_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'.format(cluster_name)))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function run_gzip_process'))
            file_id.write('{0}\n'.format('{'))
            if dataset_type_2 in ['reference', 'database', 'read', 'result']:
                # one gzip invocation per configured file
                file_id.write('{0}\n'.format('    cd {0}'.format(current_run_dir)))
                for i in range(len(dataset_subdirectory_list)):
                    file_id.write('{0}\n'.format('    echo "$SEP"'))
                    file_id.write('{0}\n'.format('    echo "Compressing/decompressing {0}/{1}/{2} ..."'.format(dataset_dir, dataset_subdirectory_list[i], file_name_list[i])))
                    file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                    file_id.write('{0}\n'.format('        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'))
                    if action == 'compress':
                        file_id.write('{0}\n'.format('        gzip {0}/{1}/{2}'.format(dataset_dir, dataset_subdirectory_list[i], file_name_list[i])))
                    elif action == 'decompress':
                        file_id.write('{0}\n'.format('        gzip --decompress {0}/{1}/{2}'.format(dataset_dir, dataset_subdirectory_list[i], file_name_list[i])))
                    file_id.write('{0}\n'.format('    RC=$?'))
                    file_id.write('{0}\n'.format('    if [ $RC -ne 0 ]; then manage_error gzip $RC; fi'))
            elif dataset_type_2 == 'whole-result':
                # tar the whole result dataset directory, then remove it
                file_id.write('{0}\n'.format('    cd {0}'.format(current_run_dir)))
                file_id.write('{0}\n'.format('    echo "$SEP"'))
                file_id.write('{0}\n'.format('    echo "Compressing/decompressing {0} ..."'.format(dataset_dir)))
                file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                file_id.write('{0}\n'.format('        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'))
                if action == 'compress':
                    file_id.write('{0}\n'.format('        tar --create --gzip --verbose --file={0}.tar.gz {0}'.format(dataset_dir)))
                elif action == 'decompress':
                    # NOTE(review): on decompress, dataset_dir is used directly as
                    # the tar file path (no ".tar.gz" suffix is appended) -- confirm
                    # the configured dataset id already names the archive.
                    file_id.write('{0}\n'.format('        tar --extract --gzip --verbose --file={0} --directory=/'.format(dataset_dir)))
                file_id.write('{0}\n'.format('    RC=$?'))
                file_id.write('{0}\n'.format('    if [ $RC -ne 0 ]; then manage_error tar $RC; fi'))
                file_id.write('{0}\n'.format('    echo "$SEP"'))
                file_id.write('{0}\n'.format('    echo "Removing {0} ..."'.format(dataset_dir)))
                file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                file_id.write('{0}\n'.format('        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'))
                file_id.write('{0}\n'.format('        rm -rf {0}'.format(dataset_dir)))
                file_id.write('{0}\n'.format('    RC=$?'))
                file_id.write('{0}\n'.format('    if [ $RC -ne 0 ]; then manage_error rm $RC; fi'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function end'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format('    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_gzip_name())))
            # fixed: the second placeholder was "{0}" in the original, so the
            # cluster name argument was never inserted in the message
            file_id.write('{0}\n'.format('    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_gzip_name(), cluster_name)))
            file_id.write('{0}\n'.format('    mail --append "Content-type: text/html;" --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'))
            file_id.write('{0}\n'.format('    exit 0'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function manage_error'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "ERROR: $1 returned error $2"'))
            file_id.write('{0}\n'.format('    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format('    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_gzip_name())))
            # fixed: same "{0}" vs "{1}" placeholder bug as in function end
            file_id.write('{0}\n'.format('    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_gzip_name(), cluster_name)))
            file_id.write('{0}\n'.format('    mail --append "Content-type: text/html;" --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'))
            file_id.write('{0}\n'.format('    exit 3'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function calculate_duration'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    DURATION=`expr $END_DATETIME - $INIT_DATETIME`'))
            file_id.write('{0}\n'.format('    HH=`expr $DURATION / 3600`'))
            file_id.write('{0}\n'.format('    MM=`expr $DURATION % 3600 / 60`'))
            file_id.write('{0}\n'.format('    SS=`expr $DURATION % 60`'))
            file_id.write('{0}\n'.format('    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('init'))
            file_id.write('{0}\n'.format('run_gzip_process'))
            file_id.write('{0}\n'.format('end'))
    except Exception:
        # narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
        # are no longer swallowed
        error_list.append('*** ERROR: The file {0} can not be created'.format(get_gzip_process_script(dataset_type_2)))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)