def moveFiles(file_list, dest_dir, query_len):
    """
    Copy each file in file_list to dest_dir via rsync and log the move.

    :param file_list: list of files to be moved
    :param dest_dir: the destination of the move
    :param query_len: the number of rows in the query sheet (used to check that
                      the expected number of files -- three per row -- was moved)
    """
    count = 0
    for file in file_list:
        # throw error/exit if file isn't found in src_dir
        if not os.path.isfile(file):
            print('%s cannot be found and therefore can not be moved. '
                  'Please check %s for the directory with the run number in the filename' % (file, COUNT_LTS))
            sys.exit(1)
        dest_full_path = os.path.join(dest_dir, os.path.basename(file))
        print('...copying {} to {}'.format(os.path.basename(file), dest_full_path))
        cmd = 'rsync -aHv {} {}'.format(file, dest_full_path)
        utils.executeSubProcess(cmd)
        count = count + 1
    # three files are moved per query row (count, novoalign log, sorted bam -- see the caller),
    # hence 3 * query_len. NOTE: the message previously claimed "twice the number of rows",
    # which contradicted this check -- corrected to "three times".
    if not count == 3 * query_len:
        print("\nThe number of files moved is {}. The number of rows in the query is {}.\n \
If moving count, novoalign log and bam files (default), the number of files should be three times the number of rows.\n \
If it is not, Check the query, and {}, and try again".format(count, query_len, COUNT_LTS))
    else:
        print("Your data has been copied to your output directory!")
def main(argv):
    """
    Interactively rsync either a directory or individual file(s) to a destination directory,
    creating the destination if the user asks for it.

    :param argv: cmd line arguments
    """
    print('\nWARNING: IF YOU MOVING THE OUTPUT OF ALIGN_COUNTS TO LTS_ALIGN_EXPR, PLEASE USE SCRIPT MOVE_ALIGNMENT_COUNT_FILES.PY\n')
    print('IMPORTANT: read the print statements carefully. Details are important in this one.')
    args = parseArgs(argv)
    # first prompt: copy a directory ('d') or individual file(s) ('f'); one retry before exiting
    print('\nDo you want to copy a directory or individual file(s)? Enter \'d\' or \'f\': ')
    copy_mode = input()
    if copy_mode not in ('d', 'f'):
        print('\nlast chance: only \'d\' or \'f\' are recognized.\nDo you want to copy a directory or individual file(s)? Enter \'d\' or \'f\': ')
        copy_mode = input()
        if copy_mode not in ('d', 'f'):
            sys.exit('only \'d\' or \'f\' are recognized. Try again from the beginning.')
    if copy_mode == 'd':
        # directory mode: strip any trailing slash so rsync copies the directory itself
        source = utils.removeForwardSlash(args.source)
    else:
        # file mode: a single file ('s') is used verbatim; otherwise the directory
        # contents are copied (trailing slash added for rsync semantics)
        print('\nIf this is a single file, enter \'s\'. Else, hit enter. The assumption if you do not enter \'s\' is that\n'
              'you wish to move the contents of a directory. The script will take care of the forward slash formatting: ')
        single_file_response = input()
        if single_file_response == 's':
            source = args.source
        else:
            source = utils.addForwardSlash(args.source)
    # confirm the destination exists; offer to create it, otherwise exit
    if os.path.isdir(args.destination):
        destination = utils.addForwardSlash(args.destination)
    else:
        print('\nThe Directory you wish to copy the files to Does Not Exist. If you wish to create the directory, enter \'y\'. Else, the script will exit.\n')
        create_response = input()
        if create_response != 'y':
            sys.exit('Script exiting -- Correct the filepath and try again if you wish.')
        utils.mkdirp(args.destination)
        destination = utils.addForwardSlash(args.destination)
    cmd = 'rsync -aHv %s %s' % (source, destination)
    print('\nexecuting %s\n' % cmd)
    utils.executeSubProcess(cmd)
    print('\nRsync Complete!')
def main(argv):
    """
    main method: write a lookup file + sbatch script to run qual_assess_1 over every
    run directory named in the query sheet, then submit the sbatch job.

    :param argv: cmd line arguments
    """
    # parse cmd line arguments
    args = parseArgs(argv)
    print('...parsing cmd line arguments')
    query_sheet_path = args.query_sheet
    try:
        if not os.path.isfile(query_sheet_path):
            raise FileNotFoundError('DNE: %s' % query_sheet_path)
    except FileNotFoundError:
        # bugfix: exit rather than print-and-continue -- previously query_df was left
        # undefined and the script crashed with a NameError further down
        sys.exit('The query sheet path is not valid. Check and try again')
    else:
        query_df = utils.readInDataframe(query_sheet_path)
    # store interactive flag
    try:
        interactive_flag = args.interactive
    except AttributeError:
        interactive_flag = False
    run_list = list(query_df.runNumber.unique())
    # create paths from /scratch to the run directory
    sd = StandardData(config_file=args.config_file, interactive=interactive_flag)
    run_path_list = [os.path.join(sd.align_count_results, 'run_' + str(x) + '_samples') for x in run_list]
    # check that paths exist TODO: CHECK CONTENTS OF SUBDIRECTORY FOR COMPLETENESS
    print('...validating paths to run directories')
    validated_run_path_list = validatePaths(sd, run_list, run_path_list)
    # write lookup file of run number paths for the sbatch cmd (see https://htcfdocs.readthedocs.io/en/latest/runningjobs/)
    lookup_filename = 'qual_assess_1_lookup_' + str(sd.year_month_day) + '_' + str(utils.hourMinuteSecond()) + '.txt'
    lookup_output_path = os.path.join(sd.job_scripts, lookup_filename)
    print('...writing lookup file for sbatch script to: %s' % lookup_output_path)
    with open(lookup_output_path, 'w') as file:
        file.write('\n'.join(map(str, validated_run_path_list)))
    # write sbatch script to run qual_assess on all runs in lookup file above
    script = writeSbatchScript(sd, args.user_name, validated_run_path_list, lookup_output_path, query_sheet_path)
    # bugfix: str() now wraps only hourMinuteSecond(), matching lookup_filename above;
    # previously the '.sbatch' suffix was concatenated inside the str() call
    sbatch_filename = 'qual_assess_1_batch_' + str(sd.year_month_day) + '_' + str(utils.hourMinuteSecond()) + '.sbatch'
    qual_assess_job_script_path = os.path.join(sd.job_scripts, sbatch_filename)
    print('...writing sbatch script to: %s' % qual_assess_job_script_path)
    with open(qual_assess_job_script_path, "w") as f:
        f.write(script)
    cmd = 'sbatch %s' % qual_assess_job_script_path
    utils.executeSubProcess(cmd)
    print('\nCheck status by cat\'ing the sbatch file above and then cat\'ing the .out file in the sbatch script\n')
def report(self, key_columns_only=False):
    """
    Generate a report on the database. The intent is for this to be used to generate a
    full report on the entire database. However, any number of subdirectories may be
    passed, up to all of the subdirectories in database_files.

    :params key_columns_only: only check actual filename and key columns for adherence to specs
    """
    # bugfix: select the output filename BEFORE clearing stale sheets. Previously the
    # default-named sheet was removed and only afterwards was accuracy_check_output_file
    # reassigned to the keyColumn filename, so a stale keyColumn sheet was never removed.
    if key_columns_only:
        self.accuracy_check_output_file = self.accuracyCheckFilename('keyColumn')
    # remove old sheets from the same day if they exist
    if os.path.isfile(self.accuracy_check_output_file):
        remove_cmd = 'rm %s' % self.accuracy_check_output_file
        utils.executeSubProcess(remove_cmd)
    # write one report section per subdirectory of database_files
    for subdirectory_name, subdirectory_path_list in self.database_dict.items():
        self.subdirectoryReport(subdirectory_name, subdirectory_path_list, key_columns_only)
def main(argv):
    """
    main method: move the count, novoalign log and sorted bam files described in a
    query sheet from /lts to a (new) experiment directory under the output directory.

    :param argv: cmd line arguments
    """
    # store suffixes of the files we wish to move
    count_suffix = '_read_count.tsv'
    novoalign_log_suffix = '_novoalign.log'
    sorted_alignment_suffix = '_sorted_aligned_reads.bam'
    # parse cmd line arguments
    args = parseArgs(argv)
    try:
        if not args.query_sheet.endswith('.csv'):
            raise ValueError('NotCsv')
    except ValueError:
        # bugfix: the message previously contained a bare %s with no interpolated value,
        # so the literal '%s' was printed instead of the offending path
        sys.exit('%s does not end with a .csv. Are you sure it is a .csv? -qs takes the .csv output of queryDB.py. Check and resubmit.' % args.query_sheet)
    if args.leading_zero_rn:
        leading_zero_list = args.leading_zero_rn
    else:
        leading_zero_list = ''
    # read in database_df (The path to the result of a query against the metadata base using queryDB)
    database_df = pd.read_csv(args.query_sheet)
    # create a directory for the experiment
    destination_directory = os.path.join(args.output_directory, args.experiment_name)
    cmd = "mkdir -p {}".format(destination_directory)
    utils.executeSubProcess(cmd)
    # get list of count files
    count_file_list = filepathList(database_df, count_suffix, leading_zero_list)
    # get list of novoalign logs
    novoalign_log_list = filepathList(database_df, novoalign_log_suffix, leading_zero_list)
    # get list of sorted alignment files
    sorted_alignment_list = filepathList(database_df, sorted_alignment_suffix, leading_zero_list)
    # concat the lists together
    file_list = count_file_list + novoalign_log_list + sorted_alignment_list
    # move the files from /lts to the output directory (generally the user's scratch)
    moveFiles(file_list, destination_directory, len(database_df))
def setGenomeFiles(self): """ set genome_files path and download, if genome_files DNE. If config_file has genome_files = https://... Then the zip file will be downloaded from that path TODO: error checking if the config_file https path doesn't work """ # if genome_files is set in config file if hasattr(self, 'genome_files'): # if the config_file has an entry genome_files = 'https://...' (link to the hosted genome files in /lts -- it is important that there be a single source for genome_files) if self.genome_files.startswith('https'): # and the file genome_files DNE in user_rnaseq_pipeline_directory, download from path if not os.path.isdir( os.path.join(self.user_rnaseq_pipeline_directory, 'genome_files')): zipped_genome_files_path = os.path.join( self.user_rnaseq_pipeline_directory, 'genome_files.zip') download_genome_files_cmd = 'wget -O %s %s' % ( zipped_genome_files_path, self.genome_files) utils.executeSubProcess(download_genome_files_cmd) unzip_genome_files_cmd = 'unzip %s -d %s && rm %s' % ( zipped_genome_files_path, self.user_rnaseq_pipeline_directory, zipped_genome_files_path) utils.executeSubProcess(unzip_genome_files_cmd) # set path of self.genome_files to subdir of user_rnaseq_pipeline directory setattr( self, 'genome_files', os.path.join(self.user_rnaseq_pipeline_directory, 'genome_files')) # if the file DNE and interactive flag is set to False (not in interactive session on htcf), then download from /lts if not (self.interactive or os.path.exists(self.genome_files)): genome_files_full_path = os.path.join(self.lts_rnaseq_data, self.pipeline_version, 'genome_files.zip') cmd = 'unzip {} -d {}'.format(genome_files_full_path, self.user_rnaseq_pipeline_directory) utils.executeSubProcess(cmd)
def main(argv):
    """
    main method: stage fastq files from /lts to scratch, build the nextflow fastq-file
    list and config for the align/count pipeline, then write and submit the sbatch job.

    :param argv: cmd line arguments
    """
    # parse cmd line arguments
    args = parseArgs(argv)
    query_sheet_path = args.query_sheet
    try:
        if not os.path.isfile(query_sheet_path):
            raise FileNotFoundError
    except FileNotFoundError:
        # bugfix: exit rather than print-and-continue -- everything below depends on the query sheet
        sys.exit('Query sheet path not valid. Check and try again.')
    try:
        interactive_flag = args.interactive
    except AttributeError:
        interactive_flag = False
    # instantiate DatabaseObject --> mostly this will be for access to StandardData paths
    db = DatabaseObject(query_sheet_path=query_sheet_path, config_file=args.config_file, interactive=interactive_flag)
    # read in dataframe
    db.query_df = utils.readInDataframe(db.query_sheet_path)
    # add column organism which identifies either KN99 or S288C_R64 depending on whether genotype1 starts with CNAG
    # TODO: this is point of weakness -- need to keep an eye here
    db.query_df['organism'] = np.where(db.query_df['genotype1'].str.startswith('CNAG'), 'KN99', 'S288C_R64')
    # cast libraryDate to datetime format
    db.query_df['libraryDate'] = pd.to_datetime(db.query_df['libraryDate'])
    # create strandedness column based on libraryDate. May change to prep protocol at some point, but for now this is best
    db.query_df['strandedness'] = np.where(db.query_df['libraryDate'] > '2015-10-25', 'reverse', 'no')
    # add leading zero to runNumber, if necessary -- take care of in loop
    db.query_df['runNumber'] = db.query_df['runNumber'].astype(str)
    # list to store run_directory values, later added as a dataframe column
    run_directory_list = []
    for index, row in db.query_df.iterrows():
        # some early runs have run numbers that start with zero in /lts. 0s are dropped in df b/c they are read in as ints
        # this step adds the zero and casts the row to str
        run_num_tmp = int(float(row['runNumber']))  # TODO: super ugly, needs to be fixed. Not sure why this is now getting read in as 4422.0, eg as of 20200923
        # TODO: Probably the best way to is to always read runnumbers as strings -- requires changing
        # _run_num_with_zeros keys to strings, and checking the rest of the codebase that uses this
        if run_num_tmp in db._run_numbers_with_zeros:
            run_number = str(db._run_numbers_with_zeros[run_num_tmp])
        else:
            run_number = run_num_tmp
        # create run directory name, eg run_1234_samples
        run_directory = 'run_' + str(run_number) + '_samples'  # SEE TODO above
        # add to list
        run_directory_list.append(run_directory)
        # create fastqfilename path
        try:
            fastq_filename = os.path.basename(row['fastqFileName']).rstrip()
        except TypeError:
            sys.exit("%s <-- not a fastqfilename?" % row['fastqFileName'])
        fastq_scratch_path = os.path.join(db.scratch_sequence, run_directory, fastq_filename)
        # move fastq file to scratch if it is not already there
        if not os.path.exists(fastq_scratch_path):
            fastq_lts_path = os.path.join(db.lts_sequence, run_directory, fastq_filename)
            scratch_run_directory_path = os.path.join(db.scratch_sequence, run_directory)
            utils.mkdirp(scratch_run_directory_path)
            print('...moving %s to %s' % (fastq_lts_path, scratch_run_directory_path))
            rsync_cmd = 'rsync -aHv %s %s' % (fastq_lts_path, scratch_run_directory_path)
            utils.executeSubProcess(rsync_cmd)
        # update fastqFileName in query_df
        db.query_df.loc[index, 'fastqFileName'] = fastq_scratch_path
    # add column runDirectory from run_directory_list
    db.query_df['runDirectory'] = run_directory_list
    # use OrganismDataObject to get paths to novoalign_index and annotation files
    kn99_organism_data = OrganismData(organism='KN99')
    kn99_novoalign_index = kn99_organism_data.novoalign_index
    # this is annotations + nc, t, r RNA with nc,t,r RNA annotations overlapping with protein coding ON SAME STRAND removed. rRNA retained
    kn99_annotation_file = kn99_organism_data.annotation_file
    # this is annotations + nc, t, r RNA with nc,t,r RNA annotations overlapping protein coding removed regardless of strand. rRNA retained
    kn99_annotation_file_no_strand = kn99_organism_data.annotation_file_no_strand
    kn99_genome = kn99_organism_data.genome
    s288c_r64_organism_data = OrganismData(organism='S288C_R64')
    s288c_r64_novoalign_index = s288c_r64_organism_data.novoalign_index
    s288c_r64_annotation_file = s288c_r64_organism_data.annotation_file
    s288c_r64_genome = s288c_r64_organism_data.genome
    # filter down to the columns nextflow needs
    nextflow_fastqfile_df = db.query_df[['runDirectory', 'fastqFileName', 'organism', 'strandedness']]
    # warn (without exiting) about any fastq file that did not make it to scratch
    for index, row in nextflow_fastqfile_df.iterrows():
        try:
            if not os.path.isfile(row['fastqFileName']):
                raise FileNotFoundError('fastqFileNotFoundInScratch')
        except FileNotFoundError:
            print('file %s was not successfully moved from lts to scratch' % row['fastqFileName'])
    print('\nnextflow fastq file .csv head:\n')
    print(nextflow_fastqfile_df.head())
    print('\n')
    # write out
    fastq_file_list_output_path = os.path.join(db.job_scripts, 'nextflow_fastqfile_list' + '_' + args.name + '.csv')
    print('...writing out to %s' % fastq_file_list_output_path)
    nextflow_fastqfile_df.to_csv(fastq_file_list_output_path, index=False)
    # config_header goes at the top of the config -- includes date created and StandardObject instructions
    config_header = "/*\n" \
                    "* -------------------------------------------------\n" \
                    "* Brentlab nextflow rnaseq_pipeline configuration\n" \
                    "* -------------------------------------------------\n" \
                    "* created with create_nextflow_config.py on %s\n" \
                    "* note: this is for a specific job for a specific user\n" \
                    "* and not intended as a general config file. To re-create\n" \
                    "* this job, you will need to run create_nextflow_config.py\n" \
                    "* with the same query_sheet input\n" \
                    "*/\n\n" % db.year_month_day
    # params section has all relevant path parameters to run the pipeline
    params_section = "// params necessary for the pipeline\n" \
                     "params {\n" \
                     "\tfastq_file_list = \"%s\"\n" \
                     "\tlts_sequence = \"%s\"\n" \
                     "\tscratch_sequence = \"%s\"\n" \
                     "\tlts_align_expr = \"%s\"\n" \
                     "\talign_count_results = \"%s\"\n" \
                     "\tlog_dir = \"%s\"\n" \
                     "\tKN99_novoalign_index = \"%s\"\n" \
                     "\tKN99_annotation_file = \"%s\"\n" \
                     "\tKN99_annotation_file_no_strand = \"%s\"\n" \
                     "\tKN99_genome = \"%s\"\n" \
                     "\tS288C_R64_novoalign_index = \"%s\"\n" \
                     "\tS288C_R64_annotation_file = \"%s\"\n" \
                     "\tS288C_R64_genome = \"%s\"\n" \
                     "}\n\n" % (fastq_file_list_output_path, db.lts_sequence, db.scratch_sequence,
                                db.lts_align_expr, db.align_count_results, db.log_dir,
                                kn99_novoalign_index, kn99_annotation_file, kn99_annotation_file_no_strand, kn99_genome,
                                s288c_r64_novoalign_index, s288c_r64_annotation_file, s288c_r64_genome)
    # write out and submit sbatch script with named/combined output/err
    nextflow_config_path = os.path.join(db.job_scripts, args.name + '_nextflow.config')
    print('...writing nextflow job config file to %s' % nextflow_config_path)
    with open(nextflow_config_path, 'w') as nextflow_config_file:
        nextflow_config_file.write(config_header)
        nextflow_config_file.write(params_section)
    sbatch_script_name = args.name + '_nextflow'
    nextflow_sbatch_path = os.path.join(db.job_scripts, sbatch_script_name + '.sbatch')
    # write sbatch script to submit nextflow job
    print('...writing sbatch script to %s' % nextflow_sbatch_path)
    with open(nextflow_sbatch_path, 'w') as nf_sbatch_file:
        nf_sbatch_file.write('#!/bin/bash\n'
                             '#SBATCH --mem=15G\n'
                             '#SBATCH -o %s/%s.out\n'
                             '#SBATCH -J %s\n\n'
                             'ml rnaseq_pipeline\n\n'
                             'nextflow -C %s run $CODEBASE/tools/align_count_pipeline.nf\n'
                             % (db.sbatch_log, sbatch_script_name, sbatch_script_name, nextflow_config_path))
    sbatch_cmd = 'sbatch %s' % nextflow_sbatch_path
    print('\nsubmitting sbatch script with cmd:\n\t%s' % sbatch_cmd)
    utils.executeSubProcess(sbatch_cmd)
    print('\nCheck progress by entering:\n\ttail %s/%s.out' % (db.sbatch_log, sbatch_script_name))
    print('\nTo run this in an interactive session, do the following:\n\t'
          'interactive\n\tnextflow -C %s run $CODEBASE/tools/align_count_pipeline.nf\n' % nextflow_config_path)
    print('If this job fails or is interrupted, you can resume it from where it failed by adding the flag -r to the nextflow command in the .sbatch file and resubmitting to sbatch')
def standardDirectoryStructure(self):
    """
    Check for, and create if necessary, the expected directory structure in
    /scratch/mblab/$USER/rnaseq_pipeline: the per-purpose subdirectories, the
    database_files clone, the /lts softlinks (non-interactive only) and genome_files.
    Sets an attribute on self for each directory it creates/verifies.
    """
    # offer method to set user_scratch in config file
    try:
        if not os.path.isdir(self.user_scratch):
            raise NotADirectoryError('UserScratchDirectoryNotPresent')
    except AttributeError:
        # user_scratch not set by config: set attribute user_scratch
        # (this is where rnaseq_pipeline and all subordinate folders/files will be
        user_scratch = os.path.join(self.mblab_scratch, self._user)
        setattr(self, 'user_scratch', user_scratch)
    except NotADirectoryError:
        utils.mkdirp(self.user_scratch)
    # if it does not already exist, create user_rnaseq_pipeline in user_scratch and set attribute
    setattr(self, 'user_rnaseq_pipeline_directory', '{}/rnaseq_pipeline'.format(self.user_scratch))
    utils.mkdirp(self.user_rnaseq_pipeline_directory)
    # create necessary subdirectories in rnaseq_pipeline
    # TODO: MAKE SBATCH_LOG LIKE LOG WITH YEAR_MONTH_DAY SUBDIR
    process_directories = ['reports', 'align_count_results', 'query', 'sbatch_log',
                           'log/%s' % self.year_month_day, 'job_scripts', 'rnaseq_tmp',
                           'experiments', 'scratch_sequence']
    for directory in process_directories:
        # store path
        path = os.path.join(self.user_rnaseq_pipeline_directory, directory)
        # this will only create the path if it dne
        utils.mkdirp(path)
        # set attr to directory (the names in process_directories) unless log, which is treated specially
        if directory == 'log/%s' % self.year_month_day:
            # distinguish the log directory ($USER/rnaseq_pipeline/log)
            self.log_dir = os.path.join(self.user_rnaseq_pipeline_directory, 'log/%s' % self.year_month_day)
            utils.mkdirp(self.log_dir)
            # from the daily log file ($USER/rnaseq_pipeline/log/<year-month-day>)
            self.log_file_path = os.path.join(self.log_dir, '%s.log' % self.year_month_day)
            self.createStandardDataLogger()
        else:
            setattr(self, directory, path)
    # clone database_files from github if it is not already present;
    # the finally clause sets the attribute whether or not the clone was needed
    try:
        database_files_path = os.path.join(self.user_rnaseq_pipeline_directory, 'database_files')
        if not os.path.isdir(database_files_path):
            raise NotADirectoryError('DatabaseFilesNotFound: %s' % database_files_path)
    except NotADirectoryError:
        cmd = 'git clone https://github.com/BrentLab/database_files.git %s' % database_files_path
        utils.executeSubProcess(cmd)
    finally:
        setattr(self, 'database_files', database_files_path)
    if self.interactive:
        print('Remember you will not be able to access lts_align_expr or lts_sequence in an interactive session on htcf')
    else:
        # check for directories to be soft linked from /lts/mblab/Crypto/rnaseq_pipeline (self.lts_rnaseq_data)
        lts_dirs_to_softlink = ['lts_align_expr', 'lts_sequence']
        try:
            utils.softLinkAndSetAttr(self, lts_dirs_to_softlink, self.lts_rnaseq_data, self.user_rnaseq_pipeline_directory)
        except FileNotFoundError:
            # /lts not reachable: warn and fall back to local paths so the attributes are at least set
            print('WARNING: The source of %s does not exist and are not accessible. In the future, it is better to include the flag\n'
                  'interactive=True in the constructor of a StandardData object when you are in an interactive session.' % lts_dirs_to_softlink)
            setattr(self, 'lts_align_expr', os.path.join(self.user_rnaseq_pipeline_directory, 'lts_align_expr'))
            setattr(self, 'lts_sequence', os.path.join(self.user_rnaseq_pipeline_directory, 'lts_sequence'))
    # TODO: priority figure out how to do this without pulling from /lts. put link to genome_files.zip in config maybe
    # unzip genome files from /lts/mblab/Crypto/rnaseq_data/1.0/genome_files to self.user_rnaseq_pipeline_directory
    self.setGenomeFiles()
    # check that all files present in the OrganismDataConfig.ini file in the subdirectories of genome_files exist
    try:
        self.checkGenomeFiles()
    except NotADirectoryError:
        print('Genome Files are incomplete. Delete genome_files completely and re-run StandardDataObject or child '
              'to re-download genome_files.\nNote: this cannot be done from an interactive session on HTCF.')
    except FileNotFoundError:
        print('Genome Files are incomplete. Delete genome_files completely and re-run StandardDataObject or child '
              'to re-download genome_files.\nNote: this cannot be done from an interactive session on HTCF.')
def main(argv):
    """
    main method: build the fastq file list and sbatch script for an align/count run,
    submit the job, and record pipeline/annotation provenance in the output directory.

    :param argv: cmd line arguments
    """
    # parse command line input and store as more descriptive variables
    print('...parsing input')
    args = parse_args(argv)
    try:
        if not os.path.isdir(args.fastq_path):
            raise NotADirectoryError('FastqDirectoryDoesNotExist')
    except NotADirectoryError:
        # bugfix: (a) exit rather than print-and-continue with an invalid path,
        # (b) interpolate the path -- the original left the bare %s uninterpolated
        sys.exit('The path to %s for the raw fastq_files does not exist. Correct and re-submit.\n'
                 'Remember this directory cannot be in long term storage' % args.fastq_path)
    # in event a run_####_samples is not passed, ask user for a replacement for run_number
    try:
        run_number = utils.getRunNumber(args.fastq_path)
    except AttributeError:
        run_number = input('No run number found. Enter a number, word or phrase to be appended to run_ that will be used to create a\n'
                           'subdirectory in output: ')
    print('...creating OrganismDataObject')
    od = OrganismData(organism=args.organism, fastq_path=args.fastq_path,
                      strandness=args.strandness, email=args.user_email, run_number=run_number)
    # check directory structure and set organism data (see OrganismData.setOrganismData())
    od.setOrganismData()
    # create logger for this script if od logger is set
    if os.path.isfile(od.log_file_path):
        logger = utils.createLogger(od.log_file_path, 'align_count.py', 'INFO')
    else:
        logger = utils.createStdOutLogger(name='align_count_logger')
    # add attribute output_dir
    od.output_dir = os.path.join(args.output_directory, 'run_{}'.format(od.run_number))
    # store align_only flag from cmd line
    align_only = args.align_only
    print('...extracting list of fastq files to process')
    fastq_list_file = '%s/run_%s_fastq_list.txt' % (od.job_scripts, od.run_number)
    logger.info('The fastq list file path is %s' % fastq_list_file)
    print('The fastq list file path is %s' % fastq_list_file)
    # extract all files with the extensions in the list from od.fastq_path
    fastq_file_list = utils.getFileListFromDirectory(od.fastq_path, ["fastq.gz", "fastq", "fq.gz", "fq"])
    # store length of list
    num_fastqs = len(fastq_file_list)
    # write list to file
    with open(fastq_list_file, 'w') as file:
        for fastq_basename in fastq_file_list:
            file.write('%s\n' % fastq_basename)
    if not os.path.isfile(fastq_list_file):
        sys.exit("list of fastq files at %s does not exist" % fastq_list_file)
    else:
        print('list of fastq files may be found at %s' % fastq_list_file)
    print('...writing sbatch job_script')
    # create path for sbatch job_script
    sbatch_job_script_path = '%s/run_%s_mblab_rnaseq.sbatch' % (od.job_scripts, od.run_number)
    logger.info('sbatch job script path is %s' % sbatch_job_script_path)
    # create a slurm submission script and write to ./job_scripts
    SbatchWriter.writeAlignCountJobScript(sbatch_job_script_path, od.output_dir, fastq_list_file,
                                          num_fastqs, od.novoalign_index, od.annotation_file,
                                          od.feature_type, od.strandness, align_only)
    if not os.path.isfile(sbatch_job_script_path):
        sys.exit('sbatch job_script does not exist at path %s' % sbatch_job_script_path)
    else:
        print('sbatch script may be found at %s' % sbatch_job_script_path)
    # submit sbatch job
    print('...submitting sbatch job')
    if od.email is None:
        cmd = "sbatch %s" % sbatch_job_script_path
        utils.executeSubProcess(cmd)
    else:
        cmd = "sbatch --mail-type=END,FAIL --mail-user=%s %s" % (od.email, sbatch_job_script_path)
        utils.executeSubProcess(cmd)
    print('\nannotation and pipeline information recorded in {}/run_{}/{}'.format(od.output_dir, od.run_number, 'pipeline_info'))
    pipeline_info_subdir_path = os.path.join(od.output_dir, "{}_pipeline_info".format(od.organism))
    utils.mkdirp(pipeline_info_subdir_path)
    # write version info from the module .lua file (see the .lua whatis statements)
    pipeline_info_txt_file_path = os.path.join(pipeline_info_subdir_path, 'pipeline_info.txt')
    cmd_pipeline_info = "module whatis rnaseq_pipeline 2> {}".format(pipeline_info_txt_file_path)
    utils.executeSubProcess(cmd_pipeline_info)
    # include the date processed in pipeline_info_subdir_path/pipeline_into.txt
    with open(pipeline_info_txt_file_path, "a+") as file:
        file.write("\n")
        current_datetime = od.year_month_day + '_' + utils.hourMinuteSecond()
        file.write('Date processed: %s' % current_datetime)
        file.write("\n")
    # include the head of the gff/gtf, also
    cmd_annotation_info = "head {} >> {}".format(od.annotation_file, pipeline_info_txt_file_path)
    utils.executeSubProcess(cmd_annotation_info)
    # include copy of job script
    cmd_cp_job_script_to_pipeline_info = 'rsync -aHv %s %s' % (sbatch_job_script_path, pipeline_info_subdir_path)
    utils.executeSubProcess(cmd_cp_job_script_to_pipeline_info)
    # include copy of list of fastq files
    cmd_cp_fastq_file_list_to_pipeline_info = 'rsync -aHv %s %s' % (fastq_list_file, pipeline_info_subdir_path)
    utils.executeSubProcess(cmd_cp_fastq_file_list_to_pipeline_info)