def main():
    """Run the INNUca.py pipeline (version 3.1) over every input sample.

    Steps, in order: parse the command line, create the output directory,
    optionally replace ``sys.stdout`` with a ``utils.Logger``, print run
    information, check the external programs required by the enabled
    modules, call ``run_INNUca`` for each sample, and write the per-sample
    and combined reports into the output directory.

    Calls ``sys.exit`` with a message when ``utils.checkPrograms`` reports
    missing programs, or when no sample ran successfully.

    Every ``print(...)`` below takes a single parenthesised expression, so
    it behaves the same as the Python 2 print statement.
    """
    version = '3.1'
    args = utils.parseArguments(version)

    general_start_time = time.time()
    time_str = time.strftime("%Y%m%d-%H%M%S")

    def check_programs(programs):
        # Run the program check and abort with the collected errors, if any.
        missing, checked = utils.checkPrograms(programs)
        if missing:
            sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missing))
        return checked

    # Create the output directory when it does not exist yet
    outdir = os.path.abspath(os.path.join(args.outdir, ''))
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    # Start the logger unless logging was disabled
    logging_enabled = not args.noLog
    if logging_enabled:
        sys.stdout = utils.Logger(outdir, time_str)

    print('\n' + '==========> INNUca.py <==========')
    print('\n' + 'Program start: ' + time.ctime())

    # Report where the log file is stored
    if logging_enabled:
        print('\n' + 'LOGFILE:')
        print(sys.stdout.getLogFile())

    # Report the command that was used
    print('\n' + 'COMMAND:')
    script_path = os.path.abspath(sys.argv[0])
    print(' '.join([sys.executable, script_path, ' '.join(sys.argv[1:])]))

    # Report the directory the program was launched from
    print('\n' + 'PRESENT DIRECTORY:')
    print(os.getcwd())

    # Report the program version
    print('\n' + 'VERSION INNUca.py:')
    utils.scriptVersionGit(version, os.getcwd(), script_path, args.noGitInfo)

    # Get CPU information
    utils.get_cpu_information(outdir, time_str)

    # trueCoverage_ReMatCh settings
    true_config_path = None
    if args.trueConfigFile is not None:
        true_config_path = args.trueConfigFile.name
    trueCoverage_config = get_trueCoverage_config(
        args.skipTrueCoverage, true_config_path, args.speciesExpected,
        script_path)

    # Programs to check: name -> [version flag, comparison, version]
    required_programs = {'gunzip': ['--version', '>=', '1.6']}

    # Java is checked first, on its own, before the programs that depend
    # on it. A None version flag is used for OpenJDK compatibility.
    if (not args.skipFastQC or not args.skipTrimmomatic or
            (not args.skipPilon and not args.skipSPAdes)):
        required_programs['java'] = [None, '>=', '1.8']
    required_programs = check_programs(required_programs)

    if not args.skipTrueCoverage or trueCoverage_config is not None:
        include_rematch_dependencies_path(args.doNotUseProvidedSoftware)
        required_programs['rematch.py'] = ['--version', '>=', '3.2']
        required_programs['bcftools'] = ['--version', '==', '1.3.1']
    if (not args.skipTrueCoverage or
            not ((args.skipAssemblyMapping and args.skipPilon) or
                 args.skipSPAdes)):
        required_programs['bowtie2'] = ['--version', '>=', '2.2.9']
        required_programs['samtools'] = ['--version', '==', '1.3.1']
    if not args.skipFastQC:
        required_programs['fastqc'] = ['--version', '==', '0.11.5']
    if not args.skipTrimmomatic:
        required_programs['trimmomatic-0.36.jar'] = ['-version', '==', '0.36']
    if args.runPear:
        required_programs['pear'] = ['--version', '>=', '0.9.10']
    if not args.skipSPAdes:
        required_programs['spades.py'] = ['--version', '>=', '3.9.0']
    if not args.skipPilon and not args.skipSPAdes:
        required_programs['pilon-1.18.jar'] = ['--version', '==', '1.18']
    if not args.skipMLST and not args.skipSPAdes:
        required_programs['mlst'] = ['--version', '>=', '2.4']

    # Set and print the PATH variable, then check the remaining programs
    utils.setPATHvariable(args, script_path)
    required_programs = check_programs(required_programs)

    # After the check, index 3 of each entry is read as the program path
    jar_path_trimmomatic = None
    if not args.skipTrimmomatic:
        jar_path_trimmomatic = required_programs['trimmomatic-0.36.jar'][3]

    jar_path_pilon = None
    if not (args.skipPilon or args.skipSPAdes):
        jar_path_pilon = required_programs['pilon-1.18.jar'][3]

    rematch_script = None
    if not args.skipTrueCoverage:
        rematch_script = required_programs['rematch.py'][3]

    # The paired-end file separation option is not taken from args here
    pairEnd_filesSeparation_list = None
    (samples, inputDirectory, removeCreatedSamplesDirectories,
     indir_same_outdir) = get_samples(args.inputDirectory, args.fastq,
                                      outdir, pairEnd_filesSeparation_list)

    # Start running the analysis
    print('\n' + 'RUNNING INNUca.py')

    # Prepare the run report file
    samples_report_path = os.path.join(
        outdir, 'samples_report.' + time_str + '.tab')
    utils.start_sample_report_file(samples_report_path)

    number_samples_successfully = 0
    number_samples_pass = 0
    number_samples_warning = 0

    # MLST scheme to use
    scheme = 'unknown'
    species_genus = None
    mlst_scheme_genus = None
    if not (args.skipMLST or args.skipSPAdes):
        scheme, species_genus, mlst_scheme_genus = mlst.getScheme(
            args.speciesExpected)
        # Print path to blastn
        mlst.getBlastPath()

    # Memory
    available_memory_GB = utils.get_free_memory() / (1024.0**2)

    # SPAdes maximum memory
    spadesMaxMemory = None
    if not args.skipSPAdes:
        print('')
        spadesMaxMemory = spades.define_memory(
            args.spadesMaxMemory, args.threads, available_memory_GB)

    # .jar maximum memory
    jarMaxMemory = 'off'
    if not args.skipTrimmomatic or not (args.skipSPAdes or args.skipPilon):
        print('')
        jarMaxMemory = utils.define_jar_max_memory(
            args.jarMaxMemory, args.threads, available_memory_GB)

    # Run INNUca for each sample
    sample_report_json = {}
    for sample in samples:
        sample_start_time = time.time()

        print('\n' + 'Sample: ' + sample + '\n')

        # Create the sample output directory
        sample_outdir = os.path.abspath(os.path.join(outdir, sample, ''))
        if not os.path.isdir(sample_outdir):
            os.makedirs(sample_outdir)

        # Find the fastq files of the sample
        sample_indir = os.path.join(inputDirectory, sample, '')
        fastq_files = utils.searchFastqFiles(
            sample_indir, pairEnd_filesSeparation_list, False)
        number_fastq = len(fastq_files)
        if number_fastq == 1:
            print('Only one fastq file was found: ' + str(fastq_files))
            print('Pair-End sequencing is required. Moving to the next sample')
            continue
        if number_fastq == 0:
            print('No compressed fastq files were found. Continue to the next sample')
            continue

        print('The following files will be used:')
        print(str(fastq_files) + '\n')

        # Run the INNUca.py analysis
        run_successfully, pass_qc, run_report = run_INNUca(
            sample, sample_outdir, fastq_files, args, script_path, scheme,
            spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon,
            jarMaxMemory, trueCoverage_config, rematch_script,
            species_genus, mlst_scheme_genus)

        # Save the sample fail report and warning report
        utils.write_fail_report(
            os.path.join(sample_outdir, 'fail_report.txt'), run_report)
        write_warning_report(
            os.path.join(sample_outdir, 'warning_report.txt'), run_report)

        # Size of the raw reads files; measured before the sample input
        # directory may be removed below
        fileSize = sum(map(os.path.getsize, fastq_files))

        # Remove the sample directory if it was created during the process
        if removeCreatedSamplesDirectories and not indir_same_outdir:
            utils.removeDirectory(sample_indir)

        print('END ' + sample + ' analysis')
        sample_time_taken = utils.runTime(sample_start_time)

        # Save the run report
        warning, json_pass_qc = utils.write_sample_report(
            samples_report_path, sample, run_successfully, pass_qc,
            sample_time_taken, fileSize, run_report)

        # Update the run statistics
        if run_successfully:
            number_samples_successfully += 1
        if pass_qc:
            if warning:
                number_samples_warning += 1
            else:
                number_samples_pass += 1

        sample_report_json[sample] = {
            'run_successfully': run_successfully,
            'pass_qc': json_pass_qc,
            'modules_run_report': run_report,
        }

    # Save combine_samples_reports
    combine_reports.combine_reports(outdir, outdir, args.json, time_str,
                                    len(samples))

    # Save sample_report in json
    if args.json:
        import json
        json_report_path = os.path.join(
            outdir, 'samples_report.' + time_str + '.json')
        with open(json_report_path, 'wt') as writer:
            json.dump(sample_report_json, writer)

    # Remove the temporary folder with symlinks to the fastq files in case
    # of --fastq use
    if args.inputDirectory is None and args.fastq is not None:
        utils.removeDirectory(os.path.join(inputDirectory, ''))

    # Run report
    total_samples = len(samples)
    print('\n' + 'END INNUca.py')
    print('\n' + 'Pipeline problems: {not_run_successfully} samples'.format(
        not_run_successfully=(total_samples - number_samples_successfully)))
    print('\n' + 'FAIL: {number_samples_fail} samples'.format(
        number_samples_fail=(total_samples - number_samples_pass -
                             number_samples_warning)))
    print('\n' + 'WARNING: {number_samples_warning} samples'.format(
        number_samples_warning=number_samples_warning))
    print('\n' + 'PASS: {number_samples_pass} samples'.format(
        number_samples_pass=number_samples_pass))

    # Called for the whole-run start time; the returned value is not needed
    utils.runTime(general_start_time)

    # Exit with an error when not a single sample ran successfully
    if number_samples_successfully == 0:
        sys.exit('No samples run successfully!')
def main():
    """Run the INNUca.py pipeline (version 2.0) over every input sample.

    Steps, in order: parse the command line, create the output directory,
    replace ``sys.stdout`` with a ``utils.Logger``, print run information,
    check the external programs required by the enabled modules, resolve
    the trueCoverage_ReMatCh configuration, call ``run_INNUca`` for each
    sample, and write the per-sample reports.

    Calls ``sys.exit`` with a message when ``utils.checkPrograms`` reports
    missing programs, or when no sample ran successfully.

    Every ``print(...)`` below takes a single parenthesised expression, so
    it behaves the same as the Python 2 print statement.
    """
    version = '2.0'
    args = utils.parseArguments(version)

    general_start_time = time.time()
    time_str = time.strftime("%Y%m%d-%H%M%S")

    # Create the output directory when it does not exist yet
    outdir = os.path.abspath(os.path.join(args.outdir, ''))
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    # Start the logger
    sys.stdout = utils.Logger(outdir, time_str)

    print('\n' + '==========> INNUca.py <==========')
    print('\n' + 'Program start: ' + time.ctime())

    # Report where the log file is stored
    print('\n' + 'LOGFILE:')
    print(sys.stdout.getLogFile())

    # Report the command that was used
    print('\n' + 'COMMAND:')
    script_path = os.path.abspath(sys.argv[0])
    print(' '.join([sys.executable, script_path, ' '.join(sys.argv[1:])]))

    # Report the directory the program was launched from
    print('\n' + 'PRESENT DIRECTORY :')
    print(os.getcwd())

    # Report the program version
    print('\n' + 'VERSION INNUca.py:')
    utils.scriptVersionGit(version, os.getcwd(), script_path)

    # Get CPU information
    utils.get_cpu_information(outdir, time_str)

    # Set and print the PATH variable
    utils.setPATHvariable(args.doNotUseProvidedSoftware, script_path)

    # Programs to check: name -> [version flag, comparison, version]
    required_programs = {'gunzip': ['--version', '>=', '1.6']}
    if not args.skipTrueCoverage or not (args.skipPilon or args.skipSPAdes):
        required_programs['bowtie2'] = ['--version', '>=', '2.2.9']
        required_programs['samtools'] = ['--version', '==', '1.3.1']
    if (not args.skipFastQC or not args.skipTrimmomatic or
            (not args.skipPilon and not args.skipSPAdes)):
        required_programs['java'] = ['-version', '>=', '1.8']
    if not args.skipFastQC:
        required_programs['fastqc'] = ['--version', '==', '0.11.5']
    if not args.skipTrimmomatic:
        required_programs['trimmomatic-0.36.jar'] = ['-version', '==', '0.36']
    if not args.skipSPAdes:
        required_programs['spades.py'] = ['--version', '>=', '3.9.0']
    if not (args.skipPilon or args.skipSPAdes):
        required_programs['pilon-1.18.jar'] = ['--version', '==', '1.18']
    if not (args.skipMLST or args.skipSPAdes):
        required_programs['mlst'] = ['--version', '>=', '2.4']

    missing, required_programs = utils.checkPrograms(required_programs)
    if missing:
        sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missing))

    # After the check, index 3 of each entry is read as the program path
    jar_path_trimmomatic = None
    if not args.skipTrimmomatic:
        jar_path_trimmomatic = required_programs['trimmomatic-0.36.jar'][3]

    jar_path_pilon = None
    if not (args.skipPilon or args.skipSPAdes):
        jar_path_pilon = required_programs['pilon-1.18.jar'][3]

    # Check the input directory and collect the sample names
    inputDirectory = os.path.abspath(os.path.join(args.inputDirectory, ''))
    # The paired-end file separation option is not taken from args here
    pairEnd_filesSeparation_list = None
    print('')
    (samples, removeCreatedSamplesDirectories,
     indir_same_outdir) = utils.checkSetInputDirectory(
        inputDirectory, outdir, pairEnd_filesSeparation_list)

    # Start running the analysis
    print('\n' + 'RUNNING INNUca.py')

    # Prepare the run report file
    samples_report_path = os.path.join(
        outdir, 'samples_report.' + time_str + '.tab')
    utils.start_sample_report_file(samples_report_path)

    number_samples_successfully = 0
    number_samples_pass = 0

    # MLST scheme to use
    scheme = 'unknown'
    if not (args.skipMLST or args.skipSPAdes):
        scheme = mlst.getScheme(args.speciesExpected)
        # Get path to blastn
        mlst.getBlastPath()

    # trueCoverage_ReMatCh settings
    trueCoverage_config = None
    if not args.skipTrueCoverage:
        trueCoverage_reference = None
        trueCoverage_config_file = None

        user_config = args.trueConfigFile
        if user_config is not None:
            trueCoverage_config_file = user_config.name
        else:
            print('No trueCoverage_ReMatCh config file was provided. Search for default files')
            trueCoverage_config_file, trueCoverage_reference = \
                trueCoverage.check_existing_default_config(
                    args.speciesExpected, script_path)

        if trueCoverage_config_file is not None:
            trueCoverage_config = trueCoverage.parse_config(
                trueCoverage_config_file)
        # With a default config, the reference file is the one returned by
        # the default-config lookup
        if user_config is None and trueCoverage_config is not None:
            trueCoverage_config['reference_file'] = trueCoverage_reference

        if trueCoverage_config is None:
            print('No trueCoverage_ReMatCh config file was found')
        else:
            print('The following trueCoverage_ReMatCh config file will be used: ' + trueCoverage_config_file)
            print('The following trueCoverage_ReMatCh reference file will be used: ' + trueCoverage_config['reference_file'] + '\n')

    # Memory
    available_memory_GB = utils.get_free_memory() / (1024.0**2)

    # SPAdes maximum memory
    spadesMaxMemory = None
    if not args.skipSPAdes:
        print('')
        spadesMaxMemory = spades.define_memory(
            args.spadesMaxMemory, args.threads, available_memory_GB)

    # .jar maximum memory
    jarMaxMemory = 'off'
    if not args.skipTrimmomatic or not (args.skipSPAdes or args.skipPilon):
        print('')
        jarMaxMemory = utils.define_jar_max_memory(
            args.jarMaxMemory, args.threads, available_memory_GB)

    # Run INNUca for each sample
    for sample in samples:
        sample_start_time = time.time()

        print('\n' + 'Sample: ' + sample + '\n')

        # Create the sample output directory
        sample_outdir = os.path.abspath(os.path.join(outdir, sample, ''))
        if not os.path.isdir(sample_outdir):
            os.makedirs(sample_outdir)

        # Find the fastq files of the sample
        sample_indir = os.path.join(inputDirectory, sample, '')
        fastq_files = utils.searchFastqFiles(
            sample_indir, pairEnd_filesSeparation_list, False)
        if len(fastq_files) == 1:
            print('Only one fastq file was found: ' + str(fastq_files))
            print('Pair-End sequencing is required. Moving to the next sample')
            continue

        print('The following files will be used:')
        print(str(fastq_files) + '\n')

        # Run the INNUca.py analysis
        run_successfully, pass_qc, run_report = run_INNUca(
            sample, sample_outdir, fastq_files, args, script_path, scheme,
            spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon,
            jarMaxMemory, trueCoverage_config)

        # Save the sample fail report
        utils.write_fail_report(
            os.path.join(sample_outdir, 'fail_report.txt'), run_report)

        # Update the run statistics
        if run_successfully:
            number_samples_successfully += 1
        if pass_qc:
            number_samples_pass += 1

        # Size of the raw reads files; measured before the sample input
        # directory may be removed below
        fileSize = sum(map(os.path.getsize, fastq_files))

        # Remove the sample directory if it was created during the process
        if removeCreatedSamplesDirectories and not indir_same_outdir:
            utils.removeDirectory(sample_indir)

        print('END ' + sample + ' analysis')
        sample_time_taken = utils.runTime(sample_start_time)

        # Save the run report
        utils.write_sample_report(
            samples_report_path, sample, run_successfully, pass_qc,
            sample_time_taken, fileSize, run_report)

    # Run report
    print('\n' + 'END INNUca.py')
    print('\n' + str(number_samples_successfully) + ' samples out of ' + str(len(samples)) + ' run successfully')
    print('\n' + str(number_samples_pass) + ' samples out of ' + str(number_samples_successfully) + ' (run successfully) PASS INNUca.py analysis')

    # Called for the whole-run start time; the returned value is not needed
    utils.runTime(general_start_time)

    # Exit with an error when not a single sample ran successfully
    if number_samples_successfully == 0:
        sys.exit('No samples run successfully!')