Exemplo n.º 1
0
def get_trueCoverage_config(skipTrueCoverage, trueConfigFile, speciesExpected,
                            script_path):
    """Resolve the trueCoverage_ReMatCh configuration to use for the run.

    Args:
        skipTrueCoverage: when true, nothing is looked up and None is
            returned.
        trueConfigFile: config file given by the user, or None to ask
            ``trueCoverage.check_existing_default_config`` for a default
            config file and its reference.
        speciesExpected: passed to the default-config lookup.
        script_path: passed to the default-config lookup.

    Returns:
        Whatever ``trueCoverage.parse_config`` returned for the selected
        config file, or None when trueCoverage is skipped or no config file
        could be determined. When the default config is used, its
        ``'reference_file'`` entry is set to the reference returned by the
        default lookup.
    """
    # NOTE: print is called with one parenthesised expression so the output
    # is unchanged on Python 2 while the code also parses on Python 3.
    if skipTrueCoverage:
        return None

    use_defaults = trueConfigFile is None
    default_reference = None
    if use_defaults:
        print('No trueCoverage_ReMatCh config file was provided. Search for default files')
        config_file, default_reference = trueCoverage.check_existing_default_config(
            speciesExpected, script_path)
    else:
        config_file = trueConfigFile

    config = None
    if config_file is not None:
        config = trueCoverage.parse_config(config_file)

    if config is None:
        print('No trueCoverage_ReMatCh config file was found')
        return None

    if use_defaults:
        # The reference that came with the default config file replaces
        # whatever 'reference_file' value the parsed config may hold.
        config['reference_file'] = default_reference

    print('The following trueCoverage_ReMatCh config file will be used: ' + config_file)
    print('The following trueCoverage_ReMatCh reference file will be used: ' +
          config['reference_file'] + '\n')
    return config
Exemplo n.º 2
0
def get_trueCoverage_config(skipTrueCoverage, trueConfigFile, speciesExpected, script_path):
    """Pick and parse the trueCoverage_ReMatCh config file for this run.

    Args:
        skipTrueCoverage: truthy to disable trueCoverage; None is returned
            without any lookup.
        trueConfigFile: user-supplied config file, or None to fall back to
            ``trueCoverage.check_existing_default_config``.
        speciesExpected: forwarded to the default-config lookup.
        script_path: forwarded to the default-config lookup.

    Returns:
        The object returned by ``trueCoverage.parse_config`` for the chosen
        config file, or None when trueCoverage is skipped or no config file
        is available. If the default config was used, ``'reference_file'``
        in the result is set to the reference returned by the lookup.
    """
    # NOTE: each print takes a single parenthesised expression, which prints
    # the same text on Python 2 and is valid syntax on Python 3 as well.
    if skipTrueCoverage:
        return None

    reference_from_defaults = None
    if trueConfigFile is not None:
        selected_file = trueConfigFile
    else:
        print('No trueCoverage_ReMatCh config file was provided. Search for default files')
        selected_file, reference_from_defaults = trueCoverage.check_existing_default_config(speciesExpected, script_path)

    parsed = trueCoverage.parse_config(selected_file) if selected_file is not None else None

    if parsed is None:
        print('No trueCoverage_ReMatCh config file was found')
        return None

    if trueConfigFile is None:
        # Only the default lookup provides a reference; a user-supplied
        # config keeps the 'reference_file' it was parsed with.
        parsed['reference_file'] = reference_from_defaults

    print('The following trueCoverage_ReMatCh config file will be used: ' + selected_file)
    print('The following trueCoverage_ReMatCh reference file will be used: ' + parsed['reference_file'] + '\n')
    return parsed
Exemplo n.º 3
0
def main():
    """Run the INNUca.py analysis on every sample of the input directory.

    In order, the function: parses the command line, creates the output
    directory, replaces ``sys.stdout`` with a ``utils.Logger``, prints the
    run information, checks the external programs needed by the enabled
    steps, collects the samples, resolves the trueCoverage_ReMatCh settings
    and the memory limits, and finally calls ``run_INNUca`` once per sample,
    writing a fail report per sample and one line per sample in the samples
    report.

    Exits through ``sys.exit`` with a message when ``utils.checkPrograms``
    reports problems or when no sample ran successfully.
    """
    # NOTE: every print takes one parenthesised expression, so the output is
    # unchanged on Python 2 and the function also parses on Python 3.
    version = '2.0'
    args = utils.parseArguments(version)

    general_start_time = time.time()
    time_str = time.strftime("%Y%m%d-%H%M%S")

    # Make sure the output directory exists
    outdir = os.path.abspath(os.path.join(args.outdir, ''))
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    # Start logger: from here on everything printed goes through utils.Logger
    sys.stdout = utils.Logger(outdir, time_str)

    print('\n' + '==========> INNUca.py <==========')
    print('\n' + 'Program start: ' + time.ctime())

    # Tell where the logfile will be stored
    print('\n' + 'LOGFILE:')
    print(sys.stdout.getLogFile())

    # Print command
    print('\n' + 'COMMAND:')
    script_path = os.path.abspath(sys.argv[0])
    print(sys.executable + ' ' + script_path + ' ' + ' '.join(sys.argv[1:]))

    # Print the directory the program was launched from
    print('\n' + 'PRESENT DIRECTORY :')
    print(os.getcwd())

    # Print program version
    print('\n' + 'VERSION INNUca.py:')
    utils.scriptVersionGit(version, os.getcwd(), script_path)

    # Get CPU information
    utils.get_cpu_information(outdir, time_str)

    # Set and print PATH variable
    utils.setPATHvariable(args.doNotUseProvidedSoftware, script_path)

    # Steps that depend on SPAdes having run; computed once because the same
    # conditions gate program checks, .jar paths, MLST and memory settings.
    run_pilon = not args.skipPilon and not args.skipSPAdes
    run_mlst = not args.skipMLST and not args.skipSPAdes

    # Check programs: each entry is [version flag, comparison, version]
    programs_version_dictionary = {'gunzip': ['--version', '>=', '1.6']}
    if not args.skipTrueCoverage or run_pilon:
        programs_version_dictionary['bowtie2'] = ['--version', '>=', '2.2.9']
        programs_version_dictionary['samtools'] = ['--version', '==', '1.3.1']
    # java is needed as soon as FastQC, Trimmomatic or Pilon runs
    if not (args.skipFastQC and args.skipTrimmomatic and not run_pilon):
        programs_version_dictionary['java'] = ['-version', '>=', '1.8']
    if not args.skipFastQC:
        programs_version_dictionary['fastqc'] = ['--version', '==', '0.11.5']
    if not args.skipTrimmomatic:
        programs_version_dictionary['trimmomatic-0.36.jar'] = ['-version', '==', '0.36']
    if not args.skipSPAdes:
        programs_version_dictionary['spades.py'] = ['--version', '>=', '3.9.0']
    if run_pilon:
        programs_version_dictionary['pilon-1.18.jar'] = ['--version', '==', '1.18']
    if run_mlst:
        programs_version_dictionary['mlst'] = ['--version', '>=', '2.4']
    missingPrograms, programs_version_dictionary = utils.checkPrograms(
        programs_version_dictionary)
    if missingPrograms:
        sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms))

    # .jar paths
    # NOTE(review): index 3 is read as the .jar path; presumably appended to
    # each entry by utils.checkPrograms - confirm against that helper.
    jar_path_trimmomatic = None
    if not args.skipTrimmomatic:
        jar_path_trimmomatic = programs_version_dictionary['trimmomatic-0.36.jar'][3]

    jar_path_pilon = None
    if run_pilon:
        jar_path_pilon = programs_version_dictionary['pilon-1.18.jar'][3]

    # Check if input directory exists with fastq files and store the names of
    # the samples that have fastq files
    inputDirectory = os.path.abspath(os.path.join(args.inputDirectory, ''))
    # pairEnd_filesSeparation_list = args.pairEnd_filesSeparation
    pairEnd_filesSeparation_list = None
    print('')
    samples, removeCreatedSamplesDirectories, indir_same_outdir = utils.checkSetInputDirectory(
        inputDirectory, outdir, pairEnd_filesSeparation_list)

    # Start running the analysis
    print('\n' + 'RUNNING INNUca.py')

    # Prepare run report file
    samples_report_path = os.path.join(outdir, 'samples_report.' + time_str + '.tab')
    utils.start_sample_report_file(samples_report_path)

    number_samples_successfully = 0
    number_samples_pass = 0

    # Get MLST scheme to use
    scheme = 'unknown'
    if run_mlst:
        scheme = mlst.getScheme(args.speciesExpected)

    # Get path to blastn (called whether or not MLST is skipped)
    mlst.getBlastPath()

    # Get trueCoverage_ReMatCh settings
    trueCoverage_config = None
    if not args.skipTrueCoverage:
        trueCoverage_reference = None
        trueCoverage_config_file = None

        if args.trueConfigFile is None:
            print('No trueCoverage_ReMatCh config file was provided. Search for default files')
            trueCoverage_config_file, trueCoverage_reference = trueCoverage.check_existing_default_config(
                args.speciesExpected, script_path)
        else:
            # args.trueConfigFile exposes the file's path through .name
            trueCoverage_config_file = args.trueConfigFile.name

        if trueCoverage_config_file is not None:
            trueCoverage_config = trueCoverage.parse_config(trueCoverage_config_file)

        if trueCoverage_config is None:
            print('No trueCoverage_ReMatCh config file was found')
        else:
            if args.trueConfigFile is None:
                # Default config: use the reference returned with it
                trueCoverage_config['reference_file'] = trueCoverage_reference
            print('The following trueCoverage_ReMatCh config file will be used: ' + trueCoverage_config_file)
            print('The following trueCoverage_ReMatCh reference file will be used: ' +
                  trueCoverage_config['reference_file'] + '\n')

    # Memory
    # NOTE(review): the division by 1024**2 suggests get_free_memory returns
    # KB - confirm against utils.
    available_memory_GB = utils.get_free_memory() / (1024.0**2)
    # Determine SPAdes maximum memory
    spadesMaxMemory = None
    if not args.skipSPAdes:
        print('')
        spadesMaxMemory = spades.define_memory(args.spadesMaxMemory,
                                               args.threads,
                                               available_memory_GB)
    # Determine .jar maximum memory (only when Trimmomatic or Pilon runs)
    jarMaxMemory = 'off'
    if not (args.skipTrimmomatic and not run_pilon):
        print('')
        jarMaxMemory = utils.define_jar_max_memory(args.jarMaxMemory,
                                                   args.threads,
                                                   available_memory_GB)

    # Run INNUca for each sample
    for sample in samples:
        sample_start_time = time.time()

        print('\n' + 'Sample: ' + sample + '\n')

        # Create sample outdir
        sample_outdir = os.path.abspath(os.path.join(outdir, sample, ''))
        if not os.path.isdir(sample_outdir):
            os.makedirs(sample_outdir)

        # Get fastq files
        fastq_files = utils.searchFastqFiles(
            os.path.join(inputDirectory, sample, ''),
            pairEnd_filesSeparation_list, False)
        if len(fastq_files) == 1:
            # Skipped samples get neither a fail report nor a samples-report
            # line, but still count in len(samples) in the final summary.
            print('Only one fastq file was found: ' + str(fastq_files))
            print('Pair-End sequencing is required. Moving to the next sample')
            continue

        print('The following files will be used:')
        print(str(fastq_files) + '\n')

        # Run INNUca.py analysis
        run_successfully, pass_qc, run_report = run_INNUca(
            sample, sample_outdir, fastq_files, args, script_path, scheme,
            spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon,
            jarMaxMemory, trueCoverage_config)

        # Save sample fail report
        fail_report_path = os.path.join(sample_outdir, 'fail_report.txt')
        utils.write_fail_report(fail_report_path, run_report)

        # Save runs statistics
        if run_successfully:
            number_samples_successfully += 1
        if pass_qc:
            number_samples_pass += 1

        # Get raw reads files size (must happen before the sample directory
        # is possibly removed below)
        fileSize = sum(os.path.getsize(fastq) for fastq in fastq_files)

        # Remove sample directory if it was created during the process
        if removeCreatedSamplesDirectories and not indir_same_outdir:
            utils.removeDirectory(os.path.join(inputDirectory, sample, ''))

        print('END ' + sample + ' analysis')
        time_taken = utils.runTime(sample_start_time)

        # Save run report
        utils.write_sample_report(samples_report_path, sample,
                                  run_successfully, pass_qc, time_taken,
                                  fileSize, run_report)

    # Run report
    print('\n' + 'END INNUca.py')
    print('\n' + str(number_samples_successfully) + ' samples out of ' +
          str(len(samples)) + ' run successfully')
    print('\n' + str(number_samples_pass) + ' samples out of ' +
          str(number_samples_successfully) +
          ' (run successfully) PASS INNUca.py analysis')
    # Called for its side effects only; the returned value is not needed here
    utils.runTime(general_start_time)

    # Check whether INNUca.py ran at least one sample successfully
    if number_samples_successfully == 0:
        sys.exit('No samples run successfully!')