Пример #1
0
def main(args, outs):
    """Preflight checks for demultiplexing a sequencer run folder.

    Runs, in order: ``tk_preflight.check_rta_complete`` and
    ``tk_preflight.check_runinfo_xml`` on ``args.run_path``; ``check_reads``
    on the value returned by the RunInfo check (skipped when
    ``args.allow_no_barcodes`` is set); the LD_LIBRARY_PATH check; a scan for
    a stray SampleSheet.csv; the bcl2fastq compatibility check (only when
    ``args.check_executables`` is set); and the open-file-handle check.
    Failed checks are reported through ``martian.exit``.

    Args:
        args: stage arguments; ``run_path``, ``allow_no_barcodes`` and
            ``check_executables`` are read here.
        outs: stage outputs; not used by this function.
    """
    hostname = socket.gethostname()

    # Single-argument print(...) produces the same output whether it is
    # parsed as the Python 2 statement or the Python 3 function.
    print("Checking run folder...")
    tk_preflight.check_rta_complete(args.run_path)

    print("Checking RunInfo.xml...")
    runinfo = tk_preflight.check_runinfo_xml(args.run_path)

    if not args.allow_no_barcodes:
        ok, msg = check_reads(runinfo)
        if not ok:
            martian.exit(msg)

    print("Checking system environment...")
    ok, msg = tk_preflight.check_ld_library_path()
    if not ok:
        martian.exit(msg)

    # Presence of SampleSheet.csv interferes with demux.
    # Ask customer to move it. Under older RTA, bcl2fastq looks for it
    # in Data/Intensities/BaseCalls while under newer RTA, it looks for it
    # at the top of the run folder.
    bc_dir = os.path.join(args.run_path, "Data", "Intensities", "BaseCalls")

    # The sample-sheet check is only enforced when the `kitten` module cannot
    # be imported. The probe does not depend on the directory being scanned,
    # so it is performed once instead of once per loop iteration.
    try:
        import kitten  # noqa: F401 -- imported only to test availability
        external = False
    except ImportError:
        external = True

    for ss_dir in [args.run_path, bc_dir]:
        ilmn_sample_sheet = os.path.join(ss_dir, "SampleSheet.csv")

        if external and os.path.exists(ilmn_sample_sheet):
            martian.exit(
                "On machine: %s, SampleSheet.csv found in run folder that would interfere with demux:\n%s\nPlease move, rename, or delete the file and run demux again."
                % (hostname, ilmn_sample_sheet))

    if args.check_executables:
        print("Checking bcl2fastq...")
        # Determine the RTA version of the run and whether this instrument
        # requires i2 to be RC'd (that flag is not needed here).
        (rta_version, _rc_i2_read,
         bcl_params) = tenkit.bcl.get_rta_version(args.run_path)
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        # Determine the best available bcl2fastq version to use.
        # Will call martian.exit() with an error message if there isn't
        # a compatible version available.
        (major_ver,
         full_ver) = tenkit.bcl.check_bcl2fastq(hostname, rta_version)
        martian.log_info("Running bcl2fastq mode: %s.  Version: %s" %
                         (major_ver, full_ver))

    ok, msg = tk_preflight.check_open_fh()
    if not ok:
        martian.exit(msg)
Пример #2
0
def main(args, outs):
    """Validate FASTQ-processing arguments before the pipeline starts.

    Checks that a read group is given for BAM output, that the sample id and
    library ids contain only ``[\\w-]`` characters, that every ``sample_def``
    entry points at an existing, accessible, non-empty absolute FASTQ folder,
    that lanes are integers, and that the sample indices and the
    open-file-handle limit pass their ``tk_preflight`` checks. Failures are
    reported through ``martian.exit``; package versions are logged at the end.

    Args:
        args: stage arguments; reads ``output_format``, ``read_group``,
            ``sample_id`` and ``sample_def``.
        outs: stage outputs; not used by this function.
    """
    hostname = socket.gethostname()

    if args.output_format == 'bam' and args.read_group is None:
        martian.exit(
            "Please specify a read_group to populate the @RG field of the BAM file"
        )

    if args.sample_id is not None:
        if not re.match(r"^[\w-]+$", args.sample_id):
            martian.exit(
                "Sample name may only contain letters, numbers, underscores, and dashes: "
                + args.sample_id)

    for sample_def in args.sample_def:
        read_path = sample_def["read_path"]
        # A missing/empty read_path would otherwise fail on .startswith()
        # with an AttributeError instead of a readable message.
        if not read_path:
            martian.exit("Must specify a read_path containing FASTQs.")
        if not read_path.startswith('/'):
            martian.exit(
                "Specified FASTQ folder must be an absolute path: %s" %
                read_path)
        if not os.path.exists(read_path):
            martian.exit(
                "On machine: %s, specified FASTQ folder does not exist: %s" %
                (hostname, read_path))
        if not os.access(read_path, os.X_OK):
            martian.exit(
                "On machine: %s, longranger does not have permission to open FASTQ folder: %s"
                % (hostname, read_path))
        if not os.listdir(read_path):
            martian.exit("Specified FASTQ folder is empty: " + read_path)

        library_id = sample_def.get("library_id")
        if library_id is not None:
            if not re.match(r"^[\w-]+$", library_id):
                martian.exit(
                    "Library name may only contain letters, numbers, underscores, and dashes: "
                    + library_id)

        lanes = sample_def["lanes"]
        if lanes is not None:
            for lane in lanes:
                if not tk_preflight.is_int(lane):
                    martian.exit(
                        "Lanes must be a comma-separated list of numbers.")

        ok, msg = tk_preflight.check_sample_indices(sample_def)
        if not ok:
            martian.exit(msg)

    # Check open file handles limit
    ok, msg = tk_preflight.check_open_fh()
    if not ok:
        martian.exit(msg)

    martian.log_info(tk_preflight.record_package_versions())
Пример #3
0
def main(args, outs):
    """Preflight for the mkfastq stage.

    Sequentially runs the run-folder, RunInfo.xml, LD_LIBRARY_PATH and
    barcode-whitelist checks, optionally the bcl2fastq compatibility check
    (``args.check_executables``), rejects ``--no-lane-splitting``, emits
    diagnostics, then validates read parameters, samplesheet specs, dual
    indexing and the open-file-handle limit. Problems are reported through
    ``martian.exit`` (directly here or inside the called helpers).

    Args:
        args: stage arguments (``run_path``, ``barcode_whitelist``,
            ``check_executables``, ``bcl2fastq2_args`` are read here).
        outs: stage outputs; unused.
    """
    machine = socket.gethostname()

    print("Checking run folder...")
    tk_preflight.check_rta_complete(args.run_path)

    print("Checking RunInfo.xml...")
    run_info = tk_preflight.check_runinfo_xml(args.run_path)

    print("Checking system environment...")
    env_ok, env_msg = tk_preflight.check_ld_library_path()
    if not env_ok:
        martian.exit(env_msg)

    print("Checking barcode whitelist...")
    tk_preflight.check_barcode_whitelist(args.barcode_whitelist)

    if args.check_executables:
        print("Checking bcl2fastq...")
        rta_info = tk_bcl.get_rta_version(args.run_path)
        # Three-way unpack kept so a malformed result still fails here;
        # the middle element (i2 reverse-complement flag) is not used.
        rta_version, _rc_i2, bcl_params = rta_info
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        bcl2fastq_info = tk_bcl.check_bcl2fastq(machine, rta_version)
        major, full = bcl2fastq_info
        martian.log_info("Running bcl2fastq mode: %s.  Version: %s" %
                         (major, full))

    lane_splitting_disabled = '--no-lane-splitting' in args.bcl2fastq2_args
    if lane_splitting_disabled:
        martian.exit("The --no-lane-splitting option is not supported.")

    print("Emitting run information...")
    martian.log_info("-------mkfastq diagnostic start-------")
    emit_info(args)

    print("Checking read specification...")
    check_read_params(args, run_info)
    martian.log_info("-------mkfastq diagnostic end-------")

    print("Checking samplesheet specs...")
    check_specs(args)

    print("Checking for dual index flowcell...")
    check_dual_index(args, run_info)

    fh_ok, fh_msg = tk_preflight.check_open_fh()
    if not fh_ok:
        martian.exit(fh_msg)
Пример #4
0
def main(args, outs):
    """Preflight for a bcl2fastq run with optional output folders.

    Runs the run-folder, RunInfo.xml, LD_LIBRARY_PATH and barcode-whitelist
    checks, the bcl2fastq compatibility check when
    ``args.check_executables`` is set, the open-file-handle check, the
    folder checks for ``args.output_path`` / ``args.interop_output_path``
    when given, and finally requires ``args.max_bcl2fastq_threads >= 1``.
    Failures are reported through ``martian.exit``.

    Args:
        args: stage arguments.
        outs: stage outputs; unused.
    """
    machine = socket.gethostname()

    print("Checking run folder...")
    tk_preflight.check_rta_complete(args.run_path)

    print("Checking RunInfo.xml...")
    tk_preflight.check_runinfo_xml(args.run_path)

    print("Checking system environment...")
    env_ok, env_msg = tk_preflight.check_ld_library_path()
    if not env_ok:
        martian.exit(env_msg)

    print("Checking barcode whitelist...")
    tk_preflight.check_barcode_whitelist(args.barcode_whitelist)

    if args.check_executables:
        print("Checking bcl2fastq...")
        rta_info = tk_bcl.get_rta_version(args.run_path)
        # The middle element (i2 reverse-complement flag) is not used here.
        rta_version, _rc_i2, bcl_params = rta_info
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        major, full = tk_bcl.check_bcl2fastq(machine, rta_version)
        martian.log_info("Running bcl2fastq mode: %s.  Version: %s" % (major, full))

    fh_ok, fh_msg = tk_preflight.check_open_fh()
    if not fh_ok:
        martian.exit(fh_msg)

    # Output folders are passed to check_folder_or_create with write and
    # execute permission requested.
    if args.output_path is not None:
        tk_preflight.check_folder_or_create(
            "--output-dir",
            args.output_path,
            machine,
            permission=os.W_OK | os.X_OK)

    if args.interop_output_path is not None:
        tk_preflight.check_folder_or_create(
            "--interop-dir",
            args.interop_output_path,
            machine,
            permission=os.W_OK | os.X_OK)

    if args.max_bcl2fastq_threads < 1:
        martian.exit("Cannot run bcl2fastq with zero threads.")
Пример #5
0
def check_environment():
    """Pass the result of the open-file-handle check to ``check``."""
    fh_result = tk_preflight.check_open_fh()
    check(fh_result)
Пример #6
0
def main(args, outs):
    """Validate all pipeline arguments before any work starts.

    Checks, in order: the sample id, every FASTQ ``sample_def`` entry, the
    reference package, the sample sex (or the reference's
    sex_chromosomes.tsv when no sex is given), the open-file-handle limit
    (only when ``args.check_executables``), the targets and CNV-filter BED
    files, ``restrict_locus``, pre-called VCFs, ``vc_mode``, the VC/SV
    ground-truth files and ``sv_min_qv``. Each failed check is reported
    through ``martian.exit``; package versions are logged at the end.

    Args:
        args: stage arguments.
        outs: stage outputs; not used by this function.
    """
    hostname = socket.gethostname()

    # Sample ID / pipestance name
    if args.sample_id is not None:
        if not re.match(r"^[\w-]+$", args.sample_id):
            martian.exit("Sample name may only contain letters, numbers, underscores, and dashes: " + args.sample_id)

    # FASTQ input
    for sample_def in args.sample_def:
        #if not tk_preflight.check_is_chromium(sample_def):
        #    martian.exit("This version of Longranger does not support GemCode data. Please use Longranger 1.2 instead.")
        read_path = sample_def["read_path"]
        if not read_path:
            martian.exit("Must specify a read_path containing FASTQs.")
        if not read_path.startswith('/'):
            martian.exit("Specified FASTQ folder must be an absolute path: %s" % read_path)
        if not os.path.exists(read_path):
            martian.exit("On machine: %s, specified FASTQ folder does not exist: %s" % (hostname, read_path))
        if not os.access(read_path, os.X_OK):
            martian.exit("On machine: %s, longranger does not have permission to open FASTQ folder: %s" % (hostname, read_path))
        if not os.listdir(read_path):
            martian.exit("Specified FASTQ folder is empty: " + read_path)

        library_id = sample_def.get("library_id")
        if library_id is not None:
            if not re.match(r"^[\w-]+$", library_id):
                martian.exit("Library name may only contain letters, numbers, underscores, and dashes: " + library_id)

        lanes = sample_def["lanes"]
        if lanes is not None:
            for lane in lanes:
                if not is_int(lane):
                    martian.exit("Lanes must be a comma-separated list of numbers.")

        ok, msg = tk_preflight.check_sample_indices(sample_def)
        if not ok:
            martian.exit(msg)

    # Reference
    MAX_CONTIGS = 1000
    ok, msg = tk_preflight.check_refdata(args.reference_path, MAX_CONTIGS)
    if ok:
        martian.log_info(msg)
    else:
        martian.exit(msg)

    # Sex (given reference)
    if args.sex is not None:
        if args.sex.lower() not in ["m", "male", "f", "female"]:
            martian.exit("Sex of sample must be 'm', 'male', 'f', or 'female'.")
    else:
        # Load the male chromosome list once and reuse it for both the
        # presence check and the per-chromosome validation below.
        male_chrom = tenkit.reference.load_male_chromosomes(args.reference_path)
        if male_chrom is None:
            martian.exit("Must specify sex of sample, or use a reference package that includes a sex_chromosomes.tsv file.\nFor more details, see http://support.10xgenomics.com/genome-exome/software/pipelines/latest/advanced/references")

        ref = tenkit.reference.open_reference(args.reference_path)
        for m in male_chrom:
            if m not in ref:
                martian.exit("Reference issue in sex_chromosomes.tsv. Male-specific chromosome '%s' does not exist in reference" % m)

        auto_chrom = tenkit.reference.load_autosomal_chromosomes(args.reference_path)
        if auto_chrom is None:
            martian.exit("No autosomal chromosome listed in sex_chromosomes.tsv. Please list an autosomal chromosome to use as a reference for sex determination")

        for a in auto_chrom:
            if a not in ref:
                martian.exit("Reference issue in sex_chromosomes.tsv. Autosomal chromosome '%s' does not exist in reference" % a)

    # Open file handles limit - per LONGRANGER-1758, only check this on the execution machine.
    # We can tell if we're on the execution machine by looking at args.check_executables
    if args.check_executables:
        ok, msg = tk_preflight.check_open_fh()
        if not ok:
            martian.exit(msg)

    # Targets
    if args.targets is not None:
        tk_preflight.check_file("targets", args.targets, hostname)
        tk_preflight.check_bed(args.targets, args.reference_path)

        if args.target_blacklist is None:
            # Single-argument print(...) behaves identically as the Python 2
            # statement and the Python 3 function.
            print("\nWARNING: You selected targeted mode but did not provide a --cnvfilter.\nPlease note this may result in a high number of false positive CNV calls.\nFor more details, see http://support.10xgenomics.com/genome-exome/software\n")

    # Target blacklist
    if args.target_blacklist is not None:
        tk_preflight.check_file("cnvfilter", args.target_blacklist, hostname)
        tk_preflight.check_bed(args.target_blacklist, args.reference_path)

    # Restrict locus
    if tenkit.reference.is_tenx(args.reference_path):
        if args.restrict_locus is not None:
            if not re.match(r"^chr[A-Za-z0-9]{1,2}:[0-9]+\.\.[0-9]+$", args.restrict_locus):
                martian.exit("restrict_locus must be of the form 'chrXX:start..end'.")

    # Pre-called
    if args.vc_precalled is not None:
        tk_preflight.check_file("pre-called VCF", args.vc_precalled, hostname)
        check_vcf(args.vc_precalled, args)

    # VC mode
    if not re.match(r"^(disable|freebayes|gatk:/.*\.jar|precalled:/.*\.vcf)$", args.vc_mode):
        martian.exit("vc_mode must be of the form 'freebayes', 'gatk:/path/to/gatk_jar_file.jar', 'disable'.")

    if args.vc_precalled is None and args.vc_mode == "disable":
        martian.exit("Because you have not provided a pre-called VCF file, variant calling cannot be disabled.")

    # Split on the first colon only: the pattern above accepts paths that
    # themselves contain ':' and those must not be truncated.
    vc_args = args.vc_mode.split(":", 1)
    vc_mode = vc_args[0]
    if vc_mode == "precalled":
        if args.vc_precalled is not None:
            martian.exit("Please specify a pre-called VCF file using only one method.")
        precalled_vars_path = vc_args[1]
        tk_preflight.check_file("pre-called VCF", precalled_vars_path, hostname)
        check_vcf(precalled_vars_path, args)
    elif vc_mode == "gatk":
        jar_path = vc_args[1]
        if not jar_path.startswith('/'):
            martian.exit("Specified GATK jar file must be an absolute path: %s" % jar_path)
        if not os.path.exists(jar_path):
            martian.exit("On machine: %s, specified GATK jar file does not exist: %s" % (hostname, jar_path))
        if os.path.isdir(jar_path):
            martian.exit("Please specify a GATK jar file, not a folder.")
        if args.check_executables:
            check_gatk(jar_path, hostname)

        check_gatk_ref(args.reference_path)

    # VC ground truth
    if args.vc_ground_truth is not None:
        tk_preflight.check_file("VCF ground truth", args.vc_ground_truth, hostname)
        check_vcf(args.vc_ground_truth, args)

    # SV min QV
    if args.sv_min_qv is not None and args.sv_min_qv < 0:
        martian.exit("sv_min_qv must be a positive integer.")

    # SV ground truth
    if args.sv_ground_truth is not None:
        tk_preflight.check_file("SV ground truth", args.sv_ground_truth, hostname)

    martian.log_info(tk_preflight.record_package_versions())
Пример #7
0
def main(args, outs):
    """Validate assembly inputs and estimate the read volume.

    Steps, in order: require a barcode whitelist; require a 16-base barcode
    on read 1 or 2 for every ``sample_def`` entry; validate each FASTQ
    folder, library id and lane list; check the open-file-handle limit
    (only when ``args.check_executables``); collect the input FASTQ files
    according to ``args.input_mode``; run ``check_fastqs`` on them; log and
    alert on the estimated read length; and range-check the ``downsample``
    parameters. User errors are reported through ``martian.exit``; an
    unrecognized ``input_mode`` goes through ``martian.throw``.

    Args:
        args: stage arguments.
        outs: stage outputs; not used by this function.
    """
    hostname = socket.gethostname()
    tk_preflight.record_package_versions()

    ## no barcode whitelist
    if args.barcode_whitelist is None:
        martian.exit("No barcode whitelist specified.")

    ## there must be a barcode in each sample
    ## and it should be 16 bases long
    ## and it should be on read 1 or read 2
    for sd in args.sample_def:
        if sd.get("bc_length", 0) != 16 or sd.get("bc_in_read",
                                                  3) not in [1, 2]:
            martian.exit("Barcode must be 16 bases and on read1 or read2.")

    # Single-argument print(...) behaves identically as the Python 2
    # statement and the Python 3 function.
    print("Checking FASTQ folder...")
    for sample_def in args.sample_def:
        read_path = sample_def["read_path"]
        if not read_path:
            martian.exit("Must specify a read_path containing FASTQs.")
        if not read_path.startswith('/'):
            martian.exit(
                "Specified FASTQ folder must be an absolute path: %s" %
                read_path)
        if not os.path.exists(read_path):
            martian.exit(
                "On machine: %s, specified FASTQ folder does not exist: %s" %
                (hostname, read_path))
        if not os.access(read_path, os.X_OK):
            martian.exit(
                "On machine: %s, supernova does not have permission to open FASTQ folder: %s"
                % (hostname, read_path))
        if not os.listdir(read_path):
            martian.exit("Specified FASTQ folder is empty: " + read_path)

        library_id = sample_def.get("library_id")
        if library_id is not None:
            if not re.match(r"^[\w-]+$", library_id):
                martian.exit(
                    "Library name may only contain letters, numbers, underscores, and dashes: "
                    + library_id)

        lanes = sample_def["lanes"]
        if lanes is not None:
            for lane in lanes:
                if not is_int(lane):
                    martian.exit(
                        "Lanes must be a comma-separated list of numbers.")

    # Open file handles limit - per SUPERNOVA-152, only check this on the execution machine.
    # We can tell if we're on the execution machine by looking at args.check_executables
    if args.check_executables:
        ok, msg = tk_preflight.check_open_fh()
        if not ok:
            martian.exit(msg)

    ## compile a list of fastq files
    fastq_files = []
    if args.input_mode == "BCL_PROCESSOR":
        # Validate the sample_def fields are correct
        for (idx, sample_item) in enumerate(args.sample_def):
            check_key(idx, sample_item, "sample_indices", [list, type(None)])
            check_key(idx, sample_item, "read_path", [str, unicode])
            check_key(idx, sample_item, "lanes", [list, type(None)])

        main_read_type = "RA"
        find_func = tk_fasta.find_input_fastq_files_10x_preprocess

        for read_chunk in args.sample_def:
            sample_index_strings, msg = tk_preflight.check_sample_indices(
                read_chunk)
            if sample_index_strings is None:
                martian.exit(msg)

            path = read_chunk['read_path']
            lanes = read_chunk['lanes']

            for sample_index in sample_index_strings:
                reads = find_func(path, main_read_type, sample_index, lanes)
                fastq_files.extend(reads)
    elif args.input_mode == "ILMN_BCL2FASTQ":
        # Validate the sample_def fields are correct
        for (idx, sample_item) in enumerate(args.sample_def):
            check_key(idx, sample_item, "read_path", [str, unicode])
            check_key(idx, sample_item, "lanes", [list, type(None)])
            check_key(idx, sample_item, "sample_names", [list, type(None)])

        find_func = tk_fasta.find_input_fastq_files_bcl2fastq_demult

        for (idx, read_chunk) in enumerate(args.sample_def):
            sample_names = read_chunk['sample_names']
            # The type check above lets None through; iterating it would
            # raise a TypeError, so report it as a missing field instead.
            if sample_names is None:
                martian.exit(
                    "Entry %d in sample_def missing required field: sample_names"
                    % idx)
            path = read_chunk['read_path']
            lanes = read_chunk['lanes']

            for sample_name in sample_names:
                reads = find_func(path, "R1", sample_name, lanes)
                fastq_files.extend(reads)
                reads = find_func(path, "R3", sample_name, lanes)
                fastq_files.extend(reads)
    else:
        martian.throw("Unrecognized input_mode: %s" % args.input_mode)

    ## if we found nothing then break
    if len(fastq_files) == 0:
        martian.exit(
            "No input FASTQs were found with the requested lanes and sample indices."
        )

    ## make sure they are okay first
    check_fastqs(fastq_files)

    # Sum the per-file read estimates; the reported read length is the
    # unweighted mean of the per-file average lengths.
    total_reads = 0.0
    global_avg = 0.0
    num_files = 0
    for fn in fastq_files:
        reads_fn, avg_read_len_fn = estimate_read_count_and_length(
            fn, num_reads=1000)
        total_reads += reads_fn
        global_avg += avg_read_len_fn
        num_files += 1
    global_avg = global_avg / num_files
    martian.log_info(
        "Estimated read length = %.1f, Estimated total read input = %.1f" %
        (global_avg, total_reads))

    PreflightAlert = alerts.AlertLogger(stage="preflight")
    PreflightAlert.issue("mean_read_length", global_avg)

    # verify type and range for downsampling parameters
    # Note that non-numerical values for bc_subsample_rate and target_reads in mro trickle down as 'None'
    if args.downsample is not None:
        bc_subsample_rate = args.downsample.get("bc_subsample_rate", None)
        if bc_subsample_rate is not None:
            if not isinstance(bc_subsample_rate, (float, int)):
                martian.exit(
                    "Specified barcode fraction: %s is not a fraction. Please specify a valid float between 0 and 1."
                    % str(bc_subsample_rate))
            if bc_subsample_rate <= 0 or bc_subsample_rate > 1:
                martian.exit(
                    "Specified barcode fraction: %s is not between 0 and 1. Please specify a valid float between 0 and 1."
                    % str(bc_subsample_rate))
            if abs(bc_subsample_rate) < 1e-5:
                martian.exit(
                    "Specified barcode fraction: %s is too close to 0 and thus impractical."
                    % str(bc_subsample_rate))

        target_reads = args.downsample.get("target_reads", None)
        if target_reads is not None:
            if not isinstance(target_reads, (int, float)):
                martian.exit(
                    "Specified maxreads: %s is not a number. Please specify an integer larger than one for maxreads"
                    % str(target_reads))
            if target_reads < 1:
                martian.exit(
                    "Specified maxreads: %s is less than one. Please specify an integer larger than one for maxreads"
                    % str(target_reads))
Пример #8
0
def main(args, outs):
    """Validate the pipeline arguments, FASTQ inputs and reference.

    Checks, in order: the sample id; the ranges of ``force_cells`` and the
    ploidy bounds in ``cnv_params``; the single allowed ``downsample``
    option; each ``sample_def`` entry (folder, library id, lanes, and that
    FASTQs can be found for ``args.fastq_mode``); the reference package and
    its contig definitions; the number and lengths of primary contigs; and,
    when ``args.check_executables`` is set, the open-file-handle limit.
    Failures are reported through ``martian.exit``; short-but-acceptable
    contigs raise a ``martian.alarm``. Package versions are logged last.

    Args:
        args: stage arguments.
        outs: stage outputs; not used by this function.
    """
    hostname = socket.gethostname()

    # Sample ID / pipestance name
    if args.sample_id is not None:
        if not re.match(r"^[\w-]+$", args.sample_id):
            martian.exit("Sample name may only contain letters, numbers, underscores, and dashes: " + args.sample_id)

    # Check numerical options
    # types are already checked by mrp so only need to check ranges
    if args.force_cells is not None and (args.force_cells < 1 or
                                         args.force_cells > 20000):
        martian.exit("MRO parameter force_cells must be a positive integer"
                     " <= 20000.")

    # check min_ploidy, max_ploidy
    if args.cnv_params is not None:
        min_ploidy = args.cnv_params.get("min_ploidy", None)
        max_ploidy = args.cnv_params.get("max_ploidy", None)
        if min_ploidy is not None and min_ploidy <= 0:
            martian.exit("Command line argument soft-min-avg-ploidy must be a "
                         "positive real number.")
        if max_ploidy is not None and (max_ploidy <= 0 or max_ploidy > 8.0):
            martian.exit("Command line argument soft-max-avg-ploidy must be a "
                         "positive real number <= 8.")
        if (min_ploidy is not None and max_ploidy is not None and
                max_ploidy <= min_ploidy):
            martian.exit("Command line arguments must satisfy "
                         "soft-min-avg-ploidy < soft-max-avg-ploidy.")

    # check downsample options
    if args.downsample:
        # list(...) so indexing works whether keys() is a list or a view.
        keys = list(args.downsample.keys())
        if len(keys) > 1:
            martian.exit("Please supply either maxreads or downsample but not "
                         "both.")
        key = keys[0]
        value = args.downsample[key]
        param_map = {"target_reads": "maxreads", "gigabases": "downsample"}
        try:
            # Compare the converted number, not the raw value, so numeric
            # strings are range-checked too; written as "not >=" so NaN is
            # rejected as well.
            bad_value = not (float(value) >= 1e-12)
        except (TypeError, ValueError):
            # float(None) raises TypeError, float("abc") raises ValueError.
            bad_value = True
        if bad_value:
            # Fall back to the raw key so an unexpected option name still
            # yields a readable message instead of a KeyError.
            cs_key = param_map.get(key, key)
            martian.exit("Command line argument %s must be a positive number" %
                         cs_key)

    # FASTQ input
    for idx, sample_def in enumerate(args.sample_def):
        read_path = sample_def["read_path"]
        if not read_path:
            martian.exit("Must specify a read_path containing FASTQs.")
        if not read_path.startswith('/'):
            martian.exit("Specified FASTQ folder must be an absolute path: %s" % read_path)
        if not os.path.exists(read_path):
            martian.exit("On machine: %s, specified FASTQ folder does not exist: %s" % (hostname, read_path))
        if not os.access(read_path, os.X_OK):
            martian.exit("On machine: %s, longranger does not have permission to open FASTQ folder: %s" % (hostname, read_path))
        if not os.listdir(read_path):
            martian.exit("Specified FASTQ folder is empty: " + read_path)

        library_id = sample_def.get("library_id")
        if library_id is not None:
            if not re.match(r"^[\w-]+$", library_id):
                martian.exit("Library name may only contain letters, numbers, underscores, and dashes: " + library_id)

        lanes = sample_def["lanes"]
        if lanes is not None:
            for lane in lanes:
                if not tk_preflight.is_int(lane):
                    martian.exit("Lanes must be a comma-separated list of numbers.")

        if args.fastq_mode == "BCL_PROCESSOR":
            sample_indices, msg = tk_preflight.check_sample_indices(sample_def)
            if sample_indices is None:
                martian.exit(msg)

            find_func = tk_fasta.find_input_fastq_files_10x_preprocess
            reads = []
            for sample_index in sample_indices:
                # process interleaved reads
                reads.extend(find_func(read_path, "RA", sample_index, lanes))
            if len(reads) == 0:
                martian.exit("No input FASTQs were found for the requested parameters.")
        elif args.fastq_mode == "ILMN_BCL2FASTQ":
            sample_names = sample_def.get("sample_names", None)
            if sample_names is None:
                martian.exit("Entry {} in sample_def missing required field: sample_names".format(idx))
            find_func = tk_fasta.find_input_fastq_files_bcl2fastq_demult
            reads1 = []
            reads2 = []
            for sample_name in sample_names:
                r1 = find_func(read_path, "R1", sample_name, lanes)
                r2 = find_func(read_path, "R2", sample_name, lanes)
                if len(r1) != len(r2):
                    martian.exit("Entry {} in sample_defs are missing input FASTQs.".format(idx))
                reads1.extend(r1)
                reads2.extend(r2)
            if len(reads1) == 0 and len(reads2) == 0:
                martian.exit("No input FASTQs were found for the requested parameters.")
        else:
            martian.exit("Unrecognized fastq_mode: {}".format(args.fastq_mode))

    # Reference
    ok, msg = tk_preflight.check_refdata(args.reference_path, max_contigs=None)
    if ok:
        martian.log_info(msg)
    else:
        martian.exit(msg)
    contig_defs_json_path = os.path.join(args.reference_path, "fasta",
                                         "contig-defs.json")
    faidx_path = os.path.join(args.reference_path, "fasta",
                              "genome.fa.fai")
    error_msg = contig_manager.verify_contig_defs(contig_defs_json_path,
                                                  faidx_path)
    if error_msg is not None:
        martian.exit(error_msg)

    try:
        ref = contig_manager.contig_manager(args.reference_path)
    except Exception as e:
        martian.exit("Unexpected error occurred.\n%s" % str(e))

    # too many contigs
    primary = ref.primary_contigs(allow_sex_chromosomes=True)
    num_primary_contigs = len(primary)
    if num_primary_contigs > 100:
        martian.exit("There can be at most 100 primary contigs.")

    # contig length checks: below the exit threshold is fatal, below the
    # warn threshold only raises an alarm.
    chrom_length_dict = ref.get_contig_lengths()

    contig_length_exit = 500 * 1000
    contig_length_warn = 10 ** 7
    offending_contigs_warn = []
    offending_contigs_exit = []
    for c in primary:
        clen = chrom_length_dict[c]
        if clen < contig_length_exit:
            offending_contigs_exit.append(c)
        elif clen < contig_length_warn:
            offending_contigs_warn.append(c)
    if len(offending_contigs_exit) > 0:
        martian.exit("Primary contig(s) \"%s\" are shorter than %d bases. "
                     "Every primary contig must be at least %d bases "
                     "in length." % (",".join(offending_contigs_exit),
                                     contig_length_exit, contig_length_exit))
    elif (not args.check_executables) and len(offending_contigs_warn) > 0:
        martian.alarm("Primary contig(s) \"%s\" are shorter than %d bases. "
                      "Every primary contig is recommended to be at least %d bases "
                      "in length." % (",".join(offending_contigs_warn),
                                      contig_length_warn, contig_length_warn))

    # Open file handles limit
    if args.check_executables:
        ok, msg = tk_preflight.check_open_fh()
        if not ok:
            martian.exit(msg)

    martian.log_info(tk_preflight.record_package_versions())
def check_filehandle_limit():
    """Run the open-file-handle check and exit via martian on failure."""
    passed, reason = tk_preflight.check_open_fh()
    if passed:
        return
    martian.exit(reason)
Пример #10
0
def main(args, outs):
    """Validate sample, reference and chemistry arguments before a run.

    Checks, in order: gem groups; every ``sample_def`` FASTQ folder, lane
    list and sample indices; that at least one of ``reference_path`` /
    ``vdj_reference_path`` is given and passes its refdata check; the
    chemistry definitions and the requested chemistry; the open-file-handle
    limit (only when ``args.check_executables``); and that
    ``recovered_cells`` and ``force_cells`` are not both set. Failures are
    reported through ``martian.exit``.

    Args:
        args: stage arguments.
        outs: stage outputs; not used by this function.
    """
    hostname = socket.gethostname()

    # Single-argument print(...) behaves identically as the Python 2
    # statement and the Python 3 function.
    print("Checking sample info...")
    ok, msg = tk_preflight.check_gem_groups(args.sample_def)
    if not ok:
        martian.exit(msg)

    print("Checking FASTQ folder...")
    for sample_def in args.sample_def:
        read_path = sample_def["read_path"]
        # A missing/empty read_path would otherwise fail on .startswith()
        # with an AttributeError instead of a readable message.
        if not read_path:
            martian.exit("Must specify a read_path containing FASTQs.")
        if not read_path.startswith('/'):
            martian.exit(
                "Specified FASTQ folder must be an absolute path: %s" %
                read_path)
        if not os.path.exists(read_path):
            martian.exit(
                "On machine: %s, specified FASTQ folder does not exist: %s" %
                (hostname, read_path))
        if not os.access(read_path, os.X_OK):
            martian.exit(
                "On machine: %s, cellranger does not have permission to open FASTQ folder: %s"
                % (hostname, read_path))
        if not os.listdir(read_path):
            martian.exit("Specified FASTQ folder is empty: " + read_path)

        lanes = sample_def["lanes"]
        if lanes is not None:
            for lane in lanes:
                if not is_int(lane):
                    martian.exit(
                        "Lanes must be a comma-separated list of numbers.")

        ok, msg = tk_preflight.check_sample_indices(sample_def)
        if not ok:
            martian.exit(msg)

    if args.reference_path is None and args.vdj_reference_path is None:
        martian.exit(
            "Must specify either reference_path or vdj_reference_path.")

    print("Checking transcriptome...")
    if args.reference_path is not None:
        ok, msg = cr_preflight.check_refdata(args.reference_path)
        if not ok:
            martian.exit(msg)

    if args.vdj_reference_path is not None:
        ok, msg = vdj_preflight.check_refdata(args.vdj_reference_path)
        if not ok:
            martian.exit(msg)

    print("Checking chemistry...")
    ok, msg = cr_chem.check_chemistry_defs()
    if not ok:
        martian.exit(msg)

    ok, msg = cr_chem.check_chemistry_arg(args.chemistry_name)
    if not ok:
        martian.exit(msg)

    if args.chemistry_name == cr_chem.CUSTOM_CHEMISTRY_NAME:
        ok, msg = cr_chem.check_chemistry_def(args.custom_chemistry_def)
        if not ok:
            martian.exit(msg)

    # Open file handles limit - per CELLRANGER-824, only check this on the execution machine.
    # We can tell if we're on the execution machine by looking at args.check_executables
    if args.check_executables:
        print("Checking system environment...")
        ok, msg = tk_preflight.check_open_fh()
        if not ok:
            martian.exit(msg)

    print("Checking optional arguments...")
    if args.recovered_cells is not None and args.force_cells is not None:
        martian.exit(
            "Cannot specify both --force-cells and --expect-cells (or --cells) in the same run."
        )

    cr_preflight.record_package_versions()
Пример #11
0
def main(args, outs):
    hostname = socket.gethostname()
    tk_preflight.record_package_versions()

    ## no barcode whitelist
    if args.barcode_whitelist is None:
        martian.exit("No barcode whitelist specified.")

    ## there must be a barcode in each sample
    ## and it should be 16 bases long
    ## and it should be on read 1 or read 2
    for sd in args.sample_def:
        if sd.get("bc_length", 0) != 16 or sd.get("bc_in_read",
                                                  3) not in [1, 2]:
            martian.exit("Barcode must be 16 bases and on read1 or read2.")

    print "Checking FASTQ folder..."
    for sample_def in args.sample_def:
        read_path = sample_def["read_path"]
        if not read_path:
            martian.exit("Must specify a read_path containing FASTQs.")
        if not read_path.startswith('/'):
            martian.exit(
                "Specified FASTQ folder must be an absolute path: %s" %
                read_path)
        if not os.path.exists(read_path):
            martian.exit(
                "On machine: %s, specified FASTQ folder does not exist: %s" %
                (hostname, read_path))
        if not os.access(read_path, os.X_OK):
            martian.exit(
                "On machine: %s, supernova does not have permission to open FASTQ folder: %s"
                % (hostname, read_path))
        if not os.listdir(read_path):
            martian.exit("Specified FASTQ folder is empty: " + read_path)

        library_id = sample_def.get("library_id")
        if library_id is not None:
            if not re.match("^[\w-]+$", library_id):
                martian.exit(
                    "Library name may only contain letters, numbers, underscores, and dashes: "
                    + library_id)

        lanes = sample_def["lanes"]
        if lanes is not None:
            for lane in lanes:
                if not is_int(lane):
                    martian.exit(
                        "Lanes must be a comma-separated list of numbers.")

    # Open file handles limit
    ok, msg = tk_preflight.check_open_fh()
    if not ok:
        martian.exit(msg)

    ## compile a list of fastq files
    fastq_files = []
    if args.input_mode == "BCL_PROCESSOR":
        # Validate the sample_def fields are correct
        for (idx, sample_item) in enumerate(args.sample_def):
            # validate
            check_key(idx, sample_item, "sample_indices", [list, type(None)])
            check_key(idx, sample_item, "read_path", [str, unicode])
            check_key(idx, sample_item, "lanes", [list, type(None)])

        main_read_type = "RA"
        find_func = tk_fasta.find_input_fastq_files_10x_preprocess

        for read_chunk in args.sample_def:
            sample_index_strings, msg = tk_preflight.check_sample_indices(
                read_chunk)
            if sample_index_strings is None:
                martian.exit(msg)

            path = read_chunk['read_path']
            lanes = read_chunk['lanes']

            for sample_index in sample_index_strings:
                reads = find_func(path, main_read_type, sample_index, lanes)
                fastq_files.extend(reads)
    elif args.input_mode == "ILMN_BCL2FASTQ":
        # Validate the sample_def fields are correct
        for (idx, sample_item) in enumerate(args.sample_def):
            # validate
            check_key(idx, sample_item, "read_path", [str, unicode])
            check_key(idx, sample_item, "lanes", [list, type(None)])
            check_key(idx, sample_item, "sample_names", [list, type(None)])

        find_func = tk_fasta.find_input_fastq_files_bcl2fastq_demult

        for read_chunk in args.sample_def:
            sample_names = read_chunk['sample_names']
            path = read_chunk['read_path']
            lanes = read_chunk['lanes']

            for sample_name in sample_names:
                reads = find_func(path, "R1", sample_name, lanes)
                fastq_files.extend(reads)
                reads = find_func(path, "R3", sample_name, lanes)
                fastq_files.extend(reads)
    else:
        martian.throw("Unrecognized input_mode: %s" % args.input_mode)

    ## if we found nothing then break
    if len(fastq_files) == 0:
        martian.exit(
            "No input FASTQs were found with the requested lanes and sample indices."
        )

    ## make sure they are okay first
    check_fastqs(fastq_files)

    total_reads = 0.0
    global_avg = 0.0
    num_files = 0
    for fn in fastq_files:
        reads_fn, avg_read_len_fn = estimate_read_count_and_length(
            fn, num_reads=1000)
        total_reads += reads_fn
        global_avg += avg_read_len_fn
        num_files += 1
    global_avg = global_avg / num_files
    martian.log_info(
        "Estimated read length = %.1f, Estimated total read input = %.1f" %
        (global_avg, total_reads))

    exit_msg = "We observe many reads shorter than 125 bases. The ideal read length for Supernova is 150 bases. Reads shorter than the ideal length are likely to yield a lower quality assembly, and the algorithm has not been tested on short reads. Because reads are too short, execution will be terminated."
    warn_msg = "We observe many reads shorter than 150 bases.The ideal read length for Supernova is 150 bases. Reads shorter than the ideal length are likely to yield a lower quality assembly."
    if global_avg < 125:
        martian.exit(exit_msg)
    elif global_avg < 149:
        martian.alarm(warn_msg)