Пример #1
0
def check_command_line_args(options, args, parser):
    """Validate the positional arguments and options of a chimerascan run.

    ``args`` must hold at least three entries: the two mate FASTQ files
    followed by the output directory.  Fatal problems are reported through
    ``parser.error``; an insufficient processor count only logs a warning.

    Args:
        options: parsed options; ``segment_length``, ``bowtie_build_bin``,
            ``bowtie_bin``, ``index_dir`` and ``num_processors`` are read.
        args: positional command line arguments.
        parser: option parser whose ``error`` method reports problems.
    """
    # check command line arguments
    if len(args) < 3:
        parser.error("Incorrect number of command line arguments")
    fastq_files = args[0:2]
    output_dir = args[2]
    # check that every input fastq file exists before reading any of them;
    # each mate is tested on its own path (testing args[0] for both mates
    # let a missing second file slip through)
    for mate, fastq_file in enumerate(fastq_files):
        if not os.path.isfile(fastq_file):
            parser.error("mate '%d' fastq file '%s' is not valid" %
                         (mate, fastq_file))
    read_lengths = []
    for fastq_file in fastq_files:
        logging.debug("Checking read length for file %s" % (fastq_file))
        read_lengths.append(get_read_length(fastq_file))
        logging.debug("Read length for file %s: %d" %
                      (fastq_file, read_lengths[-1]))
    # check that mate read lengths are equal
    if len(set(read_lengths)) > 1:
        parser.error("read lengths mate1=%d and mate2=%d are unequal" %
                     (read_lengths[0], read_lengths[1]))
    # check that the seed length is not longer than any read length
    if any(options.segment_length > rlen for rlen in read_lengths):
        parser.error("seed length %d cannot be longer than read length" %
                     (options.segment_length))
    # check that output dir is not a regular file
    if os.path.exists(output_dir) and (not os.path.isdir(output_dir)):
        parser.error(
            "Output directory name '%s' exists and is not a valid directory" %
            (output_dir))
    # check that bowtie-build program exists
    if check_executable(options.bowtie_build_bin):
        logging.debug("Checking for 'bowtie-build' binary... found")
    else:
        parser.error("bowtie-build binary not found or not executable")
    # check that bowtie program exists
    if check_executable(options.bowtie_bin):
        logging.debug("Checking for 'bowtie' binary... found")
    else:
        parser.error("bowtie binary not found or not executable")
    # check that alignment index exists
    if os.path.isdir(options.index_dir):
        logging.debug("Checking for chimerascan index directory... found")
    else:
        parser.error("chimerascan alignment index directory '%s' not valid" %
                     (options.index_dir))
    # check that alignment index file exists
    align_index_file = os.path.join(options.index_dir,
                                    config.BOWTIE_INDEX_FILE)
    if os.path.isfile(align_index_file):
        logging.debug("Checking for bowtie index file... found")
    else:
        parser.error("chimerascan bowtie index file '%s' invalid" %
                     (align_index_file))
    # check for sufficient processors (advisory only)
    if options.num_processors < config.BASE_PROCESSORS:
        logging.warning(
            "Please specify >=2 processes using '-p' to allow program to run efficiently"
        )
Пример #2
0
 def check_config(self):
     """Check the run configuration, logging every problem found.

     Problems are logged instead of raised so that one pass reports all
     of them.

     Returns:
         True when every check passed, False otherwise.
     """
     config_passed = True
     read_lengths = []
     # check that input fastq files exist
     for mate, fastq_file in enumerate(self.fastq_files):
         if not os.path.isfile(fastq_file):
             logging.error("mate '%d' fastq file '%s' is not valid" %
                           (mate, fastq_file))
             config_passed = False
             # the failure is recorded; do not try to read a file that is
             # not there, so the remaining checks can still be reported
             continue
         logging.debug("Checking file %s" % (fastq_file))
         read_lengths.append(get_read_length(fastq_file))
         logging.debug("File %s read length=%d" %
                       (fastq_file, read_lengths[-1]))
     # check that mate read lengths are equal
     if len(set(read_lengths)) > 1:
         logging.error("Unequal read lengths mate1=%d and mate2=%d" %
                       (read_lengths[0], read_lengths[1]))
         config_passed = False
     # check that the seed length is not longer than any read length
     if any(self.segment_length > rlen for rlen in read_lengths):
         logging.error("seed length %d cannot be longer than read length" %
                       (self.segment_length))
         config_passed = False
     # check that output dir is not a regular file
     if os.path.exists(self.output_dir) and (not os.path.isdir(self.output_dir)):
         logging.error("Output directory name '%s' exists and is not a valid directory" %
                       (self.output_dir))
         config_passed = False
     # check that bowtie-build program exists
     if check_executable(self.bowtie_build_bin):
         logging.debug("Checking for 'bowtie-build' binary... found")
     else:
         logging.error("bowtie-build binary not found or not executable")
         config_passed = False
     # check that bowtie program exists
     if check_executable(self.bowtie_bin):
         logging.debug("Checking for 'bowtie' binary... found")
     else:
         logging.error("bowtie binary not found or not executable")
         config_passed = False
     # check that alignment index exists
     if os.path.isdir(self.index_dir):
         logging.debug("Checking for chimerascan index directory... found")
         # check that alignment index file exists
         align_index_file = os.path.join(self.index_dir, config.BOWTIE_INDEX_FILE)
         if os.path.isfile(align_index_file):
             logging.debug("Checking for bowtie index file... found")
         else:
             logging.error("chimerascan bowtie index file '%s' invalid" % (align_index_file))
             config_passed = False
     else:
         logging.error("chimerascan alignment index directory '%s' not valid" %
                       (self.index_dir))
         config_passed = False
     # check for sufficient processors (advisory only, does not fail)
     if self.num_processors < config.BASE_PROCESSORS:
         logging.warning("Please specify >=2 processes using '-p' to allow program to run efficiently")
     return config_passed
Пример #3
0
def check_command_line_args(options, args, parser):
    """Validate the positional arguments and options of a chimerascan run.

    ``args`` must hold at least three entries: the two mate FASTQ files
    followed by the output directory.  Fatal problems are reported through
    ``parser.error``; an insufficient processor count only logs a warning.

    Args:
        options: parsed options; ``segment_length``, ``bowtie_build_bin``,
            ``bowtie_bin``, ``index_dir`` and ``num_processors`` are read.
        args: positional command line arguments.
        parser: option parser whose ``error`` method reports problems.
    """
    # check command line arguments
    if len(args) < 3:
        parser.error("Incorrect number of command line arguments")
    fastq_files = args[0:2]
    output_dir = args[2]
    # check that every input fastq file exists before reading any of them;
    # each mate is tested on its own path (testing args[0] for both mates
    # let a missing second file slip through)
    for mate, fastq_file in enumerate(fastq_files):
        if not os.path.isfile(fastq_file):
            parser.error("mate '%d' fastq file '%s' is not valid" %
                         (mate, fastq_file))
    read_lengths = []
    for fastq_file in fastq_files:
        logging.debug("Checking read length for file %s" %
                      (fastq_file))
        read_lengths.append(get_read_length(fastq_file))
        logging.debug("Read length for file %s: %d" %
                      (fastq_file, read_lengths[-1]))
    # check that mate read lengths are equal
    if len(set(read_lengths)) > 1:
        parser.error("read lengths mate1=%d and mate2=%d are unequal" %
                     (read_lengths[0], read_lengths[1]))
    # check that the seed length is not longer than any read length
    if any(options.segment_length > rlen for rlen in read_lengths):
        parser.error("seed length %d cannot be longer than read length" %
                     (options.segment_length))
    # check that output dir is not a regular file
    if os.path.exists(output_dir) and (not os.path.isdir(output_dir)):
        parser.error("Output directory name '%s' exists and is not a valid directory" %
                     (output_dir))
    # check that bowtie-build program exists
    if check_executable(options.bowtie_build_bin):
        logging.debug("Checking for 'bowtie-build' binary... found")
    else:
        parser.error("bowtie-build binary not found or not executable")
    # check that bowtie program exists
    if check_executable(options.bowtie_bin):
        logging.debug("Checking for 'bowtie' binary... found")
    else:
        parser.error("bowtie binary not found or not executable")
    # check that alignment index exists
    if os.path.isdir(options.index_dir):
        logging.debug("Checking for chimerascan index directory... found")
    else:
        parser.error("chimerascan alignment index directory '%s' not valid" %
                     (options.index_dir))
    # check that alignment index file exists
    align_index_file = os.path.join(options.index_dir, config.BOWTIE_INDEX_FILE)
    if os.path.isfile(align_index_file):
        logging.debug("Checking for bowtie index file... found")
    else:
        parser.error("chimerascan bowtie index file '%s' invalid" % (align_index_file))
    # check for sufficient processors (advisory only)
    if options.num_processors < config.BASE_PROCESSORS:
        logging.warning("Please specify >=2 processes using '-p' to allow program to run efficiently")
Пример #4
0
def main():
    """Command line entry point for building chimerascan alignment indexes.

    Parses three positional arguments, verifies the input files, the
    output location and the bowtie2-build binary, then calls
    ``create_chimerascan_index``.

    Returns:
        The value returned by ``create_chimerascan_index``.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    parser = argparse.ArgumentParser(
        description="Build alignment indexes for use with chimerascan")
    positionals = (
        ("ref_fasta_file", "reference genome FASTA file"),
        ("transcript_feature_file", "transcript features"),
        ("output_dir", "directory where indexes will be created"),
    )
    for arg_name, arg_help in positionals:
        parser.add_argument(arg_name, help=arg_help)
    args = parser.parse_args()
    # both input files must already exist
    required_inputs = (
        (args.ref_fasta_file, "Reference fasta file '%s' not found"),
        (args.transcript_feature_file, "Gene feature file '%s' not found"),
    )
    for path, message in required_inputs:
        if not os.path.isfile(path):
            parser.error(message % (path))
    # the output location may be absent, but must not be a non-directory
    out_dir = args.output_dir
    if os.path.exists(out_dir) and not os.path.isdir(out_dir):
        parser.error("Output directory name '%s' exists and is not a valid "
                     "directory" % (out_dir))
    # the bowtie2-build program must be runnable; parser.error() exits
    builder = config.BOWTIE2_BUILD_BIN
    if not check_executable(builder):
        parser.error("%s binary not found or not executable" % (builder))
    logging.debug("Checking for '%s' binary... found" % (builder))
    # run main index creation function
    return create_chimerascan_index(out_dir, args.ref_fasta_file,
                                    args.transcript_feature_file)
Пример #5
0
def main():
    """Build chimerascan alignment indexes from command line arguments.

    The reference FASTA file, the transcript feature file and the output
    directory are read as positional arguments and validated before
    ``create_chimerascan_index`` is called.

    Returns:
        The value returned by ``create_chimerascan_index``.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.DEBUG,
    )
    parser = argparse.ArgumentParser(
        description="Build alignment indexes for use with chimerascan")
    parser.add_argument("ref_fasta_file", help="reference genome FASTA file")
    parser.add_argument("transcript_feature_file", help="transcript features")
    parser.add_argument("output_dir",
                        help="directory where indexes will be created")
    parsed = parser.parse_args()
    fasta_path = parsed.ref_fasta_file
    feature_path = parsed.transcript_feature_file
    target_dir = parsed.output_dir
    # input files have to exist up front
    if not os.path.isfile(fasta_path):
        parser.error("Reference fasta file '%s' not found" % (fasta_path))
    if not os.path.isfile(feature_path):
        parser.error("Gene feature file '%s' not found" % (feature_path))
    # an existing output path is only acceptable when it is a directory
    if os.path.exists(target_dir) and not os.path.isdir(target_dir):
        parser.error("Output directory name '%s' exists and is not a valid "
                     "directory" % (target_dir))
    # bowtie2-build must be runnable; parser.error() terminates otherwise
    build_bin = config.BOWTIE2_BUILD_BIN
    if not check_executable(build_bin):
        parser.error("%s binary not found or not executable" % (build_bin))
    logging.debug("Checking for '%s' binary... found" % (build_bin))
    # hand over to the index creation routine
    return create_chimerascan_index(target_dir, fasta_path, feature_path)
def main():
    """Build chimerascan alignment indexes using the bowtie-build program.

    Expects three positional arguments (reference genome FASTA, gene
    feature file, output directory) and an optional ``--bowtie-dir``.
    After validation ``create_chimerascan_index`` is called and the
    process exits with its return code.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    usage = ("usage: %prog [options] <reference_genome.fa> "
             "<genepred_genes.txt> <index_output_dir>")
    parser = OptionParser(usage)
    bowtie_dir_help = ("Path to the 'bowtie' software (by default, "
                       "expects the 'bowtie' and 'bowtie-build' "
                       "binaries to be in current PATH)")
    parser.add_option("--bowtie-dir", dest="bowtie_dir", default="",
                      help=bowtie_dir_help)
    options, args = parser.parse_args()
    # three positional arguments are required
    if len(args) < 3:
        parser.error("Incorrect number of command line arguments")
    fasta_path, feature_path, target_dir = args[:3]
    # input files have to exist up front
    if not os.path.isfile(fasta_path):
        parser.error("Reference fasta file '%s' not found" % (fasta_path))
    if not os.path.isfile(feature_path):
        parser.error("Gene feature file '%s' not found" % (feature_path))
    # an existing output path is only acceptable when it is a directory
    if os.path.exists(target_dir) and not os.path.isdir(target_dir):
        parser.error("Output directory name '%s' exists and is not a valid "
                     "directory" % (target_dir))
    # bowtie-build must be runnable; parser.error() terminates otherwise
    build_bin = os.path.join(options.bowtie_dir, "bowtie-build")
    if not check_executable(build_bin):
        parser.error("bowtie-build binary not found or not executable")
    logging.debug("Checking for 'bowtie-build' binary... found")
    # run the index creation and use its result as exit status
    sys.exit(create_chimerascan_index(target_dir, fasta_path,
                                      feature_path, build_bin))
Пример #7
0
def _setup_and_open_files(genome_index, transcripts, input_file, output_file,
                          library_type, input_sam, output_sam):
    """Open the input/output alignment files and build the transcript map.

    The output file is opened with the input file's header whose 'SQ'
    entries are replaced by the references of the bowtie2 genome index.

    Args:
        genome_index: passed to ``get_references_from_bowtie2_index``.
        transcripts: iterable of objects providing ``exons``, ``strand``,
            ``tx_id`` and ``chrom``.
        input_file: path of the alignment file to read.
        output_file: path of the alignment file to write.
        library_type: not used by this function.
        input_sam: truthy to read the input with mode "r", else "rb".
        output_sam: truthy to write the output with mode "wh", else "wb".

    Returns:
        ``(infh, outfh, transcript_tid_map)`` where the map goes from the
        transcript reference id to ``(genome_tid, negstrand, exons)``;
        ``config.JOB_ERROR`` when the bowtie2-inspect binary is not found.
    """
    logging.debug("Creating genome SAM header")
    inspect_available = check_executable(config.BOWTIE2_INSPECT_BIN)
    if not inspect_available:
        logging.error("Cannot find bowtie2-inspect binary")
        return config.JOB_ERROR
    # reference names and lengths come from the bowtie2 index
    references = get_references_from_bowtie2_index(genome_index)
    # open the input file and take over its header
    read_mode = "r" if input_sam else "rb"
    infh = pysam.Samfile(input_file, read_mode)
    new_header = dict(infh.header)
    sq_records = []
    for seqname, seqlen in references:
        sq_records.append({'SN': seqname, 'LN': seqlen})
    new_header['SQ'] = sq_records
    # open the output file with the rewritten header
    write_mode = "wh" if output_sam else "wb"
    outfh = pysam.Samfile(output_file, write_mode, header=new_header)
    # reference name -> reference id lookups for both files
    genome_tids = {rname: tid for tid, rname in enumerate(outfh.references)}
    transcript_tids = {rname: tid for tid, rname in enumerate(infh.references)}
    # per transcript: genome reference id, strand flag and exon list
    logging.debug("Creating transcript to genome map")
    transcript_tid_map = {}
    for t in transcripts:
        exons = [(start, end) for start, end in t.exons]
        negstrand = t.strand == "-"
        if negstrand:
            # exons of minus-strand transcripts are stored in reverse order
            exons.reverse()
        transcript_tid = transcript_tids[str(t.tx_id)]
        genome_tid = genome_tids[t.chrom]
        transcript_tid_map[transcript_tid] = (genome_tid, negstrand, exons)
    return infh, outfh, transcript_tid_map
def main():
    """Build chimerascan alignment indexes using the bowtie-build program.

    Expects three positional arguments (reference genome FASTA, gene
    models file, output directory) and an optional ``--bowtie-dir``.
    After validation ``create_chimerascan_index`` is called and the
    process exits with its return code.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    usage = ("usage: %prog [options] <reference_genome.fa> "
             "<gene_models.txt> <index_output_dir>")
    parser = OptionParser(usage)
    # NOTE: min/max fragment size options are not exposed here
    bowtie_dir_help = ("Path to the 'bowtie' software (by default, "
                       "expects the 'bowtie' and 'bowtie-build' "
                       "binaries to be in current PATH)")
    parser.add_option("--bowtie-dir", dest="bowtie_dir", default="",
                      help=bowtie_dir_help)
    options, args = parser.parse_args()
    # three positional arguments are required
    if len(args) < 3:
        parser.error("Incorrect number of command line arguments")
    fasta_path, feature_path, target_dir = args[:3]
    # input files have to exist up front
    if not os.path.isfile(fasta_path):
        parser.error("Reference fasta file '%s' not found" % (fasta_path))
    if not os.path.isfile(feature_path):
        parser.error("Gene feature file '%s' not found" % (feature_path))
    # an existing output path is only acceptable when it is a directory
    if os.path.exists(target_dir) and not os.path.isdir(target_dir):
        parser.error("Output directory name '%s' exists and is not a valid "
                     "directory" % (target_dir))
    # bowtie-build must be runnable; parser.error() terminates otherwise
    build_bin = os.path.join(options.bowtie_dir, "bowtie-build")
    if not check_executable(build_bin):
        parser.error("bowtie-build binary not found or not executable")
    logging.debug("Checking for 'bowtie-build' binary... found")
    # run the index creation and use its result as exit status
    exit_status = create_chimerascan_index(target_dir, fasta_path,
                                           feature_path, build_bin)
    sys.exit(exit_status)
def _setup_and_open_files(genome_index, transcripts,
                          input_file, output_file,
                          library_type, input_sam,
                          output_sam):
    """Open the input/output alignment files and build the transcript map.

    The output file is opened with the input file's header whose 'SQ'
    entries are replaced by the references of the bowtie2 genome index.

    Args:
        genome_index: passed to ``get_references_from_bowtie2_index``.
        transcripts: iterable of objects providing ``exons``, ``strand``,
            ``tx_id`` and ``chrom``.
        input_file: path of the alignment file to read.
        output_file: path of the alignment file to write.
        library_type: not used by this function.
        input_sam: truthy to read the input with mode "r", else "rb".
        output_sam: truthy to write the output with mode "wh", else "wb".

    Returns:
        ``(infh, outfh, transcript_tid_map)`` where the map goes from the
        transcript reference id to ``(genome_tid, negstrand, exons)``;
        ``config.JOB_ERROR`` when the bowtie2-inspect binary is not found.
    """
    logging.debug("Creating genome SAM header")
    if not check_executable(config.BOWTIE2_INSPECT_BIN):
        logging.error("Cannot find bowtie2-inspect binary")
        return config.JOB_ERROR
    # reference names and lengths come from the bowtie2 index
    index_refs = get_references_from_bowtie2_index(genome_index)
    # open the input file and take over its header
    in_mode = "r" if input_sam else "rb"
    infh = pysam.Samfile(input_file, in_mode)
    out_header = dict(infh.header)
    out_header['SQ'] = [{'SN': name, 'LN': length}
                        for name, length in index_refs]
    # open the output file with the rewritten header
    out_mode = "wh" if output_sam else "wb"
    outfh = pysam.Samfile(output_file, out_mode, header=out_header)
    # reference name -> reference id lookups for both files
    genome_lookup = {}
    for tid, rname in enumerate(outfh.references):
        genome_lookup[rname] = tid
    transcriptome_lookup = {}
    for tid, rname in enumerate(infh.references):
        transcriptome_lookup[rname] = tid
    # per transcript: genome reference id, strand flag and exon list
    logging.debug("Creating transcript to genome map")
    transcript_tid_map = {}
    for t in transcripts:
        exons = [(start, end) for start, end in t.exons]
        negstrand = t.strand == "-"
        if negstrand:
            # exons of minus-strand transcripts are stored in reverse order
            exons.reverse()
        transcript_tid = transcriptome_lookup[str(t.tx_id)]
        genome_tid = genome_lookup[t.chrom]
        transcript_tid_map[transcript_tid] = (genome_tid, negstrand, exons)
    return infh, outfh, transcript_tid_map
def main():
    """Build chimerascan alignment indexes using a given bowtie-build.

    Expects three positional arguments (reference genome FASTA, gene
    models file, output directory) and an optional ``--bowtie-build-bin``
    that defaults to "bowtie-build".  After validation
    ``create_chimerascan_index`` is called and the process exits with its
    return code.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    usage = ("usage: %prog [options] <reference_genome.fa> "
             "<gene_models.txt> <index_output_dir>")
    parser = OptionParser(usage)
    parser.add_option("--bowtie-build-bin", dest="bowtie_build_bin",
                      default="bowtie-build",
                      help="Path to 'bowtie-build' program")
    options, args = parser.parse_args()
    # three positional arguments are required
    if len(args) < 3:
        parser.error("Incorrect number of command line arguments")
    fasta_path, feature_path, target_dir = args[:3]
    # input files have to exist up front
    if not os.path.isfile(fasta_path):
        parser.error("Reference fasta file '%s' not found" % (fasta_path))
    if not os.path.isfile(feature_path):
        parser.error("Gene feature file '%s' not found" % (feature_path))
    # an existing output path is only acceptable when it is a directory
    if os.path.exists(target_dir) and not os.path.isdir(target_dir):
        parser.error("Output directory name '%s' exists and is not a valid "
                     "directory" % (target_dir))
    # bowtie-build must be runnable; parser.error() terminates otherwise
    build_bin = options.bowtie_build_bin
    if not check_executable(build_bin):
        parser.error("bowtie-build binary not found or not executable")
    logging.debug("Checking for 'bowtie-build' binary... found")
    # run the index creation and use its result as exit status
    exit_status = create_chimerascan_index(target_dir, fasta_path,
                                           feature_path, build_bin)
    sys.exit(exit_status)
Пример #11
0
 def check_config(self):
     """Check the run configuration, logging every problem found.

     Problems are logged instead of raised so that one pass reports all
     of them.

     Returns:
         True when every check passed, False otherwise.
     """
     config_passed = True
     # check that input fastq files exist; only the files that do are
     # inspected further, the missing ones are reported right here
     # NOTE(review): assumes detect_read_length cannot handle a missing
     # file - confirm
     existing_files = []
     for mate, fastq_file in enumerate(self.fastq_files):
         if os.path.isfile(fastq_file):
             existing_files.append(fastq_file)
         else:
             logging.error("mate '%d' fastq file '%s' is not valid" %
                           (mate, fastq_file))
             config_passed = False
     # check read lengths with trimming applied
     logging.debug("Checking read lengths")
     read_lengths = [detect_read_length(fq) for fq in existing_files]
     total_trimming = self.trim5 + self.trim3
     for fastq_file, rlen in zip(existing_files, read_lengths):
         trimmed_rlen = rlen - total_trimming
         logging.debug("File %s read length: %d after trimming: %d" %
                       (fastq_file, rlen, trimmed_rlen))
         if trimmed_rlen < config.MIN_SEGMENT_LENGTH:
             # one placeholder per value: with a single "%d" and two
             # arguments the formatting itself raised a TypeError
             logging.error(
                 "Trimmed read length %d is less than the minimum length "
                 "of %d" % (trimmed_rlen, config.MIN_SEGMENT_LENGTH))
             config_passed = False
     # check that mate read lengths are equal
     if len(set(read_lengths)) > 1:
         logging.error("Unequal read lengths mate1=%d and mate2=%d" %
                       (read_lengths[0], read_lengths[1]))
         config_passed = False
     # check that the seed length is not longer than any read length
     if self.segment_length is not None:
         if any((self.segment_length > rlen) for rlen in read_lengths):
             logging.error(
                 "seed length %d cannot be longer than read length" %
                 (self.segment_length))
             config_passed = False
     # ensure local anchor length is not smaller than the minimum
     if self.local_anchor_length < config.LOCAL_ANCHOR_LENGTH_MIN:
         logging.error(
             "Local anchor length of %d < %d" %
             (self.local_anchor_length, config.LOCAL_ANCHOR_LENGTH_MIN))
         config_passed = False
     # check that output dir is not a regular file
     if os.path.exists(
             self.output_dir) and (not os.path.isdir(self.output_dir)):
         logging.error(
             "Output directory name '%s' exists and is not a valid directory"
             % (self.output_dir))
         config_passed = False
     # check that bowtie2-build program exists
     if check_executable(config.BOWTIE2_BUILD_BIN):
         logging.debug("Checking for '%s' binary... found" %
                       config.BOWTIE2_BUILD_BIN)
     else:
         logging.error("%s binary not found or not executable" %
                       config.BOWTIE2_BUILD_BIN)
         config_passed = False
     # check that bowtie2 program exists
     if check_executable(config.BOWTIE2_BIN):
         logging.debug("Checking for '%s' binary... found" %
                       config.BOWTIE2_BIN)
     else:
         logging.error("%s binary not found or not executable" %
                       config.BOWTIE2_BIN)
         config_passed = False
     # check that alignment index exists
     if os.path.isdir(self.index_dir):
         logging.debug("Checking for chimerascan index directory... found")
         # check that alignment index files exist; only the first missing
         # file of each group is reported
         for f in config.TRANSCRIPTOME_BOWTIE2_FILES:
             filename = os.path.join(self.index_dir, f)
             if not os.path.isfile(filename):
                 logging.error("chimerascan index file '%s' invalid" %
                               (filename))
                 config_passed = False
                 break
         for f in config.GENOME_BOWTIE2_FILES:
             filename = os.path.join(self.index_dir, f)
             if not os.path.isfile(filename):
                 logging.error("chimerascan index file '%s' invalid" %
                               (filename))
                 config_passed = False
                 break
     else:
         logging.error(
             "chimerascan alignment index directory '%s' not valid" %
             (self.index_dir))
         config_passed = False
     # check for sufficient processors (advisory only, does not fail)
     if self.num_processors < config.BASE_PROCESSORS:
         logging.warning(
             "Please specify >=2 processes using '-p' to allow program to run efficiently"
         )
     return config_passed
Пример #12
0
 def check_config(self):
     """Check the run configuration, logging every problem found.

     Problems are logged instead of raised so that one pass reports all
     of them.

     Returns:
         True when every check passed, False otherwise.
     """
     config_passed = True
     # check that input fastq files exist; only the files that do are
     # inspected further, the missing ones are reported right here
     # NOTE(review): assumes detect_read_length cannot handle a missing
     # file - confirm
     existing_files = []
     for mate, fastq_file in enumerate(self.fastq_files):
         if os.path.isfile(fastq_file):
             existing_files.append(fastq_file)
         else:
             logging.error("mate '%d' fastq file '%s' is not valid" %
                           (mate, fastq_file))
             config_passed = False
     # check read lengths with trimming applied
     logging.debug("Checking read lengths")
     read_lengths = [detect_read_length(fq) for fq in existing_files]
     total_trimming = self.trim5 + self.trim3
     for fastq_file, rlen in zip(existing_files, read_lengths):
         trimmed_rlen = rlen - total_trimming
         logging.debug("File %s read length: %d after trimming: %d" %
                       (fastq_file, rlen, trimmed_rlen))
         if trimmed_rlen < config.MIN_SEGMENT_LENGTH:
             # one placeholder per value: with a single "%d" and two
             # arguments the formatting itself raised a TypeError
             logging.error("Trimmed read length %d is less than the minimum length of %d" %
                           (trimmed_rlen, config.MIN_SEGMENT_LENGTH))
             config_passed = False
     # check that mate read lengths are equal
     if len(set(read_lengths)) > 1:
         logging.error("Unequal read lengths mate1=%d and mate2=%d" %
                       (read_lengths[0], read_lengths[1]))
         config_passed = False
     # check that the seed length is not longer than any read length
     if self.segment_length is not None:
         if any((self.segment_length > rlen) for rlen in read_lengths):
             logging.error("seed length %d cannot be longer than read length" %
                           (self.segment_length))
             config_passed = False
     # ensure local anchor length is not smaller than the minimum
     if self.local_anchor_length < config.LOCAL_ANCHOR_LENGTH_MIN:
         logging.error("Local anchor length of %d < %d" %
                       (self.local_anchor_length, config.LOCAL_ANCHOR_LENGTH_MIN))
         config_passed = False
     # check that output dir is not a regular file
     if os.path.exists(self.output_dir) and (not os.path.isdir(self.output_dir)):
         logging.error("Output directory name '%s' exists and is not a valid directory" %
                       (self.output_dir))
         config_passed = False
     # check that bowtie2-build program exists
     if check_executable(config.BOWTIE2_BUILD_BIN):
         logging.debug("Checking for '%s' binary... found" % config.BOWTIE2_BUILD_BIN)
     else:
         logging.error("%s binary not found or not executable" % config.BOWTIE2_BUILD_BIN)
         config_passed = False
     # check that bowtie2 program exists
     if check_executable(config.BOWTIE2_BIN):
         logging.debug("Checking for '%s' binary... found" % config.BOWTIE2_BIN)
     else:
         logging.error("%s binary not found or not executable" % config.BOWTIE2_BIN)
         config_passed = False
     # check that alignment index exists
     if os.path.isdir(self.index_dir):
         logging.debug("Checking for chimerascan index directory... found")
         # check that alignment index files exist; only the first missing
         # file of each group is reported
         for f in config.TRANSCRIPTOME_BOWTIE2_FILES:
             filename = os.path.join(self.index_dir, f)
             if not os.path.isfile(filename):
                 logging.error("chimerascan index file '%s' invalid" % (filename))
                 config_passed = False
                 break
         for f in config.GENOME_BOWTIE2_FILES:
             filename = os.path.join(self.index_dir, f)
             if not os.path.isfile(filename):
                 logging.error("chimerascan index file '%s' invalid" % (filename))
                 config_passed = False
                 break
     else:
         logging.error("chimerascan alignment index directory '%s' not valid" %
                       (self.index_dir))
         config_passed = False
     # check for sufficient processors (advisory only, does not fail)
     if self.num_processors < config.BASE_PROCESSORS:
         logging.warning("Please specify >=2 processes using '-p' to allow program to run efficiently")
     return config_passed
Пример #13
0
 def check_config(self):
     """Check the run configuration, logging every problem found.

     Problems are logged instead of raised so that one pass reports all
     of them.

     Returns:
         True when every check passed, False otherwise.
     """
     config_passed = True
     read_lengths = []
     # check that input fastq files exist
     for mate, fastq_file in enumerate(self.fastq_files):
         if not os.path.isfile(fastq_file):
             logging.error("mate '%d' fastq file '%s' is not valid" %
                           (mate, fastq_file))
             config_passed = False
             # the failure is recorded; do not try to read a file that is
             # not there, so the remaining checks can still be reported
             continue
         logging.debug("Checking file %s" % (fastq_file))
         read_lengths.append(get_read_length(fastq_file))
         logging.debug("File %s read length=%d" %
                       (fastq_file, read_lengths[-1]))
     # check that mate read lengths are equal
     if len(set(read_lengths)) > 1:
         logging.error("Unequal read lengths mate1=%d and mate2=%d" %
                       (read_lengths[0], read_lengths[1]))
         config_passed = False
     # check that the seed length is not longer than any read length
     if any(self.segment_length > rlen for rlen in read_lengths):
         logging.error("seed length %d cannot be longer than read length" %
                       (self.segment_length))
         config_passed = False
     # check that output dir is not a regular file
     if os.path.exists(
             self.output_dir) and (not os.path.isdir(self.output_dir)):
         logging.error(
             "Output directory name '%s' exists and is not a valid directory"
             % (self.output_dir))
         config_passed = False
     # check that bowtie-build program exists
     if check_executable(self.bowtie_build_bin):
         logging.debug("Checking for 'bowtie-build' binary... found")
     else:
         logging.error("bowtie-build binary not found or not executable")
         config_passed = False
     # check that bowtie program exists
     if check_executable(self.bowtie_bin):
         logging.debug("Checking for 'bowtie' binary... found")
     else:
         logging.error("bowtie binary not found or not executable")
         config_passed = False
     # check that alignment index exists
     if os.path.isdir(self.index_dir):
         logging.debug("Checking for chimerascan index directory... found")
         # check that alignment index file exists
         align_index_file = os.path.join(self.index_dir,
                                         config.BOWTIE_INDEX_FILE)
         if os.path.isfile(align_index_file):
             logging.debug("Checking for bowtie index file... found")
         else:
             logging.error("chimerascan bowtie index file '%s' invalid" %
                           (align_index_file))
             config_passed = False
     else:
         logging.error(
             "chimerascan alignment index directory '%s' not valid" %
             (self.index_dir))
         config_passed = False
     # check for sufficient processors (advisory only, does not fail)
     if self.num_processors < config.BASE_PROCESSORS:
         logging.warning(
             "Please specify >=2 processes using '-p' to allow program to run efficiently"
         )
     return config_passed