def _check_use_index(self):
    """Validate the `--use-index` option against the selected BLAST task.

    Does nothing if `self.argparse_args.use_index` is None.

    Raises:
        FatalError: if the option value is not one of `true`, `false`,
            `auto`, or if it is `true` while the BLAST task in effect is
            not listed in `src.blast.TASKS_SUPPORT_INDEXED_SEARCH`.
    """
    use_index_string = self.argparse_args.use_index
    if use_index_string is None:
        return

    # A tuple (rather than a set) keeps the order of the values listed
    # in the error message stable between runs.
    allowed_values = ('true', 'false', 'auto')
    if use_index_string not in allowed_values:
        error_msg = '\nError: invalid value of the `--use-index` option: `{}`.\n' \
            'Allowed values: {}' \
            .format(use_index_string, ', '.join(allowed_values))
        raise FatalError(error_msg)

    blast_task = self.argparse_args.blast_task
    if blast_task is None:
        # No task given: fall back to the first entry of BLAST_TASKS
        blast_task = src.blast.BLAST_TASKS[0]

    blast_task_cant_use_index = (
        use_index_string == 'true'
        and blast_task not in src.blast.TASKS_SUPPORT_INDEXED_SEARCH
    )
    if blast_task_cant_use_index:
        error_msg = '\nError: BLAST task {} cannot use indexed search' \
            .format(blast_task)
        raise FatalError(error_msg)
def _parse_primers(self):
    """Read primer pairs from `self.primers_fpath` and locate them in the reference.

    The primers file must contain an even number of lines; pairs are read
    one after another with `self._parse_primer_pair`. For every pair the
    left primer and the reverse complement of the right primer are passed
    to `self._find_primer_anneal_coords` together with the sequence read
    from `self.reference_fpath`. `self.max_primer_len` is updated along
    the way.

    Returns:
        list of `PrimerPair` objects built from the found coordinates.

    Raises:
        FatalError: if the number of lines is odd, or if a `ValueError`
            is raised while a pair is being parsed or located.
    """
    sep = ','
    primer_pairs = list()

    # Count lines inside a context manager so the handle is closed at once
    with open(self.primers_fpath, 'rt') as primers_file:
        n_lines = sum(1 for _ in primers_file)

    if n_lines % 2 != 0:
        error_msg = '\nError: Cannot parse primers from file `{}`.\n' \
            'There are {} lines in this file.\n' \
            'There must be even number of lines ' \
            '(and therefore even number of primers), though.' \
            .format(self.primers_fpath, n_lines)
        raise FatalError(error_msg)

    print('{} - Parsing primers...'.format(getwt()))

    reference_seq = src.fasta.read_fasta_sequence(self.reference_fpath)
    # Position from which the search for the next primer starts; it is
    # moved to the start of each left primer once that primer is found.
    find_start_pos = 0

    with open(self.primers_fpath, 'rt') as primers_file:
        for _ in range(n_lines // 2):
            try:
                left_primer_seq, right_primer_seq = self._parse_primer_pair(
                    primers_file,
                    sep
                )

                self.max_primer_len = max(
                    self.max_primer_len,
                    len(left_primer_seq),
                    len(right_primer_seq)
                )

                left_start, left_end = self._find_primer_anneal_coords(
                    left_primer_seq,
                    reference_seq,
                    Orientation.LEFT,
                    beg=find_start_pos
                )
                find_start_pos = left_start

                right_start, right_end = self._find_primer_anneal_coords(
                    src.sequences.reverse_complement(right_primer_seq),
                    reference_seq,
                    Orientation.RIGHT,
                    beg=find_start_pos
                )

                primer_pairs.append(
                    PrimerPair(
                        Primer(left_start, left_end),
                        Primer(right_start, right_end),
                    )
                )
            except ValueError as err:
                error_msg = '\nError: cannot parse a line in file `{}`.\n{}' \
                    .format(self.primers_fpath, err)
                raise FatalError(error_msg) from err

    print('{} - Primers: found annealing coordinates'.format(getwt()))

    return primer_pairs
def read_fasta_sequence(file_path):
    """Read the first sequence of a FASTA file and return it in upper case.

    The first line of the file is skipped as the header. Subsequent lines
    are stripped, upper-cased and concatenated until a line recognized by
    `_is_header`, an empty line, or the end of the file is reached.

    Args:
        file_path: path to the FASTA file; it is opened with
            `fs.open_file_may_by_gzipped`.

    Returns:
        str: the concatenated sequence lines.

    Raises:
        FatalError: if a sequence line does not pass `verify_sequence`.
    """
    # Collect lines in a list and join once: repeated `str +=` is quadratic
    sequence_parts = []

    with fs.open_file_may_by_gzipped(file_path, 'rt') as fasta_file:
        fasta_file.readline()  # pass header
        line_counter = 1  # the header is line #1
        line = fasta_file.readline().strip().upper()

        # `readline()` returns '' at EOF, so an empty (or blank) line
        # terminates reading just like the next header does.
        while not (_is_header(line) or line == ''):
            line_counter += 1
            if not verify_sequence(line):
                non_iupac_chars = get_non_iupac_chars(line)
                error_msg = '\nError: a non-IUPAC character found' \
                    ' in line #{} of file `{}`.\n' \
                    'Bad characters are the following:\n {}' \
                    .format(line_counter, file_path, ', '.join(non_iupac_chars))
                raise FatalError(error_msg)
            sequence_parts.append(line)
            line = fasta_file.readline().strip().upper()

    return ''.join(sequence_parts)
def create_dir(dirpath):
    """Create directory `dirpath` (with parents) unless the path already exists.

    Args:
        dirpath: path of the directory to create.

    Raises:
        FatalError: if the directory cannot be created; the message
            includes the reason reported by the operating system.
    """
    if os.path.exists(dirpath):
        return

    try:
        # `exist_ok=True` tolerates another process creating the directory
        # between the check above and this call.
        os.makedirs(dirpath, exist_ok=True)
    except OSError as err:
        error_msg = '\nError: cannot create directory `{}`:\n {}' \
            .format(dirpath, err)
        raise FatalError(error_msg) from err
def is_gzipped(fpath):
    """Tell whether `fpath` is a gzipped file, judging by its `.gz` extension.

    For a path ending with `.gz` the file is additionally opened and one
    byte is decompressed. `gzip.open` alone does not read the header, so
    without an actual read a malformed file would go unnoticed.

    Args:
        fpath: path to the file.

    Returns:
        bool: True if the path ends with `.gz` and the file can be read
        as gzip, False if the path does not end with `.gz`.

    Raises:
        FatalError: if the `.gz` file is not valid gzip data or
            cannot be opened.
    """
    if not fpath.endswith('.gz'):
        return False

    try:
        with gzip.open(fpath) as gz_file:
            gz_file.read(1)  # force the gzip header to be parsed
    except (gzip.BadGzipFile, EOFError) as err:
        # EOFError is raised by the gzip module for truncated streams
        error_msg = '\nError: bad gzip file: {}'.format(err)
        raise FatalError(error_msg) from err
    except OSError as err:
        error_msg = '\nError: cannot open file: {}'.format(err)
        raise FatalError(error_msg) from err

    return True
def init_file(fpath):
    """Create an empty file at `fpath`, truncating it if it already exists.

    Args:
        fpath: path of the file to initialize.

    Raises:
        FatalError: if the file cannot be opened for writing.
    """
    try:
        # Opening in write mode is enough to create/truncate the file
        open(fpath, 'wt').close()
    except OSError as err:
        message = '\nError: cannot initialize output file `{}`:\n {}'.format(
            fpath, err
        )
        raise FatalError(message)
def _check_blast_task(self):
    """Validate the BLAST task name given in `self.argparse_args.blast_task`.

    Does nothing if the task was not specified (its value is None).

    Raises:
        FatalError: if the task is not listed in `src.blast.BLAST_TASKS`.
    """
    blast_task_argument = self.argparse_args.blast_task
    if blast_task_argument is None:
        return

    if blast_task_argument not in src.blast.BLAST_TASKS:
        # The newline keeps the list of allowed values from being glued
        # to the end of the first sentence.
        error_msg = '\nError: invalid name of a blast task: `{}`.\n' \
            'Allowed values: {}' \
            .format(blast_task_argument, ', '.join(src.blast.BLAST_TASKS))
        raise FatalError(error_msg)
def _check_outdpath(self):
    """Make sure the output directory from `self.argparse_args.outdir` exists.

    Does nothing if the output directory was not specified (None);
    otherwise delegates creation to `fs.create_dir`.

    Raises:
        FatalError: if `fs.create_dir` fails.
    """
    outdir = self.argparse_args.outdir
    if outdir is None:
        return

    try:
        fs.create_dir(outdir)
    except FatalError as err:
        # `FatalError` (not `FatalErrors`) is the exception this file
        # raises for failures of this kind.
        error_msg = '\nError: cannot create directory `{}`:\n {}' \
            .format(outdir, err)
        raise FatalError(error_msg) from err
def _check_chunk_size(self):
    """Validate `self.argparse_args.chunk_size` with `_check_int_string_gt0`.

    A value of None means the option was not given and is accepted as is.

    Raises:
        FatalError: if the check raises `_AtoiGreaterThanZeroError`.
    """
    raw_value = self.argparse_args.chunk_size
    if raw_value is None:
        return

    try:
        _check_int_string_gt0(raw_value)
    except _AtoiGreaterThanZeroError as err:
        raise FatalError(
            '\nError: invalid chunk size: `{}`\n {}'.format(raw_value, err)
        )
def _check_threads_num(self):
    """Validate `self.argparse_args.threads` with `_check_int_string_gt0`.

    A value of None means the option was not given and is accepted as is.

    Raises:
        FatalError: if the check raises `_AtoiGreaterThanZeroError`.
    """
    raw_value = self.argparse_args.threads
    if raw_value is None:
        return

    try:
        _check_int_string_gt0(raw_value)
    except _AtoiGreaterThanZeroError as err:
        raise FatalError(
            '\nError: invalid number of threads: `{}`\n {}'.format(raw_value, err)
        )
def _check_min_len(self):
    """Validate `self.argparse_args.min_len` with `_check_int_string_gt0`.

    A value of None means the option was not given and is accepted as is.

    Raises:
        FatalError: if the check raises `_AtoiGreaterThanZeroError`.
    """
    raw_value = self.argparse_args.min_len
    if raw_value is None:
        return

    try:
        _check_int_string_gt0(raw_value)
    except _AtoiGreaterThanZeroError as err:
        raise FatalError(
            '\nError: invalid minimum length: `{}`:\n {}'.format(raw_value, err)
        )
def _check_primer_ext_len(self):
    """Validate `self.argparse_args.primer_5ext` with `_check_int_string_ge0`.

    A value of None means the option was not given and is accepted as is.

    Raises:
        FatalError: if the check raises `_AtoiGreaterOrEqualToZeroError`.
    """
    raw_value = self.argparse_args.primer_5ext
    if raw_value is None:
        return

    try:
        _check_int_string_ge0(raw_value)
    except _AtoiGreaterOrEqualToZeroError as err:
        raise FatalError(
            '\nError: invalid size of primer coordinates extention: `{}`\n {}'
            .format(raw_value, err)
        )
def _check_reads_fpaths(self):
    """Validate the read files passed on the command line.

    Three checks are made in order: the combination of file arguments
    (`_check_file_type_combination`), the existence of the files for the
    detected mode (`self._reads_files_exist`), and, in paired-end Illumina
    mode, that R1 and R2 were not given as the same path string.

    Raises:
        FatalError: if any of the checks fails.
    """
    args = self.argparse_args

    try:
        _check_file_type_combination(args)
    except _InvalidFileCombinationError as err:
        raise FatalError(str(err))

    kromsatel_mode = _detect_kromsatel_mode(args)

    try:
        self._reads_files_exist(kromsatel_mode)
    except FileNotFoundError as err:
        raise FatalError(str(err))

    # Forward and reverse reads must not point to one and the same path
    is_paired_end = kromsatel_mode == KromsatelModes.IlluminaPE
    if is_paired_end and args.reads_R1 == args.reads_R2:
        message = (
            '\nError: the file of forward (R1) and '
            'the file of reverse (R2) reads are the same file:\n `{}`'
        ).format(args.reads_R1)
        raise FatalError(message)
def _make_blast_db(reference_fpath, db_fpath):
    """Run the command built by `_configure_makeblastdb_cmd` to create a BLAST database.

    Args:
        reference_fpath: passed to `_configure_makeblastdb_cmd`.
        db_fpath: passed to `_configure_makeblastdb_cmd`.

    Raises:
        FatalError: if the command exits with a non-zero code; the message
            contains the command's stderr and the command itself.
    """
    makeblastdb_cmd = _configure_makeblastdb_cmd(reference_fpath, db_fpath)

    # NOTE(review): the command is run through the shell; this assumes
    # `_configure_makeblastdb_cmd` quotes the paths properly -- confirm.
    pipe = sp.Popen(makeblastdb_cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    _, stderr_bytes = pipe.communicate()

    if pipe.returncode != 0:
        # `errors='replace'` keeps a non-UTF-8 byte in stderr from masking
        # the real failure with a UnicodeDecodeError.
        error_msg = '\nError: Cannot create a blast database:\n' \
            '{}\n Command: `{}`' \
            .format(stderr_bytes.decode('utf-8', errors='replace'),
                    makeblastdb_cmd)
        raise FatalError(error_msg)
def _check_fixed_crop_len(self):
    """Validate `self.argparse_args.crop_len`.

    The value is accepted without further checks when it is None or the
    literal string `auto`; anything else is passed to
    `_check_int_string_ge0`.

    Raises:
        FatalError: if the check raises `_AtoiGreaterOrEqualToZeroError`.
    """
    raw_value = self.argparse_args.crop_len
    if raw_value is None or raw_value == 'auto':
        return

    try:
        _check_int_string_ge0(raw_value)
    except _AtoiGreaterOrEqualToZeroError as err:
        message = (
            '\nError: invalid crop length: `{}`\n {}'
            ' Also, it may be `auto`.'
        ).format(raw_value, err)
        raise FatalError(message)
def _index_database(db_fpath):
    """Run the command built by `_configure_makembindex_cmd` to index a BLAST database.

    Args:
        db_fpath: passed to `_configure_makembindex_cmd` and reported in
            the error message.

    Raises:
        FatalError: if the command exits with a non-zero code; the message
            contains the command's stderr and the command itself.
    """
    makembindex_cmd = _configure_makembindex_cmd(db_fpath)

    # NOTE(review): the command is run through the shell; this assumes
    # `_configure_makembindex_cmd` quotes the path properly -- confirm.
    pipe = sp.Popen(makembindex_cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    _, stderr_bytes = pipe.communicate()

    if pipe.returncode != 0:
        # `errors='replace'` keeps a non-UTF-8 byte in stderr from masking
        # the real failure with a UnicodeDecodeError.
        error_msg = '\nError: Cannot index the blast database `{}`:\n' \
            '{}\n Command: `{}`' \
            .format(db_fpath,
                    stderr_bytes.decode('utf-8', errors='replace'),
                    makembindex_cmd)
        raise FatalError(error_msg)
def _check_mandatory_args(self):
    """Ensure that the primers and reference arguments were both provided.

    Raises:
        FatalError: for the first of `-p/--primers`, `-r/--reference`
            whose value in `self.argparse_args` is None.
    """
    described_values = (
        ('-p/--primers', self.argparse_args.primers),
        ('-r/--reference', self.argparse_args.reference),
    )

    for description, value in described_values:
        if value is None:
            raise FatalError(
                '\nError: argument {} is mandatory'.format(description)
            )
def check_program(program):
    """Check that executable `program` can be found through PATH.

    The lookup is delegated to `shutil.which`, which copes with an unset
    PATH and with PATH entries that are not readable directories, and
    only accepts files that are actually executable.

    Args:
        program: name of the executable to look for.

    Raises:
        FatalError: if the program is not found.
    """
    # Local import keeps this block self-contained within the module
    import shutil

    if shutil.which(program) is None:
        error_msg = '\nError: program `{}` from BLAST+ toolkit is not installed.\n' \
            'If this error still occurs although you have installed everything' \
            ' -- make sure that this program is added to PATH.' \
            .format(program)
        raise FatalError(error_msg)
def _detect_kromsatel_mode(argparse_args):
    """Map the command line arguments to one of the `KromsatelModes`.

    The string returned by `_create_read_pass_string` is compared with
    three known patterns: `FRl`, `frL` and `Frl`.

    Args:
        argparse_args: passed to `_create_read_pass_string`.

    Returns:
        `KromsatelModes.IlluminaPE`, `KromsatelModes.Nanopore` or
        `KromsatelModes.IlluminaSE` for the respective pattern.

    Raises:
        FatalError: if the string matches none of the patterns.
    """
    read_pass_string = _create_read_pass_string(argparse_args)

    known_modes = (
        ('FRl', KromsatelModes.IlluminaPE),
        ('frL', KromsatelModes.Nanopore),
        ('Frl', KromsatelModes.IlluminaSE),
    )
    for pattern, mode in known_modes:
        if read_pass_string == pattern:
            return mode

    # Execution should not reach here
    message = (
        '\nInternal error. Please, contact the developer'
        ' and tell him abouth this error.\n'
        'Error description: "kromsatel mode error in _detect_kromsatel_mode".'
    )
    raise FatalError(message)
def blast_align(reads_chunk, kromsatel_args):
    """Align a chunk of reads with blastn and return the parsed results.

    The reads are written to a temporary FASTA file named after the
    current process ID, blastn is launched with a command built by
    `_configure_blastn_cmd_nanopore` (Nanopore mode) or
    `_configure_blastn_cmd_illumina` (any other mode), and the JSON file
    it produces is loaded.

    Args:
        reads_chunk: reads passed to `src.fastq.write_fastq2fasta`.
        kromsatel_args: object providing `tmp_dir_path`, `kromsatel_mode`,
            `db_fpath`, `blast_task` and `use_index`.

    Returns:
        The value stored under the `BlastOutput2` key of the JSON output.

    Raises:
        FatalError: if blastn exits with a non-zero code.
    """
    pid = os.getpid()
    tmp_dir_path = kromsatel_args.tmp_dir_path
    query_fpath = os.path.join(
        tmp_dir_path,
        'kromsatel_query_{}.fasta'.format(pid)
    )
    alignment_fpath = os.path.join(
        tmp_dir_path,
        'kromsatel_alignment_{}.json'.format(pid)
    )

    src.fastq.write_fastq2fasta(reads_chunk, query_fpath)

    # Both configurators take the same arguments; only the choice differs
    if kromsatel_args.kromsatel_mode == KromsatelModes.Nanopore:
        configure_blastn_cmd = _configure_blastn_cmd_nanopore
    else:
        configure_blastn_cmd = _configure_blastn_cmd_illumina
    blast_cmd = configure_blastn_cmd(
        query_fpath,
        kromsatel_args.db_fpath,
        kromsatel_args.blast_task,
        kromsatel_args.use_index,
        alignment_fpath
    )

    # Launch blastn. The query file is removed whether or not the
    # search succeeds, so a failed run does not leave it behind.
    try:
        pipe = sp.Popen(blast_cmd, shell=True, stderr=sp.PIPE)
        _, stderr_bytes = pipe.communicate()
    finally:
        fs.rm_file_warn_on_error(query_fpath)

    if pipe.returncode != 0:
        error_msg = '\nError: an error occurred while performing BLAST search:\n' \
            '{}'.format(stderr_bytes.decode('utf-8', errors='replace'))
        raise FatalError(error_msg)

    # Remove the alignment file even if it cannot be parsed
    try:
        with open(alignment_fpath, 'rt') as alignment_file:
            alignments = json.load(alignment_file)
    finally:
        fs.rm_file_warn_on_error(alignment_fpath)

    return alignments['BlastOutput2']
def _check_primers_fpath(self):
    """Ensure that the path in `self.argparse_args.primers` exists.

    Raises:
        FatalError: if the path does not exist.
    """
    primers_fpath = self.argparse_args.primers
    if os.path.exists(primers_fpath):
        return

    raise FatalError(
        '\nError: file `{}` does not exist'.format(primers_fpath)
    )
def _check_reference_fpath(self):
    """Ensure that the path in `self.argparse_args.reference` exists.

    Raises:
        FatalError: if the path does not exist.
    """
    reference_fpath = self.argparse_args.reference
    if os.path.exists(reference_fpath):
        return

    raise FatalError(
        '\nError: file `{}` does not exist'.format(reference_fpath)
    )