def setup_run(self, cluster, params=None):
    """Prepare the input file and the parameter string for a run.

    Args:
        cluster: iterable of read-like objects (each needs ``id`` and ``seq``
            attributes) or a path to an existing reads file.
        params: parameters for the run. Falls back to ``self.params`` when
            not given (or empty).

    Returns:
        tuple: ``(run_input_path, params)`` where ``run_input_path`` is either
        the path that was passed in or the path of a newly written ``.fa``
        temporary file. The temporary file is created with ``delete=False``,
        so removing it is left to the caller.

    Raises:
        ValueError: if ``cluster`` is a string that is not an existing path.
        AttributeError: if an item of ``cluster`` lacks ``id`` or ``seq``
            (logged before being re-raised).
    """
    if not params:
        params = self.params

    invalid_input = 'Input to {} is not valid, please provide either a list of read objects or a valid path to fasta file'

    # Accept unicode paths on Python 2 as well, matching the ``basestring``
    # checks used by the other wrappers; plain ``str`` on Python 3.
    try:
        path_types = basestring
    except NameError:
        path_types = str

    if isinstance(cluster, path_types):  # input path provided
        if not os.path.exists(cluster):
            raise ValueError(invalid_input.format(self.get_name()))
        return cluster, params

    # list of seq objects provided: build the FASTA text first so that invalid
    # input does not leave an empty temporary file behind.
    try:
        fasta_text = fasta_from_seq(*zip(*[(x.id, x.seq) for x in cluster]))
    except AttributeError:
        log.error(invalid_input.format(self.get_name()))
        raise

    f = tempfile.NamedTemporaryFile(suffix='.fa', delete=False)
    try:
        f.write(fasta_text)
    finally:
        # Closing flushes the buffered data; without it the tool that later
        # opens the path by name can see an empty or truncated file.
        f.close()
    return f.name, params
def ava(self, reads=None, params=None):
    """Run minimap with query == target for all-to-all mapping.

    Args:
        reads: iterable of read-like objects (each needs ``id`` and ``seq``
            attributes).
        params: mapping parameters. Overrides ``self.params`` when given.

    Returns:
        The result of ``self.run`` called with the same temporary reads file
        as both query and target.

    Raises:
        ValueError: if the effective parameters (``params``, or
            ``self.params`` when ``params`` is not provided) do not contain
            ``'ava'``.
    """
    if not params:
        params = self.params
    if not params:
        # Neither the caller nor the wrapper supplied parameters: fall back to
        # the all-vs-all preset. It is spelled with ``-x`` because the string
        # is placed verbatim on the command line, where a bare preset name
        # would be read as a positional argument.
        params = '-x ava-pb'
    elif 'ava' not in params:
        raise ValueError(
            'This MinimapWrapper has parameter configurations that do not '
            'included all-to-all mapping. Please change the params attribute '
            'or manually overide the parameters when calling '
            'MinimapWrapper.ava')

    reads_file = self.create_temp_file(
        fasta_from_seq(*zip(*[(x.id, x.seq) for x in reads])))
    try:
        ## NOTE / TODO using tempfile.name twice MAY NOT work on some systems
        return self.run(reads_file.name, reads_file.name, params=params)
    finally:
        # run() only receives path strings here, so it never closes this
        # handle itself; release it whether or not the mapping succeeded.
        reads_file.close()
def call_cluster(self, consensus_seq):
    """Map a consensus sequence against the allele database with minimap.

    The sequence is written as a single FASTA record named ``'consensus'``
    to a temporary file that is removed again when the call finishes.

    Args:
        consensus_seq: the consensus sequence to map.

    Returns:
        Whatever ``self.minimap.run`` returns for the assembled command.
    """
    with tempfile.NamedTemporaryFile(delete=True) as f:
        f.write(fasta_from_seq('consensus', consensus_seq))
        # The mapper opens the file by name, so the buffered record has to be
        # on disk before it is started.
        f.flush()
        command = [self.minimap.src, '-cx map-pb', f.name, self.allele_db]
        # NOTE(review): the command list is handed over as one positional
        # argument -- confirm this matches the signature of self.minimap.run.
        mapping_output = self.minimap.run(command)
    return mapping_output
def call_cluster(self, cluster, filter_function=None, result_filter=None,
                 temp_file_path=None):
    """Call a cluster by mapping its consensus against the allele database.

    Args:
        cluster: one of
            * an indexable collection of read-like objects (``id`` / ``seq``);
              a consensus is generated with ``self.consensus_builder``,
            * a path to an existing consensus file,
            * an object exposing ``seq`` and ``id`` that already is the
              consensus.
            NOTE(review): the collection branch is selected with
            ``hasattr(cluster, '__getitem__')`` and is tested first, so any
            input that supports indexing (strings included) is handled as a
            collection of reads -- confirm this is intended.
        filter_function: sort key applied to the mapping output; the first
            element after sorting becomes the call. Defaults to
            ``self.filter_function``.
        result_filter: callable applied to the selected mapping to produce
            the result. Defaults to ``self.result_filter``.
        temp_file_path: optional path used for the intermediate FASTA files
            (first the reads, then the consensus); a named temporary file is
            created for the consensus when omitted.

    Returns:
        ``None`` when the cluster has a single read, when no consensus could
        be generated, when blasr returns no mapping, or when the result was
        stored on ``cluster`` (``set_call`` / ``candidates`` /
        ``candidates_method``). The filtered result is returned only when
        ``cluster`` cannot store it (attribute access raises AttributeError).

    Raises:
        ValueError: no usable consensus builder, a string input that is not
            an existing path, input that yields no consensus to map, or an
            invalid value while sorting the mapping output.
    """
    import tempfile

    invalid_input = 'Cluster calling input invalid. Provide iterable of cluster sequences, path to cluster consensus or Bio.Seq.Seq-like object to call'

    if len(cluster) == 1:
        # A plain list has no ``id``; fall back to None instead of failing
        # before the tolerant attribute updates below.
        log.warn('Cluster {} has single read, not calling'.format(
            getattr(cluster, 'id', None)))
        try:
            cluster.consensus_seq = None
            cluster.consensus_builder = None
            cluster.set_call(None)
            cluster.candidates = None
            cluster.candidates_method = str(self)
        except AttributeError:
            pass  # plain collections cannot carry call metadata
        return None

    consensus_seq = None
    consensus_seq_id = None
    cons_path = None

    if hasattr(cluster, '__getitem__'):
        # assumed to be list of sequences, get consensus
        builder = getattr(self, 'consensus_builder', None)
        if builder is None:
            raise ValueError(
                'Cluster calling: list of cluster sequences provided but no consensus builder instantiated.'
            )
        try:
            if temp_file_path:
                with open(temp_file_path, 'wb') as reads_handle:
                    reads_handle.write(
                        fasta_from_seq(*zip(*[(x.id, x.seq)
                                              for x in cluster])))
            consensus_seq = builder.generate_consensus(
                temp_file_path if temp_file_path else cluster)
            if not consensus_seq:
                try:
                    cluster.consensus = None
                    cluster.candidates_method = str(self)
                except AttributeError:
                    pass
                return None
            consensus_seq_id = 'cons'
            log.info('Generated consensus with:\n{}'.format(str(builder)))
            log.debug('Output:\n{}'.format(consensus_seq))
            try:
                cluster.consensus = consensus_seq
                cluster.consensus_method = str(builder)
            except AttributeError:
                pass
        except TypeError:
            ## No consensus builder is set
            raise ValueError(
                'Cluster calling: list of cluster sequences provided but no consensus builder instantiated.'
            )
    elif isinstance(cluster, basestring):
        # input is path
        if not os.path.exists(cluster):
            raise ValueError(
                'Cluster calling input invalid. String provided but is not valid path. If trying to cast as Bio.Seq.Seq-like object'
            )
        cons_path = cluster
    else:
        # input is consensus seq
        consensus_seq = cluster.seq
        consensus_seq_id = cluster.id

    ## save blasr target in all cases except path as input
    if consensus_seq:
        try:
            handle = (open(temp_file_path, 'wb+') if temp_file_path else
                      tempfile.NamedTemporaryFile(delete=False))
            try:
                handle.write(
                    str(fasta_from_seq(consensus_seq_id, consensus_seq)))
                cons_path = handle.name
            finally:
                # Closed before blasr runs so the data is on disk, and closed
                # even when the write fails.
                handle.close()
        except AttributeError:
            raise ValueError(invalid_input)

    if cons_path is None:
        # e.g. a sequence object whose ``seq`` is empty: nothing to map
        raise ValueError(invalid_input)

    ## run blasr mapping of consensus_seq against allele database
    command = [self.blasr.src, '', self.allele_db, cons_path]
    try:
        mapping_output = self.blasr.run(*command)
    except ValueError:
        log.warn('Blasr returned no mapping')
        try:
            cluster.set_call(None)
            cluster.candidates = None
            cluster.candidates_method = str(self)
        except AttributeError:
            pass
        return None

    ## select from mapping the desired result as the call
    if not filter_function:
        filter_function = self.filter_function
    try:
        mapping_output = sorted(mapping_output, key=filter_function)
        cluster_call = mapping_output[0]
    except ValueError:
        log.error('Invalid blasr mapping value')
        log.debug('\n'.join([str(x) for x in mapping_output]))
        raise

    if not result_filter:
        result_filter = self.result_filter
    result = result_filter(cluster_call)

    try:
        cluster.set_call([result])
        cluster.candidates = list(mapping_output)
        cluster.candidates_method = str(self)
    except AttributeError:
        # The input cannot store the call, so hand the result back instead.
        return result
    return None
def run(self, query, target, src=None, params=None, parser=None,
        output_path=None):
    """Run minimap in a subprocess and parse its standard output.

    Args:
        query: iterable of read-like objects or path to fasta
        target: iterable of read-like objects, a single read-like object, or
            path to fasta
        src (str): path to minimap executable. self.src if None
        params (str): string of minimap parameters. self.params if None
        parser (func(x)): parser func for minimap stdout result.
            MinimapWrapper.paf_parser if None
        output_path (str): cache path to save mapping result to.
            self.output_path if None

    Note:
        read-like requires 'id' and 'seq' attributes. A path that does not
        exist is only logged as an error; the run is still attempted.

    Returns:
        output: result of parser applied to the stripped stdout

    Raises:
        ValueError: if minimap produced no output on stdout.
        AttributeError: if ``target`` is neither a path nor read-like
            (logged before being re-raised).
    """
    from subprocess import Popen, PIPE
    from os.path import exists

    query_file = None
    target_file = None
    try:
        ## Check type(query), make temp file and write query seqs as needed
        if isinstance(query, basestring):
            if not exists(query):
                log.error(
                    'Provided query path is invalid, please provide a path as a string or Bio.SeqIO-like objects'
                )
            query_path = query
        else:
            query_file = self.create_temp_file(write_data=fasta_from_seq(
                *zip(*[(x.id, x.seq) for x in query])))
            query_path = query_file.name

        ## Check type(target), make temp file and write target seqs as needed
        if isinstance(target, basestring):
            if not exists(target):
                log.error(
                    'Provided target path is invalid, please provide a path as a string or Bio.SeqIO-like objects'
                )
            target_path = target
        else:
            try:
                try:
                    target_file = self.create_temp_file(
                        write_data=fasta_from_seq(
                            *zip(*[(x.id, x.seq) for x in target])))
                except TypeError:  # is not iterable
                    target_file = self.create_temp_file(
                        write_data=fasta_from_seq(target.id, target.seq))
            except AttributeError:
                log.error(
                    'Provided target input is invalid, please provide a path as a string or Bio.SeqIO-like objects'
                )
                raise
            target_path = target_file.name

        if not src:
            src = self.src
        if not params:
            params = self.params
        if not output_path:
            output_path = self.output_path
        if not parser:
            parser = MinimapWrapper.paf_parser

        log.debug('Running minimap:\n{}'.format(
            ' '.join([src, params, target_path, query_path])))
        # Only src and params are split on whitespace; the two paths are
        # passed as single arguments so that paths containing spaces survive.
        argv = src.split() + params.split() + [target_path, query_path]
        process = Popen(argv, stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()

        ## save / cache output if needed
        if output_path:
            try:
                with open(output_path, 'wb') as f:
                    f.write(stdout)
            except (IOError, OSError):
                # IOError is a separate class on Python 2
                log.error(
                    'Provided minimap output path is not valid, output will be discarded'
                )

        if not stdout.strip():
            log.error('Minimap returned no mapping')
            log.debug(stderr)
            log.debug(stdout)
            # Dump the inputs for debugging, skipping paths that do not exist
            # so a bad path cannot replace the ValueError raised below.
            for fasta_path in (query_path, target_path):
                if exists(fasta_path):
                    with open(fasta_path, 'r') as f:
                        log.debug(f.readlines())
            raise ValueError('Minimap returned no mapping')

        return parser(stdout.strip())
    finally:
        # Release the temporary inputs on every exit path, including the
        # "no mapping" error above.
        if query_file is not None:
            query_file.close()
        if target_file is not None:
            target_file.close()