def submit(self, sequence):
    """Hand a sequence to the worker unless that is unnecessary.

    Nothing is submitted when a file already exists at the path returned
    by ``_get_result_path`` for this sequence's ID, or when the worker
    reports that it is currently working on that ID.

    :param sequence: the sequence to process
    :return: the ID computed for the sequence by ``get_sequence_id``;
             returned whether or not a submission took place
    """
    sequence_id = get_sequence_id(sequence)

    # An existing file at the result path means there is nothing left to do.
    if os.path.isfile(self._get_result_path(sequence_id)):
        return sequence_id

    # The worker is already busy with this exact sequence ID.
    if self._worker.working_on_sequence_id(sequence_id):
        return sequence_id

    self._worker.submit(sequence)
    return sequence_id
def test_result(mock_run):
    """Check that a submitted sequence is tracked and later has a result.

    :param mock_run: mock whose return value is set to a mapping of the
                     sequence ID to ``'OK'`` (presumably the patched
                     interproscan call; the patch itself is not visible here)
    """
    seq = "TRY"
    seq_id = get_sequence_id(seq)
    mock_run.return_value = {seq_id: 'OK'}

    runner = Worker()
    runner.submit(seq)

    # The worker must know about the sequence before it has been started.
    is_tracked = runner.has_sequence_id(seq_id)
    ok_(is_tracked)

    runner.start()
    # Give the worker time to pick the sequence up and finish it.
    sleep(1.0)

    outcome = runner.result_for_sequence_id(seq_id)
    ok_(outcome is not None)
def test_job(mock_interproscan):
    """Submit a sequence through the job manager and poll until it succeeds.

    The job ID must equal the sequence ID, every status seen before
    success must be ``'PENDING'`` or ``'STARTED'``, and the final result
    must not be ``None``.

    :param mock_interproscan: mock whose return value is set to a mapping
                              of the sequence ID to ``"OK"``
    """
    seq = "TRY"
    expected_id = get_sequence_id(seq)
    mock_interproscan.return_value = {expected_id: "OK"}

    job_id = job_manager.submit(seq)
    eq_(job_id, expected_id)

    # Poll until the job reports success; any other status than the two
    # in-progress ones fails the test immediately.
    status = job_manager.get_status(job_id)
    while status != 'SUCCESS':
        ok_(status in ['PENDING', 'STARTED'])
        status = job_manager.get_status(job_id)

    outcome = job_manager.get_result(job_id)
    ok_(outcome is not None)
def split_proteins(path):
    """Split an XML file into one XML document per ``protein`` element.

    Each ``protein`` child of the document root is wrapped in a new
    ``protein-matches`` element, indented with ``indent_xml`` and
    serialized to a string.

    :param path: path of the XML file to parse
    :return: dict mapping the sequence ID (``get_sequence_id`` applied to
             the text of the protein's ``sequence`` child) to the
             serialized XML string
    """
    namespace = "http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5"

    # Register the schema as the default namespace so serialized tags
    # carry no prefix.
    ET.register_namespace("", namespace)
    prefixes = {'p': namespace}

    root = ET.parse(path).getroot()

    per_sequence = {}
    for protein_element in root.findall('p:protein', namespaces=prefixes):
        # Read the sequence text before the element is wrapped and indented.
        sequence_element = protein_element.find('p:sequence',
                                                namespaces=prefixes)
        sequence_text = sequence_element.text

        wrapper = ET.Element('protein-matches')
        wrapper.append(protein_element)
        indent_xml(wrapper)

        serialized = ET.tostring(wrapper)
        per_sequence[get_sequence_id(sequence_text)] = serialized.decode('ascii')

    return per_sequence
def run(self):
    """Process queued sequences in batches, forever.

    Each iteration takes everything queued so far as the working batch,
    runs ``interproscan.run`` on it, records every result in
    ``self._results`` and passes it to ``job_manager.store``.  If anything
    in that step raises, the exception is logged and recorded in
    ``self._exceptions`` for every sequence of the batch.

    When nothing is queued the loop pauses briefly instead of spinning,
    so an idle worker neither burns CPU nor keeps contending for the lock.

    This method never returns.
    """
    # Function-scope import: this block cannot see the module's import section.
    import time

    # Pause between polls of an empty queue.
    idle_seconds = 0.1

    _log.info("starting interproscan worker")
    while True:
        # Swap the queue for a fresh set under the lock, so sequences
        # submitted from now on go to the next batch.
        with self._lock:
            self._working_sequences = self._queued_sequences
            self._queued_sequences = set()
            has_work = len(self._working_sequences) > 0

        if not has_work:
            # Sleep outside the lock so submitters are never blocked by it.
            time.sleep(idle_seconds)
            continue

        # Imported lazily, only once there is work (presumably to avoid a
        # circular import with the job controller -- TODO confirm).
        from interproscan_web.controllers.job import job_manager

        try:
            results = interproscan.run(self._working_sequences)
            for sequence_id, result in results.items():
                self._results[sequence_id] = result
                job_manager.store(sequence_id, result)
        except Exception as e:
            # Top-level boundary of the worker loop: log the failure, keep
            # the loop alive and attribute the error to the whole batch.
            _log.exception("interproscan worker failed to process a batch")
            with self._lock:
                for sequence in self._working_sequences:
                    self._exceptions[get_sequence_id(sequence)] = e
def run(self, sequences):
    """Run the interproscan executable on the given sequences.

    The sequences are written to a FASTA file keyed by their sequence ID,
    the executable at ``self.interproscan_path`` is invoked through
    ``self._execute`` with XML output, and the output file is handed to
    ``split_proteins``.

    Both files live in a private temporary directory created with
    ``tempfile.mkdtemp`` rather than under names from the deprecated,
    race-prone ``tempfile.mktemp``.  The directory is removed on exit,
    whether or not the run succeeded.

    NOTE(review): ``mkdtemp`` creates the directory accessible only to the
    current user; confirm ``_execute`` runs the executable as that user.

    :param sequences: iterable of sequences to analyse
    :return: the value returned by ``split_proteins`` for the XML output
    """
    # Function-scope import: this block cannot see the module's import section.
    import shutil

    work_dir = tempfile.mkdtemp(prefix="interproscan_")
    fasta_path = os.path.join(work_dir, "input.fasta")
    xml_path = os.path.join(work_dir, "output.xml")

    try:
        # Inside the try so that a failed write is cleaned up as well.
        write_fasta(
            fasta_path,
            {get_sequence_id(sequence): sequence for sequence in sequences})

        self._execute([
            self.interproscan_path,
            '--goterms',
            '--formats', 'xml',
            '--disable-precalc',
            '--input', fasta_path,
            '--outfile', xml_path,
            '--seqtype', 'p'
        ])

        return split_proteins(xml_path)
    finally:
        # Best-effort cleanup; a failure here must not mask the real outcome.
        shutil.rmtree(work_dir, ignore_errors=True)
def working_on_sequence_id(self, sequence_id):
    """Tell whether a sequence with the given ID is in the working set.

    :param sequence_id: ID to look for
    :return: True if ``get_sequence_id`` of any sequence in
             ``self._working_sequences`` equals ``sequence_id``,
             False otherwise
    """
    with self._lock:
        # Compute the ID of every sequence in the working set, then look
        # the requested one up.
        active_ids = [get_sequence_id(candidate)
                      for candidate in self._working_sequences]
        return sequence_id in active_ids
def has_sequence_id(self, sequence_id):
    """Tell whether a sequence with the given ID is working or queued.

    :param sequence_id: ID to look for
    :return: True if ``get_sequence_id`` of any sequence in the union of
             ``self._working_sequences`` and ``self._queued_sequences``
             equals ``sequence_id``, False otherwise
    """
    with self._lock:
        known_sequences = self._working_sequences | self._queued_sequences
        known_ids = [get_sequence_id(candidate)
                     for candidate in known_sequences]
        return sequence_id in known_ids