Exemplo n.º 1
0
    def submit(self, sequence):
        """Hand *sequence* to the worker unless it is already taken care of.

        The sequence id is derived with ``get_sequence_id``. Nothing is
        submitted when a file already exists at the path returned by
        ``self._get_result_path`` for that id, or when the worker reports
        that it is currently working on that id.

        Returns the sequence id, whether or not anything was submitted.
        """
        sequence_id = get_sequence_id(sequence)
        result_file = self._get_result_path(sequence_id)

        # An existing result file means there is nothing left to do.
        if os.path.isfile(result_file):
            return sequence_id

        # Do not hand the worker a sequence it is already processing.
        if self._worker.working_on_sequence_id(sequence_id):
            return sequence_id

        self._worker.submit(sequence)
        return sequence_id
Exemplo n.º 2
0
def test_result(mock_run):
    """Check that a submitted sequence has a result after the worker ran.

    ``mock_run`` is configured to return a canned mapping of the sequence
    id to ``'OK'``. The sequence is submitted before the worker is started,
    and the test waits one second before asking for the result.
    """
    sequence = "TRY"
    expected_id = get_sequence_id(sequence)
    mock_run.return_value = {expected_id: 'OK'}

    worker = Worker()
    worker.submit(sequence)

    # Right after submission the worker must already know the id.
    ok_(worker.has_sequence_id(expected_id))

    worker.start()
    # Give the worker time to process the submitted sequence.
    sleep(1.0)

    outcome = worker.result_for_sequence_id(expected_id)
    ok_(outcome is not None)
Exemplo n.º 3
0
def test_job(mock_interproscan):
    """Submit a sequence through ``job_manager`` and wait for its result.

    ``mock_interproscan`` is configured to return a canned mapping of the
    sequence id to ``"OK"``. The job id returned by ``job_manager.submit``
    must equal the sequence id. The job status is then polled until it
    becomes ``'SUCCESS'``; any status other than ``'PENDING'`` or
    ``'STARTED'`` seen before that fails the test.

    Polling is bounded by a deadline so that a job which never completes
    fails the test instead of hanging the whole test run.
    """
    import time  # local import: only needed for the polling deadline

    timeout_seconds = 60.0
    poll_interval_seconds = 0.05

    sequence = "TRY"
    sequence_id = get_sequence_id(sequence)

    mock_interproscan.return_value = {sequence_id: "OK"}

    job_id = job_manager.submit(sequence)
    eq_(job_id, sequence_id)

    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        status = job_manager.get_status(job_id)

        if status == 'SUCCESS':
            result = job_manager.get_result(job_id)
            ok_(result is not None)
            return

        ok_(status in ['PENDING', 'STARTED'])

        # Pause between polls rather than spinning at full speed.
        time.sleep(poll_interval_seconds)

    raise AssertionError(
        "job %s did not reach SUCCESS within %.0f seconds"
        % (job_id, timeout_seconds))
Exemplo n.º 4
0
def split_proteins(path):
    """Split the XML file at *path* into one XML snippet per protein.

    Every ``protein`` element (in the interproscan5 schema namespace) found
    directly under the document root is wrapped in a new
    ``protein-matches`` element, passed through ``indent_xml`` and
    serialized.

    Returns a dict mapping the id of each protein's sequence (as computed
    by ``get_sequence_id`` from the text of its ``sequence`` child) to the
    serialized XML string. Proteins sharing an id overwrite earlier
    entries.
    """
    schema_uri = "http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5"
    # Register the schema as the default namespace (empty prefix).
    ET.register_namespace("", schema_uri)
    prefixes = {'p': schema_uri}

    root = ET.parse(path).getroot()

    per_protein = {}
    for protein in root.findall('p:protein', namespaces=prefixes):
        # Read the sequence text before the element is wrapped and indented.
        sequence_node = protein.find('p:sequence', namespaces=prefixes)
        sequence = sequence_node.text

        wrapper = ET.Element('protein-matches')
        wrapper.append(protein)
        indent_xml(wrapper)

        sequence_id = get_sequence_id(sequence)
        serialized = ET.tostring(wrapper)
        per_protein[sequence_id] = serialized.decode('ascii')

    return per_protein
Exemplo n.º 5
0
    def run(self):
        """Process queued sequences forever, in batches.

        Each iteration swaps the queued sequences into the working set
        (under ``self._lock``) and starts a fresh empty queue. A non-empty
        working set is passed to ``interproscan.run``; every returned
        result is kept in ``self._results`` and handed to
        ``job_manager.store``. If anything in that step raises, the
        exception is logged and recorded in ``self._exceptions`` for every
        sequence of the batch, keyed by its sequence id.

        This method never returns.
        """
        import time  # local import: only needed for the idle back-off

        idle_sleep_seconds = 0.1

        _log.info("starting interproscan worker")
        while True:
            with self._lock:
                self._working_sequences = self._queued_sequences
                self._queued_sequences = set()

            if not self._working_sequences:
                # Nothing queued: back off briefly instead of spinning at
                # full speed and re-acquiring the lock in a tight loop.
                time.sleep(idle_sleep_seconds)
                continue

            # NOTE(review): imported here rather than at module level,
            # presumably to avoid a circular import; confirm before moving.
            from interproscan_web.controllers.job import job_manager

            try:
                results = interproscan.run(self._working_sequences)
                for sequence_id in results:
                    self._results[sequence_id] = results[sequence_id]
                    job_manager.store(sequence_id, self._results[sequence_id])
            except Exception as e:
                # Keep the worker alive: log the failure and remember it
                # for every sequence of the failed batch.
                _log.exception("interproscan batch failed")
                with self._lock:
                    for sequence in self._working_sequences:
                        self._exceptions[get_sequence_id(sequence)] = e
Exemplo n.º 6
0
    def run(self, sequences):
        """Run InterProScan on *sequences* and return the split XML output.

        The sequences are written to a FASTA file keyed by their sequence
        id (``get_sequence_id``), the executable at
        ``self.interproscan_path`` is invoked through ``self._execute`` with
        XML output, and the resulting file is passed to ``split_proteins``.

        Returns whatever ``split_proteins`` returns for the output file.
        Exceptions from ``write_fasta``, ``self._execute`` or
        ``split_proteins`` propagate; the temporary files are removed in
        every case.
        """
        import shutil  # local import: only needed for temp-dir cleanup

        # Use a freshly created private directory instead of
        # tempfile.mktemp(): mktemp only returns a name, which another
        # process could claim before the file is created.
        work_dir = tempfile.mkdtemp()
        fasta_path = os.path.join(work_dir, 'sequences.fasta')
        xml_path = os.path.join(work_dir, 'result.xml')

        try:
            write_fasta(
                fasta_path,
                {get_sequence_id(sequence): sequence
                 for sequence in sequences})

            self._execute([
                self.interproscan_path, '--goterms', '--formats', 'xml',
                '--disable-precalc', '--input', fasta_path, '--outfile',
                xml_path, '--seqtype', 'p'
            ])

            return split_proteins(xml_path)
        finally:
            # Best-effort cleanup; a failure here must not mask an
            # exception raised by the steps above.
            shutil.rmtree(work_dir, ignore_errors=True)
Exemplo n.º 7
0
 def working_on_sequence_id(self, sequence_id):
     """Tell whether the current working set holds a sequence with this id.

     The ids of all sequences in ``self._working_sequences`` are computed
     with ``get_sequence_id`` while holding ``self._lock`` and compared
     against *sequence_id*.

     Returns True if at least one id matches, otherwise False.
     """
     with self._lock:
         working_ids = [get_sequence_id(item)
                        for item in self._working_sequences]
     return any([working_id == sequence_id for working_id in working_ids])
Exemplo n.º 8
0
 def has_sequence_id(self, sequence_id):
     """Tell whether a sequence with this id is being worked on or queued.

     The union of ``self._working_sequences`` and
     ``self._queued_sequences`` is taken while holding ``self._lock``; the
     id of each member is computed with ``get_sequence_id`` and compared
     against *sequence_id*.

     Returns True if at least one id matches, otherwise False.
     """
     with self._lock:
         known_sequences = self._working_sequences | self._queued_sequences
         known_ids = [get_sequence_id(item) for item in known_sequences]
     return any([known_id == sequence_id for known_id in known_ids])