Exemplo n.º 1
0
    def test_run_all_compinchi_fail(self):
        """Check the error reported by run() for the components inchi file.

        Four empty TSV files are created in the directory both URLs point
        at, and a completion marker is placed in the makeblastdb task
        directory.  After run() the task error must name
        ``params.compinchi`` as the source and
        ``task.get_components_inchi_file()`` as the destination.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            # drop the completion marker into the makeblastdb task directory
            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            task._retrysleep = 0
            # create the four empty tsv files in the source directory
            for tsv_name in (task.NONPOLYMER_TSV, task.SEQUENCE_TSV,
                             task.OLDSEQUENCE_TSV, task.CRYSTALPH_TSV):
                open(os.path.join(temp_dir, tsv_name), 'a').close()
            task.run()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(
                task.get_error(), 'Unable to download file from ' +
                params.compinchi + ' to ' + task.get_components_inchi_file())

        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 2
0
    def test_get_set_of_pbdid_from_pdb_seqres_txt_with_400k_file(self):
        """Parse a generated sequence file holding up to 400000 entries.

        Each entry header starts with a distinct 4 character permutation of
        lowercase letters and the digits 1-9.  The number of ids returned
        must equal the number of entries written.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = MakeBlastDBTask(temp_dir, params)
            task.create_dir()
            perms = itertools.permutations(
                string.ascii_lowercase + '123456789', 4)
            limit = 400000
            counter = 0
            # the with block closes the file even if a write fails
            with open(task.get_pdb_seqres_txt(), 'w') as f:
                # islice stops at limit or when the permutations run out,
                # so no explicit StopIteration handling is needed
                for perm in itertools.islice(perms, limit):
                    f.write('>' + ''.join(perm) +
                            '_A mol:protein length:165  T4 LYSOZYME\n')
                    f.write('MVLSEGEWQLVLH\n')
                    counter += 1

            pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
            self.assertEqual(len(pdbset), counter)

        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 3
0
    def test_get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres_w_hits(self):
        """Check that exactly one id is reported as common to both files.

        The crystalph tsv lists five ids and the sequence file lists two;
        only ``4rfr`` appears in both, and it must be returned as ``4RFR``.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = DataImportTask(temp_dir, params)
            task.create_dir()
            # with blocks close the files even if a write fails
            with open(task.get_crystalph_tsv(), 'w') as f:
                f.write('PDB_ID  _exptl_crystal_grow.pH\n')
                f.write('4X09\t6.5\n')
                f.write('4rfr\t8\n')
                f.write('4XET\t6.2\n')
                f.write('4XF1\t6.2\n')
                f.write('4XF3\t6.2\n')

            makeblast = MakeBlastDBTask(temp_dir, params)
            makeblast.create_dir()
            with open(makeblast.get_pdb_seqres_txt(), 'w') as f:
                f.write('>4rfr_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                        'HLKTEAEMKASEDLKKHG\n')
                f.write('>102l_A mol:protein length:165  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                        'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                        'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                        'ITTFRTGTWDAYKNL\n')

            pdbset = task.get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres()
            self.assertEqual(len(pdbset), 1)
            self.assertTrue('4RFR' in pdbset)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 4
0
    def test_run_where_everything_is_successful(self):
        """Run the task with ``echo`` standing in for makeblastdb.

        A small gzip file is used as the download source.  After run() the
        task must report no error, the ``echo.stdout`` file must contain
        the expected command line arguments, and the third line of the
        email log must report zero sequences.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            fakegz = os.path.join(temp_dir, 'fake.gz')

            # the gzip stream is opened in binary mode, so write bytes;
            # try/finally guarantees the handle is closed on failure
            f = gzip.open(fakegz, 'wb')
            try:
                f.write(b'hello\n')
            finally:
                f.close()

            params.pdbsequrl = 'file://' + fakegz
            params.makeblastdb = 'echo'
            task = MakeBlastDBTask(temp_dir, params)
            task._retrysleep = 0
            task._maxretries = 1
            task.run()
            self.assertEqual(task.get_error(), None)

            # check echo.stdout file for valid arguments
            with open(os.path.join(task.get_dir(), 'echo.stdout'), 'r') as f:
                line = f.readline()

            self.assertEqual(
                line, '-in ' + task.get_pdb_seqres_txt() + ' -out ' +
                os.path.join(task.get_dir(), 'pdb_db') + ' -dbtype prot\n')

            lines = task.get_email_log().split('\n')
            self.assertEqual(lines[2], '# sequence(s): 0')
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 5
0
 def __init__(self, path, args):
     """Initialise the data import task.

     Sets the task name from `DataImportTask.TASK_NAME`, places the task
     one stage after `MakeBlastDBTask`, sets the status to
     `D3RTask.UNKNOWN_STATUS` and assigns the retry attributes.

     :param path: forwarded to the parent constructor
     :param args: forwarded to the parent constructor and to the
                  `MakeBlastDBTask` used to look up the preceding stage
     """
     super(DataImportTask, self).__init__(path, args)
     self.set_name(DataImportTask.TASK_NAME)

     # this task's stage is one past the stage reported by MakeBlastDBTask
     preceding_stage = MakeBlastDBTask('', args).get_stage()
     self.set_stage(preceding_stage + 1)

     self.set_status(D3RTask.UNKNOWN_STATUS)
     self._maxretries = 3
     self._retrysleep = 1
Exemplo n.º 6
0
 def test_constructor(self):
     """Check the values a newly constructed MakeBlastDBTask reports."""
     blast_task = MakeBlastDBTask('/foo', D3RParameters())

     self.assertEqual(blast_task.get_name(), 'makeblastdb')
     self.assertEqual(blast_task.get_stage(), 1)
     self.assertEqual(blast_task.get_status(), D3RTask.UNKNOWN_STATUS)
     self.assertEqual(blast_task.get_path(), '/foo')
     self.assertEqual(blast_task.get_dir_name(), 'stage.1.makeblastdb')

     # hand the task to the shared status check in test_task
     test_task.try_update_status_from_filesystem(self, blast_task)
Exemplo n.º 7
0
 def test_can_run_where_task_failed(self):
     """can_run() must return False when an 'error' file is in the task dir."""
     work_dir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(work_dir, D3RParameters())
         blast_task.create_dir()

         # create an empty file named 'error' inside the task directory
         error_marker = os.path.join(blast_task.get_dir(), 'error')
         with open(error_marker, 'a'):
             pass

         self.assertEqual(blast_task.can_run(), False)
     finally:
         shutil.rmtree(work_dir)
Exemplo n.º 8
0
 def test_get_set_of_pbdid_from_pdb_seqres_txt_no_file(self):
     """Expect an empty result when nothing is written after create_dir()."""
     work_dir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(work_dir, D3RParameters())
         blast_task.create_dir()

         found_ids = blast_task.get_set_of_pbdid_from_pdb_seqres_txt()
         self.assertEqual(len(found_ids), 0)
     finally:
         shutil.rmtree(work_dir)
Exemplo n.º 9
0
 def test_get_sequence_count_file_has_zero_size(self):
     """An empty sequence file must yield a count message of zero."""
     work_dir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(work_dir, D3RParameters())
         blast_task.create_dir()

         # create the sequence file without writing anything to it
         with open(blast_task.get_pdb_seqres_txt(), 'a'):
             pass

         count_message = blast_task._get_sequence_count_message()
         self.assertEqual(count_message, '# sequence(s): 0')
     finally:
         shutil.rmtree(work_dir)
Exemplo n.º 10
0
 def test_run_where_pdbsequrl_is_not_set(self):
     """run() with no pdbsequrl parameter must set the expected error."""
     work_dir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(work_dir, D3RParameters())
         blast_task.run()

         expected_error = 'cannot download files cause pdbsequrl not set'
         self.assertEqual(blast_task.get_error(), expected_error)
     finally:
         shutil.rmtree(work_dir)
Exemplo n.º 11
0
 def test_run_where_makeblastdb_is_not_set(self):
     """run() with pdbsequrl but no makeblastdb parameter must set an error."""
     work_dir = tempfile.mkdtemp()
     try:
         run_params = D3RParameters()
         run_params.pdbsequrl = 'pdbsequrl'

         blast_task = MakeBlastDBTask(work_dir, run_params)
         blast_task.run()

         expected_error = ('cannot make blast database cause '
                           'makeblastdb not set')
         self.assertEqual(blast_task.get_error(), expected_error)
     finally:
         shutil.rmtree(work_dir)
Exemplo n.º 12
0
 def test_get_set_of_pbdid_from_pdb_seqres_txt_empty_file(self):
     """An existing but empty sequence file must produce no ids."""
     work_dir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(work_dir, D3RParameters())
         blast_task.create_dir()

         # create the sequence file without writing anything to it
         with open(blast_task.get_pdb_seqres_txt(), 'a'):
             pass
         file_exists = os.path.isfile(blast_task.get_pdb_seqres_txt())
         self.assertEqual(file_exists, True)

         found_ids = blast_task.get_set_of_pbdid_from_pdb_seqres_txt()
         self.assertEqual(len(found_ids), 0)
     finally:
         shutil.rmtree(work_dir)
Exemplo n.º 13
0
 def test_get_sequence_count_file_has_multiple_seqs(self):
     """Three lines starting with '>' must give a count message of three."""
     temp_dir = tempfile.mkdtemp()
     try:
         params = D3RParameters()
         task = MakeBlastDBTask(temp_dir, params)
         task.create_dir()
         # the with block closes the file even if the write fails
         with open(task.get_pdb_seqres_txt(), 'w') as f:
             f.write('>hi\n>seq\n>are\n')
         self.assertEqual(task._get_sequence_count_message(),
                          '# sequence(s): 3')
     finally:
         shutil.rmtree(temp_dir)
Exemplo n.º 14
0
    def can_run(self):
        """Determines if task can actually run

           This method first verifies the `MakeBlastDBTask` and
           `DataImportTask` tasks, in that order, have
           `D3RTask.COMPLETE_STATUS` for status.  The method then
           verifies this task does not already exist on the filesystem.
           If a check fails, self.set_error() is set with information
           about the issue, except when this task is already complete:
           in that case False is returned and no error is set.

           Side effects: resets `self._error` to None on entry and
           stores the outcome in `self._can_run`.
           :return: True if can run otherwise False
        """
        self._can_run = False
        self._error = None

        # upstream tasks are checked in order; the first one that is not
        # complete stops the check
        for upstream_class in (MakeBlastDBTask, DataImportTask):
            upstream = upstream_class(self._path, self._args)
            upstream.update_status_from_filesystem()
            upstream_status = upstream.get_status()
            if upstream_status != D3RTask.COMPLETE_STATUS:
                # ' task ' has a trailing space so the log message does
                # not read "taskhas a status of"
                logger.info('Cannot run ' + self.get_name() + ' task ' +
                            'because ' + upstream.get_name() + ' task ' +
                            'has a status of ' + upstream_status)
                self.set_error(upstream.get_name() + ' task has ' +
                               upstream_status + ' status')
                return False

        # check this task is not complete and does not exist
        self.update_status_from_filesystem()
        own_status = self.get_status()
        if own_status == D3RTask.COMPLETE_STATUS:
            logger.debug("No work needed for " + self.get_name() + " task")
            return False

        if own_status != D3RTask.NOTFOUND_STATUS:
            logger.warning(self.get_name() + " task was already " +
                           "attempted, but there was a problem")
            self.set_error(self.get_dir_name() + ' already exists and ' +
                           'status is ' + own_status)
            return False

        self._can_run = True
        return True
Exemplo n.º 15
0
 def test_get_set_of_pbdid_from_pdb_seqres_txt_file_no_seqs(self):
     """A sequence file with no '>' header lines must produce no ids."""
     temp_dir = tempfile.mkdtemp()
     try:
         params = D3RParameters()
         task = MakeBlastDBTask(temp_dir, params)
         task.create_dir()
         # the with block closes the file even if the write fails
         with open(task.get_pdb_seqres_txt(), 'w') as f:
             f.write('hi\nhow\nare\nyou')
         self.assertEqual(os.path.isfile(task.get_pdb_seqres_txt()), True)
         pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
         self.assertEqual(len(pdbset), 0)
     finally:
         shutil.rmtree(temp_dir)
Exemplo n.º 16
0
 def test_run_where_download_fails(self):
     """run() must report the url in its error when the download fails."""
     work_dir = tempfile.mkdtemp()
     try:
         missing_url = 'file://doesnotexist'

         run_params = D3RParameters()
         run_params.pdbsequrl = missing_url
         run_params.makeblastdb = 'makeblastdb'

         blast_task = MakeBlastDBTask(work_dir, run_params)
         # a single attempt with a zero retry sleep value
         blast_task._retrysleep = 0
         blast_task._maxretries = 1
         blast_task.run()

         expected_error = 'Unable to download file: ' + missing_url
         self.assertEqual(blast_task.get_error(), expected_error)
     finally:
         shutil.rmtree(work_dir)
Exemplo n.º 17
0
    def test_run_all_success(self):
        """Run DataImportTask with every source file present.

        The four TSV files and the components inchi file are created empty
        in the directory both URLs point at.  After run() the task must
        report no error, each TSV must have a line count of one, and the
        mocked connection's ``get`` must have been called for the
        participant list csv under the remote directory ``/foo2``.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            # file transfer object whose connection is a stand-in with a
            # mocked get()
            fakeftp = FtpFileTransfer(None)
            mftp = D3RParameters()

            fakeftp.set_connection(mftp)
            fakeftp.set_remote_dir('/foo2')
            mftp.get = Mock()

            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            # drop the completion marker into the makeblastdb task directory
            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            task.set_file_transfer(fakeftp)
            task._retrysleep = 0
            # create every source file empty
            for source_name in (task.NONPOLYMER_TSV, task.SEQUENCE_TSV,
                                task.OLDSEQUENCE_TSV, task.CRYSTALPH_TSV,
                                task.COMPINCHI_ICH):
                open(os.path.join(temp_dir, source_name), 'a').close()

            task.run()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(task.get_error(), None)

            # check line count is 1 now which indicates
            # standard was added
            self.assertEqual(
                util.get_file_line_count(task.get_nonpolymer_tsv()), 1)
            self.assertEqual(util.get_file_line_count(task.get_sequence_tsv()),
                             1)
            self.assertEqual(
                util.get_file_line_count(task.get_oldsequence_tsv()), 1)
            self.assertEqual(
                util.get_file_line_count(task.get_crystalph_tsv()), 1)

            mftp.get.assert_called_with('/foo2/' +
                                        DataImportTask.PARTICIPANT_LIST_CSV,
                                        local=task.get_participant_list_csv())
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 18
0
    def test_can_run_does_not_exist_or_error(self):
        """Walk DataImportTask.can_run() through four filesystem states.

        In order: no makeblastdb directory, makeblastdb directory with an
        error file, makeblastdb directory with a complete file, and finally
        the data import directory itself already holding an error file.
        After each step the return value, ``get_error()`` and ``_can_run``
        are checked.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = DataImportTask(temp_dir, params)

            # no make blast db
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task.get_error(),
                             'makeblastdb task has notfound status')
            self.assertEqual(task._can_run, False)

            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()

            # make blast db failed
            err_file = os.path.join(make_blast.get_dir(), D3RTask.ERROR_FILE)
            open(err_file, 'a').close()
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task.get_error(),
                             'makeblastdb task has error status')
            self.assertEqual(task._can_run, False)

            os.remove(err_file)

            # make blast db success
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            self.assertEqual(task.can_run(), True)
            self.assertEqual(task.get_error(), None)
            self.assertEqual(task._can_run, True)

            # data import directory already exists with an error file
            task.create_dir()
            open(os.path.join(task.get_dir(), D3RTask.ERROR_FILE), 'a').close()
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task._can_run, False)
            self.assertEqual(
                task.get_error(),
                task.get_dir_name() + ' already exists and ' + 'status is ' +
                D3RTask.ERROR_STATUS)

        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 19
0
    def test_get_set_of_pbdid_from_pdb_seqres_txt_with_seqs(self):
        """Parse a sequence file with seven headers and expect four ids.

        The file repeats ``102l_A``, lists two chains for ``104l`` and
        contains the header ``10jj3m_A``.  The result must hold exactly
        ``101M``, ``102L``, ``103L`` and ``104L``.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = MakeBlastDBTask(temp_dir, params)
            task.create_dir()
            # the with block closes the file even if a write fails
            with open(task.get_pdb_seqres_txt(), 'w') as f:
                f.write('>101m_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                        'HLKTEAEMKASEDLKKHG\n')
                f.write('>102l_A mol:protein length:165  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                        'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                        'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                        'ITTFRTGTWDAYKNL\n')
                f.write('>102l_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHL'
                        'KTEAEMKASEDLKKAGVTVLTALGAILKKKGHHEAELKPLAQSHATKHKI'
                        'PIKYLEFISEAIIHVLHSRHPGNFGADAQGAMNKALELFRKDIAAKYKELGYQG\n')
                f.write('>103l_A mol:protein length:167  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNSLDAAKSELD'
                        'KAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRA'
                        'ALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAK'
                        'RVITTFRTGTWDAYKNL\n')
                f.write('>10jj3m_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRF\n')
                f.write('>104l_A mol:protein length:166  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSAAE\n')
                f.write('>104l_B mol:protein length:166  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSAKNL\n')

            self.assertEqual(os.path.isfile(task.get_pdb_seqres_txt()), True)
            pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
            self.assertEqual(len(pdbset), 4)
            for expected_id in ('101M', '102L', '103L', '104L'):
                self.assertTrue(expected_id in pdbset)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 20
0
    def test_run_all_nonpolymer_fail(self):
        """Check the error reported by run() for the nonpolymer tsv.

        No source files are created in the directory the URLs point at.
        After run() the task error must name ``params.pdbfileurl`` as the
        source and ``task.get_nonpolymer_tsv()`` as the destination.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            # drop the completion marker into the makeblastdb task directory
            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            task._retrysleep = 0
            task.run()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(
                task.get_error(), 'Unable to download file from ' +
                params.pdbfileurl + ' to ' + task.get_nonpolymer_tsv())
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 21
0
    def get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres(self):
        """Gets the PDBIDs present in both the crystalph tsv and sequence file

           Reads the ids from the crystalph tsv via
           `get_set_of_pdbid_from_crystalph_tsv()` and the ids from the
           sequence file of a `MakeBlastDBTask` built with this task's
           path and args, then keeps the ids found in both.

           An empty set is returned, after logging a warning, when the
           sequence file is missing or when either source yields no ids.
           :returns: set of PDBIDs found in both files
        """
        blastdb_task = MakeBlastDBTask(self._path, self._args)
        seqres_path = blastdb_task.get_pdb_seqres_txt()

        if not os.path.isfile(seqres_path):
            logger.warning('No ' + seqres_path + ' file found')
            return set()

        tsv_ids = self.get_set_of_pdbid_from_crystalph_tsv()
        if len(tsv_ids) == 0:
            logger.warning('No PDBIds found in ' + self.get_crystalph_tsv())
            return set()

        seqres_ids = blastdb_task.get_set_of_pbdid_from_pdb_seqres_txt()
        if len(seqres_ids) == 0:
            logger.warning('No PDBIds found in ' + seqres_path)
            return set()

        # keep every tsv id that also appears among the sequence file ids
        shared_ids = set(pdbid for pdbid in tsv_ids if pdbid in seqres_ids)

        logger.debug('Found ' + str(len(shared_ids)) + ' PDBIDs in ' +
                     self.get_crystalph_tsv() + ' and ' + seqres_path)

        return shared_ids
Exemplo n.º 22
0
    def test_run_where_gunzip_fails(self):
        """run() must report an uncompress error for a non-gzip source.

        The file named ``fake.gz`` is written as plain text, so it is not
        valid gzip data.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            fakegz = os.path.join(temp_dir, 'fake.gz')

            # plain text on purpose; the with block closes the file even
            # if the write fails
            with open(fakegz, 'w') as f:
                f.write('hello\n')

            params.pdbsequrl = 'file://' + fakegz
            params.makeblastdb = 'makeblastdb'
            task = MakeBlastDBTask(temp_dir, params)
            task._retrysleep = 0
            task._maxretries = 1
            task.run()
            self.assertEqual(
                task.get_error(),
                'Unable to uncompress file: ' + task.get_pdb_seqres_txt())
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 23
0
    def test_get_uploadable_files(self):
        """Check get_uploadable_files() against four task directory states.

        In order: empty directory, stdout/stderr files present, the
        pdb_seqres gz file added, and the gz path replaced by a directory.
        """
        work_dir = tempfile.mkdtemp()
        try:
            blast_task = MakeBlastDBTask(work_dir, D3RParameters())
            blast_task.create_dir()

            # nothing has been written to the task directory yet
            self.assertEqual(blast_task.get_uploadable_files(), [])

            # add empty stdout and stderr files
            out_path = os.path.join(blast_task.get_dir(),
                                    'makeblastdb.stdout')
            err_path = os.path.join(blast_task.get_dir(),
                                    'makeblastdb.stderr')
            for log_path in (out_path, err_path):
                with open(log_path, 'a'):
                    pass
            uploadable = blast_task.get_uploadable_files()
            self.assertEqual(len(uploadable), 2)
            # list.index raises ValueError when the entry is missing,
            # which fails the test
            uploadable.index(out_path)
            uploadable.index(err_path)

            # add an empty pdb_seqres gz file
            gz_path = blast_task.get_pdb_seqres_txt_gz()
            with open(gz_path, 'a'):
                pass
            uploadable = blast_task.get_uploadable_files()
            self.assertEqual(len(uploadable), 3)
            uploadable.index(out_path)
            uploadable.index(err_path)
            uploadable.index(gz_path)

            os.unlink(gz_path)

            # make the gz path a directory; unlikely but worth checking
            os.makedirs(gz_path)
            uploadable = blast_task.get_uploadable_files()
            self.assertEqual(len(uploadable), 2)
            uploadable.index(out_path)
            uploadable.index(err_path)
        finally:
            shutil.rmtree(work_dir)
Exemplo n.º 24
0
    def test_run_all_success_except_participant_download_fails(self):
        """Run DataImportTask with all source files but no file transfer.

        The four TSV files and the components inchi file are created empty
        in the directory both URLs point at, and no file transfer object
        is set on the task.  After run() the task must report no error,
        each TSV must have a line count of one, and the email log must
        start with a warning about a failed download.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            # drop the completion marker into the makeblastdb task directory
            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            task._retrysleep = 0
            # create every source file empty
            for source_name in (task.NONPOLYMER_TSV, task.SEQUENCE_TSV,
                                task.OLDSEQUENCE_TSV, task.CRYSTALPH_TSV,
                                task.COMPINCHI_ICH):
                open(os.path.join(temp_dir, source_name), 'a').close()

            task.run()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(task.get_error(), None)

            # check line count is 1 now which indicates
            # standard was added
            self.assertEqual(
                util.get_file_line_count(task.get_nonpolymer_tsv()), 1)
            self.assertEqual(util.get_file_line_count(task.get_sequence_tsv()),
                             1)
            self.assertEqual(
                util.get_file_line_count(task.get_oldsequence_tsv()), 1)
            self.assertEqual(
                util.get_file_line_count(task.get_crystalph_tsv()), 1)

            self.assertTrue(task.get_email_log().startswith(
                '\nWARNING: Unable to download'))
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 25
0
    def test_get_set_of_pbdid_from_pdb_seqres_txt_wrong_len_pdbids(self):
        """Headers whose id part is not four characters must yield no ids.

        The file holds one header with a two character id (``1m``) and one
        with a six character id (``abcdel``).
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = MakeBlastDBTask(temp_dir, params)
            task.create_dir()
            # the with block closes the file even if a write fails
            with open(task.get_pdb_seqres_txt(), 'w') as f:
                f.write('>1m_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                        'HLKTEAEMKASEDLKKHG\n')
                f.write('>abcdel_A mol:protein length:165  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                        'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                        'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                        'ITTFRTGTWDAYKNL\n')

            self.assertEqual(os.path.isfile(task.get_pdb_seqres_txt()), True)
            pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
            self.assertEqual(len(pdbset), 0)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 26
0
    def test_run_where_makeblastdb_fails(self):
        """run() must report the exit code when the makeblastdb command fails.

        The command ``false`` stands in for makeblastdb, and the expected
        error reports a non zero exit code of 1 with empty standard out
        and standard error.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            fakegz = os.path.join(temp_dir, 'fake.gz')

            # the gzip stream is opened in binary mode, so write bytes;
            # try/finally guarantees the handle is closed on failure
            f = gzip.open(fakegz, 'wb')
            try:
                f.write(b'hello\n')
            finally:
                f.close()

            params.pdbsequrl = 'file://' + fakegz
            params.makeblastdb = 'false'
            task = MakeBlastDBTask(temp_dir, params)
            task._retrysleep = 0
            task._maxretries = 1
            task.run()
            self.assertEqual(
                task.get_error(), 'Non zero exit code: 1 '
                'received. Standard out:'
                '  Standard error: ')
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 27
0
    def test_get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres_empty_seq(self):
        """An empty sequence file must give an empty set of common ids.

        The crystalph tsv lists five ids while the sequence file exists
        but has no content.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = DataImportTask(temp_dir, params)
            task.create_dir()
            # the with block closes the file even if a write fails
            with open(task.get_crystalph_tsv(), 'w') as f:
                f.write('PDB_ID  _exptl_crystal_grow.pH\n')
                f.write('4X09\t6.5\n')
                f.write('4rfr\t8\n')
                f.write('4XET\t6.2\n')
                f.write('4XF1\t6.2\n')
                f.write('4XF3\t6.2\n')

            makeblast = MakeBlastDBTask(temp_dir, params)
            makeblast.create_dir()
            # create the sequence file without writing anything to it
            open(makeblast.get_pdb_seqres_txt(), 'a').close()

            pdbset = task.get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres()
            self.assertEqual(len(pdbset), 0)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 28
0
    def run(self):
        """Runs the blastnfilter script followed by the postanalysis script

           The parent class run() is invoked first.  If `self._can_run`
           is False the method logs a debug message and returns without
           doing anything else, so can_run() is expected to have been
           called before hand with a successful outcome.

           Otherwise the blastnfilter command line is assembled from
           the `DataImportTask` and `MakeBlastDBTask` file locations
           plus the `blastnfilter`, `pdbdb` and optional `loglevel`
           arguments, and run via run_external_command().  The status
           is then set to `D3RTask.COMPLETE_STATUS`, the postanalysis
           script is run, hit statistics parsed from the output are
           appended to the email log, and self.end() is invoked.

           If the sequence tsv file is missing, the old sequence tsv
           file is used instead and a note is added to the email log.
           """
        super(BlastNFilterTask, self).run()

        if self._can_run is False:
            logger.debug(self.get_dir_name() +
                         ' cannot run cause _can_run flag '
                         'is False')
            return

        # these task objects are only used to look up file and
        # directory locations
        data_import = DataImportTask(self._path, self._args)

        make_blastdb = MakeBlastDBTask(self._path, self._args)

        # loglevel is optional in the arguments; default to WARNING
        try:
            loglevel = self.get_args().loglevel
        except AttributeError:
            logger.debug('No log level set in arguments using WARNING')
            loglevel = 'WARNING'

        # verify sequence.tsv file exists on filesystem.
        # if not fall back to oldsequence.tsv file
        sequencetsv = data_import.get_sequence_tsv()
        if not os.path.isfile(sequencetsv):
            logger.warning(sequencetsv + ' file not found. falling '
                           'back to old file')
            self.append_to_email_log('\n ' + sequencetsv + ' file not found ' +
                                     'falling back to ' +
                                     data_import.get_oldsequence_tsv() + '\n')
            sequencetsv = data_import.get_oldsequence_tsv()

        # command line is built by plain string concatenation; values
        # are not quoted or escaped here
        cmd_to_run = (self.get_args().blastnfilter + ' --nonpolymertsv ' +
                      data_import.get_nonpolymer_tsv() + ' --sequencetsv ' +
                      sequencetsv + ' --pdbblastdb ' + make_blastdb.get_dir() +
                      ' --compinchi ' +
                      data_import.get_components_inchi_file() +
                      ' --crystalpH ' + data_import.get_crystalph_tsv() +
                      ' --pdbdb ' + self.get_args().pdbdb + ' --log ' +
                      loglevel + ' --outdir ' + self.get_dir())

        blastnfilter_name = os.path.basename(self.get_args().blastnfilter)

        # NOTE(review): meaning of the third argument (False) is defined
        # by run_external_command, which is not visible here -- confirm
        self.run_external_command(
            blastnfilter_name,
            cmd_to_run,
            False,
        )

        # status is set to complete before the postanalysis script runs
        self.set_status(D3RTask.COMPLETE_STATUS)

        cmd_to_run = (self.get_args().postanalysis + ' --compinchi ' +
                      data_import.get_components_inchi_file() + ' ' +
                      self.get_dir())

        postanalysis_name = os.path.basename(self.get_args().postanalysis)

        self.run_external_command(postanalysis_name, cmd_to_run, False)

        # best effort: a failure while parsing hit statistics is logged
        # and does not stop the task from ending normally
        try:
            # examine output to get candidate hit count DR-12
            hit_stats = self._parse_blastnfilter_output_for_hit_stats()
            if hit_stats is not None:
                self.append_to_email_log(hit_stats)
        except Exception:
            logger.exception("Error caught exception")

        # assess the result
        self.end()
Exemplo n.º 29
0
    def test_can_run(self):
        """Walk BlastNFilterTask.can_run() through each filesystem state.

        In order: makeblastdb missing, makeblastdb failed, data import
        missing, data import failed, both complete, blastnfilter directory
        already present, and blastnfilter already complete.
        """
        work_dir = tempfile.mkdtemp()

        try:
            # try where makeblastdb is not complete
            params = D3RParameters()
            blast_task = BlastNFilterTask(work_dir, params)
            self.assertEqual(blast_task.can_run(), False)

            # try where makeblastdb failed
            blast_db = MakeBlastDBTask(work_dir, params)
            blast_db.create_dir()
            blast_db_error = os.path.join(blast_db.get_path(),
                                          blast_db.get_dir_name(),
                                          D3RTask.ERROR_FILE)
            with open(blast_db_error, 'a'):
                pass
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'makeblastdb task has error status')

            # try where data import is not complete
            blast_db_complete = os.path.join(blast_db.get_path(),
                                             blast_db.get_dir_name(),
                                             D3RTask.COMPLETE_FILE)
            with open(blast_db_complete, 'a'):
                pass
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'dataimport task has ' + 'notfound status')

            # try where data import failed
            data_import = DataImportTask(work_dir, params)
            data_import.create_dir()
            import_error = os.path.join(data_import.get_path(),
                                        data_import.get_dir_name(),
                                        D3RTask.ERROR_FILE)
            with open(import_error, 'a'):
                pass
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'dataimport task has error status')

            # try where blast can run
            os.remove(import_error)
            import_complete = os.path.join(data_import.get_dir(),
                                           D3RTask.COMPLETE_FILE)
            with open(import_complete, 'a'):
                pass
            self.assertEqual(blast_task.can_run(), True)
            self.assertEqual(blast_task.get_error(), None)

            # try where blast exists
            blast_task.create_dir()
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(
                blast_task.get_error(),
                blast_task.get_dir_name() + ' already exists and' +
                ' status is unknown')

            # try where blast is complete
            blast_complete = os.path.join(blast_task.get_path(),
                                          blast_task.get_dir_name(),
                                          D3RTask.COMPLETE_FILE)
            with open(blast_complete, 'a'):
                pass
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(), None)

        finally:
            shutil.rmtree(work_dir)
Exemplo n.º 30
0
    def test_run_with_blast_success_useoldseq_and_postanalysis_fail(self):
        """Run BlastNFilterTask with /bin/echo and a stub postanalysis script.

        params.blastnfilter is set to /bin/echo so the command line built by
        the task lands in echo.stdout, where each expected flag is checked.
        params.postanalysis points at a generated script (foo.py) that
        writes summary.txt into the task directory. The test then checks
        the task status, the per-command stdout/stderr files and the email
        log, including the message about falling back from the sequence
        tsv to the old sequence tsv.
        """
        temp_dir = tempfile.mkdtemp()

        try:
            params = D3RParameters()
            params.blastnfilter = '/bin/echo'
            params.postanalysis = os.path.join(temp_dir, 'foo.py')
            params.pdbdb = '/pdbdb'
            blasttask = BlastNFilterTask(temp_dir, params)
            # bypass can_run() so no upstream task directories are needed
            blasttask._can_run = True

            txt_file = os.path.join(blasttask.get_dir(), 'summary.txt')

            # '\\n' keeps the escape literal so it is interpreted by the
            # generated script rather than by this test
            txt_contents = ('INPUT SUMMARY\\n' + '  sequences:  177\\n' +
                            '  complexes:  149\\n')
            # create fake postanalysis script that writes summary.txt
            with open(params.postanalysis, 'w') as f:
                f.write('#! /usr/bin/env python\n\n')
                f.write('f = open(\'' + txt_file + '\', \'w\')\n')
                f.write('f.write(\'' + txt_contents + '\\n\')\n')
                f.write('f.flush()\nf.close()\n')
            os.chmod(params.postanalysis, stat.S_IRWXU)

            blasttask.run()
            self.assertEqual(blasttask.get_status(), D3RTask.COMPLETE_STATUS)
            self.assertEqual(blasttask.get_error(), None)
            complete_file = os.path.join(blasttask.get_dir(),
                                         D3RTask.COMPLETE_FILE)

            self.assertTrue(os.path.isfile(complete_file))

            std_err_file = os.path.join(blasttask.get_dir(), 'echo.stderr')

            self.assertTrue(os.path.isfile(std_err_file))

            std_out_file = os.path.join(blasttask.get_dir(), 'echo.stdout')

            # check existence before opening so a missing file is reported
            # as a test failure instead of an IOError
            self.assertTrue(os.path.isfile(std_out_file))

            dataimport = DataImportTask(temp_dir, params)
            makeblast = MakeBlastDBTask(temp_dir, params)

            # 'with' guarantees the handle is closed even when a check
            # below fails
            with open(std_out_file, 'r') as f:
                echo_out = f.read().replace('\n', '')

            # assertIn reports a failure with both strings in the message,
            # where str.index() would raise a bare ValueError
            self.assertIn('--nonpolymertsv ' +
                          os.path.join(temp_dir, dataimport.get_dir_name(),
                                       DataImportTask.NONPOLYMER_TSV),
                          echo_out)
            self.assertIn(' --sequencetsv ' +
                          os.path.join(temp_dir, dataimport.get_dir_name(),
                                       DataImportTask.OLDSEQUENCE_TSV),
                          echo_out)
            self.assertIn(' --pdbblastdb ' +
                          os.path.join(temp_dir, makeblast.get_dir_name()),
                          echo_out)
            self.assertIn(' --compinchi ' +
                          os.path.join(temp_dir, dataimport.get_dir_name(),
                                       DataImportTask.COMPINCHI_ICH),
                          echo_out)
            self.assertIn(' --outdir ' +
                          os.path.join(temp_dir, blasttask.get_dir_name()),
                          echo_out)
            self.assertIn(' --crystalpH ' +
                          os.path.join(temp_dir, dataimport.get_dir_name(),
                                       DataImportTask.CRYSTALPH_TSV),
                          echo_out)
            self.assertIn(' --pdbdb /pdbdb ', echo_out)

            self.assertEqual(blasttask.get_status(), D3RTask.COMPLETE_STATUS)
            self.assertTrue(
                os.path.exists(
                    os.path.join(blasttask.get_dir(), 'foo.py.stderr')))
            self.assertTrue(
                os.path.exists(
                    os.path.join(blasttask.get_dir(), 'foo.py.stdout')))

            res = blasttask.get_email_log().rstrip('\n')
            self.assertIn('/bin/echo', res)
            self.assertIn('# txt files found: 0', res)
            self.assertIn('Output from summary.txt', res)
            self.assertIn('  sequences:  177', res)
            self.assertIn('  complexes:  149', res)
            self.assertIn(dataimport.get_sequence_tsv() +
                          ' file not found falling back to ' +
                          dataimport.get_oldsequence_tsv(),
                          res)
        finally:
            shutil.rmtree(temp_dir)