Exemplo n.º 1
0
    def test_get_set_of_pbdid_from_pdb_seqres_txt_with_400k_file(self):
        """Parse a pdb_seqres.txt holding up to 400,000 distinct ids.

        Each record written uses a different 4-character permutation of
        lowercase letters and the digits 1-9 as its id, and the size of the
        returned set is expected to equal the number of records written.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = MakeBlastDBTask(temp_dir, params)
            task.create_dir()
            perms = itertools.permutations(
                string.ascii_lowercase + '123456789', 4)
            limit = 400000
            counter = 0
            # islice stops at ``limit`` or when the permutations run out,
            # whichever comes first, so no StopIteration handling (and no
            # Python 2 only ``.next()`` call) is needed. The context manager
            # closes the file even if a write fails.
            with open(task.get_pdb_seqres_txt(), 'w') as f:
                for perm in itertools.islice(perms, limit):
                    f.write('>' + ''.join(perm) +
                            '_A mol:protein length:165  T4 LYSOZYME\n')
                    f.write('MVLSEGEWQLVLH\n')
                    counter += 1

            pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
            self.assertEqual(len(pdbset), counter)

        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 2
0
    def test_run_all_compinchi_fail(self):
        """run() records a download error for the components inchi file.

        The four TSV files are created under the file:// source directory
        but the components inchi file is not, and the error reported by the
        task is expected to name the compinchi URL and its destination.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            # mark the upstream makeblastdb task as complete
            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            # no waiting between retries so the test stays fast
            task._retrysleep = 0
            open(os.path.join(temp_dir, task.NONPOLYMER_TSV), 'a').close()
            open(os.path.join(temp_dir, task.SEQUENCE_TSV), 'a').close()
            open(os.path.join(temp_dir, task.OLDSEQUENCE_TSV), 'a').close()
            open(os.path.join(temp_dir, task.CRYSTALPH_TSV), 'a').close()
            task.run()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(
                task.get_error(), 'Unable to download file from ' +
                params.compinchi + ' to ' + task.get_components_inchi_file())

        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 3
0
    def test_get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres_w_hits(self):
        """Only the id present in both crystalph tsv and seqres is returned.

        The crystalph tsv lists five ids and pdb_seqres.txt lists two; the
        single id common to both (4rfr) is expected back as '4RFR'.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = DataImportTask(temp_dir, params)
            task.create_dir()
            # context managers close the files even if a write fails
            with open(task.get_crystalph_tsv(), 'w') as f:
                f.write('PDB_ID  _exptl_crystal_grow.pH\n')
                f.write('4X09\t6.5\n')
                f.write('4rfr\t8\n')
                f.write('4XET\t6.2\n')
                f.write('4XF1\t6.2\n')
                f.write('4XF3\t6.2\n')

            makeblast = MakeBlastDBTask(temp_dir, params)
            makeblast.create_dir()
            with open(makeblast.get_pdb_seqres_txt(), 'w') as f:
                f.write('>4rfr_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                        'HLKTEAEMKASEDLKKHG\n')
                f.write('>102l_A mol:protein length:165  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                        'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                        'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                        'ITTFRTGTWDAYKNL\n')

            pdbset = task.get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres()
            self.assertEqual(len(pdbset), 1)
            self.assertIn('4RFR', pdbset)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 4
0
 def test_can_run_where_task_failed(self):
     """can_run() is False once the task directory holds an 'error' file."""
     workdir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(workdir, D3RParameters())
         blast_task.create_dir()
         # drop an empty 'error' marker into the task directory
         error_marker = os.path.join(blast_task.get_dir(), 'error')
         with open(error_marker, 'a'):
             pass
         runnable = blast_task.can_run()
         self.assertEqual(runnable, False)
     finally:
         shutil.rmtree(workdir)
Exemplo n.º 5
0
 def test_get_set_of_pbdid_from_pdb_seqres_txt_no_file(self):
     """An empty collection comes back when pdb_seqres.txt was not written."""
     workdir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(workdir, D3RParameters())
         # only the task directory is created; no seqres file is written
         blast_task.create_dir()
         found_ids = blast_task.get_set_of_pbdid_from_pdb_seqres_txt()
         id_count = len(found_ids)
         self.assertEqual(id_count, 0)
     finally:
         shutil.rmtree(workdir)
Exemplo n.º 6
0
 def test_get_sequence_count_file_has_zero_size(self):
     """The sequence count message reports 0 for an empty seqres file."""
     workdir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(workdir, D3RParameters())
         blast_task.create_dir()
         # create pdb_seqres.txt with no content
         with open(blast_task.get_pdb_seqres_txt(), 'a'):
             pass
         message = blast_task._get_sequence_count_message()
         self.assertEqual(message, '# sequence(s): 0')
     finally:
         shutil.rmtree(workdir)
Exemplo n.º 7
0
 def test_get_set_of_pbdid_from_pdb_seqres_txt_empty_file(self):
     """An empty pdb_seqres.txt yields an empty collection of ids."""
     workdir = tempfile.mkdtemp()
     try:
         blast_task = MakeBlastDBTask(workdir, D3RParameters())
         blast_task.create_dir()
         # create pdb_seqres.txt with no content
         with open(blast_task.get_pdb_seqres_txt(), 'a'):
             pass
         seqres_exists = os.path.isfile(blast_task.get_pdb_seqres_txt())
         self.assertEqual(seqres_exists, True)
         found_ids = blast_task.get_set_of_pbdid_from_pdb_seqres_txt()
         id_count = len(found_ids)
         self.assertEqual(id_count, 0)
     finally:
         shutil.rmtree(workdir)
Exemplo n.º 8
0
 def test_get_sequence_count_file_has_multiple_seqs(self):
     """The sequence count message reports 3 for three '>' header lines."""
     temp_dir = tempfile.mkdtemp()
     try:
         params = D3RParameters()
         task = MakeBlastDBTask(temp_dir, params)
         task.create_dir()
         # the context manager closes the file even if the write fails
         with open(task.get_pdb_seqres_txt(), 'w') as f:
             f.write('>hi\n>seq\n>are\n')
         self.assertEqual(task._get_sequence_count_message(),
                          '# sequence(s): 3')
     finally:
         shutil.rmtree(temp_dir)
Exemplo n.º 9
0
 def test_get_set_of_pbdid_from_pdb_seqres_txt_file_no_seqs(self):
     """A seqres file with no '>' header lines yields no pdb ids."""
     temp_dir = tempfile.mkdtemp()
     try:
         params = D3RParameters()
         task = MakeBlastDBTask(temp_dir, params)
         task.create_dir()
         # the context manager closes the file even if the write fails
         with open(task.get_pdb_seqres_txt(), 'w') as f:
             f.write('hi\nhow\nare\nyou')
         self.assertTrue(os.path.isfile(task.get_pdb_seqres_txt()))
         pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
         self.assertEqual(len(pdbset), 0)
     finally:
         shutil.rmtree(temp_dir)
Exemplo n.º 10
0
    def test_run_all_success(self):
        """run() finishes without error when every source file is present.

        All five source files exist under the file:// source directory and a
        file transfer backed by a mocked connection is attached, so the test
        can also verify the participant list was requested from '/foo2'.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            # file transfer whose connection is a stand-in object with a
            # mocked get() so the call can be inspected afterwards
            fakeftp = FtpFileTransfer(None)
            mftp = D3RParameters()

            fakeftp.set_connection(mftp)
            fakeftp.set_remote_dir('/foo2')
            mftp.get = Mock()

            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            # mark the upstream makeblastdb task as complete
            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            task.set_file_transfer(fakeftp)
            # no waiting between retries so the test stays fast
            task._retrysleep = 0
            open(os.path.join(temp_dir, task.NONPOLYMER_TSV), 'a').close()
            open(os.path.join(temp_dir, task.SEQUENCE_TSV), 'a').close()
            open(os.path.join(temp_dir, task.OLDSEQUENCE_TSV), 'a').close()
            open(os.path.join(temp_dir, task.CRYSTALPH_TSV), 'a').close()
            open(os.path.join(temp_dir, task.COMPINCHI_ICH), 'a').close()

            task.run()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(task.get_error(), None)

            # check line count is 1 now which indicates
            # standard was added
            self.assertEqual(
                util.get_file_line_count(task.get_nonpolymer_tsv()), 1)
            self.assertEqual(util.get_file_line_count(task.get_sequence_tsv()),
                             1)
            self.assertEqual(
                util.get_file_line_count(task.get_oldsequence_tsv()), 1)
            self.assertEqual(
                util.get_file_line_count(task.get_crystalph_tsv()), 1)

            mftp.get.assert_called_with('/foo2/' +
                                        DataImportTask.PARTICIPANT_LIST_CSV,
                                        local=task.get_participant_list_csv())
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 11
0
    def test_can_run_does_not_exist_or_error(self):
        """Check can_run() against each state of the makeblastdb task.

        Covers makeblastdb missing, failed and complete, then the case where
        the dataimport directory already exists and holds an error file. The
        steps share one temp directory, so their order matters.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = DataImportTask(temp_dir, params)

            # no make blast db
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task.get_error(),
                             'makeblastdb task has notfound status')
            self.assertEqual(task._can_run, False)

            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()

            # make blast db failed
            err_file = os.path.join(make_blast.get_dir(), D3RTask.ERROR_FILE)
            open(err_file, 'a').close()
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task.get_error(),
                             'makeblastdb task has error status')
            self.assertEqual(task._can_run, False)

            os.remove(err_file)

            # make blast db success
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            self.assertEqual(task.can_run(), True)
            self.assertEqual(task.get_error(), None)
            self.assertEqual(task._can_run, True)

            # dataimport directory already exists with an error file
            task.create_dir()
            open(os.path.join(task.get_dir(), D3RTask.ERROR_FILE), 'a').close()
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task._can_run, False)
            self.assertEqual(
                task.get_error(),
                task.get_dir_name() + ' already exists and status is ' +
                D3RTask.ERROR_STATUS)

        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 12
0
    def test_get_set_of_pbdid_from_pdb_seqres_txt_with_seqs(self):
        """Seven header lines collapse to four distinct upper-case ids.

        The file repeats 102l, lists two chains of 104l and includes one
        header whose id (10jj3m) is not four characters long; the expected
        result is exactly 101M, 102L, 103L and 104L.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = MakeBlastDBTask(temp_dir, params)
            task.create_dir()
            # the context manager closes the file even if a write fails
            with open(task.get_pdb_seqres_txt(), 'w') as f:
                f.write('>101m_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                        'HLKTEAEMKASEDLKKHG\n')
                f.write('>102l_A mol:protein length:165  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                        'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                        'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                        'ITTFRTGTWDAYKNL\n')
                f.write('>102l_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHL'
                        'KTEAEMKASEDLKKAGVTVLTALGAILKKKGHHEAELKPLAQSHATKHKI'
                        'PIKYLEFISEAIIHVLHSRHPGNFGADAQGAMNKALELFRKDIAAKYKELGYQG\n')
                f.write('>103l_A mol:protein length:167  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNSLDAAKSELD'
                        'KAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRA'
                        'ALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAK'
                        'RVITTFRTGTWDAYKNL\n')
                f.write('>10jj3m_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRF\n')
                f.write('>104l_A mol:protein length:166  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSAAE\n')
                f.write('>104l_B mol:protein length:166  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSAKNL\n')

            self.assertTrue(os.path.isfile(task.get_pdb_seqres_txt()))
            pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
            self.assertEqual(len(pdbset), 4)
            self.assertIn('101M', pdbset)
            self.assertIn('102L', pdbset)
            self.assertIn('103L', pdbset)
            self.assertIn('104L', pdbset)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 13
0
    def test_run_all_nonpolymer_fail(self):
        """run() records a download error for the nonpolymer tsv.

        None of the source files are created under the file:// source
        directory, and the error reported by the task is expected to name
        the pdbfileurl and the nonpolymer tsv destination.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            # mark the upstream makeblastdb task as complete
            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            # no waiting between retries so the test stays fast
            task._retrysleep = 0
            task.run()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(
                task.get_error(), 'Unable to download file from ' +
                params.pdbfileurl + ' to ' + task.get_nonpolymer_tsv())
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 14
0
    def test_run_all_success_except_participant_download_fails(self):
        """run() succeeds but logs a warning about a failed download.

        All five source files exist and no file transfer is attached to the
        task; get_error() is expected to stay None while the email log
        starts with a 'WARNING: Unable to download' message.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            # mark the upstream makeblastdb task as complete
            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            # no waiting between retries so the test stays fast
            task._retrysleep = 0
            open(os.path.join(temp_dir, task.NONPOLYMER_TSV), 'a').close()
            open(os.path.join(temp_dir, task.SEQUENCE_TSV), 'a').close()
            open(os.path.join(temp_dir, task.OLDSEQUENCE_TSV), 'a').close()
            open(os.path.join(temp_dir, task.CRYSTALPH_TSV), 'a').close()
            open(os.path.join(temp_dir, task.COMPINCHI_ICH), 'a').close()

            task.run()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(task.get_error(), None)

            # check line count is 1 now which indicates
            # standard was added
            self.assertEqual(
                util.get_file_line_count(task.get_nonpolymer_tsv()), 1)
            self.assertEqual(util.get_file_line_count(task.get_sequence_tsv()),
                             1)
            self.assertEqual(
                util.get_file_line_count(task.get_oldsequence_tsv()), 1)
            self.assertEqual(
                util.get_file_line_count(task.get_crystalph_tsv()), 1)

            self.assertTrue(task.get_email_log().startswith(
                '\nWARNING: Unable to download'))
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 15
0
    def test_get_uploadable_files(self):
        """get_uploadable_files() lists stdout, stderr and the seqres gz file.

        Checked on an empty task directory, with the stdout/stderr files
        present, with pdb_seqres.txt.gz added, and finally with a directory
        sitting at the pdb_seqres.txt.gz path, which is expected to be left
        out of the list.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = MakeBlastDBTask(temp_dir, params)
            task.create_dir()

            # test on empty dir
            self.assertEqual(task.get_uploadable_files(), [])

            # test with stderr/stdout files
            stdout = os.path.join(task.get_dir(), 'makeblastdb.stdout')
            open(stdout, 'a').close()
            stderr = os.path.join(task.get_dir(), 'makeblastdb.stderr')
            open(stderr, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 2)
            # assertIn reports a missing entry as a test failure instead of
            # the ValueError that list.index() would raise
            self.assertIn(stdout, flist)
            self.assertIn(stderr, flist)

            # test with pdb_seqres.txt.gz file
            open(task.get_pdb_seqres_txt_gz(), 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 3)
            self.assertIn(stdout, flist)
            self.assertIn(stderr, flist)
            self.assertIn(task.get_pdb_seqres_txt_gz(), flist)

            os.unlink(task.get_pdb_seqres_txt_gz())

            # test where pdb_seqres.txt is a dir unlikely but why not check
            os.makedirs(task.get_pdb_seqres_txt_gz())
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 2)
            self.assertIn(stdout, flist)
            self.assertIn(stderr, flist)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 16
0
    def test_get_set_of_pbdid_from_pdb_seqres_txt_wrong_len_pdbids(self):
        """Headers whose ids are not four characters long yield no pdb ids.

        The file holds one two-character id (1m) and one six-character id
        (abcdel); the returned collection is expected to be empty.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = MakeBlastDBTask(temp_dir, params)
            task.create_dir()
            # the context manager closes the file even if a write fails
            with open(task.get_pdb_seqres_txt(), 'w') as f:
                f.write('>1m_A mol:protein length:154  MYOGLOBIN\n')
                f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                        'HLKTEAEMKASEDLKKHG\n')
                f.write('>abcdel_A mol:protein length:165  T4 LYSOZYME\n')
                f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                        'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                        'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                        'ITTFRTGTWDAYKNL\n')

            self.assertTrue(os.path.isfile(task.get_pdb_seqres_txt()))
            pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
            self.assertEqual(len(pdbset), 0)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 17
0
    def test_get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres_empty_seq(self):
        """No ids are returned when pdb_seqres.txt exists but is empty.

        The crystalph tsv lists five ids while the seqres file has no
        content, so the returned collection is expected to be empty.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = DataImportTask(temp_dir, params)
            task.create_dir()
            # the context manager closes the file even if a write fails
            with open(task.get_crystalph_tsv(), 'w') as f:
                f.write('PDB_ID  _exptl_crystal_grow.pH\n')
                f.write('4X09\t6.5\n')
                f.write('4rfr\t8\n')
                f.write('4XET\t6.2\n')
                f.write('4XF1\t6.2\n')
                f.write('4XF3\t6.2\n')

            makeblast = MakeBlastDBTask(temp_dir, params)
            makeblast.create_dir()
            # create pdb_seqres.txt with no content
            open(makeblast.get_pdb_seqres_txt(), 'a').close()

            pdbset = task.get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres()
            self.assertEqual(len(pdbset), 0)
        finally:
            shutil.rmtree(temp_dir)
Exemplo n.º 18
0
    def test_can_run(self):
        """Walk BlastNFilterTask.can_run() through each upstream state.

        The scenarios build on one another inside a single temp directory
        (marker files from earlier steps stay in place unless removed), so
        their order matters.
        """
        work_dir = tempfile.mkdtemp()

        def touch(path):
            # create an empty marker file (or leave an existing one as is)
            open(path, 'a').close()

        try:
            # try where makeblastdb is not complete
            params = D3RParameters()
            blast_task = BlastNFilterTask(work_dir, params)
            self.assertEqual(blast_task.can_run(), False)

            # try where makeblastdb failed
            blast_db = MakeBlastDBTask(work_dir, params)
            blast_db.create_dir()
            blast_db_error = os.path.join(blast_db.get_path(),
                                          blast_db.get_dir_name(),
                                          D3RTask.ERROR_FILE)
            touch(blast_db_error)
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'makeblastdb task has error status')

            # try where data import is not complete
            blast_db_complete = os.path.join(blast_db.get_path(),
                                             blast_db.get_dir_name(),
                                             D3RTask.COMPLETE_FILE)
            touch(blast_db_complete)
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'dataimport task has ' + 'notfound status')

            # try where data import failed
            data_import = DataImportTask(work_dir, params)
            data_import.create_dir()
            import_error = os.path.join(data_import.get_path(),
                                        data_import.get_dir_name(),
                                        D3RTask.ERROR_FILE)
            touch(import_error)
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'dataimport task has error status')

            # try where blast can run
            os.remove(import_error)
            import_complete = os.path.join(data_import.get_dir(),
                                           D3RTask.COMPLETE_FILE)
            touch(import_complete)
            self.assertEqual(blast_task.can_run(), True)
            self.assertEqual(blast_task.get_error(), None)

            # try where blast exists
            blast_task.create_dir()
            self.assertEqual(blast_task.can_run(), False)
            expected_error = (blast_task.get_dir_name() +
                              ' already exists and' + ' status is unknown')
            self.assertEqual(blast_task.get_error(), expected_error)

            # try where blast is complete
            blast_complete = os.path.join(blast_task.get_path(),
                                          blast_task.get_dir_name(),
                                          D3RTask.COMPLETE_FILE)
            touch(blast_complete)
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(), None)

        finally:
            shutil.rmtree(work_dir)