Example #1
0
	def addLinsJobs(PredictionSet, ProtocolID):
		raise colortext.Exception("Do you really want to run this?")
		colortext.printf("\nAdding Lin's mutations to %s prediction set." % PredictionSet, "lightgreen")
		KeepHETATMLines = False
		FilterTester.openDB()

		# Filter by the DummySource set of experiments
		er1 = ExperimentResultSet(ddGdb)
		ef1 = ExperimentFilter()
		ef1.setSource(ExperimentFilter.DummySource)
		er1.addFilter(ef1)

		# Filter by the particular PDB
		sr = StructureResultSet(ddGdb, 'WHERE PDB_ID="3K0NB_lin"')
		er1 = ExperimentResultSet.fromIDs(ddGdb, er1.getFilteredIDs()).filterBySet(sr)
		FilterTester.test(er1)

		experimentIDs = sorted(list(er1.getFilteredIDs()))
		colortext.message("\nThe number of unique experiments is %d.\n" % len(experimentIDs))
		ddG_connection = db.ddG()
		count = 0
		for experimentID in experimentIDs:
			ddG_connection.addPrediction(experimentID, PredictionSet, ProtocolID, KeepHETATMLines, StoreOutput = True)
			count += 1
			if count >= 10:
				colortext.write(".")
				colortext.flush()
				count = 0
		print("")
def check_JSON_dataset(dataset_ID):
    # I substitute PDB IDs, so this function does a simple check to make sure that the mutations still look okay (this check does not guarantee that the mutations are correct)

    colortext.message('Reading PDB IDs...')
    PDB_ids = set([record['PDBFileID'] for record in JSON_datasets[dataset_ID]['data']])

    colortext.message('Loading %s PDBs...' % len(PDB_ids))
    for PDB_id in PDB_ids:
        if not(cached_pdbs.get(PDB_id)):
            print('Reading %s' % PDB_id)
            colortext.write('.', 'yellow')
            sys.stdout.flush()
            cached_pdbs[PDB_id] = PDB(ddGdb.execute_select('SELECT Content FROM PDBFile WHERE ID=%s', parameters=(PDB_id,))[0]['Content'])
    print('')

    count = 0
    for record in JSON_datasets[dataset_ID]['data']:
        pdb_id = record['PDBFileID']
        p = cached_pdbs[pdb_id]
        #colortext.printf('pdb_id', color='cyan')
        #pprint.pprint(record)
        #pprint.pprint(record['Mutations'])
        for m in record['Mutations']:
            chain_id = m['Chain']
            residue_id = m['ResidueID']
            residue_aa = m['WildTypeAA']
            padded_id = ChainResidueID2String(chain_id, residue_id)
            if p.atom_sequences[chain_id][padded_id].ResidueAA != residue_aa:
                print(pdb_id, chain_id, residue_id, residue_aa)
                print(p.atom_sequences[chain_id][padded_id].ResidueAA, residue_aa)
            assert(p.atom_sequences[chain_id][padded_id].ResidueAA == residue_aa)
        count += 1
    print('Successfully checked %d datapoints.' % count)
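
The check above depends on the klab PDB class and the ChainResidueID2String helper. As a rough, dependency-free sketch of the same wild-type consistency idea, assuming records shaped like the JSON records above and a plain chain-to-sequence dict (the data here is made up):

def check_wildtype_residues(records, chain_sequences):
    '''Return (pdb_id, chain, residue_id, expected_aa) tuples for mutations whose stated wild-type residue does not match the sequence.'''
    mismatches = []
    for record in records:
        for m in record['Mutations']:
            chain = m['Chain']
            # The real check uses padded PDB residue IDs (ChainResidueID2String); here we
            # assume plain 1-based residue numbers with no insertion codes, for simplicity.
            residue_index = int(m['ResidueID']) - 1
            if chain_sequences[chain][residue_index] != m['WildTypeAA']:
                mismatches.append((record['PDBFileID'], chain, m['ResidueID'], m['WildTypeAA']))
    return mismatches

# Illustrative usage with made-up data: an empty list means every record is consistent
records = [{'PDBFileID': '1ABC', 'Mutations': [{'Chain': 'A', 'ResidueID': '3', 'WildTypeAA': 'G'}]}]
print(check_wildtype_residues(records, {'A': 'MKGLV'}))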
Example #3
0
	def runLizsSet(PredictionSet, ProtocolID):
		raise colortext.Exception("Do you really want to run this?")
		colortext.printf("\nAdding Liz's data set to %s prediction set." % PredictionSet, "lightgreen")
		KeepHETATMLines = False
		FilterTester.openDB()

		# Filter by the DummySource set of experiments
		er1 = ExperimentResultSet(ddGdb)
		ef1 = ExperimentFilter()
		ef1.setSource(ExperimentFilter.LizKellogg)
		er1.addFilter(ef1)
		FilterTester.test(er1)

		experimentIDs = sorted(list(er1.getFilteredIDs()))
		colortext.message("\nThe number of unique experiments is %d.\n" % len(experimentIDs))
		ddG_connection = db.ddG()
		count = 0
		for experimentID in experimentIDs:
			ddG_connection.addPrediction(experimentID, PredictionSet, ProtocolID, KeepHETATMLines, StoreOutput = True)
			count += 1
			if count >= 10:
				colortext.write(".")
				colortext.flush()
				count = 0
		print("")
Example #4
0
    def get_sqlalchemy_schema(self, restrict_to_tables = []):
        colortext.warning(' *** MySQL schema ***')
        schema = []
        #print(self.intermediate_schema)

        typedefs = {'sqlalchemy.types' : set(), 'sqlalchemy.dialects.mysql' : set()}

        for tbl in self.tables:
            if (not restrict_to_tables) or (tbl in restrict_to_tables):
                colortext.message(tbl)

                print(self.db_interface.execute("SHOW CREATE TABLE %s" % tbl)[0]['Create Table'])
                print('')
                code = []
                code.append("class %s(DeclarativeBase):" % tbl)
                code.append("    __tablename__ = '%s'\n" % tbl)
                #print('\n'.join(code))

                intermediate_table = self.intermediate_schema[tbl]
                for field in intermediate_table:
                    s = field.to_sql_alchemy(typedefs)
                    code.append('    {0}'.format(s))
                    #print(s)
                code.append('\n')
                #print('')
                schema.extend(code)

        imports = []
        for module, types in sorted(typedefs.iteritems()):
            imports.append('from %s import %s' % (module, ', '.join(sorted(types))))
        schema = imports + [''] + schema

        colortext.warning('*** SQLAlchemy class definitions ***')
        print('\n'.join(schema))
Example #5
0
def count_num_residues_in_active_jobs():
    '''I wrote this function to try to narrow down which jobs ran the longest, as I suspect that this is due to long PDB chains.'''
    ddGdb = ddgdbapi.ddGDatabase()
    active_jobs = ddGdb.execute_select("SELECT DISTINCT ExperimentID FROM Prediction WHERE Status='active'")
    colortext.message("\n%d jobs are active" % len(active_jobs))

    from klab.bio.rcsb import parseFASTAs

    chains_in_active_jobs = {}
    PDB_chain_lengths = {}
    for active_job in active_jobs:
        r = ddGdb.execute_select('SELECT PDBFileID, Chain FROM Experiment INNER JOIN ExperimentChain ON ExperimentID=Experiment.ID WHERE ExperimentID=%s', parameters=(active_job['ExperimentID'],))
        assert(len(r) == 1)
        r = r[0]

        key = (r['PDBFileID'], r['Chain'])

        if PDB_chain_lengths.get(key) == None:
            fasta = ddGdb.execute_select("SELECT FASTA FROM PDBFile WHERE ID=%s", parameters = (r['PDBFileID'],))
            assert(len(fasta) == 1)
            fasta = fasta[0]['FASTA']
            f = parseFASTAs(fasta)
            PDB_chain_lengths[key] = len(f[r['PDBFileID']][r['Chain']])
        chain_length = PDB_chain_lengths[key]

        chains_in_active_jobs[key] = chains_in_active_jobs.get(key, [chain_length, 0])
        chains_in_active_jobs[key][1] += 1

    if chains_in_active_jobs:
        colortext.message("Chains in currently active jobs:\n")

        print("PDB\tChain\tChain SEQRES length\tJobs remaining")
        for k,v in sorted(chains_in_active_jobs.iteritems(), key=lambda x: x[1][0]):
            print("%s\t  %s\t%s\t%s" % (k[0], k[1], str(v[0]).center(19), str(v[1]).center(14)))
Example #6
0
 def match(self, other):
     ''' This is a noisy terminal-printing function at present since there is no need to make it a proper API function.'''
     colortext.message("FASTA Match")
     for frompdbID, fromchains in sorted(self.iteritems()):
         matched_pdbs = {}
         matched_chains = {}
         for fromchain, fromsequence in fromchains.iteritems():
             for topdbID, tochains in other.iteritems():
                 for tochain, tosequence in tochains.iteritems():
                     if fromsequence == tosequence:
                         matched_pdbs[topdbID] = matched_pdbs.get(topdbID, set())
                         matched_pdbs[topdbID].add(fromchain)
                         matched_chains[fromchain] = matched_chains.get(fromchain, [])
                         matched_chains[fromchain].append((topdbID, tochain))
         foundmatches = []
         colortext.printf("  %s" % frompdbID, color="silver")
         for mpdbID, mchains in matched_pdbs.iteritems():
             if mchains == set(fromchains.keys()):
                 foundmatches.append(mpdbID)
                 colortext.printf("  PDB %s matched PDB %s on all chains" % (mpdbID, frompdbID), color="white")
         if foundmatches:
             for fromchain, fromsequence in fromchains.iteritems():
                 colortext.printf("    %s" % (fromchain), color = "silver")
                 colortext.printf("      %s" % (fromsequence), color = self.unique_sequences[fromsequence])
                 mstr = []
                 for mchain in matched_chains[fromchain]:
                     if mchain[0] in foundmatches:
                         mstr.append("%s chain %s" % (mchain[0], mchain[1]))
                 colortext.printf("	  Matches: %s" % ", ".join(mstr))
         else:
             colortext.error("    No matches found.")
Example #7
0
def main():

    # Create the database session
    dbi = DatabaseInterface(can_email = True)
    tsession = dbi.get_session()

    # Create a map from usernames to the database IDs (typically initials)
    user_map = {}
    for u in tsession.query(Users):
        user_map[u.lab_username] = u.ID

    # Read the import path from the database
    colortext.message('\nPrimers import script')
    colortext.pcyan('Database admin contacts: {0}'.format(', '.join(dbi.get_admin_contacts())))
    colortext.warning('Registered users: {0}\n'.format(', '.join(   ['{0} ({1})'.format(v, k) for k, v in sorted(user_map.iteritems(), key = lambda x: x[1])])))

    errors = []
    import_path = tsession.query(DBConstants).filter(DBConstants.Parameter == u'import_path').one().Value
    import_path_folders = sorted([d for d in os.listdir(import_path) if os.path.isdir(os.path.join(import_path,d))])
    for ipf in import_path_folders:
        if ipf in user_map:
            user_folder = os.path.join(import_path, ipf)
            user_id = user_map[ipf]
            primers_file = os.path.join(user_folder, 'primers.tsv')
            if os.path.exists(primers_file):
                case_errors = []
                try:
                    parse(dbi, primers_file, user_id, case_errors)
                    if case_errors:
                        errors.append("Errors occurred processing '{0}':\n\t{1}".format(primers_file, '\n\t'.join(case_errors)))
                        colortext.warning(errors[-1])
                except Exception, e:
                    errors.append("Errors occurred processing '{0}': {1}\n\t{2}\n{3}".format(primers_file, str(e), '\n\t'.join(case_errors), traceback.format_exc()))
                    colortext.warning('Error: {0}\n{1}'.format(str(e), traceback.format_exc()))
Example #8
0
	def showAllEligibleProTherm(PredictionSet, ProtocolID, KeepHETATMLines):
		#inserter = JobInserter()
		colortext.printf("\nAdding ProTherm mutations to %s prediction set." % PredictionSet, "lightgreen")
		#ddGdb = dbi.ddGDatabase()
		
		MAX_RESOLUTION = 2.1
		MAX_NUMRES_PROTHERM = 350
		MAX_STANDARD_DEVIATION = 1.0

		FilterTester.openDB()
		
		if False:
			t1 = time.time()
			er1 = ExperimentResultSet(ddGdb)
			er1.addFilter(ExperimentFilter.OnSource(ExperimentFilter.ProTherm))
			er1.addFilter(ExperimentFilter.NumberOfMutations(1, 1))
			er1.addFilter(ExperimentFilter.NumberOfChains(1, 1))
			er1.addFilter(ExperimentFilter.StandardDeviation(None, MAX_STANDARD_DEVIATION))
			er1.addFilter(StructureFilter.Resolution(None, MAX_RESOLUTION))
			er1.addFilter(StructureFilter.Techniques(StructureFilter.XRay))
			FilterTester.test(er1)
			t2 = time.time()
			print(t2 - t1)
		
		# This method usually takes around 65% of the time of the method above
		t1 = time.time()
		ef1 = ExperimentFilter()
		ef1.setSource(ExperimentFilter.ProTherm)
		er1 = ExperimentResultSet(ddGdb)
		er1.addFilter(ExperimentFilter.OnSource(ExperimentFilter.ProTherm))
		FilterTester.test(er1)
		ef1.setNumberOfMutations(1, 1)
		ef1.setNumberOfChains(1, 1)
		ef1.setStandardDeviation(None, MAX_STANDARD_DEVIATION)
		sf1 = StructureFilter()
		sf1.setResolution(None, MAX_RESOLUTION)
		sf1.setTechniques(StructureFilter.XRay)
		er1 = ExperimentResultSet(ddGdb)
		er1.addFilter(ef1)
		er1.addFilter(sf1)
		FilterTester.test(er1)
		t2 = time.time()
		print(t2 - t1)
		
		experimentIDs = sorted(list(er1.getFilteredIDs()))
		colortext.message("\nThe number of unique ProTherm experiments with:\n\t- one mutation;\n\t- structures solved by X-ray diffraction and with <= %d residues;\n\t- a maximum standard deviation in experimental results of <= %0.2f;\n\t- and a resolution of <= %0.2f Angstroms.\nis %d.\n" % (MAX_NUMRES_PROTHERM, MAX_STANDARD_DEVIATION, MAX_RESOLUTION, len(experimentIDs)))
		ddG_connection = db.ddG()
		count = 0
		sys.exit(0)
		print("")
		for experimentID in experimentIDs:
			ddG_connection.addPrediction(experimentID, PredictionSet, ProtocolID, KeepHETATMLines, StoreOutput = True)
			count += 1
			if count >= 10:
				colortext.write(".")
				colortext.flush()
				count = 0
		print("")
Example #9
0
 def updateBirthdays(self, bdays):
     raise Exception('update')
     eventstbl = self.getEventsTable("main")
     for dt, details in sorted(bdays.iteritems()):
         bdaykey = datetime(dt.year, dt.month, dt.day)
         if eventstbl.get((bdaykey, details["title"])):
             if str(eventstbl[(bdaykey, details["title"])]["title"]) == str(details["title"]):
                 continue
         colortext.message("adding " + details["title"])
         self.addBirthday(dt, details["title"], details["location"])
Example #10
0
    def plot(self, table_name, RFunction, output_filename = None, filetype = "pdf"):
        '''Results is expected to be a list of dicts, each of which has the keys ExperimentID and ddG.'''
        if (not self.analysis_tables) or (not table_name):
            raise Exception("There are no analysis tables to plot.")
        if not table_name in self.analysis_tables.keys():
            raise Exception("The analysis table '%s' does not exist." % table_name)

        R_return_values = {}
        gplot = None
        analysis_table = self.analysis_tables[table_name]
        if self.quiet_level >= 3:
            print(table_name)
            print(RFunction)
        if len(analysis_table.points) == 1:
            raise Exception("The analysis table %s set only has one data point. At least two points are required." % table_name)
        else:
            inputfname = self.CreateCSVFile(table_name)
            if self.quiet_level >= 3:
                print(inputfname)
            try:
                if self.quiet_level >= 2:
                    colortext.printf("Running %s." % RFunction)
                    if output_filename:
                        colortext.printf("Saving graph as %s with filename %s." % (filetype, output_filename))

                output_fname = output_filename
                if not output_fname:
                    output_fname = rosettahelper.writeTempFile(".", "")

                R_output = RFunction(inputfname, output_fname, filetype)
                R_return_values = RUtilities.parse_R_output(R_output)

                colortext.message(table_name)
                print("  %s" % str(RFunction))
                for k, v in sorted(R_return_values.iteritems()):
                    print("  %s: %s" % (str(k), str(v)))

                if not output_filename:
                    contents = rosettahelper.readBinaryFile(output_fname)
                    delete_file(output_fname)
                    description = None
                    for file_suffix, details in RFunctions.iteritems():
                        if details[1] == RFunction:
                            description = details[0]
                    assert(description)
                    gplot = AnalysisObject(table_name, description, filetype, contents)
                else:
                    gplot = output_filename

            except Exception, e:
                import traceback
                colortext.error(traceback.format_exc())
                delete_file(inputfname)
                raise Exception(e)
            delete_file(inputfname)
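
plot() hands each analysis table to R as a CSV file via CreateCSVFile. A minimal sketch of writing a list of {ExperimentID, ddG} dicts, as described in the docstring, to a CSV with the standard library (the temporary-file handling is an assumption, not the rosettahelper implementation):

import csv
import tempfile

def write_points_to_csv(points):
    '''points: list of dicts with at least the keys ExperimentID and ddG.'''
    handle = tempfile.NamedTemporaryFile(mode = 'w', suffix = '.csv', delete = False)
    writer = csv.DictWriter(handle, fieldnames = ['ExperimentID', 'ddG'], extrasaction = 'ignore')
    writer.writeheader()
    for point in points:
        writer.writerow(point)
    handle.close()
    return handle.name  # the caller is responsible for deleting the file, as plot() does

# Illustrative usage
print(write_points_to_csv([{'ExperimentID': 1, 'ddG': -0.7}, {'ExperimentID': 2, 'ddG': 1.3}]))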
Example #11
0
def fix_1AYE_InputFiles(prediction_set):
    '''This is a once-off function which should only be run once per prediction set, as each run changes the mutfile and this change should only occur once.'''
    import pickle
    ddGdb = ddgdbapi.ddGDatabase()

    BadPredictions = sorted(set([(r['PredictionID'], r['Status']) for r in ddGdb.execute_select('''
    SELECT Prediction.ID AS PredictionID, Status FROM Prediction INNER JOIN UserDataSetExperiment ON UserDataSetExperiment.ID=Prediction.UserDataSetExperimentID WHERE PredictionSet=%s AND PDBFileID='1AYE'
    ''', parameters=(prediction_set,))]))
    BadPredictionIDs = sorted(set([r[0] for r in BadPredictions]))
    print(BadPredictions)
    num_active = len([r for r in BadPredictions if r[1] == 'active'])
    num_queued = len([r for r in BadPredictions if r[1] == 'queued'])
    statuses = sorted(set([r[1] for r in BadPredictions]))
    if ('active' in statuses) or ('queued' in statuses):
        colortext.error("Cannot proceed - there are %d active jobs and %d queued in the list that need to be fixed up. Stop the DDG scheduler, remove the queued constraint, and rerun this function. " % (num_active, num_queued))
        if num_active:
            print("%d active jobs: %s" % (num_active, ", ".join([str(r[0]) for r in BadPredictions if r[1] == 'active'])))
        if num_queued:
            print("%d queued jobs: %s" % (num_queued, ", ".join([str(r[0]) for r in BadPredictions if r[1] == 'queued'])))
        return

    for PredictionID in BadPredictionIDs:
        r = ddGdb.execute_select("SELECT InputFiles FROM Prediction WHERE ID=%s", parameters=(PredictionID,))
        assert(len(r) == 1)
        r = r[0]

        InputFiles = pickle.loads(r['InputFiles'])
        assert(InputFiles.keys() == ['MUTFILE'])
        mutfile = InputFiles['MUTFILE']

        colortext.message("\n%d" % PredictionID)

        colortext.warning('original')
        print(mutfile)

        lines = mutfile.split("\n")
        assert(lines[0].startswith('total'))
        num_muts = int(lines[0][5:])
        assert(lines[1] == str(num_muts))
        for x in range(2, num_muts + 2):
            mutline = lines[x]
            tokens = mutline.split()
            tokens[1] = str(int(tokens[1]) - 1)
            lines[x] = " ".join(tokens)

        new_mutfile = "\n".join(lines)
        colortext.warning('fixed')
        print(new_mutfile)

        p = pickle.dumps({'MUTFILE' : new_mutfile})
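
The loop above shifts the residue number in every mutfile mutation line down by one. A standalone sketch of that transformation with a tiny illustrative mutfile, assuming the same layout the code relies on (a 'total N' line, a count line, then 'WT RESNUM MUT' lines):

def shift_mutfile_residues(mutfile, offset = -1):
    '''Return a copy of the mutfile with each mutation's residue number shifted by offset.'''
    lines = mutfile.split('\n')
    assert(lines[0].startswith('total'))
    num_muts = int(lines[0][5:])
    assert(lines[1] == str(num_muts))
    for x in range(2, num_muts + 2):
        tokens = lines[x].split()
        tokens[1] = str(int(tokens[1]) + offset)
        lines[x] = ' '.join(tokens)
    return '\n'.join(lines)

# Illustrative usage: residue 45 becomes residue 44
print(shift_mutfile_residues('total 1\n1\nG 45 A'))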
Example #12
0
def test_sequences(b, sequences):
    failed_cases = []
    c = 0
    for sequence in sequences:
        try:
            c += 1
            colortext.message('\n{0}/{1}: {2}'.format(c, len(sequences), sequence))
            hits = b.by_sequence(sequence)
            if hits:
                colortext.warning('{0} hits: {1}'.format(len(hits), ','.join(hits)))
            else:
                colortext.warning('No hits')
        except Exception, e:
            colortext.error('FAILED')
            failed_cases.append((sequence, str(e), traceback.format_exc()))
Example #13
0
    def updateEvents(self, calendar_id, newEvents):
        currentEvents = self.getEventsTable(calendar_id)

        #colortext.message(newEvents)
        #colortext.warning(currentEvents)

        # Events to remove
        toRemove = []
        for startdateTitle, event in sorted(currentEvents.iteritems()):
            if event["title"].find("birthday") != -1:
                # Don't remove birthdays
                continue
            if newEvents.get(startdateTitle):
                newEvent = newEvents[startdateTitle]
                if newEvent["enddate"] == event["enddate"]:
                    if event["location"].startswith(newEvent["location"]):
                        if str(newEvent["title"]) == str(event["title"]):
                            # Don't remove events which are in both newEvents and the calendar
                            continue

            # Remove events which are on the calendar but not in newEvents
            toRemove.append(startdateTitle)

        # Events to add
        toAdd = []
        for startdateTitle, event in sorted(newEvents.iteritems()):
            if currentEvents.get(startdateTitle):
                currentEvent = currentEvents[startdateTitle]
                if currentEvent["enddate"] == event["enddate"]:
                    if currentEvent["location"].startswith(event["location"]):
                        if str(currentEvent["title"]) == str(event["title"]):
                            # Don't add events which are in both newEvents and the calendar
                            continue
            # Add events which are in newEvents but not on the calendar
            toAdd.append(startdateTitle)

        if toRemove:
            colortext.error("Removing these %d events:" % len(toRemove))
            for dtTitle in toRemove:
                colortext.warning(dtTitle)
                self.removeEvent(calendar_id, currentEvents[dtTitle]["event"].id)

        if toAdd:
            colortext.message("Adding these %d events:" % len(toAdd))
            for dtTitle in toAdd:
                newEvent = newEvents[dtTitle]
                #print(dtTitle, newEvent)
                self.addNewEvent(calendar_id, dtTitle[0], newEvent["enddate"], newEvent["location"], newEvent["title"])
Example #14
0
def test_pdb_files(b, pdb_ids):

    failed_cases = []
    c = 0
    for pdb_id in pdb_ids:
        try:
            c += 1
            colortext.message('\n{0}/{1}: {2}'.format(c, len(pdb_ids), pdb_id))
            hits = b.by_pdb(pdb_id)
            if hits:
                colortext.warning('{0} hits: {1}'.format(len(hits), ','.join(hits)))
            else:
                colortext.warning('No hits')
        except Exception, e:
            colortext.error('FAILED')
            failed_cases.append((pdb_id, str(e), traceback.format_exc()))
Example #15
0
    def CreateAnalysisTables(self):
        ddGdb = self.ddGdb
        PredictionSet = self.PredictionSet
        predictions = PredictionScores(ddGdb, PredictionSet, self.ddG_score_type, score_cap = self.score_cap)
        predicted_scores = predictions.Predictions

        s = "Analyzing %d predictions in PredictionSet '%s' for UserDataSet '%s'. " % (predictions.NumberOfPredictions, predictions.PredictionSet.replace("_", "\_"), predictions.UserDataSetName)
        if self.score_cap:
            s += "Running analysis over the following analysis sets: '%s' with predicted scores capped at +-%0.2f." % (join(predictions.AnalysisSets, "', '"), self.score_cap)
        else:
            s += "Running analysis over the following analysis sets: '%s'." % (join(predictions.AnalysisSets, "', '"))
        self.description.append(("black", s))
        if self.quiet_level >= 1:
            colortext.message("Analyzing %d predictions in PredictionSet '%s' for UserDataSet '%s'." % (predictions.NumberOfPredictions, predictions.PredictionSet, predictions.UserDataSetName))
            colortext.message("Running analysis over the following analysis sets: '%s'." % (join(predictions.AnalysisSets, "', '")))

        analysis_tables = {}
        # Analyze data for
        for AnalysisSet in predictions.AnalysisSets:
            analysis_table = AnalysisTable()

            experiments = UserDataSetExperimentalScores(ddGdb, predictions.UserDataSetID, AnalysisSet)

            count = 0
            numMissing = 0
            for section, sectiondata in sorted(experiments.iteritems()):
                for recordnumber, record_data in sorted(sectiondata.iteritems()):
                    count += 1
                    PDB_ID = record_data["PDB_ID"]
                    ExperimentID = record_data["ExperimentID"]
                    ExperimentalDDG = record_data["ExperimentalDDG"]
                    if predicted_scores.get(ExperimentID) and predicted_scores[ExperimentID].get(PDB_ID):
                        PredictedDDG = predicted_scores[ExperimentID][PDB_ID]["PredictedDDG"]
                        analysis_table.add(AnalysisPoint(ExperimentalDDG, PredictedDDG, ExperimentID = ExperimentID, PDB_ID = PDB_ID, section = section, recordnumber = recordnumber))
                    else:
                        numMissing += 1
            if numMissing > 0 and self.quiet_level >= 1:
                self.description.append(("Bittersweet", "Missing %d predictions out of %d records for analysis set %s." % (numMissing, count, AnalysisSet)))
                colortext.warning("Missing %d predictions out of %d records for analysis set %s." % (numMissing, count, AnalysisSet))
            analysis_tables[AnalysisSet] = analysis_table

        self.analysis_tables = analysis_tables
Example #16
0
def print_existing_experimental_data():
    # These PDB files existed in the database before the import so I am interested to see whether any of the experimental
    # data matches the requested predictions
    print('')
    ppi_api = get_ppi_api()
    for pdb_id in ['1A2K', '1K5D', '1I2M']:
        colortext.message(pdb_id)
        complex_ids = ppi_api.search_complexes_by_pdb_id(pdb_id)
        if complex_ids:
            assert(len(complex_ids) == 1)
            complex_id = complex_ids[0]
            colortext.warning('Complex #{0}'.format(complex_id))
            pprint.pprint(ppi_api.get_complex_details(complex_id))

        mutation_records = mutations_dataframe[mutations_dataframe['pdb'].str.contains(pdb_id)]# mutations_dataframe.loc[mutations_dataframe['pdb'][0:4] == pdb_id]
        with pandas.option_context('display.max_rows', None, 'display.max_columns', None):
            print mutation_records

    # There is no experimental binding affinity data at present
    assert(not(ppi_api.DDG_db.execute_select('SELECT * FROM PPMutagenesisPDBMutation WHERE PPComplexID IN (202, 119, 176) ORDER BY PPComplexID, Chain, ResidueID, MutantAA')))
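
The snippet above filters the mutations dataframe by PDB ID and prints it without pandas' row and column truncation. A self-contained sketch of that filter-and-display pattern with a made-up dataframe:

import pandas

mutations_dataframe = pandas.DataFrame({'pdb': ['1A2K_AB', '1K5D_CD'], 'mutation': ['A L10G', 'B F33A']})
for pdb_id in ['1A2K', '1K5D']:
    mutation_records = mutations_dataframe[mutations_dataframe['pdb'].str.contains(pdb_id)]
    # option_context temporarily lifts the display limits so the full table is printed
    with pandas.option_context('display.max_rows', None, 'display.max_columns', None):
        print(mutation_records)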
Example #17
0
    def remove_all_cancelled_events(self, calendar_ids = []):

        for calendar_id in calendar_ids or self.calendar_ids:
            colortext.message('Removing cancelled events in %s' % calendar_id)
            events = self.service.events().list(calendarId = self.configured_calendar_ids[calendar_id]).execute()
            print(len(events['items']))

            for event in events['items']:
                dt = None
                nb = DeepNonStrictNestedBunch(event)
                if nb.status == 'cancelled':
                    if nb.recurringEventId:
                        colortext.warning(nb.recurringEventId)
                        # Retrieve all occurrences of the recurring event within the timeframe
                        start_time = datetime(year=2010, month=1, day=1, tzinfo=self.timezone).isoformat()
                        end_time = datetime(year=2015, month=1, day=1, tzinfo=self.timezone).isoformat()
                        for e in self.get_recurring_events(calendar_id, nb.id, start_time, end_time, maxResults = 10):
                            print(e)
                    else:
                        colortext.warning(nb)
Example #18
0
    def add_bidet(self):
        raise Exception('update')
        main_calendar = GoogleCalendar.from_file('/admin/calendars.json', ['main'])
        notices_calendar = GoogleCalendar.from_file('/admin/calendars.json', ['notices'])
        timezone = main_calendar.timezone
        event_ids = set()
        seen_notices = set()
        for year in range(2014, 2017):
        #for year in range(2014, 2015):
            colortext.message('\n\nTagging events in %d:\n' % year)
            extra_days = 0
            if year % 4 == 0:
                extra_days = 1
            start_time = datetime(year=year, month=1, day=1, hour=0, minute=0, second=0, tzinfo=timezone)
            end_time = start_time + timedelta(days = 730 + extra_days, seconds = -1)
            start_time, end_time = start_time.isoformat(), end_time.isoformat()

            #main_meetings = main_calendar.get_events(start_time, end_time, ignore_cancelled = True, get_recurring_events_as_instances = False)
            #for m in main_meetings:
            #    if m.extendedProperties.shared:
            #        event_type = m.extendedProperties.shared['event_type']
            #        if event_type == 'Birthday'

            notices = notices_calendar.get_events(start_time, end_time, ignore_cancelled = True, get_recurring_events_as_instances = False)
            for n in notices:
                if n.id in seen_notices:
                    continue
                seen_notices.add(n.id)
                if n.extendedProperties.shared and n.extendedProperties.shared.event_type:
                    event_type = n.extendedProperties.shared['event_type']
                    if event_type == 'Birthday':
                        print(n.summary, n.id)
                        print(n.start)
                        event_body = main_calendar.service.events().get(calendarId = main_calendar.configured_calendar_ids["notices"], eventId=n.id).execute()
                        event_body['gadget'] = {
                            'display' : 'icon',
                            'iconLink' : 'https://guybrush.ucsf.edu/images/cake.png',
                            'title' : n.summary,
                            #'type' : 'application/x-google-gadgets+xml',
                        }
                        created_event = main_calendar.service.events().update(calendarId = main_calendar.configured_calendar_ids["notices"], eventId = n.id, body = event_body).execute()
Example #19
0
    def addNewEvent(self, calendar_id, startdate, enddate, location, title):
        colortext.message("\nAdding %s on %s at %s" % (title, startdate, location))

        #start_time = startdate.strftime('%Y-%m-%dT%H:%M:%S').isoformat()
        #end_time =	 enddate.strftime('%Y-%m-%dT%H:%M:%S').isoformat()
        start_time = startdate.isoformat()
        end_time = enddate.isoformat()

        loc = location
        if loc.startswith("Tahoe"):
            loc = "%s, 10 minutes outside Truckee, CA @ 39.328455,-120.184078" % loc
        else:
            if location.startswith("BH "):
                loc = "%s, Byers Hall" % loc
            loc = "%s, removeEvent/Mission Bay, San Francisco, CA @ 37.767952,-122.392214" % loc

        for i in range(3):
            try:
                self.service.events().insert(
                    calendarId = self.configured_calendar_ids[calendar_id],
                    body = {
                        "start" : {
                            "timeZone" : self.timezone_string,
                            "dateTime" : start_time,
                        },
                        "end" : {
                            "timeZone" : self.timezone_string,
                            "dateTime" : end_time,
                        },
                        "location" : loc,
                        "summary" : title,
                        "description" : title
                    }).execute()
                break
            except Exception, e:
                colortext.error("An error occurred:")
                colortext.error(traceback.format_exc())
                colortext.error(e)
                colortext.error("Trying again.")
                time.sleep(2)
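
addNewEvent wraps the insert call in a retry loop: up to three attempts with a short sleep between failures. A generic sketch of that retry idiom, independent of the Google Calendar service object (the attempt count and delay mirror the code above; unlike the loop above, this sketch raises once the attempts are exhausted):

import time
import traceback

def call_with_retries(action, attempts = 3, delay = 2):
    '''Call action(); on failure, log the traceback, sleep, and retry up to attempts times.'''
    for i in range(attempts):
        try:
            return action()
        except Exception:
            print("An error occurred:")
            print(traceback.format_exc())
            print("Trying again.")
            time.sleep(delay)
    raise Exception('Giving up after %d attempts.' % attempts)

# Illustrative usage
call_with_retries(lambda: 42)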
Example #20
0
def test_pdbml_speed():

    test_cases = [
        '1WSY',
        '1YGV',
        '487D',
        '1HIO',
        '1H38',
        '3ZKB',
    ]
    for test_case in test_cases:
        print("\n")

        colortext.message("Creating PDBML object for %s" % test_case)
        #PDBML.retrieve(test_case, cache_dir = cache_dir)

        print("")
        colortext.printf("Using the old minidom class", color = 'cyan')
        t1 = time.clock()
        p_minidom = PDBML_slow.retrieve(test_case, cache_dir = cache_dir)
        t2 = time.clock()
        colortext.message("Done in %0.2fs!" % (t2 - t1))

        print("")
        colortext.printf("Using the new sax class", color = 'cyan')
        t1 = time.clock()
        p_sax = PDBML.retrieve(test_case, cache_dir = cache_dir)
        t2 = time.clock()
        colortext.message("Done in %0.2fs!" % (t2 - t1))

        colortext.write("\nEquality test: ", color = 'cyan')
        try:
            assert(p_minidom.atom_to_seqres_sequence_maps.keys() == p_sax.atom_to_seqres_sequence_maps.keys())
            for c, s_1 in p_minidom.atom_to_seqres_sequence_maps.iteritems():
                s_2 = p_sax.atom_to_seqres_sequence_maps[c]
                assert(str(s_1) == str(s_2))
            colortext.message("passed\n")
        except:
            colortext.error("failed\n")
def determine_structure_scores(DDG_api, skip_if_we_have_pairs = 50):
    pp = pprint.PrettyPrinter(indent=4)

    ddGdb = DDG_api.ddGDB
    ddGdb_utf = ddgdbapi.ddGDatabase(use_utf = True)
    # Get the list of completed prediction set
    completed_prediction_sets = get_completed_prediction_sets(DDG_api)
    print(completed_prediction_sets)

    # Create the mapping from the old score types to the ScoreMethod record IDs
    ScoreMethodMap = {}
    results = ddGdb_utf.execute('SELECT * FROM ScoreMethod')
    for r in results:
        if r['MethodName'] == 'Global' and r['MethodType'] == 'Protocol 16':
            ScoreMethodMap[("kellogg", "total")] = r['ID']
        if r['Authors'] == 'Noah Ollikainen':
            if r['MethodName'] == 'Local' and r['MethodType'] == 'Position' and r['Parameters'] == u'8Å radius':
                ScoreMethodMap[("noah_8,0A", "positional")] = r['ID']
            if r['MethodName'] == 'Local' and r['MethodType'] == 'Position (2-body)' and r['Parameters'] == u'8Å radius':
                ScoreMethodMap[("noah_8,0A", "positional_twoscore")] = r['ID']
            if r['MethodName'] == 'Global' and r['MethodType'] == 'By residue' and r['Parameters'] == u'8Å radius':
                ScoreMethodMap[("noah_8,0A", "total")] = r['ID']

    # For each completed prediction set, determine the structure scores
    for prediction_set in completed_prediction_sets:
        #if prediction_set not in ['Ubiquitin scan: UQ_con_yeast p16']:
        #    continue

        predictions = ddGdb.execute('SELECT ID, ddG, Scores, status, ScoreVersion FROM Prediction WHERE PredictionSet=%s ORDER BY ID', parameters=(prediction_set,))
        num_predictions = len(predictions)

        # Pass #1: Iterate over all Predictions and make sure that they have completed and contain all the scores we expect
        colortext.message('Prediction set: %s' % prediction_set)
        colortext.warning('Checking that all data exists...')
        for prediction in predictions:
            #assert(prediction['status'] == 'done')
            PredictionID = prediction['ID']
            if PredictionID != 72856:
                continue
            global_scores = pickle.loads(prediction['ddG'])
            assert(global_scores)
            assert(prediction['ScoreVersion'] == 0.23)
            if not prediction['Scores']:
                raise Exception("This prediction needs to be scored with Noah's method.")

            gs2 = json.loads(prediction['Scores'])
            if True not in set([k.find('noah') != -1 for k in gs2['data'].keys()]):
                raise Exception("This prediction needs to be scored with Noah's method.")
            assert (gs2['data']['kellogg'] == global_scores['data']['kellogg'])

        # Pass #2: Iterate over all completed Predictions with null StructureScores.
        # For each Prediction, determine and store the structure scores
        count = 0
        for prediction in predictions:

            count += 1
            PredictionID = prediction['ID']
            colortext.message('%s: %d of %d (Prediction #%d)' % (prediction_set, count, num_predictions, PredictionID))

            #if PredictionID != 72856:
            #if PredictionID < 73045: continue
            if prediction['status'] == 'failed':
                colortext.error('Skipping failed prediction %d.' % PredictionID)
                continue
            if prediction['status'] == 'queued':
                colortext.warning('Skipping queued prediction %d.' % PredictionID)
                continue
            if prediction['status'] == 'postponed':
                colortext.printf('Skipping postponed prediction %d.' % PredictionID, 'cyan')
                continue

            # Store the ensemble scores
            try:
                global_scores = json.loads(prediction['Scores'])['data']
            except:
                raise colortext.Exception("Failed reading the Scores field's JSON object. The Prediction Status is %(status)s. The Scores field is: '%(Scores)s'." % prediction)
            for score_type, inner_data in global_scores.iteritems():
                for inner_score_type, data in inner_data.iteritems():
                    components = {}
                    if score_type == 'kellogg' and inner_score_type == 'total':
                        components = data['components']
                        ddG = data['ddG']

                    elif score_type == 'noah_8,0A' and inner_score_type == 'positional':
                        ddG = data['ddG']
                    elif score_type == 'noah_8,0A' and inner_score_type == 'positional_twoscore':
                        ddG = data['ddG']
                    elif score_type == 'noah_8,0A' and inner_score_type == 'total':
                        ddG = data['ddG']
                    else:
                        continue
                        raise Exception('Unhandled score types: "%s", "%s".' % (score_type, inner_score_type))

                    ScoreMethodID = ScoreMethodMap[(score_type, inner_score_type)]
                    new_record = dict(
                        PredictionID = PredictionID,
                        ScoreMethodID = ScoreMethodID,
                        ScoreType = 'DDG',
                        StructureID = -1, # This score is for the Prediction rather than a structure
                        DDG = ddG,
                    )
                    assert(not(set(components.keys()).intersection(set(new_record.keys()))))
                    new_record.update(components)
                    ddGdb.insertDictIfNew('PredictionStructureScore', new_record, ['PredictionID', 'ScoreMethodID', 'ScoreType', 'StructureID'])

            if skip_if_we_have_pairs != None:
                # Skip this case if we have a certain number of existing records (much quicker since we do not have to extract the binary)
                num_wt = ddGdb.execute_select("SELECT COUNT(ID) AS NumRecords FROM PredictionStructureScore WHERE PredictionID=%s AND ScoreType='WildType'", parameters=(PredictionID,))[0]['NumRecords']
                num_mut = ddGdb.execute_select("SELECT COUNT(ID) AS NumRecords FROM PredictionStructureScore WHERE PredictionID=%s AND ScoreType='Mutant'", parameters=(PredictionID,))[0]['NumRecords']
                print(num_wt, num_mut)
                if num_wt == num_mut and num_mut == skip_if_we_have_pairs:
                    continue

            # Store the ddg_monomer scores for each structure
            grouped_scores = DDG_api.get_ddg_monomer_scores_per_structure(PredictionID)
            for structure_id, wt_scores in sorted(grouped_scores['WildType'].iteritems()):
                new_record = dict(
                    PredictionID = PredictionID,
                    ScoreMethodID = ScoreMethodMap[("kellogg", "total")],
                    ScoreType = 'WildType',
                    StructureID = structure_id,
                    DDG = None,
                )
                new_record.update(wt_scores)
                ddGdb.insertDictIfNew('PredictionStructureScore', new_record, ['PredictionID', 'ScoreMethodID', 'ScoreType', 'StructureID'])
            for structure_id, wt_scores in sorted(grouped_scores['Mutant'].iteritems()):
                new_record = dict(
                    PredictionID = PredictionID,
                    ScoreMethodID = ScoreMethodMap[("kellogg", "total")],
                    ScoreType = 'Mutant',
                    StructureID = structure_id,
                    DDG = None,
                )
                new_record.update(wt_scores)
                ddGdb.insertDictIfNew('PredictionStructureScore', new_record, ['PredictionID', 'ScoreMethodID', 'ScoreType', 'StructureID'])

            # Test to make sure that we can pick a best pair of structures (for generating a PyMOL session)
            assert(DDG_api.determine_best_pair(PredictionID) != None)
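
determine_structure_scores relies on insertDictIfNew to avoid inserting a duplicate PredictionStructureScore row for the same (PredictionID, ScoreMethodID, ScoreType, StructureID). A rough sketch of those insert-if-new semantics using the standard sqlite3 module; the table and column names follow the record dicts above, and this is only an illustration of the idea, not the klab implementation:

import sqlite3

def insert_dict_if_new(connection, table, record, key_fields):
    '''Insert record into table unless a row already exists with the same values for key_fields.'''
    where_clause = ' AND '.join(['%s = ?' % f for f in key_fields])
    existing = connection.execute('SELECT 1 FROM %s WHERE %s' % (table, where_clause),
                                  [record[f] for f in key_fields]).fetchone()
    if existing:
        return False
    columns = sorted(record.keys())
    placeholders = ', '.join(['?'] * len(columns))
    connection.execute('INSERT INTO %s (%s) VALUES (%s)' % (table, ', '.join(columns), placeholders),
                       [record[c] for c in columns])
    return True

# Illustrative usage with an in-memory table
conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE PredictionStructureScore (PredictionID, ScoreMethodID, ScoreType, StructureID, DDG)')
record = dict(PredictionID = 72856, ScoreMethodID = 1, ScoreType = 'DDG', StructureID = -1, DDG = 1.5)
keys = ['PredictionID', 'ScoreMethodID', 'ScoreType', 'StructureID']
print(insert_dict_if_new(conn, 'PredictionStructureScore', record, keys))  # True: new row inserted
print(insert_dict_if_new(conn, 'PredictionStructureScore', record, keys))  # False: duplicate skipped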
Example #22
0
 def _create_input_files(self):
     colortext.message('self.outdir: ' + self.outdir)
     write_file(self._filepath('scaffold.pdb'), self.Scaffold.pdb_contents)
     write_file(self._filepath('model.pdb'), self.Model.pdb_contents)
     if self.Crystal:
         write_file(self._filepath('crystal.pdb'), self.Crystal.pdb_contents)
Example #23
0
        rosetta_scripts_binary = sorted(release_binaries)[0]
    elif other_binaries:
        rosetta_scripts_binary = sorted(other_binaries)[0]
    if not rosetta_scripts_binary:
        raise colortext.Exception(
            'No RosettaScripts binary could be located in {0}.'.format(
                rosetta_binary_path))
    rosetta_scripts_binary = rosetta_scripts_binary[1]

    for c in cases:
        pruned_structure_directory = c[0]
        output_directory = c[1]

        # Iterate through the dataset cases
        colortext.message(
            'Adding loop residues back to the pruned structures in {0}.'.
            format(pruned_structure_directory))
        file_filter = os.path.join(pruned_structure_directory, '*.pdb')
        for pdb_file in sorted(glob.glob(file_filter)):
            pdb_prefix = os.path.splitext(
                os.path.split(pdb_file)[1])[0].lower()
            file_prefix = os.path.splitext(pdb_file)[0]
            fasta_file = file_prefix + '.fasta'
            loop_file = file_prefix + '.loop.json'
            assert (os.path.exists(fasta_file))
            assert (os.path.exists(loop_file))

            # Convert the FASTA headers back into PDB residue IDs
            fasta_contents = read_file(fasta_file)
            headers = [
                l for l in fasta_contents.split('\n') if l.startswith('>')
Example #24
0
                pdb_chain_to_pfam_mapping[pdb_id][chain_id].add(pfam_acc)

                pfam_to_pdb_chain_mapping[pfam_acc] = pfam_to_pdb_chain_mapping.get(pfam_acc, set())
                pfam_to_pdb_chain_mapping[pfam_acc].add(pdb_key)

        self.pdb_chain_to_pfam_mapping = pdb_chain_to_pfam_mapping
        self.pfam_to_pdb_chain_mapping = pfam_to_pdb_chain_mapping


    def get_pfam_accession_numbers_from_pdb_id(self, pdb_id):
        '''Note: an alternative is to use the RCSB API e.g. http://www.rcsb.org/pdb/rest/hmmer?structureId=1cdg.'''
        pdb_id = pdb_id.lower()
        if self.pdb_chain_to_pfam_mapping.get(pdb_id):
            return self.pdb_chain_to_pfam_mapping[pdb_id].copy()

    def get_pfam_accession_numbers_from_pdb_chain(self, pdb_id, chain):
        '''Note: an alternative is to use the RCSB API e.g. http://www.rcsb.org/pdb/rest/hmmer?structureId=1cdg.'''
        return self.pdb_chain_to_pfam_mapping.get(pdb_id.lower(), {}).get(chain)

    def get_pdb_chains_from_pfam_accession_number(self, pfam_acc):
        return self.pfam_to_pdb_chain_mapping.get(pfam_acc)


if __name__ == '__main__':
    pfam_api = Pfam()
    colortext.warning(pfam_api.get_pfam_accession_numbers_from_pdb_chain('1TVA', 'A'))
    colortext.warning(pfam_api.get_pfam_accession_numbers_from_pdb_chain('1CDG', 'A'))
    colortext.warning(pfam_api.get_pfam_accession_numbers_from_pdb_id('1A2c'))

    colortext.message(pfam_api.get_pdb_chains_from_pfam_accession_number('PF14716'))
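
The docstrings above mention the RCSB REST endpoint http://www.rcsb.org/pdb/rest/hmmer?structureId=1cdg as an alternative source for the PDB-to-Pfam mapping. A minimal sketch of querying it, assuming that legacy endpoint is still reachable and returns XML:

try:
    from urllib2 import urlopen  # Python 2, matching the rest of these examples
except ImportError:
    from urllib.request import urlopen  # Python 3

def fetch_pfam_mapping_xml(pdb_id):
    '''Fetch the raw hmmer/Pfam mapping XML for a PDB ID from the legacy RCSB REST service.'''
    url = 'http://www.rcsb.org/pdb/rest/hmmer?structureId=%s' % pdb_id.lower()
    return urlopen(url).read()

# Illustrative usage (requires network access; the legacy endpoint may have been retired)
# print(fetch_pfam_mapping_xml('1cdg'))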
Example #25
0
if __name__ == '__main__':
    import pprint
    gc = GoogleCalendar.from_file('test.json', ['main', 'rosetta_dev', 'regular_meetings', 'vacations'])

    tests = ['events']
    #'admin'
    # acl
    if 'acl' in tests:
        gc.get_calendar_users('main')

    # calendarList
    if 'calendarList' in tests:
        gc.get_calendars()
        v = gc.get_calendar('vacations')
        colortext.message('Description: %s' % v.description)
        colortext.warning('Role: %s' % v.accessRole)
        colortext.warning('Time zone: %s' % v.timeZone)

    # colors
    if 'colors' in tests:
        gc.get_colors()

    # events
    if 'events' in tests:
        for evnt in gc.get_upcoming_events_within_the_current_month():
            pass
            #print(evnt.datetime_o, evnt.description, evnt.location)

        colortext.warning('***')
        for evnt in gc.get_events_within_a_given_month(2014, 12):
Example #26
0
    def CreateAnalysisTables(self):
        ddGdb = self.ddGdb
        PredictionSet = self.PredictionSet
        predictions = PredictionScores(ddGdb,
                                       PredictionSet,
                                       self.ddG_score_type,
                                       score_cap=self.score_cap)
        predicted_scores = predictions.Predictions

        s = "Analyzing %d predictions in PredictionSet '%s' for UserDataSet '%s'. " % (
            predictions.NumberOfPredictions,
            predictions.PredictionSet.replace(
                "_", "\_"), predictions.UserDataSetName)
        if self.score_cap:
            s += "Running analysis over the following analysis sets: '%s' with predicted scores capped at +-%0.2f." % (
                join(predictions.AnalysisSets, "', '"), self.score_cap)
        else:
            s += "Running analysis over the following analysis sets: '%s'." % (
                join(predictions.AnalysisSets, "', '"))
        self.description.append(("black", s))
        if self.quiet_level >= 1:
            colortext.message(
                "Analyzing %d predictions in PredictionSet '%s' for UserDataSet '%s'."
                % (predictions.NumberOfPredictions, predictions.PredictionSet,
                   predictions.UserDataSetName))
            colortext.message(
                "Running analysis over the following analysis sets: '%s'." %
                (join(predictions.AnalysisSets, "', '")))

        analysis_tables = {}
        # Analyze data for
        for AnalysisSet in predictions.AnalysisSets:
            analysis_table = AnalysisTable()

            experiments = UserDataSetExperimentalScores(
                ddGdb, predictions.UserDataSetID, AnalysisSet)

            count = 0
            numMissing = 0
            for section, sectiondata in sorted(experiments.iteritems()):
                for recordnumber, record_data in sorted(
                        sectiondata.iteritems()):
                    count += 1
                    PDB_ID = record_data["PDB_ID"]
                    ExperimentID = record_data["ExperimentID"]
                    ExperimentalDDG = record_data["ExperimentalDDG"]
                    if predicted_scores.get(ExperimentID) and predicted_scores[
                            ExperimentID].get(PDB_ID):
                        PredictedDDG = predicted_scores[ExperimentID][PDB_ID][
                            "PredictedDDG"]
                        analysis_table.add(
                            AnalysisPoint(ExperimentalDDG,
                                          PredictedDDG,
                                          ExperimentID=ExperimentID,
                                          PDB_ID=PDB_ID,
                                          section=section,
                                          recordnumber=recordnumber))
                    else:
                        numMissing += 1
            if numMissing > 0 and self.quiet_level >= 1:
                self.description.append((
                    "Bittersweet",
                    "Missing %d predictions out of %d records for analysis set %s."
                    % (numMissing, count, AnalysisSet)))
                colortext.warning(
                    "Missing %d predictions out of %d records for analysis set %s."
                    % (numMissing, count, AnalysisSet))
            analysis_tables[AnalysisSet] = analysis_table

        self.analysis_tables = analysis_tables
Example #27
0
    def plot(self,
             table_name,
             RFunction,
             output_filename=None,
             filetype="pdf"):
        '''Results is expected to be a list of dicts, each of which has the keys ExperimentID and ddG.'''
        if (not self.analysis_tables) or (not table_name):
            raise Exception("There are no analysis tables to plot.")
        if not table_name in self.analysis_tables.keys():
            raise Exception("The analysis table '%s' does not exist." %
                            table_name)

        R_return_values = {}
        gplot = None
        analysis_table = self.analysis_tables[table_name]
        if self.quiet_level >= 3:
            print(table_name)
            print(RFunction)
        if len(analysis_table.points) == 1:
            raise Exception(
                "The analysis table %s set only has one data point. At least two points are required."
                % table_name)
        else:
            inputfname = self.CreateCSVFile(table_name)
            if self.quiet_level >= 3:
                print(inputfname)
            try:
                if self.quiet_level >= 2:
                    colortext.printf("Running %s." % RFunction)
                    if output_filename:
                        colortext.printf(
                            "Saving graph as %s with filename %s." %
                            (filetype, output_filename))

                output_fname = output_filename
                if not output_fname:
                    output_fname = rosettahelper.writeTempFile(".", "")

                R_output = RFunction(inputfname, output_fname, filetype)
                R_return_values = RUtilities.parse_R_output(R_output)

                colortext.message(table_name)
                print("  %s" % str(RFunction))
                for k, v in sorted(R_return_values.iteritems()):
                    print("  %s: %s" % (str(k), str(v)))

                if not output_filename:
                    contents = rosettahelper.readBinaryFile(output_fname)
                    delete_file(output_fname)
                    description = None
                    for file_suffix, details in RFunctions.iteritems():
                        if details[1] == RFunction:
                            description = details[0]
                    assert (description)
                    gplot = AnalysisObject(table_name, description, filetype,
                                           contents)
                else:
                    gplot = output_filename

            except Exception, e:
                import traceback
                colortext.error(traceback.format_exc())
                delete_file(inputfname)
                raise Exception(e)
            delete_file(inputfname)
Example #28
0
def main(prediction_ids = None, memory_free='3.0G', cfg = None):
    # This uses the version of Rosetta from your cluster template settings file
    settings = parse_settings.get_dict()
    rosetta_scripts_path = settings['local_rosetta_installation_path'] + '/source/bin/' + 'rosetta_scripts' + settings['local_rosetta_binary_type']
    ppi_api = get_interface_with_config_file(rosetta_scripts_path = rosetta_scripts_path, rosetta_database_path = '/home/kyleb/rosetta/working_branches/alascan/database')

    t1, t2 = None, None

    # Read the keep_hetatm_lines optional setting
    keep_hetatm_lines = False
    keep_all_lines = False
    try: keep_hetatm_lines = cfg.keep_hetatm_lines
    except: colortext.warning('Note: keep_hetatm_lines is not specified in {0}. Defaulting to {1}.'.format(sys.argv[1], keep_hetatm_lines))
    try: keep_all_lines = cfg.keep_all_lines
    except: colortext.warning('Note: keep_all_lines is not specified in {0}. Defaulting to {1}.'.format(sys.argv[1], keep_all_lines))

    prediction_set_id = cfg.prediction_set_id

    if prediction_ids == None:
        assert( len(sys.argv) > 1 )
        cfg = importlib.import_module(sys.argv[1], package=None)

        protocol_name = cfg.protocol_name

        suppress_warnings = True

        if not ppi_api.prediction_set_exists(prediction_set_id):
            print 'Creating new prediction set:', prediction_set_id
            t1 = time.time()
            ppi_api.add_prediction_set(prediction_set_id, halted = True, priority = 7, allow_existing_prediction_set = False, description = cfg.prediction_set_description)

            # Populate the prediction set with jobs from a (tagged subset of a) user dataset
            print 'Created PredictionSet:', prediction_set_id
            ppi_api.add_prediction_run(prediction_set_id, cfg.user_dataset_name, keep_all_lines = keep_all_lines, keep_hetatm_lines = keep_hetatm_lines, tagged_subset = cfg.tagged_subset, extra_rosetta_command_flags = '-ignore_zero_occupancy false -ignore_unrecognized_res', show_full_errors = True, suppress_warnings = suppress_warnings)
            t2 = time.time()

        existing_job = False
        end_job_name  = '%s_%s' % (getpass.getuser(), prediction_set_id)
        if not os.path.exists(job_output_directory):
            os.makedirs(job_output_directory)

        for d in os.listdir(job_output_directory):
            if os.path.isdir(os.path.join(job_output_directory, d)) and end_job_name in d:
                print 'Found existing job:', d
                job_name = d
                existing_job = True
        if not existing_job:
            job_name = '%s-%s' % (time.strftime("%y%m%d"), end_job_name)

            ppi_api.add_development_protocol_command_lines(
                prediction_set_id, protocol_name, 'minimize_with_cst', ''
            )
            # 2x because bugs
            ppi_api.add_development_protocol_command_lines(
                prediction_set_id, protocol_name, 'minimize_with_cst', ''
            )

        prediction_ids = sorted(ppi_api.get_prediction_ids(prediction_set_id))
        output_dir = os.path.join(job_output_directory, job_name )
    else:
        # Prediction_ids passed in
        job_name = '%s-%s_%s-rerun' % (time.strftime("%y%m%d"), getpass.getuser(), prediction_set_id)

        output_dir = os.path.join(job_output_directory, job_name )
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        existing_job = False

    settings['scriptname'] = prediction_set_id + '_run'
    settings['tasks_per_process'] = 5
    settings['mem_free'] = memory_free
    settings['output_dir'] = output_dir
    settings['rosetta_args_list'] = [
        '-in:file:fullatom',
        '-ignore_zero_occupancy false',
        '-ignore_unrecognized_res',
        '-fa_max_dis 9.0',
        '-ddg::harmonic_ca_tether 0.5',
        '-ddg::constraint_weight 1.0',
        '-ddg::out_pdb_prefix min_cst_0.5',
        '-ddg::sc_min_only false',
    ]
    settings['rosetta_args_list'].extend(cfg.extra_flags)
    print settings['rosetta_args_list']

    # Now get run settings from database and save to pickle file
    job_dict = {}
    output_data_dir = os.path.join(settings['output_dir'], 'data')

    if not os.path.isdir(output_data_dir):
        os.makedirs(output_data_dir)

    if t1 != None and t2 != None and len(prediction_ids) != 0:
        print('Time taken for {0} predictions: {1}s ({2}s per prediction).'.format(len(prediction_ids), t2-t1, (t2-t1)/len(prediction_ids)))
    print('File cache statistics:')
    pprint.pprint(ppi_api.get_file_content_cache_stats())
    settings['numjobs'] = len(prediction_ids)
    app_name = 'minimize_with_cst'
    settings['appname'] = app_name

    print('')

    t1 = time.time()

    # Progress counter setup
    colortext.message('Creating input data for %d predictions.' % (len(prediction_ids)))
    count, records_per_dot = 0, 50
    print("|" + ("*" * (int(len(prediction_ids)/records_per_dot)-2)) + "|")
    for prediction_id in prediction_ids:
        # Progress counter
        count += 1
        if count % records_per_dot == 0: colortext.write(".", "cyan", flush = True)

        # Check if job already ran
        prediction_id_dir = os.path.join(output_dir, str(prediction_id))
        if existing_job:
            if os.path.isdir( prediction_id_dir ):
                pdb_output_files = [x for x in os.listdir( prediction_id_dir ) if '.pdb' in x]
            else:
                pdb_output_files = []
            if len(pdb_output_files) >= 1:
                print 'Skipping', prediction_id
                settings['numjobs'] = settings['numjobs'] - 1
                continue
            if os.path.isdir(prediction_id_dir):
                print 'Job directory %s already exists, deleting' % prediction_id_dir
                shutil.rmtree(prediction_id_dir)
            # else:
            #     print 'Creating new job directory %s' % prediction_id_dir

        job_data_dir = os.path.join(output_data_dir, str(prediction_id))

        # Allow us to resume from an interrupted setup
        truncate_content = None
        all_files_exist = os.path.exists(job_data_dir) and os.path.exists(os.path.join(job_data_dir, '.ready'))
        if all_files_exist:
            truncate_content = 0
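        # Note: when the '.ready' marker from a previous, completed setup pass is present,
        # truncate_content = 0 presumably tells get_job_details to return the file records
        # without their (already written) contents, so resuming an interrupted setup avoids
        # re-fetching them.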

        job_details = ppi_api.get_job_details(prediction_id, truncate_content = truncate_content)
        file_tuples = [] # List of (filename, contents) tuples
        for file_info in job_details['Files']['Input']:
            file_tuples.append( (file_info['Filename'], file_info['Content']) )
        substitution_parameters = json.loads(job_details['JSONParameters'])

        # Scrub the folder
        if not all_files_exist:
            if os.path.isdir(job_data_dir):
                shutil.rmtree(job_data_dir)
            os.makedirs(job_data_dir)

        files_dict = {} # Maps each filename to its path relative to the output directory
        for file_name, file_contents in file_tuples:
            new_file_location = os.path.join(job_data_dir, file_name)
            if not all_files_exist:
                if '.pdb' in file_name:
                    if keep_hetatm_lines or keep_all_lines:
                        write_file(new_file_location, file_contents)
                    else:
                        write_file(new_file_location, '\n'.join([l for l in file_contents.split('\n') if l.startswith('ATOM')]))
                else:
                    with open(new_file_location, 'w') as f:
                        f.write(file_contents)
            files_dict[file_name] = os.path.relpath(new_file_location, settings['output_dir'])
        if not all_files_exist:
            write_file(os.path.join(job_data_dir, '.ready'), '')

        argdict = {
            'input_file_list' : [files_dict[substitution_parameters['%%input_pdb%%']]],
        }
        for file_name, file_location in files_dict.iteritems():
            if 'params' in file_name:
                argdict['-extra_res_fa'] = file_location
        job_dict[prediction_id] = argdict
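        # Note: job_dict maps each prediction ID to its per-task Rosetta arguments (the input
        # PDB list and, if a params file was provided, an -extra_res_fa entry); write_run_file
        # below presumably serializes this alongside 'settings' for the cluster run script.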


    t2 = time.time()

    print('')
    if count != 0:
        print('Time taken for {0} predictions: {1}s ({2}s per prediction).'.format(count, t2-t1, (t2-t1)/count))
    print('File cache statistics:')
    pprint.pprint(ppi_api.get_file_content_cache_stats())

    print('')
    if len(job_dict) > 0:
        write_run_file(settings, database_run = False, job_dict = job_dict)
        print('Job files written to directory: %s' % os.path.abspath(output_dir))
    else:
        print('No tasks to process, not writing job files')
Пример #29
0
    #sys.path.insert(0, '/home/oconchus/dev/')
    #sys.path.insert(0, "/home/oconchus/dev/klab")
else:
    import klab

import klab.colortext as colortext
from ddglib.ppi_api import get_interface_with_config_file as get_ppi_interface_with_config_file


# Set up database connection
try:
    ppi_api = get_ppi_interface_with_config_file(host_config_name = 'kortemmelab')
except:
    colortext.error('Database connection failed.')
    raise
colortext.message('Connected to database.')


# Pick a scoring method
score_method_id = ppi_api.get_score_method_id('Rescore-Talaris2014', method_authors = 'kyle', method_type = 'ddg_monomer rescore')

# Get the best structures for prediction 23849
wild_type_complexes = ppi_api.get_top_x_scores(23849, score_method_id, 'WildTypeComplex', 3, component = 'total', order_by = 'ASC')
wild_type_filenames = []
for wtc in wild_type_complexes:
    wild_type_filenames.append([f for f in glob.glob('repacked_wt*_round_{0}.*'.format(wtc['StructureID']))][0])
print(wild_type_filenames)


mutant_complexes = ppi_api.get_top_x_scores(23849, score_method_id, 'MutantComplex', 3, component = 'total', order_by = 'ASC')
mutant_filenames = []
def generate_JSON_dataset(dataset_ID, pdb_data, pub_data):

    record_data = {}

    #1LRP
    #1LMB

    # 1 JSON object per dataset record
    failure_count = 0
    records = ddGdb.execute_select('SELECT * FROM DataSetDDG WHERE DataSetID=%s', parameters=(dataset_ID,))
    colortext.warning('Starting with %d records.' % (len(records)))
    mutation_count = {1:0, 2:0, 3:0, 4:0, 5:0}
    for r in records:

        mutation_is_reversed = r['MutationIsReversed'] == 1
        d = dict(
            _DataSetDDGID = r['ID'],
            RecordID = r['RecordNumber'],
            AggregateType = r['AggregateType'],
            DDG = r['PublishedValue'],
            PDBFileID = r['PDBFileID'],
            DerivedMutation = mutation_is_reversed,
        )

        # Parse PDB
        if not(cached_pdbs.get(r['PDBFileID'])):
            cached_pdbs[r['PDBFileID']] = PDB(ddGdb.execute_select('SELECT Content FROM PDBFile WHERE ID=%s', parameters=(r['PDBFileID'],))[0]['Content'])

        # Store PDB data
        PDBResolution = None
        PDBMethodOfDetermination = None
        try:
            PDBResolution = cached_pdbs[r['PDBFileID']].get_resolution()
        except Exception: pass
        try:
            PDBMethodOfDetermination = cached_pdbs[r['PDBFileID']].get_techniques()
        except Exception: pass
        pdb_data[r['PDBFileID']] = dict(
            Resolution = PDBResolution,
            MethodOfDetermination = PDBMethodOfDetermination,
        )

        assay_DDGs = ddGdb.execute_select('''
            SELECT *
            FROM DataSetDDGSource
            INNER JOIN ExperimentAssayDDG ON DataSetDDGSource.ExperimentAssayID = ExperimentAssayDDG.ExperimentAssayID AND DataSetDDGSource.Type = ExperimentAssayDDG.Type
            INNER JOIN ExperimentAssay ON ExperimentAssayDDG.ExperimentAssayID = ExperimentAssay.ID
            WHERE DataSetDDGID=%s''', parameters=(r['ID'],))
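        # Note: this join gathers every experimental assay DDG value backing the dataset record,
        # matching on both ExperimentAssayID and Type so that each DataSetDDGSource row resolves
        # to exactly one assay measurement.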

        ExperimentID = set([a['ExperimentID'] for a in assay_DDGs])
        if len(ExperimentID) != 1:
            colortext.message('%d records passed' % len(record_data))
            # Cases where 1FLV and 1FTG need to be elided
            if sorted(ExperimentID) in ([113699, 113830], [113704, 113832], [113705, 113836]):
                ExperimentID = [sorted(ExperimentID)[0]]
            elif sorted(ExperimentID) in ([112149, 112591],):
                # ExperimentID is used below for mutation details but these agree in this case. 1LZ1, 2BQA
                ExperimentID = [sorted(ExperimentID)[0]]
            elif sorted(ExperimentID) in (
                    [112141, 112583L], [112136, 112578], [112137, 112579], [112142, 112584], [112139, 112581],
                    [112140, 112582], [112146, 112588], [112147, 112589], [112148, 112590]
                ):
                # ExperimentID is used below for mutation details but these agree in this case. 1REX, 2BQA
                ExperimentID = [sorted(ExperimentID)[0]]
            elif sorted(ExperimentID) in ([112227, 112323], [112288, 113039], [111587, 112379]):
                # ExperimentID is used below for mutation details but these agree in this case. 2LZM, 1L63
                ExperimentID = [sorted(ExperimentID)[0]]
            else:
                colortext.warning(
                    '\n'.join(['%(PDBFileID)s %(Chain)s %(WildTypeAA)s %(ResidueID)s %(MutantAA)s' % rii for rii in ddGdb.execute_select('''
                    SELECT * FROM `ExperimentMutation` INNER JOIN Experiment ON Experiment.ID=ExperimentID WHERE `ExperimentID` IN (%s)''' % ','.join(map(str, ExperimentID)))]))
                pprint.pprint(r)
                colortext.error(map(int, ExperimentID))
                #pprint.pprint(assay_DDGs)
                print(sorted(ExperimentID))
        assert(len(ExperimentID) == 1)
        ExperimentID = ExperimentID.pop()
        d['_ExperimentID'] = ExperimentID

        experimental_DDGs = []
        for a in assay_DDGs:
            experimental_DDGs.append(dict(
                DDG = a['Value'],
                DDGType = a['Type'],
                Publication = a['Publication'],
                LocationOfValueInPublication = a['LocationOfValueInPublication'],
                Temperature = a['Temperature'],
                pH = a['pH'],
            ))
            # Store Publication data
            pub_data[a['Publication']] = cached_publications[a['Publication']]
        d['ExperimentalDDGs'] = experimental_DDGs

        # Retrieve mutations
        mutation_records = ddGdb.execute_select('SELECT * FROM ExperimentMutation WHERE ExperimentID=%s ORDER BY ResidueID', parameters=(ExperimentID,))
        if dataset_ID == "AlaScan-GPK_2014/09/25":
            assert(len(mutation_records) == 1)

        mutations = []
        failed_check = False
        mutation_count[len(mutation_records)] += 1
        for mutation in mutation_records:
            mutation_d = {}
            #if ExperimentID == 109911:
            #    d['PDBFileID'] = '1WQ5' # Hack for one 1BKS case

            mutation_d['Chain'] = mutation['Chain']
            mutation_d['ResidueID'] = mutation['ResidueID']
            if mutation_is_reversed:
                mutation_d['MutantAA'] = mutation['WildTypeAA']
                mutation_d['WildTypeAA'] = mutation['MutantAA']
            else:
                mutation_d['WildTypeAA'] = mutation['WildTypeAA']
                mutation_d['MutantAA'] = mutation['MutantAA']

            if dataset_ID == "AlaScan-GPK_2014/09/25":
                if d['PDBFileID'] == '1LMB':
                    mutation_d['Chain'] = '3' # Hack for the PDB replacement 1LRP (3.2A) -> 1LMB (1.8A)
                if d['PDBFileID'] == '1U5P' and int(mutation_d['ResidueID']) < 1600:
                    mutation_d['ResidueID'] = str(int(mutation_d['ResidueID']) + 1762) # Hack for the PDB replacement 1AJ3, NMR -> 1U5P (2A)
            if dataset_ID == "Kellogg_10.1002/prot.22921_2010/12/03":
                if d['PDBFileID'] == '1U5P' and int(mutation_d['ResidueID']) < 1600:
                    mutation_d['ResidueID'] = str(int(mutation_d['ResidueID']) + 1762) # Hack for the PDB replacement 1AJ3, NMR -> 1U5P (2A)

            mutated_residue = ddGdb.execute_select('SELECT * FROM PDBResidue WHERE PDBFileID=%s AND Chain=%s AND ResidueID=%s', parameters=(d['PDBFileID'], mutation_d['Chain'], ResidueID2String(mutation_d['ResidueID'])))
            if len(mutated_residue) == 0:
                colortext.warning('Skipping Experiment #%d (%s) in %s due to missing residue %s.' % (ExperimentID, d['PDBFileID'], dataset_ID, mutation_d['ResidueID']))
                #print('SELECT * FROM PDBResidue WHERE PDBFileID=%s AND Chain=%s AND ResidueID=%s' % (d['PDBFileID'], mutation_d['Chain'], ResidueID2String(mutation_d['ResidueID'])))
                #pprint.pprint(d)
                #pprint.pprint(mutations)
                #pprint.pprint(mutation_d)
                #print(ExperimentID)
                #print(mutated_residue)
                #print(10*'*')
                #print('\n')
                failure_count += 1
                failed_check = True
                break
            assert(len(mutated_residue) == 1)

            mutated_residue = mutated_residue[0]
            mutation_d['DSSPExposure'] = mutated_residue['MonomericExposure']
            mutation_d['DSSPType'] = mutated_residue['MonomericDSSP']
            mutation_d['DSSPSimpleSSType'] = dssp_elision.get(mutation_d['DSSPType'])
            assert(mutation_d['DSSPType'] != None)
            assert(mutation_d['DSSPSimpleSSType'] != None)
            mutations.append(mutation_d)

        if failed_check:
            print('FAILED CHECK')
            continue
        d['Mutations'] = mutations

        if dataset_ID == "Potapov_10.1093/protein/gzp030_2009/09/01":
            key = '%s_%s_%s' % (d['PDBFileID'], '+'.join(['%s:%s:%s' % (mutation_d['Chain'], mutation_d['ResidueID'].strip(), mutation_d['MutantAA']) for mutation_d in mutations]), d['RecordID'])
        else:
            key = '%s_%s' % (d['PDBFileID'], '+'.join(['%s:%s:%s' % (mutation_d['Chain'], mutation_d['ResidueID'].strip(), mutation_d['MutantAA']) for mutation_d in mutations]))
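        # Note: the Potapov key above includes the record ID because that dataset presumably
        # contains repeated PDB/mutation combinations; all other datasets key on PDB + mutations
        # alone, so a repeated key is reported as a collision below.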

        if record_data.get(key):
            colortext.warning('KEY EXISTS: %s' % key)
            print('Existing record: %s' % pprint.pformat(record_data[key]))
            print('New record: %s' % pprint.pformat(d))
            failure_count += 1
        record_data[key] = d

    colortext.message('Mutation count')
    colortext.warning(pprint.pformat(mutation_count))

    if failure_count > 0:
        colortext.error('Total length of dataset: %d. Failed on %d records.' % (len(record_data), failure_count))
    else:
        colortext.message('Total length of dataset: %d. ' % (len(record_data)))

    record_list = []
    for k, v in sorted(record_data.iteritems()):
        record_list.append(v)

    colortext.message('Adding dataset %s with %d records, %d PDB files, and %d references.' % (dataset_ID, len(record_list), len(pdb_data), len(pub_data)))
    JSON_datasets[dataset_ID]['data'] = record_list
Пример #31
0
def check_existing_complexes_by_name():
    '''Check whether any of the complexes exist in the database.'''

    # Ran is short for "RAs-related Nuclear protein" and is also known as "GTP-binding nuclear protein Ran"
    ppi_api = get_ppi_api()
    ids = ppi_api.get_complex_ids_matching_protein_name('gsp')
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('ran'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('ras'))

    # This gives us these complexes, amongst others:
    #
    # 77
    # Ran GTPase-GDP, Ran GTPase-GDP, Ran GTPase-GDP
    # Importin beta-1 subunit, Importin β1, Importin &beta;1
    #
    # 119
    # Ran GTPase, Ran GTPase, Ran GTPase
    # Ran GAP, Ran GAP, Ran GAP
    #
    # 176
    # Ran GTPase-GDP, Ran GTPase-GDP, Ran GTPase-GDP
    # Regulator of chromosome condensation, RCC1, RCC1
    #
    # 202
    # Ran GTPase-GDP, Ran GTPase-GDP, Ran GTPase-GDP
    # Nuclear transport factor 2, NTF2, NTF2
    #
    # 29
    # Ras GTPase.GDP, Ras GTPase.GDP, Ras GTPase.GDP
    # Ras GAP, Ras GAP, Ras GAP
    #
    # 65
    # Ras GTPase.GTP, H-Ras, H-Ras
    # Son of sevenless-1, Sos, Sos
    #
    # 201
    # Ras GTPase, Ras GTPase, Ras GTPase
    # Phosphoinositide 3-kinase, PI3K, PI3K
    #
    # 280
    # Ras.GNP, Ras.GNP, Ras.GNP
    # RalGDS Ras-interacting domain, RalGDS RID, RalGDS RID

    ids = []
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('importin'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('KARYOPHERIN'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('TRANSPORTIN'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('NTF2'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('YRB1P'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('RANBP1'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('EXP5'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('CSE1'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('RANGAP'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('RANBP2'))
    ids.extend(ppi_api.get_complex_ids_matching_protein_name('RCC1'))

    for id in ids:
        d = ppi_api.get_complex_details(id)
        colortext.warning(id)
        print('{0}, {1}, {2}'.format(d['LName'].encode('utf-8').strip(), d['LShortName'].encode('utf-8').strip(), d['LHTMLName'].encode('utf-8').strip()))
        print('{0}, {1}, {2}'.format(d['RName'].encode('utf-8').strip(), d['RShortName'].encode('utf-8').strip(), d['RHTMLName'].encode('utf-8').strip()))

    # This gives us these complexes:
    #
    # 77
    # Ran GTPase-GDP, Ran GTPase-GDP, Ran GTPase-GDP
    # Importin beta-1 subunit, Importin β1, Importin &beta;1
    #
    # 202
    # Ran GTPase-GDP, Ran GTPase-GDP, Ran GTPase-GDP
    # Nuclear transport factor 2, NTF2, NTF2
    #
    # 176
    # Ran GTPase-GDP, Ran GTPase-GDP, Ran GTPase-GDP
    # Regulator of chromosome condensation, RCC1, RCC1
    #
    # SELECT DISTINCT `PDBFileID` FROM `PPIPDBPartnerChain` WHERE `PPComplexID` IN (77, 202, 176)
    # returns
    # 1F59, 1IBR, 1QG4, 1A12, 1OUN and 1I2M, 1A2K
    #
    # Some of these are unbound. Get the complexes:
    #
    # SELECT DISTINCT `PDBFileID` FROM `PPIPDBPartnerChain`
    # INNER JOIN PPIPDBSet ON PPIPDBPartnerChain.PPComplexID=PPIPDBSet.PPComplexID AND PPIPDBPartnerChain.SetNumber=PPIPDBSet.SetNumber
    # WHERE PPIPDBPartnerChain.PPComplexID IN (77, 202, 176) AND IsComplex=1
    #
    # returns only three hits:
    #  complex #77  -> 1IBR (A|B);
    #  complex #176 -> 1I2M (A|B) where Tina uses A|B (chains may be renamed); and
    #  complex #202 -> 1A2K (C|AB) where Tina uses A|B (chains may be renamed).
    #
    # We also have:
    #  complex #119 -> 1K5D (AB|C) where Tina uses A|B
    #
    # 1IBR -> Ran (human)|Importin β1 (human)
    # Tina has:
    #    2BKU -> RAN (dog)|Importin β1 (yeast)
    #    3EA5 -> RAN (human)|Importin β1 (yeast)
    # 3EA5 and 1IBR do not match on chains B at all and have one mutation in chain A
    # Similarly for 2BKU and 1IBR.
    #
    # However what came out of this is that 3EA5 and 2BKU are related i.e. that RAN is almost the same sequence in both.
    # The only differences are one mutation in chain A (index 40, A->P) and the fact that 3EA5 has a longer sequence for chain A.
    #
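    # A minimal sketch of running the bound-complex query above from Python, assuming the same
    # ddGdb.execute_select helper used elsewhere in this collection (table and column names as
    # quoted in the comment above):
    #
    #   bound_pdbs = ddGdb.execute_select('''
    #       SELECT DISTINCT PDBFileID FROM PPIPDBPartnerChain
    #       INNER JOIN PPIPDBSet
    #           ON PPIPDBPartnerChain.PPComplexID = PPIPDBSet.PPComplexID
    #          AND PPIPDBPartnerChain.SetNumber = PPIPDBSet.SetNumber
    #       WHERE PPIPDBPartnerChain.PPComplexID IN (77, 202, 176) AND IsComplex=1''')
    #   print(sorted(set(row['PDBFileID'] for row in bound_pdbs)))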

    colortext.message('\n\n1IBR')
    p1 = PDB(retrieve_pdb('1IBR'))
    pprint.pprint(p1.seqres_sequences)
    colortext.message('\n\n2BKU')
    p2 = PDB(retrieve_pdb('2BKU'))
    pprint.pprint(p2.seqres_sequences)
    a1 = str(p1.seqres_sequences['A'])
    a2 = str(p2.seqres_sequences['A'])
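    # Note: the hard-coded sequences below override the SEQRES-derived a1/a2 above; each
    # variable is also assigned twice, so only the second (longer) string of each pair is
    # actually used in the comparison that follows.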

    #3EA5
    a1 = 'MAAQGEPQVQFKLVLVGDGGTGKTTFVKRHLTGEFEKKYVATLGVEVHPLVFHTNRGPIKFNVWDTAGQEKFGGLRDGYYIQAQCAIIMFDVTSRVTYKNVPNWHRDLVRVCENIPIVLCGNKVDIKDRKVKAKSIVFHRKKNLQYYDISAKSNYNFEKPFLWLARKLIGDPNLEFVAMPCLAPPEVVMDPALAAQYEHDLEVAQTTALPDEDDDL'
    a1 = 'MSTAEFAQLLENSILSPDQNIRLTSETQLKKLSNDNFLQFAGLSSQVLIDENTKLEGRILAALTLKNELVSKDSVKTQQFAQRWITQVSPEAKNQIKTNALTALVSIEPRIANAAAQLIAAIADIELPHGAWPELMKIMVDNTGAEQPENVKRASLLALGYMCESADPQSQALVSSSNNILIAIVQGAQSTETSKAVRLAALNALADSLIFIKNNMEREGERNYLMQVVCEATQAEDIEVQAAAFGCLCKIMSKYYTFMKPYMEQALYALTIATMKSPNDKVASMTVEFWSTICEEEIDIAYELAQFPQSPLQSYNFALSSIKDVVPNLLNLLTRQNEDPEDDDWNVSMSAGACLQLFAQNCGNHILEPVLEFVEQNITADNWRNREAAVMAFGSIMDGPDKVQRTYYVHQALPSILNLMNDQSLQVKETTAWCIGRIADSVAESIDPQQHLPGVVQACLIGLQDHPKVATNCSWTIINLVEQLAEATPSPIYNFYPALVDGLIGAANRIDNEFNARASAFSALTTMVEYATDTVAETSASISTFVMDKLGQTMSVDENQLTLEDAQSLQELQSNILTVLAAVIRKSPSSVEPVADMLMGLFFRLLEKKDSAFIEDDVFYAISALAASLGKGFEKYLETFSPYLLKALNQVDSPVSITAVGFIADISNSLEEDFRRYSDAMMNVLAQMISNPNARRELKPAVLSVFGDIASNIGADFIPYLNDIMALCVAAQNTKPENGTLEALDYQIKVLEAVLDAYVGIVAGLHDKPEALFPYVGTIFQFIAQVAEDPQLYSEDATSRAAVGLIGDIAAMFPDGSIKQFYGQDWVIDYIKRTRSGQLFSQATKDTARWAREQQKRQLSL'
    #2BKU
    a2 = 'MAAQGEPQVQFKLVLVGDGGTGKTTFVKRHLTGEFEKKYVPTLGVEVHPLVFHTNRGPIKFNVWDTAGQEKFGGLRDGYYIQAQCAIIMFDVTSRVTYKNVPNWHRDLVRVCENIPIVLCGNKVDIKDRKVKAKSIVFHRKKNLQYYDISAKSNYNFEKPFLWLARKLIGDPNLEFV'
    a2 = 'MSTAEFAQLLENSILSPDQNIRLTSETQLKKLSNDNFLQFAGLSSQVLIDENTKLEGRILAALTLKNELVSKDSVKTQQFAQRWITQVSPEAKNQIKTNALTALVSIEPRIANAAAQLIAAIADIELPHGAWPELMKIMVDNTGAEQPENVKRASLLALGYMCESADPQSQALVSSSNNILIAIVQGAQSTETSKAVRLAALNALADSLIFIKNNMEREGERNYLMQVVCEATQAEDIEVQAAAFGCLCKIMSKYYTFMKPYMEQALYALTIATMKSPNDKVASMTVEFWSTICEEEIDIAYELAQFPQSPLQSYNFALSSIKDVVPNLLNLLTRQNEDPEDDDWNVSMSAGACLQLFAQNCGNHILEPVLEFVEQNITADNWRNREAAVMAFGSIMDGPDKVQRTYYVHQALPSILNLMNDQSLQVKETTAWCIGRIADSVAESIDPQQHLPGVVQACLIGLQDHPKVATNCSWTIINLVEQLAEATPSPIYNFYPALVDGLIGAANRIDNEFNARASAFSALTTMVEYATDTVAETSASISTFVMDKLGQTMSVDENQLTLEDAQSLQELQSNILTVLAAVIRKSPSSVEPVADMLMGLFFRLLEKKDSAFIEDDVFYAISALAASLGKGFEKYLETFSPYLLKALNQVDSPVSITAVGFIADISNSLEEDFRRYSDAMMNVLAQMISNPNARRELKPAVLSVFGDIASNIGADFIPYLNDIMALCVAAQNTKPENGTLEALDYQIKVLEAVLDAYVGIVAGLHDKPEALFPYVGTIFQFIAQVAEDPQLYSEDATSRAAVGLIGDIAAMFPDGSIKQFYGQDWVIDYIKRTRSGQLFSQATKDTARWAREQQKRQLSL'
    print(a1 == a2)
    if not a1 == a2:
        # horribly inefficient (casting to str each time) but not worth rewriting
        assert(len(a1) == len(a2))
        for x in range(len(a1)):
            if str(a1)[x] != str(a2)[x]:
                print(x, str(a1)[x], str(a2)[x])
        # one mutation A->C near the end of the sequence: VAMPALAP -> VAMPCLAP

    assert(str(p1.seqres_sequences['A']) == str(p1.seqres_sequences['C']))
    assert(str(p1.seqres_sequences['B']) == str(p1.seqres_sequences['D']))
    assert(str(p2.seqres_sequences['A']) == str(p2.seqres_sequences['C']))
    assert(str(p2.seqres_sequences['B']) == str(p2.seqres_sequences['D']))
    print('')
Пример #32
0
def main(FixedIDs = [], radii = [6.0, 7.0, 8.0, 9.0]):
    max_processors = get_number_of_processors()

    rescore_process_file = "/tmp/klab_rescore.txt"
    parser = OptionParser()
    parser.add_option("-n", "--numprocesses", default=1, type='int', dest="num_processes", help="The number of processes used for the rescoring. The cases are split according to this number.", metavar="NUM_PROCESSES")
    parser.add_option("-p", "--process", default=1, type='int', dest="process", help="The ID of this process. This should be an integer between 1 and the number of processes used for the rescoring.", metavar="PROCESS_ID")
    parser.add_option("-d", "--delete",  action="store_true", dest="delete", help="Delete the process tracking file %s." % rescore_process_file)
    parser.add_option("-s", "--set",  type='string', dest="prediction_set", help="The prediction set to rescore.")
    (options, args) = parser.parse_args()

    if options.delete and os.path.exists(rescore_process_file):
        print("Removing %s." % rescore_process_file)
        os.remove(rescore_process_file)

    num_processes = options.num_processes
    prediction_set = options.prediction_set
    process_id = options.process

    for i in FixedIDs:
        assert(type(i) == type(1))

    # SELECT * FROM `Prediction` WHERE `PredictionSet`= 'RosCon2013_P16_score12prime'  AND Status='done' LIMIT 1
    # Check prediction set
    if not prediction_set:
        raise colortext.Exception("A prediction set must be specified.")
    else:
        if FixedIDs:
            results = ddGdb.execute("SELECT DISTINCT PredictionSet FROM Prediction WHERE ID IN (%s)" % ",".join(map(str, FixedIDs)))
            if len(results) != 1:
                raise colortext.Exception("Error: The fixed IDs cover %d different prediction sets." % len(results))
        else:
            results = ddGdb.execute("SELECT ID FROM PredictionSet WHERE ID=%s", parameters=(prediction_set,))
        if not results:
            raise colortext.Exception("The prediction set '%s' does not exist in the database." % prediction_set)

    if num_processes < 1:
        raise colortext.Exception("At least 1 processor must be used.")
    if num_processes > max_processors:
        raise colortext.Exception("Only %d processors/cores were detected. Cannot run with %d processes." % (max_processors, num_processes))
    if num_processes > (max_processors * 0.75):
        colortext.warning("Warning: Using %d processors/cores out of %d which is %0.2f%% of the total available." % (num_processes, max_processors, (100.0*float(num_processes)/float(max_processors))))
    if not(1 <= process_id <= min(max_processors, num_processes)):
        raise colortext.Exception("The process ID %d must be between 1 and the number of processes, %d." % (process_id, num_processes))

    if os.path.exists(rescore_process_file):
        lines = readFileLines(rescore_process_file)
        idx = lines[0].find("numprocesses")
        if idx == -1:
            raise Exception("Badly formatted %s." % rescore_process_file)
        existing_num_processes = int(lines[0][idx+len("numprocesses"):])
        if existing_num_processes != num_processes:
            raise colortext.Exception("You specified the number of processes to be %d but %s already specifies it as %d." % (num_processes, rescore_process_file, existing_num_processes))
        for line in [line for line in lines[1:] if line.strip()]:
            idx = line.find("process")
            if idx == -1:
                raise colortext.Exception("Badly formatted %s. Line is '%s'." % (rescore_process_file, line))
            existing_process = int(line[idx+len('process'):])
            if process_id == existing_process:
                raise colortext.Exception("Process %d is already logged as running. Check if this is so and edit %s." % (process_id, rescore_process_file))
        F = open(rescore_process_file, 'a')
        F.write("process %d\n" % process_id)
        F.close()
    else:
        F = open(rescore_process_file, 'w')
        F.write("numprocesses %d\n" % num_processes)
        F.write("process %d\n" % process_id)
        F.close()
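    # Note: rescore_process_file acts as a simple manual lock file: the first process records
    # the total process count and every process appends its own ID, so starting two copies with
    # the same --process value (or a mismatched --numprocesses) is caught by the checks above.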

    output_dir = os.path.join('rescoring', str(process_id))
    if not(os.path.exists(output_dir)):
        os.makedirs(output_dir)
    abs_output_dir = os.path.abspath(os.path.join(os.getcwd(), output_dir))
    print("Running process in %s.\n" % abs_output_dir)

    ReallyFixedIDs = False

    results = ddGdb.execute("SELECT ID, ExperimentID, Scores FROM Prediction WHERE PredictionSet=%s AND Status='done' AND ScoreVersion <> %s", parameters=(prediction_set, float(current_score_revision),))
    if not(FixedIDs) and results:
        raise WrongScoreRevisionException("Score versions found which are not %s. Need to update table structure." % current_score_revision)
    else:
        # Hacky way to run multiple processes
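        # Note: work is split across processes by prediction ID modulus, i.e. process k
        # (1-based) handles rows where ID % num_processes == k - 1. The ReallyFixedIDs branch
        # is disabled (False above) and relies on a manually supplied 'remaining_unscored' list.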
        if ReallyFixedIDs:
            num_to_score = len(remaining_unscored)
            num_for_this_to_score = num_to_score / num_processes
            IDs_to_score = remaining_unscored[(process_id-1) * num_for_this_to_score : (process_id) * num_for_this_to_score]
            results = ddGdb.execute("SELECT ID, ExperimentID, Scores, UserDataSetExperimentID FROM Prediction WHERE ID IN (%s)" % (",".join(map(str, IDs_to_score))))
        elif FixedIDs:
            results = ddGdb.execute("SELECT ID, ExperimentID, Scores, UserDataSetExperimentID FROM Prediction WHERE ID IN (%s) AND MOD(ID,%s)=%s" % (",".join(map(str, FixedIDs)), num_processes,process_id-1))
        else:
            results = ddGdb.execute("SELECT ID, ExperimentID, Scores, UserDataSetExperimentID FROM Prediction WHERE PredictionSet=%s AND Status='done' AND ScoreVersion=%s AND MOD(ID,%s)=%s", parameters=(prediction_set, float(current_score_revision),num_processes,process_id-1))

    count = 0
    cases_computed = 0
    total_time_in_secs = 0

    number_of_cases_left = len(results) * len(radii)

    failed_cases = []
    colortext.printf("Rescoring %d predictions over %d radii...\n" % (len(results), len(radii)), 'lightgreen')
    for r in results:
        t = Timer()
        t.add('Preamble')
        inner_count = 0

        mutations = ddGdb.execute('SELECT * FROM ExperimentMutation WHERE ExperimentID=%s', parameters=(r['ExperimentID'],))
        mutation_str = ', '.join(['%s %s%s%s' % (m['Chain'], m['WildTypeAA'], m['ResidueID'], m['MutantAA']) for m in mutations])
        extracted_data = False

        details = ddGdb.execute_select('SELECT Prediction.ID, PDBFileID, Chain FROM Prediction INNER JOIN Experiment ON Prediction.ExperimentID=Experiment.ID INNER JOIN ExperimentChain ON Prediction.ExperimentID=ExperimentChain.ExperimentID WHERE Prediction.ID=%s', parameters=(r['ID'],))
        colortext.message("Prediction: %d, %s chain %s. Mutations: %s. Experiment ID #%d. UserDataSetExperimentID #%d." % (details[0]['ID'], details[0]['PDBFileID'], details[0]['Chain'], mutation_str, r['ExperimentID'], r['UserDataSetExperimentID']))

        experiment_pdbID = ddGdb.execute('SELECT PDBFileID FROM Experiment WHERE ID=%s', parameters=(r['ExperimentID'],))[0]['PDBFileID']
        print('Experiment PDB file ID = %s' % experiment_pdbID)
        pdbID = ddGdb.execute('SELECT UserDataSetExperiment.PDBFileID FROM Prediction INNER JOIN UserDataSetExperiment ON UserDataSetExperimentID=UserDataSetExperiment.ID WHERE Prediction.ID=%s', parameters=(r['ID'],))[0]['PDBFileID']
        print('UserDataSetExperiment PDB file ID = %s' % pdbID)

        count += 1
        if True:#len(mutations) == 1:
            timestart = time.time()

            #mutation = mutations[0]
            dbchains = sorted(set([mutation['Chain'] for mutation in mutations]))
            # todo: note: assuming monomeric structures here
            assert(len(dbchains) == 1)
            dbchain = dbchains[0]
            #mutantaa = mutation['MutantAA']

            ddG_dict = json.loads(r['Scores'])
            kellogg_ddG = ddG_dict['data']['kellogg']['total']['ddG']

            #assert(ddG_dict['version'] == current_score_revision)

            all_done = True
            for radius in radii:
                score_name = ('noah_%0.1fA' % radius).replace(".", ",")
                if not(ddG_dict['data'].get(score_name)):
                    all_done = False
                else:
                    cases_computed += 1
                    number_of_cases_left -= 1
            if all_done:
                print('Prediction %d: done.' % r["ID"])
                continue

            # Extract data
            t.add('Grab data')
            #archivefile = None
            #prediction_data_path = ddGdb.execute('SELECT Value FROM _DBCONSTANTS WHERE VariableName="PredictionDataPath"')[0]['Value']
            #job_data_path = os.path.join(prediction_data_path, '%d.zip' % r['ID'])
            #print(job_data_path)
            #assert(os.path.exists(job_data_path))
            #archivefile = readBinaryFile(job_data_path)
            archivefile = DDG_interface.getData(r['ID'])
            zipfilename = os.path.join(output_dir, "%d.zip" % r['ID'])
            F = open(zipfilename, "wb")
            F.write(archivefile)
            F.close()

            t.add('Extract data')
            zipped_content = zipfile.ZipFile(zipfilename, 'r', zipfile.ZIP_DEFLATED)
            tmpdir = None
            repacked_files = []
            mutant_files = []

            rosetta_resids = []
            try:
                tmpdir = makeTemp755Directory(output_dir)
                highestIndex = -1
                foundResfile = False
                foundMutfile = False

                presumed_mutation = None
                for fname in sorted(zipped_content.namelist()):
                    if fname.endswith(".pdb"):
                        if fname.startswith("%s/mut_" % r['ID']) or fname.startswith("%s/repacked_" % r['ID']):
                            structnum = int(fname[fname.rindex('_')+1:-4])
                            if fname.startswith("%s/mut_" % r['ID']):
                                if presumed_mutation:
                                    assert(presumed_mutation == os.path.split(fname)[1].split('_')[1])
                                else:
                                    presumed_mutation = os.path.split(fname)[1].split('_')[1]
                                newfname = 'mutant_%02d' % structnum
                            if fname.startswith("%s/repacked_" % r['ID']):
                                newfname = 'repacked_%02d' % structnum
                            highestIndex = max(highestIndex, structnum)

                            newfilepath = os.path.join(tmpdir, newfname)
                            writeFile(newfilepath, zipped_content.read(fname))

                            if fname.startswith("%s/mut_" % r['ID']):
                                mutant_files.append(newfilepath)
                            if fname.startswith("%s/repacked_" % r['ID']):
                                repacked_files.append(newfilepath)
                        #elif fname.startswith("%s/%s-%s" % (r['ID'],r['ExperimentID'],pdbID)) or fname.startswith("%s/repacked_" % r['ID']):
                        #    writeFile(os.path.join(tmpdir, '%s.pdb' % pdbID), zipped_content.read(fname))
                    if fname.startswith("%s/%s-%s.resfile" % (r['ID'],r['ExperimentID'],experiment_pdbID)):
                        raise Exception('This case needs to be updated (see the mutfile section below). We mainly use mutfiles now so I did not update this section.')
                        foundResfile = True
                        lines = zipped_content.read(fname).split("\n")
                        assert(len(lines) == 3)
                        assert(lines[0] == "NATAA")
                        assert(lines[1] == "start")
                        resfile_mutation = lines[2].split(" ")
                        assert(len(resfile_mutation) == 4)
                        rosetta_resid = resfile_mutation[0]
                        rosetta_chain = resfile_mutation[1]
                        rosetta_mutaa = resfile_mutation[3]
                        assert(mutantaa == rosetta_mutaa)
                        assert(dbchain == rosetta_chain)
                        assert(resfile_mutation[2] == 'PIKAA')
                        assert(len(rosetta_mutaa) == 1)
                    if fname.startswith("%s/%s-%s.mutfile" % (r['ID'],r['ExperimentID'],experiment_pdbID)):
                        foundMutfile = True
                        lines = zipped_content.read(fname).split("\n")
                        assert(lines[0].startswith('total '))
                        num_mutations = int(lines[0][6:])
                        assert(lines[1] == str(num_mutations))
                        # todo: note: assuming monomeric structures here
                        rosetta_chain = ddGdb.execute("SELECT Chain FROM ExperimentChain WHERE ExperimentID=%s", parameters=(r['ExperimentID'],))
                        assert(len(rosetta_chain) == 1)
                        rosetta_chain = rosetta_chain[0]['Chain']

                        resfile_mutations = lines[2:]
                        for resfile_mutation in resfile_mutations:
                            resfile_mutation = resfile_mutation.split(" ")
                            assert(len(resfile_mutation) == 3)
                            rosetta_resids.append(resfile_mutation[1])
                            rosetta_mutaa = resfile_mutation[2]
                            assert(dbchain == rosetta_chain)
                            assert(len(rosetta_mutaa) == 1)

                # Make sure the wtaa->mutantaa types match the structures
                assert(not(foundResfile))
                if not foundMutfile:
                    raise Exception('This case needs to be updated (see the mutfile section below). This was added as a hack for cases where I did not store the mutfile so I did not update this section.')
                    input_files = ddGdb.execute_select('SELECT InputFiles FROM Prediction WHERE ID=%s', parameters=(r['ID'],))
                    assert(len(input_files) == 1)
                    lines = pickle.loads(input_files[0]['InputFiles'])['MUTFILE'].split("\n")

                    #lines = regenerate_mutfile(r['ID']).split("\n")
                    assert(len(lines) == 3)
                    assert(lines[0] == "total 1")
                    assert(lines[1] == "1")
                    resfile_mutation = lines[2].split(" ")
                    assert(len(resfile_mutation) == 3)
                    rosetta_resid = resfile_mutation[1]
                    rosetta_chain = ddGdb.execute("SELECT Chain FROM ExperimentChain WHERE ExperimentID=%s", parameters=(r['ExperimentID'],))
                    assert(len(rosetta_chain) == 1)
                    rosetta_chain = rosetta_chain[0]['Chain']
                    rosetta_mutaa = resfile_mutation[2]
                    assert(dbchain == rosetta_chain)
                    assert(len(rosetta_mutaa) == 1)
                    assert("%s%s%s" % (resfile_mutation[0], resfile_mutation[1], resfile_mutation[2]) == presumed_mutation)

                fullresids = []

                for rosetta_resid in rosetta_resids:
                    fullresid = None
                    if rosetta_resid.isdigit():
                        fullresid = '%s%s%s ' % (rosetta_chain, (4-len(rosetta_resid)) * ' ', rosetta_resid)
                    else:
                        assert(False)
                        fullresid = '%s%s%s' % (rosetta_chain, (5-len(rosetta_resid)) * ' ', rosetta_resid)
                    fullresids.append(fullresid)


                resultst1 = ddGdb.execute_select("SELECT ExperimentID, UserDataSetExperimentID FROM Prediction WHERE ID=%s", parameters = (r['ID'],))
                assert(len(resultst1) == 1)
                ExperimentIDt1 = resultst1[0]['ExperimentID']
                UserDataSetExperimentIDt1 = resultst1[0]['UserDataSetExperimentID']

                if UserDataSetExperimentIDt1:
                    resultst2 = ddGdb.execute_select("SELECT PDBFileID FROM UserDataSetExperiment WHERE ID=%s", parameters = (UserDataSetExperimentIDt1,))
                else:
                    resultst2 = ddGdb.execute_select("SELECT PDBFileID FROM Experiment WHERE ID=%s", parameters = (ExperimentIDt1,))
                assert(len(resultst2) == 1)

                prediction_PDB_ID = resultst2[0]['PDBFileID']

                if False and prediction_PDB_ID not in ['1TEN', '1AYE', '1H7M'] + ['1A2P', '1BNI', '1STN']:
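                    # Note: this block verifies that the wild-type residue recorded in the
                    # database matches the repacked structure, but it is currently disabled by
                    # the leading 'if False' above; the PDB-specific residue-numbering hacks
                    # below only apply when it is re-enabled.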
                    for fullresid in fullresids:
                        wtaa = None
                        for m in mutations:
                            # Hack for ub_RPN13
                            if prediction_PDB_ID == 'ub_RPN13' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 109):
                                wtaa = m['WildTypeAA']
                            # Hack for ub_RPN13_yeast
                            elif prediction_PDB_ID == 'uby_RPN13' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 109):
                                wtaa = m['WildTypeAA']
                            # Hack for ub_OTU
                            elif prediction_PDB_ID == 'ub_OTU' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 172):
                                wtaa = m['WildTypeAA']
                            # Hack for ub_OTU_yeast
                            elif prediction_PDB_ID == 'uby_OTU' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 172):
                                wtaa = m['WildTypeAA']
                            # Hack for ub_UQcon
                            elif prediction_PDB_ID == 'ub_UQcon' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) + 213): # starts at 501
                                wtaa = m['WildTypeAA']
                            # Hack for uby_UQcon
                            elif prediction_PDB_ID == 'uby_UQcon' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 287):
                                wtaa = m['WildTypeAA']
                            elif m['Chain'] == fullresid[0] and m['ResidueID'] == fullresid[1:].strip():
                                wtaa = m['WildTypeAA']
                        if (wtaa == None):
                            colortext.error(prediction_PDB_ID)
                            colortext.error('wtaa == None')
                            colortext.error('fullresid = %s' % str(fullresid))
                            colortext.error(str(mutations))
                            colortext.warning([rosetta_resid.strip() for rosetta_resid in rosetta_resids])
                            #sys.exit(0)
                        assert(wtaa != None)
                        assert(PDB.from_filepath(repacked_files[0]).get_residue_id_to_type_map()[fullresid] == wtaa)
                    #assert(PDB(mutant_files[0]).get_residue_id_to_type_map()[fullresid] == mutantaa)

                for radius in radii:
                    score_name = ('noah_%0.1fA' % radius).replace(".", ",")
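                    # Note: the '.' -> ',' substitution keeps the score key free of periods
                    # (e.g. radius 6.0 is stored as 'noah_6,0A'), presumably to avoid clashing
                    # with dotted key paths downstream.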

                    if ddG_dict['data'].get(score_name):
                        print('Radius %0.1f: done.' % radius)
                        continue
                    cases_computed += 1
                    number_of_cases_left -= 1

                    t.add('Radius %0.3f: repacked' % radius)
                    colortext.printf("Prediction ID: %d. Calculating radius %0.1f. Calculation #%d of %d." % (r['ID'], radius, cases_computed, len(results) * len(radii)), 'orange')

                    repacked_score = NoahScore()
                    repacked_score.calculate(repacked_files, rosetta_chain, sorted([rosetta_resid.strip() for rosetta_resid in rosetta_resids]), radius = radius)
                    colortext.message("Repacked")
                    print(repacked_score)

                    t.add('Radius %0.3f: mutant' % radius)
                    mutant_score = NoahScore()
                    mutant_score.calculate(mutant_files, rosetta_chain, sorted([rosetta_resid.strip() for rosetta_resid in rosetta_resids]), radius = radius)
                    colortext.printf("Mutant", color = 'cyan')
                    print(mutant_score)

                    t.add('Radius %0.3f: postamble' % radius)
                    colortext.printf("ddG", color = 'lightpurple')
                    ddg_score = repacked_score.ddg(mutant_score)
                    print(ddg_score)

                    colortext.printf("Liz's ddG", color = 'yellow')
                    print("Total score: %0.3f" % kellogg_ddG)

                    ddG_dict['version'] = '0.23'
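                    # Note: the unconditional assignment above forces the '0.23' branch, so the
                    # earlier version-migration branches below are effectively dead code.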
                    if ddG_dict['version'] == '0.1':
                        ddG_dict['version'] = '0.21'
                        ddG_dict['data'] = {
                            'kellogg' : {
                                'total' : ddG_dict['data'],
                            },
                            'noah': {
                                'total' : {'ddG' : ddg_score.total},
                                'positional' : {'ddG' : ddg_score.positional},
                                'positional_twoscore' : {'ddG' : ddg_score.positional_twoscore},
                            },
                        }
                    elif ddG_dict['version'] == '0.2':
                        ddG_dict['version'] = '0.21'
                        ddG_dict['data']['noah']['total']['ddG'] = ddg_score.total
                        ddG_dict['data']['noah']['positional']['ddG'] = ddg_score.positional
                        ddG_dict['data']['noah']['positional_twoscore']['ddG'] = ddg_score.positional_twoscore
                    elif ddG_dict['version'] == '0.22':
                        ddG_dict['data'][score_name] = {'total' : {}, 'positional' : {}, 'positional_twoscore' : {}}
                        ddG_dict['data'][score_name]['total']['ddG'] = ddg_score.total
                        ddG_dict['data'][score_name]['positional']['ddG'] = ddg_score.positional
                        ddG_dict['data'][score_name]['positional_twoscore']['ddG'] = ddg_score.positional_twoscore
                    elif ddG_dict['version'] == '0.23':
                        ddG_dict['data'][score_name] = {'total' : {}, 'positional' : {}, 'positional_twoscore' : {}}
                        ddG_dict['data'][score_name]['total']['ddG'] = ddg_score.total
                        ddG_dict['data'][score_name]['positional']['ddG'] = ddg_score.positional
                        ddG_dict['data'][score_name]['positional_twoscore']['ddG'] = ddg_score.positional_twoscore

                    jsonified_ddG = json.dumps(ddG_dict)
                    ddGdb.execute('UPDATE Prediction SET Scores=%s WHERE ID=%s', parameters=(jsonified_ddG, r['ID'],))
                t.add('Cleanup')
                shutil.rmtree(tmpdir)
                os.remove(zipfilename)

            except Exception, e:
                print("Exception! In prediction %d" % r['ID'], str(e))
                failed_cases.append(r['ID'])
                import traceback
                print(traceback.format_exc())
                if tmpdir:
                    shutil.rmtree(tmpdir)

            total_time_in_secs += t.sum()
            average_time_taken = float(total_time_in_secs)/float(cases_computed or 1)
            estimate_remaining_time = number_of_cases_left * average_time_taken

            t.stop()
            colortext.printf("**Profile**", 'orange')
            print(t)
            colortext.message("Time taken for this case: %0.2fs." % t.sum())
            colortext.message("Average time taken per case: %0.2fs." % average_time_taken)
            colortext.message("Estimated time remaining: %dh%dm%ds." % (int(estimate_remaining_time/3600), int((estimate_remaining_time/60) % 60), estimate_remaining_time % 60))
            print("\n")
Пример #33
0
    def printAllEvents(self, calendar_id, year = None):
        colortext.message('Events on Calendar: %s' % (self.get_calendar(calendar_id).summary))
        eventstbl = self.getEventsTable(calendar_id, year)
        for startdateTitle, details in sorted(eventstbl.iteritems()):
            startdate = startdateTitle[0]
            print(("%s -> %s at %s: %s" % (startdate, details["enddate"], details["location"][0:details["location"].find("@")], details["title"])).encode('ascii', 'ignore'))