def compareDistances(actives, decoys):
    """Average the distances between every decoy and the set of actives.

    Fingerprints for both collections are taken from the first element
    returned by ``utilities.getFingerprintList``. For each decoy fingerprint,
    ``utilities.getMolDistFromSet`` is called against all active fingerprints
    and elements 0 and 1 of its result are collected.

    NOTE(review): elements 0 and 1 are presumably the minimum and maximum
    distance to the set -- confirm against ``utilities.getMolDistFromSet``.

    :param actives: collection of active molecules accepted by
        ``utilities.getFingerprintList``
    :param decoys: collection of decoy molecules accepted by
        ``utilities.getFingerprintList``
    :return: tuple ``(mean of element 0, mean of element 1)`` over all decoys
    """
    active_fingerprints = utilities.getFingerprintList(actives)[0]
    decoy_fingerprints = utilities.getFingerprintList(decoys)[0]

    # One distance record per decoy, in the order the decoys are listed.
    per_decoy = [
        utilities.getMolDistFromSet(fingerprint, active_fingerprints)
        for fingerprint in decoy_fingerprints
    ]
    first_components = [record[0] for record in per_decoy]
    second_components = [record[1] for record in per_decoy]

    return numpy.mean(first_components), numpy.mean(second_components)
def playWithResults(results, decoys, actives_test_set):
    """Print summary scores for a fitted model and plot predicted vs. true values.

    The function reports the best and the average cross-validation score,
    the score of the final model on its training data and on the held-out
    actives, and then shows a scatter plot of predicted against true values
    for both sets together with a dashed identity line.

    :param results: mapping that provides ``"scores"`` (sequence of
        cross-validation scores), ``"final_model"`` (object exposing
        ``predict`` and ``score``), ``"fingerprint_data"`` and
        ``"activity_data"`` (training inputs and targets)
    :param decoys: currently unused; kept so existing callers keep working
    :param actives_test_set: mapping from compound id to a record holding an
        ``"ic50"`` value; also passed to ``utilities.getFingerprintList``
    :return: None; output goes to stdout and to a matplotlib window
    """
    test_fps, keys = utilities.getFingerprintList(actives_test_set)
    # NOTE(review): 10e9 is 1e10, not 1e9. If "ic50" is stored in nM the usual
    # pIC50 divisor would be 1e9. Left unchanged on purpose so the values stay
    # consistent with whatever conversion the model was trained with -- confirm.
    test_pic50 = numpy.asarray(
        [-1.0 * numpy.log10(actives_test_set[chmblid]["ic50"] / 10e9) for chmblid in keys]
    )
    test_fps = numpy.asarray(test_fps)

    # Cross-validation summary.
    scores = results["scores"]
    print("Best score: " + str(max(scores)))
    print("Average score: " + str(numpy.mean(scores)))

    # Final model evaluated on the data it was trained on.
    final_model = results["final_model"]
    train_fps = results["fingerprint_data"]
    train_activity = results["activity_data"]
    predicted_train = final_model.predict(train_fps)
    print("Score of final model on the molecules from the training set: " + str(
        final_model.score(train_fps, train_activity)
    ))

    # Final model evaluated on the held-out actives.
    predictions_test_set = final_model.predict(test_fps)
    print("Score of final model on the molecules filtered out during clustering: " + str(
        final_model.score(test_fps, test_pic50)
    ))

    # Axis range: training activity range padded by 0.25 on both sides.
    # The padding is added AFTER taking the maximum, so this also works when
    # the activity data is a plain list rather than a NumPy array.
    span = (min(train_activity) - 0.25, max(train_activity) + 0.25)

    # Dashed identity line: points on it are perfect predictions.
    plt.plot((span[0], span[1]), (span[0], span[1]), linestyle="--")
    plt.plot(
        train_activity,
        predicted_train,
        marker="o",
        linestyle="None",
        label="Performance on the training set",
    )
    plt.plot(
        test_pic50,
        predictions_test_set,
        marker="o",
        linestyle="None",
        label="Performance on the molecules filtered out during clustering (validation set)",
    )
    plt.xlabel("True values")
    plt.ylabel("Predicted values")
    plt.ylim(span)
    plt.xlim(span)
    plt.legend()
    plt.show()
def predict(classmodel, regressmodel, molfile_path):
    """Classify the molecules of an SD file and print a pIC50 for the active ones.

    Each readable record of the file is wrapped in a ``PropertyMol`` and keyed
    by its ``_Name`` property, then fingerprints are attached through
    ``fingerprinter.appendMorganFingerprints``. A molecule is reported as
    active when the classifier's first prediction is truthy AND element 0 of
    ``utilities.getMolDistFromSet`` against the known actives does not exceed
    ``APPLICABILITY_DOMAIN_DISTANCE_THRESHOLD``.

    :param classmodel: classifier exposing ``predict``
    :param regressmodel: regressor exposing ``predict``; its first prediction
        is printed as the predicted pIC50
    :param molfile_path: path of the SD file to read
    :return: None; all results are printed to stdout
    """
    print("Starting predictions for: " + molfile_path)

    mols = dict()
    for record_idx, mol in enumerate(Chem.SDMolSupplier(molfile_path)):
        if mol is None:
            # RDKit suppliers yield None for records they fail to parse;
            # such an entry has no "_Name" property to key on, so skip it.
            print("Skipping unreadable record #" + str(record_idx) + " in " + molfile_path)
            continue
        pmol = PropertyMol.PropertyMol(mol)
        mols[pmol.GetProp("_Name")] = {"RDKit": pmol}
    fingerprinter.appendMorganFingerprints(mols, dump=None)

    # NOTE: unpickling executes arbitrary code; ACTIVES_DUMP must be a trusted file.
    with open(ACTIVES_DUMP, 'rb') as dump_file:
        actives = pickle.load(dump_file)
    # The reference fingerprints do not depend on the query molecule, so they
    # are computed once instead of once per loop iteration.
    fingerprints_actives = utilities.getFingerprintList(actives)[0]

    found_sth = False
    for mol in mols:
        fingerprint = mols[mol]['fingerprint']
        prediction = classmodel.predict(fingerprint)
        min_distance = utilities.getMolDistFromSet(fingerprint, fingerprints_actives)[0]
        # Only trust a positive classification inside the applicability domain.
        if min_distance <= APPLICABILITY_DOMAIN_DISTANCE_THRESHOLD and prediction[0]:
            print(mol + " is active")
            print("Predicted pIC50: " + str(regressmodel.predict(fingerprint)[0]))
            found_sth = True

    if not found_sth:
        print("None of the molecules within the specified set was found to be active.")
def estimateDistanceThreshold(mols):
    """Return the median of the distance data computed for ``mols``.

    The fingerprints are the first element returned by
    ``utilities.getFingerprintList``; they are handed to
    ``utilities.generateDistMatrix`` and the median is taken over the first
    element of that function's result.

    :param mols: collection of molecules accepted by
        ``utilities.getFingerprintList``
    :return: ``numpy.median`` of the distance data
    """
    fingerprint_result = utilities.getFingerprintList(mols)
    matrix_result = utilities.generateDistMatrix(fingerprint_result[0])
    return numpy.median(matrix_result[0])