Exemplo n.º 1
0
def compareDistances(actives, decoys):
    """Average the distances between each decoy and the set of actives.

    Fingerprints are taken from element 0 of
    ``utilities.getFingerprintList`` for both inputs. For every decoy
    fingerprint, ``utilities.getMolDistFromSet`` is called against the
    active fingerprints; elements 0 and 1 of each result are collected
    separately and averaged over all decoys.

    Returns:
        A 2-tuple ``(mean of element 0, mean of element 1)``.
        NOTE(review): elements 0 and 1 are presumably the minimum and
        maximum distance to the set — confirm against ``utilities``.
    """
    reference_fps = utilities.getFingerprintList(actives)[0]
    query_fps = utilities.getFingerprintList(decoys)[0]

    # One (element 0, element 1) pair per decoy, in decoy order.
    pairs = [
        (found[0], found[1])
        for found in (utilities.getMolDistFromSet(fp, reference_fps) for fp in query_fps)
    ]
    first_elements = [pair[0] for pair in pairs]
    second_elements = [pair[1] for pair in pairs]
    return numpy.mean(first_elements), numpy.mean(second_elements)
Exemplo n.º 2
0
def playWithResults(results, decoys, actives_test_set):
    """Report and plot how the final model performs on training and held-out actives.

    Prints the best and the average cross-validation score, the score of the
    final model on its training data and on ``actives_test_set``, and then
    shows a true-vs-predicted scatter plot for both sets.

    Args:
        results: mapping read for the keys ``"scores"`` (a list),
            ``"final_model"`` (object providing ``predict`` and ``score``),
            ``"fingerprint_data"`` and ``"activity_data"``.
        decoys: not used by this function; kept so existing callers keep
            working.
        actives_test_set: mapping from compound id to a record with an
            ``"ic50"`` entry; also passed to
            ``utilities.getFingerprintList``.

    Returns:
        None. Output goes to stdout and to the figure shown by ``plt.show()``.
    """
    test_fps, keys = utilities.getFingerprintList(actives_test_set)
    # NOTE(review): 10e9 equals 1e10, not 1e9. If "ic50" is stored in nM the
    # usual pIC50 divisor would be 1e9 — confirm against how the training
    # activities were computed before changing it; kept as-is here.
    test_pic50 = numpy.asarray(
        [-1.0 * numpy.log10(actives_test_set[chmblid]["ic50"] / 10e9) for chmblid in keys]
    )
    test_fps = numpy.asarray(test_fps)

    # Cross-validation summary.
    scores = results["scores"]
    print("Best score: " + str(max(scores)))
    print("Average score: " + str(numpy.mean(scores)))

    # Final model evaluated on the data it was trained on.
    final_model = results["final_model"]
    train_fps = results["fingerprint_data"]
    train_activity = results["activity_data"]
    predicted_train = final_model.predict(train_fps)
    print(
        "Score of final model on the molecules from the training set: "
        + str(final_model.score(train_fps, train_activity))
    )

    # Final model evaluated on the held-out actives.
    predictions_test_set = final_model.predict(test_fps)
    print(
        "Score of final model on the molecules filtered out during clustering: "
        + str(final_model.score(test_fps, test_pic50))
    )

    # Pad the axis range by 0.25 on each side. The margin is added to the
    # scalar maximum (not to the container), so this also works when
    # "activity_data" is a plain list rather than a numpy array.
    span = (min(train_activity) - 0.25, max(train_activity) + 0.25)

    # Dashed diagonal marks perfect agreement between true and predicted values.
    plt.plot((span[0], span[1]), (span[0], span[1]), linestyle="--")
    plt.plot(
        train_activity,
        predicted_train,
        marker="o",
        linestyle="None",
        label="Performance on the training set",
    )
    plt.plot(
        test_pic50,
        predictions_test_set,
        marker="o",
        linestyle="None",
        label="Performance on the molecules filtered out during clustering (validation set)",
    )
    plt.xlabel("True values")
    plt.ylabel("Predicted values")
    plt.ylim(span)
    plt.xlim(span)
    plt.legend()
    plt.show()
Exemplo n.º 3
0
def predict(classmodel, regressmodel, molfile_path):
    """Screen the molecules of an SD file and print the ones predicted active.

    A molecule is reported when ``classmodel`` predicts a truthy value for
    its fingerprint and element 0 of ``utilities.getMolDistFromSet`` (computed
    against the fingerprints of the actives loaded from ``ACTIVES_DUMP``) is
    at most ``APPLICABILITY_DOMAIN_DISTANCE_THRESHOLD``. For each reported
    molecule the first ``regressmodel`` prediction is printed as its pIC50.

    Args:
        classmodel: object providing ``predict``; element 0 of its result is
            used as the activity flag.
        regressmodel: object providing ``predict``; element 0 of its result
            is printed as the predicted pIC50.
        molfile_path: path handed to ``Chem.SDMolSupplier``.

    Returns:
        None. All results are printed to stdout.
    """
    print("Starting predictions for: " + molfile_path)
    mols = dict()
    for mol in Chem.SDMolSupplier(molfile_path):
        if mol is None:
            # RDKit's supplier yields None for records it cannot parse;
            # skip them instead of crashing in PropertyMol.
            continue
        pmol = PropertyMol.PropertyMol(mol)
        mols[pmol.GetProp("_Name")] = {"RDKit": pmol}
    fingerprinter.appendMorganFingerprints(mols, dump=None)

    # NOTE: unpickling can execute arbitrary code; ACTIVES_DUMP must point to
    # a trusted file. The context manager closes the handle after loading.
    with open(ACTIVES_DUMP, 'rb') as dump_file:
        actives = pickle.load(dump_file)
    # The reference fingerprints do not depend on the molecule being
    # screened, so they are built once rather than on every iteration.
    fingerprints_actives = utilities.getFingerprintList(actives)[0]

    found_sth = False
    for name in mols:
        fingerprint = mols[name]['fingerprint']
        prediction = classmodel.predict(fingerprint)
        min_distance = utilities.getMolDistFromSet(fingerprint, fingerprints_actives)[0]
        if min_distance <= APPLICABILITY_DOMAIN_DISTANCE_THRESHOLD and prediction[0]:
            print(name + " is active")
            print("Predicted pIC50: " + str(regressmodel.predict(fingerprint)[0]))
            found_sth = True
    if not found_sth:
        print("None of the molecules within the specified set was found to be active.")
Exemplo n.º 4
0
def estimateDistanceThreshold(mols):
    """Return the median of the distances computed among the fingerprints of *mols*.

    The fingerprints are element 0 of ``utilities.getFingerprintList(mols)``;
    the distances are element 0 of ``utilities.generateDistMatrix`` applied
    to those fingerprints.
    """
    fingerprint_result = utilities.getFingerprintList(mols)
    distance_result = utilities.generateDistMatrix(fingerprint_result[0])
    return numpy.median(distance_result[0])