示例#1
0
def test_similarity():
    """compute_similarity is order-independent and scores a site against itself as 1."""
    site_a, site_b = (
        io.read_active_site(os.path.join("data", pdb_name))
        for pdb_name in ("276.pdb", "4629.pdb")
    )

    # Swapping the two arguments must give the same value.
    forward = cluster.compute_similarity(site_a, site_b)
    backward = cluster.compute_similarity(site_b, site_a)
    assert forward == backward

    # Each site compared with itself yields exactly 1.
    for site in (site_a, site_b):
        assert cluster.compute_similarity(site, site) == 1
示例#2
0
def test_similarity():
    """Pin compute_similarity for sites 276 vs 4629 and for 276 against itself."""
    data_dir = "data"
    site_276 = io.read_active_site(os.path.join(data_dir, "276.pdb"))
    site_4629 = io.read_active_site(os.path.join(data_dir, "4629.pdb"))

    # Expected value for the 276 / 4629 pair.
    cross_value = cluster.compute_similarity(site_276, site_4629)
    assert cross_value == 0.959349593495935

    # A site compared with itself gives 0.0.
    self_value = cluster.compute_similarity(site_276, site_276)
    assert self_value == 0.0
def test_similarity():
    """The 276 / 4629 pair compares as infinity; 276 against itself rounds to 0.0."""
    pdb_paths = [
        os.path.join("data", pdb_name) for pdb_name in ("276.pdb", "4629.pdb")
    ]
    first_site, second_site = map(io.read_active_site, pdb_paths)

    cross_value = cluster.compute_similarity(first_site, second_site)
    assert cross_value == float("inf")

    # The self-comparison is only checked to one decimal place.
    self_value = cluster.compute_similarity(first_site, first_site)
    assert round(self_value, 1) == 0.0
示例#4
0
def test_similarity():
    """Pin the distance between sites 276 and 4629 and the zero self-distance."""
    path_276 = os.path.join("data", "276.pdb")
    path_4629 = os.path.join("data", "4629.pdb")
    site_276, site_4629 = (
        io.read_active_site(path_276),
        io.read_active_site(path_4629),
    )

    # The distance between the two sites matches the recorded value.
    pair_distance = cluster.compute_similarity(site_276, site_4629)
    assert pair_distance == 26.464581285329228

    # The distance between a site and itself is 0.
    self_distance = cluster.compute_similarity(site_276, site_276)
    assert self_distance == 0
示例#5
0
def test_similarity():
    """Check symmetry, zero self-distance and non-negativity of compute_similarity."""
    filename_a = os.path.join("data", "276.pdb")
    filename_b = os.path.join("data", "4629.pdb")

    activesite_a = io.read_active_site(filename_a)
    activesite_b = io.read_active_site(filename_b)

    dist_ab = cluster.compute_similarity(activesite_a, activesite_b)
    dist_ba = cluster.compute_similarity(activesite_b, activesite_a)

    # The distance between A and B is the same as between B and A.
    assert dist_ab == dist_ba
    # The distance between A and A is 0.
    assert cluster.compute_similarity(activesite_a, activesite_a) == 0.0
    # The distance is never negative. This is checked on the two distinct
    # sites: the self-distance is already pinned to 0.0 above, so testing it
    # again with >= 0.0 could never fail on its own.
    assert dist_ab >= 0.0
示例#6
0
def test_similarity():
    """compute_similarity on the counts vectors equals the norm of their difference."""
    counts_a = io.read_active_site(os.path.join("data", "276.pdb")).counts
    counts_b = io.read_active_site(os.path.join("data", "4629.pdb")).counts

    # The value for the pair equals np.linalg.norm of the vector difference.
    pair_value = cluster.compute_similarity(counts_a, counts_b)
    expected = np.linalg.norm(counts_a - counts_b)
    assert pair_value == expected

    # A vector compared with itself gives 0.
    self_value = cluster.compute_similarity(counts_a, counts_a)
    assert self_value == 0

    # Argument order does not change the result.
    forward = cluster.compute_similarity(counts_a, counts_b)
    backward = cluster.compute_similarity(counts_b, counts_a)
    assert forward == backward
示例#7
0
def test_distance():
    """A site loaded twice from the same file compares to itself as 100.0."""
    first_path = os.path.join("data", "276.pdb")
    second_path = os.path.join("data", "276.pdb")

    # Both objects are read from the same PDB file.
    first_copy, second_copy = (
        io.read_active_site(first_path),
        io.read_active_site(second_path),
    )

    score = cluster.compute_similarity(first_copy, second_copy)
    assert score == 100.0
示例#8
0
def test_similarity():
    """Check reflexivity, symmetry and non-negativity of compute_similarity."""
    filename_a = os.path.join("data", "276.pdb")
    filename_b = os.path.join("data", "4629.pdb")

    activesite_a = io.read_active_site(filename_a)
    activesite_b = io.read_active_site(filename_b)

    # First test: reflexivity of this distance metric
    assert cluster.compute_similarity(activesite_a, activesite_a) == 0.0

    # Second test: symmetric
    trans_1 = cluster.compute_similarity(activesite_a, activesite_b)
    trans_2 = cluster.compute_similarity(activesite_b, activesite_a)
    assert trans_1 == trans_2

    # Third test: non-negativity
    # Each value is compared explicitly. Writing `assert x, y >= 0` would
    # only test the truthiness of x and use `y >= 0` as the failure message.
    assert trans_1 >= 0
    assert trans_2 >= 0
示例#9
0
def test_similarity():
    """Pin compute_similarity with the "hydrophobicity" comparator for 276 vs 4629."""
    site_a, site_b = [
        io.read_active_site(os.path.join("data", pdb_name))
        for pdb_name in ("276.pdb", "4629.pdb")
    ]

    score = cluster.compute_similarity(
        site_a, site_b, comparator="hydrophobicity")
    assert score == 11.1556
示例#10
0
def test_similarity():
    """Pin compute_similarity for sites 46495 and 23812 after get_order_residues."""
    loaded_sites = []
    for pdb_name in ("46495.pdb", "23812.pdb"):
        loaded_sites.append(io.read_active_site(os.path.join("data", pdb_name)))

    # get_order_residues is called once per site, each wrapped in its own
    # one-element list, before the comparison is made.
    for site in loaded_sites:
        cluster.get_order_residues([site])

    first_site, second_site = loaded_sites
    score = cluster.compute_similarity(first_site, second_site)
    assert score == 13
示例#11
0
def assign_single_site_to_cluster(site, centroids):
    '''
    Return the key of the centroid with the smallest compute_similarity value
    to the site's counts.

    site: object exposing a `counts` attribute
    centroids: mapping from cluster key to centroid
    '''
    position = site.counts
    # Map each computed distance to its cluster key. Centroids at exactly the
    # same distance share one entry, and the later key overwrites the earlier.
    key_by_distance = {
        cl.compute_similarity(position, centroids[label]): label
        for label in centroids.keys()
    }
    smallest = min(key_by_distance)
    return key_by_distance[smallest]
示例#12
0
def test_similarity():
    """Check the pinned value, zero self-value, symmetry and positivity."""
    site_a = io.read_active_site(os.path.join("data", "276.pdb"))
    site_b = io.read_active_site(os.path.join("data", "4629.pdb"))

    # The pair's value, rounded to three decimals, matches the recorded one.
    pair_value = compute_similarity(site_a, site_b)
    assert round(pair_value, 3) == 13.857

    # A site compared with itself gives 0.
    assert compute_similarity(site_a, site_a) == 0

    # Argument order does not change the result.
    forward = compute_similarity(site_a, site_b)
    backward = compute_similarity(site_b, site_a)
    assert forward == backward

    # Every ordered pair of entries at different positions in the list read
    # from "data" compares strictly above 0.
    all_sites = read_active_sites("data")
    for row, left in enumerate(all_sites):
        for col, right in enumerate(all_sites):
            if row == col:
                continue
            assert compute_similarity(left, right) > 0
示例#13
0
def compute_similarity_matrix(sites):
    '''
    Copy of the similarity-matrix computation from utils.

    Builds a DataFrame holding cl.compute_similarity for every ordered pair of
    sites' counts; rows and columns are both labelled with the site names.
    '''
    labels = [site.name for site in sites]
    rows = [
        [
            cl.compute_similarity(row_site.counts, col_site.counts)
            for col_site in sites
        ]
        for row_site in sites
    ]
    return pd.DataFrame(rows, columns=labels, index=labels)
示例#14
0
def compute_new_cluster_sim(new_clust_avg, simMat_update, sites_dict,
                            clusters):
    '''
    Compute the similarity of the newly created cluster to every column of
    simMat_update, followed by a trailing 0.0 entry.

    Returns a list with one value per column plus the final 0.0.
    '''

    def _profile(label):
        # Columns found in sites_dict use that site's counts; any other
        # column is looked up in `clusters` and passed to
        # compute_cluster_center.
        # NOTE(review): aa_df is not a parameter; it is presumably a
        # module-level name - confirm it is defined where this runs.
        if label in sites_dict:
            return sites_dict[label].counts
        return compute_cluster_center(clusters[label], sites_dict, aa_df)

    similarities = [
        cl.compute_similarity(new_clust_avg, _profile(label))
        for label in simMat_update.columns
    ]
    return similarities + [0.0]
示例#15
0
def compute_similarity_matrix(sites):
    '''
    Build the all-by-all similarity matrix using the similarity algorithm in
    the cluster.py script.
    In: list of sites
    Out: similarity matrix as a pandas DataFrame, indexed and labelled by
         site name
    '''
    site_names = [entry.name for entry in sites]
    count_vectors = [entry.counts for entry in sites]

    matrix = []
    for left in count_vectors:
        matrix.append(
            [cl.compute_similarity(left, right) for right in count_vectors])

    return pd.DataFrame(matrix, columns=site_names, index=site_names)
示例#16
0
def check_change_in_centroids(old_centroids, new_centroids):
    '''
    Check how far the centroids have moved.

    Returns the sum, over every key of old_centroids, of cl.compute_similarity
    between the old centroid and the new centroid stored under the same key.
    '''
    return sum(
        cl.compute_similarity(old_centroids[label], new_centroids[label])
        for label in old_centroids.keys()
    )