Example #1
0
def get_codebooks(instances, sizes):
    """Build one SURF codebook per requested size.

    Descriptors from every instance are pooled; for each size in `sizes`
    a random subset of the pool is clustered with k-medoids over cityblock
    distances, and the medoid descriptors become the codebook.

    instances: iterable of images accepted by calc_surf_features
    sizes: iterable of codebook sizes (k for k-medoids)
    returns: list of codebooks, one per entry in `sizes`
    """
    list_sampled_features = []
    n = len(instances)
    for x, instance in enumerate(instances):
        if x % 10 == 0:
            # Single-argument print(...) is valid in both Python 2 and 3;
            # the original `print "..."` statement is a Py3 syntax error.
            print("Processing (%d/%d) %.2f%% Images" % (x, n, 100. * x / n))
        # calc_surf_features returns (keypoints, descriptors); keep descriptors
        list_sampled_features.append(calc_surf_features(instance)[1])
    sampled_features = np.concatenate(list_sampled_features)

    codebooks = []
    for size in sizes:
        # Reshuffle so each codebook is built from a different random subset.
        np.random.shuffle(sampled_features)
        subset = sampled_features[:_num_surf_features_codebook]

        distances = scipy.spatial.distance.pdist(subset, 'cityblock')
        distances = scipy.spatial.distance.squareform(distances)

        # cluster() returns (labels, medoid indices); [1] picks codebook rows.
        indices = kmedoids.cluster(distances,
                                   k=size,
                                   maxIters=_max_k_medoids_iters)[1]
        codebooks.append(subset[indices])
    return codebooks
Example #2
0
def worker(X, k, round):
    """Multithreading worker: cluster X into k medoids and save the labels.

    X: precomputed distance matrix accepted by kmedoids.cluster
    k: number of clusters
    round: repetition index, used only to name the output file

    Skips the work entirely if the output CSV already exists (resume support).
    NOTE(review): the doubled '/' in the key ("individuals/" + "/") looks
    accidental — confirm against the expected on-disk layout before changing,
    since existing result files were written with this path.
    """
    key = methodName + "/" + dataset + "/individuals/" + "/" + methodName + "_" + dataset + "_k_" + str(
        k) + "_round_" + str(round)
    out_file = key + "_labels.csv"  # renamed: `file` shadowed the builtin
    if os.path.exists(out_file):
        return
    # Only the labels are persisted; the medoid indices are discarded.
    labels = kmedoids.cluster(X, k)[0]
    np.savetxt(out_file, labels, fmt="%d")
Example #3
0
def construct_codebook(instances, codebook_size):
    """Return a codebook of `codebook_size` medoid SURF descriptors."""
    samples = sample_surf_features(instances)

    # Square cityblock distance matrix, as required by kmedoids.cluster.
    dist_matrix = scipy.spatial.distance.squareform(
        scipy.spatial.distance.pdist(samples, 'cityblock'))

    medoid_indices = kmedoids.cluster(
        dist_matrix, k=codebook_size, maxIters=_max_k_medoids_iters)[1]
    return samples[medoid_indices]
Example #4
0
def construct_codebook(instances, codebook_size):
    """Select `codebook_size` medoid SURF descriptors as the codebook."""
    descriptors = sample_surf_features(instances)

    # pdist gives the condensed form; squareform expands it for kmedoids.
    condensed = scipy.spatial.distance.pdist(descriptors, 'cityblock')
    square = scipy.spatial.distance.squareform(condensed)

    clustering = kmedoids.cluster(square,
                                  k=codebook_size,
                                  maxIters=_max_k_medoids_iters)
    # clustering[1] holds the medoid row indices into `descriptors`.
    return descriptors[clustering[1]]
def create_codebook(indir):
    """Build (or load from cache) a SURF codebook from the images under indir.

    indir: directory whose subdirectories contain .jpg images
    returns: array of codebook descriptors (k-medoids medoids)
    """
    # Fast path: reuse a previously pickled codebook.
    if _read_cache and os.path.exists(_cache_codebook_file):
        print("\tReading Cache")
        try:
            # 'rb' + context manager: pickle data is binary (text mode breaks
            # on Windows) and the handle is always closed.
            with open(_cache_codebook_file, 'rb') as f:
                codebook = cPickle.load(f)
            print("Done\n")
            return codebook
        except Exception as e:
            print("\tError loading codebook: " + str(e))
            print("\tComputing From scratch")

    # sample some files
    im_files = []
    for subdir in os.listdir(indir):
        rdir = os.path.join(indir, subdir)
        for fname in os.listdir(rdir):
            if fname.endswith(".jpg"):
                im_files.append(os.path.join(rdir, fname))
    codebook_files = random.sample(
        im_files, int(_perc_docs_for_codebook * len(im_files)))

    # Construct the codebook. List comprehension instead of map(): on
    # Python 3 map() returns an iterator, which np.concatenate rejects.
    surfs = np.concatenate(
        [extract_surf_features(x)[1] for x in codebook_files])
    np.random.shuffle(surfs)
    surfs = surfs[:_max_surf_features]
    distances = scipy.spatial.distance.pdist(surfs, 'cityblock')
    distances = scipy.spatial.distance.squareform(distances)
    indices = kmedoids.cluster(distances,
                               k=_codebook_size,
                               maxIters=_max_k_medoids_iters)[1]
    codebook = surfs[indices]

    if _write_cache:
        print("\tWriting Codebook to Cache")
        try:
            # 'wb': binary mode for pickle output; `with` guarantees close.
            with open(_cache_codebook_file, 'wb') as f:
                cPickle.dump(codebook, f)
        except Exception as e:
            print("\tCould not write to cache: " + str(e))
    print("Done\n")

    return codebook
Example #6
0
def construct_codebook(instances):
    """Build (or load from cache) the SURF codebook for `instances`.

    Uses k-medoids over cityblock distances when _use_k_medoids is set,
    otherwise simply truncates the sampled descriptors to _codebook_size.
    """
    print("Constructing Codebook")
    if _read_cache and os.path.exists(_cache_codebook_file):
        print("\tReading Cache")
        try:
            # 'rb' + context manager: pickle payloads are binary, and the
            # handle is closed on every path (the original leaked it when
            # open() succeeded but load() raised before f.close()).
            with open(_cache_codebook_file, 'rb') as f:
                codebook = cPickle.load(f)
            print("Done\n")
            return codebook
        except Exception as e:
            print("\tError loading codebook: " + str(e))
            print("\tComputing From scratch")

    surf_feature_samples = sample_surf_features(instances)
    print("\tNumber of SURFs for codebook construction: " +
          str(surf_feature_samples.shape[0]))

    if _use_k_medoids:
        print("\tComputing Distances")
        distances = scipy.spatial.distance.pdist(surf_feature_samples,
                                                 'cityblock')
        distances = scipy.spatial.distance.squareform(distances)
        print("\tDone\n")

        print("\tRunning Kmedoids")
        indices = kmedoids.cluster(distances,
                                   k=_codebook_size,
                                   maxIters=_max_k_medoids_iters)[1]
        codebook = surf_feature_samples[indices]
        print("\tDone\n")
    else:
        # Cheap fallback: take the first _codebook_size sampled descriptors.
        codebook = surf_feature_samples[:_codebook_size]

    if _write_cache:
        print("\tWriting Codebook to Cache")
        try:
            # 'wb': binary mode for pickle output.
            with open(_cache_codebook_file, 'wb') as f:
                cPickle.dump(codebook, f)
        except Exception as e:
            print("\tCould not write to cache: " + str(e))
    print("Done\n")
    return codebook
def create_codebook(indir):
    """Build (or load from cache) a SURF codebook from .jpg files under indir."""
    if _read_cache and os.path.exists(_cache_codebook_file):
        print("\tReading Cache")
        try:
            # 'rb': pickle data is binary, not text; `with` always closes.
            with open(_cache_codebook_file, 'rb') as f:
                codebook = cPickle.load(f)
            print("Done\n")
            return codebook
        except Exception as e:
            print("\tError loading codebook: " + str(e))
            print("\tComputing From scratch")

    # sample some files
    im_files = []
    for subdir in os.listdir(indir):
        rdir = os.path.join(indir, subdir)
        for fname in os.listdir(rdir):
            if fname.endswith(".jpg"):
                im_files.append(os.path.join(rdir, fname))
    codebook_files = random.sample(im_files, int(_perc_docs_for_codebook * len(im_files)))

    # Construct the codebook; comprehension instead of map() so the result
    # is a list under Python 3 as well (np.concatenate rejects iterators).
    surfs = np.concatenate([extract_surf_features(x)[1] for x in codebook_files])
    np.random.shuffle(surfs)
    surfs = surfs[:_max_surf_features]
    distances = scipy.spatial.distance.pdist(surfs, 'cityblock')
    distances = scipy.spatial.distance.squareform(distances)
    indices = kmedoids.cluster(distances, k=_codebook_size, maxIters=_max_k_medoids_iters)[1]
    codebook = surfs[indices]

    if _write_cache:
        print("\tWriting Codebook to Cache")
        try:
            # 'wb': binary mode for pickle output.
            with open(_cache_codebook_file, 'wb') as f:
                cPickle.dump(codebook, f)
        except Exception as e:
            print("\tCould not write to cache: " + str(e))
    print("Done\n")

    return codebook
Example #8
0
def get_codebooks(instances, sizes):
    """Build one SURF codebook per size in `sizes`.

    Pools descriptors from all instances, then for each requested size
    clusters a fresh random subset with k-medoids (cityblock metric) and
    returns the medoid descriptors as that size's codebook.
    """
    n = len(instances)
    pooled = []
    for idx, instance in enumerate(instances):
        if idx % 10 == 0:
            # Parenthesized single-argument print works in Python 2 and 3;
            # the bare print statement is a Py3 syntax error.
            print("Processing (%d/%d) %.2f%% Images" % (idx, n, 100. * idx / n))
        # [1] selects the descriptor array from (keypoints, descriptors).
        pooled.append(calc_surf_features(instance)[1])
    sampled_features = np.concatenate(pooled)

    codebooks = []
    for size in sizes:
        np.random.shuffle(sampled_features)  # new random subset per codebook
        subset = sampled_features[:_num_surf_features_codebook]

        distances = scipy.spatial.distance.squareform(
            scipy.spatial.distance.pdist(subset, 'cityblock'))

        medoid_idx = kmedoids.cluster(distances, k=size,
                                      maxIters=_max_k_medoids_iters)[1]
        codebooks.append(subset[medoid_idx])
    return codebooks
Example #9
0
class cleavage_site(object):
    # Plain record describing an RNA cleavage site. The fields below are
    # class-level defaults, shared until overwritten on an instance.
    index = 0       # numeric identifier/position of the site
    name = ""       # human-readable label
    structure = ""  # secondary-structure string (compared via RNADistance below)

    def _init(self):
        # NOTE(review): probably a typo for __init__ — as `_init` this is
        # never invoked on instantiation; confirm intent before renaming.
        """"""

# Load previously pickled cleavage_site objects.
# NOTE(review): pickle files should normally be opened 'rb', and the handle
# is never closed — confirm this runs only under Python 2 on Windows as-is.
cleavage_site_list = pickle.load(open("F:\\JIC\\FoldTest\\cleavage_site_list.pickle","r"))

# Work on the first 16 sites only.
subset = cleavage_site_list[0:16]



# Build a symmetric 16x16 structure-distance matrix; the added random()
# presumably jitters ties between identical distances — TODO confirm.
upper_triangle = []
distances = np.zeros((16, 16))

for i in range(0,16):#column
    for j in range(i + 1,16):
        
        distance = RNADistance.RNADistance(subset[i].structure,subset[j].structure) * 1.0 + random.random()
        distances[i,j] = distance
        distances[j,i] = distance
        upper_triangle.append(distance)

# NOTE(review): this overwrites the matrix just computed above, so the loop's
# result is discarded — confirm which distance source is actually intended.
distances = np.loadtxt("U:\\JICWork\\rna_distance_matrix_71_wt.txt")

import kmedoids
points, medoids = kmedoids.cluster(distances,3)
Example #10
0
#

# Try mini-batch k-means
#
#

miniKmeans = MiniBatchKMeans(n_clusters=100)
label_mini = miniKmeans.fit_predict(X_treino)
print('Mudando para MiniBatchKMeans temos: ',
      silhouette_score(X_treino, label_mini), '\n')

# MEDOIDS
# https://github.com/salspaugh/machine_learning/blob/master/clustering/kmedoids.py

# Precompute the full euclidean distance matrix required by kmedoids.cluster.
distancia_treino = pairwise_distances(X_treino, metric='euclidean')
clusters, medoids = kmedoids.cluster(distancia_treino, 100)
# NOTE(review): `i` is not defined until the PCA loop below — this print
# raises NameError at runtime; it probably meant the cluster count (100).
print('Numero de clusters: ', i)
print('Mudando para K-Medoids temos: ',
      silhouette_score(distancia_treino, clusters), '\n')

#	APPLYING PCA
#
#

# Sweep the retained-variance fraction from 0.01 to 0.99.
for i in range(1, 100):
    pca = PCA(n_components=i / 100, svd_solver='full')
    X_treino_pca = pca.fit_transform(X_treino)
    kmeans_pca = KMeans(n_clusters=78)
    label_teste = kmeans_pca.fit_predict(X_treino_pca)

    #print('Aplicando PCA no nosso melhor modelo com clusters:', 78, 'e variancia: ', i/10)
Example #11
0
def cluster_rows_in_k(kdata, k):
    """Cluster the rows of `kdata` into `k` groups via k-medoids."""
    pairwise = compute_distances_in_k(kdata)
    result = kmedoids.cluster(pairwise, k=k)
    return result
Example #12
0
def cluster_rows_in_k(kdata, k):
    """K-medoids clustering of kdata's rows; returns kmedoids.cluster's output."""
    return kmedoids.cluster(compute_distances_in_k(kdata), k=k)
Example #13
0
# Run k-medoids six times on the precomputed distance matrix and overlay each
# round's medoids (looked up in `data`) with that round's colour and marker.
for round in range(0, 6):
    #method = KMeans(n_clusters = 3)
    #method.fit(data)
    #centroids = method.cluster_centers_
    #plt.scatter(centroids[:, 0], centroids[:, 1], color = colors[round],s=169,  marker = markers[round], linewidths=3, facecolor="None")

    #labels = method.labels_
    #centroids = []
    #for i in range(0,method.n_clusters):
    #    center = np.mean( data[labels == i],0)
    #    centroids.append(center)
    #    plt.scatter(center[0], center[1], color = colors[round],s=169,  marker =markers[round], linewidths=3, facecolor="None")

    #plt.scatter(centroids[:, 0], centroids[:, 1], color = colors[i], s=169,  marker = "x", linewidths=3)
    labels, medoids = kmedoids.cluster(distances, 3)
    m_x = []
    m_y = []
    for m in medoids:
        m_x.append(data[m][0])
        m_y.append(data[m][1])
        # NOTE(review): scatter is called inside the inner loop, re-plotting
        # the accumulated medoids on every iteration; it was likely meant to
        # run once per round after the loop — confirm before dedenting.
        plt.scatter(m_x,
                    m_y,
                    color=colors[round],
                    s=169,
                    marker=markers[round],
                    linewidths=3,
                    facecolor="None")

plt.title('spectral clustering and evenly distributed data')
#plt.xlim(x_min, x_max)