from PIL import Image
from pylab import *

# Project-local helpers. `hcluster` is used below and therefore has to be
# imported explicitly; importing after the star import keeps these names
# from being shadowed by it.
import hcluster
import imtools

# Cluster a folder of sunset photos by colour content and display the
# larger clusters as thumbnail grids.
imlist = imtools.get_imlist('/Users/thakis/Downloads/data/flickr-sunsets-small')

# extract histogram as feature vector (8 bins per color channel, 8**3 = 512)
features = zeros([len(imlist), 512])
for i, f in enumerate(imlist):
    # Force three channels so reshape(-1, 3) yields one row per pixel even
    # for grayscale, palette or RGBA files.
    im = array(Image.open(f).convert('RGB'))
    # `density=True` replaces the `normed=True` keyword, which current NumPy
    # releases no longer accept.
    h, edges = histogramdd(im.reshape(-1, 3), 8, density=True,
                           range=[(0, 255), (0, 255), (0, 255)])
    features[i] = h.flatten()

tree = hcluster.hcluster(features)
hcluster.draw_dendrogram(tree, imlist, filename='out_sunset.png')

# visualize clusters, using 0.23 * tree.distance as the cut threshold
clusters = tree.extract_clusters(dist=0.23 * tree.distance)
for c in clusters:
    elements = c.get_cluster_elements()
    if len(elements) > 3:
        figure()
        # the 4x5 subplot grid holds at most 20 thumbnails per cluster
        for p, element in enumerate(elements[:20]):
            subplot(4, 5, p + 1)
            im = array(Image.open(imlist[element]))
            imshow(im)
            axis('off')
show()
from PIL import Image
from pylab import *
import imtools
import pickle
from scipy.cluster.vq import *

# Get the list of images and remember how many there are.
imlist = imtools.get_imlist('selected_fontimages/')
imnbr = len(imlist)

# Load the model file: two objects pickled back to back, read in order.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# this with a trusted 'font_pca_modes.pkl'.
with open('font_pca_modes.pkl', 'rb') as f:
    immean = pickle.load(f)
    V = pickle.load(f)

# Build a matrix holding one flattened image per row (float32).
flattened_images = [array(Image.open(name)).flatten() for name in imlist]
immatrix = array(flattened_images, 'f')

# Project each mean-centred image onto the first 40 rows of V.
immean = immean.flatten()
projected = array([dot(V[:40], row - immean) for row in immatrix])

# Hierarchical clustering of the projected images, drawn as a dendrogram.
import hcluster

tree = hcluster.hcluster(projected)
hcluster.draw_dendrogram(tree, imlist, filename='fonts.jpg')
# Collect every '.jpg' file directly inside `path`.
imlist = [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.jpg')]

# extract feature vectors (8 bins per color channel, 8**3 = 512 values)
features = zeros([len(imlist), 512])
for i, f in enumerate(imlist):
    # Force three channels so reshape(-1, 3) yields one row per pixel even
    # for grayscale, palette or RGBA files.
    im = array(Image.open(f).convert('RGB'))
    # multi-dimensional histogram; `density=True` replaces the `normed=True`
    # keyword, which current NumPy releases no longer accept.
    h, edges = histogramdd(im.reshape(-1, 3), 8, density=True,
                           range=[(0, 255), (0, 255), (0, 255)])
    features[i] = h.flatten()

tree = hcluster.hcluster(features)
hcluster.draw_dendrogram(tree, imlist, filename='sunset.pdf')

# visualize clusters for an arbitrary threshold
clusters = tree.extract_clusters(0.23 * tree.distance)

# plot the images of clusters with more than 3 elements
for c in clusters:
    elements = c.get_cluster_elements()
    nbr_elements = len(elements)
    if nbr_elements > 3:
        figure()
        # the 4x5 subplot grid holds at most 20 thumbnails per cluster
        for p, element in enumerate(elements[:20]):
            subplot(4, 5, p + 1)
            im = array(Image.open(imlist[element]))
            imshow(im)
            axis('off')
# NOTE(review): no show() call is visible in this snippet; the figures are
# only displayed if code following it calls show() -- confirm.
# Cluster a folder of sunset photos by colour content and display the
# larger clusters as thumbnail grids.
imlist = imtools.get_imlist(
    '/Users/thakis/Downloads/data/flickr-sunsets-small')

# extract histogram as feature vector (8 bins per color channel, 8**3 = 512)
features = zeros([len(imlist), 512])
for i, f in enumerate(imlist):
    # Force three channels so reshape(-1, 3) yields one row per pixel even
    # for grayscale, palette or RGBA files.
    im = array(Image.open(f).convert('RGB'))
    # `density=True` replaces the `normed=True` keyword, which current NumPy
    # releases no longer accept.
    h, edges = histogramdd(im.reshape(-1, 3), 8, density=True,
                           range=[(0, 255), (0, 255), (0, 255)])
    features[i] = h.flatten()

tree = hcluster.hcluster(features)
hcluster.draw_dendrogram(tree, imlist, filename='out_sunset.png')

# visualize clusters, using 0.23 * tree.distance as the cut threshold
clusters = tree.extract_clusters(dist=0.23 * tree.distance)
for c in clusters:
    elements = c.get_cluster_elements()
    if len(elements) > 3:
        figure()
        # the 4x5 subplot grid holds at most 20 thumbnails per cluster
        for p, element in enumerate(elements[:20]):
            subplot(4, 5, p + 1)
            im = array(Image.open(imlist[element]))
            imshow(im)
            axis('off')
show()
# Collect every '.jpg' file directly inside `path`.
imlist = [
    os.path.join(path, f) for f in os.listdir(path) if f.endswith('.jpg')
]

# extract feature vector (8 bins per color channel, 8**3 = 512 values)
features = zeros([len(imlist), 512])
for i, f in enumerate(imlist):
    # Force three channels so reshape(-1, 3) yields one row per pixel even
    # for grayscale, palette or RGBA files.
    im = array(Image.open(f).convert('RGB'))
    # multi-dimensional histogram; `density=True` replaces the `normed=True`
    # keyword, which current NumPy releases no longer accept.
    h, edges = histogramdd(im.reshape(-1, 3), 8, density=True,
                           range=[(0, 255), (0, 255), (0, 255)])
    features[i] = h.flatten()

tree = hcluster.hcluster(features)
hcluster.draw_dendrogram(tree, imlist, filename='sunset.pdf')

# visualize clusters with some (arbitrary) threshold
clusters = tree.extract_clusters(0.23 * tree.distance)

# plot images for clusters with more than 3 elements
for c in clusters:
    elements = c.get_cluster_elements()
    nbr_elements = len(elements)
    if nbr_elements > 3:
        figure()
        # the 4x5 subplot grid holds at most 20 thumbnails per cluster
        for p, element in enumerate(elements[:20]):
            subplot(4, 5, p + 1)
            im = array(Image.open(imlist[element]))
            imshow(im)
            axis('off')
show()

# NOTE(review): `projected` is not defined anywhere in this snippet; it
# presumably comes from a separate PCA projection step -- confirm before
# running. This rebinds `tree`, replacing the histogram-based tree above.
tree = hcluster.hcluster(projected)
def draw_tree(observations, filename):
    """Cluster the observations and draw the resulting dendrogram.

    Args:
        observations: Iterable of indexable items. Element 0 of each item is
            collected into the list passed to ``hcluster``; element 1 is
            collected into the list passed as the second argument of
            ``draw_dendrogram``.
        filename: Passed through unchanged as the third argument of
            ``draw_dendrogram``.

    Returns:
        The ``filename`` argument, unchanged.
    """
    # Materialise once: the data is walked twice below, and a one-shot
    # iterable (generator, zip object) would otherwise be exhausted by the
    # first pass and leave the second list empty.
    observations = list(observations)
    features = [o[0] for o in observations]
    labels = [o[1] for o in observations]

    node = hcluster(features)
    draw_dendrogram(node, labels, filename)
    return filename