# NOTE: fragment flattened onto one line; the layout below is reconstructed.
# The entries that follow are the tail of a dict literal whose opening is
# outside this view -- presumably CONVERSION, which convert() reads (confirm).
    'stingray': 'Hexatrygonidae',
    'wolf': 'Canis lupus',
    'vampire': 'Desmodus rotundus',
    'wasp': 'Hymenoptera',
    'tuna': 'Thunnini',
    'tortoise': 'Testudinidae',
    'termite': 'Termitidae',
    'vole': 'Arvicolinae',
    'tuatara': 'Hatteria punctata',
    'wren': 'Troglodytidae',
}


def convert(a):
    """Return the CONVERSION entry for ``a``, or ``a`` unchanged if it has none."""
    return CONVERSION.get(a, a)


X, y = load('zoo')

# Reuse the ids pickled by an earlier run when ./ids.pkl exists; otherwise
# resolve every label through tax.TNRS and collect the 'ot:ottId' values.
if os.path.exists('./ids.pkl'):
    with open('./ids.pkl', 'rb') as fp:
        ids = pickle.load(fp)
else:
    tax = api.taxomachine
    # Each label has '+' replaced by a space and is then passed through convert().
    animals = [convert(a.replace('+', ' ')) for a in y]
    ids = []
    for animal in tqdm(animals):
        result = tax.TNRS([animal])['results']
        if len(result) == 0:
            print "Failed:", animal
        # NOTE(review): as laid out here an empty result is only reported; the
        # indexing on the next line would then raise IndexError. Confirm whether
        # a failed lookup is meant to abort the run or to be skipped.
        result = result[0]
        # Only the first match of the first result is kept.
        match = result['matches'][0]
        ids.append(match['ot:ottId'])
    # The body of this `with` lies beyond the end of this fragment.
    with open('./ids.pkl', 'wb') as fp:
# Script preamble: load MNIST, centre the pixel matrix and, when a cached
# pca.pkl exists, load the PCA object stored in it.
import seaborn as sns
sns.set_style("white")
import logging
logging.basicConfig(level=logging.INFO)
import cPickle as pickle
import numpy as np
from sklearn.decomposition import PCA
from path import Path
from tqdm import tqdm
from trees.data import load
from trees.interact import Database, Interactor
from trees.ddt import GaussianLikelihoodModel, DirichletDiffusionTree, Inverse
from trees.mcmc import MetropolisHastingsSampler

mnist = load('mnist')
database = Database('mnist')
interactor = Interactor(mnist, database)

X, y = mnist.X, mnist.y
# astype() returns a new array, so the in-place operations below do not
# modify mnist.X.
X = X.astype(np.float32)
# presumably the raw values are 0-255 pixel intensities -- TODO confirm
X /= 255.0
# Subtract the per-column mean.
X -= X.mean(axis=0)

# NOTE(review): the root logger was configured at INFO above, so this DEBUG
# message is not emitted unless the level is lowered elsewhere.
logging.debug("Finding PCA...")
pca_path = Path("pca.pkl")
# Load the cached PCA object when the pickle is present. What happens when it
# is absent is not visible in this fragment.
if pca_path.exists():
    with open(pca_path, 'rb') as fp:
        pca = pickle.load(fp)
# Script preamble: load the zoo dataset, project it to 10 principal
# components, add a small Gaussian jitter and build the inverse-distance
# matrix D.
from cStringIO import StringIO
import numpy as np
import logging
# logging.basicConfig(level=logging.DEBUG)
import matplotlib.pyplot as plt
import seaborn as sns
from trees.dasgupta import DasguptaTree
from trees.mcmc import SPRSampler
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from itertools import combinations
from trees.data import load
from sklearn.decomposition import PCA

data = load('zoo')
X, y = data.X, data.y

pca = PCA(10)
X = pca.fit_transform(X)

# The jitter used to be drawn from the global RNG *before* np.random.seed(0)
# ran, so X (and everything derived from it) changed on every run despite the
# explicit seed. Drawing it from a dedicated RandomState makes it reproducible
# while leaving the global seed call -- and therefore the random stream seen
# by the code that follows -- exactly where it was.
X += np.random.RandomState(0).normal(size=X.shape) * 0.01
N = X.shape[0]
np.random.seed(0)
# idx = np.random.permutation(np.arange(N))[:20]
# X = X[idx]
# y = np.array(y)
# y = y[idx]

# Element-wise reciprocal of the pairwise Euclidean distance matrix.
# NOTE(review): squareform(pdist(X)) has a zero diagonal, so D's diagonal is
# inf and NumPy reports a divide-by-zero warning here -- confirm that the
# consumers of D never read the diagonal.
D = 1.0 / squareform(pdist(X))
pickle.dump(costs, fp) # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp: # previous_trees = pickle.load(fp) # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp: # pickle.dump(previous_trees + [t.get_state() for t in trees], fp) with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp: pickle.dump(sampler.tree.get_state(), fp) return costs, scores, sampler if __name__ == "__main__": args = parse_args() out_dir = Path(args.out_dir) / args.data out_dir.mkdir_p() dataset_name = args.data dataset = load(dataset_name) X, y = dataset.X, dataset.y if dataset_name == 'mnist' or dataset_name == 'iris' or dataset_name == '20news': np.random.seed(0) idx = np.random.permutation(xrange(X.shape[0]))[:args.subset] X = X[idx] y = y[idx] if dataset_name == 'mnist' or dataset_name == '20news': pca = PCA(10) X = pca.fit_transform(X) if dataset_name == 'zoo': # pca = PCA(5) # X = pca.fit_transform(X) X += np.random.normal(size=X.shape) * 0.01
# Script preamble: load the zoo dataset, project it to 10 principal
# components, add a small Gaussian jitter and record the data dimensions.
import numpy as np
# logging.basicConfig(level=logging.DEBUG)
import matplotlib.pyplot as plt
import seaborn as sns
from trees.mcmc import MetropolisHastingsSampler
from trees.ddt import *
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from itertools import combinations
import cPickle as pickle
import random
from trees.data import load
from sklearn.decomposition import PCA

data = load('zoo')
X, y = data.X, data.y

pca = PCA(10)
X = pca.fit_transform(X)

# The jitter used to be drawn from the global RNG *before* np.random.seed(0)
# ran, so X changed on every run despite the explicit seed. Drawing it from a
# dedicated RandomState makes it reproducible while leaving the global seed
# call -- and therefore the random stream seen by the code that follows --
# exactly where it was.
X += np.random.RandomState(0).normal(size=X.shape) * 0.01
N = X.shape[0]
np.random.seed(0)
# idx = np.random.permutation(np.arange(N))[:20]
# X = X[idx]
# y = np.array(y)
# y = y[idx]

# N is rebound to the same row count; D is the number of columns of X.
N, D = X.shape
# NOTE: fragment flattened onto one line; the layout below is reconstructed.
# The entries that follow are the tail of a dict literal whose opening is
# outside this view -- presumably CONVERSION, which convert() reads (confirm).
    'vampire': 'Desmodus rotundus',
    'wasp': 'Hymenoptera',
    'tuna': 'Thunnini',
    'tortoise': 'Testudinidae',
    'termite': 'Termitidae',
    'vole': 'Arvicolinae',
    'tuatara': 'Hatteria punctata',
    'wren': 'Troglodytidae',
}


def convert(a):
    """Return the CONVERSION entry for ``a``, or ``a`` unchanged if it has none."""
    return CONVERSION.get(a, a)


X, y = load('zoo')

# Reuse the ids pickled by an earlier run when ./ids.pkl exists; otherwise
# resolve every label through tax.TNRS and collect the 'ot:ottId' values.
if os.path.exists('./ids.pkl'):
    with open('./ids.pkl', 'rb') as fp:
        ids = pickle.load(fp)
else:
    tax = api.taxomachine
    # Each label has '+' replaced by a space and is then passed through convert().
    animals = [convert(a.replace('+', ' ')) for a in y]
    ids = []
    for animal in tqdm(animals):
        result = tax.TNRS([animal])['results']
        if len(result) == 0:
            print "Failed:", animal
        # NOTE(review): as laid out here an empty result is only reported; the
        # indexing on the next line would then raise IndexError. Confirm whether
        # a failed lookup is meant to abort the run or to be skipped.
        result = result[0]
        # Only the first match of the first result is kept.
        match = result['matches'][0]
        ids.append(match['ot:ottId'])
    # The body of this `with` lies beyond the end of this fragment.
    with open('./ids.pkl', 'wb') as fp:
# with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp: # previous_trees = pickle.load(fp) # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp: # pickle.dump(previous_trees + [t.get_state() for t in trees], fp) with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp: pickle.dump(sampler.tree.get_state(), fp) return costs, scores, sampler if __name__ == "__main__": args = parse_args() out_dir = Path(args.out_dir) / args.data out_dir.mkdir_p() dataset_name = args.data dataset = load(dataset_name) X, y = dataset.X, dataset.y if dataset_name == 'mnist' or dataset_name == 'iris' or dataset_name == '20news': np.random.seed(0) idx = np.random.permutation(xrange(X.shape[0]))[:args.subset] X = X[idx] y = y[idx] if dataset_name == 'mnist' or dataset_name == '20news': pca = PCA(10) X = pca.fit_transform(X) if dataset_name == 'zoo': # pca = PCA(5) # X = pca.fit_transform(X) X += np.random.normal(size=X.shape) * 0.01