def _processFashionMNIST(): pfile = getPYDIR()+'/datasets/fashion_mnist/proc-fashion_mnist.h5' DIR = os.path.dirname(pfile) createIfAbsent(DIR) if not os.path.exists(os.path.join(DIR,'train-images-idx3-ubyte.gz')): print 'Downloading data' urllib.urlretrieve('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz',os.path.join(DIR,'train-images-idx3-ubyte.gz')) urllib.urlretrieve('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz',os.path.join(DIR,'train-labels-idx1-ubyte.gz')) urllib.urlretrieve('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz',os.path.join(DIR,'t10k-images-idx3-ubyte.gz')) urllib.urlretrieve('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz',os.path.join(DIR,'t10k-labels-idx1-ubyte.gz')) if os.path.exists(pfile): print 'Found: ',pfile return pfile print DIR X, Y= readData(os.path.join(DIR,'train-images-idx3-ubyte.gz'), os.path.join(DIR,'train-labels-idx1-ubyte.gz')) np.random.seed(0) idxshuf = np.random.permutation(X.shape[0]) valid_idx = idxshuf[:10000] train_idx = idxshuf[10000:] train_x, train_y = np.clip(X[train_idx]/255., a_min=0.0, a_max=1.0), Y[train_idx] valid_x, valid_y = np.clip(X[valid_idx]/255., a_min=0.0, a_max=1.0), Y[valid_idx] test_x, test_y = readData(os.path.join(DIR,'t10k-images-idx3-ubyte.gz'), os.path.join(DIR,'t10k-labels-idx1-ubyte.gz')) test_x = np.clip(test_x/255., a_min=0.0, a_max=1.0) print 'Processing Fashion MNIST' h5f = h5py.File(pfile, mode='w') h5f.create_dataset('train',data = train_x) h5f.create_dataset('train_y',data = train_y) h5f.create_dataset('test' ,data = test_x) h5f.create_dataset('test_y' ,data = test_y) h5f.create_dataset('valid',data = valid_x) h5f.create_dataset('valid_y',data = valid_y) h5f.close() for dd in [train_x, train_y, valid_x, valid_y, test_x, test_y]: print dd.shape, dd.min(), dd.max() print 'Done processing Fashion MNIST....',pfile return pfile
# --- Script preamble (runs at import time): path setup, CLI parsing, dataset load ---
# Make the parent directory importable so the sibling modules below
# (load, parse_args_dkf, utils, stinfmodel) resolve.
sys.path.append('../')
""" Change this to modify the loadDataset function """
from load import loadDataset
""" This will contain a hashmap where the parameters correspond to the default ones modified by any command line options given to this script """
from parse_args_dkf import parse
# Parse command-line options into the params dict used throughout the run.
params = parse()
""" Some utility functions from theanomodels """
from utils.misc import removeIfExists, createIfAbsent, mapPrint, saveHDF5, displayTime
""" Load the dataset into a hashmap. See load.py for details """
dataset = loadDataset()
# Suffix the checkpoint directory so runs of this template script do not
# collide with other experiments, then make sure it exists.
params['savedir'] += '-template'
createIfAbsent(params['savedir'])
""" Add dataset and NADE parameters to "params" which will become part of the model """
# Copy dataset-derived settings into params so they are saved with the model.
for k in ['dim_observations', 'data_type']:
    params[k] = dataset[k]
mapPrint('Options: ', params)
# NADE emission overrides the observation type regardless of the dataset.
if params['use_nade']:
    params['data_type'] = 'binary_nade'
""" import DKF + learn/evaluate functions """
# Imported late (after arg parsing) -- presumably because importing the model
# stack triggers an expensive Theano setup; TODO confirm.
start_time = time.time()
from stinfmodel.dkf import DKF
import stinfmodel.learning as DKF_learn
import stinfmodel.evaluate as DKF_evaluate
# --- Evaluation-script preamble: model registry, output dir, wikicorp dataset ---
from optvaemodels.vae import VAE
import optvaemodels.vae_learn as VAE_learn
import optvaemodels.vae_evaluate as VAE_evaluate
# NOTE(review): duplicate of the VAE_learn import two lines up -- harmless,
# kept verbatim; consider removing.
import optvaemodels.vae_learn as VAE_learn
from optvaedatasets.load import loadDataset
# models maps a human-readable run name to its checkpoint-file prefix;
# epochval maps the same name to the epoch (as a string) to evaluate.
models, epochval = OrderedDict(), OrderedDict()
models['wikicorp-pl-2-finopt'] = './chkpt-wikicorp-finopt/VAE_lr-8_0e-04-ph-400-qh-400-ds-100-pl-2-ql-2-nl-relu-bs-500-ep-52-plr-1_0e-02-ar-0-otype-finopt-ns-100-etype-mlp-ll-mult-itype-tfidfl20_01_-uid'
epochval['wikicorp-pl-2-finopt'] = '50'
MODELS_TO_USE = models.keys()
print 'Evaluating on: ', MODELS_TO_USE
# Results are written under SAVEDIR; created here if absent.
SAVEDIR = './evaluate_if_dir/'
createIfAbsent(SAVEDIR)
DNAME = ''
dataset_wiki = loadDataset('wikicorp')
additional_attrs_wiki = {}
def getTF(dataset):
    """Fit a TF-IDF transform on the training split and return the per-term
    IDF weights (tfidf.idf_).

    Assumes TfidfTransformer (sklearn) is imported elsewhere in this file,
    and that dataset['train'] is a document-term count matrix -- TODO confirm.
    """
    tfidf = TfidfTransformer(norm=None)
    tfidf.fit(dataset['train'])
    return tfidf.idf_
# IDF weights are passed to the models as an additional attribute.
additional_attrs_wiki['idf'] = getTF(dataset_wiki)
# Loop body continues beyond this chunk of the file.
for mname in MODELS_TO_USE: