import time

import numpy
import theano.tensor as T

# `log`, `make_time_units_string`, `theano_parzen`, `load_datasets`, and
# `raise_to_list` are project-local helpers assumed to be imported from this
# repository's utility modules.


def get_ll(x, parzen, batch_size=10):
    """Compute the Parzen window log-likelihood of `x` in batches."""
    inds = range(x.shape[0])
    n_batches = int(numpy.ceil(float(len(inds)) / batch_size))
    times = []
    lls = []
    for i in range(n_batches):
        begin = time.time()
        # Strided indexing splits the examples into `n_batches` interleaved batches.
        ll = parzen(x[inds[i::n_batches]])
        end = time.time()
        times.append(end - begin)
        lls.extend(ll)
        if i % 10 == 0:
            log.maybeLog(None, [i, make_time_units_string(numpy.mean(times)), numpy.mean(lls)])
    return lls
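# The `parzen` argument above is the compiled estimator built by `theano_parzen`
# in `main` below: it maps a batch of test examples to per-example
# log-likelihoods under a Gaussian kernel density fit to the generated samples.
# The function below is a minimal NumPy sketch of such an estimator, not the
# repository's implementation; the name `parzen_ll` is illustrative.


def parzen_ll(samples, sigma):
    """Hypothetical sketch of a Gaussian Parzen window log-likelihood.

    `samples` has shape (n_samples, dim); the returned callable maps a batch of
    test points to per-example log-likelihoods.
    """
    def estimator(x_batch):
        lls = []
        for x in x_batch:
            # Squared distances to every generated sample, scaled by the bandwidth.
            diffs = (x[None, :] - samples) / sigma
            exponents = -0.5 * numpy.sum(diffs ** 2, axis=1)
            # log mean_k N(x; sample_k, sigma^2 I), via log-sum-exp for stability.
            log_norm = numpy.log(len(samples)) + \
                samples.shape[1] * numpy.log(sigma * numpy.sqrt(2 * numpy.pi))
            m = exponents.max()
            lls.append(m + numpy.log(numpy.exp(exponents - m).sum()) - log_norm)
        return numpy.asarray(lls)
    return estimator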
def CSL(h_samples, x_test, model):
    '''
    Conservative Sampling-based Log-likelihood (CSL).

    "Bounding the Test Log-Likelihood of Generative Models"
    Yoshua Bengio, Li Yao, Kyunghyun Cho
    http://arxiv.org/pdf/1311.6184.pdf
    '''
    log.maybeLog(None, "Starting CSL estimate...")
    t = time.time()
    x_indices = range(x_test.shape[0])
    h_indices = range(len(h_samples))
    LL = 0
    times = []
    for x_i in x_indices:
        _t = time.time()
        x = x_test[x_i:x_i + 1]
        # Average the model's conditional p(x | h) over the cached hidden samples.
        r = T.zeros_like(x, dtype='float32')
        for h_i in h_indices:
            h = h_samples[h_i][0].eval()
            r += model.pxh(x, h)
        fs = r / len(h_samples)
        LL += T.log(fs.mean(1))
        times.append(time.time() - _t)
        # Log an estimate of the remaining time after every example.
        if x_i % 1 == 0:
            log.maybeLog(None, make_time_units_string(numpy.average(times) * (x_test.shape[0] - x_i)) + " remaining")
    log.maybeLog(None, "CSL took " + make_time_units_string(time.time() - t))
    return (LL / len(x_test)).eval()
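# The quantity CSL estimates is the conservative bound from the cited paper:
# given hidden samples h_1..h_K drawn from the model, the test log-likelihood
# is lower-bounded by log (1/K) * sum_k p(x | h_k). The function below is a
# small NumPy sketch of that bound given a precomputed matrix of conditionals;
# the name `csl_bound` and the matrix layout are illustrative assumptions, and
# real implementations (like the Theano code above) keep the computation in the
# model's graph rather than materializing this matrix.


def csl_bound(px_given_h):
    """Hypothetical sketch of the CSL bound from Bengio, Yao & Cho (2013).

    `px_given_h` is an (n_test, n_h_samples) array whose entry [i, k] is the
    conditional likelihood p(x_i | h_k) under the model:
        CSL = (1/N) * sum_i log( (1/K) * sum_k p(x_i | h_k) )
    """
    per_example = numpy.log(px_given_h.mean(axis=1))
    return per_example.mean()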
def main(sigma, dataset, data_path='../data/', sample_paths=['samples.npy']):
    lls = []
    for sample_path in sample_paths:
        # Each path should point to a .npy file of generated samples (e.g. 10k samples).
        filename = sample_path
        log.maybeLog(None, 'loading samples from %s' % filename)
        samples = numpy.load(filename)
        parzen = theano_parzen(samples, sigma)

        (_, _), (_, _), (test_X, _) = load_datasets(dataset, data_path)
        test_X = raise_to_list(test_X)

        test_ll = get_ll(test_X[0], parzen)
        lls.extend(test_ll)

    log.maybeLog(None, "Mean Log-Likelihood of test set = %.5f" % numpy.mean(lls))
    # Standard error of the mean, assuming 10,000 test examples (sqrt(10000) = 100).
    log.maybeLog(None, "Std of Mean Log-Likelihood of test set = %.5f" % (numpy.std(lls) / 100))
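# A typical invocation might look like the following; the bandwidth, dataset
# name, and sample path are placeholder values you would supply yourself, not
# defaults taken from this repository.
if __name__ == '__main__':
    # Example values only: `sigma` is the Parzen bandwidth, and samples.npy
    # holds generated samples saved by a separate sampling script.
    main(sigma=0.2, dataset='mnist', data_path='../data/', sample_paths=['samples.npy'])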