def get_ll(x, parzen, batch_size=10):
    '''
    Evaluate `parzen` over the rows of `x` in batches and collect the results.

    The rows are split into ceil(n_rows / batch_size) batches. Batch k is
    made of every n_batches-th row starting at row k (a strided selection,
    not a contiguous slice), so the returned values are in batch order
    rather than in the original row order.

    x: indexable object exposing `.shape[0]` (row count) that accepts a
       sequence of row indices.
    parzen: callable applied to each batch; its result is appended to the
       output with `list.extend`, so it must be iterable.
    batch_size: upper bound on the number of rows per batch.

    Returns the flat list of all values produced by `parzen`.
    '''
    n_rows = x.shape[0]
    row_ids = range(n_rows)
    n_batches = int(numpy.ceil(float(n_rows) / batch_size))

    durations = []
    lls = []
    for batch_no in range(n_batches):
        # The timed region covers both the row selection and the call.
        started = time.time()
        batch_ll = parzen(x[row_ids[batch_no::n_batches]])
        finished = time.time()

        durations.append(finished - started)
        lls.extend(batch_ll)

        # Progress is reported on every tenth batch only.
        if batch_no % 10 != 0:
            continue
        log.maybeLog(None, [batch_no, make_time_units_string(numpy.mean(durations)), numpy.mean(lls)])

    return lls
def CSL(h_samples, x_test, model):
    '''
    Conservative Sampling-based Log-likelihood (CSL)
    "Bounding the Test Log-Likelihood of Generative Models"
    Yoshua Bengio, Li Yao, Kyunghyun Cho
    http://arxiv.org/pdf/1311.6184.pdf

    For every test row x, `model.pxh(x, h)` is summed over all entries of
    `h_samples` and divided by their count; the log of the mean along
    axis 1 of that average is accumulated. The accumulated expression,
    divided by the number of test rows, is evaluated and returned.

    h_samples: sized, integer-indexable collection; element `[0]` of each
        entry must provide `.eval()`.
    x_test: row-sliceable object supporting `.shape[0]` and `len()`.
    model: object providing `pxh(x, h)`.

    Progress (estimated time remaining) is logged after every test row.
    '''
    log.maybeLog(None, "Starting CSL estimate...")
    t = time.time()
    n_test = x_test.shape[0]
    n_h = len(h_samples)
    LL = 0
    times = []
    # `range` instead of the Python-2-only `xrange`, matching get_ll.
    for x_i in range(n_test):
        _t = time.time()
        # Slice (not index) so the row keeps its leading axis.
        x = x_test[x_i:x_i+1]
        r = T.zeros_like(x, dtype='float32')

        # NOTE(review): each h is re-evaluated for every test row; if
        # `.eval()` is deterministic this could be computed once up front.
        for h_i in range(n_h):
            h = h_samples[h_i][0].eval()
            r += model.pxh(x,h)

        fs = r / len(h_samples)
        LL += T.log(fs.mean(1))
        times.append(time.time()-_t)
        # Row x_i is finished at this point, so n_test - x_i - 1 rows are left.
        log.maybeLog(None, make_time_units_string(numpy.average(times)*(n_test-x_i-1))+" remaining")

    log.maybeLog(None, "CSL took "+make_time_units_string(time.time()-t))
    return (LL / len(x_test)).eval()
def main(sigma, dataset, data_path='../data/', sample_paths=('samples.npy',)):
    '''
    Log the Parzen-window log-likelihood of the test set for saved samples.

    For each path in `sample_paths`, the samples are loaded with
    `numpy.load`, a Parzen estimator is built with `theano_parzen(samples,
    sigma)`, the dataset is loaded, and `get_ll` is run on the first
    element of the test inputs. Log-likelihoods accumulate across sample
    files, and the running mean and standard error of the mean are logged
    after each file.

    sigma: passed through to `theano_parzen`.
    dataset, data_path: passed through to `load_datasets`.
    sample_paths: iterable of .npy files holding generated samples
        (the original comment mentions 10k samples per file).

    Returns None; results are reported through `log.maybeLog`.
    '''
    lls = []
    for sample_path in sample_paths:
        # provide a .npy file where 10k generated samples are saved.
        log.maybeLog(None, 'loading samples from %s'%sample_path)

        samples = numpy.load(sample_path)

        parzen = theano_parzen(samples, sigma)

        # Only the inputs of the third (test) split are used.
        (_, _), (_, _), (test_X, _) = load_datasets(dataset,data_path)
        test_X = raise_to_list(test_X)
        test_ll = get_ll(test_X[0], parzen)
        lls.extend(test_ll)

        log.maybeLog(None, "Mean Log-Likelihood of test set = %.5f" % numpy.mean(lls))
        # Standard error of the mean: divide by sqrt(N) for the actual number
        # of values rather than a fixed 100 (which is only right for N=10000).
        std_of_mean = numpy.std(lls) / numpy.sqrt(len(lls))
        log.maybeLog(None, "Std of Mean Log-Likelihood of test set = %.5f" % std_of_mean)