Example #1
    def init_z(self, frame_id=-1, image_id=-1):
        nz = self.nz
        n_sigma = 0.5
        self.iter_total = 0
        # set prev_z
        if self.z_seq is not None and image_id >= 0:
            image_id = image_id % self.z_seq.shape[0]
            frame_id = frame_id % self.z_seq.shape[1]
            print('set z as image %d, frame %d' % (image_id, frame_id))
            self.prev_z = self.z_seq[image_id, frame_id]

        if self.prev_z is None:  #random initialization
            self.z_init = np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz))
            self.opt_solver.set_smoothness(0.0)
            self.z_const = self.z_init
            self.prev_zs = self.z_init
        else:  # add small noise to initial latent vector, so that we can get different results
            z0_r = np.tile(self.prev_z, [self.batch_size, 1])
            z0_n = np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz)) * n_sigma
            self.z_init = np.clip(z0_r + z0_n, -0.99, 0.99)
            self.opt_solver.set_smoothness(5.0)
            self.z_const = np.tile(self.prev_z, [self.batch_size, 1])
            self.prev_zs = z0_r

        self.opt_solver.initialize(self.z_init)
        self.just_fixed = True
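
These examples all assume two helpers that are never shown in the snippets: a seeded NumPy RandomState named np_rng and a floatX cast to Theano's configured float dtype. A minimal sketch of what they might look like (the seed and layout here are illustrative, not taken from any of the quoted projects):

import numpy as np
import theano

# hypothetical stand-ins for the helpers the examples rely on
np_rng = np.random.RandomState(42)  # illustrative seed

def floatX(X):
    # cast to Theano's configured float dtype (usually float32)
    return np.asarray(X, dtype=theano.config.floatX)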
Example #2
    def init_z(self, frame_id=-1, image_id=-1):
        nz = self.nz
        n_sigma = 0.5
        self.iter_total = 0
        # set prev_z
        if self.z_seq is not None and image_id >= 0:
            image_id = image_id % self.z_seq.shape[0]
            frame_id = frame_id % self.z_seq.shape[1]
            print('set z as image %d, frame %d' % (image_id, frame_id))
            self.prev_z = self.z_seq[image_id, frame_id]

        if self.prev_z is None:  #random initialization
            self.z_init = np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz))
            self.opt_solver.set_smoothness(0.0)
            self.z_const = self.z_init
            self.prev_zs = self.z_init
        else:  # add small noise to initial latent vector, so that we can get different results
            z0_r = np.tile(self.prev_z, [self.batch_size, 1])
            z0_n = np_rng.uniform(-1.0, 1.0,
                                  size=(self.batch_size, nz)) * n_sigma
            self.z_init = np.clip(z0_r + z0_n, -0.99, 0.99)
            self.opt_solver.set_smoothness(5.0)
            self.z_const = np.tile(self.prev_z, [self.batch_size, 1])
            self.prev_zs = z0_r

        self.opt_solver.initialize(self.z_init)
        self.just_fixed = True
Example #3
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        print 'i:', i
        # ymb.shape = (nbatch, ny)
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), NUM_LABEL))
        print 'gen_samples: ymb:', ymb.shape
        print ymb

        # zmb.shape = (nbatch, DIM_Z)
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, DIM_Z)))
        print 'gen_samples: zmb:', zmb.shape
        print zmb

        # xmb
        xmb = _gen(zmb, ymb)
        print 'gen_samples: xmb:', xmb.shape
        print xmb

        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
    ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), NUM_LABEL))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, DIM_Z)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
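
The conditional snippets above and below also assume a OneHot helper that turns integer class labels into one-hot rows. A minimal NumPy sketch under that assumption (the name one_hot and its signature are illustrative):

import numpy as np

def one_hot(labels, n_classes):
    # hypothetical stand-in for OneHot: maps integer labels to a
    # (len(labels), n_classes) matrix of one-hot rows
    out = np.zeros((len(labels), n_classes), dtype=np.float32)
    out[np.arange(len(labels)), labels] = 1.0
    return out

# e.g. one_hot(np.array([1, 0, 2]), 3) -> rows [0,1,0], [1,0,0], [0,0,1]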
Example #4
    def init_z(self, frame_id=0, image_id=0):
        # print('init z!!!!!')
        nz = 100
        n_sigma = 0.5
        self.iter_total = 0

        # set prev_z
        if self.z_seq is not None:
            image_id = image_id % self.z_seq.shape[0]
            frame_id = frame_id % self.z_seq.shape[1]
            print('set z as image %d, frame %d' % (image_id, frame_id))
            self.prev_z = self.z_seq[image_id, frame_id]

        if self.prev_z is None:
            # print('random initialization')
            self.z0_f = floatX(
                np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz)))
            self.zero_z_const()
            self.z_i = self.z0_f.copy(
            )  # floatX(np_rng.uniform(-1.0, 1.0, size=(batch_size, nz)))
            self.z1 = self.z0_f.copy()
        else:
            z0_r = np.tile(self.prev_z, [self.batch_size, 1])
            z0_n = floatX(
                np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz)) *
                n_sigma)
            self.z0_f = floatX(np.clip(z0_r + z0_n, -0.99, 0.99))
            self.z_i = np.tile(self.prev_z, [self.batch_size, 1])
            self.z1 = z0_r.copy()

        z = self.invert_model[2]
        z.set_value(floatX(np.arctanh(self.z0_f)))
        self.just_fixed = True
Example #5
def gen_samples(n, nbatch=128):
    samples = []
    n_gen = 0
    for i in range(n/nbatch):
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        n_gen += len(xmb)
    n_left = n-n_gen
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb)
    samples.append(xmb)    
    return np.concatenate(samples, axis=0)
Example #6
def gen_samples(n, nbatch=128):
    samples = []
    n_gen = 0
    for i in range(n / nbatch):
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        n_gen += len(xmb)
    n_left = n - n_gen
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb)
    samples.append(xmb)
    return np.concatenate(samples, axis=0)
Example #7
    def gen_samples(self,
                    z0=None,
                    n=32,
                    batch_size=32,
                    nz=100,
                    use_transform=True):
        assert n % batch_size == 0

        samples = []

        if z0 is None:
            z0 = np_rng.uniform(-1., 1., size=(n, nz))
        else:
            n = len(z0)
            batch_size = max(n, 64)
        n_batches = int(np.ceil(n / float(batch_size)))

        for i in range(n_batches):
            zmb = floatX(z0[batch_size * i:min(len(z0), batch_size *
                                               (i + 1)), :])
            xmb = self._gen(zmb)
            samples.append(xmb)

        samples = np.concatenate(samples, axis=0)

        if use_transform:
            samples = self.inverse_transform(samples, npx=self.npx)
            samples = (samples * 255).astype(np.uint8)
        return samples
Example #8
def invert_bfgs(gen_model,
                invert_model,
                ftr_model,
                im,
                z_predict=None,
                npx=64):
    _f, z = invert_model
    nz = gen_model.nz
    if z_predict is None:
        z_predict = np_rng.uniform(-1., 1., size=(1, nz))
    else:
        z_predict = floatX(z_predict)
    z_predict = np.arctanh(z_predict)
    im_t = gen_model.transform(im)
    ftr = ftr_model(im_t)

    prob = optimize.minimize(f_bfgs,
                             z_predict,
                             args=(_f, im_t, ftr),
                             tol=1e-6,
                             jac=True,
                             method='L-BFGS-B',
                             options={'maxiter': 200})
    print('n_iters = %3d, f = %.3f' % (prob.nit, prob.fun))
    z_opt = prob.x
    z_opt_n = floatX(z_opt[np.newaxis, :])
    [f_opt, g, gx] = _f(z_opt_n, im_t, ftr)
    gx = gen_model.inverse_transform(gx, npx=npx)
    z_opt = np.tanh(z_opt)
    return gx, z_opt, f_opt
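
invert_bfgs keeps the latent inside (-1, 1) by optimizing an unconstrained variable: the initial guess is mapped through np.arctanh before L-BFGS-B runs and the solution is mapped back through np.tanh. A self-contained sketch of that reparameterization on a toy quadratic objective (the target vector and names are illustrative only):

import numpy as np
from scipy import optimize

z_target = np.array([0.3, -0.7])  # illustrative target inside (-1, 1)

def f(u):
    # optimize over unconstrained u; tanh(u) is the actual latent
    z = np.tanh(u)
    diff = z - z_target
    # chain rule: d tanh(u)/du = 1 - tanh(u)^2
    return np.sum(diff ** 2), 2.0 * diff * (1.0 - z ** 2)

u0 = np.arctanh(np.zeros(2))                   # start from z = 0
res = optimize.minimize(f, u0, jac=True, method='L-BFGS-B')
z_opt = np.tanh(res.x)                         # back inside (-1, 1)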
Example #9
def test_model(model_config_dict, model_test_name):
    import glob
    model_list = glob.glob(samples_dir +'/*.pkl')
    # load parameters
    model_param_dicts = unpickle(model_list[0])

    # load generator
    generator_models = load_generator_model(min_num_gen_filters=model_config_dict['min_num_gen_filters'],
                                            model_params_dict=model_param_dicts)
    generator_function = generator_models[0]

    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)

    print 'START SAMPLING'
    for s in xrange(model_config_dict['num_sampling']):
        print '{} sampling'.format(s)
        hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                             high=model_config_dict['hidden_distribution'],
                                             size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
        sample_data = sampling_function(hidden_data)[0]
        sample_data = inverse_transform(np.asarray(sample_data)).transpose([0,2,3,1])
        save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TRAIN){}.png'.format(s+1)
        color_grid_vis(sample_data, (16, 16), save_as)
Example #10
def gen_classes(name, steps, classes, interpolate=False, start=None):
  bymb = get_buffer_y(steps, num_buffer_classes, 3)  # not sure why, but samples are better when a
  bzmb = get_buffer_z(steps, num_buffer_classes, 3)  # buffer of common classes is included
  offset = bymb.shape[0]
  numtargets = len(classes)
  targets = np.asarray([[classes[i] for _ in range(steps)] for i in range(numtargets)])
  ymb = floatX(OneHot(targets.flatten(), ny))
  zmb = floatX(np_rng.uniform(-1., 1., size=(numtargets * steps, nz)))
  ymb = np.vstack((bymb, ymb))
  zmb = np.vstack((bzmb, zmb))
  if interpolate:
    if numtargets > 1:
      for i in range(numtargets):
        y1 = classes[i]
        y2 = classes[(i+1) % numtargets]
        for j in range(steps):
          y = offset + steps * i + j
          ymb[y] = np.zeros(ny)
          if y1 == y2:
            ymb[y][y1] = 1.0
          else:
            ymb[y][y1] = 1.0 - j / (steps-1.0)
            ymb[y][y2] = j / (steps-1.0)
    zmb = setup_z(zmb, offset, classes, numtargets, steps, start)
  indexes = range(offset, ymb.shape[0])
  samples = gen_image(name, ymb, zmb, steps, indexes)
  gen_image_set(name, ymb, zmb, indexes)
  return ymb[offset:], zmb[offset:], samples
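
The interpolate branch above blends both the one-hot labels and the latent vectors linearly across steps, using the weight j / (steps - 1.0). A standalone sketch of that linear interpolation between two latent vectors (array names are illustrative):

import numpy as np

def lerp_latents(z1, z2, steps):
    # rows move from z1 to z2 in equal increments, matching the
    # j / (steps - 1.0) weighting used in gen_classes above
    fracs = np.linspace(0.0, 1.0, steps)[:, None]
    return (1.0 - fracs) * z1[None, :] + fracs * z2[None, :]

z1 = np.random.uniform(-1.0, 1.0, size=100)
z2 = np.random.uniform(-1.0, 1.0, size=100)
zs = lerp_latents(z1, z2, steps=8)  # shape (8, 100)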
Example #11
    def def_invert(self,
                   model,
                   batch_size=1,
                   beta=0.5,
                   lr=0.1,
                   b1=0.9,
                   nz=100,
                   use_bin=True):
        beta_r = sharedX(beta)
        x_c = T.tensor4()
        m_c = T.tensor4()
        x_e = T.tensor4()
        m_e = T.tensor4()
        z0 = T.matrix()
        z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
        gx = model.model_G(z)

        mm_c = T.tile(m_c, (1, gx.shape[1], 1, 1))
        color_all = T.mean(T.sqr(gx - x_c) * mm_c, axis=(1, 2, 3)) / (
            T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
        gx_edge = HOGNet.get_hog(gx, use_bin)
        x_edge = HOGNet.get_hog(x_e, use_bin)
        mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
        sum_e = T.sum(T.abs_(mm_e))
        sum_x_edge = T.sum(T.abs_(x_edge))
        edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (
            T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
        rec_all = color_all + edge_all * sharedX(0.2)
        z_const = sharedX(10.0)
        init_all = T.mean(T.sqr(z0 - z)) * z_const

        if beta > 0:
            print('using D')
            p_gen = model.model_D(gx)
            real_all = T.nnet.binary_crossentropy(p_gen, T.ones(
                p_gen.shape)).T  # costs.bce(p_gen, T.ones(p_gen.shape))
            cost_all = rec_all + beta_r * real_all[0] + init_all
        else:
            print('without D')
            cost_all = rec_all + init_all
            real_all = T.zeros(cost_all.shape)

        cost = T.sum(cost_all)
        d_updater = updates.Adam(
            lr=sharedX(lr),
            b1=sharedX(b1))  # ,regularizer=updates.Regularizer(l2=l2))
        output = [
            gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge
        ]

        print 'COMPILING...'
        t = time()

        z_updates = d_updater([z], cost)
        _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0],
                                  outputs=output,
                                  updates=z_updates)
        print '%.2f seconds to compile _invert function' % (time() - t)
        return [_invert, z_updates, z, beta_r, z_const]
Example #12
def rand_gen(size, noise_type='normal'):
    if noise_type == 'normal':
        r_vals = floatX(np_rng.normal(size=size))
    elif noise_type == 'uniform':
        r_vals = floatX(np_rng.uniform(size=size, low=-1.0, high=1.0))
    else:
        assert False, "unrecognized noise type!"
    return r_vals
Example #13
def rand_fill(x, m, scale=1.):
    '''
    Fill masked parts of x, indicated by m, using uniform noise.
    -- assume data is in [-1, 1] (i.e. comes from train_transform())
    '''
    m = 1. * (m > 1e-3)
    nz = (scale * (np_rng.uniform(size=x.shape) - 0.5))
    x_nz = (m * nz) + ((1. - m) * x)
    return x_nz
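
A small usage sketch of rand_fill under its stated assumption that the data lives in [-1, 1] (the shapes and mask here are illustrative):

import numpy as np

x = np.random.uniform(-1.0, 1.0, size=(1, 3, 8, 8))  # illustrative image batch
m = np.zeros_like(x)
m[..., 2:6, 2:6] = 1.0                                # mask the centre patch

x_filled = rand_fill(x, m, scale=1.0)
# inside the mask: uniform noise in scale * [-0.5, 0.5); outside: x unchanged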
Example #14
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n/nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, ny, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n-n_gen
    ymb = floatX(OneHot(np_rng.randint(0, ny, n_left), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)    
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
Example #15
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
    ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
Example #16
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb, tmp_yb, yb2, d, h3, h5 = _gen(zmb, ymb)
        print 'tmp_yb:', tmp_yb.shape
        print 'yb2:', yb2.shape
        print 'd:', d.shape
        print 'h3:', h3.shape
        print 'h5:', h5.shape
        sys.exit()
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
    ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
Example #17
    def def_invert(self, model, batch_size=1, d_weight=0.5, nc=1, lr=0.1, b1=0.9, nz=100, use_bin=True):
        d_weight_r = sharedX(d_weight)
        x_c = T.tensor4()
        m_c = T.tensor4()
        x_e = T.tensor4()
        m_e = T.tensor4()
        z0 = T.matrix()
        z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
        gx = model.model_G(z)
        # input: im_c: 255: no edge; 0: edge; transform=> 1: no edge, 0: edge

        if nc == 1:  # gx, range [0, 1] => edge, 1
            gx3 = 1.0 - gx  # T.tile(gx, (1, 3, 1, 1))
        else:
            gx3 = gx
        mm_c = T.tile(m_c, (1, gx3.shape[1], 1, 1))
        color_all = T.mean(T.sqr(gx3 - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
        gx_edge = self.hog.get_hog(gx3)
        x_edge = self.hog.get_hog(x_e)
        mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
        sum_e = T.sum(T.abs_(mm_e))
        sum_x_edge = T.sum(T.abs_(x_edge))
        edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
        rec_all = color_all + edge_all * sharedX(0.2)
        z_const = sharedX(5.0)
        init_all = T.mean(T.sqr(z0 - z)) * z_const

        if d_weight > 0:
            print('using D')
            p_gen = model.model_D(gx)
            real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T
            cost_all = rec_all + d_weight_r * real_all[0] + init_all
        else:
            print('without D')
            cost_all = rec_all + init_all
            real_all = T.zeros(cost_all.shape)

        cost = T.sum(cost_all)
        d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1))
        output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge]

        print('COMPILING...')
        t = time()

        z_updates = d_updater([z], cost)
        _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates)
        print('%.2f seconds to compile _invert function' % (time() - t))
        return [_invert, z_updates, z, d_weight_r, z_const]
Example #18
    def def_invert(self, model, batch_size=1, d_weight=0.5, nc=1, lr=0.1, b1=0.9, nz=100, use_bin=True):
        d_weight_r = sharedX(d_weight)
        x_c = T.tensor4()
        m_c = T.tensor4()
        x_e = T.tensor4()
        m_e = T.tensor4()
        z0 = T.matrix()
        z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
        gx = model.model_G(z)
        # input: im_c: 255: no edge; 0: edge; transform=> 1: no edge, 0: edge

        if nc == 1: # gx, range [0, 1] => edge, 1
            gx3 = 1.0-gx #T.tile(gx, (1, 3, 1, 1))
        else:
            gx3 = gx
        mm_c = T.tile(m_c, (1, gx3.shape[1], 1, 1))
        color_all = T.mean(T.sqr(gx3 - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
        gx_edge = self.hog.get_hog(gx3)
        x_edge = self.hog.get_hog(x_e)
        mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
        sum_e = T.sum(T.abs_(mm_e))
        sum_x_edge = T.sum(T.abs_(x_edge))
        edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
        rec_all = color_all + edge_all * sharedX(0.2)
        z_const = sharedX(5.0)
        init_all = T.mean(T.sqr(z0 - z)) * z_const

        if d_weight > 0:
            print('using D')
            p_gen = model.model_D(gx)
            real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T
            cost_all = rec_all + d_weight_r * real_all[0] + init_all
        else:
            print('without D')
            cost_all = rec_all + init_all
            real_all = T.zeros(cost_all.shape)

        cost = T.sum(cost_all)
        d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1))
        output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge]

        print('COMPILING...')
        t = time()

        z_updates = d_updater([z], cost)
        _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates)
        print('%.2f seconds to compile _invert function' % (time() - t))
        return [_invert, z_updates, z, d_weight_r, z_const]
Example #19
File: dcgan_theano.py  Project: hsab/iGAN
    def gen_samples(self, z0=None, n=32, batch_size=32, use_transform=True):
        assert n % batch_size == 0

        samples = []

        if z0 is None:
            z0 = np_rng.uniform(-1., 1., size=(n, self.nz))
        else:
            n = len(z0)
            batch_size = max(n, 64)
        n_batches = int(np.ceil(n/float(batch_size)))
        for i in range(n_batches):
            zmb = floatX(z0[batch_size * i:min(n, batch_size * (i + 1)), :])
            xmb = self._gen(zmb)
            samples.append(xmb)

        samples = np.concatenate(samples, axis=0)
        if use_transform:
            samples = self.inverse_transform(samples, npx=self.npx, nc=self.nc)
            samples = (samples * 255).astype(np.uint8)
        return samples
Example #20
def gen_classes_arithmetic(name, steps, classes, weights):
  bymb = get_buffer_y(steps, num_buffer_classes, 3)  # not sure why, but samples are better when a
  bzmb = get_buffer_z(steps, num_buffer_classes, 3)  # buffer of common classes is included
  offset = bymb.shape[0]
  numtargets = len(classes)+1
  targets = np.asarray([[classes[i % (numtargets-1)] for _ in range(steps)] for i in range(numtargets)])
  ymb = floatX(OneHot(targets.flatten(), ny))
  zmb = floatX(np_rng.uniform(-1., 1., size=(numtargets * steps, nz)))
  ymb = np.vstack((bymb, ymb))
  zmb = np.vstack((bzmb, zmb))
  for i in range(numtargets):
    for j in range(steps):
      y_idx = offset + steps * i + j
      ymb[y_idx] = np.zeros(ny)
      if i == numtargets-1:
        for k, c in enumerate(classes):
          ymb[y_idx][c] = weights[k]
      else:
        ymb[y_idx][classes[i]] = 1.0
      frac = j / (steps-1.0)
      if frac > 0.5:
        frac = 2.0 * (1.0 - frac)
      else:
        frac = 2.0 * frac
      if (i == numtargets-1):
        z1 = zf[classes[0]][0]
        z2 = zf[classes[0]][1]
      else:
        z1 = zf[classes[i]][0]
        z2 = zf[classes[i]][1]
      for k in range(nz): 
        z = (1.0 - frac) * z1[k] + frac * z2[k]
        #z = min(z, z2 - z)
        zmb[y_idx][k] = z
  indexes = range(offset, ymb.shape[0])
  samples = gen_image(name, ymb, zmb, steps, indexes)
  gen_image_set(name, ymb, zmb, indexes)
  return ymb[offset:], zmb[offset:], samples
Example #21
def main(genpath, datasetname, outpath, target=False):
    #params
    DIM = 512
    SAMPLES = 3000 #3000
    nz = 2
    if target:
        #load samples from db
        xmb = toy_dataset(DATASET=datasetname, size=SAMPLES)
        generate_image(xmb, path=outpath)
    else:
        #load
        gen_fn, generator = create_G(DIM = DIM)
        #for all in the path:
        params_map = dict(np.load(genpath))
        params=list()
        for key,vals in sorted(params_map.items(),key=lambda x: int(x[0].split("_")[1])):
            params.append(np.float32(vals))
        #set params
        lasagne.layers.set_all_param_values(generator, params)
        # generate sample
        s_zmb = floatX(np_rng.uniform(-1., 1., size=(SAMPLES, nz)))
        g_imgs = gen_fn(s_zmb)
        generate_image(g_imgs, path=outpath)
Example #22
def main(path, datasetname):
    #params
    DIM = 512
    SAMPLES = 25000
    nz = 2
    #load
    gen_fn, generator = create_G(DIM=DIM)
    #load samples from db
    xmb = toy_dataset(DATASET=datasetname, size=SAMPLES)
    #for all in the path:
    for root, dirs, files in os.walk(path):
        mmd_list = []
        files.sort(key=lambda x: int(x.split("_")[1].split(".")[0]))
        for filename in files:
            try:
                genpath = os.path.join(root, filename)
                params_map = dict(np.load(genpath))
                params = list()
                for key, vals in sorted(params_map.items(),
                                        key=lambda x: int(x[0].split("_")[1])):
                    params.append(np.float32(vals))
                #set params
                lasagne.layers.set_all_param_values(generator, params)
                # generate sample
                s_zmb = floatX(np_rng.uniform(-1., 1., size=(SAMPLES, nz)))
                g_imgs = gen_fn(s_zmb)
                mmd = abs(compute_metric_mmd2(g_imgs, xmb))
                print("MMD: ", mmd, genpath)
                mmd_list.append((mmd, genpath))
            except:
                pass
        mmd_list.sort(key=lambda v: v[0])
        i = 0
        for val, name in mmd_list[:10]:
            i += 1
            print("Best MMD[" + str(i) + "]", val, math.sqrt(val), name)
Example #23
def train_model(data_stream,
                energy_optimizer,
                generator_optimizer,
                generator_bn_optimizer,
                model_config_dict,
                model_test_name):

    [generator_function, generator_params, generator_bn_params] = set_generator_model(model_config_dict['hidden_size'],
                                                                                      model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['expert_size'],
                                                                          model_config_dict['min_num_eng_filters'])
    # compile functions
    print 'COMPILING ENERGY UPDATER'
    t=time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t=time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_bn_params=generator_bn_params,
                                                      generator_optimizer=generator_optimizer,
                                                      generator_bn_optimizer=generator_bn_optimizer)
    print '%.2f SEC '%(time()-t)

    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling
    fixed_hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                               high=model_config_dict['hidden_distribution'],
                                               size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))

    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data   = transform(batch_data[0])
            num_data     = input_data.shape[0]

            hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                 high=model_config_dict['hidden_distribution'],
                                                 size=(num_data, model_config_dict['hidden_size'])))
            noise_data   = floatX(np_rng.normal(scale=0.01*(0.99**int(batch_count/100)),
                                                size=(num_data, num_channels, input_shape, input_shape)))

            updater_inputs = [input_data,
                              hidden_data,
                              noise_data,
                              batch_count]
            updater_outputs = generator_updater(*updater_inputs)
            noise_data   = floatX(np_rng.normal(scale=0.01*(0.99**int(batch_count/100)),
                                                size=(num_data, num_channels, input_shape, input_shape)))
            updater_inputs = [input_data,
                              hidden_data,
                              noise_data,
                              batch_count]
            updater_outputs = energy_updater(*updater_inputs)

            # get output values
            input_energy  = updater_outputs[0].mean()
            sample_energy = updater_outputs[1].mean()

            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)

            # batch count up
            batch_count += 1

            if batch_count%1==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print '   TRAIN RESULTS'
                print '================================================================'
                print '     input energy     : ', input_energy_list[-1]
                print '----------------------------------------------------------------'
                print '     sample energy    : ', sample_energy_list[-1]
                print '================================================================'

            if batch_count%1000==0:
                # sample data
                [sample_data_t, sample_data_f] = sampling_function(fixed_hidden_data)
                sample_data_t = np.asarray(sample_data_t)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TRAIN){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data_t).transpose([0,2,3,1]), (16, 16), save_as)
                sample_data_f = np.asarray(sample_data_f)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TEST){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data_f).transpose([0,2,3,1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy',
                        arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy',
                        arr=np.asarray(sample_energy_list))

                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=generator_params + generator_bn_params + energy_params,
                           save_to=save_as)
Example #24
def main(
        problem,
        popsize,
        moegan,
        freq,
        loss_type=['trickLogD', 'minimax', 'ls'],
        postfix=None,
        nPassD=1,  # backpropagation passes for the discriminator
        inBatchSize=64):

    # Parameters
    task = 'toy'
    name = '{}_{}_{}MMDu2'.format(
        problem, "MOEGAN" if moegan else "EGAN",
        postfix + "_" if postfix is not None else "")  #'8G_MOEGAN_PFq_NFd_t2'

    DIM = 512
    begin_save = 0
    nloss = len(loss_type)
    batchSize = inBatchSize

    if problem == "8G":
        DATASET = '8gaussians'
    elif problem == "25G":
        DATASET = '25gaussians'
    else:
        exit(-1)

    ncandi = popsize
    kD = nPassD  # # of discrim updates for each gen update
    kG = 1  # # of gen updates for each discrim update
    ntf = 256
    b1 = 0.5  # momentum term of adam
    nz = 2  # # of dim for Z
    niter = 4  # # of iter at starting learning rate
    lr = 0.0001  # initial learning rate for adam G
    lrd = 0.0001  # initial learning rate for adam D
    N_up = 100000
    save_freq = freq
    show_freq = freq
    test_deterministic = True
    beta = 1.
    GP_norm = False  # if use gradients penalty on discriminator
    LAMBDA = 2.  # hyperparameter of GP
    NSGA2 = moegan
    # Load the dataset

    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM,
                                                          GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradients penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(),
                                wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)

        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss,
                                     discriminator_params,
                                     learning_rate=lrtd,
                                     beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd score
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                              discriminator_loss,
                              updates=updates_d)

    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(),
        (fake_out > 0.5).mean(), Fd_score
    ])

    #main MODEL G
    noise = T.matrix('noise')
    generator_trainer = create_G(noise=noise,
                                 discriminator=discriminator,
                                 lr=lr,
                                 b1=b1,
                                 DIM=DIM)

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print(desc)

    if not os.path.isdir('front'):
        os.mkdir(os.path.join('front'))
    if not os.path.isdir('front/' + desc):
        os.mkdir(os.path.join('front/', desc))
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/' + desc):
        os.mkdir(os.path.join('models/', desc))

    instances = []

    class Instance:
        def __init__(self, fq, fd, params, img_values):
            self.fq = fq
            self.fd = fd
            self.params = params
            self.img = img_values

        def f(self):
            return self.fq - self.fd

    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD)
        xmb = xmb[0:batchSize * kD]
        # initial G cluster
        if n_updates == 0:
            for can_i in range(0, ncandi):
                init_generator_trainer = create_G(noise=noise,
                                                  discriminator=discriminator,
                                                  lr=lr,
                                                  b1=b1,
                                                  DIM=DIM)
                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                cost = init_generator_trainer.train(loss_type[can_i % nloss],
                                                    zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = init_generator_trainer.gen(sample_zmb)
                frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                instances.append(
                    Instance(
                        frr_score, fd_score,
                        lasagne.layers.get_all_param_values(
                            init_generator_trainer.generator), gen_imgs))
        else:
            instances_old = instances
            instances = []
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    generator_trainer.set(instances_old[can_i].params)
                    #train
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    generator_trainer.train(loss_type[type_i], zmb)
                    #score
                    sample_zmb = floatX(np_rng.uniform(-1., 1.,
                                                       size=(ntf, nz)))
                    gen_imgs = generator_trainer.gen(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    #save
                    instances.append(
                        Instance(frr_score, fd_score, generator_trainer.get(),
                                 gen_imgs))
            if ncandi <= (len(instances) + len(instances_old)):
                if NSGA2 == True:
                    #add parents in the pool
                    for inst in instances_old:
                        generator_trainer.set(inst.params)
                        sample_zmb = floatX(
                            np_rng.uniform(-1., 1., size=(ntf, nz)))
                        gen_imgs = generator_trainer.gen(sample_zmb)
                        frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                        instances.append(
                            Instance(frr_score, fd_score,
                                     generator_trainer.get(), gen_imgs))
                    #cromos = { idx:[float(inst.fq),-0.5*float(inst.fd)] for idx,inst in enumerate(instances) } # S1
                    cromos = {
                        idx: [-float(inst.fq), 0.5 * float(inst.fd)]
                        for idx, inst in enumerate(instances)
                    }  # S2
                    cromos_idxs = [idx for idx, _ in enumerate(instances)]
                    finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)
                    instances = [instances[p] for p in finalpop]
                    with open('front/%s.tsv' % desc, 'wb') as ffront:
                        for inst in instances:
                            ffront.write(
                                (str(inst.fq) + "\t" + str(inst.fd)).encode())
                            ffront.write("\n".encode())
                elif nloss > 1:
                    #sort new
                    instances.sort(
                        key=lambda inst: -inst.f())  #wrong def in the paper
                    #print([inst.f() for inst in instances])
                    #cut best ones
                    instances = instances[len(instances) - ncandi:]
                    #print([inst.f() for inst in instances])

        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = instances[i].img[0:ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr)
            real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr)
            fake_rate_p = np.array([frp]) if i == 0 else np.append(
                fake_rate_p, frp)
            real_rate_p = np.array([trp]) if i == 0 else np.append(
                real_rate_p, trp)
            FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore)

        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()

        # train D
        #for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
        #    cost = train_d(xreal, xfake)
        imgs_fakes = instances[0].img[0:int(batchSize / ncandi * kD), :]
        for i in range(1, len(instances)):
            img = instances[i].img[0:int(batchSize / ncandi * kD), :]
            imgs_fakes = np.append(imgs_fakes, img, axis=0)
        for xreal, xfake in iter_data(xmb, shuffle(imgs_fakes),
                                      size=batchSize):
            cost = train_d(xreal, xfake)

        if (n_updates % show_freq == 0 and n_updates != 0) or n_updates == 1:
            id_update = int(n_updates / save_freq)
            #metric
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            #compue mmd for all points
            mmd2_all = []
            for i in range(0, ncandi):
                generator_trainer.set(instances[i].params)
                g_imgs = generator_trainer.gen(s_zmb)
                mmd2_all.append(abs(compute_metric_mmd2(g_imgs, xmb)))
            mmd2_all = np.array(mmd2_all)
            #print pareto front
            if NSGA2 == True:
                front_path = os.path.join('front/', desc)
                with open('%s/%d_%s_mmd2u.tsv' % (front_path, id_update, desc),
                          'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write((str(instances[idx].fq) + "\t" +
                                      str(instances[idx].fd) + "\t" +
                                      str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            #mmd2 output
            print(n_updates, "mmd2u:", np.min(mmd2_all), "id:",
                  np.argmin(mmd2_all))
            #save best
            params = instances[np.argmin(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_min = generator_trainer.gen(s_zmb)
            generate_image(xmb,
                           g_imgs_min,
                           id_update,
                           desc,
                           postfix="_mmu2d_best")
            np.savez('models/%s/gen_%d.npz' % (desc, id_update),
                     *generator_trainer.get())
            np.savez('models/%s/dis_%d.npz' % (desc, id_update),
                     *lasagne.layers.get_all_param_values(discriminator))
            #worst_debug
            params = instances[np.argmax(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_max = generator_trainer.gen(s_zmb)
            generate_image(xmb,
                           g_imgs_max,
                           id_update,
                           desc,
                           postfix="_mmu2d_worst")
Example #25
    def noise_batch(self, samples=None):
        if samples is None:
            return floatX(
                np_rng.uniform(-1., 1., size=(self.batchSize, self.noiseSize)))
        return floatX(np_rng.uniform(-1., 1., size=(samples, self.noiseSize)))
Example #26
def main():
    # Parameters
    task = 'toy'
    name = '25G'
    
    DIM=512
    begin_save = 0 
    loss_type = ['trickLogD','minimax','ls'] 
    nloss = 3
    DATASET = '25gaussians'
    batchSize = 64 
   
    ncandi = 1 
    kD = 1             # # of discrim updates for each gen update
    kG = 1            # # of gen updates for each discrim update
    ntf = 256 
    b1 = 0.5          # momentum term of adam
    nz = 2          # # of dim for Z
    niter = 4       # # of iter at starting learning rate
    lr = 0.0001       # initial learning rate for adam G
    lrd = 0.0001       # initial learning rate for adam D
    N_up = 100000 
    save_freq = 10000 
    show_freq = 10000 
    test_deterministic = True   
    beta = 1.
    GP_norm = False     # if use gradients penalty on discriminator
    LAMBDA = 2.       # hyperparameter of GP 
   
    # Load the dataset
   
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1)
            + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
 
    # Gradients penalty norm 
    if GP_norm is True: 
        alpha = t_rng.uniform((batchSize,1), low=0.,high=1.)  
        differences = fake_imgs - real_imgs  
        interpolates = real_imgs + (alpha*differences)
        gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(), wrt=interpolates) 
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes-1.)**2)
 
        D_loss = discriminator_loss +  LAMBDA*gradient_penalty 
        b1_d = 0. 
    else:
        D_loss = discriminator_loss 
        b1_d = 0. 
 
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
            D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd score
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params) 
    Fd_score  = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd))
     
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                               discriminator_loss,
                               updates=updates_d)
    # Compile another function generating some data
    dis_fn = theano.function([real_imgs,fake_imgs],[(fake_out).mean(),Fd_score])
    disft_fn = theano.function([real_imgs,fake_imgs],
                               [real_out.mean(),
                                fake_out.mean(),
                                (real_out>0.5).mean(),
                                (fake_out>0.5).mean(),
                                Fd_score])
 
    # Finally, launch the training loop.
    print("Starting training...") 
    desc = task + '_' + name 
    print desc
    
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson'%desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/'+desc):
        os.mkdir(os.path.join('models/',desc))
     
    gen_new_params = [] 
     
    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize*kD) 
        xmb = xmb[0:batchSize*kD] 
        # initial G cluster 
        if n_updates == 0:
            for can_i in range(0,ncandi): 
                train_g, gen_fn, generator = create_G(
                        loss_type=loss_type[can_i%nloss],
                        discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) 
                for _ in range(0,kG):
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    cost = train_g(zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = gen_fn(sample_zmb)
 
                gen_new_params.append(lasagne.layers.get_all_param_values(generator)) 
                    
                if can_i == 0: 
                    g_imgs_old=gen_imgs
                    fmb = gen_imgs[0:batchSize/ncandi*kD,:]
                else: 
                    g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0)
                    fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:],axis=0)
            #print gen_new_params
            # MODEL G
            noise = T.matrix('noise')
            generator = models_uncond.build_generator_toy(noise,nd=DIM)
            Tgimgs = lasagne.layers.get_output(generator)
            Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

            g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
            g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
            g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))

            g_params = lasagne.layers.get_all_params(generator, trainable=True)

            up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1)
            up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1)
            up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1)

            train_g = theano.function([noise],g_loss_logD,updates=up_g_logD)
            train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax)
            train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls)

            gen_fn = theano.function([noise], lasagne.layers.get_output(
                                    generator,deterministic=True))
        else:
            gen_old_params = gen_new_params
            for can_i in range(0,ncandi):
                for type_i in range(0,nloss):
                    lasagne.layers.set_all_param_values(generator, gen_old_params[can_i])
                    if loss_type[type_i] == 'trickLogD':
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g(zmb)
                    elif loss_type[type_i] == 'minimax': 
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_minimax(zmb)
                    elif loss_type[type_i] == 'ls': 
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_ls(zmb)

                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf],gen_imgs)
                    #frr = frr[0]
                    frr = frr_score - fd_score 
                    if can_i*nloss + type_i < ncandi: 
                        idx = can_i*nloss + type_i
                        gen_new_params[idx]=lasagne.layers.get_all_param_values(generator)
                        fake_rate[idx]=frr
                        g_imgs_old[idx*ntf:(idx+1)*ntf,:]=gen_imgs
                        fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:] = \
                            gen_imgs[0:batchSize/ncandi*kD,:]
                    else: 
                        fr_com = fake_rate - frr
                        if min(fr_com) < 0:
                            ids_replace = np.where(fr_com==min(fr_com)) 
                            idr = ids_replace[0][0]
                            fake_rate[idr]=frr

                            gen_new_params[idr] = lasagne.layers.get_all_param_values(generator)

                            g_imgs_old[idr*ntf:(idr+1)*ntf,:]=gen_imgs
                            fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:] = \
                                gen_imgs[0:batchSize/ncandi*kD,:]

        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi*ntf) 
        sample_xmb = sample_xmb[0:ncandi*ntf] 
        for i in range(0, ncandi):
            xfake = g_imgs_old[i*ntf:(i+1)*ntf,:]
            xreal = sample_xmb[i*ntf:(i+1)*ntf,:]
            tr, fr, trp, frp, fdscore = disft_fn(xreal,xfake) 
            if i == 0:
                fake_rate = np.array([fr])   
                real_rate = np.array([tr])     
                fake_rate_p = np.array([frp])   
                real_rate_p = np.array([trp])     
                FDL = np.array([fdscore])     
            else:
                fake_rate = np.append(fake_rate,fr)
                real_rate = np.append(real_rate,tr)
                fake_rate_p = np.append(fake_rate_p,frp)
                real_rate_p = np.append(real_rate_p,trp)
                FDL = np.append(FDL,fdscore)
        print fake_rate, fake_rate_p, FDL
        print (n_updates, real_rate.mean(), real_rate_p.mean()) 
        f_log.write(str(fake_rate)+' '+str(fake_rate_p)+'\n'+ str(n_updates) + ' ' + str(real_rate.mean())+ ' ' +str(real_rate_p.mean())+'\n')
        f_log.flush()

        # train D 
        for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

        if n_updates % show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            g_imgs = gen_fn(s_zmb)
            xmb = toy_dataset(DATASET=DATASET, size=512) 
            generate_image(xmb,g_imgs,n_updates/save_freq,desc)
Example #27
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates

print "COMPILING"
t = time()
_train_g = theano.function([X, Z, Y], cost, updates=g_updates)
_train_d = theano.function([X, Z, Y], cost, updates=d_updates)
_gen = theano.function([Z, Y], gX)
print "%.2f seconds to compile theano functions" % (time() - t)

tr_idxs = np.arange(len(trX))
trX_vis = np.asarray([[trX[i] for i in py_rng.sample(tr_idxs[trY == y], 20)] for y in range(10)]).reshape(200, -1)
trX_vis = inverse_transform(transform(trX_vis))
grayscale_grid_vis(trX_vis, (10, 20), "samples/%s_etl_test.png" % desc)

sample_zmb = floatX(np_rng.uniform(-1.0, 1.0, size=(200, nz)))
sample_ymb = floatX(OneHot(np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(), ny))


def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1.0, 1.0, size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
Example #28
def gen_z(n):
    if args.znorm:
        return floatX(normalize(np_rng.uniform(-1., 1., size=(n, nz))))
    else:
        return floatX(np_rng.uniform(-1., 1., size=(n, nz)))
Example #29
g_updates = g_updater(gen_params, g_cost)

print 'COMPILING'
t = time()
_train_d = theano.function([X, X0], d_cost, updates=d_updates)
_train_g = theano.function([Z, deltaX], g_cost, updates=g_updates)
_gen = theano.function([Z], gen(Z, *gen_params))
_logp_rbm = theano.function([X], logp_rbm(X))
_svgd_gradient = theano.function([X], svgd_gradient(X))
print '%.2f seconds to compile theano functions' % (time() - t)

nbatch = 100
n_iter = 20
n_updates = 0

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))

for iter in tqdm(range(1, n_iter + 1)):
    trX = shuffle(trX)
    for imb in iter_data(trX, size=nbatch):
        imb = floatX(imb)
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))

        # generate samples
        samples = floatX(_gen(zmb).reshape(-1, nx))

        grad, svgd_grad = _svgd_gradient(samples)
        _train_g(zmb, floatX(svgd_grad.reshape(-1, nc, npx, npx)))  # generator

        _train_d(imb, floatX(samples))  # discriminator
Example #30
def discrim(X, w, w2, g2, b2, w3, g3, b3, w4, g4, b4, w5, g5, b5, w6, g6, b6, wy):
    h = lrelu(dnn_conv(X, w, subsample=(1, 1), border_mode=(1, 1)))
    h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(1, 1)), g=g2, b=b2))
    h3 = lrelu(batchnorm(dnn_conv(h2, w3, subsample=(1, 1), border_mode=(1, 1)), g=g3, b=b3))
    h4 = lrelu(batchnorm(dnn_conv(h3, w4, subsample=(2, 2), border_mode=(1, 1)), g=g4, b=b4))
    h5 = lrelu(batchnorm(dnn_conv(h4, w5, subsample=(1, 1), border_mode=(1, 1)), g=g5, b=b5))
    h6 = lrelu(batchnorm(dnn_conv(h5, w6, subsample=(2, 2), border_mode=(1, 1)), g=g6, b=b6))
    h6 = T.flatten(h6, 2)
    y = sigmoid(T.dot(h6, wy))
    return y

def inverse_transform(X):
    X = (X.reshape(-1, nc, npx, npx).transpose(0, 2, 3, 1)+1.)/2.
    return X

Z = T.matrix()
X = T.tensor4()

gX = gen(Z, *gen_params)
dX = discrim(X, *discrim_params)

_gen = theano.function([Z], gX)
_discrim = theano.function([X], dX)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(400, 256)))
samples = _gen(sample_zmb)
scores = _discrim(samples)
sort = np.argsort(scores.flatten())[::-1]
samples = samples[sort]
color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png')
Example #31
    'n_examples', 
    'n_seconds',
    '1k_va_nnd',
    '10k_va_nnd',
    '100k_va_nnd',
    'g_cost',
    'd_cost',
]
tr_data, te_data, tr_stream, val_stream, te_stream = faces(ntrain=ntrain) # Only tr_data/tr_stream are used.
tr_handle = tr_data.open()
vaX, = tr_data.get_data(tr_handle, slice(0, 10000))
vaX = transform(vaX)
vis_idxs = py_rng.sample(np.arange(len(vaX)), nvis)
vaX_vis = inverse_transform(vaX[vis_idxs])
color_grid_vis(vaX_vis, (14, 14), 'samples/%s_etl_test.png'%desc)
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz)))
vaX = vaX.reshape(len(vaX), -1)

# DEFINE NETWORKS.
relu = activations.Rectify()
sigmoid = activations.Sigmoid()
lrelu = activations.LeakyRectify()
tanh = activations.Tanh()
bce = T.nnet.binary_crossentropy
gifn = inits.Normal(scale=0.02)
difn = inits.Normal(scale=0.02)
gain_ifn = inits.Normal(loc=1., scale=0.02)
bias_ifn = inits.Constant(c=0.)
gw  = gifn((nz, ngf*8*4*4), 'gw')
gg = gain_ifn((ngf*8*4*4), 'gg')
gb = bias_ifn((ngf*8*4*4), 'gb')
Example #32
print('COMPILING...')
t = time()
_estimate_bn = theano.function([Z], bn_data)
print('%.2f seconds to compile theano functions' % (time() - t))


# batchnorm statistics
nb_sum = []
nb_mean = []
nb_mean_ext = []


# first pass
print('first pass: computing mean')
for n in tqdm(range(num_batches)):
    zmb = floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))
    bn_data = _estimate_bn(zmb)

    if n == 0:
        for d in bn_data:
            nb_sum.append(d)
    else:
        for id, d in enumerate(bn_data):
            nb_sum[id] = nb_sum[id] + d

# compute empirical mean
for id, d_sum in enumerate(nb_sum):
    if d_sum.ndim == 4:
        m = np.mean(d_sum, axis=(0, 2, 3)) / num_batches
        nb_mean.append(m)
        nb_mean_ext.append(np.reshape(m, [1, len(m), 1, 1]))
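
The snippet stops after the mean pass. A second pass over fresh z batches would typically estimate the per-channel variances around those means; a minimal sketch of that pass, reusing _estimate_bn, nb_mean_ext, num_batches, batch_size and nz from above (nb_sq_sum, nb_var and the loop body are assumptions, not the original code):

# second pass (sketch): accumulate per-channel squared deviations
nb_sq_sum = []
print('second pass: computing variance')
for n in tqdm(range(num_batches)):
    zmb = floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))
    bn_data = _estimate_bn(zmb)
    k = 0  # index into nb_mean_ext, which only holds the 4-D activations
    for d in bn_data:
        if d.ndim != 4:
            continue
        sq = np.mean((d - nb_mean_ext[k]) ** 2, axis=(0, 2, 3))
        if n == 0:
            nb_sq_sum.append(sq)
        else:
            nb_sq_sum[k] = nb_sq_sum[k] + sq
        k += 1

# average the per-batch estimates to get per-channel variances
nb_var = [s / num_batches for s in nb_sq_sum]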
Example #33
def continue_train_model(last_batch_idx,
                         data_stream,
                         energy_optimizer,
                         generator_optimizer,
                         model_config_dict,
                         model_test_name):
    model_list = glob.glob(samples_dir +'/*.pkl')
    # load parameters
    model_param_dicts = unpickle(model_list[0])
    generator_models = load_generator_model(min_num_gen_filters=model_config_dict['min_num_gen_filters'],
                                            model_params_dict=model_param_dicts)
    generator_function = generator_models[0]
    generator_params   = generator_models[1]

    energy_models = load_energy_model(num_experts=model_config_dict['expert_size'],
                                      model_params_dict=model_param_dicts)
    feature_function = energy_models[0]
    # norm_function    = energy_models[1]
    expert_function  = energy_models[1]
    # prior_function   = energy_models[3]
    energy_params    = energy_models[2]

    # compile functions
    print 'COMPILING MODEL UPDATER'
    t=time()
    generator_updater, generator_optimizer_params = set_generator_update_function(energy_feature_function=feature_function,
                                                                                  # energy_norm_function=norm_function,
                                                                                  energy_expert_function=expert_function,
                                                                                  # energy_prior_function=prior_function,
                                                                                  generator_function=generator_function,
                                                                                  generator_params=generator_params,
                                                                                  generator_optimizer=generator_optimizer,
                                                                                  init_param_dict=model_param_dicts)
    energy_updater, energy_optimizer_params = set_energy_update_function(energy_feature_function=feature_function,
                                                                         # energy_norm_function=norm_function,
                                                                         energy_expert_function=expert_function,
                                                                         # energy_prior_function=prior_function,
                                                                         generator_function=generator_function,
                                                                         energy_params=energy_params,
                                                                         energy_optimizer=energy_optimizer,
                                                                         init_param_dict=model_param_dicts)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling
    fixed_hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                               high=model_config_dict['hidden_distribution'],
                                               size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))

    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # batch count up
            batch_count += 1
            if batch_count<last_batch_idx:
                continue

            # set update function inputs
            input_data   = transform(batch_data[0])
            num_data     = input_data.shape[0]
            hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                 high=model_config_dict['hidden_distribution'],
                                                 size=(num_data, model_config_dict['hidden_size'])))

            noise_data      = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input    = [hidden_data, noise_data]
            update_output   = generator_updater(*update_input)
            entropy_weights = update_output[1].mean()
            entropy_cost    = update_output[2].mean()

            noise_data      = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input    = [input_data, hidden_data, noise_data]
            update_output   = energy_updater(*update_input)
            input_energy    = update_output[0].mean()
            sample_energy   = update_output[1].mean()

            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)

            if batch_count%10==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print '   TRAIN RESULTS'
                print '================================================================'
                print '     input energy     : ', input_energy_list[-1]
                print '----------------------------------------------------------------'
                print '     sample energy    : ', sample_energy_list[-1]
                print '----------------------------------------------------------------'
                print '     entropy weight   : ', entropy_weights
                print '----------------------------------------------------------------'
                print '     entropy cost     : ', entropy_cost
                print '================================================================'

            if batch_count%100==0:
                # sample data
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy',
                        arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy',
                        arr=np.asarray(sample_energy_list))

                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=generator_params[0] + generator_params[1] + energy_params + generator_optimizer_params + energy_optimizer_params,
                           save_to=save_as)
Example #34
def main():
    # Parameters
    data_path = '../datasets/'
    task = 'face'
    name = '128'

    start = 0
    stop = 202560 
    input_nc = 3 
    loss_type = ['trickLogD','minimax','ls'] 
    nloss = 3
    shuffle_ = True 
    batchSize = 32 
    fineSize = 128 
    flip = True
   
    ncandi = 1         # # of surviving children
    kD = 3             # # of discrim updates for each gen update
    kG = 1             # # of gen updates for each discrim update
    ntf = batchSize*kD 
    b1 = 0.5           # momentum term of adam
    nz = 100           # # of dim for Z
    ngf = 64           # # of gen filters in first conv layer
    ndf = 64           # # of discrim filters in first conv layer
    niter = 25         # # of iter at starting learning rate
    lr = 0.0002        # initial learning rate for adam G
    lrd = 0.0002       # initial learning rate for adam D
    beta = 0.001       # hyperparameter balancing the fitness score
    GP_norm = False    # whether to use gradient penalty on the discriminator
    LAMBDA = 2.        # hyperparameter of GP 
    
    save_freq = 5000 
    show_freq = 500 
    begin_save = 0 
    test_deterministic = True   
    
    # Load the dataset
    print("Loading data...")
    f = h5py.File(data_path+'img_align_celeba_128.hdf5','r') 
    trX = f['data'] 
    ids = range(start, stop)
    
    ################## MODEL D ####################### 
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.tensor4('real_imgs')
    fake_imgs = T.tensor4('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_128(ndf=ndf)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1)
            + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    
    # Gradient penalty norm
    if GP_norm is True: 
        alpha = t_rng.uniform((batchSize,1,1,1), low=0.,high=1.)  
        differences = fake_imgs - real_imgs  
        interpolates = real_imgs + (alpha*differences)
        gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(), wrt=interpolates) 
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1,2,3)))
        gradient_penalty = T.mean((slopes-1.)**2)
 
        D_loss = discriminator_loss +  LAMBDA*gradient_penalty 
        b1_d = 0. 
    else:
        D_loss = discriminator_loss 
        b1_d = b1 
 
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
            D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))
    
    # Diversity fitness
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params) 
    Fd_score  = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                               discriminator_loss,
                               updates=updates_d)
    
    # Compile another function generating some data
    disft_fn = theano.function([real_imgs,fake_imgs],
                               [(real_out).mean(),
                                (fake_out).mean(),
                                Fd_score])
 
    # Finally, launch the training loop.
    print("Starting training...") 
    desc = task + '_' + name 
    print desc
    
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson'%desc, 'wb')
    if not os.path.isdir('samples'):
        os.mkdir(os.path.join('samples/'))
    if not os.path.isdir('samples/'+desc):
        os.mkdir(os.path.join('samples/',desc))
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/'+desc):
        os.mkdir(os.path.join('models/',desc))
     
    gen_new_params = [] 
    n_updates = 0
     
    # We iterate over epochs:
    for epoch in range(niter):
        t = time()
        if shuffle_ is True:
            ids = shuffle(ids)
        for index_ in iter_data(ids, size=batchSize*kD):
            index = sorted(index_) 
            xmb = trX[index,:,:,:]
            xmb = Batch(xmb,fineSize,input_nc,flip=flip)
            xmb = processing_img(xmb, center=True, scale=True, convert=False)
                        
            rand_idx = random.randint(start,stop-ntf-1) 
            rand_ids = ids[rand_idx:rand_idx+ntf] 
            rand_ids = sorted(rand_ids) 
            sample_xmb = trX[rand_ids,:,:,:]
            sample_xmb = Batch(sample_xmb,fineSize,input_nc,flip=flip)
            sample_xmb = processing_img(sample_xmb, center=True, scale=True, convert=False)
     
            # initial G cluster 
            if epoch + n_updates == 0:
                for can_i in range(0,ncandi): 
                    train_g, gen_fn, generator = create_G(
                            loss_type=loss_type[can_i%nloss],
                            discriminator=discriminator, lr=lr, b1=b1, ngf=ngf) 
                    for _ in range(0,kG):
                        zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                        cost = train_g(zmb)
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
 
                    gen_new_params.append(lasagne.layers.get_all_param_values(generator)) 
                    
                    if can_i == 0: 
                        g_imgs_old=gen_imgs
                        fmb = gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                    else: 
                        g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0)
                        fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:,:,:],axis=0)
                #print gen_new_params
                # MODEL G
                noise = T.matrix('noise')
                generator = models_uncond.build_generator_128(noise,ngf=ngf)
                Tgimgs = lasagne.layers.get_output(generator)
                Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

                g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
                g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
                g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))

                g_params = lasagne.layers.get_all_params(generator, trainable=True)

                up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1)
                up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1)
                up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1)

                train_g = theano.function([noise],g_loss_logD,updates=up_g_logD)
                train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax)
                train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls)

                gen_fn = theano.function([noise], lasagne.layers.get_output(
                                        generator,deterministic=True))
            else:
                gen_old_params = gen_new_params
                for can_i in range(0,ncandi):
                    for type_i in range(0,nloss):
                        
                        lasagne.layers.set_all_param_values(generator, gen_old_params[can_i])
                        if loss_type[type_i] == 'trickLogD':
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g(zmb)
                        elif loss_type[type_i] == 'minimax': 
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g_minimax(zmb)
                        elif loss_type[type_i] == 'ls': 
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g_ls(zmb)

                        sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                        gen_imgs = gen_fn(sample_zmb)

                        _, fr_score, fd_score = disft_fn(sample_xmb,gen_imgs)
                        fit = fr_score - fd_score 
                     
                        if can_i*nloss + type_i < ncandi: 
                            idx = can_i*nloss + type_i
                            gen_new_params[idx]=lasagne.layers.get_all_param_values(generator)
                            fitness[idx]=fit
                            fake_rate[idx]=fr_score
                            g_imgs_old[idx*ntf:(idx+1)*ntf,:,:,:]=gen_imgs
                            fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:,:,:] = \
                                gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                        else: 
                            fit_com = fitness - fit
                            if min(fit_com) < 0:
                                ids_replace = np.where(fit_com==min(fit_com)) 
                                idr = ids_replace[0][0]
                                fitness[idr]=fit
                                fake_rate[idr]=fr_score

                                gen_new_params[idr] = lasagne.layers.get_all_param_values(generator)

                                g_imgs_old[idr*ntf:(idr+1)*ntf,:,:,:]=gen_imgs
                                fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:,:,:] = \
                                    gen_imgs[0:batchSize/ncandi*kD,:,:,:]

                print fake_rate, fitness
                f_log.write(str(fake_rate) + ' '+str(fd_score) +' ' + str(fitness)+ '\n')

            # train D 
            for xreal,xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
                cost = train_d(xreal, xfake)
            for i in range(0, ncandi):
                xfake = g_imgs_old[i*ntf:(i+1)*ntf,:,:,:]
                xreal = sample_xmb[0:ntf,:,:,:]
                tr, fr, fd = disft_fn(xreal,xfake) 
                if i == 0:
                    fake_rate = np.array([fr])   
                    fitness = np.array([0.])   
                    real_rate = np.array([tr])     
                    FDL = np.array([fd])     
                else:
                    fake_rate = np.append(fake_rate,fr)
                    fitness = np.append(fitness,[0.])
                    real_rate = np.append(real_rate,tr)
                    FDL = np.append(FDL,fd)
            print fake_rate, FDL
            print (n_updates, epoch,real_rate.mean()) 
            n_updates += 1
            f_log.write(str(fake_rate)+' '+str(FDL)+ '\n'+ str(epoch)+' '+str(n_updates)+' '+str(real_rate.mean())+'\n')
            f_log.flush()

            if n_updates%show_freq == 0:
                blank_image = Image.new("RGB",(fineSize*8+9,fineSize*8+9))
                for i in range(8):
                    for ii in range(8):
                        img = g_imgs_old[i*8+ii,:,:,:]
                        img = ImgRescale(img, center=True, scale=True, convert_back=True)
                        blank_image.paste(Image.fromarray(img),(ii*fineSize+ii+1,i*fineSize+i+1)) 
                blank_image.save('samples/%s/%s_%d.png'%(desc,desc,n_updates/save_freq))

            if n_updates%save_freq == 0 and epoch > begin_save - 1:
                # Optionally, you could now dump the network weights to a file like this:
                np.savez('models/%s/gen_%d.npz'%(desc,n_updates/save_freq), *lasagne.layers.get_all_param_values(generator))
                np.savez('models/%s/dis_%d.npz'%(desc,n_updates/save_freq), *lasagne.layers.get_all_param_values(discriminator))
Example #35
def train_model(data_stream,
                energy_optimizer,
                generator_optimizer,
                model_config_dict,
                model_test_name):

    [generator_function, generator_params, generator_entropy_params] = set_generator_model(model_config_dict['hidden_size'],
                                                                                           model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['hidden_size'],
                                                                          model_config_dict['min_num_eng_filters'])
    # compile functions
    print 'COMPILING ENERGY UPDATER'
    t=time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t=time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_entropy_params=generator_entropy_params,
                                                      generator_optimizer=generator_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING EVALUATION FUNCTION'
    t=time()
    evaluation_function = set_evaluation_and_sampling_function(feature_function=feature_function,
                                                               energy_function=energy_function,
                                                               generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling
    fixed_hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                               high=model_config_dict['hidden_distribution'],
                                               size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))

    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data   = transform(batch_data[0])
            num_data     = input_data.shape[0]

            hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                 high=model_config_dict['hidden_distribution'],
                                                 size=(num_data, model_config_dict['hidden_size'])))

            noise_data   = np_rng.normal(size=input_data.shape)
            noise_data   = floatX(noise_data*model_config_dict['init_noise']*(model_config_dict['noise_decay']**e))

            # update generator
            generator_update_inputs = [input_data,
                                       hidden_data,
                                       noise_data,
                                       e]
            [input_energy_val, sample_energy_val, entropy_cost] = generator_updater(*generator_update_inputs)

            # update energy function
            energy_update_inputs = [input_data,
                                    hidden_data,
                                    e]
            [input_energy_val, sample_energy_val, ] = energy_updater(*energy_update_inputs)

            # get output values
            input_energy  = input_energy_val.mean()
            sample_energy = sample_energy_val.mean()

            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)

            # batch count up
            batch_count += 1

            if batch_count%100==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print '   TRAIN RESULTS'
                print '================================================================'
                print '     input energy     : ', input_energy
                print '----------------------------------------------------------------'
                print '     sample energy    : ', sample_energy
                print '----------------------------------------------------------------'
                print '     entropy cost     : ', entropy_cost
                print '================================================================'

            if batch_count%1000==0:
                # sample data
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(batch_count)
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy',
                        arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy',
                        arr=np.asarray(sample_energy_list))
Example #36
def train_model(data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name):

    generator_models = set_generator_model(
        num_hiddens=model_config_dict["hidden_size"], min_num_gen_filters=model_config_dict["min_num_gen_filters"]
    )
    generator_function = generator_models[0]
    generator_params = generator_models[1]

    energy_models = set_energy_model(
        num_experts=model_config_dict["expert_size"], min_num_eng_filters=model_config_dict["min_num_eng_filters"]
    )
    feature_function = energy_models[0]
    # norm_function    = energy_models[1]
    expert_function = energy_models[1]
    # prior_function   = energy_models[3]
    energy_params = energy_models[2]

    # compile functions
    print "COMPILING MODEL UPDATER"
    t = time()
    generator_updater = set_generator_update_function(
        energy_feature_function=feature_function,
        # energy_norm_function=norm_function,
        energy_expert_function=expert_function,
        # energy_prior_function=prior_function,
        generator_function=generator_function,
        generator_params=generator_params,
        generator_optimizer=generator_optimizer,
    )
    energy_updater = set_energy_update_function(
        energy_feature_function=feature_function,
        # energy_norm_function=norm_function,
        energy_expert_function=expert_function,
        # energy_prior_function=prior_function,
        generator_function=generator_function,
        energy_params=energy_params,
        energy_optimizer=energy_optimizer,
    )
    print "%.2f SEC " % (time() - t)
    print "COMPILING SAMPLING FUNCTION"
    t = time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print "%.2f SEC " % (time() - t)

    # set fixed hidden data for sampling
    fixed_hidden_data = floatX(
        np_rng.uniform(
            low=-model_config_dict["hidden_distribution"],
            high=model_config_dict["hidden_distribution"],
            size=(model_config_dict["num_display"], model_config_dict["hidden_size"]),
        )
    )

    print "START TRAINING"
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict["epochs"]):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(
                np_rng.uniform(
                    low=-model_config_dict["hidden_distribution"],
                    high=model_config_dict["hidden_distribution"],
                    size=(num_data, model_config_dict["hidden_size"]),
                )
            )

            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [hidden_data, noise_data]
            update_output = generator_updater(*update_input)
            entropy_weights = update_output[1].mean()
            entropy_cost = update_output[2].mean()

            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [input_data, hidden_data, noise_data]
            update_output = energy_updater(*update_input)
            input_energy = update_output[0].mean()
            sample_energy = update_output[1].mean()

            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)

            # batch count up
            batch_count += 1

            if batch_count % 10 == 0:
                print "================================================================"
                print "BATCH ITER #{}".format(batch_count), model_test_name
                print "================================================================"
                print "   TRAIN RESULTS"
                print "================================================================"
                print "     input energy     : ", input_energy_list[-1]
                print "----------------------------------------------------------------"
                print "     sample energy    : ", sample_energy_list[-1]
                print "----------------------------------------------------------------"
                print "     entropy weight   : ", entropy_weights
                print "----------------------------------------------------------------"
                print "     entropy cost     : ", entropy_cost
                print "================================================================"

            if batch_count % 100 == 0:
                # sample data
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                save_as = samples_dir + "/" + model_test_name + "_SAMPLES(TRAIN){}.png".format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0, 2, 3, 1]), (16, 16), save_as)
                np.save(file=samples_dir + "/" + model_test_name + "_input_energy", arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + "/" + model_test_name + "_sample_energy", arr=np.asarray(sample_energy_list))

                save_as = samples_dir + "/" + model_test_name + "_MODEL.pkl"
                save_model(
                    tensor_params_list=generator_params[0] + generator_params[1] + energy_params, save_to=save_as
                )
Example #37
    def __call__(self, shape, name=None):
        return sharedX(np_rng.uniform(low=-self.scale,
                                      high=self.scale,
                                      size=shape),
                       name=name)
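
Only the __call__ is shown here; it reads like a uniform weight initializer whose scale is set in the constructor of the enclosing (unshown) class. Assuming an enclosing class along the lines of Uniform(scale=0.05), usage would look like:

uifn = Uniform(scale=0.05)       # hypothetical constructor that stores self.scale
w = uifn((100, 1024), name='w')  # Theano shared variable with values uniform in [-0.05, 0.05]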
Example #38
def mnistGANcond():
    """
    This example loads the 32x32 imagenet model used in the paper,
    generates 400 random samples, and sorts them according to the
    discriminator's probability of being real and renders them to
    the file samples.png
    """

    nc = 1
    npx = 28
    ngf = 64  # # of gen filters in first conv layer
    ndf = 128
    ny = 10  # # of classes

    nz = 100  # # of dim for Z
    k = 1  # # of discrim updates for each gen update
    l2 = 2.5e-5  # l2 weight decay
    b1 = 0.5  # momentum term of adam
    nc = 1  # # of channels in image
    ny = 10  # # of classes
    nbatch = 128  # # of examples in batch
    npx = 28  # # of pixels width/height of images
    nz = 100  # # of dim for Z
    ngfc = 1024  # # of gen units for fully connected layers
    ndfc = 1024  # # of discrim units for fully connected layers
    ngf = 64  # # of gen filters in first conv layer
    ndf = 64  # # of discrim filters in first conv layer
    nx = npx * npx * nc  # # of dimensions in X
    niter = 100  # # of iter at starting learning rate
    niter_decay = 100  # # of iter to linearly decay learning rate to zero
    lr = 0.0002

    relu = activations.Rectify()
    sigmoid = activations.Sigmoid()
    lrelu = activations.LeakyRectify()
    tanh = activations.Tanh()

    model_path = 'dcgan_code-master/mnist/models/cond_dcgan/'
    gen_params = [
        sharedX(p) for p in joblib.load(model_path + '200_gen_params.jl')
    ]
    discrim_params = [
        sharedX(p) for p in joblib.load(model_path + '200_discrim_params.jl')
    ]

    def gen(Z, Y, w, w2, w3, wx):
        yb = Y.dimshuffle(0, 1, 'x', 'x')
        Z = T.concatenate([Z, Y], axis=1)
        h = relu(batchnorm(T.dot(Z, w)))
        h = T.concatenate([h, Y], axis=1)
        h2 = relu(batchnorm(T.dot(h, w2)))
        h2 = h2.reshape((h2.shape[0], ngf * 2, 7, 7))
        h2 = conv_cond_concat(h2, yb)
        h3 = relu(
            batchnorm(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
        h3 = conv_cond_concat(h3, yb)
        x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2)))
        return x

    def discrim(X, Y, w, w2, w3, wy):
        yb = Y.dimshuffle(0, 1, 'x', 'x')
        X = conv_cond_concat(X, yb)
        h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
        h = conv_cond_concat(h, yb)
        h2 = lrelu(
            batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))))
        h2 = T.flatten(h2, 2)
        h2 = T.concatenate([h2, Y], axis=1)
        h3 = lrelu(batchnorm(T.dot(h2, w3)))
        h3 = T.concatenate([h3, Y], axis=1)
        y = sigmoid(T.dot(h3, wy))
        return y

    def inverse_transform(X):
        X = (X.reshape(-1, nc, npx, npx).transpose(0, 2, 3, 1) + 1.) / 2.
        return X

    Z = T.matrix()
    X = T.tensor4()
    Y = T.matrix()

    gX = gen(Z, Y, *gen_params)
    dX = discrim(X, Y, *discrim_params)

    _gen = theano.function([Z, Y], gX)
    _discrim = theano.function([X, Y], dX)

    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))
    sample_ymb = floatX(
        OneHot(
            np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(),
            ny))
    samples = _gen(sample_zmb, sample_ymb)
    scores = _discrim(samples, sample_ymb)
    print(scores[1:10])
    sort = np.argsort(scores.flatten())[::-1]
    samples = samples[sort]
    print(np.shape(inverse_transform(samples)))
    print(min(scores))
    print(max(scores))

    color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png')

    return inverse_transform(samples), sample_ymb
Example #39
def get_buffer_z(steps,num_buffer_samples=480,num_buffer_steps=3):
  num_buffer_rows = int(math.ceil(float(num_buffer_samples) / steps))
  zmb = floatX(np_rng.uniform(-1., 1., size=(num_buffer_rows * steps, nz)))
  return zmb
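
A quick check of the rounding (nz and the rest of the script are outside this snippet, and num_buffer_steps is unused in the body shown): the row count is num_buffer_samples rounded up to a whole number of steps-sized rows.

zmb = get_buffer_z(steps=7)  # ceil(480 / 7) = 69, so zmb.shape == (69 * 7, nz) == (483, nz)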
Example #40
bn_data = gbn + dbn

print('COMPILING...')
t = time()
_estimate_bn = theano.function([Z], bn_data)
print('%.2f seconds to compile theano functions' % (time() - t))

# batchnorm statistics
nb_sum = []
nb_mean = []
nb_mean_ext = []

# first pass
print('first pass: computing mean')
for n in tqdm(range(num_batches)):
    zmb = floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))
    bn_data = _estimate_bn(zmb)

    if n == 0:
        for d in bn_data:
            nb_sum.append(d)
    else:
        for id, d in enumerate(bn_data):
            nb_sum[id] = nb_sum[id] + d

# compute empirical mean
for id, d_sum in enumerate(nb_sum):
    if d_sum.ndim == 4:
        m = np.mean(d_sum, axis=(0, 2, 3)) / num_batches
        nb_mean.append(m)
        nb_mean_ext.append(np.reshape(m, [1, len(m), 1, 1]))
Example #41
def train_model(model_name,
                data_stream,
                num_hiddens,
                num_epochs,
                generator_optimizer):

    # set models
    print 'LOADING VGG'
    t=time()
    feature_extractor = load_vgg_feature_extractor()
    print '%.2f SEC '%(time()-t)
    sample_generator , generator_parameters = set_generator_model(num_hiddens)


    print 'COMPILING UPDATER AND SAMPLER'
    t=time()
    updater_function = set_updater_function(feature_extractor,
                                            sample_generator,
                                            generator_parameters,
                                            generator_optimizer)
    sampling_function = set_sampling_function(sample_generator)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling
    fixed_hidden_data  = floatX(np_rng.uniform(low=-1.0,
                                               high=1.0,
                                               size=(16*16, num_hiddens)))

    print 'START TRAINING'
    # for each epoch
    moment_cost_list = []
    batch_count = 0
    for e in xrange(num_epochs):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data  = transform(batch_data[0])
            hidden_data = floatX(np_rng.uniform(low=-1.0, high=1.0, size=(input_data.shape[0], num_hiddens)))

            updater_inputs = [input_data,
                              hidden_data]
            updater_outputs = updater_function(*updater_inputs)
            moment_cost_list.append(updater_outputs[0])

            # batch count up
            batch_count += 1

            if batch_count%10==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_name
                print '================================================================'
                print '   TRAIN RESULTS'
                print '================================================================'
                print '     moment matching cost     : ', moment_cost_list[-1]
                print '================================================================'

            if batch_count%100==0:
                # sample data
                save_as = samples_dir + '/' + model_name + '_SAMPLES{}.png'.format(batch_count)
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)

                np.save(file=samples_dir + '/' + model_name +'_MOMENT_COST',
                        arr=np.asarray(moment_cost_list))
Example #42
def gen_z(n):
    if args.znorm:
        return floatX(normalize(np_rng.uniform(-1., 1., size=(n, nz))))
    else:
        return floatX(np_rng.uniform(-1., 1., size=(n, nz)))
Example #43
                         b1=b1,
                         regularizer=updates.Regularizer(l2=l2))

d_updates = d_updater(discrim_params, d_cost)
g_updates = g_updater(gen_params, g_cost)

print 'COMPILING'
t = time()
_gen = theano.function([Z], gX)
_train_d = theano.function([X, X0], d_cost, updates=d_updates)
_train_g = theano.function([Z, deltaX], g_cost, updates=g_updates)
_vgd_gradient = theano.function([X0, X1], vgd_gradient(X0, X1))
_reconstruction_cost = theano.function([X], T.mean(mse_data))
print '%.2f seconds to compile theano functions' % (time() - t)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz)))

n_updates = 0
t = time()
for epoch in range(1, niter + 1):
    for filename in npzfiles:
        batch_data = shuffle(
            np.load(filename)['images'].astype(theano.config.floatX))

        for idx in tqdm(xrange(0, batch_data.shape[0] // nbatch)):
            imb = transform(batch_data[idx * nbatch:(idx + 1) * nbatch])

            zmb = floatX(np_rng.uniform(-1., 1., size=(imb.shape[0], nz)))
            # generate samples
            samples = _gen(zmb)
Example #44

#
#
# main
#
#

if __name__ == '__main__':
    NUM_CLASSES = 10  # # of classes
    nz = 100  # # of dim for Z

    Z = T.matrix('random')
    Y = T.matrix('label')

    #####
    mnist = BinaryMnist()
    generator = mnist.makeGeneratorLayers(NUM_MINIBATCH, Z, nz, Y, NUM_CLASSES)
    out = ll.get_output(generator)

    print 'compiling...'
    out_func = theano.function([Z, Y], out, mode='DebugMode')
    print 'compiling...DONE'

    #test
    Zval = floatX(np_rng.uniform(-1.0, 1.0, size=(NUM_MINIBATCH, nz)))
    Yval = floatX(OneHot(np_rng.randint(0, 10, NUM_MINIBATCH), NUM_CLASSES))

    ret = out_func(Zval, Yval)
    print 'ret', ret.shape
Example #45
def sample_z(a_batch_size, a_z_size):
    return floatX(np_rng.uniform(Z_MIN, Z_MAX, size=(a_batch_size, a_z_size)))
Example #46
        data = tr_data.get_data(tr_handle, slice(0, tr_data.num_examples))
        labels = data[labels_idx]
        vc_idx = np.where(labels == vc_num)[0]
        vc_idx = vc_idx[:196]

        if 'orig' in desc:
            zmb_idx = tr_stream.dataset.provides_sources.index('feat_orig')
        else:
            zmb_idx = tr_stream.dataset.provides_sources.index('feat_l2')
        sample_zmb = data[zmb_idx][vc_idx,:]

        patches = data[patches_idx][vc_idx,:]
        patches = transform(patches, 64)
        color_grid_vis(inverse_transform(patches, nc=3, npx=64), (14, 14), './patches.png')
    else:
        sample_zmb = floatX(np_rng.uniform(-1., 1., size=(196, 100)))

    print 'COMPILING...'
    _gen = theano.function([Z], gX)
    recon = theano.function([gX,X], cost)
    print 'Done!'

    samples = np.asarray(_gen(sample_zmb))

    if 'patches' in locals():
        recon_cost = recon(samples, patches)
        costs[ii,0] = recon_cost
        print "Reconstruction Error: %.3f" % (float(recon_cost))

    save_file = dcgan_root + 'samples/%s/vc_%s.png'%(desc, str(vc_num))
    color_grid_vis(inverse_transform(samples, nc=3, npx=64), (14, 14), save_file)
Example #47
f_log = open( os.path.join( log_dir, '%s.ndjson' % desc ), 'wb' )
log_fields = [
    'num_epoch', 
    'num_update', 
    'num_example', 
    't_spent',
    'c_cost',
    'd_cost',]

# DO THE JOB.
print desc.upper(  )
num_update = 0
num_epoch = 0
num_example = 0
Zb_vis = floatX( np_rng.uniform( -1., 1., size = ( nvis ** 2, nz, 1, 1 ) ) )
t = time(  )
for epoch in range( niter ):
    # Load pre-trained param if exists.
    num_epoch += 1
    mpath_c = os.path.join( model_dir, 'C%03d.npy' % num_epoch )
    mpath_d = os.path.join( model_dir, 'D%03d.npy' % num_epoch )
    if os.path.exists( mpath_c ) and os.path.exists( mpath_d ):
        print( 'Epoch %02d: Load.' % num_epoch )
        data_c = np.load( mpath_c )
        for pi in range( len( converter_params ) ):
            converter_params[ pi ].set_value( data_c[ pi ] )
        data_d = np.load( mpath_d )
        for pi in range( len( discrim_params ) ):
            discrim_params[ pi ].set_value( data_d[ pi ] )
        continue
Example #48
def main():
    # Parameters
    task = 'toy'
    name = '8G_MOEGAN_MMDu2'  #'8G_MOEGAN_PFq_NFd_t2'

    DIM = 512
    begin_save = 0
    loss_type = ['trickLogD', 'minimax', 'ls']  #['trickLogD', 'minimax', 'ls']
    nloss = 3  #2
    DATASET = '8gaussians'
    batchSize = 64

    ncandi = 8
    kD = 1  # # of discrim updates for each gen update
    kG = 1  # # of gen updates for each discrim update
    ntf = 256
    b1 = 0.5  # momentum term of adam
    nz = 2  # # of dim for Z
    niter = 4  # # of iter at starting learning rate
    lr = 0.0001  # initial learning rate for adam G
    lrd = 0.0001  # initial learning rate for adam D
    N_up = 100000
    save_freq = 10000 / 10
    show_freq = 10000 / 10
    test_deterministic = True
    beta = 1.
    GP_norm = False  # whether to use gradient penalty on the discriminator
    LAMBDA = 2.  # hyperparameter of GP
    NSGA2 = True
    # Load the dataset

    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM,
                                                          GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradient penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(),
                                wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)

        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss,
                                     discriminator_params,
                                     learning_rate=lrtd,
                                     beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd score
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                              discriminator_loss,
                              updates=updates_d)
    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(),
        (fake_out > 0.5).mean(), Fd_score
    ])

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print(desc)

    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/' + desc):
        os.mkdir(os.path.join('models/', desc))

    gen_new_params = []

    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD)
        xmb = xmb[0:batchSize * kD]
        # initial G cluster
        if n_updates == 0:
            for can_i in range(0, ncandi):
                train_g, gen_fn, generator = create_G(
                    loss_type=loss_type[can_i % nloss],
                    discriminator=discriminator,
                    lr=lr,
                    b1=b1,
                    DIM=DIM)
                for _ in range(0, kG):
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    cost = train_g(zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = gen_fn(sample_zmb)

                gen_new_params.append(
                    lasagne.layers.get_all_param_values(generator))

                if can_i == 0:
                    g_imgs_old = gen_imgs
                    fmb = gen_imgs[0:int(batchSize / ncandi * kD), :]
                else:
                    g_imgs_old = np.append(g_imgs_old, gen_imgs, axis=0)
                    newfmb = gen_imgs[0:int(batchSize / ncandi * kD), :]
                    fmb = np.append(fmb, newfmb, axis=0)
            # print gen_new_params
            # MODEL G
            noise = T.matrix('noise')
            generator = models_uncond.build_generator_toy(noise, nd=DIM)
            Tgimgs = lasagne.layers.get_output(generator)
            Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

            g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out,
                                                                 1).mean()
            g_loss_minimax = - \
                lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
            g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))

            g_params = lasagne.layers.get_all_params(generator, trainable=True)

            up_g_logD = lasagne.updates.adam(g_loss_logD,
                                             g_params,
                                             learning_rate=lrt,
                                             beta1=b1)
            up_g_minimax = lasagne.updates.adam(g_loss_minimax,
                                                g_params,
                                                learning_rate=lrt,
                                                beta1=b1)
            up_g_ls = lasagne.updates.adam(g_loss_ls,
                                           g_params,
                                           learning_rate=lrt,
                                           beta1=b1)

            train_g = theano.function([noise], g_loss_logD, updates=up_g_logD)
            train_g_minimax = theano.function([noise],
                                              g_loss_minimax,
                                              updates=up_g_minimax)
            train_g_ls = theano.function([noise], g_loss_ls, updates=up_g_ls)

            gen_fn = theano.function([noise],
                                     lasagne.layers.get_output(
                                         generator, deterministic=True))
        else:

            class Instance:
                def __init__(self, fq, fd, params, img_values, image_copy):
                    self.fq = fq
                    self.fd = fd
                    self.params = params
                    self.vimg = img_values
                    self.cimg = image_copy

                def f(self):
                    return self.fq - self.fd

            instances = []
            fq_list = np.zeros(ncandi)
            fd_list = np.zeros(ncandi)

            gen_old_params = gen_new_params
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    lasagne.layers.set_all_param_values(
                        generator, gen_old_params[can_i])
                    if loss_type[type_i] == 'trickLogD':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g(zmb)
                    elif loss_type[type_i] == 'minimax':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_minimax(zmb)
                    elif loss_type[type_i] == 'ls':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_ls(zmb)

                    sample_zmb = floatX(np_rng.uniform(-1., 1.,
                                                       size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    instances.append(
                        Instance(
                            frr_score, fd_score,
                            lasagne.layers.get_all_param_values(generator),
                            gen_imgs,
                            gen_imgs[0:int(batchSize / ncandi * kD), :]))
            if ncandi < len(instances):
                if NSGA2 == True:
                    cromos = {
                        idx: [float(inst.fq), -float(inst.fd)]
                        for idx, inst in enumerate(instances)
                    }
                    cromos_idxs = [idx for idx, _ in enumerate(instances)]
                    finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)

                    for idx, p in enumerate(finalpop):
                        inst = instances[p]
                        gen_new_params[idx] = inst.params
                        fq_list[idx] = inst.fq
                        fd_list[idx] = inst.fd
                        fake_rate[idx] = inst.f()
                        g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg
                        fmb[int(idx * batchSize / ncandi *
                                kD):math.ceil((idx + 1) * batchSize / ncandi *
                                              kD), :] = inst.cimg

                    with open('front/%s.tsv' % desc, 'wb') as ffront:
                        for idx, p in enumerate(finalpop):
                            inst = instances[p]
                            ffront.write(
                                (str(inst.fq) + "\t" + str(inst.fd)).encode())
                            ffront.write("\n".encode())
                else:
                    for idx, inst in enumerate(instances):
                        if idx < ncandi:
                            gen_new_params[idx] = inst.params
                            fake_rate[idx] = inst.f()
                            fq_list[idx] = inst.fq
                            fd_list[idx] = inst.fd
                            g_imgs_old[idx * ntf:(idx + 1) *
                                       ntf, :] = inst.vimg
                            fmb[int(idx * batchSize / ncandi *
                                    kD):math.ceil((idx + 1) * batchSize /
                                                  ncandi * kD), :] = inst.cimg
                        else:
                            fr_com = fake_rate - inst.f()
                            if min(fr_com) < 0:
                                idr = np.where(fr_com == min(fr_com))[0][0]
                                gen_new_params[idr] = inst.params
                                fake_rate[idr] = inst.f()
                                g_imgs_old[idr * ntf:(idr + 1) *
                                           ntf, :] = inst.vimg
                                fmb[int(idr * batchSize / ncandi *
                                        kD):math.ceil((idr + 1) * batchSize /
                                                      ncandi *
                                                      kD), :] = inst.cimg

        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = g_imgs_old[i * ntf:(i + 1) * ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            if i == 0:
                fake_rate = np.array([fr])
                real_rate = np.array([tr])
                fake_rate_p = np.array([frp])
                real_rate_p = np.array([trp])
                FDL = np.array([fdscore])
            else:
                fake_rate = np.append(fake_rate, fr)
                real_rate = np.append(real_rate, tr)
                fake_rate_p = np.append(fake_rate_p, frp)
                real_rate_p = np.append(real_rate_p, trp)
                FDL = np.append(FDL, fdscore)

        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()

        # train D
        for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

        if n_updates % show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            params_max = gen_new_params[np.argmax(fake_rate)]
            lasagne.layers.set_all_param_values(generator, params_max)
            g_imgs_max = gen_fn(s_zmb)

        if n_updates % show_freq == 0 and n_updates != 0:
            #metric
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            mmd2_all = []
            for i in range(0, ncandi):
                lasagne.layers.set_all_param_values(generator,
                                                    gen_new_params[i])
                g_imgs_min = gen_fn(s_zmb)
                mmd2_all.append(compute_metric_mmd2(g_imgs_min, xmb))
            mmd2_all = np.array(mmd2_all)
            if NSGA2:
                with open('front/%s_mmd2u.tsv' % desc, 'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write(
                            (str(fq_list[idx]) + "\t" + str(fd_list[idx]) +
                             "\t" + str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            #save best
            params = gen_new_params[np.argmin(mmd2_all)]
            lasagne.layers.set_all_param_values(generator, params)
            g_imgs_min = gen_fn(s_zmb)
            generate_image(xmb,
                           g_imgs_min,
                           n_updates / save_freq,
                           desc,
                           postfix="_mmu2d")
            np.savez('models/%s/gen_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(generator))
            np.savez('models/%s/dis_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(discriminator))
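The selection step above hands the ranking off to an external nsga_2_pass helper, called on the two objectives [fq, -fd] (both to be minimized). As a rough, self-contained illustration of what such a non-dominated selection can look like, here is a minimal numpy sketch; the real helper presumably also applies NSGA-II crowding-distance tie-breaking and may differ in other details.

# Minimal sketch of two-objective non-dominated (Pareto) selection.
# Illustration only: the nsga_2_pass helper used above may differ.
import numpy as np

def pareto_select(objectives, n_keep):
    """objectives: list of (f1, f2) pairs to minimize; returns indices to keep."""
    objs = np.asarray(objectives, dtype=float)
    remaining = list(range(len(objs)))
    kept = []
    while remaining and len(kept) < n_keep:
        # collect the current non-dominated front
        front = []
        for i in remaining:
            dominated = any(
                np.all(objs[j] <= objs[i]) and np.any(objs[j] < objs[i])
                for j in remaining if j != i)
            if not dominated:
                front.append(i)
        kept.extend(front[:n_keep - len(kept)])
        remaining = [i for i in remaining if i not in front]
    return kept

# toy usage: keep the 2 best of 4 candidates scored as (fq, -fd)
print(pareto_select([(0.9, -0.1), (0.5, -0.4), (0.7, -0.2), (0.95, -0.05)], 2))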
Example #49
0
desc = 'dcgan'
model_dir = 'models/%s'%desc
samples_dir = 'samples/%s'%desc
if not os.path.exists('logs/'):
    os.makedirs('logs/')
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
if not os.path.exists(samples_dir):
    os.makedirs(samples_dir)

X_sample = data.get_unlab_batch(0,monitor_size)
X_sample = data.center_crop(X_sample,img_size)
color_grid_vis(X_sample.transpose(0, 2, 3, 1), (14, 14), 'samples/%s_etl_test.png'%desc)


Z_sample = floatX(np_rng.uniform(-1., 1., size=(monitor_size, model.gen_dim)))


print desc.upper()

print "starting training"
with open('errors.log', 'w') as f:
    f.write('# iter data_seen epoch dis_loss g_loss')
    f.write(' c_loss c_val_err c_test_err\n')
    
with open('best.log', 'w') as f:
    f.write('# iter data_seen epoch c_val_err c_test_err\n')

n_iter = n_epochs*(data.unlab_size/batch_size+1)

best_err = 1e6
    def run(self):
        parser = argparse.ArgumentParser()
        parser.add_argument("--gendim", type = int, default = 100)
        #parser.add_argument("--dataset", type = str, default = 'stl10')
        parser.add_argument("--batch_size", type = int, default = 128)
        parser.add_argument("--n_epochs", type = int, default = 100)
        parser.add_argument("--k_iter", type = int, default = 1)
        parser.add_argument("--monitor_size", type = int, default = 196)
        parser.add_argument("--init_scale", type = float, default = 0.02)
        parser.add_argument("--folds", type = int, default = 5)
        parser.add_argument("--valid_fold", type = int, default = 0)
        parser.add_argument("--iter_save", type = int, default = 100)
        parser.add_argument('--classify', action='store_true')
        parser.add_argument("--img_size", type = int, default = 64)
        args = parser.parse_args()
        print args

        gen_dim = args.gendim
        n_epochs = args.n_epochs
        batch_size = args.batch_size
        #dataset = args.dataset
        k_iter = args.k_iter
        monitor_size = args.monitor_size
        init_scale = args.init_scale
        folds = args.folds
        valid_fold = args.valid_fold
        iter_save = args.iter_save
        classify = args.classify
        img_size = args.img_size

        if classify:
            from src.gan_class import GAN_trainer
        else:
            from src.gan import GAN_trainer

        model = self.model_module.GAN_model(img_shape=(img_size,img_size),gen_dim=gen_dim,init_scale=init_scale)
        trainer = GAN_trainer(model)
        data = dataset.stl10()

        desc = 'dcgan'
        model_dir = 'models/%s'%desc
        samples_dir = 'samples/%s'%desc
        if not os.path.exists('logs/'):
            os.makedirs('logs/')
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        if not os.path.exists(samples_dir):
            os.makedirs(samples_dir)

        X_sample = data.get_unlab_batch(0,monitor_size)
        X_sample = data.center_crop(X_sample,img_size)
        color_grid_vis(X_sample.transpose(0, 2, 3, 1), (14, 14), 'samples/%s_etl_test.png'%desc)




        Z_sample = floatX(np_rng.uniform(-1., 1., size=(monitor_size, model.gen_dim)))



        print desc.upper()

        print "starting training"
        with open('errors.log', 'w') as f:
            f.write('# iter data_seen epoch dis_loss g_loss')
            if classify:
                f.write(' c_loss c_val_err c_test_err\n')
            else:
                f.write('\n')

        if classify:
            with open('best.log', 'w') as f:
                f.write('# iter data_seen epoch c_val_err c_test_err\n')

        n_iter = n_epochs*(data.unlab_size/batch_size+1)


        best_err = 1e6
        last_it = 0
        t = time()
        for it in xrange(n_iter):

            epoch = it*batch_size/data.unlab_size
            X_batch = data.get_unlab_batch(it,batch_size)
            X_batch = data.scale_data(data.center_crop(X_batch,img_size))
            Z_batch = floatX(np_rng.uniform(-1., 1., size=(len(X_batch), model.gen_dim)))

            gen_loss = trainer.train_generator_on_batch(Z_batch)

            dis_loss = trainer.train_discriminator_on_batch(X_batch, Z_batch)

            if classify:
                X_batch, y_batch = data.get_train_batch(it,batch_size)
                X_batch = data.scale_data(data.center_crop(X_batch,img_size))
                cls_loss = trainer.train_classifier_on_batch(X_batch, y_batch)

            if (it % iter_save == 0) or (it % 10 == 0 and it < iter_save):
                if classify:
                    cls_test_err = 0.0
                    for it2 in xrange(data.test_size/batch_size):
                        X_batch, y_batch = data.get_test_batch(it2,batch_size)
                        X_batch = data.scale_data(data.center_crop(X_batch,img_size))
                        cls_test_err += trainer._cls_error(X_batch, y_batch)
                    cls_test_err /= data.test_size/batch_size
                    cls_valid_err = 0.0
                    for it2 in xrange(data.valid_size/batch_size):
                        X_batch, y_batch = data.get_valid_batch(it2,batch_size)
                        X_batch = data.scale_data(data.center_crop(X_batch,img_size))
                        cls_valid_err += trainer._cls_error(X_batch, y_batch)
                    cls_valid_err /= data.valid_size/batch_size

                samples = np.asarray(trainer._gen(Z_sample))
                color_grid_vis(data.inv_scale_data(samples).transpose(0, 2, 3, 1), (14, 14), 'samples/%s/%d.png'%(desc, it))

                with open('errors.log', 'a') as f:
                    f.write( " ".join(map(str, (it,it*batch_size,epoch) ))+" ")
                    f.write( " ".join(map(str, (dis_loss,gen_loss) ))+" ")
                    if classify:
                        f.write( " ".join(map(str, (cls_loss,cls_valid_err,cls_test_err) ))+"\n")
                    else:
                        f.write("\n")

                if classify and cls_valid_err<best_err:
                    best_err = cls_valid_err
                    with open('best.log', 'a') as f:
                        f.write( " ".join(map(str, (it,it*batch_size,epoch) ))+" ")
                        f.write( " ".join(map(str, (cls_valid_err,cls_test_err) ))+"\n")

                    model.dump('models/%s/best_gen_params.jl'%(desc))

                t2 = time()-t
                t += t2
                print "iter:%d/%d; epoch:%d;    %f sec. per iteration"%(it,n_iter,epoch,t2/(1+it-last_it))
                last_it = it+1

            if epoch in [1, 2, 3, 4, 5, 10, 15, 20, 25, 50, 75, 100, 200, n_epochs]:
                if (it*batch_size)%data.unlab_size<batch_size:
                    model_dir = 'models/%s/%d'%(desc, it)
                    if not os.path.exists(model_dir):
                        os.makedirs(model_dir)
                    model.dump('%s/params.jl'%(model_dir))

        model_dir = 'models/%s/last'%(desc)
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        model.dump('%s/params.jl' % (model_dir))
####################
# COMPILE FUNCTION #
####################
print 'COMPILING'
t = time()
_train_g = theano.function([X, N, Z, Temp], cost, updates=g_updates)
_train_d = theano.function([X, N, Z, Temp], cost, updates=d_updates)
_gen = theano.function([Z], gX)
print '%.2f seconds to compile theano functions'%(time()-t)


#####################################
# SAMPLE RANDOM DATA FOR GENERATION #
#####################################
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz)))

###################
# GENERATE SAMPLE #
###################
def gen_samples(n, nbatch=128):
    samples = []
    n_gen = 0
    for i in range(n/nbatch):
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        n_gen += len(xmb)
    n_left = n-n_gen
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb)
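The floatX helper that appears in every one of these snippets is presumably the small casting utility from the DCGAN codebase, which converts an array-like to Theano's configured float dtype. A minimal stand-in, under that assumption:

# Hypothetical stand-in for the floatX helper used throughout these examples.
import numpy as np
try:
    import theano
    _FLOATX = theano.config.floatX
except ImportError:   # fall back when Theano is not installed
    _FLOATX = 'float32'

def floatX(x):
    """Cast an array-like to the configured Theano float dtype."""
    return np.asarray(x, dtype=_FLOATX)

zmb = floatX(np.random.uniform(-1., 1., size=(4, 100)))
print(zmb.dtype)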
Example #52
0
File: train.py Project: aaajiao/typeface
tr_idxs = np.arange(len(trX))
trX_vis = np.asarray([[trX[i] for i in py_rng.sample(tr_idxs[trY==y], cols)] for y in range(ny)]).reshape(ny * cols, -1)
trX_vis = inverse_transform(transform(trX_vis))
grayscale_grid_vis(trX_vis, (ny, cols), 'samples/test.png')


############
# set up targets normally
steps = 6
numtargets = 9 #This is how many letter you will count
start = 1
targets = np.asarray([[i+start for _ in range(steps)] for i in range(numtargets)])
sample_ymb = floatX(OneHot(targets.flatten(), ny))

# set up random z
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(numtargets * steps, nz)))



def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n/nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, ny, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n-n_gen
Example #53
0
def transform(X):
    return (floatX(X) / 255.).reshape(-1, nc, npx, npx)


Z = T.matrix()
X = T.tensor4()
Y = T.matrix()

gX = gen(Z, Y, *gen_params)
dX = discrim(X, Y, *discrim_params)

_gen = theano.function([Z, Y], gX)
_discrim = theano.function([X, Y], dX)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(10 * ny, nz)))
sample_ymb = floatX(
    OneHot(
        np.asarray([[i for _ in range(10)] for i in range(ny)]).flatten(), ny))
samples = _gen(sample_zmb, sample_ymb)

scores = _discrim(samples, sample_ymb)
color_grid_vis(inverse_transform(samples), (ny, 10), 'samples.png')

for i in range(ny):
    Z = T.matrix()
    X = T.tensor4()
    Y = T.matrix()

    gX = gen(Z, Y, *gen_params)
    dX = discrim(X, Y, *discrim_params)
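The conditional sampling above drives the generator with labels passed through a OneHot helper. Assuming OneHot simply expands integer class ids into an (n, ny) one-hot float matrix, a numpy equivalent would be:

# Hypothetical numpy equivalent of the OneHot helper used in these examples.
import numpy as np

def one_hot(labels, n_classes):
    """Expand integer class ids of shape (n,) into an (n, n_classes) 0/1 matrix."""
    labels = np.asarray(labels, dtype=int)
    out = np.zeros((labels.shape[0], n_classes), dtype=np.float32)
    out[np.arange(labels.shape[0]), labels] = 1.0
    return out

# e.g. three classes, two samples per class, mirroring the label grids above
print(one_hot(np.repeat(np.arange(3), 2), 3))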
Example #54
0
gen_params32 = [param.get_value().astype('float32') for param in gen_params]
#%%
#%%
Z = T.matrix()
Zz = Z.astype('float32')
X = T.tensor4()
Xx = X.astype('float32')
gX = gen(Zz, *gen_params32)

#%%
dX = discrim(Xx, *discrim_params)

_gen = theano.function([Zz], gX)
_discrim = theano.function([Xx], dX)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(400, 256)))
samples = _gen(sample_zmb)
scores = _discrim(samples)
sort = np.argsort(scores.flatten())[::-1]
samples = samples[sort]
color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png')


#%%
#%%
def calculate_b_u_b_s(X, g=None, b=None, u=None, s=None, a=1., e=1e-8):
    if X.ndim == 4:
        if u is not None and s is not None:
            b_u = u.dimshuffle('x', 0, 'x', 'x')
            b_s = s.dimshuffle('x', 0, 'x', 'x')
        else:
g_updater = updates.Adam(lr=lrt,
                         b1=args.b1,
                         regularizer=updates.Regularizer(l2=args.weight_decay))
d_updates = d_updater(disc_params, d_cost)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates

print('COMPILING')
t = time()
_train_g = theano.function([x, z], cost, updates=g_updates)
_train_d = theano.function([x, z], cost, updates=d_updates)
_gen = theano.function([z], gx)
print('%.2f seconds to compile theano functions' % (time() - t))

# test z samples
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(n_vis, nz)))

f_log = open('%s/training_log.ndjson' % log_dir, 'wb')
log_fields = [
    'n_epochs',
    'n_updates',
    'n_examples',
    'n_seconds',
    'g_cost',
    'd_cost',
]

# initialization
n_updates = 0
n_epochs = 0
n_examples = 0
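The snippet above opens a training_log.ndjson file in binary mode and declares log_fields, but the actual write falls outside the excerpt. A plausible per-update write, assuming one JSON object per line keyed by those fields (write_log_line below is a hypothetical helper, not part of the original code):

# Hypothetical NDJSON logging step matching the log_fields declared above.
import io
import json

def write_log_line(f_log, n_epochs, n_updates, n_examples, n_seconds, g_cost, d_cost):
    record = {
        'n_epochs': n_epochs,
        'n_updates': n_updates,
        'n_examples': n_examples,
        'n_seconds': n_seconds,
        'g_cost': float(g_cost),
        'd_cost': float(d_cost),
    }
    f_log.write((json.dumps(record) + '\n').encode())  # file was opened with 'wb'
    f_log.flush()

# toy usage against an in-memory buffer
buf = io.BytesIO()
write_log_line(buf, 0, 1, 128, 2.5, 0.71, 0.64)
print(buf.getvalue().decode())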
Example #56
0
    if not args.output_image:
        args.output_image = '%s_%s_samples.png' % (args.model_name, args.model_type)

    for arg in vars(args):
        print('[%s] =' % arg, getattr(args, arg))

    # initialize model and constrained optimization problem
    model_class = locate('model_def.%s' % args.model_type)
    model = model_class.Model(model_name=args.model_name, model_file=args.model_file)
    # generate samples

        #def gen_samples(self, z0=None, n=32, batch_size=32, use_transform=True):
    samples = []
    n = 32
    batch_size = 32
    z0 = np_rng.uniform(-1., 1., size=(n, model.nz))
    n_batches = int(np.ceil(n/float(batch_size)))
    for i in range(n_batches):
        zmb = floatX(z0[batch_size * i:min(n, batch_size * (i + 1)), :])
        xmb = model._gen(zmb)
        samples.append(xmb)
    samples = np.concatenate(samples, axis=0)
    samples = model.inverse_transform(samples, npx=model.npx, nc=model.nc)
    samples = (samples * 255).astype(np.uint8)
    #samples = model.gen_samples(z0=None, n=196, batch_size=49, use_transform=True)
    # generate grid visualization
    im_vis = utils.grid_vis(samples, 14, 14)
    # write to the disk
    im_vis = cv2.cvtColor(im_vis, cv2.COLOR_BGR2RGB)
    cv2.imwrite(args.output_image, im_vis)
    print('samples_shape', samples.shape)
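utils.grid_vis is used above to tile the generated samples into a 14x14 mosaic before handing the image to OpenCV. A minimal sketch of such a tiling helper, assuming (n, h, w, c) uint8 inputs; the real utils.grid_vis may differ:

# Minimal sketch of a grid/tile visualizer; the actual utils.grid_vis may differ.
import numpy as np

def grid_vis(images, rows, cols):
    """Tile (n, h, w, c) images into a single (rows*h, cols*w, c) mosaic."""
    n, h, w, c = images.shape
    grid = np.zeros((rows * h, cols * w, c), dtype=images.dtype)
    for idx in range(min(n, rows * cols)):
        r, col = divmod(idx, cols)
        grid[r * h:(r + 1) * h, col * w:(col + 1) * w] = images[idx]
    return grid

# toy usage with random "samples"
demo = (np.random.rand(4, 8, 8, 3) * 255).astype(np.uint8)
print(grid_vis(demo, 2, 2).shape)  # (16, 16, 3)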
Example #57
0
File: job.py Project: mehdidc/dcgan
def run(hp, folder):
    trX, trY, nb_classes = load_data()
    k = 1             # # of discrim updates for each gen update
    l2 = 2.5e-5       # l2 weight decay
    b1 = 0.5          # momentum term of adam
    nc = 1            # # of channels in image
    ny = nb_classes   # # of classes
    nbatch = 128      # # of examples in batch
    npx = 28          # # of pixels width/height of images
    nz = 100          # # of dim for Z
    ngfc = 512       # # of gen units for fully connected layers
    ndfc = 512      # # of discrim units for fully connected layers
    ngf = 64          # # of gen filters in first conv layer
    ndf = 64          # # of discrim filters in first conv layer
    nx = npx*npx*nc   # # of dimensions in X
    niter = 200       # # of iter at starting learning rate
    niter_decay = 100 # # of iter to linearly decay learning rate to zero
    lr = 0.0002       # initial learning rate for adam
    scale = 0.02

    k = hp['k']
    l2 = hp['l2']
    #b1 = hp['b1']
    nc = 1
    ny = nb_classes
    nbatch = hp['nbatch']
    npx = 28
    nz = hp['nz']
    ngfc = hp['ngfc']       # # of gen units for fully connected layers
    ndfc = hp['ndfc']      # # of discrim units for fully connected layers
    ngf = hp['ngf']          # # of gen filters in first conv layer
    ndf = hp['ndf']          # # of discrim filters in first conv layer
    nx = npx*npx*nc   # # of dimensions in X
    niter = hp['niter']       # # of iter at starting learning rate
    niter_decay = hp['niter_decay'] # # of iter to linearly decay learning rate to zero
    lr = hp['lr']       # initial learning rate for adam


    scale = hp['scale']

    #k = 1             # # of discrim updates for each gen update
    #l2 = 2.5e-5       # l2 weight decay
    b1 = 0.5          # momentum term of adam
    #nc = 1            # # of channels in image
    #ny = nb_classes   # # of classes
    budget_hours = hp.get('budget_hours', 2)
    budget_secs = budget_hours * 3600

    ntrain = len(trX)
    def transform(X):
        return (floatX(X)).reshape(-1, nc, npx, npx)

    def inverse_transform(X):
        X = X.reshape(-1, npx, npx)
        return X
    
    model_dir = folder
    samples_dir = os.path.join(model_dir, 'samples')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)

    relu = activations.Rectify()
    sigmoid = activations.Sigmoid()
    lrelu = activations.LeakyRectify()
    bce = T.nnet.binary_crossentropy

    gifn = inits.Normal(scale=scale)
    difn = inits.Normal(scale=scale)

    gw  = gifn((nz, ngfc), 'gw')
    gw2 = gifn((ngfc, ngf*2*7*7), 'gw2')
    gw3 = gifn((ngf*2, ngf, 5, 5), 'gw3')
    gwx = gifn((ngf, nc, 5, 5), 'gwx')

    dw  = difn((ndf, nc, 5, 5), 'dw')
    dw2 = difn((ndf*2, ndf, 5, 5), 'dw2')
    dw3 = difn((ndf*2*7*7, ndfc), 'dw3')
    dwy = difn((ndfc, 1), 'dwy')

    gen_params = [gw, gw2, gw3, gwx]
    discrim_params = [dw, dw2, dw3, dwy]

    def gen(Z, w, w2, w3, wx, use_batchnorm=True):
        if use_batchnorm:
            batchnorm_ = batchnorm
        else:
            batchnorm_ = lambda x:x
        h = relu(batchnorm_(T.dot(Z, w)))
        h2 = relu(batchnorm_(T.dot(h, w2)))
        h2 = h2.reshape((h2.shape[0], ngf*2, 7, 7))
        h3 = relu(batchnorm_(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
        x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2)))
        return x

    def discrim(X, w, w2, w3, wy):
        h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
        h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))))
        h2 = T.flatten(h2, 2)
        h3 = lrelu(batchnorm(T.dot(h2, w3)))
        y = sigmoid(T.dot(h3, wy))
        return y

    X = T.tensor4()
    Z = T.matrix()

    gX = gen(Z, *gen_params)

    p_real = discrim(X, *discrim_params)
    p_gen = discrim(gX, *discrim_params)

    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d

    cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

    lrt = sharedX(lr)
    d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    d_updates = d_updater(discrim_params, d_cost)
    g_updates = g_updater(gen_params, g_cost)
    #updates = d_updates + g_updates

    print 'COMPILING'
    t = time()
    _train_g = theano.function([X, Z], cost, updates=g_updates)
    _train_d = theano.function([X, Z], cost, updates=d_updates)
    _gen = theano.function([Z], gX)
    print '%.2f seconds to compile theano functions'%(time()-t)

    tr_idxs = np.arange(len(trX))
    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))

    def gen_samples(n, nbatch=128):
        samples = []
        labels = []
        n_gen = 0
        for i in range(n/nbatch):
            zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
            xmb = _gen(zmb)
            samples.append(xmb)
            n_gen += len(xmb)
        n_left = n-n_gen
        zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        return np.concatenate(samples, axis=0)

    s = floatX(np_rng.uniform(-1., 1., size=(10000, nz)))
    n_updates = 0
    n_check = 0
    n_epochs = 0
    n_updates = 0
    n_examples = 0
    t = time()
    begin = datetime.now()
    for epoch in range(1, niter+niter_decay+1): 
        t = time()
        print("Epoch {}".format(epoch))
        trX = shuffle(trX)
        for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain/nbatch):
            imb = transform(imb)
            zmb = floatX(np_rng.uniform(-1., 1., size=(len(imb), nz)))
            if n_updates % (k+1) == 0:
                cost = _train_g(imb, zmb)
            else:
                cost = _train_d(imb, zmb)
            n_updates += 1
            n_examples += len(imb)
        samples = np.asarray(_gen(sample_zmb))
        grayscale_grid_vis(inverse_transform(samples), (10, 20), '{}/{:05d}.png'.format(samples_dir, n_epochs))
        n_epochs += 1
        if n_epochs > niter:
            lrt.set_value(floatX(lrt.get_value() - lr/niter_decay))
        if n_epochs % 50 == 0 or epoch == niter + niter_decay or epoch == 1:
            imgs = []
            for i in range(0, s.shape[0], nbatch):
                imgs.append(_gen(s[i:i+nbatch]))
            img = np.concatenate(imgs, axis=0)
            samples_filename = '{}/{:05d}_gen.npz'.format(model_dir, n_epochs)
            joblib.dump(img, samples_filename, compress=9)
            shutil.copy(samples_filename, '{}/gen.npz'.format(model_dir))
            joblib.dump([p.get_value() for p in gen_params], '{}/d_gen_params.jl'.format(model_dir, n_epochs), compress=9)
            joblib.dump([p.get_value() for p in discrim_params], '{}/discrim_params.jl'.format(model_dir, n_epochs), compress=9)
        print('Elapsed : {}sec'.format(time() - t))

        if (datetime.now() - begin).total_seconds() >= budget_secs:
            print("Budget finished.quit.")
            break
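Two scheduling details in the loop above are easy to miss: the generator is updated once every k+1 iterations (the discriminator takes the other k), and after the first niter epochs the learning rate is walked down by lr/niter_decay per epoch until it reaches zero. A standalone sketch of the resulting decay curve, using the default values listed near the top of the example:

# Standalone sketch of the linear LR decay applied after the first `niter` epochs.
def lr_at_epoch(epoch, lr=0.0002, niter=200, niter_decay=100):
    """Constant for `niter` epochs, then linear decay to zero over `niter_decay` epochs."""
    if epoch <= niter:
        return lr
    return max(0.0, lr - lr * (epoch - niter) / float(niter_decay))

# with the defaults: full rate at epoch 200, half at 250, zero at 300
for e in (1, 200, 250, 300):
    print('%d %.6f' % (e, lr_at_epoch(e)))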
Example #58
0
def train_model(train_stream,
                valid_stream,
                energy_optimizer,
                generator_optimizer,
                model_config_dict,
                model_test_name):

    [generator_function, generator_params] = set_generator_model(model_config_dict['hidden_size'],
                                                                 model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['hidden_size'],
                                                                          model_config_dict['min_num_eng_filters'])
    # compile functions
    print 'COMPILING ENERGY UPDATER'
    t=time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t=time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_optimizer=generator_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING EVALUATION FUNCTION'
    t=time()
    evaluation_function = set_evaluation_and_sampling_function(feature_function=feature_function,
                                                               energy_function=energy_function,
                                                               generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling
    fixed_hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                               high=model_config_dict['hidden_distribution'],
                                               size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))

    print 'START TRAINING'
    # for each epoch
    for e in xrange(model_config_dict['epochs']):
        # train phase
        epoch_train_input_energy  = 0.
        epoch_train_sample_energy = 0.
        epoch_train_count         = 0.

        train_batch_iters = train_stream.get_epoch_iterator()
        # for each batch
        for b, train_batch_data in enumerate(train_batch_iters):
            # set update function inputs
            input_data   = transform(train_batch_data[0])
            num_data     = input_data.shape[0]

            hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                 high=model_config_dict['hidden_distribution'],
                                                 size=(num_data, model_config_dict['hidden_size'])))

            noise_data   = np_rng.normal(size=input_data.shape)
            noise_data   = floatX(noise_data*model_config_dict['init_noise']*(model_config_dict['noise_decay']**e))

            # update generator
            generator_update_inputs = [input_data,
                                       hidden_data,
                                       noise_data,
                                       e]
            [input_energy_val, sample_energy_val, ] = generator_updater(*generator_update_inputs)

            # update energy function
            energy_update_inputs = [input_data,
                                    hidden_data,
                                    e]
            [input_energy_val, sample_energy_val, ] = energy_updater(*energy_update_inputs)

            # get output values
            epoch_train_input_energy  += input_energy_val.mean()
            epoch_train_sample_energy += sample_energy_val.mean()
            epoch_train_count         += 1.



        epoch_train_input_energy  /= epoch_train_count
        epoch_train_sample_energy /= epoch_train_count

        # validation phase
        epoch_valid_input_energy     = 0.
        epoch_valid_sample_energy    = 0.
        epoch_valid_count            = 0.
        valid_batch_iters = valid_stream.get_epoch_iterator()
        for b, valid_batch_data in enumerate(valid_batch_iters):
            # set function inputs
            input_data   = transform(valid_batch_data[0])
            num_data     = input_data.shape[0]
            hidden_data  = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                 high=model_config_dict['hidden_distribution'],
                                                 size=(num_data, model_config_dict['hidden_size'])))
            # evaluate model
            evaluation_input = [input_data,  hidden_data]
            outputs = evaluation_function(*evaluation_input)
            epoch_valid_input_energy  += outputs[0].mean()
            epoch_valid_sample_energy += outputs[1].mean()
            epoch_valid_count         += 1.

        epoch_valid_input_energy  /= epoch_valid_count
        epoch_valid_sample_energy /= epoch_valid_count

        print '================================================================'
        print 'EPOCH #{}'.format(e), model_test_name
        print '================================================================'
        print '   TRAIN RESULTS'
        print '================================================================'
        print '     input energy     : ', epoch_train_input_energy
        print '----------------------------------------------------------------'
        print '     sample energy    : ', epoch_train_sample_energy
        print '================================================================'
        print '   VALID RESULTS'
        print '================================================================'
        print '     input energy     : ', epoch_valid_input_energy
        print '----------------------------------------------------------------'
        print '     sample energy    : ', epoch_valid_sample_energy
        print '================================================================'

        # # plot curve data
        # save_as = model_test_name + '_ENERGY_CURVE.png'
        # plot_learning_curve(cost_values=[train_input_energy,
        #                                  train_sample_energy,
        #                                  valid_input_energy,
        #                                  valid_sample_energy],
        #                     cost_names=['Input Energy (train)',
        #                                 'Sample Energy (train)',
        #                                 'Input Energy (valid)',
        #                                 'Sample Energy (valid)'],
        #                     save_as=save_as)

        # sample data
        save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(e+1)
        sample_data = sampling_function(fixed_hidden_data)[0]
        sample_data = np.asarray(sample_data)
        color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
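One detail worth noting in Example #58: the corruption noise added to the inputs each batch is annealed geometrically, scaled by init_noise * noise_decay**epoch. A tiny standalone sketch of that schedule; the concrete init_noise and noise_decay values below are made up, since model_config_dict is not shown in this excerpt.

# Tiny sketch of the geometric noise annealing used above (illustrative values only).
import numpy as np

def sample_annealed_noise(shape, epoch, init_noise=0.1, noise_decay=0.98, rng=None):
    rng = rng if rng is not None else np.random.RandomState(0)
    return (rng.normal(size=shape) * init_noise * (noise_decay ** epoch)).astype(np.float32)

print(np.abs(sample_annealed_noise((4, 4), epoch=0)).mean())
print(np.abs(sample_annealed_noise((4, 4), epoch=100)).mean())  # much smaller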