def _ELMAE(self, inputX, hiddenunit, filtersize, stride):
    assert inputX.shape[1] == 1  # ELMAE input must have exactly one channel, i.e. a single feature map
    # Generate random orthogonal filters
    filters = init.GlorotNormal().sample((hiddenunit, np.prod(filtersize)))
    filters = orthonormalize(filters)
    filters = filters.reshape((hiddenunit, 1) + filtersize)
    bias = init.Normal().sample(hiddenunit)
    bias = orthonormalize(bias)
    # Convolutional forward pass; pad must be 0 so it matches the patch extraction below
    convout = convbiasact_decomp(inputX, filters, bias, pad=0, stride=stride)
    del filters, bias
    # Reshape the 4-D convolution output into a 2-D matrix
    hiddens = convout.transpose((0, 2, 3, 1)).reshape((-1, hiddenunit))
    del convout
    # Extract image patches (no padding, consistent with the convolution above)
    im2col = myUtils.basic.Im2ColOp(psize=filtersize[0], stride=stride[0])
    patches = im2col.transform(inputX)
    patches = patches.reshape((-1, np.prod(filtersize)))
    # Compute beta
    beta = compute_beta(hiddens, patches, self.C)
    del hiddens, patches
    return beta
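# Note (illustrative addition): compute_beta() is used throughout these snippets but not shown.
# A minimal NumPy sketch, assuming it solves the usual C-regularized least-squares ELM problem
# beta = (H^T H + I/C)^-1 H^T T; the exact regularization convention is an assumption, not
# necessarily the original implementation.
import numpy as np

def compute_beta(H, T, C):
    # Ridge-regularized least squares, solved as a linear system instead of an explicit inverse.
    gram = H.T.dot(H) + np.eye(H.shape[1]) / C
    return np.linalg.solve(gram, H.T.dot(T))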
def __init__(self, incomings, nfilters, nrings=5, nrays=16,
             W=LI.GlorotNormal(), b=LI.Constant(0.0),
             normalize_rings=False, normalize_input=False, take_max=True,
             nonlinearity=L.nonlinearities.rectify, **kwargs):
    super(GCNNLayer, self).__init__(incomings, **kwargs)
    self.nfilters = nfilters
    self.filter_shape = (nfilters, self.input_shapes[0][1], nrings, nrays)
    self.nrings = nrings
    self.nrays = nrays
    self.normalize_rings = normalize_rings
    self.normalize_input = normalize_input
    self.take_max = take_max
    self.nonlinearity = nonlinearity
    self.W = self.add_param(W, self.filter_shape, name="W")
    biases_shape = (nfilters, )
    self.b = self.add_param(b, biases_shape, name="b", regularizable=False)
def _get_beta(self, oneChannel, randChannel, addpad):
    # assert inputX.ndim == 4 and inputX.shape[1] == 1  # ELMAE input must have exactly one channel (a single feature map)
    # Use clustering to obtain distinct patches within each class
    # Generate random orthogonal filters
    filters = init.GlorotNormal().sample((self.n_hidden, self.filter_size ** 2))
    filters = orthonormalize(filters)
    filters = filters.reshape((self.n_hidden, 1, self.filter_size, self.filter_size))
    bias = init.Normal().sample(self.n_hidden)
    bias = orthonormalize(bias)
    # Convolutional forward pass, consistent with the patch extraction below
    pad = self.filter_size // 2 if addpad else 0
    stride = self.filter_size // 2 + 1
    hiddens = convbiasact_decomp(oneChannel, filters, bias, pad=pad, stride=stride)
    hiddens = hiddens.transpose((0, 2, 3, 1)).reshape((-1, self.n_hidden))
    # Extract image patches from a randomly chosen channel
    patches = im2col(randChannel, self.filter_size, stride=stride, pad=pad)
    # Compute beta
    beta = compute_beta(hiddens, patches, self.C)
    beta = beta.reshape((self.n_hidden, 1, self.filter_size, self.filter_size))
    return beta
def _get_beta(self, inputX, ch, addpad):
    # assert inputX.ndim == 4 and inputX.shape[1] == 1  # ELMAE input must have exactly one channel (a single feature map)
    batches, channels, rows, cols = inputX.shape
    oneChannel = inputX[:, ch, :, :].reshape((batches, 1, rows, cols))
    # Use clustering to obtain distinct patches within each class
    # Generate random orthogonal filters
    filters = init.GlorotNormal().sample((self.hidden_unit, self.filter_size ** 2))
    filters = orthonormalize(filters)
    filters = filters.reshape((self.hidden_unit, 1, self.filter_size, self.filter_size))
    bias = init.Normal().sample(self.hidden_unit)
    bias = orthonormalize(bias)
    # Convolutional forward pass, consistent with the patch extraction below
    pad = self.filter_size // 2 if addpad else 0
    stride = self.filter_size // 2 + 1
    hiddens = convbiasact_decomp(oneChannel, filters, bias, pad=pad, stride=stride)
    hiddens = hiddens.transpose((0, 2, 3, 1)).reshape((-1, self.hidden_unit))
    # Extract image patches across a random channel (disabled)
    # batchindex = np.arange(batches)
    # channelindex = np.random.randint(channels, size=batches)
    # randChannel = inputX[batchindex, channelindex, :, :].reshape((batches, 1, rows, cols))
    patches = im2col(oneChannel, self.filter_size, stride=stride, pad=pad)
    # Compute beta
    beta = compute_beta(hiddens, patches, self.C)
    beta = beta.reshape((self.hidden_unit, 1, self.filter_size, self.filter_size))
    return beta
def _ELMAE(self, inputX, hiddenunit, filtersize):
    assert inputX.shape[1] == 1  # ELMAE input must have exactly one channel, i.e. a single feature map
    # Generate random orthogonal filters
    filters = init.GlorotNormal().sample((hiddenunit, np.prod(filtersize)))
    filters = orthonormalize(filters)
    filters = filters.reshape((hiddenunit, 1) + filtersize)
    bias = init.Normal().sample(hiddenunit)
    bias = orthonormalize(bias)
    # Convolutional forward pass; pad must be 0 so it matches the patch extraction below
    stride = 4
    convout = convbiasact_decomp(inputX, filters, bias, pad=0, stride=(stride, stride))
    # Reshape the 4-D convolution output into a 2-D matrix
    hiddens = convout.transpose((0, 2, 3, 1)).reshape((-1, hiddenunit))
    # Extract image patches
    patches = im2col(inputX, filtersize[0], stride=stride, pad=0)
    # Compute beta
    beta1 = compute_beta(hiddens, patches, self.C)
    stride = 1
    convout = convbiasact_decomp(inputX, filters, bias, pad=0, stride=(stride, stride))
    # Reshape the 4-D convolution output into a 2-D matrix
    hiddens = convout.transpose((0, 2, 3, 1)).reshape((-1, hiddenunit))
    # Extract image patches
    patches = im2col(inputX, filtersize[0], stride=stride, pad=0)
    # Compute beta
    beta2 = compute_beta(hiddens, patches, self.C)
    return beta1, beta2
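# Note (illustrative addition): im2col() is assumed to flatten every filter_size x filter_size
# patch of a single-channel batch into one row, matching the (-1, filter_size**2) layout that
# compute_beta() expects above. A minimal, loop-based NumPy sketch of that assumed behaviour,
# not the original (likely GPU-decomposed) implementation.
import numpy as np

def im2col(inputX, filter_size, stride, pad):
    # inputX: (batch, 1, rows, cols) -> (n_patches, filter_size**2)
    if pad > 0:
        inputX = np.pad(inputX, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    batches, _, rows, cols = inputX.shape
    patches = []
    for b in range(batches):
        for r in range(0, rows - filter_size + 1, stride):
            for c in range(0, cols - filter_size + 1, stride):
                patches.append(inputX[b, 0, r:r + filter_size, c:c + filter_size].ravel())
    return np.asarray(patches)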
def _get_beta(self, oneChannel, addpad):
    # assert inputX.ndim == 4 and inputX.shape[1] == 1  # ELMAE input must have exactly one channel (a single feature map)
    # Generate random orthogonal filters
    filters = init.GlorotNormal().sample((self.n_hidden, self.filter_size ** 2))
    filters = orthonormalize(filters)
    filters = filters.reshape((self.n_hidden, 1, self.filter_size, self.filter_size))
    bias = init.Normal().sample(self.n_hidden)
    bias = orthonormalize(bias)
    # Convolutional forward pass, consistent with the patch extraction below
    pad = self.filter_size // 2 if addpad else 0
    stride = self.filter_size // 2 + 1
    noiseChannel = add_mn(oneChannel, p=0.25)
    hiddens = convbiasact_decomp(noiseChannel, filters, bias, pad=pad, stride=stride)
    hiddens = hiddens.transpose((0, 2, 3, 1)).reshape((-1, self.n_hidden))
    # Extract image patches (from the clean, un-noised channel)
    patches = im2col(oneChannel, self.filter_size, stride=stride, pad=pad)
    # randPatch = add_mn_row(patches, p=0.25)
    # hiddens = np.dot(randPatch, filters.T) + bias
    # hiddens = relu(hiddens)
    # Compute beta
    beta = compute_beta_val(hiddens, patches, 5)
    beta = beta.reshape((self.n_hidden, 1, self.filter_size, self.filter_size))
    return beta
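# Note (illustrative addition): add_mn() (and the commented-out add_mn_row()) appear to apply
# masking noise in the denoising-autoencoder sense, i.e. elements are zeroed independently with
# probability p. This sketch encodes that assumption only; the original definition is not shown.
import numpy as np

def add_mn(X, p=0.25):
    # Zero each element independently with probability p, keeping the rest unchanged.
    mask = np.random.binomial(1, 1.0 - p, size=X.shape).astype(X.dtype)
    return X * mask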
def nn_fn(self):
    l_in_z = InputLayer((None, self.num_choices, self.z_dim))
    l_in_mask = InputLayer((None, self.num_choices))
    l_h = l_in_z
    for h in range(self.nn_depth - 1):
        l_h = DenseLayer(l_h, num_units=self.nn_hid_units, b=None, num_leading_axes=2)
    l_out_flat = DenseLayer(l_h, num_units=1, b=None, nonlinearity=None, num_leading_axes=2,
                            W=init.GlorotNormal(1.))
    l_out_pre_softmax = ReshapeLayer(l_out_flat, ([0], [1]))
    l_out_pre_softmax = SwitchLayer((l_in_mask, l_out_pre_softmax), 0, -np.inf)
    l_out = NonlinearityLayer(l_out_pre_softmax, softmax)
    return (l_in_z, l_in_mask), l_out
def get_train_output_for(self, inputX, inputy=None):
    self.W = init.GlorotNormal().sample((inputX.shape[1], self.hidden_unit))
    self.b = init.Normal().sample(self.hidden_unit)
    H = dotbiasact_decomp(inputX, self.W, self.b)
    self.beta = compute_beta(H, inputy, self.C)
    out = dot_decomp(H, self.beta)
    return out
def fit(self, X, y):
    y = myUtils.load.one_hot(y, len(np.unique(y)))
    self.W = init.GlorotNormal().sample((X.shape[1], self.n_hidden))
    self.b = init.Normal().sample(self.n_hidden)
    H = np.dot(X, self.W) + self.b
    H = relu(H)
    self.beta = compute_beta(H, y, self.C)
    return self
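# Note (illustrative addition, not part of the original class): inference for the ELM fitted
# above is not shown in this excerpt. Assuming prediction reuses the stored random projection
# W, b and the learned beta, it would look roughly like the hypothetical helper below.
import numpy as np

def elm_predict(X, W, b, beta):
    # Fixed random projection + ReLU, then the learned linear readout; returns class indices.
    H = np.maximum(np.dot(X, W) + b, 0)
    return np.argmax(np.dot(H, beta), axis=1)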
def __init__(self, incomings, nfilters, nrings=5, nrays=16,
             W=LI.GlorotNormal(), b=LI.Constant(0.0),
             normalize_rings=False, normalize_input=False, take_max=True,
             nonlinearity=LN.rectify, **kwargs):
    super(ACNNLayer, self).__init__(incomings, nfilters, nrings, nrays, W, b,
                                    normalize_rings, normalize_input, take_max,
                                    nonlinearity, **kwargs)
def fit(self, inputX, inputy):
    n_hidden = int(self.n_times * inputX.shape[1])
    inputy = myUtils.load.one_hot(inputy, len(np.unique(inputy)))
    self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
    self.b = init.Normal().sample(n_hidden)
    H = np.dot(inputX, self.W) + self.b
    H = relu(H)
    self.beta = compute_beta(H, inputy, self.C)
    return self
def get_train_output_for(self, inputX, inputy=None):
    inputX = self.pca.fit_transform(inputX)
    n_hidden = int(self.n_times * inputX.shape[1])
    self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
    self.b = init.Normal().sample(n_hidden)
    H = dotbiasact_decomp(inputX, self.W, self.b)
    self.beta = compute_beta(H, inputy, self.C)
    out = dot_decomp(H, self.beta)
    return out
def get_train_output(self, inputX, inputy):
    self.W = init.GlorotNormal().sample((inputX.shape[1], self.hidden_unit))
    self.b = init.Normal().sample(self.hidden_unit)
    H = np.dot(inputX, self.W) + self.b
    H = relu(H)
    self.beta = compute_beta(H, inputy, self.C)
    out = np.dot(H, self.beta)
    return out
def get_train_output_for(self, inputX, inputy=None):
    n_hidden = int(self.n_times * inputX.shape[1])
    self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
    self.b = init.Normal().sample(n_hidden)
    H = np.dot(inputX, self.W) + self.b
    H = relu(H)
    self.beta = compute_beta_val(H, inputy, 3)
    out = np.dot(H, self.beta)
    return out
def __init__(self, W_in=init.GlorotNormal(1.0), W_hid=init.GlorotNormal(1.0),
             W_read=init.GlorotNormal(1.0), W_cell=init.Normal(1.0),
             b=init.Constant(0.), nonlinearity=nonlinearities.sigmoid):
    self.W_in = W_in
    self.W_hid = W_hid
    self.W_read = W_read
    # Don't store a cell weight vector when cell is None
    if W_cell is not None:
        self.W_cell = W_cell
    self.b = b
    # For the nonlinearity, if None is supplied, use identity
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
def lstm_layer(input, nunits, return_final, backwards=False, name='LSTM'):
    ingate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01), b=init.Constant(0.0))
    forgetgate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01), b=init.Constant(5.0))
    cell = Gate(W_cell=None, nonlinearity=T.tanh,
                W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01))
    outgate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01), b=init.Constant(0.0))
    lstm = LSTMLayer(input, num_units=nunits, backwards=backwards, peepholes=False,
                     ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                     name=name, only_return_final=return_final)
    # Note: this RecurrentLayer is constructed but never used; the function returns only the LSTM.
    rec = RecurrentLayer(input, num_units=nunits,
                         W_in_to_hid=init.GlorotNormal('relu'),
                         W_hid_to_hid=init.GlorotNormal('relu'),
                         backwards=backwards, nonlinearity=rectify,
                         only_return_final=return_final, name=name)
    return lstm
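# Usage sketch (illustrative, not from the original file): build a bidirectional encoding by
# concatenating a forward and a backward lstm_layer(). The input shape and layer sizes here are
# arbitrary assumptions.
from lasagne.layers import InputLayer, ConcatLayer

l_in = InputLayer((None, 50, 64))  # (batch, time, features)
l_fwd = lstm_layer(l_in, nunits=128, return_final=True, name='LSTM_fwd')
l_bwd = lstm_layer(l_in, nunits=128, return_final=True, backwards=True, name='LSTM_bwd')
l_enc = ConcatLayer([l_fwd, l_bwd], axis=1)  # concatenated final states, (batch, 256)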
def __init__(self, n_v, n_h, trans_func=sigmoid):
    super(NADE, self).__init__(n_v, n_h, n_v, trans_func)
    self._srng = RandomStreams()
    self.n_hidden = n_h
    l_v = InputLayer((None, n_v))
    self.model = NADELayer(l_v, n_h, W=init.GlorotNormal(), b=init.Constant(0.))
    self.model_params = get_all_params(self.model)
    self.sym_x = T.matrix('x')
def get_train_output_ensemble(self, inputX, inputy, n=10):
    self.W = init.GlorotNormal().sample((inputX.shape[1], self.hidden_unit))
    self.b = init.Normal().sample(self.hidden_unit)
    outputs = []
    for _ in xrange(n):
        inputX, binomial1 = dropout(inputX, p=0.5)
        H = np.dot(inputX, self.W) + self.b
        H = relu(H)
        H, binomial2 = dropout(H, p=0.5)
        beta = compute_beta(H, inputy, self.C)
        out = np.dot(H, beta)
        outputs.append(np.copy(out))
        self.binomials.append((np.copy(binomial1), np.copy(binomial2)))
        self.betas.append(np.copy(beta))
    return outputs
def _addCCCPLayer(self, inputX, outchannels):
    batches, inchannels, rows, cols = inputX.shape
    inputX = inputX.transpose((0, 2, 3, 1)).reshape((-1, inchannels))
    W = init.GlorotNormal().sample((inchannels, outchannels))
    b = init.Normal().sample(outchannels)
    # AE forward pass to the hidden layer
    H = dotbiasact_decomp(inputX, W, b)
    del W, b
    # AE: compute the output matrix with ELM
    beta = compute_beta(H, inputX, self.C).T
    # Forward pass producing the CCCP output
    cccpout = dot_decomp(inputX, beta)
    del inputX
    cccpout = cccpout.reshape((batches, rows, cols, -1)).transpose((0, 3, 1, 2))
    print 'add cccp layer'
    return cccpout, beta
def train(self, inputX, inputy):
    layerout = self._buildAE(inputX)
    rows, cols = layerout.shape
    classifierunit = cols * 5
    W = init.GlorotNormal().sample((cols, classifierunit))
    b = init.Normal().sample(classifierunit)
    H = dotbiasact_decomp(layerout, W, b)
    del layerout
    beta = compute_beta(H, inputy, self.C)
    out = dot_decomp(H, beta)
    del H
    self.paramsC['W'] = W
    self.paramsC['b'] = b
    self.paramsC['beta'] = beta
    ypred = np.argmax(out, axis=1)
    ytrue = np.argmax(inputy, axis=1)
    return np.mean(ypred == ytrue)
def forward(self, inputX, train=True):
    assert inputX.ndim == 4 and inputX.shape[1] == 1  # ELMAE input must have exactly one channel, i.e. a single feature map
    batches, channels, rows, cols = inputX.shape
    if train:
        patches = self._make_patches(inputX, fit=True, addpad=False)
        # Use clustering to obtain distinct patches within each class
        # Generate random orthogonal filters
        filters = init.GlorotNormal().sample((self.filter_size ** 2, self.hidden_unit))
        filters = orthonormalize(filters)
        bias = init.Normal().sample(self.hidden_unit)
        bias = orthonormalize(bias)
        # Forward pass, consistent with the patch extraction
        hiddens = dotbiasact_decomp(patches, filters, bias)
        # Compute beta
        self.beta = compute_beta(hiddens, patches, self.C).T
    patches = self._make_patches(inputX, fit=False, addpad=True)
    out = dot_decomp(patches, self.beta)
    out = out.reshape((batches, rows, cols, -1)).transpose((0, 3, 1, 2))
    return out
def train(self, inputX, inputy):
    layerout1, layerout2 = self._buildAE(inputX)
    rows, cols = layerout1.shape
    classifierunit = cols * 5
    W = init.GlorotNormal().sample((cols, classifierunit))
    b = init.Normal().sample(classifierunit)
    H1 = dotbiasact_decomp(layerout1, W, b)
    beta1 = compute_beta(H1, inputy, self.C)
    out1 = dot_decomp(H1, beta1)
    H2 = dotbiasact_decomp(layerout2, W, b)
    beta2 = compute_beta(H2, inputy, self.C)
    out2 = dot_decomp(H2, beta2)
    self.paramsC['W'] = W
    self.paramsC['b'] = b
    self.paramsC['beta1'] = beta1
    self.paramsC['beta2'] = beta2
    ypred1 = np.argmax(out1, axis=1)
    ypred2 = np.argmax(out2, axis=1)
    ytrue = np.argmax(inputy, axis=1)
    return np.mean(ypred1 == ytrue), np.mean(ypred2 == ytrue)
def __init__(self, incomings, nfilters, nrings=5, nrays=16,
             W=LI.GlorotNormal(), b=LI.Constant(0.0),
             normalize_rings=False, normalize_input=False, take_max=True,
             nonlinearity=LN.rectify, **kwargs):
    super(GCNNLayer, self).__init__(incomings, **kwargs)
    # patch operator sizes
    self.nfilters = nfilters
    self.nrings = nrings
    self.nrays = nrays
    self.filter_shape = (nfilters, self.input_shapes[0][1], nrings, nrays)
    self.biases_shape = (nfilters, )
    # patch operator parameters
    self.normalize_rings = normalize_rings
    self.normalize_input = normalize_input
    self.take_max = take_max
    self.nonlinearity = nonlinearity
    # layer parameters:
    # y = Wx + b, where x are the input features and y are the output features
    self.W = self.add_param(W, self.filter_shape, name="W")
    self.b = self.add_param(b, self.biases_shape, name="b", regularizable=False)
def __init__(self, n_x, n_z, qz_hid, px_hid, filters, seq_length=50, nonlinearity=rectify,
             px_nonlinearity=None, x_dist='linear', batchnorm=False, seed=1234):
    """
    Weights are initialized using the Bengio and Glorot (2010) initialization scheme.
    :param n_x: Number of inputs.
    :param n_z: Number of latent.
    :param qz_hid: List of number of deterministic hidden q(z|a,x,y).
    :param px_hid: List of number of deterministic hidden p(a|z,y) & p(x|z,y).
    :param nonlinearity: The transfer function used in the deterministic layers.
    :param x_dist: The x distribution, 'bernoulli', 'multinomial', or 'gaussian'.
    :param batchnorm: Boolean value for batch normalization.
    :param seed: The random seed.
    """
    super(CVAE, self).__init__(n_x, qz_hid + px_hid, n_z, nonlinearity)
    self.x_dist = x_dist
    self.n_x = n_x
    self.seq_length = seq_length
    self.n_z = n_z
    self.batchnorm = batchnorm
    self._srng = RandomStreams(seed)

    # Pool layer cache
    pool_layers = []

    # Decide Glorot initialization of weights.
    init_w = 1e-3
    hid_w = ""
    if nonlinearity == rectify or nonlinearity == softplus:
        hid_w = "relu"

    # Define symbolic variables for theano functions.
    self.sym_x = T.tensor3('x')  # inputs
    self.sym_z = T.matrix('z')
    self.sym_samples = T.iscalar('samples')  # MC samples

    # Assist methods for collecting the layers
    def dense_layer(layer_in, n, dist_w=init.GlorotNormal, dist_b=init.Normal):
        dense = DenseLayer(layer_in, n, dist_w(hid_w), dist_b(init_w), None)
        if batchnorm:
            dense = bn(dense)
        return NonlinearityLayer(dense, self.transf)

    def stochastic_layer(layer_in, n, samples, nonlin=None):
        mu = DenseLayer(layer_in, n, init.Normal(init_w), init.Normal(init_w), nonlin)
        logvar = DenseLayer(layer_in, n, init.Normal(init_w), init.Normal(init_w), nonlin)
        return SampleLayer(mu, logvar, eq_samples=samples, iw_samples=1), mu, logvar

    def conv_layer(layer_in, filter, stride=(1, 1), pool=1, name='conv'):
        l_conv = Conv2DLayer(layer_in, num_filters=filter, filter_size=(3, 1), stride=stride,
                             pad='full', name=name)
        if pool > 1:
            l_conv = MaxPool2DLayer(l_conv, pool_size=(pool, 1))
        pool_layers.append(l_conv)
        return l_conv

    # Reshape input
    l_x_in = InputLayer((None, seq_length, n_x), name='Input')
    l_x_in_reshp = ReshapeLayer(l_x_in, (-1, 1, seq_length, n_x))
    print("l_x_in_reshp", l_x_in_reshp.output_shape)

    # CNN encoder implementation
    l_conv_enc = l_x_in_reshp
    for filter, stride, pool in filters:
        l_conv_enc = conv_layer(l_conv_enc, filter, stride, pool)
        print("l_conv_enc", l_conv_enc.output_shape)

    # Pool along last 2 axes
    l_global_pool_enc = GlobalPoolLayer(l_conv_enc)
    l_enc = dense_layer(l_global_pool_enc, n_z)
    print("l_enc", l_enc.output_shape)

    # Recognition q(z|x)
    l_qz = l_enc
    for hid in qz_hid:
        l_qz = dense_layer(l_qz, hid)
    l_qz, l_qz_mu, l_qz_logvar = stochastic_layer(l_qz, n_z, self.sym_samples)
    print("l_qz", l_qz.output_shape)

    # Inverse pooling
    l_global_depool = InverseLayer(l_qz, l_global_pool_enc)
    print("l_global_depool", l_global_depool.output_shape)

    # Reverse pool layer order
    pool_layers = pool_layers[::-1]

    # Decode
    l_deconv = l_global_depool
    for idx, filter in enumerate(filters[::-1]):
        filter, stride, pool = filter
        if pool > 1:
            l_deconv = InverseLayer(l_deconv, pool_layers[idx])
        l_deconv = Conv2DLayer(l_deconv, num_filters=filter, filter_size=(3, 1),
                               stride=(stride, 1), W=init.GlorotNormal('relu'))
        print("l_deconv", l_deconv.output_shape)

    # The last l_conv layer should give us the input shape
    l_dec = Conv2DLayer(l_deconv, num_filters=1, filter_size=(3, 1), pad='same',
                        nonlinearity=None)
    print("l_dec", l_dec.output_shape)

    # Flatten first two dimensions
    l_dec = ReshapeLayer(l_dec, (-1, n_x))

    l_px = l_dec
    if x_dist == 'bernoulli':
        l_px = DenseLayer(l_px, n_x, init.GlorotNormal(), init.Normal(init_w), sigmoid)
    elif x_dist == 'multinomial':
        l_px = DenseLayer(l_px, n_x, init.GlorotNormal(), init.Normal(init_w), softmax)
    elif x_dist == 'gaussian':
        l_px, l_px_mu, l_px_logvar = stochastic_layer(l_px, n_x, self.sym_samples,
                                                      px_nonlinearity)
    elif x_dist == 'linear':
        l_px = DenseLayer(l_px, n_x, nonlinearity=None)

    # Reshape all the model layers to have the same size
    self.l_x_in = l_x_in

    self.l_qz = ReshapeLayer(l_qz, (-1, self.sym_samples, 1, n_z))
    self.l_qz_mu = DimshuffleLayer(l_qz_mu, (0, 'x', 'x', 1))
    self.l_qz_logvar = DimshuffleLayer(l_qz_logvar, (0, 'x', 'x', 1))

    self.l_px = DimshuffleLayer(
        ReshapeLayer(l_px, (-1, seq_length, self.sym_samples, 1, n_x)), (0, 2, 3, 1, 4))
    self.l_px_mu = DimshuffleLayer(
        ReshapeLayer(l_px_mu, (-1, seq_length, self.sym_samples, 1, n_x)), (0, 2, 3, 1, 4)) \
        if x_dist == "gaussian" else None
    self.l_px_logvar = DimshuffleLayer(
        ReshapeLayer(l_px_logvar, (-1, seq_length, self.sym_samples, 1, n_x)), (0, 2, 3, 1, 4)) \
        if x_dist == "gaussian" else None

    # Predefined functions
    inputs = {self.l_x_in: self.sym_x}
    outputs = get_output(l_qz, inputs, deterministic=True)
    self.f_qz = theano.function([self.sym_x, self.sym_samples], outputs)

    inputs = {l_qz: self.sym_z}
    outputs = get_output(self.l_px, inputs, deterministic=True).mean(axis=(1, 2))
    self.f_px = theano.function([self.sym_z, self.sym_samples], outputs)

    outputs = get_output(self.l_px_mu, inputs, deterministic=True).mean(axis=(1, 2))
    self.f_mu = theano.function([self.sym_z, self.sym_samples], outputs)

    outputs = get_output(self.l_px_logvar, inputs, deterministic=True).mean(axis=(1, 2))
    self.f_var = theano.function([self.sym_z, self.sym_samples], outputs)

    # Define model parameters
    self.model_params = get_all_params([self.l_px])
    self.trainable_model_params = get_all_params([self.l_px], trainable=True)
def __init__(
        self,
        input,            # input images (n_batch x n_channels x img_height x img_width)
        # n_batch=64,     # number of batch
        k=1,              # number of glimpse scales
        patch=8,          # size of glimpse patch
        n_steps=6,        # number of glimpse steps
        lambda_=10.0,     # mixing ratio between
        n_h_g=128,        # number of hidden units in h_g (in glimpse network)
        n_h_l=128,        # number of hidden units in h_l (in glimpse network)
        n_f_g=256,        # number of hidden units in f_g (glimpse network)
        n_f_h=256,        # number of hidden units in f_h (core network)
        # n_f_l=2,        # dim of output of f_l (location network), i.e. 2
        n_classes=10,     # number of classes in classification problem
        learn_init=True,
        **kwargs):
    super(RAMLayer, self).__init__(input, **kwargs)

    if len(self.input_shape) == 3:
        self.n_batch = self.input_shape[0]
        self.n_channels = 1
        self.img_height = self.input_shape[1]
        self.img_width = self.input_shape[2]
    elif len(self.input_shape) == 4:
        self.n_batch = self.input_shape[0]
        self.n_channels = self.input_shape[1]
        self.img_height = self.input_shape[2]
        self.img_width = self.input_shape[3]
    else:
        raise ValueError(
            "Input should be either gray scale (ndim = 3) or color (ndim = 4) images. "
            "Current ndim=%d" % len(self.input_shape))

    self.k = k
    self.patch = patch
    self.n_steps = n_steps
    self.lambda_ = lambda_
    self.n_h_g = n_h_g
    self.n_h_l = n_h_l
    self.n_f_g = n_f_g
    self.n_f_h = n_f_h
    # self.n_f_l = 2
    self.n_classes = n_classes

    # for glimpse network, f_g
    self.W_h_g = []
    for i in xrange(self.k):
        self.W_h_g.append(
            self.add_param(init.GlorotNormal(),
                           (self.n_channels * ((self.patch * (2 ** i)) ** 2), self.n_h_g),
                           name='W_h_g'))
    self.b_h_g = self.add_param(init.Constant(0.), (self.n_h_g, ), name='b_h_g')

    self.W_h_l = self.add_param(init.GlorotNormal(), (2, self.n_h_l), name='W_h_l')
    self.b_h_l = self.add_param(init.Constant(0.), (self.n_h_l, ), name='b_h_l')

    self.W_f_g_1 = self.add_param(init.GlorotNormal(), (self.n_h_g, self.n_f_g), name='W_f_g_1')
    self.W_f_g_2 = self.add_param(init.GlorotNormal(), (self.n_h_l, self.n_f_g), name='W_f_g_2')
    self.b_f_g = self.add_param(init.Constant(0.), (self.n_f_g, ), name='b_f_g')

    # for core network, f_h
    self.W_f_h_1 = self.add_param(init.GlorotNormal(), (self.n_f_g, self.n_f_h), name='W_f_h_1')
    self.W_f_h_2 = self.add_param(init.GlorotNormal(), (self.n_f_g, self.n_f_h), name='W_f_h_2')
    self.b_f_h = self.add_param(init.Constant(0.), (self.n_f_h, ), name='b_f_h')

    # for action network (location) f_l
    self.W_f_l = self.add_param(init.GlorotNormal(), (self.n_f_h, 2), name='W_f_l')
    self.b_f_l = self.add_param(init.Constant(0.), (2, ), name='b_f_l')

    # for action network (classification) f_a
    self.W_classifier = self.add_param(init.GlorotNormal(), (self.n_f_h, self.n_classes),
                                       name='W_classifier')
    self.b_classifier = self.add_param(init.Constant(0.), (self.n_classes, ),
                                       name='b_classifier')

    # for step
    self._srng = RandomStreams(np.random.randint(1, 2147462579))
    self.sigma = 0.1

    self.hid_init = self.add_param(init.Constant(0.), (1, ) + (self.n_f_h, ), name="hid_init",
                                   trainable=learn_init, regularizable=False)
def __init__(self, n_in, n_filters, filter_sizes, n_out, pool_sizes=None, n_hidden=(512,),
             ccf=False, trans_func=rectify, out_func=softmax, dense_dropout=0.0, stats=2,
             input_noise=0.0, batch_norm=False, conv_dropout=0.0):
    super(CNN, self).__init__(n_in, n_hidden, n_out, trans_func)
    self.outf = out_func
    self.log = ""

    # Define model using lasagne framework
    dropout = True if not dense_dropout == 0.0 else False

    # Overwrite input layer
    sequence_length, n_features = n_in
    self.l_in = InputLayer(shape=(None, sequence_length, n_features))
    l_prev = self.l_in

    # Separate into raw values and statistics
    sequence_length -= stats
    stats_layer = SliceLayer(l_prev, indices=slice(sequence_length, None), axis=1)
    stats_layer = ReshapeLayer(stats_layer, (-1, stats * n_features))
    print('Stats layer shape', stats_layer.output_shape)
    l_prev = SliceLayer(l_prev, indices=slice(0, sequence_length), axis=1)
    print('Conv input layer shape', l_prev.output_shape)

    # Apply input noise
    l_prev = GaussianNoiseLayer(l_prev, sigma=input_noise)

    if ccf:
        self.log += "\nAdding cross-channel feature layer"
        l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features))
        l_prev = Conv2DLayer(l_prev, num_filters=4 * n_features, filter_size=(1, n_features),
                             nonlinearity=None)
        n_features *= 4
        if batch_norm:
            l_prev = batch_norm_layer(l_prev)
        l_prev = ReshapeLayer(l_prev, (-1, n_features, sequence_length))
        l_prev = DimshuffleLayer(l_prev, (0, 2, 1))

    # 2D Convolutional layers
    l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features))
    l_prev = DimshuffleLayer(l_prev, (0, 3, 2, 1))

    # Add the convolutional filters
    for n_filter, filter_size, pool_size in zip(n_filters, filter_sizes, pool_sizes):
        self.log += "\nAdding 2D conv layer: %d x %d" % (n_filter, filter_size)
        l_prev = Conv2DLayer(l_prev, num_filters=n_filter, filter_size=(filter_size, 1),
                             nonlinearity=self.transf, pad=filter_size // 2)
        if batch_norm:
            l_prev = batch_norm_layer(l_prev)
        if pool_size > 1:
            self.log += "\nAdding max pooling layer: %d" % pool_size
            l_prev = Pool2DLayer(l_prev, pool_size=(pool_size, 1))
        self.log += "\nAdding dropout layer: %.2f" % conv_dropout
        l_prev = TiedDropoutLayer(l_prev, p=conv_dropout)
        print("Conv out shape", get_output_shape(l_prev))

    # Global pooling layer
    l_prev = GlobalPoolLayer(l_prev, pool_function=T.mean, name='Global Mean Pool')
    print("GlobalPoolLayer out shape", get_output_shape(l_prev))

    # Concatenate stats
    l_prev = ConcatLayer((l_prev, stats_layer), axis=1)

    for n_hid in n_hidden:
        self.log += "\nAdding dense layer with %d units" % n_hid
        print("Dense input shape", get_output_shape(l_prev))
        l_prev = DenseLayer(l_prev, n_hid, init.GlorotNormal(), init.Normal(1e-3), self.transf)
        if batch_norm:
            l_prev = batch_norm_layer(l_prev)
        if dropout:
            self.log += "\nAdding dense dropout with probability: %.2f" % dense_dropout
            l_prev = DropoutLayer(l_prev, p=dense_dropout)

    if batch_norm:
        self.log += "\nUsing batch normalization"

    self.model = DenseLayer(l_prev, num_units=n_out, nonlinearity=out_func)
    self.model_params = get_all_params(self.model)
    self.sym_x = T.tensor3('x')
    self.sym_t = T.matrix('t')
def build_model(self, use_mean_lstm=True, old_version=True,  # init params used during setup stage
                transf=lasagne.nonlinearities.tanh,  # dense layer activation function
                word_ebd_init=init.Normal(1e-6), b_init=init.Normal(1e-4),
                W_init=init.GlorotNormal(), W_init_act=init.GlorotNormal()):
    self.transf = transf

    # ############## build model ############
    self.l_sents_in = InputLayer((None, None))
    self.l_mask_in = InputLayer((None, None))
    self.l_label_in = InputLayer((None, self.dimy))
    # For unlabeled data, y is generated by the classifier; otherwise y is a parameter during training.
    self.l_z_in = InputLayer((None, self.dimz))  # samples in the generation model
    self.l_dec_cell_in = InputLayer((None, self.dec_num_units))  # used in one-step beam search
    self.l_dec_hid_in = InputLayer((None, self.dec_num_units))  # used in one-step beam search
    self.l_dec_input_word_in = InputLayer((None, None, self.word_ebd_dims))
    # batch_size * sent_length (max LSTM steps) * word_ebd_dims
    self.l_dec_out_in = InputLayer((None, None, self.dec_num_units))

    # ### word embedding layers
    self.l_ebd = EmbeddingLayer(self.l_sents_in, self.word_dict_size, self.word_ebd_dims,
                                W=word_ebd_init, name='EbdLayer')
    self.l_ebd_drop = DropoutLayer(self.l_ebd, p=self.drop_out, name='EbdDropoutLayer')
    # no params; input: batch_size * sent_length * word_ebd_dims

    # #################### encoder lstm layers ####################
    self.l_x = DropoutLayer(
        LSTMLayer(self.l_ebd_drop, num_units=self.enc_num_units, mask_input=self.l_mask_in,
                  grad_clipping=self.grad_clipping, only_return_final=True, name='EncLSTMLayer'),
        p=self.drop_out, name='EncLSTMLayer')

    # LSTM for the classifier; is mean pooling better?
    if use_mean_lstm:
        print 'Using mean pooling for classifier!!!!!!!!!!!!!!!'
        self.l_c = DropoutLayer(
            MeanLstmLayer(self.l_ebd_drop, num_units=self.enc_num_units,
                          mask_input=self.l_mask_in, grad_clipping=self.grad_clipping,
                          name='ClassLSTMLayer'),
            p=self.drop_out, name='ClassLSTMLayer')
    else:
        self.l_c = DropoutLayer(
            LSTMLayer(self.l_ebd_drop, num_units=self.enc_num_units, mask_input=self.l_mask_in,
                      grad_clipping=self.grad_clipping, only_return_final=True,
                      name='ClassLSTMLayer'),
            p=self.drop_out, name='ClassLSTMLayer')

    # ----------------- auxiliary q(a|x) -----------------
    if old_version:
        self.l_x_to_a = DropoutLayer(
            batch_norm(
                DenseLayer(self.l_x, num_units=self.dima, W=W_init_act, b=b_init,
                           nonlinearity=self.transf, name='x_to_a_old'),
                alpha=self.bnalpha, name='x_to_a_old'),
            p=self.drop_out, name='x_to_a_old')
    else:
        print 'Using new version of model!!!!!!!!!!'
        self.l_mean_pooling = DropoutLayer(
            MeanMaskLayer(self.l_ebd_drop, self.l_mask_in, name='mean_pooling'),
            p=self.drop_out, name='mean_pooling')
        self.l_x_to_a = DropoutLayer(
            batch_norm(
                DenseLayer(self.l_mean_pooling, num_units=self.dima, W=W_init_act, b=b_init,
                           nonlinearity=self.transf, name='x_to_a_new'),
                alpha=self.bnalpha, name='x_to_a_new'),
            p=self.drop_out, name='x_to_a_new')

    self.l_a_mu = DenseLayer(self.l_x_to_a, self.dima, W=W_init, b=b_init, nonlinearity=None,
                             name='a_mu')  # linear, no activation
    self.l_a_var = DenseLayer(self.l_x_to_a, self.dima, W=W_init, b=b_init, nonlinearity=None,
                              name='a_var')
    self.l_a = SimpleSampleLayer(self.l_a_mu, self.l_a_var, name='a_sample')  # no params

    # ################# Classifier q(y|a,x) #################
    self.l_ax = ConcatLayer([self.l_c, self.l_a], axis=1, name='Concat_ax')  # no params
    self.l_ax_to_y = DropoutLayer(
        batch_norm(
            DenseLayer(self.l_ax, num_units=self.dimy, W=W_init_act, b=b_init,
                       nonlinearity=self.transf, name='ax_to_y'),
            alpha=self.bnalpha, name='ax_to_y'),
        p=self.drop_out, name='ax_to_y')
    self.l_y = DenseLayer(self.l_ax, num_units=self.dimy, W=W_init, b=b_init,
                          nonlinearity=softmax, name='y_classifier')

    # #################### sample q(z|a,x,y) ####################
    self.l_xy = ConcatLayer([self.l_x, self.l_label_in], axis=1, name='Concat_xy')
    # no params; uses l_label_in first
    self.l_xy_to_z = DropoutLayer(
        batch_norm(
            DenseLayer(self.l_xy, num_units=self.dimz, W=W_init_act, b=b_init,
                       nonlinearity=self.transf, name='xy_to_z'),
            alpha=self.bnalpha, name='xy_to_z'),
        p=self.drop_out, name='xy_to_z')
    self.l_z_mu = DenseLayer(self.l_xy_to_z, self.dimz, W=W_init, b=b_init, nonlinearity=None,
                             name='z_mu')  # linear, no activation
    self.l_z_var = DenseLayer(self.l_xy_to_z, self.dimz, W=W_init, b=b_init, nonlinearity=None,
                              name='z_var')  # linear, no activation
    self.l_z = SimpleSampleLayer(self.l_z_mu, self.l_z_var, name='z_sample')

    # ################## generative model; 'u' stands for 'a' in the paper ##################
    self.l_yz = ConcatLayer([self.l_label_in, self.l_z_in], axis=1, name='Concat_yz')
    # l_z_in layer is used in beam search
    self.l_hid = batch_norm(
        DenseLayer(self.l_yz, num_units=self.dec_num_units, W=W_init_act, b=b_init,
                   nonlinearity=self.transf, name='LmHidInit'),
        alpha=self.bnalpha, name='LmHidInit')  # hidden init has no dropout

    # ######################## dec lm ########################
    self.l_lm = ScLSTMLayer(incoming=self.l_dec_input_word_in, num_units=self.dec_num_units,
                            da_init=self.l_label_in, cell_init=self.l_dec_cell_in,
                            hid_init=self.l_dec_hid_in, mask_input=self.l_mask_in,
                            grad_clipping=self.grad_clipping, name='ScLSTMLayer')
    # cell, hid used in beam search; shape (batch_size, sent_length, dec_num_units)

    # ######################## softmax results ########################
    self.l_recons_x = DenseLayer(
        DropoutLayer(
            ReshapeLayer(self.l_dec_out_in, shape=(-1, self.dec_num_units), name='ScLSTMLayer'),
            p=self.drop_out, name='ScLSTMLayer'),
        # output shape: (batch_size * sent_length, dec_num_units)
        num_units=self.word_dict_size, W=W_init, b=b_init, nonlinearity=softmax,
        name='recons_x')  # (batch_size * sent_length, word_dict_size)
def __init__(self, n_c, n_l, n_a, n_z, n_y, qa_hid, qz_hid, qy_hid, px_hid, pa_hid, filters,
             nonlinearity=rectify, px_nonlinearity=None, x_dist='bernoulli', batchnorm=False,
             seed=1234):
    """
    Initialize a skip deep generative model consisting of a discriminative classifier q(y|a,x),
    generative model P p(a|z,y) and p(x|a,z,y), and inference model Q q(a|x) and q(z|a,x,y).
    Weights are initialized using the Bengio and Glorot (2010) initialization scheme.
    :param n_c: Number of input channels.
    :param n_l: Number of lengths.
    :param n_a: Number of auxiliary.
    :param n_z: Number of latent.
    :param n_y: Number of classes.
    :param qa_hid: List of number of deterministic hidden q(a|x).
    :param qz_hid: List of number of deterministic hidden q(z|a,x,y).
    :param qy_hid: List of number of deterministic hidden q(y|a,x).
    :param px_hid: List of number of deterministic hidden p(a|z,y) & p(x|z,y).
    :param nonlinearity: The transfer function used in the deterministic layers.
    :param x_dist: The x distribution, 'bernoulli', 'multinomial', or 'gaussian'.
    :param batchnorm: Boolean value for batch normalization.
    :param seed: The random seed.
    """
    super(CSDGM, self).__init__(n_c, qz_hid + px_hid, n_a + n_z, nonlinearity)
    self.x_dist = x_dist
    self.n_y = n_y
    self.n_c = n_c
    self.n_l = n_l
    self.n_a = n_a
    self.n_z = n_z
    self.batchnorm = batchnorm
    self._srng = RandomStreams(seed)

    # Decide Glorot initialization of weights.
    init_w = 1e-3
    hid_w = ""
    if nonlinearity == rectify or nonlinearity == softplus:
        hid_w = "relu"

    pool_layers = []

    # Define symbolic variables for theano functions.
    self.sym_beta = T.scalar('beta')  # scaling constant beta
    self.sym_x_l = T.tensor3('x')  # labeled inputs
    self.sym_t_l = T.matrix('t')  # labeled targets
    self.sym_x_u = T.tensor3('x')  # unlabeled inputs
    self.sym_bs_l = T.iscalar('bs_l')  # number of labeled data
    self.sym_samples = T.iscalar('samples')  # MC samples
    self.sym_z = T.matrix('z')  # latent variable z
    self.sym_a = T.matrix('a')  # auxiliary variable a
    self.sym_warmup = T.fscalar('warmup')  # warmup to scale KL term

    # Assist methods for collecting the layers
    def dense_layer(layer_in, n, dist_w=init.GlorotNormal, dist_b=init.Normal):
        dense = DenseLayer(layer_in, n, dist_w(hid_w), dist_b(init_w), None)
        if batchnorm:
            dense = BatchNormLayer(dense)
        return NonlinearityLayer(dense, self.transf)

    def stochastic_layer(layer_in, n, samples, nonlin=None):
        mu = DenseLayer(layer_in, n, init.Normal(init_w), init.Normal(init_w), nonlin)
        logvar = DenseLayer(layer_in, n, init.Normal(init_w), init.Normal(init_w), nonlin)
        return SampleLayer(mu, logvar, eq_samples=samples, iw_samples=1), mu, logvar

    def conv_layer(layer_in, filter, stride=(1, 1), pool=1, name='conv',
                   dist_w=init.GlorotNormal, dist_b=init.Normal):
        l_conv = Conv2DLayer(layer_in, num_filters=filter, filter_size=(3, 1), stride=stride,
                             pad='full', W=dist_w(hid_w), b=dist_b(init_w), name=name)
        if pool > 1:
            l_conv = MaxPool2DLayer(l_conv, pool_size=(pool, 1))
        pool_layers.append(l_conv)
        return l_conv

    # Input layers
    l_y_in = InputLayer((None, n_y))
    l_x_in = InputLayer((None, n_l, n_c), name='Input')

    # Reshape input
    l_x_in_reshp = ReshapeLayer(l_x_in, (-1, 1, n_l, n_c))
    print("l_x_in_reshp", l_x_in_reshp.output_shape)

    # CNN encoder implementation
    l_conv_enc = l_x_in_reshp
    for filter, stride, pool in filters:
        l_conv_enc = conv_layer(l_conv_enc, filter, stride, pool)
        print("l_conv_enc", l_conv_enc.output_shape)

    # Pool along last 2 axes
    l_global_pool_enc = GlobalPoolLayer(l_conv_enc, pool_function=T.mean)
    l_enc = dense_layer(l_global_pool_enc, n_z)
    print("l_enc", l_enc.output_shape)

    # Auxiliary q(a|x)
    l_qa_x = l_enc
    for hid in qa_hid:
        l_qa_x = dense_layer(l_qa_x, hid)
    l_qa_x, l_qa_x_mu, l_qa_x_logvar = stochastic_layer(l_qa_x, n_a, self.sym_samples)

    # Classifier q(y|a,x)
    l_qa_to_qy = DenseLayer(l_qa_x, qy_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_qa_to_qy = ReshapeLayer(l_qa_to_qy, (-1, self.sym_samples, 1, qy_hid[0]))
    l_x_to_qy = DenseLayer(l_enc, qy_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_x_to_qy = DimshuffleLayer(l_x_to_qy, (0, 'x', 'x', 1))
    l_qy_xa = ReshapeLayer(ElemwiseSumLayer([l_qa_to_qy, l_x_to_qy]), (-1, qy_hid[0]))
    if batchnorm:
        l_qy_xa = BatchNormLayer(l_qy_xa)
    l_qy_xa = NonlinearityLayer(l_qy_xa, self.transf)
    if len(qy_hid) > 1:
        for hid in qy_hid[1:]:
            l_qy_xa = dense_layer(l_qy_xa, hid)
    l_qy_xa = DenseLayer(l_qy_xa, n_y, init.GlorotNormal(), init.Normal(init_w), softmax)

    # Recognition q(z|x,a,y)
    l_qa_to_qz = DenseLayer(l_qa_x, qz_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_qa_to_qz = ReshapeLayer(l_qa_to_qz, (-1, self.sym_samples, 1, qz_hid[0]))
    l_x_to_qz = DenseLayer(l_enc, qz_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_x_to_qz = DimshuffleLayer(l_x_to_qz, (0, 'x', 'x', 1))
    l_y_to_qz = DenseLayer(l_y_in, qz_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_y_to_qz = DimshuffleLayer(l_y_to_qz, (0, 'x', 'x', 1))
    l_qz_axy = ReshapeLayer(ElemwiseSumLayer([l_qa_to_qz, l_x_to_qz, l_y_to_qz]), (-1, qz_hid[0]))
    if batchnorm:
        l_qz_axy = BatchNormLayer(l_qz_axy)
    l_qz_axy = NonlinearityLayer(l_qz_axy, self.transf)
    if len(qz_hid) > 1:
        for hid in qz_hid[1:]:
            l_qz_axy = dense_layer(l_qz_axy, hid)
    l_qz_axy, l_qz_axy_mu, l_qz_axy_logvar = stochastic_layer(l_qz_axy, n_z, 1)

    # Generative p(a|z,y)
    l_y_to_pa = DenseLayer(l_y_in, pa_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_y_to_pa = DimshuffleLayer(l_y_to_pa, (0, 'x', 'x', 1))
    l_qz_to_pa = DenseLayer(l_qz_axy, pa_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_qz_to_pa = ReshapeLayer(l_qz_to_pa, (-1, self.sym_samples, 1, pa_hid[0]))
    l_pa_zy = ReshapeLayer(ElemwiseSumLayer([l_qz_to_pa, l_y_to_pa]), [-1, pa_hid[0]])
    if batchnorm:
        l_pa_zy = BatchNormLayer(l_pa_zy)
    l_pa_zy = NonlinearityLayer(l_pa_zy, self.transf)
    if len(pa_hid) > 1:
        for hid in pa_hid[1:]:
            l_pa_zy = dense_layer(l_pa_zy, hid)
    l_pa_zy, l_pa_zy_mu, l_pa_zy_logvar = stochastic_layer(l_pa_zy, n_a, 1)

    # Generative p(x|a,z,y)
    l_qa_to_px = DenseLayer(l_qa_x, px_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_qa_to_px = ReshapeLayer(l_qa_to_px, (-1, self.sym_samples, 1, px_hid[0]))
    l_y_to_px = DenseLayer(l_y_in, px_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_y_to_px = DimshuffleLayer(l_y_to_px, (0, 'x', 'x', 1))
    l_qz_to_px = DenseLayer(l_qz_axy, px_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
    l_qz_to_px = ReshapeLayer(l_qz_to_px, (-1, self.sym_samples, 1, px_hid[0]))
    l_px_azy = ReshapeLayer(ElemwiseSumLayer([l_qa_to_px, l_qz_to_px, l_y_to_px]), [-1, px_hid[0]])
    if batchnorm:
        l_px_azy = BatchNormLayer(l_px_azy)
    l_px_azy = NonlinearityLayer(l_px_azy, self.transf)

    # Note that px_hid[0] has to be equal to the number of filters in the first convolution.
    # Otherwise add dense layers here.

    # Inverse pooling
    l_global_depool = InverseLayer(l_px_azy, l_global_pool_enc)
    print("l_global_depool", l_global_depool.output_shape)

    # Reverse pool layer order
    pool_layers = pool_layers[::-1]

    # Decode
    l_deconv = l_global_depool
    for idx, filter in enumerate(filters[::-1]):
        filter, stride, pool = filter
        if pool > 1:
            l_deconv = InverseLayer(l_deconv, pool_layers[idx])
        l_deconv = Conv2DLayer(l_deconv, num_filters=filter, filter_size=(3, 1),
                               stride=(stride, 1), W=init.GlorotNormal('relu'))
        print("l_deconv", l_deconv.output_shape)

    # The last l_conv layer should give us the input shape
    l_px_azy = Conv2DLayer(l_deconv, num_filters=1, filter_size=(3, 1), pad='same',
                           nonlinearity=None)
    print("l_dec", l_px_azy.output_shape)

    # Flatten first two dimensions
    l_px_azy = ReshapeLayer(l_px_azy, (-1, n_c))

    if x_dist == 'bernoulli':
        l_px_azy = DenseLayer(l_px_azy, n_c, init.GlorotNormal(), init.Normal(init_w), sigmoid)
    elif x_dist == 'multinomial':
        l_px_azy = DenseLayer(l_px_azy, n_c, init.GlorotNormal(), init.Normal(init_w), softmax)
    elif x_dist == 'gaussian':
        l_px_azy, l_px_zy_mu, l_px_zy_logvar = stochastic_layer(l_px_azy, n_c, self.sym_samples,
                                                                px_nonlinearity)
    elif x_dist == 'linear':
        l_px_azy = DenseLayer(l_px_azy, n_c, nonlinearity=None)

    # Reshape all the model layers to have the same size
    self.l_x_in = l_x_in
    self.l_y_in = l_y_in
    self.l_a_in = l_qa_x

    self.l_qa = ReshapeLayer(l_qa_x, (-1, self.sym_samples, 1, n_a))
    self.l_qa_mu = DimshuffleLayer(l_qa_x_mu, (0, 'x', 'x', 1))
    self.l_qa_logvar = DimshuffleLayer(l_qa_x_logvar, (0, 'x', 'x', 1))

    self.l_qz = ReshapeLayer(l_qz_axy, (-1, self.sym_samples, 1, n_z))
    self.l_qz_mu = ReshapeLayer(l_qz_axy_mu, (-1, self.sym_samples, 1, n_z))
    self.l_qz_logvar = ReshapeLayer(l_qz_axy_logvar, (-1, self.sym_samples, 1, n_z))

    self.l_qy = ReshapeLayer(l_qy_xa, (-1, self.sym_samples, 1, n_y))

    self.l_pa = ReshapeLayer(l_pa_zy, (-1, self.sym_samples, 1, n_a))
    self.l_pa_mu = ReshapeLayer(l_pa_zy_mu, (-1, self.sym_samples, 1, n_a))
    self.l_pa_logvar = ReshapeLayer(l_pa_zy_logvar, (-1, self.sym_samples, 1, n_a))

    # Here we assume that we pass (batch size * segment length, number of features) to the
    # sample layer, from which we then get (batch size * segment length, samples, IW samples,
    # features).
    self.l_px = ReshapeLayer(l_px_azy, (-1, n_l, self.sym_samples, 1, n_c))
    self.l_px_mu = ReshapeLayer(l_px_zy_mu, (-1, n_l, self.sym_samples, 1, n_c)) \
        if x_dist == "gaussian" else None
    self.l_px_logvar = ReshapeLayer(l_px_zy_logvar, (-1, n_l, self.sym_samples, 1, n_c)) \
        if x_dist == "gaussian" else None

    # Predefined functions
    inputs = {l_x_in: self.sym_x_l}
    outputs = get_output(self.l_qy, inputs, deterministic=True).mean(axis=(1, 2))
    self.f_qy = theano.function([self.sym_x_l, self.sym_samples], outputs)

    outputs = get_output(l_qa_x, inputs, deterministic=True)
    self.f_qa = theano.function([self.sym_x_l, self.sym_samples], outputs)

    inputs = {l_x_in: self.sym_x_l, l_y_in: self.sym_t_l}
    outputs = get_output(l_qz_axy, inputs, deterministic=True)
    self.f_qz = theano.function([self.sym_x_l, self.sym_t_l, self.sym_samples], outputs)

    inputs = {l_qz_axy: self.sym_z, l_y_in: self.sym_t_l}
    outputs = get_output(self.l_pa, inputs, deterministic=True).mean(axis=(1, 2))
    self.f_pa = theano.function([self.sym_z, self.sym_t_l, self.sym_samples], outputs)

    inputs = {l_x_in: self.sym_x_l, l_qa_x: self.sym_a, l_qz_axy: self.sym_z,
              l_y_in: self.sym_t_l}
    outputs = get_output(self.l_px, inputs, deterministic=True).mean(axis=(2, 3))
    self.f_px = theano.function([self.sym_x_l, self.sym_a, self.sym_z, self.sym_t_l,
                                 self.sym_samples], outputs)

    outputs = get_output(self.l_px_mu, inputs, deterministic=True).mean(axis=(2, 3))
    self.f_mu = theano.function([self.sym_x_l, self.sym_a, self.sym_z, self.sym_t_l,
                                 self.sym_samples], outputs)

    outputs = get_output(self.l_px_logvar, inputs, deterministic=True).mean(axis=(2, 3))
    self.f_var = theano.function([self.sym_x_l, self.sym_a, self.sym_z, self.sym_t_l,
                                  self.sym_samples], outputs)

    # Define model parameters
    self.model_params = get_all_params([self.l_qy, self.l_pa, self.l_px])
    self.trainable_model_params = get_all_params([self.l_qy, self.l_pa, self.l_px],
                                                 trainable=True)
def __init__(self, n_in, n_filters, filter_size, n_out, pool_sizes=None, n_hidden=(),
             downsample=1, batch_size=100, trans_func=rectify, out_func=softmax,
             dropout_probability=0.0):
    super(UFCNN, self).__init__(n_in, n_hidden, n_out, trans_func)
    self.outf = out_func
    self.log = ""

    l2_mask = np.zeros((1, 1, filter_size * 2 + 1, 1))
    l2_mask[:, :, 2::2, :] = 1
    l2_mask = l2_mask[:, :, ::-1]
    self.l2_mask = theano.shared(l2_mask.astype(theano.config.floatX),
                                 broadcastable=(True, True, False, False))

    l3_mask = np.zeros((1, 1, filter_size * 4 + 1, 1))
    l3_mask[:, :, 4::4, :] = 1
    l3_mask = l3_mask[:, :, ::-1]
    self.l3_mask = theano.shared(l3_mask.astype(theano.config.floatX),
                                 broadcastable=(True, True, False, False))

    W2 = init.GlorotNormal(gain=1.0).sample(shape=(n_filters, n_filters, filter_size * 2 + 1, 1))
    W2 *= l2_mask
    W3 = init.GlorotNormal(gain=1.0).sample(shape=(n_filters, n_filters, filter_size * 4 + 1, 1))
    W3 *= l3_mask

    # Overwrite input layer
    sequence_length, n_features = n_in
    self.l_in = InputLayer(shape=(batch_size, sequence_length, n_features))
    l_prev = self.l_in
    l_prev = ReshapeLayer(l_prev, (batch_size, 1, sequence_length, n_features))
    l_prev = DimshuffleLayer(l_prev, (0, 3, 2, 1))

    l_h1 = Conv2DLayer(l_prev, num_filters=n_filters, filter_size=(filter_size, 1),
                       nonlinearity=self.transf, pad='same', name='h1')
    self.log += "\n%s:\t %s" % (l_h1.name, get_output_shape(l_h1))

    l_h2 = Conv2DLayer(l_h1, num_filters=n_filters, filter_size=(filter_size * 2 + 1, 1),
                       nonlinearity=self.transf, pad='same', name='h2', W=W2)
    self.log += "\n%s:\t %s" % (l_h2.name, get_output_shape(l_h2))

    l_h3 = Conv2DLayer(l_h2, num_filters=n_filters, filter_size=(filter_size * 4 + 1, 1),
                       nonlinearity=self.transf, pad='same', name='h3', W=W3)
    self.log += "\n%s:\t %s" % (l_h3.name, get_output_shape(l_h3))

    l_g3 = Conv2DLayer(l_h3, num_filters=n_filters, filter_size=(filter_size * 4 + 1, 1),
                       nonlinearity=self.transf, pad='same', name='g3', W=W3)
    self.log += "\n%s:\t %s" % (l_g3.name, get_output_shape(l_g3))
    print(l_g3.W.get_value()[0, 0])

    l_h2_g3 = ConcatLayer((l_h2, l_g3), axis=1, name='l_h2_g3')
    self.log += "\n%s: %s" % (l_h2_g3.name, get_output_shape(l_h2_g3))

    l_g2 = Conv2DLayer(l_h2_g3, num_filters=n_filters, filter_size=(filter_size * 2 + 1, 1),
                       nonlinearity=self.transf, pad='same', name='g2',
                       W=np.concatenate((W2, W2), axis=1))
    self.log += "\n%s:\t %s" % (l_g2.name, get_output_shape(l_g2))

    l_h1_g2 = ConcatLayer((l_h1, l_g2), axis=1, name='l_h1_g2')
    self.log += "\n%s: %s" % (l_h1_g2.name, get_output_shape(l_h1_g2))

    l_g1 = Conv2DLayer(l_h1_g2, num_filters=n_filters, filter_size=(filter_size, 1),
                       nonlinearity=self.transf, pad='same', name='g1')
    self.log += "\n%s:\t %s" % (l_g1.name, get_output_shape(l_g1))

    l_prev = l_g1
    for n_hid in n_hidden:
        l_prev = DenseLayer(l_prev, num_units=n_hid, nonlinearity=self.transf)
        self.log += "\nAdding dense layer with %d units" % n_hid
        if dropout_probability:
            l_prev = DropoutLayer(l_prev, p=dropout_probability)
            self.log += "\nAdding dropout layer with p=%.3f" % dropout_probability

    self.model = DenseLayer(l_prev, num_units=n_out, nonlinearity=out_func)
    self.model_params = get_all_params(self.model)
    self.sym_x = T.tensor3('x')
    self.sym_t = T.matrix('t')
def build_model(self, use_mean_lstm=False,
                act_fun=lasagne.nonlinearities.tanh,  # dense layer activation function
                word_ebd_init=init.Normal(1e-2), b_init=init.Normal(1e-4),
                W_init=init.GlorotNormal()):
    # -------------------------------- Global Inputs --------------------------------
    self.l_sents_in = InputLayer((None, None))  # sentence inputs as word indexes
    self.l_mask_in = InputLayer((None, None))
    self.l_label_in = InputLayer((None, self.dim_y))  # one hot
    # For unlabeled data, y is generated by the classifier; otherwise y is a parameter during training.

    # -------------------------------- Word Embedding --------------------------------
    # Input nodes: l_sents_in
    self.l_ebd = EmbeddingLayer(self.l_sents_in, self.word_dict_size, self.word_ebd_dims,
                                W=word_ebd_init, name='EbdLayer')  # we do dropout later
    self.l_enc_sents_in = InputLayer((None, None, self.word_ebd_dims))
    # sentence inputs for the classifier and encoder
    self.l_dec_sents_in = InputLayer((None, None, self.word_ebd_dims))
    # for the decoder; shifted and word-dropped

    # -------------------------------- Classifier --------------------------------
    # Input nodes: l_enc_sents_in, l_mask_in
    self.l_c_sents_drop = DropoutLayer(self.l_enc_sents_in, p=self.drop_out,
                                       name='Classifier Sents Dropout')
    if use_mean_lstm:
        # we do dropout later for loading pretraining weights
        self.l_c = MeanLstmLayer(self.l_c_sents_drop, num_units=self.num_units,
                                 mask_input=self.l_mask_in, grad_clipping=self.grad_clipping,
                                 name='Classifier Mean')
    else:
        self.l_c = LSTMLayer(self.l_c_sents_drop, num_units=self.num_units,
                             mask_input=self.l_mask_in, grad_clipping=self.grad_clipping,
                             only_return_final=True, name='Classifier Final')
    self.l_c_drop = DropoutLayer(self.l_c, p=self.drop_out, name='Classifier LSTM Dropout')
    self.l_c_to_y = DropoutLayer(
        batch_norm(DenseLayer(self.l_c_drop, num_units=self.num_units, W=W_init, b=b_init,
                              nonlinearity=act_fun, name='c_to_y'), name='c_to_y'),
        p=self.drop_out, name='c_to_y')
    self.l_y = DenseLayer(self.l_c_to_y, num_units=self.dim_y, W=W_init, b=b_init,
                          nonlinearity=softmax, name='y_pred')

    # -------------------------------- Inference Network --------------------------------
    # Input nodes: l_enc_sents_in, l_label_in, l_mask_in
    self.l_enc_sents_drop = DropoutLayer(self.l_enc_sents_in, p=self.drop_out,
                                         name='Enc Sents Dropout')
    # Encoder LSTM
    self.l_x = DropoutLayer(
        LSTMLayer(self.l_enc_sents_drop, num_units=self.num_units, mask_input=self.l_mask_in,
                  grad_clipping=self.grad_clipping, only_return_final=True, name='Enc LSTM'),
        p=self.drop_out, name='Enc LSTM Drop')
    # Encoder dense layer(s); use a class if there are many
    self.l_x_to_a = DropoutLayer(
        batch_norm(DenseLayer(self.l_x, num_units=self.num_units, W=W_init, b=b_init,
                              nonlinearity=act_fun, name='x_to_a'), name='x_to_a'),
        p=self.drop_out, name='x_to_a')
    # Combine information from the label and the encoder
    self.l_label_to_enc = DropoutLayer(
        DenseLayer(self.l_label_in, num_units=self.num_units, W=W_init, b=b_init,
                   nonlinearity=act_fun, name='label_to_enc'),
        p=self.drop_out, name='label_to_enc')
    self.l_xy = ConcatLayer([self.l_x_to_a, self.l_label_to_enc], axis=1, name='Concat_xy')
    self.l_xy = DropoutLayer(
        batch_norm(DenseLayer(self.l_xy, num_units=self.num_units, W=W_init, b=b_init,
                              nonlinearity=act_fun, name='xy'), name='xy'),
        p=self.drop_out, name='xy')
    self.l_xy_to_z = DropoutLayer(
        batch_norm(DenseLayer(self.l_xy, num_units=self.dim_z, W=W_init, b=b_init,
                              nonlinearity=act_fun, name='xy_to_z'), name='xy_to_z'),
        p=self.drop_out, name='xy_to_z')

    # sample z
    self.l_z_mu = DenseLayer(self.l_xy_to_z, self.dim_z, W=W_init, b=b_init, nonlinearity=None,
                             name='z_mu')
    self.l_z_var = DenseLayer(self.l_xy_to_z, self.dim_z, W=W_init, b=b_init, nonlinearity=None,
                              name='z_var')
    self.l_z = SimpleSampleLayer(self.l_z_mu, self.l_z_var, name='z_sample')

    # -------------------------------- Generation Network --------------------------------
    # Input nodes: l_label_in, l_z_in, l_dec_sents_in, l_mask_in
    # In this model, there is no interface for beam search.
    self.l_z_in = InputLayer((None, self.dim_z))
    self.l_label_to_dec = DropoutLayer(
        DenseLayer(self.l_label_in, num_units=self.num_units, W=W_init, b=b_init,
                   nonlinearity=act_fun, name='label_to_dec'),
        p=self.drop_out, name='label_to_dec')
    self.l_yz = ConcatLayer([self.l_label_to_dec, self.l_z_in], axis=1, name='Concat_yz')
    # Decoder dense layer(s)
    self.l_yz = DropoutLayer(
        batch_norm(DenseLayer(self.l_yz, num_units=self.num_units, W=W_init, b=b_init,
                              nonlinearity=act_fun, name='yz'), name='yz'),
        p=self.drop_out, name='yz')
    # the last layer has no dropout
    self.l_hid = batch_norm(
        DenseLayer(self.l_yz, num_units=self.num_units, W=W_init, b=b_init,
                   nonlinearity=act_fun, name='yz_to_hid'),
        name='yz_to_hid')
    # language model
    self.l_lm = ScLSTMLayer(incoming=self.l_dec_sents_in, num_units=self.num_units,
                            da_init=self.l_label_in, hid_init=self.l_hid,
                            mask_input=self.l_mask_in, grad_clipping=self.grad_clipping,
                            name='ScLSTMLayer')
    self.l_rec = DenseLayer(
        DropoutLayer(ReshapeLayer(self.l_lm, shape=(-1, self.num_units), name='ScLSTMLayer'),
                     p=self.drop_out, name='ScLSTMLayer'),
        num_units=self.word_dict_size, W=W_init, b=b_init, nonlinearity=softmax,
        name='recons_x')  # (batch_size * sent_length, word_dict_size)

    # ------------------------------- Baseline ----------------------------------
    if theano.config.floatX == 'float32':
        self.b = theano.shared(np.float32(5.5))
    else:
        self.b = theano.shared(np.float64(5.5))