def forward(self, x):
    n_layer = len(self.layers)
    # Forward pass through each layer's first block, then back through the
    # second blocks in reverse order.
    for i in range(n_layer):
        x = F.tanh(self.layers[i][0](x))
    for i in range(n_layer - 1):
        x = F.tanh(self.layers[n_layer - i - 1][1](x))
    return self.layers[0][1](x)
def lstm(x, h, c, Wxi, Wxf, Wxo, Whi, Whf, Who, Wxc, Whc, bi, bf, bo, bc):
    i = nd.sigmoid(nd.dot(x, Wxi) + nd.dot(h, Whi) + bi)      # input gate
    f = nd.sigmoid(nd.dot(x, Wxf) + nd.dot(h, Whf) + bf)      # forget gate
    o = nd.sigmoid(nd.dot(x, Wxo) + nd.dot(h, Who) + bo)      # output gate
    c_tilde = nd.tanh(nd.dot(x, Wxc) + nd.dot(h, Whc) + bc)   # candidate cell state
    c = f * c + i * c_tilde
    h = o * nd.tanh(c)
    return h, c
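# Illustrative usage sketch for the lstm cell above (not part of the original
# snippet): batch/feature sizes and the uniform initialization are assumptions.
from mxnet import nd

def demo_lstm_step(batch_size=4, input_dim=10, hidden_dim=8):
    Wxi, Wxf, Wxo, Wxc = (nd.random.uniform(shape=(input_dim, hidden_dim)) for _ in range(4))
    Whi, Whf, Who, Whc = (nd.random.uniform(shape=(hidden_dim, hidden_dim)) for _ in range(4))
    bi, bf, bo, bc = (nd.zeros(hidden_dim) for _ in range(4))
    x = nd.random.uniform(shape=(batch_size, input_dim))
    h = nd.zeros((batch_size, hidden_dim))
    c = nd.zeros((batch_size, hidden_dim))
    # One step of the cell; both returned states have shape (batch_size, hidden_dim).
    h, c = lstm(x, h, c, Wxi, Wxf, Wxo, Whi, Whf, Who, Wxc, Whc, bi, bf, bo, bc)
    return h, c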
def forward(self, x):
    x = self.pool1(F.tanh(self.conv1(x)))
    x = self.pool2(F.tanh(self.conv2(x)))
    # 0 means copy over size from corresponding dimension.
    # -1 means infer size from the rest of dimensions.
    x = x.reshape((0, -1))
    x = F.tanh(self.fc1(x))
    x = F.tanh(self.fc2(x))
    return x
def foo(h, c, patch, Wxi, Wxf, Wxo, Wxg, bxi, bxf, bxo, bxg,
        Whi, Whf, Who, Whg, bhi, bhf, bho, bhg):
    i = sigmoid(linear(patch, Wxi, bxi) + linear(h, Whi, bhi))
    f = sigmoid(linear(patch, Wxf, bxf) + linear(h, Whf, bhf))
    o = sigmoid(linear(patch, Wxo, bxo) + linear(h, Who, bho))
    g = nd.tanh(linear(patch, Wxg, bxg) + linear(h, Whg, bhg))
    c = f * c + i * g
    h = o * mx.nd.tanh(c)
    # W, b: output-projection parameters, assumed to be defined in the enclosing scope.
    return h, c, linear(h, W, b)
def residue_forward(self, x, conv_sigmoid, conv_tanh, skip_scale, residue_scale):
    output = x
    output_sigmoid, output_tanh = conv_sigmoid(output), conv_tanh(output)
    output = F.sigmoid(output_sigmoid) * F.tanh(output_tanh)
    skip = skip_scale(output)
    output = residue_scale(output)
    output = output + x[:, :, -output.shape[2]:]
    return output, skip
def rnn(inputs, state, *params):
    H = state
    W_xh, W_hh, b_h, W_hy, b_y = params
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return (outputs, H)
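# Illustrative usage sketch for rnn above (not part of the original snippet):
# the sizes, one-hot inputs, and plain NDArray parameter init are assumptions.
from mxnet import nd

def demo_rnn(num_steps=5, batch_size=2, vocab_size=27, num_hidden=16):
    W_xh = nd.random.normal(scale=0.01, shape=(vocab_size, num_hidden))
    W_hh = nd.random.normal(scale=0.01, shape=(num_hidden, num_hidden))
    b_h = nd.zeros(num_hidden)
    W_hy = nd.random.normal(scale=0.01, shape=(num_hidden, vocab_size))
    b_y = nd.zeros(vocab_size)
    # One one-hot batch per time step.
    inputs = [nd.one_hot(nd.array([t % vocab_size] * batch_size), vocab_size)
              for t in range(num_steps)]
    state = nd.zeros((batch_size, num_hidden))
    outputs, state = rnn(inputs, state, W_xh, W_hh, b_h, W_hy, b_y)
    # len(outputs) == num_steps; each Y has shape (batch_size, vocab_size).
    return outputs, state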
def rnn(inputs, H):
    # inputs: a list of seq_len matrices, each of shape batch_size x vocab_size
    # H: matrix of shape batch_size x num_hidden
    # outputs: a list of seq_len matrices, each of shape batch_size x vocab_size
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, Wxh) + nd.dot(H, Whh) + bh)
        Y = nd.dot(H, Why) + by
        outputs.append(Y)
    return (outputs, H)
def foo(h, c, patch, Wxi, Wxf, Wxo, Wxg, bxi, bxf, bxo, bxg,
        Whi, Whf, Who, Whg, bhi, bhf, bho, bhg):
    i = sigmoid(linear(patch, Wxi, bxi) + linear(h, Whi, bhi))
    f = sigmoid(linear(patch, Wxf, bxf) + linear(h, Whf, bhf))
    o = sigmoid(linear(patch, Wxo, bxo) + linear(h, Who, bho))
    print('break the consecutive assignments')
    g = nd.tanh(linear(patch, Wxg, bxg) + linear(h, Whg, bhg))
    c = f * c + i * g
    h = o * mx.nd.tanh(c)
    # W, b: output-projection parameters, assumed to be defined in the enclosing scope.
    return h, c, linear(h, W, b)
def lstm_rnn(inputs, state_h, state_c, *params):
    # inputs: num_steps matrices, each of shape batch_size * vocab_size
    # H: matrix of shape batch_size * hidden_dim
    # outputs: num_steps matrices, each of shape batch_size * vocab_size
    [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
     W_xc, W_hc, b_c, W_hy, b_y] = params
    H = state_h
    C = state_c
    outputs = []
    for X in inputs:
        I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
        F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
        O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
        C_tilda = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        C = F * C + I * C_tilda
        H = O * nd.tanh(C)
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return (outputs, H, C)
def foo(h, c, patch, Wxi, Wxf, Wxo, Wxg, bxi, bxf, bxo, bxg,
        Whi, Whf, Who, Whg, bhi, bhf, bho, bhg):
    i = sigmoid(linear(patch, Wxi, bxi) + linear(h, Whi, bhi))
    f = sigmoid(linear(patch, Wxf, bxf) + linear(h, Whf, bhf))
    # CR(haoran): adding the following line will create a new segment, is this a bug?
    # XCR(yutian): it is a bug. Already fixed.
    sigmoid(f)
    o = sigmoid(linear(patch, Wxo, bxo) + linear(h, Who, bho))
    g = nd.tanh(linear(patch, Wxg, bxg) + linear(h, Whg, bhg))
    c = f * c + i * g
    h = o * mx.nd.tanh(c)
    # W, b: output-projection parameters, assumed to be defined in the enclosing scope.
    return h, c, linear(h, W, b)
def observe_reward_value(
    self,
    state_arr,
    action_arr,
    meta_data_arr=None,
):
    '''
    Compute the reward value.

    Args:
        state_arr:      Tensor of state.
        action_arr:     Tensor of action.
        meta_data_arr:  Meta data of actions.

    Returns:
        Reward value.
    '''
    if state_arr is not None:
        t_hot_loss = -nd.mean(
            nd.flatten(state_arr) * nd.flatten(action_arr),
            axis=0,
            exclude=True
        )
        reward_value_arr = t_hot_loss
        reward_value_arr = nd.expand_dims(reward_value_arr, axis=1)
    else:
        reward_value_arr = nd.zeros((action_arr.shape[0], 1), ctx=action_arr.context)

    if meta_data_arr is not None:
        add_reward_arr = nd.zeros((action_arr.shape[0], 1), ctx=action_arr.context)
        for batch in range(meta_data_arr.shape[0]):
            keyword = "".join(meta_data_arr[batch].reshape(1, -1).tolist()[0])
            reward = 0.0
            for i in range(len(self.__txt_list)):
                key = self.__txt_list[i].index(keyword)
                reward = reward + ((len(self.__txt_list[i]) - key) / len(self.__txt_list[i]))
                reward = reward + (self.__txt_list[i].count(keyword) / len(self.__txt_list[i]))
            add_reward_arr[batch] = reward / len(self.__txt_list)
    else:
        # No meta data: the keyword-based reward term is zero.
        add_reward_arr = nd.zeros((action_arr.shape[0], 1), ctx=action_arr.context)

    reward_value_arr = (reward_value_arr * self.__s_a_dist_weight) + (
        add_reward_arr * (1 - self.__s_a_dist_weight))
    reward_value_arr = nd.tanh(reward_value_arr)
    return reward_value_arr
def gru_rnn(inputs, H, *params):
    # inputs: num_steps matrices, each of shape batch_size * vocab_size
    # H: matrix of shape batch_size * hidden_dim
    # outputs: num_steps matrices, each of shape batch_size * vocab_size
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hy, b_y = params
    outputs = []
    for X in inputs:
        Z = nd.sigmoid(nd.dot(X, W_xz) + nd.dot(H, W_hz) + b_z)
        R = nd.sigmoid(nd.dot(X, W_xr) + nd.dot(H, W_hr) + b_r)
        H_tilda = nd.tanh(nd.dot(X, W_xh) + R * nd.dot(H, W_hh) + b_h)
        H = Z * H + (1 - Z) * H_tilda
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return (outputs, H)
def rnn(_inputs, initial_state, *parameters):
    # _inputs: a list with length num_steps,
    # corresponding element: batch_size * input_dim matrix
    H = initial_state
    W_xh, W_hh, b_h, W_hy, b_y = parameters
    _outputs = []
    for X in _inputs:
        # compute hidden state from input and last/initial hidden state
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        # compute output from hidden state
        Y = nd.dot(H, W_hy) + b_y
        _outputs.append(Y)
    return _outputs, H
def LSTM_Cell(input, h_state, c_state):
    for x in input:
        f_t = nd.Activation(
            nd.FullyConnected(data=x, weight=wxhf, no_bias=True, num_hidden=num_hidden)
            + nd.FullyConnected(data=h_state, weight=whhf, no_bias=True, num_hidden=num_hidden)
            + bhf, act_type="sigmoid")
        i_t = nd.Activation(
            nd.FullyConnected(data=x, weight=wxhi, no_bias=True, num_hidden=num_hidden)
            + nd.FullyConnected(data=h_state, weight=whhi, no_bias=True, num_hidden=num_hidden)
            + bhi, act_type="sigmoid")
        o_t = nd.Activation(
            nd.FullyConnected(data=x, weight=wxho, no_bias=True, num_hidden=num_hidden)
            + nd.FullyConnected(data=h_state, weight=whho, no_bias=True, num_hidden=num_hidden)
            + bho, act_type="sigmoid")
        g_t = nd.Activation(
            nd.FullyConnected(data=x, weight=wxhg, no_bias=True, num_hidden=num_hidden)
            + nd.FullyConnected(data=h_state, weight=whhg, no_bias=True, num_hidden=num_hidden)
            + bhg, act_type="tanh")
        c_state = nd.multiply(f_t, c_state) + nd.multiply(i_t, g_t)
        h_state = nd.multiply(o_t, nd.tanh(c_state))
    output = nd.FullyConnected(data=h_state, weight=why, bias=by, num_hidden=num_outputs)
    output = nd.softmax(data=output)
    return output, h_state, c_state
def train(cep, pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, netD2, netDS, trainerEn, trainerDe, trainerD, trainerD2, trainerSD, lambda1, batch_size, expname, append=True, useAE=False): im_mean = mean_image.load_mean() im_mean = im_mean.broadcast_to( (batch_size, np.shape(im_mean)[0], np.shape(im_mean)[1], np.shape(im_mean)[2])) #im_mean = nd.transpose(im_mean, (2, 0, 1)) tp_file = open(expname + "_trainloss.txt", "w") tp_file.close() text_file = open(expname + "_validtest.txt", "w") text_file.close() #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf, opt.ngf, opt.append) GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss() L1_loss = gluon.loss.L2Loss() image_pool = imagePool.ImagePool(pool_size) metric = mx.metric.CustomMetric(facc) metric2 = mx.metric.CustomMetric(facc) metricStrong = mx.metric.CustomMetric(facc) metricMSE = mx.metric.MSE() loss_rec_G = [] loss_rec_D = [] loss_rec_R = [] acc_rec = [] acc2_rec = [] loss_rec_D2 = [] loss_rec_G2 = [] lr = 2.0 * batch_size stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') logging.basicConfig(level=logging.DEBUG) if cep == -1: cep = 0 else: netEn.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_En.params', ctx=ctx) netDe.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_De.params', ctx=ctx) netD.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D.params', ctx=ctx) netD2.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D2.params', ctx=ctx) netDS.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_SD.params', ctx=ctx) for epoch in range(cep + 1, epochs): tic = time.time() btic = time.time() train_data.reset() iter = 0 #print('learning rate : '+str(trainerD.learning_rate )) for batch in train_data: ############################ # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z))) ########################### real_in = batch.data[0].as_in_context(ctx) - im_mean.as_in_context( ctx) real_out = batch.data[1].as_in_context( ctx) - im_mean.as_in_context(ctx) fake_latent = netEn(real_in) #real_latent = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx) real_latent = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) fake_out = netDe(fake_latent) fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out eps2 = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) if epoch > 150: # and epoch%10==0: mu = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) #isigma = nd.ones((batch_size,64,1,1),ctx=ctx)*0.000001 mu.attach_grad() #sigma.attach_grad() images = netDe(mu) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_fakespre_' + str(epoch) + '.png') for ep2 in range(1): with autograd.record(): #eps = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx) # eps2 = nd.tanh( mu ) #+nd.multiply(eps,sigma))#nd.random.uniform( low=-1, high=1, shape=fake_latent.shape, ctx=ctx) rec_output = netDS(netDe(eps2)) fake_label = nd.zeros(rec_output.shape, ctx=ctx) errGS = GAN_loss(rec_output, fake_label) errGS.backward() mu -= lr / mu.shape[0] * mu.grad images = netDe(mu) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], 
dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + str(ep2) + '_fakespost_' + str(epoch) + '.png') eps2 = nd.tanh( mu ) #+nd.multiply(eps,sigma))#nd.random.uniform( low=-1, high=1, shape=fake_latent.shape, ctx=ctx) with autograd.record(): # Train with fake image # Use image pooling to utilize history imagesi output = netD(fake_concat) output2 = netD2(fake_latent) fake_label = nd.zeros(output.shape, ctx=ctx) fake_latent_label = nd.zeros(output2.shape, ctx=ctx) noiseshape = (fake_latent.shape[0] / 2, fake_latent.shape[1], fake_latent.shape[2], fake_latent.shape[3]) #eps2 = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) # eps = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) #strong_output = netDS(netDe(eps)) rec_output = netD(netDe(eps)) errD_fake = GAN_loss(rec_output, fake_label) errD_fake2 = GAN_loss(output, fake_label) errD2_fake = GAN_loss(output2, fake_latent_label) metric.update([ fake_label, ], [ rec_output, ]) metric2.update([ fake_latent_label, ], [ output2, ]) real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out output = netD(real_concat) output2 = netD2(real_latent) real_label = nd.ones(output.shape, ctx=ctx) real_latent_label = nd.ones(output2.shape, ctx=ctx) errD_real = GAN_loss(output, real_label) errD2_real = GAN_loss(output2, real_latent_label) #errD = (errD_real + 0.5*(errD_fake+errD_fake2)) * 0.5 errD = (errD_real + errD_fake) * 0.5 errD2 = (errD2_real + errD2_fake) * 0.5 totalerrD = errD + errD2 totalerrD.backward() metric.update([ real_label, ], [ output, ]) metric2.update([ real_latent_label, ], [ output2, ]) trainerD.step(batch.data[0].shape[0]) trainerD2.step(batch.data[0].shape[0]) with autograd.record(): strong_output = netDS(netDe(eps)) strong_real = netDS(fake_concat) errs1 = GAN_loss(strong_output, fake_label) errs2 = GAN_loss(strong_real, real_label) metricStrong.update([ fake_label, ], [ strong_output, ]) metricStrong.update([ real_label, ], [ strong_real, ]) strongerr = 0.5 * (errs1 + errs2) strongerr.backward() trainerSD.step(batch.data[0].shape[0]) ############################ # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z)) ########################### with autograd.record(): sh = fake_latent.shape #eps2 = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) # #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx) #if epoch>100: # eps2 = nd.multiply(eps2,sigma)+mu # eps2 = nd.tanh(eps2) #else: #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx) #eps2 = nd.concat(eps,eps2,dim=0) rec_output = netD(netDe(eps2)) fake_latent = (netEn(real_in)) output2 = netD2(fake_latent) fake_out = netDe(fake_latent) fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out output = netD(fake_concat) real_label = nd.ones(output.shape, ctx=ctx) real_latent_label = nd.ones(output2.shape, ctx=ctx) errG2 = GAN_loss(rec_output, real_label) errR = L1_loss(real_out, fake_out) * lambda1 errG = 10.0 * GAN_loss(output2, real_latent_label) + errG2 + errR errG.backward() trainerDe.step(batch.data[0].shape[0]) trainerEn.step(batch.data[0].shape[0]) loss_rec_G2.append(nd.mean(errG2).asscalar()) loss_rec_G.append( nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) loss_rec_D.append(nd.mean(errD).asscalar()) loss_rec_R.append(nd.mean(errR).asscalar()) 
loss_rec_D2.append(nd.mean(errD2).asscalar()) _, acc2 = metric2.get() name, acc = metric.get() acc_rec.append(acc) acc2_rec.append(acc2) # Print log infomation every ten batches if iter % 10 == 0: _, acc2 = metric2.get() name, acc = metric.get() _, accStrong = metricStrong.get() logging.info('speed: {} samples/s'.format( batch_size / (time.time() - btic))) #print(errD) logging.info( 'discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, SD loss = %f, D acc = %f , D2 acc = %f, DS acc = %f, reconstruction error= %f at iter %d epoch %d' % (nd.mean(errD).asscalar(), nd.mean(errD2).asscalar(), nd.mean(errG - errG2 - errR).asscalar(), nd.mean(errG2).asscalar(), nd.mean(strongerr).asscalar(), acc, acc2, accStrong, nd.mean(errR).asscalar(), iter, epoch)) iter = iter + 1 btic = time.time() name, acc = metric.get() _, acc2 = metric2.get() tp_file = open(expname + "_trainloss.txt", "a") tp_file.write( str(nd.mean(errG2).asscalar()) + " " + str( nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) + " " + str(nd.mean(errD).asscalar()) + " " + str(nd.mean(errD2).asscalar()) + " " + str(nd.mean(errR).asscalar()) + " " + str(acc) + " " + str(acc2) + "\n") tp_file.close() metric.reset() metric2.reset() train_data.reset() metricStrong.reset() logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc)) logging.info('time: %f' % (time.time() - tic)) if epoch % 5 == 0: # and epoch>0: text_file = open(expname + "_validtest.txt", "a") filename = "checkpoints/" + expname + "_" + str( epoch) + "_D.params" netD.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_D2.params" netD2.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_En.params" netEn.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_De.params" netDe.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_SD.params" netDS.save_params(filename) fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) fake_img4 = nd.concat(real_in[3], real_out[3], fake_out[3], dim=1) val_data.reset() text_file = open(expname + "_validtest.txt", "a") for vbatch in val_data: real_in = vbatch.data[0].as_in_context(ctx) real_out = vbatch.data[1].as_in_context(ctx) fake_latent = netEn(real_in) y = netDe(fake_latent) fake_out = y metricMSE.update([ fake_out, ], [ real_out, ]) _, acc2 = metricMSE.get() text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2))) metricMSE.reset() images = netDe(eps2) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_fakes_' + str(epoch) + '.png') text_file.close() return ([ loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2, acc2_rec ])
def hybrid_forward(self, F, x):
    # Mish activation: x * tanh(softplus(x)); 'softrelu' is MXNet's softplus.
    return x * F.tanh(F.Activation(data=x, act_type='softrelu'))
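# Quick numerical sanity check of the Mish expression used above, computed
# directly with mxnet.nd (the input values are illustrative):
from mxnet import nd

x = nd.array([-2.0, -0.5, 0.0, 0.5, 2.0])
softplus = nd.log(1 + nd.exp(x))
mish = x * nd.tanh(softplus)
reference = x * nd.tanh(nd.Activation(x, act_type='softrelu'))
print(nd.max(nd.abs(mish - reference)))  # ~0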
def rnn(x, h, W, b):
    return nd.tanh(nd.dot(nd.concat(x, h, dim=1), W) + b)
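# Illustrative usage sketch (not part of the original snippet): for the
# concat-style rnn cell above, W must have shape (input_dim + hidden_dim,
# hidden_dim); the sizes below are assumptions.
from mxnet import nd

batch_size, input_dim, hidden_dim = 3, 5, 4
x = nd.random.uniform(shape=(batch_size, input_dim))
h = nd.zeros((batch_size, hidden_dim))
W = nd.random.normal(scale=0.01, shape=(input_dim + hidden_dim, hidden_dim))
b = nd.zeros(hidden_dim)
h_next = rnn(x, h, W, b)  # shape (batch_size, hidden_dim)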
def sigmoid(x):
    return .5 * (nd.tanh(.5 * x) + 1)
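# The identity used above: sigmoid(x) = 0.5 * (tanh(x / 2) + 1). A quick check
# against the direct formula (input values are illustrative):
from mxnet import nd

x = nd.array([-3.0, -1.0, 0.0, 1.0, 3.0])
print(nd.max(nd.abs(sigmoid(x) - 1 / (1 + nd.exp(-x)))))  # ~0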
def train(cep, pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, netD2, netDS, trainerEn, trainerDe, trainerD, trainerD2, trainerSD, lambda1, batch_size, expname, append=True, useAE=False): tp_file = open(expname + "_trainloss.txt", "w") tp_file.close() text_file = open(expname + "_validtest.txt", "w") text_file.close() #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf, opt.ngf, opt.append) GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss() L1_loss = gluon.loss.L2Loss() image_pool = imagePool.ImagePool(pool_size) metric = mx.metric.CustomMetric(facc) metric2 = mx.metric.CustomMetric(facc) metricStrong = mx.metric.CustomMetric(facc) metricMSE = mx.metric.MSE() loss_rec_G = [] loss_rec_D = [] loss_rec_R = [] acc_rec = [] acc2_rec = [] loss_rec_D2 = [] loss_rec_G2 = [] lr = 2.0 * 512 stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') logging.basicConfig(level=logging.DEBUG) if cep == -1: cep = 0 else: netEn.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_En.params', ctx=ctx) netDe.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_De.params', ctx=ctx) netD.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D.params', ctx=ctx) netD2.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D2.params', ctx=ctx) netDS.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_SD.params', ctx=ctx) iter = 0 for epoch in range(cep + 1, epochs): tic = time.time() btic = time.time() train_data.reset() #print('learning rate : '+str(trainerD.learning_rate )) for batch in train_data: ############################ # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z))) ########################### if ctx == mx.cpu(): ct = mx.cpu() else: ct = mx.gpu() real_in = batch.data[0] #.as_in_context(ctx) real_out = batch.data[1] #.as_in_context(ctx) if iter == 0: latent_shape = (batch_size, 512, 1, 1) #code.shape out_l_shape = (batch_size, 1, 1, 1) #netD2((code)).shape out_i_shape = (batch_size, 1, 1, 1) #netD(netDe(code)).shape out_s_shape = (batch_size, 1, 1, 1) #netSD(netDe(code)).shape real_in = gluon.utils.split_and_load(real_in, ctx) real_out = gluon.utils.split_and_load(real_out, ctx) fake_latent = [netEn(r) for r in real_in] real_latent = nd.random.uniform(low=-1, high=1, shape=latent_shape) real_latent = gluon.utils.split_and_load(real_latent, ctx) fake_out = [netDe(f) for f in fake_latent] fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out eps2 = nd.random.uniform(low=-1, high=1, shape=latent_shape, ctx=ct) eps2 = gluon.utils.split_and_load(eps2, ctx) if epoch > 150: # (1/float(batch_size))*512*150:# and epoch%10==0: print('Mining..') mu = nd.random.uniform(low=-1, high=1, shape=latent_shape, ctx=ct) #isigma = nd.ones((batch_size,64,1,1),ctx=ctx)*0.000001 mu.attach_grad() #sigma.attach_grad() images = netDe(mu) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_fakespre_' + str(epoch) + '.png') eps2 = gluon.utils.split_and_load(mu, ctx) for e in eps2: e.attach_grad() for ep2 in range(1): with autograd.record(): #eps = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx) # #eps2 = gluon.utils.split_and_load(nd.tanh(mu),ctx) #+nd.multiply(eps,sigma))#nd.random.uniform( low=-1, high=1, 
shape=fake_latent.shape, ctx=ctx) rec_output = [netDS(netDe(e)) for e in eps2] fake_label = gluon.utils.split_and_load( nd.zeros(out_s_shape), ctx) errGS = [ GAN_loss(r, f) for r, f in zip(rec_output, fake_label) ] for e in errGS: e.backward() for idx, _ in enumerate(eps2): eps2[idx] = nd.tanh(eps2[idx] - lr / eps2[idx].shape[0] * eps2[idx].grad) images = netDe((eps2[0])) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + str(ep2) + '_fakespost_' + str(epoch) + '.png') #eps2 = nd.tanh(mu)#+nd.multiply(eps,sigma))#nd.random.uniform( low=-1, high=1, shape=fake_latent.shape, ctx=ctx) with autograd.record(): #eps2 = gluon.utils.split_and_load(eps2,ctx) # Train with fake image # Use image pooling to utilize history imagesi output = [netD(f) for f in fake_concat] output2 = [netD2(f) for f in fake_latent] fake_label = nd.zeros(out_i_shape) fake_label = gluon.utils.split_and_load(fake_label, ctx) fake_latent_label = nd.zeros(out_l_shape) fake_latent_label = gluon.utils.split_and_load( fake_latent_label, ctx) eps = gluon.utils.split_and_load( nd.random.uniform(low=-1, high=1, shape=latent_shape), ctx) rec_output = [netD(netDe(e)) for e in eps] errD_fake = [ GAN_loss(r, f) for r, f in zip(rec_output, fake_label) ] errD_fake2 = [ GAN_loss(o, f) for o, f in zip(output, fake_label) ] errD2_fake = [ GAN_loss(o, f) for o, f in zip(output2, fake_latent_label) ] for f, o in zip(fake_label, rec_output): metric.update([ f, ], [ o, ]) for f, o in zip(fake_latent_label, output2): metric2.update([ f, ], [ o, ]) real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out output = [netD(r) for r in real_concat] output2 = [netD2(r) for r in real_latent] real_label = gluon.utils.split_and_load( nd.ones(out_i_shape), ctx) real_latent_label = gluon.utils.split_and_load( nd.ones(out_l_shape), ctx) errD_real = [ GAN_loss(o, r) for o, r in zip(output, real_label) ] errD2_real = [ GAN_loss(o, r) for o, r in zip(output2, real_latent_label) ] for e1, e2, e4, e5 in zip(errD_real, errD_fake, errD2_real, errD2_fake): err = (e1 + e2) * 0.5 + (e5 + e4) * 0.5 err.backward() for f, o in zip(real_label, output): metric.update([ f, ], [ o, ]) for f, o in zip(real_latent_label, output2): metric2.update([ f, ], [ o, ]) trainerD.step(batch.data[0].shape[0]) trainerD2.step(batch.data[0].shape[0]) nd.waitall() with autograd.record(): strong_output = [netDS(netDe(e)) for e in eps] strong_real = [netDS(f) for f in fake_concat] errs1 = [ GAN_loss(r, f) for r, f in zip(strong_output, fake_label) ] errs2 = [ GAN_loss(r, f) for r, f in zip(strong_real, real_label) ] for f, s in zip(fake_label, strong_output): metricStrong.update([ f, ], [ s, ]) for f, s in zip(real_label, strong_real): metricStrong.update([ f, ], [ s, ]) for e1, e2 in zip(errs1, errs2): strongerr = 0.5 * (e1 + e2) strongerr.backward() trainerSD.step(batch.data[0].shape[0]) nd.waitall() ############################ # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z)) ########################### with autograd.record(): sh = out_l_shape #eps2 = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) # #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx) #if epoch>100: # eps2 = nd.multiply(eps2,sigma)+mu # eps2 = nd.tanh(eps2) #else: #eps = nd.random.uniform( 
low=-1, high=1, shape=noiseshape, ctx=ctx) #eps2 = nd.concat(eps,eps2,dim=0) rec_output = [netD(netDe(e)) for e in eps2] fake_latent = [(netEn(r)) for r in real_in] output2 = [netD2(f) for f in fake_latent] fake_out = [netDe(f) for f in fake_latent] fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out output = [netD(f) for f in fake_concat] real_label = gluon.utils.split_and_load( nd.ones(out_i_shape), ctx) real_latent_label = gluon.utils.split_and_load( nd.ones(out_l_shape), ctx) errG2 = [ GAN_loss(r, f) for r, f in zip(rec_output, real_label) ] errR = [ L1_loss(r, f) * lambda1 for r, f in zip(real_out, fake_out) ] errG = [ 10 * GAN_loss(r, f) for r, f in zip(output2, real_latent_label) ] # +errG2+errR for e1, e2, e3 in zip(errG, errG2, errR): e = e1 + e2 + e3 e.backward() trainerDe.step(batch.data[0].shape[0]) trainerEn.step(batch.data[0].shape[0]) nd.waitall() errD = (errD_real[0] + errD_fake[0]) * 0.5 errD2 = (errD2_real[0] + errD2_fake[0]) * 0.5 loss_rec_G2.append(nd.mean(errG2[0]).asscalar()) loss_rec_G.append( nd.mean(nd.mean(errG[0])).asscalar() - nd.mean(errG2[0]).asscalar() - nd.mean(errR[0]).asscalar()) loss_rec_D.append(nd.mean(errD[0]).asscalar()) loss_rec_R.append(nd.mean(errR[0]).asscalar()) loss_rec_D2.append(nd.mean(errD2[0]).asscalar()) _, acc2 = metric2.get() name, acc = metric.get() acc_rec.append(acc) acc2_rec.append(acc2) # Print log infomation every ten batches if iter % 10 == 0: _, acc2 = metric2.get() name, acc = metric.get() _, accStrong = metricStrong.get() logging.info('speed: {} samples/s'.format( batch_size / (time.time() - btic))) #print(errD) #logging.info('discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, SD loss = %f, D acc = %f , D2 acc = %f, DS acc = %f, reconstruction error= %f at iter %d epoch %d' # % (nd.mean(errD[0]).asscalar(),nd.mean(errD2[0]).asscalar(), # nd.mean(errG[0]-errG2[0]-errR[0]).asscalar(),nd.mean(errG2[0]).asscalar(),nd.mean(strongerr[0]).asscalar() ,acc,acc2,accStrong[0],nd.mean(errR[0]).asscalar() ,iter, epoch)) iter = iter + 1 btic = time.time() name, acc = metric.get() _, acc2 = metric2.get() #tp_file = open(expname + "_trainloss.txt", "a") #tp_file.write(str(nd.mean(errG2).asscalar()) + " " + str( # nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) + " " + str( # nd.mean(errD).asscalar()) + " " + str(nd.mean(errD2).asscalar()) + " " + str(nd.mean(errR).asscalar()) +" "+str(acc) + " " + str(acc2)+"\n") #tp_file.close() metric.reset() metric2.reset() train_data.reset() metricStrong.reset() logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc)) logging.info('time: %f' % (time.time() - tic)) if epoch % 2 == 0: # and epoch>0: text_file = open(expname + "_validtest.txt", "a") filename = "checkpoints/" + expname + "_" + str( epoch) + "_D.params" netD.save_parameters(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_D2.params" netD2.save_parameters(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_En.params" netEn.save_parameters(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_De.params" netDe.save_parameters(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_SD.params" netDS.save_parameters(filename) fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) fake_img4 = nd.concat(real_in[3], real_out[3], 
fake_out[3], dim=1) val_data.reset() text_file = open(expname + "_validtest.txt", "a") for vbatch in val_data: real_in = vbatch.data[0] real_out = vbatch.data[1] real_in = gluon.utils.split_and_load(real_in, ctx) real_out = gluon.utils.split_and_load(real_out, ctx) fake_latent = [netEn(r) for r in real_in] fake_out = [netDe(f) for f in fake_latent] for f, r in zip(fake_out, real_out): metricMSE.update([ f, ], [ r, ]) _, acc2 = metricMSE.get() toterrR = 0 for e in errR: toterrR += nd.mean(e).asscalar() text_file.write("%s %s %s\n" % (str(epoch), toterrR, str(acc2))) metricMSE.reset() return ([ loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2, acc2_rec ])
def trainadnov(opt, train_data, val_data, ctx, networks, datasize):
    netEn = networks[0]
    netDe = networks[1]
    netD = networks[2]
    netD2 = networks[3]
    netDS = networks[4]
    trainerEn = networks[5]
    trainerDe = networks[6]
    trainerD = networks[7]
    trainerD2 = networks[8]
    trainerSD = networks[9]
    cep = opt.continueEpochFrom
    epochs = opt.epochs
    lambda1 = opt.lambda1
    batch_size = opt.batch_size
    expname = opt.expname
    append = opt.append
    text_file = open(expname + "_trainloss.txt", "w")
    text_file.close()
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    metric = mx.metric.CustomMetric(facc)
    metricl = mx.metric.CustomMetric(facc)
    metricStrong = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.MSE()
    metricMSE = mx.metric.MSE()
    loss_rec_G2 = []
    acc2_rec = []
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    loss_rec_D2 = []
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    lr = 2.0 * batch_size
    if cep == -1:
        cep = 0
    else:
        netEn.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_En.params', ctx=ctx)
        netDe.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_De.params', ctx=ctx)
        netD.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D.params', ctx=ctx)
        netD2.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D2.params', ctx=ctx)
        netDS.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_SD.params', ctx=ctx)
    for epoch in range(cep + 1, epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        counter = 0
        for batch in train_data:
            for i in range(iter):
                batch = train_data.next()  # .data[0]
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ############################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            counter += opt.batch_size
            fake_latent = netEn(real_in)
            mu = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx)
            real_latent = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx)
            # Default latent sample; refined by negative mining after epoch 150.
            eps2 = nd.tanh(mu)
            fake_out = netDe(fake_latent)
            fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
            if epoch > 150:  # negative mining
                mu = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx)
                mu.attach_grad()
                for ep2 in range(1):  # doing single gradient step
                    with autograd.record():
                        eps2 = nd.tanh(mu)
                        rec_output = netDS(netDe(eps2))
                        fake_label = nd.zeros(rec_output.shape, ctx=ctx)
                        errGS = GAN_loss(rec_output, fake_label)
                    errGS.backward()
                    mu -= lr / mu.shape[0] * mu.grad  # update mu with SGD
                eps2 = nd.tanh(mu)
            with autograd.record():
                # Train with fake image
                output = netD(fake_concat)
                output2 = netD2(fake_latent)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                fake_latent_label = nd.zeros(output2.shape, ctx=ctx)
                eps = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx)
                rec_output = netD(netDe(eps))
                errD_fake = GAN_loss(rec_output, fake_label)
                errD_fake2 = GAN_loss(output, fake_label)
                errD2_fake = GAN_loss(output2, fake_latent_label)
                metric.update([fake_label], [rec_output])
                metric2.update([fake_latent_label], [output2])
                real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out
                output = netD(real_concat)
                output2 = netD2(real_latent)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD2_real = GAN_loss(output2, real_latent_label)
                errD = (errD_real + errD_fake) * 0.5
                errD2 = (errD2_real + errD2_fake) * 0.5
                totalerrD = errD + errD2
            totalerrD.backward()
            metric.update([real_label], [output])
            metric2.update([real_latent_label], [output2])
            trainerD.step(batch.data[0].shape[0])
            trainerD2.step(batch.data[0].shape[0])
            with autograd.record():
                # Train classifier
                strong_output = netDS(netDe(eps))
                strong_real = netDS(fake_concat)
                errs1 = GAN_loss(strong_output, fake_label)
                errs2 = GAN_loss(strong_real, real_label)
                metricStrong.update([fake_label], [strong_output])
                metricStrong.update([real_label], [strong_real])
                strongerr = 0.5 * (errs1 + errs2)
            strongerr.backward()
            trainerSD.step(batch.data[0].shape[0])
            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ############################
            with autograd.record():
                rec_output = netD(netDe(eps2))
                fake_latent = netEn(real_in)
                output2 = netD2(fake_latent)
                fake_out = netDe(fake_latent)
                fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                real_latent_label = nd.ones(output2.shape, ctx=ctx)
                errG2 = GAN_loss(rec_output, real_label)
                errR = L1_loss(real_out, fake_out) * lambda1
                errG = 10.0 * GAN_loss(output2, real_latent_label) + errG2 + errR
            errG.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
            loss_rec_G2.append(nd.mean(errG2).asscalar())
            loss_rec_G.append(nd.mean(nd.mean(errG)).asscalar()
                              - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar())
            loss_rec_D.append(nd.mean(errD).asscalar())
            loss_rec_R.append(nd.mean(errR).asscalar())
            loss_rec_D2.append(nd.mean(errD2).asscalar())
            _, acc2 = metric2.get()
            name, acc = metric.get()
            acc_rec.append(acc)
            acc2_rec.append(acc2)
            # Print log information every ten batches
            if iter % 10 == 0:
                _, acc2 = metric2.get()
                name, acc = metric.get()
                _, accStrong = metricStrong.get()
                logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                logging.info(
                    'discriminator loss = %f, D2 loss = %f, generator loss = %f, '
                    'G2 loss = %f, SD loss = %f, D acc = %f , D2 acc = %f, DS acc = %f, '
                    'reconstruction error= %f at iter %d epoch %d'
                    % (nd.mean(errD).asscalar(), nd.mean(errD2).asscalar(),
                       nd.mean(errG - errG2 - errR).asscalar(), nd.mean(errG2).asscalar(),
                       nd.mean(strongerr).asscalar(), acc, acc2, accStrong,
                       nd.mean(errR).asscalar(), iter, epoch))
            iter = iter + 1
            btic = time.time()
        name, acc = metric.get()
        _, acc2 = metric2.get()
        metric.reset()
        metric2.reset()
        train_data.reset()
        metricStrong.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        if epoch % 5 == 0:
            filename = "checkpoints/" + expname + "_" + str(epoch) + "_D.params"
            netD.save_parameters(filename)
            filename = "checkpoints/" + expname + "_" + str(epoch) + "_D2.params"
            netD2.save_parameters(filename)
            filename = "checkpoints/" + expname + "_" + str(epoch) + "_En.params"
            netEn.save_parameters(filename)
            filename = "checkpoints/" + expname + "_" + str(epoch) + "_De.params"
            netDe.save_parameters(filename)
            filename = "checkpoints/" + expname + "_" + str(epoch) + "_SD.params"
            netDS.save_parameters(filename)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:
                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)
                fake_latent = netEn(real_in)
                y = netDe(fake_latent)
                fake_out = y
                metricMSE.update([fake_out], [real_out])
            _, acc2 = metricMSE.get()
            text_file.write("%s %s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(),
                                               str(acc2), str(accStrong)))
            metricMSE.reset()
        if counter > datasize:
            break
    return [loss_rec_D, loss_rec_G, loss_rec_R, acc_rec,
            loss_rec_D2, loss_rec_G2, acc2_rec]
def onelayer(self, x, layer):
    xx = F.tanh(layer[0](x))
    # xx = nn.HybridLambda('tanh')(layer[0](x))
    return layer[1](xx)
def gru(x, h, Wxr, Wxz, Whr, Whz, Wxh, Whh, br, bz, bh):
    r = nd.sigmoid(nd.dot(x, Wxr) + nd.dot(h, Whr) + br)          # reset gate
    z = nd.sigmoid(nd.dot(x, Wxz) + nd.dot(h, Whz) + bz)          # update gate
    h_tilde = nd.tanh(nd.dot(x, Wxh) + r * nd.dot(h, Whh) + bh)   # candidate hidden state
    return z * h + (1 - z) * h_tilde
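# Illustrative usage sketch for the gru cell above (not part of the original
# snippet); dimensions and initialization are assumptions.
from mxnet import nd

batch_size, input_dim, hidden_dim = 2, 6, 4
Wxr, Wxz, Wxh = (nd.random.normal(scale=0.01, shape=(input_dim, hidden_dim)) for _ in range(3))
Whr, Whz, Whh = (nd.random.normal(scale=0.01, shape=(hidden_dim, hidden_dim)) for _ in range(3))
br, bz, bh = (nd.zeros(hidden_dim) for _ in range(3))
x = nd.random.uniform(shape=(batch_size, input_dim))
h = nd.zeros((batch_size, hidden_dim))
h_next = gru(x, h, Wxr, Wxz, Whr, Whz, Wxh, Whh, br, bz, bh)  # (batch_size, hidden_dim)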
def oneforward(self, x, layer):
    return F.tanh(layer[0](x))
def manifold(self, x):
    n_layer = len(self.layers)
    for i in range(n_layer - 1):
        x = F.tanh(self.layers[i][0](x))
    return self.layers[n_layer - 1][0](x)