def infer(self, head_emb, rel_emb, tail_emb):
    """Score every (head, relation, tail) triple combination, ComplEx-style.

    Each embedding packs real and imaginary halves along the last axis.
    The result has shape (num_head, num_rel, num_tail), one score per
    triple, obtained by summing the complex trilinear product over the
    embedding dimension.
    """
    re_head, im_head = nd.split(head_emb, num_outputs=2, axis=-1)
    re_tail, im_tail = nd.split(tail_emb, num_outputs=2, axis=-1)
    re_rel, im_rel = nd.split(rel_emb, num_outputs=2, axis=-1)

    def head_rel(h, r):
        # Outer head/relation combination, broadcast-ready: (H, R, 1, d).
        return (h.expand_dims(axis=1) * r.expand_dims(axis=0)).expand_dims(axis=2)

    def tail_bc(t):
        # Tail half lifted to (1, 1, T, d) for broadcasting against (H, R, 1, d).
        return t.expand_dims(axis=0).expand_dims(axis=0)

    score = (
        head_rel(re_head, re_rel) * tail_bc(re_tail)
        + head_rel(im_head, re_rel) * tail_bc(im_tail)
        + head_rel(re_head, im_rel) * tail_bc(im_tail)
        - head_rel(im_head, im_rel) * tail_bc(re_tail)
    )
    return nd.sum(score, -1)
def _get_lstm_features(self, sentence):
    """Run the LSTM over `sentence` and return one tag-score row per token.

    Resets the hidden state first, so each sentence is scored independently.
    Returns a list of `length` NDArrays (axis 0 squeezed away by the split).
    """
    self.hidden = self.init_hidden()
    seq_len = sentence.shape[0]
    # (seq_len,) token ids -> (seq_len, batch=1, embed_dim) for the LSTM.
    embedded = self.word_embeds(sentence).reshape((seq_len, 1, -1))
    outputs, self.hidden = self.lstm(embedded, self.hidden)
    tag_scores = self.hidden2tag(outputs.reshape((seq_len, self.hidden_dim)))
    return nd.split(tag_scores, num_outputs=seq_len, axis=0, squeeze_axis=True)
def _get_lstm_features(self, sentence):
    """Compute per-token emission scores for `sentence` via the LSTM.

    The hidden state is re-initialized per call; the result is a list of
    `length` per-token score NDArrays (split along axis 0, squeezed).
    """
    self.hidden = self.init_hidden()
    n_tokens = sentence.shape[0]
    embeds = self.word_embeds(sentence)
    # Shape to (n_tokens, batch=1, -1) as the recurrent layer expects.
    lstm_out, self.hidden = self.lstm(embeds.reshape((n_tokens, 1, -1)), self.hidden)
    flat = lstm_out.reshape((n_tokens, self.hidden_dim))
    return nd.split(self.hidden2tag(flat), num_outputs=n_tokens, axis=0, squeeze_axis=True)
def subtract_imagenet_mean_preprocess_batch(batch):
    """Reorder an RGB batch to BGR and subtract the ImageNet channel means.

    Input is NCHW with channels in RGB order; output is NCHW in BGR order
    with the per-channel ImageNet means (R=123.680, G=116.779, B=103.939)
    subtracted — the classic Caffe-style VGG preprocessing.
    """
    # Bring channels to axis 0 so a 3-way split yields the color planes.
    channels_first = F.swapaxes(batch, 0, 1)
    r, g, b = F.split(channels_first, num_outputs=3, axis=0)
    shifted = F.concat(b - 103.939, g - 116.779, r - 123.680, dim=0)
    return F.swapaxes(shifted, 0, 1)
def add_imagenet_mean_batch(batch):
    """Add the ImageNet channel means back to a BGR batch.

    Inverse of the subtract step: input/output are NCHW in BGR order; the
    per-channel means (B=103.939, G=116.779, R=123.680) are added back.
    """
    channels_first = F.swapaxes(batch, 0, 1)
    b, g, r = F.split(channels_first, num_outputs=3, axis=0)
    restored = F.concat(b + 103.939, g + 116.779, r + 123.680, dim=0)
    return F.swapaxes(restored, 0, 1)
def refine_anchor_generator(arm_anchor_boxes, arm_loc_preds):
    """Decode ARM location predictions against their anchors (RefineDet).

    Args:
        arm_anchor_boxes: (1, num_anchors, 4) anchors in corner format
            (xmin, ymin, xmax, ymax), shared by every image in the batch.
        arm_loc_preds: (batch, num_anchors*4) predicted offsets
            (dx, dy, dw, dh) from the anchor refinement module.

    Returns:
        (batch, num_anchors, 4) refined anchors in corner format.
    """
    batch_size = arm_loc_preds.shape[0]
    # Tile the shared anchors across the batch: (batch, num_anchors, 4).
    arm_anchor_boxes = nd.concat(*[arm_anchor_boxes] * batch_size, dim=0)
    al, at, ar, ab = nd.split(data=arm_anchor_boxes, axis=2, num_outputs=4)
    # Anchor width/height and center.
    aw = ar - al
    ah = ab - at
    ax = (al + ar) / 2.0
    ay = (at + ab) / 2.0
    # (batch, num_anchors, 4) offsets, one 4-tuple per anchor.
    arm_loc_preds = nd.reshape(data=arm_loc_preds, shape=(0, -1, 4))
    # BUG FIX: the offsets must come from the location predictions
    # (arm_loc_preds_bs); the original code re-read arm_anchor_boxes_bs,
    # leaving the predictions unused and returning un-refined boxes.
    ox_preds, oy_preds, ow_preds, oh_preds = nd.split(data=arm_loc_preds, axis=2, num_outputs=4)
    # Standard SSD box decoding with variances (0.1, 0.1, 0.2, 0.2).
    ox = ox_preds * aw * 0.1 + ax
    oy = oy_preds * ah * 0.1 + ay
    ow = nd.exp(ow_preds * 0.2) * aw
    oh = nd.exp(oh_preds * 0.2) * ah
    # Center/size back to corner format.
    out0 = ox - ow / 2.0
    out1 = oy - oh / 2.0
    out2 = ox + ow / 2.0
    out3 = oy + oh / 2.0
    refine_anchor = nd.concat(out0, out1, out2, out3, dim=2)
    return refine_anchor  # (batch, num_anchors, 4)
def test_out_grads():
    """backward() must honor per-output head gradients, treating None as ones."""
    x = nd.ones((3, 5))
    dx = nd.zeros_like(x)
    mark_variables([x], [dx])
    head_grads = [None, nd.array([1, 2, 3, 4, 5]), nd.array([5, 4, 3, 2, 1])]
    with train_section():
        rows = nd.split(x, axis=0, num_outputs=3, squeeze_axis=True)
        backward(list(rows), head_grads)
    expected = np.array([[1, 1, 1, 1, 1],
                         [1, 2, 3, 4, 5],
                         [5, 4, 3, 2, 1]])
    assert (dx.asnumpy() == expected).all()
def preprocess_batch(batch):
    """Swap an NCHW batch's color channels from RGB to BGR order."""
    channels_first = F.swapaxes(batch, 0, 1)
    r, g, b = F.split(channels_first, num_outputs=3, axis=0)
    reordered = F.concat(b, g, r, dim=0)
    return F.swapaxes(reordered, 0, 1)
def tensor_save_bgrimage(tensor, filename, cuda=False):
    """Reorder a BGR image tensor to RGB and save via tensor_save_rgbimage."""
    b, g, r = F.split(tensor, num_outputs=3, axis=0)
    rgb = F.concat(r, g, b, dim=0)
    tensor_save_rgbimage(rgb, filename, cuda)
def forward(self, input_vec, pop_art_hyper=None, loss=None, training=True):
    """Forward pass of a slot-factored policy/value network.

    Slices `input_vec` into per-slot segments plus a 'global' segment,
    feeds them through per-slot input layers and shared hidden layers,
    then produces a joint action-probability distribution and a state
    value. When `pop_art_hyper` is given, the value head is rescaled with
    Pop-Art statistics (sigma, sigma_prime, mu, mu_prime).

    Returns:
        (batch_slotv_prob, batch_value) — softmax over all slot-value
        logits concatenated along dim 1, and the squeezed scalar value.

    NOTE(review): this block was reconstructed from a collapsed one-line
    source; statement grouping (especially branch boundaries) preserved
    as inferred from data-flow.
    """
    # print('************* ' + str(input_vec.shape[1]) + ' *************')
    # print('############# ' + str(input_vec.shape) + ' #############')
    assert input_vec.shape[1] == self.input_dimension
    # get inputs for every slot(including global)
    inputs = {}
    for slot in self.slots:
        # Each slot owns a contiguous [start, end) column range of the input.
        inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
    input_global = []
    for seg in self.global_dimension:
        # The global features may live in several disjoint column segments.
        input_global.append(input_vec[:, seg[0]:seg[1]])
    inputs['global'] = nd.concat(*input_global, dim=1)
    layer = []
    # inputs -> first_hidden_layer
    if (not self.sort_input_vec) and self.state_feature != 'dip':
        # Independent per-slot input transforms; global goes last.
        layer.append([])
        for slot in self.slots:
            layer[0].append(self.input_trans[slot](inputs[slot]))
        layer[0].append(self.input_trans['global'](inputs['global']))
    elif self.state_feature == 'dip':
        # DIP features: one shared transform over the ordered slot list.
        sorted_inputs = []
        for slot in self.slots:
            sorted_inputs.append(inputs[slot])
        sorted_inputs.append(inputs['global'])
        layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))
    elif self.sort_input_vec:
        # Sort each slot's value scores (descending) and pad/truncate to a
        # fixed width of 20, keeping the last two columns untouched.
        sorted_inputs = []
        for slot in self.slots:
            tmp = inputs[slot][:, :-2].sort(is_ascend=False)
            if tmp.shape[1] < 20:
                tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1)
            else:
                tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
            sorted_inputs.append(nd.concat(tmp, inputs[slot][:, -2:], dim=1))
        sorted_inputs.append(inputs['global'])
        layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))
    # hidden_layers
    for i in range(self.hidden_layers - 1):
        if self.recurrent_mode is False:
            # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))'
            layer.append(self.ma_trans[i](layer[i], loss))
        else:
            # One shared transform reused across depth.
            layer.append(self.ma_trans(layer[i], loss))
    if self.share_last_layer is False:
        # dropout of last hidden layer
        for j in range(len(self.slots)):
            layer[-1][j] = self.local_out_drop_op(layer[-1][j])
        layer[-1][-1] = self.global_out_drop_op(layer[-1][-1])
    # last_hidden_layer -> outputs
    outputs = []  # only filled in the non-dueling branch; never returned
    slotv_probs = []
    attention = []
    # tmp_ave = nd.zeros_like(layer[-1][0])
    for i in range(len(self.slots) + 1):
        if self.use_dueling is False:
            outputs.append(self.output_trans[i](layer[-1][i]))
        else:
            # Per-slot value-probability head; global slot uses its own head.
            if i < len(self.slots):
                cur_slotv_prob = self.output_trans_local_valueP.forward(layer[-1][i], training=training)
                cur_slotv_prob = nd.softmax(cur_slotv_prob)
            else:
                cur_slotv_prob = self.output_trans_global_valueP.forward(layer[-1][i], training=training)
                cur_slotv_prob = nd.softmax(cur_slotv_prob)
            if i < len(self.slots):
                cur_slot_prob = self.output_trans_local_slotP.forward(layer[-1][i], training=training)
                # Attention score for this slot, conditioned on the global features.
                attention.append(self.attention_layer.forward(nd.concat(*[layer[-1][i], layer[-1][-1]], dim=1), training=training))
                # tmp_ave = tmp_ave + layer[-1][i]
            else:
                # Global iteration (runs last): weight the slot features by the
                # softmaxed attention, then build the value-head input.
                # NOTE(review): this mutates layer[-1][j] in place, after all
                # local-slot heads have already consumed the unweighted values.
                cur_slot_prob = self.output_trans_global_slotP.forward(layer[-1][i], training=training)
                softmax_attention = nd.softmax(nd.concat(*attention))
                split_softmax_attention = nd.split(softmax_attention, num_outputs=len(self.slots))
                tmp_ave = nd.zeros_like(layer[-1][0])
                for j in range(len(self.slots)):
                    layer[-1][j] = layer[-1][j]*split_softmax_attention[j]
                    tmp_ave = tmp_ave + layer[-1][j]
                slots_concat = nd.concat(*[tmp_ave, layer[-1][i]], dim=1)
            # Joint probability of picking this slot AND this value.
            cur_slotv_prob = cur_slotv_prob*cur_slot_prob
            slotv_probs.append(cur_slotv_prob)
    # NOTE(review): slots_concat is only assigned in the dueling branch's
    # global iteration; the value head below assumes use_dueling is True.
    if pop_art_hyper != None:
        # Pop-Art rescaling of the value head (see van Hasselt et al. 2016).
        sigma, sigma_prime, mu, mu_prime = pop_art_hyper
        # self.private_w.set_data((sigma/sigma_prime)*self.private_w.data())
        # self.private_b.set_data((sigma*self.private_b.data() + mu - mu_prime)/sigma_prime)
        batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))
        batch_value = nd.squeeze((sigma/sigma_prime)*self.private_w.data()*(self.output_trans_value.forward(slots_concat, training=training))+((sigma*self.private_b.data() + mu - mu_prime)/sigma_prime))
    else:
        batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))
        batch_value = nd.squeeze(self.private_w.data()*(self.output_trans_value.forward(slots_concat, training=training))+self.private_b.data())
    # print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
    # print(batch_slotv_prob)
    # print(batch_slotv_prob.shape)
    # print(batch_value)
    # print(batch_value.shape)
    # exit(0)
    return batch_slotv_prob, batch_value
def train(pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, trainerEn, trainerDe, trainerD, lambda1, batch_size, expname, append=True, useAE = False):
    """Adversarially train a VAE (netEn encoder + netDe decoder) against netD.

    Alternates a discriminator update (real vs. reconstructed images) with a
    generator update whose loss is GAN loss + lambda1 * (negative ELBO).
    Every 10 epochs, saves checkpoints, runs validation MSE, and writes a
    visualization grid to outputs/.

    Returns:
        [loss_rec_D, loss_rec_G, loss_rec_R, acc_rec] — per-batch histories.

    NOTE(review): reconstructed from a collapsed one-line source; statement
    grouping (autograd scopes, loop boundaries) inferred from data-flow.
    NOTE(review): useAE, L1_loss, image_pool and stamp are currently unused.
    """
    # Truncate the validation log for this run.
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf, opt.ngf, opt.append)
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    image_pool = imagePool.ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)   # discriminator binary accuracy
    metric2 = mx.metric.MSE()               # validation reconstruction error
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        iter = 0
        #print('learning rate : '+str(trainerD.learning_rate ))
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            soft_zero = 1e-10  # numerical floor inside log()
            # Forward VAE pass OUTSIDE autograd.record(): produces fake_out for
            # the D update without building a generator graph.
            fake_latent= netEn(real_in)
            fake_latent = np.squeeze(fake_latent)
            # Encoder output packs mean and log-variance along axis 1.
            mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
            mu = (mu_lv[0])
            lv = (mu_lv[1])
            KL = 0.5*nd.nansum(1+lv-mu*mu-nd.exp(lv+soft_zero))
            eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, 2048), ctx=ctx)
            # Reparameterization trick: z = mu + sigma * eps.
            z = mu + nd.exp(0.5*lv)*eps
            z = nd.expand_dims(nd.expand_dims(z,2),2)  # (N, 2048) -> (N, 2048, 1, 1)
            y = netDe(z)
            fake_out = y
            logloss = nd.nansum(real_in*nd.log(y+soft_zero)+ (1-real_in)*nd.log(1-y+soft_zero))
            loss = -logloss-KL
            fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history imagesi
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([fake_label, ], [output, ])
                # Train with real image.
                real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
            metric.update([real_label, ], [output, ])
            trainerD.step(batch.data[0].shape[0])
            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                # Full recorded VAE pass so gradients reach both En and De.
                fake_latent= np.squeeze(netEn(real_in))
                mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
                mu = mu_lv[0]
                lv = mu_lv[1]
                KL = 0.5*nd.nansum(1+lv-mu*mu-nd.exp(lv+soft_zero))
                eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, 2048), ctx=ctx)
                #KL = 0.5*nd.nansum(1+lv-mu*mu-nd.exp(lv+soft_zero))
                z = mu + nd.exp(0.5*lv)*eps
                z = nd.expand_dims(nd.expand_dims(z,2),2)
                y = netDe(z)
                fake_out = y
                # Bernoulli log-likelihood with inputs rescaled from [-1, 1] to [0, 1].
                logloss = nd.nansum((real_in+1)*0.5*nd.log(0.5*(y+1)+soft_zero)+ (1-0.5*(real_in+1))*nd.log(1-0.5*(y+1)+soft_zero))
                loss =-logloss-KL  # negative ELBO
                fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(output, real_label) + loss*lambda1 #L1_loss(real_out, fake_out) * lambda1
                errR = logloss#L1_loss(real_out, fake_out)
                errG.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])
            loss_rec_G.append(nd.mean(errG).asscalar()-nd.mean(errR).asscalar()*lambda1)
            loss_rec_D.append(nd.mean(errD).asscalar())
            loss_rec_R.append(nd.mean(errR).asscalar())
            name, acc = metric.get()
            acc_rec.append(acc)
            # Print log infomation every ten batches
            if iter % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                #print(errD)
                logging.info('discriminator loss = %f, generator loss = %f, binary training acc = %f reconstruction error= %f at iter %d epoch %d' % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc,nd.mean(errR).asscalar() ,iter, epoch))
            iter = iter + 1
            btic = time.time()
        name, acc = metric.get()
        metric.reset()
        train_data.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))
        # Periodic checkpoint + validation + visualization.
        if epoch%10 ==0:
            text_file = open(expname + "_validtest.txt", "a")
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_D.params"
            netD.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_En.params"
            netEn.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_De.params"
            netDe.save_params(filename)
            # Triptychs (input | target | reconstruction) from the last TRAIN batch.
            fake_img1 = nd.concat(real_in[0],real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1],real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2],real_out[2], fake_out[2], dim=1)
            fake_img4 = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:
                # NOTE(review): rebinds real_in/real_out/fake_out to validation data.
                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)
                fake_latent= netEn(real_in)
                mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
                mu = mu_lv[0]
                lv = mu_lv[1]
                # NOTE(review): batch_size/5 is float division on Python 3 —
                # confirm the intended validation batch size / Python version.
                eps = nd.random_normal(loc=0, scale=1, shape=(batch_size/5, 2048,1,1), ctx=ctx)
                z = mu + nd.exp(0.5*lv)*eps
                y = netDe(z)
                fake_out = y
                # soft_zero leaks in from the training loop above.
                KL = 0.5*nd.sum(1+lv-mu*mu-nd.exp(lv),axis=1)
                logloss = nd.sum(real_in*nd.log(y+soft_zero)+ (1-real_in)*nd.log(1-y+soft_zero), axis=1)
                loss = logloss+KL
                metric2.update([fake_out, ], [real_out, ])
            _, acc2 = metric2.get()
            text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2)))
            metric2.reset()
            # Triptychs from the LAST validation batch.
            fake_img1T = nd.concat(real_in[0],real_out[0], fake_out[0], dim=1)
            fake_img2T = nd.concat(real_in[1],real_out[1], fake_out[1], dim=1)
            fake_img3T = nd.concat(real_in[2],real_out[2], fake_out[2], dim=1)
            #fake_img4T = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            fake_img = nd.concat(fake_img1,fake_img2, fake_img3,fake_img1T,fake_img2T, fake_img3T,dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_'+str(epoch)+'.png')
            text_file.close()
    return([loss_rec_D,loss_rec_G, loss_rec_R, acc_rec])