def k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train, learning_rate, weight_decay, batch_size): """Conducts k-fold cross validation for the model.""" assert k > 1 fold_size = X_train.shape[0] // k train_loss_sum = 0.0 test_loss_sum = 0.0 for test_idx in range(k): X_val_test = X_train[test_idx * fold_size: (test_idx + 1) * fold_size, :] y_val_test = y_train[test_idx * fold_size: (test_idx + 1) * fold_size] val_train_defined = False for i in range(k): if i != test_idx: X_cur_fold = X_train[i * fold_size: (i + 1) * fold_size, :] y_cur_fold = y_train[i * fold_size: (i + 1) * fold_size] if not val_train_defined: X_val_train = X_cur_fold y_val_train = y_cur_fold val_train_defined = True else: X_val_train = nd.concat(X_val_train, X_cur_fold, dim=0) y_val_train = nd.concat(y_val_train, y_cur_fold, dim=0) net = get_net() train_loss = train(net, X_val_train, y_val_train, epochs, verbose_epoch, learning_rate, weight_decay, batch_size) train_loss_sum += train_loss test_loss = get_rmse_log(net, X_val_test, y_val_test) print("Test loss: %f" % test_loss) test_loss_sum += test_loss return train_loss_sum / k, test_loss_sum / k
def biLSTM(f_lstm, b_lstm, inputs, batch_size=None, dropout_x=0., dropout_h=0.): """Feature extraction through BiLSTM Parameters ---------- f_lstm : VariationalDropoutCell Forward cell b_lstm : VariationalDropoutCell Backward cell inputs : NDArray seq_len x batch_size dropout_x : float Variational dropout on inputs dropout_h : Not used Returns ------- outputs : NDArray Outputs of BiLSTM layers, seq_len x 2 hidden_dims x batch_size """ for f, b in zip(f_lstm, b_lstm): inputs = nd.Dropout(inputs, dropout_x, axes=[0]) # important for variational dropout fo, fs = f.unroll(length=inputs.shape[0], inputs=inputs, layout='TNC', merge_outputs=True) bo, bs = b.unroll(length=inputs.shape[0], inputs=inputs.flip(axis=0), layout='TNC', merge_outputs=True) f.reset(), b.reset() inputs = nd.concat(fo, bo.flip(axis=0), dim=2) return inputs
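A usage sketch for `biLSTM`, with plain `gluon.rnn.LSTMCell`s standing in for the `VariationalDropoutCell`s the docstring mentions (cell type and sizes here are illustrative assumptions): the function takes one forward and one backward cell per layer, consumes a seq_len x batch_size x features input, and returns the forward and reversed-backward outputs concatenated along the feature axis.

from mxnet import nd
from mxnet.gluon import rnn

# Two BiLSTM layers, 100 hidden units per direction (illustrative sizes).
f_lstm = [rnn.LSTMCell(100), rnn.LSTMCell(100)]
b_lstm = [rnn.LSTMCell(100), rnn.LSTMCell(100)]
for cell in f_lstm + b_lstm:
    cell.initialize()

inputs = nd.random.uniform(shape=(20, 8, 50))   # seq_len x batch_size x features
outputs = biLSTM(f_lstm, b_lstm, inputs, dropout_x=0.33)
print(outputs.shape)                            # (20, 8, 200): forward and backward states concatenated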
def _forward_alg(self, feats): # Do the forward algorithm to compute the partition function alphas = [[-10000.] * self.tagset_size] alphas[0][self.tag2idx[START_TAG]] = 0. alphas = nd.array(alphas) # Iterate through the sentence for feat in feats: alphas_t = [] # The forward variables at this timestep for next_tag in range(self.tagset_size): # broadcast the emission score: it is the same regardless of # the previous tag emit_score = feat[next_tag].reshape((1, -1)) # the ith entry of trans_score is the score of transitioning to # next_tag from i trans_score = self.transitions[next_tag].reshape((1, -1)) # The ith entry of next_tag_var is the value for the # edge (i -> next_tag) before we do log-sum-exp next_tag_var = alphas + trans_score + emit_score # The forward variable for this tag is log-sum-exp of all the # scores. alphas_t.append(log_sum_exp(next_tag_var)) alphas = nd.concat(*alphas_t, dim=0).reshape((1, -1)) terminal_var = alphas + self.transitions[self.tag2idx[STOP_TAG]] alpha = log_sum_exp(terminal_var) return alpha
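`_forward_alg` relies on a `log_sum_exp` helper that is not shown in this snippet. A minimal version consistent with how it is used here (a numerically stable log-sum-exp over a 1 x tagset_size row vector, returning a one-element NDArray) would be:

from mxnet import nd

def log_sum_exp(vec):
    # Subtract the max before exponentiating for numerical stability, then add it back.
    max_score = nd.max(vec).asscalar()
    return nd.log(nd.sum(nd.exp(vec - max_score))) + max_score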
def bilinear(x, W, y, input_size, seq_len, batch_size, num_outputs=1, bias_x=False, bias_y=False): """Do xWy Parameters ---------- x : NDArray (input_size x seq_len) x batch_size W : NDArray (num_outputs x ny) x nx y : NDArray (input_size x seq_len) x batch_size input_size : int input dimension seq_len : int sequence length batch_size : int batch size num_outputs : int number of outputs bias_x : bool whether concat bias vector to input x bias_y : bool whether concat bias vector to input y Returns ------- output : NDArray [seq_len_y x seq_len_x if output_size == 1 else seq_len_y x num_outputs x seq_len_x] x batch_size """ if bias_x: x = nd.concat(x, nd.ones((1, seq_len, batch_size)), dim=0) if bias_y: y = nd.concat(y, nd.ones((1, seq_len, batch_size)), dim=0) nx, ny = input_size + bias_x, input_size + bias_y # W: (num_outputs x ny) x nx lin = nd.dot(W, x) if num_outputs > 1: lin = reshape_fortran(lin, (ny, num_outputs * seq_len, batch_size)) y = y.transpose([2, 1, 0]) # May cause performance issues lin = lin.transpose([2, 1, 0]) blin = nd.batch_dot(lin, y, transpose_b=True) blin = blin.transpose([2, 1, 0]) if num_outputs > 1: blin = reshape_fortran(blin, (seq_len, num_outputs, seq_len, batch_size)) return blin
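`bilinear` calls a `reshape_fortran` helper that is not defined in this snippet. NDArray reshapes are row-major only, so one common way to emulate a column-major (Fortran-order) reshape, shown here as a sketch, is to reverse all axes, reshape to the reversed target shape, and reverse the axes again:

from mxnet import nd

def reshape_fortran(tensor, shape):
    # Fortran-order reshape emulated with two full transposes around a row-major reshape.
    return nd.transpose(nd.transpose(tensor).reshape(tuple(reversed(shape))))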
def forward(self, x): if isinstance(x, np.ndarray): x = nd.array(x) if self._max_len > x.size: pad = nd.ones((self._max_len - x.size,)) * self._fill_value x = nd.concat(x, pad, dim=0) elif self._max_len < x.size: x = x[:self._max_len] return x
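The same pad-or-clip logic as a standalone function, for a quick check of the two branches (`max_len` and `fill_value` stand in for the block's `_max_len` and `_fill_value` attributes):

from mxnet import nd

def pad_or_clip(x, max_len, fill_value=0.0):
    # Pad a 1-D NDArray up to max_len with fill_value, or clip it down to max_len.
    if max_len > x.size:
        pad = nd.ones((max_len - x.size,)) * fill_value
        return nd.concat(x, pad, dim=0)
    return x[:max_len]

print(pad_or_clip(nd.array([1, 2, 3]), 5))           # padded with zeros to length 5
print(pad_or_clip(nd.array([1, 2, 3, 4, 5, 6]), 5))  # clipped to the first 5 values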
def train(input_variable, target_variable, encoder, decoder, teacher_forcing_ratio,
          encoder_optimizer, decoder_optimizer, criterion, max_length, ctx):
    with autograd.record():
        loss = F.zeros((1,), ctx=ctx)
        encoder_hidden = encoder.initHidden(ctx)
        input_length = input_variable.shape[0]
        target_length = target_variable.shape[0]
        encoder_outputs, encoder_hidden = encoder(
            input_variable.expand_dims(0), encoder_hidden)
        if input_length < max_length:
            # Pad the encoder outputs with zeros up to max_length.
            encoder_outputs = F.concat(
                encoder_outputs.flatten(),
                F.zeros((max_length - input_length, encoder.hidden_size), ctx=ctx),
                dim=0)
        else:
            encoder_outputs = encoder_outputs.flatten()
        decoder_input = F.array([SOS_token], ctx=ctx)
        decoder_hidden = encoder_hidden
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                loss = F.add(loss, criterion(decoder_output, target_variable[di]))
                print(criterion(decoder_output, target_variable[di]))
                decoder_input = target_variable[di]  # Teacher forcing
        else:
            # Without teacher forcing: use its own predictions as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topi = decoder_output.argmax(axis=1)
                decoder_input = F.array([topi.asscalar()], ctx=ctx)
                loss = F.add(loss, criterion(decoder_output, target_variable[di]))
                if topi.asscalar() == EOS_token:
                    break
    loss.backward()
    encoder_optimizer.step(1)
    decoder_optimizer.step(1)
    return loss.asscalar() / target_length
def _score_sentence(self, feats, tags): # Gives the score of a provided tag sequence score = nd.array([0]) tags = nd.concat(nd.array([self.tag2idx[START_TAG]]), *tags, dim=0) for i, feat in enumerate(feats): score = score + \ self.transitions[to_scalar(tags[i+1]), to_scalar(tags[i])] + feat[to_scalar(tags[i+1])] score = score + self.transitions[self.tag2idx[STOP_TAG], to_scalar(tags[int(tags.shape[0]-1)])] return score
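`_score_sentence` (and `_viterbi_decode` further down) index NDArrays through `to_scalar` and `argmax` helpers that are not shown. Minimal versions consistent with how they are used here:

from mxnet import nd

def to_scalar(var):
    # Convert a single-element NDArray to a plain Python int for indexing.
    return int(var.asscalar())

def argmax(vec):
    # Index of the largest element of a 1 x N row vector, as a Python int.
    return to_scalar(nd.argmax(vec, axis=1))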
def subtract_imagenet_mean_preprocess_batch(batch): """Subtract ImageNet mean pixel-wise from a BGR image.""" batch = F.swapaxes(batch,0, 1) (r, g, b) = F.split(batch, num_outputs=3, axis=0) r = r - 123.680 g = g - 116.779 b = b - 103.939 batch = F.concat(b, g, r, dim=0) batch = F.swapaxes(batch,0, 1) return batch
def forward(self, input, hidden, encoder_outputs): #input shape, (1,) embedded = self.embedding(input) if self.dropout_p > 0: embedded = self.dropout(embedded) attn_weights = F.softmax( self.attn(F.concat(embedded, hidden[0].flatten(), dim=1))) attn_applied = F.batch_dot(attn_weights.expand_dims(0), encoder_outputs.expand_dims(0)) output = F.concat(embedded.flatten(), attn_applied.flatten(), dim=1) output = self.attn_combine(output).expand_dims(0) for i in range(self.n_layers): output = F.relu(output) output, hidden = self.gru(output, hidden) output = self.out(output) return output, hidden, attn_weights
def add_imagenet_mean_batch(batch): batch = F.swapaxes(batch,0, 1) (b, g, r) = F.split(batch, num_outputs=3, axis=0) r = r + 123.680 g = g + 116.779 b = b + 103.939 batch = F.concat(b, g, r, dim=0) batch = F.swapaxes(batch,0, 1) """ batch = denormalizer(batch) """ return batch
def _viterbi_decode(self, feats): backpointers = [] # Initialize the viterbi variables in log space vvars = nd.full((1, self.tagset_size), -10000.) vvars[0, self.tag2idx[START_TAG]] = 0 for feat in feats: bptrs_t = [] # holds the backpointers for this step viterbivars_t = [] # holds the viterbi variables for this step for next_tag in range(self.tagset_size): # next_tag_var[i] holds the viterbi variable for tag i at the # previous step, plus the score of transitioning # from tag i to next_tag. # We don't include the emission scores here because the max # does not depend on them (we add them in below) next_tag_var = vvars + self.transitions[next_tag] best_tag_id = argmax(next_tag_var) bptrs_t.append(best_tag_id) viterbivars_t.append(next_tag_var[0, best_tag_id]) # Now add in the emission scores, and assign vvars to the set # of viterbi variables we just computed vvars = (nd.concat(*viterbivars_t, dim=0) + feat).reshape((1, -1)) backpointers.append(bptrs_t) # Transition to STOP_TAG terminal_var = vvars + self.transitions[self.tag2idx[STOP_TAG]] best_tag_id = argmax(terminal_var) path_score = terminal_var[0, best_tag_id] # Follow the back pointers to decode the best path. best_path = [best_tag_id] for bptrs_t in reversed(backpointers): best_tag_id = bptrs_t[best_tag_id] best_path.append(best_tag_id) # Pop off the start tag (we dont want to return that to the caller) start = best_path.pop() assert start == self.tag2idx[START_TAG] # Sanity check best_path.reverse() return path_score, best_path
def query(self, images): if self.pool_size == 0: return images return_images = [] for image in images: image = image.reshape(1,image.shape[0],image.shape[1],image.shape[2]) if self.num_imgs < self.pool_size: self.num_imgs = self.num_imgs + 1 self.images.append(image) return_images.append(image) else: p = random.uniform(0, 1) if p > 0.5: random_id = random.randint(0, self.pool_size - 1) # randint is inclusive tmp = self.images[random_id].copy() self.images[random_id] = image return_images.append(tmp) else: return_images.append(image) image_array = return_images[0].copyto(images.context) for image in return_images[1:]: image_array = nd.concat(image_array,image.copyto(images.context),dim=0) return image_array
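The `query` method above assumes the surrounding object carries `pool_size`, `num_imgs`, and `images` attributes. A minimal constructor consistent with that usage; the class name `ImagePool` matches how the pool is instantiated later in these examples (`imagePool.ImagePool(pool_size)`):

class ImagePool:
    """History buffer of previously generated images; the query method above is
    assumed to be attached to this class."""

    def __init__(self, pool_size):
        self.pool_size = pool_size  # 0 disables the pool entirely
        self.num_imgs = 0           # number of images currently stored
        self.images = []            # the stored single-image batches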
def parse_net_output(Y,numClass, box_per_cell): pred = nd.transpose(Y,(0,2,3,1)) pred = pred.reshape((0,0,0,box_per_cell,numClass + 5)) #add one dim for boxes predCls = nd.slice_axis(pred, begin = 0, end = numClass,axis=-1) predObject = nd.slice_axis(pred,begin=numClass,end=numClass+1,axis=-1) #predObject = nd.sigmoid(predObject) predXY = nd.slice_axis(pred, begin = numClass + 1, end = numClass + 3, axis=-1) #predXY = nd.sigmoid(predXY) predWH = nd.slice_axis(pred, begin = numClass + 3, end = numClass + 5, axis=-1) #x,y = convert_xy(predXY) #w,h = convert_wh(predWH) #w = nd.clip(w,0,1) #h = nd.clip(h,0,1) #x0 = nd.clip(x, 0, 1) #y0 = nd.clip(y,0,1) #x1 = nd.clip(x0 + w,0,1) #y1 = np.clip(y0 + h, 0,1) #x = x0 #y = y0 #w = x1 - x0 #h = y1 - y0 XYWH = nd.concat(predXY,predWH,dim=-1) # pdb.set_trace() return predCls, predObject, XYWH
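To make the slicing arithmetic in `parse_net_output` concrete, a toy run with hypothetical sizes (20 classes, 5 boxes per cell, a 13 x 13 grid, batch of 2) yields these shapes:

from mxnet import nd

numClass, box_per_cell = 20, 5
Y = nd.random.uniform(shape=(2, box_per_cell * (numClass + 5), 13, 13))  # NCHW network output

predCls, predObject, XYWH = parse_net_output(Y, numClass, box_per_cell)
print(predCls.shape)     # (2, 13, 13, 5, 20) class scores per box
print(predObject.shape)  # (2, 13, 13, 5, 1)  objectness per box
print(XYWH.shape)        # (2, 13, 13, 5, 4)  box center and size per box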
valid_ds_aug, label_valid_aug, angle_valid_aug = augment_data(valid_ds[0], valid_ds[1], valid_ds[2])
valid_ds = (
    valid_ds_aug.astype('float32'),
    nd.array(label_valid_aug).astype('float32'),
    nd.array(angle_valid_aug).astype('float32')
)

test_norm = []
angle_test_norm = []
for k in range(test.shape[0]):
    imag = test[k].reshape(shape=(1, test[k].shape[0], test[k].shape[1], test[k].shape[2]))
    test_norm.append(img_norm(imag))
for k in range(angles_test.shape[0]):
    angle_test_norm.append(angles_test[k].asscalar())  # collect the raw test angles
test_ds = (nd.concat(*test_norm, dim=0).astype('float32'), ids, angle_test_norm)

batch_size = 128
train_data = DataLoader(train_ds, batch_size, shuffle=True)
valid_data = DataLoader(valid_ds, batch_size, shuffle=False)
test_data = TestDataLoader(test_ds, batch_size)

test_norm = []
for k in range(test.shape[0]):
    print(len(test_ds[0]))

train(train_data, valid_data, test_data, batch_size)
batch[(be - bs) * w + b] = max(0, int(tgt[b][w]) - 1) if w < len(tgt[b]) else max_voc batch = nd.array(batch) # forward with autograd.record(): result = [] # CNN+encoder forward output, status = model(buffer, dpt) # RNN decoder forward for w in range(maxlen + expand_terminal): output, word, status = model.one_word(output, status) result.append(word) # make RNN output to sequencial result = F.concat(*result, dim=0) # make loss loss = loss_func(result, batch) loss_n.append(np.mean(loss.asnumpy())) del output, result # backward loss.backward() trainer.step(be - bs, ignore_stale_grad=True) n_iter += be - bs del loss, ids, tgt, maxlen, buffer, dpt, batch del bs, be, indexs print('%d/%d epoch loss=%f...' % (epoch, epochs, np.mean(loss_n))) loss_n = [] del n_iter, t_index, r_index del trainer, loss_func, loss_n, ID, TG, X_imgs
def generate_learned_samples(self): ''' Draw and generate data. Returns: `Tuple` data. The shape is ... - `mxnet.ndarray` of observed data points in training. - `mxnet.ndarray` of supervised data in training. - `mxnet.ndarray` of observed data points in test. - `mxnet.ndarray` of supervised data in test. ''' for epoch in range(self.epochs): training_batch_arr, test_batch_arr = None, None training_label_arr, test_label_arr = None, None for batch_size in range(self.batch_size): dir_key = np.random.randint( low=0, high=len(self.__training_file_path_list)) training_one_hot_arr = nd.zeros( (1, len(self.__training_file_path_list)), ctx=self.__ctx) training_one_hot_arr[0, dir_key] = 1 training_file_path_list = self.__split_at_intervals( self.__training_file_path_list[dir_key], start_pos=0, seq_interval=self.__at_intervals) training_data_arr, test_data_arr = None, None training_file_key = np.random.randint( low=0, high=len(training_file_path_list) - self.__seq_len) test_dir_key = np.random.randint( low=0, high=len(self.__test_file_path_list)) test_one_hot_arr = nd.zeros( (1, len(self.__test_file_path_list)), ctx=self.__ctx) test_one_hot_arr[0, test_dir_key] = 1 test_file_path_list = self.__split_at_intervals( self.__test_file_path_list[test_dir_key], start_pos=0, seq_interval=self.__at_intervals) test_file_key = np.random.randint( low=0, high=len(test_file_path_list) - self.__seq_len) for seq in range(self.__seq_len): seq_training_batch_arr = self.__image_extractor.extract( path=training_file_path_list[training_file_key + seq], ) seq_training_batch_arr = self.pre_normalize( seq_training_batch_arr) seq_training_batch_arr = nd.expand_dims( seq_training_batch_arr, axis=0) seq_test_batch_arr = self.__image_extractor.extract( path=test_file_path_list[test_file_key + seq], ) seq_test_batch_arr = self.pre_normalize(seq_test_batch_arr) seq_test_batch_arr = nd.expand_dims(seq_test_batch_arr, axis=0) if training_data_arr is not None: training_data_arr = nd.concat(training_data_arr, seq_training_batch_arr, dim=0) else: training_data_arr = seq_training_batch_arr if test_data_arr is not None: test_data_arr = nd.concat(test_data_arr, seq_test_batch_arr, dim=0) else: test_data_arr = seq_test_batch_arr training_data_arr = nd.expand_dims(training_data_arr, axis=0) test_data_arr = nd.expand_dims(test_data_arr, axis=0) if training_batch_arr is not None: training_batch_arr = nd.concat(training_batch_arr, training_data_arr, dim=0) else: training_batch_arr = training_data_arr if test_batch_arr is not None: test_batch_arr = nd.concat(test_batch_arr, test_data_arr, dim=0) else: test_batch_arr = test_data_arr if training_label_arr is not None: training_label_arr = nd.concat(training_label_arr, training_one_hot_arr, dim=0) else: training_label_arr = training_one_hot_arr if test_label_arr is not None: test_label_arr = nd.concat(test_label_arr, test_one_hot_arr, dim=0) else: test_label_arr = test_one_hot_arr if self.__noiseable_data is not None: training_batch_arr = self.__noiseable_data.noise( training_batch_arr) yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr
def cat(seq, dim): return nd.concat(*seq, dim=dim)
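A quick usage note for the `cat` wrapper: it mirrors the `torch.cat`-style calling convention, taking a Python sequence of NDArrays plus the concatenation axis.

from mxnet import nd

a = nd.ones((2, 3))
b = nd.zeros((2, 3))
print(cat([a, b], dim=0).shape)  # (4, 3)
print(cat([a, b], dim=1).shape)  # (2, 6)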
def trainadnov(opt, train_data, val_data, ctx, networks): netEn = networks[0] netDe = networks[1] netD = networks[2] netD2 = networks[3] netDS = networks[4] trainerEn = networks[5] trainerDe = networks[6] trainerD = networks[7] trainerD2 = networks[8] trainerSD = networks[9] cep = opt.continueEpochFrom epochs = opt.epochs lambda1 = opt.lambda1 batch_size = opt.batch_size expname = opt.expname append = opt.append text_file = open(expname + "_trainloss.txt", "w") text_file.close() text_file = open(expname + "_validtest.txt", "w") text_file.close() GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss() L1_loss = gluon.loss.L2Loss() metric = mx.metric.CustomMetric(facc) metricl = mx.metric.CustomMetric(facc) metricStrong = mx.metric.CustomMetric(facc) metric2 = mx.metric.MSE() loss_rec_G2 = [] acc2_rec = [] loss_rec_G = [] loss_rec_D = [] loss_rec_R = [] acc_rec = [] loss_rec_D2 = [] stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') logging.basicConfig(level=logging.DEBUG) lr = 2.0 * batch_size logging.basicConfig(level=logging.DEBUG) if cep == -1: cep = 0 else: netEn.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_En.params', ctx=ctx) netDe.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_De.params', ctx=ctx) netD.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D.params', ctx=ctx) netD2.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D2.params', ctx=ctx) netDS.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_SD.params', ctx=ctx) for epoch in range(cep + 1, epochs): tic = time.time() btic = time.time() train_data.reset() iter = 0 for batch in train_data: ############################ # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z))) ########################### real_in = batch.data[0].as_in_context(ctx) real_out = batch.data[1].as_in_context(ctx) fake_latent = netEn(real_in) mu = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) real_latent = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) fake_out = netDe(fake_latent) fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out if epoch > 150: # negative mining mu = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) mu.attach_grad() for ep2 in range(1): # doing single gradient step with autograd.record(): eps2 = nd.tanh(mu) rec_output = netDS(netDe(eps2)) fake_label = nd.zeros(rec_output.shape, ctx=ctx) errGS = GAN_loss(rec_output, fake_label) errGS.backward() mu -= lr / mu.shape[0] * mu.grad # Update mu with SGD eps2 = nd.tanh(mu) with autograd.record(): # Train with fake image output = netD(fake_concat) output2 = netD2(fake_latent) fake_label = nd.zeros(output.shape, ctx=ctx) fake_latent_label = nd.zeros(output2.shape, ctx=ctx) eps = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) rec_output = netD(netDe(eps)) errD_fake = GAN_loss(rec_output, fake_label) errD_fake2 = GAN_loss(output, fake_label) errD2_fake = GAN_loss(output2, fake_latent_label) metric.update([ fake_label, ], [ rec_output, ]) metric2.update([ fake_latent_label, ], [ output2, ]) real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out output = netD(real_concat) output2 = netD2(real_latent) real_label = nd.ones(output.shape, ctx=ctx) real_latent_label = nd.ones(output2.shape, ctx=ctx) errD_real = GAN_loss(output, real_label) errD2_real = GAN_loss(output2, real_latent_label) errD = (errD_real + errD_fake) * 0.5 errD2 = (errD2_real + errD2_fake) * 0.5 totalerrD = errD + errD2 
totalerrD.backward() metric.update([ real_label, ], [ output, ]) metric2.update([ real_latent_label, ], [ output2, ]) trainerD.step(batch.data[0].shape[0]) trainerD2.step(batch.data[0].shape[0]) with autograd.record(): # Train classifier strong_output = netDS(netDe(eps)) strong_real = netDS(fake_concat) errs1 = GAN_loss(strong_output, fake_label) errs2 = GAN_loss(strong_real, real_label) metricStrong.update([ fake_label, ], [ strong_output, ]) metricStrong.update([ real_label, ], [ strong_real, ]) strongerr = 0.5 * (errs1 + errs2) strongerr.backward() trainerSD.step(batch.data[0].shape[0]) ############################ # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z)) ########################### with autograd.record(): rec_output = netD(netDe(eps2)) fake_latent = (netEn(real_in)) output2 = netD2(fake_latent) fake_out = netDe(fake_latent) fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out output = netD(fake_concat) real_label = nd.ones(output.shape, ctx=ctx) real_latent_label = nd.ones(output2.shape, ctx=ctx) errG2 = GAN_loss(rec_output, real_label) errR = L1_loss(real_out, fake_out) * lambda1 errG = 10.0 * GAN_loss(output2, real_latent_label) + errG2 + errR errG.backward() trainerDe.step(batch.data[0].shape[0]) trainerEn.step(batch.data[0].shape[0]) loss_rec_G2.append(nd.mean(errG2).asscalar()) loss_rec_G.append( nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) loss_rec_D.append(nd.mean(errD).asscalar()) loss_rec_R.append(nd.mean(errR).asscalar()) loss_rec_D2.append(nd.mean(errD2).asscalar()) _, acc2 = metric2.get() name, acc = metric.get() acc_rec.append(acc) acc2_rec.append(acc2) # Print log infomation every ten batches if iter % 10 == 0: _, acc2 = metric2.get() name, acc = metric.get() _, accStrong = metricStrong.get() logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic))) logging.info( 'discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, SD loss = %f, D acc = %f , D2 acc = %f, DS acc = %f, reconstruction error= %f at iter %d epoch %d' % (nd.mean(errD).asscalar(), nd.mean(errD2).asscalar(), nd.mean(errG - errG2 - errR).asscalar(), nd.mean(errG2).asscalar(), nd.mean(strongerr).asscalar(), acc, acc2, accStrong, nd.mean(errR).asscalar(), iter, epoch)) iter = iter + 1 btic = time.time() name, acc = metric.get() _, acc2 = metric2.get() metric.reset() metric2.reset() train_data.reset() metricStrong.reset() logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc)) logging.info('time: %f' % (time.time() - tic)) if epoch % 5 == 0: filename = "checkpoints/" + expname + "_" + str( epoch) + "_D.params" netD.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_D2.params" netD2.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_En.params" netEn.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_De.params" netDe.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_SD.params" netDS.save_params(filename) val_data.reset() text_file = open(expname + "_validtest.txt", "a") for vbatch in val_data: real_in = vbatch.data[0].as_in_context(ctx) real_out = vbatch.data[1].as_in_context(ctx) fake_latent = netEn(real_in) y = netDe(fake_latent) fake_out = y metricMSE.update([ fake_out, ], [ real_out, ]) _, acc2 = metricMSE.get() text_file.write( "%s %s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2), str(accStrong))) 
metricMSE.reset() return [ loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2, acc2_rec ]
group_begin = time.monotonic() epoch_begin_time = time.monotonic() for batch_id, batch in enumerate(iterator): data = batch.data[0].as_in_context(ctx) # Data: Images labels = batch.label[0].as_in_context(ctx) # Data: Labels all_labels.extend(labels.asnumpy()) semantic_vectors = Resnet50(data) out1 = low_net(semantic_vectors[0]) out2 = med_net(semantic_vectors[1]) out3 = high_net(semantic_vectors[2]) combined = nd.concat(out1, out2, out3).asnumpy() print(" >> %d" % len(combined)) # print(len(out3)) for row in combined: all_features.append(row) # print(len(all_features)) # for row in all_features: # print(len(row)) # write_features_to_file(all_features, all_labels, feature_file) # -------------------------------------------------------------------- # -----------------------------< End >-------------------------------- # --------------------------------------------------------------------
def backward(self, out_grads=None): #print('in backward') assert self.binded and self.params_initialized #tmp_ctx = self._ctx_cpu tmp_ctx = self._ctx_single_gpu fc7_outs = [] ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context))) #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu()) for i, _module in enumerate(self._arcface_modules): _fc7 = _module.get_outputs(merge_multi_context=True)[0] fc7_outs.append(_fc7) _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx) ctx_fc7_max[:, i] = _fc7_max local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1)) nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max) global_fc7_max = local_fc7_max #local_fc7_sum = None local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size, 1)) local_fc7_sum[:, :] = 0.0 for i, _module in enumerate(self._arcface_modules): _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max) fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max) fc7_outs[i] = nd.exp(fc7_outs[i]) _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx) local_fc7_sum += _sum global_fc7_sum = local_fc7_sum if self._iter % self._frequent == 0: #_ctx = self._context[-1] _ctx = self._ctx_cpu _probs = [] for i, _module in enumerate(self._arcface_modules): _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d' % i, fc7_outs[i]) _probs.append(_prob) fc7_prob = self.get_ndarray( _ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes * len(self._context))) nd.concat(*_probs, dim=1, out=fc7_prob) fc7_pred = nd.argmax(fc7_prob, axis=1) pd = fc7_pred.asnumpy().astype('int32') local_label = self.global_label - self._local_class_start #local_label = self.get_ndarray2(_ctx, 'test_label', local_label) _pred = nd.equal(fc7_pred, local_label) print('fc7_acc [%d]: %f' % (self._iter, nd.mean(_pred).asnumpy()[0])) #local_fc1_grad = [] #fc1_grad_ctx = self._ctx_cpu fc1_grad_ctx = self._ctx_single_gpu local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size, self._emb_size)) local_fc1_grad[:, :] = 0.0 for i, _module in enumerate(self._arcface_modules): _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum) fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum) a = i * self._ctx_num_classes b = (i + 1) * self._ctx_num_classes _label = self.global_label - self._ctx_class_start[i] _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label) onehot_label = self.get_ndarray( fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes)) nd.one_hot(_label, depth=self._ctx_num_classes, on_value=1.0, off_value=0.0, out=onehot_label) fc7_outs[i] -= onehot_label _module.backward(out_grads=[fc7_outs[i]]) #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu()) ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d' % i, _module.get_input_grads()[0]) local_fc1_grad += ctx_fc1_grad global_fc1_grad = local_fc1_grad self._curr_module.backward(out_grads=[global_fc1_grad])
def trainAE(opt, train_data, val_data, ctx, networks): netEn = networks[0] netDe = networks[1] trainerEn = networks[5] trainerDe = networks[6] epochs = opt.epochs batch_size = opt.batch_size expname = opt.expname text_file = open(expname + "_trainloss.txt", "w") text_file.close() text_file = open(expname + "_validtest.txt", "w") text_file.close() L1_loss = gluon.loss.L2Loss() metric2 = mx.metric.MSE() loss_rec_G = [] loss_rec_D = [] loss_rec_R = [] acc_rec = [] loss_rec_D2 = [] stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') logging.basicConfig(level=logging.DEBUG) for epoch in range(epochs): tic = time.time() btic = time.time() train_data.reset() iter = 0 for batch in train_data: real_in = batch.data[0].as_in_context(ctx) real_out = batch.data[1].as_in_context(ctx) with autograd.record(): fake_out = netDe(netEn(real_in)) errR = L1_loss(real_out, fake_out) errR.backward() trainerDe.step(batch.data[0].shape[0]) trainerEn.step(batch.data[0].shape[0]) loss_rec_R.append(nd.mean(errR).asscalar()) if iter % 10 == 0: logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic))) logging.info('reconstruction error= %f at iter %d epoch %d' % (nd.mean(errR).asscalar(), iter, epoch)) iter = iter + 1 btic = time.time() text_tl = open(expname + "_trainloss.txt", "a") text_tl.write('%f %f %f %f %f %f %f ' % (0, 0, 0, 0, 0, nd.mean(errR).asscalar(), epoch)) text_file.close() train_data.reset() if epoch % 10 == 0: filename = "checkpoints/" + expname + "_" + str( epoch) + "_En.params" netEn.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_De.params" netDe.save_params(filename) fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) val_data.reset() text_file = open(expname + "_validtest.txt", "a") for vbatch in val_data: real_in = vbatch.data[0].as_in_context(ctx) real_out = vbatch.data[1].as_in_context(ctx) fake_out = netDe(netEn(real_in)) metric2.update([ fake_out, ], [ real_out, ]) _, acc2 = metric2.get() text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2))) metric2.reset() fake_img1T = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2T = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3T = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) fake_img = nd.concat(fake_img1, fake_img2, fake_img3, fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_' + str(epoch) + '.png') text_file.close() return ([loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2])
def make_grid(tensor, nrow=8, padding=2, normalize=False, range=None,
              scale_each=False, pad_value=0):
    if not (is_ndarray(tensor) or
            (isinstance(tensor, list) and all(is_ndarray(t) for t in tensor))):
        raise TypeError('tensor or list of tensors expected, got {}'.format(type(tensor)))

    # if list of tensors, convert to a 4D mini-batch Tensor
    if isinstance(tensor, list):
        tensor = nd.stack(*tensor, axis=0)

    if tensor.ndim == 2:  # single image H x W
        tensor = nd.expand_dims(tensor, axis=0)
    if tensor.ndim == 3:  # single image
        if tensor.shape[0] == 1:  # if single-channel, convert to 3-channel
            tensor = nd.concat(tensor, tensor, tensor, dim=0)
        tensor = nd.expand_dims(tensor, axis=0)

    if tensor.ndim == 4 and tensor.shape[1] == 1:  # single-channel images
        tensor = nd.concat(tensor, tensor, tensor, dim=1)

    if normalize is True:
        tensor = tensor.copy()  # avoid modifying tensor in-place
        if range is not None:
            assert isinstance(range, tuple), \
                "range has to be a tuple (min, max) if specified. min and max are numbers"

        def norm_ip(img, min, max):
            img[:] = nd.clip(img, min, max)
            img += (-min)
            img /= (max - min + 1e-5)
            # img.add_(-min).div_(max - min + 1e-5)

        def norm_range(t, range):
            if range is not None:
                norm_ip(t, range[0], range[1])
            else:
                norm_ip(t, float(t.min().asscalar()), float(t.max().asscalar()))

        if scale_each is True:
            for t in tensor:  # loop over mini-batch dimension
                norm_range(t, range)
        else:
            norm_range(tensor, range)

    if tensor.shape[0] == 1:
        return tensor.reshape((-3, -2))

    # make the mini-batch of images into a grid
    nmaps = tensor.shape[0]  # size of the mini-batch being displayed
    # print(nmaps)
    xmaps = min(nrow, nmaps)  # number of grid columns (the nrow argument)
    ymaps = int(math.ceil(float(nmaps) / xmaps))  # number of grid rows, rounded up
    height, width = int(tensor.shape[2] + padding), int(tensor.shape[3] + padding)  # per-image cell height/width
    num_channels = tensor.shape[1]  # number of image channels
    # Grid filled with pad_value: same channel count as the input, spatial size equal to
    # the padded image size times the number of rows/columns, plus a border.
    grid = nd.full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
    k = 0
    for y in irange(ymaps):
        for x in irange(xmaps):
            if k >= nmaps:
                break
            # Place image k at grid row y, column x.
            grid[:, y * height + padding:y * height + padding + height - padding,
                 x * width + padding:x * width + padding + width - padding] = tensor[k]
            k = k + 1
    return grid
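`make_grid` references `is_ndarray` and `irange`, which are not part of this snippet. Minimal definitions consistent with how they are used here:

from mxnet import nd

def is_ndarray(tensor):
    # Type check assumed by make_grid.
    return isinstance(tensor, nd.NDArray)

# Python 2-style alias assumed by make_grid.
irange = range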
def forward_dense_prediction(self, data): """Returns the output of a forward pass for dense prediction tasks.""" _, guidance_large, data_small = data height, width = guidance_large.shape[2:] # Upsample data_small with nearest neighbors if applicable. if data_small.shape[2:] == guidance_large.shape[2:]: data_small_upsampled = data_small else: # Use skimage as mx.nd.UpSampling allows only for one scale factor. batch_size = guidance_large.shape[0] data_small_upsampled = nd.zeros( (batch_size, data_small.shape[1], height, width), ctx=data_small.context) for batch_num in range(batch_size): data_help = skimage.transform.resize(np.transpose( data_small[batch_num].asnumpy(), (1, 2, 0)), (height, width), order=0, anti_aliasing=False, mode='constant') data_small_upsampled[batch_num] = nd.array( np.transpose(data_help, (2, 0, 1)), ctx=data_small.context) # Generate features. with data_small.context: features = self.feature_generator(guidance_large) # Scale spatial features by height/width to get invariance to size. with data_small.context: spatial_scaling = nd.array([[[[width]], [[height]]]]) spatial_features = spatial_scaling * self.feature_factor_spatial.data( ) * features[:, :2] # Center remaining features if applicable. remaining_features = features[:, 2:] if self.data_mean is not None: remaining_features = ( remaining_features - self.guidance_mean.copyto(guidance_large.context)) # Scale remaining features and pass through embedding network. remaining_features = self.feature_factor_intensity.data( ) * remaining_features remaining_features = self.embedding(remaining_features) remaining_features = self.batchnorm(remaining_features) # Concatenate and reshape features. features = nd.concat(spatial_features, remaining_features, dim=1) features = features.reshape([0, 0, -1]) features_size = features.shape[-1] # Center input data if applicable. if self.data_mean is not None: data_small_upsampled = data_small_upsampled - self.data_mean.copyto( data_small.context) # Reshape input data. data_small_upsampled = data_small_upsampled.reshape([0, 0, -1]) # Pass small data through permutohedral convolution. data_large = self.convolutions(data_small_upsampled, features, 0, features_size, 0, features_size, self.weight_factor) # Reshape output data. data_large = data_large.reshape([0, 0, height, width]) # Revert centering if applicable. if self.data_mean is not None: data_large = data_large + self.data_mean.copyto(data_large.context) return data_large
def forward_colorization(self, data): """Returns the output of a forward pass for colorization.""" guidance_small, guidance_large, data_small = data height, width = guidance_large.shape[2:] # Upsample data_small and guidance_small with nearest neighbors. # Use skimage as mx.nd.UpSampling allows only for one scale factor. batch_size = guidance_large.shape[0] data_small_upsampled = nd.zeros( (batch_size, data_small.shape[1], height, width), ctx=data_small.context) for batch_num in range(batch_size): data_help = skimage.transform.resize(np.transpose( data_small[batch_num].asnumpy(), (1, 2, 0)), (height, width), order=0, anti_aliasing=False, mode='constant') data_small_upsampled[batch_num] = nd.array(np.transpose( data_help, (2, 0, 1)), ctx=data_small.context) guidance_small_upsampled = nd.zeros( (batch_size, guidance_small.shape[1], height, width), ctx=data_small.context) for batch_num in range(batch_size): guidance_help = skimage.transform.resize(np.transpose( guidance_small[batch_num].asnumpy(), (1, 2, 0)), (height, width), order=0, anti_aliasing=False, mode='constant') guidance_small_upsampled[batch_num] = nd.array( np.transpose(guidance_help, (2, 0, 1)), ctx=data_small.context) # Generate features for small input data. with data_small.context: features_small = self.feature_generator(guidance_small_upsampled) # Scale spatial features by height/width to get invariance to size. with data_small.context: spatial_scaling = nd.array([[[[width]], [[height]]]]) spatial_features = spatial_scaling * self.feature_factor_spatial.data( ) * features_small[:, :2] # Center remaining features if applicable. remaining_features = features_small[:, 2:] if self.data_mean is not None: remaining_features = remaining_features - self.guidance_mean.copyto( guidance_large.context) # Scale remaining feature and pass through embedding network. remaining_features = self.feature_factor_intensity.data( ) * remaining_features remaining_features = self.embedding(remaining_features) remaining_features = self.batchnorm(remaining_features) # Concatenate and reshape features_small. features_small = nd.concat(spatial_features, remaining_features, dim=1) features_small = features_small.reshape([0, 0, -1]) # Generate features for large output data. with data_small.context: features_large = self.feature_generator(guidance_large) # Scale spatial features by height/width to get invariance to size. spatial_features = spatial_scaling * self.feature_factor_spatial.data( ) * features_large[:, :2] # Center remaining features if applicable. remaining_features = features_large[:, 2:] if self.data_mean is not None: remaining_features = remaining_features - self.guidance_mean.copyto( guidance_large.context) # Center remaining features and pass through embedding network. remaining_features = self.feature_factor_intensity.data( ) * remaining_features remaining_features = self.embedding(remaining_features) remaining_features = self.batchnorm(remaining_features) # Concatenate and reshape features_large. features_large = nd.concat(spatial_features, remaining_features, dim=1) features_large = features_large.reshape([0, 0, -1]) # Concatenate input and output features. features = nd.concat(features_small, features_large, dim=2) features_in_size = features_small.shape[-1] features_out_size = features_large.shape[-1] # Reshape input data and guidance images. 
data_small_upsampled = data_small_upsampled.reshape([0, 0, -1]) guidance_small_upsampled = guidance_small_upsampled.reshape([0, 0, -1]) guidance_large = guidance_large.reshape([0, 0, -1]) # Compute offset between data_small and guidance_small offset_small = data_small_upsampled - guidance_small_upsampled # Pass offset_small through permutohedral convolutions. offset_large = self.convolutions(offset_small, features, 0, features_in_size, features_in_size, features_out_size, self.weight_factor) # Generate output data from estimated offset. data_large = offset_large + guidance_large return data_large.reshape([0, 0, height, width])
# ## Model 2: Distance to Administrator and Instructor as Additional Features # In[ ]: X_2 = nd.zeros((A.shape[0], 2)) node_distance_instructor = shortest_path_length(zkc.network, target=33) node_distance_administrator = shortest_path_length(zkc.network, target=0) for node in zkc.network.nodes(): X_2[node][0] = node_distance_administrator[node] X_2[node][1] = node_distance_instructor[node] # In[ ]: X_2 = nd.concat(X_1, X_2) model_2, features_2 = build_model(A, X_2) model_2(X_2) # # Train and Test Models # In[ ]: get_ipython().run_line_magic('time', '') from mxnet import autograd from mxnet.gluon import Trainer from mxnet.ndarray import sum as ndsum import numpy as np def train(model, features, X, X_train, y_train, epochs):
def traincvpr18(opt, train_data, val_data, ctx, networks): netEn = networks[0] netDe = networks[1] netD = networks[2] trainerEn = networks[5] trainerDe = networks[6] trainerD = networks[7] epochs = opt.epochs lambda1 = opt.lambda1 batch_size = opt.batch_size expname = opt.expname append = opt.append text_file = open(expname + "_trainloss.txt", "w") text_file.close() text_file = open(expname + "_validtest.txt", "w") text_file.close() GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss() L1_loss = gluon.loss.L2Loss() metric = mx.metric.CustomMetric(facc) metricl = mx.metric.CustomMetric(facc) metric2 = mx.metric.MSE() loss_rec_G2 = [] loss_rec_G = [] loss_rec_D = [] loss_rec_R = [] acc_rec = [] loss_rec_D2 = [] stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') logging.basicConfig(level=logging.DEBUG) for epoch in range(epochs): tic = time.time() btic = time.time() train_data.reset() iter = 0 for batch in train_data: ############################ # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z))) ########################### real_in = batch.data[0].as_in_context(ctx) real_out = batch.data[1].as_in_context(ctx) fake_latent = netEn(real_in) fake_out = netDe(fake_latent) fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out with autograd.record(): # Train with fake image # Use image pooling to utilize history imagesi output = netD(fake_concat) fake_label = nd.zeros(output.shape, ctx=ctx) errD_fake = GAN_loss(output, fake_label) metric.update([ fake_label, ], [ output, ]) real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out output = netD(real_concat) real_label = nd.ones(output.shape, ctx=ctx) errD_real = GAN_loss(output, real_label) errD = (errD_real + errD_fake) * 0.5 errD.backward() metric.update([ real_label, ], [ output, ]) trainerD.step(batch.data[0].shape[0]) ############################ # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z)) ########################### with autograd.record(): fake_latent = (netEn(real_in)) fake_out = netDe(fake_latent) fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out output = netD(fake_concat) real_label = nd.ones(output.shape, ctx=ctx) errG = GAN_loss( output, real_label) + L1_loss(real_out, fake_out) * lambda1 errR = L1_loss(real_out, fake_out) errG.backward() trainerDe.step(batch.data[0].shape[0]) trainerEn.step(batch.data[0].shape[0]) loss_rec_G.append( nd.mean(errG).asscalar() - nd.mean(errR).asscalar() * lambda1) loss_rec_D.append(nd.mean(errD).asscalar()) loss_rec_R.append(nd.mean(errR).asscalar()) name, acc = metric.get() acc_rec.append(acc) # Print log infomation every ten batches if iter % 10 == 0: name, acc = metric.get() logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic))) logging.info( 'discriminator loss = %f, generator loss = %f, binary training acc = %f , reconstruction error= %f at iter %d epoch %d' % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc, nd.mean(errR).asscalar(), iter, epoch)) iter = iter + 1 btic = time.time() name, acc = metric.get() _, acc2 = metricl.get() text_tl = open(expname + "_trainloss.txt", "a") text_tl.write('%f %f %f %f %f %f %f ' % (nd.mean(errD).asscalar(), 0, nd.mean(errG).asscalar(), acc, 0, nd.mean(errR).asscalar(), epoch)) text_file.close() metricl.reset() metric.reset() train_data.reset() logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc)) logging.info('time: %f' % (time.time() - tic)) if epoch % 10 == 0: filename = "checkpoints/" + expname + 
"_" + str( epoch) + "_D.params" netD.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_En.params" netEn.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_De.params" netDe.save_params(filename) fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) val_data.reset() text_file = open(expname + "_validtest.txt", "a") for vbatch in val_data: real_in = vbatch.data[0].as_in_context(ctx) real_out = vbatch.data[1].as_in_context(ctx) fake_latent = netEn(real_in) y = netDe(fake_latent) fake_out = y metric2.update([ fake_out, ], [ real_out, ]) _, acc2 = metric2.get() text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2))) metric2.reset() fake_img1T = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2T = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3T = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) fake_img = nd.concat(fake_img1, fake_img2, fake_img3, fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_' + str(epoch) + '.png') text_file.close() return [loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2]
def select_action( self, possible_action_arr, possible_predicted_q_arr, possible_reward_value_arr, possible_next_q_arr, possible_meta_data_arr=None ): ''' Select action by Q(state, action). Args: possible_action_arr: Tensor of actions. possible_predicted_q_arr: Tensor of Q-Values. possible_reward_value_arr: Tensor of reward values. possible_next_q_arr: Tensor of Q-Values in next time. possible_meta_data_arr: `mxnet.ndarray.NDArray` or `np.array` of meta data of the actions. Retruns: Tuple(`np.ndarray` of action., Q-Value) ''' key_arr = self.select_action_key(possible_action_arr, possible_predicted_q_arr) meta_data_arr = None if possible_meta_data_arr is not None: for i in range(possible_meta_data_arr.shape[0]): _meta_data_arr = possible_meta_data_arr[i, key_arr[i]] if i == 0: if isinstance(_meta_data_arr, nd.NDArray) is True: meta_data_arr = nd.expand_dims(_meta_data_arr, axis=0) else: meta_data_arr = np.expand_dims(_meta_data_arr, axis=0) else: if isinstance(_meta_data_arr, nd.NDArray) is True: meta_data_arr = nd.concat( meta_data_arr, nd.expand_dims(_meta_data_arr, axis=0), dim=0 ) else: meta_data_arr = np.concatenate( [ meta_data_arr, np.expand_dims(_meta_data_arr, axis=0), ], axis=0 ) action_arr = None predicted_q_arr = None reward_value_arr = None next_q_arr = None for i in range(possible_action_arr.shape[0]): _action_arr = possible_action_arr[i, key_arr[i]] _predicted_q_arr = possible_predicted_q_arr[i, key_arr[i]] _reward_value_arr = possible_reward_value_arr[i, key_arr[i]] _next_q_arr = possible_next_q_arr[i, key_arr[i]] if i == 0: action_arr = nd.expand_dims(_action_arr, axis=0) predicted_q_arr = nd.expand_dims(_predicted_q_arr, axis=0) reward_value_arr = nd.expand_dims(_reward_value_arr, axis=0) next_q_arr = nd.expand_dims(_next_q_arr, axis=0) else: action_arr = nd.concat( action_arr, nd.expand_dims(_action_arr, axis=0), dim=0 ) predicted_q_arr = nd.concat( predicted_q_arr, nd.expand_dims(_predicted_q_arr, axis=0), dim=0 ) reward_value_arr = nd.concat( reward_value_arr, nd.expand_dims(_reward_value_arr, axis=0), dim=0 ) next_q_arr = nd.concat( next_q_arr, nd.expand_dims(_next_q_arr, axis=0), dim=0 ) return ( action_arr, predicted_q_arr, reward_value_arr, next_q_arr, meta_data_arr )
# y = 1.2x - 3.4x^2 + 5.6x^3 + 5.0 + noise
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import matplotlib as mpl
# mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt

num_train = 100
num_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5.0

x = nd.random.normal(shape=(num_train + num_test, 1))
X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b
y += .1 * nd.random.normal(shape=y.shape)

def train(X_train, X_test, y_train, y_test):
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(1))
    net.initialize()
    learning_rate = 0.01
    epochs = 100
    batch_size = min(10, y_train.shape[0])
    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size,
def facc(label, pred): pred = pred.ravel() label = label.ravel() return ((pred > 0.5) == label).mean() lbllist = [] scorelist = [] test_data.reset() count = 0 for batch in (test_data): print(count) count += 1 real_in = batch.data[0].as_in_context(ctx) real_out = batch.data[1].as_in_context(ctx) lbls = batch.label[0].as_in_context(ctx) out = (netG(real_in)) real_concat = nd.concat(real_in, real_in, dim=1) #real_concat = nd.concat(out, out, dim=1) output = netD(real_concat) output = nd.mean(output, (1, 3, 2)).asnumpy() lbllist = lbllist + list(lbls.asnumpy()) scorelist = scorelist + list(output) visualize(out[0, :, :, :]) plt.savefig('outputs/testnet_T' + str(count) + '.png') print((lbllist)) print((scorelist)) fpr, tpr, _ = roc_curve(lbllist, scorelist, 0) roc_auc = auc(fpr, tpr) print(roc_auc)
def train(cep, pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, netD2, netDS, trainerEn, trainerDe, trainerD, trainerD2, trainerSD, lambda1, batch_size, expname, append=True, useAE=False): tp_file = open(expname + "_trainloss.txt", "w") tp_file.close() text_file = open(expname + "_validtest.txt", "w") text_file.close() #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf, opt.ngf, opt.append) GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss() L1_loss = gluon.loss.L2Loss() image_pool = imagePool.ImagePool(pool_size) metric = mx.metric.CustomMetric(facc) metric2 = mx.metric.CustomMetric(facc) metricStrong = mx.metric.CustomMetric(facc) metricMSE = mx.metric.MSE() loss_rec_G = [] loss_rec_D = [] loss_rec_R = [] acc_rec = [] acc2_rec = [] loss_rec_D2 = [] loss_rec_G2 = [] lr = 2.0 * 512 stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') logging.basicConfig(level=logging.DEBUG) if cep == -1: cep = 0 else: netEn.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_En.params', ctx=ctx) netDe.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_De.params', ctx=ctx) netD.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D.params', ctx=ctx) netD2.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_D2.params', ctx=ctx) netDS.load_params('checkpoints/' + opt.expname + '_' + str(cep) + '_SD.params', ctx=ctx) iter = 0 for epoch in range(cep + 1, epochs): tic = time.time() btic = time.time() train_data.reset() #print('learning rate : '+str(trainerD.learning_rate )) for batch in train_data: ############################ # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z))) ########################### if ctx == mx.cpu(): ct = mx.cpu() else: ct = mx.gpu() real_in = batch.data[0] #.as_in_context(ctx) real_out = batch.data[1] #.as_in_context(ctx) if iter == 0: latent_shape = (batch_size, 512, 1, 1) #code.shape out_l_shape = (batch_size, 1, 1, 1) #netD2((code)).shape out_i_shape = (batch_size, 1, 1, 1) #netD(netDe(code)).shape out_s_shape = (batch_size, 1, 1, 1) #netSD(netDe(code)).shape real_in = gluon.utils.split_and_load(real_in, ctx) real_out = gluon.utils.split_and_load(real_out, ctx) fake_latent = [netEn(r) for r in real_in] real_latent = nd.random.uniform(low=-1, high=1, shape=latent_shape) real_latent = gluon.utils.split_and_load(real_latent, ctx) fake_out = [netDe(f) for f in fake_latent] fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out eps2 = nd.random.uniform(low=-1, high=1, shape=latent_shape, ctx=ct) eps2 = gluon.utils.split_and_load(eps2, ctx) if epoch > 150: # (1/float(batch_size))*512*150:# and epoch%10==0: print('Mining..') mu = nd.random.uniform(low=-1, high=1, shape=latent_shape, ctx=ct) #isigma = nd.ones((batch_size,64,1,1),ctx=ctx)*0.000001 mu.attach_grad() #sigma.attach_grad() images = netDe(mu) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_fakespre_' + str(epoch) + '.png') eps2 = gluon.utils.split_and_load(mu, ctx) for e in eps2: e.attach_grad() for ep2 in range(1): with autograd.record(): #eps = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx) # #eps2 = gluon.utils.split_and_load(nd.tanh(mu),ctx) #+nd.multiply(eps,sigma))#nd.random.uniform( low=-1, high=1, 
shape=fake_latent.shape, ctx=ctx) rec_output = [netDS(netDe(e)) for e in eps2] fake_label = gluon.utils.split_and_load( nd.zeros(out_s_shape), ctx) errGS = [ GAN_loss(r, f) for r, f in zip(rec_output, fake_label) ] for e in errGS: e.backward() for idx, _ in enumerate(eps2): eps2[idx] = nd.tanh(eps2[idx] - lr / eps2[idx].shape[0] * eps2[idx].grad) images = netDe((eps2[0])) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + str(ep2) + '_fakespost_' + str(epoch) + '.png') #eps2 = nd.tanh(mu)#+nd.multiply(eps,sigma))#nd.random.uniform( low=-1, high=1, shape=fake_latent.shape, ctx=ctx) with autograd.record(): #eps2 = gluon.utils.split_and_load(eps2,ctx) # Train with fake image # Use image pooling to utilize history imagesi output = [netD(f) for f in fake_concat] output2 = [netD2(f) for f in fake_latent] fake_label = nd.zeros(out_i_shape) fake_label = gluon.utils.split_and_load(fake_label, ctx) fake_latent_label = nd.zeros(out_l_shape) fake_latent_label = gluon.utils.split_and_load( fake_latent_label, ctx) eps = gluon.utils.split_and_load( nd.random.uniform(low=-1, high=1, shape=latent_shape), ctx) rec_output = [netD(netDe(e)) for e in eps] errD_fake = [ GAN_loss(r, f) for r, f in zip(rec_output, fake_label) ] errD_fake2 = [ GAN_loss(o, f) for o, f in zip(output, fake_label) ] errD2_fake = [ GAN_loss(o, f) for o, f in zip(output2, fake_latent_label) ] for f, o in zip(fake_label, rec_output): metric.update([ f, ], [ o, ]) for f, o in zip(fake_latent_label, output2): metric2.update([ f, ], [ o, ]) real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out output = [netD(r) for r in real_concat] output2 = [netD2(r) for r in real_latent] real_label = gluon.utils.split_and_load( nd.ones(out_i_shape), ctx) real_latent_label = gluon.utils.split_and_load( nd.ones(out_l_shape), ctx) errD_real = [ GAN_loss(o, r) for o, r in zip(output, real_label) ] errD2_real = [ GAN_loss(o, r) for o, r in zip(output2, real_latent_label) ] for e1, e2, e4, e5 in zip(errD_real, errD_fake, errD2_real, errD2_fake): err = (e1 + e2) * 0.5 + (e5 + e4) * 0.5 err.backward() for f, o in zip(real_label, output): metric.update([ f, ], [ o, ]) for f, o in zip(real_latent_label, output2): metric2.update([ f, ], [ o, ]) trainerD.step(batch.data[0].shape[0]) trainerD2.step(batch.data[0].shape[0]) nd.waitall() with autograd.record(): strong_output = [netDS(netDe(e)) for e in eps] strong_real = [netDS(f) for f in fake_concat] errs1 = [ GAN_loss(r, f) for r, f in zip(strong_output, fake_label) ] errs2 = [ GAN_loss(r, f) for r, f in zip(strong_real, real_label) ] for f, s in zip(fake_label, strong_output): metricStrong.update([ f, ], [ s, ]) for f, s in zip(real_label, strong_real): metricStrong.update([ f, ], [ s, ]) for e1, e2 in zip(errs1, errs2): strongerr = 0.5 * (e1 + e2) strongerr.backward() trainerSD.step(batch.data[0].shape[0]) nd.waitall() ############################ # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z)) ########################### with autograd.record(): sh = out_l_shape #eps2 = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) # #eps = nd.random.uniform( low=-1, high=1, shape=noiseshape, ctx=ctx) #if epoch>100: # eps2 = nd.multiply(eps2,sigma)+mu # eps2 = nd.tanh(eps2) #else: #eps = nd.random.uniform( 
low=-1, high=1, shape=noiseshape, ctx=ctx) #eps2 = nd.concat(eps,eps2,dim=0) rec_output = [netD(netDe(e)) for e in eps2] fake_latent = [(netEn(r)) for r in real_in] output2 = [netD2(f) for f in fake_latent] fake_out = [netDe(f) for f in fake_latent] fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out output = [netD(f) for f in fake_concat] real_label = gluon.utils.split_and_load( nd.ones(out_i_shape), ctx) real_latent_label = gluon.utils.split_and_load( nd.ones(out_l_shape), ctx) errG2 = [ GAN_loss(r, f) for r, f in zip(rec_output, real_label) ] errR = [ L1_loss(r, f) * lambda1 for r, f in zip(real_out, fake_out) ] errG = [ 10 * GAN_loss(r, f) for r, f in zip(output2, real_latent_label) ] # +errG2+errR for e1, e2, e3 in zip(errG, errG2, errR): e = e1 + e2 + e3 e.backward() trainerDe.step(batch.data[0].shape[0]) trainerEn.step(batch.data[0].shape[0]) nd.waitall() errD = (errD_real[0] + errD_fake[0]) * 0.5 errD2 = (errD2_real[0] + errD2_fake[0]) * 0.5 loss_rec_G2.append(nd.mean(errG2[0]).asscalar()) loss_rec_G.append( nd.mean(nd.mean(errG[0])).asscalar() - nd.mean(errG2[0]).asscalar() - nd.mean(errR[0]).asscalar()) loss_rec_D.append(nd.mean(errD[0]).asscalar()) loss_rec_R.append(nd.mean(errR[0]).asscalar()) loss_rec_D2.append(nd.mean(errD2[0]).asscalar()) _, acc2 = metric2.get() name, acc = metric.get() acc_rec.append(acc) acc2_rec.append(acc2) # Print log infomation every ten batches if iter % 10 == 0: _, acc2 = metric2.get() name, acc = metric.get() _, accStrong = metricStrong.get() logging.info('speed: {} samples/s'.format( batch_size / (time.time() - btic))) #print(errD) #logging.info('discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, SD loss = %f, D acc = %f , D2 acc = %f, DS acc = %f, reconstruction error= %f at iter %d epoch %d' # % (nd.mean(errD[0]).asscalar(),nd.mean(errD2[0]).asscalar(), # nd.mean(errG[0]-errG2[0]-errR[0]).asscalar(),nd.mean(errG2[0]).asscalar(),nd.mean(strongerr[0]).asscalar() ,acc,acc2,accStrong[0],nd.mean(errR[0]).asscalar() ,iter, epoch)) iter = iter + 1 btic = time.time() name, acc = metric.get() _, acc2 = metric2.get() #tp_file = open(expname + "_trainloss.txt", "a") #tp_file.write(str(nd.mean(errG2).asscalar()) + " " + str( # nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) + " " + str( # nd.mean(errD).asscalar()) + " " + str(nd.mean(errD2).asscalar()) + " " + str(nd.mean(errR).asscalar()) +" "+str(acc) + " " + str(acc2)+"\n") #tp_file.close() metric.reset() metric2.reset() train_data.reset() metricStrong.reset() logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc)) logging.info('time: %f' % (time.time() - tic)) if epoch % 2 == 0: # and epoch>0: text_file = open(expname + "_validtest.txt", "a") filename = "checkpoints/" + expname + "_" + str( epoch) + "_D.params" netD.save_parameters(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_D2.params" netD2.save_parameters(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_En.params" netEn.save_parameters(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_De.params" netDe.save_parameters(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_SD.params" netDS.save_parameters(filename) fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) fake_img4 = nd.concat(real_in[3], real_out[3], 
fake_out[3], dim=1) val_data.reset() text_file = open(expname + "_validtest.txt", "a") for vbatch in val_data: real_in = vbatch.data[0] real_out = vbatch.data[1] real_in = gluon.utils.split_and_load(real_in, ctx) real_out = gluon.utils.split_and_load(real_out, ctx) fake_latent = [netEn(r) for r in real_in] fake_out = [netDe(f) for f in fake_latent] for f, r in zip(fake_out, real_out): metricMSE.update([ f, ], [ r, ]) _, acc2 = metricMSE.get() toterrR = 0 for e in errR: toterrR += nd.mean(e).asscalar() text_file.write("%s %s %s\n" % (str(epoch), toterrR, str(acc2))) metricMSE.reset() return ([ loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2, acc2_rec ])
def generate_learned_samples(self): ''' Draw and generate data. Returns: `Tuple` data. The shape is ... - `mxnet.ndarray` of observed data points in training. - `mxnet.ndarray` of supervised data in training. - `mxnet.ndarray` of observed data points in test. - `mxnet.ndarray` of supervised data in test. ''' for epoch in range(self.epochs): training_batch_arr, test_batch_arr = None, None for i in range(self.batch_size): file_key = np.random.randint(low=0, high=len(self.__train_csv_path_list)) train_observed_arr = self.__unlabeled_csv_extractor.extract( self.__train_csv_path_list[file_key] ) test_file_key = np.random.randint(low=0, high=len(self.__test_csv_path_list)) test_observed_arr = self.__unlabeled_csv_extractor.extract( self.__test_csv_path_list[test_file_key] ) train_observed_arr = np.identity( 1 + int(train_observed_arr.max() + (train_observed_arr.min() * -1)) )[ (train_observed_arr.reshape(train_observed_arr.shape[0], -1) + (train_observed_arr.min() * -1)).astype(int) ] test_observed_arr = np.identity( 1 + int(test_observed_arr.max() + (test_observed_arr.min() * -1)) )[ (test_observed_arr.reshape(test_observed_arr.shape[0], -1) + (test_observed_arr.min() * -1)).astype(int) ] start_row = np.random.randint(low=0, high=train_observed_arr.shape[0] - self.seq_len) test_start_row = np.random.randint(low=0, high=test_observed_arr.shape[0] - self.seq_len) train_observed_arr = train_observed_arr[start_row:start_row+self.seq_len] test_observed_arr = test_observed_arr[test_start_row:test_start_row+self.seq_len] if training_batch_arr is None: training_batch_arr = nd.expand_dims( nd.ndarray.array(train_observed_arr, ctx=self.__ctx), axis=0 ) else: training_batch_arr = nd.concat( training_batch_arr, nd.expand_dims( nd.ndarray.array(train_observed_arr, ctx=self.__ctx), axis=0 ), dim=0 ) if test_batch_arr is None: test_batch_arr = nd.expand_dims( nd.ndarray.array(test_observed_arr, ctx=self.__ctx), axis=0 ) else: test_batch_arr = nd.concat( test_batch_arr, nd.expand_dims( nd.ndarray.array(test_observed_arr, ctx=self.__ctx), axis=0 ), dim=0 ) training_batch_arr = self.pre_normalize(training_batch_arr) test_batch_arr = self.pre_normalize(test_batch_arr) if self.__noiseable_data is not None: training_batch_arr = self.__noiseable_data.noise(training_batch_arr) yield training_batch_arr, training_batch_arr, test_batch_arr, test_batch_arr
def hybrid_forward(self, F, x, c1):
    c1 = self.c1_block(c1)
    x = self.aspp(x)
    x = F.contrib.BilinearResize2D(x, **self._up_kwargs)
    return self.block(F.concat(c1, x, dim=1))
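# A minimal sketch of the shape bookkeeping behind the decoder above, assuming an
# NCHW layout and arbitrary example sizes: the ASPP output is bilinearly resized to
# the low-level feature map's spatial size, then both are concatenated on channels.
from mxnet import nd
c1 = nd.zeros((1, 48, 64, 64))                          # low-level features (example size)
x = nd.zeros((1, 256, 16, 16))                          # ASPP output (example size)
x = nd.contrib.BilinearResize2D(x, height=64, width=64)
print(nd.concat(c1, x, dim=1).shape)                    # (1, 304, 64, 64)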
def forward(self, inpt):
    # Run the forward LSTM on the original sequence and the backward LSTM on the
    # time-reversed sequence, then concatenate both outputs along the feature axis.
    fwd = self._lstm_fwd(inpt)
    bwd_inpt = nd.flip(inpt, 0)
    bwd = self._lstm_bwd(bwd_inpt)
    bwd = nd.flip(bwd, 0)
    return nd.concat(fwd, bwd, dim=2)
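# A self-contained sketch of the same flip-and-concat BiLSTM pattern, assuming
# example hidden/input sizes and the TNC layout (seq_len, batch, features);
# the two gluon LSTM layers stand in for the module's _lstm_fwd / _lstm_bwd.
from mxnet import nd, gluon
lstm_fwd = gluon.rnn.LSTM(hidden_size=8, layout='TNC')
lstm_bwd = gluon.rnn.LSTM(hidden_size=8, layout='TNC')
lstm_fwd.initialize()
lstm_bwd.initialize()
x = nd.random.uniform(shape=(5, 2, 4))        # (seq_len, batch, features)
fwd = lstm_fwd(x)                             # (5, 2, 8)
bwd = nd.flip(lstm_bwd(nd.flip(x, 0)), 0)     # run backwards, restore time order
print(nd.concat(fwd, bwd, dim=2).shape)       # (5, 2, 16)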
def tensor_save_bgrimage(tensor, filename, cuda=False):
    # Reorder the BGR channels to RGB, then reuse the RGB save routine.
    (b, g, r) = F.split(tensor, num_outputs=3, axis=0)
    tensor = F.concat(r, g, b, dim=0)
    tensor_save_rgbimage(tensor, filename, cuda)
def hybrid_forward(self, F, x, *args, **kwargs):
    if self.outermost:
        return self.model(x)
    else:
        # Skip connection: concatenate the input with the block output along the
        # channel axis (concat takes positional arrays, not a list).
        return F.concat(x, self.model(x), dim=1)
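# A quick illustration of why dim=1 is the right axis for the skip connection above,
# assuming NCHW tensors with arbitrary example shapes: concat stacks the channel dims.
from mxnet import nd
a = nd.zeros((2, 3, 8, 8))   # 3-channel feature map
b = nd.ones((2, 5, 8, 8))    # 5-channel feature map
print(nd.concat(a, b, dim=1).shape)  # (2, 8, 8, 8)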
def train(pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, netD2, trainerEn, trainerDe, trainerD, trainerD2, lambda1, batch_size, expname, append=True, useAE=False): tp_file = open(expname + "_trainloss.txt", "w") tp_file.close() text_file = open(expname + "_validtest.txt", "w") text_file.close() #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf, opt.ngf, opt.append) GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss() L1_loss = gluon.loss.L2Loss() image_pool = imagePool.ImagePool(pool_size) metric = mx.metric.CustomMetric(facc) metric2 = mx.metric.CustomMetric(facc) metricMSE = mx.metric.MSE() loss_rec_G = [] loss_rec_D = [] loss_rec_R = [] acc_rec = [] acc2_rec = [] loss_rec_D2 = [] loss_rec_G2 = [] lr = 0.002 #mu = nd.random_normal(loc=0, scale=1, shape=(batch_size/2,64,1,1), ctx=ctx) mu = nd.random.uniform(low=-1, high=1, shape=(batch_size / 2, 64, 1, 1), ctx=ctx) #mu = nd.zeros((batch_size/2,64,1,1),ctx=ctx) sigma = nd.ones((batch_size / 2, 64, 1, 1), ctx=ctx) * 0.02 mu.attach_grad() sigma.attach_grad() stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') logging.basicConfig(level=logging.DEBUG) for epoch in range(epochs): tic = time.time() btic = time.time() train_data.reset() iter = 0 #print('learning rate : '+str(trainerD.learning_rate )) for batch in train_data: ############################ # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z))) ########################### real_in = batch.data[0].as_in_context(ctx) real_out = batch.data[1].as_in_context(ctx) fake_latent = netEn(real_in) #real_latent = nd.random_normal(loc=0, scale=1, shape=fake_latent.shape, ctx=ctx) real_latent = nd.random.uniform(low=-1, high=1, shape=fake_latent.shape, ctx=ctx) fake_out = netDe(fake_latent) fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out with autograd.record(): # Train with fake image # Use image pooling to utilize history imagesi output = netD(fake_concat) output2 = netD2(fake_latent) fake_label = nd.zeros(output.shape, ctx=ctx) fake_latent_label = nd.zeros(output2.shape, ctx=ctx) noiseshape = (fake_latent.shape[0] / 2, fake_latent.shape[1], fake_latent.shape[2], fake_latent.shape[3]) eps2 = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) # eps = nd.random.uniform(low=-1, high=1, shape=noiseshape, ctx=ctx) if epoch > 100: eps2 = nd.multiply(eps2, sigma) + mu eps2 = nd.tanh(eps2) else: eps2 = nd.random.uniform(low=-1, high=1, shape=noiseshape, ctx=ctx) eps2 = nd.concat(eps, eps2, dim=0) rec_output = netD(netDe(eps2)) errD_fake = GAN_loss(rec_output, fake_label) errD_fake2 = GAN_loss(output, fake_label) errD2_fake = GAN_loss(output2, fake_latent_label) metric.update([ fake_label, ], [ output, ]) metric2.update([ fake_latent_label, ], [ output2, ]) real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out output = netD(real_concat) output2 = netD2(real_latent) real_label = nd.ones(output.shape, ctx=ctx) real_latent_label = nd.ones(output2.shape, ctx=ctx) errD_real = GAN_loss(output, real_label) errD2_real = GAN_loss(output2, real_latent_label) #errD = (errD_real + 0.5*(errD_fake+errD_fake2)) * 0.5 errD = (errD_real + errD_fake) * 0.5 errD2 = (errD2_real + errD2_fake) * 0.5 totalerrD = errD + errD2 totalerrD.backward() #errD2.backward() metric.update([ real_label, ], [ output, ]) metric2.update([ real_latent_label, ], [ output2, ]) trainerD.step(batch.data[0].shape[0]) trainerD2.step(batch.data[0].shape[0]) ############################ # (2) Update G network: maximize log(D(x, G(x, 
z))) - lambda1 * L1(y, G(x, z)) ########################### with autograd.record(): sh = fake_latent.shape eps2 = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) # eps = nd.random.uniform(low=-1, high=1, shape=noiseshape, ctx=ctx) if epoch > 100: eps2 = nd.multiply(eps2, sigma) + mu eps2 = nd.tanh(eps2) else: eps2 = nd.random.uniform(low=-1, high=1, shape=noiseshape, ctx=ctx) eps2 = nd.concat(eps, eps2, dim=0) rec_output = netD(netDe(eps2)) fake_latent = (netEn(real_in)) output2 = netD2(fake_latent) fake_out = netDe(fake_latent) fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out output = netD(fake_concat) real_label = nd.ones(output.shape, ctx=ctx) real_latent_label = nd.ones(output2.shape, ctx=ctx) errG2 = GAN_loss(rec_output, real_label) errR = L1_loss(real_out, fake_out) * lambda1 errG = 10.0 * GAN_loss(output2, real_latent_label) + errG2 + errR errG.backward() trainerDe.step(batch.data[0].shape[0]) trainerEn.step(batch.data[0].shape[0]) loss_rec_G2.append(nd.mean(errG2).asscalar()) loss_rec_G.append( nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) loss_rec_D.append(nd.mean(errD).asscalar()) loss_rec_R.append(nd.mean(errR).asscalar()) loss_rec_D2.append(nd.mean(errD2).asscalar()) _, acc2 = metric2.get() name, acc = metric.get() acc_rec.append(acc) acc2_rec.append(acc2) # Print log infomation every ten batches if iter % 10 == 0: _, acc2 = metric2.get() name, acc = metric.get() logging.info('speed: {} samples/s'.format( batch_size / (time.time() - btic))) #print(errD) logging.info( 'discriminator loss = %f, D2 loss = %f, generator loss = %f, G2 loss = %f, binary training acc = %f , D2 acc = %f, reconstruction error= %f at iter %d epoch %d' % (nd.mean(errD).asscalar(), nd.mean(errD2).asscalar(), nd.mean(errG - errG2 - errR).asscalar(), nd.mean(errG2).asscalar(), acc, acc2, nd.mean(errR).asscalar(), iter, epoch)) iter = iter + 1 btic = time.time() name, acc = metric.get() _, acc2 = metric2.get() tp_file = open(expname + "_trainloss.txt", "a") tp_file.write( str(nd.mean(errG2).asscalar()) + " " + str( nd.mean(nd.mean(errG)).asscalar() - nd.mean(errG2).asscalar() - nd.mean(errR).asscalar()) + " " + str(nd.mean(errD).asscalar()) + " " + str(nd.mean(errD2).asscalar()) + " " + str(nd.mean(errR).asscalar()) + " " + str(acc) + " " + str(acc2) + "\n") tp_file.close() metric.reset() metric2.reset() train_data.reset() logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc)) logging.info('time: %f' % (time.time() - tic)) if epoch % 10 == 0: # and epoch>0: text_file = open(expname + "_validtest.txt", "a") filename = "checkpoints/" + expname + "_" + str( epoch) + "_D.params" netD.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_D2.params" netD2.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_En.params" netEn.save_params(filename) filename = "checkpoints/" + expname + "_" + str( epoch) + "_De.params" netDe.save_params(filename) fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) fake_img4 = nd.concat(real_in[3], real_out[3], fake_out[3], dim=1) val_data.reset() text_file = open(expname + "_validtest.txt", "a") for vbatch in val_data: real_in = vbatch.data[0].as_in_context(ctx) real_out = vbatch.data[1].as_in_context(ctx) fake_latent = netEn(real_in) y = netDe(fake_latent) fake_out = y 
metricMSE.update([ fake_out, ], [ real_out, ]) _, acc2 = metricMSE.get() text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2))) metricMSE.reset() images = netDe(eps2) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_fakes_' + str(epoch) + '.png') text_file.close() # Do 10 iterations of sampler update fake_img1T = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1) fake_img2T = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1) fake_img3T = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1) #fake_img4T = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1) fake_img = nd.concat(fake_img1, fake_img2, fake_img3, fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_' + str(epoch) + '.png') if epoch > 100: for ep2 in range(10): with autograd.record(): #eps = nd.random_normal(loc=0, scale=1, shape=noiseshape, ctx=ctx) # eps = nd.random.uniform(low=-1, high=1, shape=noiseshape, ctx=ctx) eps2 = nd.random_normal(loc=0, scale=0.02, shape=noiseshape, ctx=ctx) eps2 = nd.tanh(eps2 * sigma + mu) eps2 = nd.concat(eps, eps2, dim=0) rec_output = netD(netDe(eps2)) fake_label = nd.zeros(rec_output.shape, ctx=ctx) errGS = GAN_loss(rec_output, fake_label) errGS.backward() mu -= lr / mu.shape[0] * mu.grad sigma -= lr / sigma.shape[0] * sigma.grad print('mu ' + str(mu[0, 0, 0, 0].asnumpy()) + ' sigma ' + str(sigma[0, 0, 0, 0].asnumpy())) images = netDe(eps2) fake_img1T = nd.concat(images[0], images[1], images[2], dim=1) fake_img2T = nd.concat(images[3], images[4], images[5], dim=1) fake_img3T = nd.concat(images[6], images[7], images[8], dim=1) fake_img = nd.concat(fake_img1T, fake_img2T, fake_img3T, dim=2) visual.visualize(fake_img) plt.savefig('outputs/' + expname + '_fakespost_' + str(epoch) + '.png') return ([ loss_rec_D, loss_rec_G, loss_rec_R, acc_rec, loss_rec_D2, loss_rec_G2, acc2_rec ])
def preprocess_batch(batch):
    # Move channels to axis 0, reorder RGB -> BGR, then restore the batch axis.
    batch = F.swapaxes(batch, 0, 1)
    (r, g, b) = F.split(batch, num_outputs=3, axis=0)
    batch = F.concat(b, g, r, dim=0)
    batch = F.swapaxes(batch, 0, 1)
    return batch
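# A small sanity check for the channel reordering above, assuming F is mxnet.ndarray
# and batches use the NCHW layout; the constructed batch is an arbitrary example.
from mxnet import nd as F
sample = F.ones((3, 4, 4)) * F.array([1, 2, 3]).reshape((3, 1, 1))  # channels 1, 2, 3
batch = F.stack(sample, F.zeros((3, 4, 4)))                         # shape (2, 3, 4, 4)
print(preprocess_batch(batch)[0, :, 0, 0])                          # [3. 2. 1.] -> RGB became BGR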
get_accuracy( nd.argmax(pred_val, axis=1).asnumpy(), label.asnumpy())) loss_accumulate_val = loss_accumulate_val + nd.sum(loss).asscalar() pred_val_accumulate.append(nd.argmax(pred_val, axis=1)) label_val_accumulate.append(label) # get indexes of wrong predictions if False: u = list(pred_val.argmax_channel().asnumpy()) # prediction idx = [i for i in range(u.__len__()) if u[i] != label_val[i]] # index of prediction == 0 # compute the confusion matrix on validation set cm = confusion_matrix( nd.concat(*pred_val_accumulate, dim=0).asnumpy(), nd.concat(*label_val_accumulate, dim=0).asnumpy()) logger.info('[%d %d] [%d %d]' % (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])) logger.info('Epoch %d/%d: train [acc, loss] = [%.4f, %.4f], ' 'val [acc, loss] = [%.4f, %.4f], LR = %.5f, GS = %d' % (e, opts.num_epochs, np.array(acc_accumulate).mean(), loss_accumulate / im_train.shape[0], np.array(acc_accumulate_val).mean(), loss_accumulate_val / im_val.shape[0], trainer.learning_rate, global_step)) sw.add_scalar(tag='acc', value=('training', np.array(acc_accumulate).mean()), global_step=e) sw.add_scalar(tag='acc',
def debug_bilinear(x, W, y, input_size, seq_len, batch_size, num_outputs=1, bias_x=False, bias_y=False): """ Do xWy :param x: (input_size x seq_len) x batch_size :param W: :param y: (input_size x seq_len) x batch_size :param input_size: :param seq_len: :param batch_size: :param num_outputs: :param bias_x: :param bias_y: :return: [seq_len_y x seq_len_x if output_size == 1 else seq_len_y x num_outputs x seq_len_x] x batch_size """ import dynet as dy xd = dy.inputTensor(x, batched=True) xm = nd.array(x) yd = dy.inputTensor(y, batched=True) ym = nd.array(y) Wd = dy.inputTensor(W) Wm = nd.array(W) def allclose(dyarray, mxarray): a = dyarray.npvalue() b = mxarray.asnumpy() return np.allclose(a, b) if bias_x: xd = dy.concatenate( [xd, dy.inputTensor(np.ones((1, seq_len), dtype=np.float32))]) xm = nd.concat(xm, nd.ones((1, seq_len, batch_size)), dim=0) # print(allclose(xd, xm)) if bias_y: yd = dy.concatenate( [yd, dy.inputTensor(np.ones((1, seq_len), dtype=np.float32))]) ym = nd.concat(ym, nd.ones((1, seq_len, batch_size)), dim=0) # print(allclose(yd, ym)) nx, ny = input_size + bias_x, input_size + bias_y # W: (num_outputs x ny) x nx lind = Wd * xd linm = nd.dot(Wm, xm) # print(allclose(lind, linm)) if num_outputs > 1: lind = dy.reshape(lind, (ny, num_outputs * seq_len), batch_size=batch_size) # linm = nd.reshape(linm, (ny, num_outputs * seq_len, batch_size)) linm = reshape_fortran(linm, (ny, num_outputs * seq_len, batch_size)) # print(allclose(lind, linm)) blind = dy.transpose(yd) * lind ym = ym.transpose([2, 1, 0]) linm = linm.transpose([2, 1, 0]) blinm = nd.batch_dot(linm, ym, transpose_b=True) blinm = blinm.transpose([2, 1, 0]) print(np.allclose(blind.npvalue(), blinm.asnumpy())) if num_outputs > 1: blind = dy.reshape(blind, (seq_len, num_outputs, seq_len), batch_size=batch_size) blinm = reshape_fortran(blinm, (seq_len, num_outputs, seq_len, batch_size)) print(allclose(blind, blinm)) return blind
# Overfitting: the model's training error is much smaller than its error on the test set.
## Example: fitting a cubic polynomial
# y = 1.2x - 3.4x^2 + 5.6x^3 + 5.0 + noise
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon

num_train = 100
num_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5.0
x = nd.random.normal(shape=(num_train + num_test, 1))  # random inputs
X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))  # features x, x^2, x^3
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b
y += .1 * nd.random.normal(shape=y.shape)  # add noise
print('x:', x[:5], 'X:', X[:5], 'y:', y[:5])

### Training
import matplotlib as mpl  # plotting
mpl.rcParams['figure.dpi'] = 120  # figure resolution
import matplotlib.pyplot as plt  # plotting

def train(X_train, X_test, y_train, y_test):
    # linear regression model
    net = gluon.nn.Sequential()
    with net.name_scope():
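# The train() above stops right after name_scope(); this is a hedged sketch of how
# the rest of such a fitting loop usually looks -- the epoch count, learning rate,
# and batch size here are assumptions, not values from the original script.
def train_sketch(X_train, X_test, y_train, y_test, epochs=100, lr=0.01, batch_size=10):
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(1))
    net.initialize()
    square_loss = gluon.loss.L2Loss()
    data_iter = gluon.data.DataLoader(
        gluon.data.ArrayDataset(X_train, y_train), batch_size, shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    train_loss, test_loss = [], []
    for _ in range(epochs):
        for data, label in data_iter:
            with autograd.record():
                loss = square_loss(net(data), label)
            loss.backward()
            trainer.step(batch_size)
        # Track mean train/test loss after each epoch to visualize over/underfitting.
        train_loss.append(square_loss(net(X_train), y_train).mean().asscalar())
        test_loss.append(square_loss(net(X_test), y_test).mean().asscalar())
    plt.plot(train_loss, label='train')
    plt.plot(test_loss, label='test')
    plt.legend()
    return net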
def forward(self, input_vec, loss=None, training=True): # print('************* ' + str(input_vec.shape[1]) + ' *************') # print('############# ' + str(input_vec.shape) + ' #############') assert input_vec.shape[1] == self.input_dimension # get inputs for every slot(including global) inputs = {} for slot in self.slots: slot_input = input_vec[:, self.slot_dimension[slot][0]:self. slot_dimension[slot][1]] global_input = input_vec[:, self.global_dimension[0][0]:self. global_dimension[0][1]] inputs[slot] = nd.concat(*[slot_input, global_input], dim=1) batch_size = input_vec.shape[0] zero_slot_input = nd.zeros((batch_size, 25)) input_global = [zero_slot_input] for seg in self.global_dimension: input_global.append(input_vec[:, seg[0]:seg[1]]) inputs['global'] = nd.concat(*input_global, dim=1) layer = [] # inputs -> first_hidden_layer if (not self.sort_input_vec) and self.state_feature != 'dip': layer.append([]) for slot in self.slots: layer[0].append(self.input_trans[slot](inputs[slot], training=training)) layer[0].append(self.input_trans['global'](inputs['global'], training=training)) elif self.state_feature == 'dip': sorted_inputs = [] for slot in self.slots: sorted_inputs.append(inputs[slot]) sorted_inputs.append(inputs['global']) layer.append( self.input_trans.forward(sorted_inputs, loss, training=training)) elif self.sort_input_vec: sorted_inputs = [] for slot in self.slots: tmp = inputs[slot][:, :-2].sort(is_ascend=False) if tmp.shape[1] < 20: tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1) else: tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20) sorted_inputs.append( nd.concat(tmp, inputs[slot][:, -2:], dim=1)) sorted_inputs.append(inputs['global']) layer.append( self.input_trans.forward(sorted_inputs, loss, training=training)) # hidden_layers for i in range(self.hidden_layers - 1): if self.recurrent_mode is False: # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))' layer.append(self.ma_trans[i].forward(layer[i], loss, training=training)) else: layer.append( self.ma_trans.forward(layer[i], loss, training=training)) if self.share_last_layer is False: # dropout of last hidden layer for j in range(len(self.slots)): layer[-1][j] = self.local_out_drop_op(layer[-1][j]) layer[-1][-1] = self.global_out_drop_op(layer[-1][-1]) # last_hidden_layer -> outputs outputs = [] for i in range(len(self.slots) + 1): outputs.append(self.output_trans(layer[-1][i])) # if self.use_dueling is False: # outputs.append(self.output_trans[i](layer[-1][i])) # else: # if i < len(self.slots): # tmp_adv = self.output_trans_local_advantage1.forward(sorted_inputs[i], training=training) # tmp_adv = self.output_trans_local_advantage2.forward(tmp_adv, training=training) # else: # tmp_adv = self.output_trans_global_advantage1.forward(sorted_inputs[-1], training=training) # tmp_adv = self.output_trans_global_advantage2.forward(tmp_adv, training=training) # if self.dueling_share_last: # if i < len(self.slots): # cur_value = self.output_trans_local_value.forward(layer[-1][i], training=training) # if self.shared_last_layer_use_bias: # cur_value = cur_value + nd.slice(self.value_bias_local.data(), begin=(i, ), end=(i + 1, )) # else: # cur_value = self.output_trans_global_value.forward(layer[-1][i], training=training) # else: # cur_value = self.output_trans_value[i].forward(layer[-1][i], training=training) # outputs.append( # cur_value + # tmp_adv - tmp_adv.mean(axis=1).reshape( # (tmp_adv.shape[0], 1)).broadcast_axes(axis=1, size=tmp_adv.shape[1])) # else: # outputs = [] # for i in 
range(len(self.slots)): # output_i = self.output_trans_local.forward(layer[-1][i], training=training) # if self.shared_last_layer_use_bias: # output_i = output_i + self.output_trans_local_biases[i].data() # outputs.append(output_i) # outputs.append(self.output_trans_global.forward(layer[-1][-1], training=training)) normal_output = [] for i in range(len(self.slots)): normal_output.append(outputs[i][:, :3]) normal_output.append(outputs[-1][:, 3:]) return nd.concat(*normal_output, dim=1)
# -*- coding: utf-8 -*-
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt

num_train = 100
num_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5.0
x = nd.random.normal(shape=(num_train + num_test, 1))
X = nd.concat(x, nd.power(x, 2), nd.power(x, 3))  # power(x, 2) raises every element of x to the 2nd power
# y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_w[2] * X[:, 2] + true_b
y = true_w[0] * X[:, 0] + true_b
y += .1 * nd.random.normal(shape=y.shape)
y_train, y_test = y[:num_train], y[num_train:]

# matplotlib inline
# def test(net, X, y):
#     return square_loss(net(X), y).mean().asscalar()

def train(X_train, X_test, y_train, y_test):
def generate_learned_samples(self): ''' Draw and generate data. Returns: `Tuple` data. The shape is ... - `mxnet.ndarray` of observed data points in training. - `mxnet.ndarray` of supervised data in training. - `mxnet.ndarray` of observed data points in test. - `mxnet.ndarray` of supervised data in test. - `mxnet.ndarray` of obsrved data points in target domain. ''' for _ in range(self.iter_n): training_batch_arr, test_batch_arr = None, None training_label_arr, test_label_arr = None, None target_domain_batch_arr = None for batch_size in range(self.batch_size): dir_key = np.random.randint(low=0, high=len(self.__training_file_path_list)) training_one_hot_arr = nd.zeros((1, len(self.__training_file_path_list)), ctx=self.__ctx) training_one_hot_arr[0, dir_key] = 1 file_key = np.random.randint(low=0, high=len(self.__training_file_path_list[dir_key])) training_data_arr = self.__image_extractor.extract( path=self.__training_file_path_list[dir_key][file_key], ) training_data_arr = self.pre_normalize(training_data_arr) test_dir_key = np.random.randint(low=0, high=len(self.__test_file_path_list)) test_one_hot_arr = nd.zeros((1, len(self.__test_file_path_list)), ctx=self.__ctx) test_one_hot_arr[0, test_dir_key] = 1 file_key = np.random.randint(low=0, high=len(self.__test_file_path_list[test_dir_key])) test_data_arr = self.__image_extractor.extract( path=self.__test_file_path_list[test_dir_key][file_key], ) test_data_arr = self.pre_normalize(test_data_arr) target_domain_dir_key = np.random.randint(low=0, high=len(self.__target_domain_file_path_list)) target_domain_one_hot_arr = nd.zeros((1, len(self.__target_domain_file_path_list)), ctx=self.__ctx) target_domain_one_hot_arr[0, target_domain_dir_key] = 1 target_domain_file_key = np.random.randint(low=0, high=len(self.__target_domain_file_path_list[target_domain_dir_key])) target_domain_data_arr = self.__image_extractor.extract( path=self.__target_domain_file_path_list[target_domain_dir_key][target_domain_file_key], ) target_domain_data_arr = self.pre_normalize(target_domain_data_arr) training_data_arr = nd.expand_dims(training_data_arr, axis=0) test_data_arr = nd.expand_dims(test_data_arr, axis=0) target_domain_data_arr = nd.expand_dims(target_domain_data_arr, axis=0) if training_batch_arr is not None: training_batch_arr = nd.concat(training_batch_arr, training_data_arr, dim=0) else: training_batch_arr = training_data_arr if test_batch_arr is not None: test_batch_arr = nd.concat(test_batch_arr, test_data_arr, dim=0) else: test_batch_arr = test_data_arr if training_label_arr is not None: training_label_arr = nd.concat(training_label_arr, training_one_hot_arr, dim=0) else: training_label_arr = training_one_hot_arr if test_label_arr is not None: test_label_arr = nd.concat(test_label_arr, test_one_hot_arr, dim=0) else: test_label_arr = test_one_hot_arr if target_domain_batch_arr is not None: target_domain_batch_arr = nd.concat(target_domain_batch_arr, target_domain_data_arr, dim=0) else: target_domain_batch_arr = target_domain_data_arr if self.__noiseable_data is not None: training_batch_arr = self.__noiseable_data.noise(training_batch_arr) target_domain_batch_arr = self.__noiseable_data.noise(target_domain_batch_arr) yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr, target_domain_batch_arr
def hybrid_forward(self, F, X, entry_b):
    contactlist = []
    for index, param in self.layer_list:
        contactlist.append(F.dot(param.data(), X[index, :]))
    # Concatenate the per-layer projections and add the bias term.
    y = F.concat(*contactlist, dim=0) + entry_b
    return self.activation(y)
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                          learning_rate, clipping_theta, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    for e in range(1, epochs + 1):
        # With consecutive sampling, the hidden state only needs to be initialized
        # once at the start of each epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                # Only needed when the RNN is an LSTM; ignore otherwise.
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # With random sampling, the hidden state must be re-initialized before
            # every random mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    # Only needed when the RNN is an LSTM; ignore otherwise.
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # Each element of outputs has shape (batch_size, vocab_size).
                if is_lstm:
                    # Only needed when the RNN is an LSTM; ignore otherwise.
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_i b_j denote element j of the batch at time step i.
                # label has shape (batch_size * num_steps,):
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ...]
                label = label.T.reshape((-1,))
                # Concatenate outputs into shape (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # After the steps above, outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()
            grad_clipping(params, clipping_theta, ctx)
            utils.SGD(params, learning_rate)
            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size
        if e % pred_period == 0:
            print("Epoch %d. Perplexity %f" % (e, exp(train_loss / num_examples)))
            for seq in seqs:
                print(' - ', predict_rnn(rnn, seq, pred_len, params, hidden_dim,
                                         ctx, idx_to_char, char_to_idx,
                                         get_inputs, is_lstm))
            print()