def forward(self, inputs, begin_state=None):  # pylint: disable=arguments-differ
    """Implement forward computation.

    Parameters
    ----------
    inputs : NDArray
        The training dataset.
    begin_state : list
        The initial hidden states.

    Returns
    -------
    out: NDArray
        The output of the model.
    out_states: list
        The list of output states of the model's encoder.
    """
    encoded = self.embedding(inputs)
    if not begin_state:
        begin_state = self.begin_state(batch_size=inputs.shape[1])
    out_states = []
    for i, (e, s) in enumerate(zip(self.encoder, begin_state)):
        encoded, state = e(encoded, s)
        out_states.append(state)
        if self._drop_h and i != len(self.encoder) - 1:
            encoded = nd.Dropout(encoded, p=self._drop_h, axes=(0,))
    if self._dropout:
        encoded = nd.Dropout(encoded, p=self._dropout, axes=(0,))
    with autograd.predict_mode():
        out = self.decoder(encoded)
    return out, out_states
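# A minimal sketch (not from the original sources) illustrating why the decoder
# call above is wrapped in autograd.predict_mode(): inside an autograd.record()
# scope, predict_mode() temporarily switches operators such as Dropout back to
# their inference behavior.
from mxnet import autograd, nd

x = nd.ones((2, 4))
with autograd.record():
    train_out = nd.Dropout(x, p=0.5)     # stochastic: units are zeroed at random
    with autograd.predict_mode():
        eval_out = nd.Dropout(x, p=0.5)  # identity: dropout is disabled
print(train_out)  # some entries zeroed, survivors scaled by 1/(1-p)
print(eval_out)   # equal to x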
def evaluate_accuracy_and_loss(data_iterator, loss_fn, net):
    metric_acc = mx.metric.Accuracy()
    cumulative_loss = 0.
    no_of_samples = 0

    for i, (data, label) in enumerate(data_iterator):
        with autograd.predict_mode():
            data = data.astype(np.float32).as_in_context(ctx)
            label = label.astype(np.int32).as_in_context(ctx)
            output = net(data)
            loss = loss_fn(output, label)
            prediction = nd.argmax(output, axis=1).astype(np.int32)
            cumulative_loss += nd.sum(loss).asscalar()
            no_of_samples += data.shape[0]
            metric_acc.update([label], [prediction])

    print("cumulative loss = {0} no_of_samples = {1}".format(cumulative_loss, no_of_samples))
    loss = cumulative_loss / no_of_samples
    return (metric_acc.get()[1], loss)
def generate(self, video_dataset):
    # Init params
    self.load(['repr_generator'], ['repr_generator'], None, allow_init=False)
    # Generate for each batch
    batch_index = 0
    while True:
        print("Generating label for video dataset - [%-5d/%-5d]..." % (
            batch_index, video_dataset.num_data / self.cfg.BATCH_SIZE_GENERATE))
        # 1. Load data
        (batch_images, _, batch_paths), finish = video_dataset.get_batch_data_cls(
            batch_index, self.cfg.BATCH_SIZE_GENERATE)
        x_list = utils.io.split_and_load_gpu(self.cfg.CTX, batch_images)
        # 2. Generate features
        with autograd.predict_mode():
            features = self.feedforward(x_list, mode='generate').asnumpy()
        # 3. Save
        for (cat_dir_name, cat_obj), feature in zip(batch_paths, features):
            cat_dir_path = os.path.join(
                self.cfg.VIDEO_REPR_DIR[video_dataset.dataset_info], cat_dir_name)
            if not os.path.exists(cat_dir_path):
                os.makedirs(cat_dir_path)
            np.save(
                os.path.join(cat_dir_path, os.path.splitext(cat_obj)[0] + '.npy'),
                feature[np.newaxis, :])
        # Move to next
        if finish:
            break
        else:
            batch_index += 1
    # Finish
    print("Generating accomplished.")
def test_network(model, test_set, embedding, ctx, args):
    acc = nd.array([0.], ctx=ctx)
    counter = 0
    for idx in range(len(test_set)):
        sen1 = []
        sen2 = []
        label = []
        item = test_set[idx]
        s1 = fetch_embedding_of_sentence(item.sentence1, embedding)
        sen1.append(s1)
        s2 = fetch_embedding_of_sentence(item.sentence2, embedding)
        sen2.append(s2)
        label.append(label_to_idx[item.gold_label])
        sen1 = pad_sentences(sen1).as_in_context(ctx)
        sen2 = pad_sentences(sen2).as_in_context(ctx)
        label = nd.array(label, dtype=int).as_in_context(ctx)
        with autograd.predict_mode():
            yhat = model(sen1, sen2)
        pred = yhat.argmax(axis=1)
        cur_acc = (pred == label.astype(np.float32)).sum()
        acc += cur_acc
        counter += 1
    acc = acc / counter
    print("Acc=", acc.asscalar())
def validation(g, d, val_loader):
    g_val_loss = 0.0
    d_val_loss = 0.0
    iter_times = 0
    for data, _ in tqdm.tqdm(val_loader, desc="Validating", leave=False,
                             unit='batch', unit_scale=True, mininterval=1,
                             maxinterval=5, dynamic_ncols=True):
        iter_times += 1
        bs = len(data)
        noise = make_noise(bs)
        data = data.as_in_context(CTX)
        with autograd.predict_mode():
            # loss for d
            err2real = d(data).mean()
            fake_img = g(noise)
            err2fake = d(fake_img).mean()
            penalty = wasser_penalty(d, data, fake_img, 10, ctx=CTX)
            d_err = -(err2real - err2fake) + penalty
            d_val_loss += d_err.asscalar()

            # loss for g
            fake_img = g(noise)
            g_err = -d(fake_img).mean()
            g_val_loss += g_err.asscalar()
    return g_val_loss / iter_times, d_val_loss / iter_times
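# Hypothetical sketch of what a wasser_penalty-style helper might look like
# (the real wasser_penalty is defined elsewhere in the source repo): the
# WGAN-GP gradient penalty on points interpolated between real and fake
# batches. Note it needs autograd.record() internally even when the caller is
# in predict_mode, because it differentiates w.r.t. the interpolated input.
from mxnet import autograd, nd

def gradient_penalty(d, real, fake, weight, ctx):
    eps = nd.random.uniform(0, 1, shape=(real.shape[0], 1, 1, 1), ctx=ctx)
    interp = eps * real + (1 - eps) * fake
    interp.attach_grad()
    with autograd.record():
        score = d(interp)
    grads = autograd.grad(score, [interp], create_graph=True)[0]
    grad_norm = nd.sqrt((grads.reshape((grads.shape[0], -1)) ** 2).sum(axis=1) + 1e-12)
    return weight * ((grad_norm - 1) ** 2).mean()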
def sample(self, num_samples, c, output_type='mol', sanitize=True, random=True):
    if len(c.shape) == 1:
        c = np.stack([c, ] * num_samples, axis=0)

    with autograd.predict_mode():
        # step one
        finished = [False, ] * num_samples

        def get_init():
            self.mdl.mode = 'decode_0'
            _c = nd.array(c, dtype='float32', ctx=self.ctx)
            init = self.mdl(_c).asnumpy()
            return init

        outputs = _decode_step(X=None, A=None, NX=None, NA=None,
                               last_action=None, finished=finished,
                               get_init=get_init, get_action=None,
                               n_node_types=self.mdl.N_A,
                               n_edge_types=self.mdl.N_B,
                               random=random)
        X, A, NX, NA, last_action, finished = outputs

        count = 1
        h = np.zeros([self.mdl.N_rnn, num_samples, self.mdl.F_c[-1]], dtype=np.float32)
        while not np.all(finished) and count < 100:
            def get_action(inputs):
                self.mdl.mode = 'decode_step'
                _h = nd.array(h[:, np.logical_not(finished), :], ctx=self.ctx, dtype='float32')
                _c = nd.array(c[np.logical_not(finished), :], ctx=self.ctx, dtype='float32')
                _X, _A_sparse, _NX, _NX_rep, _mask, _NX_cum = self.to_nd(inputs)
                _append, _connect, _end, _h = self.mdl(_X, _A_sparse, _NX, _NX_rep,
                                                       _mask, _NX_cum, _h, _c, _NX_rep)
                h[:, np.logical_not(finished), :] = _h[0].asnumpy()
                return _append.asnumpy(), _connect.asnumpy(), _end.asnumpy()

            outputs = _decode_step(X, A, NX, NA, last_action, finished,
                                   get_init=None, get_action=get_action,
                                   n_node_types=self.mdl.N_A,
                                   n_edge_types=self.mdl.N_B,
                                   random=random)
            X, A, NX, NA, last_action, finished = outputs
            count += 1

    graph_list = []
    cumsum_X_ = np.cumsum(np.pad(NX, [[1, 0]], mode='constant')).tolist()
    cumsum_A_ = np.cumsum(np.pad(NA, [[1, 0]], mode='constant')).tolist()
    for cumsum_A_pre, cumsum_A_post, \
            cumsum_X_pre, cumsum_X_post in zip(cumsum_A_[:-1], cumsum_A_[1:],
                                               cumsum_X_[:-1], cumsum_X_[1:]):
        graph_list.append([X[cumsum_X_pre:cumsum_X_post],
                           A[cumsum_A_pre:cumsum_A_post, :]])

    if output_type == 'graph':
        return graph_list
    elif output_type == 'mol':
        return data.get_mol_from_graph_list(graph_list, sanitize)
    elif output_type == 'smiles':
        mol_list = data.get_mol_from_graph_list(graph_list, sanitize=True)
        smiles_list = [Chem.MolToSmiles(m) if m is not None else None for m in mol_list]
        return smiles_list
    else:
        raise ValueError('Unrecognized output type')
def train_generator(self):
    '''
    Train generator.

    Returns:
        Tuple data.
        - generative loss.
        - discriminative posterior.
    '''
    with autograd.record():
        generated_arr, observed_arr, decoded_arr = self.generative_model.draw()
        re_encoded_arr = self.__re_encoder_model(decoded_arr)
        with autograd.predict_mode():
            observed_posterior_arr = self.discriminative_model.inference(observed_arr)
            decoded_posterior_arr = self.discriminative_model.inference(decoded_arr)
        advarsarial_loss = self.__advarsarial_loss(observed_posterior_arr,
                                                   decoded_posterior_arr)
        contextual_loss = self.__contextual_loss(observed_arr, decoded_arr)
        encoding_loss = self.__encoding_loss(generated_arr, re_encoded_arr)
        loss = advarsarial_loss + contextual_loss + encoding_loss
    loss.backward()
    self.generator_trainer.step(generated_arr.shape[0])
    return loss.mean().asnumpy()[0], decoded_posterior_arr.mean().asnumpy()[0]
def forward(self, inputs, begin_state=None):  # pylint: disable=arguments-differ
    """Implement forward computation.

    Parameters
    ----------
    inputs : NDArray
        input tensor with shape `(sequence_length, batch_size)`
        when `layout` is "TNC".
    begin_state : list
        initial recurrent state tensor with length equal to num_layers.
        the initial state with shape `(1, batch_size, num_hidden)`

    Returns
    -------
    out: NDArray
        output tensor with shape `(sequence_length, batch_size, input_size)`
        when `layout` is "TNC".
    out_states: list
        output recurrent state tensor with length equal to num_layers.
        the state with shape `(1, batch_size, num_hidden)`
    """
    encoded = self.embedding(inputs)
    if begin_state is None:
        begin_state = self.begin_state(batch_size=inputs.shape[1])
    out_states = []
    for i, (e, s) in enumerate(zip(self.encoder, begin_state)):
        encoded, state = e(encoded, s)
        out_states.append(state)
        if self._drop_h and i != len(self.encoder) - 1:
            encoded = nd.Dropout(encoded, p=self._drop_h, axes=(0,))
    if self._dropout:
        encoded = nd.Dropout(encoded, p=self._dropout, axes=(0,))
    with autograd.predict_mode():
        out = self.decoder(encoded)
    return out, out_states
def forward(self, inputs, begin_state=None):  # pylint: disable=arguments-differ
    """Implement forward computation.

    Parameters
    ----------
    inputs : NDArray
        input tensor with shape `(sequence_length, batch_size)`
        when `layout` is "TNC".
    begin_state : list
        initial recurrent state tensor with length equal to num_layers.
        the initial state with shape `(1, batch_size, num_hidden)`

    Returns
    -------
    out: NDArray
        output tensor with shape `(sequence_length, batch_size, input_size)`
        when `layout` is "TNC".
    out_states: list
        output recurrent state tensor with length equal to num_layers.
        the state with shape `(1, batch_size, num_hidden)`
    """
    encoded = self.embedding(inputs)
    if not begin_state:
        begin_state = self.begin_state(batch_size=inputs.shape[1])
    out_states = []
    for i, (e, s) in enumerate(zip(self.encoder, begin_state)):
        encoded, state = e(encoded, s)
        out_states.append(state)
        if self._drop_h and i != len(self.encoder) - 1:
            encoded = nd.Dropout(encoded, p=self._drop_h, axes=(0,))
    if self._dropout:
        encoded = nd.Dropout(encoded, p=self._dropout, axes=(0,))
    with autograd.predict_mode():
        out = self.decoder(encoded)
    return out, out_states
def test(self, test_dataset):
    # Init params
    self.load(['model'], logger=None, allow_init=False)
    # Test each batch
    batch_index = 0
    while True:
        print("Saving for batch [%-5d/%-5d]..." % (
            batch_index, test_dataset.num_data / self.cfg.BATCH_SIZE))
        # 1. Load data
        (batch_images, batch_labels, batch_paths), finish = test_dataset.get_batch_data_cls(
            batch_size=self.cfg.BATCH_SIZE, batch_index=batch_index, mode='test')
        x_list, y = utils.io.split_and_load_gpu(self.cfg.CTX, [batch_images], batch_labels)
        # 2. Record calculation
        with autograd.predict_mode():
            pred_y = self.forward(x_list)
            pred_y = utils.gen_op.convert_to_image(pred_y)
        # 3. Save to directory
        for py, (cat_dir_name, cur_obj) in zip(pred_y, batch_paths):
            # Make cat dir
            cat_dir = os.path.join(self.cfg.TEST_IMAGE_DATASET_FLOW_DIR, cat_dir_name)
            if not os.path.exists(cat_dir):
                os.makedirs(cat_dir)
            # Save
            imsave(os.path.join(cat_dir, cur_obj), py)
        # Move to next
        if finish:
            break
        else:
            batch_index += 1
    # Finish
    print("Testing accomplished.")
def demo(net, dataset, m):
    idx = np.arange(len(dataset))
    np.random.shuffle(idx)
    for i in range(m):
        i = idx[i]
        img, lbl = dataset[i]
        img_, lbl_ = resizer(img, lbl)
        fig = plt.figure()
        fig.add_subplot(1, 3, 1)
        plt.imshow(img_)
        fig.add_subplot(1, 3, 2)
        plt.imshow(lbl_)
        img_, lbl_ = augment.voc_val(img, lbl)
        img_ = mx.nd.expand_dims(img_, 0)
        with ag.predict_mode():
            pred = net(img_)
        pred = mx.nd.argmax(pred, 1).asnumpy().squeeze()
        pred = pred.astype(np.uint8)
        pred = Image.fromarray(pred)
        fig.add_subplot(1, 3, 3)
        plt.imshow(pred)
        plt.show()
def predict(task):
    logging.info('Training Finished. Starting Prediction.\n')
    f_out = open('submission/%s.csv' % (task), 'w')
    with open('data2/week-rank/Tests/question.csv', 'r') as f_in:
        lines = f_in.readlines()
    tokens = [l.rstrip().split(',') for l in lines]
    task_tokens = [t for t in tokens if t[1] == task]
    n = len(task_tokens)
    cnt = 0
    for path, task, _ in task_tokens:
        img_path = os.path.join('data2/week-rank', path)
        with open(img_path, 'rb') as f:
            img = image.imdecode(f.read())
        out_all = np.zeros([task_list[task], ])
        # Test-time augmentation (multi-scale test)
        for scale in input_scale:
            data = transform_predict(img, scale)
            with ag.predict_mode():
                # ten random crops, so `out` holds the results for 10 images
                out = net(data.as_in_context(mx.gpu(0)))
                # apply softmax, then average the ten results
                out = nd.SoftmaxActivation(out).mean(axis=0)
            out_all += out.asnumpy()
        out = out_all / len(input_scale)
        pred_out = ';'.join(["%.8f" % (o) for o in out.tolist()])
        line_out = ','.join([path, task, pred_out])
        f_out.write(line_out + '\n')
        cnt += 1
        # progressbar(cnt, n)
    f_out.close()
def validation(g, d, val_loader):
    g_val_loss = 0.0
    d_val_loss = 0.0
    iter_times = 0
    for data, _ in tqdm.tqdm(val_loader, desc="Validating", leave=False,
                             unit='batch', unit_scale=True, mininterval=1,
                             maxinterval=5, dynamic_ncols=True):
        iter_times += 1
        bs = len(data)
        noise = make_noise(bs)
        data = data.as_in_context(CTX)
        with autograd.predict_mode():
            # loss for d
            err2real = loss(d(data), true_label)
            fake_img = g(noise)
            err2fake = loss(d(fake_img), fake_label)
            d_err = err2real + err2fake
            d_val_loss += (d_err.mean().asscalar() / 2)

            # loss for g
            fake_img = g(noise)
            g_err = loss(d(fake_img), true_label)
            g_val_loss += g_err.mean().asscalar()
    return g_val_loss / iter_times, d_val_loss / iter_times
def batch_predict(self, predictor, img_batch):
    model = predictor.model.model
    try:
        # unwrap a DataParallel-style container if present
        model = model.module
    except Exception:
        pass
    with autograd.predict_mode():
        outputs = model(img_batch.as_in_context(self.ctx))
    output, _ = outputs
    predict = mxnet.nd.argmax(output, 1).asnumpy().clip(0, 1)
    return predict
def hybrid_forward(self, F, inputs, begin_state=None):  # pylint: disable=arguments-differ
    """Implement the forward computation that the awd language model and cache model use.

    Parameters
    ----------
    inputs : NDArray or Symbol
        input tensor with shape `(sequence_length, batch_size)`
        when `layout` is "TNC".
    begin_state : list
        initial recurrent state tensor with length equal to num_layers.
        the initial state with shape `(1, batch_size, num_hidden)`

    Returns
    -------
    out: NDArray or Symbol
        output tensor with shape `(sequence_length, batch_size, input_size)`
        when `layout` is "TNC".
    out_states: list
        output recurrent state tensor with length equal to num_layers.
        the state with shape `(1, batch_size, num_hidden)`
    encoded_raw: list
        The list of outputs of the model's encoder with length equal to num_layers.
        the shape of every encoder's output `(sequence_length, batch_size, num_hidden)`
    encoded_dropped: list
        The list of outputs with dropout of the model's encoder with length equal
        to num_layers. The shape of every encoder's dropped output
        `(sequence_length, batch_size, num_hidden)`
    """
    # XXX Temporary hack for hybridization as hybridblock does not support None inputs
    if isinstance(begin_state, list) and len(begin_state) == 0:
        begin_state = None

    encoded = self.embedding(inputs)
    if not begin_state:
        if F == nd:
            begin_state = self.begin_state(batch_size=inputs.shape[1])
        else:
            begin_state = self.begin_state(batch_size=0, func=sym.zeros)
    out_states = []
    encoded_raw = []
    encoded_dropped = []
    for i, (e, s) in enumerate(zip(self.encoder, begin_state)):
        encoded, state = e(encoded, s)
        encoded_raw.append(encoded)
        out_states.append(state)
        if self._drop_h and i != len(self.encoder) - 1:
            encoded = F.Dropout(encoded, p=self._drop_h, axes=(0,))
            encoded_dropped.append(encoded)
    if self._dropout:
        encoded = F.Dropout(encoded, p=self._dropout, axes=(0,))
    encoded_dropped.append(encoded)
    with autograd.predict_mode():
        out = self.decoder(encoded)
    return out, out_states, encoded_raw, encoded_dropped
def predict(net, date):
    s_t_d_obj = create_dataset.short_time_dataset(
        [create_dataset.train_data_url_txt, create_dataset.train_data_url_dig])
    x1, x2, y = s_t_d_obj.get_ndate_data(date, 4)
    with autograd.predict_mode():
        y_hats = net(x1, x2)
    print(y_hats)
    y_class = nd.argmax(y_hats, axis=1)
    print(y_class)
    print(u'Prediction for {}: the next-day low of the Shanghai Composite Index will be {}'.format(
        date, s_t_d_obj.y_class_to_value(y_class[0].asscalar())))
def sample(self, num_samples, output_type='mol', sanitize=True, random=True):
    with autograd.predict_mode():
        # step one
        finished = [False, ] * num_samples

        def get_init():
            self.mdl.mode = 'decode_0'
            init = self.mdl(self.ctx).asnumpy()
            init = np.stack([init, ] * num_samples, axis=0)
            return init

        outputs = _decode_step(X=None, A=None, NX=None, NA=None,
                               last_action=None, finished=finished,
                               get_init=get_init, get_action=None,
                               n_node_types=self.mdl.N_A,
                               n_edge_types=self.mdl.N_B,
                               random=random)
        X, A, NX, NA, last_action, finished = outputs

        count = 1
        while not np.all(finished) and count < 100:
            def get_action(inputs):
                self.mdl.mode = 'decode_step'
                _append, _connect, _end = self.mdl(*self.to_nd(inputs))
                return _append.asnumpy(), _connect.asnumpy(), _end.asnumpy()

            outputs = _decode_step(X, A, NX, NA, last_action, finished,
                                   get_init=None, get_action=get_action,
                                   n_node_types=self.mdl.N_A,
                                   n_edge_types=self.mdl.N_B,
                                   random=random)
            X, A, NX, NA, last_action, finished = outputs
            count += 1

    graph_list = []
    cumsum_X_ = np.cumsum(np.pad(NX, [[1, 0]], mode='constant')).tolist()
    cumsum_A_ = np.cumsum(np.pad(NA, [[1, 0]], mode='constant')).tolist()
    for cumsum_A_pre, cumsum_A_post, \
            cumsum_X_pre, cumsum_X_post in zip(cumsum_A_[:-1], cumsum_A_[1:],
                                               cumsum_X_[:-1], cumsum_X_[1:]):
        graph_list.append([X[cumsum_X_pre:cumsum_X_post],
                           A[cumsum_A_pre:cumsum_A_post, :]])

    if output_type == 'graph':
        return graph_list
    elif output_type == 'mol':
        return data.get_mol_from_graph_list(graph_list, sanitize)
    elif output_type == 'smiles':
        mol_list = data.get_mol_from_graph_list(graph_list, sanitize=True)
        smiles_list = [Chem.MolToSmiles(m) if m is not None else None for m in mol_list]
        return smiles_list
    else:
        raise ValueError('Unrecognized output type')
def calculate_loss(model, data_iter, loss_obj, ctx=mx.cpu()):
    test_loss = []
    for i, (x_data, y_data, z_data) in enumerate(data_iter):
        x_data = x_data.as_in_context(ctx).astype('float32')
        y_data = y_data.as_in_context(ctx).astype('float32')
        z_data = z_data.as_in_context(ctx).astype('float32')
        with autograd.predict_mode():
            z_output = model(x_data, y_data)
            loss_te = loss_obj(z_output, z_data)
        curr_loss = nd.mean(loss_te).asscalar()
        test_loss.append(curr_loss)
    return np.mean(test_loss)
def eval_step(data_tr, data_te, data_type="valid"):
    running_loss, update_count = 0.0, 0
    eval_idxlist = list(range(data_tr.shape[0]))
    eval_N = data_tr.shape[0]
    eval_steps = len(range(0, eval_N, args.batch_size))
    n100_list, r20_list, r50_list = [], [], []
    with trange(eval_steps) as t:
        for batch_idx, start_idx in zip(t, range(0, eval_N, args.batch_size)):
            t.set_description(data_type)
            end_idx = min(start_idx + args.batch_size, eval_N)
            X_tr = data_tr[eval_idxlist[start_idx:end_idx]]
            X_te = data_te[eval_idxlist[start_idx:end_idx]]
            X_tr_inp = nd.array(X_tr.toarray()).as_in_context(ctx)
            with autograd.predict_mode():
                # NOTE: this assumes the model's forward pass returns the
                # reconstructed scores X_out alongside the loss; adjust to
                # your model's actual return signature.
                if model.__class__.__name__ == "MultiVAE":
                    if args.total_anneal_steps > 0:
                        anneal = min(args.anneal_cap,
                                     1.0 * update_count / args.total_anneal_steps)
                    else:
                        anneal = args.anneal_cap
                    X_out, loss = model(X_tr_inp, anneal)
                elif model.__class__.__name__ == "MultiDAE":
                    X_out, loss = model(X_tr_inp)
            running_loss += loss.asscalar()
            avg_loss = running_loss / (batch_idx + 1)
            # Exclude examples from training set
            X_out = X_out.asnumpy()
            X_out[X_tr.nonzero()] = -np.inf
            n100 = NDCG_binary_at_k_batch(X_out, X_te, k=100)
            r20 = Recall_at_k_batch(X_out, X_te, k=20)
            r50 = Recall_at_k_batch(X_out, X_te, k=50)
            n100_list.append(n100)
            r20_list.append(r20)
            r50_list.append(r50)
            t.set_postfix(loss=avg_loss)
    n100_list = np.concatenate(n100_list)
    r20_list = np.concatenate(r20_list)
    r50_list = np.concatenate(r50_list)
    return avg_loss, np.mean(n100_list), np.mean(r20_list), np.mean(r50_list)
def predict(self, x):
    x = nd.expand_dims(x, axis=0)
    x = nd.transpose(x, axes=(0, 3, 1, 2))
    # Initialize and compile the model
    self.init_model()
    self.model_compile()
    # Run prediction
    with autograd.predict_mode():
        output = self.model(x)
    return output
def test_net(net, test_data, ctx):
    """
    Evaluate `net` on `test_data` and return the metric (RMSE by default).
    """
    metric = mx.metric.RMSE()
    metric.reset()
    for i, (data, label) in enumerate(test_data):
        data = gluon.utils.split_and_load(data, ctx_list=ctx, even_split=False)
        label = gluon.utils.split_and_load(label, ctx_list=ctx, even_split=False)
        with autograd.predict_mode():
            outputs = [net(x) for x in data]
        metric.update(label, outputs)
    return metric.get()
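# Usage sketch (hypothetical names): evaluating a trained net on two GPUs.
# test_net returns metric.get(), which is a (name, value) pair.
ctx = [mx.gpu(0), mx.gpu(1)]
name, rmse = test_net(net, test_data, ctx)
print('%s: %.4f' % (name, rmse))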
def learn(self, experiences, gamma):
    """Update value parameters using given batch of experience tuples.

    Params
    ======
        experiences: tuple of (s, a, r, s', done, snake_id, turn_count, snake_health) tuples
        gamma (float): discount factor
    """
    states, actions, rewards, next_states, dones, snake_id, turn_count, snake_health = experiences

    # Get max predicted Q values (for next states) from target model
    with autograd.predict_mode():
        if self.qnetwork_target.take_additional_forward_arguments:
            Q_targets_next = self.qnetwork_target(
                next_states, snake_id, turn_count, snake_health).max(1).expand_dims(1)
        else:
            Q_targets_next = self.qnetwork_target(next_states).max(1).expand_dims(1)

    # Compute Q targets for current states
    dones = dones.astype(np.float32)
    Q_targets = rewards[:, -1].expand_dims(1) + (
        gamma * Q_targets_next * (1 - dones[:, -1].expand_dims(1)))

    # Get expected Q values from local model
    last_action = actions[:, -1].expand_dims(1)
    action_indices = nd.array(np.arange(0, last_action.shape[0])).as_in_context(ctx)
    action_indices.attach_grad()
    last_actions = nd.concat(action_indices.expand_dims(1), last_action, dim=1)
    with autograd.record():
        if self.qnetwork_local.take_additional_forward_arguments:
            predicted_actions = self.qnetwork_local(states, snake_id, turn_count, snake_health)
        else:
            predicted_actions = self.qnetwork_local(states)
        Q_expected = nd.gather_nd(predicted_actions, last_actions.T)
        # Compute loss
        loss = self.loss_function(Q_expected, Q_targets)
    # Minimize the loss
    loss.backward()
    self.trainer.step(Q_expected.shape[0])

    # ------------------- update target network ------------------- #
    self.soft_update(self.tau)
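# Hypothetical sketch of the soft_update helper called above (assuming
# qnetwork_local and qnetwork_target are Gluon blocks with identically
# ordered parameters): target <- tau * local + (1 - tau) * target.
def soft_update(self, tau):
    local_params = self.qnetwork_local.collect_params().values()
    target_params = self.qnetwork_target.collect_params().values()
    for lp, tp in zip(local_params, target_params):
        tp.set_data(tau * lp.data() + (1.0 - tau) * tp.data())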
def validate(net, val_data, ctx):
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    AP = 0.
    AP_cnt = 0
    val_loss = 0
    all_softmax_output = []
    mAP_name = task + model_name + '.npy'
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                          batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                           batch_axis=0, even_split=False)
        outputs = [net(X) for X in data]  # feed the images in and get a 16x5 result
        metric.update([label[0][:, 0]], outputs)
        with ag.predict_mode():
            outputs = [net(X) for X in data]
            loss = []
            for yhat, y in zip(outputs[0], label[0]):
                loss_1 = 0
                if y[1] == 99:  # only have y [4,0,0,0,0]
                    loss_1 += L(yhat, y[0])
                elif y[2] == 99:  # have one m [4,1,0,0,0]
                    loss_1 = 0.8 * L(yhat, y[0]) + 0.2 * L(yhat, y[1])
                elif y[3] == 99:  # have two m [4,1,3,0,0]
                    loss_1 = 0.7 * L(yhat, y[0]) + 0.15 * L(yhat, y[1]) + 0.15 * L(yhat, y[2])
                else:  # have many m [4,1,3,2,0]
                    loss_1 = (0.6 * L(yhat, y[0]) + 0.13 * L(yhat, y[1])
                              + 0.13 * L(yhat, y[2]) + 0.13 * L(yhat, y[3]))
                loss += [loss_1]
        val_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)  # sum the losses
    _, val_acc = metric.get()
    return (val_acc, val_loss / len(val_data))
def forward(inputs, begin_state=None):
    """Implement forward computation using awd language model.

    Parameters
    ----------
    inputs : NDArray
        The training dataset.
    begin_state : list
        The initial hidden states.

    Returns
    -------
    out: NDArray
        The output of the model.
    out_states: list
        The list of output states of the model's encoder.
    encoded_raw: list
        The list of outputs of the model's encoder.
    encoded_dropped: list
        The list of outputs with dropout of the model's encoder.
    """
    encoded = model.embedding(inputs)
    if not begin_state:
        begin_state = model.begin_state(batch_size=inputs.shape[1])
    out_states = []
    encoded_raw = []
    encoded_dropped = []
    if args.weight_dropout > 0:
        for i, (e, s) in enumerate(zip(model.encoder, begin_state)):
            encoded, state = e(encoded, s)
            encoded_raw.append(encoded)
            out_states.append(state)
            if model._drop_h and i != len(model.encoder) - 1:
                encoded = mx.nd.Dropout(encoded, p=model._drop_h, axes=(0,))
                encoded_dropped.append(encoded)
    else:
        encoded, state = model.encoder(encoded, begin_state)
        encoded_raw.append(encoded)
    if model._dropout:
        encoded = mx.nd.Dropout(encoded, p=model._dropout, axes=(0,))
    if args.weight_dropout > 0:
        encoded_dropped.append(encoded)
        with autograd.predict_mode():
            out = model.decoder(encoded)
    else:
        out = model.decoder(encoded)
    if args.weight_dropout > 0:
        return out, out_states, encoded_raw, encoded_dropped
    else:
        return out, state, encoded_raw, encoded_dropped
def forward(inputs, begin_state=None):
    """Implement forward computation using awd language model.

    Parameters
    ----------
    inputs : NDArray
        The training dataset.
    begin_state : list
        The initial hidden states.

    Returns
    -------
    out: NDArray
        The output of the model.
    out_states: list
        The list of output states of the model's encoder.
    encoded_raw: list
        The list of outputs of the model's encoder.
    encoded_dropped: list
        The list of outputs with dropout of the model's encoder.
    """
    encoded = model.embedding(inputs)
    if not begin_state:
        begin_state = model.begin_state(batch_size=inputs.shape[1])
    out_states = []
    encoded_raw = []
    encoded_dropped = []
    if args.weight_dropout > 0:
        for i, (e, s) in enumerate(zip(model.encoder, begin_state)):
            encoded, state = e(encoded, s)
            encoded_raw.append(encoded)
            out_states.append(state)
            if model._drop_h and i != len(model.encoder) - 1:
                encoded = mx.nd.Dropout(encoded, p=model._drop_h, axes=(0,))
                encoded_dropped.append(encoded)
    else:
        encoded, state = model.encoder(encoded, begin_state)
        encoded_raw.append(encoded)
    if model._dropout:
        encoded = mx.nd.Dropout(encoded, p=model._dropout, axes=(0,))
    if args.weight_dropout > 0:
        encoded_dropped.append(encoded)
        with autograd.predict_mode():
            out = model.decoder(encoded)
    else:
        out = model.decoder(encoded)
    if args.weight_dropout > 0:
        return out, out_states, encoded_raw, encoded_dropped
    else:
        return out, state, encoded_raw, encoded_dropped
def train_seq2seq(epochs, log_interval, model, train_data, valid_data,
                  trainer, loss, char_indices, indices_char, ctx=mx.cpu()):
    from mxnet import autograd, nd
    tot_train_loss = []
    tot_va_loss = []
    for e in range(epochs):
        train_loss = []
        for i, (x_data, y_data, z_data) in enumerate(train_data):
            x_data = x_data.as_in_context(ctx).astype('float32')
            y_data = y_data.as_in_context(ctx).astype('float32')
            z_data = z_data.as_in_context(ctx).astype('float32')
            with autograd.record():
                z_output = model(x_data, y_data)
                loss_ = loss(z_output, z_data)
            loss_.backward()
            trainer.step(x_data.shape[0])
            curr_loss = nd.mean(loss_).asscalar()
            train_loss.append(curr_loss)
        if e % log_interval == 0:
            q, y = gen_n_test(10)
            for i in range(10):
                with autograd.predict_mode():
                    p = model.calculation(q[i], char_indices, indices_char).strip()
                iscorr = 1 if p == y[i] else 0
                if iscorr == 1:
                    print(colors.ok + '☑' + colors.close, end=' ')
                else:
                    print(colors.fail + '☒' + colors.close, end=' ')
                print("{} = {}({}) 1/0 {}".format(q[i], p, y[i], str(iscorr)))
        # calculate test loss
        va_loss = calculate_loss(model, valid_data, loss_obj=loss, ctx=ctx)
        print("Epoch %s. Train Loss: %s, Test Loss : %s" % (e, np.mean(train_loss), va_loss))
        tot_va_loss.append(va_loss)
        tot_train_loss.append(np.mean(train_loss))
    return tot_train_loss, tot_va_loss
def validate(val_data, net, criterion, ctx):
    loss = 0.0
    accuracy = []  # accumulate per-batch accuracy instead of keeping only the last batch
    for data, label in val_data:
        data_list = gluon.utils.split_and_load(data, ctx)
        label_list = gluon.utils.split_and_load(label, ctx)
        with autograd.predict_mode():
            outputs = [net(X) for X in data_list]
            losses = [criterion(X, y) for X, y in zip(outputs, label_list)]
        accuracy += [nd.mean(X.argmax(axis=1) == y.astype('float32')).asscalar()
                     for X, y in zip(outputs, label_list)]
        loss_list = [l.mean().asscalar() for l in losses]
        loss += sum(loss_list) / len(loss_list)
    return loss / len(val_data), sum(accuracy) / len(accuracy)
def validate(val_data, net, criterion, ctx):
    loss = 0.0
    accuracy = []  # accumulate per-batch accuracy instead of keeping only the last batch
    for data, label in val_data:
        data_list = gluon.utils.split_and_load(data, ctx)
        label_list = gluon.utils.split_and_load(label, ctx)
        with autograd.predict_mode():
            outputs = [net(X) for X in data_list]
            losses = [criterion(X, y) for X, y in zip(outputs, label_list)]
        accuracy += [(X.argmax(axis=1) == y.astype('float32')).mean().asscalar()
                     for X, y in zip(outputs, label_list)]
        loss_list = [l.mean().asscalar() for l in losses]
        loss += sum(loss_list) / len(loss_list)
    return loss / len(val_data), sum(accuracy) / len(accuracy)
def validate(val_data, net, criterion, ctx):
    _loss = 0.0
    _accuracy = []  # accumulate per-batch accuracy so the final average is well defined
    for data, label in val_data:
        data_list = gluon.utils.split_and_load(data, ctx)
        label_list = gluon.utils.split_and_load(label, ctx)
        with autograd.predict_mode():
            outputs = concat_and_load([net(X) for X in data_list])
            labels = concat_and_load(label_list)
            loss = criterion(outputs, labels)
        _accuracy.append(nd.mean(outputs.argmax(axis=1) == labels.astype('float32')).asscalar())
        _loss += loss.mean().asscalar()
    return _loss / len(val_data), sum(_accuracy) / len(_accuracy)
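# Hypothetical sketch of the concat_and_load helper used above: gather the
# per-device outputs onto one context and concatenate along the batch axis.
import mxnet as mx
from mxnet import nd

def concat_and_load(xs, ctx=mx.cpu()):
    return nd.concat(*[x.as_in_context(ctx) for x in xs], dim=0)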
def forward(self, inputs, begin_state=None):  # pylint: disable=arguments-differ
    """Implement the forward computation that the awd language model and cache model use.

    Parameters
    ----------
    inputs : NDArray
        input tensor with shape `(sequence_length, batch_size)`
        when `layout` is "TNC".
    begin_state : list
        initial recurrent state tensor with length equal to num_layers.
        the initial state with shape `(1, batch_size, num_hidden)`

    Returns
    -------
    out: NDArray
        output tensor with shape `(sequence_length, batch_size, input_size)`
        when `layout` is "TNC".
    out_states: list
        output recurrent state tensor with length equal to num_layers.
        the state with shape `(1, batch_size, num_hidden)`
    encoded_raw: list
        The list of outputs of the model's encoder with length equal to num_layers.
        the shape of every encoder's output `(sequence_length, batch_size, num_hidden)`
    encoded_dropped: list
        The list of outputs with dropout of the model's encoder with length equal
        to num_layers. The shape of every encoder's dropped output
        `(sequence_length, batch_size, num_hidden)`
    """
    encoded = self.embedding(inputs)
    if not begin_state:
        begin_state = self.begin_state(batch_size=inputs.shape[1])
    out_states = []
    encoded_raw = []
    encoded_dropped = []
    for i, (e, s) in enumerate(zip(self.encoder, begin_state)):
        encoded, state = e(encoded, s)
        encoded_raw.append(encoded)
        out_states.append(state)
        if self._drop_h and i != len(self.encoder) - 1:
            encoded = nd.Dropout(encoded, p=self._drop_h, axes=(0,))
            encoded_dropped.append(encoded)
    if self._dropout:
        encoded = nd.Dropout(encoded, p=self._dropout, axes=(0,))
    encoded_dropped.append(encoded)
    with autograd.predict_mode():
        out = self.decoder(encoded)
    return out, out_states, encoded_raw, encoded_dropped
def eval_step(data_tr, data_te, data_type="valid"):
    running_loss = 0.0
    eval_idxlist = list(range(data_tr.shape[0]))
    eval_N = data_tr.shape[0]
    eval_steps = len(range(0, eval_N, args.batch_size))
    n100_list, r20_list, r50_list = [], [], []
    with trange(eval_steps) as t:
        for batch_idx, start_idx in zip(t, range(0, eval_N, args.batch_size)):
            t.set_description(data_type)
            end_idx = min(start_idx + args.batch_size, eval_N)
            X_tr = data_tr[eval_idxlist[start_idx:end_idx]]
            X_te = data_te[eval_idxlist[start_idx:end_idx]]
            X_tr_inp = nd.array(X_tr.toarray()).as_in_context(ctx)
            with autograd.predict_mode():
                if model.__class__.__name__ == "MultiVAE":
                    X_out, mu, logvar = model(X_tr_inp)
                    loss = vae_loss_fn(X_tr_inp, X_out, mu, logvar, train_step.anneal)
                elif model.__class__.__name__ == "MultiDAE":
                    X_out = model(X_tr_inp)
                    loss = -nd.mean(nd.sum(nd.log_softmax(X_out) * X_tr_inp, -1))
            running_loss += loss.asscalar()
            avg_loss = running_loss / (batch_idx + 1)
            # Exclude examples from training set
            X_out = X_out.asnumpy()
            X_out[X_tr.nonzero()] = -np.inf
            n100 = NDCG_binary_at_k_batch(X_out, X_te, k=100)
            r20 = Recall_at_k_batch(X_out, X_te, k=20)
            r50 = Recall_at_k_batch(X_out, X_te, k=50)
            n100_list.append(n100)
            r20_list.append(r20)
            r50_list.append(r50)
            t.set_postfix(loss=avg_loss)
    n100_list = np.concatenate(n100_list)
    r20_list = np.concatenate(r20_list)
    r50_list = np.concatenate(r50_list)
    return avg_loss, np.mean(n100_list), np.mean(r20_list), np.mean(r50_list)
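# A minimal sketch of Recall_at_k_batch as used above, assuming the
# conventional VAE-CF evaluation helpers (Liang et al.): the fraction of each
# user's held-out items that appear among the top-k predicted scores.
import numpy as np

def Recall_at_k_batch(X_pred, heldout_batch, k=50):
    batch_users = X_pred.shape[0]
    # indices of the k highest-scoring items per user (unsorted)
    idx = np.argpartition(-X_pred, k, axis=1)[:, :k]
    X_pred_binary = np.zeros_like(X_pred, dtype=bool)
    X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx] = True
    X_true_binary = (heldout_batch > 0).toarray()
    hits = np.logical_and(X_true_binary, X_pred_binary).sum(axis=1).astype(np.float32)
    return hits / np.minimum(k, X_true_binary.sum(axis=1))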
def train_ranking(net, train_iter, test_iter, loss, trainer, test_seq_iter,
                  num_users, num_items, num_epochs, devices, evaluator,
                  candidates, eval_step=1):
    timer, hit_rate, auc = d2l.Timer(), 0, 0
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0, 1],
                            legend=['test hit rate', 'test AUC'])
    for epoch in range(num_epochs):
        metric, l = d2l.Accumulator(3), 0.
        for i, values in enumerate(train_iter):
            # values: (batch_size, user_id, observed_item_id, unobserved_item_id)
            input_data = []
            for v in values:
                # v: (user_id, observed_item_id, unobserved_item_id)
                input_data.append(gluon.utils.split_and_load(v, devices))
            with autograd.record():
                p_pos = [net(*t) for t in zip(*input_data[0:-1])]
                p_neg = [net(*t) for t in zip(*input_data[0:-2], input_data[-1])]
                ls = [loss(p, n) for p, n in zip(p_pos, p_neg)]
            [l.backward(retain_graph=False) for l in ls]
            l += sum([l.asnumpy() for l in ls]).mean() / len(devices)
            trainer.step(values[0].shape[0])
            metric.add(l, values[0].shape[0], values[0].size)
            timer.stop()
        with autograd.predict_mode():
            if (epoch + 1) % eval_step == 0:
                hit_rate, auc = evaluator(net, test_iter, test_seq_iter,
                                          candidates, num_users, num_items, devices)
                animator.add(epoch + 1, (hit_rate, auc))
    print(f'train loss {metric[0] / metric[1]:.3f}, '
          f'test hit rate {float(hit_rate):.3f}, test AUC {float(auc):.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(devices)}')
def update(self, data, batch_size, episode_num, discount_factor):
    with autograd.record():
        observations = nd.zeros((batch_size, 1, 128, 128))
        actions = nd.zeros(batch_size)
        rewards = nd.zeros_like(actions)
        next_obs = nd.zeros_like(observations)
        dones = nd.zeros_like(actions)
        for i in range(batch_size):
            observations[i] = data[i].obs
            actions[i] = data[i].action
            rewards[i] = data[i].reward
            next_obs[i] = data[i].next_obs
            dones[i] = data[i].done
        actions = actions.reshape((-1, 1))
        rewards = rewards.reshape((-1, 1))
        dones = dones.reshape((-1, 1))
        print('observations:', observations.shape)
        print('actions:', actions.shape)
        print('rewards:', rewards.shape)
        print('next observations:', next_obs.shape)
        print('dones:', dones.shape)
        # convert to numpy first: np.logical_not does not accept an mxnet NDArray
        not_dones = nd.array(np.logical_not(dones.asnumpy()).astype('int8'))
        with autograd.predict_mode():
            # keepdims=True keeps shape (batch_size, 1) so the broadcast below
            # stays elementwise instead of expanding to (batch_size, batch_size)
            next_max_action_values = nd.max(self.model(next_obs), axis=1, keepdims=True)
        target = rewards + discount_factor * next_max_action_values * not_dones
        del next_max_action_values
        obs_values = self.model(observations)
        obs_actions_values = nd.zeros_like(actions)
        for i in range(len(obs_actions_values)):
            obs_actions_values[i] = obs_values[i][actions[i]]
        del obs_values
        loss = self.loss(obs_actions_values, target)
    loss.backward()
    self.trainer.step(batch_size, True)
    return loss