def main():
    """Build the network, optionally restore weights, then run training.

    Reads the module-level ``pretrained`` path; weights are loaded only when
    it is neither ``None`` nor the empty string.
    """
    net = get_net()
    logging.info("{}".format(net))

    skip_restore = pretrained is None or pretrained == ""
    if not skip_restore:
        net.load_params(pretrained, ctx=utils.try_gpu())
        logging.info("load model:%s", pretrained)

    train_iter, test_iter = get_train_test()
    trainer, loss_fn = get_trainer(net)

    # Positional argument after the iteration budget; kept as in the
    # original call (presumably the learning-rate decay steps).
    milestones = [6000, 12000, 18000, 24000]
    utils.train(
        train_iter,
        test_iter,
        net,
        loss_fn,
        trainer,
        utils.try_gpu(),
        100000,
        milestones,
        print_batches=10,
        cpdir='models',
    )
def main():
    """Create the network, log it, optionally load weights, and train."""
    net = get_net()
    description = "{}".format(net)
    logging.info('ok')
    logging.info(description)

    # Only restore when a checkpoint path was configured at module level.
    if pretrained is not None:
        net.load_params(pretrained, ctx=utils.try_gpu())

    train_iter, test_iter = get_train_test()
    trainer, criterion = get_trainer(net)
    utils.train(
        train_iter,
        test_iter,
        net,
        criterion,
        trainer,
        utils.try_gpu(),
        1000,
        print_batches=100,
        cpdir='cp',
    )
def main():
    """Create the network, log it, optionally load weights, and train.

    Uses the module-level ``pretrained``, ``trainBatchSize`` and
    ``checkpoints`` settings.
    """
    net = get_net()
    logging.info("{}".format(net))

    if pretrained is not None:
        net.load_params(pretrained, ctx=utils.try_gpu())

    train_set, test_set = get_train_test()
    trainer, criterion = get_trainer(net)
    utils.train(
        train_set,
        test_set,
        trainBatchSize,
        net,
        criterion,
        trainer,
        utils.try_gpu(),
        1000,
        500,
        0.1,
        print_batches=100,
        chk_pts_dir=checkpoints,
    )
def main():
    """Assemble a ``vgg_stack``-based classifier and train it.

    Data comes from ``utils.load_data_fashion_mnist`` (batch size 64,
    resized to 96); the model is trained with SGD for 100 epochs.
    """
    num_outputs = 10
    # Block specification handed to vgg_stack unchanged.
    architecture = ((2, 64), (2, 128), (4, 256), (4, 512), (4, 512))

    net = nn.Sequential()
    with net.name_scope():
        # Layers are created inside the name scope, in the original order.
        layers = [
            vgg_stack(architecture),
            nn.Flatten(),
            nn.Dense(4096, activation="relu"),
            nn.Dropout(.5),
            nn.Dense(4096, activation="relu"),
            nn.Dropout(.5),
            nn.Dense(num_outputs),
        ]
        net.add(*layers)

    train_data, test_data = utils.load_data_fashion_mnist(
        batch_size=64, resize=96)

    ctx = utils.try_gpu()
    net.initialize(ctx=ctx, init=init.Xavier())

    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    optimizer_args = {'learning_rate': 0.05}
    trainer = gluon.Trainer(net.collect_params(), 'sgd', optimizer_args)

    utils.train(train_data, test_data, net, loss, trainer, ctx,
                num_epochs=100)
def __init__(self, input_dim, hidden_dim, mask_dim, delta_dim, x_mean=None):
    """Create the linear maps of one GRU-D cell.

    :param input_dim: Size of the input vector
    :param hidden_dim: Size of the hidden state
    :param mask_dim: Size of the mask vector
    :param delta_dim: Size of the delta vector
    :param x_mean: Empirical mean; supplying it marks this cell as the
        first GRU-D layer, which additionally gets input-decay parameters
    """
    super(GRUDCell, self).__init__()
    self.device = utils.try_gpu()
    self.hidden_dim = hidden_dim

    # Only the first GRU-D layer keeps the empirical mean; stacked layers
    # after it do not need it.
    self.first_layer = x_mean is not None
    if self.first_layer:
        self.x_mean = x_mean

    # All three gates consume the concatenation [input, hidden, mask].
    gate_in = input_dim + hidden_dim + mask_dim
    self.R_lin = nn.Linear(gate_in, hidden_dim)      # RESET
    self.Z_lin = nn.Linear(gate_in, hidden_dim)      # UPDATE
    self.tilde_lin = nn.Linear(gate_in, hidden_dim)  # CANDIDATE STATE

    # HIDDEN STATE DECAY PARAMETERS
    self.gamma_h_lin = nn.Linear(delta_dim, hidden_dim)

    if self.first_layer:
        # INPUT DECAY PARAMETERS
        self.gamma_x_lin = nn.Linear(delta_dim, delta_dim)

    # XAVIER INIT FOR FASTER CONVERGENCE
    for gate in (self.R_lin, self.Z_lin, self.tilde_lin):
        nn.init.xavier_normal_(gate.weight)
def purge_round():
    """Score every leader checkpoint and retire the weakest ones.

    Each checkpoint in ``LEADER_DIR`` is loaded into a ``DQNAgent`` and
    evaluated against an ensemble of all the other leaders. Everything
    ranked below the top ``NUM_LEADERS`` scores is moved to
    ``GRAVEYARD_DIR``. Does nothing when ``LEADER_DIR`` is empty.
    """
    candidate_leaders_map = {}  # {filename --> agent}

    # Load in all of the leaders
    for leader_checkpoint in os.listdir(LEADER_DIR):
        path = os.path.join(LEADER_DIR, leader_checkpoint)
        candidate_leader = try_gpu(
            DQNAgent(6, LinearSchedule(0.05, 0.05, 1), OBSERVATION_MODE,
                     lr=LR, max_grad_norm=GRAD_CLIP_NORM,
                     name=leader_checkpoint))
        # map_location keeps the tensors on CPU storage while loading.
        candidate_leader.load_state_dict(
            torch.load(path, map_location=lambda storage, loc: storage))
        candidate_leaders_map[leader_checkpoint] = candidate_leader

    # zip(*[]) yields nothing to unpack, so an empty leader directory used
    # to crash on the assignment below. There is nothing to purge then.
    if not candidate_leaders_map:
        return

    candidate_scores = []  # list[(filename, score)]
    filenames, candidate_leaders = zip(*candidate_leaders_map.items())
    for i, (filename, candidate_leader) in enumerate(
            zip(filenames, candidate_leaders)):
        # Single-argument print(...) behaves identically as a Python 2
        # print statement and as the Python 3 function.
        print("EVALUATING {}".format(candidate_leader.name))
        # Opponent is the ensemble of every leader except the candidate.
        leaders = EnsembleDQNAgent(
            candidate_leaders[:i] + candidate_leaders[i + 1:])
        candidate_scores.append(
            (filename,
             evaluate(candidate_leader, leaders, EPISODES_EVALUATE_PURGE)))

    sorted_scores = sorted(candidate_scores, key=lambda x: x[1], reverse=True)
    print("SCORES: {}".format(sorted_scores))

    for filename, score in sorted_scores[NUM_LEADERS:]:
        print("PURGING ({}, {})".format(filename, score))
        leader_path = os.path.join(LEADER_DIR, filename)
        graveyard_path = os.path.join(GRAVEYARD_DIR, filename)
        os.rename(leader_path, graveyard_path)
def __init__(self, input_dim, output_dim, x_mean, aux_op_dims=None, op_act=None):
    """
    Example classifier with 2 GRUD layers, {aux_op_dims} auxiliary target
    classifiers, and 1 primary target classifier

    :param input_dim: Size of the input vector; the hidden size is 6x this
    :param output_dim: Number of outputs of the primary classifier head
    :param x_mean: Empirical mean, converted to a tensor on the device
        returned by ``utils.try_gpu()`` and passed to the first GRU-D layer
    :param aux_op_dims: Iterable with the output size of each auxiliary
        head; ``None`` (default) or empty means no auxiliary heads
    :param op_act: Activation module used in every head; defaults to
        ``nn.LeakyReLU()``. The same instance is shared by all heads.
    """
    super(StackedGRUDClassifier, self).__init__()
    self.device = utils.try_gpu()

    # Assign input and hidden dim
    self.hidden_dim = input_dim * 6
    self.output_dim = output_dim
    self.x_mean = torch.tensor(x_mean, device=self.device, dtype=float)

    # Activation function
    self.op_act = op_act or nn.LeakyReLU()

    # GRU layers
    self.gru1 = GRUDCell(input_dim, self.hidden_dim, input_dim, input_dim, self.x_mean)
    self.gru2 = GRUDCell(self.hidden_dim, self.hidden_dim, input_dim, input_dim)

    def build_head(out_features):
        """Return a 4-layer FC head with dropout and Xavier-initialised weights."""
        hidden = self.hidden_dim
        head = nn.Sequential(
            nn.Linear(hidden, hidden // 3),
            nn.Dropout(0.3),
            self.op_act,
            nn.Linear(hidden // 3, hidden // 9),
            nn.Dropout(0.3),
            self.op_act,
            nn.Linear(hidden // 9, hidden // 27),
            nn.Dropout(0.3),
            self.op_act,
            nn.Linear(hidden // 27, out_features))
        # Linear layers sit at indices 0, 3, 6 and 9.
        for layer in head:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_normal_(layer.weight, 0.1)
        return head

    # 4 FC Layers with dropout for each Aux output
    aux_dims = [] if aux_op_dims is None else aux_op_dims
    self.aux_fc_layers = nn.ModuleList([build_head(aux) for aux in aux_dims])

    # 4 FC Layers with dropout for primary output. Previously the Xavier
    # init after this head was applied to aux_fc_layers[-1] again, leaving
    # fc_op with default init and raising IndexError without aux heads.
    self.fc_op = build_head(output_dim)
def main():
    """Build the network and data iterators, then run training."""
    net = get_net()
    train_iter, test_iter = get_train_test()
    trainer, criterion = get_trainer(net)
    utils.train(
        train_iter,
        test_iter,
        net,
        criterion,
        trainer,
        utils.try_gpu(),
        1000,
        print_batches=100,
    )
def eval_model(model, test_iter, tgt_col, nb_classes):
    """
    Evaluate ``model`` on every batch of ``test_iter``.

    Return dict containing:
    - Log Loss
    - Accuracy
    - Precision, Recall, F1
    - Cohen's Kappa
    - Matthew's Corr Coef
    - OvA AUC ROC
    - Binary Brier Loss (if multiclass, min and max label are considered)
    - PR Curve

    ``loss``, ``accuracy``, ``conf_matrix`` and ``kappa`` are computed here;
    the remaining entries are whatever ``clf_report`` returns for the
    collected predictions.

    :param model: Model exposing ``init_hidden`` and ``predict``
    :param test_iter: DataLoader yielding ``(X, y_dict)`` batches
    :param tgt_col: Key of the target tensor inside ``y_dict``
    :param nb_classes: Number of target classes (confusion-matrix size)
    :raises ValueError: if ``test_iter`` yields no batches
    """
    device = utils.try_gpu()
    loss_criterion = nn.CrossEntropyLoss()
    conf_matrix = torch.zeros(nb_classes, nb_classes, device=device)
    test_loss = 0
    accuracy = 0
    num_batches = 0
    pred_chunks = []

    model.to(device)
    model.eval()  # No dropout needed
    with torch.no_grad():  # require_grad = False
        for X, y_dict in test_iter:
            y = y_dict[tgt_col].to(device)
            h1 = model.init_hidden(test_iter.batch_size)
            h2 = model.init_hidden(test_iter.batch_size)
            yhat, h2 = model.predict(X.to(device).float(), h1, h2)
            yhat = yhat.to(device)
            _, labels = torch.max(yhat, 1)

            test_loss += loss_criterion(yhat, y).item()
            accuracy += (labels.long() == y.long()).float().mean()

            # Confusion matrix (rows: truth, cols: prediction), counted in
            # one vectorised pass instead of one Python step per sample.
            flat_index = y.view(-1).long() * nb_classes + labels.view(-1).long()
            counts = torch.bincount(flat_index, minlength=nb_classes * nb_classes)
            conf_matrix += counts.view(nb_classes, nb_classes).to(conf_matrix.dtype)

            # y, yhat, [probabilities for all classes]. Collected for every
            # batch; previously only the last batch reached clf_report.
            pred_chunks.append(
                torch.cat((torch.unsqueeze(y.float(), 1),
                           torch.unsqueeze(labels.float(), 1),
                           torch.softmax(yhat, 1).float()), 1).to('cpu'))
            num_batches += 1

    if num_batches == 0:
        model.train()
        raise ValueError("test_iter yielded no batches")

    preds = torch.cat(pred_chunks, 0)
    conf_matrix = conf_matrix.detach()
    accuracy = (accuracy / num_batches).item()

    # Agreement expected by chance, from the confusion-matrix marginals.
    total = conf_matrix.sum()
    exp_accuracy = (conf_matrix.sum(0) / total * conf_matrix.sum(1) / total).sum().item()
    kappa = (accuracy - exp_accuracy) / (1 - exp_accuracy)

    eval_scores = {'loss': test_loss / num_batches,
                   'accuracy': accuracy,
                   'conf_matrix': conf_matrix.tolist(),
                   'kappa': kappa}
    eval_scores.update(clf_report(preds))
    model.train()
    return eval_scores
def k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train,
                       learning_rate, weight_decay):
    """Run k rounds of hold-one-fold-out training with a fresh ResNet.

    For each round one fold of ``X_train``/``y_train`` is held out, the
    other folds are concatenated into the training set, a new ``ResNet(1)``
    is trained via ``Train``, and its predictions on the global
    ``X_test_new`` are printed. The current round index is published
    through the global ``global_round``. Nothing is returned.
    """
    global X_test_new, global_round, global_type, Y_test_new, global_predict
    assert k > 1

    fold_size = X_train.shape[0] // k
    train_loss_sum = 0.0
    test_loss_sum = 0.0

    for test_i in range(k):
        lo, hi = test_i * fold_size, (test_i + 1) * fold_size
        X_val_test = X_train[lo:hi, :]
        y_val_test = y_train[lo:hi]
        print('round is {}'.format(test_i))
        global_round = test_i

        # Stack every fold except the held-out one, in index order.
        X_val_train = None
        y_val_train = None
        for i in range(k):
            if i == test_i:
                continue
            X_cur_fold = X_train[i * fold_size: (i + 1) * fold_size, :]
            y_cur_fold = y_train[i * fold_size: (i + 1) * fold_size]
            if X_val_train is None:
                X_val_train, y_val_train = X_cur_fold, y_cur_fold
            else:
                X_val_train = nd.concat(X_val_train, X_cur_fold, dim=0)
                y_val_train = nd.concat(y_val_train, y_cur_fold, dim=0)

        # A new, freshly initialised network for every round.
        ctx = utils.try_gpu()
        net = ResNet(1)
        net.initialize(ctx=ctx, init=init.Xavier())

        train_loss, test_loss = Train(net, X_val_train, y_val_train,
                                      X_val_test, y_val_test, epochs,
                                      verbose_epoch, learning_rate,
                                      weight_decay)
        train_loss_sum += train_loss
        print("Round is: {}, Type is: {}, Final train loss is: {}, Test loss is: {}, target is: {}".format(global_round, global_type, train_loss, test_loss, Y_test_new))

        preds = net(X_test_new).asnumpy().reshape(1, -1)[0]
        print(preds)
def main(config):
    """Build model, data loaders and trainer from ``config`` and train.

    ``config`` is mutated in place: the alphabet path is replaced by the
    loaded alphabet string, the LR-scheduler step is scaled by the number
    of training batches, and the model name is appended to ``name``.

    Raises:
        NotImplementedError: if the prediction type is not ``'CTC'``.
    """
    dataset_cfg = config['data_loader']['args']['dataset']

    # When the alphabet entry is a file on disk, load it and keep its
    # string form in the config instead of the path.
    if os.path.isfile(dataset_cfg['alphabet']):
        dataset_cfg['alphabet'] = str(np.load(dataset_cfg['alphabet']))

    prediction_type = config['arch']['args']['prediction']['type']
    num_class = len(dataset_cfg['alphabet'])

    # Loss setup
    if prediction_type != 'CTC':
        raise NotImplementedError
    criterion = CTCLoss()

    ctx = try_gpu(config['trainer']['gpus'])
    model = get_model(num_class, config['arch']['args'])
    model.hybridize()
    model.initialize(ctx=ctx)

    img_w = dataset_cfg['img_w']
    img_h = dataset_cfg['img_h']
    max_label_len = model.get_batch_max_length(img_h=img_h, img_w=img_w, ctx=ctx)
    train_loader, val_loader = get_dataloader(
        config['data_loader']['type'],
        config['data_loader']['args'],
        num_label=max_label_len)

    # The configured step is multiplied by the number of training batches.
    config['lr_scheduler']['args']['step'] *= len(train_loader)
    config['name'] = config['name'] + '_' + model.model_name

    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      val_loader=val_loader,
                      ctx=ctx)
    trainer.train()
def get_net():
    """Import ``symbol.resnet``, build its network, print and return it."""
    symbol_module = import_module('symbol.resnet')
    build = symbol_module.get_symbol
    net = build(classNum, utils.try_gpu())
    print(net)
    return net
def main():
    """Build the network and data iterators, then run training."""
    net = get_net()
    train_iter, test_iter = get_train_test()
    trainer, criterion = get_trainer(net)
    utils.train(
        train_iter,
        test_iter,
        net,
        criterion,
        trainer,
        utils.try_gpu(),
        1000,
        print_batches=100,
    )
def get_net():
    """Import ``symbol.ninnet``, build its network, print and return it."""
    symbol_module = import_module('symbol.ninnet')
    build = symbol_module.get_symbol
    net = build(classNum, utils.try_gpu())
    print(net)
    return net
def train_epoch(model, train_iter, tgt_col, aux_cols, criterion, optimizer, aux_alpha, tr_alpha,
                scheduler=None, print_every=10, plotter=None):
    """
    Train ``model`` for one pass over ``train_iter`` with a multi-task loss.

    The loss is a weighted sum of the output (OP) loss, the combined
    auxiliary (Aux) loss and the per-timestep target-replication (TR) loss,
    with weights ``1``, ``aux_alpha`` and ``tr_alpha`` normalised to sum to 1.

    :param model: Model called as ``model(X, h1, h2)`` and returning
        ``(pred_op, pred_tr, pred_aux, h2)``
    :param train_iter: DataLoader yielding ``(X, y_dict)``; its
        ``batch_size`` attribute is used for hidden-state and TR shapes
    :param tgt_col: Key of the OP target inside ``y_dict``
    :param aux_cols: Keys of the Aux targets inside ``y_dict``
    :param criterion: Loss used for both OP and TR predictions
    :param optimizer: Optimizer stepped once per minibatch
    :param aux_alpha: Weight for Aux Loss
    :param tr_alpha: Weight for TR Loss
    :param scheduler: Accepted for interface compatibility; not used here
    :param print_every: Report progress every this many minibatches
    :param plotter: Optional object with ``plot_grad_flow``; skipped if None
    :return: ``utils.Accumulator`` holding
        (batches, loss, OP loss, TR loss, Aux loss) sums
    """
    def get_aux_loss(pred_aux, true_aux):
        """Sum the BCE-with-logits loss over all auxiliary targets."""
        aux_criterion = nn.BCEWithLogitsLoss()  # MultiLabel
        combined_aux_loss = 0
        for truth, pred in zip(true_aux, pred_aux):
            # 1-D targets get a trailing dimension before the loss call.
            if len(truth.size()) == 1:
                truth = torch.unsqueeze(truth, 1)
            combined_aux_loss += aux_criterion(pred, truth)
        return combined_aux_loss

    device = utils.try_gpu()
    metrics = utils.Accumulator(5)  # batch, loss, outputloss, trloss, auxloss
    batch_size = train_iter.batch_size
    denom = 1 + aux_alpha + tr_alpha
    mtl_loss_weights = [1 / denom, aux_alpha / denom, tr_alpha / denom]

    for batch, (X, y_dict) in enumerate(train_iter):
        X = X.to(device)
        # A plain dict of tensors has no .to(); move its values one by one.
        if hasattr(y_dict, 'to'):
            y_dict = y_dict.to(device)
        else:
            y_dict = {key: value.to(device) for key, value in y_dict.items()}

        # GET LABELS
        true_op = y_dict[tgt_col]  # OP target tensor
        true_aux = [y_dict[ac] for ac in aux_cols]  # List of Aux target tensors

        # GET HIDDENS
        h1 = model.init_hidden(batch_size)
        h2 = model.init_hidden(batch_size)

        # FORWARD PASS
        pred_op, pred_tr, pred_aux, h2 = model(X, h1, h2)

        # OP LOSS
        op_loss = criterion(pred_op, true_op)  # Output Loss

        # TR LOSS | Reshape replicated targets and predictions for loss
        # compute. No linear scaling.
        seq_len = X.size(2)
        true_tr = torch.unsqueeze(true_op, 1).repeat(1, seq_len).view(batch_size * seq_len)
        pred_tr = pred_tr.view(batch_size * seq_len, -1)  # [batch_size*seq_len, C]
        tr_loss = criterion(pred_tr.to(device), true_tr)

        # AUX LOSS
        aux_loss = get_aux_loss(pred_aux, true_aux)

        # COMBINED LOSS: weighted combination of OP, Aux, TR loss
        loss = (mtl_loss_weights[0] * op_loss
                + mtl_loss_weights[1] * aux_loss
                + mtl_loss_weights[2] * tr_loss)

        # BACKPROP
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # STORE METRICS. float() handles both a loss tensor and the plain
        # 0 returned by get_aux_loss when there are no auxiliary targets.
        metrics.add(1, loss.item(), op_loss.item(), tr_loss.item(), float(aux_loss))

        if batch % print_every == 0:
            # plotter defaults to None; only plot when one was supplied.
            if plotter is not None:
                plotter.plot_grad_flow(model.named_parameters())
            print(f"Minibatch:{batch} OPLoss:{metrics[2]/metrics[0]} TRLoss:{metrics[3]/metrics[0]} AuxLoss:{metrics[4]/metrics[0]} AggLoss:{metrics[1]/metrics[0]} Examples seen: {metrics[0]*batch_size}")

    return metrics
else: with open('../input/GD_EM.plk', "rb") as f: em = pickle.load(f) print('updating net...') em = array(em, ctx=mx.cpu()) kf_label = np.ones(train_label.shape) for i in range(train_label.shape[1]): kf_label[:, i] = 2**i kf_label = np.sum(kf_label, axis=1) kf = StratifiedKFold(n_splits=args.kfold, shuffle=True) for i, (inTr, inTe) in enumerate(kf.split(train_data, kf_label)): print('training fold: ', i) # ctx = [mx.gpu(0), mx.gpu(1)]# , mx.gpu(4), mx.gpu(5)] ctx = [utils.try_gpu(), utils.try_gpu()] net = net_define_eu() xtr = train_data[inTr] xte = train_data[inTe] ytr = train_label[inTr] yte = train_label[inTe] data_iter = NDArrayIter(data=xtr, label=ytr, batch_size=args.batch_size, shuffle=True) val_data_iter = NDArrayIter(data=xte, label=yte, batch_size=args.batch_size, shuffle=False)
def get_net():
    """Import ``symbol.convnet`` and return the network it builds."""
    symbol_module = import_module('symbol.convnet')
    build = symbol_module.get_symbol
    return build(classNum, utils.try_gpu())
mean[0,:,:] = 0.4914 mean[1,:,:] = 0.4822 mean[2,:,:] = 0.4465 std = np.zeros(dataShape) std[0,:,:] = 0.2023 std[1,:,:] = 0.1994 std[2,:,:] = 0.2010 def test_transform(X,Y): out = X.astype(np.float32)/255.0 out = np.transpose(out,(2,0,1)) #pdb.set_trace() #return (mx.image.color_normalize(out,np.asarray([0.4914, 0.4822, 0.4465]), np.asarray([0.2023, 0.1994, 0.2010])),Y) return (mx.image.color_normalize(out.asnumpy(),mean,std),Y) ctx = utils.try_gpu() mod = import_module('symbol.resnet18') net = mod.get_symbol(classNum,ctx) net.load_params(pretrained,ctx=ctx) test_ds = mx.gluon.data.vision.ImageFolderDataset( os.path.join(inputroot, 'test'), flag=1, transform = test_transform) loader = mx.gluon.data.DataLoader test_data = loader( test_ds, testBatchSize, shuffle=False, last_batch='keep') preds = [] for data, label in test_data: output = net(data.as_in_context(ctx)) preds.extend(output.argmax(axis=1).astype(int).asnumpy()) hit = 0
def get_net():
    """Import ``symbol.facenet`` and return the network it builds."""
    symbol_module = import_module('symbol.facenet')
    build = symbol_module.get_symbol
    return build(outputNum, utils.try_gpu(), verbose=False)
def challenger_round():
    # Train a challenger ensemble against the current leader ensemble for
    # TRAIN_FRAMES frames, saving challenger checkpoints into LEADER_DIR
    # every SAVE_FREQ frames. Python 2 code (print statements, xrange).
    # Reads the module-level `env` and configuration constants.
    challengers = []
    leaders = []
    leader_checkpoints = os.listdir(LEADER_DIR)
    # Need to share the same schedule with all challengers, so they all anneal
    # at same rate
    epsilon_schedule = LinearSchedule(EPS_START, EPS_END, TRAIN_FRAMES)
    for i in xrange(NUM_LEADERS):
        challenger = try_gpu(
            DQNAgent(6, epsilon_schedule, OBSERVATION_MODE, lr=LR, max_grad_norm=GRAD_CLIP_NORM))
        if i < len(leader_checkpoints):
            # A checkpoint exists for this slot: challenger and leader both
            # start from the same saved weights.
            leader = try_gpu(
                DQNAgent(6, LinearSchedule(0.1, 0.1, 500000), OBSERVATION_MODE))
            leader_path = os.path.join(LEADER_DIR, leader_checkpoints[i])
            print "LOADING CHECKPOINT: {}".format(leader_path)
            challenger.load_state_dict(
                torch.load(leader_path, map_location=lambda storage, loc: storage))
            leader.load_state_dict(
                torch.load(leader_path, map_location=lambda storage, loc: storage))
        else:
            # No checkpoint for this slot: the challenger keeps its fresh
            # weights and plays against a RandomAgent.
            leader = RandomAgent(6)
            print "INITIALIZING NEW CHALLENGER AND LEADER"
        challengers.append(challenger)
        leaders.append(leader)
    if CHALLENGER_DIR is not None:
        # Replaces the challengers built above with agents loaded from
        # CHALLENGER_DIR.
        challengers = []
        # Load in all of the leaders
        for checkpoint in os.listdir(CHALLENGER_DIR):
            path = os.path.join(CHALLENGER_DIR, checkpoint)
            print "LOADING FROM CHALLENGER_DIR: {}".format(path)
            challenger = try_gpu(
                DQNAgent(6, LinearSchedule(0.05, 0.05, 1), CHALLENGER_OBSERVATION_MODE, lr=LR, max_grad_norm=GRAD_CLIP_NORM, name=checkpoint))
            challenger.load_state_dict(
                torch.load(path, map_location=lambda storage, loc: storage))
            challengers.append(challenger)
    challenger = EnsembleDQNAgent(challengers)
    leader = EnsembleDQNAgent(leaders)
    if OPPONENT is not None or HUMAN:
        leader = NoOpAgent()
    replay_buffer = ReplayBuffer(1000000)
    # Rolling window of the most recent 1000 episode rewards.
    rewards = collections.deque(maxlen=1000)
    frames = 0  # number of training frames seen
    episodes = 0  # number of training episodes that have been played
    with tqdm(total=TRAIN_FRAMES) as progress:
        # Each loop completes a single episode
        while frames < TRAIN_FRAMES:
            states = env.reset()
            challenger.reset()
            leader.reset()
            episode_reward = 0.
            episode_frames = 0
            # Each loop completes a single step, duplicates _evaluate() to
            # update at the appropriate frame #s
            for _ in xrange(MAX_EPISODE_LENGTH):
                frames += 1
                episode_frames += 1
                action1 = challenger.act(states[0])
                action2 = leader.act(states[1])
                next_states, reward, done = env.step(action1, action2)
                episode_reward += reward
                # NOTE: state and next_state are LazyFrames and must be
                # converted to np.arrays
                replay_buffer.add(
                    Experience(states[0], action1._action_index, reward, next_states[0], done))
                states = next_states
                # Learn from a batch of 32 on every 4th frame once the
                # buffer holds more than 50000 experiences.
                if len(replay_buffer) > 50000 and \
                        frames % 4 == 0:
                    experiences = replay_buffer.sample(32)
                    challenger.update_from_experiences(experiences)
                if frames % 10000 == 0:
                    challenger.sync_target()
                if frames % SAVE_FREQ == 0:
                    # TODO: Don't access internals
                    for agent in challenger._agents:
                        path = os.path.join(LEADER_DIR, agent.name + "-{}".format(frames))
                        print "SAVING CHECKPOINT TO: {}".format(path)
                        torch.save(agent.state_dict(), path)
                    #path = os.path.join(
                    #    LEADER_DIR, challenger.name + "-{}".format(frames))
                    #torch.save(challenger.state_dict(), path)
                if frames >= TRAIN_FRAMES:
                    break
                if done:
                    break
            # NOTE(review): both prints are assumed to belong to this
            # periodic branch - confirm against the original layout.
            if episodes % 300 == 0:
                print "Evaluation: {}".format(
                    evaluate(challenger, leader, EPISODES_EVALUATE_TRAIN))
                print "Episode reward: {}".format(episode_reward)
            episodes += 1
            rewards.append(episode_reward)
            # Extend the challenger's own stats with run-level figures and
            # show them on the progress bar.
            stats = challenger.stats
            stats["Avg Episode Reward"] = float(sum(rewards)) / len(rewards)
            stats["Num Episodes"] = episodes
            stats["Replay Buffer Size"] = len(replay_buffer)
            progress.set_postfix(stats, refresh=False)
            progress.update(episode_frames)
            episode_frames = 0
EMBED_SIZE = hp.EMBED_SIZE WORD_HIDDEN_SIZE = hp.WORD_HIDDEN_SIZE WORD_NLAYERS = hp.WORD_NLAYERS SENTENCE_HIDDEN_SIZE = hp.SENTENCE_HIDDEN_SIZE SENTENCE_NLAYERS = hp.SENTENCE_NLAYERS NDOC_DIMS = hp.NDOC_DIMS LR = hp.LR VOCAB_PATH = hp.VOCAB_PATH SENT_RNN_MODEL_PATH = hp.SENT_RNN_MODEL_PATH ENCODER_MODEL_PATH = hp.ENCODER_MODEL_PATH CTX = try_gpu() PORT = hp.PORT app = Flask(__name__) word_vocab = None encoder = None sent_rnn = None @app.route('/') def do_summarize(): source = '' src_type = '' json_result = {"status": "success"}
choices=None, help="filepath to image", metavar=None, ) args = parser.parse_args() im = Image.open(args.image[0]) torch.set_grad_enabled(False) im = torchvision.transforms.functional.to_tensor(im) H, W = im.shape[1:3] im = try_gpu(im) im = im.unsqueeze(0) # box filter im_box_filterd = box_filter(im, 5, 2) im_box_filterd = im_box_filterd.squeeze() im_box_filterd = torchvision.transforms.functional.to_pil_image( im_box_filterd) im_box_filterd.save("output-box-filtered.jpg") # shift filter im_shift_filterd = shift_filter(im, 5, 2)
import utils
from tools.RNNLanguageManager import *
from samples.lyrics.common import read_lyrics

if __name__ == "__main__":
    # Parameters are saved next to this script, named after it.
    filename = utils.get_file_name(__file__) + ".params"
    indices, index_to_char, char_to_index, _ = read_lyrics()

    parameter = RNNLanguageParameter(
        num_steps=35,
        num_epochs=10,
        indices=indices,
        index_to_char=index_to_char,
        char_to_index=char_to_index,
        filename=filename,
        context=utils.try_gpu(),
        rnnType=rnn.RNN,
    )
    manager = RNNLanguageManager(parameter)
    manager.initialize()

    def per_epoch_finish_handler(param):
        """Print the epoch info and a 50-step sample from a random prefix."""
        print(param)
        # Pick a random position and use the two characters found there
        # as the generation prefix.
        start = random.randint(0, len(indices) - 2)
        window = indices[start: start + 2]
        prefix = [index_to_char[char_index] for char_index in window]
        print(manager.predict(prefix, 50))

    parameter.per_epoch_finish_handler = per_epoch_finish_handler
    manager.train()
encoder_blk = EncoderBlock(24, 48, 8, 0.5) logging.info(f'endcoer output shape: {encoder_blk(X, valid_length).shape}') logging.info('Transformer 编码器 ...') encoder = TransformerEncoder(200, 24, 48, 8, 2, 0.5) logging.info( f'encoder output shape: {encoder(torch.ones((2, 100)).long(), valid_length).shape}' ) if __name__ == '__main__': # test() embed_size, embedding_size, num_layers, dropout = 32, 32, 2, 0.05 batch_size, num_steps = 64, 10 lr, num_epochs, ctx = 0.005, 250, try_gpu() logging.info(f'using {ctx} ...') num_hiddens, num_heads = 64, 4 src_vocab, tgt_vocab, train_iter = load_data_nmt(batch_size, num_steps) encoder = TransformerEncoder(len(src_vocab), embedding_size, num_hiddens, num_heads, num_layers, dropout) decoder = TransformerDecoder(len(src_vocab), embedding_size, num_hiddens, num_heads, num_layers, dropout) model = EncoderDecoder(encoder, decoder) train_s2s_ch9(model, train_iter, lr, num_epochs, ctx) # 测试模型 model.eval() for sentence in ['Go .', 'Wow !', "I'm OK .", 'I won !']:
decoder = Seq2SeqAttentionDecoder(vocab_size=10, embed_size=8, num_hiddens=16, num_layers=2) X = torch.zeros((4, 7), dtype=torch.long) print("batch size=4\nseq_length=7\nhidden dim=16\nnum_layers=2\n") print('encoder output size:', encoder(X)[0].size()) print('encoder hidden size:', encoder(X)[1][0].size()) print('encoder memory size:', encoder(X)[1][1].size()) state = decoder.init_state(encoder(X), None) out, state = decoder(X, state) print(out.shape, len(state), state[0].shape, len(state[1]), state[1][0].shape) if __name__ == '__main__': # test() embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.0 batch_size, num_steps = 64, 10 lr, num_epochs, ctx = 0.005, 500, try_gpu() src_vocab, tgt_vocab, train_iter = load_data_nmt(batch_size, num_steps) encoder = Seq2SeqEncoder( len(src_vocab), embed_size, num_hiddens, num_layers, dropout) decoder = Seq2SeqAttentionDecoder( len(tgt_vocab), embed_size, num_hiddens, num_layers, dropout) model = EncoderDecoder(encoder, decoder) train_s2s_ch9(model, train_iter, lr, num_epochs, ctx) for sentence in ['Go .', 'Good Night !', "I'm OK .", 'I won !']: print(sentence + ' => ' + predict_s2s_ch9( model, sentence, src_vocab, tgt_vocab, num_steps, ctx))
from mxnet import nd, init
from mxnet import gluon
from mxnet.gluon import nn, rnn
from utils import find_wordnet_rel, try_gpu
import random

# class KnowledgeEnrichedCoAttention(nn.Block):
#     def __init__(self, **kwargs):
#         super(KnowledgeEnrichedCoAttention, self).__init__(**kwargs)
#         self.kb = init_kb()
#         with self.name_scope():
#             self.attention_h = nn.soft

# True when the array has no non-zero entry. Kept at module level for any
# other users; F below no longer needs it.
iszero = lambda x: sum(x != 0).asscalar() == 0

# Default context, resolved once at import time.
_ctx = try_gpu()


def F(m, ctx=_ctx):
    """Return a 0/1 indicator of which feature vectors are non-zero.

    m: (batch_size, seq_len, seq_len, 5)

    The result has shape ``m.shape[:3]``, dtype float32, and lives on
    ``ctx``; entry ``[b, i, j]`` is 1 when ``m[b, i, j]`` contains at least
    one non-zero value and 0 otherwise.

    The reduction runs as array operations instead of a Python loop over
    every cell, which forced one device synchronisation per cell.
    """
    nonzero_count = nd.sum(m != 0, axis=3)
    indicator = nonzero_count != 0
    return indicator.astype('float32').as_in_context(ctx)
lr_step_epochs=None, # lr_step_epochs = '26,28,30,32,34', # lr_decay, the ratio to reduce lr on each step. e.g. lr_decay = 0.1. lr_decay=0.1, # chechpoint # load_epoch. Load trained model, load_epoch is the epoch of the model. e.g. load_epoch = 28. load_epoch= 0, # Load trained model. if load_epoch is 0, represent from random init to train. # model_prefix, the prefix of save checkp, e.g., SSD_300x300.params. model_prefix='model/SSD_300x300', ) args = parser.parse_args() # context ctx = utils.try_gpu() # network net = SSD(num_classes=args.num_classes, sizes=args.sizes, ratios=args.ratios) # There are 21 classes, the first class is background. label form 0 to 20. # In the succeeding process, num_classes will plus 1. # init weight and bias net.initialize(ctx=ctx, init=mx.initializer.Xavier(magnitude=2)) # initialize() define in mxnet/gulon/parameter.py. # Loss. Loss will defined in utils.py. # training and validating data. Use data.py to load data iter.
def train_model(train_iter, valid_iter, X_Mean, tgt_col, aux_cols, epochs, modelname, nb_classes,
                lr=0.001, aux_alpha=0, tr_alpha=0, class_weights=None, l2=None, model=None, print_every=100):
    """
    Train a GRUD model

    :param train_iter: Train DataLoader
    :param valid_iter: Valid DataLoader
    :param X_Mean: Empirical Mean values for each dimension in the input (only important for variables with missing data)
    :param tgt_col: (str) Name of OP target
    :param aux_cols: list(str) of names of Aux targets.
    :param epochs: Int of epochs to run
    :param modelname: Unique name for this model
    :param nb_classes: Number of OP target classes
    :param lr: Learning rate for Adam
    :param aux_alpha: Weight for Aux Loss
    :param tr_alpha: Weight for TR Loss
    :param class_weights (optional): Weights to scale OP Loss (for skewed datasets)
    :param l2 (optional): Weight decay for Adam; None means 0
    :param model (optional): Existing model to continue training; when None
        a new StackedGRUDClassifier is built from the first training batch
    :param print_every: Minibatch reporting interval passed to train_epoch
    :return: The trained model
    """
    device = utils.try_gpu()

    # Set directory for model outputs. exist_ok creates 'gradflow' even
    # when the model directory itself already exists.
    model_dir = os.path.join('models', modelname)
    os.makedirs(os.path.join(model_dir, 'gradflow'), exist_ok=True)

    # Initialize plotter class for gradflow
    plotter = TrainPlot(modelname)

    # Initialize model and learners. Explicit checks instead of `or` so an
    # array of weights does not trip over ambiguous truthiness.
    if class_weights is None or len(class_weights) == 0:
        class_weights = [1] * nb_classes
    l2 = l2 or 0

    if model is None:
        # Peek at one batch to size the network.
        X, y = next(iter(train_iter))
        input_dim = X.size(-1)
        # if-else for targets with single dimension: their size(-1) would
        # be the batch size, so they count as 1 output.
        aux_dim = [(y[aux_c].size(-1) if len(y[aux_c].size()) > 1 else 1)
                   for aux_c in aux_cols]
        model = StackedGRUDClassifier(input_dim, nb_classes, X_Mean, aux_dim)
    model = model.to(device=device, dtype=torch.float)

    criterion = nn.CrossEntropyLoss(weight=torch.Tensor(class_weights).to(device=device))
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 30, 0.85)

    # Store training metrics
    train_meta = {
        'train_losses': [],
        'valid_losses': [],
        'min_valid_loss': sys.maxsize,
        'epoch_results': [],
    }

    for epoch in range(epochs):
        # TRAIN EPOCH
        t0 = time.time()
        metrics = train_epoch(model, train_iter, tgt_col, aux_cols, criterion, optimizer,
                              aux_alpha, tr_alpha, scheduler,
                              print_every=print_every, plotter=plotter)
        # Learning-rate decay is frozen after 200 epochs.
        if epoch < 200:
            scheduler.step()
        print(f"Epoch trained in {time.time()-t0}")

        # EVALUATE AGAINST VALIDATION SET
        t0 = time.time()
        eval_scores = eval_model(model, valid_iter, tgt_col, nb_classes)
        train_meta['epoch_results'].append(eval_scores)
        print(f"Evaluation done in {time.time()-t0}")
        t0 = time.time()

        # SAVE CHECKPOINT on every improvement and every 20th epoch.
        is_best = eval_scores['loss'] < train_meta['min_valid_loss']
        if is_best or epoch % 20 == 0:
            train_meta['min_valid_loss'] = min(eval_scores['loss'], train_meta['min_valid_loss'])
            checkpoint = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            # NOTE(review): the second argument is assumed to be save_ckp's
            # "is best" flag. It was hard-coded to True, so periodic saves
            # also replaced best_model.pt with a worse model - confirm.
            save_ckp(checkpoint, is_best,
                     os.path.join(model_dir, 'checkpoint.pt'),
                     os.path.join(model_dir, 'best_model.pt'))
            print(f"Checkpoint created")

        # LOG PROGRESS
        print("\n\n================================================================================================================\n")
        print(f"Epoch: {epoch+1} TrainLoss: {metrics[1]/metrics[0]} ValidLoss: {eval_scores['loss']} ValidAcc:{eval_scores['accuracy']} WallTime: {datetime.datetime.now()}\n")
        print(eval_scores['conf_matrix'])
        print(pd.DataFrame.from_dict(eval_scores['clf_report']))
        print(eval_scores['brier'])
        print(eval_scores['roc'])
        print("\n\n================================================================================================================\n")

        # SAVE TRAINING PROGRESS DATA
        train_meta['train_losses'].append(metrics[1]/metrics[0])
        train_meta['valid_losses'].append(eval_scores['loss'])
        utils.pkl_dump(train_meta, os.path.join(model_dir, 'trainmeta.dict'))
        print(f"post eval dumping took {time.time()-t0}")

        # PLOT LOSSES
        t0 = time.time()
        plt.figure(1)
        # Clear the previous epoch's curves; otherwise every epoch draws
        # another pair of lines on top of the same figure.
        plt.clf()
        plt.plot(train_meta['train_losses'])
        plt.plot(train_meta['valid_losses'])
        plt.xlabel("Minibatch")
        plt.ylabel("Loss")
        plt.savefig(os.path.join(model_dir, modelname+'_lossPlot.png'), bbox_inches='tight')
        print(f"plotting took {time.time()-t0}")

    return model