def test(args):
    test_x, test_y, n_items = load_test(args.max_len)
    args.n_items = n_items
    test_batches = len(test_x) // args.batch_size
    HR, NDCG, MRR = 0.0, 0.0, 0.0
    test_loss = 0.0
    model = RCNN(args)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Restore model from {} successfully!'.format(args.checkpoint_dir))
        else:
            print('Restore model from {} failed!'.format(args.checkpoint_dir))
            return

        for i in range(test_batches):
            x = test_x[i * args.batch_size:(i + 1) * args.batch_size]
            y = test_y[i * args.batch_size:(i + 1) * args.batch_size]
            fetches = [model.sum_loss, model.top_k_index, model.y_labels]
            feed_dict = {model.X: x, model.Y: y}
            loss, top_k_index, labels = sess.run(fetches, feed_dict)
            test_loss += loss
            hr, ndcg, mrr = cal_eval(top_k_index, labels)
            HR += hr
            NDCG += ndcg
            MRR += mrr

        print('loss:{:.6f}\tHR@{}:{:.6f}\tNDCG@{}:{:.6f}\tMRR@{}:{:.6f}'.format(
            test_loss, args.top_k, HR, args.top_k, NDCG, args.top_k, MRR))
def main(args):
    model = RCNN(vocab_size=args.vocab_size,
                 embedding_dim=args.embedding_dim,
                 hidden_size=args.hidden_size,
                 hidden_size_linear=args.hidden_size_linear,
                 class_num=args.class_num,
                 dropout=args.dropout).to(args.device)

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model, dim=0)

    train_texts, train_labels = read_file(args.train_file_path)
    word2idx = build_dictionary(train_texts, vocab_size=args.vocab_size)
    logger.info('Dictionary Finished!')

    full_dataset = CustomTextDataset(train_texts, train_labels, word2idx)
    num_train_data = len(full_dataset) - args.num_val_data
    train_dataset, val_dataset = random_split(
        full_dataset, [num_train_data, args.num_val_data])
    train_dataloader = DataLoader(dataset=train_dataset,
                                  collate_fn=lambda x: collate_fn(x, args),
                                  batch_size=args.batch_size,
                                  shuffle=True)
    valid_dataloader = DataLoader(dataset=val_dataset,
                                  collate_fn=lambda x: collate_fn(x, args),
                                  batch_size=args.batch_size,
                                  shuffle=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    train(model, optimizer, train_dataloader, valid_dataloader, args)
    logger.info('******************** Train Finished ********************')

    # Test
    if args.test_set:
        test_texts, test_labels = read_file(args.test_file_path)
        test_dataset = CustomTextDataset(test_texts, test_labels, word2idx)
        test_dataloader = DataLoader(dataset=test_dataset,
                                     collate_fn=lambda x: collate_fn(x, args),
                                     batch_size=args.batch_size,
                                     shuffle=True)
        model.load_state_dict(
            torch.load(os.path.join(args.model_save_path, "best.pt")))
        _, accuracy, precision, recall, f1, cm = evaluate(
            model, test_dataloader, args)
        logger.info('-' * 50)
        logger.info(
            f'|* TEST SET *| |ACC| {accuracy:>.4f} |PRECISION| {precision:>.4f} |RECALL| {recall:>.4f} |F1| {f1:>.4f}'
        )
        logger.info('-' * 50)
        logger.info('---------------- CONFUSION MATRIX ----------------')
        for i in range(len(cm)):
            logger.info(cm[i])
        logger.info('--------------------------------------------------')
def train(config):
    train_data = pickle.load(open(os.path.join(config.data_path, config.train_name), "rb"))
    dev_data = pickle.load(open(os.path.join(config.data_path, config.dev_name), "rb"))
    test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(open(os.path.join(config.data_path, config.vocabulary_name), "rb"))
    # load w2v data
    weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if config.task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                                  output_dim=config.class_num, hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif config.task_name == "cnn_w2v":
        text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                             class_num=config.class_num, kernel_num=config.kernel_num,
                             kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                             static=config.static, in_channels=config.in_channels,
                             weight=weight)
    elif config.task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    accuracy = AccuracyMetric(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model, loss=CrossEntropyLoss(),
                      batch_size=config.batch_size, check_code_level=0,
                      metrics=accuracy, n_epochs=config.epoch,
                      dev_data=dev_data, save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop])
    trainer.train()

    # test result
    tester = Tester(test_data, text_model, metrics=accuracy)
    tester.test()
def start_training(train_arguments, folder_index):
    rcnn = RCNN(train_arguments.pos_loss_method,
                train_arguments.loss_weight_lambda,
                train_arguments.prevent_overfitting_method).cuda()
    rcnn.train()  # train mode could use dropout.

    npz_path = train_arguments.get_train_data_path(folder_index)
    npz = np.load(npz_path)
    print("\n\n\nload from: ", npz_path)
    train_arguments.train_sentences = npz['train_sentences']
    train_arguments.train_sentence_info = npz['train_sentence_info']
    train_arguments.train_roi = npz['train_roi']
    train_arguments.train_cls = npz['train_cls']
    if train_arguments.normalize:
        if train_arguments.dx_compute_method == "left_boundary":
            train_arguments.train_tbbox = npz["train_norm_lb_tbbox"]
        else:
            train_arguments.train_tbbox = npz["train_norm_tbbox"]
    else:
        train_arguments.train_tbbox = npz['train_tbbox']

    train_arguments.train_sentences = t.Tensor(train_arguments.train_sentences)
    train_arguments.train_set = np.random.permutation(train_arguments.train_sentences.size(0))  # like shuffle

    if train_arguments.prevent_overfitting_method.lower() == "l2 regu":
        if train_arguments.partial_l2_penalty:
            optimizer = optim.Adam([
                {"params": rcnn.conv1.parameters(), "weight_decay": 0},
                {"params": rcnn.cls_fc1.parameters(), "weight_decay": train_arguments.l2_beta},
                {"params": rcnn.cls_score.parameters(), "weight_decay": train_arguments.l2_beta},
                {"params": rcnn.bbox_fc1.parameters(), "weight_decay": train_arguments.l2_beta},
                {"params": rcnn.bbox.parameters(), "weight_decay": train_arguments.l2_beta}
            ], lr=train_arguments.learning_rate)
        else:
            optimizer = optim.Adam(rcnn.parameters(), lr=train_arguments.learning_rate,
                                   weight_decay=train_arguments.l2_beta)
    else:  # dropout optimizer
        optimizer = optim.Adam(rcnn.parameters(), lr=train_arguments.learning_rate)
    rcnn.optimizer = optimizer

    for epoch_time in range(train_arguments.max_iter_epoch):
        print('===========================================')
        print('[Training Epoch {}]'.format(epoch_time + 1))
        train_epoch(train_arguments, rcnn)
        if epoch_time >= train_arguments.start_save_epoch:
            save_directory = train_arguments.get_save_directory(folder_index)
            save_path = save_directory + "model_epoch" + str(epoch_time + 1) + ".pth"
            t.save(rcnn.state_dict(), save_path)
            print("Model saved in ", save_path)
def main(args):
    model = RCNN(vocab_size=args.vocab_size,
                 embedding_dim=args.embedding_dim,
                 hidden_size=args.hidden_size,
                 hidden_size_linear=args.hidden_size_linear,
                 class_num=args.class_num,
                 dropout=args.dropout)

    train_texts, train_labels = read_file(args.train_file_path)
    test_texts, test_labels = read_file(args.test_file_path)
    word2idx = build_dictionary(train_texts, vocab_size=args.vocab_size)
    logger.info('Dictionary Finished!')

    x_train, y_train = CustomTextDataset(train_texts, train_labels, word2idx)
    x_test, y_test = CustomTextDataset(test_texts, test_labels, word2idx)
    num_train_data = len(x_train)

    optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
    hist = train(model, optimizer, x_train, x_test, y_train, y_test, args)
    logger.info('******************** Train Finished ********************')
    tf.saved_model.save(model, "/tmp/module_no_signatures")
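# A minimal sketch of restoring the artifact that main() exports above, using the
# standard tf.saved_model.load API. The export path is taken from main(); calling
# the restored object on new data is omitted because this RCNN's call signature is
# not shown in the original.
import tensorflow as tf

loaded_model = tf.saved_model.load("/tmp/module_no_signatures")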
def train_initialization(domain, classifier_name, all_data, data_type):
    train_data, test_data, Final_test, Final_test_original, Final_test_gt, unique_vocab_dict, unique_vocab_list = all_data

    output_size = 2
    batch_size = 32
    pre_train = True
    embedding_tune = True
    if data_type == 'train':
        epoch_num = 10 if domain == 'captions' else 4
    else:  # 'dev'
        epoch_num = 3  # sample test
    embedding_length = 300 if domain != 'captions' else 50
    hidden_size = 256 if domain != 'captions' else 32

    learning_rate = collections.defaultdict(dict)
    learning_rate['amazon'] = {'LSTM': 0.001, 'LSTMAtten': 0.0002, 'RNN': 0.001,
                               'RCNN': 0.001, 'SelfAttention': 0.001, 'CNN': 0.001}
    learning_rate['yelp'] = {'LSTM': 0.002, 'LSTMAtten': 0.0002, 'RNN': 0.0001,
                             'RCNN': 0.001, 'SelfAttention': 0.0001, 'CNN': 0.001}
    learning_rate['captions'] = {'LSTM': 0.005, 'LSTMAtten': 0.005, 'RNN': 0.01,
                                 'RCNN': 0.01, 'SelfAttention': 0.005, 'CNN': 0.001}

    TEXT, vocab_size, word_embeddings, train_iter, test_iter, Final_test_iter, Final_test_original_iter, Final_test_gt_iter = load_dataset(
        train_data, test_data, Final_test, Final_test_original, Final_test_gt, embedding_length, batch_size)

    if classifier_name == 'LSTM':
        model = LSTM(batch_size, output_size, hidden_size, vocab_size, embedding_length,
                     word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'LSTMAtten':
        model = LSTM_AttentionModel(batch_size, output_size, hidden_size, vocab_size,
                                    embedding_length, word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'RNN':
        model = RNN(batch_size, output_size, hidden_size, vocab_size, embedding_length,
                    word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'RCNN':
        model = RCNN(batch_size, output_size, hidden_size, vocab_size, embedding_length,
                     word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'SelfAttention':
        model = SelfAttention(batch_size, output_size, hidden_size, vocab_size, embedding_length,
                              word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'CNN':
        model = CNN(batch_size, output_size, 1, 32, [2, 4, 6], 1, 0, 0.6, vocab_size,
                    embedding_length, word_embeddings, pre_train, embedding_tune)
    else:
        raise ValueError('Not a valid classifier_name!!!')

    loss_fn = F.cross_entropy
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                 lr=learning_rate[domain][classifier_name])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 2, gamma=0.1)

    return train_iter, test_iter, Final_test_iter, Final_test_original_iter, Final_test_gt_iter, epoch_num, model, loss_fn, optimizer, scheduler
def main(args):
    acc_list = []
    f1_score_list = []
    prec_list = []
    recall_list = []
    for i in range(10):
        setup_data()
        model = RCNN(vocab_size=args.vocab_size,
                     embedding_dim=args.embedding_dim,
                     hidden_size=args.hidden_size,
                     hidden_size_linear=args.hidden_size_linear,
                     class_num=args.class_num,
                     dropout=args.dropout).to(args.device)

        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model, dim=0)

        train_texts, train_labels = read_file(args.train_file_path)
        word2idx, embedding = build_dictionary(train_texts, args.vocab_size,
                                               args.lexical, args.syntactic, args.semantic)
        logger.info('Dictionary Finished!')

        full_dataset = CustomTextDataset(train_texts, train_labels, word2idx, args)
        num_train_data = len(full_dataset) - args.num_val_data
        train_dataset, val_dataset = random_split(
            full_dataset, [num_train_data, args.num_val_data])
        train_dataloader = DataLoader(dataset=train_dataset,
                                      collate_fn=lambda x: collate_fn(x, args),
                                      batch_size=args.batch_size,
                                      shuffle=True)
        valid_dataloader = DataLoader(dataset=val_dataset,
                                      collate_fn=lambda x: collate_fn(x, args),
                                      batch_size=args.batch_size,
                                      shuffle=True)

        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        train(model, optimizer, train_dataloader, valid_dataloader, embedding, args)
        logger.info('******************** Train Finished ********************')

        # Test
        if args.test_set:
            test_texts, test_labels = read_file(args.test_file_path)
            test_dataset = CustomTextDataset(test_texts, test_labels, word2idx, args)
            test_dataloader = DataLoader(dataset=test_dataset,
                                         collate_fn=lambda x: collate_fn(x, args),
                                         batch_size=args.batch_size,
                                         shuffle=True)
            model.load_state_dict(
                torch.load(os.path.join(args.model_save_path, "best.pt")))
            _, accuracy, precision, recall, f1, cm = evaluate(
                model, test_dataloader, embedding, args)
            logger.info('-' * 50)
            logger.info(
                f'|* TEST SET *| |ACC| {accuracy:>.4f} |PRECISION| {precision:>.4f} |RECALL| {recall:>.4f} |F1| {f1:>.4f}'
            )
            logger.info('-' * 50)
            logger.info('---------------- CONFUSION MATRIX ----------------')
            for i in range(len(cm)):
                logger.info(cm[i])
            logger.info('--------------------------------------------------')
            acc_list.append(accuracy / 100)
            prec_list.append(precision)
            recall_list.append(recall)
            f1_score_list.append(f1)

    avg_acc = sum(acc_list) / len(acc_list)
    avg_prec = sum(prec_list) / len(prec_list)
    avg_recall = sum(recall_list) / len(recall_list)
    avg_f1_score = sum(f1_score_list) / len(f1_score_list)
    logger.info('--------------------------------------------------')
    logger.info(
        f'|* TEST SET *| |Avg ACC| {avg_acc:>.4f} |Avg PRECISION| {avg_prec:>.4f} |Avg RECALL| {avg_recall:>.4f} |Avg F1| {avg_f1_score:>.4f}'
    )
    logger.info('--------------------------------------------------')

    plot_df = pd.DataFrame({
        'x_values': range(10),
        'avg_acc': acc_list,
        'avg_prec': prec_list,
        'avg_recall': recall_list,
        'avg_f1_score': f1_score_list
    })
    plt.plot('x_values', 'avg_acc', data=plot_df, marker='o', markerfacecolor='blue',
             markersize=12, color='skyblue', linewidth=4)
    plt.plot('x_values', 'avg_prec', data=plot_df, marker='', color='olive', linewidth=2)
    plt.plot('x_values', 'avg_recall', data=plot_df, marker='', color='olive',
             linewidth=2, linestyle='dashed')
    plt.plot('x_values', 'avg_f1_score', data=plot_df, marker='', color='olive',
             linewidth=2, linestyle='dashed')
    plt.legend()

    fname = 'lexical-semantic-syntactic.png' if args.lexical and args.semantic and args.syntactic \
        else 'semantic-syntactic.png' if args.semantic and args.syntactic \
        else 'lexical-semantic.png' if args.lexical and args.semantic \
        else 'lexical-syntactic.png' if args.lexical and args.syntactic \
        else 'lexical.png' if args.lexical \
        else 'syntactic.png' if args.syntactic \
        else 'semantic.png' if args.semantic \
        else 'plain.png'

    if not path.exists('./images'):
        mkdir('./images')
    plt.savefig(path.join('./images', fname))
        iou_threshold=iou_threshold, max_samples=max_samples, verbose=verbose)

    # Save it for later.
    with open(imagesdata_pickle_path, 'wb') as fi:
        pickle.dump(imagesdata, fi)

    print('NB CLASSES : ' + str(imagesdata.get_num_classes()))  # check the classes

    tf.debugging.set_log_device_placement(True)
    strategy = tf.distribute.MirroredStrategy()  # run on multiple GPUs
    with strategy.scope():
        arch = RCNN(imagesdata, loss=loss, opt=opt, lr=lr, verbose=verbose)
        arch.train(epochs=epochs, batch_size=batch_size, split_size=split_size,
                   checkpoint_path=checkpoint_path, early_stopping=early_stopping,
                   verbose=verbose)
        arch.model.save(filepath='../data/out_test/test2/model.h5', save_format='h5')

    loss = arch.history()['loss']
    val_loss = arch.history()['val_loss']
    accuracy = arch.history()['accuracy']
    val_accuracy = arch.history()['val_accuracy']
    model.train()
    return score


if __name__ == "__main__":
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    train_data = pickle.load(open(os.path.join(data_path, train_name), "rb"))
    dev_data = pickle.load(open(os.path.join(data_path, dev_name), "rb"))
    vocabulary = pickle.load(open(os.path.join(data_path, vocabulary_name), "rb"))
    print('dataset', len(train_data), len(dev_data))
    # load w2v data
    weight = pickle.load(open(os.path.join(data_path, weight_name), "rb"))

    # model
    train_device = torch.device(device if torch.cuda.is_available() else "cpu")
    model = RCNN(vocab_size=len(vocabulary), embed_dim=embed_dim, output_dim=class_num,
                 hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout, weight=weight)
    model.to(train_device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # train
    writer = SummaryWriter(log_dir=log_path)
    train()
    writer.close()
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(
        open(os.path.join(config.data_path, config.vocabulary_name), "rb"))
    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    elif task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                                  output_dim=config.class_num, hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=config.dropout)
    elif task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    # elif task_name == "cnn_w2v":
    #     text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
    #                          class_num=config.class_num, kernel_num=config.kernel_num,
    #                          kernel_sizes=config.kernel_sizes, dropout=config.dropout,
    #                          static=config.static, in_channels=config.in_channels,
    #                          weight=weight)
    elif task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    # elif task_name == "bert":
    #     text_model = BertModel.from_pretrained(config.bert_path)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model, loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size, check_code_level=-1,
                      metrics=f1, metric_key='f1', n_epochs=config.epoch,
                      dev_data=dev_data, save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(
        dev_data,
        text_model,
        metrics=f1,
        device=config.device,
        batch_size=config.batch_size,
    )
    tester.test()
    y_va_age = to_categorical(y_va_age)

    x_train_current = x_train_age
    x_train_left = np.hstack([np.expand_dims(x_train_age[:, 0], axis=1), x_train_age[:, 0:-1]])
    x_train_right = np.hstack([x_train_age[:, 1:], np.expand_dims(x_train_age[:, -1], axis=1)])
    print('x_train_current shape:', x_train_current.shape)
    print('x_train_left shape:', x_train_left.shape)
    print('x_train_right shape:', x_train_right.shape)
    x_val_current = x_va_age
    x_val_left = np.hstack([np.expand_dims(x_va_age[:, 0], axis=1), x_va_age[:, 0:-1]])
    x_val_right = np.hstack([x_va_age[:, 1:], np.expand_dims(x_va_age[:, -1], axis=1)])

    print('Building the RCNN model...')
    max_features = len(word2index) + 1  # size of the vocabulary
    model = RCNN(maxlen, max_features, embedding_dims, 7, 'softmax').get_model()
    # specify the optimizer, loss and evaluation metric
    model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

    print('Training...')
    my_callbacks = [
        ModelCheckpoint(model_path + 'rcnn_model_age.h5', verbose=1),
        EarlyStopping(monitor='val_accuracy', patience=2, mode='max')
    ]
    # fit the model to the data
    history = model.fit([x_train_current, x_train_left, x_train_right], y_train_age,
                        batch_size=batch_size,
                        epochs=epochs,
                        callbacks=my_callbacks,
                        validation_data=([x_val_current, x_val_left, x_val_right], y_va_age))
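    # A minimal follow-up sketch: scoring the fitted model on the validation split with
    # the same (current, left, right) shifted inputs built above. model.evaluate is the
    # standard Keras API; reporting the held-out accuracy this way is an assumption and
    # not part of the original script.
    val_loss, val_acc = model.evaluate([x_val_current, x_val_left, x_val_right],
                                       y_va_age, batch_size=batch_size, verbose=0)
    print('validation loss: {:.4f}, validation accuracy: {:.4f}'.format(val_loss, val_acc))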
def train(x_train, y_train, x_test, y_test, params,
          train_summary_dir_path, test_summary_dir_path,
          checkpoint_dir_path, checkpoint_prefix_path):
    # Training
    # ==================================================
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            # Define Training procedure
            if params['model'] == 'TextCNN':
                model = TextCNN(
                    sequence_length=params['sequence_length'],
                    num_classes=params['num_classes'],
                    vocab_size=params['vocab_size'],
                    embedding_size=params['embedding_size'],
                    filter_sizes=list(map(int, params['filter_sizes'].split(","))),
                    num_filters=params['num_filters'],
                    l2_reg_lambda=params['l2_reg_lambda'])
            elif params['model'] == 'BiLSTM':
                print('train BiLSTM model')
                model = BiLSTM(
                    sequence_length=params['sequence_length'],
                    num_classes=params['num_classes'],
                    vocab_size=params['vocab_size'],
                    embedding_size=params['embedding_size'],
                    hidden_size=params['hidden_size'],
                    batch_size=params['batch_size'],
                    l2_reg_lambda=params['l2_reg_lambda'])
            elif params['model'] == 'Fasttext':
                model = Fasttext(
                    sequence_length=params['sequence_length'],
                    num_classes=params['num_classes'],
                    vocab_size=params['vocab_size'],
                    embedding_size=params['embedding_size'],
                    l2_reg_lambda=params['l2_reg_lambda'])
            elif params['model'] == 'RCNN':
                model = RCNN(
                    sequence_length=params['sequence_length'],
                    num_classes=params['num_classes'],
                    vocab_size=params['vocab_size'],
                    embedding_size=params['embedding_size'],
                    hidden_size=params['hidden_size'],
                    output_size=params['output_size'],
                    l2_reg_lambda=params['l2_reg_lambda'])

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(model.loss)
            # global_step is incremented by 1 every time a batch is processed
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            out_dir = out_dir_path
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = train_summary_dir_path
            if not os.path.exists(train_summary_dir):
                os.makedirs(train_summary_dir)
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Test summaries
            test_summary_op = tf.summary.merge([loss_summary, acc_summary])
            test_summary_dir = test_summary_dir_path
            if not os.path.exists(test_summary_dir):
                os.makedirs(test_summary_dir)
            test_summary_writer = tf.summary.FileWriter(test_summary_dir, sess.graph)

            # Checkpoint directory.
            # Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = checkpoint_dir_path
            checkpoint_prefix = checkpoint_prefix_path
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=params['num_checkpoints'])

            # Write vocabulary
            # vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """A single training step."""
                feed_dict = {
                    model.input_x: x_batch,
                    model.input_y: y_batch,
                    model.dropout_keep_prob: params['dropout_keep_prob']
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, model.loss, model.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                res['step'].append(step)
                res['loss'].append(loss)
                res['acc'].append(accuracy)
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def test_step(x_batch, y_batch, writer=None):
                """Evaluates model on a dev set."""
                feed_dict = {
                    model.input_x: x_batch,
                    model.input_y: y_batch,
                    model.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, test_summary_op, model.loss, model.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = batch_iter(
                list(zip(x_train, y_train)), params['batch_size'], params['num_epochs'])
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                # if current_step % params['evaluate_every'] == 0:
                #     print("\nEvaluation:")
                #     test_step(x_test, y_test, writer=test_summary_writer)
                #     print("")
                if current_step % params['checkpoint_every'] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                if current_step == 10000:
                    return
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from model import RCNN
from dataloader_mnist import dataloader, batch_size, test_dataset_len, train_dataset_len

n_classes = 10
net = RCNN(n_classes=n_classes)

learning_rate = 1e-3
epoch = 30
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(
#     optimizer, 'min',
#     factor=0.1,
#     patience=(train_dataset_len / batch_size) * 3,
#     verbose=True)

use_gpu = torch.cuda.is_available()
if use_gpu:
    net = net.cuda()
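# A minimal training-loop sketch for the setup above. It assumes `dataloader` yields
# (images, labels) MNIST batches; the loop itself is illustrative and not part of the
# original script.
for epoch_idx in range(epoch):
    running_loss = 0.0
    for images, labels in dataloader:
        if use_gpu:
            images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()              # clear accumulated gradients
        outputs = net(images)              # forward pass through the RCNN
        loss = criterion(outputs, labels)  # cross-entropy over the 10 classes
        loss.backward()                    # backpropagate
        optimizer.step()                   # update parameters
        running_loss += loss.item()
    print('epoch {}: mean loss {:.4f}'.format(
        epoch_idx + 1, running_loss / (train_dataset_len / batch_size)))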
def train_RCNN(args):
    train_x, train_y, n_items = load_train(args.max_len)
    args.n_items = n_items
    data = list(zip(train_x, train_y))
    random.shuffle(data)
    train_x, train_y = zip(*data)
    num_batches = len(train_x) // args.batch_size

    global valid_x
    global valid_y
    valid_x, valid_y, _ = load_valid(args.max_len)

    print('#Items: {}'.format(n_items))
    print('#Training Nums: {}'.format(len(train_x)))

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        model = RCNN(args)
        if args.is_store:
            saver = tf.train.Saver(tf.global_variables())
            ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Restore model from {} successfully!'.format(ckpt.model_checkpoint_path))
            else:
                print('Restore model from {} failed!'.format(args.checkpoint_dir))
                return
        else:
            sess.run(tf.global_variables_initializer())

        best_epoch = -1
        best_step = -1
        best_loss = np.inf
        best_HR = np.inf
        max_stay, stay_cnt = 20, 0
        losses = 0.0
        for epoch in range(args.epochs):
            for i in range(num_batches):
                x = train_x[i * args.batch_size:(i + 1) * args.batch_size]
                y = train_y[i * args.batch_size:(i + 1) * args.batch_size]
                fetches = [model.sum_loss, model.global_step, model.lr, model.train_op]
                feed_dict = {model.X: x, model.Y: y}
                loss, step, lr, _ = sess.run(fetches, feed_dict)
                losses += loss
                if step % 50 == 0:
                    print('Epoch-{}\tstep-{}\tlr:{:.6f}\tloss: {:.6f}'.format(
                        epoch + 1, step, lr, losses / 50))
                    losses = 0.0
                if step % 1000 == 0:
                    valid_loss, HR, NDCG, MRR = eval_validation(model, sess, args.batch_size)
                    print('step-{}\teval_validation\tloss:{:.6f}\tHR@{}:{:.6f}\tNDCG@{}:{:.6f}\tMRR@{}:{:.6f}'
                          .format(step, valid_loss, args.top_k, HR, args.top_k, NDCG, args.top_k, MRR))
                    if HR > best_HR or (valid_loss < best_loss and HR > 0.0):
                        best_HR = HR
                        best_loss = valid_loss
                        best_epoch = epoch + 1
                        best_step = step
                        stay_cnt = 0
                        ckpt_path = args.checkpoint_dir + 'model.ckpt'
                        model.saver.save(sess, ckpt_path, global_step=step)
                        print("model saved to {}".format(ckpt_path))
                    else:
                        stay_cnt += 1
                        if stay_cnt >= max_stay:
                            break
            if stay_cnt >= max_stay:
                break
        print("best model at:epoch-{}\tstep-{}\tloss:{:.6f}\tHR@{}:{:.6f}".format(
            best_epoch, best_step, best_loss, args.top_k, best_HR))
def main():
    # print('\nRunnig fold: ' + sys.argv[1])
    kfold_ = int(sys.argv[1])  # only train for one cross-validation fold at a time (this way we can train all folds in parallel)
    print(type(kfold_))

    # load data
    tr_fact = 1  # 1 is 100% data for training
    out_dir = './results/rcnn_merge_time_coch_cval10_brain_hfb/' + \
        'n_back_6_cnnT_300h_cnnF_100h_rnn_300h_alt_alt2_concattest_train' + str(int(tr_fact * 100)) + '/'
    x1_file = './data/M3_audio_mono_down.wav'
    x2_file = './data/minoes_wav_freq_125Hz_abs.npy'
    t_file = './data/minoes_hfb_6subjs.npy'

    xtr1 = librosa.load(x1_file, sr=8000)[0]
    xtr2 = np.load(x2_file).astype(np.float32)
    ttr = np.load(t_file).astype(np.float32)
    print('Train data: ' + str(int(tr_fact * 100)) + '%')

    # resample brain and spectrogram data to 50 Hz
    xtr2 = resample(xtr2, sr1=50, sr2=125)
    ttr = resample(ttr, sr1=50, sr2=125)

    # take a sample in sec
    global sr1, sr2, sr3, n_back
    sr1 = 8000
    sr2 = 50
    sr3 = 50
    nsec = ttr.shape[0] / float(sr2)
    nsamp = nsec * 1
    n2 = int(nsamp * sr2)
    n3 = int(nsamp * sr3)
    xtr2 = xtr2[:n2]
    ttr = ttr[:n3]

    # cut raw audio to match brain data (ttr) length in sec
    n1 = int(nsamp * sr1)
    xtr1 = xtr1[:n1]
    xtr1 = xtr1[:, None]

    # set up cross-validation for performance accuracy:
    # set up the same way for all folds when folds are trained separately
    kfolds = 10
    nparts = 7  # test set is not a continuous chunk but a concatenation of nparts fragments for better performance
    ind1 = np.arange(xtr1.shape[0])
    ind2 = np.arange(ttr.shape[0])
    ind3 = np.arange(ttr.shape[0])
    TestI_, TestI = [], []
    kf = KFold(n_splits=kfolds * nparts)
    for (_, ix1_test), (_, ix2_test), (_, it_test) in zip(kf.split(xtr1), kf.split(xtr2), kf.split(ttr)):
        TestI_.append([ix1_test, ix2_test, it_test])
    for kfold in range(kfolds):
        TestI.append([np.array(
            [item for sublist in
             [TestI_[i][j] for i in range(0 + kfold, kfolds * nparts + kfold, kfolds)]
             for item in sublist]) for j in range(len(TestI_[0]))])

    if (out_dir is not None) & (not os.path.exists(out_dir)):
        os.makedirs(out_dir)

    process = psutil.Process(os.getpid())
    print(process.memory_info().rss / 1024 / 1024 / 1024)

    # standard sklearn preprocessing of data
    scaler = Scaler()
    kfold = kfold_
    ktrain, ktest, _ = scaler([xtr1[np.setdiff1d(ind1, TestI[kfold][0])],
                               xtr2[np.setdiff1d(ind2, TestI[kfold][1])],
                               ttr[np.setdiff1d(ind3, TestI[kfold][2])]],
                              [xtr1[TestI[kfold][0]],
                               xtr2[TestI[kfold][1]],
                               ttr[TestI[kfold][2]]],
                              None)
    nsec_tr = ktrain[-1].shape[0] / float(sr2)
    nsamp_tr = nsec_tr * tr_fact
    ktrain = map(lambda x, n: x.copy()[:n], ktrain, [int(nsamp_tr * i) for i in [sr1, sr2, sr3]])
    print(map(len, ktrain))
    print(map(len, ktest))

    # model parameters
    dur = 1  # sec units
    batch_size = 16
    n_back = 6 * dur  # in dur units, temporal window of input data (how much data the model sees at once)
    nepochs = 30
    n_out = ttr.shape[-1]
    alpha = 5e-04
    h_cnn_t = 300  # number of hidden units on top layer of CNN time
    h_cnn_f = 100  # number of hidden units on top layer of CNN freq/spectra
    h_rnn = 300  # number of hidden units of RNN
    print('batch size: ' + str(batch_size) + ', nepochs: ' + str(nepochs) + ', lr: ' + str(alpha) +
          ', h_cnn_t: ' + str(h_cnn_t) + ', h_cnn_f: ' + str(h_cnn_f) + ', h_rnn: ' + str(h_rnn))
    print('outdir: ' + out_dir)

    # set up model
    rcnn = RCNN(h_cnn_t, h_cnn_f, h_rnn, n_out)
    opt = chainer.optimizers.Adam(alpha)
    opt.setup(rcnn)

    # running epoch and best performance are saved to txt file for bookkeeping
    with open(out_dir + 'fold' + str(kfold) + '_run.log', 'wb'):
        pass
    with open(out_dir + 'fold' + str(kfold) + '_epoch.txt', 'wb'):
        pass

    # train loop
    best_acc = -1
    for epoch in range(nepochs):
        print('Epoch ' + str(epoch))
        with open(out_dir + 'fold' + str(kfold) + '_run.log', 'a') as fid0:
            fid0.write('epoch' + str(epoch) + '\n')
        rcnn.reset_state()
        x1, x2, t = roll_data(ktrain, [.14 * epoch * sr for sr in [sr1, sr2, sr3]])
        x1, x2, t = prepare_input([x1, x2, t], [sr1, sr2, sr3], n_back)
        xbs1, xbs2, tbs = get_batches([x1, x2, t], batch_size)
        print(process.memory_info().rss / 1024 / 1024 / 1024)

        for ib, (xb1, xb2, tb) in enumerate(zip(xbs1, xbs2, tbs)):
            with chainer.using_config('train', True):
                y = rcnn([np.expand_dims(xb1, 1), np.expand_dims(xb2, 1)], n_back)
            loss = 0
            for ni in range(y.shape[1]):
                loss += F.mean_squared_error(tb[:, ni, :], y[:, ni, :])
            r = acc_pass(tb.reshape((-1, n_out)), y.data.reshape((-1, n_out)))
            print('\t\tbatch ' + str(ib) + ', train loss: ' + str(loss.data / tb.shape[1]) +
                  ', max acc: ' + str(np.max(r)))
            rcnn.cleargrads()
            loss.backward()
            loss.unchain_backward()
            opt.update()

        xb1_, xb2_, tb_ = prepare_input(ktest, [sr1, sr2, sr3], n_back)
        rcnn.reset_state()
        with chainer.using_config('train', False):
            y_ = rcnn([np.expand_dims(xb1_, 1), np.expand_dims(xb2_, 1)], n_back)
        loss_ = 0
        for ni in range(y_.shape[1]):
            loss_ += F.mean_squared_error(tb_[:, ni, :], y_[:, ni, :])
        r = acc_pass(tb_.reshape((-1, n_out)), y_.data.reshape((-1, n_out)))
        print('\t\ttest loss: ' + str(np.round(loss_.data / tb_.shape[1], 3)) +
              ', max acc: ' + str(np.round(np.sort(r)[::-1][:10], 4)))

        run_acc = np.mean(np.sort(r)[::-1][:10])
        if run_acc > best_acc:
            # only if performance of current model is superior, save it to file
            print('Current model is best: ' + str(np.round(run_acc, 4)) + ' > ' +
                  str(np.round(best_acc, 4)) + ': saving update to disk')
            best_acc = run_acc.copy()
            serializers.save_npz(out_dir + '/model' + str(kfold) + '.npz', rcnn)
            with open(out_dir + 'fold' + str(kfold) + '_epoch.txt', 'a') as fid:
                fid.write(str(epoch) + '\n')
                fid.write(str(np.sort(r)[::-1][:10]) + '\n')
            np.save(out_dir + '/predictions_fold' + str(kfold), y_.data.reshape((-1, n_out)))
            np.save(out_dir + '/targets_fold' + str(kfold), tb_.reshape((-1, n_out)))
def load_model(test_arguments):
    rcnn = RCNN(test_arguments.pos_loss_method, test_arguments.loss_weight_lambda).cuda()
    rcnn.load_state_dict(t.load(test_arguments.model_path))
    rcnn.eval()  # dropout rate = 0
    return rcnn
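# A minimal usage sketch for load_model. Only the attribute names (pos_loss_method,
# loss_weight_lambda, model_path) come from the function above; the concrete values
# below are hypothetical placeholders.
from types import SimpleNamespace

test_arguments = SimpleNamespace(
    pos_loss_method="smoothl1",                # assumed value, not from the original
    loss_weight_lambda=1.0,                    # assumed value, not from the original
    model_path="./model/model_epoch10.pth")    # assumed path, not from the original
rcnn = load_model(test_arguments)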
################################################################################
# command: main
#   python main.py train/inference --weights=coco/last --image=link
#   tensorboard --logdir=log_dir
################################################################################
parser = argparse.ArgumentParser()
parser.add_argument("command")
parser.add_argument("--weights", required=True)
parser.add_argument("--image", required=False)
parser.add_argument("--video", required=False)
args = parser.parse_args()

config = Config()
if args.command == "train":
    model = RCNN(mode="train", config=config)
else:
    model = RCNN(mode="inference", config=config)

# load resnet101 pretrained model
isCoco = 0
if args.weights == "coco":
    weight_path = COCO_WEIGHTS
    isCoco = 1
    print(weight_path)
elif args.weights == "last":
    weight_path = model.find_last()
    print(weight_path)
else:
    weight_path = args.weights
def load_models(config):
    # train_data = pickle.load(open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    # if config.debug:
    #     train_data = train_data[0:30]
    # dev_data = pickle.load(open(os.path.join(config.data_path, config.dev_name), "rb"))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(
        open(os.path.join(config.data_path, config.vocabulary_name), "rb"))
    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    cnn = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
              class_num=config.class_num, kernel_num=config.kernel_num,
              kernel_sizes=config.kernel_sizes, dropout=config.dropout,
              static=config.static, in_channels=config.in_channels)
    state_dict = torch.load(
        os.path.join(config.save_path, config.ensemble_models[0])).state_dict()
    cnn.load_state_dict(state_dict)

    lstm = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                output_dim=config.class_num, hidden_dim=config.hidden_dim,
                num_layers=config.num_layers, dropout=config.dropout)
    state_dict = torch.load(
        os.path.join(config.save_path, config.ensemble_models[1])).state_dict()
    lstm.load_state_dict(state_dict)

    lstm_mxp = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                            output_dim=config.class_num, hidden_dim=config.hidden_dim,
                            num_layers=config.num_layers, dropout=config.dropout)
    state_dict = torch.load(
        os.path.join(config.save_path, config.ensemble_models[2])).state_dict()
    lstm_mxp.load_state_dict(state_dict)

    rcnn = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                output_dim=config.class_num, hidden_dim=config.hidden_dim,
                num_layers=config.num_layers, dropout=config.dropout)
    state_dict = torch.load(
        os.path.join(config.save_path, config.ensemble_models[3])).state_dict()
    rcnn.load_state_dict(state_dict)

    schemas = get_schemas(config.source_path)
    state_dict = torch.load(
        os.path.join(config.save_path, config.ensemble_models[4])).state_dict()
    bert = BertForMultiLabelSequenceClassification.from_pretrained(
        config.bert_folder, state_dict=state_dict, num_labels=len(schemas))
    bert.load_state_dict(state_dict)

    return cnn, lstm, lstm_mxp, rcnn, bert