def load(args, checkpoint_dir):
    state_dict = torch.load(os.path.join(checkpoint_dir, 'checkpoint.pth'))
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if 'module' in k:
            namekey = k[7:]  # remove `module.`
        else:
            namekey = k
        new_state_dict[namekey] = v

    if args.model_type == 'bert':
        config = BertConfig.from_json_file(os.path.join(checkpoint_dir, 'config.bin'))
        model = BertForSequenceClassification(config)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'cnn':
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size,
                         num_classes=args.num_labels, num_filters=args.num_filters,
                         filter_sizes=args.filter_sizes, device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'lstm':
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size,
                          num_classes=args.num_labels, hidden_size=args.hidden_size,
                          device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'char-cnn':
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    else:
        raise ValueError('model type is not found!')
    return model.to(args.device)
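# Minimal self-contained sketch of why the key renaming in load() is needed:
# checkpoints saved from a model wrapped in torch.nn.DataParallel carry a
# `module.` prefix on every parameter name, which must be stripped before
# loading into a bare model. (Toy nn.Linear used here purely for illustration.)
import torch
import torch.nn as nn
from collections import OrderedDict

net = nn.Linear(4, 2)
# simulate a checkpoint written from a DataParallel-wrapped model
saved = OrderedDict(('module.' + k, v) for k, v in net.state_dict().items())

# strip the prefix so the keys match the unwrapped module again
restored = OrderedDict((k[len('module.'):] if k.startswith('module.') else k, v)
                       for k, v in saved.items())
net.load_state_dict(restored)  # loads cleanly once the prefix is removed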
def main():
    # parse arguments
    args = parser.parse_args()

    # load training data
    print("\nLoading training data...")
    train_dataset = AGNEWs(label_data_path=args.train_path, alphabet_path=args.alphabet_path)
    print("Transferring training data into iterator...")
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              num_workers=args.num_workers, drop_last=True, shuffle=True)

    # feature length
    args.num_features = len(train_dataset.alphabet)

    # load development data
    print("\nLoading development data...")
    dev_dataset = AGNEWs(label_data_path=args.val_path, alphabet_path=args.alphabet_path)
    print("Transferring development data into iterator...")
    dev_loader = DataLoader(dev_dataset, batch_size=args.batch_size,
                            num_workers=args.num_workers, drop_last=True)

    class_weight, num_class_train = train_dataset.get_class_weight()
    _, num_class_dev = dev_dataset.get_class_weight()

    # when the training set is unbalanced
    if args.class_weight is not None:
        args.class_weight = torch.FloatTensor(class_weight).sqrt_()
        if args.cuda:
            args.class_weight = args.class_weight.cuda()

    print('\nNumber of training samples: ' + str(len(train_dataset)))
    for i, c in enumerate(num_class_train):
        print("\tLabel {:d}:".format(i).ljust(15) + "{:d}".format(c).rjust(8))
    print('\nNumber of development samples: ' + str(len(dev_dataset)))
    for i, c in enumerate(num_class_dev):
        print("\tLabel {:d}:".format(i).ljust(15) + "{:d}".format(c).rjust(8))

    # make save folder
    try:
        os.makedirs(args.save_folder)
    except OSError as e:
        if e.errno == errno.EEXIST:
            print('Directory already exists.')
        else:
            raise
    # args.save_folder = os.path.join(args.save_folder, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

    # configuration
    print("\nConfiguration:")
    for attr, value in sorted(args.__dict__.items()):
        print("\t{}:".format(attr.capitalize().replace('_', ' ')).ljust(25) + "{}".format(value))

    # log result
    if args.log_result:
        with open(os.path.join(args.save_folder, 'result.csv'), 'w') as r:
            r.write('{:s},{:s},{:s},{:s},{:s}'.format('epoch', 'batch', 'loss', 'acc', 'lr'))

    # model
    model = CharCNN(args)
    print(model)

    # train
    train(train_loader, dev_loader, model, args)
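# Hedged sketch of what the class-weight handling above amounts to, assuming
# get_class_weight() returns inverse-frequency weights (hypothetical counts):
# the square-rooted weights are typically passed to CrossEntropyLoss inside train().
import torch
import torch.nn as nn

counts = torch.tensor([3000., 3000., 1000., 500.])   # samples per label (made up)
class_weight = (counts.sum() / counts).sqrt()         # dampened inverse frequency
criterion = nn.CrossEntropyLoss(weight=class_weight)  # rarer classes weigh more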
            target[row][i].data[0]) + "_"
        print("Input:{}, Predicted:{} , Target:{}".format(
            input_word_list[row], predicted_word, target_word))
    return totalLoss / numLines

# number of input char types
char_vocab = len(string.printable)
# number of output classes = vocab size
numOutputClass = len(labelCorpus.dictionary)
print("Number of Classes:" + str(numOutputClass))

# Initialize models and start training
encoder = CharCNN(char_vocab, args.hidden_size)
decoder = DecoderRNN(args.hidden_size, numOutputClass)
encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=args.learning_rate)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()

if args.cuda:
    criterion.cuda()
    encoder.cuda()
    decoder.cuda()

start = time.time()
# print(dev_set.data)  # DEBUG
# print("numberized test set:")
# print(test_set.data)  # DEBUG
print('#token: {}'.format(len(token_vocab)))
print('#char: {}'.format(len(char_vocab)))
print('#label: {}'.format(len(label_vocab)))

# Embedding file
word_embed = load_embedding(args.word_embed,
                            dimension=args.word_embed_dim,
                            vocab=token_vocab)
charcnn_filters = [[int(f.split(',')[0]), int(f.split(',')[1])]
                   for f in args.charcnn_filters.split(';')]
char_embed = CharCNN(len(char_vocab),
                     args.char_embed_dim,
                     filters=charcnn_filters)
char_hw = Highway(char_embed.output_size,
                  layer_num=args.charhw_layer,
                  activation=args.charhw_func)
feat_dim = word_embed.embedding_dim + char_embed.output_size
lstm = LSTM(feat_dim,
            args.lstm_hidden_size,
            batch_first=True,
            bidirectional=True,
            forget_bias=args.lstm_forget_bias)
crf = CRF(label_size=len(label_vocab) + 2)
linear = Linears(in_features=lstm.output_size,
                 out_features=len(label_vocab),
                 hiddens=[lstm.output_size // 2])
lstm_crf = LstmCrf(token_vocab,
parser.add_argument('--lower', default=True)
args = parser.parse_args()

dataloader = load_datasets(args)

best_error = 1000
early_stop = 0

if not os.path.exists(args.savedir):
    os.makedirs(args.savedir)
model_name = args.savedir + '/' + 'best.pt'

train_begin = time.time()
print('train begin', '-' * 50)
print()
print()

model = CharCNN(70, args.dropout)
criterion = nn.CrossEntropyLoss()

if args.optimizer == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=3, gamma=0.5)
elif args.optimizer == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
else:
    print('optimizer is bad')
    optimizer = None
    exit(0)

writer = SummaryWriter('log')

for epoch in range(args.epochs):
    epoch_begin = time.time()
def main(_): print("Loading data...") x, y, sequence_length = data_loader.read_data(FLAGS.pos_data, FLAGS.neg_data, FLAGS.max_word_length, FLAGS.max_seq_length) print("Data Size:", len(y)) np.random.seed(10) shuffle_indices = np.random.permutation(np.arange(len(y))) x_shuffled = x[shuffle_indices] y_shuffled = y[shuffle_indices] seq_shuffled = sequence_length[shuffle_indices] dev_sample_index = -1 * int(FLAGS.dev_percentage * float(len(y))) x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[ dev_sample_index:] y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[ dev_sample_index:] seq_train, seq_dev = seq_shuffled[:dev_sample_index], seq_shuffled[ dev_sample_index:] del x, y, sequence_length, x_shuffled, y_shuffled, seq_shuffled print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = True #session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45 sess = tf.Session(config=session_conf) with sess.as_default(): cnn = CharCNN(char_vocab_size=FLAGS.char_vocab_size, char_embed_size=FLAGS.char_embed_size, batch_size=FLAGS.batch_size, max_word_length=FLAGS.max_word_length, max_seq_length=FLAGS.max_seq_length, filters=eval(FLAGS.filters), filter_sizes=eval(FLAGS.filter_sizes), num_classes=FLAGS.num_classes, rnn_size=FLAGS.rnn_size, attention_size=FLAGS.attention_size) save_path = os.path.join(FLAGS.save_path) if not os.path.isdir(save_path): os.makedirs(save_path) saver = tf.train.Saver(tf.trainable_variables()) for v in tf.trainable_variables(): print("Save:", v.name) sess.run(tf.global_variables_initializer()) check_point_dir = os.path.join(FLAGS.save_path) ckpt = tf.train.get_checkpoint_state(check_point_dir) if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): print("Reading model parameters from %s" % ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) else: print("Created model with fresh parameters.") batches = data_loader.batch_iter( list(zip(x_train, y_train, seq_train)), FLAGS.batch_size, FLAGS.num_epochs) gloabl_max_acc = 0 for batch in batches: x_batch, y_batch, seq_batch = zip(*batch) train_step(x_batch, y_batch, seq_batch, sess, cnn) current_step = tf.train.global_step(sess, cnn.global_step) if current_step % FLAGS.evaluate_every == 0: max_dev_acc = 0 print("\nEvaluation:") batches_dev = data_loader.batch_iter( list(zip(x_dev, y_dev, seq_dev)), FLAGS.batch_size, 1) for batch_dev in batches_dev: x_batch_dev, y_batch_dev, seq_batch_dev = zip( *batch_dev) max_dev_acc = dev_step(x_batch_dev, y_batch_dev, seq_batch_dev, sess, cnn, max_dev_acc) print("During this evaluation phase, the max accuracy is:", max_dev_acc) if max_dev_acc > gloabl_max_acc: gloabl_max_acc = max_dev_acc print("\n Until now, the max accuracy is:", gloabl_max_acc) if current_step % FLAGS.checkpoint_every == 0: path = saver.save(sess, os.path.join(save_path, "model"), global_step=current_step) print("Saved model checkpoint to {}\n".format(path))
print("\nLoading testing data...") test_dataset = AGNEWs(label_data_path=args.test_path, alphabet_path=args.alphabet_path) print("Transferring testing data to iterator...") test_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers, drop_last=True) _, num_class_test = test_dataset.get_class_weight() print('\nNumber of testing samples: ' + str(test_dataset.__len__())) for i, c in enumerate(num_class_test): print("\tLabel {:d}:".format(i).ljust(15) + "{:d}".format(c).rjust(8)) args.num_features = len(test_dataset.alphabet) model = CharCNN(args) print("=> loading weights from '{}'".format(args.model_path)) assert os.path.isfile( args.model_path), "=> no checkpoint found at '{}'".format( args.model_path) checkpoint = torch.load(args.model_path) model.load_state_dict(checkpoint['state_dict']) # using GPU if args.cuda: model = torch.nn.DataParallel(model).cuda() model.eval() corrects, avg_loss, accumulated_loss, size = 0, 0, 0, 0 predicates_all, target_all = [], [] print('\nTesting...')
logger.info('Building the model')
word_embed = Embedding(Config({
    'num_embeddings': len(token_vocab),
    'embedding_dim': args.word_embed_dim,
    'padding': C.EMBED_START_IDX,
    'padding_idx': 0,
    'sparse': True,
    'trainable': True,
    'file': embed_file,
    'stats': args.embed_skip_first,
    'vocab': token_vocab,
    'ignore_case': word_ignore_case
}))
char_cnn = CharCNN(Config({
    'vocab_size': len(char_vocab),
    'padding': C.CHAR_EMBED_START_IDX,
    'dimension': args.char_embed_dim,
    'filters': charcnn_filters
}))
char_highway = Highway(Config({
    'num_layers': 2,
    'size': char_cnn.output_size,
    'activation': 'selu'
}))
lstm = LSTM(Config({
    'input_size': word_embed.output_size + char_cnn.output_size,
    'hidden_size': args.lstm_hidden_size,
    'forget_bias': 1.0,
    'batch_first': True,
    'bidirectional': True
}))
crf = CRF(Config({
print("Train data sample number: {:d}".format(len(y_train))) # Training # ================================================== with tf.Graph().as_default(): ## define training computation graph learning_rate = 0.001 m, n = x_train.shape print('x_train\'s shape is', x_train.shape) print(x_train[0]) session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) cnn = CharCNN() global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.8) train_op = optimizer.minimize(cnn.loss) init = tf.global_variables_initializer() n_batches = int(np.ceil(m / FLAGS.batch_size)) # create a Saver node after all variable nodes are created saver = tf.train.Saver() # Output directory for models and summaries now = datetime.utcnow().strftime("%Y%m%d%H%M%S") checkpoint_dir = os.path.abspath(os.path.join(os.path.curdir, "cv")) print("Writing check point to {}\n".format(checkpoint_dir))
word_embed = Embedding(
    Config({
        'num_embeddings': len(token_vocab),
        'embedding_dim': train_args['word_embed_dim'],
        'padding': C.EMBED_START_IDX,
        'padding_idx': 0,
        'sparse': True,
        'trainable': True,
        'stats': train_args['embed_skip_first'],
        'vocab': token_vocab,
        'ignore_case': train_args['word_ignore_case']
    }))
char_cnn = CharCNN(
    Config({
        'vocab_size': len(char_vocab),
        'padding': C.CHAR_EMBED_START_IDX,
        'dimension': train_args['char_embed_dim'],
        'filters': charcnn_filters
    }))
char_highway = Highway(
    Config({
        'num_layers': 2,
        'size': char_cnn.output_size,
        'activation': 'selu'
    }))
lstm = LSTM(
    Config({
        'input_size': word_embed.output_size + char_cnn.output_size,
        'hidden_size': train_args['lstm_hidden_size'],
        'forget_bias': 1.0,
        'batch_first': True,
from model import CharCNN
import torch

model = CharCNN(70, 0.5)
model.load_state_dict(torch.load('save_model/best.pt'))

sent = "U.S. Brokers Cease-fire in Western Afghanistan KABUL (Reuters) - The United States has brokered a cease-fire between a renegade Afghan militia leader and the embattled governor of the western province of Herat, Washington's envoy to Kabul said Tuesday."

sent_tensor = torch.zeros(1014).long()
alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}\n"
for i, char in enumerate(sent):
    if i == 1014:
        break
    alphabet_index = alphabet.find(char)
    if alphabet_index != -1:
        sent_tensor[i] = alphabet_index

sent_tensor = sent_tensor.view(-1, sent_tensor.size(0))
out_feature = model(sent_tensor)
out_feature = out_feature.squeeze(0)
print('out_feature:', out_feature)
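# Hedged follow-up, assuming out_feature holds unnormalized per-class scores for
# the AG News labels: the predicted class would be the argmax over those scores.
# (The tensor below is a made-up stand-in for out_feature.)
import torch

scores = torch.tensor([0.2, 3.1, -0.4, 0.7])   # hypothetical 4-class scores
probs = torch.softmax(scores, dim=0)
pred = torch.argmax(probs).item()
print('predicted class index:', pred)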
seed_everything(config.seed)

validation_split = .2
shuffle_dataset = True

dataset = CustomDatasetFromCSV("train.csv")

# Creating data indices for training and validation splits:
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
np.random.seed(config.seed)
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(dataset, sampler=train_sampler,
                                           batch_size=128, num_workers=4)
validation_loader = torch.utils.data.DataLoader(dataset, sampler=valid_sampler,
                                                num_workers=4)

model = CharCNN(train_ds=train_loader, val_ds=validation_loader)
trainer = Trainer(gpus=1, fast_dev_run=True, max_epochs=1)
trainer.fit(model)
                 input_size)
train_data, train_labels = dataTrain.convert_data()

dataVal = Data(list(zip(contents_val, labels_val)), alphabet, input_size)
val_data, val_labels = dataVal.convert_data()

dataTest = Data(list(zip(contents_test, labels_test)), alphabet, input_size)
test_data, test_labels = dataTest.convert_data()

# Initialize the model
model = CharCNN(input_sz=config["data"]["input_size"],
                alphabet_sz=config["data"]["alphabet_size"],
                emb_sz=config["char_cnn_zhang"]["embedding_size"],
                conv_layers=config["char_cnn_zhang"]["conv_layers"],
                fc_layers=[],
                threshold=config["char_cnn_zhang"]["threshold"],
                dropout_p=config["char_cnn_zhang"]["dropout_p"],
                optimizer=config["char_cnn_zhang"]["optimizer"],
                loss=config["char_cnn_zhang"]["loss"])

# Train
model.train(train_inputs=train_data,
            train_labels=train_labels,
            val_inputs=val_data,
            val_labels=val_labels,
            epochs=config["training"]["epochs"],
            bs=config["training"]["batch_size"])

# Evaluate
results = model.test(test_data, test_labels, bs=128)
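# Hedged sketch of the character quantization a Data.convert_data helper like the
# one above typically performs (Zhang et al. 2015 style; hypothetical function,
# not this project's implementation): each text is lowercased, truncated or padded
# to input_size, and each character mapped to its index in the alphabet.
import numpy as np

def quantize(text, alphabet, input_size):
    indices = np.zeros(input_size, dtype=np.int64)
    for i, ch in enumerate(text.lower()[:input_size]):
        indices[i] = alphabet.find(ch) + 1   # 0 is reserved for padding/unknown
    return indices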
# TODO: Create a proper cross-validation procedure
x_train, x_dev = x_shuffled[:-n_dev_samples], x_shuffled[-n_dev_samples:]
y_train, y_dev = y_shuffled[:-n_dev_samples], y_shuffled[-n_dev_samples:]
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = CharCNN(l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name),
                                                     tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
def main(unused_argv):
    train_data_path = FLAGS.train_data_path
    val_data_path = FLAGS.validate_data_path

    # load train data
    train_data = DataSet(train_data_path)
    dev_data = DataSet(val_data_path)
    train_data.dataset_process()
    dev_data.dataset_process()

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = CharCNN(
                l0=Config.l0,
                num_classes=Config.nums_classes,
                conv_layers=Config.model.conv_layers,
                fc_layers=Config.model.fc_layers,
                l2_reg_lambda=0)

            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(Config.model.learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory.
            # Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables())

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: Config.model.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """Evaluates model on a dev set"""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            print("Initialization finished; starting training")
            for i in range(Config.training.epoches):
                batch_train = train_data.next_batch()
                # train the model
                train_step(batch_train[0], batch_train[1])
                current_step = tf.train.global_step(sess, global_step)
                # train_step.run(feed_dict={x: batch_train[0], y_actual: batch_train[1], keep_prob: 0.5})
                # record the results
                if current_step % Config.training.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(dev_data.doc_image, dev_data.label_image, writer=dev_summary_writer)
                    print("")
                if current_step % Config.training.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
token_vocab = state['vocab']['token']
label_vocab = state['vocab']['label']
char_vocab = state['vocab']['char']
train_args = state['args']

charcnn_filters = [[int(f.split(',')[0]), int(f.split(',')[1])]
                   for f in train_args['charcnn_filters'].split(';')]

# Resume model
logger.info('Resuming the model')
word_embed = torch.nn.Embedding(train_args['word_embed_size'],
                                train_args['word_embed_dim'],
                                sparse=True,
                                padding_idx=C.PAD_INDEX)
char_embed = CharCNN(len(char_vocab),
                     train_args['char_embed_dim'],
                     filters=charcnn_filters)
char_hw = Highway(char_embed.output_size,
                  layer_num=train_args['charhw_layer'],
                  activation=train_args['charhw_func'])
feat_dim = word_embed.embedding_dim + char_embed.output_size
lstm = LSTM(feat_dim,
            train_args['lstm_hidden_size'],
            batch_first=True,
            bidirectional=True,
            forget_bias=train_args['lstm_forget_bias'])
crf = CRF(label_size=len(label_vocab) + 2)
linear = Linear(in_features=lstm.output_size,
                out_features=len(label_vocab))
lstm_crf = LstmCrf(token_vocab,
                   label_vocab,
                   char_vocab,
        # Add predicted character to string and use as next input
        predicted_word = labelCorpus.idxToWord(top_i)
        target_word = labelCorpus.idxToWord(target[i].data[0])
        print("Input:{}, Predicted:{} , Target:{}".format(
            input_word_list[i], predicted_word, target_word))

# number of input char types
char_vocab = len(string.printable)
# number of output classes = vocab size
numOutputClass = len(labelCorpus.dictionary)

# Initialize models and start training
decoder = CharCNN(char_vocab, args.hidden_size, numOutputClass)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()

if args.cuda:
    decoder.cuda()

start = time.time()
all_losses = []
loss_avg = 0

try:
    print("Training for %d epochs..." % args.n_epochs)
    for epoch in tqdm(range(1, args.n_epochs + 1)):
        loss = train(*random_training_set(args.batch_size, linesInTrain))
params = {'dim': (input_size,),
          'batch_size': config["training"]["batch_size"],
          'n_classes': config["data"]["n_classes"],
          'shuffle': True}

# Datasets
with open(config["data"]["save_ratings"], 'rb') as fp:
    labels = pickle.load(fp)

# Generators
training_generator = DataGenerator(train_indices, labels,
                                   config["data"]["save_reviews"], **params)
validation_generator = DataGenerator(valid_indices, labels,
                                     config["data"]["save_reviews"], **params)

# Define model
model = CharCNN(input_size=input_size,
                alphabet_size=alphabet_size,
                embedding_size=config["char_cnn"]["embedding_size"],
                conv_layers=config["char_cnn"]["conv_layers"],
                fully_connected_layers=config["char_cnn"]["fully_connected_layers"],
                n_classes=config["data"]["n_classes"],
                threshold=config["char_cnn"]["threshold"],
                dropout_p=config["char_cnn"]["dropout_p"],
                optimizer=config["char_cnn"]["optimizer"],
                loss=config["char_cnn"]["loss"])

# Train model
model.train(training_gen=training_generator,
            validation_gen=validation_generator,
            epochs=config["training"]["epochs"],
            batch_size=config["training"]["batch_size"],
            log_freq=config["training"]["log_freq"])
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument('--data_dir', default=None, type=str, required=True,
                        help="The input data dir.")
    parser.add_argument('--model_type', default=None, type=str, required=True,
                        help="Model type selected in [bert, xlnet, xlm, cnn, lstm]")
    parser.add_argument('--model_name_or_path', default='bert-base-uncased', type=str,
                        help="Shortcut name is selected in [bert-base-uncased, ]")
    parser.add_argument('--task_name', default=None, type=str, required=True,
                        help="The name of task is selected in [imdb, amazon]")
    parser.add_argument('--output_dir', default='../out', type=str,
                        help="The output directory where the model predictions and checkpoints will be written.")

    # Other parameters
    parser.add_argument("--cache_dir", default='../cache', type=str,
                        help="Store the cache files.")
    parser.add_argument("--max_seq_length", default=256, type=int,
                        help="The maximum total input sequence length after tokenization.")
    parser.add_argument("--batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm. Avoiding over-fitting.")
    parser.add_argument("--num_train_epochs", default=20, type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--seed", default=42, type=int,
                        help="Random seed for initialization.")
    parser.add_argument("--train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--eval", action='store_true',
                        help="Whether to run eval on dev set.")
    parser.add_argument("--ckpt", default=-1, type=int,
                        help="Which ckpt to load.")
    parser.add_argument("--from_scratch", action='store_true',
                        help="Whether to train from scratch.")
    parser.add_argument("--train_type", default='normal', type=str,
                        help="Train type is selected in [normal, rs].")

    args = parser.parse_args()

    if not os.path.exists(args.data_dir):
        raise ValueError("input data dir does not exist.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger.warning("model type: %s, task name: %s, device: %s, ",
                   args.model_type, args.task_name, device)

    # set seed
    set_seed(args)

    # Prepare task
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % args.task_name)
    task_class = processors[args.task_name]()
    label_list = task_class.get_labels()
    num_labels = len(label_list)
    args.num_labels = num_labels

    # load model
    # MODEL_CLASSES = {
    #     'bert': (BertConfig, BertForSequenceClassification, BertTokenizer),
    #     # 'xlnet': (XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer),
    #     # 'xlm': (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
    # }
    model = None
    tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path, do_lower_case=True)
    args.vocab_size = tokenizer.vocab_size
    if args.model_type == 'bert':
        config = BertConfig.from_pretrained(args.model_name_or_path,
                                            num_labels=num_labels,
                                            finetuning_task=args.task_name)
        model = BertForSequenceClassification.from_pretrained(args.model_name_or_path, config=config)
    elif args.model_type == 'cnn':
        args.embed_size = 300
        args.num_filters = 100
        args.filter_sizes = (3,)
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size,
                         num_classes=num_labels, num_filters=args.num_filters,
                         filter_sizes=args.filter_sizes, device=args.device)
    elif args.model_type == 'lstm':
        args.embed_size = 300
        args.hidden_size = 100
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size,
                          num_classes=num_labels, hidden_size=args.hidden_size,
                          device=args.device)
    elif args.model_type == 'char-cnn':
        args.alphabets = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"\\/|_@#$%^&*~`+-=<>()[]{}\n'
        args.num_features = len(args.alphabets)
        args.l0 = 1014
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
    else:
        raise ValueError('model type is not found!')
    model.to(device)

    logger.info("Training/evaluation parameters %s", args)

    # Create output directory if needed
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # Create cache directory if needed
    if not os.path.exists(args.cache_dir):
        os.makedirs(args.cache_dir)

    train_dataset = None
    if args.model_type != 'char-cnn':
        if args.train:
            train_dataset = load_and_cache_normal_example(args, tokenizer, evaluate=False)
        eval_dataset = load_and_cache_normal_example(args, tokenizer, evaluate=True)
    else:
        if args.train:
            train_dataset = load_and_cache_normal_char_example(args, args.alphabets, evaluate=False)
        eval_dataset = load_and_cache_normal_char_example(args, args.alphabets, evaluate=True)

    # Training
    if args.train:
        if args.from_scratch:  # default False
            global_step, train_loss = normal_train(args, model, train_dataset, eval_dataset)
        else:
            if args.ckpt < 0:
                checkpoints = glob.glob(
                    args.output_dir + '/normal_{}_{}_checkpoint-*'.format(args.task_name, args.model_type))
                checkpoints.sort(key=lambda x: int(x.split('-')[-1]))
                checkpoint = checkpoints[-1]
                ckpt = int(checkpoint.split('-')[-1])
            else:
                checkpoint = os.path.join(args.output_dir,
                                          'normal_{}_{}_checkpoint-{}'.format(args.task_name,
                                                                              args.model_type,
                                                                              args.ckpt))
                ckpt = args.ckpt
            model = load(args, checkpoint)
            print("Load model from {}".format(checkpoint))
            global_step, train_loss = normal_train(args, model, train_dataset, eval_dataset, ckpt + 1)
        logger.info(" global_step = %s, average loss = %s", global_step, train_loss)

    # logger.info("Saving model checkpoint to %s", args.output_dir)
    # Save a trained model, configuration and tokenizer using `save_pretrained()`.
    # They can then be reloaded using `from_pretrained()`
    # model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
    # if args.model_type == 'bert':
    #     model_to_save.save_pretrained(args.output_dir)
    # else:
    #     torch.save({'state_dict': model_to_save.state_dict()},
    #                os.path.join(args.output_dir, '{}_{}_normal_checkpoint.pth.tar'.format(args.task_name, args.model_type)))
    # tokenizer.save_pretrained(args.output_dir)
    #
    # # Good practice: save your training arguments together with the trained model
    # torch.save(args, os.path.join(args.output_dir, '{}_{}_normal_training_args.bin'.format(args.task_name, args.model_type)))

    # The model can be saved in two ways: model_to_save.save_pretrained(output_dir), or
    # torch.save({'state_dict': model.state_dict()}, output_file). Loading differs accordingly:
    # BertForSequenceClassification.from_pretrained(output_dir) for the first, or
    # ckpt = torch.load('config.bin'); model = model_class.from_pretrained(ckpt); model.load_state_dict(state_dict)
    # for the second.

    # Evaluation
    if args.eval:
        if args.ckpt < 0:
            checkpoints = glob.glob(
                args.output_dir + '/{}_{}_{}_checkpoint-*'.format(args.train_type, args.task_name, args.model_type))
            checkpoints.sort(key=lambda x: int(x.split('-')[-1]))
            checkpoint = checkpoints[-1]
        else:
            checkpoint = os.path.join(args.output_dir,
                                      '{}_{}_{}_checkpoint-{}'.format(args.train_type, args.task_name,
                                                                      args.model_type, args.ckpt))
        model = load(args, checkpoint)
        print("Evaluation result, load model from {}".format(checkpoint))
        acc = evaluate(args, model, eval_dataset)
        print("acc={:.4f}".format(acc))
def main():
    # parse arguments
    args = parser.parse_args(args=[])

    # load training data
    train_dataset = Novels(label_data_path=args.train_path, alphabet_path=args.alphabet_path)

    # load development data
    print("\nLoading development data...")
    dev_dataset = Novels(label_data_path=args.val_path, alphabet_path=args.alphabet_path)

    # Do the splitting -- 20% chosen
    num_train = len(train_dataset)
    indices = list(range(num_train))
    valid_size = 0.20
    random_seed = 1
    shuffle = True
    split = int(np.floor(valid_size * num_train))
    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, sampler=train_sampler,
                              num_workers=args.num_workers, drop_last=True, pin_memory=False)

    # feature length
    args.num_features = len(train_dataset.alphabet)

    dev_loader = DataLoader(dev_dataset, batch_size=args.batch_size, sampler=valid_sampler,
                            num_workers=args.num_workers, pin_memory=False)

    class_weight, num_class_train = train_dataset.get_class_weight()
    _, num_class_dev = dev_dataset.get_class_weight()
    print("Transferring development data into iterator...")

    # when the training set is unbalanced
    if args.class_weight is not None:
        args.class_weight = torch.FloatTensor(class_weight).sqrt_()
        if args.cuda:
            args.class_weight = args.class_weight.cuda()

    print('\nNumber of training samples: ' + str(len(train_dataset)))

    # make save folder
    try:
        os.makedirs(args.save_folder)
    except OSError as e:
        if e.errno == errno.EEXIST:
            print('Directory already exists.')
        else:
            raise

    # configuration
    print("\nConfiguration:")
    for attr, value in sorted(args.__dict__.items()):
        print("\t{}:".format(attr.capitalize().replace('_', ' ')).ljust(25) + "{}".format(value))

    # log result
    if args.log_result:
        with open(os.path.join(args.save_folder, 'result.csv'), 'w') as r:
            r.write('{:s},{:s},{:s},{:s},{:s}'.format('epoch', 'batch', 'loss', 'acc', 'lr'))

    # model
    model = CharCNN(args)
    print(model)

    # train
    train(train_loader, dev_loader, model, args)
from model import CharCNN
from torch.autograd import Variable
import torch

charcnn = CharCNN(5, 30, 60)
test_input = Variable(torch.randn(2, 5, 30))
print(charcnn(test_input))