def predict():
    """Build the encoders and classifier, load their weights and run prediction.

    Takes no arguments; all settings are read from the module-level
    ``constant`` object. Results are produced by the final ``predict(...)``
    call with ``name="bypublisher"``.
    """
    # prepare data_loader and vocab
    # Hard-coded switch: with False, only the prepare_data branch ever runs.
    use_by_article = False
    if use_by_article:
        _, data_loader_test, vocab = prepare_byarticle_data()
    else:
        _, _, data_loader_test, vocab = prepare_data('./data_new/preprocessed_new_{}', constant.batch_size)

    if constant.use_bert:
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        state = torch.load("bert_model/pytorch_model.bin")
        bert_model.load_state_dict(state)
        # The same BERT instance encodes both articles and titles.
        article_model = bert_model
        title_model = bert_model
        # print("finish bert model loading")
        LR = models.Classifier(hidden_dim1=768, hidden_dim2=768)
        classifer_state = torch.load("bert_model/classifier.bin")
        LR.load_state_dict(classifer_state)
    # else:
    # for basic LSTM model
    # NOTE(review): because the `else:` above is commented out, the LSTM
    # models and LR below replace whatever the BERT branch built — confirm
    # this is intended.
    article_model = models.LSTM(vocab=vocab,
                                embedding_size=constant.emb_dim,
                                hidden_size=constant.hidden_dim,
                                num_layers=constant.n_layers,
                                pretrain_emb=constant.pretrain_emb
                                )
    title_model = models.LSTM(vocab=vocab,
                              embedding_size=constant.emb_dim,
                              hidden_size=constant.hidden_dim_tit,
                              num_layers=constant.n_layers,
                              pretrain_emb=constant.pretrain_emb
                              )
    LR = models.LR(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)

    # load parameters
    article_model = load_model(article_model, model_name="article_model")
    title_model = load_model(title_model, model_name="title_model")
    LR = load_model(LR, model_name="LR")

    if constant.USE_CUDA:
        article_model.cuda()
        title_model.cuda()
        LR.cuda()

    # predict and save result in result folder
    # NOTE(review): this call passes arguments although the enclosing function
    # is also named `predict` and takes none; presumably a different `predict`
    # is meant to be in scope — verify against the module's imports.
    predict(article_model, title_model, LR, data_loader_test, name="bypublisher", print_pred=True)
def load_model(path_model, path_config, vocab):
    """Rebuild the model described by a config file and load its saved weights.

    Args:
        path_model: Path of the snapshot handed to ``serializers.load_npz``.
        path_config: Path of the config file providing ``model``,
            ``word_dim`` and ``state_dim``.
        vocab: Token-to-id mapping; must contain the ``"<EOS>"`` key.

    Returns:
        The constructed model with parameters loaded from ``path_model``.

    Exits the process with status -1 when the configured model name is not
    one of ``rnn``, ``lstm`` or ``gru``.
    """
    config = Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")

    # Every supported architecture takes the same constructor arguments, so
    # only the class differs. The class is looked up lazily by attribute name
    # so that only the selected one has to exist in `models`.
    class_names = {"rnn": "RNN", "lstm": "LSTM", "gru": "GRU"}
    if model_name not in class_names:
        # Parenthesised single-argument form works on both Python 2 and 3.
        print("[error] Unknown model name: %s" % model_name)
        sys.exit(-1)

    model_class = getattr(models, class_names[model_name])
    model = model_class(vocab_size=len(vocab),
                        word_dim=word_dim,
                        state_dim=state_dim,
                        initialW=None,
                        EOS_ID=vocab["<EOS>"])
    serializers.load_npz(path_model, model)
    return model
def train(args, jsons):
    '''
    Train a character-level CTC speech recogniser on WSJ.

    Builds the train/dev loaders, the LSTM model, the CTC loss and an Adam
    optimizer, then runs ``train_epoch`` once per epoch and returns the list
    of per-epoch results. Resets the module-level ``best_wer`` to infinity
    before training starts.
    '''
    global best_wer

    trainset, trainloader = make_dataset_dataloader(args, jsons, split='train')
    devset, devloader = make_dataset_dataloader(args, jsons, split='dev')

    model = models.LSTM(num_layers=args.n_layers,
                        hidden_dim=args.hidden_dim,
                        bidirectional=(not args.unidir))

    # Fall back to the CPU when requested explicitly or when CUDA is missing.
    if args.cpu or not torch.cuda.is_available():
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:0')
    model.to(device)

    ctc_loss = torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.98))

    best_wer = np.inf

    stats = []
    for epoch in range(args.n_epochs):
        epoch_stats = train_epoch(epoch, args, trainset, trainloader,
                                  devset, devloader, model, ctc_loss,
                                  optimizer)
        stats.append(epoch_stats)
    return stats
def __init__(self, config, word_vocab):
    """Create all sub-modules of the GLSTM model.

    Args:
        config: Hyper-parameter object. Attributes read here: hidden_size,
            emb_size, freeze_emb, node_num, encoder, filter_size, dropout,
            feature_size, graph_encoder, relation_num, label_size and
            num_layers.
        word_vocab: Vocabulary object exposing ``emb`` (a pre-trained
            embedding array, or ``None``) and ``voc_size``.

    Raises:
        AssertionError: if ``config.graph_encoder`` is not one of
            'lstm', 'gru', 'highway' or 'rgcn'.
    """
    super(GLSTM, self).__init__()
    self.config = config
    self.hidden_size = config.hidden_size
    self.emb_size = config.emb_size
    self.word_vocab = word_vocab

    # Word embeddings: reuse pre-trained vectors when the vocabulary has them,
    # otherwise learn a fresh table.
    if word_vocab.emb is not None:
        self.word_emb = nn.Embedding.from_pretrained(torch.from_numpy(word_vocab.emb), freeze=config.freeze_emb)
    else:
        self.word_emb = nn.Embedding(word_vocab.voc_size, config.emb_size)
    self.node_emb = nn.Embedding(config.node_num, config.hidden_size)

    # NOTE(review): when config.encoder is neither 'cnn' nor 'rnn',
    # self.text_encoder is never set — confirm callers guarantee one of them.
    if config.encoder == 'cnn':
        self.text_encoder = models.CNN_Encoder(config.filter_size, config.hidden_size)
    elif config.encoder == 'rnn':
        self.text_encoder = models.RNN_Encoder(config.hidden_size, config.emb_size, config.dropout)

    self.feature_weight = nn.Linear(config.feature_size, config.hidden_size)
    self.feature_lstm = models.LSTM(config.hidden_size, config.hidden_size, config.dropout, bidirec=False)
    self.feature_combine = nn.Linear(config.hidden_size * 2, config.hidden_size)
    self.attn_pooling = Attentive_Pooling(config.hidden_size)

    # Graph encoder: exactly one of the four variants is instantiated.
    assert config.graph_encoder in {'lstm', 'gru', 'highway', 'rgcn'}
    if config.graph_encoder == 'lstm':
        self.s_cell = SLSTMCell(config.hidden_size, config.hidden_size, config.relation_num)
        self.g_cell = GLSTMCell(config.hidden_size, self.attn_pooling)
    elif config.graph_encoder == 'gru':
        self.s_cell = SGRUCell(config.hidden_size, config.hidden_size, config.relation_num)
        self.g_cell = GGRUCell(config.hidden_size, self.attn_pooling)
    elif config.graph_encoder == 'highway':
        self.highway = highway_RGCN(config.hidden_size, config.hidden_size, config.relation_num)
    elif config.graph_encoder == 'rgcn':
        self.rgcn = RGCN(config.hidden_size, config.hidden_size, config.relation_num)

    # Output projection from hidden size to the number of labels.
    self.w_out = nn.Linear(config.hidden_size, config.label_size)
    self.num_layers = config.num_layers
    self.dropout = torch.nn.Dropout(config.dropout)
def make_preds_labels(model_dir, save_file, utt_idx=0):
    '''
    Logs the difference between predicted and label word sequence of a single
    utterance in the dev set for every model saved in an experiment.

    Args:
        model_dir: Experiment directory holding ``args.json`` and the ``*.pt``
            checkpoints; the report is written inside it as well.
        save_file: File name (relative to ``model_dir``) of the report.
        utt_idx: Index of the dev-set utterance to decode.

    Inference always runs on the CPU, so checkpoints are loaded with
    ``map_location`` to make GPU-saved weights usable on CPU-only hosts.
    '''
    with open(os.path.join(model_dir, 'args.json'), 'r') as f:
        args_dict = json.load(f)

    model = models.LSTM(num_layers=args_dict['n_layers'],
                        hidden_dim=args_dict['hidden_dim'],
                        bidirectional=(not args_dict['unidir']))
    wts = glob.glob(os.path.join(model_dir, '*.pt'))
    dataset = datasets.ESPnetBucketDataset(
        os.path.join(args_dict['data_root'],
                     'dump/test_dev93/deltafalse/data.json'),
        os.path.join(args_dict['data_root'],
                     'lang_1char/train_si284_units.txt'),
        load_dir=args_dict['bucket_load_dir'],
        n_buckets=args_dict['n_buckets'])

    # Device placement and mode do not depend on the checkpoint: set them once.
    device = torch.device('cpu')
    model.to(device)
    # Inference mode: disables dropout so the logged predictions are
    # deterministic for a given checkpoint.
    model.eval()

    lines = {}
    for wt in wts:
        model.load_state_dict(torch.load(wt, map_location=device))

        data = dataset[utt_idx]
        # Add a leading batch dimension of size 1.
        feat = data['feat'].copy()[None, ...]
        with torch.no_grad():
            log_probs, _ = model(torch.tensor(feat))
        log_probs = log_probs.numpy()
        labels = np.array(data['label'])

        preds, to_remove = decoder.batch_greedy_ctc_decode(log_probs,
                                                           zero_infinity=True)
        # Drop the placeholder entries reported by the decoder.
        preds = preds[preds != to_remove]
        pred_words = decoder.compute_words(preds, dataset.idx2tok)
        label_words = decoder.compute_words(labels, dataset.idx2tok)

        lines[wt] = [
            f'Predicted:\n{" ".join(pred_words)}',
            f'Label:\n{" ".join(label_words)}',
        ]

    with open(os.path.join(model_dir, save_file), 'w') as f:
        for wt, wt_lines in lines.items():
            f.write(f'{wt}\n')
            for line in wt_lines:
                f.write(line + '\n')
            f.write('\n')
def nn_predict(data):
    """Run the saved LSTM on a bracketed, comma-separated list of numbers.

    Args:
        data: String such as ``"[0.1, 0.2, 0.3]"``; the first and last
            characters are stripped and the rest is split on ``", "``.

    Returns:
        Whatever ``model.predict`` returns for the parsed input.
    """
    model = models.LSTM(output_size=64).to(device)
    # Weights are deserialised on the CPU and copied into the model, which
    # already lives on `device`.
    checkpoint = torch.load('./models/nn.hdf5', map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    values = np.array(data[1:-1].split(', ')).astype(float)
    # The input must live on the same device as the model; otherwise the
    # forward pass fails whenever `device` is a GPU.
    inputs = torch.tensor(values).float().to(device)
    return model.predict(inputs)
def evaluate(args, jsons, spk2genders):
    """Evaluate the best saved checkpoint on the train, dev and test splits.

    Args:
        args: Parsed options; reads n_layers, hidden_dim, bidir, temp_root
            and model_dir.
        jsons: Passed through unchanged to ``evaluate_split``.
        spk2genders: Passed through unchanged to ``evaluate_split``.
    """
    model = models.LSTM(num_layers=args.n_layers,
                        hidden_dim=args.hidden_dim,
                        bidirectional=args.bidir)

    # Choose the device first so the checkpoint can be remapped onto it:
    # without map_location a GPU-saved checkpoint cannot be loaded on a
    # CPU-only machine.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    checkpoint_path = os.path.join(args.temp_root, args.model_dir, 'best.pt')
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)

    for split in ('train', 'dev', 'test'):
        evaluate_split(args, jsons, spk2genders, model, split=split)
def evaluate(args, jsons, spk2genders):
    """Evaluate the best saved checkpoint of the plain or adversarial model.

    With ``args.test`` set only the test split is evaluated; otherwise the
    train and dev splits are evaluated, in that order.

    Args:
        args: Parsed options; reads adversarial, n_layers, hidden_dim, bidir,
            temp_root, model_dir and test.
        jsons: Passed through unchanged to ``evaluate_split``.
        spk2genders: Passed through unchanged to ``evaluate_split``.
    """
    if args.adversarial:
        model = models_gender.LSTM_gender(num_layers=args.n_layers,
                                          hidden_dim=args.hidden_dim,
                                          bidirectional=args.bidir)
    else:
        model = models.LSTM(num_layers=args.n_layers,
                            hidden_dim=args.hidden_dim,
                            bidirectional=args.bidir)

    # Choose the device first so the checkpoint can be remapped onto it:
    # without map_location a GPU-saved checkpoint cannot be loaded on a
    # CPU-only machine.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    checkpoint_path = os.path.join(args.temp_root, args.model_dir, 'best.pt')
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)

    splits = ('test',) if args.test else ('train', 'dev')
    for split in splits:
        evaluate_split(args, jsons, spk2genders, model, split=split)
def evaluate(args, jsons):
    '''
    Evaluates a model trained for character-based ASR with CTC loss on WSJ.

    Loads ``best.pt`` from ``args.model_dir``, evaluates it on the train and
    dev splits (plus test when ``args.test`` is set) and logs the statistics
    of every split through ``log_stats``.
    '''
    model = models.LSTM(num_layers=args.n_layers,
                        hidden_dim=args.hidden_dim,
                        bidirectional=(not args.unidir))

    # Resolve the device before loading so that a GPU-saved checkpoint can be
    # remapped onto the CPU (required for --cpu and for CUDA-less machines).
    use_gpu = not args.cpu and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_gpu else 'cpu')
    checkpoint_path = os.path.join(args.model_dir, 'best.pt')
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)

    ctc_loss = torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)

    splits = ['train', 'dev']
    if args.test:
        splits.append('test')
    for split in splits:
        dataset, dataloader = make_dataset_dataloader(args, jsons, split=split)
        stats = evaluate_epoch(dataset, dataloader, model, ctc_loss)
        log_stats(f'Final results on {split}', stats)
1), np.expand_dims(validate_y, 1) test_x, test_y = np.expand_dims(test_x, 1), np.expand_dims(test_y, 1) seed = FLAGS.seed torch.manual_seed(seed) rmse_list = [] mae_list = [] if FLAGS.algorithm == 'RNN': model = models.RNN(input_size=FLAGS.input_size, hidden_size=FLAGS.hidden_size, output_size=FLAGS.output_size) elif FLAGS.algorithm == 'LSTM': model = models.LSTM(input_size=FLAGS.input_size, hidden_size=FLAGS.hidden_size, output_size=FLAGS.output_size) elif FLAGS.algorithm == 'mRNN_fixD': model = models.MRNNFixD(input_size=FLAGS.input_size, hidden_size=FLAGS.hidden_size, output_size=FLAGS.output_size, k=FLAGS.K) elif FLAGS.algorithm == 'mRNN': model = models.MRNN(input_size=FLAGS.input_size, hidden_size=FLAGS.hidden_size, output_size=FLAGS.output_size, k=FLAGS.K) elif FLAGS.algorithm == 'mLSTM_fixD': model = models.MLSTMFixD(input_size=FLAGS.input_size, hidden_size=FLAGS.hidden_size, output_size=FLAGS.output_size,
def LSTMTrainer(df):
    """Train the LSTM configured by ``constants.lstm_param`` on ``df``.

    The model is built from the input/hidden/layer/output sizes in the
    parameter dict and handed to ``generic_train`` together with the log
    path and the checkpoint file name.
    """
    param = constants.lstm_param
    dim_keys = ("input_dim", "hidden_dim", "layer_dim", "output_dim")
    dims = [param[key] for key in dim_keys]
    model = models.LSTM(*dims)
    generic_train(model, df, param, "log/lstm.txt", "vanilla_lstm.pth")
import torch
import datasets
import models

# Training setup: data loader, model, loss and optimizer.
# NOTE(review): the meaning of the first positional argument (1) is not
# visible here — confirm against datasets.uiuc_video.
dataset_loader = datasets.uiuc_video(1, batch_size=8)
epoch_size = 200
# NOTE(review): the device is hard-coded to the first GPU and the model is not
# moved to it in this excerpt — presumably done later; verify.
device = 'cuda:0'
model = models.LSTM()
criterion = torch.nn.CrossEntropyLoss()
# SGD over all model parameters with momentum and weight decay.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.8, weight_decay=1e-2)
def main(gpu, path_corpus, path_config, path_word2vec):
    """Train a recurrent language model and snapshot it periodically.

    Args:
        gpu: Device id passed to ``cuda.get_device`` and ``model.to_gpu``.
        path_corpus: Corpus path passed to ``utils.load_corpus``.
        path_config: Config file providing model, word_dim, state_dim,
            grad_clip, weight_decay, batch_size and the snapshot directory.
        path_word2vec: Optional path of pre-trained word vectors; ``None``
            leaves the embedding initialisation to the model.
    """
    MAX_EPOCH = 50
    EVAL = 200  # validate and snapshot every EVAL training iterations
    MAX_LENGTH = 70

    config = utils.Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")
    print "[info] CORPUS: %s" % path_corpus
    print "[info] CONFIG: %s" % path_config
    print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec
    print "[info] MODEL: %s" % model_name
    print "[info] WORD DIM: %d" % word_dim
    print "[info] STATE DIM: %d" % state_dim
    print "[info] GRADIENT CLIPPING: %f" % grad_clip
    print "[info] WEIGHT DECAY: %f" % weight_decay
    print "[info] BATCH SIZE: %d" % batch_size

    # Snapshot prefix: <snapshot dir>/rnnlm.<corpus basename>.<config stem>
    path_save_head = os.path.join(
        config.getpath("snapshot"),
        "rnnlm.%s.%s" % (os.path.basename(path_corpus),
                         os.path.splitext(os.path.basename(path_config))[0]))
    print "[info] SNAPSHOT: %s" % path_save_head

    sents_train, sents_val, vocab, ivocab = \
        utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)

    if path_word2vec is not None:
        word2vec = utils.load_word2vec(path_word2vec, word_dim)
        initialW = utils.create_word_embeddings(vocab, word2vec, dim=word_dim, scale=0.001)
    else:
        initialW = None

    cuda.get_device(gpu).use()
    # All architectures share the same constructor arguments.
    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=initialW,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "bd_lstm":
        model = models.BD_LSTM(vocab_size=len(vocab),
                               word_dim=word_dim,
                               state_dim=state_dim,
                               initialW=initialW,
                               EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unknown model name: %s" % model_name
        sys.exit(-1)
    model.to_gpu(gpu)

    opt = optimizers.SMORMS3()
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    # Baseline validation score before any update.
    print "[info] Evaluating on the validation sentences ..."
    loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab)
    perp = math.exp(loss_data)
    print "[validation] iter=0, epoch=0, perplexity=%f, accuracy=%.2f%%" \
        % (perp, acc_data*100)

    it = 0
    n_train = len(sents_train)
    vocab_size = model.vocab_size
    for epoch in xrange(1, MAX_EPOCH + 1):
        perm = np.random.permutation(n_train)
        for data_i in xrange(0, n_train, batch_size):
            # The trailing incomplete batch of each epoch is skipped.
            if data_i + batch_size > n_train:
                break
            words = sents_train[perm[data_i:data_i + batch_size]]

            if model_name == "bd_lstm":
                xs, ms = utils.make_batch(words, train=True, tail=False, mask=True)
                ys = model.forward(xs=xs, ms=ms, train=True)
            else:
                xs = utils.make_batch(words, train=True, tail=False)
                ys = model.forward(ts=xs, train=True)

            # Flatten predictions and targets so the loss sees one row per token.
            ys = F.concat(ys, axis=0)
            ts = F.concat(xs, axis=0)
            ys = F.reshape(ys, (-1, vocab_size)) # (TN, |V|)
            ts = F.reshape(ts, (-1, )) # (TN,)

            loss = F.softmax_cross_entropy(ys, ts)
            acc = F.accuracy(ys, ts, ignore_label=-1)

            model.zerograds()
            loss.backward()
            loss.unchain_backward()
            opt.update()
            it += 1

            loss_data = float(cuda.to_cpu(loss.data))
            perp = math.exp(loss_data)
            acc_data = float(cuda.to_cpu(acc.data))
            print "[training] iter=%d, epoch=%d (%d/%d=%.03f%%), perplexity=%f, accuracy=%.2f%%" \
                % (it, epoch, data_i+batch_size, n_train,
                   float(data_i+batch_size)/n_train*100,
                   perp, acc_data*100)

            if it % EVAL == 0:
                print "[info] Evaluating on the validation sentences ..."
                loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab)
                perp = math.exp(loss_data)
                print "[validation] iter=%d, epoch=%d, perplexity=%f, accuracy=%.2f%%" \
                    % (it, epoch, perp, acc_data*100)

                # Save both the model and the word vectors extracted from it.
                serializers.save_npz(
                    path_save_head + ".iter_%d.epoch_%d.model" % (it, epoch),
                    model)
                utils.save_word2vec(
                    path_save_head + ".iter_%d.epoch_%d.vectors.txt" % (it, epoch),
                    utils.extract_word2vec(model, vocab))
                print "[info] Saved."

    print "[info] Done."
model_path = os.path.join(MODEL_DIR, 'majority.pkl') if os.path.exists(model_path): with open(model_path, 'rb') as f: model = pickle.load(f) else: model = models.MajorityBaseline(proto_instances, PROPERTIES) with open(model_path, 'wb') as f: pickle.dump(model, f) elif args.model_type == 'lstm': w2i, i2w = data['dicts'] emb_np = data['emb_np'] X, y = data['lstm_data'] model = models.LSTM(vocab_size=len(w2i), emb_size=int(args.glove_d), h_size=args.h_size, padding_idx=w2i[PAD_TOKEN], emb_np=emb_np, properties=PROPERTIES) print('Finished building lstm model!') elif args.model_type == 'logreg': X = {} y = {} for split in SPLITS: X_split, y_split = data_utils.get_ins_outs(args, proto_instances[split], properties=PROPERTIES, sents=sents, w2e=w2e) X[split] = X_split
parser.add_argument('--save', type=str, default='drum-nn.pth',
                    help='path to save the model')
args = parser.parse_args()

# Seed NumPy and PyTorch with the same command-line value.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

midi_encoder = models.MidiEncoder(q=args.quantization)
data = models.DrumData(args.data, midi_encoder)
print(f'Corpus has a drumset of {len(midi_encoder.index)} items.')

# Input size and number of classes are both the size of the encoder's index.
rnn = models.LSTM(
    input_size=len(midi_encoder.index),
    hidden_size=args.hid_dim,
    emb_dim=args.emb_dim,
    num_classes=len(midi_encoder.index),
    n_layers=args.n_layers,
    dropout=args.dropout
).to(device)


def current_lr(optimizer):
    """Return the first non-None ``lr`` among the optimizer's param groups.

    Returns ``None`` implicitly when no param group carries an ``lr``.
    """
    for param_group in optimizer.param_groups:
        lr = param_group.get('lr')
        if lr is not None:
            return lr


lr = args.lr
optimizer = torch.optim.Adam(rnn.parameters(), lr=lr, weight_decay=args.weight_decay)
# Scheduler in 'min' mode with a patience of 10 and a threshold of 1e-4.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 'min', verbose=True, patience=10, threshold=1e-4)
def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, vm, subtask, _run):
    """Train, validate and test one model configuration and return a summary.

    The model is selected by ``model_name``; names containing "BERT" use the
    BERT data pipeline. Metrics are written as CSV files under a results
    directory derived from ``_run._id`` (and ``vm``).

    Returns:
        A dict with the run id, the mean validation loss, the test accuracy,
        recall, precision and f1, the learning rate, the hidden dimension and
        a ``'status': 'ok'`` marker.
    """
    #Logger
    directory_checkpoints = f"results/checkpoints/{_run._id}/"
    directory = f"results/{_run._id}/"

    #Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    if "BERT" in model_name:  #Default = False, if BERT model is used then use_bert is set to True
        use_bert = True
    else:
        use_bert = False

    # Cloud runs write their results to a VM-specific directory.
    if vm == "google":
        directory = f"results-bert-google/{_run._id}/"
    elif vm == "aws":
        directory = f"results-bert-aws/{_run._id}/"

    #Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes, subtask)
    else:
        embedding_dim, vocab_size, embedding_matrix, train_dataloader, val_dataloader, test_dataloader = get_data(
            int(max_seq_length),
            embedding_file=embedding_file,
            batch_size=batch_size,
            subtask=subtask)

    #Model
    # NOTE(review): an unrecognised model_name leaves `model` unassigned and
    # the `model.to(device)` call below fails — confirm the caller validates it.
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    if model_name == "MLP_Features":
        # The literal 14 is a fixed constructor argument.
        # NOTE(review): presumably the number of extra features — confirm
        # against models.MLP_Features.
        model = models.MLP_Features(embedding_matrix, embedding_dim, vocab_size,
                                    int(hidden_dim), 14, dropout, output_dim)
        print(model)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size,
                           dropout, filter_sizes, output_dim)
        print(model)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
        print(model)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim, vocab_size,
                                     int(hidden_dim), dropout, int(num_layers),
                                     bidirectional, output_dim)
        print(model)
    elif model_name == "BERTFreeze":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        # Freeze the BERT encoder parameters so they are not updated.
        for param in model.bert.parameters():
            param.requires_grad = False
            print(param)
            print(param.requires_grad)
        print(model)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
        print(model)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreeze":
        model = models.BertLinearFreeze(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLinearFreezeEmbeddings":
        model = models.BertLinearFreezeEmbeddings(hidden_dim, dropout, output_dim)
        print(model)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, bidirectional, output_dim)
        print(model)
    elif model_name == "BERTNonLinear":
        model = models.BertNonLinear(dropout, output_dim)
        print(model)
    elif model_name == "BERTNorm":
        model = models.BertNorm(dropout, output_dim)
        print(model)

    model = model.to(device)

    #Loss and optimizer
    #optimizer = optim.Adam([{'params': model.parameters(), 'weight_decay': 0.1}], lr=learning_rate)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    #Scheduler
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 50], gamma=0.1)

    #Training and evaluation
    print('Training and evaluation for {} epochs...'.format(num_epochs))
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader,
        val_dataloader, early_stopping_criteria, directory_checkpoints,
        use_bert, use_mongo)
    # Both CSV writes happen here; the enclosing tuple expression is discarded.
    train_metrics.to_csv(directory + "train_metrics.csv"), val_metrics.to_csv(
        directory + "val_metrics.csv")

    #Test
    print('Testing...')
    # Reload the best checkpoint before measuring test performance.
    load_checkpoint(directory_checkpoints + "best_model.pth.tar", model)

    #Add artifacts
    #ex.add_artifact(directory+"best_model.pth.tar")
    #ex.add_artifact(directory+"last_model.pth.tar")

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo:
        log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    #test_metrics_df = pd.DataFrame(test_metrics, index=["NOT","OFF"])
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'
    # 'loss' is the mean of the validation losses; the other scores come from
    # the test set.
    results = {
        'id': id_nummer,
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }
    return results
T_y_train = torch.reshape( T_y_train, (-1, BATCH_SIZE, int(MAX_LEN / 2), N_FEATURES)).to(device) print("T_X_train.shape", T_X_train.shape) T_X_test = torch.FloatTensor(X_test) T_y_test = torch.FloatTensor(Y_test) T_X_test = torch.reshape(T_X_test, (-1, int(MAX_LEN / 2), N_FEATURES)).to(device) T_y_test = torch.reshape(T_y_test, (-1, int(MAX_LEN / 2), N_FEATURES)).to(device) print("T_X_train.shape", T_X_test.shape) print("model declaration") #-------------- model declaration model = models.LSTM(hidden_size=HIDDEN_SIZE, nfeatures=N_FEATURES, num_layers=NUM_LAYERS, dropout=DROPOUT).to(device) loss_function = torch.nn.MSELoss(reduction='mean') optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE) losses = [] test_losses = [] h = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE).to(device) c = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE).to(device) print("training model") #-------------- model training for i in range(EPOCHS): model.train() loss = 0 predictions, (_, _) = model(T_X_test) single_loss = loss_function(predictions, T_y_test)
def _first_utterance_per_gender(gender_dataset, start=200):
    # Return [female_idx, male_idx]: the first index >= start of each gender (0 when none is found).
    found = {}
    for i in range(start, len(gender_dataset)):
        if len(found) == 2:
            break
        gender = gender_dataset.utt2gender[gender_dataset[i]['utt_id']]
        if gender in ('f', 'm') and gender not in found:
            found[gender] = i
    return [found.get('f', 0), found.get('m', 0)]


def make_preds_labels(datadir, expdir, save_file):
    """Log predicted vs. label words for one female and one male dev utterance.

    For every ``*.pt`` checkpoint in ``expdir`` the first female and the first
    male utterance found from index 200 onward are decoded greedily, and the
    predicted and reference word sequences are written to ``save_file``.

    Args:
        datadir: Data root containing the dev-set json, the unit list and the
            speaker-to-gender file.
        expdir: Directory holding the ``*.pt`` checkpoints.
        save_file: Path of the report to write.

    Inference always runs on the CPU, so checkpoints are loaded with
    ``map_location`` to make GPU-saved weights usable on CPU-only hosts.
    """
    model = models.LSTM(num_layers=3, hidden_dim=512, bidirectional=True)
    wts = glob.glob(os.path.join(expdir, '*.pt'))
    gender_dataset = gender_subset.ESPnetGenderBucketDataset(
        os.path.join(datadir, 'dump/test_dev93/deltafalse/data.json'),
        os.path.join(datadir, 'lang_1char/train_si284_units.txt'),
        os.path.join(datadir, 'test_dev93/spk2gender'),
        num_buckets=10)

    # Device placement and mode do not depend on the checkpoint: set them once.
    device = torch.device('cpu')
    model.to(device)
    # Inference mode: disables dropout so the logged predictions are
    # deterministic for a given checkpoint.
    model.eval()

    # The chosen utterances depend only on the dataset, so they are searched
    # once instead of once per checkpoint (and not at all without checkpoints).
    idxs = _first_utterance_per_gender(gender_dataset, start=200) if wts else [0, 0]

    lines = {}
    for wt in wts:
        model.load_state_dict(torch.load(wt, map_location=device))

        per_gender = {'f': [], 'm': []}
        for gndr, idx in zip(('f', 'm'), idxs):
            data = gender_dataset[idx]
            # Add a leading batch dimension of size 1.
            feat = data['feat'].copy()[None, ...]
            with torch.no_grad():
                log_probs, _ = model(torch.tensor(feat))
            log_probs = log_probs.numpy()
            labels = np.array(data['label'])

            preds, _ = decoder.batch_greedy_ctc_decode(log_probs,
                                                       zero_infinity=True)
            # Entries equal to -1 are dropped before converting to words.
            preds = preds[preds != -1]
            pred_words = decoder.compute_words(preds, gender_dataset.idx2tok)
            label_words = decoder.compute_words(labels, gender_dataset.idx2tok)

            per_gender[gndr].append(f'{gndr}:')
            per_gender[gndr].append(f'Predicted:\n{" ".join(pred_words)}')
            per_gender[gndr].append(f'Label:\n{" ".join(label_words)}')
        lines[wt] = per_gender

    with open(save_file, 'w') as f:
        for wt, per_gender in lines.items():
            f.write(f'{wt}\n')
            for gender_lines in per_gender.values():
                for line in gender_lines:
                    f.write(line + '\n')
                f.write('\n')
            f.write('\n')
cnt = 0 else: cnt += 1 if (cnt == 3): break if (avg_best == 1.0): break return avg_best, test_best data_loader_tr, data_loader_val, data_loader_test, vocab = prepare_data( '/home/nayeon/fakenews/data_new/preprocessed_new_{}_wtitle.pickle', constant.batch_size) if constant.use_bert: article_model = models.LSTM(vocab=vocab, embedding_size=constant.emb_dim, hidden_size=constant.hidden_dim, num_layers=constant.n_layers, pretrain_emb=constant.pretrain_emb) tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') title_model = BertModel.from_pretrained('bert-base-uncased') LR = models.LR(hidden_dim1=constant.hidden_dim, hidden_dim2=768) elif constant.use_utransformer: article_model = models.UTransformer( vocab=vocab, embedding_size=constant.emb_dim, hidden_size=constant.hidden_dim, num_layers=constant.max_hops_article, num_heads=constant.num_heads, total_key_depth=constant.key_value_depth, total_value_depth=constant.key_value_depth, filter_size=constant.filter_size_article,
RESULTS = [] MARGINS = [0.2] MAX_EPOCHS = 50 BATCH_SIZE = 32 FILTER_WIDTHS = [3] POOL_METHOD = "average" FEATURE_DIMS = [600] DROPOUT_PS = [0.3] NUM_HIDDEN_UNITS = [240] LEARNING_RATES = [1E-3] MODELS = [] ############################################################################## LSTM_HYPERPARAMETERS = itertools.product(MARGINS, NUM_HIDDEN_UNITS, LEARNING_RATES) for margin, num_hidden_units, learning_rate in LSTM_HYPERPARAMETERS: model = models.LSTM(EMBEDDINGS, num_hidden_units, POOL_METHOD, CUDA) criterion = helpers.MaxMarginLoss(margin) parameters = filter(lambda p: p.requires_grad, model.parameters()) optimizer = torch.optim.Adam(parameters, lr=learning_rate) model, mrr = train_utils.train_model(model, optimizer, criterion, DATA, \ MAX_EPOCHS, BATCH_SIZE, CUDA) torch.save(model.state_dict(), "./lstm_" +\ str(margin) + "_" +\ str(num_hidden_units) + "_" +\ str(learning_rate)) MODELS.append((mrr, margin, num_hidden_units, learning_rate)) ############################################################################## CNN_HYPERPARAMETERS = itertools.product(MARGINS, FILTER_WIDTHS, FEATURE_DIMS, DROPOUT_PS, LEARNING_RATES) for margin, filter_width, feature_dim, dropout_p, learning_rate in CNN_HYPERPARAMETERS: model = models.CNN(EMBEDDINGS, filter_width, POOL_METHOD, feature_dim,
def cross_validation(kfold=10):
    """Run k-fold cross validation of the classifier on the by-article data.

    For each fold, the first 25 "true" and the first 25 other-labelled
    training examples are held out as a validation set, a fresh
    ``models.Classifier`` is trained on top of the article/title encoders,
    and the test accuracy obtained at the best validation accuracy is
    accumulated. The mean test accuracy over all folds is printed at the end.

    Args:
        kfold: Number of folds passed to ``KFold``.
    """
    # NOTE(review): pickle files are loaded without validation; only use
    # trusted data files here.
    with open("data_new/by_article_ids.pickle", "rb") as ids_file:
        ids = pickle.load(ids_file)
    with open("data_new/preprocessed_byarticle_data.pickle", "rb")as data_file:
        data = pickle.load(data_file)
    with open("data_new/by_article_labels.pickle", "rb") as labels_file:
        labels = pickle.load(labels_file)
    with open("/home/nayeon/fakenews/data_new/vocab_trim4.pickle", 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)

    if not constant.use_bert:
        # for basic LSTM model
        article_model = models.LSTM(vocab=vocab,
                                    embedding_size=constant.emb_dim,
                                    hidden_size=constant.hidden_dim,
                                    num_layers=constant.n_layers,
                                    pretrain_emb=constant.pretrain_emb
                                    )
        title_model = models.LSTM(vocab=vocab,
                                  embedding_size=constant.emb_dim,
                                  hidden_size=constant.hidden_dim_tit,
                                  num_layers=constant.n_layers,
                                  pretrain_emb=constant.pretrain_emb
                                  )
        article_model = load_model(article_model, model_name="article_model")
        title_model = load_model(title_model, model_name="title_model")
    else:
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        if not constant.bert_from_scratch:
            state = torch.load("bert_model/pytorch_model.bin")
            bert_model.load_state_dict(state)
        # The same BERT instance encodes both articles and titles.
        article_model = bert_model
        title_model = bert_model
        if constant.use_bert_plus_lstm:
            lstm_article = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim, num_layers=constant.n_layers, bidirectional=False, batch_first=True)
            lstm_title = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim_tit, num_layers=constant.n_layers, bidirectional=False, batch_first=True)
            lstm_article.load_state_dict(torch.load("bert_model/lstm_article2.bin"))
            lstm_title.load_state_dict(torch.load("bert_model/lstm_title2.bin"))

    # set average test acc
    avg_test_acc = 0
    best_acc = 0  # NOTE(review): assigned but not read anywhere in this function
    k = 0
    kf = KFold(n_splits=kfold)
    for train_index, test_index in kf.split(ids):
        k += 1
        print("k:", k)

        # get 25 true 25 false for validation
        # (the first 25 of each label in train_index order; the rest is training data)
        ids_train, ids_val = [], []
        data_train, data_val = {}, {}
        labels_train, labels_val = {}, {}
        cnt_true, cnt_false = 0, 0
        for index in train_index:
            id_ = ids[index]
            if labels[id_] == "true":
                if cnt_true < 25:
                    cnt_true += 1
                    ids_val.append(id_)
                    data_val[id_] = data[id_]
                    labels_val[id_] = labels[id_]
                else:
                    ids_train.append(id_)
                    data_train[id_] = data[id_]
                    labels_train[id_] = labels[id_]
            else:
                if cnt_false < 25:
                    cnt_false += 1
                    ids_val.append(id_)
                    data_val[id_] = data[id_]
                    labels_val[id_] = labels[id_]
                else:
                    ids_train.append(id_)
                    data_train[id_] = data[id_]
                    labels_train[id_] = labels[id_]

        # get test set from test_index
        ids_test, data_test, labels_test = [], {}, {}
        for index in test_index:
            id_ = ids[index]
            ids_test.append(id_)
            data_test[id_] = data[id_]
            labels_test[id_] = labels[id_]

        train = (ids_train, data_train, labels_train)
        val = (ids_val, data_val, labels_val)
        test = (ids_test, data_test, labels_test)

        # prepare by article cross validation data
        # With a non-empty aug_count the helper also returns id dictionaries
        # for the validation and test sets.
        if constant.aug_count != '':
            data_loader_train, data_loader_val, data_loader_test, ids_val_dict, ids_test_dict = prepare_byarticle_cross_validation(train, val, test, constant.batch_size, constant.aug_count)
        else:
            data_loader_train, data_loader_val, data_loader_test = prepare_byarticle_cross_validation(train, val, test, constant.batch_size, constant.aug_count)

        # need to init the final Classifier for each fold
        if constant.use_bert:
            if constant.use_bert_plus_lstm:
                Classifier = models.Classifier(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)
                # Classifier.load_state_dict(torch.load("bert_model/classifier_bypublisher2.bin"))
            else:
                Classifier = models.Classifier(hidden_dim1=768, hidden_dim2=768)
        else:
            Classifier = models.Classifier(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)

        if constant.USE_CUDA:
            if constant.use_bert_plus_lstm:
                lstm_article.cuda()
                lstm_title.cuda()
            article_model.cuda()
            title_model.cuda()
            Classifier.cuda()

        criterion = nn.BCELoss()
        # Only the Classifier's parameters are handed to the optimizer; the
        # encoders are not updated by it.
        if constant.optimizer=='adam':
            opt = torch.optim.Adam(Classifier.parameters(), lr=constant.lr_classi, weight_decay=constant.weight_decay)
        elif constant.optimizer=='adagrad':
            opt = torch.optim.Adagrad(Classifier.parameters(), lr=constant.lr_classi)
        elif constant.optimizer=='sgd':
            opt = torch.optim.SGD(Classifier.parameters(), lr=constant.lr_classi, momentum=0.9)

        # set lr scheduler
        # scheduler = StepLR(opt, step_size=1, gamma=0.8)

        # set tensorboard folder name
        if constant.use_bert:
            experiment_name = "BERT_FineTune_aug{0}_LRlr{1}_k{2}".format(constant.aug_count, constant.lr_classi, k)
        else:
            experiment_name = "LSTM_FineTune_aug{0}_LRlr{1}_k{2}".format(constant.aug_count, constant.lr_classi, k)
        logdir = "tensorboard/" + experiment_name + "/"
        writer = SummaryWriter(logdir)

        global_steps = 0
        best_val_acc = 0

        # training and testing
        for e in range(constant.max_epochs):
            # scheduler.step()
            article_model.train()
            title_model.train()
            Classifier.train()
            if constant.use_bert_plus_lstm:
                lstm_article.train()
                lstm_title.train()

            loss_log = []
            f1_log = 0
            acc_log = 0

            # training
            pbar = tqdm(enumerate(data_loader_train),total=len(data_loader_train))
            for i, (X, x_len, tit, tit_len, y, ind) in pbar:
                opt.zero_grad()
                if constant.use_bert:
                    # Tokenize raw text into BERT ids, then pad the batch.
                    X = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(item)) for item in X]
                    tit = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(item)) for item in tit]
                    X, segments_ids_article, tit, segments_ids_tit = padding_for_bert(X, tit)
                    if constant.USE_CUDA:
                        X, segments_ids_article, tit, segments_ids_tit, y = X.cuda(), segments_ids_article.cuda(), tit.cuda(), segments_ids_tit.cuda(), y.cuda()
                    encoded_article_layers, _ = article_model(X, segments_ids_article)
                    encoded_tit_layers, _ = title_model(tit, segments_ids_tit)
                    if constant.use_bert_plus_lstm:
                        # Feed the last BERT layer through the LSTMs and take
                        # the last entry of the returned hidden state.
                        _, article_hidden = lstm_article(encoded_article_layers[-1])
                        _, title_hidden = lstm_title(encoded_tit_layers[-1])
                        article_feat = article_hidden[-1][-1]
                        title_feat = title_hidden[-1][-1]
                    else:
                        # Sum the last BERT layer over dim 1.
                        article_feat = torch.sum(encoded_article_layers[-1], dim=1)
                        title_feat = torch.sum(encoded_tit_layers[-1], dim=1) #[batch_size, hidden_size]
                else:
                    article_feat = article_model.feature(X, x_len)
                    title_feat = title_model.feature(tit, tit_len)

                feature = torch.cat((article_feat, title_feat), dim=1)
                pred_prob = Classifier(feature)
                loss = criterion(pred_prob, y)
                loss.backward()
                opt.step()

                loss_log.append(loss.item())
                accuracy, microPrecision, microRecall, microF1 = getMetrics(pred_prob.detach().cpu().numpy(), y.cpu().numpy())
                f1_log += microF1
                acc_log += accuracy
                pbar.set_description("(Epoch {}) TRAIN F1:{:.4f} TRAIN LOSS:{:.4f} ACCURACY:{:.4f}".format((e+1), f1_log/float(i+1), np.mean(loss_log), acc_log/float(i+1)))
                writer.add_scalars('train', {'loss': np.mean(loss_log), 'acc': acc_log/float(i+1), 'f1': f1_log/float(i+1)}, global_steps)
                global_steps+=1

            """ validate and test 1. Get the test accuracy result from the model that gets the best accuracy in validation 2. Whenever we find better accuracy result in the validation set, we need to test the model in the test set and get the updated test set accuracy result. 3. No need to save model during cross validation (cross validation is to find the best model) """
            article_model.eval()
            title_model.eval()
            Classifier.eval()
            if constant.use_bert_plus_lstm:
                lstm_article.eval()
                lstm_title.eval()

            print("Evaluation on validation set")
            use_add_feature_flag = constant.use_emo2vec_feat or constant.use_url
            if constant.use_bert:
                if constant.aug_count != '':
                    accuracy, pred, id_ = eval_bert_with_chunked_data(article_model, title_model, Classifier, data_loader_val, tokenizer, ids_val_dict, None, writer, e, False)
                else:
                    if constant.use_bert_plus_lstm:
                        accuracy, pred, id_ = eval_bert(article_model, title_model, Classifier, data_loader_val, tokenizer, lstm_article, lstm_title, use_add_feature_flag, writer, e, False)
                    else:
                        accuracy, pred, id_ = eval_bert(article_model, title_model, Classifier, data_loader_val, tokenizer, None, None, use_add_feature_flag, writer, e, False)
            else:
                accuracy, pred, id_ = eval_tit_lstm(article_model, title_model, Classifier, data_loader_val, use_add_feature_flag, writer, e, False)

            # find better accuracy in the validation set, need to test the model in the testset
            # NOTE(review): test_acc is only assigned inside this branch, so a
            # fold whose validation accuracy never exceeds 0 would leave it
            # unset (or stale from the previous fold) — confirm.
            if(accuracy > best_val_acc):
                print("Find better model, test it on test set")
                best_val_acc = accuracy
                if constant.use_bert:
                    if constant.aug_count != '':
                        accuracy, pred, id_ = eval_bert_with_chunked_data(article_model, title_model, Classifier, data_loader_test, tokenizer, ids_test_dict, None, writer, e, True)
                    else:
                        if constant.use_bert_plus_lstm:
                            accuracy, pred, id_ = eval_bert(article_model, title_model, Classifier, data_loader_test, tokenizer, lstm_article, lstm_title, use_add_feature_flag, writer, e, True)
                        else:
                            accuracy, pred, id_ = eval_bert(article_model, title_model, Classifier, data_loader_test, tokenizer, None, None, use_add_feature_flag, writer, e, True)
                else:
                    accuracy, pred, id_ = eval_tit_lstm(article_model, title_model, Classifier, data_loader_test, use_add_feature_flag, writer, e, True)
                test_acc = accuracy
                # The classifier is saved only when validation and test
                # accuracy together exceed a fixed threshold.
                # NOTE(review): 1.53 is an unexplained magic number.
                if best_val_acc + test_acc > 1.53:
                    torch.save(Classifier.state_dict(), "bert_model/classifier.bin")
                    print("Classifier has been saved in bert_model/classifier.bin")

        # finish one fold, need to accumulate the test_acc (will do average of accuracy after k folds)
        avg_test_acc += test_acc

    # after k folds cross validation, get the final average test accuracy
    avg_test_acc = avg_test_acc * 1.0 / kfold
    print("After {0} folds cross validation, the final accuracy of {1} is {2}".format(kfold, constant.manual_name, avg_test_acc))
def _build_optimizer(params):
    """Return the optimizer selected by ``constant.optimizer`` for ``params``.

    ``params`` is handed unchanged to the ``torch.optim`` constructor, so it
    may be an iterable of parameters or a list of per-group dicts.

    Raises:
        ValueError: if ``constant.optimizer`` is not one of 'adam',
            'adagrad' or 'sgd'.
    """
    name = constant.optimizer
    if name == 'adam':
        return torch.optim.Adam(params, lr=constant.lr_classi, weight_decay=constant.weight_decay)
    if name == 'adagrad':
        return torch.optim.Adagrad(params, lr=constant.lr_classi)
    if name == 'sgd':
        return torch.optim.SGD(params, lr=constant.lr_classi, momentum=0.9)
    # Previously an unknown name left the optimizer unbound and training
    # failed later with a NameError far away from the actual cause.
    raise ValueError("Unsupported optimizer: {!r}".format(name))


def _save_predictions(experiment_name, pred, id_):
    """Pickle predictions and their ids to ``pred/<experiment_name>_pred.pickle``."""
    with open('pred/{0}_pred.pickle'.format(experiment_name), 'wb') as handle:
        pickle.dump({"preds": pred, "ids": id_}, handle, protocol=pickle.HIGHEST_PROTOCOL)


def train(aug_count=""):
    """Fine-tune the classifier head (and optional LSTM heads) on top of the encoders.

    The encoders are either one shared BERT model or two ``models.LSTM``
    encoders, chosen by ``constant.use_bert``. Only the classifier ``LR``
    (plus ``lstm_article`` / ``lstm_title`` when BERT is combined with
    ``constant.train_cleaner_dataset``) is handed to the optimizer. After
    every epoch the model is evaluated on the test loader; improved models
    are written under ``bert_model/`` and training stops after 10 epochs
    without improvement or once the test accuracy reaches 1.0.

    Args:
        aug_count: forwarded to ``prepare_byarticle_data``. The tensorboard
            experiment name uses ``constant.aug_count`` rather than this
            argument.

    Raises:
        ValueError: if ``constant.optimizer`` names an unsupported optimizer.
    """
    # prepare data_loader and vocab
    if constant.train_cleaner_dataset:
        data_loader_train, data_loader_test, vocab = prepare_filtered_data(batch_size=constant.batch_size)
    else:
        data_loader_train, data_loader_test, vocab = prepare_byarticle_data(
            aug_count=aug_count, batch_size=constant.batch_size)

    # The extra LSTM heads only exist on the BERT + cleaner-dataset path.
    # Gating on both flags avoids the NameError the LSTM-encoder path used to
    # hit when train_cleaner_dataset was set without use_bert.
    use_lstm_head = bool(constant.use_bert and constant.train_cleaner_dataset)
    lstm_article = None
    lstm_title = None
    tokenizer = None

    # load parameters, LR is for fine tune
    if constant.use_bert:
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        if not constant.bert_from_scratch:
            state = torch.load("bert_model/pytorch_model.bin")
            bert_model.load_state_dict(state)
        # Article and title share the same BERT instance.
        article_model = bert_model
        title_model = bert_model
        if use_lstm_head:
            lstm_article = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim,
                                   num_layers=constant.n_layers, bidirectional=False, batch_first=True)
            lstm_title = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim_tit,
                                 num_layers=constant.n_layers, bidirectional=False, batch_first=True)
            LR = models.Classifier(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)
        else:
            LR = models.Classifier(hidden_dim1=768, hidden_dim2=768)
    else:
        # for basic LSTM model
        article_model = models.LSTM(vocab=vocab,
                                    embedding_size=constant.emb_dim,
                                    hidden_size=constant.hidden_dim,
                                    num_layers=constant.n_layers,
                                    pretrain_emb=constant.pretrain_emb,
                                    )
        title_model = models.LSTM(vocab=vocab,
                                  embedding_size=constant.emb_dim,
                                  hidden_size=constant.hidden_dim_tit,
                                  num_layers=constant.n_layers,
                                  pretrain_emb=constant.pretrain_emb,
                                  )
        LR = models.Classifier(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)
        # Checkpoints are loaded for the LSTM encoders only.
        article_model = load_model(article_model, model_name="article_model")
        title_model = load_model(title_model, model_name="title_model")

    if constant.USE_CUDA:
        article_model.cuda()
        title_model.cuda()
        LR.cuda()
        if use_lstm_head:
            lstm_article.cuda()
            lstm_title.cuda()

    criterion = nn.BCELoss()
    if use_lstm_head:
        # Per-group learning rates; the classifier group repeats the default.
        trainable = [
            {"params": lstm_article.parameters(), "lr": constant.lr_lstm},
            {"params": lstm_title.parameters(), "lr": constant.lr_title},
            {"params": LR.parameters(), "lr": constant.lr_classi},
        ]
    else:
        trainable = LR.parameters()
    opt = _build_optimizer(trainable)

    # set tensorboard folder name
    if constant.use_bert:
        experiment_name = "BERT_FineTune_aug{0}_LRlr{1}".format(constant.aug_count, constant.lr_classi)
    else:
        experiment_name = "LSTM_FineTune_aug{0}_LRlr{1}".format(constant.aug_count, constant.lr_classi)
    logdir = "tensorboard/" + experiment_name + "/"
    writer = SummaryWriter(logdir)

    test_best = 0
    cnt = 0  # consecutive epochs without improvement on the test set
    global_steps = 0
    for e in range(constant.max_epochs):
        article_model.train()
        title_model.train()
        LR.train()
        if use_lstm_head:
            lstm_article.train()
            lstm_title.train()
        loss_log = []
        f1_log = 0
        acc_log = 0

        # training
        pbar = tqdm(enumerate(data_loader_train), total=len(data_loader_train))
        for i, (X, x_len, tit, tit_len, y, ind) in pbar:
            opt.zero_grad()
            if constant.use_bert:
                X = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(item)) for item in X]
                tit = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(item)) for item in tit]
                # padding
                X, segments_ids_article, tit, segments_ids_tit = padding_for_bert(X, tit)
                if constant.USE_CUDA:
                    X, segments_ids_article, tit, segments_ids_tit, y = (
                        X.cuda(), segments_ids_article.cuda(), tit.cuda(), segments_ids_tit.cuda(), y.cuda())
                encoded_article_layers, _ = article_model(X, segments_ids_article)
                encoded_tit_layers, _ = title_model(tit, segments_ids_tit)
                if use_lstm_head:
                    # nn.LSTM returns (output, (h_n, c_n)); [-1][-1] therefore
                    # selects the last layer of the second state tensor (c_n).
                    _, article_hidden = lstm_article(encoded_article_layers[-1])
                    _, title_hidden = lstm_title(encoded_tit_layers[-1])
                    article_feat = article_hidden[-1][-1]
                    title_feat = title_hidden[-1][-1]
                else:
                    # Sum the last encoder layer over dim 1 -> [batch_size, hidden_size]
                    article_feat = torch.sum(encoded_article_layers[-1], dim=1)
                    title_feat = torch.sum(encoded_tit_layers[-1], dim=1)
            else:
                article_feat = article_model.feature(X, x_len)
                title_feat = title_model.feature(tit, tit_len)

            feature = torch.cat((article_feat, title_feat), dim=1)
            pred_prob = LR(feature)
            loss = criterion(pred_prob, y)
            loss.backward()
            opt.step()

            loss_log.append(loss.item())
            accuracy, _, _, microF1 = getMetrics(pred_prob.detach().cpu().numpy(), y.cpu().numpy())
            f1_log += microF1
            acc_log += accuracy
            pbar.set_description("(Epoch {}) TRAIN F1:{:.4f} TRAIN LOSS:{:.4f} ACCURACY:{:.4f}".format(
                (e+1), f1_log/float(i+1), np.mean(loss_log), acc_log/float(i+1)))
            writer.add_scalars('train', {'loss': np.mean(loss_log),
                                         'acc': acc_log/float(i+1),
                                         'f1': f1_log/float(i+1)}, global_steps)
            global_steps += 1

        article_model.eval()
        title_model.eval()
        LR.eval()
        if use_lstm_head:
            lstm_article.eval()
            lstm_title.eval()

        # testing (runs after every epoch)
        print("Evaluation on Test")
        use_add_feature_flag = constant.use_emo2vec_feat or constant.use_url
        if constant.use_bert:
            # lstm_article / lstm_title are None when no LSTM head is used.
            accuracy, pred, id_ = eval_bert(article_model, title_model, LR, data_loader_test, tokenizer,
                                            lstm_article, lstm_title, use_add_feature_flag, writer, e, True)
        else:
            accuracy, pred, id_ = eval_tit_lstm(article_model, title_model, LR, data_loader_test,
                                                use_add_feature_flag, writer, e, True)

        if accuracy > test_best:
            test_best = accuracy
            print("Find better model. Saving model ...")
            cnt = 0
            if use_lstm_head:
                suffix = (str(constant.hidden_dim) + "_" + str(constant.hidden_dim_tit)
                          + "_" + str(test_best) + ".bin")
                torch.save(lstm_article.state_dict(), "bert_model/by_publisher/lstm_article_" + suffix)
                torch.save(lstm_title.state_dict(), "bert_model/by_publisher/lstm_title_" + suffix)
                torch.save(LR.state_dict(), "bert_model/by_publisher/classifier_bypublisher_" + suffix)
                print("The lstm_article lstm_title classifier_bypublisher have been saved!")
            else:
                torch.save(LR.state_dict(), "bert_model/finetune_classi_for_tunebert_"+str(accuracy)+".bin")
                print("The fine tune classifier has been saved!")
        else:
            cnt += 1

        # Early stop: 10 epochs without improvement, or a perfect score.
        # NOTE: pred / id_ come from the most recent evaluation, which is not
        # necessarily the best-scoring epoch.
        if cnt == 10 or test_best == 1.0:
            # save prediction and gold
            _save_predictions(experiment_name, pred, id_)
            break
def make_tsne(datadir, expdir, save_file, title, adversarial=False, test=False, mean=True):
    """Plot a 2-D t-SNE of per-utterance model embeddings, coloured by speaker gender.

    Loads ``best.pt`` from ``expdir`` into a 3-layer LSTM model, runs every
    utterance of the selected split through it, reduces each utterance to one
    1024-dimensional vector, projects the vectors with cosine-metric t-SNE and
    saves a scatter plot to ``save_file``.

    Args:
        datadir: root directory holding ``dump/``, ``lang_1char/`` and the
            ``test_<split>/spk2gender`` file.
        expdir: experiment directory containing ``best.pt``.
        save_file: path the figure is written to.
        title: text inserted into the plot title.
        adversarial: use ``models_gender.LSTM_gender`` (which returns three
            values) instead of ``models.LSTM`` (which returns two).
        test: use the ``eval92`` split instead of ``dev93``.
        mean: average the embedding over axis 0 when True, otherwise keep
            only its last row.
    """
    if adversarial:
        model = models_gender.LSTM_gender(num_layers=3)
    else:
        model = models.LSTM(num_layers=3)
    model.load_state_dict(torch.load(os.path.join(expdir, 'best.pt')))
    # Inference only: put dropout / normalisation layers in evaluation mode so
    # the embeddings are deterministic.
    model.eval()

    split = 'eval92' if test else 'dev93'
    gender_dataset = gender_subset.ESPnetGenderBucketDataset(
        os.path.join(datadir, f'dump/test_{split}/deltafalse/data.json'),
        os.path.join(datadir, 'lang_1char/train_si284_units.txt'),
        os.path.join(datadir, f'test_{split}/spk2gender'),
        num_buckets=10)

    # Keeping every frame output for every utterance is too much to hold in
    # memory, so each utterance is reduced to a single vector.
    n_utts = len(gender_dataset)
    embeds = np.zeros((n_utts, 1024))
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    genders = np.zeros(n_utts, dtype=int)

    # No gradients are needed for plotting; skip building the autograd graph.
    with torch.no_grad():
        for i in range(n_utts):
            data = gender_dataset[i]
            feat = data['feat'].copy()[None, ...]  # add a leading batch axis
            if adversarial:
                _, _, embed = model(torch.tensor(feat))
            else:
                _, embed = model(torch.tensor(feat))
            embed = embed.detach().numpy()[0]  # drop the batch axis again
            if mean:
                embeds[i, :] = np.mean(embed, axis=0)
            else:
                embeds[i, :] = embed[-1, :]
            utt = data['utt_id']
            # 0 = female, 1 = everything else (plotted as male)
            genders[i] = 0 if gender_dataset.utt2gender[utt] == 'f' else 1

    tsne = TSNE(n_components=2, metric='cosine')
    tsne_embeds = tsne.fit_transform(embeds)
    f = tsne_embeds[genders == 0, :]
    m = tsne_embeds[genders == 1, :]

    plt.scatter(f[:, 0], f[:, 1], label='Female')
    plt.scatter(m[:, 0], m[:, 1], label='Male')
    plt.legend(loc='upper right')
    plt.title(f't-SNE of Female and Male embeddings for {title}')
    plt.axis('off')
    plt.tight_layout()
    plt.savefig(save_file)
    plt.clf()  # reset the shared pyplot figure for the next call
"data/Panasonic 18650PF Data/0degC/Drive cycles/06-02-17_10.43 0degC_HWFET_Pan18650PF.mat", args.sequence_length, args.window_size) train_loaders = list() for d in train_datasets: temp = DataLoader(dataset=d, batch_size=args.batch_size, shuffle=True) train_loaders.append(temp) # train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) validation_loader = DataLoader(dataset=validation_dataset, batch_size=args.batch_size, shuffle=False) # Model, loss, and optimizer if args.model == 'lstm': model = models.LSTM(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device) elif args.model == 'gru': model = models.GRU(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device) elif args.model == 'rnn': model = models.RNN(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device) criterion = nn.MSELoss() optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) # Train the model # total_step = len(train_loader) # total_train_step = len(train_loader) # total_val_step = len(validation_loader)
def _make_batches(indices, batch_size):
    """Split ``indices`` into ``len(indices) // batch_size`` near-equal batches.

    A split with fewer samples than one batch yields a single batch and an
    empty split yields no batches; both cases previously made
    ``np.array_split`` raise because the section count truncated to zero.
    """
    if len(indices) == 0:
        return []
    return np.array_split(indices, max(1, len(indices) // batch_size))


def main(_):
    """Train, validate and test a sequence classifier, optionally reporting to a study.

    Downloads the input data when it is missing locally (or whenever
    ``FLAGS.use_optimizer`` is set), builds the RNN/LSTM model selected by
    ``FLAGS.model``, runs ``FLAGS.epochs`` epochs of train/val/test, and, when
    ``FLAGS.save_model`` is set, saves the model and uploads the output folder.

    Raises:
        ValueError: if ``FLAGS.model`` is neither 'rnn' nor 'lstm'.
    """
    if not os.path.exists(FLAGS.local_path_in) or FLAGS.use_optimizer:
        utils_gcs.download_files_from_gcs(FLAGS.local_path_in, FLAGS.gcs_path_in)
        logging.info('Data downloaded successfully!')

    sequence_df = pd.read_hdf(
        os.path.join(FLAGS.local_path_in, FLAGS.seq_file), 'df')
    if FLAGS.balance_df:
        # Keep only the sequences whose url also appears in the balance frame.
        balance_df = pd.read_hdf(
            os.path.join(FLAGS.local_path_in, FLAGS.balance_df), 'df')
        sequence_df = sequence_df[sequence_df['url'].isin(balance_df['url'])]

    embeddings_dict = utils.get_n2v_graph_embedding(
        os.path.join(FLAGS.local_path_in, FLAGS.g_emb),
        graph_gen=False, normalize_type='minmax')
    x_sequence, y_label, label_list = utils.load_input_with_label(
        sequence_df, embeddings_dict, FLAGS.task)

    train_idx, val_idx, test_idx = utils.split_data_idx(
        len(x_sequence), FLAGS.train_ratio, FLAGS.val_ratio)
    train_batches = _make_batches(train_idx, FLAGS.batch_size)
    val_batches = _make_batches(val_idx, FLAGS.batch_size)
    test_batches = _make_batches(test_idx, FLAGS.batch_size)

    # model training/testing
    logging.info('FLAGS.epochs: %s', FLAGS.epochs)
    logging.info('FLAGS.batch_size: %s', FLAGS.batch_size)
    logging.info('FLAGS.learning_rate: %s', FLAGS.lr)

    # Dropout is forced to 0 for single-layer models.
    dropout = 0.0 if FLAGS.num_layers == 1 else FLAGS.dropout

    print_gpu_info()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info('Current device is %s', device.type)

    if FLAGS.model == 'rnn':
        tm_model = models.RNN(in_dim=FLAGS.dim,
                              hid_dim=FLAGS.hid_dim,
                              num_label=len(label_list),
                              num_layers=FLAGS.num_layers,
                              dropout=dropout).to(device)
    elif FLAGS.model == 'lstm':
        tm_model = models.LSTM(in_dim=FLAGS.dim,
                               hid_dim=FLAGS.hid_dim,
                               num_label=len(label_list),
                               num_layers=FLAGS.num_layers,
                               dropout=dropout,
                               bi_direct=FLAGS.bi).to(device)
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError('Unsupported FLAGS.model: %r' % (FLAGS.model,))

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(tm_model.parameters(),
                                 lr=FLAGS.lr, weight_decay=1e-6)

    if FLAGS.use_optimizer:
        # example trial_name:
        # 'projects/{project_id}/locations/{region}/studies/{study_id}/trials/{trial_id}'
        trial_name_split = FLAGS.trial_name.split('/')
        project_id = trial_name_split[1]
        region = trial_name_split[3]
        study_id = trial_name_split[-3]
        trial_id = trial_name_split[-1]
        logging.info('project_id: %s, region: %s, study_id: %s, trial_id: %s',
                     project_id, region, study_id, trial_id)
        ml_client = optimizer_client.create_or_load_study(
            project_id, region, study_id, json.loads(FLAGS.study_config))

    for epoch in range(FLAGS.epochs):
        logging.info('Epoch %s', epoch)
        start_time = time.time()
        train(tm_model, x_sequence, y_label, train_batches,
              criterion, optimizer, device, FLAGS.print_step)
        val_f1 = val(tm_model, x_sequence, y_label, val_batches, device)
        test(tm_model, x_sequence, y_label, test_batches, device)

        if FLAGS.use_optimizer:
            # Report the validation F1 of this epoch to the study.
            elapsed_secs = int(time.time() - start_time)
            metric_list = [{'metric': 'valf1', 'value': float(val_f1)}]
            ml_client.report_intermediate_objective_value(
                epoch, elapsed_secs, metric_list, trial_id)

    logging.info('Experiment finished.')

    if FLAGS.save_model:
        filename = '%s_%s_%s' % (FLAGS.task, FLAGS.model, FLAGS.name)
        utils.save_model(tm_model, optimizer, FLAGS.local_path_out, filename)
        utils_gcs.upload_files_to_gcs(local_folder=FLAGS.local_path_out,
                                      gcs_path=FLAGS.gcs_path_out)
# Exploratory, notebook-style script: builds a model through the project's
# `models` wrappers, fits it, then inspects perturbed vs. unperturbed states.
# The meaning of the numeric arguments below is defined in `models` and is
# not visible here -- TODO confirm before changing any of them.
import models

# Build and fit the model wrapper; `create_model()` must run before fitting.
LSTM256 = models.LSTM(256, 3)
LSTM256.create_model()

# Create the data object and populate it.
Data = models.Data([1, 1, 1], 20000, 0.01)
Data.getData()

# Fit (first argument presumably the epoch count -- verify in models.LSTM),
# then print the wrapper's statistics and the underlying model summary.
LSTM256.fit_model(5, Data)
LSTM256.print_stats()
LSTM256.model.summary()

# Generate the unperturbed and perturbed state trajectories and print their
# element-wise difference.
LSTM256_States = models.States(1100, 1000)
LSTM256_States.create_unperturbed(LSTM256, Data)
# NOTE(review): the method is spelled `create_pertrurbed` while the attribute
# read below is `perturbed` -- confirm the method name in models.States.
LSTM256_States.create_pertrurbed(LSTM256, Data)
print(LSTM256_States.unperturbed - LSTM256_States.perturbed)

# Plot the exponent computed from those states.
LSTM256_lyapunov = models.Lyapunov(LSTM256_States)
LSTM256_lyapunov.plot_exponent(LSTM256_States)

# Inspect the first layer of the underlying model.
LSTM_Layer = LSTM256.model.layers[0]
# Bare expression: only displays something in an interactive session and has
# no effect when this file runs as a script.
LSTM_Layer.weights

import matplotlib.pyplot as plt
# NOTE(review): neither `np` nor `line` is defined in the visible part of this
# script; as written this call raises NameError unless both are provided
# elsewhere. `line` would need 20 values to match the x axis.
plt.plot(np.linspace(1, 10, 20), line)
def main(output_dim, train_bs, val_bs, test_bs, num_epochs, max_seq_length,
         learning_rate, warmup_proportion, early_stopping_criteria, num_layers,
         hidden_dim, bidirectional, dropout, filter_sizes, embedding_file,
         model_name, use_mongo, _run):
    """Run one experiment: build the model, train, evaluate and write metrics.

    Metrics are written as CSV files under ``results/<_run._id>/``. The best
    checkpoint saved there during training is reloaded before testing.

    Args:
        model_name: one of "MLP", "MLP_Features", "CNN", "LSTM",
            "LSTMAttention", "BERT", "BERTLinear", "BERTLSTM". Any name
            containing "BERT" switches the data pipeline to ``get_data_bert``.
        warmup_proportion: accepted for interface compatibility; not used in
            this function.
        _run: run object; only ``_run._id`` is read here.
        (The remaining arguments are hyper-parameters forwarded to the data
        loaders, model constructors and ``train_and_evaluate``.)

    Returns:
        dict with the run id, the rounded mean validation loss, the test
        accuracy/recall/precision/f1, ``learning_rate``, ``hidden_dim`` and
        ``'status': 'ok'``.

    Raises:
        ValueError: if ``model_name`` is not a supported model.
    """
    # Logger
    directory = f"results/{_run._id}/"

    # Batch sizes
    batch_sizes = [int(train_bs), int(val_bs), int(test_bs)]
    batch_size = int(train_bs)

    # BERT-based models use their own data pipeline.
    use_bert = "BERT" in model_name

    # Data
    if use_bert:
        train_dataloader, val_dataloader, test_dataloader = get_data_bert(
            int(max_seq_length), batch_sizes)
    else:
        (embedding_dim, vocab_size, embedding_matrix,
         train_dataloader, val_dataloader, test_dataloader) = get_data_features(
            int(max_seq_length),
            embedding_file=embedding_file,
            batch_size=batch_size)

    # Model
    if model_name == "MLP":
        model = models.MLP(embedding_matrix, embedding_dim, vocab_size,
                           int(hidden_dim), dropout, output_dim)
    elif model_name == "MLP_Features":
        model = models.MLP_Features(embedding_matrix, embedding_dim, vocab_size,
                                    int(hidden_dim), 13, dropout, output_dim)
    elif model_name == "CNN":
        model = models.CNN(embedding_matrix, embedding_dim, vocab_size, dropout,
                           filter_sizes, output_dim)
    elif model_name == "LSTM":
        model = models.LSTM(embedding_matrix, embedding_dim, vocab_size,
                            int(hidden_dim), dropout, int(num_layers),
                            bidirectional, output_dim)
    elif model_name == "LSTMAttention":
        model = models.LSTMAttention(embedding_matrix, embedding_dim, vocab_size,
                                     int(hidden_dim), dropout, int(num_layers),
                                     bidirectional, output_dim)
    elif model_name == "BERT":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", output_dim)
    elif model_name == "BERTLinear":
        model = models.BertLinear(hidden_dim, dropout, output_dim)
    elif model_name == "BERTLSTM":
        model = models.BertLSTM(hidden_dim, dropout, output_dim)
    else:
        # Previously an unknown name fell through and crashed on the unbound
        # `model` variable below.
        raise ValueError(f"Unknown model_name: {model_name!r}")
    # Printed for every model (the plain MLP used to be the one exception).
    print(model)

    model = model.to(device)

    # Loss and optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = F.cross_entropy

    # Training and evaluation
    print('Training and evaluation for {} epochs...'.format(num_epochs))
    train_metrics, val_metrics = train_and_evaluate(
        num_epochs, model, optimizer, loss_fn, train_dataloader,
        val_dataloader, early_stopping_criteria, directory, use_bert,
        use_mongo)
    train_metrics.to_csv(directory + "train_metrics.csv")
    val_metrics.to_csv(directory + "val_metrics.csv")

    # Test
    print('Testing...')
    load_checkpoint(directory + "best_model.pth.tar", model)

    test_metrics = evaluate_model(model, optimizer, loss_fn, test_dataloader,
                                  device, use_bert)
    if use_mongo:
        log_scalars(test_metrics, "Test")

    test_metrics_df = pd.DataFrame(test_metrics)
    print(test_metrics)
    test_metrics_df.to_csv(directory + "test_metrics.csv")

    id_nummer = f'{_run._id}'
    results = {
        'id': id_nummer,
        # The loss is the mean over the validation history; the remaining
        # metrics come from the test set.
        'loss': np.round(np.mean(val_metrics['loss']), 4),
        'accuracy': test_metrics['accuracy'],
        'recall': test_metrics['recall'],
        'precision': test_metrics['precision'],
        'f1': test_metrics['f1'],
        'learning_rate': learning_rate,
        'hidden_dim': hidden_dim,
        'status': 'ok'
    }
    return results
def main():
    """Train and evaluate the selected recurrent model over seeds 0..99.

    For every seed the series ``x`` of ``data/<FLAGS.dataset>.csv`` is scaled
    to [0, 1], split chronologically into train/validation/test, and a model
    chosen by ``FLAGS.algorithm`` is trained full-batch with early stopping on
    the training loss. The copy with the lowest validation loss is scored on
    the test split; the per-seed RMSE and MAE lists are printed at the end.

    Raises:
        ValueError: if ``FLAGS.dataset`` or ``FLAGS.algorithm`` is unknown.
    """
    batch_size = 1
    start = 0
    end = 100

    # read data
    df_data = pd.read_csv('data/' + FLAGS.dataset + '.csv')

    # split train/val/test: dataset -> (train_size, validate_size)
    split_sizes = {
        'tree7': (2500, 1000),
        'DJI': (2500, 1500),
        'traffic': (1200, 200),
        'arfima': (2000, 1200),
    }
    if FLAGS.dataset not in split_sizes:
        # Previously an unknown dataset left the sizes unbound (NameError).
        raise ValueError('Unknown dataset: {!r}'.format(FLAGS.dataset))
    train_size, validate_size = split_sizes[FLAGS.dataset]

    rmse_list = []
    mae_list = []

    for i in range(start, end):
        seed = i
        print('seed ----------------------------------', seed)

        # Input and target are both built from the same 'x' column.
        x = np.array(df_data['x'])
        y = np.array(df_data['x'])
        x = x.reshape(-1, FLAGS.input_size)
        y = y.reshape(-1, FLAGS.output_size)

        # normalize the data (the scaler ends up fitted on y and is reused
        # below to invert the predictions)
        scaler = MinMaxScaler(feature_range=(0, 1))
        x = scaler.fit_transform(x)
        y = scaler.fit_transform(y)

        # use this function to prepare the data for modeling
        data_x, data_y = create_dataset(x, y)

        # split into train, validation and test sets (chronological)
        val_end = train_size + validate_size
        train_x, train_y = data_x[0:train_size], data_y[0:train_size]
        validate_x, validate_y = data_x[train_size:val_end], data_y[train_size:val_end]
        test_x, test_y = data_x[val_end:len(data_y)], data_y[val_end:len(data_y)]

        # reshape input to be [time steps, samples, features]
        train_x = np.reshape(train_x, (train_x.shape[0], batch_size, FLAGS.input_size))
        validate_x = np.reshape(validate_x, (validate_x.shape[0], batch_size, FLAGS.input_size))
        test_x = np.reshape(test_x, (test_x.shape[0], batch_size, FLAGS.input_size))
        train_y = np.reshape(train_y, (train_y.shape[0], batch_size, FLAGS.output_size))
        validate_y = np.reshape(validate_y, (validate_y.shape[0], batch_size, FLAGS.output_size))
        test_y = np.reshape(test_y, (test_y.shape[0], batch_size, FLAGS.output_size))

        torch.manual_seed(seed)

        # initialize model
        sizes = dict(input_size=FLAGS.input_size,
                     hidden_size=FLAGS.hidden_size,
                     output_size=FLAGS.output_size)
        if FLAGS.algorithm == 'RNN':
            model = models.RNN(**sizes)
        elif FLAGS.algorithm == 'LSTM':
            model = models.LSTM(**sizes)
        elif FLAGS.algorithm == 'mRNN_fixD':
            model = models.MRNNFixD(k=FLAGS.K, **sizes)
        elif FLAGS.algorithm == 'mRNN':
            model = models.MRNN(k=FLAGS.K, **sizes)
        elif FLAGS.algorithm == 'mLSTM_fixD':
            model = models.MLSTMFixD(k=FLAGS.K, **sizes)
        elif FLAGS.algorithm == 'mLSTM':
            model = models.MLSTM(k=FLAGS.K, **sizes)
        else:
            # Printing and carrying on crashed on the unbound `model` below.
            raise ValueError('Algorithm selection ERROR!!! Unknown algorithm: {!r}'.format(FLAGS.algorithm))

        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=FLAGS.lr)

        # `np.infty` was removed in NumPy 2.0; `np.inf` is the same value.
        best_loss = np.inf
        best_train_loss = np.inf
        stop_criterion = 1e-5
        rec = np.zeros((FLAGS.epochs, 3))  # rows: [train loss, val loss, seconds]
        epoch = 0
        cnt = 0  # consecutive epochs without sufficient training improvement

        def train():
            """Run one full-batch step; return (train loss, validation loss).

            The validation loss is measured before the optimizer step,
            continuing from the hidden state reached on the training data.
            """
            model.train()
            optimizer.zero_grad()
            target = torch.from_numpy(train_y).float()
            output, hidden_state = model(torch.from_numpy(train_x).float())
            with torch.no_grad():
                val_y, _ = model(torch.from_numpy(validate_x).float(), hidden_state)
                target_val = torch.from_numpy(validate_y).float()
                val_loss = criterion(val_y, target_val)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            return loss, val_loss

        def compute_test(best_model):
            """Return (rmse, mape, mae) of ``best_model`` on the test split."""
            # Roll the hidden state through train and validation data so the
            # test predictions continue from it.
            _, hidden_state = best_model(to_torch(train_x))
            _, hidden_state = best_model(to_torch(validate_x), hidden_state)
            test_predict, _ = best_model(to_torch(test_x), hidden_state)
            test_predict = test_predict.detach().numpy()
            # invert predictions back to the original scale
            test_predict_r = scaler.inverse_transform(test_predict[:, 0, :])
            test_y_r = scaler.inverse_transform(test_y[:, 0, :])
            # calculate error
            test_rmse = math.sqrt(
                mean_squared_error(test_y_r[:, 0], test_predict_r[:, 0]))
            test_mape = (abs((test_predict_r[:, 0] - test_y_r[:, 0]) /
                             test_y_r[:, 0])).mean()
            test_mae = mean_absolute_error(test_predict_r[:, 0], test_y_r[:, 0])
            return test_rmse, test_mape, test_mae

        while epoch < FLAGS.epochs:
            _time = time.time()
            loss, val_loss = train()
            if val_loss < best_loss:
                # Keep a snapshot of the best model by validation loss.
                best_loss = val_loss
                best_model = deepcopy(model)
            if (best_train_loss - loss) > stop_criterion:
                best_train_loss = loss
                cnt = 0
            else:
                cnt += 1
            if cnt == FLAGS.patience:
                break
            # save training records as plain floats; the loss tensors still
            # track gradients and should not be pushed into np.array directly
            time_elapsed = time.time() - _time
            rec[epoch, :] = np.array([loss.item(), val_loss.item(), time_elapsed])
            print("epoch: {:2.0f} train_loss: {:2.5f} val_loss: {:2.5f} "
                  "time: {:2.1f}s".format(epoch, loss.item(), val_loss.item(),
                                          time_elapsed))
            epoch = epoch + 1

        # make predictions
        test_rmse, test_mape, test_mae = compute_test(best_model)

        rmse_list.append(test_rmse)
        mae_list.append(test_mae)

    print('RMSE:{}'.format(rmse_list))
    print('MAE:{}'.format(mae_list))
import config
import models


def huber_approx_obj(preds, dtrain, *, h=1):
    """Return the gradient and Hessian of the pseudo-Huber loss.

    Intended as a custom XGBoost objective: a smooth, twice-differentiable
    stand-in for absolute error.

    Args:
        preds: predicted values.
        dtrain: target values, as something that can be subtracted from
            ``preds`` element-wise. With ``xgb.train()`` the second argument
            is a ``DMatrix`` and the labels must be taken from it with
            ``dtrain.get_label()`` first.
            NOTE(review): the scikit-learn wrapper calls custom objectives as
            ``(y_true, y_pred)``; confirm the argument order at the call site.
        h: the delta of the pseudo-Huber loss (where it switches from
            quadratic to linear behaviour). Must be non-zero. Keyword-only so
            a framework cannot set it positionally by accident.

    Returns:
        ``(grad, hess)``: first and second derivative of the loss with respect
        to the prediction, element-wise.
    """
    d = preds - dtrain
    scale = 1 + (d / h) ** 2
    scale_sqrt = np.sqrt(scale)
    grad = d / scale_sqrt
    hess = 1 / scale / scale_sqrt
    return grad, hess


# Registry of model wrappers keyed by short name.
# NOTE(review): this rebinding shadows the imported `models` module for the
# rest of the file; the name is kept because other code may rely on it.
models = {
    "dt": models.DecisionTree(),
    "rf": models.RandomForest(),
    "lr": models.LR(),
    "xgb": models.XGBoost(),
    "svm": models.SVM(),
    "lgb": models.LGB(),
    # "mlp": models.MLP(),
    "lstm": models.LSTM()
}
# To get the final accuracy, calculate the mean; the mean absolute error
# should be reported as a percentage of the performance, since performance is
# what is being asked for.
if __name__ == "__main__": # device = torch.device("cpu") device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") dataset = loaders.WavLSTM(wave, SAMPLE_RATE, WINDOW_SIZE) x, y = dataset[0] print(f'x: {x.shape} y{y.shape}') print(f'len(dataset): {len(dataset)}') loader = DataLoader(dataset, batch_size=BATCH_SIZE) loss_fn = nn.MSELoss() writer = SummaryWriter(f'runs/{LOG_FN}{time.asctime()}') # model = nn.LSTM(BATCH_SIZE*2, BATCH_SIZE*2, N_LAYERS).to(device) model = models.LSTM(WINDOW_SIZE, WINDOW_SIZE, N_LAYERS, device).to(device) print(model) optimizer = optim.Adam(model.parameters(), lr=LR) optimizer.zero_grad() hn = torch.randn(N_LAYERS, 1, WINDOW_SIZE).to(device) cn = torch.randn(N_LAYERS, 1, WINDOW_SIZE).to(device) all_outs = [] for epoch in range(EPOCHS): model.reset() for i, (x, y) in enumerate(loader): try: x = x.to(device).view(BATCH_SIZE, 1, -1) except RuntimeError: