def __init__(self, model):
    """Build encoder / decoder / predictor LSTM stacks from *model* config.

    :param model: config object carrying layer specs (``lstm``, ``lstm_dec``,
        ``lstm_pre``), sequence lengths, conditional flags and optional
        checkpoint ``timestamp`` entries.
    """
    self.model_ = model
    # Encoder layers are spatial LSTMs; decoder/predictor layers are plain.
    self.lstm_stack_enc_ = lstm_spatial.LSTMStack()
    self.lstm_stack_dec_ = lstm.LSTMStack()
    self.lstm_stack_pre_ = lstm.LSTMStack()
    for l in model.lstm:
        self.lstm_stack_enc_.Add(lstm_spatial.LSTM(l))
    if model.dec_seq_length > 0:
        for l in model.lstm_dec:
            self.lstm_stack_dec_.Add(lstm.LSTM(l))
    if model.pre_seq_length > 0:
        for l in model.lstm_pre:
            self.lstm_stack_pre_.Add(lstm.LSTM(l))
    assert model.dec_seq_length > 0
    self.is_conditional_dec_ = model.dec_conditional
    if self.is_conditional_dec_ and model.dec_seq_length > 0:
        assert self.lstm_stack_dec_.HasInputs()
    # Hard-coded overrides of the corresponding model fields (kept as-is;
    # the original config values are preserved in the trailing comments).
    self.squash_relu_ = False  # model.squash_relu
    self.squash_relu_lambda_ = 0  # model.squash_relu_lambda
    self.relu_data_ = False  # model.relu_data
    self.binary_data_ = True  # model.binary_data or self.squash_relu_
    self.only_occ_predict_ = model.only_occ_predict
    if len(model.timestamp) > 0:
        # Restore weights from the most recent checkpoint.
        old_st = model.timestamp[-1]
        ckpt = os.path.join(model.checkpoint_dir,
                            '%s_%s.h5' % (model.name, old_st))
        # Fix: open explicitly read-only and via a context manager so the
        # HDF5 handle is released even if a Load() call raises (the original
        # relied on h5py's version-dependent default mode and never closed
        # the file on error).
        with h5py.File(ckpt, 'r') as f:
            self.lstm_stack_enc_.Load(f)
            if model.dec_seq_length > 0:
                self.lstm_stack_dec_.Load(f)
            if model.pre_seq_length > 0 and not self.only_occ_predict_:
                self.lstm_stack_pre_.Load(f)
def __init__(self, model, board, board_ladv, board_sup):
    """Build LSTM stacks plus the caffe CNN solver/nets used for training.

    :param model: config with layer specs, sequence lengths and checkpoints.
    :param board: GPU board id for the LSTM side.
    :param board_ladv: GPU board id for the adversarial CNN solver.
    :param board_sup: GPU board id for the supervised CNN net.
    """
    self.model_ = model
    self.board_ = board
    self.board_ladv_ = board_ladv
    self.board_sup_ = board_sup
    self.lstm_stack_enc_ = lstm_spatial.LSTMStack()
    self.lstm_stack_dec_ = lstm.LSTMStack()
    self.lstm_stack_pre_ = lstm.LSTMStack()
    # Hard-coded caffe assets (paths kept byte-identical).
    model_file_sup = './data/bk20151009/part_1/bvlc_googlenet_quick_iter_231760.caffemodel'
    solver_file = './data/googlenet_ladv_solver.prototxt'
    prototxt_file_sup = './data/train_val_quick_grad.prototxt'
    mean_file = './data/bk20151009/part_1/lmdb_casia_full_part1_mean.binaryproto'
    self.cnn_solver = caffe.SGDSolver(solver_file)
    self.cnn_solver.net.copy_from(model_file_sup)
    self.cnn_net_sup = caffe.Net(prototxt_file_sup, model_file_sup, caffe.TRAIN)
    # Per-channel image mean, reshaped to (1, 128, 128) for broadcasting.
    mean = read_mean(mean_file)
    mean = mean.reshape((1, 128, 128))
    self.cnn_mean_ = mean
    caffe.set_mode_gpu()
    # NOTE(review): the device is only pinned when both boards coincide —
    # confirm this is the intended multi-GPU behaviour.
    if self.board_ladv_ == self.board_:
        caffe.set_device(self.board_ladv_)
    for l in model.lstm:
        self.lstm_stack_enc_.Add(lstm_spatial.LSTM(l))
    if model.dec_seq_length > 0:
        for l in model.lstm_dec:
            self.lstm_stack_dec_.Add(lstm.LSTM(l))
    if model.pre_seq_length > 0:
        for l in model.lstm_pre:
            self.lstm_stack_pre_.Add(lstm.LSTM(l))
    assert model.dec_seq_length > 0
    self.is_conditional_dec_ = model.dec_conditional
    if self.is_conditional_dec_ and model.dec_seq_length > 0:
        assert self.lstm_stack_dec_.HasInputs()
    # Hard-coded overrides of the corresponding model fields (kept as-is).
    self.squash_relu_ = False  # model.squash_relu
    self.squash_relu_lambda_ = 0  # model.squash_relu_lambda
    self.relu_data_ = False  # model.relu_data
    self.binary_data_ = True  # model.binary_data or self.squash_relu_
    self.only_occ_predict_ = model.only_occ_predict
    if len(model.timestamp) > 0:
        old_st = model.timestamp[-1]
        ckpt = os.path.join(model.checkpoint_dir,
                            '%s_%s.h5' % (model.name, old_st))
        # Fix: read-only mode + context manager so the HDF5 handle is closed
        # even if a Load() call raises.
        with h5py.File(ckpt, 'r') as f:
            self.lstm_stack_enc_.Load(f)
            if model.dec_seq_length > 0:
                self.lstm_stack_dec_.Load(f)
            if model.pre_seq_length > 0 and not self.only_occ_predict_:
                self.lstm_stack_pre_.Load(f)
def __init__(self, model):
    """Build encoder, decoder and future-predictor LSTM stacks.

    :param model: config object; keeps the model configuration alongside
        global configuration (layer specs, sequence lengths, flags).
    """
    self.model_ = model
    # Stacks of encoder, decoder and future prediction LSTMs.
    self.lstm_stack_enc_ = lstm.LSTMStack()
    self.lstm_stack_dec_ = lstm.LSTMStack()
    self.lstm_stack_fut_ = lstm.LSTMStack()
    self.decoder_copy_init_state_ = model.decoder_copy_init_state
    self.future_copy_init_state_ = model.future_copy_init_state
    # Add LSTM blocks according to the per-layer specifications.
    for l in model.lstm:
        self.lstm_stack_enc_.Add(lstm.LSTM(l))
    if model.dec_seq_length > 0:
        for l in model.lstm_dec:
            self.lstm_stack_dec_.Add(lstm.LSTM(l))
    if model.future_seq_length > 0:
        for l in model.lstm_future:
            self.lstm_stack_fut_.Add(lstm.LSTM(l))
    # At least one of the two output branches must be active.
    assert model.dec_seq_length > 0 or model.future_seq_length > 0
    # Whether decoder / future predictor are conditioned on their inputs.
    self.is_conditional_dec_ = model.dec_conditional
    self.is_conditional_fut_ = model.future_conditional
    if self.is_conditional_dec_ and model.dec_seq_length > 0:
        assert self.lstm_stack_dec_.HasInputs()
    if self.is_conditional_fut_ and model.future_seq_length > 0:
        assert self.lstm_stack_fut_.HasInputs()
    self.squash_relu_ = model.squash_relu
    self.binary_data_ = model.binary_data or model.squash_relu
    self.squash_relu_lambda_ = model.squash_relu_lambda
    self.relu_data_ = model.relu_data
    # Load previously trained weights if a checkpoint timestamp exists.
    if len(model.timestamp) > 0:
        old_st = model.timestamp[-1]
        ckpt = os.path.join(model.checkpoint_dir,
                            '%s_%s.h5' % (model.name, old_st))
        # Fix: read-only mode + context manager so the HDF5 handle is closed
        # even if a Load() call raises.
        with h5py.File(ckpt, 'r') as f:
            self.lstm_stack_enc_.Load(f)
            self.lstm_stack_dec_.Load(f)
            self.lstm_stack_fut_.Load(f)
def run(df, fold):
    """Train and validate an LSTM sentiment model on one CV fold.

    :param df: dataframe with ``review``, ``sentiment`` and ``kfold`` columns.
    :param fold: current fold index, int.
    """
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    # NOTE(review): the tokenizer is fitted on the FULL dataframe (train +
    # validation) — confirm this mild leakage is intended.
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(df.review.values.tolist())

    # Reviews -> integer sequences, zero-padded on the left to MAX_LEN.
    xtrain = tokenizer.texts_to_sequences(train_df.review.values)
    xtest = tokenizer.texts_to_sequences(valid_df.review.values)
    xtrain = tf.keras.preprocessing.sequence.pad_sequences(
        xtrain, maxlen=config.MAX_LEN)
    xtest = tf.keras.preprocessing.sequence.pad_sequences(
        xtest, maxlen=config.MAX_LEN)

    train_dataset = dataset.IMDBDataset(reviews=xtrain,
                                        targets=train_df.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=2)
    valid_dataset = dataset.IMDBDataset(reviews=xtest,
                                        targets=valid_df.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2)

    print('Loading Embeddings')
    embedding_dict = load_vectors('./crawl-300d-2M.vec')
    print('Embeddings Loaded')
    embedding_matrix = create_embedding_matrix(tokenizer.word_index,
                                               embedding_dict)

    device = torch.device('cuda')
    model = lstm.LSTM(embedding_matrix)
    model.to(device)
    # NOTE(review): lr=0.1 is unusually high for Adam — confirm intentional.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

    print('Training model')
    best_accuracy = 0
    early_stopping_counter = 0
    for epoch in range(config.EPOCHS):
        engine.train(train_data_loader, model, optimizer, device)
        outputs, targets = engine.evaluate(valid_data_loader, model, device)
        # Hard labels at a 0.5 threshold.
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print('Fold: ', fold, ' EPOCH: ', epoch, ' Accuracy Score: ', accuracy)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
            # Fix: the counter was incremented but never consulted, so early
            # stopping never fired; stop after 3 consecutive non-improving
            # epochs (matching the sibling training scripts).
            if early_stopping_counter > 2:
                break
def main():
    """Train a character-level LSTM on War and Peace, or sample from it.

    Relies on module-level globals: ``restore``, ``test``, ``heavy_device``,
    ``light_device``.
    """
    # NOTE(review): run_id is unused here but consumes a draw from the
    # global numpy RNG, so it is kept to preserve downstream RNG state.
    run_id = np.random.randint(1000)
    if restore:
        # Resume from the cached preprocessing state.
        with open('./saves/state.pkl', 'rb') as f:
            X, Y, char2ix, ix2char = pickle.load(f)
    else:
        # Fresh preprocessing pass; cache the result for later runs.
        X, Y, char2ix, ix2char = data.read_data("warandpeace.txt",
                                                sequence_length=100)
        with open('./saves/state.pkl', 'wb') as f:
            pickle.dump([X, Y, char2ix, ix2char], f)
    train_set = data.train_set(X, Y, 128)
    solver = lstm.LSTM(num_classes=len(char2ix),
                       heavy_device=heavy_device,
                       light_device=light_device,
                       restore=restore)
    # Fix: `test == False` replaced with the idiomatic truthiness check.
    if not test:
        solver.train(train_set)
    else:
        print(solver.generate(char2ix, ix2char, 100))
def main(model):
    """Train and persist the QA model selected by name.

    :param model: 'lstm' for the LSTM model; anything else uses the SVM.
    """
    if model == 'lstm':
        import lstm
        qa = lstm.LSTM()
    else:
        import svm
        qa = svm.SVM()
    # Fix: the original re-checked `qa == None` (identity-style comparison on
    # None via ==) and fell back to SVM; both branches above always assign
    # qa, so that check was dead code and has been removed.
    qa.train_save()
def gen_lstm_with(lstm_name, in_stack, exp_stack, inter_len):
    """Construct an LSTM sized from the given input/expected-output stacks.

    :param lstm_name: name passed through to the LSTM constructor.
    :param in_stack: 2-D array; second dimension is the input width.
    :param exp_stack: 2-D array; second dimension is the full output width.
    :param inter_len: intermediate (hidden/output) width.
    :return: a configured ``lstm.LSTM`` instance.
    """
    in_len = in_stack.shape[1]
    out_len = inter_len
    full_out_len = exp_stack.shape[1]
    # Fix: removed unused locals CELL_STATE_LEN and HIDDEN_LEN from the
    # original — they were computed but never referenced.
    # NOTE(review): SEQUENCE_LEN and BATCH_SIZE are module-level globals.
    return lstm.LSTM(lstm_name, SEQUENCE_LEN, in_len, out_len, full_out_len,
                     BATCH_SIZE)
def __init__(self, model):
    """Build the LSTM stack described by *model*; optionally restore weights.

    :param model: config with per-layer ``lstm`` specs, squash-relu settings
        and optional checkpoint ``timestamp`` entries.
    """
    self.model_ = model
    self.lstm_stack_ = lstm.LSTMStack()
    for l in model.lstm:
        self.lstm_stack_.Add(lstm.LSTM(l))
    self.squash_relu_ = model.squash_relu
    self.squash_relu_lambda_ = model.squash_relu_lambda
    if len(model.timestamp) > 0:
        # Restore from the most recent checkpoint.
        old_st = model.timestamp[-1]
        ckpt = os.path.join(model.checkpoint_dir,
                            '%s_%s.h5' % (model.name, old_st))
        # Fix: read-only mode + context manager so the HDF5 handle is closed
        # even if Load() raises (original used the version-dependent default
        # mode and leaked the handle on error).
        with h5py.File(ckpt, 'r') as f:
            self.lstm_stack_.Load(f)
def load_context(self, context):
    """Restore the tokenizer maps and a trained solver for CPU inference."""
    # Both compute roles are pinned to the CPU for serving.
    cpu = "/cpu:0"
    with open('./saves/state.pkl', 'rb') as f:
        _, _, self.char2ix, self.ix2char = pickle.load(f)
    self.solver = lstm.LSTM(
        num_classes=len(self.char2ix),
        heavy_device=cpu,
        light_device=cpu,
        restore=True,
    )
def compare_custom_and_cuda(batch_size, length, input_size, hidden_size,
                            num_layers, bias, inter_layer_dropout,
                            recurrent_dropout, batch_first, skip_connection,
                            jit_forward_custom):
    # Parity test: build a stock torch.nn.LSTM and the project's custom
    # lstm.LSTM with matching hyper-parameters, copy the weights from the
    # former to the latter, then assert that outputs, final hidden states
    # and parameter gradients all agree.
    # Random input tensor; layout depends on batch_first.
    if batch_first:
        input = torch.rand(batch_size, length, input_size)
    else:
        input = torch.rand(length, batch_size, input_size)
    # Shared initial (h0, c0) state fed to both models.
    hx = torch.rand(num_layers, batch_size, hidden_size), torch.rand(
        num_layers, batch_size, hidden_size)
    torch_lstm = torch.nn.LSTM(input_size=input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               bias=bias,
                               dropout=inter_layer_dropout,
                               batch_first=batch_first,
                               bidirectional=False)
    custom_lstm = lstm.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            bias=bias,
                            inter_layer_dropout=inter_layer_dropout,
                            recurrent_dropout=recurrent_dropout,
                            skip_connection=skip_connection,
                            batch_first=batch_first)
    # Make both models numerically identical before comparing.
    CustomLSTMTest.copy_weights_from_torch_to_custom(torch_lstm, custom_lstm,
                                                     bias)
    # Optionally exercise the TorchScript-compiled path of the custom LSTM.
    if jit_forward_custom:
        custom_lstm_forward = torch.jit.script(custom_lstm)
    else:
        custom_lstm_forward = custom_lstm
    torch_output, torch_hidden = torch_lstm(input, hx)
    custom_output, custom_hidden = custom_lstm_forward(input, hx)
    # Identical scalar loss on both sides so gradients are comparable.
    torch_output.pow(2).sum().backward()
    custom_output.pow(2).sum().backward()
    # Forward parity: final h, final c, and the full output sequence.
    torch_testing.assert_allclose(torch_hidden[0], custom_hidden[0])
    torch_testing.assert_allclose(torch_hidden[1], custom_hidden[1])
    torch_testing.assert_allclose(custom_output, torch_output)
    # Backward parity: compare every populated parameter gradient pairwise.
    torch_grads = [p.grad for p in torch_lstm.parameters()
                   if p.grad is not None]
    custom_grads = [p.grad for p in custom_lstm.parameters()
                    if p.grad is not None]
    assert len(torch_grads) == len(custom_grads)
    for i in range(len(torch_grads)):
        torch_testing.assert_allclose(torch_grads[i], custom_grads[i])
############# test data
# Extract word/candidate/distance features from the raw test sentences.
test_sentence_label_list = utils.read_file(config.test_file)
test_sentence_word_list = utils.sentence_extract(test_sentence_label_list)
test_feat, test_candidate_category = utils.exact_feat(
    test_sentence_word_list)
test_distence_feat = utils.cal_distence_feat(test_feat)
######################
# Index the pre-dumped datasets.  When the vocabulary is not static it is
# frozen after the training set has been indexed.
train_data_set = data.sentence_index('./train_data.txt')
if not args.static:
    data.fix_alphabet()
test_data_set = data.sentence_index('./test_data.txt')
dev_data_set = data.sentence_index('./dev_data.txt')
# Fix: the original used `is not ''` — identity comparison against a string
# literal, which is semantically wrong (and a SyntaxWarning on modern
# CPython); equality is intended.
if config.pretrained_wordEmb_file != '':
    data.load_pretrained_emb_uniform(config.pretrained_wordEmb_file,
                                     config.word_dims)
model = lstm.LSTM(config, data)
train.train(train_data_set, dev_data_set, test_data_set, model, config, data)
# Load the pickled train/test splits produced by preprocessing (Python 2).
df_train_1 = pickle.load(open(root_dir + "df_train_1.p", 'rb'))
df_test_1 = pickle.load(open(root_dir + "df_test_1.p", 'rb'))
# ---------------------- 1 time training ------------------------
# ----------------------Load/Train the LSTM model---------------
# Combined training corpus from both parts.
train = train_1 + train_2
# Toggle below: True trains a fresh model, False loads a saved one.
print "Now the maxlen =", maxlen
if True:
    dir_file = "weights/201702281025_e1_1k1k_l0_b64.p"
    print "Starting to training the model..., saving to", dir_file
    sls = lstm.LSTM(dir_file, maxlen, load=False, training=True)
    sls.train_lstm(train, epoch, train_1, test_1)
    sls.save_model()
else:
    dir_file = "weights/201702212157_e100_1k1k_l0.p"
    print "NO Training. Load the existed model:", dir_file
    sls = lstm.LSTM(dir_file, maxlen, load=True, training=False)
# --- New method to evaluate the results ------------------------
# --------------------Evaluate the results using new method------
if True:
    print "Evaluate the model using fast estimation..."
    # Embed both splits with the trained model (pairwise projections).
    projection1_train, projection2_train = sls.seq2vec(train_1)
    projection1_test, projection2_test = sls.seq2vec(test_1)
def run(df, fold):
    """
    Train and validate the LSTM question classifier on one CV fold.
    :param df: dataframe with a kfold column
    :param fold: current fold, int
    """
    fold_train = df[df.kfold != fold].reset_index(drop=True)
    fold_valid = df[df.kfold == fold].reset_index(drop=True)

    print("Fitting tokenizer")
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='!~\t\n', )
    tokenizer.fit_on_texts(df.question.values.tolist())

    # Questions -> integer sequences, zero-padded on the left to MAX_LEN.
    seq_train = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(fold_train.question.values),
        maxlen=config.MAX_LEN)
    seq_valid = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(fold_valid.question.values),
        maxlen=config.MAX_LEN)

    # Torch loaders over the fold-specific datasets.
    loader_train = torch.utils.data.DataLoader(
        dataset.QUORADataset(question=seq_train,
                             OpenStatus=fold_train.OpenStatus.values),
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=2)
    loader_valid = torch.utils.data.DataLoader(
        dataset.QUORADataset(question=seq_valid,
                             OpenStatus=fold_valid.OpenStatus.values),
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1)

    print("loading embeddings")
    embedding_matrix = create_embedding_matrix(
        tokenizer.word_index,
        load_vectors("../input/embeddings/crawl-300d-2M.vec"))

    device = torch.device("cuda")
    model = lstm.LSTM(embedding_matrix)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)

    print("Training model")
    best_accuracy = 0
    early_stopping_counter = 0
    for epoch in range(config.EPOCHS):
        engine.train(loader_train, model, optimizer, device)
        outputs, targets = engine.evaluate(loader_valid, model, device)
        print(outputs[:10])
        # Keep raw scores for ROC-AUC; derive hard labels at 0.5.
        raw_scores = outputs
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        conf_m = confusion_matrix(targets, outputs)
        print(conf_m)
        roc_score = roc_auc_score(targets, raw_scores)
        print('ROC AUC score\n', roc_score)
        print(f"Fold:{fold}, Epoch:{epoch}, Accuracy_score ={accuracy}")
        print("---")
        # Stop once five consecutive epochs fail to improve accuracy.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        if early_stopping_counter > 4:
            break
def __init__(self, rnn_type, num_tokens, embedding_size, hidden_size,
             num_layers, input_dropout=0., input_noise_std=0.,
             recurrent_dropout=0., inter_layer_dropout=0., output_dropout=0.,
             output_noise_std=0., up_project_embedding=False,
             up_project_hidden=False, tie_weights=False,
             lstm_skip_connection=False, drop_state_probability=0.01):
    # Container model: embedding encoder -> RNN core -> linear decoder, with
    # configurable dropout-or-Gaussian-noise layers on both ends.
    super(RNNModel, self).__init__()
    # Making sure either dropout or gaussian noise is activated, not both.
    assert not (input_dropout > 0. and input_noise_std > 0.)
    assert not (output_dropout > 0. and output_noise_std > 0.)
    self.input_dropout_or_noise = get_noise_layer(input_dropout,
                                                  input_noise_std)
    # --- Encoder: embedding, optionally up-projected to the hidden size. ---
    encoder_layer_list = []
    embedding_layer = nn.Embedding(num_tokens, embedding_size)
    encoder_layer_list.append(embedding_layer)
    if embedding_size != hidden_size and up_project_embedding:
        logging.info(
            "Encoder: adding linear transformation to up project embedding to hidden"
        )
        encoder_layer_list.append(
            nn.Linear(embedding_size, hidden_size, bias=False))
        rnn_input_size = hidden_size
    else:
        rnn_input_size = embedding_size
    self.encoder = nn.Sequential(*encoder_layer_list)
    # --- RNN core: stock LSTM/GRU, the project's custom LSTM, or vanilla RNN.
    if rnn_type in ['LSTM', 'GRU']:
        if recurrent_dropout > 0.:
            # Stock modules have no recurrent dropout; warn instead of failing.
            logging.warning(
                "recurrent_dropout argument is only used in the custom LSTM model"
            )
        self.rnn = getattr(nn, rnn_type)(rnn_input_size,
                                         hidden_size,
                                         num_layers,
                                         dropout=inter_layer_dropout)
    elif rnn_type == "custom_LSTM":
        self.rnn = lstm.LSTM(rnn_input_size,
                             hidden_size,
                             num_layers,
                             bias=True,
                             inter_layer_dropout=inter_layer_dropout,
                             recurrent_dropout=recurrent_dropout,
                             skip_connection=lstm_skip_connection,
                             batch_first=False)
    else:
        try:
            nonlinearity = {
                'RNN_TANH': 'tanh',
                'RNN_RELU': 'relu'
            }[rnn_type]
        except KeyError:
            raise ValueError(
                """An invalid option for `--model` was supplied, options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']"""
            )
        self.rnn = nn.RNN(rnn_input_size,
                          hidden_size,
                          num_layers,
                          nonlinearity=nonlinearity,
                          dropout=inter_layer_dropout)
    self.output_dropout_or_noise = get_noise_layer(output_dropout,
                                                   output_noise_std)
    # --- Decoder: optional re-projection plus the output linear layer. ---
    decoder_list = []
    linear_layer = nn.Linear(embedding_size, num_tokens, bias=False)
    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    self.decoder_is_sequential = True
    if tie_weights:
        linear_layer.weight = embedding_layer.weight
    if hidden_size != embedding_size:
        if up_project_hidden:
            # Non-sequential: forward() is expected to apply these layers in
            # a custom order (hence ModuleList below).
            self.decoder_is_sequential = False
            logging.info(
                "Decoder: adding linear transformation to up project embedding to to hidden"
            )
            decoder_list.append(nn.Linear(embedding_size, hidden_size))
        else:
            logging.info(
                "Decoder: adding linear transformation to down project hidden to embedding"
            )
            decoder_list.append(nn.Linear(hidden_size, embedding_size))
    decoder_list.append(linear_layer)
    if self.decoder_is_sequential:
        self.decoder = nn.Sequential(*decoder_list)
    else:
        self.decoder = nn.ModuleList(decoder_list)
    # Bookkeeping used elsewhere in the model.
    self.embedding_size = embedding_size
    self.rnn_type = rnn_type
    self.nhid = hidden_size
    self.nlayers = num_layers
    self.drop_state_probability = drop_state_probability
    self.init_weights()
def main():
    """End-to-end EHR driver: build loaders, model and loss, then train/test.

    Reads configuration from the module-level ``args`` namespace and mutates
    it with dataset-derived sizes before constructing the network.
    """
    # Fix: json.load(open(...)) leaked file handles; use context managers.
    with open(os.path.join(args.files_dir, 'demo_index_dict.json'), 'r') as fp:
        args.n_ehr = len(json.load(fp)) + 10
    with open(os.path.join(args.files_dir, 'feature_list.json'), 'r') as fp:
        args.name_list = json.load(fp)[1:]
    args.input_size = len(args.name_list)
    with open(os.path.join(args.files_dir, 'splits.json'), 'r') as fp:
        data_splits = json.load(fp)
    # (Fix: removed an unused `files = sorted(glob(...))` local.)
    # Folds 0-6 train, fold 7 validation, folds 8-9 test.
    train_files = [
        f for idx in [0, 1, 2, 3, 4, 5, 6] for f in data_splits[idx]
    ]
    valid_files = [f for idx in [7] for f in data_splits[idx]]
    test_files = [f for idx in [8, 9] for f in data_splits[idx]]
    if args.phase == 'test':
        train_phase, valid_phase, test_phase, train_shuffle = \
            'test', 'test', 'test', False
    else:
        train_phase, valid_phase, test_phase, train_shuffle = \
            'train', 'valid', 'test', True
    train_dataset = data_loader.DataBowl(args, train_files, phase=train_phase)
    valid_dataset = data_loader.DataBowl(args, valid_files, phase=valid_phase)
    test_dataset = data_loader.DataBowl(args, test_files, phase=test_phase)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=train_shuffle,
                              num_workers=args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.workers,
                              pin_memory=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.workers,
                             pin_memory=True)
    # +2 reserves ids for special tokens (presumably pad/unknown — verify).
    args.vocab_size = args.input_size + 2
    if args.use_unstructure:
        args.unstructure_size = len(
            py_op.myreadjson(os.path.join(args.files_dir,
                                          'vocab_list.json'))) + 10
    net = lstm.LSTM(args)
    loss = myloss.MultiClassLoss(0)
    net = _cuda(net, 0)
    loss = _cuda(loss, 0)
    best_metric = [0, 0]
    start_epoch = 0
    if args.resume:
        # Restore weights plus training bookkeeping from the checkpoint.
        p_dict = {'model': net}
        function.load_model(p_dict, args.resume)
        best_metric = p_dict['best_metric']
        start_epoch = p_dict['epoch'] + 1
    # Fix: replaced a manual append-loop building `parameters_all` with the
    # parameter iterator itself (identical contents).
    optimizer = torch.optim.Adam(net.parameters(), args.lr)
    if args.phase == 'train':
        for epoch in range(start_epoch, args.epochs):
            print('start epoch :', epoch)
            t0 = time.time()
            train_eval(train_loader, net, loss, epoch, optimizer, best_metric)
            t1 = time.time()
            print('Running time:', t1 - t0)
            best_metric = train_eval(valid_loader,
                                     net,
                                     loss,
                                     epoch,
                                     optimizer,
                                     best_metric,
                                     phase='valid')
            print('best metric', best_metric)
    elif args.phase == 'test':
        train_eval(test_loader, net, loss, 0, optimizer, best_metric, 'test')
def __init__(self, max_depth, output_sizes, node_feature_sizes,
             learning_rate_vector, learning_method_vector,
             shuffle_levels=None, adadelta_parameters=None,
             momentum_vector=None):
    """Initializes a multi-level LSTM.

    The ML-LSTM has max_depth layers. Layer 0 is the root node. Layers
    max_depth - 1 to 0 have LSTMs in them. Layer max_depth is simply composed
    of graph nodes, which forward their features to the LSTMs of level
    max_depth - 1. The output of level i consists in the LSTM features
    computed from the children of i; it does not contain any features
    computed from the node at level i itself. The features of the node at
    level i will be passed to node at level i-1 along with the LSTM output.
    @param max_depth: As noted above.
    @param node_feature_sizes: How many features are produced by a node,
        according to its depth. This can go from 0 to max_depth (included).
        Be careful: unless e.g. the graph is bipartite, you need to use the
        same number throughout.
    @param output_sizes: How many features are produced by LSTMs at different
        depth. This does not need to be constant.
    @param learning_rate_vector: Vector of learning rates.
    @param learning_method_vector: Vector of learning methods. It can be
        None, in which case adadelta is used, or it can be a vector
        consisting of 'adadelta' or 'momentum' or 'steady_rate' (the latter
        is not recommended) for each layer.
    @param momentum_vector: vector containing momentums for learning. It can
        be None if adadelta is used.
    @param adadelta_parameters: vector of adadelta parameters. It can be None
        if momentum learning is used.
    @param shuffle_levels: a list (or set) of depths at which shuffling is to
        occur. Defaults to no shuffling.
    """
    # Fix: the original used a mutable default (shuffle_levels=[]), which is
    # shared across all instances; use None as the sentinel instead.
    shuffle_levels = [] if shuffle_levels is None else shuffle_levels
    # First, some sanity checks.
    assert max_depth > 0
    assert len(output_sizes) == max_depth
    assert len(node_feature_sizes) == max_depth
    assert len(learning_method_vector) == max_depth
    assert adadelta_parameters is None or len(
        adadelta_parameters) == max_depth
    assert adadelta_parameters is not None or all(
        m != 'adadelta' for m in learning_method_vector)
    assert momentum_vector is None or len(momentum_vector) == max_depth
    # NOTE(review): this guard looks asymmetric with the adadelta one above
    # (one would expect `m != 'momentum'`); kept as authored — confirm.
    assert momentum_vector is not None or all(
        m == 'steady_rate' for m in learning_method_vector)
    #assert [i < max_depth for i in shuffle_levels]
    self.output_sizes = output_sizes
    self.node_feature_sizes = node_feature_sizes
    self.max_depth = max_depth
    self.learning_rate_vector = learning_rate_vector
    self.learning_method_vector = learning_method_vector
    self.adadelta_parameters = adadelta_parameters
    self.momentum_vector = momentum_vector
    self.shuffle_levels = shuffle_levels
    # Creates the list of LSTMs, one per level.  Each LSTM's input is its
    # level's node features plus (except at the deepest level) the output
    # of the level below.
    self.lstm_stack = [lstm.LSTM() for _ in range(max_depth)]
    for l in range(max_depth):
        self.lstm_stack[l].initialize(
            node_feature_sizes[l] +
            (0 if l == max_depth - 1 else output_sizes[l + 1]),
            output_sizes[l])
    # We need the following structures, when training with momentum and/or
    # adadelta, to keep track of the sum of dW at each level in order to
    # update the momentum_dW or the adadelta parameters of the respective
    # LSTM modules.
    self.number_of_nodes_per_level = None
    self.sum_of_dWs = None
    self.sum_tot_sq_gradient = None
    self.sum_tot_gradient_weight = None
    self.sum_tot_sq_delta = None
    self.sum_tot_delta_weight = None
import lstm
import recurrent_network
import learning_methods

# Training corpus and the character language derived from it.
file_name = "/home/lie/lol.txt"
lang = recurrent_network.compute_language(file_name)

# Network hyper-parameters.
hl_size = 100
num_hl = 1
max_time_step = 25

net = lstm.LSTM(hl_size, num_hl, max_time_step, lang)

# Training configuration: AdaGrad with a fixed learning rate.
epochs = 100
eta = 2
learning_method = learning_methods.AdaGrad(eta)

net.no_batch_learn(file_name, epochs, learning_method)
def test():
    """Evaluate the configured LSTM's loss on the held-out 'test' split."""
    cfg = load_config("lstm.conf")
    get_loss_from_file("test", cfg, lstm.LSTM(cfg))
def main(argv):
    # Train an LSTM language model on CoNLL data (Python 2 / Theano).
    print '\nSYSTEM START\n'
    print 'Emb Dim: %d\tHidden Dim: %d\tOptimization: %s\tLayer: %d\tEpoch: %d' % \
        (argv.emb, argv.hidden, argv.opt, argv.layer, argv.epoch)
    print 'Parameters to be saved: %s' % argv.save

    """data preprocessing"""
    print 'DATA Preprocessing...'
    # Load the corpus, map words to ids, and flatten into training samples.
    corpus, vocab_word = utils.load_conll(argv.data)
    id_corpus = utils.convert_words_into_ids(corpus, vocab_word)
    train_samples = utils.convert_data(id_corpus)
    n_samples = len(id_corpus)
    print 'Samples: %d\tVocab: %d' % (n_samples, vocab_word.size())

    """symbol definition"""
    # Theano symbolic inputs: minibatch index, input word ids, target ids.
    index = T.iscalar()
    w = T.ivector()
    d = T.ivector()
    n_hidden = argv.hidden
    n_words = argv.n_words
    batch_size = argv.batch

    """model setup"""
    print 'Compiling Theano Code...'
    model = lstm.LSTM(w=w,
                      d=d,
                      n_layers=argv.layer,
                      vocab_size=vocab_word.size(),
                      n_in=n_hidden,
                      n_h=n_hidden,
                      n_words=n_words,
                      batch_size=batch_size
                      )
    cost = model.nll
    opt = optimizers.main(name=argv.opt, cost=cost, params=model.params,
                          emb=model.emb, x=model.x, w=model.w)

    """ train """
    def _train():
        # Compiled training step: slices the flat sample vector into the
        # input window w and the targets d (same window shifted by one).
        train_model = theano.function(
            inputs=[index],
            outputs=[model.nll, model.errors],
            updates=opt,
            givens={
                w: train_samples[index * n_words * batch_size:
                                 (index+1) * n_words * batch_size],
                d: train_samples[index * n_words * batch_size + 1:
                                 (index+1) * n_words * batch_size + 1]
            },
            mode='FAST_RUN'
        )
        # Python 2 integer division: number of whole minibatches.
        n_batch_samples = n_samples / n_words / batch_size
        print 'Vocabulary Size: %d\tBatch Sample Size: %d' % (vocab_word.size(), n_batch_samples)
        print '\nTrain START'
        for epoch in xrange(argv.epoch):
            print '\nEpoch: %d' % (epoch + 1)
            print '\tIndex: ',
            start = time.time()
            losses = []
            errors = []
            for b_index in xrange(n_batch_samples):
                # Progress marker every 100 minibatches.
                if b_index % 100 == 0 and b_index != 0:
                    print b_index,
                    sys.stdout.flush()
                loss, error = train_model(b_index)
                losses.append(loss)
                errors.append(error)
            avg_loss = np.mean(losses)
            end = time.time()
            print '\tTime: %f seconds' % (end - start)
            print '\tAverage Negative Log Likelihood: %f' % avg_loss
            # Accuracy: fraction of per-word error flags that are 0.
            total = 0.0
            correct = 0
            for sent in errors:
                total += len(sent)
                for y_pred in sent:
                    if y_pred == 0:
                        correct += 1
            print '\tTrain Accuracy: %f' % (correct / total)
            if argv.save:
                model.save()
    _train()
# Dataset geometry straight from the HDF5 file: rows x window x features.
nrows = hf['x'].shape[0]
ncols = hf['x'].shape[2]
print(hf['x'].shape)
print(hf['y'].shape)
ntrain = int(configs['data']['train_test_split'] * nrows)
steps_per_epoch = int(ntrain / configs['data']['batch_size'])
# Round ntrain down to a whole multiple of batch_size so the epoch
# boundary is exact.  (Comment translated from Korean.)
ntrain = steps_per_epoch * configs['data']['batch_size']
print('> Clean data has', nrows, 'data rows. Training on', ntrain,
      'rows with', steps_per_epoch, 'steps-per-epoch')

# Building a model
sess = tf.Session()
model = lstm.LSTM(sess, configs['data']['x_window_size'], ncols,
                  configs['model']['dirname_save_model'])
sess.run(tf.global_variables_initializer())

# Train the model (disabled; a pre-trained checkpoint is loaded instead)
# data_gen_train = dl.generate_clean_data(0, ntrain)
# model.training(configs['model']['epochs'], steps_per_epoch, data_gen_train, save=True)

# Load a trained model
model.load_model('epoch0_loss6.74e-01')

# Remaining rows form the test set.
ntest = nrows - ntrain
steps_test = int(ntest / configs['data']['batch_size'])
print('> Testing model on', ntest, 'data rows with', steps_test, 'steps')
batch_size = configs['data']['batch_size']
def run(df, fold):
    """
    Run training and validation for a given fold & dataset.

    :param df: pandas dataframe with ``review``, ``sentiment`` and ``kfold``
        columns.
    :param fold: current fold, int.
    """
    # fetch training / validation dataframes for this fold
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(df_train.review.values)

    # Reviews -> integer sequences, zero-padded on the left to MAXLEN.
    x_train = tokenizer.texts_to_sequences(df_train.review.values)
    x_valid = tokenizer.texts_to_sequences(df_valid.review.values)
    x_train = tf.keras.preprocessing.sequence.pad_sequences(
        x_train, maxlen=config.MAXLEN)
    x_valid = tf.keras.preprocessing.sequence.pad_sequences(
        x_valid, maxlen=config.MAXLEN)

    #* embedding_dict: dictionary with word:embedding_vectors
    embedding_dict = load_vectors(
        "../input/wiki-news-300d-1M.vec/wiki-news-300d-1M.vec")
    #* word_index: dictionary with word:idx -- {'the': 1, 'cat': 2, ...}
    word_index = tokenizer.word_index
    embedding_matrix = create_embedding_matrix(word_index, embedding_dict)

    # Pick the device first so the model can be moved onto it before the
    # optimizer captures its parameters; GPU if available, else CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = lstm.LSTM(embedding_matrix)
    # Fix: the model was never moved to `device`, so CUDA batches from the
    # engine would fail with a device mismatch.
    model.to(device)
    # Fix: the original passed the bound method `model.parameters` (no call);
    # torch.optim.Adam requires the parameter iterable, i.e. parameters().
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    train_dataset = dataset.IMDBDataset(reviews=x_train,
                                        targets=df_train.sentiment.values)
    valid_dataset = dataset.IMDBDataset(reviews=x_valid,
                                        targets=df_valid.sentiment.values)
    # Fix: DataLoader lives in torch.utils.data, not torch.utils.
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, num_workers=4)
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=2)

    best_accuracy = 0
    early_stopping_counter = 0
    for epoch in range(config.EPOCHS):
        engine.train(train_data_loader, model, optimizer, device)
        preds, targets = engine.evaluate(valid_data_loader, model, optimizer,
                                         device)
        print(f"preds----{preds}")
        # Hard labels at a 0.5 threshold.
        preds = np.array(preds) >= 0.5
        accuracy = metrics.accuracy_score(preds, targets)
        print(f"Fold:{fold}, Epoch: {epoch}, Accuracy: {accuracy}")
        # simple early stopping after 3 non-improving epochs
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        if early_stopping_counter > 2:
            break
model_type = args.type

# Build the classifier matching the requested recurrent cell type.
if model_type == "relu":
    import relu_rnn
    model = relu_rnn.Classifier(
        relu_rnn.ReLURNN(embed_dim=dim,
                         n_units=int(config["n_units"]),
                         gpu=args.gpu))
elif model_type == "lstm":
    import lstm
    model = lstm.Classifier(
        lstm.LSTM(embed_dim=dim,
                  n_units=int(config["n_units"]),
                  gpu=args.gpu))
else:
    raise Exception("model argment should be relu or lstm")

# Warm-start from a previously saved snapshot when one exists.
init_model_name = os.path.join(model_dir, "model.npz")
if os.path.exists(init_model_name):
    serializers.load_npz(init_model_name, model)
    print("load model {}".format(init_model_name))
# calculate the imbalance ratio. used for construct the loss(cost) fn. imbalance_ratio = num_majority / num_minority steps_per_epoch = int(ntrain / configs['data']['batch_size']) print('> Clean data has', nrows, 'data rows. Training on', ntrain, 'rows with', steps_per_epoch, 'steps-per-epoch') print('> Class 0: {}, Class 1: {} --in training set'.format( num_majority, num_minority)) print('> imbalance_ratio(class_weight): {}'.format(imbalance_ratio)) # Building a model sess = tf.Session() model = lstm.LSTM(sess, configs['data']['x_window_size'], ncols, configs['model']['dirname_save_model'], class_weight=imbalance_ratio) sess.run(tf.global_variables_initializer()) if ans_load_model in ['y', 'Y']: # Load a trained model model.load_model(configs['model']['filename_load_model']) else: # Train the model data_gen_train = dl.generate_clean_data(0, ntrain) model.training(configs['model']['epochs'], steps_per_epoch, data_gen_train, save=True)
# Twitter API credentials for the bot account.
with open('keys_mtjuney.yml', 'r') as f:
    # Fix: yaml.load() without an explicit Loader is unsafe and raises a
    # TypeError on modern PyYAML; safe_load is the standard replacement for
    # plain credential/config files and parses them identically.
    keys_mtjuney = yaml.safe_load(f)

parser = argparse.ArgumentParser()
parser.add_argument('--vocabin', '-vi', default='dumps/vocab_ready_in.dump')
parser.add_argument('--vocabout', '-vo', default='dumps/vocab_ready_out.dump')
parser.add_argument('--modelinput', '-mi', default=None)
parser.add_argument('--modeloutput', '-mo', default='dumps/model_lstm.dump')
args = parser.parse_args()

# Input/output vocabularies pickled by the preprocessing step.
with open(args.vocabin, 'rb') as f:
    vocabin = pickle.load(f)
with open(args.vocabout, 'rb') as f:
    vocabout = pickle.load(f)

# NOTE(review): this rebinding shadows the imported `lstm` module; kept
# as-is because later (unseen) code references this name.
lstm = lstm.LSTM(650, vocabin, vocabout, loadpath=args.modelinput)

# Bounded queue of incoming tweets for the (currently disabled) feeder.
tweet_q = queue.Queue(maxsize=300)
excluded_ids.append(row['region_id']) return excluded_ids # with open('category_freqs.json', 'w') as f: # json.dump(category_freqs, f) # pprint(category_freqs) if __name__ == '__main__': print "******** Train model ****************" data_interface = data.Data(results_data_dir, [], [], []) print data_interface.vocab_size training = train.Learn(results_data_dir) for run in range(1, params.num_runs + 1): # this is the configuration for a model that knows all categories! Therefore, the list of excluded IDs is empty. model = lstm.LSTM(run, data_interface.vocab_size, results_data_dir, [], data_interface.index_to_token) model.build_network() training.run_training(model, data_interface) generate_indextotoken(data_interface, results_data_dir, []) ##### run training for all categories # categories = defaultdict() # # reader = csv.reader(open("../eval/cats.txt")) # reader = csv.reader(open("cats.txt")) # for row in reader: # categories[row[0].strip()] = row[1:] # # for key in categories.keys(): # print "******** Train model without:", categories[key][0].strip() # cat_ids = parse_categories([key])
def run(df, fold):
    """
    Run training and validation for the given fold of the dataset.

    :param df: pandas dataframe with a `kfold` column
    :param fold: current fold, int
    """
    # training rows are everything outside the current fold
    train_df = df[df.kfold != fold].reset_index(drop=True)
    # validation rows are the fold itself
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    print('Fitting Tokenizer')
    # fit the tokenizer on the full corpus so train/valid share one vocabulary
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(df.reviews.values.tolist())

    # convert text to integer sequences, e.g. "bad movie" -> [24, 27]
    # FIX: was `tokenizer.text_to_sequences(train.df.reviews.values)` —
    # wrong method name (`text_` vs `texts_`) and a `train.df` typo; both
    # raised at runtime.
    xtrain = tokenizer.texts_to_sequences(train_df.reviews.values)
    xtest = tokenizer.texts_to_sequences(valid_df.reviews.values)

    # zero-pad the training & validation sequences (left-hand side) to MAX_LEN
    xtrain = tf.keras.preprocessing.sequence.pad_sequences(xtrain, maxlen=config.MAX_LEN)
    xtest = tf.keras.preprocessing.sequence.pad_sequences(xtest, maxlen=config.MAX_LEN)

    # initialize dataset class and dataloader for training
    # FIX: unified naming — the original mixed `train_Dataset`, `tain_dataset`
    # and `trian_data_loader`, so the DataLoader referenced an undefined name
    # and the loop's `train_data_loader` was never created.
    train_dataset = dataset.IMDDataset(reviews=xtrain, target=train_df.sentiment.values)
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                    batch_size=config.TRAIN_BATCH_SIZE,
                                                    num_workers=2)

    # initialize dataset class and dataloader for validation
    # FIX: was `valid_Dataset` defined but `valid_dataset` passed to the loader.
    valid_dataset = dataset.IMDDataset(reviews=xtest, target=valid_df.sentiment.values)
    valid_data_loader = torch.utils.data.DataLoader(valid_dataset,
                                                    batch_size=config.VALID_BATCH_SIZE,
                                                    num_workers=2)

    print('Loading Embeddings')
    # pretrained vectors -> embedding matrix aligned with the tokenizer vocab
    embedding_dict = load_vectors('crawl-300d-2M.vec')
    embedding_matrix = create_embedding_matrix(tokenizer.word_index, embedding_dict)

    # build the model on GPU with an Adam optimizer
    device = torch.device('cuda')
    model = lstm.LSTM(embedding_matrix)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    print('Training Model')
    # best accuracy and early-stopping counter start at zero
    best_accuracy = 0
    early_stopping_counter = 0
    # train and validate for all epochs
    for epoch in range(config.EPOCHS):
        engine.train(train_data_loader, model, optimizer, device)
        outputs, targets = engine.evaluate(valid_data_loader, model, device)
        # model emits raw linear scores, so threshold at 0.5 (no sigmoid)
        outputs = np.array(outputs) >= 0.5
        # FIX: was `accuracy_score(target, outputs)` — `target` was undefined
        # (NameError) and the un-thresholded list was being passed; compare
        # the thresholded predictions against `targets`.
        accuracy = metrics.accuracy_score(targets, outputs)
        print('Fold:{}, Epoch:{}, Accuracy Score:{}'.format(fold, epoch, accuracy))
        # simple early stopping: quit after three non-improving epochs
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        if early_stopping_counter > 2:
            break
def run(df, fold):
    """
    Train and validate the LSTM sentiment model on one cross-validation fold.

    :param df: pandas dataframe with kfold column
    :param fold: current fold, int
    """
    # everything outside the current fold trains; the fold itself validates
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    print("Fitting tokenizer")
    # tokenization via tf.keras; fitting on the whole frame gives both splits
    # a single shared vocabulary (any tokenizer could be swapped in here to
    # drop the tensorflow dependency)
    tok = tf.keras.preprocessing.text.Tokenizer()
    tok.fit_on_texts(df.review.values.tolist())

    # text -> integer sequences ("bad movie" -> [24, 27]), then zero-pad /
    # truncate on the left-hand side so every sequence is exactly MAX_LEN long
    pad = tf.keras.preprocessing.sequence.pad_sequences
    xtrain = pad(tok.texts_to_sequences(train_df.review.values), maxlen=config.MAX_LEN)
    xtest = pad(tok.texts_to_sequences(valid_df.review.values), maxlen=config.MAX_LEN)

    # wrap each split in the torch Dataset and hand it straight to a
    # DataLoader, which serves batches of the configured size
    train_data_loader = torch.utils.data.DataLoader(
        dataset.IMDBDataset(reviews=xtrain, targets=train_df.sentiment.values),
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=2,
    )
    valid_data_loader = torch.utils.data.DataLoader(
        dataset.IMDBDataset(reviews=xtest, targets=valid_df.sentiment.values),
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=1,
    )

    print("Loading embeddings")
    # pretrained fastText vectors -> matrix aligned with the tokenizer vocab
    embedding_dict = load_vectors("../input/crawl-300d-2M.vec")
    embedding_matrix = create_embedding_matrix(tok.word_index, embedding_dict)

    # instantiate the model on the GPU and attach an Adam optimizer
    device = torch.device("cuda")
    model = lstm.LSTM(embedding_matrix)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    print("Training Model")
    # best accuracy seen so far, and how many epochs passed without beating it;
    # training halts after three stale epochs
    best_accuracy = 0
    early_stopping_counter = 0
    for epoch in range(config.EPOCHS):
        # one training pass, then score the held-out fold
        engine.train(train_data_loader, model, optimizer, device)
        outputs, targets = engine.evaluate(valid_data_loader, model, device)
        # the final layer is linear (no sigmoid), so threshold scores at 0.5
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"FOLD:{fold}, Epoch: {epoch}, Accuracy Score = {accuracy}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        if early_stopping_counter > 2:
            break
# NOTE(review): whitespace-mangled fragment — character-LSTM training setup
# (hyperparameter dict from CLI args, dataloaders, model, Adam optimizer,
# cross-entropy loss) plus the start of `sequence_to_one_hot`, whose body is
# cut off at the end of this view (no `return tensor` visible — presumably it
# continues past this chunk; confirm before editing). Left byte-identical.
path = args.data dataset = data.TxtLoader(path) params = { 'nhid': args.nhid, 'nlayers': args.nlayers, 'dropout': args.dropout, 'batch': args.batch_size, 'seq': args.seq, 'type': dtype, 'alphabet_size': len(dataset.alphabet) } dataloaders = data.loaders(dataset, params) model = lstm.LSTM(params).type(params['type']) optimizer = optim.Adam(model.parameters(), lr=args.lr) criterion = nn.CrossEntropyLoss() def sequence_to_one_hot(sequence): """Turns a sequence of chars into one-hot Tensor""" batch_size = params['batch'] * (params['seq'] + 1) assert len(sequence) == batch_size, 'Sequence must be a batch' tensor = torch.zeros(len(sequence), params['alphabet_size']).type(params['type']) for i, c in enumerate(sequence): tensor[i][dataset.char2ix[c]] = 1
# NOTE(review): whitespace-mangled fragment from the middle of an
# `if/elif args.model == ...` dispatch chain (cut at both ends): finishes a
# trigram-LM branch, then the 'Ensemble' branch (trains trigrams, loads a
# saved LSTM checkpoint, evaluates on the validation set) and the opening of
# the 'LSTM' branch (all commented out here). Left byte-identical — the
# enclosing `if` is outside this view, so restructuring is unsafe.
trigrams_lm = trigrams.TrigramsLM(vocab_size = len(TEXT.vocab), alpha=0.01, lambdas=[.2, .5, .3]) criterion = nn.CrossEntropyLoss() trigrams_lm.train(train_iter, n_iters=None) print(utils.validate_trigrams(trigrams_lm, val_iter, criterion)) print("Calculate Kaggle") kaggle_trigrams(trigrams_lm, "input.txt", "trigramsagain.txt") elif args.model == 'Ensemble': print("TRAINING TRIGRAMS MODEL") trigrams_lm = trigrams.TrigramsLM(vocab_size = len(TEXT.vocab), alpha=1, lambdas=[.1, .4, .5]) criterion = nn.CrossEntropyLoss() trigrams_lm.train(train_iter, n_iters=None) filename = 'lstm_large_hidden45.sav' print("LOADING LSTM MODEL") loaded_model = lstm.LSTM(embedding_size=EMBEDDING_SIZE, vocab_size=len(TEXT.vocab), num_layers=NUM_LAYERS, lstm_type='large') if CUDA: print("USING CUDA") loaded_model = loaded_model.cuda() loaded_model.load_state_dict(torch.load(filename)) criterion = nn.CrossEntropyLoss() print("VALIDATION SET") loss = utilslstm.evaluate2(loaded_model, val_iter, criterion) elif args.model == 'LSTM': # Save Model # rnn = lstm.LSTM(embedding_size=EMBEDDING_SIZE, vocab_size=len(TEXT.vocab), num_layers=NUM_LAYERS, lstm_type='large') # if CUDA: # print("USING CUDA") # rnn = rnn.cuda() # criterion = nn.CrossEntropyLoss()
# NOTE(review): whitespace-mangled Python 2 fragment, cut mid-loop at the start
# and mid-`try` at the end — converts validation lines to (dense or sparse)
# vectors, builds an LSTM sized from the first training example, then loops
# forever: shuffle training data, train in `validate_interval` chunks, score
# on the validation set, and checkpoint each interval. Left byte-identical —
# the surrounding loop/`try` structure is outside this view.
x, y = util.line_toseq(dataset_validate.pop(), charstop) if dense: dataset.append(util.seq_to_densevec(x, y, vdict)) else: dataset.append(util.seq_to_sparsevec(x, y, charset)) if not len(dataset_validate) % 1000: print "len(dataset_validate)", len(dataset_validate) dataset_validate = dataset #sys.exit() min_val_loss = float("inf") # very big peak = 0 int_num = 0 print "Making LSTM..." mylstm = lstm.LSTM(n_input=len(dataset_train[0][0][0]), n_output=len(dataset_train[0][1][0]), n_memblock=hidden_size, lr=learning_rate) #mylstm.load("m50saving1740") print "Start Training... " try: while True: numpy.random.shuffle(dataset_train) dt = [ dataset_train[x:x + validate_interval] for x in xrange(1, len(dataset_train), validate_interval) ] for d in dt: mylstm.train(d) vcost, act, aco, atp, p, r, f = mylstm.test(dataset_validate) mylstm.save(modelname + "/saving-" + str(int_num))