def train(config, echoNum):
    lstmer = lstm_model(config)
    jd_data = read_data_sets('data/jdData.json', 'data/w2id.json', config.num_steps)
    with tf.Session() as sess:
        if not os.path.exists('tmp/'):
            os.mkdir('tmp/')
        loss_op = lstmer.loss()
        train_op = lstmer.training()
        saver = tf.train.Saver()
        if os.path.exists('tmp/checkpoint'):  # check whether a saved model already exists
            saver.restore(sess, 'tmp/model')  # if it does, restore variables from it
        else:
            init = tf.global_variables_initializer()  # otherwise initialize the variables
            sess.run(init)
        for i in range(echoNum):
            x_data, y_data = jd_data.next_batch(config.batch_size)
            print('loss before training:', sess.run(loss_op, feed_dict={lstmer.x: x_data, lstmer.y: y_data}))
            sess.run(train_op, feed_dict={lstmer.x: x_data, lstmer.y: y_data})
            saver.save(sess, './tmp/model')
            print('loss after training:', sess.run(loss_op, feed_dict={lstmer.x: x_data, lstmer.y: y_data}))
            # print('predictions:', sess.run(lstmer.logits, feed_dict={lstmer.x: x_data, lstmer.y: y_data}))
            # print(y_data)
            print('Finished round %s' % i)
def main():
    load_dotenv()
    # to_time = int(time() // (1440 * 60) - 7) * 1440 * 60
    # data = {}
    cryptos = ["BTC", "LTC", "ETH", "EOS", "BCH", "XRP", "TRX", "BNB"]
    # for crypto in cryptos:
    #     array = []
    #     for i in range(8):
    #         array.append(open_or_download(crypto, to_time + i * 1440 * 60, 1440))
    #     data[crypto] = array
    # data = collect(data, cryptos)
    # cryptos = ["BTC", "XRP"]

    model = lstm_model(len(cryptos))
    model.fit_generator(
        training_generator(cryptos),
        steps_per_epoch=128,
        epochs=20,
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor="loss")])
def run_training() -> None:
    sales = load_datasets()
    X = pp.reorder(sales, config.COLUMNS)
    X = pp.sorter(X)
    X = pp.grouper(X, config.GROUPING_VARS)
    X = pp.stationarizer(X)
    X = pp.featureBuilder(X, 12)
    _, X = pp.scale_features(X)
    X_train, y_train = pp.targetDefiner(X)
    print(X_train.shape, y_train.shape)

    model = lstm_model((1, X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=30, verbose=1, shuffle=False)
    model.save(config.MODEL_PATH)
def trainer(name_dataset_train=None, name_dataset_dev=None):
    ###############################################################################
    # This function defines and trains the model for Chinese segmentation;
    # all the features are loaded and passed to the model for the training process.
    #
    # Input:
    #   name_dataset_train: name of the train dataset
    #   name_dataset_dev: name of the dev dataset
    #
    # Output:
    #   history: history of the model
    ###############################################################################
    try:
        path_train = dictionary_paths[name_dataset_train]
    except:
        path_train = None
    try:
        path_dev = dictionary_paths[name_dataset_dev]
    except:
        path_dev = None

    # load dictionaries
    vocab_unigram, vocab_bigram = load_dictionaries()
    vocab_size_unigram = len(vocab_unigram)
    vocab_size_bigram = len(vocab_bigram)

    # Creation of the model
    training_model = model.lstm_model(vocab_size_unigram, vocab_size_bigram)
    training_model.summary()
    cbk = tf.keras.callbacks.TensorBoard("logging/keras_model")

    # Training the model
    train_data, dev_data = datasets_features(path_train, path_dev, False)
    data_gen = model.batch_creation(*train_data)
    history = training_model.fit_generator(data_gen,
                                           STEP_EPOCHS,
                                           EPOCHS,
                                           validation_data=([*dev_data[:2]], dev_data[2]),
                                           callbacks=[cbk])
    save_model(training_model)
    return history
def train(argv=None):
    print("Loading data...")
    x_train, y_train, x_test, y_test, vocabulary_inv = load_data(
        FLAGS.max_words, FLAGS.sequence_length)

    print('Building model...')
    if FLAGS.model_type == 'cnn':
        model = cnn_model(len(vocabulary_inv), FLAGS.embedding_dim,
                          FLAGS.sequence_length, FLAGS.dropout_rate,
                          FLAGS.num_filters, FLAGS.hidden_units)
    elif FLAGS.model_type == 'lstm':
        model = lstm_model(vocab_length=len(vocabulary_inv),
                           embedding_dim=FLAGS.embedding_dim,
                           sequence_length=FLAGS.sequence_length,
                           dropout_rate=FLAGS.dropout_rate,
                           lstm_units=FLAGS.num_filters,
                           hidden_units=FLAGS.hidden_units)
    else:
        raise ValueError(
            'Unrecognized value `{}` for argument model_type'.format(
                FLAGS.model_type))

    if FLAGS.sequence_length != x_test.shape[1]:
        print("Adjusting sequence length for actual size")
        FLAGS.sequence_length = x_test.shape[1]

    print("x_train shape:", x_train.shape)
    print("x_test shape:", x_test.shape)
    print("Vocabulary Size: {:d}".format(len(vocabulary_inv)))

    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate),
        metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.AUC()])
    model.fit(x_train, y_train,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.num_epochs,
              validation_data=(x_test, y_test),
              verbose=1)
    tf.keras.models.save_model(model, '{}.h5'.format(FLAGS.model_type))
def __init__(self, max_steps):
    self.filename = 'stateactionfile.h5'
    self.max_steps = max_steps
    self.prev_rpm = None
    self.prev_gear = 0
    self.prevTrack = np.zeros(20)
    self.number_of_sensors = shared_preferences.number_of_sensors
    self.number_of_efectors = shared_preferences.number_of_efectors
    self.backpropagation_size = shared_preferences.backpropagation_size
    self.internal_state_size = shared_preferences.internal_state_size
    self.input = tf.placeholder(
        tf.float32, [1, self.number_of_sensors, self.backpropagation_size],
        name='state')
    self.sensors_input = np.zeros(
        (self.backpropagation_size, self.number_of_sensors))

    # model
    normalized_input = norm.normalize_input(self.input)
    input_series = tf.unstack(normalized_input, axis=2)
    self.output_series, self.current_state, self.init_state = lstm_model(
        1, input_series, self.number_of_sensors, self.internal_state_size,
        self.number_of_efectors, shared_preferences.number_of_lstm_layers)
    unnormalized_output = self.output_series[-1]
    steering_normalized, acceleration_normalized, self.y = norm.normalize_output(
        unnormalized_output)

    self.saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    self.sess = tf.Session()
    self.sess.run(init)
    self.saver.restore(self.sess, self.network_dirpath + "model.ckpt")
annotations = modify_annotation(image_description)  # Add "<START>" and "<END>" tags
test_annotations = modify_annotation(test_imagedescription)
tokenizer = text_tokenizer(annotations)
vocab_size = len(tokenizer.word_index) + 1  # Vocabulary size
max_len = max_length(annotations)  # Maximum length of annotations

# Data preparation
training_pixels = images_pixels(path + '/data/Flicker8k_Dataset/',
                                list(image_description.keys()), shape)
test_pixels = images_pixels(path + '/data/Flicker8k_Dataset/',
                            list(test_imagedescription.keys()), shape)
X1, X2, y = input_preparation(tokenizer, max_len, annotations, training_pixels, shape)
X1test, X2test, ytest = input_preparation(tokenizer, max_len, test_annotations, test_pixels, shape)

# Creating LSTM + CNN model
model = ml.lstm_model(vocab_size, max_len, shape)

# Training starts
model.fit([X1, X2], y, epochs=1, verbose=2, validation_data=([X1test, X2test], ytest))

# Save model to JSON
model_json = model.to_json()
with open("model_cnn.json", "w") as json_file:
    json_file.write(model_json)
# Save weights
model.save_weights("model_cnn.h5")

# Data fitting using a data generator (useful for CPUs without enough memory)
# =============================================================================
""" Created by kunal on 11/23/17 """ from model import lstm_model, sequence_length from data_generator import data_generator from helper import plot_func model_path = '../models/model_1/stock_prediction_model_1_.hdf5' stocks_csv = '../data/AAPL.csv' model = lstm_model() model.load_weights(model_path) data = data_generator(1, sequence_length, stocks_csv, mode='val').next() stock_op = data[0] stock_cp = data[1] pred_p = model.predict(stock_op) plot_func(stock_op, stock_cp, pred_p)
def interpret(argv=None):
    print('Setting up environment...')
    utils.set_up_environment(visible_devices=FLAGS.visible_devices)

    print('Loading data...')
    x_train, y_train, x_test, y_test, vocabulary_inv = load_data(
        FLAGS.max_words, FLAGS.sequence_length)
    lengths = np.sum(x_test != 0, axis=1)
    min_indices = np.argsort(lengths)

    print('Loading model...')
    if FLAGS.model_type == 'cnn':
        interpret_model = cnn_model(len(vocabulary_inv), FLAGS.embedding_dim,
                                    FLAGS.sequence_length, FLAGS.dropout_rate,
                                    FLAGS.num_filters, FLAGS.hidden_units,
                                    for_interpretation=True)
    elif FLAGS.model_type == 'lstm':
        interpret_model = lstm_model(vocab_length=len(vocabulary_inv),
                                     embedding_dim=FLAGS.embedding_dim,
                                     sequence_length=FLAGS.sequence_length,
                                     dropout_rate=FLAGS.dropout_rate,
                                     lstm_units=FLAGS.num_filters,
                                     hidden_units=FLAGS.hidden_units,
                                     for_interpretation=True)
    else:
        raise ValueError(
            'Unrecognized value `{}` for argument model_type'.format(
                FLAGS.model_type))

    model = tf.keras.models.load_model('{}.h5'.format(FLAGS.model_type))
    embedding_model = tf.keras.models.Model(model.input, model.layers[1].output)
    interpret_model.load_weights('{}.h5'.format(FLAGS.model_type), by_name=True)
    explainer = PathExplainerTF(interpret_model)

    batch_input = x_test[min_indices[:FLAGS.num_sentences]]
    batch_embedding = embedding_model(batch_input)
    batch_pred = model(batch_input)
    baseline_input = np.zeros(x_test[0:1].shape)
    baseline_embedding = embedding_model(baseline_input)

    print('Getting attributions...')
    # Get word-level attributions
    embedding_attributions = explainer.attributions(batch_embedding,
                                                    baseline_embedding,
                                                    batch_size=FLAGS.batch_size,
                                                    num_samples=FLAGS.num_samples,
                                                    use_expectation=False,
                                                    output_indices=0,
                                                    verbose=True)
    np.save('embedding_attributions_{}.npy'.format(FLAGS.model_type),
            embedding_attributions)

    print('Getting interactions...')
    # Get pairwise word interactions
    max_indices = np.sum(batch_input[-1] != 0)
    interaction_matrix = np.zeros(
        (FLAGS.num_sentences, max_indices, FLAGS.embedding_dim,
         FLAGS.sequence_length, FLAGS.embedding_dim))

    indices = np.indices((max_indices, FLAGS.embedding_dim))
    indices = indices.reshape(2, -1)
    indices = indices.swapaxes(0, 1)
    for interaction_index in tqdm(indices):
        embedding_interactions = explainer.interactions(
            batch_embedding,
            baseline_embedding,
            batch_size=FLAGS.batch_size,
            num_samples=FLAGS.num_samples,
            use_expectation=False,
            output_indices=0,
            verbose=False,
            interaction_index=interaction_index)
        interaction_matrix[:, interaction_index[0], interaction_index[1], :, :] = embedding_interactions

    np.save('interaction_matrix_{}.npy'.format(FLAGS.model_type),
            interaction_matrix)
import csv
import time
from pprint import pprint

import cv2  # OpenCV, used below for the VideoWriter (not imported in the original snippet)
import numpy as np

import poses
import utils
import person
import model as mdl
import config as cfg
import control as ps3

timestamp = int(time.time() * 1000)

secondary_model = mdl.lstm_model()
secondary_model.compile(loss='categorical_crossentropy',
                        optimizer=mdl.RMSprop(lr=0.0001),
                        metrics=['accuracy'])

if cfg.log:
    dataFile = open('data/{}.csv'.format(timestamp), 'w')
    newFileWriter = csv.writer(dataFile)

if cfg.video:
    # Define the codec and create VideoWriter object
    name = '{}.mp4'.format(timestamp)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(name, fourcc, cfg.fps, (cfg.w, cfg.h))
from imports import *
from make_data import make
from preprocessing import network_params
from model import lstm_model

notes = make()
arr = network_params(notes[0], notes[1], 100)
inputs = arr[0]
outputs = arr[1]
total_classes = notes[1]

model = lstm_model(inputs, total_classes)
print("\n")
print("Summary :")
print("\n")
print(model.summary())

weights = "trained_model/01-5.3365.h5"
if len(weights) > 0:
    model.load_weights(weights)

checkpoint = ModelCheckpoint("trained_model/{epoch:02d}-{loss:.4f}.h5",
                             monitor="loss",
                             save_best_only=True,
                             mode="min",
                             verbose=1)
callbacks_list = [checkpoint]
model.fit(inputs, outputs, epochs=100, batch_size=64, callbacks=callbacks_list)
"""
callbacks list for the last model saved
"""
import os
import datetime

import numpy as np
from tensorflow.contrib import learn
from sklearn.metrics import mean_squared_error

from model import generate_data, lstm_model

LOG_DIR = os.path.join(os.getcwd(),
                       datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
TIMESTEPS = 80
RNN_LAYERS = [80]
DENSE_LAYERS = None
TRAINING_STEPS = 30000
BATCH_SIZE = 100
PRINT_STEPS = TRAINING_STEPS / 100

my_dir = os.sep.join([os.path.expanduser('~'), 'Desktop', 'sine'])

regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                                      n_classes=0,
                                      verbose=2,
                                      steps=TRAINING_STEPS,
                                      optimizer='SGD',
                                      learning_rate=0.001,
                                      batch_size=BATCH_SIZE,
                                      class_weight=[1])

# generate sine wave data
X, y = generate_data(np.sin, np.linspace(0, 100, 5000), TIMESTEPS, seperate=False)

# create an LSTM instance and validation monitor
validation_monitor = learn.monitors.ValidationMonitor(X['val'], y['val'],
                                                      every_n_steps=PRINT_STEPS,
                                                      early_stopping_rounds=100000)
        os.path.join(model_dir, 'model_unsaved'), sess, 'model_unsaved')
    data_pl = [x, is_train]
    test_pred = model.predict_v1(sess, test_iter, test_data, data_pl, preds)

if ARGS.model_type == 'lstm':
    num_layers = config['num_layers']
    cell_size = config['cell_size']
    feature_size = config['feature_size']
    num_time = config['num_time']
    dropout_rate = config['dropout_rate']

    outputs, inputs, _, drop_rate = model.lstm_model(
        input_shape=(None, 60, feature_size),
        label_shape=(None, 60, 6),
        num_layers=num_layers,
        cell_size=cell_size)

    data_path = DATA_DIR
    test_embeddings = np.load(os.path.join(data_path, 'test_embeddings.npy'))
    test_filenames = np.load(os.path.join(data_path, 'test_img_ids.npy'))
    test_data = {'inputs': test_embeddings, 'filename': test_filenames}
    assert test_embeddings.shape[0] == test_filenames.shape[0]

    sess = tf.Session()
    saver = utils.load_checkpoint(os.path.join(model_dir, 'lstm_model'), sess, 'lstm_model')
    data_pl = [inputs, drop_rate]
train = array[leftover:division]


def processData(data, days_before, days_ahead, jump=1):
    x, y = [], []
    for i in range(0, len(data) - days_before - days_ahead + 1, jump):
        x.append(data[i:(i + days_before)])
        y.append(data[(i + days_before):(i + days_before + days_ahead)])
    return np.array(x), np.array(y)


x, Y = processData(train, days_before, days_ahead)
y = np.array([list(a.ravel()) for a in Y])

print('Creating LSTM model')
lstm_model = lstm_model(days_before, days_ahead)
print('Creating GRU model')
gru_model = gru_model(days_before, days_ahead)

x_train, x_validate, y_train, y_validate = train_test_split(x, y,
                                                            test_size=0.2,
                                                            random_state=42)


def train_lstm(lstm_model, x_train, x_validate, y_train, y_validate, EPOCHS):
    print('Train on LSTM')
    history = lstm_model.fit(x_train, y_train, epochs=EPOCHS,
def main():
    os.chdir('./')
    global args
    global word2vec

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # parser.add_argument('clean_summaries0209.csv', help="Source Input file", type=str)
    # parser.add_argument('word2vec_50d.txt', help="word2vec file", type=str)
    parser.add_argument('--padding', help="padding around each text",
                        type=int, default=4)
    parser.add_argument('--batchsize', help="batchsize if you want to batch the data",
                        type=int, default=1)
    parser.add_argument('--max_note_len',
                        help="Cut off all notes longer than this (0 = no cutoff).",
                        type=int, default=0)
    parser.add_argument('--filename', help="File name for output file",
                        type=str, default="data.h5")
    # with open("conditions.dict", 'w') as f:
    #     for i, c in enumerate(conditions):
    #         print(f, i + 1, c)
    args = parser.parse_args()

    # LOAD THE WORD2VEC FILE
    # word2vec covers the whole dataset (labeled + unlabeled), 470260 vectors
    word2vec, emb_size, v_large = load_bin_vec("word2vec_50d.txt")
    print('WORD2VEC POINTS:', v_large)

    # first step
    # X_train, y_train, X_test, y_test =
    lbl, targets, ids, subj, time, embed = preprocess(args, emb_size)

    # second step
    BANTCH_SIZE, optimizer, loss, accuracy, input_data, target, prediction, all_outputs, \
        softmax_w, softmax_b, embedding, prediction_prob, dropout_keep_prob = lstm_model(embed)

    # third step
    softmax_w_print, softmax_b_print, embedding_print, all_outputs_print, prediction_print, \
        prediction_prob_print, last_output_print, x_batch, y_batch = cross_validation(
            lbl, targets, ids, subj, time, BANTCH_SIZE, optimizer, loss, accuracy,
            input_data, target, prediction, all_outputs, softmax_w, softmax_b,
            embedding, prediction_prob, dropout_keep_prob)
from load_data import get_data
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers.recurrent import LSTM
from keras.layers.core import Dense, Activation, Dropout
from model import lstm_model  # assumed source of lstm_model; it was not imported in the original snippet

epochs = 100
batch_size = 128
seq_len = 100

checkpointer = ModelCheckpoint(
    filepath="weights.hdf5",
    verbose=1,
    save_best_only=True
)

print('Loading data')
X, Y = get_data(seq_len=seq_len)
seq_len, total_char = X.shape[1:]
model = lstm_model(seq_len, total_char)

print(X.shape)
print(Y.shape)

print('start training')
model.fit(X, Y, batch_size=batch_size, nb_epoch=epochs, callbacks=[checkpointer])
print('training done')
model.save('final.hdf5')
def run(batch, lambda_param=0.9, learning_rate=0.005, namespace=""):
    tf.reset_default_graph()

    x_placeholder = tf.placeholder(tf.float32,
                                   [batch_size, number_of_sensors, time_steps])
    y_placeholder = tf.placeholder(tf.float32,
                                   [batch_size, number_of_efectors])
    target_steering, target_acceleration = tf.split(y_placeholder, [1, 1], 1)

    # unstack - split input tensor on timesteps
    normalized_input = norm.normalize_input(x_placeholder)
    input_series = tf.unstack(normalized_input, axis=2)

    outputs_series, current_state, init_state = lstm_model(
        batch_size, input_series, number_of_sensors, internal_state_size,
        number_of_efectors, number_of_lstm_layers)
    unnormalized_output = outputs_series[-1]
    steering_normalized, acceleration_normalized, normalized_output = norm.normalize_output(
        unnormalized_output)

    # loss for all outputs
    loss = 0
    with tf.name_scope("loss"):
        # losses = []
        # for output in outputs_series:
        #     steering_normalized, acceleration_normalized, normalized_output = norm.normalize_output(output)
        #     losses.append(loss_functions.exp_log_loss_function(target_steering, steering_normalized,
        #                                                        target_acceleration, acceleration_normalized,
        #                                                        lambda_param))
        # loss = tf.reduce_mean(losses)
        # loss = lf.pow_loss_function(target_steering, steering_normalized, target_brake, brake_normalized, target_acceleration, acceleration_normalized, lambda_param, 12)
        # loss = lf.log_loss_function(target_steering, steering_normalized, target_brake, brake_normalized, target_acceleration, acceleration_normalized, lambda_param)
        loss = loss_functions.exp_log_loss_function(target_steering, steering_normalized,
                                                    target_acceleration, acceleration_normalized,
                                                    lambda_param)
        tf.summary.scalar(
            "lr: " + "{:.7f}".format(learning_rate) +
            "lambda: " + "{:.7f}".format(lambda_param) +
            "batch: " + str(batch), loss)

    with tf.name_scope("optimizer"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        sess.run(tf.global_variables_initializer())

        trainInput = train_input.TrainInput(shuffle_data=False,
                                            single_race_data_size=10000)
        number_of_epochs = 100
        batches_count = trainInput.get_batches_count(batch_size)
        writer = tf.summary.FileWriter(tensorboard_dirpath + "train", sess.graph)

        for epoch_index in range(number_of_epochs):
            x, y = trainInput.get_chain_train_data(batch_size, number_of_sensors)
            _current_state = np.zeros(
                (number_of_lstm_layers, 2, batch_size, internal_state_size))

            for batch_index in range(batches_count - time_steps + 1):
                start_index = batch_index
                end_index = start_index + time_steps
                batchX = x[:, :, start_index:end_index]
                batchY = y[:, :, end_index - 1]

                _loss, _train_step, _current_state, _output_series, summary = sess.run(
                    [loss, train_step, current_state, outputs_series, merged],
                    feed_dict={
                        x_placeholder: batchX,
                        y_placeholder: batchY,
                        init_state: _current_state
                    })

                if batch_index % 50 == 0:
                    print("race: " + str(number_of_epochs + epoch_index) +
                          " loss " + str(_loss))
                    writer.add_summary(summary,
                                       global_step=batch_index + epoch_index * batches_count)

        # save model
        saver = tf.train.Saver()
        save_path = saver.save(sess, network_dirpath + "model.ckpt")
        print("model saved in file: %s" % save_path)
start_mark = 'B'
end_mark = 'E'
checkpoint = "checkpoint/"
txt_path = "data/poems_res.txt"
batch_size = 1
learning_rate = 0.01
lstm_size = 128
num_layers = 2

txt_res = load_txt(txt_path)
model = lstm_model(batch_size=batch_size,
                   lstm_size=lstm_size,
                   num_layers=num_layers,
                   learning_rate=learning_rate,
                   num_classes=len(txt_res["vocabs"]))
model.gen_model(training=False)

saver = tf.train.Saver(tf.global_variables())
all_var = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())

first_word = input("Enter the first character: ")

with tf.Session() as sess:
    sess.run(all_var)
    checkpoint = tf.train.latest_checkpoint(checkpoint)
    saver.restore(sess, checkpoint)
def main():
    model_name = '270_0.8201_1_38_10_gd'
    model_path = './model/%s/model' % model_name
    if not os.path.isfile('%s.index' % model_path):
        print("Please run python train.py to train the model first.")
        exit(1)

    args = parser.parse_args()
    length = args.length

    print("Restoring model...")
    with open(words_path, 'rb') as f:
        words = pickle.load(f)
    with open(word_to_id_path, 'rb') as f:
        word_to_id = pickle.load(f)

    param = {
        'vocab_size': len(words),
        'num_layers': training_param['num_layers'],
        'hidden_size': training_param['hidden_size'],
    }
    num_steps = training_param['num_steps']
    batch_size = training_param['batch_size']
    keep_prob = training_param['keep_prob']
    init_scale = training_param['init_scale']

    initializer = tf.random_uniform_initializer(-init_scale, init_scale)
    with tf.name_scope("train"):
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = lstm_model(is_training=True,
                           num_steps=int(num_steps),
                           batch_size=batch_size,
                           keep_prob=float(keep_prob),
                           **param)
    with tf.name_scope("valid"):
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = lstm_model(batch_size=1,
                                num_steps=1,
                                is_training=False,
                                **param)

    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver.restore(session, model_path)

    while True:
        user_input = input(
            "Enter any text to collaborate with the generator or 'END' to stop:\n"
        )
        if user_input == 'END':
            exit(0)
        print(''.join(
            generate(session, mvalid, user_input, word_to_id, words, size=length)))
        print('\n')
# Leverage SpaCy for POS removal and lemmatization
print("Applying POS and dependency tagging...")
nlp = en_core_web_sm.load()
df['Stem_parsed'] = df['Stem'].apply(nlp)
print("Removing selected POS tags...")
df['Stem'] = df['Stem_parsed'].apply(remove_tokens_on_match)
print("Data after lemmatization and POS removal:")
pd.options.display.max_colwidth = 100
print(df.head(30))
# print("Average number of words per question: %s" % df.Stem.apply(lambda x: len(x.split(" "))).mean())

# LSTM setup with best hyperparameters
model = lstm_model(df)
print("Tuning hyperparameters...")
best_loss = model.tune_hyperparameters()
print('\nBest loss: %f' % (best_loss))
loss = model.train()
print('Actual loss: %f' % (loss))
model.plot_loss()

# Read inputs for testing
pd.set_option('display.max_rows', model.max_features)
while 1:
    print("\nInput a new question to estimate its difficulty:")
    x_in = input()
    tmp = []
    tmp.append(x_in)
    df_input = pd.DataFrame(tmp, columns=['input'])
# Check that the data shapes are consistent
assert data_embeddings.shape[0] == data_labels.shape[0] == data_mask.shape[0]
print(data_embeddings.dtype)
print(data_labels.dtype)
print(data_mask.dtype)

num_time = data_labels.shape[1]
num_labels = data_labels.shape[2]
feature_size = data_embeddings.shape[-1]
seed = 88

# Create the bi-lstm based computational graph and attach
# loss and optimizer to the graph
outputs, inputs, labels, drop_rate = model.lstm_model(input_shape=(None, num_time, feature_size),
                                                      label_shape=(None, num_time, num_labels),
                                                      num_layers=NUM_LAYERS,
                                                      cell_size=CELL_SIZE)
loss, mask = model.build_loss(labels, outputs, loss_name=ARGS.loss)
patient_pred = model.compute_patient_prediction(labels, outputs, mask)

train_loss = tf.summary.scalar('train_loss', loss)
validation_loss = tf.summary.scalar('val_loss', loss)
train_op, gradient_norm = model.optimizer(loss, lr=ARGS.lr)
grad_norm = tf.summary.scalar('grad_norm', gradient_norm)
train_summary = tf.summary.merge([train_loss, grad_norm])
validation_summary = tf.summary.merge([validation_loss])
logits_list = []
caps_train_loss = []
caps_train_acc = []
caps_val_loss = []
caps_val_acc = []
zsl_acc = []

# start
tf.compat.v1.reset_default_graph()
config = tf.compat.v1.ConfigProto()
with tf.compat.v1.Session(config=config) as sess:
    # Instantiate Model
    lstm = model.lstm_model(FLAGS)
    if os.path.exists(FLAGS.ckpt_dir):
        print("Restoring Variables from Checkpoint for rnn model.")
        saver = tf.compat.v1.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
    else:
        print('Initializing Variables')
        sess.run(tf.compat.v1.global_variables_initializer())
        if FLAGS.use_embedding:
            # load pre-trained word embedding
            assign_pretrained_word_embedding(sess, data, lstm)

    best_caps_acc = 0
    best_zsl_acc = 0
    var_saver = tf.compat.v1.train.Saver()
"/home/grads/iashiq5/AdvTrainingExperiment/AdversarialTraining/models/lstm-at-tb-kaggle-toxic-comment-2021-04-23-23-41-56-085474.pt" )) # Todo: provide file name here model_wrapper = PyTorchModelWrapper(model, tokenizer) print(evaluate(model_wrapper.model, test_dataloader)) # for checking whether loading is correct return model_wrapper if __name__ == "__main__": # 3 tasks: train, evaluate, pre-generate task = "train" # Todo: change this args = get_args() # define model and tokenizer if args.model_short_name == "lstm": model_wrapper = lstm_model(args) else: model_wrapper = cnn_model(args) model = model_wrapper.model tokenizer = model_wrapper.tokenizer # prepare dataset and dataloader train_dataset, validation_dataset, test_dataset = return_dataset( args.dataset) train_text, train_labels = prepare_dataset_for_training(train_dataset) eval_text, eval_labels = prepare_dataset_for_training(validation_dataset) test_text, test_labels = prepare_dataset_for_training(test_dataset) train_dataloader = _make_dataloader(tokenizer, train_text, train_labels, args.batch_size) eval_dataloader = _make_dataloader(tokenizer, eval_text, eval_labels,
y_val, y_test_t = np.split(y_temp, 2)
log.info('Done preprocessing.')
print()

model_path = model_dir + model_name
if os.path.isfile(model_path):
    log.info('Loading model...')
    model = load_model(model_path)
    log.info('Loaded model.')
else:
    log.info('Creating model...')
    model = lstm_model((batch_size, time_steps, len(train_cols)))
    log.info('Done.')
print()

log.info('Training model...')
model.fit(x_t, y_t, epochs=epochs, verbose=2, batch_size=batch_size,
          shuffle=False,
          validation_data=(trim_dataset(x_val), trim_dataset(y_val)))
log.info('Done training...')
print()

log.info('Saving model...')
if not os.path.exists(model_dir):
    os.mkdir(model_dir)