def train(model, max_len=100000, batch_size=64, verbose=True, epochs=100, save_path='../saved/', save_best=True):
    # callbacks
    ear = EarlyStopping(monitor='val_acc', patience=5)
    # Save the model and its related parameter data.
    mcp = ModelCheckpoint(join(save_path, 'convnet.h5'),
                          monitor="val_acc",
                          save_best_only=save_best,
                          save_weights_only=False)
    # utils.data_generator yields the training data and labels.
    # Returns a History object. Its History.history attribute is a record of
    # training loss values and metrics values at successive epochs, as well as
    # validation loss values and validation metrics values (if applicable).
    history = model.fit_generator(
        utils.data_generator(x_train, y_train, max_len, batch_size, shuffle=True),
        steps_per_epoch=len(x_train) // batch_size + 1,
        epochs=epochs,
        verbose=verbose,
        callbacks=[ear, mcp],
        validation_data=utils.data_generator(x_test, y_test, max_len, batch_size),
        validation_steps=len(x_test) // batch_size + 1)
    return history
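# Note: utils.data_generator itself is not shown in these snippets. A minimal sketch
# consistent with the call sites (file list plus labels, byte sequences padded or
# truncated to max_len, MalConv-style) -- the names and the padding convention are
# assumptions, not the repo's actual code:
import numpy as np

def data_generator(fn_list, labels, max_len, batch_size, shuffle=False):
    idx = np.arange(len(fn_list))
    while True:
        if shuffle:
            np.random.shuffle(idx)
        for s in range(0, len(idx), batch_size):
            batch = idx[s:s + batch_size]
            x = np.zeros((len(batch), max_len), dtype=np.uint16)
            for row, i in enumerate(batch):
                with open(fn_list[i], 'rb') as f:
                    data = np.frombuffer(f.read(), dtype=np.uint8)[:max_len]
                # Shift byte values by one so 0 can serve as the padding token.
                x[row, :len(data)] = data.astype(np.uint16) + 1
            yield x, np.asarray([labels[i] for i in batch])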
def train(self, num_class, epochs, trainable):
    # Convert the training/test data and labels into the model's input format.
    training_data_list = build_data_generator_input(self.trainingSet_path)
    val_data_list = build_data_generator_input(self.val_path)
    # The test set is prepared but unused here:
    # testing_data_list = build_data_generator_input(self.testingSet_path, num_class)
    # ConfigParser.get returns a string, so cast before using it as a length.
    max_length = int(self.config.get("training_rule", "max_length"))
    train_D = data_generator(training_data_list, max_length=max_length, shuffle=True)
    valid_D = data_generator(val_data_list, max_length=max_length, shuffle=True)
    # test_D = data_generator(testing_data_list, shuffle=False)
    model = self.model(num_class, trainable)
    model.fit(train_D.__iter__(),
              steps_per_epoch=len(train_D),
              epochs=epochs,
              validation_data=valid_D.__iter__(),
              validation_steps=len(valid_D))
    model.save(self.model_save_path, overwrite=True)
def train(model, max_len=200000, batch_size=64, verbose=True, epochs=100, save_path='../saved/', save_best=True):
    # callbacks
    ear = EarlyStopping(monitor='val_acc', patience=10)
    mcp = ModelCheckpoint(join(save_path, 'model.h5'),
                          monitor="val_acc",
                          save_best_only=save_best,
                          save_weights_only=False)
    history = model.fit_generator(
        utils.data_generator(x_train, y_train, max_len, batch_size, shuffle=True),
        steps_per_epoch=len(x_train) // batch_size + 1,
        epochs=epochs,
        verbose=verbose,
        callbacks=[ear, mcp],
        validation_data=utils.data_generator(x_test, y_test, max_len, batch_size),
        validation_steps=len(x_test) // batch_size + 1)
    return history
def main(args):
    path = C.PATH
    # model = PureCapsNet(input_shape=C.INPUT_SHAPE, n_class=C.OUTPUT_CLASS, routings=C.ROUTINGS)
    model = TestMixCapsNet(input_shape=C.INPUT_SHAPE, n_class=C.OUTPUT_CLASS, routings=C.ROUTINGS)
    # model = MultiScaleCapsNet(input_shape=C.INPUT_SHAPE, n_class=C.OUTPUT_CLASS, routings=C.ROUTINGS)
    model.summary()
    # exit()

    if args.target == 'train' or args.target == 'retrain':
        checkpoint = callbacks.ModelCheckpoint(f'check_point/{model.name}_best.h5',
                                               monitor='val_loss', save_best_only=True, verbose=1)
        reduce = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, mode='min')
        earlystopping = callbacks.EarlyStopping(monitor='val_loss', patience=20)
        log = callbacks.CSVLogger('logs/log.csv')
        tb = callbacks.TensorBoard('logs/tensorboard-logs', batch_size=C.BATCH_SIZE, histogram_freq=0)
        lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: C.LR * (C.LR_DECAY ** epoch))

        if args.target == 'retrain':
            # SGD with lr=0.01 for fine-tuning
            optimizer = optimizers.sgd(lr=0.01, momentum=0.9, nesterov=True, decay=1e-6)
            model.load_weights(f'check_point/{model.name}_best.h5', by_name=True)
            print(f"{model.name} loaded.")
        else:
            optimizer = optimizers.Adam(lr=C.LR)
            print("No model loaded.")

        model.compile(optimizer=optimizer,
                      # loss=[margin_loss],
                      loss='binary_crossentropy',
                      # loss_weights=[1.],
                      metrics=[categorical_accuracy])
                      # metrics={'capsnet': 'accuracy'})

        model.fit_generator(data_generator('/'.join((path, 'train'))),
                            epochs=120,
                            steps_per_epoch=C.TRAIN_SIZE // C.BATCH_SIZE,
                            validation_data=data_generator('/'.join((path, 'val'))),
                            validation_steps=C.VAL_SIZE // C.BATCH_SIZE,
                            verbose=1,
                            callbacks=[checkpoint, log, tb, earlystopping])
                            # callbacks=[checkpoint])
        model.save(f'check_point/{model.name}_final.h5')
    else:
        # model.load_weights(f'check_point/{model.name}_best.h5')
        model.load_weights(f'check_point/{model.name}_0.904204.h5')
        print("Loading test data ...")
        x_test, y_test = load_all_data('/'.join((path, 'test')))
        y_pred = batch_prediction(model, x_test, batch_size=200)
        print(len(y_test), len(y_pred), len(y_test))
        model_evaluate(y_pred, y_test)
def __init__(self, test_h5, eta=0.05):
    super(fgsm_callback, self).__init__()
    self.eta = eta
    self.test_h5 = test_h5

    # generators
    batch_size = 32
    test_dgen = data_generator(self.test_h5, batch_size)
    test_steps_per_epoch = 13718 // batch_size + 1
    print("test steps per epoch: ", test_steps_per_epoch)

    # normalize test set
    print("[FGSM] Generating normed version of test set..")
    testX_normed = []
    self.testY = []
    for _ in range(test_steps_per_epoch):
        x_batch, y_batch = next(test_dgen)
        testX_normed.append(x_batch)
        self.testY.append(y_batch)
    testX_normed = np.concatenate(testX_normed, axis=0)
    self.testY = np.concatenate(self.testY, axis=0)
    del x_batch, y_batch
    self.testX_normed = testX_normed
    print("[FGSM] Done")
    print("[FGSM] shapes {} {}".format(self.testX_normed.shape, self.testY.shape))
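# For context, the FGSM step that this callback's cached test set would feed is the
# standard one: perturb inputs by eta in the direction of the sign of the loss
# gradient. A generic sketch assuming a TF 2.x Keras model and binary labels -- this
# is not the repo's code, just the textbook form of the attack:
import tensorflow as tf

def fgsm_perturb(model, x, y, eta):
    x = tf.convert_to_tensor(x, dtype=tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(x)
        loss = tf.keras.losses.binary_crossentropy(y, model(x))
    grad = tape.gradient(loss, x)
    # Move each input a fixed step eta along the gradient sign.
    return (x + eta * tf.sign(grad)).numpy()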
def run_task():
    (x_train, y_train), (x_test, y_test) = data_generator()
    model = tcn.compiled_tcn(return_sequences=False,
                             num_feat=1,
                             num_classes=10,
                             nb_filters=25,
                             kernel_size=7,
                             dilations=[2 ** i for i in range(9)],
                             nb_stacks=2,
                             max_len=x_train[0:1].shape[1],
                             activation='norm_relu',
                             use_skip_connections=True)

    print(f'x_train.shape = {x_train.shape}')
    print(f'y_train.shape = {y_train.shape}')
    print(f'x_test.shape = {x_test.shape}')
    print(f'y_test.shape = {y_test.shape}')

    model.summary()

    model.fit(x_train, y_train.squeeze().argmax(axis=1), epochs=100,
              validation_data=(x_test, y_test.squeeze().argmax(axis=1)))
def run_task():
    (x_train, y_train), (x_test, y_test) = data_generator()
    model, param_str = tcn.dilated_tcn(output_slice_index='last',  # try 'first'.
                                       num_feat=1,
                                       num_classes=10,
                                       nb_filters=64,
                                       kernel_size=8,
                                       dilatations=[1, 2, 4, 8],
                                       nb_stacks=8,
                                       max_len=x_train[0:1].shape[1],
                                       activation='norm_relu',
                                       use_skip_connections=False,
                                       return_param_str=True)

    print(f'x_train.shape = {x_train.shape}')
    print(f'y_train.shape = {y_train.shape}')
    print(f'x_test.shape = {x_test.shape}')
    print(f'y_test.shape = {y_test.shape}')

    model.summary()

    # a = np.zeros_like(x_train[0:1])
    # a[:, 0, :] = 1.0
    # print(get_activations(model, a))

    model.fit(x_train, y_train.squeeze().argmax(axis=1), epochs=100,
              validation_data=(x_test, y_test.squeeze().argmax(axis=1)))
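# Note: the data_generator shared by the two run_task variants above appears to be a
# sequential-MNIST loader: each image becomes a 784-step, 1-feature sequence, and the
# extra singleton axis on y matches the .squeeze() calls at the fit sites. A minimal
# sketch under that assumption (the repo's loader may differ in detail):
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

def data_generator():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Flatten each 28x28 image into a length-784 sequence with one feature per step.
    x_train = x_train.reshape(-1, 784, 1).astype('float32') / 255.0
    x_test = x_test.reshape(-1, 784, 1).astype('float32') / 255.0
    # One-hot labels with a singleton time axis, so y.squeeze().argmax(axis=1) works.
    y_train = np.expand_dims(to_categorical(y_train, 10), axis=1)
    y_test = np.expand_dims(to_categorical(y_test, 10), axis=1)
    return (x_train, y_train), (x_test, y_test)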
def evaluate_rd(args, dev_set, model, d_model, device):
    logger = logging.getLogger("D-QA")
    logger.info('Dev test:')
    num_batch, dataloader = data_generator(args, dev_set)
    tqdm_obj = tqdm(dataloader, total=num_batch)
    sp, answer = {}, {}
    for step, batch in enumerate(tqdm_obj):
        original_c = dev_set[step]
        sup_list, ans_list = d_model(args, batch, model, device, pred_mode=True)
        context = original_c.context
        tokens = original_c.total_token
        _id = original_c.t_id
        sup_answer = []
        for each_s in sup_list:
            sup_answer.append((context[each_s[0]][0], each_s[1]))
        if isinstance(ans_list, bool):
            node_answer = ['no', 'yes'][int(ans_list)]
        elif ans_list == []:
            node_answer = ''
        else:
            node_answer = ' '.join(
                tokens[ans_list[0]][ans_list[1] * args.max_s_len:(ans_list[1] + 1) * args.max_s_len]
                [ans_list[2]:ans_list[3] + 1])
        sp[_id] = sup_answer
        answer[_id] = node_answer
    final_answer = {'answer': answer, 'sp': sp}
    with open(os.path.join(args.result_dir, 'dev_result.json'), 'w') as fout:
        json.dump(final_answer, fout)
    metrics = eval(os.path.join(args.result_dir, 'dev_result.json'), args.dev_files[0])
    logger.info('EM: {}, F1: {}, sup_EM: {}, sup_F1: {}, joint_EM: {}, joint_F1: {}'.format(
        metrics['em'], metrics['f1'], metrics['sp_em'], metrics['sp_f1'],
        metrics['joint_em'], metrics['joint_f1']))
    return metrics
def train_op():
    # reduce_lr = LearningRateScheduler(scheduler, verbose=1)
    train_D = data_generator(train_data, char2id, n_char2id, word2id, BIO2id,
                             maxlen_sentence, maxlen_word, is_use_n_char, 128)
    best_f1 = 0
    for i in range(1, 150):  # epochs
        print(i)
        train_model.fit_generator(
            train_D.__iter__(),
            steps_per_epoch=len(train_D),
            epochs=1,
            # callbacks=[reduce_lr]
        )
        # if i % 2 == 0:  # evaluate on dev once every two epochs and save the dev results
        ner_pred, true_bio = pred_op('dev')
        P, R, F = NER_result_Evaluator(ner_pred, true_bio)
        if F > best_f1:
            train_model.save_weights(model_save_file)
            best_f1 = F
        print('Epoch {}, dev set: precision {}, recall {}, f1: {}'.format(i, P, R, F))
        ner_pred, true_bio = pred_op('test')
        P, R, F = NER_result_Evaluator(ner_pred, true_bio)
        print('Epoch {}, test set: precision {}, recall {}, f1: {}'.format(i, P, R, F))
        if i % 50 == 0:
            ner_pred, true_bio = pred_op('train')
            P, R, F = NER_result_Evaluator(ner_pred, true_bio)
            print('Train set: precision {}, recall {}, f1: {}'.format(P, R, F))
    print(best_f1)
def main(path, input_size, n_classes=2, batch_size=16, epochs_count=30):
    train_gen = data_generator(path, batch_size)
    model = Segnet(input_size, n_classes)
    model.build_model('adam', loss=losses.BinaryCrossentropy(), metrics=["accuracy"])
    model.evaluate_generator(train_gen, 11, "./models/modelsweights.10.hdf5")
def main(path, input_size, n_classes=2, batch_size=16, epochs_count=30):
    train_gen = data_generator(path, batch_size)
    model = Segnet(input_size, n_classes)
    model.build_model('adam', loss=losses.BinaryCrossentropy(), metrics=["accuracy"])
    model.train_generator(train_gen, steps_per_epoch=24, epochs=epochs_count, save_path="./models")
    print("Training Done....")
def predict(model, x_test, y_test, max_len=102400, batch_size=1, verbose=1):
    pred = model.predict_generator(
        utils.data_generator(x_test, y_test, max_len, batch_size, shuffle=False),
        steps=len(x_test),
        verbose=verbose)
    return pred
def predict(model, fn_list, label, batch_size=64, verbose=1):
    max_len = model.input.shape[1]
    pred = model.predict_generator(
        utils.data_generator(fn_list, label, max_len, batch_size, shuffle=False),
        steps=len(fn_list) // batch_size + 1,
        verbose=verbose)
    return pred
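# Hypothetical usage sketch (the helper name and threshold are placeholders, not from
# the source): these predict functions return per-sample probabilities, so hard labels
# require a threshold on the scores.
import numpy as np

def scores_to_labels(scores, threshold=0.5):
    # Binarize probability scores into 0/1 class labels.
    return (np.asarray(scores) > threshold).astype(int).ravel()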
def main(path, input_size, n_classes=2, batch_size=30, epochs_count=50):
    train_gen = data_generator(path, batch_size)
    model = Segnet(input_size, n_classes)
    model.build_model()
    model.compile('sgd', loss=losses.BinaryCrossentropy(), metrics=["accuracy"])
    model.train_generator(train_gen, steps_per_epoch=300 // batch_size, epochs=epochs_count)
    print("Training Done....")
def _accuracy_term(self, new_model):
    train_data_generator = data_generator(self.x_train, self.y_train, self.num_classes)
    eval_data_generator = data_generator(self.x_test, self.y_test, self.num_classes)
    train_steps = train_data_generator.n // train_data_generator.batch_size
    validation_steps = eval_data_generator.n // eval_data_generator.batch_size
    new_model.fit_generator(generator=train_data_generator,
                            steps_per_epoch=train_steps,
                            epochs=self.epochs,
                            validation_data=eval_data_generator,
                            validation_steps=validation_steps)
    p_hat = new_model.evaluate_generator(eval_data_generator, eval_data_generator.n, verbose=1)[0]
    if not self.base_model_accuracy:
        print('Calculating the accuracy of the base line model')
        self.base_model_accuracy = self.model.evaluate_generator(
            eval_data_generator, eval_data_generator.n, verbose=1)[0]
    accuracy_term = (self.b - (self.base_model_accuracy - p_hat)) / self.b
    return accuracy_term
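# Note: this data_generator returns an object exposing .n and .batch_size, which
# suggests a Keras array iterator rather than a plain Python generator. A plausible
# minimal sketch built on ImageDataGenerator.flow -- an assumption, not the project's
# actual code:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

def data_generator(x, y, num_classes, batch_size=32):
    # flow() returns an iterator with .n (sample count) and .batch_size attributes,
    # matching how the generator is consumed above.
    return ImageDataGenerator(rescale=1.0 / 255).flow(
        x, to_categorical(y, num_classes), batch_size=batch_size)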
def train(model, max_len=200000, batch_size=50, verbose=True, epochs=100, save_path='../saved/', save_best=True):
    # callbacks
    ear = EarlyStopping(monitor='val_acc', patience=5)
    mcp = ModelCheckpoint(join(save_path, 'malconv.h5'),
                          monitor="val_acc",
                          save_best_only=save_best,
                          save_weights_only=False)
    # x_train is defined in __main__
    steps_per_epoch_val = len(x_train) // batch_size + 1
    validation_steps_val = len(x_test) // batch_size + 1
    print("\nlen(x_train):%s, batch_size:%s, steps_per_epoch_val:%s"
          % (len(x_train), batch_size, steps_per_epoch_val))
    print("len(x_test):%s, validation_steps_val:%s \n" % (len(x_test), validation_steps_val))
    print("epochs:%s, verbose:%s type(validation_steps_val):%s \n"
          % (epochs, verbose, type(validation_steps_val)))
    history = model.fit_generator(
        utils.data_generator(x_train, y_train, max_len, batch_size, shuffle=True),
        steps_per_epoch=steps_per_epoch_val,
        epochs=epochs,
        verbose=verbose,
        callbacks=[ear, mcp],
        validation_data=utils.data_generator(x_test, y_test, max_len, batch_size),
        validation_steps=validation_steps_val)
    return history
def main(args):
    torch.manual_seed(args.seed)
    train_loader, test_loader = data_generator(args.data_dir, args.batch_size)

    for m in range(len(models)):
        if models[m] == "Transformer":
            model = Transformer(args.NumFeatures, args.NumTimeSteps, args.n_layers, args.heads,
                                args.dropout, args.n_classes, time=args.NumTimeSteps)
        elif models[m] == "TCN":
            channel_sizes = [args.nhid] * args.levels
            model = TCN(args.NumFeatures, args.n_classes, channel_sizes,
                        kernel_size=args.ksize, dropout=args.dropout)
        elif models[m] == "LSTMWithInputCellAttention":
            model = LSTMWithInputCellAttention(args.NumFeatures, args.nhid, args.n_classes,
                                               args.dropout, args.attention_hops, args.d_a)
        elif models[m] == "LSTM":
            model = LSTM(args.NumFeatures, args.nhid, args.n_classes, args.dropout)

        model.to(device)
        model_name = "model_{}_NumFeatures_{}".format(models[m], args.NumFeatures)
        model_filename = args.model_dir + 'm_' + model_name + '.pt'

        lr = args.lr
        optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)
        best_test_loss = 100
        for epoch in range(1, args.epochs + 1):
            model, optimizer = train(args, epoch, model, train_loader, optimizer)
            test_loss, test_acc = test(args, model, test_loader)
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                save(model, model_filename)
            if test_acc >= 99:
                break
            if epoch % 10 == 0:
                lr /= 10
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
def extract(self):
    self.base_name = os.path.basename(self.data_dir)
    data_set = [os.path.join(self.data_dir, file)
                for file in next(os.walk(self.data_dir))[2]
                if file.endswith('.jpg')]
    self.feature_l, self.image_l = [], []
    cnt = 0
    total_size = len(data_set)
    data_g = data_generator(data_set, batch_size=self.batch_size)
    save_cnt = 0
    while cnt < total_size:
        print("extracting {} of {}".format(cnt, total_size))
        X, task_l = next(data_g)
        self.image_l.extend(task_l)
        my_featuremaps = get_activations(self.model, EXTRACT_LAYER, X)
        self.feature_l.append(my_featuremaps[0])
        cnt += self.batch_size
        save_cnt += 1
        if save_cnt % SAVE_EVERY == 0:
            self._save()
            save_cnt = 0
    self._save()
def predict(args):
    logger = logging.getLogger("D-QA")
    with open(args.test_files[0], 'r') as fin:
        dataset = json.load(fin)
    tokenizer = BertTokenizer.from_pretrained(args.Bert_model, do_lower_case=True)
    protest = []
    for data in dataset:
        protest.append(test_preprocessed_data(args, tokenizer, data))

    logger.info('Loading model ....')
    device = torch.device('cpu') if not torch.cuda.is_available() else torch.device('cuda')
    model_state_dict = torch.load(os.path.join(args.model_dir, 'DQA_model.bin.tmp'))
    model = BertForDQA.from_pretrained(args.Bert_model, state_dict=model_state_dict['bert-params'])
    d_model = DQA_graph((model.config.hidden_size, args.max_p_len, args.max_s_len, args.at_head))
    d_model.load_state_dict(model_state_dict['dg-params'])
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model.to(device).eval()
    d_model.to(device).eval()

    logger.info('Dev test:')
    num_batch, dataloader = data_generator(args, protest)
    tqdm_obj = tqdm(dataloader, total=num_batch)
    sp, answer = {}, {}
    for step, batch in enumerate(tqdm_obj):
        original_c = protest[step]
        sup_list, ans_list = d_model(args, batch, model, device, pred_mode=True)
        context = original_c.context
        tokens = original_c.token_set
        _id = original_c.t_id
        sup_answer = []
        for each_s in sup_list:
            sup_answer.append((context[each_s[0]][0], each_s[1]))
        if isinstance(ans_list, bool):
            node_answer = ['no', 'yes'][int(ans_list)]
        else:
            node_answer = ' '.join(tokens[ans_list[0]][ans_list[1]][ans_list[2]:ans_list[3] + 1])
        sp[_id] = sup_answer
        answer[_id] = node_answer
    final_answer = {'answer': answer, 'sp': sp}
    with open(os.path.join(args.result_dir, 'hotpot_test_fullwiki_v1_refine.json_pred'), 'w') as fout:
        json.dump(final_answer, fout)
def run_task(sequence_length=8):
    x_train, y_train = data_generator(batch_size=2048, sequence_length=sequence_length)
    print(x_train.shape)
    print(y_train.shape)
    model = compiled_tcn(return_sequences=False,
                         num_feat=1,
                         num_classes=10,
                         nb_filters=10,
                         kernel_size=10,
                         dilations=[1, 2, 4, 8, 16, 32],
                         nb_stacks=6,
                         max_len=x_train[0:1].shape[1],
                         use_skip_connections=False)
    print(f'x_train.shape = {x_train.shape}')
    print(f'y_train.shape = {y_train.shape}')
    # model.summary()
    model.fit(x_train, y_train, epochs=5)
    return model.evaluate(x_train, y_train)[1]
def train(model, x_train, x_val, y_train, y_val, max_len=102400, batch_size=1, shuffle=True):
    ear = EarlyStopping(monitor='loss', patience=50)
    mcp = ModelCheckpoint("./tcn.h5",
                          monitor="loss",
                          save_best_only=False,
                          save_weights_only=False)
    history = model.fit_generator(
        utils.data_generator(x_train, y_train, max_len, batch_size, shuffle),
        steps_per_epoch=len(x_train),
        epochs=1,
        verbose=1,
        callbacks=[ear, mcp]
        # validation_data=utils.data_generator(x_val, y_val, max_len, batch_size, shuffle),
        # validation_steps=len(x_val)
    )
    return history, model
parser.add_argument('--mode', type=str, default='hyper',
                    help='Training mode - hyper: Perform hyper-parameter optimisation, return best configuration. '
                         'train: Train single model with given parameters')
parser.add_argument('--experiment_name', type=str, default=str(np.random.randint(0, 100000)),
                    help="Optional name of experiment, used for saving model checkpoint")
args = parser.parse_args()

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
print(args)

corpus = data_generator(args)
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, args.batch_size, args)
test_data = batchify(corpus.test, args.batch_size, args)
n_words = len(corpus.dictionary)

# Check sequence length setting
eff_history = args.seq_len - args.validseqlen
if eff_history < 0:
    raise ValueError("Valid sequence length must be smaller than sequence length!")

# May use adaptive softmax to speed up training
criterion = nn.CrossEntropyLoss()


def compute_loss(predictions, targets):
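# Note: batchify is not defined in this snippet. A minimal sketch consistent with the
# call sites above and with the standard PyTorch language-model recipe (the repo's
# version may differ, e.g. in device handling):
def batchify(data, batch_size, args):
    # Trim off tokens that don't divide evenly, then lay the corpus out as
    # (batch_size, seq_len) so each row is one contiguous stream of text.
    nbatch = data.size(0) // batch_size
    data = data.narrow(0, 0, nbatch * batch_size)
    data = data.view(batch_size, -1)
    if args.cuda:
        data = data.cuda()
    return data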
alpha_1 = torch.tensor(1.)
beta_1 = torch.tensor(1.)
alpha_2 = torch.tensor(1.)
beta_2 = torch.tensor(1.)
C = torch.tensor(3.0, requires_grad=True)
design_matrix = torch.tensor(np.random.rand(N, 2 * D - 1), dtype=torch.float32)
c = torch.tensor([1.0 for _ in range(D)], requires_grad=True)
tau = sample_tau(alpha_1, beta_1)

# define the number of epochs and the data set size
nb_epochs = 5000

model = EvidenceNet(D=D, N=N, alpha_1=alpha_1, alpha_2=alpha_2,
                    beta_1=beta_1, beta_2=beta_2, C=C, c=c)
optimizer = SGD(model.parameters(), lr=0.001)

x_train, y_train = data_generator(N, max_order=D, noise_var=1 / tau, featurize_type='fourier')
x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
dset = torch.utils.data.TensorDataset(x_train, y_train)
train_loader = torch.utils.data.DataLoader(dset, batch_size=int(N))

# create our training loop
for epoch in range(nb_epochs):
    epoch_loss = 0
    for batch_idx, batch in enumerate(train_loader):
        x, y = batch
        y = y.reshape(N)
        evidence = model(x, y)
def run_task(length_of_convolution=3,
             kernel_size=3,  # type: int
             dilations=[2 ** i for i in range(9)],  # type: List[int]
             nb_stacks=1,  # type: int
             use_skip_connections=True,  # type: bool
             return_sequences=True,  # uncertain about this parameter
             dropout_rate=0.05,  # type: float
             epochs=100,
             name="run",
             create_plot=False):
    # the portion of the total data set that is dedicated to training
    portion_training_set = .8
    (x_train, y_train), (x_test, y_test) = data_generator(args.CSVFile, length_of_convolution,
                                                          portion_training_set)

    model = compiled_tcn(num_feat=1,  # type: int
                         num_classes=1,  # type: int
                         nb_filters=20,  # type: int
                         kernel_size=kernel_size,  # type: int
                         dilations=dilations,  # type: List[int]
                         nb_stacks=nb_stacks,  # type: int
                         max_len=None,  # type: int
                         padding='causal',  # type: str
                         use_skip_connections=use_skip_connections,  # type: bool
                         return_sequences=return_sequences,  # uncertain about this parameter
                         regression=True,  # type: bool
                         dropout_rate=dropout_rate,  # type: float
                         name=name)  # type: str

    print(f'x_train.shape = {x_train.shape}')
    print(f'y_train.shape = {y_train.shape}')
    print(f'x_test.shape = {x_test.shape}')
    print(f'y_test.shape = {y_test.shape}')

    model.summary()

    history = model.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test))
    loss = history.history['loss']

    # Plot training & validation loss values
    if create_plot:
        plt.plot(loss)
        # plt.plot(history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Test'], loc='upper left')
        plt.show()

    # output parameters file
    average_loss = str(sum(loss) / len(loss))
    file = open(average_loss + "-" + name + ".csv", "w+")
    file.write("kernel_size, " + str(kernel_size) + "\n" +
               "dilations, " + str(dilations) + "\n" +
               "nb_stacks, " + str(nb_stacks) + "\n" +
               "use_skip_connections, " + str(use_skip_connections) + "\n" +
               "return_sequences, " + str(return_sequences) + "\n" +  # uncertain about this parameter
               "dropout_rate, " + str(dropout_rate) + "\n" +
               "epochs, " + str(epochs) + "\n" +
               "name, " + str(name) + "\n" +
               "input file, " + str(args.CSVFile) + "\n" +
               "average loss, " + average_loss + "\n" +
               "loss, " + str(loss) + "\n")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--hdim', default=512, type=int)
    parser.add_argument('--grad_clip', default=100, type=int)
    parser.add_argument('--lr', default=0.01, type=float)
    parser.add_argument('--batch_size', default=50, type=int)
    parser.add_argument('--num_epochs', default=50, type=int)
    parser.add_argument('--seq_len', default=60, type=int)
    parser.add_argument('--depth', default=1, type=int)
    parser.add_argument('--model', default=None)
    parser.add_argument('--model_name_prefix', default='model')
    parser.add_argument('--language', default='hy-AM')
    parser.add_argument('--start_from', default=0, type=float)
    args = parser.parse_args()

    print("Loading Files")
    (char_to_index, index_to_char, vocab_size,
     trans_to_index, index_to_trans, trans_vocab_size) = utils.load_vocabulary(language=args.language)
    (train_text, val_text, trans) = utils.load_language_data(language=args.language)
    data_size = len(train_text)

    print("Building Network ...")
    (output_layer, train, cost) = utils.define_model(args.hdim, args.depth, args.lr, args.grad_clip,
                                                     trans_vocab_size, vocab_size, is_train=True)
    if args.model:
        f = np.load('languages/' + args.language + '/models/' + args.model)
        param_values = [np.float32(f[i]) for i in range(len(f))]
        lasagne.layers.set_all_param_values(output_layer, param_values)

    print("Training ...")
    step_cnt = 0
    date_at_beginning = datetime.now()
    last_time = date_at_beginning
    for epoch in range(args.num_epochs):
        train_text = train_text.split(u'։')
        random.shuffle(train_text)
        train_text = u'։'.join(train_text)
        avg_cost = 0.0
        count = 0
        num_of_samples = 0
        num_of_chars = 0
        for (x, y) in utils.data_generator(train_text, args.seq_len, args.batch_size,
                                           trans, trans_to_index, char_to_index, is_train=True):
            sample_cost = train(x, np.reshape(y, (-1, vocab_size)))
            sample_cost = float(sample_cost)
            count += 1
            num_of_samples += x.shape[0]
            num_of_chars += x.shape[0] * x.shape[1]
            time_now = datetime.now()
            if (time_now - last_time).total_seconds() > 60 * 1:  # every minute
                print('Computing validation loss...')
                val_cost = 0.0
                val_count = 0.0
                for ((x_val, y_val, indices, delimiters), non_valids_list) in utils.data_generator(
                        val_text, args.seq_len, args.batch_size,
                        trans, trans_to_index, char_to_index, is_train=False):
                    val_cost += x_val.shape[0] * cost(x_val, np.reshape(y_val, (-1, vocab_size)))
                    val_count += x_val.shape[0]
                print('Validation loss is {}'.format(val_cost / val_count))
                file_name = 'languages/{}/models/{}.hdim{}.depth{}.seq_len{}.bs{}.time{:4f}.epoch{}.loss{:.4f}'.format(
                    args.language, args.model_name_prefix, args.hdim, args.depth,
                    args.seq_len, args.batch_size,
                    (time_now - date_at_beginning).total_seconds() / 60, epoch, val_cost / val_count)
                print("saving to -> " + file_name)
                np.save(file_name, lasagne.layers.get_all_param_values(output_layer))
                last_time = datetime.now()
            print("On step #{} loss is {:.4f}, samples passed {}, chars_passed {}, {:.4f}% of an epoch {} time passed {:4f}"
                  .format(count, sample_cost, num_of_samples, num_of_chars,
                          100.0 * num_of_chars / len(train_text), epoch,
                          (time_now - date_at_beginning).total_seconds() / 60.0))
            avg_cost += sample_cost
import utils
import matplotlib.pyplot as plt

train_gen = utils.data_generator(20)
x_batch, y_batch = next(train_gen)
print(x_batch[0].shape, x_batch[1].shape, y_batch.shape)

fig = plt.figure(figsize=(2, 50))
for i in range(20):
    plt.subplot(20, 2, 2 * i + 1)
    plt.imshow(x_batch[0][i].reshape([28, 28]))
    plt.subplot(20, 2, 2 * i + 2)
    plt.imshow(x_batch[1][i].reshape([28, 28]))
    plt.title(str(y_batch[i]))
fig.tight_layout()
plt.show()
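# Note: utils.data_generator is not shown here. From the call site above it yields
# ([left_images, right_images], labels) with 28x28 inputs, so a plausible minimal
# sketch is a paired-MNIST generator. The pairing criterion and names below are
# assumptions for illustration, not the repo's actual code:
import numpy as np
from tensorflow.keras.datasets import mnist

def data_generator(batch_size):
    (x, y), _ = mnist.load_data()
    x = x.reshape(-1, 28, 28, 1).astype('float32') / 255.0
    while True:
        i = np.random.randint(0, len(x), size=batch_size)
        j = np.random.randint(0, len(x), size=batch_size)
        # The label could encode, for example, whether the two digits match.
        yield [x[i], x[j]], (y[i] == y[j]).astype('float32')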
        monitor='val_loss', save_best_only=True, verbose=1),
    keras.callbacks.ReduceLROnPlateau(
        monitor='loss', verbose=1, factor=0.5, patience=5, min_delta=0.0005),
    keras.callbacks.TensorBoard(log_dir=myconfig.log_path),
    keras.callbacks.EarlyStopping(
        monitor='loss', patience=20, verbose=1, mode='auto'),
    keras.callbacks.CSVLogger(
        filename=os.path.join(myconfig.log_path, 'stats_per_epoch.csv'),
        append=False)
]

# keras.utils.plot_model(my_model, to_file=os.path.join(myconfig.model_save_path, 'model.png'), show_shapes=True)

train_data_count = len(utils.get_frame_tuple_list(myconfig.dataset_path))
val_data_count = len(utils.get_frame_tuple_list(myconfig.valset_path))
batch_size = 32

my_model.summary()
hist = my_model.fit_generator(
    generator=utils.data_generator(myconfig.dataset_path, batch_size=batch_size),
    steps_per_epoch=train_data_count // batch_size,
    epochs=100,
    callbacks=callbacks,
    verbose=1,
    validation_data=utils.data_generator(myconfig.valset_path, batch_size=4),
    validation_steps=val_data_count // 4)
import keras
import tcn
# from tcn import compiled_tcn
from utils import data_generator

x_train, y_train = data_generator(n=200000, seq_length=600)
x_test, y_test = data_generator(n=40000, seq_length=600)


class PrintSomeValues(keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs={}):
        print(f'x_test[0:1] = {x_test[0:1]}.')
        print(f'y_test[0:1] = {y_test[0:1]}.')
        print(f'pred = {self.model.predict(x_test[0:1])}.')


def run_task():
    model = tcn.compiled_tcn(return_sequences=False,
                             num_feat=x_train.shape[2],
                             num_classes=0,
                             nb_filters=24,
                             kernel_size=8,
                             dilations=[2 ** i for i in range(9)],
                             nb_stacks=1,
                             max_len=x_train.shape[1],
                             use_skip_connections=True,
                             regression=True,
                             dropout_rate=0)

    print(f'x_train.shape = {x_train.shape}')
    print(f'y_train.shape = {y_train.shape}')
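# Note: given the regression setup above (num_feat taken from x_train.shape[2], a
# scalar target), this data_generator is presumably the "adding problem" from the TCN
# literature: one channel of random values and one mask channel marking exactly two
# positions, with the target being the sum of the two marked values. A hedged sketch,
# not necessarily the repo's exact code:
import numpy as np

def data_generator(n, seq_length):
    values = np.random.uniform(0, 1, (n, seq_length, 1))
    mask = np.zeros((n, seq_length, 1))
    y = np.zeros((n, 1))
    for i in range(n):
        # Mark two distinct positions; the model must learn to add their values.
        a, b = np.random.choice(seq_length, size=2, replace=False)
        mask[i, a, 0] = mask[i, b, 0] = 1
        y[i, 0] = values[i, a, 0] + values[i, b, 0]
    return np.concatenate([values, mask], axis=2), y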
import keras
from utils import data_generator
from tcn import compiled_tcn

x_train, y_train = data_generator(601, 10, 30000)
x_test, y_test = data_generator(601, 10, 6000)


class PrintSomeValues(keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs={}):
        print(f'x_test[0:1] = {x_test[0:1].flatten()}.')
        print(f'y_test[0:1] = {y_test[0:1].flatten()}.')
        print(f'p.shape = {self.model.predict(x_test[0:1]).shape}.')
        print(f'p(x_test[0:1]) = {self.model.predict(x_test[0:1]).argmax(axis=2).flatten()}.')


def run_task():
    print(sum(x_train[0].tolist(), []))
    print(sum(y_train[0].tolist(), []))
    model = compiled_tcn(num_feat=1,
                         num_classes=10,
                         nb_filters=10,
                         kernel_size=8,
                         dilations=[2 ** i for i in range(9)],
                         nb_stacks=2,
                         max_len=x_train[0:1].shape[1],
                         activation='norm_relu',
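# Note: the call data_generator(601, 10, 30000) matches the "copy memory" task with
# arguments (T, memory length, number of samples): remember 10 digits, then reproduce
# them after a long blank span once the delimiter value 9 appears. A hedged sketch of
# that generator (the repo's exact code may differ):
import numpy as np

def data_generator(t, mem_length, b_size):
    seq = np.random.randint(1, 9, size=(b_size, mem_length)).astype(float)
    zeros = np.zeros((b_size, t))
    marker = 9 * np.ones((b_size, mem_length + 1))       # delimiter signalling "recall now"
    placeholders = np.zeros((b_size, mem_length))
    x = np.concatenate((seq, zeros[:, :-1], marker), axis=1)
    y = np.concatenate((placeholders, zeros, seq), axis=1)  # target: echo seq at the end
    return np.expand_dims(x, axis=2), np.expand_dims(y, axis=2)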
                    help='number of hidden units per layer')
parser.add_argument('--seed', type=int, default=1111,
                    help='random seed')
parser.add_argument('--gru_units', type=int, default=75,
                    help='Number of hidden units for GRU layer')
parser.add_argument('--lstm_units', type=int, default=60,
                    help='Number of hidden units for LSTM layer')
argv, _ = parser.parse_known_args()

np.random.seed(argv.seed)
K.random_ops.random_seed.set_random_seed(argv.seed)

train_dataset = data_generator(argv.blank_len, argv.seq_len, 10000, argv.batch_size)
test_dataset = data_generator(argv.blank_len, argv.seq_len, 1000, argv.batch_size)

filters = {
    CopyModel.CORE_GRU: [argv.gru_units],
    CopyModel.CORE_LSTM: [argv.lstm_units],
    CopyModel.CORE_TCN: [argv.nhid] * argv.levels,
    CopyModel.CORE_TCN_HE: [argv.nhid] * argv.levels,
}

histories = {}
for core in [
        CopyModel.CORE_LSTM, CopyModel.CORE_GRU, CopyModel.CORE_TCN,
        CopyModel.CORE_TCN_HE
]: