def train(**kwargs):
    """Train a summarization model on the Sogou dataset.

    Keyword arguments override fields of ``Config`` via ``args.parse``.
    Builds train/dev loaders, loads the pickled vocabulary, resolves the
    model class by name from ``Models``, and hands everything to
    ``Trainner_transformer`` (supports resuming from ``args.resume``).
    """
    args = Config()
    args.parse(kwargs)
    loss_func = loss_function
    score_func = batch_scorer
    train_set = DataSet(args.sog_processed + 'train/')
    dev_set = DataSet(args.sog_processed + 'dev/')
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=own_collate_fn,
                              num_workers=20)
    dev_loader = DataLoader(dev_set,
                            batch_size=args.batch_size,
                            shuffle=True,
                            collate_fn=own_collate_fn)
    # Fix: open the pickle through a context manager so the file handle
    # is closed instead of leaked (`pk.load(open(...))` never closed it).
    with open('Predictor/Utils/sogou_vocab.pkl', 'rb') as f:
        vocab = pk.load(f)
    eos_id, sos_id = vocab.token2id['<EOS>'], vocab.token2id['<BOS>']
    args.eos_id = eos_id
    args.sos_id = sos_id
    # Model class is resolved dynamically by its name in the config.
    model = getattr(Models, args.model_name)(matrix=vocab.matrix, args=args)
    trainner = Trainner_transformer(args, vocab)
    trainner.train(model, loss_func, score_func, train_loader, dev_loader,
                   resume=args.resume, exp_root=args.exp_root)
def test(**kwargs):
    """Decode the test split with a saved checkpoint and print predictions.

    Loads the vocabulary and a hard-coded checkpoint, disables teacher
    forcing, then for every test batch prints context / predicted /
    ground-truth tokens plus the batch score, pausing for user input
    between samples.
    """
    args = Config()
    # Fix: **kwargs was accepted but silently ignored; parse overrides
    # the same way train() does.
    args.parse(kwargs)
    test_set = DataSet(args.processed_folder + 'test/')
    test_loader = DataLoader(test_set,
                             batch_size=args.batch_size,
                             shuffle=True,
                             collate_fn=own_collate_fn)
    # Fix: open the pickle through a context manager so the file handle
    # is closed instead of leaked.
    with open('Predictor/Utils/vocab.pkl', 'rb') as f:
        vocab = pk.load(f)
    eos_id, sos_id = vocab.token2id['<EOS>'], vocab.token2id['<BOS>']
    args.eos_id = eos_id
    args.sos_id = sos_id
    model = getattr(Models, args.model_name)(matrix=vocab.matrix, args=args)
    load = _load('ckpt/saved_models/2018_08_20_02_12_38_0.2602508540088274', model)
    model = load['model']
    model.to('cuda')
    #TODO complete load_state_dict and predict
    # A large negative ratio disables teacher forcing during decoding.
    model.teacher_forcing_ratio = -100
    with t.no_grad():
        for data in test_loader:
            context, title, context_lenths, title_lenths = [
                i.to('cuda') for i in data
            ]
            token_id, prob_vector, token_lenth, attention_matrix = model(
                context, context_lenths, title)
            score = batch_scorer(token_id.tolist(), title.tolist(), args.eos_id)
            # Fix: loop variable renamed from `id`, which shadowed the builtin.
            context_word = [[vocab.from_id_token(tok.item()) for tok in sample]
                            for sample in context]
            words = [[vocab.from_id_token(tok.item()) for tok in sample]
                     for sample in token_id]
            title_words = [[vocab.from_id_token(tok.item()) for tok in sample]
                           for sample in title]
            for i in zip(context_word, words, title_words):
                # Fix: result of input() was bound to an unused variable.
                input('next')
                print(f'context:{i[0]},pre:{i[1]}, tru:{i[2]}, score:{score}')
# Script fragment: evaluate a DataParallel Transformer checkpoint with beam
# search on the Sogou test split, one sample at a time (batch_size = 1).
args = Config()
# NOTE(review): `vocab` is used here but only (re)loaded further below — it
# must be defined earlier in the file, outside this chunk.
args.sos_id = vocab.token2id['<BOS>']
args.batch_size = 1
print(args.sos_id)
matrix = vocab.matrix
transformer = Transformer(args, matrix)
# Wrap in DataParallel before loading: the checkpoint's state_dict keys
# carry the `module.` prefix of a DataParallel model.
mm = t.nn.DataParallel(transformer).cuda()
# output = transformer(inputs)
# output2 = transformer(inputs)
mm.load_state_dict(t.load('ckpt/20180913_233530/saved_models/2018_09_16_18_31_10T0.6108602118195541/model'))
from torch.utils.data import Dataset, DataLoader
from DataSets import DataSet
from DataSets import own_collate_fn
from Predictor.Utils import batch_scorer
train_set = DataSet(args.sog_processed + 'train/')
dev_set = DataSet(args.sog_processed + 'dev/')
test_set = DataSet(args.sog_processed + 'test/')
train_loader = DataLoader(train_set,
                          batch_size=args.batch_size,
                          shuffle=True,
                          collate_fn=own_collate_fn)
dev_loader = DataLoader(dev_set,
                        batch_size=args.batch_size,
                        shuffle=True,
                        collate_fn=own_collate_fn)
test_loader = DataLoader(test_set,
                         batch_size=args.batch_size,
                         shuffle=True,
                         collate_fn=own_collate_fn)
vocab = pk.load(open('Predictor/Utils/sogou_vocab.pkl', 'rb'))
eos_id, sos_id = vocab.token2id['<EOS>'], vocab.token2id['<BOS>']
mm.eval()
with t.no_grad():
    for data in test_loader:
        context, title, context_lenths, title_lenths = [i.to('cuda') for i in data]
        # Call beam_search on the wrapped module: DataParallel does not
        # forward arbitrary method calls.
        token_id, prob_vector = mm.module.beam_search(context)
        score = batch_scorer(token_id.tolist(), title.tolist(), args.eos_id)
        # Map id tensors back to token strings for inspection.
        context_word = [[vocab.from_id_token(id.item()) for id in sample]
                        for sample in context]
        words = [[vocab.from_id_token(id.item()) for id in sample]
                 for sample in token_id]
from Predictors import Predictor
from DataSets import DataSet
import Visualizers as visualisers


def _main():
    """Build a predictor over the default dataset and visualize its prediction."""
    data_set = DataSet()
    prediction = Predictor(data_set).makePrediction()
    visualisers.visualize(prediction)


if __name__ == "__main__":
    _main()
# NOTE(review): the opening of the call closed by this ')' lies before this
# chunk — presumably the optimizer construction; confirm against the file head.
)  # Optimizer and step-size choice: https://www.tensorflow.org/api_docs/python/tf/keras/optimizers | https://www.youtube.com/watch?v=mdKjMPmcWjY

simple_cnn = ConvNeuralNet(img_height, img_width, img_channels)  # Initialise the model

# Model loading (default: no)
action_load = input("Charger un modèle ? (O/n) : ")
if action_load == "O":
    modelFolder = input("    Dossier du modèle : ")
    modelNum = int(input("    Numéro du modèle : "))
    loadModel(modelFolder, modelNum, optimizer, simple_cnn)

# Model training (default: yes)
action_train = input("Entraîner le modèle ? (O/n) : ")
if action_train != "n":
    dataset = DataSet("B_train_label",
                      dimsImages=[img_height, img_width, img_channels],
                      batchSize=128)
    # Class 0 / class 1 counts and the val ratio are dataset-specific
    # constants for the "B_train_label" binary database.
    dataset.prep_load_fromBIN_lab01(filename_data_lab0="",
                                    filename_data_lab1="",
                                    nbLab0=30517,
                                    nbLab1=88777,
                                    nbImDiffClasse0_train=20000,
                                    nbUtilisations0_train=2,
                                    nbImDiffClasse0_val=5000,
                                    rapport10_val=88777. / 30517.)
    # dataset = DataSet("data_10k", dimsImages=[img_height, img_width, img_channels], batchSize=128)
    # dataset.prep_load_fromBIN_lab01(filename_data_lab0 = __dir__ + "DataBases/data_10k_label0.bin",
    #                                 filename_data_lab1 = __dir__ + "DataBases/data_10k_label1.bin",
    #                                 nbLab0 = 4768,
    #                                 nbLab1 = 5232,
    #                                 nbImDiffClasse0_train = 4000,
def train_model(p_dataset: DataSet,
                p_model: tf.Module,
                p_optimizer: tf.optimizers,
                logFileName: str,
                betaL2: float,
                interv_reload: int,
                nbIterMax: int,
                min_delta: float,
                patience: int,
                nbElemMoyGlissante: int,
                interv_accuracy: int = 500,
                verbose: int = 1):
    """Run the training loop, handling the max iteration count and early stopping.

    The loss is smoothed over a sliding window of ``nbElemMoyGlissante``
    recent values; training stops when the windowed sum fails to improve
    by at least ``min_delta`` for more than ``patience`` consecutive
    iterations. Accuracy metrics are logged to TensorBoard every
    ``interv_accuracy`` iterations, and the dataset is reloaded from its
    binary files every ``interv_reload`` iterations.
    """
    # Create the log file so progress can be followed in TensorBoard.
    train_summary_writer = tf.summary.create_file_writer(logFileName)

    # Map the verbose level (0 to 3) to a console print interval.
    if verbose <= 0:
        interv_print = nbIterMax
    else:
        interv_print = int(1000 / (10**verbose))  # 1 => 100 // 2 => 10 // 3 => 1

    # Early-stopping bookkeeping: the window starts filled with a huge
    # sentinel so the first real losses always count as an improvement.
    earlyStopping_counter = 0
    max_earlyStopping_counter = 0
    l_lastLosses = np.full(shape=(nbElemMoyGlissante),
                           fill_value=999999,
                           dtype=np.float32)
    minSumLosses = sum(l_lastLosses)

    for numIter in range(nbIterMax):
        tf.summary.experimental.set_step(numIter)

        # Log accuracy, precision, recall and the confusion matrix every
        # `interv_accuracy` (default 500) iterations.
        if numIter % interv_accuracy == 0:
            with train_summary_writer.as_default():
                p_dataset.get_mean_accuracy(p_model, numIter)

        # One training step: fetch the next batch (pixel values + labels)
        # and run an optimisation iteration, logging every 10th step.
        ima, lab = p_dataset.NextTrainingBatch()
        with train_summary_writer.as_default():
            loss = train_one_iter(p_model, p_optimizer, betaL2, ima, lab,
                                  numIter % 10 == 0)
        # Add the L2 penalty so the printed loss matches what is optimised.
        loss += betaL2 * p_model.get_L2_loss()

        # Print the loss every `interv_print` iterations, along with the
        # peak early-stopping counter seen since the previous print.
        if numIter % interv_print == 0:
            print(
                "numIter = %6d - loss = %.3f - max_earlyStopping_counter = %d"
                % (numIter, loss, max_earlyStopping_counter))
            max_earlyStopping_counter = 0

        # Early stopping: overwrite the oldest window slot, then compare
        # the windowed loss sum against the best sum seen so far.
        l_lastLosses[numIter % nbElemMoyGlissante] = loss.numpy()
        if minSumLosses - sum(l_lastLosses) < min_delta:
            earlyStopping_counter += 1
            if earlyStopping_counter > max_earlyStopping_counter:
                max_earlyStopping_counter = earlyStopping_counter
        else:
            earlyStopping_counter = 0
            minSumLosses = sum(l_lastLosses)
        if earlyStopping_counter > patience:
            print(
                "\n----- EARLY STOPPING : numIter = %6d - loss = %f - earlyStopping_counter = %d -----"
                % (numIter, loss, earlyStopping_counter))
            break

        # Flush then reload the dataset from its binary files.
        if numIter > 0 and numIter % interv_reload == 0:
            p_dataset.reload_fromBIN_lab01()

    # NOTE(review): the collapsed original makes the indentation of this
    # step=-1 call ambiguous — it may belong inside the reload branch above;
    # confirm against the original layout.
    p_dataset.get_mean_accuracy(p_model, -1)
    # Finish with a final accuracy computation, logged to TensorBoard.
    with train_summary_writer.as_default():
        p_dataset.get_mean_accuracy(p_model, numIter)
def train(**kwargs):
    """Train the TextCnn accusation/law multi-label classifier.

    Keyword arguments override fields of ``DefaultConfig`` via
    ``args.parse``. Loads the pickled text processor, builds
    train/valid/test loaders, then runs ``args.epochs`` epochs of BCE
    training on the accusation and law heads, printing train/val metrics
    every 500 steps and saving the model whenever the combined
    validation score improves.
    """
    # init
    print('init')
    args = DefaultConfig()
    args.parse(kwargs)
    # Fix: open the pickle through a context manager so the file handle
    # is closed instead of leaked.
    with open('pr.pkl', 'rb') as f:
        processor = pk.load(f)
    seg_vocab_size = len(processor.token2id['seg'])
    char_vocab_size = len(processor.token2id['char'])
    model = TextCnn(args, seg_vocab_size, char_vocab_size,
                    processor.seg_matrix, processor.char_matrix).cuda()
    print('build dataset')
    # dataset
    train_set = DataSet(args.char_max_lenth, args.word_max_lenth,
                        'processed/data_train/', processor)
    valid_set = DataSet(args.char_max_lenth, args.word_max_lenth,
                        'processed/data_valid/', processor)
    test_set = DataSet(args.char_max_lenth, args.word_max_lenth,
                       'processed/data_test/', processor)
    train_loader = DataLoader(train_set, args.batch_size, shuffle=True, drop_last=True)
    valid_loader = DataLoader(valid_set, args.batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_set, args.batch_size, shuffle=True, drop_last=True)
    # loss meter — both heads are multi-label, hence BCEWithLogits.
    bceloss = t.nn.BCEWithLogitsLoss()
    mseloss = t.nn.MSELoss()  # kept for the commented-out imprisonment head
    celoss = t.nn.CrossEntropyLoss()
    acc_loss_meter = meter.AverageValueMeter()
    law_loss_meter = meter.AverageValueMeter()
    #imprison_loss_meter = meter.MSEMeter()
    lr = args.lr
    optimizer = t.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=lr, weight_decay=1e-4)
    print('start')
    best_score = -1.0
    for epoch in range(args.epochs):
        acc_loss_meter.reset()
        law_loss_meter.reset()
        #imprison_loss_meter.reset()
        for step, datas in tqdm(enumerate(train_loader), desc='step:'):
            accusation_names, seg, pos, char, term_of_imprisonment, accusation, law = [
                i.cuda() for i in datas
            ]
            optimizer.zero_grad()
            accusation_logits, law_logits, imprison_logits = model(
                seg, accusation_names)
            acc_loss = bceloss(accusation_logits, accusation.float())
            law_loss = bceloss(law_logits, law.float())
            #imprison_loss = mseloss(imprison_logits, term_of_imprisonment.unsqueeze(-1).float())
            loss = 0.5 * acc_loss + 0.5 * law_loss  # + 0.2 * imprison_loss
            loss.backward()
            optimizer.step()
            # Fix: the meters were reset every epoch but never fed, so the
            # epoch summary printed meaningless values — record each batch.
            acc_loss_meter.add(acc_loss.item())
            law_loss_meter.add(law_loss.item())
            # Fix: logical `and` instead of bitwise `&` for the condition.
            if step % 500 == 0 and step != 0:
                acc_score = score_sigmoid(accusation_logits, accusation)
                law_score = score_sigmoid(law_logits, law)
                val_acc_loss, val_law_loss, val_acc_score, val_law_score = val(
                    model, valid_loader)
                print('epoch:%s,step:%s' % (epoch, step))
                print(' train:')
                print(' loss:%s,accsocre:%s,lawscore:%s' %
                      (loss.item(), acc_score, law_score))
                print(' val:')
                print(
                    ' accloss:%s,law_loss:%s,accscore:%s,lawscore:%s'
                    % (val_acc_loss, val_law_loss, val_acc_score, val_law_score))
                print(' ')
        # End-of-epoch validation and summary.
        val_acc_loss, val_law_loss, val_acc_score, val_law_score = val(
            model, valid_loader)
        print('epoch:', epoch)
        print(' train:')
        print(' accloss:%s,law_loss:%s' %
              (acc_loss_meter.value()[0], law_loss_meter.value()[0]))
        print(' val:')
        print(' accloss:%s,law_loss:%s,accscore:%s,lawscore:%s' %
              (val_acc_loss, val_law_loss, val_acc_score, val_law_score))
        score = val_law_score + val_acc_score
        if score > best_score:
            # Fix: update best_score — it was never reassigned, so every
            # epoch "improved" on -1.0 and the model saved unconditionally.
            best_score = score
            model.save()
# Script fragment: smoke-test training loop for the UniversalTransformer.
# NOTE(review): the loop body ends at optim.zero_grad() here — backward()
# and step() presumably follow beyond this chunk; confirm before editing.
from configs import Config
import pickle as pk
vocab = pk.load(open('Predictor/Utils/vocab.pkl', 'rb'))
args = Config()
args.sos_id = vocab.token2id['<BOS>']
args.eos_id = vocab.token2id['<EOS>']
args.batch_size = 4
print(args.sos_id)
matrix = vocab.matrix
model = UniversalTransformer(args, matrix)
from torch.utils.data import DataLoader
from DataSets import DataSet
from DataSets import own_collate_fn
from Predictor.Utils import batch_scorer
from Predictor.Utils.loss import loss_function
train_set = DataSet(args.processed_folder + 'train/')
train_loader = DataLoader(train_set,
                          batch_size=args.batch_size,
                          shuffle=True,
                          collate_fn=own_collate_fn)
# NOTE(review): the vocab is loaded a second time here — redundant with the
# load at the top of this fragment.
vocab = pk.load(open('Predictor/Utils/vocab.pkl', 'rb'))
eos_id, sos_id = vocab.token2id['<EOS>'], vocab.token2id['<BOS>']
# Optimise only parameters that require gradients.
optim = t.optim.Adam(
    [i for i in model.parameters() if i.requires_grad is True])
# for data in tqdm(train_loader):
#     context, title, context_lenths, title_lenths = data
for data in tqdm(train_loader):
    context, title, context_lenths, title_lenths = [i for i in data]
    token_id, probs = model(context, title)
    loss = loss_function(probs, title)
    optim.zero_grad()