def main(lr, num_epochs, datadir, batch_size, nworkers, outdir):
    # code for GPU support : Start
    cudnn.benchmark = True
    network = resnet18().cuda()
    # code for GPU support : End

    # path = "changed_parameters/"
    # epochList = ["epoch_15.pth", "epoch_20.pth", "epoch_30.pth", "epoch_90.pth", "epoch_99.pth"]
    # snapshotLoad = torch.load("changed_parameters/epoch_99.pth")
    # network.load_state_dict(snapshotLoad.get("model_state"))

    train_iter = make_image_loader(pt.join(datadir, 'train.msgpack'),
                                   batch_size, nworkers, *ARGS_VAL)
    val_iter = make_image_loader(pt.join(datadir, 'val.msgpack'),
                                 batch_size, nworkers, *ARGS_VAL)

    # code without GPU support
    # net = resnet18()

    loss = CrossEntropyLoss(output_key="net_out").cuda()
    val_loss = CrossEntropyLoss(output_key="net_out").cuda()
    optimizer = optim.SGD(network.parameters(), lr=lr,
                          weight_decay=0.0004, momentum=0.9)
    policy = PolyPolicy(optimizer, num_epochs, power=1)

    # trainer.logger.info(run_id=_run._id)
    #
    # trainer.set_hook('train_begin', set_eval)
    # with train_iter, val_iter:
    #     trainer.train(num_epochs, start_epoch=start_epoch)

    trainer = Trainer(network, optimizer, loss, AccuracyMetric(), None, policy,
                      train_iter, val_iter, outdir, val_loss)

    with train_iter, val_iter:
        trainer.train(num_epochs)
def batch_start(self, t: Trainer):
    # skip
    if t.step % self.interval != 0:
        return

    with torch.no_grad():
        bleu = self._score(t)
        print(f"BLEU:{bleu}\n")

        if self.best is None or bleu > self.best:
            self.best = bleu
            self.patience = self.early_stop

            if self.keep_best:
                t.checkpoint(name=t.config["name"], tags=["best"])

            # save the best perplexity and bleu score
            val_loss = t.eval_epoch(only_eval=True)
            ce_loss = pandas.DataFrame(val_loss)["mt"].mean()
            text = f"BLEU:{bleu}" \
                   f"\nCross-Entropy:{ce_loss:.2f}" \
                   f"\nPerplexity:{math.exp(ce_loss):.2f}"
            t.exp.text("best_scores", text, "Best scores")
        else:
            self.patience -= 1
            if self.patience < 0:
                t.early_stop = True

        t.exp.line("bleu", None, "BLEU", bleu)
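# The hook above tracks the best BLEU score and counts down a patience budget
# before requesting an early stop. A minimal, self-contained sketch of that
# pattern in isolation (hypothetical names, independent of the Trainer API used above):
class PatienceTracker:
    def __init__(self, patience: int):
        self.patience = patience      # evaluations allowed without improvement
        self.remaining = patience
        self.best = None

    def update(self, score: float) -> bool:
        """Return True if training should stop early."""
        if self.best is None or score > self.best:
            self.best = score
            self.remaining = self.patience   # reset the budget on improvement
            return False
        self.remaining -= 1
        return self.remaining < 0

# Example: scores plateau after the second evaluation.
tracker = PatienceTracker(patience=2)
for s in [10.0, 12.5, 12.1, 12.0, 11.8]:
    if tracker.update(s):
        print("early stop at score", s)
        break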
def main():
    args = parse_args()

    if args.vk_group_name is not None:
        from modules import VKParser
        vk_parser = VKParser(group_name=args.vk_group_name, app_id=args.vk_appid,
                             login=args.vk_login, password=args.vk_pass)
        if args.vk_file is not None:
            vk_parser.dump_posts(args.vk_file)
            return

    trainer = Trainer(n=args.model_n, lc=args.lc)

    if args.vk_group_name is not None:
        trainer.train(vk_parser.post_iter(args.vk_offset, args.vk_count),
                      re=re_ru_words_punc, output_newlines=True)
    elif args.input_dir is not None:
        for entry in scandir(args.input_dir):
            if entry.name.endswith('.txt') and entry.is_file():
                with open(entry.path, 'r', encoding='utf8') as file:
                    trainer.train(file, re=re_ru_words_punc, output_newlines=True)
    else:
        trainer.train(stdin, re=re_ru_words_punc, output_newlines=True)

    with open(args.model, 'wb') as file:
        trainer.get_model().dump(file)
def main():
    dataset = Dataset()
    fcn_model = loader.get_fcn_model_module().FCNModel()
    trainer = Trainer()
    trainer.train(fcn_model, dataset)

    if not config.one_batch_overfit:
        detector = FCNDetector(fcn_model.model)
        detector.weights_path = osp.join(fcn_model.weights_dir, 'best_weights.hdf5')
        estimate_quality(detector, dataset)
def main(config):
    loaders = DataLoader(
        train_fn=config.train_fn,
        batch_size=config.batch_size,
        min_freq=config.min_vocab_freq,
        max_vocab=config.max_vocab_size,
        device=config.gpu_id
    )

    print(
        '|train| =', len(loaders.train_loader.dataset),
        '|valid| =', len(loaders.valid_loader.dataset),
    )

    vocab_size = len(loaders.text.vocab)
    n_classes = len(loaders.label.vocab)
    print('|vocab| =', vocab_size, ' |classes| =', n_classes)

    if config.rnn is False and config.cnn is False:
        raise Exception('You need to specify an architecture to train. (--rnn or --cnn)')

    if config.rnn:
        # Declare model and loss.
        model = RNNClassifier(
            input_size=vocab_size,
            word_vec_size=config.word_vec_size,
            hidden_size=config.hidden_size,
            n_classes=n_classes,
            n_layers=config.n_layers,
            dropout_p=config.dropout,
        )
        optimizer = optim.Adam(model.parameters())
        crit = nn.NLLLoss()
        print(model)

        if config.gpu_id >= 0:
            model.cuda(config.gpu_id)
            crit.cuda(config.gpu_id)

        rnn_trainer = Trainer(config)
        rnn_model = rnn_trainer.train(model, crit, optimizer,
                                      loaders.train_loader, loaders.valid_loader)

    torch.save({
        'rnn': rnn_model.state_dict() if config.rnn else None,
        'cnn': cnn_model.state_dict() if config.cnn else None,
        'config': config,
        'vocab': loaders.text.vocab,
        'classes': loaders.label.vocab,
    }, config.model_fn)
def test_main():
    config = json.load(open('config.json', 'r'))
    config["output_path"] += "{:%Y-%m-%d_%H:%M}/".format(datetime.datetime.now())
    config['is_file_saved'] = False
    config['portrait_dir'] = "./data/person_image_dataset/96x64_one/"
    config['batch_size'] = 1
    config['train_data_num'] = 1
    config['test_data_num'] = 1

    t = Trainer(**config)
    t.optimize()
    assert 1 == 1
def main(config, model_weight=None, opt_weight=None):
    def print_config(config):
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(vars(config))
    print_config(config)

    loader = DataLoader(
        config.train,
        config.valid,
        (config.lang[:2], config.lang[-2:]),
        batch_size=config.batch_size,
        device=-1,
        max_length=config.max_length
    )

    input_size, output_size = len(loader.src.vocab), len(loader.tgt.vocab)
    model = get_model(input_size, output_size, config)
    crit = get_crit(output_size, data_loader.PAD)

    if model_weight:
        model.load_state_dict(model_weight)

    if config.gpu_id >= 0:
        model.cuda(config.gpu_id)
        crit.cuda(config.gpu_id)

    optimizer = get_optimizer(model, config)

    if opt_weight:
        optimizer.load_state_dict(opt_weight)

    lr_scheduler = None

    if config.verbose >= 2:
        print(model)
        print(crit)
        print(optimizer)

    trainer = Trainer(IgniteEngine, config)
    trainer.train(
        model,
        crit,
        optimizer,
        train_loader=loader.train_iter,
        valid_loader=loader.valid_iter,
        src_vocab=loader.src.vocab,
        tgt_vocab=loader.tgt.vocab,
        n_epochs=config.n_epochs,
        lr_scheduler=lr_scheduler
    )
def main(config):
    if config.gpu_id < 0:
        print("Device: CPU")
    else:
        print("Device:", torch.cuda.get_device_name(config.gpu_id))

    print("Building Vocab...")
    data_handler = CbowDataHandler(
        file_name=config.train_fn,
        window_size=config.window_size,
        train_ratio=config.train_ratio,
        batch_size=config.batch_size,
    )
    print('|train| =', len(data_handler.train_loader.dataset),
          '|valid| =', len(data_handler.valid_loader.dataset))
    print('|vocab_size| =', data_handler.vocab_size)

    model = CBOW(
        vocab_size=data_handler.vocab_size,
        embd_size=config.embd_size,
        window_size=config.window_size,
        hidden_size=config.hidden_size,
    )
    # crit = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    crit = nn.NLLLoss()
    print(model)

    if config.gpu_id >= 0:
        model.cuda(config.gpu_id)
        crit.cuda(config.gpu_id)

    trainer = Trainer(config)
    trainer.train(model, crit, optimizer,
                  data_handler.train_loader, data_handler.valid_loader)

    # Test
    test_data = ['맞교환', '백색', '합판', '이메일']
    ctx_idxs = [data_handler.w2i[w] for w in test_data]
    ctx_var = Variable(torch.LongTensor([ctx_idxs])).to(config.gpu_id)

    model.zero_grad()
    y = model(ctx_var)
    _, predicted = torch.max(y.data, 1)
    predicted_word = data_handler.i2w[int(predicted[0])]
    print('input:', test_data)
    print('predicted:', predicted_word)
def main():
    # parse arguments
    args = parse_agrs()

    # fix random seeds
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(args.seed)

    # create tokenizer
    tokenizer = Tokenizer(args)

    # create data loader
    train_dataloader = R2DataLoader(args, tokenizer, split='train', shuffle=True)
    val_dataloader = R2DataLoader(args, tokenizer, split='val', shuffle=False)
    test_dataloader = R2DataLoader(args, tokenizer, split='test', shuffle=False)

    # build model architecture
    model = R2GenModel(args, tokenizer)

    # get function handles of loss and metrics
    criterion = compute_loss
    metrics = compute_scores

    # build optimizer, learning rate scheduler
    optimizer = build_optimizer(args, model)
    lr_scheduler = build_lr_scheduler(args, optimizer)

    # build trainer and start to train
    trainer = Trainer(model, criterion, metrics, optimizer, args, lr_scheduler,
                      train_dataloader, val_dataloader, test_dataloader)
    trainer.train()
import datetime
import json
import os

from modules.trainer import Trainer
from utils.argument_handler import argment_handler

if __name__ == "__main__":
    args = argment_handler()

    config = json.load(open(args.config_file, 'r'))
    config["output_path"] += "{:%Y-%m-%d_%H:%M}/".format(datetime.datetime.now())
    config['is_file_saved'] = not args.no_write

    if config['is_file_saved']:
        os.mkdir(config["output_path"])
        json.dump(config, open(config["output_path"] + 'config.json', 'w'), indent=4)

    t = Trainer(**config)
    t.optimize()
def main(in_dataset_folder, in_noisy_dataset_folder, in_custom_vocab_file, in_model_folder, in_config):
    with open(in_config, encoding='utf-8') as config_in:
        config = json.load(config_in)

    train_json = load_hcn_json(os.path.join(in_dataset_folder, 'train.json'))
    dev_json = load_hcn_json(os.path.join(in_dataset_folder, 'dev.json'))
    # test_json = load_hcn_json(os.path.join(in_dataset_folder, 'test.json'))
    test_ood_json = load_hcn_json(os.path.join(in_noisy_dataset_folder, 'test_ood.json'))

    kb = make_augmented_knowledge_base(
        os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-kb.txt'),
        os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-candidates.txt'))
    action_templates = train_json['actions']

    max_noisy_dialog_length = max([len(dialog['turns']) for dialog in test_ood_json['dialogs']])
    config['max_input_length'] = max_noisy_dialog_length

    et = EntityTracker(kb)
    post_ood_turns_clean, post_ood_turns_noisy = mark_post_ood_turns(test_ood_json)

    if in_custom_vocab_file is not None:
        with open(in_custom_vocab_file) as vocab_in:
            rev_vocab = [line.rstrip() for line in vocab_in]
            vocab = {word: idx for idx, word in enumerate(rev_vocab)}
    else:
        utterances_tokenized = []
        for dialog in train_json['dialogs']:
            for utterance in dialog['turns']:
                utterances_tokenized.append(utterance['input'].split())
        vocab, rev_vocab = make_vocabulary(
            utterances_tokenized,
            config['max_vocabulary_size'],
            special_tokens=[PAD, START, UNK, EOS] + list(kb.keys()))

    ctx_features = []
    for dialog in train_json['dialogs']:
        for utterance in dialog['turns']:
            if 'context_features' in utterance:
                ctx_features.append(utterance['context_features'])
    ctx_features_vocab, ctx_features_rev_vocab = make_vocabulary(
        ctx_features, config['max_vocabulary_size'], special_tokens=[])

    config['vocabulary_size'] = len(vocab)

    print('Training with config: {}'.format(json.dumps(config)))

    data_preparation_function = getattr(utils.preprocessing, config['data_preparation_function'])
    data_train = data_preparation_function(train_json, vocab, ctx_features_vocab, et, **config)
    data_dev = data_preparation_function(dev_json, vocab, ctx_features_vocab, et, **config)
    # data_test = data_preparation_function(test_json, vocab, ctx_features_vocab, et, **config)
    data_test_ood = data_preparation_function(test_ood_json, vocab, ctx_features_vocab, et, **config)

    dropout_turn_generation_function = getattr(utils.preprocessing,
                                               config['dropout_turn_generation_function'])
    random_input = dropout_turn_generation_function(
        10000, 3, config['max_sequence_length'], train_json, vocab,
        config['turn_word_dropout_prob'])

    save_model(rev_vocab, config, kb, action_templates, in_model_folder)
    net = getattr(modules, config['model_name'])(vocab, config, len(ctx_features_vocab),
                                                 len(action_templates))

    trainer = Trainer(data_train, data_dev, data_test_ood, action_templates, random_input,
                      post_ood_turns_noisy, config, net, in_model_folder)
    trainer.train()
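# make_vocabulary above caps the vocabulary at max_vocabulary_size and reserves
# slots for special tokens. A minimal sketch of that kind of builder, assuming it
# keeps the most frequent words and puts special tokens first (hypothetical helper,
# not the project's actual implementation):
from collections import Counter

def build_vocabulary(tokenized_utterances, max_size, special_tokens=()):
    counts = Counter(token for utterance in tokenized_utterances for token in utterance)
    # Special tokens occupy the lowest indices; the rest are frequency-ranked.
    rev_vocab = list(special_tokens)
    for token, _ in counts.most_common():
        if len(rev_vocab) >= max_size:
            break
        if token not in special_tokens:
            rev_vocab.append(token)
    vocab = {token: idx for idx, token in enumerate(rev_vocab)}
    return vocab, rev_vocab

vocab, rev_vocab = build_vocabulary([['hello', 'world'], ['hello', 'there']],
                                    max_size=10, special_tokens=['<pad>', '<unk>'])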
def main(env, visualise, folder_name, **kwargs):
    shutil.copyfile(os.path.abspath(__file__), folder_name + 'main.py')

    obs_dim = tuple(env.observation_space.sample().shape)
    assert len(obs_dim) == 1 or len(obs_dim) == 3, \
        f'States should be 1D or 3D vector. Received: {obs_dim}'
    a_dim = tuple(env.action_space.sample().shape)
    print('Observation space:', obs_dim)
    print('Action space:', a_dim)

    device = 'cpu'  # 'cuda' if torch.cuda.is_available() else 'cpu'

    policy = Policy(obs_dim, a_dim, sigma=kwargs['exploration_noise'], device=device, **kwargs)
    policy.save(folder_name)

    if kwargs['encoder_type'] == 'none':
        encoder = None
    elif kwargs['encoder_type'] == 'random':
        pass
    elif kwargs['encoder_type'] == 'vae':
        pass
    elif kwargs['encoder_type'] == 'idf':
        pass
    elif kwargs['encoder_type'] == 'cont':
        pass

    if encoder is None:
        wm = WorldModel(obs_dim, a_dim, **kwargs)
    else:
        wm = EncodedWorldModel(obs_dim, a_dim, kwargs['z_dim'], encoder, device=device, **kwargs)

    trainer = Trainer(x_dim=obs_dim, a_dim=a_dim, policy=policy, wm=wm, encoder=encoder, **kwargs)

    scores = {'train': [], 'eval': []}
    start_time = datetime.now()
    buffer = DynamicsReplayBuffer(kwargs['buffer_size'], device)

    while trainer.train_steps < kwargs['train_steps']:
        done = False
        s_t = env.reset()
        env.render()
        score = 0
        while not done:
            a_t = policy.act(torch.from_numpy(s_t).to(dtype=torch.float32, device=device)).numpy()
            s_tp1, r_t, done, info = env.step(a_t)
            env.render()
            score += r_t
            buffer.add(s_t, a_t, s_tp1, done)
            if trainer.train_steps < kwargs['train_steps']:
                xs_t, as_t, xs_tp1, dones = buffer.sample(kwargs['batch_size'])
                trainer.train_step(xs_t, as_t, xs_tp1)
                if trainer.train_steps % kwargs['export_interval'] == 0:
                    visualise.train_iteration_update(
                        **{k + '_loss': np.mean(i[-kwargs['export_interval']:])
                           for k, i in trainer.losses.items() if i != []},
                        ext=np.mean(scores['train'][-kwargs['eval_interval']:]))
                    trainer.save_models(folder_name + 'saved_objects/')
                if trainer.train_steps % kwargs['eval_interval'] == 0:
                    print(trainer.train_steps)
            s_t = s_tp1
        scores['train'].append(score)
        print(score)
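# DynamicsReplayBuffer above stores (s_t, a_t, s_t+1, done) transitions and hands
# back random minibatches for world-model updates. A minimal ring-buffer sketch of
# that idea (hypothetical class, not the repo's implementation):
import random
from collections import deque

import torch

class SimpleTransitionBuffer:
    def __init__(self, capacity: int):
        self.storage = deque(maxlen=capacity)  # oldest transitions are evicted first

    def add(self, s_t, a_t, s_tp1, done):
        self.storage.append((s_t, a_t, s_tp1, done))

    def sample(self, batch_size: int):
        batch = random.sample(self.storage, min(batch_size, len(self.storage)))
        s_t, a_t, s_tp1, done = zip(*batch)
        return (torch.as_tensor(s_t, dtype=torch.float32),
                torch.as_tensor(a_t, dtype=torch.float32),
                torch.as_tensor(s_tp1, dtype=torch.float32),
                torch.as_tensor(done, dtype=torch.bool))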
def main():
    # Argument passing/parsing
    args, model_args = config_utils.initialize_argparser(
        MODELS, _command_args, custom_argparsers.DialogArgumentParser)
    hparams, hparams_dict = config_utils.create_or_load_hparams(
        args, model_args, args.cfg)
    pprint(hparams_dict)

    if hparams.test_mode == 'wow':
        os.makedirs('./tmp', exist_ok=True)
        if not os.path.exists('tmp/wow_pretrained'):
            fname = 'wow_pretrained.zip'
            gd_id = '1lkF1QENr45j0vl-Oja3wEiqkxoNTxkXT'
            colorlog.info(f"Download pretrained checkpoint {fname}")
            download_from_google_drive(gd_id, os.path.join('tmp', fname))
            unzip('tmp', fname)
        ckpt_fname = os.path.join('tmp/wow_pretrained', 'ckpt-46070')
    else:
        raise ValueError("Only 'wow' is currently supported")

    # Set environment variables & gpus
    set_logger()
    set_gpus(hparams.gpus)
    set_tcmalloc()
    gpus = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_visible_devices(gpus, 'GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # Set random seed
    #tf.random.set_seed(hparams.random_seed)
    #np.random.seed(hparams.random_seed)
    #random.seed(hparams.random_seed)

    # Set gpu
    assert hparams.num_gpus == 1
    mirrored_strategy = None

    # Make dataset reader
    os.makedirs(hparams.cache_dir, exist_ok=True)
    reader = WowDatasetReader(
        hparams.batch_size, hparams.num_epochs,
        buffer_size=hparams.buffer_size,
        bucket_width=hparams.bucket_width,
        max_length=hparams.max_length,
        max_episode_length=hparams.max_episode_length,
        max_knowledge=hparams.max_knowledge,
        knowledge_truncate=hparams.knowledge_truncate,
        cache_dir=hparams.cache_dir,
        bert_dir=hparams.bert_dir,
    )
    train_dataset, iters_in_train = reader.read('train', mirrored_strategy)
    test_dataset, iters_in_test = reader.read('test', mirrored_strategy)
    vocabulary = reader.vocabulary

    # Build model & optimizer & trainer
    model = MODELS[hparams.model](hparams, vocabulary)
    optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                         clipnorm=hparams.clipnorm)
    trainer = Trainer(model, optimizer, mirrored_strategy,
                      hparams.enable_function, WowDatasetReader.remove_pad)

    # Setup checkpoint
    global_step = tf.compat.v1.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model,
                                     optimizer_step=global_step)
    train_example = next(iter(train_dataset))
    _ = trainer.train_step(train_example)
    checkpoint.restore(ckpt_fname)

    # Load retriever and input processor
    dictionary = reader._dictionary
    tokenize_fn = lambda x: [data_vocab.BERT_CLS_ID] \
        + dictionary.convert_tokens_to_ids(dictionary.tokenize(x)) \
        + [data_vocab.BERT_SEP_ID]
    input_processor = InteractiveInputProcessor(tokenize_fn, 5)

    # Compile graph
    colorlog.info("Compile model")
    dummy_input = input_processor.get_dummy_input()
    for _ in trange(5, ncols=70):
        trainer.test_step(dummy_input)

    # Module for interactive mode
    wiki_tfidf_retriever = WikiTfidfRetriever(hparams.cache_dir)
    topics_generator = TopicsGenerator(hparams.cache_dir)
    interactive_world = InteractiveWorld(
        responder=trainer,
        input_processor=input_processor,
        wiki_retriever=wiki_tfidf_retriever,
        topics_generator=topics_generator)

    # Loop!
    while True:
        interactive_world.run()
        interactive_world.reset()
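# The script above runs trainer.train_step once before checkpoint.restore: in TF2,
# a subclassed model's variables only exist after a first forward/train pass, so a
# restore issued earlier is deferred until the variables are created. A minimal,
# self-contained sketch of that ordering with a toy Keras model (illustrative only;
# the real model and trainer are the project's own classes):
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(4)])
optimizer = tf.keras.optimizers.Adam(1e-3)
checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)

# Build the variables by running one batch through the model.
dummy_batch = tf.zeros([2, 8])
_ = model(dummy_batch)

# Save once, then restore; with the variables already built, the restore can be
# checked immediately instead of being deferred.
ckpt_path = checkpoint.save('/tmp/toy_ckpt/ckpt')
status = checkpoint.restore(ckpt_path)
status.assert_existing_objects_matched()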
""" load model """ model = prepare_model(args) model.cuda() """ define loss """ criterion = nn.CrossEntropyLoss() """ setup metrics """ metric = MeanIOUScore(9) """ setup optimizer """ optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) """ setup tensorboard """ writer = SummaryWriter(os.path.join(args.save_dir, "train_info")) """ setup trainer """ trainer = Trainer( model, optimizer, criterion, args.accumulate_gradient, train_loader, val_loader, writer, metric, args.save_dir, ) trainer.fit(args.epochs)
def evaluate_mdrnn(test_loader, multi_dimensional_rnn, device, vocab_list: list,
                   blank_symbol: str, horizontal_reduction_factor: int,
                   image_input_is_unsigned_int: bool, input_is_list: bool,
                   language_model_parameters: LanguageModelParameters,
                   save_score_table_file_path: str, epoch_number: int,
                   epoch_statistics: EpochStatistics):
    correct = 0
    total = 0

    output_strings = list([])
    reference_labels_strings = list([])

    for data in test_loader:
        inputs, labels = data

        if Utils.use_cuda():
            labels = labels.to(device)
            if input_is_list:
                inputs = Utils.move_tensor_list_to_device(inputs, device)
            else:
                inputs = inputs.to(device)

        # If the image input comes in the form of unsigned ints, they need to
        # be converted to floats (after moving to GPU, i.e. directly on GPU,
        # which is faster)
        if image_input_is_unsigned_int:
            Trainer.check_inputs_is_right_type(inputs, input_is_list)
            inputs = IamLinesDataset.convert_unsigned_int_image_tensor_or_list_to_float_image_tensor_or_list(inputs)

        # https://github.com/pytorch/pytorch/issues/235
        # Running the evaluation without computing gradients is the recommended way,
        # since this saves time and, more importantly, memory
        with torch.no_grad():
            # outputs = multi_dimensional_rnn(Variable(inputs))  # For "Net" (Le Net)
            max_input_width = NetworkToSoftMaxNetwork.get_max_input_width(inputs)
            outputs = multi_dimensional_rnn(inputs, max_input_width)

            probabilities_sum_to_one_dimension = 2
            # Outputs is the output of the linear layer, which is the input to warp_ctc.
            # But to get probabilities for the decoder, the softmax function needs to
            # be applied to the outputs
            probabilities = torch.nn.functional.softmax(outputs, probabilities_sum_to_one_dimension)

            # No longer necessary with fixed word separator specification in decoder
            # and normal language model
            # probabilities = Evaluator.append_preceding_word_separator_to_probabilities(
            #     probabilities, vocab_list, Evaluator.WORD_SEPARATOR_SYMBOL)

            print(">>> evaluate_mdrnn - outputs.size: " + str(outputs.size()))
            print(">>> evaluate_mdrnn - probabilities.size: " + str(probabilities.size()))

            # beam_size = 20   # This is the problem perhaps...
            # beam_size = 100  # The normal default is 100
            beam_size = Evaluator.BEAM_SIZE  # Larger value to see if it further improves results
            # This value specifies the number of (character) probabilities kept in the
            # decoder. If it is set equal to or larger than the number of characters in
            # the vocabulary, no pruning is done for it
            cutoff_top_n = len(vocab_list)  # No pruning for this parameter
            print(">>> evaluate_mdrnn - len(vocab_list): " + str(len(vocab_list)))
            decoder = Evaluator.create_decoder(vocab_list, cutoff_top_n, beam_size,
                                               blank_symbol, language_model_parameters)
            label_sizes = WarpCTCLossInterface. \
                create_sequence_lengths_specification_tensor_different_lengths(labels)

            sequence_lengths = WarpCTCLossInterface. \
                create_probabilities_lengths_specification_tensor_different_lengths(
                    labels, horizontal_reduction_factor, probabilities)
            sequence_lengths = Evaluator.increase_sequence_lengths_by_one(sequence_lengths)
            # print(">>> evaluate_mdrnn - sequence lengths: " + str(sequence_lengths))
            # print("probabilities.data.size(): " + str(probabilities.data.size()))
            beam_results, beam_scores, timesteps, out_seq_len = \
                decoder.decode(probabilities.data, sequence_lengths)
            # print(">>> evaluate_mdrnn - beam_results: " + str(beam_results))

            total += labels.size(0)

            for example_index in range(0, beam_results.size(0)):
                beam_results_sequence = beam_results[example_index][0]
                # print("beam_results_sequence: \"" + str(beam_results_sequence) + "\"")
                use_language_model_in_decoder = language_model_parameters is not None
                output_string = Evaluator.convert_to_string(
                    beam_results_sequence, vocab_list, out_seq_len[example_index][0],
                    use_language_model_in_decoder)
                example_labels_with_padding = labels[example_index]
                # Extract the real example labels, removing the padding labels
                reference_labels = example_labels_with_padding[0:label_sizes[example_index]]
                # print(">>> evaluate_mdrnn - reference_labels: " + str(reference_labels))
                reference_labels_string = Evaluator.convert_labels_tensor_to_string(
                    reference_labels, vocab_list, blank_symbol)

                if reference_labels_string == output_string:
                    # print("Yaaaaah, got one correct!!!")
                    correct += 1
                    correct_string = "correct"
                else:
                    correct_string = "wrong"

                print(">>> evaluate_mdrnn - output: \"" + output_string + "\" "
                      + "\nreference: \"" + reference_labels_string + "\" --- "
                      + correct_string)

                output_strings.append(output_string)
                reference_labels_strings.append(reference_labels_string)

        # correct += (predicted == labels).sum()

    cer_including_word_separators = evaluation_metrics.character_error_rate. \
        compute_character_error_rate_for_list_of_output_reference_pairs_fast(
            output_strings, reference_labels_strings, True)

    cer_excluding_word_separators = evaluation_metrics.character_error_rate. \
        compute_character_error_rate_for_list_of_output_reference_pairs_fast(
            output_strings, reference_labels_strings, False)

    wer = evaluation_metrics.word_error_rate. \
        compute_word_error_rate_for_list_of_output_reference_pairs(
            output_strings, reference_labels_strings)

    total_examples = len(test_loader.dataset)
    validation_stats = ValidationStats(total_examples, correct,
                                       cer_excluding_word_separators, wer)
    # https://stackoverflow.com/questions/3395138/using-multiple-arguments-for-string-formatting-in-python-e-g-s-s
    print("Accuracy of the network on the {} test inputs: {:.2f} % accuracy".format(
        total_examples, validation_stats.get_accuracy()))
    print("Character Error Rate (CER)[%] of the network on the {} test inputs, "
          "including word separators: {:.3f} CER".format(
              total_examples, cer_including_word_separators))
    print("Character Error Rate (CER)[%] of the network on the {} test inputs, "
          "excluding word separators: {:.3f} CER".format(
              total_examples, cer_excluding_word_separators))
    print("Word Error Rate (WER)[%] of the network on the {} test inputs: {:.3f} WER".format(
        total_examples, wer))

    if save_score_table_file_path is not None:
        score_file_existed = os.path.exists(save_score_table_file_path)
        # Open the file in append mode, creating it if it doesn't exist
        with open(save_score_table_file_path, "a") as scores_table_file:
            if not score_file_existed:
                scores_table_file.write(Evaluator.score_table_header(total_examples, epoch_statistics))
            scores_table_file.write(Evaluator.score_table_line(epoch_number, correct,
                                                               validation_stats.get_accuracy(),
                                                               cer_including_word_separators,
                                                               cer_excluding_word_separators,
                                                               wer, epoch_statistics) + "\n")

    return validation_stats
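# The evaluation above reports character error rate (CER) and word error rate (WER),
# both of which reduce to an edit distance between the decoded string and the
# reference. A self-contained sketch of CER with a plain Levenshtein distance
# (illustrative helper, not the evaluation_metrics implementation):
def levenshtein(a, b):
    # Classic dynamic-programming edit distance over two sequences.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        curr = [i]
        for j, cb in enumerate(b, start=1):
            curr.append(min(prev[j] + 1,               # deletion
                            curr[j - 1] + 1,           # insertion
                            prev[j - 1] + (ca != cb))) # substitution
        prev = curr
    return prev[-1]

def character_error_rate(outputs, references):
    edits = sum(levenshtein(o, r) for o, r in zip(outputs, references))
    total_chars = sum(len(r) for r in references)
    return 100.0 * edits / total_chars  # in percent, like the prints above

print(character_error_rate(["helo wrld"], ["hello world"]))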
joint_num = 24
model = get_pose_net(RESNET_TYPE, OUTPUT_SHAPE, True, joint_num).to(device)
model = nn.DataParallel(model)

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 250, eta_min=1e-3)
metric_fn = get_metric_fn

trainer = Trainer(model, device, metric_fn, optimizer, scheduler, logger=system_logger)
early_stopper = LossEarlyStopper(patience=EARLY_STOPPING_PATIENCE,
                                 verbose=True, logger=system_logger)

key_column_value_list = [
    TRAIN_SERIAL,
    TRAIN_TIMESTAMP,
    MODEL,
    OPTIMIZER,
    LOSS_FN,
    METRIC_FN,
    EARLY_STOPPING_PATIENCE,
    BATCH_SIZE,
    EPOCHS,
    LEARNING_RATE,
    WEIGHT_DECAY,
    RANDOM_SEED
]

performance_recorder = PerformanceRecorder(
    column_name_list=PERFORMANCE_RECORD_COLUMN_NAME_LIST,
    record_dir=PERFORMANCE_RECORD_DIR,
def main():
    parser = get_train_parser()
    args = parser.parse_args()

    if torch.cuda.is_available():
        torch.cuda.set_device(0)
    torch.manual_seed(args.seed)

    task = TextPlanningTask.setup_task(args)
    task.load_dataset(args.valid_set)

    model = task.build_model(args).cuda()
    criterion = task.build_criterion(args).cuda()
    print('| model {}, criterion {}'.format(args.model_name, criterion.__class__.__name__))
    print('| num. model params: {} (num. trained: {})'.format(
        sum(p.numel() for p in model.parameters()),
        sum(p.numel() for p in model.parameters() if p.requires_grad),
    ))

    if args.tensorboard_logdir:
        tensorboard_logdir = args.tensorboard_logdir
        if tensorboard_logdir[-1] == '/':
            tensorboard_logdir = tensorboard_logdir[:-1]
        args.tensorboard_logdir = f"{tensorboard_logdir}_{time.strftime('%Y%m%d_%H%M%S')}"
        print('Tensorboard path {}'.format(args.tensorboard_logdir))

    args.ckpt_dir = f'../checkpoints/planning/{args.domain}/{args.exp_name}/'

    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    optimizer = FairseqAdam(args, params)
    lr_scheduler = InverseSquareRootSchedule(args, optimizer)
    lr_scheduler.step_update(0)

    # Build trainer
    trainer = Trainer(args, task, model, criterion, optimizer, lr_scheduler)
    print('| max tokens per GPU = {} and max samples per GPU = {}'.format(
        args.max_tokens,
        args.max_samples,
    ))

    # Load the latest checkpoint if one is available and restore the
    # corresponding train iterator
    extra_state, epoch_itr = checkpoint_utils.load_checkpoint(args, trainer)

    # Train until the learning rate gets too small
    max_epoch = args.max_epoch
    valid_losses = [None]
    while epoch_itr.epoch < max_epoch:
        # train for one epoch
        train_epoch(args, trainer, epoch_itr)

        if epoch_itr.epoch % args.validate_interval == 0:
            valid_losses = validate(args, trainer, task, epoch_itr)

        # only use first validation loss to update the learning rate
        trainer.lr_step(epoch_itr.epoch, valid_losses[0])

        # save checkpoint
        if epoch_itr.epoch % args.save_interval == 0:
            checkpoint_utils.save_checkpoint(args, trainer, epoch_itr, valid_losses[0])
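# InverseSquareRootSchedule above (fairseq-style) warms the learning rate up linearly
# for a fixed number of updates and then decays it proportionally to the inverse
# square root of the update count. A minimal sketch of that shape using
# torch.optim.lr_scheduler.LambdaLR (the warmup_updates value is just an example,
# not the script's actual setting):
import math
import torch

def inverse_sqrt_factor(step, warmup_updates=4000):
    step = max(step, 1)
    if step < warmup_updates:
        return step / warmup_updates                    # linear warmup
    return math.sqrt(warmup_updates) / math.sqrt(step)  # inverse-sqrt decay

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=5e-4)
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=inverse_sqrt_factor)

for step in range(10):
    optimizer.step()
    scheduler.step()  # scales the base lr by inverse_sqrt_factor(current step)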
set_random_seed(_env['seed'])

project_name = _root.split("/")[-1]
run_name = (f"{_model['name']}_{_model['size']}-"
            f"lr_{_training['lr']}-bsz_{_training['batch_size']}-"
            f"seed_{_env['seed']}")
now = datetime.now().strftime('%Y-%m-%d_%Hh%Mm%Ss')

tokenizer = get_tokenizer(_model['name'], _model['size'])
train_dataset = CustomDataset(_root, 'train', tokenizer, _training["max_len"])
dev_dataset = CustomDataset(_root, 'dev', tokenizer, _training["max_len"])

Model = get_model_class(_model['name'])
Opt = get_optim_class(_model['opt'])
Loss_fn = get_loss_fn_class(_model['loss'])

model = Model(n_outputs=train_dataset.n_outputs,
              size=_model['size'],
              pretrained_model_path=str2bool(_model['pretrained_model_path']))

metric_dic = {
    "acc": Accuracy(),
    "precision": Precision()
}

callbacks = [
    ModelCheckpoint(f"{_save_model_root}/{run_name}.pth",
                    monitor='dev_loss', mode="min")
]

trainer = Trainer(model=model,
                  loss_fn_class=Loss_fn,
                  optimizer_class=Opt,
                  metrics=metric_dic)

trainer.fit(train_dataset,
            dev_dataset,
            lr=_training['lr'],
            epochs=_training['epochs'],
            batch_size=_training['batch_size'],
            callbacks=callbacks)
def main(argv):
    trainer = Trainer()
    trainer.run()
lr = 0.001
momentum = 0.9
batch_size = 5
start_epoch = 1
end_epoch = 1
data_root = ''

# Preprocessing (use a distinct name so the torchvision.transforms module isn't shadowed)
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
datasets = dset.ImageFolder('../images/', transform=transform)
train_loader = torch.utils.data.DataLoader(datasets, batch_size=batch_size, shuffle=True)

# Model Setting
model = models.vgg19(pretrained=True)
model.fc = nn.Linear(1000, num_classes)

if args.use_cuda:
    model = model.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

trainer = Trainer(optimizer, criterion, model, 10, train_loader, args.use_cuda)
trained_model = trainer.run()

torch.save(trained_model.state_dict(), '../weights/vgg_weight.pth')
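# torchvision's VGG19 exposes its classification head as .classifier (a Sequential),
# not .fc, so the model.fc assignment above adds an attribute the forward pass never
# uses. A minimal sketch of replacing the head so the network actually emits
# num_classes logits (num_classes here is an illustrative value):
import torch.nn as nn
from torchvision import models

num_classes = 10  # illustrative value
model = models.vgg19(pretrained=True)
in_features = model.classifier[6].in_features           # 4096 for VGG19
model.classifier[6] = nn.Linear(in_features, num_classes)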
def main():
    # Argument passing/parsing
    args, model_args = config_utils.initialize_argparser(
        MODELS, _command_args, custom_argparsers.DialogArgumentParser)
    hparams, hparams_dict = config_utils.create_or_load_hparams(
        args, model_args, args.cfg)
    pprint(hparams_dict)

    # Set environment variables & gpus
    set_logger()
    set_gpus(hparams.gpus)
    set_tcmalloc()
    gpus = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_visible_devices(gpus, 'GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # Set random seed
    tf.random.set_seed(hparams.random_seed)
    np.random.seed(hparams.random_seed)
    random.seed(hparams.random_seed)

    # For multi-gpu
    if hparams.num_gpus > 1:
        mirrored_strategy = tf.distribute.MirroredStrategy()  # NCCL will be used as default
    else:
        mirrored_strategy = None

    # Download BERT pretrained model
    if not os.path.exists(hparams.bert_dir):
        os.makedirs(hparams.bert_dir)
        fname = 'uncased_L-12_H-768_A-12.zip'
        gd_id = '17rfV9CleFBwwfS7m5Yd72vvxdPLWBHl6'
        download_from_google_drive(gd_id, os.path.join(hparams.bert_dir, fname))
        unzip(hparams.bert_dir, fname)

    # Make dataset reader
    os.makedirs(hparams.cache_dir, exist_ok=True)
    if hparams.data_name == "wizard_of_wikipedia":
        reader_cls = WowDatasetReader
    elif hparams.data_name == "holle":
        reader_cls = HolleDatasetReader
    else:
        raise ValueError("data_name must be one of 'wizard_of_wikipedia' and 'holle'")
    reader = reader_cls(
        hparams.batch_size, hparams.num_epochs,
        buffer_size=hparams.buffer_size,
        bucket_width=hparams.bucket_width,
        max_length=hparams.max_length,
        max_episode_length=hparams.max_episode_length,
        max_knowledge=hparams.max_knowledge,
        knowledge_truncate=hparams.knowledge_truncate,
        cache_dir=hparams.cache_dir,
        bert_dir=hparams.bert_dir,
    )
    train_dataset, iters_in_train = reader.read('train', mirrored_strategy)
    test_dataset, iters_in_test = reader.read('test', mirrored_strategy)
    if hparams.data_name == 'wizard_of_wikipedia':
        unseen_dataset, iters_in_unseen = reader.read('test_unseen', mirrored_strategy)
    vocabulary = reader.vocabulary

    # Build model & optimizer & trainer
    if mirrored_strategy:
        with mirrored_strategy.scope():
            model = MODELS[hparams.model](hparams, vocabulary)
            optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                                 clipnorm=hparams.clipnorm)
    else:
        model = MODELS[hparams.model](hparams, vocabulary)
        optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                             clipnorm=hparams.clipnorm)
    trainer = Trainer(model, optimizer, mirrored_strategy,
                      hparams.enable_function, WowDatasetReader.remove_pad)

    # misc (tensorboard, checkpoints)
    file_writer = tf.summary.create_file_writer(hparams.checkpoint_dir)
    file_writer.set_as_default()
    global_step = tf.compat.v1.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model,
                                     optimizer_step=global_step)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    directory=hparams.checkpoint_dir,
                                                    max_to_keep=hparams.max_to_keep)
    checkpoint_tracker = CheckpointTracker(hparams.checkpoint_dir,
                                           max_to_keep=BEST_N_CHECKPOINTS)

    # Main loop!
    train_dataset_iter = iter(train_dataset)
    for epoch in range(hparams.num_epochs):
        print(hparams.checkpoint_dir)
        base_description = f"(Train) Epoch {epoch}, GPU {hparams.gpus}"
        train_tqdm = trange(iters_in_train, ncols=120, desc=base_description)
        for current_step in train_tqdm:
            example = next(train_dataset_iter)
            global_step.assign_add(1)
            _global_step = int(global_step)

            # Train
            output_dict = trainer.train_step(example)

            # Print model
            if _global_step == 1:
                model.print_model()

            loss_str = str(output_dict['loss'].numpy())
            train_tqdm.set_description(f"{base_description}, Loss {loss_str}")
            with file_writer.as_default():
                if _global_step % int(hparams.logging_step) == 0:
                    tf.summary.histogram('train/vocab', output_dict['sample_ids'], step=_global_step)
                    tf.summary.scalar('train/loss', output_dict['loss'], step=_global_step)
                    tf.summary.scalar('train/gen_loss', output_dict['gen_loss'], step=_global_step)
                    tf.summary.scalar('train/knowledge_loss', output_dict['knowledge_loss'], step=_global_step)
                    tf.summary.scalar('train/kl_loss', output_dict['kl_loss'], step=_global_step)

            # Test
            if _global_step % int(iters_in_train * hparams.evaluation_epoch) == 0:
                checkpoint_manager.save(global_step)

                test_loop_outputs = trainer.test_loop(test_dataset, iters_in_test, epoch, 'seen')
                if hparams.data_name == 'wizard_of_wikipedia':
                    unseen_loop_outputs = trainer.test_loop(unseen_dataset, iters_in_unseen,
                                                            epoch, 'unseen')

                test_summaries, log_dict = run_wow_evaluation(
                    test_loop_outputs, hparams.checkpoint_dir, 'seen')
                if hparams.data_name == 'wizard_of_wikipedia':
                    unseen_summaries, unseen_log_dict = run_wow_evaluation(
                        unseen_loop_outputs, hparams.checkpoint_dir, 'unseen')

                # Logging
                tqdm.write(colorful.bold_green("seen").styled_string)
                tqdm.write(colorful.bold_red(pformat(log_dict)).styled_string)
                if hparams.data_name == 'wizard_of_wikipedia':
                    tqdm.write(colorful.bold_green("unseen").styled_string)
                    tqdm.write(colorful.bold_red(pformat(unseen_log_dict)).styled_string)

                with file_writer.as_default():
                    for family, test_summary in test_summaries.items():
                        for key, value in test_summary.items():
                            tf.summary.scalar(f'{family}/{key}', value, step=_global_step)
                    if hparams.data_name == 'wizard_of_wikipedia':
                        for family, unseen_summary in unseen_summaries.items():
                            for key, value in unseen_summary.items():
                                tf.summary.scalar(f'{family}/{key}', value, step=_global_step)

                if hparams.keep_best_checkpoint:
                    current_score = log_dict["rouge1"]
                    checkpoint_tracker.update(current_score, _global_step)
def main():
    # Argument passing/parsing
    args, model_args = config_utils.initialize_argparser(
        MODELS, _command_args, custom_argparsers.DialogArgumentParser)
    hparams, hparams_dict = config_utils.create_or_load_hparams(
        args, model_args, args.cfg)
    pprint(hparams_dict)

    if hparams.test_mode == 'wow':
        os.makedirs('./tmp', exist_ok=True)
        if not os.path.exists('tmp/wow_pretrained'):
            fname = 'wow_pretrained.zip'
            gd_id = '1lkF1QENr45j0vl-Oja3wEiqkxoNTxkXT'
            colorlog.info(f"Download pretrained checkpoint {fname}")
            download_from_google_drive(gd_id, os.path.join('tmp', fname))
            unzip('tmp', fname)
        ckpt_fname = os.path.join('tmp/wow_pretrained', 'ckpt-46070')
    elif hparams.test_mode == "holle_1":
        os.makedirs('./tmp', exist_ok=True)
        if not os.path.exists('tmp/holle_pretrained_1'):
            fname = 'holle_pretrained_1.zip'
            gd_id = '1o1-Gv5PScxlSzxW6DyZnSp3gDI5zXOhh'
            colorlog.info(f"Download pretrained checkpoint {fname}")
            download_from_google_drive(gd_id, os.path.join('tmp', fname))
            unzip('tmp', fname)
        ckpt_fname = os.path.join('tmp/holle_pretrained_1', 'ckpt-1th-best')
    elif hparams.test_mode == "holle_2":
        os.makedirs('./tmp', exist_ok=True)
        if not os.path.exists('tmp/holle_pretrained_2'):
            fname = 'holle_pretrained_2.zip'
            gd_id = '13FkCjuC0aBEenlSf-NAAgOfoWVPhqFSc'
            colorlog.info(f"Download pretrained checkpoint {fname}")
            download_from_google_drive(gd_id, os.path.join('tmp', fname))
            unzip('tmp', fname)
        ckpt_fname = os.path.join('tmp/holle_pretrained_2', 'ckpt-1th-best')
    else:
        raise ValueError("Only 'wow' and 'holle' are currently supported")

    # Set environment variables & gpus
    set_logger()
    set_gpus(hparams.gpus)
    set_tcmalloc()
    gpus = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_visible_devices(gpus, 'GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # Set random seed
    tf.random.set_seed(hparams.random_seed)
    np.random.seed(hparams.random_seed)
    random.seed(hparams.random_seed)

    # For multi-gpu
    if hparams.num_gpus > 1:
        mirrored_strategy = tf.distribute.MirroredStrategy()  # NCCL will be used as default
    else:
        mirrored_strategy = None

    # Download BERT pretrained model
    if not os.path.exists(hparams.bert_dir):
        os.makedirs(hparams.bert_dir)
        fname = 'uncased_L-12_H-768_A-12.zip'
        gd_id = '17rfV9CleFBwwfS7m5Yd72vvxdPLWBHl6'
        download_from_google_drive(gd_id, os.path.join(hparams.bert_dir, fname))
        unzip(hparams.bert_dir, fname)

    # Make dataset reader
    os.makedirs(hparams.cache_dir, exist_ok=True)
    if hparams.data_name == 'wizard_of_wikipedia':
        reader_cls = WowDatasetReader
    elif hparams.data_name == 'holle':
        reader_cls = HolleDatasetReader
    else:
        raise ValueError("data_name must be one of 'wizard_of_wikipedia' and 'holle'")
    reader = reader_cls(
        hparams.batch_size, hparams.num_epochs,
        buffer_size=hparams.buffer_size,
        bucket_width=hparams.bucket_width,
        max_length=hparams.max_length,
        max_episode_length=hparams.max_episode_length,
        max_knowledge=hparams.max_knowledge,
        knowledge_truncate=hparams.knowledge_truncate,
        cache_dir=hparams.cache_dir,
        bert_dir=hparams.bert_dir,
    )
    train_dataset, iters_in_train = reader.read('train', mirrored_strategy)
    test_dataset, iters_in_test = reader.read('test', mirrored_strategy)
    if hparams.data_name == 'wizard_of_wikipedia':
        unseen_dataset, iters_in_unseen = reader.read('test_unseen', mirrored_strategy)
    vocabulary = reader.vocabulary

    # Build model & optimizer & trainer
    if mirrored_strategy:
        with mirrored_strategy.scope():
            model = MODELS[hparams.model](hparams, vocabulary)
            optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                                 clipnorm=hparams.clipnorm)
    else:
        model = MODELS[hparams.model](hparams, vocabulary)
        optimizer = tf.keras.optimizers.Adam(learning_rate=hparams.init_lr,
                                             clipnorm=hparams.clipnorm)
    trainer = Trainer(model, optimizer, mirrored_strategy,
                      hparams.enable_function, WowDatasetReader.remove_pad)

    # Setup checkpoint
    global_step = tf.compat.v1.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model,
                                     optimizer_step=global_step)

    # Load
    train_example = next(iter(train_dataset))
    _ = trainer.train_step(train_example)
    #checkpoint.restore(ckpt_fname).assert_consumed()
    #checkpoint.restore(ckpt_fname).expect_partial()
    checkpoint.restore(ckpt_fname)

    # Test
    test_loop_outputs = trainer.test_loop(test_dataset, iters_in_test, 0, 'seen')
    if hparams.data_name == 'wizard_of_wikipedia':
        unseen_loop_outputs = trainer.test_loop(unseen_dataset, iters_in_unseen, 0, 'unseen')

    test_summaries, log_dict = run_wow_evaluation(test_loop_outputs,
                                                  hparams.checkpoint_dir, 'seen')
    if hparams.data_name == 'wizard_of_wikipedia':
        unseen_summaries, unseen_log_dict = run_wow_evaluation(
            unseen_loop_outputs, hparams.checkpoint_dir, 'unseen')

    # Logging
    tqdm.write(colorful.bold_green("seen").styled_string)
    tqdm.write(colorful.bold_red(pformat(log_dict)).styled_string)
    if hparams.data_name == 'wizard_of_wikipedia':
        tqdm.write(colorful.bold_green("unseen").styled_string)
        tqdm.write(colorful.bold_red(pformat(unseen_log_dict)).styled_string)