def test(config):
    """Evaluate a restored model on the test split and optionally dump results.

    Requires config.config_path and config.restore_from to be set; writes
    metrics to <result_dir>/<result_name>.json and per-example labels to
    <result_dir>/<result_name>.txt when config.result_name is set.

    Raises:
        AttributeError: if config_path or restore_from is missing.
    """
    if not config.config_path or not config.restore_from:
        raise AttributeError('You need to specify config_path and restore_from')
    # FIX: the raise above makes the original `else:` redundant.
    config = load_config(config, config.config_path)
    set_logger(config)

    # Load character and label vocabularies from the vocab directory.
    char_vocab = Vocab()
    char_vocab.load_from(os.path.join(config.vocab_dir, 'char_vocab.data'))
    label_vocab = Vocab(use_special_token=False)
    label_vocab.load_from(os.path.join(config.vocab_dir, 'label_vocab.data'))

    # Build the test pipeline and the model, then evaluate.
    test_set = build_dataset(config, 'test', char_vocab, label_vocab)
    inputs = build_inputs(test_set.output_types, test_set.output_shapes)
    model = build_model(config, inputs)
    eval_metrics, results = model.evaluate(test_set)
    print('Eval metrics: {}'.format(eval_metrics))

    if config.result_name:
        # FIX: the original wrapped config.result_name in a single-argument
        # os.path.join (a no-op) and recomputed the base path twice.
        result_base = os.path.join(config.result_dir, config.result_name)
        with open(result_base + '.json', 'w') as f:
            json.dump(eval_metrics, f, indent=4)
        with open(result_base + '.txt', 'w') as f:
            for result in results:
                f.write(label_vocab.id2token[result] + '\n')
def main():
    """Build a MAML meta-learner and either train it or test a saved model."""
    # Seed all RNGs for reproducibility.
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    np.random.seed(1)
    print(args)
    # Layer-by-layer network spec consumed by Meta:
    # 4 x (3x3 conv, stride 2 -> relu -> batchnorm), flatten, linear head.
    # NOTE(review): output size is args.n_way + 2 — presumably two extra
    # classes (e.g. 'unknown'/'silence' given the k_*_unk/silence args); confirm.
    config = [('conv2d', [args.num_filters, 1, 3, 3, 2, 1]), ('relu', [True]),
              ('bn', [args.num_filters]),
              ('conv2d', [args.num_filters, args.num_filters, 3, 3, 2, 1]), ('relu', [True]),
              ('bn', [args.num_filters]),
              ('conv2d', [args.num_filters, args.num_filters, 3, 3, 2, 1]), ('relu', [True]),
              ('bn', [args.num_filters]),
              ('conv2d', [args.num_filters, args.num_filters, 3, 3, 2, 1]), ('relu', [True]),
              ('bn', [args.num_filters]),
              ('flatten', []),
              ('linear', [args.n_way + 2, args.num_filters * 9])]
    device = torch.device('cuda')
    maml = Meta(args, config).to(device)
    # Count trainable parameters for the banner printout.
    tmp = filter(lambda x: x.requires_grad, maml.parameters())
    num = sum(map(lambda x: np.prod(x.shape), tmp))
    print(maml)
    print('Total trainable tensors:', num)
    # batchsz here means total sampled meta-task number
    if args.train == 'True':
        # NOTE(review): args.train is compared as the string 'True' — this is
        # only correct if the flag is parsed as a string; confirm argparse setup.
        mini_train = LingualData('./data', mode='train', task_type=args.task_type,
                                 n_way=args.n_way, k_shot=args.k_spt_train,
                                 k_query=args.k_qry_train,
                                 k_unk_shot=args.k_spt_unk_train,
                                 k_unk_query=args.k_qry_unk_train,
                                 k_silence_shot=args.k_spt_silence_train,
                                 k_silence_query=args.k_qry_silence_train,
                                 batchsz=16000, resize=args.imgsz,
                                 unk_sil_spt=args.unk_sil_spt)
    # Experiment identifier encodes the key hyper-parameters.
    exp_string = 'cls_' + str(args.n_way) + '.tskn_' + str(
        args.task_num) + '.spttrain_' + str(
        args.k_spt_train) + '.qrytrain_' + str(
        args.k_qry_train) + '.numstep' + str(
        args.update_step) + '.updatelr' + str(args.update_lr)
    model_path = args.logdir + '/' + exp_string
    model_file = None
    if args.train == 'True':
        if not os.path.exists(model_path):
            os.makedirs(model_path)
            print("logs directory ", args.logdir, " created!")
        writer = SummaryWriter(model_path)
        set_logger(os.path.join(args.logdir, 'train.log'))
        train(maml, mini_train, model_path, args.resume_itr, device, writer)
    else:
        # Test mode: pick the checkpoint for the requested iteration (if any).
        if args.test_iter >= 0:
            model_file = model_path + '/' + 'model-' + str(
                args.test_iter) + '.pth'
        test(maml, model_file, device)
def get_dataloaders(args):
    """Build train/val/test dataloaders plus src/tgt tokenizers.

    Returns:
        (train_loader, val_loader, test_loader, tok_src, tok_tgt,
         len(vocab_src), len(vocab_tgt))
    """
    model_prefix = '{}_{}'.format(args.model_type, args.train_id)
    log_path = args.LOG_DIR + model_prefix + '/'
    checkpoint_path = args.CHK_DIR + model_prefix + '/'
    # NOTE(review): result_path, cp_file and init_epoch are computed but never
    # used in this function — possibly leftovers from a train() sibling.
    result_path = args.RESULT_DIR + model_prefix + '/'
    cp_file = checkpoint_path + "best_model.pth.tar"
    init_epoch = 0
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    ## set up the logger
    set_logger(os.path.join(log_path, 'train.log'))
    ## save argparse parameters so the run is reproducible
    with open(log_path + 'args.yaml', 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}: {}\n'.format(k, v))
    logging.info('Training model: {}'.format(model_prefix))
    ## set up vocab txt (creates the vocab text files)
    print('running setup')
    setup(args, clear=True)
    print(args.__dict__)
    # indicate src and tgt language (en<->zh translation pair)
    if args.source_language == 'en':
        src, tgt = 'en', 'zh'
    else:
        src, tgt = 'zh', 'en'
    maps = {'en': args.TRAIN_VOCAB_EN, 'zh': args.TRAIN_VOCAB_ZH}
    vocab_src = read_vocab(maps[src])
    tok_src = Tokenizer(language=src, vocab=vocab_src,
                        encoding_length=args.MAX_INPUT_LENGTH)
    vocab_tgt = read_vocab(maps[tgt])
    tok_tgt = Tokenizer(language=tgt, vocab=vocab_tgt,
                        encoding_length=args.MAX_INPUT_LENGTH)
    logging.info('Vocab size src/tgt:{}/{}'.format(len(vocab_src), len(vocab_tgt)))
    ## Setup the training, validation, and testing dataloaders
    train_loader, val_loader, test_loader = create_split_loaders(
        args.DATA_DIR, (tok_src, tok_tgt), args.batch_size,
        args.MAX_VID_LENGTH, (src, tgt), num_workers=4, pin_memory=True)
    logging.info('train/val/test size: {}/{}/{}'.format(
        len(train_loader), len(val_loader), len(test_loader)))
    return train_loader, val_loader, test_loader, tok_src, tok_tgt, len(
        vocab_src), len(vocab_tgt)
def export(directory: str, recursive=True, pattern='*.imdb', output='./kodi.csv', interactive=False, debug=False):
    """
    Export the IDs found in ID/.nfo files to a CSV file.

    Parameters
    ----------
    directory : str
        the directory to look for ID/.nfo files
    recursive : bool, optional
        whether to locate files recursively, by default True
    pattern : str, optional
        the pattern for the ID files (glob), by default "*.imdb"
    output : str, optional
        the output CSV file to generate, by default "./kodi.csv"
    interactive : bool, optional
        whether to use interactive mode, by default False
    debug : bool, optional
        debug message, by default False
    """
    utils.set_logger(debug=debug)
    # Collect the options and delegate the actual work to export_ids.
    options = dict(
        dir_=directory,
        idtype='imdb',
        recursive=recursive,
        pattern=pattern,
        output=output,
        interactive=interactive,
    )
    export_ids(**options)
def main(data_dir, model_dir, restore_file):
    """Evaluate the model on the test set.

    Args:
        data_dir: root directory of the dataset.
        model_dir: directory containing hyper_params.json and checkpoints.
        restore_file: checkpoint basename (without '.pth.tar') to restore.
    """
    # Load the parameters
    json_path = Path(model_dir) / 'hyper_params.json'
    assert json_path.is_file(
    ), "No json configuration file found at {}".format(json_path)
    hyper_params = utils.HyperParams(json_path)

    # use GPU if available; -1 is the CPU sentinel used throughout this function
    hyper_params.cuda = torch.device(
        'cuda:0') if torch.cuda.is_available() else -1

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    # FIX: the original used `is not -1`, an identity comparison against an int
    # literal — implementation-dependent and a SyntaxWarning on Python 3.8+.
    if hyper_params.cuda != -1:
        # FIX: pass the torch.device directly; the original parsed the last
        # character of str(device), which breaks for indices >= 10 ('cuda:12').
        with torch.cuda.device(hyper_params.cuda):
            torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(Path(model_dir) / 'evaluate.log')

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    dataloaders = data_loader.fetch_dataloader(
        ['test'], data_dir + hyper_params.augmentation, hyper_params)
    test_dl = dataloaders['test']
    logging.info("- done.")

    # Look up the model class by name and instantiate on the chosen device.
    model = getattr(net, hyper_params.model, None)
    assert model is not None, "Model {} couldn't be found!".format(
        hyper_params.model)
    model = model(hyper_params).to(
        device=hyper_params.cuda) if hyper_params.cuda != -1 else model(
            hyper_params)

    loss_fn = getattr(loss, hyper_params.loss, None)
    assert loss_fn is not None, "Loss Fn {} couldn't be found!".format(
        hyper_params.loss)
    metrics_dict = metric.metrics_dict

    logging.info("Starting evaluation")
    # Reload weights from the saved file
    utils.load_checkpoint(str(Path(model_dir) / (restore_file + '.pth.tar')),
                          model)

    # Evaluate and persist the metrics next to the checkpoint.
    test_metrics = evaluate(model, loss_fn, test_dl, metrics_dict, model_dir,
                            hyper_params)
    save_path = str(
        Path(model_dir) / "metrics_test_{}.json".format(restore_file))
    utils.save_dict_to_json(test_metrics, save_path)
def settings(args):
    """Prepare output folders, logging, CUDA flags, layer lists and data files."""
    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)
    if args.log_path:
        set_logger(args.log_path)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False
    cudnn.benchmark = True
    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)
    # All 12 encoder layers plus the pooler remain trainable / shared.
    args.unfreeze_layers = ['layer.{}.'.format(i) for i in range(12)] + ['pooler']
    args.shared_layers = args.unfreeze_layers
    # Optional preprocessing steps.
    if args.build_data_seperate:
        build_data_seperate()
    if args.mode == 'finetune' and args.build_data_file:
        build_data_file(args)
def main():
    """Train the seq2class model, log train/val curves, then evaluate on test."""
    # Set the log file for debugging use
    utils.set_logger(os.path.join(os.getcwd(), 'train.log'))
    logging.info('Loading datasets...')
    data_loader = DataLoader(DATA_PATH)
    X_train, Y_train, X_val, Y_val = data_loader.get_train_data()
    X_test, Y_test = data_loader.get_test_data()

    logging.info('Building the model...')
    my_model = seq2class()  # TODO: pass hyper-parameters explicitly
    print("Here is our model: ")
    print(my_model.model.summary())

    logging.info('Training....')
    history = my_model.model.fit(X_train, Y_train, epochs=EPOCHS, verbose=1,
                                 batch_size=BATCH_SIZE,
                                 validation_data=(X_val, Y_val))
    logging.info(f"train loss: {history.history['loss']}")
    logging.info(f"val loss: {history.history['val_loss']}")
    logging.info(f"train accuracy: {history.history['acc']}")
    logging.info(f"val accuracy: {history.history['val_acc']}")

    # Plotting the loss history
    # plot = utils.Plotting(history)
    # plot.plot_loss()
    # plot.plot_accuracy()

    print('Testing...')
    loss, accuracy = my_model.model.evaluate(X_test, Y_test)
    # FIX: the original called logging.info('Testing loss', loss) — the extra
    # argument is treated as a %-format arg for a template with no
    # placeholders, so the logging module raised a formatting error instead of
    # logging the value. Use explicit placeholders.
    logging.info('Testing loss: %s', loss)
    logging.info('Test accuracy: %s', accuracy)
def evaluate_from_workspace(workspace_dir):
    # NOTE(review): rebinds the module-level `args` and `data_loader` names.
    global args, data_loader
    """ Evaluate the model on the test set. """
    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")
    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)
    # data_dir/model_dir are always truthy here, so these keep the local
    # values; the args fallbacks are effectively dead.
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()  # use GPU is available

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(params.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    # load data
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_from_dir(['test'], params.data_dir)
    test_data = data['test']
    # specify the test set size
    params.test_size = test_data['size']
    test_data_iterator = data_loader.data_iterator(test_data, params)
    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    loss_fn = net.loss_fn
    metrics = net.metrics

    logging.info("Starting evaluation")
    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)

    # Evaluate; +1 rounds up so a final partial batch is still covered.
    num_steps = (params.test_size + 1) // params.batch_size
    test_metrics = evaluate(model, loss_fn, test_data_iterator, metrics,
                            params, num_steps)
    save_path = os.path.join(params.model_dir,
                             "metrics_test_{}.json".format(args.restore_file))
    utils.save_dict_to_json(test_metrics, save_path)
def main(config):
    """Load the configured dataset, build the matching trainer, then train or test."""
    set_logger()
    prepare_dirs(config)
    """ NOTE : should fix problems when valid mode is on """
    # Load dataset according to config.dataset.
    if config.dataset == 'nugu':
        train, ans2idx, W_e_init, word2idx = load_skt_nugu_sample_dataset(config)
        valid = train  # no separate validation split for this dataset
    elif config.dataset == 'simque':
        train, valid, W_e_init, word2idx = load_simple_questions_dataset(config)
        ans2idx = None
    else:
        raise Exception('Unsupported dataset:', config.dataset)

    # Pick the trainer implementation for the requested mode.
    mode = config.trainer_mode
    if mode == "G":
        trainer = GTrainer(config, train, valid, W_e_init, word2idx, ans2idx)
    elif mode == "D":
        trainer = DTrainer(config, train, valid, W_e_init, word2idx)
    else:  # "GAN"
        trainer = GANTrainer(config, train, valid, W_e_init, word2idx, ans2idx)

    if config.is_train:
        save_config(config)  # save config file(params.json)
        trainer.train()  # Train!
        return

    # Test mode requires a pretrained model to load.
    if not config.load_path:
        raise Exception("[!] You should specify `load_path` to load a " +
                        "pretrained model")
    if config.interactive:
        trainer.test_interactive()
    else:
        trainer.test()
def runTraining(model_dir, data_dir, restore_file):
    """Train the SUNet model described by <model_dir>/params.json.

    Args:
        model_dir: directory with params.json; also used for logs/checkpoints.
        data_dir: dataset root passed to the dataloaders.
        restore_file: checkpoint basename to resume from, or None for a fresh run.
    """
    json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # use GPU if available
    params.cuda = torch.cuda.is_available()
    print(params.cuda)

    # Set the random seed for reproducible experiments
    torch.manual_seed(231)
    if params.cuda:
        torch.cuda.manual_seed(231)

    # Tensorboard writers: fresh runs log under model_dir, resumed runs under
    # restore_file. FIX: replaced `restore_file == None` with `is None` and
    # collapsed the two branches, which differed only in the base directory.
    tb_base = model_dir if restore_file is None else restore_file
    writer = {
        "train": SummaryWriter(log_dir="Tensorboard/" +
                               os.path.join(tb_base, "train") + ".SUNet"),
        "eval": SummaryWriter(log_dir="Tensorboard/" +
                              os.path.join(tb_base, "eval") + ".SUNet"),
    }

    # Set the logger
    utils.set_logger(os.path.join(model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")
    dataloaders = data_loader.fetch_dataloader(['train', 'val'], data_dir,
                                               params)
    train_dl = dataloaders['train']
    val_dl = dataloaders['val']
    logging.info("- done.")

    # Define the model and optimizer
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function and metrics
    loss_fn = net.loss_fn
    metrics = net.metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, metrics,
                       params, model_dir, restore_file, writer)
def main():
    """Parse CLI options, configure logging, optionally daemonize, then serve."""
    parse_args(config)
    set_logger(config, logger)
    # Detach from the terminal when running as a daemon.
    if config['daemon']:
        daemon()
    # Record our PID so external tooling can manage the process.
    mk_pid_file(config['pidfile'])
    run_server(config)
def main():
    """Main function """
    # Load the parameters
    args = args_parser()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Tensorboard writer for evaluation summaries.
    writer = SummaryWriter(os.path.join(args.model_dir, 'runs', 'eval'))

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Seed the RNGs for reproducibility, then pick the device string.
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)
    params.device = "cuda:0" if params.cuda else "cpu"

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    logging.info("Loading the dataset...")
    test_dl = d_l.get_dataloader(['test'], args.data_dir, params)['test']
    logging.info("- done.")

    # Build the model; on GPU also record the graph for tensorboard.
    model = Net(params)
    if params.cuda:
        model = model.to(params.device)
        writer.add_graph(model, next(iter(test_dl))[0])

    criterion = loss_fn
    metrics = get_metrics()

    logging.info("Starting evaluation")
    # Reload weights from the saved file
    checkpoint_path = os.path.join(args.model_dir,
                                   args.restore_file + '.pth.tar')
    utils.load_checkpoint(checkpoint_path, model)

    # Evaluate and persist the metrics.
    test_metrics = evaluate(model, criterion, test_dl, metrics, params, writer, 0)
    save_path = os.path.join(args.model_dir,
                             "metrics_test_{}.json".format(args.restore_file))
    utils.save_dict_to_json(test_metrics, save_path)
    writer.close()
def run():
    """train the model"""
    # set the logger
    utils.set_logger(config.log_dir)
    logging.info("device: {}".format(config.device))
    # Process the raw data: split text and labels.
    processor = Processor(config)
    processor.process()
    logging.info("--------Process Done!--------")
    # Split a dev set out of the training data.
    word_train, word_dev, label_train, label_dev = load_dev('train')
    # build dataset
    train_dataset = NERDataset(word_train, label_train, config)
    dev_dataset = NERDataset(word_dev, label_dev, config)
    logging.info("--------Dataset Build!--------")
    # get dataset size
    train_size = len(train_dataset)
    # build data_loader
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                              shuffle=True, collate_fn=train_dataset.collate_fn)
    dev_loader = DataLoader(dev_dataset, batch_size=config.batch_size,
                            shuffle=True, collate_fn=dev_dataset.collate_fn)
    logging.info("--------Get Dataloader!--------")
    # Prepare model
    device = config.device
    model = BertNER.from_pretrained(config.roberta_model,
                                    num_labels=len(config.label2id))
    model.to(device)
    # Prepare optimizer
    if config.full_fine_tuning:
        # model.named_parameters(): [bert, classifier, crf]
        # Weight decay is disabled for bias/LayerNorm params; the classifier
        # head and the CRF train with a 5x learning rate vs. the BERT body.
        bert_optimizer = list(model.bert.named_parameters())
        classifier_optimizer = list(model.classifier.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in bert_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': config.weight_decay},
            {'params': [p for n, p in bert_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
            {'params': [p for n, p in classifier_optimizer
                        if not any(nd in n for nd in no_decay)],
             'lr': config.learning_rate * 5, 'weight_decay': config.weight_decay},
            {'params': [p for n, p in classifier_optimizer
                        if any(nd in n for nd in no_decay)],
             'lr': config.learning_rate * 5, 'weight_decay': 0.0},
            {'params': model.crf.parameters(), 'lr': config.learning_rate * 5}
        ]
    # only fine-tune the head classifier
    else:
        param_optimizer = list(model.classifier.named_parameters())
        optimizer_grouped_parameters = [{'params': [p for n, p in param_optimizer]}]
    optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate,
                      correct_bias=False)
    train_steps_per_epoch = train_size // config.batch_size
    # Warm up for ~10% of the epochs, cosine-decay for the remainder.
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=(config.epoch_num // 10) * train_steps_per_epoch,
        num_training_steps=config.epoch_num * train_steps_per_epoch)
    # Train the model
    logging.info("--------Start Training!--------")
    train(train_loader, dev_loader, model, optimizer, scheduler, config.model_dir)
def train(self):
    """Train encoder/decoder over self.dl, checkpointing on best average loss.

    Writes tensorboard scalars every `log_summary_every` steps, logs the
    running average every `print_every` steps, and saves encoder/decoder state
    to <ckpt_dir>/best.pth.tar whenever an epoch's average loss improves.
    """
    set_logger(os.path.join(self.log_dir, 'train.log'), terminal=False)
    epochs = self.hps.num_epochs
    print_every = self.hps.print_every
    log_every = self.hps.log_summary_every
    lr = self.hps.learning_rate
    loss_avg = RunningAverage()
    summary_writer = SummaryWriter(log_dir=self.summ_dir)
    current_best_loss = 1e3  # sentinel: any real average loss will beat this
    encoder_optimizer = optim.Adam(self.encoder.parameters(), lr=lr)
    decoder_optimizer = optim.Adam(self.decoder.parameters(), lr=lr)
    training_pairs = self.dl
    # FIX: `reduce=False` is long deprecated in torch; reduction='none' is the
    # supported equivalent (per-element losses, no reduction).
    criterion = nn.NLLLoss(reduction='none')
    if self.hps.resume:
        log('- load ckpts...')
        self.load_state_dict()
    for epoch in trange(epochs, desc='epochs'):
        loss_avg.reset()
        with tqdm(total=len(training_pairs)) as progress_bar:
            for language_pair, mask_pair in training_pairs:
                language_pair, mask_pair = language_pair.to(
                    self.device), mask_pair.to(self.device)
                loss = self.train_single(language_pair, mask_pair,
                                         encoder_optimizer, decoder_optimizer,
                                         criterion)
                loss_avg.update(loss.item())
                self.global_step += 1
                if self.global_step % log_every == 0:
                    summary_writer.add_scalar('loss_value', loss,
                                              global_step=self.global_step)
                if self.global_step % print_every == 0:
                    log('global step: {}, loss average: {:.3f}'.format(
                        self.global_step, loss_avg()))
                progress_bar.set_postfix(loss_avg=loss_avg())
                progress_bar.update()
        # Checkpoint at epoch end when the epoch's running average improved.
        # NOTE(review): best-loss check placed after the epoch's batch loop
        # (consistent with the per-epoch loss_avg.reset()) — confirm intent.
        if loss_avg() < current_best_loss:
            log('new best loss average found, saving modules...')
            current_best_loss = loss_avg()
            state = {
                'encoder': self.encoder.state_dict(),
                'decoder': self.decoder.state_dict(),
                'global_step': self.global_step,
                'epoch': epoch,
                'loss_avg': loss_avg()
            }
            torch.save(state, os.path.join(self.ckpt_dir, 'best.pth.tar'))
def main():
    """Main function """
    # Load the parameters from json file
    args = args_parser()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Tensorboard writer for training summaries.
    writer = SummaryWriter(os.path.join(args.model_dir, 'runs', 'train'))

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Seed the RNGs for reproducibility, then pick the device string.
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)
    params.device = "cuda:0" if params.cuda else "cpu"

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")
    loaders = d_l.get_dataloader(['train', 'val'], args.data_dir, params)
    train_dl, val_dl = loaders['train'], loaders['val']
    logging.info("- done.")

    # Build the model; on GPU also record the graph for tensorboard.
    model = Net(params)
    if params.cuda:
        model = model.to(params.device)
        writer.add_graph(model, next(iter(train_dl))[0])
    optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function and metrics
    criterion = loss_fn
    metrics = get_metrics()

    # Train the model
    logging.info("Starting training for %d epoch(s)", params.num_epochs)
    train_and_evaluate(model, train_dl, val_dl, optimizer, criterion, metrics,
                       params, args.model_dir, writer, args.restore_file)
    writer.close()
def train(data_dir, model_dir, params):
    """Split the CSV listing 80/20 into train/eval and launch the training pipeline."""
    set_logger(os.path.join(model_dir, 'train.log'))
    # find the CSV file(s)
    all_filenames, all_labels = loadCSV(data_dir)
    # Hold out 20% of the files for evaluation.
    split = train_test_split(all_filenames, all_labels, test_size=0.2)
    train_filenames, eval_filenames, train_labels, eval_labels = split
    training_pipeline(train_filenames, train_labels, eval_filenames,
                      eval_labels, model_dir, params)
def run_sdce(args):
    """Configure and launch the SDCE runner.

    Log/checkpoint layout: workspace/ResnetY2HEstimator/mode_<m>_Pn_<p>/SDCE/<time>/checkpoints.
    """
    # FIX: dropped the pointless f-prefix — the literal has no placeholders.
    config = get_config('./configs/y2h_config_cesd.yml')
    config.log_prefix = f'workspace/ResnetY2HEstimator/mode_{config.mode}_Pn_{config.Pn}/SDCE'
    config.log_dir = os.path.join(config.log_prefix, args.time)
    config.ckpt_dir = os.path.join(config.log_dir, 'checkpoints')
    # FIX: exist_ok avoids the check-then-create race of isdir + makedirs.
    os.makedirs(config.ckpt_dir, exist_ok=True)
    set_logger(config)
    logging.info(config)
    runner = SDCERunner(config)
    runner.run()
def run_ema(args):
    """Configure and launch the EMA Y2H runner.

    Log/checkpoint layout: workspace/ResnetY2HEstimator/mode_<m>_Pn_<p>/EMA/<time>/checkpoints.
    """
    # FIX: dropped the pointless f-prefix — the literal has no placeholders.
    config = get_config('./configs/y2h_config_ema.yml')
    assert config.model == 'ema'
    config.log_prefix = f'workspace/ResnetY2HEstimator/mode_{config.mode}_Pn_{config.Pn}/EMA'
    config.log_dir = os.path.join(config.log_prefix, args.time)
    config.ckpt_dir = os.path.join(config.log_dir, 'checkpoints')
    # FIX: exist_ok avoids the check-then-create race of isdir + makedirs.
    os.makedirs(config.ckpt_dir, exist_ok=True)
    set_logger(config)
    logging.info(config)
    runner = EMAY2HRunner(config)
    runner.run()
def generate(directory: str, recursive=True, pattern='*.imdb', delay=1, dry_run=False, overwrite=False, language="en", fanart=None, fanart_file="folder.jpg", interactive=False, debug=False):
    """
    Traverses the directory Generates the .nfo files.

    Parameters
    ----------
    directory : str
        the directory to traverse
    recursive : bool, optional
        whether to search recursively, by default True
    pattern : str, optional
        the file pattern (glob) to use for identifying the files with the IDs, by default '*.imdb'
    delay : int, optional
        the delay in seconds between web queries, by default 1
    dry_run : bool, optional
        whether to perform a 'dry-run', ie generating .nfo content but not saving them (only outputting them to stdout), by default False
    overwrite : bool, optional
        whether to overwrite existing .nfo files (ie recreating them), by default False
    language : str, optional
        the preferred language for the titles, by default "en"
    fanart : str, optional
        how to deal with fanart, by default None
    fanart_file : str, optional
        the fanart filename to use (when downloading or re-using existing), by default "folder.jpg"
    interactive : bool, optional
        whether to use interactive mode, by default False
    debug : bool, optional
        debug message, by default False
    """
    utils.set_logger(debug=debug)
    # NOTE(review): BUG — this calls `generate` (itself) with a `dir_=` keyword
    # that this function does not accept, which raises TypeError at runtime
    # (or infinite recursion if it did accept it). By analogy with the sibling
    # export() -> export_ids(), the intended callee is almost certainly a
    # helper from the generation module (e.g. generate_nfo) — confirm and fix.
    generate(dir_=directory, idtype='imdb', recursive=recursive,
             pattern=pattern, delay=delay, dry_run=dry_run,
             overwrite=overwrite, language=language, fanart=fanart,
             fanart_file=fanart_file, interactive=interactive)
def make_vecs():
    """Vectorize every train/val/test image folder via data_loader."""
    utils.set_logger('log')
    log(utils.separator())
    # (root directory, listing file) pairs to convert, in the original order.
    jobs = (
        ('./training_data/expressive_all_tr', 'tr_data.txt'),
        ('./training_data/normal_all_tr', 'tr_data.txt'),
        ('./faceScrub_big_train', 'tr_data.txt'),
        ('./validation_data/expressive_all_val', 'val_data.txt'),
        ('./validation_data/normal_all_val', 'val_data.txt'),
        ('./test_data/expressive_all_test', 'test_data.txt'),
        ('./test_data/normal_all_test', 'test_data.txt'),
    )
    for root, listing in jobs:
        data_loader.convert_data_to_vectors(root_path_str=root,
                                            text_file_str=listing)
    log(utils.separator())
def run():
    """Load KGE triples, build the configured embedding model and train it."""
    set_logger()
    # load data
    ent_path = os.path.join(config.data_path, "entities.dict")
    rel_path = os.path.join(config.data_path, "relations.dict")
    ent2id = read_elements(ent_path)
    rel2id = read_elements(rel_path)
    ent_num = len(ent2id)
    rel_num = len(rel2id)
    train_triples = read_triples(os.path.join(config.data_path, "train.txt"),
                                 ent2id, rel2id)
    valid_triples = read_triples(os.path.join(config.data_path, "valid.txt"),
                                 ent2id, rel2id)
    test_triples = read_triples(os.path.join(config.data_path, "test.txt"),
                                ent2id, rel2id)
    logging.info("#ent_num: %d" % ent_num)
    logging.info("#rel_num: %d" % rel_num)
    logging.info("#train triple num: %d" % len(train_triples))
    logging.info("#valid triple num: %d" % len(valid_triples))
    logging.info("#test triple num: %d" % len(test_triples))
    logging.info("#Model: %s" % config.model)
    # Build the model. FIX: the original unconditionally instantiated TransE
    # and then possibly replaced it in the if/elif chain, wasting the first
    # construction; dispatch on the name instead. Unknown names still fall
    # back to TransE, matching the original behavior.
    model_classes = {
        "TransH": TransH,
        "TransR": TransR,
        "TransD": TransD,
        "STransE": STransE,
        "LineaRE": LineaRE,
        "DistMult": DistMult,
        "ComplEx": ComplEx,
        "RotatE": RotatE,
    }
    kge_model = model_classes.get(config.model, TransE)(ent_num, rel_num)
    if config.cuda:
        kge_model = kge_model.cuda()
    logging.info("Model Parameter Configuration:")
    for name, param in kge_model.named_parameters():
        logging.info("Parameter %s: %s, require_grad = %s" %
                     (name, str(param.size()), str(param.requires_grad)))
    # train
    train(model=kge_model,
          triples=(train_triples, valid_triples, test_triples),
          ent_num=ent_num)
def run(params, dirs, seed=None, restore_file=None):
    """Train DeepAR, then evaluate the best checkpoint on the test set."""
    # set random seed to do reproducible experiments
    if seed is not None:
        utils.seed(seed)
    utils.set_logger(os.path.join(dirs.model_dir, 'train.log'))
    logger = logging.getLogger('DeepAR.Train')
    # check cuda is avaliable or not
    use_cuda = torch.cuda.is_available()
    # Set random seeds for reproducible experiments if necessary
    if use_cuda:
        dirs.device = torch.device('cuda:0')
        logger.info('Using Cuda...')
        model = net.Net(params, dirs.device).cuda(dirs.device)
    else:
        dirs.device = torch.device('cpu')
        logger.info('Not using cuda...')
        model = net.Net(params, dirs.device)

    logger = logging.getLogger('DeepAR.Data')
    logger.info('Loading the datasets...')
    train_set = TrainDataset(dirs.data_dir, dirs.dataset)
    vali_set = ValiDataset(dirs.data_dir, dirs.dataset)
    test_set = TestDataset(dirs.data_dir, dirs.dataset)
    train_loader = DataLoader(train_set, batch_size=params.batch_size,
                              pin_memory=False, num_workers=4)
    vali_loader = DataLoader(vali_set, batch_size=params.batch_size,
                             pin_memory=False,
                             sampler=RandomSampler(vali_set), num_workers=4)
    test_loader = DataLoader(test_set, batch_size=params.batch_size,
                             pin_memory=False,
                             sampler=RandomSampler(test_set), num_workers=4)
    logger.info('Data loading complete.')
    logger.info('###############################################\n')

    logger = logging.getLogger('DeepAR.Train')
    logger.info(f'Model: \n{str(model)}')
    logger.info('###############################################\n')
    optimizer = optim.Adam(model.parameters(), lr=params.lr)
    # fetch loss function
    loss_fn = net.loss_fn
    # Train the model
    logger.info('Starting training for {} epoch(s)'.format(params.num_epochs))
    # NOTE(review): BUG — `scheduler` is never defined in this function; this
    # call raises NameError unless it happens to be a module-level global.
    # Either build an LR scheduler here or drop the argument — confirm against
    # train_and_evaluate's signature.
    train_and_evaluate(model, train_loader, vali_loader, optimizer, loss_fn,
                       scheduler, params, dirs, restore_file)
    logger.handlers.clear()
    logging.shutdown()

    load_dir = os.path.join(dirs.model_save_dir, 'best.pth.tar')
    if not os.path.exists(load_dir):
        # Best checkpoint was never written (training aborted) -> nothing to test.
        return
    utils.load_checkpoint(load_dir, model)
    out = evaluate(model, loss_fn, test_loader, params, dirs,
                   istest=True)
    test_json_path = os.path.join(dirs.model_dir, 'test_results.json')
    utils.save_dict_to_json(out, test_json_path)
def apply_file(self, args):
    """Parse the given config file and run the test scenario it selects."""
    utils._init()
    logger = log.Log()
    utils.set_logger(logger)
    conf = utils.ConfFile(args.file)
    mode = conf.get_test_mode()
    # Dispatch on the configured test mode.
    if mode == 'quorum':
        quorum_test = control.QuorumAutoTest(conf)
        # quorum_test.ssh_conn_build()
        quorum_test.test_drbd_quorum()
    if mode == 'drbd_in_used':
        iscsi_test = control.IscsiTest(conf)
        iscsi_test.test_drbd_in_used()
def run_full(args):
    """Configure and launch the full-pipeline runner for the requested mode/Pn."""
    config = get_config('./configs/full_config.yml')
    config.run_mode = args.runner
    config.Pn = args.Pn
    config.log_dir = os.path.join('workspace', config.run_mode,
                                  f'mode_{config.mode}_Pn_{config.Pn}',
                                  args.time)
    # FIX: config.ckpt_dir was checked/created without ever being derived from
    # the freshly computed log_dir; the sibling runners (run_sdce/run_ema) set
    # it to '<log_dir>/checkpoints', so without this line checkpoints would go
    # to a stale/unrelated location from the yaml.
    config.ckpt_dir = os.path.join(config.log_dir, 'checkpoints')
    if not os.path.isdir(config.ckpt_dir):
        os.makedirs(config.ckpt_dir)
    set_logger(config)
    logging.info(config)
    runner = FullRunner(config)
    runner.run()
def init():
    """Parse CLI flags, load config, seed all RNGs, and set up writer + logger.

    Returns the parsed argparse namespace.
    """
    parser = argparse.ArgumentParser(description=globals()['__doc__'])
    parser.add_argument('--config', type=str, required=True,
                        help='Path to the config file')
    parser.add_argument('--workspace', type=str, required=True,
                        help='Path to the workspace')
    parser.add_argument('--mode', type=str, default='train',
                        help='Train, valid or test the model (or others)')
    args = parser.parse_args()

    # set config
    cfg = load_yaml(args.config)
    cfg["device"] = torch.device('cuda') if torch.cuda.is_available() \
        else torch.device('cpu')
    cfg["workspace_root"] = args.workspace
    config.set_config(cfg)

    # set writer
    summary_root = os.path.join(config.workspace_root, "summary")
    if not os.path.exists(summary_root):
        os.makedirs(summary_root)
    writer.set_path(summary_root)

    # set seed everywhere (torch / numpy / random / all CUDA devices)
    seed = config.get("others", "seed", default=1234)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # set logger, one log file per run mode
    log_root = os.path.join(config.workspace_root, "logs")
    if not os.path.exists(log_root):
        os.makedirs(log_root)
    set_logger(os.path.join(log_root, "{}.log".format(args.mode)))
    logging.info("running @ {}".format(socket.gethostname()))
    logging.info(config)
    return args
def hyperparamSearch(X_train, Y_train, X_dev, Y_dev, X_test, Y_test, lr_rng,
                     num_hid_layers_rng, beta_rng, k_p_rng, reg_type,
                     size_hid_layers_rng, num_sims, num_epochs, minibatch_size,
                     log_dir, parallel=False, cores=1):
    """Random-search hyperparameters and rank trained models by dev error.

    Draws ``num_sims`` uniform samples from the supplied ranges (learning
    rate on a log10 scale, hidden-layer count floored to int, hidden units
    as a multiple of the input feature count), trains one model per sample
    (serially, or via ``multi_sim`` across ``cores`` when ``parallel``),
    logs each model's errors, and returns a NumPy array of
    [params..., errors...] rows sorted ascending by the last column
    (minimum dev error).
    """
    num_batches, batch_length, sequence_length, num_features = X_train.shape
    num_params = 5
    np.random.seed(13)  # fixed seed so the random search is reproducible
    lower_bounds = [lr_rng[0], num_hid_layers_rng[0], size_hid_layers_rng[0],
                    beta_rng[0], k_p_rng[0]]
    upper_bounds = [lr_rng[1], num_hid_layers_rng[1], size_hid_layers_rng[1],
                    beta_rng[1], k_p_rng[1]]
    sample_size = [num_sims, num_params]  # num_sims x number of params in search
    samples_params = np.random.uniform(lower_bounds, upper_bounds, sample_size)

    # Transform raw uniform samples into usable hyperparameters.
    lr_samples = 10 ** samples_params[:, 0]        # log scale
    hl_samples = samples_params[:, 1].astype(int)  # floored to nearest int
    hu_samples = (samples_params[:, 2] * num_features).astype(int)
    beta = samples_params[:, 3]
    k_p = samples_params[:, 4]

    # BUG FIX: the original log_name referenced `learning_rate`,
    # `num_hid_layers`, `size_hid_layers` and `cwd`, none of which exist in
    # this scope (guaranteed NameError). Build the name from the search-level
    # quantities that are actually defined here.
    cwd = os.getcwd()
    log_name = ("Search_" + str(num_sims) + "_" + str(num_epochs) + "_" +
                str(num_hid_layers_rng) + "_" + str(size_hid_layers_rng))
    utils.set_logger(os.path.join(cwd + "/" + log_dir, log_name + '.log'))
    utils.logging.info("lr_rng = " + str(lr_rng) + " hidden layers rng = " +
                       str(num_hid_layers_rng) + " hidden units rng = " +
                       str(size_hid_layers_rng) + " num sims = %d", num_sims)

    # Placeholder row so np.vstack has something to stack onto; stripped below.
    results = [0.0] * 11
    if parallel:  # Need cores
        print("parallelizing the training")
        results = multi_sim(X_train, Y_train, X_dev, Y_dev, X_test, Y_test,
                            lr_samples, beta, k_p, reg_type, num_epochs,
                            hl_samples, hu_samples, minibatch_size, log_dir,
                            False, False, 10, 100, True, cores)
        print(results)
    else:
        for i in range(len(lr_samples)):
            (train_err, Y_train, batch_pred, Y_dev, dev_pred, dev_err,
             min_dev, min_epoch, test_err) = model(
                X_train, Y_train, X_dev, Y_dev, X_test, Y_test,
                lr_samples[i], beta[i], k_p[i], reg_type, num_epochs,
                hl_samples[i], hu_samples[i], minibatch_size, log_dir,
                False, False)  # call model funct
            temp_results = np.array([lr_samples[i], hl_samples[i],
                                     hu_samples[i], beta[i], k_p[i],
                                     num_epochs, train_err, dev_err,
                                     test_err, min_epoch, min_dev])
            utils.logging.info("START OF NEW MODEL")
            utils.logging.info("learning rate = %f, hidden layers = %d, hidden units = %d, beta = %f, keep_prob = %f, epochs = %d, reg_type = %s", lr_samples[i], hl_samples[i], hu_samples[i], beta[i], k_p[i], num_epochs, reg_type)
            utils.logging.info("Train Err = %f, Dev Err = %f, Test Err = %f, Min Dev Err = %f, Min Epoch = %d", train_err, dev_err, test_err, min_dev, min_epoch)
            results = np.vstack((results, temp_results))

    # NOTE(review): stripping the first row assumes the parallel branch's
    # multi_sim also returns a leading placeholder row -- confirm against
    # multi_sim's implementation.
    results = results[1:, :]                     # drop placeholder row
    results = results[results[:, -1].argsort()]  # sort by lowest dev error
    utils.logging.info("RESULTS")
    utils.logging.info(str(results))
    return results
def hyperparas_search(self):
    """Set up logger, model, optimizer and SVDD loss, then train/evaluate.

    Optionally restores model weights from ``self.params.restore_file``
    before training; the actual loop is delegated to
    ``self.train_and_evaluate``.
    """
    # Set the logger
    utils.set_logger(
        os.path.join(self.params.model_dir, 'params_opt_train.log'))
    # Create the input data pipeline
    logging.info("Loading the datasets...")
    # fetch dataloaders
    # train_dl = self.dataset_train
    # val_dl = self.dataset_eval
    logging.info("- done.")
    # Define the model and optimizer
    # Load the checkpoint provided by the original paper
    checkpoint = self.params.restore_file
    # if os.path.exists(checkpoint):
    #     print("{} load !".format(checkpoint))
    #     self.ckpt = torch.load(checkpoint)
    #     self.model.load_state_dict(self.ckpt['net_dict'])
    if checkpoint is not None:
        # TODO: there is a bug here to fix
        restore_path = checkpoint  # checkpoints/ped2/code_length_128.pth.tar
        print("restore_path: ", restore_path)
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, self.model)
    self.model = self.model.to(self.device)
    optimizer = optim.Adam(self.model.parameters(), lr=self.params.LR)
    # fetch loss function and metrics
    self.c = load_init_center_c(self.params.dataset_name,
                                self.params.code_length).to(self.device)
    self.loss = LSALoss_deepSVDD(lam_rec=self.params.lam_rec,
                                 lam_svdd=self.params.lam_svdd,
                                 c=self.c,
                                 R=self.params.R,
                                 nu=self.params.nu,
                                 objective=self.params.objective)
    loss_fn = self.loss
    metrics = utils.metrics
    # Train the model
    logging.info("Starting training for {} epoch(s)".format(
        self.params.epoch))
    # restore_file = self.params.restore_file_path  # defaults to None, TODO
    restore_file = None
    self.train_and_evaluate(self.model, self.dataset_train,
                            self.dataset_eval, optimizer, loss_fn, metrics,
                            self.params, self.params.model_dir, restore_file)
def set_baseline_dataset(self):
    """Fetch train/test dataloaders for the baseline run and attach to self.

    Uses the subset loader for training when ``params.subset_percent`` is
    below 1.0, otherwise the full loader.
    """
    utils.set_logger(os.path.join(self.model_dir, 'train.log'))
    logging.info("Loading the datasets...")
    use_subset = self.params.subset_percent < 1.0
    train_loader = (datautils.fetch_subset_dataloader('train', self.params)
                    if use_subset
                    else datautils.fetch_dataloader('train', self.params))
    test_loader = datautils.fetch_dataloader('test', self.params)
    logging.info("- done.")
    self.trainloader = train_loader
    self.testloader = test_loader
def main():
    """Entry point: parse args, configure seed/logger, and train the model."""
    logging.info("Transformer implementation")
    parser = argparse.ArgumentParser(description="Transformer CRF implementation")
    opt = parse_arguments_t(parser)
    conf = Config(opt)
    set_seed(opt, conf.seed)
    # route all output through the shared log file
    utils.set_logger(os.path.join("log", opt.log_name))
    # dump every CLI option for the record
    for key, value in vars(opt).items():
        logging.info(key + ": " + str(value))
    trains, devs = prepare_data(logging, conf)
    train_model(config=conf, train_insts=trains, dev_insts=devs)
def run_y2h(args):
    """Configure and launch a Y2H estimator run for the given CLI args.

    Picks the FC or CNN workspace sub-directory based on ``args.run_mode``,
    creates the timestamped checkpoint directory, then runs a Y2HRunner.
    """
    cfg = get_config(f'./configs/y2h_config_{args.run_mode}.yml')
    cfg.model = args.run_mode
    base = f'workspace/ResnetY2HEstimator/mode_{cfg.mode}_Pn_{cfg.Pn}'
    if cfg.model == 'fc':
        cfg.log_prefix = base + '/FC'
    elif cfg.model == 'cnn':
        cfg.log_prefix = base + '/CNN'
    cfg.log_dir = os.path.join(cfg.log_prefix, args.time)
    cfg.ckpt_dir = os.path.join(cfg.log_dir, 'checkpoints')
    if not os.path.isdir(cfg.ckpt_dir):
        os.makedirs(cfg.ckpt_dir)
    set_logger(cfg)
    logging.info(cfg)
    Y2HRunner(cfg).run()
def runEvaluate(model_dir, data_dir, restore_file):
    """Evaluate the model on the test set.

    Loads hyperparameters from ``model_dir/params.json``, restores weights
    from ``restore_file``, runs evaluation over the test dataloader, and
    writes the metrics to ``metrics_test_<restore_file>.json``.
    """
    # Load the hyperparameters
    json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    params.cuda = torch.cuda.is_available()  # use GPU if available

    # Fixed seed for reproducible evaluation
    torch.manual_seed(231)
    if params.cuda:
        torch.cuda.manual_seed(231)

    utils.set_logger(os.path.join(model_dir, 'evaluate.log'))

    logging.info("Creating the dataset...")
    test_dl = data_loader.fetch_dataloader(['test'], data_dir, params)['test']
    logging.info("- done.")

    # Build the model on the appropriate device
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    loss_fn = net.loss_fn
    metrics = net.metrics

    logging.info("Starting evaluation")
    # Reload weights from the saved checkpoint
    utils.load_checkpoint(
        os.path.join(model_dir, restore_file + '.pth.tar'), model)

    test_metrics = evaluate(model, loss_fn, test_dl, metrics, params)
    save_path = os.path.join(
        model_dir, "metrics_test_{}.json".format(restore_file))
    utils.save_dict_to_json(test_metrics, save_path)
# Launch training with this config cmd = "{python} train.py --model_dir={model_dir}".format(python=PYTHON, model_dir=model_dir) print(cmd) check_call(cmd, shell=True) if __name__ == "__main__": # Load the "reference" parameters from parent_dir json file args = parser.parse_args() json_path = os.path.join(args.parent_dir, 'params.json') assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path) params = utils.Params(json_path) # Set the logger utils.set_logger(os.path.join(args.parent_dir, 'search_hyperparameters.log')) ''' Temperature and alpha search for KD on CNN (teacher model picked in params.json) Perform hypersearch (empirical grid): distilling 'temperature', loss weight 'alpha' ''' # hyperparameters for KD alphas = [0.99, 0.95, 0.5, 0.1, 0.05] temperatures = [20., 10., 8., 6., 4.5, 3., 2., 1.5] logging.info("Searching hyperparameters...") logging.info("alphas: {}".format(alphas)) logging.info("temperatures: {}".format(temperatures)) for alpha in alphas:
""" # Load the parameters args = parser.parse_args() json_path = os.path.join(args.model_dir, 'params.json') assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path) params = utils.Params(json_path) # use GPU if available params.cuda = torch.cuda.is_available() # use GPU is available # Set the random seed for reproducible experiments torch.manual_seed(230) if params.cuda: torch.cuda.manual_seed(230) # Get the logger utils.set_logger(os.path.join(args.model_dir, 'analysis.log')) # Create the input data pipeline logging.info("Loading the dataset...") # fetch dataloaders # train_dl = data_loader.fetch_dataloader('train', params) # dev_dl = data_loader.fetch_dataloader('dev', params) dataloader = data_loader.fetch_dataloader(args.dataset, params) logging.info("- done.") # Define the model graph model = resnet.ResNet18().cuda() if params.cuda else resnet.ResNet18() # fetch loss function and metrics
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer from sklearn.cross_validation import train_test_split from sklearn import linear_model, datasets from sklearn import metrics from sklearn import preprocessing from sklearn.cross_validation import cross_val_score from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import confusion_matrix from pandas import * from settings import DATA_DIR, LOG_DIR import utils TOTAL_INDEX = 'djia gspc ixic'.split() # dow jones. snp500, nasdaq, vol EXPID = utils.get_expid() utils.set_logger('%s/%s.log' % (LOG_DIR, EXPID), 'DEBUG') # TODO: log configurations (ex: parsing method etc) and/or commit id def openfiles(filename, arg): data = pd.read_csv(filename, sep='\t', header = 0) data = data.where((pd.notnull(data)), '') # Replace np.nan with '' if arg == 100: # X columns = ['id', 'text', 'closePrice', 'week', 'month', 'quater', 'year','djia', 'gspc', 'ixic', 'vix'] data.columns = columns value = pd.DataFrame(data) value.index = data['id'] else: # y columns = TOTAL_INDEX[arg] value = pd.DataFrame(data[TOTAL_INDEX[arg]])
def main():
    """CLI entry point for ZomPHP dead-code detection.

    Parses the command line, validates that exactly one of --dir/--files is
    given (with absolute paths), then processes the target directory or
    files through the backend and logs the resulting stats.
    """
    # argument processing
    parser = argparse.ArgumentParser(description='Detect your PHP dead code')
    parser.add_argument('--dir', dest='dir', metavar='dir_path', type=str,
                        nargs=1, default=None,
                        help='Make ZomPHP process that directory')
    parser.add_argument('--ignore-sub-dirs', dest='ignore_sub_dirs',
                        metavar='dir_path', type=str, nargs='+', default=[],
                        help='A directory path (or list of those) that won\'t '
                        'be processed (only makes sense when used with the '
                        '--dir option)')
    parser.add_argument('--files', dest='files', metavar='file_path', type=str,
                        nargs='+', default=[],
                        help='A file or list of files (given as absolute paths'
                        ') to have processed by ZomPHP')
    parser.add_argument('--strict', dest='strict', action='store_const',
                        const=True, default=False, help='If set to true, will'
                        ' guarantee that any function NOT marked is indeed '
                        'used, but might also yield more false negatives (this'
                        ' option should only be used if you have files '
                        'containing functions with the same name)')
    parser.add_argument('--path-translation', dest='path_translation',
                        metavar='local_path path_in_db', type=str, nargs='+',
                        default=[], help='A list of couples of paths to '
                        'translate (useful if running the code in a different '
                        'location than the one running the PHP code)')
    parser.add_argument('--logging-level', dest='logging_level',
                        metavar='level', type=str, nargs=1, default=None,
                        help='A logging '
                        'level to override the one set in the settings file')
    args = parser.parse_args()

    # start the logger
    utils.set_logger()

    # some sanity checks
    def check_abs_path(path, option_name):
        # helper function, checks the paths are absolute, and translates them to real paths
        # (recurses over lists/tuples; exits the process on a relative path)
        if not path:
            return path
        if isinstance(path, (tuple, list)):
            return [check_abs_path(p, option_name) for p in path]
        if os.path.isabs(path):
            return os.path.realpath(path)
        logging.error('The --%s option requires using absolute paths (you entered %s) exiting' % (option_name, path))
        sys.exit(1)

    # exactly one of --dir / --files must be given (both truthy or both
    # falsy is an error)
    if bool(args.dir) == bool(args.files):
        logging.error('You must specify exactly one of the --dir or --files options, exiting')
        sys.exit(1)
    args.dir = check_abs_path(args.dir, 'dir')
    args.files = check_abs_path(args.files, 'files')
    if args.ignore_sub_dirs:
        if args.dir:
            args.ignore_sub_dirs = check_abs_path(args.ignore_sub_dirs, 'ignore-sub-dirs')
        else:
            # --ignore-sub-dirs is meaningless without --dir; warn, don't fail
            logging.warning('Ignoring the --ignore-sub-dirs option, that option can only be used together with the --dir option')
    translator = utils.PathTranslator.build_translator(args.path_translation)
    # down to work!
    bckend = backend.get_new_backend()
    if args.dir:
        bckend.process_directory(args.dir[0], strict=args.strict,
                                 ignore_sub_dirs=args.ignore_sub_dirs,
                                 translator=translator)
    else:
        # then it must be --files
        for fle in args.files:
            bckend.process_file(fle, args.strict, translator=translator)
    logging.info(bckend.stats)
# Load the parameters from json file args = parser.parse_args() json_path = os.path.join(args.model_dir, 'params.json') assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path) params = utils.Params(json_path) # use GPU if available params.cuda = torch.cuda.is_available() # Set the random seed for reproducible experiments random.seed(230) torch.manual_seed(230) if params.cuda: torch.cuda.manual_seed(230) # Set the logger utils.set_logger(os.path.join(args.model_dir, 'train.log')) # Create the input data pipeline logging.info("Loading the datasets...") # fetch dataloaders, considering full-set vs. sub-set scenarios if params.subset_percent < 1.0: train_dl = data_loader.fetch_subset_dataloader('train', params) else: train_dl = data_loader.fetch_dataloader('train', params) dev_dl = data_loader.fetch_dataloader('dev', params) logging.info("- done.") """Based on the model_version, determine model/optimizer and KD training mode