def main(address):
    directories = glob.glob(address)
    for directory in directories:
        if files_exits(directory):
            print(directory)
            args = utils.load_model_config(directory)
            lc_path, log_path, experiment = parse_args(directory)
            # the learning-curves file stores a pickled dict
            x = np.load(lc_path, allow_pickle=True)[()]
            experiment = generate_experiment_string(args)
            epoch_times = extract_epoch_time(log_path)
            train_ppls, val_ppls, train_losses, val_losses = [
                x['train_ppls'], x['val_ppls'], x['train_losses'], x['val_losses']
            ]
            plots(train_losses, val_losses, train_ppls, val_ppls, epoch_times,
                  experiment, directory)
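
# `files_exits` is called above but not defined in this snippet; the sketch
# below shows the kind of existence check it presumably performs. The file
# names are assumptions (only best_params.pt and exp_config.txt are named in
# this repo's help strings), and the original helper may differ.
import os


def files_exist_sketch(directory):
    required = ('exp_config.txt', 'best_params.pt', 'learning_curves.npy')
    return all(os.path.exists(os.path.join(directory, name)) for name in required)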
def main():
    """Main predict function for the wikikg90m dataset."""
    args = ArgParser().parse_args()
    config = load_model_config(
        os.path.join(args.model_path, 'model_config.json'))
    args = use_config_replace_args(args, config)
    dataset = get_dataset(args, args.data_path, args.dataset, args.format,
                          args.delimiter, args.data_files,
                          args.has_edge_importance)
    print("Load the dataset done.")
    eval_dataset = EvalDataset(dataset, args)

    model = BaseKEModel(
        args=args,
        n_entities=dataset.n_entities,
        n_relations=dataset.n_relations,
        model_name=args.model_name,
        hidden_size=args.hidden_dim,
        entity_feat_dim=dataset.entity_feat.shape[1],
        relation_feat_dim=dataset.relation_feat.shape[1],
        gamma=args.gamma,
        double_entity_emb=args.double_ent,
        cpu_emb=args.cpu_emb,
        relation_times=args.ote_size,
        scale_type=args.scale_type)
    print("Create the model done.")

    model.entity_feat = dataset.entity_feat
    model.relation_feat = dataset.relation_feat
    load_model_from_checkpoint(model, args.model_path)
    print("The model load the checkpoint done.")

    if args.infer_valid:
        valid_sampler_tail = eval_dataset.create_sampler(
            'valid',
            args.batch_size_eval,
            mode='tail',
            num_workers=args.num_workers,
            rank=0,
            ranks=1)
        infer(args, model, config, 0, [valid_sampler_tail], "valid")

    if args.infer_test:
        test_sampler_tail = eval_dataset.create_sampler(
            'test',
            args.batch_size_eval,
            mode='tail',
            num_workers=args.num_workers,
            rank=0,
            ranks=args.num_proc)
        infer(args, model, config, 0, [test_sampler_tail], "test")
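
# Hypothetical entry point (a minimal sketch; the original module may wire up
# its own launcher elsewhere). Shown only so the snippet can be run directly.
if __name__ == "__main__":
    main()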
def summarize_models(address):
    directories = glob.glob(address)
    (train_ppls_list, val_ppls_list, train_losses_list, val_losses_list,
     experiment_list, directory_list, epoch_times_list) = [], [], [], [], [], [], []
    for directory in directories:
        if files_exits(directory):
            print(directory)
            args = utils.load_model_config(directory)
            lc_path, log_path, experiment = parse_args(directory)
            # the learning-curves file stores a pickled dict
            x = np.load(lc_path, allow_pickle=True)[()]
            experiment = generate_experiment_string(args)
            epoch_times = extract_epoch_time(log_path)
            train_ppls, val_ppls, train_losses, val_losses = [
                x['train_ppls'], x['val_ppls'], x['train_losses'], x['val_losses']
            ]
            train_ppls_list.append(train_ppls)
            val_ppls_list.append(val_ppls)
            train_losses_list.append(train_losses)
            val_losses_list.append(val_losses)
            experiment_list.append(experiment)
            directory_list.append(directory)
            epoch_times_list.append(epoch_times)
    summarize_plots(train_losses_list, val_losses_list, train_ppls_list,
                    val_ppls_list, epoch_times_list, experiment_list,
                    directory_list)
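
# Hypothetical usage of the two plotting helpers above (a sketch): both expect
# a glob pattern that expands to the individual saved-model directories. The
# path 'saved_models/*' is an assumption, not taken from the original code.
if __name__ == '__main__':
    main('saved_models/*')               # one figure per model directory
    summarize_models('saved_models/*')   # one combined summary figure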
parser = argparse.ArgumentParser(
    description='PyTorch Penn Treebank Language Modeling')
parser.add_argument('--saved_models_dir',
                    type=str,
                    help='Directory with saved models \
                    (best_params.pt and exp_config.txt must be present there). \
                    All of its individual subdirectories will be iterated')
saved_model_dir = parser.parse_args().saved_models_dir

plt.figure()  # (figsize=(12, 12))
dirs = [
    x[0] for x in os.walk(saved_model_dir)
    if x[0] != saved_model_dir and 'TRANSFORMER' not in x[0]
]
for dir_name in dirs:
    args = utils.load_model_config(dir_name)
    x_raw = np.load(os.path.join(dir_name, 'timestep_grads.npy'))
    x = minmax_scale(x_raw)
    plt.plot(x, marker='o', label=args['model'])

plt.title('Final time-step loss gradient wrt hidden states')
plt.xlabel("Hidden state (concatenated)")
plt.ylabel("Rescaled gradient norm")
plt.grid()
plt.legend()
for dir_name in dirs:
    plt.savefig('{}/timestamp_grads_corrected.png'.format(dir_name))
plt.show()
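
# The `minmax_scale` call above presumably comes from sklearn.preprocessing.
# A NumPy-only equivalent for the 1-D gradient-norm array, as a hedged
# fallback sketch (not part of the original script):
def minmax_scale_1d(values):
    values = np.asarray(values, dtype=float)
    span = values.max() - values.min()
    if span == 0:
        return np.zeros_like(values)
    return (values - values.min()) / span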
        training_data.iloc[::downsample_full_train].loc[:, feature_cols],
        training_data.iloc[::downsample_full_train][target])
    save_model(model, model_name)
    gc.collect()

    model_config["feature_cols"] = feature_cols
    model_config["targets"] = targets
    model_config["best_pred_col"] = best_pred_col
    model_config["riskiest_features"] = riskiest_features
    print(f"saving model config for {model_config_name}")
    save_model_config(model_config, model_config_name)
else:
    # load model config from previous model selection loop
    print(f"loading model config for {model_config_name}")
    model_config = load_model_config(model_config_name)
    feature_cols = model_config["feature_cols"]
    targets = model_config["targets"]
    best_pred_col = model_config["best_pred_col"]
    riskiest_features = model_config["riskiest_features"]

"""Things that we always do even if we've already trained"""
gc.collect()

print("reading tournament_data")
live_data = pd.read_parquet('v4/live.parquet')
print("reading validation_data")
validation_data = pd.read_parquet('v4/validation.parquet')
print("reading example_predictions")
example_preds = pd.read_parquet('v4/live_example_preds.parquet')
print("reading example_validation_predictions")
validation_example_preds = pd.read_parquet(
                    type=str,
                    help='Directory with saved models \
                    (best_params.pt and exp_config.txt must be present there). \
                    All of its individual subdirectories will be iterated')
parser.add_argument('--generated_seq_len',
                    type=int,
                    default=35,
                    help='length of generated sequences')
args = parser.parse_args()
output_dir = args.saved_models_dir
seq_len = args.generated_seq_len

# load model configuration (replaces the CLI args with the saved experiment config)
args = load_model_config(output_dir)

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)

# LOAD DATA
print('Loading data from ' + args.data)
raw_data = ptb_raw_data(data_path=args.data)
train_data, valid_data, test_data, word_to_id, id_2_word = raw_data
vocab_size = len(word_to_id)
print('  vocabulary size: {}'.format(vocab_size))

if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,