def __init__(self, dataset_file, cv=10, random_seed=const.RANDOM_SEED):
    super().__init__(dataset_file)
    if cv <= 0:
        raise ValueError("cv must be a positive integer, but got %d" % cv)
    self.cv = cv
    fix_random_seed(random_seed)
    self._split_cross_val_data()
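# Usage sketch (hypothetical): this initializer appears to belong to a
# cross-validation dataset wrapper; the class name is not shown here, so
# `CrossValDataset` below is a placeholder.
#
#   dataset = CrossValDataset('data/corpus.txt', cv=10,
#                             random_seed=const.RANDOM_SEED)
#   # the cv splits are prepared eagerly by _split_cross_val_data()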
def main():
    fix_random_seed(RANDOM_SEED)

    model = vgg16()
    model.classifier[6] = nn.Linear(4096, 2)  # 2 is the number of classes

    # alternative backbone:
    # model = resnet18()
    # num_filters = model.fc.in_features
    # model.fc = nn.Linear(num_filters, 2)  # 2 is the number of classes

    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer_ft = SGD(model.parameters(), lr=0.001, momentum=0.9)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    x_train, y_train = extract_cnrpark_extra_dataset(
        TRAIN_CNRPARK_EXTRA_ANNOTATION)
    x_val, y_val = extract_cnrpark_extra_dataset(VAL_CNRPARK_EXTRA_ANNOTATION)

    best_model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler,
                             x_train, y_train, x_val, y_val, num_epochs=2,
                             plot_path='results.png')
def main():
    fix_random_seed(RANDOM_SEED)

    models = [('vgg', vgg16()), ('resnet18', resnet18()),
              ('resnet50', resnet50())]
    remove_parentheses('/home/m_ulyanov/data/splits/PKLot',
                       ['all.txt', 'train.txt', 'test.txt', 'val.txt'])

    for model_name, model in models:
        model = model.to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer_ft = SGD(model.parameters(), lr=0.001, momentum=0.9)
        exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7,
                                               gamma=0.1)

        # each entry: (train annotation file, val annotation file,
        #              full annotation file, base data dir)
        cross_val_sets = [
            (TRAIN_CNRPARK_EXTRA_ANNOTATION, VAL_CNRPARK_EXTRA_ANNOTATION,
             TRAIN_CNRPARK_EXTRA_ANNOTATION, CNRPARK_EXTRA_DATA_DIR),
            (TRAIN_PKLOT_ANNOTATION, VAL_PKLOT_ANNOTATION,
             TRAIN_PKLOT_ANNOTATION, PKLOT_DATA_DIR)
        ]

        for index, (first_set, second_set) in enumerate(
                permutations(cross_val_sets)):
            # train and validate on the first dataset ...
            x_train, y_train = extract_annotation_file(first_set[0],
                                                       first_set[-1])
            x_val, y_val = extract_annotation_file(first_set[1],
                                                   first_set[-1])
            plot_name = f'./logs/{model_name}_{index}.png'
            logs_path = f'./logs/{model_name}_{index}.txt'
            trained_model = train_model(model, criterion, optimizer_ft,
                                        exp_lr_scheduler, x_train, y_train,
                                        x_val, y_val, plot_path=plot_name,
                                        save=False, num_epochs=7,
                                        log_path=logs_path)

            # ... and test on the second one (cross-dataset evaluation)
            x_test, y_test = extract_annotation_file(second_set[2],
                                                     second_set[-1])
            test_log_path = f'./logs/{model_name}_{index}.csv'
            test_model(trained_model, x_test, y_test, log_path=test_log_path)
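# Note on the loop above: with two datasets, itertools.permutations yields both
# orderings, so each model is trained on one dataset and tested on the other,
# in both directions. A minimal, self-contained illustration:
from itertools import permutations

sets = ['cnrpark', 'pklot']
print(list(permutations(sets)))
# -> [('cnrpark', 'pklot'), ('pklot', 'cnrpark')]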
def train_gan(logger: Logger, experiment_dir: Path, data_dir: Path,
              batch_size: int, z_dim: int, g_filters: int, d_filters: int,
              learning_rate: float, beta_1: float, epochs: int,
              saved_g: bool = False, saved_d: bool = False,
              seed: Optional[int] = None, g_extra_layers: int = 0,
              d_extra_layers: int = 0, scheduler: bool = False) -> None:
    seed = fix_random_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info(f"Train started with seed: {seed}")

    dataset = HDF5ImageDataset(image_dir=data_dir)
    desired_minkowski = pickle.load(
        (data_dir / 'minkowski.pkl').open(mode='rb'))
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                        drop_last=True, pin_memory=True)
    iterations = epochs * len(loader)
    img_size = dataset.shape[-1]
    num_channels = dataset.shape[0]

    # networks
    net_g = Generator(img_size=img_size, z_dim=z_dim,
                      num_channels=num_channels, num_filters=g_filters,
                      num_extra_layers=g_extra_layers).to(device)
    net_d = Discriminator(img_size=img_size, num_channels=num_channels,
                          num_filters=d_filters,
                          num_extra_layers=d_extra_layers).to(device)
    summary(net_g, (z_dim, 1, 1, 1))
    summary(net_d, (num_channels, img_size, img_size, img_size))

    if saved_g:
        net_g.load_state_dict(torch.load(experiment_dir / G_CHECKPOINT_NAME))
        logger.info("Loaded generator checkpoint")
    if saved_d:
        net_d.load_state_dict(torch.load(experiment_dir / D_CHECKPOINT_NAME))
        logger.info("Loaded discriminator checkpoint")

    # criterion and optimizers
    criterion = nn.BCELoss()
    optimizer_g = optim.Adam(net_g.parameters(), lr=learning_rate,
                             betas=(beta_1, 0.999))
    optimizer_d = optim.Adam(net_d.parameters(), lr=learning_rate,
                             betas=(beta_1, 0.999))
    patience = int(3000 / len(loader))
    scheduler_g = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_g, min_lr=1e-6, verbose=True, patience=patience)
    scheduler_d = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_d, min_lr=1e-6, verbose=True, patience=patience)

    # one-sided label smoothing: real targets are 0.9 instead of 1.0
    real_labels = torch.full((batch_size,), fill_value=0.9, device=device)
    fake_labels = torch.zeros((batch_size,), device=device)
    fixed_noise = torch.randn(1, z_dim, 1, 1, 1, device=device)

    def step(engine: Engine, batch: torch.Tensor) -> Dict[str, float]:
        """
        Train step function.

        :param engine: pytorch ignite train engine
        :param batch: batch to process
        :return: batch metrics
        """
        # get a batch of fake images from the generator
        fake_batch = net_g(
            torch.randn(batch_size, z_dim, 1, 1, 1, device=device))

        # 1. Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        batch = batch.to(device)
        optimizer_d.zero_grad()

        # train D with the real and fake batches
        d_out_real = net_d(batch)
        d_out_fake = net_d(fake_batch.detach())
        loss_d_real = criterion(d_out_real, real_labels)
        loss_d_fake = criterion(d_out_fake, fake_labels)

        # mean probabilities
        p_real = d_out_real.mean().item()
        p_fake = d_out_fake.mean().item()

        loss_d = (loss_d_real + loss_d_fake) / 2
        loss_d.backward()
        optimizer_d.step()

        # 2. Update G network: maximize log(D(G(z)))
        loss_g = None
        p_gen = None
        for _ in range(1):
            fake_batch = net_g(
                torch.randn(batch_size, z_dim, 1, 1, 1, device=device))
            optimizer_g.zero_grad()
            d_out_fake = net_d(fake_batch)
            loss_g = criterion(d_out_fake, real_labels)
            # mean probability the discriminator assigns to fresh fakes
            p_gen = d_out_fake.mean().item()
            loss_g.backward()
            optimizer_g.step()

        # Minkowski functional measures on a fixed-noise sample
        cube = net_g(fixed_noise).detach().squeeze().cpu()
        cube = cube.mul(0.5).add(0.5).numpy()
        cube = postprocess_cube(cube)
        cube = np.pad(cube, ((1, 1), (1, 1), (1, 1)), mode='constant',
                      constant_values=0)
        v, s, b, xi = compute_minkowski(cube)

        return {
            'loss_d': loss_d.item(),
            'loss_g': loss_g.item(),
            'p_real': p_real,
            'p_fake': p_fake,
            'p_gen': p_gen,
            'V': v,
            'S': s,
            'B': b,
            'Xi': xi
        }

    # ignite objects
    trainer = Engine(step)
    checkpoint_handler = ModelCheckpoint(dirname=str(experiment_dir),
                                         filename_prefix=CKPT_PREFIX,
                                         save_interval=5, n_saved=50,
                                         require_empty=False)

    # attach running-average metrics
    monitoring_metrics = [
        'loss_d', 'loss_g', 'p_real', 'p_fake', 'p_gen', 'V', 'S', 'B', 'Xi'
    ]
    for metric_name in monitoring_metrics:
        # bind metric_name as a default argument so each lambda keeps its own name
        RunningAverage(alpha=ALPHA,
                       output_transform=lambda out, name=metric_name: out[name]
                       ).attach(trainer, metric_name)

    # attach progress bar
    pbar = ProgressBar()
    pbar.attach(trainer, metric_names=monitoring_metrics)

    @trainer.on(Events.ITERATION_COMPLETED)
    def print_logs(engine):
        if (engine.state.iteration - 1) % PRINT_FREQ == 0:
            fname = experiment_dir / LOGS_FNAME
            columns = ['iter'] + list(engine.state.metrics.keys())
            values = [str(engine.state.iteration)] + [
                str(round(value, 7)) for value in engine.state.metrics.values()
            ]
            with fname.open(mode='a') as f:
                if f.tell() == 0:
                    print('\t'.join(columns), file=f)
                print('\t'.join(values), file=f)
            message = (f"[{engine.state.epoch}/{epochs}]"
                       f"[{engine.state.iteration:04d}/{iterations}]")
            for name, value in engine.state.metrics.items():
                message += f" | {name}: {value:0.5f}"
            pbar.log_message(message)

    trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED,
                              handler=checkpoint_handler,
                              to_save={'net_g': net_g, 'net_d': net_d})

    @trainer.on(Events.EPOCH_COMPLETED)
    def create_plots(engine):
        df = pd.read_csv(experiment_dir / LOGS_FNAME, delimiter='\t')

        # losses
        fig_1 = plt.figure(figsize=(18, 12))
        plt.plot(df['iter'], df['loss_d'], label='loss_d', linestyle='dashed')
        plt.plot(df['iter'], df['loss_g'], label='loss_g')
        plt.xlabel('Iteration number')
        plt.legend()
        fig_1.savefig(experiment_dir / ('loss_' + PLOT_FNAME))
        plt.close(fig_1)

        # discriminator probabilities
        fig_2 = plt.figure(figsize=(18, 12))
        plt.plot(df['iter'], df['p_real'], label='p_real', linestyle='dashed')
        plt.plot(df['iter'], df['p_fake'], label='p_fake', linestyle='dashdot')
        plt.plot(df['iter'], df['p_gen'], label='p_gen')
        plt.xlabel('Iteration number')
        plt.legend()
        fig_2.savefig(experiment_dir / PLOT_FNAME)
        plt.close(fig_2)

        # one plot per Minkowski functional: measured (solid) vs. desired (dashed)
        desired = dict(zip(['V', 'S', 'B', 'Xi'], desired_minkowski))
        colors = {'V': 'b', 'S': 'r', 'B': 'g', 'Xi': 'y'}
        for name in ['V', 'S', 'B', 'Xi']:
            fig = plt.figure(figsize=(18, 12))
            plt.plot(df['iter'], df[name], label=name, color=colors[name])
            plt.plot(df['iter'], [desired[name]] * len(df['iter']),
                     color=colors[name], linestyle='dashed')
            plt.xlabel('Iteration number')
            plt.ylabel(f'Minkowski functional {name}')
            plt.legend()
            fig.savefig(experiment_dir / (f'minkowski_{name}_' + PLOT_FNAME))
            plt.close(fig)

    if scheduler:
        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # step both plateau schedulers on the gap between the measured
            # and desired B and Xi functionals
            desired_b = desired_minkowski[2]
            desired_xi = desired_minkowski[3]
            current_b = engine.state.metrics['B']
            current_xi = engine.state.metrics['Xi']
            delta = abs(desired_b - current_b) + abs(desired_xi - current_xi)
            scheduler_d.step(delta)
            scheduler_g.step(delta)

    @trainer.on(Events.EXCEPTION_RAISED)
    def handle_exception(engine, e):
        if isinstance(e, KeyboardInterrupt) and (engine.state.iteration > 1):
            engine.terminate()
            warnings.warn('KeyboardInterrupt caught. Exiting gracefully.')
            create_plots(engine)
            checkpoint_handler(engine, {
                'net_g_exception': net_g,
                'net_d_exception': net_d
            })
        else:
            raise e

    trainer.run(loader, epochs)
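# Usage sketch (illustrative, not the repo's actual entry point): invoking
# train_gan with the signature defined above; the logger, paths, and
# hyperparameter values are assumptions.
#
#   import logging
#   from pathlib import Path
#
#   logger = logging.getLogger("train")
#   train_gan(logger=logger,
#             experiment_dir=Path('experiments/run_0'),
#             data_dir=Path('data/cubes'),
#             batch_size=32, z_dim=128, g_filters=64, d_filters=64,
#             learning_rate=2e-4, beta_1=0.5, epochs=50,
#             seed=42, scheduler=True)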
with gzip.open(fp / f'Bandit_{source_number}.pickle', 'rb') as f:
    data = pickle.load(f)
x, y, actions, scores, delta = data

# sanity check: accuracy of the propensity model on the whole dataset
_, ps_acc = ps.evaluate(x, y)
print(f'ps acc on the whole dataset: {ps_acc}')

# estimated score of each logged action
predictions_hat = ps.predict(x)
scores_hat = predictions_hat[np.arange(actions.size), actions]

data_list = [x, y, actions, scores_hat, delta]
with gzip.open(fp / f"eBandit_{source_number}.pickle", "wb") as f:
    pickle.dump(data_list, f, protocol=-1)


if __name__ == '__main__':
    fix_random_seed(0)
    dataset_fp = Path('./test/')

    # generate the logging policy by running tuning.py
    # lp = LoggingPolicy(model_path='./models/####.h5')
    # generate_bandit_dataset(fp=dataset_fp, model=lp.model, source_number=1)

    p_model = tf.keras.models.load_model('./models/####.h5')
    generate_estimated_bandit_dataset(fp=dataset_fp, ps=p_model,
                                      source_number=1)
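# Sketch of the fancy indexing used for scores_hat above: for each example i,
# pick the model's predicted score of the action that was actually logged.
# The toy values below are illustrative.
import numpy as np

preds = np.array([[0.1, 0.9],
                  [0.7, 0.3]])            # predicted scores, shape (n, k)
acts = np.array([1, 0])                   # logged action per example, shape (n,)
print(preds[np.arange(acts.size), acts])  # -> [0.9 0.7]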
def main():
    args = get_arguments()

    # experiment name
    if not args.exp_name:
        args.exp_name = '_'.join([args.dataset, args.model])
    print("# Experiment: ", args.exp_name)

    # output folder
    output_folder = os.path.join(args.output_root, args.dataset,
                                 args.exp_name)
    os.makedirs(output_folder, exist_ok=True)
    print("# Output path: ", output_folder)

    # visdom
    global plotter
    if args.use_visdom:
        logging_folder = os.path.join(args.logging_root, args.dataset,
                                      args.exp_name)
        os.makedirs(logging_folder, exist_ok=True)
        plotter = utils.VisdomLinePlotter(
            env_name=args.exp_name,
            logging_path=os.path.join(logging_folder, 'vis.log'))
        print("# Visdom path: ", logging_folder)

    # datasets
    print("# Load datasets")
    train_datasets, val_datasets, test_datasets = get_datasets(
        args.dataset, args.dataset_folder, args.batch_size)
    num_classes = train_datasets[0].num_classes
    vocab = set(train_datasets[0].vocab)
    vocab = vocab.union(set(val_datasets[0].vocab))
    vocab = vocab.union(set(test_datasets[0].vocab))

    # pre-trained word2vec (cached on first load)
    print("# Load pre-trained word2vec")
    pretrained_word2vec_cache = os.path.join(
        os.path.dirname(args.w2v_file), args.dataset + '_w2v.pkl')
    if os.path.isfile(pretrained_word2vec_cache):
        with open(pretrained_word2vec_cache, 'rb') as f:
            pretrained_word2vec = pickle.load(f)
    else:
        pretrained_word2vec = PretrainedWord2Vec(vocab, args.w2v_file)
        with open(pretrained_word2vec_cache, 'wb') as f:
            pickle.dump(pretrained_word2vec, f)

    # train
    print("# Start training")
    for cv, (train_dataset, val_dataset, test_dataset) in enumerate(
            zip(train_datasets, val_datasets, test_datasets)):
        # fix random seed
        utils.fix_random_seed(seed=const.RANDOM_SEED)

        # model
        cnn = get_model(args.model, num_classes, pretrained_word2vec)
        if torch.cuda.is_available():
            cnn.cuda()

        # dataloaders
        train_loader = DataLoader(train_dataset, args.batch_size,
                                  shuffle=True,
                                  collate_fn=sentence_collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                                collate_fn=sentence_collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False,
                                 collate_fn=sentence_collate_fn)

        # optimizer and criterion
        optim = Adadelta(cnn.parameters(), rho=0.95, eps=1e-6)
        criterion = CrossEntropyLoss()

        # training
        if plotter:
            plotter.set_cv(cv)
        output_path = os.path.join(output_folder, 'cv_%d_best.pkl' % cv)
        train(args.num_epochs, cnn, train_loader, optim, criterion,
              val_loader, output_path)

        # evaluation
        utils.load_model(output_path, cnn)
        find_most_similar_words(cnn)
        accuracy = eval(cnn, test_loader)
        print('cross_val:', cv, '\taccuracy:', accuracy)
    img_size=64, z_dim=args.z_dim, num_channels=1, num_filters=64,
    num_extra_layers=0).to(device)
net_g.load_state_dict(torch.load(checkpoint_path))

data = {'V': [], 'S': [], 'B': [], 'Xi': []}

for seed in tqdm(seeds, desc=f"Generate {size}^3, iteration"):
    _ = fix_random_seed(seed)
    noise = torch.randn(1, args.z_dim, args.img_factor, args.img_factor,
                        args.img_factor, device=device)
    cube = net_g(noise).squeeze().detach().cpu()
    cube = cube.mul(0.5).add(0.5).numpy()
    cube = postprocess_cube(cube)
    cube = np.pad(cube, ((1, 1), (1, 1), (1, 1)), mode='constant',
                  constant_values=0)
    v, s, b, xi = compute_minkowski(cube)
    data['V'].append(v)
    data['S'].append(s)
    data['B'].append(b)
    data['Xi'].append(xi)

    two_point_covariance = {}
    grain_value = cube.max()
    for i, direct in enumerate(["x", "y", "z"]):
        two_point_direct = two_point_correlation(cube, i, var=grain_value)
    for fold in range(1, 1 + n_folds):
        for bin in bins:
            bin_fold_len = int(math.ceil(len(bin) / n_folds))
            bin_fold_indexes = bin[bin_fold_len * (fold - 1):
                                   bin_fold_len * fold]
            folds[bin_fold_indexes] = fold

    fold_lens = list(
        map(lambda x: np.where(folds == x + 1)[0].shape[0], range(n_folds)))
    print(f"* Fold sizes: {fold_lens}")

    df.insert(df.shape[1], "fold", folds)
    return df


fix_random_seed(c["SEED"])
os.chdir(c["WORK_DIR"])

# read multiple csvs into a single dataframe
df = read_csvs(args.in_csvs)

# add folds
df = add_folds(df, args.folds)

# add Y column
df, classes = add_Y(df, args.labels_mode)

# create the output directory
if "/" in args.out_csv:
    os.makedirs(os.path.dirname(args.out_csv), exist_ok=True)
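# Toy illustration of the fold assignment above: each stratification bin of
# row indexes is cut into n_folds contiguous chunks, and chunk i is labelled
# fold i + 1. The array values below are made up for the example.
import math
import numpy as np

n_folds = 3
folds = np.zeros(6, dtype=int)
bin_indexes = np.array([0, 1, 2, 3, 4, 5])  # row indexes in one bin
for fold in range(1, 1 + n_folds):
    chunk = int(math.ceil(len(bin_indexes) / n_folds))
    folds[bin_indexes[chunk * (fold - 1):chunk * fold]] = fold
print(folds)  # -> [1 1 2 2 3 3]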
parser.add_argument('--img_factor', type=int, default=1,
                    help="Image size factor")
parser.add_argument('--cpu', action='store_true',
                    help="Run generation on the CPU")
parser.add_argument('--experiment_name', type=str,
                    help='Name of the experiment directory')
parser.add_argument('--checkpoint_name', type=str,
                    help='Name of the net checkpoint')
args = parser.parse_args()

seed = fix_random_seed(args.seed)
device = torch.device(
    "cuda" if torch.cuda.is_available() and not args.cpu else "cpu")
checkpoint_path = (Path('experiments') / args.experiment_name /
                   args.checkpoint_name)

# net_g = torch.load(checkpoint_path).to(device)
net_g = Generator(img_size=64, z_dim=args.z_dim, num_channels=1,
                  num_filters=64, num_extra_layers=0).to(device)
net_g.load_state_dict(torch.load(checkpoint_path))

noise = torch.randn(1, args.z_dim, args.img_factor, args.img_factor,
                    args.img_factor, device=device)
from train_eval import train, valid
from utils import fix_random_seed, print_cuda_usage
from vocab_builder import create_vocab

start_time = datetime.now()
date_str = start_time.strftime("%A %m/%d/%Y %H:%M:%S")
results_str = start_time.strftime("%Y%m%d%H%M")  # results folder prefix

args = arg_parser()  # parse command line arguments
CONFIG = build_config(args, results_str)
sys.stdout = open(CONFIG["LOG_FILE"], 'w')
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print(f'Start Time: {date_str}')
print(f'Setting Random seed: {CONFIG["seed"]}')
fix_random_seed(CONFIG)

classify = CONFIG['classify']  # whether the task is classification or seq2seq

print('Building Design Matrix...')
signals, labels = build_design_matrices(CONFIG, delimiter=" ",
                                        aug_shift_ms=[-1000, -500])

print('Plotting Distribution of Signal Lengths')
seq_lengths = [len(signal) for signal in signals]
figure5(CONFIG["SAVE_DIR"], seq_lengths, 'all')

if CONFIG["classify"] and CONFIG["ngrams"]:
    labels = ['_'.join(label) for label in labels]
        total += labels.size(0)
        correct += torch.sum(predicted.float() == labels.data)

        for image_path, correct_label, predicted_label in zip(
                image_paths, labels.cpu().numpy(), predicted.cpu().numpy()):
            # DataFrame.append was removed in pandas 2.0; this assumes pandas < 2.0
            log_df = log_df.append({
                'image_path': image_path,
                'ground_true_label': int(correct_label),
                'predicted_label': int(predicted_label)
            }, ignore_index=True)

    log_df.to_csv(log_path)
    accuracy = 100 * correct.double() / total
    print(f'Accuracy {accuracy}')


def main():
    # x_test, y_test = extract_cnrpark_extra_dataset(TEST_CNRPARK_EXTRA_ANNOTATION)
    remove_parentheses('/home/m_ulyanov/data/splits/PKLot',
                       ['all.txt', 'train.txt', 'test.txt', 'val.txt'])
    x_test, y_test = extract_annotation_file(TRAIN_PKLOT_ANNOTATION,
                                             PKLOT_DATA_DIR)
    print(f'Count of test examples = {len(x_test)}')

    model = torch.load('./models/last.pth')
    print('Model is loaded!')
    model.to(device)
    test_model(model, x_test, y_test)


if __name__ == '__main__':
    fix_random_seed(RANDOM_SEED)
    main()