def main():
    args = read_args()
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s : " +
        "%(module)s (%(lineno)s) - %(levelname)s - %(message)s")
    network = MLP(784, args.hidden, 10)
    train, train_outs, test, test_outs = get_datasets()
    network.train(train, train_outs, args.iterations, args.batch_size,
                  args.learning_rate, args.reg_lambda, args.lr_decay_rate,
                  args.cost)
    network.evaluate(test, test_outs)
    if args.plot_weights_fn:
        plot_weights(network.W1, args.plot_weights_fn)
        logging.info('Weights plotted to {}'.format(args.plot_weights_fn))
num_workers, batch_size, num_models, devices, lr, percent, n_epoch, dataset_name = (
    args.num_workers, args.batch_size, args.num_models, args.devices, args.lr,
    args.percent, args.n_epoch, args.dataset_name)
noise_rate, noise_type = None, None
ID = dataset_name
input_channel, num_classes, size = get_input_info(dataset_name)


#################################################################################
def _get_labels(dataset_obj, idx):
    return dataset_obj.get_label(idx)
##################################################################################


######################################## Data and Loader #####################################
train_dataset, val_dataset, test_dataset = get_datasets(
    dataset_name, noise_rate, noise_type)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    drop_last=False,
    pin_memory=True,
    sampler=ImbalancedDatasetSampler(train_dataset,
                                     callback_get_label=_get_labels))
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         num_workers=num_workers,
                                         drop_last=False,
                                         shuffle=False,
                                         pin_memory=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
    'dropout': adaptor_dropout,
    'spatial': True,
    'temporal': True
}
model = models.create_model(datasets, model_config, adaptor_config, device=device)
model.load_state_dict(saved['model_state_dict'], strict=False)
if frozen_predictor:
    for param in model.predictor.parameters():
        param.requires_grad_(False)
datasets = utils.get_datasets(datasets, 9, 1)
scaler = utils.ZScoreScaler(datasets['train'].mean, datasets['train'].std)
optimizer = optim.Adam([{
    'params': model.adaptor.parameters()
}, {
    'params': model.predictor.parameters(),
    'lr': 1e-5
}], lr=learning_rate)
loss = utils.get_loss('MaskedMAELoss')
trainer = utils.OursTrainer(model, loss, scaler, device, optimizer,
                            weight_decay, 2, 5)
utils.train_model(datasets=datasets,
                  batch_size=64,
                  folder=saved_folder,
        coord.request_stop(exc)
    finally:
        coord.request_stop()
        coord.join(threads)
    return average_error


if __name__ == '__main__':
    # CLI arguments
    PARSER = argparse.ArgumentParser(description="Evaluate the model")

    # Required arguments
    PARSER.add_argument("--model", required=True, choices=utils.get_models())
    PARSER.add_argument(
        "--dataset", required=True, choices=utils.get_datasets())
    PARSER.add_argument("--checkpoint_dir", required=True)
    PARSER.add_argument("--test", action="store_true")
    PARSER.add_argument("--device", default="/gpu:0")
    ARGS = PARSER.parse_args()

    # Load required model and dataset, overrides default
    MODEL = getattr(
        importlib.import_module("models." + ARGS.model), ARGS.model)()
    DATASET = getattr(
        importlib.import_module("inputs." + ARGS.dataset), ARGS.dataset)()
    DATASET.maybe_download_and_extract()

    print('{}: {} error = {:.3f}'.format(
        datetime.now(), 'test' if ARGS.test else 'validation',
        # When done, ask the threads to stop.
        coord.request_stop()
        # Wait for threads to finish.
        coord.join(threads)
    return best_validation_error_value


if __name__ == '__main__':
    # CLI arguments
    PARSER = argparse.ArgumentParser(description="Train the model")

    # Required arguments
    PARSER.add_argument("--model", required=True, choices=utils.get_models())
    PARSER.add_argument("--dataset", required=True, choices=utils.get_datasets())

    # Restart train or continue
    PARSER.add_argument("--restart", action='store_true')

    # Learning rate decay arguments
    PARSER.add_argument("--lr_decay", action="store_true")
    PARSER.add_argument("--lr_decay_epochs", type=int, default=25)
    PARSER.add_argument("--lr_decay_factor", type=float, default=0.1)

    # L2 regularization arguments
    PARSER.add_argument("--l2_penalty", type=float, default=0.0)

    # Optimization arguments
    PARSER.add_argument("--optimizer", choices=utils.get_optimizers(),
parser.add_argument('--batch_size', type=int, help='batch_size')
parser.add_argument('--dropout', type=float, help='dropout value')
parser.add_argument('--network', type=str, required=True, help='lstm/gru')
parser.add_argument('--dynet-autobatch', type=int, help='')
parser.add_argument('--dynet-mem', type=int, help='')
parser.add_argument('--include_embeddings', type=int, help='')
parser.add_argument(
    '--features',
    type=int,
    help='whether or not to represent input as phonological features')
args = parser.parse_args()

id = args.running_id
model = dy.Model()
ablation_mask = [1, 1, 1, 1, 1, 1]  # ["rm", "fr", "it", "sp", "pt", "lt"]
train, dev, test, test_missing = utils.get_datasets(id, ablation_mask)
letters, C2I, I2C = utils.create_voc(id)
latin_embeddings = LatinEmbeddings()
encoder = Encoder(model, C2I) if not args.features else FeaturesEncoder(
    model, C2I)
encoders = []
for i in range(6):  # 6 language encoders + separator encoder
    # encoder = Encoder(model, C2I)
    encoders.append(encoder)
encoders.append(Encoder(model, C2I))
attention_recorder = AttentionRecorder()
embedding_collector = Collector(encoders, "voc/voc.txt",
                                "embeddings/embeddings", args.features)
network = Network(C2I,
            num_const += 1
        else:
            # var = tf.Variable(init(shape=[dim]))
            # embed_list.append(var)
            embed_list.append(embed_list[2])
            num_vars += 1
    print num_const, num_vars
    return tf.stack(embed_list, axis=0)


if __name__ == '__main__':
    assert sys.argv[1] in models.keys()
    print 'using model', sys.argv[1]

    print 'loading data'
    start = time()
    trainset, dev, test, vocab = utils.get_datasets(batch_size=BATCH_SIZE,
                                                    num_words=VOCAB_SIZE,
                                                    seq_len=SEQ_LEN)
    print 'took', time() - start, 'seconds'

    start = time()
    print 'getting embeddings'
    embeddings = utils.get_embeddings(vocab, './glove.6B/glove.6B.300d.txt')
    print 'took', time() - start, 'seconds'

    print 'initializing embeddings'
    start = time()
    embeddings = init_embeddings(embeddings, vocab, 300)
    print 'took', time() - start, 'seconds'

    print 'begin training'
    train(vocab, embeddings, trainset, dev, test)
def get_dataloader(dataset_dir: str) -> DataLoader:
    datasets = get_datasets(dataset_dir)
    return DataLoader(datasets["test"],
                      batch_size=32,
                      shuffle=False,
                      num_workers=8)
def main():
    global args, best_prec1
    global param_avg, train_loss, train_err, test_loss, test_err, arr_time

    args = parser.parse_args()
    set_seed(args.randomseed)

    # Check the save_dir exists or not
    print(args.save_dir)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Define model
    model = torch.nn.DataParallel(get_model(args))
    model.cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            print('from ', args.start_epoch)
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Prepare Dataloader
    train_loader, val_loader = get_datasets(args)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    if args.half:
        model.half()
        criterion.half()

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=0.001,
                                     weight_decay=args.weight_decay)

    if args.datasets == 'CIFAR10':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[100, 150], last_epoch=args.start_epoch - 1)
    elif args.datasets == 'CIFAR100':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[150], last_epoch=args.start_epoch - 1)

    if args.arch in ['resnet1202', 'resnet110']:
        # for resnet1202 original paper uses lr=0.01 for first 400 minibatches for warm-up
        # then switch back. In this setup it will correspond for first epoch.
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr * 0.1

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    is_best = 0
    save_checkpoint(
        {
            'epoch': 0,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        },
        is_best,
        filename=os.path.join(args.save_dir,
                              'checkpoint_refine_' + str(0) + '.th'))

    print('Start training: ', args.start_epoch, '->', args.epochs)

    # DLDR sampling
    torch.save(model.state_dict(), os.path.join(args.save_dir, str(0) + '.pt'))

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        train(train_loader, model, criterion, optimizer, epoch)
        lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if epoch > 0 and epoch % args.save_every == 0 or epoch == args.epochs - 1:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                },
                is_best,
                filename=os.path.join(
                    args.save_dir,
                    'checkpoint_refine_' + str(epoch + 1) + '.th'))

        save_checkpoint(
            {
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            },
            is_best,
            filename=os.path.join(args.save_dir, 'model.th'))

        # DLDR sampling
        torch.save(model.state_dict(),
                   os.path.join(args.save_dir, str(epoch + 1) + '.pt'))

    print('train loss: ', train_loss)
    print('train err: ', train_err)
    print('test loss: ', test_loss)
    print('test err: ', test_err)
    print('time: ', arr_time)
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os

from nn.losses import CrossEntropy
from utils import set_seed, get_datasets, get_optimizer, get_model, get_config

if __name__ == "__main__":
    config = get_config()

    # Setting seed for reproducibility
    set_seed()

    # Get data
    train_dataset, val_dataset = get_datasets(config)

    # MODEL
    model = get_model(config['model'])

    # Define loss
    loss = CrossEntropy()

    # Define optimizer
    optimizer = get_optimizer(config['train']['optimizer'], model, loss)

    # Main loop
    train_loss_hist, val_loss_hist, val_acc_hist = list(), list(), list()
    pbar = tqdm(range(config['train']['epochs']))
    lr_decay_config = config['train']['lr_decay']
    for i in pbar:
        # TRAINING
        model.train()
def main(args):
    train_dataset, test_dataset, nclasses = utils.get_datasets(
        args, not args.no_normalization)
    new_test_size = int(0.8 * len(test_dataset))
    val_size = len(test_dataset) - new_test_size
    test_dataset, val_dataset = random_split(test_dataset,
                                             [new_test_size, val_size])

    teacher_model = torch.load(args.teacher_model_file).to(args.device)
    teacher_model = teacher_model.eval()
    teacher_base_metric = evaluate(teacher_model, val_dataset, args)
    teacher_test_metric = evaluate(teacher_model, test_dataset, args)
    if not args.test_only:
        train_logits = get_logits(teacher_model, train_dataset, args)
    teacher_model = teacher_model.cpu()

    if args.student_model_file is None:
        student_model = copy_model(
            teacher_model,
            args.device,
            reinitialize=(not args.retain_teacher_weights))
        if args.predictive_pruning:
            student_model = StudentModelWrapper2(student_model, logger, args)
        else:
            student_model = StudentModelWrapper(student_model, logger, args)
        for param in student_model.parameters():
            param.requires_grad = True
    else:
        student_model = torch.load(args.student_model_file)
    del teacher_model

    student_model.args = args
    student_model = student_model.to(args.device)
    student_base_metric = evaluate(student_model, val_dataset, args)

    if args.test_only:
        student_test_metric = evaluate(student_model, test_dataset, args)
        print('teacher_test_metric = %.4f' % (teacher_test_metric))
        print('teacher_base_metric = %.4f' % (teacher_base_metric))
        # logger.info('base_metric = %.4f' % (teacher_base_metric))
        print('student_base_metric = %.4f' % (student_base_metric))
        # logger.info('student_base_metric = %.4f' % student_base_metric)
        return teacher_base_metric, student_base_metric, teacher_test_metric, student_test_metric

    base_metric = max(args.base_metric, teacher_base_metric)
    print('teacher_base_metric = %.4f' % (teacher_base_metric))
    logger.info('base_metric = %.4f' % (teacher_base_metric))
    print('student_base_metric = %.4f' % (student_base_metric))
    logger.info('student_base_metric = %.4f' % student_base_metric)
    print('base_metric = %.4f' % (base_metric))
    logger.info('base_metric = %.4f' % base_metric)

    shrinkable_layers = student_model.get_shrinkable_layers()
    if args.global_pruning:
        student_model = global_compression(student_model,
                                           train_logits,
                                           val_dataset,
                                           test_dataset,
                                           nclasses,
                                           base_metric,
                                           shrinkable_layers,
                                           args,
                                           mLogger=logger)
    else:
        not_shrinkables = []
        if args.reverse_shrink_order:
            shrinkable_layers = shrinkable_layers[::-1]
        if args.random_shrink_order:
            np.random.shuffle(shrinkable_layers)
        if args.shrink_all or len(args.shrink_layer_idxs) > 0:
            old_num_params = num_params = sum(
                [p.numel() for p in student_model.parameters()])
            # rr_iters = args.round_robin_iters if args.round_robin else 1
            # for rri in range(rr_iters):
            rri = 0
            shrinkable_layers_ = shrinkable_layers
            while rri == 0 or old_num_params != num_params:
                student_model.reset()
                shrinkable_layers = shrinkable_layers_
                if len(args.shrink_layer_idxs) > 0:
                    not_shrinkables = [
                        i for i in shrinkable_layers
                        if i not in args.shrink_layer_idxs
                    ]
                    shrinkable_layers = args.shrink_layer_idxs
                else:
                    not_shrinkables = args.exclude_layers[:]
                if rri == 0 and args.start_layer_idx >= 0 and args.start_layer_idx in shrinkable_layers:
                    if args.reverse_shrink_order:
                        not_shrinkables += [
                            i for i in shrinkable_layers
                            if i > args.start_layer_idx
                        ]
                    else:
                        not_shrinkables += [
                            i for i in shrinkable_layers
                            if i < args.start_layer_idx
                        ]
                    shrinkable_layers = shrinkable_layers[
                        shrinkable_layers.index(args.start_layer_idx):]
                print(rri, not_shrinkables, shrinkable_layers,
                      args.exclude_layers, args.shrink_layer_idxs)
                while len(shrinkable_layers) > 0:
                    student_model = iterative_distillation(
                        student_model,
                        shrinkable_layers[0],
                        train_logits,
                        val_dataset,
                        test_dataset,
                        nclasses,
                        base_metric,
                        args,
                        mLogger=logger)
                    if args.train_on_student:
                        train_logits = get_logits(student_model,
                                                  train_dataset, args)
                    print(student_model)
                    new_shrinkable_layers = student_model.get_shrinkable_layers(
                        not_shrinkables)
                    if args.reverse_shrink_order:
                        new_shrinkable_layers = new_shrinkable_layers[::-1]
                    if args.random_shrink_order:
                        np.random.shuffle(new_shrinkable_layers)
                    if set(shrinkable_layers) == set(new_shrinkable_layers):
                        not_shrinkables.append(shrinkable_layers[0])
                    shrinkable_layers = [
                        x for x in new_shrinkable_layers
                        if x not in not_shrinkables
                    ]
                    print(not_shrinkables, shrinkable_layers)
                    if not args.round_robin:
                        break
                old_num_params = num_params
                num_params = sum(
                    [p.numel() for p in student_model.parameters()])
                num_dense = sum([
                    sum([p.numel() for p in m.parameters()])
                    for m in student_model.modules() if isinstance(m, nn.Linear)
                ])
                num_conv = sum([
                    sum([p.numel() for p in m.parameters()])
                    for m in student_model.modules() if isinstance(m, nn.Conv2d)
                ])
                print('change in num_params: %d -> %d' %
                      (old_num_params, num_params))
                logger.info('num params: %d' % num_params)
                logger.info('num dense: %d' % num_dense)
                logger.info('num conv: %d' % num_conv)
                rri += 1
        else:
            student_model = iterative_distillation(
                student_model,
                shrinkable_layers[args.start_layer_idx],
                train_logits,
                val_dataset,
                test_dataset,
                nclasses,
                base_metric,
                args,
                mLogger=logger)
            print(student_model)

    test_metric = evaluate(student_model, test_dataset, args)
    print('test_metric = %.4f' % (test_metric))
    print('teacher_test_metric = %.4f' % (teacher_test_metric))
    logger.info('test_metric = %.4f' % (test_metric))
    logger.info('teacher_test_metric = %.4f' % (teacher_test_metric))
    torch.save(student_model, args.outfile)
                                 minibatch_size)
    return opt_update(i, grads, opt_state)


@jit
def update(rng, i, opt_state, batch):
    params = get_params(opt_state)
    grads = grad(loss)(params, batch)
    return opt_update(i, grads, opt_state)


if __name__ == '__main__':
    key = random.PRNGKey(0)

    # Create dataset
    X_full = utils.get_datasets(dataset)
    kfold = model_selection.KFold(pieces, shuffle=True, random_state=0)
    for fold_iter, (idx_train, idx_test) in enumerate(
            utils.take(pieces_to_run, kfold.split(X_full))):
        X, X_test = X_full[idx_train], X_full[idx_test]
        scaler = preprocessing.StandardScaler()
        X = scaler.fit_transform(X)
        X_test = scaler.transform(X_test)
        delta = 1. / (X.shape[0]**1.1)
        print('X: {}'.format(X.shape))
        print('X test: {}'.format(X_test.shape))
        print('Delta: {}'.format(delta))
import utils
from names import SplitPartNames, DatasetNames, set_names
import trees_algorithms
from sklearn import model_selection

print('downloading datasets...')
datasets = utils.get_datasets()
set_names()

Xs = []
ys = []
for dataset_index in range(0, len(datasets)):
    X, y = utils.split_dataset(datasets[dataset_index])
    Xs.append(X)
    ys.append(y)

k = 30
cv = model_selection.StratifiedKFold(n_splits=k)
id3_measures = []
cart_measures = []
for dataset_index in range(0, len(datasets)):
    fold = 0
    id3_fold_measures = []
    cart_fold_measures = []
    for train_indexes, test_indexes in cv.split(Xs[dataset_index],
                                                ys[dataset_index]):
        print('processing {} fold of {} algorithm...'.format(
            fold, utils.get_dataset_name(dataset_index)))
from models import test

parser = argparse.ArgumentParser()
parser.add_argument('--saving_folder', type=str)
parser.add_argument('--confusion_matrix', action='store_true', default=False)
parser.add_argument('--read_history', action='store_true', default=False)
parser.add_argument('--f1_mean', action='store_true', default=False)
args = parser.parse_args()

run_info = json.load(
    open(os.path.join(args.saving_folder, 'run_info.json'), 'r'))
args = namedtuple('Struct', run_info.keys())(*run_info.values())

datasets = utils.get_datasets(args.dataset,
                              validation=True,
                              window_size=args.window_size,
                              step=args.window_step,
                              downsample=args.downsample_factor)
testing_loader = DataLoader(dataset=datasets['testing_set'],
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=2,
                            drop_last=True,
                            pin_memory=True)
random_sample = next(iter(testing_loader))[0]
model = resnets.HAR_ResNet1D(
    input_channels=random_sample.shape[1],
    kernel_size=args.kernel_size,
    depth=[int(item) for item in args.architecture_depth.split(',')],
    dilated=args.dilated,
def home_page():
    datasets = utils.get_datasets()
    return render_template("home.html", dataset_names=datasets)
def run():
    model = torch.hub.load('AdeelH/WideResNet-pytorch:torch_hub',
                           'WideResNet',
                           depth=28,
                           num_classes=NUM_CLASSES,
                           widen_factor=2)
    model = model.cuda()
    ema_model = ModelEMA(model, decay=0.999)

    train_params = {}
    train_params['batch_size'] = 100
    train_params['val_batch_size'] = 256

    train_ds, train_subset_ds, val_ds = get_datasets(subset_size=4000)
    train_dl_l = torch.utils.data.DataLoader(
        train_subset_ds,
        batch_size=train_params['batch_size'],
        pin_memory=True,
        num_workers=4,
        shuffle=True,
        drop_last=False,
        collate_fn=collate_fn)
    train_dl_ul = torch.utils.data.DataLoader(
        train_ds,
        batch_size=train_params['batch_size'],
        pin_memory=True,
        num_workers=4,
        shuffle=True,
        drop_last=False,
        collate_fn=collate_fn)
    val_dl = torch.utils.data.DataLoader(
        val_ds,
        batch_size=train_params['val_batch_size'],
        pin_memory=True,
        num_workers=2,
        drop_last=False,
        collate_fn=collate_fn)

    train_params['epochs'] = 300
    train_params['learning_rate'] = 3e-4
    optimizer = optim.Adam(model.parameters(),
                           lr=train_params['learning_rate'],
                           betas=(0.9, 0.999))
    sched = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        train_params['epochs'],
        eta_min=train_params['learning_rate'] / 10)

    train_mixmatch(model,
                   ema_model,
                   train_dl_l,
                   train_dl_ul,
                   val_dl,
                   optimizer,
                   sched,
                   train_params,
                   num_augs=2,
                   T=0.5,
                   α=0.75,
                   w_scale=100,
                   rampup_epochs=int(train_params['epochs'] * .4),
                   rampdown_epochs=int(train_params['epochs'] / 6),
                   start_epoch=0)
def fwd_pass(x):
    x = conv(unfold_1, x, conv_wts_1, conv_bias_1)
    x = F.relu(x)
    x = conv(unfold_2, x, conv_wts_2, conv_bias_2)
    x = F.relu(x)
    x = x.view(x.size(0), -1)
    x = torch.matmul(x, torch.t(fc_1)) + fc_bias_1.unsqueeze(0)
    x = F.relu(x)
    x = torch.matmul(x, torch.t(fc_2)) + fc_bias_2.unsqueeze(0)
    return x


##############################################################################
################################# Data fetch ####################################
train_dataset, val_dataset, test_dataset = get_datasets(".")
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=256,
                                           shuffle=True,
                                           num_workers=8)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=256,
                                         shuffle=True,
                                         num_workers=8)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=256,
                                          shuffle=True,
                                          num_workers=8)
#################################################################################

# optimizer = optim.SGD([conv_wts_1, conv_wts_2, fc_1, fc_2], lr = 0.01, momentum = 0.9)
# optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)
def main(args):
    wall_start = time.time()
    parameters = get_parameters(args)
    print("Candidate generator parameters:", parameters)

    datasets = utils.get_datasets(args.include_aida_train,
                                  args.keep_pregenerated_candidates)
    if args.single_dataset:
        datasets = [datasets[0]]
    mentions = utils.get_list_of_mentions(datasets)

    # NUM_TREADS = multiprocessing.cpu_count()
    NUM_THREADS = args.num_threads
    pool = ThreadPool(NUM_THREADS)

    # Split the data into approximately equal parts and give one block to each thread
    data_per_thread = split(mentions, NUM_THREADS)
    if args.keep_pregenerated_candidates:
        arguments = [{
            "id": idx,
            "data": data_bloc,
            "args": args,
            "candidate_generator": Simple_Candidate_Generator(parameters),
            "pregenereted_cands_data_fetcher":
            Pregenerated_Candidates_Data_Fetcher(parameters),
        } for idx, data_bloc in enumerate(data_per_thread)]
    else:
        arguments = [{
            "id": idx,
            "data": data_bloc,
            "args": args,
            "candidate_generator": Simple_Candidate_Generator(parameters),
        } for idx, data_bloc in enumerate(data_per_thread)]

    results = pool.map(run_thread, arguments)

    # Merge the results
    processed_mentions = []
    for _id, mentions in results:
        processed_mentions = processed_mentions + mentions
    has_gold = 0

    pool.terminate()
    pool.join()

    execution_time = (time.time() - wall_start) / 60
    print("The execution took:", execution_time, " minutes")

    # Evaluate the generation
    evaluator = Evaluator(processed_mentions)
    evaluator.candidate_generation(
        save_gold_pos=True,
        save_pregenerated_gold_pos=args.keep_pregenerated_candidates)

    # Dump the data if the dump_mentions flag was set
    if args.dump_mentions:
        print("Dumping processed mentions")
        # Create the directory for the mention dumps if it does not exist
        dump_folder = args.dump_mentions_folder
        os.makedirs(dump_folder, exist_ok=True)

        dump_object = {}
        dump_object["mentions"] = processed_mentions
        dump_object["total_per_dataset"] = evaluator.total_per_dataset
        dump_object["has_gold_per_dataset"] = evaluator.has_gold_per_dataset
        dump_object["parameters"] = parameters
        dump_object["args"] = args
        dump_object["execution_time"] = execution_time

        pickle.dump(
            dump_object,
            open(os.path.join(dump_folder, args.dump_file_id), "wb"),
            protocol=4,
        )

    # evaluator.candidate_generation(max_rank=100)
    return evaluator.recall
'''
Reinforcement Learning
'''
import os
import torch
import torch.backends.cudnn as cudnn
from utils import get_datasets, Mode, Data
import torch.utils.data as D
import argparse

os.environ["CUDA_VISIBLE_DEVICES"] = '2'
cudnn.benchmark = True
device = 'cuda' if torch.cuda.is_available() else 'cpu'

parser = argparse.ArgumentParser(
    description='Dynamic ResNet Reinforcement Learning')
parser.add_argument('--batch_size', type=int, default=128, help='batch size')
args = parser.parse_args()

trainset, testset = get_datasets(Data.cifar10, Mode.with_policy)
trainloader = D.DataLoader(trainset,
                           batch_size=args.batch_size,
                           shuffle=True,
                           num_workers=4)
testloader = D.DataLoader(testset,
                          batch_size=args.batch_size,
                          shuffle=False,
                          num_workers=4)
def gmm_client_specific(features, label_dict):
    runs_eer = []
    runs_hter = []
    for _ in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)

        nb_of_components = 11
        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(development_x, all_gmms, label_dict)
        cur_eers, cur_thresholds = compute_eer_client_threshold(
            dist_matrix, development_y, label_dict)
        runs_eer.append(np.mean(cur_eers))
        print(f"Client thresholds:{np.array(cur_thresholds)}")

        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(test_x, all_gmms, label_dict)
        client_hters = []
        for i in range(len(label_dict)):
            cur_dm = dist_matrix[:, i]
            genuine_indexes = (test_y == i)
            client_threshold = cur_thresholds[i]
            cur_frr, cur_far = compute_frr_far_client(cur_dm, genuine_indexes,
                                                      client_threshold)
            client_hters.append((cur_frr + cur_far) / 2)
        cur_hter = np.mean(client_hters)
        runs_hter.append(cur_hter)

    print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")
    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
def main():
    EPOCHS = 10
    tasks_nb = 50
    models_nb_per_task = 1
    multi_task_dataset = False
    use_kfac = True
    accumulate_last_kfac = False
    ewc = False
    lmbd = 10**4
    seed = 1234
    dataset_name = 'pMNIST'
    save_models = False

    set_seed(seed)
    train_datasets, test_datasets = get_datasets(
        dataset_name=dataset_name,
        task_number=tasks_nb,
        batch_size_train=128,
        batch_size_test=4096,
        include_prev=multi_task_dataset,
        seed=seed)

    all_models = {}
    models = [Net().cuda() for i in range(models_nb_per_task)]
    optimizers = [
        optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
        for model in models
    ]
    kfacs = []
    train_criterion = [
        create_loss_function(kfacs, model, accumulate_last_kfac, lmbd, use_kfac)
        for model in models
    ]
    test_criterion = torch.nn.CrossEntropyLoss()
    val_accs = [[0.0] * tasks_nb for _ in range(tasks_nb)]

    for task_id in range(tasks_nb):
        task_kfacs = []
        for model_id, model in enumerate(models):
            print('Task {} Model {}:'.format(task_id + 1, model_id + 1))
            for epoch in range(1, EPOCHS + 1):
                train(model, train_datasets[task_id], optimizers[model_id],
                      train_criterion[model_id], epoch, task_id + 1)
            all_models['{:d}-{:d}'.format(task_id, model_id)] = deepcopy(model)

            for test_task_id in range(tasks_nb):
                print('Test model {} on task {}'.format(
                    model_id + 1, test_task_id + 1), flush=True)
                val_acc = validate(model, test_datasets[test_task_id],
                                   test_criterion)[0].avg.item()
                prev_acc = val_accs[task_id][test_task_id] * model_id
                val_accs[task_id][test_task_id] = (prev_acc + val_acc) / (model_id + 1)

            task_kfacs.append(KFAC(model, train_datasets[task_id], ewc))
            task_kfacs[-1].update_stats()
        kfacs.append(task_kfacs)

        if accumulate_last_kfac and len(kfacs) > 1:
            for model_kfac_id in range(len(kfacs[-1])):
                for module_id in range(len(kfacs[-1][model_kfac_id].modules)):
                    kfacs[-1][model_kfac_id].m_aa[module_id] += kfacs[-2][
                        model_kfac_id].m_aa[module_id]
                    kfacs[-1][model_kfac_id].m_gg[module_id] += kfacs[-2][
                        model_kfac_id].m_gg[module_id]

        # kfacs[-1][-1].visualize_attr('images/', task_id, 'gg')
        # kfacs[-1][-1].visualize_attr('images/', task_id, 'aa')

        print(
            '#' * 60, 'Avg acc: {:.2f}'.format(
                np.sum(val_accs[task_id][:task_id + 1]) / (task_id + 1)))

    if save_models:
        for i in range(len(kfacs)):
            kfac = kfacs[i][-1]
            with open('kfacs/{:d}_weights.pkl'.format(i), 'wb') as output:
                pickle.dump(kfac.weights, output, pickle.HIGHEST_PROTOCOL)
            with open('kfacs/{:d}_maa.pkl'.format(i), 'wb') as output:
                pickle.dump(kfac.m_aa, output, pickle.HIGHEST_PROTOCOL)
            with open('kfacs/{:d}_mgg.pkl'.format(i), 'wb') as output:
                pickle.dump(kfac.m_gg, output, pickle.HIGHEST_PROTOCOL)
        for model_name, model in all_models.items():
            torch.save(model.state_dict(), 'models/{:s}.pt'.format(model_name))
def gmm_global_threshold(features, label_dict):
    runs_eer = []
    runs_hter = []
    for experiment_i in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)

        nb_of_components = 11
        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(development_x, all_gmms, label_dict)
        cur_eer, cur_threshold = compute_eer(dist_matrix, development_y,
                                             label_dict)
        runs_eer.append(cur_eer)

        if experiment_i == 0:
            utils.plot_scores(dist_matrix, development_y, "First Section",
                              "e1", label_dict)
            frr_list, far_list, threshold_list = compute_frr_far_list(
                dist_matrix, development_y, label_dict)
            utils.plot_far_frr(frr_list, far_list, threshold_list,
                               "First Section", "e1")
        print(f"Threshold:{cur_threshold}")

        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(test_x, all_gmms, label_dict)
        cur_frr, cur_far = compute_frr_far(dist_matrix, test_y, cur_threshold,
                                           label_dict)
        cur_hter = (cur_frr + cur_far) / 2
        runs_hter.append(cur_hter)

    print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")
    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
device = u.get_backend(args)

# initialize logger
logger = WandBLogger(
    args=args,
    name=args.model,
)

# make experiments reproducible
if args.seed:
    u.set_seed(args.seed)

# load dataset
train_loader, val_loader, (width, height, channels) = u.get_datasets(
    dataset=args.dataset,
    batch_size=args.batch_size,
    test_batch_size=args.test_batch_size,
    cuda=args.cuda,
    verbose=args.verbose)

encoder_params = dict(encoder=args.encoder,
                      device=device,
                      noise=args.noise,
                      std=1.0,
                      scaling=args.scale,
                      leak=args.decay)
decoder_params = dict(decoder=args.decoder,
                      device=device,
                      scaling=args.steps * args.scale)

loss_fn = losses.get_loss_function(
def ubm(features, label_dict):
    runs_eer = []
    runs_hter = []
    for experiment_i in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)

        nb_of_components = 11
        nb_of_components_background = 15
        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        all_ubms = build_UBMs(train_set, nb_of_components_background,
                              label_dict)
        dist_matrix = compute_dist_matrix_with_ubm(development_x, all_gmms,
                                                   all_ubms, label_dict)
        cur_eers, cur_thresholds = compute_eer_client_threshold(
            dist_matrix, development_y, label_dict)
        runs_eer.append(np.mean(cur_eers))

        if experiment_i == 0:
            utils.plot_scores(dist_matrix, development_y, "Second Section",
                              "e2", label_dict)
            frr_list, far_list, threshold_list = compute_frr_far_list(
                dist_matrix, development_y, label_dict)
            utils.plot_far_frr(frr_list, far_list, threshold_list,
                               "Second Section", "e2")
        print(f"Client thresholds:{np.array(cur_thresholds)}")

        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        all_ubms = build_UBMs(train_dev_set, nb_of_components_background,
                              label_dict)
        dist_matrix = compute_dist_matrix_with_ubm(test_x, all_gmms, all_ubms,
                                                   label_dict)
        client_hters = []
        for i in range(len(label_dict)):
            cur_dm = dist_matrix[:, i]
            genuine_indexes = (test_y == i)
            client_threshold = cur_thresholds[i]
            cur_frr, cur_far = compute_frr_far_client(cur_dm, genuine_indexes,
                                                      client_threshold)
            client_hters.append((cur_frr + cur_far) / 2)
        cur_hter = np.mean(client_hters)
        runs_hter.append(cur_hter)

    print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")
    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
                    type=float,
                    metavar='A',
                    help='value of spike variable (default: 0.5)')
args = parser.parse_args()
print('VSC Baseline Experiments\n')
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Set reproducibility seed
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

# Define device for training
device = torch.device('cuda' if args.cuda else 'cpu')
print(f'Using {device} device...')

# Load datasets
train_loader, test_loader, (width, height, channels) = get_datasets(
    args.dataset, args.batch_size, args.cuda)

# Tune the learning rate (All training rates used were between 0.001 and 0.01)
vsc = VariationalSparseCoding(args.dataset, width, height, channels,
                              args.hidden_size, args.latent_size, args.lr,
                              args.alpha, device, args.log_interval,
                              args.normalize)
vsc.run_training(train_loader, test_loader, args.epochs,
                 args.report_interval, args.sample_size,
                 reload_model=not args.do_not_resume)
import tensorflow as tf

from utils import MyModel, get_datasets

if __name__ == '__main__':
    train_ds, test_ds = get_datasets()

    # Create an instance of the model
    model = MyModel()

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.Adam()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            # training=True is only needed if there are layers with different
            # behavior during training versus inference (e.g. Dropout).
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
def main():
    path = 'orders/'
    classes_groups, class_map, map_reverse = utils.get_class_maps_from_files(
        path + 'classgroups1.pickle', path + 'map1.pickle',
        path + 'revmap1.pickle')
    print(classes_groups, class_map, map_reverse)

    net = LwF(0, class_map)
    net.to(DEVICE)

    for i in range(int(100 / CLASSES_BATCH)):
        print('-' * 30)
        print(f'**** ITERATION {i+1} ****')
        print('-' * 30)
        # torch.cuda.empty_cache()

        print('Loading the Datasets ...')
        print('-' * 30)
        train_dataset, val_dataset, test_dataset = utils.get_datasets(
            classes_groups[i])

        print('-' * 30)
        print('Updating representation ...')
        print('-' * 30)
        net.update_representation(dataset=train_dataset,
                                  val_dataset=val_dataset,
                                  class_map=class_map,
                                  map_reverse=map_reverse)
        '''
        print('Reducing exemplar sets ...')
        print('-'*30)
        m = int(math.ceil(MEMORY_SIZE/net.n_classes))
        net.reduce_exemplars_set(m)

        print('Constructing exemplar sets ...')
        print('-'*30)
        for y in classes_groups[i]:
            net.construct_exemplars_set(train_dataset.dataset.get_class_imgs(y), m)
        '''
        net.n_known = net.n_classes

        print('Testing ...')
        print('-' * 30)
        print('New classes')
        net.classify_all(test_dataset, map_reverse)

        if i > 0:
            previous_classes = np.array([])
            for j in range(i):
                previous_classes = np.concatenate(
                    (previous_classes, classes_groups[j]))
            prev_classes_dataset, all_classes_dataset = utils.get_additional_datasets(
                previous_classes,
                np.concatenate((previous_classes, classes_groups[i])))

            print('Old classes')
            net.classify_all(prev_classes_dataset, map_reverse)
            print('All classes')
            net.classify_all(all_classes_dataset, map_reverse)
        print('-' * 30)
def main():
    global args, best_prec1, Bk, p0, P

    # Check the save_dir exists or not
    print(args.save_dir)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Define model
    model = torch.nn.DataParallel(get_model(args))
    model.cuda()

    # Load sampled model parameters
    print('params: from', args.params_start, 'to', args.params_end)
    W = []
    for i in range(args.params_start, args.params_end):
        # if (i % 2 == 1 or i % 6 == 2 and i < 150): continue
        if i % 2 == 1:
            continue
        model.load_state_dict(
            torch.load(os.path.join(args.save_dir, str(i) + '.pt')))
        W.append(get_model_param_vec(model))
    W = np.array(W)
    print('W:', W.shape)

    # Obtain base variables through PCA
    pca = PCA(n_components=args.n_components)
    pca.fit_transform(W)
    P = np.array(pca.components_)
    print('ratio:', pca.explained_variance_ratio_)
    print('P:', P.shape)
    P = torch.from_numpy(P).cuda()

    # Resume from params_start
    model.load_state_dict(
        torch.load(os.path.join(args.save_dir,
                                str(args.params_start) + '.pt')))

    # Prepare Dataloader
    train_loader, val_loader = get_datasets(args)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    if args.half:
        model.half()
        criterion.half()

    cudnn.benchmark = True

    optimizer = optim.SGD(model.parameters(), lr=1, momentum=0)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    print('Train:', (args.start_epoch, args.epochs))
    end = time.time()
    end1 = end
    p0 = get_model_param_vec(model)
    epoch_time = []
    for epoch in range(args.start_epoch, args.epochs):
        # Train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        epoch_time.append(time.time() - end1)
        end1 = time.time()
        # Bk = torch.eye(args.n_components).cuda()

        # Evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # Remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

    print('total time:', time.time() - end)
    print('train loss: ', train_loss)
    print('train acc: ', train_acc)
    print('test loss: ', test_loss)
    print('test acc: ', test_acc)
    print('best_prec1:', best_prec1)
    print('epoch time:', epoch_time)

    # torch.save(model.state_dict(), 'PBFGS.pt', _use_new_zipfile_serialization=False)
    torch.save(model.state_dict(), 'PBFGS.pt')
print(f'Will save to {exp_dir}')
if not os.path.exists(exp_dir):
    os.mkdir(exp_dir)
losses_save_path = os.path.join(exp_dir, 'losses.npy')
with open(os.path.join(exp_dir, 'config.yml'), 'w') as f:
    yaml.dump(args.__dict__, f)
print('Configuration file written')

# ************** CREATE DATASET, MODEL AND OPTIMIZER ******************
bpe = yttm.BPE(model=args.bpe_path)
TEXT = torchtext.data.Field(tokenize=lambda x: utils.bpe_tokenize(x, bpe),
                            lower=True)
train_txt, val_txt, test_txt = utils.get_datasets(args.dataset).splits(TEXT)
print('Dataset fetched')
TEXT.build_vocab(train_txt)
vocab_size = len(TEXT.vocab.stoi)
print(f"Unique tokens in vocabulary: {len(TEXT.vocab)}")

device = torch.device(
    f"cuda:{args.gpu_id}" if torch.cuda.is_available() else "cpu")
train_data = utils.batchify(train_txt, TEXT, args.batch_size, device)
val_data = utils.batchify(val_txt, TEXT, args.batch_size, device)

layernorm = not args.nolayernorm
model = transformer.LMTransformer(vocab_size, args.dmodel, args.nheads,
    default="./logdir",
    help="where to store Tensorboard summaries",
)
parser.add_argument(
    "--save-dir",
    type=str,
    default="./model.ckpt",
    help="where to store Tensorflow model",
)
args = parser.parse_args()

tf.set_random_seed(args.random_seed)

print("Loading dataset...")
data = get_datasets(args.data_path, args.val_split, args.test_split, 32)
print("Dataset loaded")

print("Building graph...")
model = Model(
    data,
    tf.train.AdamOptimizer(learning_rate=0.0008408132388618728),
    0.003683848079337278,
    0.6275728419832726,
    tf.nn.elu,
    1000,
    100,
    0.10805612575300722,
)
print("Graph built")
epochs = args.epochs

if not os.path.isdir(data_dir):
    print('{} is not a valid directory'.format(data_dir))
    exit()

if not utils.is_valid_architecture(architecture):
    print('{} is not a valid architecture'.format(architecture))
    exit()

if not torch.cuda.is_available() and args.gpu:
    print('WARNING: No CUDA available for training, will use CPU')

# Load data
trainloader, validloader, testloader = utils.load_data(data_dir)

# Get torchvision architecture
pre_trained_model = utils.get_torchvision_model(architecture)

# Build network
in_features = utils.get_input_features(architecture)
model = setup_model(pre_trained_model, in_features, hidden_units)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), learning_rate)

# Train the network
train(model, trainloader, validloader, criterion, optimizer, args.gpu, epochs)

# Save the model checkpoint
class_to_idx = utils.get_datasets(data_dir)[0].class_to_idx
save_path = args.save_dir + '/checkpoint.pth'
utils.save_checkpoint(model, optimizer, architecture, hidden_units,
                      learning_rate, epochs, class_to_idx, save_path)
def patch_dataset(cfg: GumiConfig) -> GumiConfig:
    if cfg.dataset != "dogs-vs-cats":
        return cfg
    cfg.dataset = get_datasets(cfg.dataset_dir)
    return cfg