def main():
    args = get_command_line_arguments()
    dataset_path = Path(args.data)
    model_path = args.model
    output_path = Path(args.output)
    output_path.mkdir(parents=True, exist_ok=True)

    x_test, y_test = get_data(dataset_path, NORMALIZATION)
    _, evaluation_mapping, _ = timit.get_phone_mapping()

    # the saved weights do not record the model version, so probe each one
    # until load_weights() succeeds
    for version in ['mono_directional', 'bidirectional', 'imbalanced']:
        model = DualStudent(
            n_classes=get_number_of_classes(),
            n_hidden_layers=N_HIDDEN_LAYERS,
            n_units=N_UNITS,
            padding_value=PADDING_VALUE,
            version=version,
        )
        model.build(input_shape=(None,) + x_test[0].shape)  # necessary, otherwise load_weights() fails
        try:
            model.load_weights(model_path)
            print(f'model version: {version}')
            break
        except ValueError:
            print(f'not {version}, retrying...')

    results = model.test(x_test, y_test, evaluation_mapping=evaluation_mapping)

    with open(output_path / 'performance.txt', mode='w') as f:
        for k, v in results.items():
            output = f'{k}: {v}'
            print(output)
            f.write(output + '\n')
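
# A plausible shape for get_command_line_arguments(), which is used above but
# not defined in this file. The flag names simply mirror the attributes the
# script reads (args.data, args.model, args.output) and are otherwise
# hypothetical.
import argparse


def get_command_line_arguments():
    parser = argparse.ArgumentParser(description='Evaluate a trained DualStudent model')
    parser.add_argument('--data', type=str, required=True, help='path to the prepared dataset')
    parser.add_argument('--model', type=str, required=True, help='path to the saved model weights')
    parser.add_argument('--output', type=str, required=True, help='directory for evaluation results')
    return parser.parse_args()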
def run_evaluation(args, model, data_loaders, model_description, n_choices,
                   layers_types, downsample_layers):
    start = time.time()
    num_samples = utils.get_number_of_samples(args.dataset)
    all_values = {}
    device = 'cuda'

    # set up random seeds
    utils.setup_torch(args.seed)

    # create the model skeleton based on the description: a one-hot weight
    # vector selects a single cell type at each position
    propagate_weights = []
    for layer in model_description:
        cur_weights = [0 for _ in range(n_choices)]
        cur_weights[layers_types.index(layer)] = 1
        propagate_weights.append(cur_weights)
    model.propagate = propagate_weights

    # create a computationally identical model without multiple-choice blocks
    # (just a single-path net); this is needed to correctly measure MACs
    pruned_model = models.SinglePathSupernet(
        num_classes=utils.get_number_of_classes(args.dataset),
        propagate=propagate_weights,
        put_downsampling=downsample_layers)  # .to(device)
    pruned_model.propagate = propagate_weights
    inputs = torch.randn((1, 3, 32, 32))
    total_ops, total_params = profile(pruned_model, (inputs,), verbose=True)
    all_values['MMACs'] = np.round(total_ops / (1000.0 ** 2), 2)
    all_values['Params'] = int(total_params)
    del pruned_model
    del inputs

    criterion = torch.nn.CrossEntropyLoss()

    # recalculate batch normalization statistics for the selected path
    utils.bn_update(device, data_loaders['train_for_bn_recalc'], model)

    val_res = utils.evaluate(device, data_loaders['val'], model, criterion,
                             num_samples['val'])
    test_res = utils.evaluate(device, data_loaders['test'], model, criterion,
                              num_samples['test'])
    all_values['val_loss'] = np.round(val_res['loss'], 3)
    all_values['val_acc'] = np.round(val_res['accuracy'], 3)
    all_values['test_loss'] = np.round(test_res['loss'], 3)
    all_values['test_acc'] = np.round(test_res['accuracy'], 3)

    print(all_values, 'time taken: %.2f sec.' % (time.time() - start))
    utils.save_result(all_values, args.dir, model_description)
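
# A minimal sketch of what a helper like utils.bn_update presumably does (the
# repo's implementation is not shown here): reset BatchNorm running statistics
# and re-estimate them with forward passes over the given loader in train
# mode. PyTorch ships an equivalent utility, torch.optim.swa_utils.update_bn.
import torch


@torch.no_grad()
def bn_update_sketch(device, loader, model):
    """Re-estimate BatchNorm running statistics from `loader`."""
    for module in model.modules():
        if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
            module.reset_running_stats()
            module.momentum = None  # None = exact cumulative average over all batches
    model.train()  # BN layers update running stats only in train mode
    for inputs, _ in loader:
        model(inputs.to(device))
    model.eval()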
def main():
    args = get_command_line_arguments()
    dataset_path = args.data
    checkpoint_path = args.checkpoint
    version = args.version
    model_path = Path(args.output)
    model_path.parent.mkdir(parents=True, exist_ok=True)
    model_path = str(model_path)

    train_set, _ = timit.load_data(dataset_path)

    model = DualStudent(
        n_classes=get_number_of_classes(),
        n_hidden_layers=N_HIDDEN_LAYERS,
        n_units=N_UNITS,
        padding_value=PADDING_VALUE,
        version=version,
    )
    model.build(input_shape=(None,) + train_set[0]['features'].shape)

    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint.restore(checkpoint_path)
    model.save_weights(model_path)
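
# Aside: tf.train.Checkpoint.restore() returns a status object, so the restore
# in main() above can be verified rather than trusted silently. A minimal
# sketch reusing the names from main():
#
#     status = checkpoint.restore(checkpoint_path)
#     status.assert_existing_objects_matched()  # raises if checkpoint and model disagree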
def main():
    args = get_command_line_arguments()
    dataset_path = Path(args.data)
    model_path = Path(args.model)
    logs_path = model_path / 'logs'
    checkpoints_path = model_path / 'checkpoints'
    model_path.mkdir(parents=True, exist_ok=True)
    checkpoints_path.mkdir(exist_ok=True)
    logs_path.mkdir(exist_ok=True)
    model_path = str(model_path / 'model.h5')
    logs_path = str(logs_path)

    config = Config(
        version=VERSION,
        n_hidden_layers=N_HIDDEN_LAYERS,
        n_units=N_UNITS,
        n_epochs=N_EPOCHS,
        batch_size=BATCH_SIZE,
        unlabeled_percentage=UNLABELED_PERCENTAGE,
        optimizer=OPTIMIZER,
        consistency_loss=CONSISTENCY_LOSS,
        consistency_scale=CONSISTENCY_SCALE,
        stabilization_scale=STABILIZATION_SCALE,
        xi=XI,
        sigma=SIGMA,
        schedule=SCHEDULE,
        schedule_length=SCHEDULE_LENGTH,
        normalization=NORMALIZATION,
        seed=SEED,
    )

    x_train_labeled, x_train_unlabeled, y_train_labeled, x_val, y_val = get_data(
        dataset_path=dataset_path,
        normalization=config.normalization,
        unlabeled_percentage=config.unlabeled_percentage,
        seed=config.seed,
    )
    _, evaluation_mapping, _ = timit.get_phone_mapping()

    model = DualStudent(
        n_classes=get_number_of_classes(),
        n_hidden_layers=config.n_hidden_layers,
        n_units=config.n_units,
        consistency_loss=config.consistency_loss,
        consistency_scale=config.consistency_scale,
        stabilization_scale=config.stabilization_scale,
        xi=config.xi,
        padding_value=PADDING_VALUE,
        sigma=config.sigma,
        schedule=config.schedule,
        schedule_length=config.schedule_length,
        version=config.version,
    )
    model.compile(optimizer=get_optimizer(config.optimizer))

    model.train(
        x_labeled=x_train_labeled,
        x_unlabeled=x_train_unlabeled,
        y_labeled=y_train_labeled,
        x_val=x_val,
        y_val=y_val,
        n_epochs=config.n_epochs,
        batch_size=config.batch_size,
        checkpoints_path=checkpoints_path,
        logs_path=logs_path,
        evaluation_mapping=evaluation_mapping,
        initial_epoch=0,
        seed=config.seed,
    )
    model.save_weights(model_path)
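
# A minimal sketch of the Config helper assumed above. The real class is not
# shown in this file; what is known from usage is that it takes the
# hyperparameters as keyword arguments and that run_possibilities() below
# relies on str(config) to name a log directory. The fields here are trimmed
# to a few representative ones and the __str__ format is hypothetical.
from dataclasses import asdict, dataclass


@dataclass
class ConfigSketch:
    version: str
    n_units: int
    consistency_scale: float
    sigma: float

    def __str__(self):
        # e.g. 'version=mono_directional_n_units=128_consistency_scale=10.0_sigma=0.1'
        return '_'.join(f'{k}={v}' for k, v in asdict(self).items())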
def run_possibilities(dataset_path, logs_path, possibilities):
    x_train_labeled, x_train_unlabeled, y_train_labeled, x_val, y_val = get_data(
        dataset_path=dataset_path,
        normalization=NORMALIZATION,
        unlabeled_percentage=UNLABELED_PERCENTAGE,
        seed=SEED,
    )
    _, evaluation_mapping, _ = timit.get_phone_mapping()
    n_classes = get_number_of_classes()

    for consistency_loss, schedule, sigma, consistency_scale, stabilization_scale, xi in possibilities:
        hparams = {
            'consistency_loss': consistency_loss,
            'schedule': schedule,
            'sigma': sigma,
            'consistency_scale': consistency_scale,
            'stabilization_scale': stabilization_scale,
            'xi': xi,
        }
        for k, v in hparams.items():
            print(f'{k}={v}, ', end='')
        print()

        config = Config(
            version='mono_directional',
            n_hidden_layers=N_HIDDEN_LAYERS,
            n_units=N_UNITS,
            n_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            unlabeled_percentage=UNLABELED_PERCENTAGE,
            optimizer=OPTIMIZER,
            consistency_loss=consistency_loss,
            consistency_scale=consistency_scale,
            stabilization_scale=stabilization_scale,
            xi=xi,
            sigma=sigma,
            schedule=schedule,
            schedule_length=SCHEDULE_LENGTH,
            normalization=NORMALIZATION,
            seed=SEED,
        )

        logs_path_ = logs_path / str(config)
        if logs_path_.is_dir():  # skip configurations already done (e.g. after a crash)
            print('already done, skipping...')
            continue
        logs_path_.mkdir(parents=True)
        logs_path_ = str(logs_path_)

        model = DualStudent(
            n_classes=n_classes,
            n_hidden_layers=config.n_hidden_layers,
            n_units=config.n_units,
            consistency_loss=config.consistency_loss,
            consistency_scale=config.consistency_scale,
            stabilization_scale=config.stabilization_scale,
            xi=config.xi,
            padding_value=PADDING_VALUE,
            sigma=config.sigma,
            schedule=config.schedule,
            schedule_length=config.schedule_length,
            version=config.version,
        )
        model.compile(optimizer=get_optimizer(config.optimizer))
        model.train(
            x_labeled=x_train_labeled,
            x_unlabeled=x_train_unlabeled,
            y_labeled=y_train_labeled,
            n_epochs=config.n_epochs,
            batch_size=config.batch_size,
            seed=config.seed,
        )
        results = model.test(
            x=x_val,
            y=y_val,
            batch_size=config.batch_size,
            evaluation_mapping=evaluation_mapping,
        )

        with tf.summary.create_file_writer(logs_path_).as_default():
            hp.hparams(hparams)
            for k, v in results.items():
                tf.summary.scalar(k, v, step=N_EPOCHS)
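
# The `possibilities` argument is unpacked above as (consistency_loss,
# schedule, sigma, consistency_scale, stabilization_scale, xi) tuples, so a
# natural way to build it is a Cartesian product over candidate values. The
# grids below are illustrative placeholders, not the values used in the
# experiments.
import itertools

possibilities = list(itertools.product(
    ['mse', 'kl'],          # consistency_loss (hypothetical choices)
    ['rampup', 'sigmoid'],  # schedule (hypothetical choices)
    [0.01, 0.1],            # sigma
    [1.0, 10.0],            # consistency_scale
    [1.0, 10.0],            # stabilization_scale
    [1e-6, 1e-5],           # xi
))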
def run_evaluations(args):
    print('Making directory %s' % args.dir)
    os.makedirs(args.dir, exist_ok=True)
    os.makedirs(args.dir + '/evaluations', exist_ok=True)

    # datasets are loaded from torchvision
    print('Loading dataset %s from %s' % (args.dataset, args.data_path))
    ds = getattr(torchvision.datasets, args.dataset)
    path = os.path.join(args.data_path, args.dataset.lower())
    if args.dataset == 'CIFAR10':
        transforms_dict = cifar.get_cifar10_transforms()
    elif args.dataset == 'CIFAR100':
        transforms_dict = cifar.get_cifar100_transforms()
    else:
        print('Dataset %s not implemented' % args.dataset)
        exit(1)

    # download and plug in the necessary transforms
    train_set = ds(path, train=True, download=True, transform=transforms_dict['train'])
    val_set = ds(path, train=True, download=True, transform=transforms_dict['test'])
    test_set = ds(path, train=False, download=True, transform=transforms_dict['test'])

    np.random.seed(42)

    if not os.path.exists(os.path.join(args.dir, 'data_split.pkl')):
        # first run: stratified train/val split, val size is 10000
        stratified_targets_split = StratifiedShuffleSplit(n_splits=1,
                                                          test_size=0.2,
                                                          random_state=42)
        for train_indices, val_indices in stratified_targets_split.split(
                train_set.targets, train_set.targets):
            print("TRAIN:", len(train_indices))
            print("VAL:", len(val_indices))

        # 10000 samples from train are selected for recalculating batch
        # normalization statistics during net evaluations
        stratified_targets_split = StratifiedShuffleSplit(n_splits=1,
                                                          test_size=0.25,
                                                          random_state=42)
        train_targets = np.array(train_set.targets)[np.array(train_indices)]
        for _, train_for_bn_indices in stratified_targets_split.split(
                train_targets, train_targets):
            train_for_bn_indices = train_indices[train_for_bn_indices]
            print("TRAIN FOR BN:", len(train_for_bn_indices))

        indices = {
            'train': train_indices,
            'val': val_indices,
            'train_for_bn': train_for_bn_indices
        }
        pickle.dump(indices, open(os.path.join(args.dir, 'data_split.pkl'), 'wb'))
    else:
        indices = pickle.load(open(os.path.join(args.dir, 'data_split.pkl'), 'rb'))
        train_indices, val_indices, train_for_bn_indices = (
            indices['train'], indices['val'], indices['train_for_bn'])

    train_targets = np.array(train_set.targets)[train_indices]
    val_targets = np.array(train_set.targets)[val_indices]
    train_for_bn_targets = np.array(train_set.targets)[train_for_bn_indices]

    # make sure that class proportions in all subsets are equal
    print('Number of samples per class in train:', Counter(train_targets))
    print('Number of samples per class in val:', Counter(val_targets))
    print('Number of samples per class in samples for batch norm recalculation:',
          Counter(train_for_bn_targets))

    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_to_bn_sampler = torch.utils.data.SubsetRandomSampler(train_for_bn_indices)

    data_loaders = {
        'val': torch.utils.data.DataLoader(
            val_set,  # without augmentations
            batch_size=args.batch_size,
            sampler=val_sampler,
            num_workers=args.num_workers,
            pin_memory=True),
        'test': torch.utils.data.DataLoader(
            test_set,  # without augmentations
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            pin_memory=True),
        'train_for_bn_recalc': torch.utils.data.DataLoader(
            val_set,  # without augmentations
            batch_size=args.batch_size,
            sampler=train_to_bn_sampler,
            num_workers=args.num_workers,
            pin_memory=True)
    }

    # train data is needed only for supernet training
    if args.training:
        data_loaders['train'] = torch.utils.data.DataLoader(
            train_set,  # with augmentations
            batch_size=args.batch_size,
            sampler=train_sampler,
            num_workers=args.num_workers,
            pin_memory=True)

    if args.training:
        # supernet creation and training
        train_supernet.train_oneshot_model(args, data_loaders, N_CELLS,
                                           N_CHOICES,
                                           put_downsampling=DOWNSAMPLE_LAYERS)
    else:
        print('Initializing models...')
        all_possible_nets = []
        for length in range(N_CELLS, N_CELLS + 1):
            all_possible_nets += list(itertools.product(CELL_TYPES, repeat=length))
        all_possible_nets = sorted(all_possible_nets)
        np.random.seed(42)  # this seed is used for ordering all nets only
        all_possible_nets = np.random.permutation(all_possible_nets)
        all_possible_nets = all_possible_nets[args.first_net_id:args.last_net_id]

        # model initialization
        model_class = models.Supernet
        model = model_class(
            num_classes=utils.get_number_of_classes(args.dataset),
            propagate=[[1 for i in range(N_CHOICES)] for j in range(N_CELLS)],
            training=False,
            n_choices=N_CHOICES,
            put_downsampling=DOWNSAMPLE_LAYERS)

        # load weights from the trained supernet (the '_swa' file holds
        # weights obtained with SWA)
        if not os.path.exists(args.dir + '/supernet_swa.pth'):
            print('You need to train the supernet first!')
            exit(1)
        model.load_state_dict(torch.load(args.dir + '/supernet_swa.pth'))
        model.cuda()

        for i, model_description in enumerate(all_possible_nets):
            if i % 10000 == 0 and i > 0:
                torch.cuda.empty_cache()
            print('evaluating genotype #%d:' % i, ''.join(model_description))

            # check whether the same model has already been evaluated
            if utils.check_model_exist(args.dir, model_description):
                print('model already evaluated')
                continue

            # translate the genotype to the minimal computationally equivalent
            # genotype (omitting identity cells)
            real_model_description = []
            for l in range(N_CELLS):
                if l in DOWNSAMPLE_LAYERS:
                    real_model_description.append('D')
                if model_description[l] != 'I':  # not identity
                    real_model_description.append(model_description[l])
            print('real model to be computed:', ''.join(real_model_description))

            # check whether a computationally identical model has already been evaluated
            if utils.check_model_exist(args.dir, real_model_description):
                print('identical model already evaluated')
                utils.copy_solution(args.dir, model_description, real_model_description)
                continue

            try:
                evaluate_net.run_evaluation(args, model, data_loaders,
                                            model_description, N_CHOICES,
                                            CELL_TYPES, DOWNSAMPLE_LAYERS)
            except RuntimeError as e:
                print(e)
                continue

            try:
                # copy the result from the current genotype to the reduced
                # (identity-free) genotype so it can be re-used
                utils.copy_solution(args.dir, real_model_description, model_description)
            except Exception as e:
                print(e)
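
# Worked example of the identity-elimination loop above, with hypothetical
# values CELL_TYPES = ('C', 'I'), N_CELLS = 5, DOWNSAMPLE_LAYERS = (1, 3):
genotype = ('C', 'I', 'C', 'I', 'C')
reduced = []
for l in range(5):
    if l in (1, 3):
        reduced.append('D')    # downsampling block inserted at fixed positions
    if genotype[l] != 'I':     # identity cells contribute nothing, so drop them
        reduced.append(genotype[l])
print(''.join(reduced))        # -> 'CDCDC'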
def train_oneshot_model(args, data_loaders, n_cells, n_choices,
                        put_downsampling=[]):
    num_samples = utils.get_number_of_samples(args.dataset)
    device = 'cuda'
    utils.setup_torch(args.seed)

    print('Initializing model...')
    # create a supernet skeleton (include all cell types for each position)
    propagate_weights = [[1, 1, 1] for i in range(n_cells)]
    model_class = getattr(models, 'Supernet')

    # create the supernet model and its SWA ensemble version
    model = model_class(num_classes=utils.get_number_of_classes(args.dataset),
                        propagate=propagate_weights,
                        training=True,
                        n_choices=n_choices,
                        put_downsampling=put_downsampling).to(device)
    ensemble_model = model_class(
        num_classes=utils.get_number_of_classes(args.dataset),
        propagate=propagate_weights,
        training=True,
        n_choices=n_choices,
        put_downsampling=put_downsampling).to(device)

    # these summaries are for verification purposes only; however, removing
    # them would change the results, because they consume random numbers from
    # the generators used to propagate through the supernet
    summary(model, (3, 32, 32), batch_size=args.batch_size, device='cuda')
    summary(ensemble_model, (3, 32, 32), batch_size=args.batch_size, device='cuda')

    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr_init,
                                momentum=0.9,
                                weight_decay=1e-4)

    start_epoch = 0
    columns = [
        'epoch time', 'overall training time', 'epoch', 'lr', 'train_loss',
        'train_acc', 'val_loss', 'val_acc', 'test_loss', 'test_acc'
    ]
    lrs = []
    n_models = 0
    all_values = {
        'epoch': [], 'lr': [], 'tr_loss': [], 'tr_acc': [],
        'val_loss': [], 'val_acc': [], 'test_loss': [], 'test_acc': []
    }

    print('Start training...')
    time_start = time.time()
    for epoch in range(start_epoch, args.epochs):
        time_ep = time.time()

        # lr = utils.get_cosine_annealing_lr(epoch, args.lr_init, args.epochs)
        lr = utils.get_cyclic_lr(epoch, lrs, args.lr_init, args.lr_start_cycle,
                                 args.cycle_period)
        utils.set_learning_rate(optimizer, lr)
        lrs.append(lr)

        train_res = utils.train_epoch(device, data_loaders['train'], model,
                                      criterion, optimizer, num_samples['train'])
        values = [epoch + 1, lr, train_res['loss'], train_res['accuracy']]

        # at the end of each learning-rate cycle, evaluate and add the current
        # weights to the SWA ensemble
        if (epoch + 1) >= args.lr_start_cycle and (epoch + 1) % args.cycle_period == 0:
            all_values['epoch'].append(epoch + 1)
            all_values['lr'].append(lr)
            all_values['tr_loss'].append(train_res['loss'])
            all_values['tr_acc'].append(train_res['accuracy'])

            val_res = utils.evaluate(device, data_loaders['val'], model,
                                     criterion, num_samples['val'])
            test_res = utils.evaluate(device, data_loaders['test'], model,
                                      criterion, num_samples['test'])
            all_values['val_loss'].append(val_res['loss'])
            all_values['val_acc'].append(val_res['accuracy'])
            all_values['test_loss'].append(test_res['loss'])
            all_values['test_acc'].append(test_res['accuracy'])
            values += [
                val_res['loss'], val_res['accuracy'], test_res['loss'],
                test_res['accuracy']
            ]

            utils.moving_average_ensemble(ensemble_model, model, 1.0 / (n_models + 1))
            utils.bn_update(device, data_loaders['train'], ensemble_model)
            n_models += 1
            print(all_values)

        overall_training_time = time.time() - time_start
        values = [time.time() - time_ep, overall_training_time] + values
        table = tabulate.tabulate([values], columns, tablefmt='simple', floatfmt='8.4f')
        print(table)

    print('Training finished. Saving final nets...')
    utils.save_result(all_values, args.dir, 'model_supernet')
    torch.save(model.state_dict(), args.dir + '/supernet.pth')
    torch.save(ensemble_model.state_dict(), args.dir + '/supernet_swa.pth')
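
# A minimal sketch of a cyclic schedule consistent with the call
# utils.get_cyclic_lr(epoch, lrs, args.lr_init, args.lr_start_cycle,
# args.cycle_period) above: hold lr_init during warm-up, then decay linearly
# within each cycle and restart, so SWA snapshots (taken above when
# (epoch + 1) % cycle_period == 0) land at the cycle minima. This is an
# assumption about the repo's actual schedule, not its code; lr_min is a
# hypothetical floor.
def get_cyclic_lr_sketch(epoch, lrs, lr_init, lr_start_cycle, cycle_period, lr_min=1e-3):
    if epoch < lr_start_cycle:
        return lr_init  # constant warm-up phase; the `lrs` history is unused in this sketch
    t = ((epoch - lr_start_cycle) % cycle_period + 1) / cycle_period  # position in cycle, (0, 1]
    return lr_init - (lr_init - lr_min) * t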