def main():
    args = get_args()
    print_training_config(args)

    train_loader, valid_loader, test_loader, class_to_idx = data_utils.get_data_loaders(
        args.data_dir)

    model = network_utils.build_network(args.arch, args.hidden_units,
                                        args.output_units, args.drop_prob)
    model.class_to_idx = class_to_idx

    criterion = network_utils.get_loss_function()
    optimizer = network_utils.get_optimizer(model, args.learning_rate)

    train(model, train_loader, valid_loader, criterion, optimizer,
          args.epochs, 10, args.gpu)

    network_utils.save_model(model, args.save_dir, args.arch, args.epochs,
                             args.learning_rate, args.hidden_units)
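# get_args() and print_training_config() are not shown in this excerpt. A
# plausible argparse-based get_args(), consistent with the attributes main()
# reads (data_dir, arch, hidden_units, output_units, drop_prob, learning_rate,
# epochs, gpu, save_dir), could look like the sketch below; the flag names and
# defaults are assumptions, not the script's actual interface.
import argparse


def get_args():
    parser = argparse.ArgumentParser(description='Train an image classifier')
    parser.add_argument('data_dir', help='directory with the training images')
    parser.add_argument('--arch', default='vgg16', help='torchvision backbone')
    parser.add_argument('--hidden_units', type=int, default=512)
    parser.add_argument('--output_units', type=int, default=102)
    parser.add_argument('--drop_prob', type=float, default=0.5)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=5)
    parser.add_argument('--gpu', action='store_true', help='use the GPU if available')
    parser.add_argument('--save_dir', default='.', help='where to save the checkpoint')
    return parser.parse_args()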
args.nepochs = 5
args.window_size = 10
args.batch_size = 32
num_labels = 4
which_feats = [0, 1, 2, 3, 4, 5, 6, 7]  # MAV features only
batch_size = args.batch_size
window_size = args.window_size
fingers = ALL_FINGERS[:num_labels]

train_loader, test_loader, valid_loader = data_utils.get_data_loaders(
    EMG_data, fingers, num_labels, which_feats, args.window_size,
    args.batch_size, train_split=0.8, validation_split=0.2, center=False,
    shuffle=True)

# Second pass over the same split without shuffling, for ordered evaluation.
# The original call was cut off here; shuffle=False is assumed from the
# variable name test_loader_unshuffled.
_, test_loader_unshuffled, _ = data_utils.get_data_loaders(
    EMG_data, fingers, num_labels, which_feats, args.window_size,
    args.batch_size, train_split=0.8, validation_split=0.2, center=False,
    shuffle=False)
def run_experiments(experiments):
    print("Running {} Experiments..\n".format(len(experiments)))
    for xp_count, xp in enumerate(experiments):
        hp = dhp.get_hp(xp.hyperparameters)
        xp.prepare(hp)
        print(xp)

        # Load the Data and split it among the Clients
        client_loaders, train_loader, test_loader, stats = data_utils.get_data_loaders(hp)

        # Instantiate Clients and Server with Neural Net
        net = getattr(neural_nets, hp['net'])
        clients = [
            Client(loader, net().to(device), hp, xp, id_num=i)
            for i, loader in enumerate(client_loaders)
        ]
        server = Server(test_loader, net().to(device), hp, xp, stats)

        # Print optimizer specs
        print_model(device=clients[0])
        print_optimizer(device=clients[0])

        # Start Distributed Training Process
        print("Start Distributed Training..\n")
        t1 = time.time()
        for c_round in range(1, hp['communication_rounds'] + 1):

            participating_clients = random.sample(
                clients, int(len(clients) * hp['participation_rate']))

            # Clients do
            for client in participating_clients:
                client.synchronize_with_server(server)
                client.compute_weight_update(hp['local_iterations'])
                client.compress_weight_update_up(
                    compression=hp['compression_up'],
                    accumulate=hp['accumulation_up'],
                    count_bits=hp["count_bits"])

            # Server does
            server.aggregate_weight_updates(participating_clients,
                                            aggregation=hp['aggregation'])
            server.compress_weight_update_down(
                compression=hp['compression_down'],
                accumulate=hp['accumulation_down'],
                count_bits=hp["count_bits"])

            # Evaluate
            if xp.is_log_round(c_round):
                print("Experiment: {} ({}/{})".format(args.schedule,
                                                      xp_count + 1,
                                                      len(experiments)))
                print("Evaluate...")
                results_train = server.evaluate(max_samples=5000,
                                                loader=train_loader)
                results_test = server.evaluate(max_samples=10000)

                # Logging
                xp.log({
                    'communication_round': c_round,
                    'lr': clients[0].optimizer.__dict__['param_groups'][0]['lr'],
                    'epoch': clients[0].epoch,
                    'iteration': c_round * hp['local_iterations']
                })
                xp.log(
                    {
                        'client{}_loss'.format(client.id): client.train_loss
                        for client in clients
                    },
                    printout=False)

                xp.log({key + '_train': value
                        for key, value in results_train.items()})
                xp.log({key + '_test': value
                        for key, value in results_test.items()})

                if hp["count_bits"]:
                    xp.log(
                        {
                            'bits_sent_up': sum(participating_clients[0].bits_sent),
                            'bits_sent_down': sum(server.bits_sent)
                        },
                        printout=False)

                xp.log({'time': time.time() - t1}, printout=False)

                # Save results to Disk
                if 'log_path' in hp and hp['log_path']:
                    xp.save_to_disc(path=hp['log_path'])

                # Timing
                total_time = time.time() - t1
                avrg_time_per_c_round = total_time / c_round
                e = int(avrg_time_per_c_round *
                        (hp['communication_rounds'] - c_round))
                print("Remaining Time (approx.):",
                      '{:02d}:{:02d}:{:02d}'.format(e // 3600, e % 3600 // 60,
                                                    e % 60),
                      "[{:.2f}%]\n".format(c_round / hp['communication_rounds'] * 100))

        # Delete objects to free up GPU memory
        del server
        clients.clear()
        torch.cuda.empty_cache()
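# The communication-round loop above drives a FedAvg-style protocol: clients
# pull the global weights, take a few local steps, upload a (possibly
# compressed) weight delta, and the server averages the deltas. The sketch
# below illustrates that pattern in plain PyTorch for intuition only; it is a
# generic example with our own helper names, not the repository's
# Client/Server implementation.
import copy
import itertools
import torch
import torch.nn as nn


def local_update(global_model, loader, iterations, lr=0.01):
    # Copy the global weights, run `iterations` local SGD steps, and return the
    # weight delta (the quantity that compress_weight_update_up would compress).
    model = copy.deepcopy(global_model)
    opt = torch.optim.SGD(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    batches = itertools.cycle(loader)
    for _ in range(iterations):
        x, y = next(batches)
        opt.zero_grad()
        loss_fn(model(x), y).backward()
        opt.step()
    return {k: model.state_dict()[k] - global_model.state_dict()[k]
            for k in global_model.state_dict()}


def aggregate(global_model, client_deltas):
    # Plain mean aggregation of the client deltas, applied to the global model.
    state = global_model.state_dict()
    for k in state:
        state[k] = state[k] + torch.stack([d[k] for d in client_deltas]).mean(dim=0)
    global_model.load_state_dict(state)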
all_data = sio.loadmat(
    '/Users/ScottEnsel/Desktop/Deep Learning/Project/NEW files/Z_run-010_thumb_index_middle.mat',
    struct_as_record=False,
    squeeze_me=True)
EMG_data = all_data['z']  # load in our data

# all_data = sio.loadmat(os.path.join(data_utils.DATA_DIR, data_utils.DATA_SET1),
#                        struct_as_record=False, squeeze_me=True)
# EMG_data = all_data['z']

train_loader, test_loader, valid_loader = data_utils.get_data_loaders(
    EMG_data, fingers, num_labels, which_feats, window_size, batch_size,
    train_split=0.8, validation_split=0.2, center=False)
data_gen = inf_generator(train_loader)
batches_per_epoch = len(train_loader)

dimension = len(which_feats) + ((window_size - 1) * len(fingers))
feature_layers = [ODEBlock(ODEfunc(dimension))]
fc_layers = [nn.Linear(dimension, len(fingers))]

model = nn.Sequential(*feature_layers, *fc_layers).to(device)
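# ODEBlock, ODEfunc, and inf_generator are not defined in this excerpt. A
# minimal sketch in the style of the torchdiffeq examples, assuming the
# standard odeint API, is shown below; the layer widths and tolerances are
# placeholders rather than the project's actual definitions.
import torch
import torch.nn as nn
from torchdiffeq import odeint  # assumed dependency


class ODEfunc(nn.Module):
    # Defines the dynamics dx/dt = f(t, x) with a small fully connected net.
    def __init__(self, dim):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(dim, dim), nn.Tanh(), nn.Linear(dim, dim))

    def forward(self, t, x):
        return self.net(x)


class ODEBlock(nn.Module):
    # Integrates the dynamics from t=0 to t=1 and returns the final state.
    def __init__(self, odefunc, tol=1e-3):
        super().__init__()
        self.odefunc = odefunc
        self.integration_time = torch.tensor([0.0, 1.0])
        self.tol = tol

    def forward(self, x):
        t = self.integration_time.type_as(x)
        return odeint(self.odefunc, x, t, rtol=self.tol, atol=self.tol)[1]


def inf_generator(iterable):
    # Yield batches from a DataLoader indefinitely, restarting when exhausted.
    while True:
        for batch in iterable:
            yield batch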
def main():
    # from some github repo...
    torch.multiprocessing.set_sharing_strategy('file_system')
    args = get_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    train_loader, valid_loader, test_loader = get_data_loaders(
        args.dataset, args.batch_size, sub_task=args.sub_task,
        dim=args.input_dim, train_shuffle=False)
    train_labels, valid_labels, test_labels = get_labels(
        [train_loader, valid_loader, test_loader])
    if args.dataset in ['sider_split/', 'tox21_split/']:
        args.dataset = args.dataset[:-1] + '-' + str(args.sub_task)
    print('batch number: train={}, valid={}, test={}'.format(
        len(train_loader), len(valid_loader), len(test_loader)))

    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')

    # One prediction slot per ensemble member; members are fit sequentially below.
    train_pred = np.zeros((len(train_labels), args.ensemble_n, args.output_dim),
                          dtype=np.float32)
    valid_pred = np.zeros((len(valid_labels), args.ensemble_n, args.output_dim),
                          dtype=np.float32)
    test_pred = np.zeros((len(test_labels), args.ensemble_n, args.output_dim),
                         dtype=np.float32)

    if args.task == 'classification':
        offset = np.array([[0., 0.]], dtype=np.float32)
    else:
        offset = np.array([[0.]], dtype=np.float32)

    ckpt_dir = 'checkpoint/' + args.dataset.strip('/') + '/ensemble/'
    log_dir = 'log/' + args.dataset.strip('/') + '/ensemble/'
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    ckpt_file = ckpt_dir + 'depth{}_backn{}_drop{}_p{}_shrinkage{}_seed{}.ckpt'.format(
        args.depth, args.back_n, args.drop_type, args.p, args.shrinkage, args.seed)
    best_file = ckpt_dir + 'depth{}_backn{}_drop{}_p{}_shrinkage{}_seed{}.t7'.format(
        args.depth, args.back_n, args.drop_type, args.p, args.shrinkage, args.seed)
    log_file = log_dir + 'depth{}_backn{}_drop{}_p{}_shrinkage{}_seed{}.log'.format(
        args.depth, args.back_n, args.drop_type, args.p, args.shrinkage, args.seed)

    for ensemble_idx in range(args.ensemble_n):
        feat_indices = np.arange(args.input_dim)
        feat_dim = args.input_dim
        data_indices = np.arange(len(train_loader.dataset.x))

        model = Net(input_dim=feat_dim,
                    output_dim=args.output_dim,
                    hidden_dim=args.hidden_dim,
                    num_layer=args.depth,
                    num_back_layer=args.back_n,
                    dense=True,
                    drop_type=args.drop_type,
                    net_type=args.net_type,
                    approx=args.anneal).to(device)

        if args.optimizer == 'SGD':
            optimizer = optim.SGD(model.parameters(), lr=args.lr,
                                  momentum=args.momentum, nesterov=True)
        elif args.optimizer == 'AMSGrad':
            optimizer = optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
        scheduler = StepLR(optimizer, step_size=args.lr_step_size, gamma=args.gamma)

        best_score = -1e30
        start_epoch = 1  # start from epoch 1 or last checkpoint epoch
        start = time()

        # The first regression member fits the targets directly; later members
        # are shrunk like boosting stages.
        if ensemble_idx == 0 and args.task == 'regression':
            shrinkage = 1
        else:
            shrinkage = args.shrinkage

        for epoch in range(start_epoch, args.epochs + start_epoch):
            scheduler.step(epoch)
            alpha = get_alpha(epoch, args.epochs)
            train_approximate_loss = train(args, model, device, train_loader,
                                           optimizer, epoch, args.anneal,
                                           train_pred, offset, feat_indices,
                                           data_indices, alpha)
            if epoch % 30 == 0:
                print('Train Epoch: {} \tLoss: {:.6f}'.format(
                    epoch, train_approximate_loss), flush=True)

        train_pred[:, ensemble_idx, :] = shrinkage * predict(
            args, model, device, train_loader, feat_indices)
        valid_pred[:, ensemble_idx, :] = shrinkage * predict(
            args, model, device, valid_loader, feat_indices)
        test_pred[:, ensemble_idx, :] = shrinkage * predict(
            args, model, device, test_loader, feat_indices)

        save_pklgz(ckpt_file, [
            train_pred, train_labels, valid_pred, valid_labels, test_pred,
            test_labels
        ])

        if args.task == 'classification':
            train_score = get_AUC(train_pred, train_labels)
            valid_score = get_AUC(valid_pred, valid_labels)
            test_score = get_AUC(test_pred, test_labels)
            print('Iteration {}, AUC: train = {:.3f}, valid = {:.3f}, test = {:.3f}'
                  .format(ensemble_idx, train_score, valid_score, test_score))
        else:
            train_score = get_RMSE(train_pred, train_labels, offset)
            valid_score = get_RMSE(valid_pred, valid_labels, offset)
            test_score = get_RMSE(test_pred, test_labels, offset)
            print('Iteration {}, RMSE: train = {:.3f}, valid = {:.3f}, test = {:.3f}'
                  .format(ensemble_idx, train_score, valid_score, test_score))

        with open(log_file, 'a') as fp:
            fp.write('{}\t{}\t{:.4f}\t{:.4f}\t{:.4f}\n'.format(
                args.seed, ensemble_idx, train_score, valid_score, test_score))

        del model, optimizer, scheduler
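# get_AUC and get_RMSE are not shown in this excerpt. Given the boosting-style
# shrinkage above, a natural assumption is that member predictions are summed
# over the ensemble axis and the constant offset is added before scoring. The
# sketch below illustrates that assumed aggregation; it is not the repo's code.
import numpy as np


def combine_ensemble(pred, offset):
    # pred: (N, ensemble_n, output_dim) of already-shrunk member predictions.
    return pred.sum(axis=1) + offset  # -> (N, output_dim)


def rmse(combined, labels):
    labels = np.asarray(labels, dtype=np.float32).reshape(combined.shape)
    return float(np.sqrt(np.mean((combined - labels) ** 2)))

# e.g. score = rmse(combine_ensemble(test_pred, offset), test_labels)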
# which_feats: list of which features you want to use. [0,1,2,...,7] means use
#     features 0 to 7 only (MAV features); [0,1,2,...,32] means use features
#     0 to 32 (all features).
# window_size: the size of the sliding window. window_size = 100 means the
#     model receives the last 100 time points when it tries to predict the
#     current timepoint's label.
# batch_size: the usual meaning of batch size.
# center: True if you want to zero-center the labels, False otherwise.
which_feats = [0, 1, 2, 3, 4, 5, 6, 7]  # MAV features only
fingers = [data_utils.THUMB_INDEX]  # ,data_utils.INDEX_INDEX,data_utils.MIDDLE_INDEX]
num_labels = 1
center = False

train_loader, test_loader, train_eval_loader = data_utils.get_data_loaders(
    data_utils.z_1, fingers, num_labels, which_feats, window_size, batch_size,
    shuffle=True, center=center)
data_gen = inf_generator(train_loader)
batches_per_epoch = len(train_loader)

LSTM_layer = SingleLSTMResidual(input_size=8 + num_labels * (window_size - 1),
                                seq_len=window_size,
                                hidden_size=hidden_size,
                                dropout=dropout,
                                batch_size=batch_size)
model = LSTM_layer.to(device)
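# To make the window_size semantics above concrete, here is a small,
# self-contained sketch of how a sliding window turns a multichannel time
# series into (window, label) pairs. The helper and array names are
# illustrative only and are not part of data_utils.
import numpy as np


def make_windows(features, labels, window_size):
    # features: (T, C) array, labels: (T,) array. Each timepoint t (from
    # window_size-1 onward) is paired with the last window_size feature rows.
    X, y = [], []
    for t in range(window_size - 1, len(features)):
        X.append(features[t - window_size + 1:t + 1])
        y.append(labels[t])
    return np.stack(X), np.array(y)


# Example: 1000 timepoints of the 8 MAV channels with a 100-step window.
feats = np.random.randn(1000, 8).astype(np.float32)
labs = np.random.randn(1000).astype(np.float32)
X, y = make_windows(feats, labs, window_size=100)
print(X.shape, y.shape)  # (901, 100, 8) (901,)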
def main():
    # from some github repo...
    torch.multiprocessing.set_sharing_strategy('file_system')
    args = get_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    train_loader, valid_loader, test_loader = get_data_loaders(
        args.dataset, args.batch_size, sub_task=args.sub_task, dim=args.input_dim)
    if args.dataset in ['sider_split/', 'tox21_split/']:
        args.dataset = args.dataset[:-1] + '-' + str(args.sub_task)
    print('batch number: train={}, valid={}, test={}'.format(
        len(train_loader), len(valid_loader), len(test_loader)))

    model = Net(input_dim=args.input_dim,
                output_dim=args.output_dim,
                hidden_dim=args.hidden_dim,
                num_layer=args.depth,
                num_back_layer=args.back_n,
                dense=True,
                drop_type=args.drop_type,
                net_type=args.net_type,
                approx=args.anneal).to(device)

    if args.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum, nesterov=True)
    elif args.optimizer == 'AMSGrad':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
    scheduler = StepLR(optimizer, step_size=args.lr_step_size, gamma=args.gamma)

    best_score = -1e30
    start_epoch = 1  # start from epoch 1 or last checkpoint epoch

    if args.anneal == 'approx':
        args.net_type = 'approx_' + args.net_type

    best_model_name = './checkpoint/{}/{}/best_seed{}_depth{}_ckpt.t7'.format(
        args.dataset.strip('/'), args.net_type, args.seed, args.depth)
    last_model_name = './checkpoint/{}/{}/last_seed{}_depth{}_ckpt.t7'.format(
        args.dataset.strip('/'), args.net_type, args.seed, args.depth)
    best_log_file = 'log/' + args.dataset.strip('/') + '/{}/depth{}_backn{}_drop{}_p{}_best.log'.format(
        args.net_type, args.depth, args.back_n, args.drop_type, args.p)
    last_log_file = 'log/' + args.dataset.strip('/') + '/{}/depth{}_backn{}_drop{}_p{}_last.log'.format(
        args.net_type, args.depth, args.back_n, args.drop_type, args.p)

    model_dir = './checkpoint/{}/{}/'.format(args.dataset.strip('/'), args.net_type)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    log_dir = 'log/' + args.dataset.strip('/') + '/{}/'.format(args.net_type)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')

    for epoch in range(start_epoch, args.epochs + start_epoch):
        scheduler.step(epoch)
        alpha = get_alpha(epoch, args.epochs)
        train_approximate_loss = train(args, model, device, train_loader,
                                       optimizer, epoch, args.anneal, alpha)

        # used for plotting learning curves
        train_loss, train_score = test(args, model, device, train_loader, 'train')
        valid_loss, valid_score = test(args, model, device, valid_loader, 'valid')
        test_loss, test_score = test(args, model, device, test_loader, 'test')

        # early stopping version
        if valid_score > best_score:
            state = {'model': model.state_dict()}
            torch.save(state, best_model_name)
            best_score = valid_score

        # "convergent" version
        state = {'model': model.state_dict()}
        torch.save(state, last_model_name)

    print('Training finished. Loading models from validation...')

    for model_name, log_file, setting in zip([best_model_name, last_model_name],
                                             [best_log_file, last_log_file],
                                             ['best', 'last']):
        print('\nLoading the {} model...'.format(setting))
        checkpoint = torch.load(model_name)
        model.load_state_dict(checkpoint['model'])
        train_loss, train_score = test(args, model, device, train_loader, 'train')
        valid_loss, valid_score = test(args, model, device, valid_loader, 'valid')
        test_loss, test_score = test(args, model, device, test_loader, 'test')
        with open(log_file, 'a') as fp:
            if args.task == 'classification':
                log_str = '{}\t{:.4f}\t{:.4f}\t{:.4f}'.format(
                    args.seed, train_score, valid_score, test_score)
            elif args.task == 'regression':
                log_str = '{}\t{:.4f}\t{:.4f}\t{:.4f}'.format(
                    args.seed, -train_score, -valid_score, -test_score)
            fp.write(log_str + '\n')