Example #1
def main():
    args = get_args()
    print_training_config(args)
    train_loader, valid_loader, test_loader, class_to_idx = data_utils.get_data_loaders(
        args.data_dir)
    model = network_utils.build_network(args.arch, args.hidden_units,
                                        args.output_units, args.drop_prob)
    model.class_to_idx = class_to_idx
    criterion = network_utils.get_loss_function()
    optimizer = network_utils.get_optimizer(model, args.learning_rate)
    train(model, train_loader, valid_loader, criterion, optimizer, args.epochs,
          10, args.gpu)
    network_utils.save_model(model, args.save_dir, args.arch, args.epochs,
                             args.learning_rate, args.hidden_units)
    args.nepochs = 5
    args.window_size = 10
    args.batch_size = 32
    num_labels = 4

    which_feats = [0, 1, 2, 3, 4, 5, 6, 7]  # MAV Features ONLY
    batch_size = args.batch_size
    window_size = args.window_size
    fingers = ALL_FINGERS[:num_labels]

    train_loader, test_loader, valid_loader = data_utils.get_data_loaders(
        EMG_data,
        fingers,
        num_labels,
        which_feats,
        args.window_size,
        args.batch_size,
        train_split=0.8,
        validation_split=0.2,
        center=False,
        shuffle=True)
    _, test_loader_unshuffled, _ = data_utils.get_data_loaders(
        EMG_data,
        fingers,
        num_labels,
        which_feats,
        args.window_size,
        args.batch_size,
        train_split=0.8,
        validation_split=0.2,
        center=False,
        shuffle=False)
Example #3
def run_experiments(experiments):
    print("Running {} Experiments..\n".format(len(experiments)))
    for xp_count, xp in enumerate(experiments):
        hp = dhp.get_hp(xp.hyperparameters)
        xp.prepare(hp)
        print(xp)

        # Load the Data and split it among the Clients
        client_loaders, train_loader, test_loader, stats = data_utils.get_data_loaders(
            hp)

        # Instantiate Clients and Server with Neural Net
        net = getattr(neural_nets, hp['net'])
        clients = [
            Client(loader, net().to(device), hp, xp, id_num=i)
            for i, loader in enumerate(client_loaders)
        ]
        server = Server(test_loader, net().to(device), hp, xp, stats)

        # Print model and optimizer specs
        print_model(device=clients[0])
        print_optimizer(device=clients[0])

        # Start Distributed Training Process
        print("Start Distributed Training..\n")
        t1 = time.time()

        for c_round in range(1, hp['communication_rounds'] + 1):

            # randomly select the fraction of clients (participation_rate) that takes part in this round
            participating_clients = random.sample(
                clients, int(len(clients) * hp['participation_rate']))

            # Clients do
            for client in participating_clients:
                client.synchronize_with_server(server)
                client.compute_weight_update(hp['local_iterations'])
                client.compress_weight_update_up(
                    compression=hp['compression_up'],
                    accumulate=hp['accumulation_up'],
                    count_bits=hp["count_bits"])

            # Server does
            server.aggregate_weight_updates(participating_clients,
                                            aggregation=hp['aggregation'])
            server.compress_weight_update_down(
                compression=hp['compression_down'],
                accumulate=hp['accumulation_down'],
                count_bits=hp["count_bits"])

            # Evaluate
            if xp.is_log_round(c_round):
                print("Experiment: {} ({}/{})".format(args.schedule,
                                                      xp_count + 1,
                                                      len(experiments)))
                print("Evaluate...")
                results_train = server.evaluate(max_samples=5000,
                                                loader=train_loader)
                results_test = server.evaluate(max_samples=10000)

                # Logging
                xp.log({
                    'communication_round': c_round,
                    'lr': clients[0].optimizer.__dict__['param_groups'][0]['lr'],
                    'epoch': clients[0].epoch,
                    'iteration': c_round * hp['local_iterations']
                })
                xp.log(
                    {
                        'client{}_loss'.format(client.id): client.train_loss
                        for client in clients
                    },
                    printout=False)

                xp.log({
                    key + '_train': value
                    for key, value in results_train.items()
                })
                xp.log({
                    key + '_test': value
                    for key, value in results_test.items()
                })

                if hp["count_bits"]:
                    xp.log(
                        {
                            'bits_sent_up': sum(
                                participating_clients[0].bits_sent),
                            'bits_sent_down': sum(server.bits_sent)
                        },
                        printout=False)

                xp.log({'time': time.time() - t1}, printout=False)

                # Save results to Disk
                if 'log_path' in hp and hp['log_path']:
                    xp.save_to_disc(path=hp['log_path'])

                # Timing: estimate the remaining wall-clock time from the
                # average duration of the communication rounds completed so far
                total_time = time.time() - t1
                avrg_time_per_c_round = total_time / c_round
                e = int(avrg_time_per_c_round *
                        (hp['communication_rounds'] - c_round))  # seconds remaining
                print(
                    "Remaining Time (approx.):",
                    '{:02d}:{:02d}:{:02d}'.format(e // 3600, (e % 3600 // 60),
                                                  e % 60),
                    "[{:.2f}%]\n".format(c_round / hp['communication_rounds'] *
                                         100))

        # Delete objects to free up GPU memory
        del server
        clients.clear()
        torch.cuda.empty_cache()
    all_data = sio.loadmat(
        '/Users/ScottEnsel/Desktop/Deep Learning/Project/NEW files/Z_run-010_thumb_index_middle.mat',
        struct_as_record=False,
        squeeze_me=True)
    EMG_data = all_data['z']

    # Alternative: load the data set shipped with the repository instead
    # all_data = sio.loadmat(os.path.join(data_utils.DATA_DIR, data_utils.DATA_SET1), struct_as_record=False, squeeze_me=True)
    # EMG_data = all_data['z']

    train_loader, test_loader, valid_loader = data_utils.get_data_loaders(
        EMG_data,
        fingers,
        num_labels,
        which_feats,
        window_size,
        batch_size,
        train_split=0.8,
        validation_split=0.2,
        center=False)

    data_gen = inf_generator(train_loader)
    batches_per_epoch = len(train_loader)

    dimension = len(which_feats) + ((window_size - 1) * len(fingers))

    # Neural-ODE feature block followed by a linear readout, one output per finger
    feature_layers = [ODEBlock(ODEfunc(dimension))]
    fc_layers = [nn.Linear(dimension, len(fingers))]

    model = nn.Sequential(*feature_layers, *fc_layers).to(device)
def main():
    # from some github repo...
    torch.multiprocessing.set_sharing_strategy('file_system')

    args = get_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    train_loader, valid_loader, test_loader = get_data_loaders(
        args.dataset,
        args.batch_size,
        sub_task=args.sub_task,
        dim=args.input_dim,
        train_shuffle=False)
    train_labels, valid_labels, test_labels = get_labels(
        [train_loader, valid_loader, test_loader])

    if args.dataset in ['sider_split/', 'tox21_split/']:
        args.dataset = args.dataset[:-1] + '-' + str(args.sub_task)

    print('batch number: train={}, valid={}, test={}'.format(
        len(train_loader), len(valid_loader), len(test_loader)))

    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')

    # Per-example predictions are stored separately for every ensemble member
    # (shape: n_samples x ensemble_n x output_dim)
    train_pred = np.zeros(
        (len(train_labels), args.ensemble_n, args.output_dim),
        dtype=np.float32)
    valid_pred = np.zeros(
        (len(valid_labels), args.ensemble_n, args.output_dim),
        dtype=np.float32)
    test_pred = np.zeros((len(test_labels), args.ensemble_n, args.output_dim),
                         dtype=np.float32)

    if args.task == 'classification':
        offset = np.array([[0., 0.]], dtype=np.float32)
    else:
        offset = np.array([[0.]], dtype=np.float32)

    ckpt_dir = 'checkpoint/' + args.dataset.strip('/') + '/ensemble/'
    log_dir = 'log/' + args.dataset.strip('/') + '/ensemble/'
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    ckpt_file = ckpt_dir + 'depth{}_backn{}_drop{}_p{}_shrinkage{}_seed{}.ckpt'.format(
        args.depth, args.back_n, args.drop_type, args.p, args.shrinkage,
        args.seed)
    best_file = ckpt_dir + 'depth{}_backn{}_drop{}_p{}_shrinkage{}_seed{}.t7'.format(
        args.depth, args.back_n, args.drop_type, args.p, args.shrinkage,
        args.seed)
    log_file = log_dir + 'depth{}_backn{}_drop{}_p{}_shrinkage{}_seed{}.log'.format(
        args.depth, args.back_n, args.drop_type, args.p, args.shrinkage,
        args.seed)

    for ensemble_idx in range(args.ensemble_n):
        feat_indices = np.arange(args.input_dim)
        feat_dim = args.input_dim
        data_indices = np.arange(len(train_loader.dataset.x))
        model = Net(input_dim=feat_dim,
                    output_dim=args.output_dim,
                    hidden_dim=args.hidden_dim,
                    num_layer=args.depth,
                    num_back_layer=args.back_n,
                    dense=True,
                    drop_type=args.drop_type,
                    net_type=args.net_type,
                    approx=args.anneal).to(device)

        if args.optimizer == 'SGD':
            optimizer = optim.SGD(model.parameters(),
                                  lr=args.lr,
                                  momentum=args.momentum,
                                  nesterov=True)
        elif args.optimizer == 'AMSGrad':
            optimizer = optim.Adam(model.parameters(),
                                   lr=args.lr,
                                   amsgrad=True)
        scheduler = StepLR(optimizer,
                           step_size=args.lr_step_size,
                           gamma=args.gamma)

        best_score = -1e30
        start_epoch = 1  # start from epoch 1 or last checkpoint epoch

        start = time()
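        # The first member of a regression ensemble predicts at full strength;
        # every other member's predictions are damped by the shrinkage factor
        # (applied where predict is called below).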
        if ensemble_idx == 0 and args.task == 'regression':
            shrinkage = 1
        else:
            shrinkage = args.shrinkage

        for epoch in range(start_epoch, args.epochs + start_epoch):
            scheduler.step(epoch)

            alpha = get_alpha(epoch, args.epochs)
            train_approximate_loss = train(args, model, device, train_loader,
                                           optimizer, epoch, args.anneal,
                                           train_pred, offset, feat_indices,
                                           data_indices, alpha)
            if epoch % 30 == 0:
                print('Train Epoch: {} \tLoss: {:.6f}'.format(
                    epoch, train_approximate_loss),
                      flush=True)

        train_pred[:, ensemble_idx, :] = shrinkage * predict(
            args, model, device, train_loader, feat_indices)
        valid_pred[:, ensemble_idx, :] = shrinkage * predict(
            args, model, device, valid_loader, feat_indices)
        test_pred[:, ensemble_idx, :] = shrinkage * predict(
            args, model, device, test_loader, feat_indices)

        save_pklgz(ckpt_file, [
            train_pred, train_labels, valid_pred, valid_labels, test_pred,
            test_labels
        ])
        if args.task == 'classification':
            train_score = get_AUC(train_pred, train_labels)
            valid_score = get_AUC(valid_pred, valid_labels)
            test_score = get_AUC(test_pred, test_labels)
            print(
                'Iteration {}, AUC: train = {:.3f}, valid = {:.3f}, test = {:.3f}'
                .format(ensemble_idx, train_score, valid_score, test_score))
        else:
            train_score = get_RMSE(train_pred, train_labels, offset)
            valid_score = get_RMSE(valid_pred, valid_labels, offset)
            test_score = get_RMSE(test_pred, test_labels, offset)
            print(
                'Iteration {}, RMSE: train = {:.3f}, valid = {:.3f}, test = {:.3f}'
                .format(ensemble_idx, train_score, valid_score, test_score))

        with open(log_file, 'a') as fp:
            fp.write('{}\t{}\t{:.4f}\t{:.4f}\t{:.4f}\n'.format(
                args.seed, ensemble_idx, train_score, valid_score, test_score))
        del model, optimizer, scheduler
    # which_feats: list of the feature indices to use. [0, 1, 2, ..., 7] means use
    #              features 0 to 7 only (MAV features); [0, 1, 2, ..., 32] means
    #              use features 0 to 32 (all features).
    # window_size: size of the sliding window. window_size = 100 means that the
    #              model receives the last 100 time points when it tries to
    #              predict the current time point's label.
    # batch_size:  the usual meaning of batch size.
    # center:      True to zero-center the labels, False otherwise.
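    # Illustration (assumed windowing semantics): with window_size = 3 and a
    # signal x = [x0, x1, x2, x3, ...], each sample would contain
    #   [x0, x1, x2] -> label at t = 2
    #   [x1, x2, x3] -> label at t = 3
    # i.e. the current time point plus the previous window_size - 1 points.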
    which_feats = [0, 1, 2, 3, 4, 5, 6, 7]  # MAV FEATURES ONLY
    fingers = [data_utils.THUMB_INDEX]  # , data_utils.INDEX_INDEX, data_utils.MIDDLE_INDEX
    num_labels = 1
    center = False
    train_loader, test_loader, train_eval_loader = data_utils.get_data_loaders(
        data_utils.z_1,
        fingers,
        num_labels,
        which_feats,
        window_size,
        batch_size,
        shuffle=True,
        center=center)

    data_gen = inf_generator(train_loader)
    batches_per_epoch = len(train_loader)

    LSTM_layer = SingleLSTMResidual(
        input_size=8 + num_labels * (window_size - 1),
        seq_len=window_size,
        hidden_size=hidden_size,
        dropout=dropout,
        batch_size=batch_size)
    model = LSTM_layer.to(device)
Example #7
def main():
    # from some github repo...
    torch.multiprocessing.set_sharing_strategy('file_system')

    args = get_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    train_loader, valid_loader, test_loader = get_data_loaders(
        args.dataset,
        args.batch_size,
        sub_task=args.sub_task,
        dim=args.input_dim)

    if args.dataset in ['sider_split/', 'tox21_split/']:
        args.dataset = args.dataset[:-1] + '-' + str(args.sub_task)

    print('batch number: train={}, valid={}, test={}'.format(
        len(train_loader), len(valid_loader), len(test_loader)))

    model = Net(input_dim=args.input_dim,
                output_dim=args.output_dim,
                hidden_dim=args.hidden_dim,
                num_layer=args.depth,
                num_back_layer=args.back_n,
                dense=True,
                drop_type=args.drop_type,
                net_type=args.net_type,
                approx=args.anneal).to(device)

    if args.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              nesterov=True)
    elif args.optimizer == 'AMSGrad':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
    scheduler = StepLR(optimizer,
                       step_size=args.lr_step_size,
                       gamma=args.gamma)

    best_score = -1e30
    start_epoch = 1  # start from epoch 1 or last checkpoint epoch
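    # The annealed-approximation variant gets its own net_type prefix so that
    # its checkpoints and logs (paths built below) land in a separate directory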
    if args.anneal == 'approx':
        args.net_type = 'approx_' + args.net_type

    best_model_name = './checkpoint/{}/{}/best_seed{}_depth{}_ckpt.t7'.format(
        args.dataset.strip('/'), args.net_type, args.seed, args.depth)
    last_model_name = './checkpoint/{}/{}/last_seed{}_depth{}_ckpt.t7'.format(
        args.dataset.strip('/'), args.net_type, args.seed, args.depth)

    best_log_file = 'log/' + args.dataset.strip(
        '/') + '/{}/depth{}_backn{}_drop{}_p{}_best.log'.format(
            args.net_type, args.depth, args.back_n, args.drop_type, args.p)
    last_log_file = 'log/' + args.dataset.strip(
        '/') + '/{}/depth{}_backn{}_drop{}_p{}_last.log'.format(
            args.net_type, args.depth, args.back_n, args.drop_type, args.p)

    model_dir = './checkpoint/{}/{}/'.format(args.dataset.strip('/'),
                                             args.net_type)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    log_dir = 'log/' + args.dataset.strip('/') + '/{}/'.format(args.net_type)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')

    for epoch in range(start_epoch, args.epochs + start_epoch):
        scheduler.step(epoch)

        alpha = get_alpha(epoch, args.epochs)
        train_approximate_loss = train(args, model, device, train_loader,
                                       optimizer, epoch, args.anneal, alpha)

        # used for plotting learning curves
        train_loss, train_score = test(args, model, device, train_loader,
                                       'train')
        valid_loss, valid_score = test(args, model, device, valid_loader,
                                       'valid')
        test_loss, test_score = test(args, model, device, test_loader, 'test')

        # early stopping version: keep the checkpoint with the best validation score
        if valid_score > best_score:
            state = {'model': model.state_dict()}
            torch.save(state, best_model_name)
            best_score = valid_score

        # "convergent" version
        state = {'model': model.state_dict()}
        torch.save(state, last_model_name)

    print('Training finished. Loading models from validation...')
    for model_name, log_file, setting in zip(
        [best_model_name, last_model_name], [best_log_file, last_log_file],
        ['best', 'last']):
        print('\nLoading the {} model...'.format(setting))

        checkpoint = torch.load(model_name)
        model.load_state_dict(checkpoint['model'])
        train_loss, train_score = test(args, model, device, train_loader,
                                       'train')
        valid_loss, valid_score = test(args, model, device, valid_loader,
                                       'valid')
        test_loss, test_score = test(args, model, device, test_loader, 'test ')

        with open(log_file, 'a') as fp:
            if args.task == 'classification':
                log_str = '{}\t{:.4f}\t{:.4f}\t{:.4f}'.format(
                    args.seed, train_score, valid_score, test_score)
            elif args.task == 'regression':
                log_str = '{}\t{:.4f}\t{:.4f}\t{:.4f}'.format(
                    args.seed, -train_score, -valid_score, -test_score)
            fp.write(log_str + '\n')