Пример #1
0
def main():
    start_time = time.time()
    args = ARGS
    ray.init(include_dashboard=False, num_gpus=args.num_gpus)
    log_filename = os.path.join(args.metrics_dir, args.metrics_name + '.csv')
    if os.path.exists(log_filename):
        os.remove(log_filename)
    logging.basicConfig(filename=log_filename,
                        level=logging.INFO,
                        format='%(message)s')
    # Set the random seed if provided (affects client sampling, and batching)
    random.seed(1 + args.seed)
    np.random.seed(12 + args.seed)

    tup = MAIN_PARAMS[args.dataset][args.t]
    num_rounds = args.num_rounds if args.num_rounds != -1 else tup[0]
    eval_every = args.eval_every if args.eval_every != -1 else tup[1]
    clients_per_round = args.clients_per_round if args.clients_per_round != -1 else tup[
        2]
    args.clients_per_round = args.clients_per_round if args.clients_per_round != -1 else tup[
        2]

    manager = ClientManager(args)
    clients = manager.setup_clients(args.setup_clients)
    clients.sort(key=lambda x: x.num_train_samples)

    manager.corrupt_clients()

    print('Clients in Total: %d' % len(clients))

    # Create server
    server = Server(clients, manager, args)
    if args.loadmodel:
        model = tf.keras.models.load_model(log_filename + "_model")
        server.set_model(model)
    client_ids, client_groups, num_train_samples, num_test_samples = manager.get_clients_info(
    )
    total_train_samples = np.sum(list(num_train_samples.values()))
    for c, n in zip(clients, num_train_samples.values()):
        c.set_weight(float(n) / total_train_samples)

    # Initial status
    print('--- Random Initialization ---')
    server.test_model(0, set_to_use='train', log=False)
    # Simulate training
    for i in range(num_rounds):
        # Select clients to train this round
        server.select_clients(i, num_clients=clients_per_round)

        server.train_model(num_epochs=args.num_epochs,
                           batch_size=args.batch_size,
                           round=i)
        aggregation_start = time.time()
        server.aggregate(args.method)

        if args.method == "arfl":
            server.update_alpha(i)

        print(
            datetime.datetime.now(),
            '--- Round %d of %d: Training %d Clients. Time cost in total %s. Aggregation time %s --- '
            % (i + 1, num_rounds, clients_per_round, time.time() - start_time,
               time.time() - aggregation_start))
        # Test model
        if (i + 1) % eval_every == 0 or (i + 1) == num_rounds:
            test_stat_metrics = server.test_model(
                i, set_to_use='train')  # Evaluate training loss
            print_metrics(test_stat_metrics,
                          num_train_samples,
                          prefix='{}_'.format('train'))
            test_stat_metrics = server.test_model(i, set_to_use='test')
            print_metrics(test_stat_metrics,
                          num_test_samples,
                          prefix='{}_'.format('test'))

    # Save model when training ends
    server.save_model(log_filename + "_model")