Example #1
    def test_associated_with_p(self):
        size = bf.size()
        rank = bf.rank()
        if size <= 3:
            fname = inspect.currentframe().f_code.co_name
            warnings.warn(
                "Skip {} because it requires at least 4 nodes".format(fname))
            return

        dtypes = [torch.FloatTensor, torch.DoubleTensor]
        # Window ops run over MPI one-sided communication, so GPU tensors
        # are only exercised when Bluefog was not built with NCCL.
        if TEST_ON_GPU and not bf.nccl_built():
            dtypes += [torch.cuda.FloatTensor, torch.cuda.DoubleTensor]

        bf.set_topology(topology_util.RingGraph(size))
        # Track an associated push-sum weight p alongside every window buffer.
        bf.turn_on_win_ops_with_associated_p()
        for dtype, send_rank in itertools.product(dtypes, range(size)):
            # Every rank registers a one-element tensor holding its own rank.
            tensor = torch.FloatTensor([1]).mul_(rank)
            tensor = self.cast_and_place(tensor, dtype)
            window_name = "win_associated_with_p_{}_{}".format(
                dtype, send_rank)
            bf.win_create(tensor, window_name)
            left_neighbor_rank = (send_rank - 1) % size
            right_neighbor_rank = (send_rank + 1) % size
            if rank == send_rank:
                bf.win_accumulate(tensor,
                                  name=window_name,
                                  self_weight=0.5,
                                  dst_weights={
                                      left_neighbor_rank: 0.5,
                                      right_neighbor_rank: 0.5
                                  })
            bf.barrier()
            bf.win_update_then_collect(name=window_name)
            associated_p = bf.win_associated_p(name=window_name)
            if rank == send_rank:
                assert associated_p == 0.5, (
                    "associated_p for sender {} is wrong. Got {}".format(
                        rank, associated_p))
            elif rank in (left_neighbor_rank, right_neighbor_rank):
                # abs() is essential here; without it any value below 1.5 passes.
                assert abs(associated_p - 1.5) < EPSILON, (
                    "associated_p for receiving neighbor {} is wrong. Got {}".
                    format(rank, associated_p))
            else:
                assert associated_p == 1.0, (
                    "associated_p for untouched node {} is wrong. Got {}".
                    format(rank, associated_p))
        bf.turn_off_win_ops_with_associated_p()
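The expected values checked above follow from simple push-sum bookkeeping. A minimal sketch (plain Python, no Bluefog required), assuming every rank starts with weight 1.0 and win_accumulate scales the sender's weight by self_weight while adding dst_weight times the sender's weight at each destination:

size = 4
p = {r: 1.0 for r in range(size)}
sender = 0
left, right = (sender - 1) % size, (sender + 1) % size
sent = p[sender]           # the sender's weight before the accumulate
p[sender] = 0.5 * sent     # self_weight = 0.5
p[left] += 0.5 * sent      # dst_weights[left_neighbor_rank] = 0.5
p[right] += 0.5 * sent     # dst_weights[right_neighbor_rank] = 0.5
assert p[sender] == 0.5
assert p[left] == p[right] == 1.5
assert all(p[r] == 1.0 for r in range(size) if r not in (sender, left, right))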
Example #2
torch.random.manual_seed(args.seed * bf.rank())
if args.cuda:
    device = bf.local_rank() % torch.cuda.device_count()
    x = torch.randn(args.data_size, device=device, dtype=torch.double)
else:
    x = torch.randn(args.data_size, dtype=torch.double)

if args.virtual_topology == "expo2":
    pass
elif args.virtual_topology == "expo3":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=3))
elif args.virtual_topology == "expo4":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=4))
elif args.virtual_topology == "ring":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=1))
elif args.virtual_topology == "mesh":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=0),
                    is_weighted=True)
elif args.virtual_topology == "star":
    bf.set_topology(topology_util.StarGraph(bf.size()), is_weighted=True)
elif args.virtual_topology == "full":
    bf.set_topology(topology_util.FullyConnectedGraph(bf.size()))
else:
    raise ValueError("Unknown args.virtual_topology, supported options are " +
                     "[expo2(default), expo3, expo4, ring, mesh, star, full].")

x_bar = bf.allreduce(x, average=True)
# Despite the name, 'mse' records the relative L2 error of x against the
# global average x_bar.
mse = [torch.norm(x - x_bar, p=2) / torch.norm(x_bar, p=2)]

# Tail of the optimization routine: collect the window buffer and undo the
# push-sum scaling before returning.
if not args.asynchronous_mode:
    w = bf.win_update_then_collect(name="w_buff")
    # The last entry of w holds the accumulated push-sum weight; dividing it
    # out recovers the unbiased iterate.
    x.data = w[:n] / w[-1]

    return x, mse
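The division above is the standard push-sum correction. A sketch of why it works, assuming the buffer w stacks the scaled iterate and its weight as [x * p, p] (layout inferred from w[:n] / w[-1]):

import torch
p = torch.tensor([0.5], dtype=torch.double)  # accumulated push-sum weight
x_true = torch.randn(5, dtype=torch.double)
w = torch.cat([x_true * p, p])               # assumed buffer layout
x_rec = w[:-1] / w[-1]                       # divide the weight back out
assert torch.allclose(x_rec, x_true)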


# ======================= Code starts here =======================
bf.init()
if args.topology == 'mesh':
    bf.set_topology(topology_util.MeshGrid2DGraph(bf.size()), is_weighted=True)
elif args.topology == 'expo2':
    bf.set_topology(topology_util.ExponentialGraph(bf.size()))
elif args.topology == 'star':
    bf.set_topology(topology_util.StarGraph(bf.size()), is_weighted=True)
elif args.topology == 'ring':
    bf.set_topology(topology_util.RingGraph(bf.size()))
else:
    raise NotImplementedError(
        'Topology not supported. This example only supports' +
        ' mesh, star, ring, and expo2.')

# Generate data for logistic regression (synthesized data)
torch.random.manual_seed(123417 * bf.rank())
m, n = 20, 5
rho = 1e-2
X, y = generate_data(m, n, task=args.task)

# calculate the global solution w_opt via distributed gradient descent
w_opt = distributed_grad_descent(X,
                                 y,
                                 loss=args.task,