def test_associated_with_p(self):
    size = bf.size()
    rank = bf.rank()
    if size <= 3:
        fname = inspect.currentframe().f_code.co_name
        warnings.warn(
            "Skip {} because it only supports test over more than 3 nodes".format(fname))
        return
    dtypes = [torch.FloatTensor, torch.DoubleTensor]
    if TEST_ON_GPU and not bf.nccl_built():
        dtypes += [torch.cuda.FloatTensor, torch.cuda.DoubleTensor]

    bf.set_topology(topology_util.RingGraph(size))
    bf.turn_on_win_ops_with_associated_p()
    for dtype, send_rank in itertools.product(dtypes, range(size)):
        # A one-element tensor whose value equals this process's rank.
        tensor = torch.FloatTensor([23]).fill_(1).mul_(rank)
        tensor = self.cast_and_place(tensor, dtype)
        window_name = "win_associated_with_p_{}_{}".format(dtype, send_rank)
        bf.win_create(tensor, window_name)
        left_neighbor_rank = (send_rank - 1) % size
        right_neighbor_rank = (send_rank + 1) % size
        if rank == send_rank:
            bf.win_accumulate(tensor, name=window_name,
                              self_weight=0.5,
                              dst_weights={left_neighbor_rank: 0.5,
                                           right_neighbor_rank: 0.5})
        bf.barrier()
        bf.win_update_then_collect(name=window_name)
        associated_p = bf.win_associated_p(name=window_name)
        if rank == send_rank:
            # The sender kept only self_weight = 0.5 of its own weight.
            assert associated_p == 0.5, (
                "associated_p for sender {} is wrong. Get {}".format(
                    rank, associated_p))
        elif rank in (left_neighbor_rank, right_neighbor_rank):
            # Each receiving neighbor accumulated 0.5 on top of its initial 1.0.
            assert abs(associated_p - 1.5) < EPSILON, (
                "associated_p for received neighbor {} is wrong. Get {}".format(
                    rank, associated_p))
        else:
            assert associated_p == 1.0, (
                "associated_p for untouched node {} is wrong. Get {}".format(
                    rank, associated_p))
    bf.turn_off_win_ops_with_associated_p()
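# The arithmetic behind the assertions above, simulated without any
# communication. This is a minimal sketch of the push-sum weight
# bookkeeping the test exercises; `simulate_push` is a hypothetical
# helper for illustration, not part of the Bluefog API.
def simulate_push(size, send_rank, self_weight, dst_weights):
    # Every window starts with an associated weight p of 1.0.
    p = [1.0] * size
    # The sender keeps self_weight of its own p and pushes
    # dst_weight * p[send_rank] to each destination rank.
    pushed = p[send_rank]
    p[send_rank] *= self_weight
    for dst, weight in dst_weights.items():
        p[dst] += weight * pushed
    return p

size, send_rank = 4, 0
p = simulate_push(size, send_rank, self_weight=0.5,
                  dst_weights={(send_rank - 1) % size: 0.5,
                               (send_rank + 1) % size: 0.5})
# Sender ends at 0.5, both ring neighbors at 1.5, the untouched node at 1.0,
# matching the three branches of the test's assertions.
assert p == [0.5, 1.5, 1.0, 1.5]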
torch.random.manual_seed(args.seed * bf.rank())
if args.cuda:
    device = bf.local_rank() % torch.cuda.device_count()
    x = torch.randn(args.data_size, device=device, dtype=torch.double)
else:
    x = torch.randn(args.data_size, dtype=torch.double)

if args.virtual_topology == "expo2":
    pass  # ExponentialGraph with base 2 is the default topology.
elif args.virtual_topology == "expo3":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=3))
elif args.virtual_topology == "expo4":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=4))
elif args.virtual_topology == "ring":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=1))
elif args.virtual_topology == "mesh":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=0),
                    is_weighted=True)
elif args.virtual_topology == "star":
    bf.set_topology(topology_util.StarGraph(bf.size()), is_weighted=True)
elif args.virtual_topology == "full":
    bf.set_topology(topology_util.FullyConnectedGraph(bf.size()))
else:
    raise ValueError("Unknown args.virtual_topology, supporting options are " +
                     "[expo2(Default), expo3, expo4, ring, mesh, star, full].")

# The exact average is the consensus target used for the relative error.
x_bar = bf.allreduce(x, average=True)
mse = [torch.norm(x - x_bar, p=2) / torch.norm(x_bar, p=2)]

if not args.asynchronous_mode:
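    # A minimal sketch of what the synchronous branch plausibly contains
    # (an assumption; the body is elided here): repeated neighbor_allreduce
    # steps drive every local x toward x_bar, appending the relative error
    # each iteration. `num_iters` is a hypothetical stand-in for the
    # script's real iteration-count flag.
    num_iters = 100
    for _ in range(num_iters):
        x = bf.neighbor_allreduce(x)
        mse.append(torch.norm(x - x_bar, p=2) / torch.norm(x_bar, p=2))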
    w = bf.win_update_then_collect(name="w_buff")
    # Push-sum correction: divide the collected buffer by its associated weight.
    x.data = w[:n] / w[-1]
    return x, mse


# ======================= Code starts here =======================
bf.init()

if args.topology == 'mesh':
    bf.set_topology(topology_util.MeshGrid2DGraph(bf.size()), is_weighted=True)
elif args.topology == 'expo2':
    bf.set_topology(topology_util.ExponentialGraph(bf.size()))
elif args.topology == 'star':
    bf.set_topology(topology_util.StarGraph(bf.size()), is_weighted=True)
elif args.topology == 'ring':
    bf.set_topology(topology_util.RingGraph(bf.size()))
else:
    raise NotImplementedError(
        'Topology not supported. This example only supports' +
        ' mesh, star, ring and expo2')

# Generate data for logistic regression (synthesized data).
torch.random.manual_seed(123417 * bf.rank())
m, n = 20, 5
rho = 1e-2
X, y = generate_data(m, n, task=args.task)

# Calculate the global solution w_opt via distributed gradient descent.
w_opt = distributed_grad_descent(X, y, loss=args.task,
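# One plausible shape for the generate_data call above (an assumption for
# illustration, not the example's actual implementation): each rank draws
# its own m-by-n local dataset, with labels produced by a planted model so
# the regression task is well posed.
def generate_data_sketch(m, n, task='logistic_regression'):
    X = torch.randn(m, n).to(torch.double)
    w_true = torch.randn(n, 1).to(torch.double)
    if task == 'logistic_regression':
        # Labels in {-1, +1} from the sign of a noisy linear model.
        y = torch.sign(X.mm(w_true) + 0.1 * torch.randn(m, 1).double())
    else:
        # Real-valued targets for a least-squares task.
        y = X.mm(w_true) + 0.1 * torch.randn(m, 1).double()
    return X, y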