def forward(self, x, init_states=None): """Assumes x is of shape (batch, sequence, feature)""" bs, seq_sz, _ = x.size() hidden_seq = [] if init_states is None: h_t, c_t = ( flow.zeros((bs, self.hidden_size)).to(x.device), flow.zeros((bs, self.hidden_size)).to(x.device), ) else: h_t, c_t = init_states HS = self.hidden_size for t in range(seq_sz): x_t = x[:, t, :].reshape(x.shape[0], x.shape[2]) gates = flow.matmul(x_t, self.W) + flow.matmul(h_t, self.U) + self.bias i_t, f_t, g_t, o_t = ( flow.sigmoid(gates[:, :HS]), flow.sigmoid(gates[:, HS : HS * 2]), flow.tanh(gates[:, HS * 2 : HS * 3]), flow.sigmoid(gates[:, HS * 3 :]), ) c_t = f_t * c_t + i_t * g_t h_t = o_t * flow.tanh(c_t) hidden_seq.append(h_t.unsqueeze(1)) hidden_seq = flow.cat(hidden_seq, dim=1) return hidden_seq, (h_t, c_t)
def forward(self, x, init_states=None): """Assumes x is of shape (batch, sequence, feature)""" seq_sz, bs, _ = x.size() hidden_seq = [] if init_states is None: h_t, c_t = ( flow.zeros((bs, self.hidden_size)).to("cuda"), flow.zeros((bs, self.hidden_size)).to("cuda"), ) else: h_t, c_t = init_states HS = self.hidden_size for t in range(seq_sz): x_t = x[t, :, :].reshape(x.shape[1], x.shape[2]) # batch the computations into a single matrix multiplication # NOTE(Xu Zhiqiu): flow does not support view now, use reshape instead gates = flow.matmul(x_t, self.W) + flow.matmul(h_t, self.U) + self.bias i_t, f_t, g_t, o_t = ( flow.sigmoid(gates[:, :HS]), flow.sigmoid(gates[:, HS:HS * 2]), flow.tanh(gates[:, HS * 2:HS * 3]), flow.sigmoid(gates[:, HS * 3:]), ) c_t = f_t * c_t + i_t * g_t h_t = o_t * flow.tanh(c_t) hidden_seq.append(h_t.unsqueeze(0)) hidden_seq = flow.cat(hidden_seq, dim=0) return hidden_seq, (h_t, c_t)
def forward(self, x, init_states=None):
    seq_sz, bs, _ = x.size()
    hidden_seq = []
    if init_states is None:
        h_t, c_t = (
            flow.zeros((bs, self.hidden_size)).to("cuda"),
            flow.zeros((bs, self.hidden_size)).to("cuda"),
        )
    else:
        h_t, c_t = init_states
    HS = self.hidden_size
    for t in range(seq_sz):
        x_t = x[t, :, :]
        x_t = x_t.reshape(x.shape[1], x.shape[2])
        gates = flow.matmul(x_t, self.W) + flow.matmul(h_t, self.U) + self.bias
        i_t, f_t, g_t, o_t = (
            flow.sigmoid(gates[:, :HS]),
            flow.sigmoid(gates[:, HS : HS * 2]),
            flow.tanh(gates[:, HS * 2 : HS * 3]),
            flow.sigmoid(gates[:, HS * 3 :]),
        )
        c_t = f_t * c_t + i_t * g_t
        h_t = o_t * flow.tanh(c_t)
        hidden_seq.append(h_t.unsqueeze(0))
    hidden_seq = flow.cat(hidden_seq, dim=0)
    return hidden_seq, (h_t, c_t)

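# A minimal, self-contained sketch of how the hand-rolled LSTM steps above are
# typically wired up and called. The NaiveLSTM wrapper, its initialization, and
# the sizes below are assumptions for illustration; only the W/U/bias parameter
# layout (four gates fused side by side) is taken from the forward() variants above.
import math
import oneflow as flow

class NaiveLSTM(flow.nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        k = 1.0 / math.sqrt(hidden_size)
        # fused weights: columns hold the i, f, g, o gates side by side
        self.W = flow.nn.Parameter(flow.zeros(input_size, hidden_size * 4).uniform_(-k, k))
        self.U = flow.nn.Parameter(flow.zeros(hidden_size, hidden_size * 4).uniform_(-k, k))
        self.bias = flow.nn.Parameter(flow.zeros(hidden_size * 4))

    # forward(self, x, init_states=None) would be the batch-first variant above

# usage sketch (assuming the batch-first forward is attached to NaiveLSTM):
# lstm = NaiveLSTM(input_size=16, hidden_size=32)
# hidden_seq, (h_n, c_n) = lstm(flow.randn(8, 10, 16))  # hidden_seq: (8, 10, 32)
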
def _create_parameters(self, weight_shape, weight_bound, bias_shape, bias_bound):
    self.weight = flow.nn.Parameter(
        flow.zeros(weight_shape).uniform_(-weight_bound, weight_bound),
        requires_grad=True,
    )
    if bias_shape is not None:
        self.bias = flow.nn.Parameter(
            flow.zeros(bias_shape).uniform_(-bias_bound, bias_bound),
            requires_grad=True,
        )
    else:
        self.bias = None

def get_mean_and_std(dataset):
    '''Compute the mean and std value of dataset.'''
    dataloader = flow.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=True, num_workers=2)
    mean = flow.zeros(3)
    std = flow.zeros(3)
    print('==> Computing mean and std..')
    for inputs, targets in dataloader:
        for i in range(3):
            mean[i] += inputs[:, i, :, :].mean()
            std[i] += inputs[:, i, :, :].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std

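# Hedged usage sketch for get_mean_and_std: feed the returned per-channel
# statistics into a Normalize transform. The flowvision dataset/transform setup
# below is an assumption for illustration, not part of the original snippet.
import flowvision.transforms as transforms
from flowvision.datasets import CIFAR10

train_set = CIFAR10(root="./data", train=True, download=True,
                    transform=transforms.ToTensor())
mean, std = get_mean_and_std(train_set)
normalize = transforms.Normalize(mean.numpy().tolist(), std.numpy().tolist())
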
def __init__(self, pred):
    super().__init__()
    if pred.is_global:
        self.param = flow.nn.Parameter(
            flow.zeros(
                *pred.shape,
                dtype=pred.dtype,
                placement=pred.placement,
                sbp=pred.sbp,
            )
        )
    else:
        self.param = flow.nn.Parameter(
            flow.zeros(*pred.shape, dtype=pred.dtype, device=pred.device)
        )

def __init__(self, features, eps=1e-6):
    super(LayerNorm, self).__init__()
    self.eps = eps
    self.weight = nn.Parameter(
        flow.Tensor(flow.ones(features, dtype=flow.float32)))
    self.bias = nn.Parameter(
        flow.Tensor(flow.zeros(features, dtype=flow.float32)))

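# A minimal sketch of the forward pass that usually accompanies this LayerNorm
# __init__ (normalize over the last dimension, then scale and shift). The exact
# original forward is not shown in this excerpt, so treat this as an assumption.
def forward(self, x):
    mean = x.mean(dim=-1, keepdim=True)
    std = x.std(dim=-1, keepdim=True)
    return self.weight * (x - mean) / (std + self.eps) + self.bias
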
def forward(self, inputs, targets):
    """
    Args:
        inputs (flow.Tensor): feature matrix with shape (batch_size, feat_dim).
        targets (flow.LongTensor): ground truth labels with shape (batch_size).
    """
    n = inputs.size(0)
    # Compute pairwise distance, replace by the official when merged
    dist = flow.pow(inputs, 2).sum(dim=1).expand(n, n)
    dist = dist + flow.transpose(dist, dim0=1, dim1=0)
    temp1 = -2 * flow.matmul(inputs, flow.transpose(inputs, dim0=1, dim1=0))
    dist = flow.add(dist, temp1)
    dist = flow.sqrt(flow.clamp(dist, min=1e-12))
    # For each anchor, find the hardest positive and negative
    mask = targets.expand(n, n).eq(
        flow.transpose(targets.expand(n, n), dim0=1, dim1=0))
    dist_ap, dist_an = [], []
    y1 = flow.zeros((1, n), dtype=flow.float32).to("cuda")
    y2 = flow.Tensor(np.exp(100 * np.ones((1, n)))).to("cuda")
    for i in range(n):
        temp_dist = flow.slice(dist, [(i, i + 1, 1)])
        temp_mask = flow.slice(mask, [(i, i + 1, 1)])
        temp_mask_rev = flow.slice(1 - mask, [(i, i + 1, 1)])
        dist_ap.append(temp_mask.where(temp_dist, y1).max().unsqueeze(0))
        dist_an.append(
            temp_mask_rev.where(temp_dist, y2).min().unsqueeze(0))
    dist_ap = flow.cat(dist_ap)
    dist_an = flow.cat(dist_an)
    # Compute ranking hinge loss
    y = flow.ones_like(dist_an)
    return self.ranking_loss(dist_an, dist_ap, y)

def _setitem(self, key, value):
    if self.is_consistent:
        if isinstance(value, (int, float)):
            value = flow._C.consistent_constant(
                [1],
                value,
                dtype=self.dtype,
                placement=self.placement,
                sbp=flow.sbp.broadcast,
            )
        else:
            if value.is_consistent:
                value = value.to_consistent(sbp=flow.sbp.broadcast)
                # TODO: remove these lines after asymmetric boxing is ready
                local_tensor = value.to_local()
                if local_tensor.nelement() == 0:
                    local_tensor = flow.zeros(*value.shape)
                value = local_tensor.to_consistent(
                    self.placement, sbp=flow.sbp.broadcast)
            else:
                value = value.to_consistent(
                    self.placement, sbp=flow.sbp.broadcast)
    else:
        if isinstance(value, (int, float)):
            value = flow._C.constant(
                [1], value, dtype=self.dtype, device=self.device)
        else:
            value = value.to(device=self.device)
    flow._C.tensor_setitem(self, key, value)
    return self

def __init__(
    self,
    num_features: int,
    eps: float = 1e-05,
    momentum: float = 0.1,
    affine: bool = True,
    track_running_stats: bool = True,
) -> None:
    super().__init__()
    self.num_features = num_features
    self.eps = eps
    self.momentum = momentum
    self.affine = affine
    self.track_running_stats = track_running_stats
    if self.affine:
        self.weight = flow.nn.Parameter(flow.Tensor(num_features))
        self.bias = flow.nn.Parameter(flow.Tensor(num_features))
    else:
        self.register_parameter("weight", None)
        self.register_parameter("bias", None)
    if self.track_running_stats:
        self.register_buffer("running_mean", flow.zeros(num_features))
        self.register_buffer("running_var", flow.ones(num_features))
        self.register_buffer(
            "num_batches_tracked", flow.tensor(0, dtype=flow.long))
    else:
        self.register_buffer("running_mean", None)
        self.register_buffer("running_var", None)
        self.register_buffer("num_batches_tracked", None)
    self.reset_parameters()

def forward(
    self,
    input_ids: flow.Tensor,
    token_type_ids: Optional[flow.Tensor] = None,
    position_ids: Optional[flow.Tensor] = None,
) -> flow.Tensor:
    input_shape = input_ids.size()
    seq_length = input_shape[1]
    if token_type_ids is None:
        token_type_ids = flow.zeros(
            input_shape, dtype=flow.long, device=input_ids.device)
    if position_ids is None:
        position_ids = flow.arange(
            seq_length, dtype=flow.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).expand(input_shape)
    input_embeddings = self.token_embeddings(input_ids)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    position_embeddings = self.position_embeddings(position_ids)
    embeddings = input_embeddings + position_embeddings + token_type_embeddings
    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings

def forward(self, x, hidden=None):
    batch_size, seq_len, _ = x.size()
    H_S = self.hidden_size
    hidden_seq = []
    if hidden is None:
        h_t = flow.zeros((batch_size, self.hidden_size))
    else:
        h_t = hidden
    for t in range(seq_len):
        x_t = x[:, t, :]
        # fused gate pre-activations from the input and the previous hidden state
        gates_1 = flow.matmul(x_t, self.inp_W) + self.inp_b
        gates_2 = flow.matmul(h_t, self.hid_W) + self.hid_b
        # reset gate, update gate, candidate hidden state
        r_gate = flow.sigmoid(gates_1[:, :H_S] + gates_2[:, :H_S])
        z_gate = flow.sigmoid(
            gates_1[:, H_S : H_S * 2] + gates_2[:, H_S : H_S * 2])
        h_t_ = flow.tanh(
            gates_1[:, H_S * 2 : H_S * 3]
            + r_gate * gates_2[:, H_S * 2 : H_S * 3])
        h_t = (1 - z_gate) * h_t_ + z_gate * h_t
        hidden_seq.append(h_t.unsqueeze(1))
    hidden_seq = flow.cat(hidden_seq, dim=1)
    return hidden_seq, h_t

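# A minimal sketch of the parameter shapes this GRU step assumes (reset, update
# and candidate gates fused side by side in the weight columns), plus a call.
# The NaiveGRU wrapper and its initialization are assumptions for illustration,
# not the original module.
import math
import oneflow as flow

class NaiveGRU(flow.nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        k = 1.0 / math.sqrt(hidden_size)
        self.inp_W = flow.nn.Parameter(flow.zeros(input_size, hidden_size * 3).uniform_(-k, k))
        self.inp_b = flow.nn.Parameter(flow.zeros(hidden_size * 3))
        self.hid_W = flow.nn.Parameter(flow.zeros(hidden_size, hidden_size * 3).uniform_(-k, k))
        self.hid_b = flow.nn.Parameter(flow.zeros(hidden_size * 3))

    # forward(self, x, hidden=None) is the step loop defined above

# usage sketch (assuming the forward above is attached to NaiveGRU):
# gru = NaiveGRU(input_size=16, hidden_size=32)
# hidden_seq, h_n = gru(flow.randn(8, 10, 16))  # (batch, seq, hidden), (batch, hidden)
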
def test_cuda_manual_seed(test_case):
    flow.cuda.manual_seed(30)
    device = flow.device("cuda", flow.cuda.current_device())
    x = flow.randn(2, 4, device=device)
    tensor_list = [flow.zeros((2, 4), dtype=flow.int32) for _ in range(2)]
    flow.comm.all_gather(tensor_list, x)
    test_case.assertTrue(
        np.allclose(tensor_list[0].numpy(), tensor_list[1].numpy()))

def noisy_top_k_gating(self, x, train, noise_epsilon=1e-2):
    """Noisy top-k gating.
    See paper: https://arxiv.org/abs/1701.06538.
    Args:
        x: input Tensor with shape [batch_size, input_size]
        train: a boolean - we only add noise at training time.
        noise_epsilon: a float
    Returns:
        gates: a Tensor with shape [batch_size, num_experts]
        load: a Tensor with shape [num_experts]
    """
    clean_logits = oneflow.matmul(x, self.w_gate)
    if self.noisy_gating:
        raw_noise_stddev = oneflow.matmul(x, self.w_noise)
        noise_stddev = (self.softplus(raw_noise_stddev) + noise_epsilon) * train
        # noisy_logits = clean_logits + (torch.randn(clean_logits.size()) * noise_stddev)
        # TODO, fix this after torch randn argument fixed
        noisy_logits = clean_logits + (
            flow.randn(
                clean_logits.size()[0],
                clean_logits.size()[1],
                device=clean_logits.device,
            )
            * noise_stddev
        )
        logits = noisy_logits
    else:
        logits = clean_logits
    # calculate topk + 1 that will be needed for the noisy gates
    top_logits, top_indices = logits.topk(min(self.k + 1, self.num_experts), dim=1)
    top_k_logits = top_logits[:, : self.k]
    top_k_indices = top_indices[:, : self.k]
    top_k_gates = self.softmax(top_k_logits)
    top_k_logits = top_k_logits.to(logits.device)
    top_indices = top_indices.to(logits.device)
    top_logits = top_logits.to(logits.device)
    zeros = flow.zeros(
        logits.shape, dtype=logits.dtype, requires_grad=True, device=logits.device
    )
    gates = oneflow.scatter(zeros, 1, top_k_indices, top_k_gates)
    if self.noisy_gating and self.k < self.num_experts:
        load = (
            self._prob_in_top_k(
                clean_logits, noisy_logits, noise_stddev, top_logits
            )
        ).sum(0)
    else:
        load = self._gates_to_load(gates)
    return gates, load

def forward(self, cosine: flow.Tensor, label):
    index = flow.where(label != -1)[0]
    m_hot = flow.zeros(
        index.size()[0], cosine.size()[1], device=cosine.device)
    m_hot.scatter_(1, label[index, None], self.m)
    cosine.acos_()
    cosine[index] += m_hot
    cosine.cos_().mul_(self.s)
    return cosine

def masked_select_op(input, mask):
    """
    Returns a new 1-D tensor which indexes the input tensor according to the boolean mask mask
    which is a BoolTensor(In oneFlow BoolTensor is replaced by Int8Tensor).

    The shapes of the mask tensor and the input tensor don’t need to match, but they must be broadcastable.

    Args:
        input (Tensor): the input tensor.
        mask (Tensor): the tensor containing the binary mask to index with

    For example:

    .. code-block:: python

        >>> import oneflow as flow
        >>> import numpy as np
        >>> input = flow.tensor(np.array([[-0.4620, 0.3139], [0.3898, -0.7197], [0.0478, -0.1657]]), dtype=flow.float32)
        >>> mask = input.gt(0.05)
        >>> out = flow.masked_select(input, mask)
        >>> out
        tensor([0.3139, 0.3898], dtype=oneflow.float32)
    """
    assert len(input.shape) == len(
        mask.shape
    ), f"The dim of masked_select module's inputs can not match, please check!"
    broadcast_like_shape = []
    broadcast_x_axes = []
    broadcast_mask_axes = []
    for i in range(len(input.shape)):
        max_dim = max(input.shape[i], mask.shape[i])
        broadcast_like_shape.append(max_dim)
        if max_dim != input.shape[i]:
            broadcast_x_axes.append(i)
        if max_dim != mask.shape[i]:
            broadcast_mask_axes.append(i)
    broadcast_like_tensor = flow.zeros(
        tuple(broadcast_like_shape), dtype=flow.float32, device=input.device)
    broadcast_like_tensor.requires_grad = input.requires_grad or mask.requires_grad
    if len(broadcast_x_axes) != 0:
        input = flow.broadcast_like(
            input, broadcast_like_tensor, broadcast_axes=tuple(broadcast_x_axes))
    if len(broadcast_mask_axes) != 0:
        mask = flow.broadcast_like(
            mask, broadcast_like_tensor, broadcast_axes=tuple(broadcast_mask_axes))
    mask = mask.to(dtype=input.dtype)
    res = flow._C.mul(input, mask)
    indices = flow.argwhere(res)
    gather_res = flow._C.gather_nd(res, indices)
    return gather_res.flatten()

def evaluate(encoder, decoder, sentence, input_lang, output_lang, max_length=MAX_LENGTH):
    with flow.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.init_Hidden().to(device)
        encoder_outputs = []
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(
                input_tensor[ei], encoder_hidden)
            encoder_outputs.append(encoder_output[0])
        if len(encoder_outputs) != max_length:
            for _ in range(max_length - len(encoder_outputs)):
                encoder_outputs.append(flow.zeros((1, 256)).to(device))
        encoder_outputs = flow.cat(encoder_outputs, dim=0)
        decoder_input = flow.tensor([[SOS_token]]).to(device)
        decoder_hidden = encoder_hidden
        decoded_words = []
        decoder_attentions = flow.zeros((max_length, max_length)).to(device)
        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.squeeze(0).data
            topv, topi = decoder_output.data.topk(1)
            if topi.squeeze().numpy() == EOS_token:
                decoded_words.append("<EOS>")
                break
            else:
                decoded_words.append(
                    output_lang.index2word[int(topi.squeeze().numpy())])
            decoder_input = topi.detach()
        return decoded_words, decoder_attentions[:di + 1]

def forward(self, cosine, label):
    index = flow.where(label != -1)[0]
    m_hot = flow.zeros(
        index.size()[0], cosine.size()[1], device=cosine.device)
    m_hot = flow.scatter(m_hot, 1, label[index, None], self.m)
    cosine = cosine[index] - m_hot
    ret = cosine * self.s
    return ret

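# Hedged usage sketch for the margin heads above (additive angular margin in the
# acos_/cos_ variant, additive cosine margin here). The normalization step, the
# s/m values, and the concrete shapes are assumptions for illustration only.
import numpy as np
import oneflow as flow

batch, num_classes, s, m = 4, 10, 64.0, 0.4
# cosine similarities between L2-normalized embeddings and class centers
emb = flow.nn.functional.normalize(flow.randn(batch, 128), dim=1)
centers = flow.nn.functional.normalize(flow.randn(128, num_classes), dim=0)
cosine = flow.matmul(emb, centers)                       # (batch, num_classes)
label = flow.tensor(np.array([1, 3, -1, 7]), dtype=flow.int64)

index = flow.where(label != -1)[0]                       # rows with a valid label
m_hot = flow.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
m_hot = flow.scatter(m_hot, 1, label[index, None], m)
logits = (cosine[index] - m_hot) * s                     # margin-penalized, scaled logits
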
def __init__(self, dim, eps=1e-05, elementwise_affine=True):
    super(GlobalChannelLayerNorm, self).__init__()
    self.eps = eps
    self.normalized_dim = dim
    self.elementwise_affine = elementwise_affine
    if elementwise_affine:
        self.beta = nn.Parameter(flow.zeros(dim, 1))
        self.gamma = nn.Parameter(flow.ones(dim, 1))
    else:
        self.register_parameter("weight", None)
        self.register_parameter("bias", None)

def forward(self, preds, labels):
    top1_num = flow.zeros(1, dtype=flow.float32)
    num_samples = 0
    for pred, label in zip(preds, labels):
        clsidxs = pred.argmax(dim=-1)
        clsidxs = clsidxs.to(flow.int32)
        match = (clsidxs == label).sum()
        top1_num += match.to(device=top1_num.device, dtype=top1_num.dtype)
        num_samples += np.prod(label.shape).item()
    top1_acc = top1_num / num_samples
    return top1_acc

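# Hedged usage sketch for the top-1 accuracy metric above: preds and labels are
# lists of per-batch tensors collected during evaluation. The Accuracy wrapper
# name and the shapes below are assumptions for illustration.
import numpy as np
import oneflow as flow

preds = [flow.randn(8, 1000) for _ in range(4)]  # per-batch logits
labels = [flow.tensor(np.random.randint(0, 1000, (8,)), dtype=flow.int32)
          for _ in range(4)]
# acc = Accuracy()(preds, labels)  # 1-element tensor in [0, 1]
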
def test_normal_out_tensor_data_type_error(test_case):
    with test_case.assertRaises(RuntimeError) as ctx:
        out = flow.zeros((3, 3), dtype=flow.float64)
        x = flow._C.normal(
            mean=0.0, std=1.0, size=(3, 3), dtype=flow.float32, out=out)
    test_case.assertTrue(
        "data type oneflow.float32 does not match data type of out parameter oneflow.float64"
        in str(ctx.exception))

def __init__(self, d_model, dropout=0.1, max_len=5000):
    super(PositionalEncoding, self).__init__()
    self.dropout = nn.Dropout(p=dropout)
    pe = flow.zeros((max_len, d_model))
    position = flow.arange(0, max_len, dtype=flow.float).unsqueeze(1)
    div_term = flow.exp(
        flow.arange(0, d_model, 2).to(flow.float)
        * (-math.log(10000.0) / d_model)
    ).unsqueeze(0)
    pe[:, 0::2] = flow.sin(position * div_term)
    pe[:, 1::2] = flow.cos(position * div_term)
    pe = pe.unsqueeze(0).transpose(0, 1)
    self.pe = flow.nn.Parameter(pe, requires_grad=False)

def __init__(self, d_model, max_len=5000):
    super(PositionalEncoding, self).__init__()
    # Compute the positional encodings once in log space.
    pe = flow.zeros(max_len, d_model, requires_grad=False)
    position = flow.arange(0, max_len).unsqueeze(1).to(dtype=flow.float32)
    div_term = flow.exp(
        flow.arange(0, d_model, 2).to(dtype=flow.float32)
        * -(math.log(10000.0) / d_model)
    )
    pe[:, 0::2] = flow.sin(position * div_term)
    pe[:, 1::2] = flow.cos(position * div_term)
    pe = pe.unsqueeze(0)
    self.register_buffer("pe", pe)

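# A minimal sketch of the forward pass that typically pairs with the
# register_buffer variant of PositionalEncoding above: add the first seq_len
# positions to the input embeddings. The exact original forward is not shown in
# this excerpt, so this is an assumption.
def forward(self, x):
    # x: (batch, seq_len, d_model); self.pe: (1, max_len, d_model)
    return x + self.pe[:, : x.size(1)]
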
def test(iter, model, loss_fn):
    size = len(iter.dataset)
    num_batches = len(iter)
    model.eval()
    test_loss, correct = 0, 0
    flag = 0
    with flow.no_grad():
        for x, y in iter:
            if x.shape[0] != config.batch_size:
                flag = 1
                n = config.batch_size - x.shape[0]
                # pad the incomplete batch (and its labels) up to the full batch size
                x_comp = flow.zeros((n, x.shape[1]))
                y_comp = flow.zeros(n)
                x = flow.tensor(np.vstack((x.numpy(), x_comp.numpy())))
                y = flow.tensor(np.hstack((y.numpy(), y_comp.numpy())))
            x = x.reshape(1, x.shape[0], x.shape[1])
            x = flow.tensor(x, dtype=flow.float32, device="cuda")
            y = flow.tensor(y, dtype=flow.int32, device="cuda")
            pred = model(x)
            test_loss += loss_fn(pred, y)
            if flag == 0:
                bool_value = np.argmax(pred.numpy(), 1) == y.numpy()
            else:
                bool_value = np.argmax(pred.numpy()[0:16], 1) == y.numpy()[0:16]
            correct += float(bool_value.sum())
    test_loss /= num_batches
    print("test_loss", test_loss, "num_batches ", num_batches)
    correct /= size
    print(
        f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f}"
    )
    return test_loss, 100 * correct

def __init__(
    self,
    embed_dim,
    num_heads,
    dropout=0.0,
    bias=True,
    add_bias_kv=False,
    add_zero_attn=False,
    kdim=None,
    vdim=None,
    batch_first=False,
) -> None:
    super(MultiheadAttention, self).__init__()
    self.embed_dim = embed_dim
    self.kdim = kdim if kdim is not None else embed_dim
    self.vdim = vdim if vdim is not None else embed_dim
    self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim
    self.num_heads = num_heads
    self.dropout = dropout
    self.batch_first = batch_first
    self.head_dim = embed_dim // num_heads
    assert (
        self.head_dim * num_heads == self.embed_dim
    ), "embed_dim must be divisible by num_heads"
    if self._qkv_same_embed_dim is False:
        self.q_proj_weight = Parameter(flow.zeros((embed_dim, embed_dim)))
        self.k_proj_weight = Parameter(flow.zeros((embed_dim, self.kdim)))
        self.v_proj_weight = Parameter(flow.zeros((embed_dim, self.vdim)))
        self.register_parameter("in_proj_weight", None)
    else:
        self.in_proj_weight = Parameter(
            flow.zeros((3 * embed_dim, embed_dim)))
        self.register_parameter("q_proj_weight", None)
        self.register_parameter("k_proj_weight", None)
        self.register_parameter("v_proj_weight", None)
    if bias:
        self.in_proj_bias = Parameter(flow.zeros(3 * embed_dim))
    else:
        self.register_parameter("in_proj_bias", None)
    self.out_proj = Linear(embed_dim, embed_dim, bias=bias)
    if add_bias_kv:
        self.bias_k = Parameter(flow.zeros((1, 1, embed_dim)))
        self.bias_v = Parameter(flow.zeros((1, 1, embed_dim)))
    else:
        self.bias_k = self.bias_v = None
    self.add_zero_attn = add_zero_attn
    self._reset_parameters()

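# Note on the design above: when query, key and value share the same embedding
# dimension, the three projection matrices are fused into a single
# in_proj_weight of shape (3 * embed_dim, embed_dim) so the q/k/v projections
# can be computed with one matmul; otherwise separate q/k/v projection weights
# are kept and in_proj_weight is registered as None.
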
def __init__(self, hidden_size, vocab_size, hidden_act=nn.GELU()):
    super().__init__()
    self.hidden_size = hidden_size
    self.transform = BertPredictionHeadTransform(hidden_size, hidden_act)
    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    self.decoder = nn.Linear(hidden_size, vocab_size, bias=False)
    self.output_bias = nn.Parameter(flow.zeros(vocab_size))
    # Need a link between the two variables so that the bias is correctly
    # resized with `resize_token_embeddings`
    self.decoder.bias = self.output_bias

def test_normal_out_tensor_device_type_error(test_case):
    with test_case.assertRaises(RuntimeError) as ctx:
        out = flow.zeros((3, 3), dtype=flow.float32, device="cuda")
        x = flow._C.normal(
            mean=0.0,
            std=1.0,
            size=(3, 3),
            dtype=flow.float32,
            out=out,
            device="cpu",
        )
    test_case.assertTrue(
        "does not match device type of out parameter" in str(ctx.exception))

def test_all_gather_1n2d(test_case):
    if flow.env.get_rank() == 0:
        np_arr = np.array([[2, 3], [4, 5]])
    elif flow.env.get_rank() == 1:
        np_arr = np.array([[1, 2], [3, 4]])
    input = flow.tensor(np_arr, device="cuda", dtype=flow.int32)
    tensor_list = [
        flow.zeros(np_arr.shape, dtype=flow.int32) for _ in range(2)
    ]
    flow.comm.all_gather(tensor_list, input)
    test_case.assertTrue(
        np.allclose(tensor_list[0].numpy(), np.array([[2, 3], [4, 5]])))
    test_case.assertTrue(
        np.allclose(tensor_list[1].numpy(), np.array([[1, 2], [3, 4]])))

def test_copy(test_case):
    x = flow.zeros(2, 3)
    y = flow.ones(2, 3)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(
        4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    y = flow.ones(
        4, 6, placement=flow.placement("cpu", [0]), sbp=flow.sbp.broadcast)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(
        4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    y = flow.ones(
        4, 6, placement=flow.placement("cuda", [0]), sbp=flow.sbp.broadcast)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(
        4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.split(0))
    y = flow.ones(
        4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(
        4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    y = flow.ones(
        4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(
        4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    y = np.ones((4, 6), dtype=np.float32)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y))

def test_lazy_1d_to_2d_sbp(test_case):
    P_1d = flow.placement(
        device_type="cuda", device_ids={0: range(4)}, hierarchy=(4,)
    )
    P_2d = flow.placement(
        device_type="cuda", device_ids={0: range(4)}, hierarchy=(2, 2)
    )
    B = flow.sbp.broadcast

    class Test1dTo2dModule(flow.nn.Module):
        def forward(self, x):
            return x.to_global(placement=P_2d, sbp=[B, B])

    class Test1dTo2dGraph(flow.nn.Graph):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def build(self, x):
            return self.model(x)

    class Test2dTo1dModule(flow.nn.Module):
        def forward(self, x):
            return x.to_global(placement=P_1d, sbp=[B])

    class Test2dTo1dGraph(flow.nn.Graph):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def build(self, x):
            return self.model(x)

    model_1d_to_2d = Test1dTo2dModule()
    graph_1d_to_2d = Test1dTo2dGraph(model_1d_to_2d)
    x = flow.zeros(4, 4, 4, 4, sbp=[B, B], placement=P_2d)
    x = x.to_global(placement=P_1d, sbp=[B])
    test_case.assertTrue(x.sbp == (B,))
    test_case.assertTrue(x.placement == P_1d)
    y = graph_1d_to_2d(x)
    test_case.assertTrue(y.sbp == (B, B))
    test_case.assertTrue(y.placement == P_2d)

    model_2d_to_1d = Test2dTo1dModule()
    graph_2d_to_1d = Test2dTo1dGraph(model_2d_to_1d)
    z = graph_2d_to_1d(y)
    test_case.assertTrue(z.sbp == x.sbp)
    test_case.assertTrue(z.placement == x.placement)