def forward(self, logits: np.ndarray, labels: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    pred = npx.log_softmax(logits, axis=-1)
    # (batch, len)
    neg_log_likelihood = - npx.pick(pred,  # pylint: disable=invalid-unary-operand-type
                                    labels, axis=-1, keepdims=False)
    # label smoothing as in
    # https://github.com/dmlc/gluon-nlp/blob/b714eaccc67619d7bdcbd1574d30be87d9c73f0c/src/gluonnlp/loss.py#L4
    if self._alpha > 0:
        all_scores = np.sum(pred, axis=-1)
        neg_log_likelihood = (1 - self._alpha) * neg_log_likelihood - self._alpha / self._num_labels * all_scores
    # (batch, len,)
    valid_mask = labels != self.ignore_label
    # (batch, len)
    loss = neg_log_likelihood * valid_mask
    # (1,)
    num_valid = np.sum(valid_mask)
    # (1,)
    ce = np.sum(loss) * self.weight
    # we need to divide by num_valid here to backpropagate a 'valid' normalized loss value like in SoftmaxOutput.
    return ce / num_valid, np.ones((1,))

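# A minimal plain-NumPy sketch (not from the original source) that illustrates the
# label-smoothing formulation used above: (1 - alpha) * NLL - alpha / num_labels * sum(log p).
# All values below are hypothetical toy numbers; MXNet's npx calls are replaced by NumPy ops.
import numpy as np

logits = np.array([[[2.0, 0.5, -1.0],
                    [0.1, 0.2, 0.3]]])   # 1 sequence, 2 positions, 3 classes
labels = np.array([[0, 2]])
alpha, num_labels = 0.1, 3

log_probs = logits - np.log(np.exp(logits).sum(axis=-1, keepdims=True))  # log-softmax
nll = -np.take_along_axis(log_probs, labels[..., None], axis=-1).squeeze(-1)
# mix the gold-label NLL with the average NLL over all classes
smoothed = (1 - alpha) * nll - alpha / num_labels * log_probs.sum(axis=-1)
print(smoothed)  # per-position label-smoothed cross-entropy
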
def forward(self, x):
    square_of_sum = np.sum(self.embedding(x), axis=1)**2
    sum_of_square = np.sum(self.embedding(x)**2, axis=1)
    x = self.linear_layer(self.fc(x).sum(1)) \
        + 0.5 * (square_of_sum - sum_of_square).sum(1, keepdims=True)
    x = npx.sigmoid(x)
    return x

def optimize_quantization_mse(tensor, rounds=10):
    """
    Minimize mean squared error of quantizing a tensor, returning the top value
    (i.e. the one that quantizes to 127). Scaling = 127.0 / return value.

    This is a convex optimization problem. EM works but makes slow steps.
    Instead of EM, use binary search in the direction minimization suggests.
    """
    best_mse = math.inf
    best_top = None
    maxabs = npx.intgemm_maxabsolute(tensor)
    low = 0.0
    high = maxabs
    for _ in range(rounds):
        value = (low + high) / 2.0
        quant = npx.intgemm_prepare_data(tensor, value)
        quant_float = quant.astype(C.DTYPE_FP32)
        mse = (quant_float * (value / 127.0) - tensor).norm().item() / math.sqrt(float(tensor.size))
        if mse < best_mse:
            best_mse = mse
            best_top = value
        # This optimizes scaling subject to cluster assignment.
        # It can be used for EM but the step is really slow, so use it for direction.
        scale = np.sum(quant_float * quant_float) / np.sum(quant_float * tensor)
        top = 127.0 / scale.item()
        if top < value:
            high = value
        else:
            low = value
    return best_top

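# A minimal plain-NumPy sketch (not part of the original code) of the quantity each
# binary-search step above measures: clip-and-round to int8 with a candidate top value,
# rescale, and compute the RMSE against the original tensor. The helper below only
# assumes that int8 quantization rounds after scaling by 127/top, which may differ in
# detail from npx.intgemm_prepare_data.
import numpy as np

def quantize_rmse(tensor, top):
    # round to the nearest int8 level under scaling 127.0 / top, then de-scale
    quant = np.clip(np.round(tensor * (127.0 / top)), -127, 127)
    return np.sqrt(np.mean((quant * (top / 127.0) - tensor) ** 2))

rng = np.random.default_rng(0)
t = rng.normal(size=1000).astype(np.float32)
# A top value smaller than max|t| can yield a lower MSE because outliers get clipped,
# which is why the search does not simply return maxabs.
print(quantize_rmse(t, np.abs(t).max()), quantize_rmse(t, 0.7 * np.abs(t).max()))
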
def forward(self, x):
    embed_x = self.embedding(x)
    square_of_sum = np.sum(embed_x, axis=1)**2
    sum_of_square = np.sum(embed_x**2, axis=1)
    inputs = np.reshape(embed_x, (-1, self.embed_output_dim))
    x = self.linear_layer(self.fc(x).sum(1)) \
        + 0.5 * (square_of_sum - sum_of_square).sum(1, keepdims=True) \
        + self.mlp(inputs)
    x = npx.sigmoid(x)
    return x

def evaluator(network, inter_matrix, test_data, ctx):
    scores = []
    for values in inter_matrix:
        feat = gluon.utils.split_and_load(values, ctx, even_split=False)
        scores.extend([network(i).asnumpy() for i in feat])
    recons = np.array([item for sublist in scores for item in sublist])
    # Calculate the test RMSE.
    rmse = np.sqrt(
        np.sum(np.square(test_data - np.sign(test_data) * recons))
        / np.sum(np.sign(test_data)))
    return float(rmse)

def test_fully_connected():
    a = np.ones(shape=(LARGE_X, SMALL_Y))
    b = np.ones(shape=(SMALL_Y, SMALL_Y))
    c = np.ones(shape=(b.shape[0],))

    # w/o bias
    res = mx.npx.fully_connected(a, b, num_hidden=b.shape[0], no_bias=True)
    assert np.sum(res[-1] == a.shape[1]) == b.shape[0]

    # w/ bias
    res = mx.npx.fully_connected(a, b, c, num_hidden=b.shape[0], no_bias=False)
    assert np.sum(res[-1] == a.shape[1] + 1) == b.shape[0]

def forward(self, length_predictions, labels):
    """
    Returns Poisson loss and output given data and expected integers as labels.

    :param length_predictions: Length predictions. Shape: (batch_size,).
    :param labels: Targets. Shape: (batch_size,).
    :return: Poisson loss of length predictions of the batch, and number of samples (batch size).
    """
    # (batch_size,)
    loss = length_predictions - labels * np.log(np.maximum(1e-10, length_predictions))
    # (1,)
    loss = np.sum(loss * self.weight)
    num_samples = np.sum(np.ones_like(length_predictions))
    return loss, num_samples

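# The per-sample term `pred - label * log(pred)` above is the Poisson negative
# log-likelihood up to the constant log(label!). A tiny plain-NumPy check with
# hypothetical toy values (not from the original code) shows it is minimized
# when the prediction equals the label.
import numpy as np

label = 4.0
preds = np.array([2.0, 3.0, 4.0, 5.0, 6.0])
loss = preds - label * np.log(np.maximum(1e-10, preds))
print(preds[np.argmin(loss)])  # 4.0: minimized where prediction == label
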
def forward(self, length_predictions, labels):
    """
    Returns MSE loss.

    :param length_predictions: Length predictions. Shape: (batch_size,).
    :param labels: Targets. Shape: (batch_size,).
    :return: MSE loss of length predictions of the batch.
    """
    # (batch_size,)
    loss = (self.weight / 2) * np.square(length_predictions - labels)
    # (1,)
    loss = np.sum(loss)
    num_samples = np.sum(np.ones_like(length_predictions))
    return loss, num_samples

def forward(self, logits, labels, length_ratio, source_length, target_length):
    """
    :param logits: Model logits. Shape: (batch, length, vocab_size).
    :param labels: Gold targets. Shape: (batch, length).
    :param length_ratio: Length Ratios. Shape: (batch,).
    :param source_length: Source lengths. Shape: (batch,).
    :param target_length: Target lengths. Shape: (batch,).
    :return: Sequence scores. Shape: (batch,).
    """
    logprobs = npx.log_softmax(logits, axis=-1, temperature=self.softmax_temperature)

    # Select the label probability, then take their logs.
    # probs and scores: (batch_size, target_seq_len)
    token_scores = npx.pick(logprobs, labels, axis=-1)
    if self.score_type == C.SCORING_TYPE_NEGLOGPROB:
        token_scores = token_scores * -1

    # Sum, then apply length penalty. The call to `np.where` masks out invalid values from scores.
    # zeros and sums: (batch_size,)
    scores = np.sum(np.where(labels != 0, token_scores, np.zeros_like(token_scores)), axis=1)

    if self.constant_length_ratio is not None and self.constant_length_ratio > 0.0:
        predicted_output_length = source_length * self.constant_length_ratio
    else:
        predicted_output_length = source_length * length_ratio

    scores = self.scorer(scores, target_length, predicted_output_length)
    return scores

def forward(self, scores, target_dists, finished, best_hyp_indices):
    """
    Choose an extension of each hypothesis from its softmax distribution.

    :param scores: Vocabulary scores for the next beam step. (batch_size * beam_size, target_vocabulary_size)
    :param target_dists: The non-cumulative target distributions (ignored).
    :param finished: The list of finished hypotheses.
    :param best_hyp_indices: Best hypothesis indices constant.
    :return: The row indices, column indices, and values of the sampled words.
    """
    # Map the negative logprobs to probabilities so as to have a distribution
    target_dists = np.exp(-target_dists)

    # n == 0 means sample from the full vocabulary. Otherwise, we sample from the top n.
    if self.n != 0:
        # select the top n in each row, via a mask
        masked_items = npx.topk(target_dists, k=self.n, ret_typ='mask', axis=1, is_ascend=False)
        # set unmasked items to 0
        masked_items = np.where(masked_items, target_dists, masked_items)
        # renormalize
        target_dists = masked_items / np.sum(masked_items, axis=1, keepdims=True)

    # Sample from the target distributions over words, then get the corresponding values from the cumulative scores
    best_word_indices = npx.random.categorical(target_dists, get_prob=False)
    # Zeroes for finished hypotheses.
    best_word_indices = np.where(finished, np.zeros_like(best_word_indices), best_word_indices)
    values = npx.pick(scores, best_word_indices, axis=1, keepdims=True)

    best_hyp_indices = npx.slice_like(best_hyp_indices, best_word_indices, axes=(0,))
    return best_hyp_indices, best_word_indices, values

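# A plain-NumPy sketch (hypothetical toy values, not the Sockeye API) of the top-n
# restrict/renormalize/sample step above, for a single row: npx.topk(..., ret_typ='mask')
# and npx.random.categorical are replaced by argsort and rng.choice.
import numpy as np

rng = np.random.default_rng(0)
dist = np.array([0.05, 0.40, 0.10, 0.30, 0.15])  # toy target distribution
n = 2

# keep only the top-n probabilities, zero out the rest
mask = np.zeros_like(dist)
mask[np.argsort(dist)[-n:]] = 1.0
masked = dist * mask
# renormalize and sample a word index from the restricted distribution
restricted = masked / masked.sum()
word = rng.choice(len(dist), p=restricted)
print(restricted, word)  # only indices 1 and 3 can ever be drawn
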
def get_rmse_log(net, X_train, y_train):
    """Gets root mse between the logarithms of the prediction and the truth."""
    num_train = X_train.shape[0]
    clipped_preds = np.clip(net(X_train), 1, float('inf'))
    return np.sqrt(
        2 * np.sum(square_loss(np.log(clipped_preds), np.log(y_train))).item()
        / num_train)

def products(A):
    x = np.arange(4)
    y = np.ones(4)
    print("x . y : {}, {}".format(np.dot(x, y), np.sum(x * y)))
    print("A . x : {} has shape {}".format(np.dot(A, x), np.dot(A, x).shape))
    B = np.ones(shape=(4, 3))
    print("A . B : {} has shape {}".format(np.dot(A, B), np.dot(A, B).shape))
    print("{}.{} has shape {}".format(A.shape, B.shape, np.dot(A, B).shape))

def test_np_sum():
    class TestSum(HybridBlock):
        def __init__(self, axis=None, dtype=None, keepdims=False):
            super(TestSum, self).__init__()
            self._axis = axis
            self._dtype = dtype
            self._keepdims = keepdims

        def hybrid_forward(self, F, a, *args, **kwargs):
            return F.np.sum(a, axis=self._axis, dtype=self._dtype, keepdims=self._keepdims)

    def is_int(dtype):
        return 'int' in dtype

    in_data_dim = random.choice([2, 3, 4])
    shape = rand_shape_nd(in_data_dim, dim=3)
    acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64',
                'int8': 'int32', 'int32': 'int64', 'int64': 'int64'}
    for hybridize in [False, True]:
        for keepdims in [True, False]:
            for axis in ([i for i in range(in_data_dim)] + [(), None]):
                for itype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']:
                    for dtype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']:
                        if is_int(dtype) and not is_int(itype):
                            continue
                        # test gluon
                        test_sum = TestSum(axis=axis, dtype=dtype, keepdims=keepdims)
                        if hybridize:
                            test_sum.hybridize()
                        if is_int(itype):
                            x = _np.random.randint(-128, 128, shape, dtype=itype)
                            x = mx.nd.array(x)
                        else:
                            x = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype=itype)
                        x = x.as_np_ndarray()
                        x.attach_grad()
                        expected_ret = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims)
                        expected_ret = expected_ret.astype(dtype)
                        with mx.autograd.record():
                            y = test_sum(x)
                        assert y.shape == expected_ret.shape
                        assert_almost_equal(y.asnumpy(), expected_ret,
                                            rtol=1e-3 if dtype == 'float16' else 1e-3,
                                            atol=1e-5 if dtype == 'float16' else 1e-5)

                        y.backward()
                        assert same(x.grad.asnumpy(), _np.ones(shape=x.shape, dtype=x.dtype))

                        # test numeric
                        if itype == 'float32' and dtype == 'float32':
                            x_sym = mx.sym.Variable("x").as_np_ndarray()
                            mx_sym = mx.sym.np.sum(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray()
                            check_numeric_gradient(mx_sym, [x.as_nd_ndarray()],
                                                   numeric_eps=1e-3, rtol=1e-3, atol=1e-4, dtype=_np.float32)

                        # test imperative
                        mx_out = np.sum(x, axis=axis, dtype=dtype, keepdims=keepdims)
                        np_out = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims).astype(dtype)
                        assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)

def forward(self, user_id, item_id):
    p_mf = self.P(user_id)
    q_mf = self.Q(item_id)
    gmf = p_mf * q_mf
    p_mlp = self.U(user_id)
    q_mlp = self.V(item_id)
    mlp = self.mlp(np.concatenate([p_mlp, q_mlp], axis=1))  # 1024*20
    con_res = np.concatenate([gmf, mlp], axis=1)
    return np.sum(con_res, axis=-1)  # 1024*1

def test_sum():
    inp = np.zeros((2, INT_OVERFLOW))
    inp[-1, -1] = 10
    inp.attach_grad()
    with mx.autograd.record():
        out1 = np.sum(inp, axis=1)
        out1.backward()
    assert out1.shape == (2, )
    assert out1[0] == 0 and out1[1] == 10
    assert inp.grad.shape == inp.shape
    assert inp.grad[-1, -1] == 1
    with mx.autograd.record():
        out2 = np.sum(inp, axis=0)
        out2.backward()
    assert out2.shape == (INT_OVERFLOW, )
    assert out2[0] == 0 and out2[-1] == 10
    assert inp.grad.shape == inp.shape
    assert inp.grad[-1, -1] == 1

def test_samplek_func(batch_size, beam_size, target_vocab_size, top_n):
    pytest.importorskip("mxnet")
    from mxnet import np
    import sockeye.beam_search

    # arrange scores increasing values from left to right, so the best item is always index 0, next-best 1, and so on
    scores = np.array([list(range(1, target_vocab_size + 1))
                       for _ in range(batch_size * beam_size)])
    # normalize
    target_dists = scores / scores.sum(axis=1, keepdims=True)

    samplek = sockeye.beam_search.SampleK(n=top_n)
    samplek.initialize()

    sample_best_hyp_indices = np.arange(0, batch_size * beam_size, dtype='int32')

    # 0..(batch_size * beam_size)-1
    expected_hyps = np.array(range(batch_size * beam_size), dtype='int32')
    finished = (np.random.uniform(0, 1, (batch_size * beam_size)) > 0.5).astype('int32')

    for i in [1, 2]:
        if i == 2:
            samplek.hybridize()

        hyps, words, values = samplek(scores, scores, finished, sample_best_hyp_indices)
        assert hyps.shape[0] == batch_size * beam_size

        # The indices should always be the integers from 0 to batch*beam-1
        assert sum(hyps == expected_hyps).item() == (batch_size * beam_size)

        if top_n != 0:
            # Scores are increasing left-to-right, so best items are all the lowest word IDs.
            # No word id greater than the cap (top_n) should be selected
            assert np.sum(words >= top_n).item() == 0

        # word index should be zero for all finished hypotheses
        assert np.sum(np.where(finished, words, finished)).item() == 0

def test_np_loss_ndarray():
    # Ported from test_loss.test_loss_ndarray
    output = np.array([1, 2, 3, 4])
    label = np.array([1, 3, 5, 7])
    weighting = np.array([0.5, 1, 0.5, 1])

    loss = gluon.loss.L1Loss()
    assert float(np.sum(loss(output, label))) == 6.
    loss = gluon.loss.L1Loss(weight=0.5)
    assert float(np.sum(loss(output, label))) == 3.
    loss = gluon.loss.L1Loss()
    assert float(np.sum(loss(output, label, weighting))) == 5.

    loss = gluon.loss.L2Loss()
    assert float(np.sum(loss(output, label))) == 7.
    loss = gluon.loss.L2Loss(weight=0.25)
    assert float(np.sum(loss(output, label))) == 1.75
    loss = gluon.loss.L2Loss()
    assert float(np.sum(loss(output, label, weighting))) == 6

    output = np.array([[0, 2], [1, 4]])
    label = np.array([0, 1])
    weighting = np.array([[0.5], [1.0]])

    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    L = loss(output, label).asnumpy()
    assert_almost_equal(L, _np.array([2.12692809, 0.04858733]), use_broadcast=False, rtol=1e-3)

    L = loss(output, label, weighting).asnumpy()
    assert_almost_equal(L, _np.array([1.06346405, 0.04858733]), use_broadcast=False, rtol=1e-3)

def pixel_accuracy(output, y):
    '''
    Binary class prediction accuracy.
    output is dim(B, 1, W, H, {D})
    target is dim(B, W, H, {D})
    '''
    true_pos = np.sum(y)
    if output.shape[1] == 1:
        classes = (output > 0.5).astype('float32')
    if output.shape[1] == 2:
        classes = np.argmax(output, axis=1)
    acc = (classes.astype('bool') * y.astype('bool')).sum()
    # print('Acc:', acc)
    if true_pos > 0:
        pix_acc = acc / y.sum().astype('float32')
    if true_pos == 0 and acc == 0:
        pix_acc = 1.0
    if true_pos == 0 and acc != 0:
        pix_acc = 0.0
    return pix_acc

def forward(self, source_encoded: np.ndarray, source_encoded_length: np.ndarray) -> np.ndarray:
    """
    Transformation to the length ratio. Returns a vector.

    :param source_encoded: Encoder representation for n elements. Shape: (n, source_encoded_length, hidden_size).
    :param source_encoded_length: A vector of encoded sequence lengths. Shape: (n,).
    :return: Predictions of the ratio length(hypothesis)/length(reference). Shape: (n, 1).
    """
    # source_masked: (n, source_encoded_length, hidden_size)
    source_masked = npx.sequence_mask(source_encoded,
                                      axis=1,
                                      sequence_length=source_encoded_length,
                                      use_sequence_length=True,
                                      value=0.)
    # calculate the proper means of encoded sources
    # data: (n, hidden_size)
    data = np.sum(source_masked, axis=1, keepdims=False) / np.reshape(source_encoded_length, (-1, 1))
    # MLP. Shape: (n, 1)
    data = self.layers(data)
    # Shape: (n,)
    return np.squeeze(data)

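# The masked sum divided by the sequence lengths above is a length-aware mean over time.
# A minimal plain-NumPy sketch with hypothetical toy shapes (not the Sockeye code itself):
import numpy as np

source_encoded = np.arange(12, dtype=np.float64).reshape(2, 3, 2)  # n=2, max length 3, hidden size 2
lengths = np.array([2, 3])

# zero out positions beyond each sequence's length, then average by the true length
mask = (np.arange(3)[None, :] < lengths[:, None]).astype(source_encoded.dtype)
pooled = (source_encoded * mask[:, :, None]).sum(axis=1) / lengths[:, None]
print(pooled.shape)  # (2, 2): one mean-pooled vector per sequence
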
def inner_prod(self, prob, label):
    prod = prob * label
    prod = FF.sum(prod, axis=self.axis, keepdims=True)
    return prod

def test_get_training_data_iters():
    pytest.importorskip('mxnet')
    from sockeye import data_io
    from mxnet import np
    from sockeye.test_utils import tmp_digits_dataset

    train_line_count = 100
    train_line_count_empty = 0
    train_max_length = 30
    dev_line_count = 20
    dev_max_length = 30
    expected_mean = 1.0
    expected_std = 0.0
    test_line_count = 20
    test_line_count_empty = 0
    test_max_length = 30
    batch_size = 5
    num_source_factors = num_target_factors = 1
    with tmp_digits_dataset("tmp_corpus",
                            train_line_count, train_line_count_empty, train_max_length - C.SPACE_FOR_XOS,
                            dev_line_count, dev_max_length - C.SPACE_FOR_XOS,
                            test_line_count, test_line_count_empty, test_max_length - C.SPACE_FOR_XOS) as data:
        # tmp common vocab
        vcb = vocab.build_from_paths([data['train_source'], data['train_target']])

        train_iter, val_iter, config_data, data_info = data_io.get_training_data_iters(
            sources=[data['train_source']],
            targets=[data['train_target']],
            validation_sources=[data['dev_source']],
            validation_targets=[data['dev_target']],
            source_vocabs=[vcb],
            target_vocabs=[vcb],
            source_vocab_paths=[None],
            target_vocab_paths=[None],
            shared_vocab=True,
            batch_size=batch_size,
            batch_type=C.BATCH_TYPE_SENTENCE,
            batch_num_devices=1,
            max_seq_len_source=train_max_length,
            max_seq_len_target=train_max_length,
            bucketing=True,
            bucket_width=10)

        assert isinstance(train_iter, data_io.ParallelSampleIter)
        assert isinstance(val_iter, data_io.ParallelSampleIter)
        assert isinstance(config_data, data_io.DataConfig)
        assert data_info.sources == [data['train_source']]
        assert data_info.targets == [data['train_target']]
        assert data_info.source_vocabs == [None]
        assert data_info.target_vocabs == [None]
        assert config_data.data_statistics.max_observed_len_source == train_max_length
        assert config_data.data_statistics.max_observed_len_target == train_max_length
        assert np.isclose(config_data.data_statistics.length_ratio_mean, expected_mean)
        assert np.isclose(config_data.data_statistics.length_ratio_std, expected_std)

        assert train_iter.batch_size == batch_size
        assert val_iter.batch_size == batch_size
        assert train_iter.default_bucket_key == (train_max_length, train_max_length)
        assert val_iter.default_bucket_key == (dev_max_length, dev_max_length)
        assert train_iter.dtype == 'float32'

        # test some batches
        bos_id = vcb[C.BOS_SYMBOL]
        eos_id = vcb[C.EOS_SYMBOL]
        expected_first_target_symbols = np.full((batch_size, 1), bos_id, dtype='float32')
        for epoch in range(2):
            while train_iter.iter_next():
                batch = train_iter.next()
                assert isinstance(batch, data_io.Batch)
                source = batch.source
                target = batch.target
                label = batch.labels[C.TARGET_LABEL_NAME]  # TODO: still 2-shape: (batch, length)
                length_ratio_label = batch.labels[C.LENRATIO_LABEL_NAME]
                assert source.shape[0] == target.shape[0] == label.shape[0] == batch_size
                assert source.shape[2] == target.shape[2] == num_source_factors == num_target_factors
                # target first symbol should be BOS
                # each source sequence contains one EOS symbol
                assert np.sum(source == eos_id) == batch_size
                assert np.array_equal(target[:, 0], expected_first_target_symbols)
                # label first symbol should be 2nd target symbol
                assert np.array_equal(label[:, 0], target[:, 1, 0])
                # each label sequence contains one EOS symbol
                assert np.sum(label == eos_id) == batch_size
            train_iter.reset()

A / sum_A

# we can call the cumsum function
# this function will not reduce the input tensor along any axis.
A.cumsum(axis=0)

############### 2.3.7. Dot Products ###############
y = np.ones(4)
x
y
np.dot(x, y)

# we can express the dot product of two vectors equivalently by performing
# an elementwise multiplication and then a sum:
np.sum(x * y)

############### 2.3.8. Matrix-Vector Products ###############
# we can begin to understand matrix-vector products
A.shape, x.shape, np.dot(A, x)

############### 2.3.9. Matrix-Matrix Multiplication ###############
# if you have gotten the hang of dot products and matrix-vector products,
# then matrix-matrix multiplication should be straightforward.
B = np.ones(shape=(4, 3))
np.dot(A, B)

def forward(self, positive, negative, margin=1):
    distances = positive - negative
    loss = np.sum(np.maximum(- distances + margin, 0))
    return loss

def inner_prod(self, prob, label):
    prod = prob * label
    prod = FF.sum(prod, axis=self.axis)
    return prod

def dynamic_masking(self, input_ids, valid_lengths):
    # TODO(zheyuye), two additional flags `disallow_from_mask` and `already_masked`
    # that control the masking status for each position in the sequence.
    """
    Generate masking positions on-the-fly instead of during preprocessing.

    Parameters
    ----------
    input_ids
        The batchified input_ids with shape (batch_size, max_seq_length)
    valid_lengths
        The batchified valid_lengths with shape (batch_size, )

    Returns
    -------
    masked_input_ids
        The masked input sequence in which 15% of the tokens are replaced with [MASK]
        shape (batch_size, max_seq_length)
    length_masks
        The masking matrix for the whole sequence that indicates the positions
        that are greater than valid_length.
        shape (batch_size, max_seq_length)
    unmasked_tokens
        The original tokens that appear in the unmasked input sequence
        shape (batch_size, num_masked_positions)
    masked_positions
        The masking positions in mx.np.ndarray
        shape (batch_size, num_masked_positions)
    masked_lm_weights
        The weight matrix containing 0 or 1 to mark the actual effect of masked positions
        shape (batch_size, num_masked_positions)
    """
    N = self._max_num_masked_position
    # Only valid tokens (excluding special tokens) are allowed to be masked
    valid_candidates = np.ones_like(input_ids, dtype=np.bool)
    ignore_tokens = [self.vocab.cls_id, self.vocab.sep_id, self.vocab.pad_id]

    for ignore_token in ignore_tokens:
        # TODO(zheyuye), update when the operation += is supported
        valid_candidates = valid_candidates * \
            np.not_equal(input_ids, ignore_token)
    valid_lengths = valid_lengths.astype(np.float32)
    valid_candidates = valid_candidates.astype(np.float32)
    num_masked_position = mxnp.maximum(
        1, np.minimum(N, round(valid_lengths * self._mask_prob)))

    # Get the masking probability of each position
    sample_probs = self._proposal_distribution * valid_candidates
    sample_probs /= mxnp.sum(sample_probs, axis=-1, keepdims=True)
    sample_probs = npx.stop_gradient(sample_probs)
    gumbels = mxnp.random.gumbel(np.zeros_like(sample_probs))
    # Follow the official repo and avoid duplicate positions by using top-k (Gumbel) sampling,
    # see https://github.com/google-research/electra/issues/41
    masked_positions = npx.topk(mxnp.log(sample_probs) + gumbels, k=N,
                                axis=-1, ret_typ='indices', dtype=np.int32)

    masked_weights = npx.sequence_mask(mxnp.ones_like(masked_positions),
                                       sequence_length=num_masked_position,
                                       use_sequence_length=True, axis=1, value=0)
    masked_positions = masked_positions * masked_weights
    length_masks = npx.sequence_mask(mxnp.ones_like(input_ids, dtype=np.float32),
                                     sequence_length=valid_lengths,
                                     use_sequence_length=True, axis=1, value=0)
    unmasked_tokens = select_vectors_by_position(
        input_ids, masked_positions) * masked_weights
    masked_weights = masked_weights.astype(np.float32)
    replaced_positions = (mxnp.random.uniform(
        mxnp.zeros_like(masked_positions),
        mxnp.ones_like(masked_positions)) < self._replace_prob) * masked_positions
    # Deal with multiple zero values in replaced_positions, which would otherwise cause
    # the [CLS] token to be replaced
    filled = mxnp.where(replaced_positions, self.vocab.mask_id,
                        self.vocab.cls_id).astype(np.int32)
    # Mask tokens by replacing them with [MASK]
    masked_input_ids = update_vectors_by_position(input_ids, filled, replaced_positions)

    # Note: masked_positions is likely to contain multiple zero values if the number of
    # masked positions has not reached the maximum. However, this case hardly occurs,
    # since valid_length is almost always equal to max_seq_length.
    masked_input = self.MaskedInput(input_ids=masked_input_ids,
                                    masks=length_masks,
                                    unmasked_tokens=unmasked_tokens,
                                    masked_positions=masked_positions,
                                    masked_weights=masked_weights)
    return masked_input

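# The `log(sample_probs) + gumbel` followed by top-k above is the Gumbel-top-k trick:
# it draws k distinct positions with probability proportional to sample_probs.
# A plain-NumPy sketch with hypothetical toy values (not part of the original script):
import numpy as np

rng = np.random.default_rng(0)
sample_probs = np.array([0.05, 0.50, 0.05, 0.30, 0.10])  # toy proposal distribution
k = 2

# perturb the log-probabilities with Gumbel noise and take the k largest entries,
# which samples k positions without replacement, biased toward high-probability ones
gumbels = rng.gumbel(size=sample_probs.shape)
masked_positions = np.argsort(np.log(sample_probs) + gumbels)[-k:][::-1]
print(masked_positions)  # k distinct indices
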
def forward(self, positive, negative):
    distances = positive - negative
    loss = - np.sum(np.log(npx.sigmoid(distances)), 0, keepdims=True)
    return loss

############### 2.1.2. Operations ###############
# import
from mxnet import np, npx
npx.set_np()

# basic arithmetic
x = np.array([1, 2, 4, 8])
y = np.array([2, 2, 2, 2])
x + y   # addition
x - y   # subtraction
x * y   # multiplication
x / y   # division
x**y    # exponentiation

# Many more operations can be applied elementwise, including unary operators like exponentiation.
np.exp(x)

# concatenate multiple ndarrays together
x = np.arange(12).reshape(3, 4)
y = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
np.concatenate([x, y], axis=0)
np.concatenate([x, y], axis=1)

# express comparisons as logical statements
x == y

# sum all the elements
x.sum()
np.sum(x)

train_acc_sum = sum(
    d2l.accuracy(py.asnumpy(), y.asnumpy()) for py, y in zip(pys, ys))
l, acc = train_loss_sum, train_acc_sum
metric.add(l, acc, ys_in.shape[0], ys_in.size)
timer.stop()
if (i + 1) % (num_batches // 5) == 0:
    animator.add(
        epoch + i / num_batches,
        (metric[0] / metric[2], metric[1] / metric[3], None, None))

# val_acc = d2l.evaluate_accuracy_gpus(net, val_iter, split_f)
metric_val = d2l.Accumulator(2)  # num_corrected_examples, num_examples
for i, (Xs_in, ys_in) in enumerate(DataLoader_Single_test):
    Xs = gluon.utils.split_and_load(Xs_in.astype("float32"), ctx)
    ys = gluon.utils.split_and_load(ys_in.astype("float32"), ctx)
    pys = [net(X) for X in Xs]
    ls = [loss(py, y) for py, y in zip(pys, ys)]
    val_loss_sum = sum([float(l.sum().asnumpy()[0]) for l in ls])
    OA_val = np.sum(
        np.argmax(pys[0].asnumpy(), axis=1) == ys[0].asnumpy()).astype(
            "float32") / np.prod(ys[0].shape)
    metric_val.add(OA_val, len(ys))
    val_acc = OA_val

animator.add(epoch + 1,
             (None, None, val_loss_sum / ys_in.shape[0], val_acc))
print('loss %.3f, train acc %.3f, val acc %.3f' %
      (metric[0] / metric[2], metric[1] / metric[3], val_acc))
print('%.1f examples/sec on %s' %
      (metric[2] * num_epochs / timer.sum(), d2l.try_all_gpus()))

def loss(y_hat, y):
    m = y.shape[0]
    p = softmax(y_hat)
    return np.sum(-np.log(p[range(m), y]))

def softmax(y_hat):
    exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)

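# A quick sanity check of the two helpers above with hypothetical toy logits (not from
# the original snippet), assuming `np` here is plain NumPy; the same lines also run
# under mxnet.np with npx.set_np().
import numpy as np

y_hat = np.array([[2.0, 0.5, 0.3],
                  [0.1, 0.2, 3.0]])   # logits for 2 samples, 3 classes
y = np.array([0, 2])                  # correct classes

print(softmax(y_hat).sum(axis=1))     # each row of the softmax sums to 1
print(loss(y_hat, y))                 # small total loss: both predictions are confident and correct
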