def trunc_gumbel(logits, truncation):
    r"""Sample from the TruncGumbel distribution.

    A Gumbel sample ``g`` located at ``logits`` is drawn and combined with
    ``truncation`` (``t``) as ``-log(exp(-g) + exp(-t))``. The result is
    therefore never larger than ``min(g, t)``.

    The expression is evaluated in the algebraically equivalent form
    ``min(g, t) - log1p(exp(-|g - t|))`` so that large-magnitude inputs do
    not overflow ``exp`` (the direct form returns ``-inf`` in that case).

    References:

        1. [NIPS2014] A* Sampling, https://papers.nips.cc/paper/5449-a-sampling.pdf
        2. https://cmaddis.github.io/gumbel-machinery

    Parameters
    ----------
    logits
        The logits. Shape (...,)
    truncation
        The truncation. Shape (...,)

    Returns
    -------
    samples
        Samples from the TruncGumbel(logits, truncation)
        Shape (...,)
    """
    gumbels = np.random.gumbel(np.zeros_like(logits)) + logits
    # exp(-g) + exp(-t) == exp(-min(g, t)) * (1 + exp(-|g - t|)); factoring
    # out the dominant term keeps every exponent non-positive.
    lower = np.minimum(gumbels, truncation)
    gap = np.abs(gumbels - truncation)
    return lower - np.log1p(np.exp(-gap))
def SoftMax(X):
    """Return the softmax of ``X`` taken along its first axis.

    Works for any number of entries along axis 0 (not only three). The
    maximum is subtracted before exponentiating, which leaves the result
    mathematically unchanged but prevents ``exp`` from overflowing on large
    inputs.

    :param X: Array-like of scores; entries along axis 0 are normalized
        against each other.
    :return: Array of the same shape whose entries along axis 0 are
        non-negative and sum to 1.
    """
    # Shifting by the max makes the largest exponent exactly 0.
    exps = np.exp(X - np.max(X, axis=0))
    return exps / np.sum(exps, axis=0)
def forward(self, scores, target_dists, finished, best_hyp_indices):
    """
    Sample one extension for every hypothesis from its word distribution.

    :param scores: Vocabulary scores for the next beam step. (batch_size * beam_size, target_vocabulary_size)
    :param target_dists: The non-cumulative target distributions as negative log-probabilities;
                         they are exponentiated here to obtain the sampling distribution.
    :param finished: The list of finished hypotheses.
    :param best_hyp_indices: Best hypothesis indices constant.
    :return: The row indices, column indices, and values of the sampled words.
    """
    # exp(-x) converts negative log-probabilities into probabilities.
    probs = np.exp(-target_dists)

    # self.n == 0 samples from the whole vocabulary; any other value restricts
    # sampling to the n most probable entries of each row.
    if self.n != 0:
        keep_mask = npx.topk(probs, k=self.n, ret_typ='mask', axis=1, is_ascend=False)
        # Entries outside the mask take the mask's own (zero) value.
        kept = np.where(keep_mask, probs, keep_mask)
        # Rescale each row so the surviving entries sum to one again.
        probs = kept / np.sum(kept, axis=1, keepdims=True)

    sampled_words = npx.random.categorical(probs, get_prob=False)
    # Finished hypotheses always receive word index 0.
    sampled_words = np.where(finished, np.zeros_like(sampled_words), sampled_words)

    # Look up the score of each sampled word in the cumulative scores.
    picked_values = npx.pick(scores, sampled_words, axis=1, keepdims=True)
    hyp_rows = npx.slice_like(best_hyp_indices, sampled_words, axes=(0,))
    return hyp_rows, sampled_words, picked_values
def offset_inverse(anchors, offset_preds):
    """Recover predicted bounding boxes from anchors and predicted offsets.

    The anchors are converted to centre form with
    ``d2l.box_corner_to_center``, the offsets are applied to the centre and
    size, and the result is converted back with
    ``d2l.box_center_to_corner``.

    :param anchors: Anchor boxes, one per row, in the format expected by
        ``d2l.box_corner_to_center``.
    :param offset_preds: Predicted offsets, one row per anchor; columns 0-1
        act on the centre, columns 2-3 on the size.
    :return: Predicted boxes in the format produced by
        ``d2l.box_center_to_corner``.
    """
    c_anc = d2l.box_corner_to_center(anchors)
    # The centre shift must be scaled by the anchor's size taken from the
    # centre-form box; columns 2-3 of the raw anchors are not a size.
    c_pred_bb_xy = (offset_preds[:, :2] * c_anc[:, 2:]) / 10 + c_anc[:, :2]
    c_pred_bb_wh = np.exp(offset_preds[:, 2:] / 5) * c_anc[:, 2:]
    c_pred_bb = np.concatenate((c_pred_bb_xy, c_pred_bb_wh), axis=1)
    predicted_bb = d2l.box_center_to_corner(c_pred_bb)
    return predicted_bb
def compute_normal_distribution(samples, mean, variance):
    """
    Evaluate a normal probability density at ``samples``.

    NOTE(review): ``variance`` is squared inside the formula, so the value
    passed in is effectively used as the standard deviation. Confirm against
    callers before relying on the parameter name.

    https://en.wikipedia.org/wiki/Probability_density_function

    :param samples: Point(s) at which the density is evaluated.
    :param mean: Centre of the distribution.
    :param variance: Spread parameter; enters the formula as ``variance**2``.
    :return: Density value(s) with the same shape as ``samples``.
    """
    spread_sq = variance**2
    centred_sq = (samples - mean)**2
    norm_const = 1 / math.sqrt(2 * math.pi * spread_sq)
    exponent = (-0.5 / spread_sq) * centred_sq
    return norm_const * np.exp(exponent)
def test_exp():
    """Check ``np.exp`` values and gradients on a ``(2, INT_OVERFLOW)`` array."""
    # All ones except the very last element, so two distinct values are checked.
    inp = np.ones((2, INT_OVERFLOW))
    inp[-1, -1] = 2
    inp.attach_grad()
    with mx.autograd.record():
        out = np.exp(inp)
        out.backward()
    assert out.shape == inp.shape
    assert_almost_equal(out[0, 0], np.array(np.e**1), rtol=1e-5, atol=1e-5)
    assert_almost_equal(out[-1, -1], np.array(np.e**2), rtol=1e-5, atol=1e-5)
    assert inp.grad.shape == inp.shape
    # d/dx exp(x) == exp(x), so the gradient must equal the forward output.
    assert_almost_equal(inp.grad[-1, -1], out[-1, -1], rtol=1e-5, atol=1e-5)
def n_dim_array_operations():
    """Print a short tour of elementwise, concatenation and comparison operations."""
    lhs = np.array([1, 2, 4, 8])
    rhs = np.array([2, 2, 2, 2])

    # Elementwise arithmetic; the ** operator is exponentiation.
    total = lhs + rhs
    difference = lhs - rhs
    product = lhs * rhs
    quotient = lhs / rhs
    power = lhs**rhs
    print(total, difference, product, quotient, power)

    # Unary functions are applied elementwise as well.
    exp_values = np.exp(lhs)
    sin_values = np.sin(lhs)
    print("e^x of {} = {}".format(lhs, exp_values))
    print("sin(x) of {} = {}".format(lhs, sin_values))

    # Two 3x4 matrices, stacked along rows and then along columns.
    grid = np.arange(12).reshape(3, 4)
    other = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
    stacked_rows = np.concatenate([grid, other], axis=0)
    print("concat axis 0 : {}, shape {}".format(stacked_rows, stacked_rows.shape))
    stacked_cols = np.concatenate([grid, other], axis=1)
    print("concat axis 1 : {}, shape {}".format(stacked_cols, stacked_cols.shape))

    # Elementwise comparisons.
    is_equal = grid == other
    is_greater = grid > other
    print("equal x = y: {} == {} = {}".format(grid, other, is_equal))
    print("greater x > y: {} > {} = {}".format(grid, other, is_greater))
def normal(x, mu, sigma):  #@save
    """Evaluate the normal density with mean ``mu`` and standard deviation ``sigma`` at ``x``."""
    var = sigma**2
    # Normalizing constant 1 / sqrt(2 * pi * sigma^2).
    scale = 1 / math.sqrt(2 * math.pi * var)
    exponent = -0.5 / var * (x - mu)**2
    return scale * np.exp(exponent)
def normal(z, mu, sigma):
    """Return the Gaussian probability density at ``z``.

    :param z: Point(s) at which to evaluate the density.
    :param mu: Mean of the distribution.
    :param sigma: Standard deviation of the distribution.
    :return: Density value(s), shaped like ``z``.
    """
    sigma_sq = sigma ** 2
    squared_distance = (z - mu) ** 2
    coefficient = 1 / math.sqrt(2 * math.pi * sigma_sq)
    return coefficient * np.exp(-0.5 / sigma_sq * squared_distance)
def forward(self, x):
    """Return ``relu(x) - alpha * relu(1 - exp(x))`` computed elementwise.

    For positive ``x`` the second term vanishes and the output is ``x``;
    otherwise the output is ``alpha * (exp(x) - 1)``.
    """
    # Contribution that is non-zero only where exp(x) < 1, i.e. x < 0.
    negative_branch = -self._alpha * npx.relu(1.0 - np.exp(x))
    positive_branch = npx.relu(x)
    return negative_branch + positive_branch
def _init_sinusoidal_base(units): half_units = units // 2 val = np.log(10000) / (half_units - 1) val = np.exp(np.arange(half_units, dtype=np.float32) * -val) return val
def logreg(X, w, b):  #@save
    """The logistic regression model.

    Computes ``sigmoid(X @ w.T + b)``. The bias is part of the linear score
    that goes through the sigmoid, so every output lies in ``[0, 1]``.

    :param X: Input features, one example per row.
    :param w: Weights; transposed before the product (assumes a row
        vector of per-feature weights — TODO confirm against callers).
    :param b: Bias added to the linear score.
    :return: Predicted probabilities.
    """
    scores = np.dot(X, w.T) + b
    return 1 / (1 + np.exp(-scores))
def softmax(X):
    """Compute the softmax of each row of ``X``.

    The row maximum is subtracted before exponentiating. This does not
    change the mathematical result but keeps ``exp`` from overflowing (and
    the output from becoming ``nan``) when the scores are large.

    :param X: 2-D array of scores, one example per row.
    :return: Array of the same shape whose rows are non-negative and sum to 1.
    """
    X_exp = np.exp(X - X.max(axis=1, keepdims=True))
    partition = X_exp.sum(axis=1, keepdims=True)
    # keepdims=True lets the (rows, 1) partition broadcast across each row.
    return X_exp / partition
def softmax(y_hat):
    """Row-wise softmax with max-shifting for numerical stability.

    :param y_hat: 2-D array of scores, one example per row.
    :return: Array of the same shape whose rows sum to 1.
    """
    # Shift every row so that its largest score becomes 0 before exponentiating.
    row_max = np.max(y_hat, axis=1, keepdims=True)
    numerators = np.exp(y_hat - row_max)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators
def softmax(X):
    """Normalize each row of ``X`` into a probability distribution.

    Each row is shifted by its own maximum before ``exp`` is applied. The
    shift cancels in the ratio, so the result is unchanged, but large scores
    no longer overflow to ``inf`` and yield ``nan``.

    :param X: 2-D array of scores, one example per row.
    :return: Array of the same shape whose rows sum to 1.
    """
    X_exp = np.exp(X - X.max(1, keepdims=True))
    # Denominator, also called the normalization constant or partition function.
    X_exp_normalization = X_exp.sum(1, keepdims=True)
    return X_exp / X_exp_normalization
def softmax(X):  #@save
    """Apply softmax along axis 1 of ``X``.

    The per-row maximum is subtracted first so that the largest exponent is
    0. This avoids overflow for large scores without changing the result.

    :param X: 2-D array of scores, one example per row.
    :return: Array of the same shape whose rows sum to 1.
    """
    X_exp = np.exp(X - X.max(1, keepdims=True))
    partition = X_exp.sum(1, keepdims=True)
    return X_exp / partition  # The broadcasting mechanism is applied here
############### 2.1.2. Operations ############### # import from mxnet import np, npx npx.set_np() # 기본 계산 x = np.array([1, 2, 4, 8]) y = np.array([2, 2, 2, 2]) x + y # 더하기 x - y # 빼기 x * y # 곱하기 x / y # 나누기 x**y # 제곱 # Many more operations can be applied elementwise, including unary operators like exponentiation. np.exp(x) # concatenate multiple ndarrays together x = np.arange(12).reshape(3, 4) y = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]]) np.concatenate([x, y], axis=0) np.concatenate([x, y], axis=1) # logical statements 로 나타내기 x == y # 모든 요소 합하기 x.sum() np.sum(x)
def softmax(X):
    """Return the row-wise softmax of ``X``.

    Subtracting each row's maximum before exponentiating leaves the output
    mathematically identical while preventing ``exp`` overflow, which would
    otherwise turn rows with large scores into ``nan``.

    :param X: 2-D array of scores, one example per row.
    :return: Array of the same shape whose rows sum to 1.
    """
    row_max = X.max(axis=1, keepdims=True)
    X_exp = np.exp(X - row_max)
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition  # The broadcast mechanism is applied here