def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    # Subtract the per-row max for numerical stability. The max must keep (or
    # regain) a trailing axis so that (N, C) - (N, 1) broadcasts; the earlier
    # (N, C) - (N,) form was the source of the "max doesn't work" bug.
    probs = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
    probs /= np.expand_dims(np.sum(probs, axis=1), axis=1)
    N = x.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    return loss, dx
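# Sanity check (an illustrative sketch, not part of the original file): verify
# the analytic dx from softmax_loss above against centered finite differences
# on small random data. Assumes plain-NumPy semantics for the ops used above.
def _check_softmax_grad(N=4, C=5, eps=1e-6, seed=0):
    rng = np.random.RandomState(seed)
    x = rng.randn(N, C)
    y = rng.randint(0, C, N)
    _, dx = softmax_loss(x, y)
    num = np.zeros_like(x)
    for i in range(N):
        for j in range(C):
            xp = x.copy(); xp[i, j] += eps
            xm = x.copy(); xm[i, j] -= eps
            num[i, j] = (softmax_loss(xp, y)[0] - softmax_loss(xm, y)[0]) / (2 * eps)
    assert np.allclose(dx, num, atol=1e-5)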
def svm_loss(x, y):
    """
    Computes the loss for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns:
    - loss: Scalar giving the loss
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    #TODO: Support broadcast case: (X,) (X, Y)
    # shape(x) is (d0, d1) and shape(correct_class_scores) is (d0,), so
    # margins = np.maximum(0, x - correct_class_scores + 1.0) cannot broadcast
    # directly; work in the transposed layout instead.
    margins = np.transpose(
        np.maximum(0, np.transpose(x) - np.transpose(correct_class_scores) + 1.0))
    # Subtract the N correct-class margins (each equals 1.0) from the sum.
    loss = (np.sum(margins) - np.sum(margins[np.arange(N), y])) / N
    return loss
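# Illustrative check (a minimal sketch, assuming plain NumPy; the names below
# are not from the original file): the transposed-maximum workaround above
# exists because (N, C) minus (N,) does not broadcast, and it agrees with the
# expand_dims form used elsewhere in this file.
def _check_margin_broadcast():
    x = np.arange(12, dtype=float).reshape(4, 3)
    s = x[np.arange(4), np.array([0, 1, 2, 0])]
    via_t = np.transpose(np.maximum(0, np.transpose(x) - np.transpose(s) + 1.0))
    via_e = np.maximum(0, x - np.expand_dims(s, axis=1) + 1.0)
    assert np.allclose(via_t, via_e)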
def svm_loss(x, y, mode):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C
    - mode: 'cpu' forces pure-NumPy execution; anything else prefers MXNet.

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    if mode == 'cpu':
        np.set_policy(policy.OnlyNumpyPolicy())
    else:
        np.set_policy(policy.PreferMXNetPolicy())
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    #margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins = np.maximum(
        0, x - np.expand_dims(correct_class_scores, axis=1) + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx
def _softmax_loss(self, X, y, *args): N = X.shape[0] scores = self._forward(X, *args) scores = np.exp(scores - np.max(scores, axis=1, keepdims=True)) prob = scores / np.sum(scores, axis=1, keepdims=True) loss = np.sum(-np.log(prob[np.arange(N), y])) / float(N) return loss
def test_lr_grad(): def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 wshape = (500, 250) weights = random.rand(*wshape) - 0.5 xshape = (256, 500) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 gradient_checker.quick_grad_check(training_loss, inputs)
def _select_polygon_points(self, num): x = self.np_random.randint(self.screen_width / 2 - 6, self.screen_width / 2 + 20, 1) y = self.np_random.randint(self.screen_width / 2 - 3, self.screen_width / 2 + 5, 1) start = (x[0], y[0]) base = 180 / config['rotate_degree'] base_angle = math.pi / base angle_options = (np.arange(base - 2) + 1) * base_angle angle = 0 rotate_degree = self.np_random.choice(angle_options, 3) size = np.array(np.random.choice(range(3, 5), 3)) * 5 #size = [3,3,4] points = [] points.append(start) for i in range(num - 1): angle += rotate_degree[i] next_x = int(start[0] + size[i] * math.cos(angle)) next_y = int(start[1] + size[i] * math.sin(angle)) next_point = (next_x, next_y) points.append(next_point) start = next_point dist = points[0][0] - points[-1][0] if dist < 0: next_y = math.tan(angle_options[0]) * dist else: next_y = math.tan(-angle_options[0]) * dist next_point = (points[0][0], int(points[-1][1] + next_y)) points.append(next_point) return points
def test_context(): set_context(gpu(1)) # set the global context as gpu(1) def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 with gpu(0): xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(20): print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01 print("\nff and bp on {0}".format(weights.context)) print("\nexecute on cpu") with cpu(): x_cpu = random.rand(32, 64) - 0.5 y_cpu = random.rand(64, 32) - 0.5 z_cpu = np.dot(x_cpu, y_cpu) print('z_cpu.context = {0}'.format(z_cpu.context)) print("\nexecute on gpu(0)") with gpu(0): x_gpu0 = random.rand(32, 64) - 0.5 y_gpu0 = random.rand(64, 32) - 0.5 z_gpu0 = np.dot(x_gpu0, y_gpu0) z_gpu0.asnumpy() print('z_gpu0.context = {0}'.format(z_gpu0.context)) print("\n[use global context] execute on gpu(1)") x_gpu1 = random.rand(32, 64) - 0.5 y_gpu1 = random.rand(64, 32) - 0.5 z_gpu1 = np.dot(x_gpu1, y_gpu1) z_gpu1.asnumpy() print('z_gpu1.context = {0}'.format(z_gpu1.context))
def test_index_slice(): a = np.arange(6) a = np.reshape(a, (3, 2)) print(a[:]) a[1:2] = -1 print(a) d = np.slice_axis(a, axis=1, begin=1, end=2) print(d)
def softmax(x, y): import numpy as np y = y.astype(int) probs = np.exp(x - np.max(x, axis=1, keepdims=True)) probs /= np.sum(probs, axis=1, keepdims=True) N = x.shape[0] loss = -np.sum(np.log(probs[np.arange(N), y])) / N return loss
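# Why the row max is subtracted before exp (a minimal illustration, assuming
# plain NumPy): softmax is invariant to adding a constant per row, and the
# shift keeps exp() from overflowing on large scores.
def _softmax_stability_demo():
    x = np.array([[1000.0, 1001.0, 1002.0]])
    shifted = np.exp(x - np.max(x, axis=1, keepdims=True))  # finite values
    probs = shifted / np.sum(shifted, axis=1, keepdims=True)
    assert np.all(np.isfinite(probs))  # np.exp(x) alone overflows to inf here
    return probs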
def softmax_crossentropy(x, y): # x should be (batch, prob) # y should be (batch, ) x_dev = x - np.max(x, axis=1, keepdims=True) # minpy doesn't support x.max() sm = x_dev - np.log(np.sum(np.exp(x_dev), axis=1, keepdims=True)) ids = np.arange(0, y.shape[0])*x.shape[1] + y ce = -np.sum(sm.reshape((sm.shape[0]*sm.shape[1],))[ids])/(1.0*y.shape[0]) # minpy doesn't support -1 in shape inference return ce
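# Illustration (assumed plain NumPy; not from the original file): the flat
# index arange(N) * C + y used above is a workaround for missing fancy
# indexing and matches the two-index gather sm[np.arange(N), y].
def _check_flat_gather():
    sm = np.arange(12, dtype=float).reshape(4, 3)
    y = np.array([2, 0, 1, 2])
    ids = np.arange(0, y.shape[0]) * sm.shape[1] + y
    flat = sm.reshape((sm.shape[0] * sm.shape[1],))[ids]
    assert np.array_equal(flat, sm[np.arange(4), y])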
def grad(g):
    # NOTE: the parameter g is unused; x and label are read from the
    # enclosing scope.
    import numpy as np
    y = label.astype(int)
    probs = np.exp(x - np.max(x, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    N = x.shape[0]
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    return dx
def attack_mnist(model, alpha=0.2, beta=0.001, isTarget=False, num_attacks=10):
    imgs = np.array(
        nnp.load('data/mnist_data.npy')[60000:].transpose(
            (0, 2, 3, 1)).astype(np.float32) / 255.)
    labs = nnp.load('data/mnist_labels.npy')[60000:]
    nb_labs = nnp.max(labs)
    print(
        "\n\n Running {} attack on {} random MNIST test images for alpha= {} beta= {}\n\n"
        .format("targeted" if isTarget else "untargeted", num_attacks, alpha,
                beta))
    total_distortion = []
    samples = []
    for i in range(nb_labs + 1):
        samples.append(
            np.random.permutation(np.arange(len(labs))[labs == i])[0])
    # samples = [6312, 6891, 4243, 8377, 7962, 6635, 4970, 7809, 5867, 9559, 3579, 8269, 2282, 4618, 2290, 1554, 4105, 9862, 2408, 5082, 1619, 1209, 5410, 7736, 9172, 1650, 5181, 3351, 9053, 7816, 7254, 8542, 4268, 1021, 8990, 231, 1529, 6535, 19, 8087, 5459, 3997, 5329, 1032, 3131, 9299, 3910, 2335, 8897, 7340, 1495, 5244,8323, 8017, 1787, 4939, 9032, 4770, 2045, 8970, 5452, 8853, 3330, 9883, 8966, 9628, 4713, 7291, 9770, 6307, 5195, 9432, 3967, 4757, 3013, 3103, 3060, 541, 4261, 7808, 1132, 1472, 2134, 634, 1315, 8858, 6411, 8595, 4516, 8550, 3859, 3526]
    #true_labels = [3, 1, 6, 6, 9, 2, 7, 5, 5, 3, 3, 4, 5, 6, 7, 9, 1, 6, 3, 4, 0, 6, 5, 9, 7, 0, 3, 1, 6, 6, 9, 6, 4, 7, 6, 3, 4, 3, 4, 3, 0, 7, 3, 5, 3, 9, 3, 1, 9, 1, 3, 0, 2, 9, 9, 2, 2, 3, 3, 3, 0, 5, 2, 5, 2, 7, 2, 2, 5, 7, 4, 9, 9, 0, 0, 7, 9, 4, 5, 5, 2, 3, 5, 9, 3, 0, 9, 0, 1, 2, 9, 9]
    for idx in samples:
        #idx = random.randint(100, len(test_dataset)-1)
        image, label = imgs[idx], labs[idx]
        print("\n\n\n\n======== Image %d =========" % idx)
        print("Original label: ", label)
        lab = predict(model, image)
        print("Predicted label: ", lab)
        if lab != label:
            print(
                'CHANGE IMAGES#{}: prediction of the original image does not match the true label'
                .format(idx))
            continue
        #target = None if not isTarget else random.choice(list(range(label)) + list(range(label+1, 10)))
        advs = [image]
        for i in range(nb_labs):
            target = (label + i) % (nb_labs + 1)
            adv = attack_targeted(model, imgs[labs == target], image, label,
                                  target, alpha=alpha, beta=beta,
                                  iterations=1000)
            print(i, "Predicted label for adversarial example: ",
                  predict(model, adv))
            advs.append(np.clip(adv, 0, 1))
            total_distortion.append(
                np.linalg.norm(adv.reshape(-1) - image.reshape(-1)))
        np.save('advs/opt_attacks_{}_show.npy'.format(idx), advs)
    print("Average distortion on random {} images is {}".format(
        len(total_distortion), np.mean(total_distortion)))
def svm_loss(x, y, mode):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C
    - mode: 'cpu' forces pure-NumPy execution; anything else prefers MXNet.

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    if mode == 'cpu':
        np.set_policy(policy.OnlyNumpyPolicy())
    else:
        np.set_policy(policy.PreferMXNetPolicy())
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    #TODO: Support broadcast case: (X,) (X, Y)
    #margins = np.maximum(0, x - correct_class_scores + 1.0)
    margins = np.transpose(
        np.maximum(0,
                   np.transpose(x) - np.transpose(correct_class_scores) + 1.0))
    # Subtracting the correct-class margins (each equals 1.0) is equivalent to
    # zeroing them before the sum; margins is zeroed afterwards for dx.
    loss = (np.sum(margins) - np.sum(margins[np.arange(N), y])) / N
    margins[np.arange(N), y] = 0
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx
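# Sanity check (an illustrative sketch; assumes the MinPy policy call inside
# svm_loss is available with mode='cpu' and plain-NumPy ops otherwise):
# compare the analytic dx with centered finite differences.
def _check_svm_grad(N=4, C=5, eps=1e-6, seed=0):
    rng = np.random.RandomState(seed)
    x = rng.randn(N, C)
    y = rng.randint(0, C, N)
    _, dx = svm_loss(x, y, 'cpu')
    num = np.zeros_like(x)
    for i in range(N):
        for j in range(C):
            xp = x.copy(); xp[i, j] += eps
            xm = x.copy(); xm[i, j] -= eps
            num[i, j] = (svm_loss(xp, y, 'cpu')[0] - svm_loss(xm, y, 'cpu')[0]) / (2 * eps)
    assert np.allclose(dx, num, atol=1e-5)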
def train_loss(*args):
    inputs = args[0]
    softmax_label = args[1]
    probs = self.symbol_func(**self.make_mxnet_weight_dict(
        inputs, softmax_label, args[self.data_target_cnt:len(args)]))
    if softmax_label is None:
        return probs
    # NOTE: X is captured from the enclosing scope; it is assumed to hold the
    # same minibatch as inputs.
    samples_num = X.shape[0]
    targets = np.zeros((samples_num, self.num_classes))
    targets[np.arange(samples_num), softmax_label] = 1
    loss = -np.sum(targets * np.log(probs)) / samples_num
    for i in self.get_index_reg_weight():
        loss = loss + np.sum(0.5 * args[i] ** 2 * self.reg)
    return loss
def predict(models, img, t=0): img = np.clip(img, 0, 1) * 255 img = extend_data(config['permutation'], np.array([img])) scores = np.hstack([m.predict(img) for m in models])[0] #print(scores.shape) nat_labels = np.zeros(scores.shape).astype(np.float32) nat_labels[scores >= 0.5] = 1. rep = rep_labels[:len(scores)].T tmp = np.repeat([nat_labels], rep.shape[0], axis=0) dists = np.sum(np.absolute(tmp - rep), axis=-1) min_dist = np.min(dists) pred_labels = np.arange(len(dists))[dists == min_dist] pred_scores = [ np.sum([ scores[k] if rep[j][k] == 1 else 1 - scores[k] for k in np.arange(len(scores)) ]) for j in pred_labels ] pred_label = pred_labels[np.argmax(pred_scores)] if min_dist <= 0: return pred_label else: return -1
def test_op_statistics(): def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero( np.argmax( preds, axis=1) - np.argmax( targets, axis=1)) return (256 - error) * 100 / 256.0 np.record_op_stat() xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(30): print('Trained accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01 # Print Op Statistics Info np.show_op_stat()
def softmax_crossentropy(x, y):
    EPSI = 1e-6
    batch_size, seq_len, prob_dim = x.shape
    # x should be (batch, prob); y should be (batch, )
    x = x.reshape((x.shape[0] * x.shape[1], x.shape[2]))
    y = y.reshape((y.shape[0] * y.shape[1], ))
    x_dev = x - np.max(x, axis=1, keepdims=True)  # minpy doesn't support x.max()
    sm = x_dev - np.log(EPSI + np.sum(np.exp(x_dev), axis=1, keepdims=True))
    # Gather sm[i, y[i]] via flat indices. After the reshape each row has
    # prob_dim entries, so the stride must be prob_dim (striding by seq_len,
    # as before, only works when seq_len == prob_dim).
    ids = np.arange(0, y.shape[0]) * prob_dim + y
    ce = -np.sum(sm.reshape((sm.shape[0] * sm.shape[1], ))[ids]) / (
        1.0 * y.shape[0])  # minpy doesn't support -1 in shape inference
    return ce
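# Shape bookkeeping for the reshape above (a minimal sketch, assuming plain
# NumPy): a (batch=2, seq_len=3, vocab=4) score tensor flattens to (6, 4),
# and the flat gather index must stride by the vocabulary size.
def _check_temporal_gather():
    x = np.random.randn(2, 3, 4).reshape(6, 4)
    y = np.random.randint(0, 4, 6)
    ids = np.arange(0, y.shape[0]) * x.shape[1] + y
    assert np.array_equal(x.reshape((24, ))[ids], x[np.arange(6), y])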
def test_lr_grad(): inputs = rng.rand(32, 64) * 0.1 targets = np.zeros((32, 10)) truth = rng.randint(0, 10, 32) targets[np.arange(32), truth] = 1 def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def training_loss(weights): preds = sigmoid(np.dot(inputs, weights)) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l weights = rng.rand(64, 10) * 0.01 return gradient_checker.quick_grad_check(training_loss, weights, rs=rng)
def test_mxnet_logistic():
    def sigmoid(x):
        return np.multiply(0.5, np.add(np.tanh(x), 1))

    xshape = (256, 500)
    # wshape is (out, in) rather than (in, out) because MXNet's
    # FullyConnected stores its weight transposed.
    wshape = (250, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    weights = np.random.rand(*wshape) - 0.5
    x = mx.sym.Variable(name='x')
    fc = mx.sym.FullyConnected(name='fc', data=x, num_hidden=250)
    act = mx.sym.Activation(data=fc, act_type='sigmoid')
    f = core.Function(act, {'x': xshape})

    def predict(weights, inputs):
        #return f(data=[('x', inputs)], weight=[('fc_weight', weights)], ctx=mx.cpu())
        return f(x=inputs, fc_weight=weights)

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        return -np.sum(np.log(label_probabilities))

    training_gradient_fun = core.grad(training_loss)
    print('Initial loss: {}'.format(training_loss(weights, inputs)))
    for i in range(100):
        gr = training_gradient_fun(weights, inputs)
        #print('Training gradient: {}'.format(gr))
        weights -= gr * 0.1
        if i % 10 == 0:
            print('Trained loss: {}'.format(training_loss(weights, inputs)))
    # The training loss should be around 300 in a bug-free Minpy
    if (training_loss(weights, inputs)[0] > 600):
        assert (False)
def softmax_loss(x, y):
    """
    Computes the loss for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns:
    - loss: Scalar giving the loss
    """
    #TODO: Missing Max Operator
    probs = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
    probs = probs / np.expand_dims(np.sum(probs, axis=1), axis=1)
    N = x.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    return loss
def test_logistic(): def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero( np.argmax( preds, axis=1) - np.argmax( targets, axis=1)) return (256 - error) * 100 / 256.0 xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(200): print('Trained accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01
def test_slice(): def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): # Test Slice sliced_weights = weights[:, ::2] y = sigmoid(np.dot(inputs, sliced_weights)) return y def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero( np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 xshape = (256, 500) # wshape = (500, 250) wshape = (500, 500) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(20): print('Trained loss accuracy #{}: {}%'.format( i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) print('Gradient Size', gr.shape) print('Gradient example', gr[0, :10].asnumpy()) weights -= gr * 0.01
def test_logistic(): def sigmoid(x): return 0.5 * (np.tanh(x / 2) + 1) def predict(weights, inputs): return sigmoid(np.dot(inputs, weights)) def training_loss(weights, inputs): preds = predict(weights, inputs) label_probabilities = preds * targets + (1 - preds) * (1 - targets) l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero( np.argmax( preds, axis=1) - np.argmax( targets, axis=1)) return (256 - error) * 100 / 256.0 xshape = (256, 500) wshape = (500, 250) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) for i in range(200): print('Trained accuracy #{}: {}%'.format( i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) weights -= gr * 0.01 # The accuracy should be 100 in bug-free MinPy if (training_accuracy(weights, inputs) < 95): assert (False)
def forward(X, y, *p):
    N, C, H, W = X.shape
    X = X.reshape((N, C * H * W))
    print('>>', X.shape)
    print('>>', p[0].shape)
    first = np.dot(X, p[0]) + p[1]
    second = np.dot(first, p[2]) + p[3]
    # Softmax over classes: normalize per row, not over the whole matrix.
    exp = np.exp(second)
    pred = exp / np.sum(exp, axis=1, keepdims=True)
    N = X.shape[0]
    # Cross-entropy needs the log of the predicted probabilities.
    loss = -np.sum(np.log(pred[np.arange(N), y]))
    return loss
def temporal_softmax_loss(x, y, mask, verbose=False):
    """
    A temporal version of softmax loss for use in RNNs. We assume that we are
    making predictions over a vocabulary of size V for each timestep of a
    timeseries of length T, over a minibatch of size N. The input x gives
    scores for all vocabulary elements at all timesteps, and y gives the
    indices of the ground-truth element at each timestep. We use a
    cross-entropy loss at each timestep, summing the loss over all timesteps
    and averaging across the minibatch.

    As an additional complication, we may want to ignore the model output at
    some timesteps, since sequences of different length may have been combined
    into a minibatch and padded with NULL tokens. The optional mask argument
    tells us which elements should contribute to the loss.

    Inputs:
    - x: Input scores, of shape (N, T, V)
    - y: Ground-truth indices, of shape (N, T) where each element is in the
      range 0 <= y[i, t] < V
    - mask: Boolean array of shape (N, T) where mask[i, t] tells whether or
      not the scores at x[i, t] should contribute to the loss.

    Returns:
    - loss: Scalar giving loss
    """
    N, T, V = x.shape
    x_flat = x.reshape(N * T, V)
    y_flat = y.reshape(N * T)
    mask_flat = mask.reshape(N * T)
    probs = np.exp(x_flat - np.max(x_flat, axis=1, keepdims=True))
    probs = probs / np.sum(probs, axis=1, keepdims=True)
    loss = -np.sum(mask_flat * np.log(probs[np.arange(N * T), y_flat])) / N
    return loss
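# Usage sketch (illustrative, not from the original file): a padded minibatch
# where the second sequence is one timestep shorter; the mask drops its padded
# NULL position from the loss.
def _temporal_loss_demo():
    N, T, V = 2, 3, 5
    rng = np.random.RandomState(0)
    x = rng.randn(N, T, V)
    y = rng.randint(0, V, (N, T))
    mask = np.array([[True, True, True],
                     [True, True, False]])  # ignore the padded last step
    return temporal_softmax_loss(x, y, mask)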
    preds = predict(weights, bias, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    l = -np.sum(np.log(label_probabilities))
    return l

def training_accuracy(weights, bias, inputs):
    preds = predict(weights, bias, inputs)
    error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0

xshape = (256, 500)
wshape = (500, 250)
bshape = (250,)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5
#bias = random.rand(*bshape) - 0.5
#print(bias.shape)
bias = np.zeros(bshape)
print(bias.shape)
training_gradient_fun = grad(training_loss)
for i in range(20):
    print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, bias, inputs)))
    gr = training_gradient_fun(weights, bias, inputs)
    weights -= gr * 0.01
def identity(n): array = np.zeros((n, n)) array[np.arange(n), np.arange(n)] = 1 return array
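# Note (illustrative, not from the original file): for square, offset-0
# identities this matches np.eye(n); the fancy-indexing form is handy where
# eye is not implemented.
def _check_identity():
    assert np.array_equal(identity(4), np.eye(4))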
def test_ufunc(): x = np.array([-1.2, 1.2]) np.absolute(x) np.absolute(1.2 + 1j) x = np.linspace(start=-10, stop=10, num=101) np.add(1.0, 4.0) x1 = np.arange(9.0).reshape((3, 3)) x2 = np.arange(3.0) np.add(x1, x2) np.arccos([1, -1]) x = np.linspace(-1, 1, num=100) np.arccosh([np.e, 10.0]) np.arccosh(1) np.arcsin(0) np.arcsinh(np.array([np.e, 10.0])) np.arctan([0, 1]) np.pi/4 x = np.linspace(-10, 10) x = np.array([-1, +1, +1, -1]) y = np.array([-1, -1, +1, +1]) np.arctan2(y, x) * 180 / np.pi np.arctan2([1., -1.], [0., 0.]) np.arctan2([0., 0., np.inf], [+0., -0., np.inf]) np.arctanh([0, -0.5]) np.bitwise_and(13, 17) np.bitwise_and(14, 13) # np.binary_repr(12) return str np.bitwise_and([14,3], 13) np.bitwise_and([11,7], [4,25]) np.bitwise_and(np.array([2,5,255]), np.array([3,14,16])) np.bitwise_and([True, True], [False, True]) np.bitwise_or(13, 16) # np.binary_repr(29) np.bitwise_or(32, 2) np.bitwise_or([33, 4], 1) np.bitwise_or([33, 4], [1, 2]) np.bitwise_or(np.array([2, 5, 255]), np.array([4, 4, 4])) # np.array([2, 5, 255]) | np.array([4, 4, 4]) np.bitwise_or(np.array([2, 5, 255, 2147483647], dtype=np.int32), np.array([4, 4, 4, 2147483647], dtype=np.int32)) np.bitwise_or([True, True], [False, True]) np.bitwise_xor(13, 17) # np.binary_repr(28) np.bitwise_xor(31, 5) np.bitwise_xor([31,3], 5) np.bitwise_xor([31,3], [5,6]) np.bitwise_xor([True, True], [False, True]) a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) np.ceil(a) a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) np.trunc(a) np.cos(np.array([0, np.pi/2, np.pi])) np.cosh(0) x = np.linspace(-4, 4, 1000) rad = np.arange(12.)*np.pi/6 np.degrees(rad) out = np.zeros((rad.shape)) r = np.degrees(rad, out) # np.all(r == out) return bool np.rad2deg(np.pi/2) np.divide(2.0, 4.0) x1 = np.arange(9.0).reshape((3, 3)) x2 = np.arange(3.0) np.divide(2, 4) np.divide(2, 4.) 
np.equal([0, 1, 3], np.arange(3)) np.equal(1, np.ones(1)) x = np.linspace(-2*np.pi, 2*np.pi, 100) np.exp2([2, 3]) np.expm1(1e-10) np.exp(1e-10) - 1 np.fabs(-1) np.fabs([-1.2, 1.2]) a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) np.floor(a) np.floor_divide(7,3) np.floor_divide([1., 2., 3., 4.], 2.5) np.fmod([-3, -2, -1, 1, 2, 3], 2) np.remainder([-3, -2, -1, 1, 2, 3], 2) np.fmod([5, 3], [2, 2.]) a = np.arange(-3, 3).reshape(3, 2) np.fmod(a, [2,2]) np.greater([4,2],[2,2]) a = np.array([4,2]) b = np.array([2,2]) a > b np.greater_equal([4, 2, 1], [2, 2, 2]) np.hypot(3*np.ones((3, 3)), 4*np.ones((3, 3))) np.hypot(3*np.ones((3, 3)), [4]) np.bitwise_not is np.invert np.invert(np.array([13], dtype=np.uint8)) # np.binary_repr(242, width=8) np.invert(np.array([13], dtype=np.uint16)) np.invert(np.array([13], dtype=np.int8)) # np.binary_repr(-14, width=8) np.invert(np.array([True, False])) # np.isfinite(1) # np.isfinite(0) # np.isfinite(np.nan) # np.isfinite(np.inf) # np.isfinite(np.NINF) x = np.array([-np.inf, 0., np.inf]) y = np.array([2, 2, 2]) np.isfinite(x, y) # np.isinf(np.inf) # np.isinf(np.nan) # np.isinf(np.NINF) # np.isinf([np.inf, -np.inf, 1.0, np.nan]) x = np.array([-np.inf, 0., np.inf]) y = np.array([2, 2, 2]) # np.isinf(x, y) # np.isnan(np.nan) # np.isnan(np.inf) # np.binary_repr(5) np.left_shift(5, 2) # np.binary_repr(20) np.left_shift(5, [1,2,3]) np.less([1, 2], [2, 2]) np.less_equal([4, 2, 1], [2, 2, 2]) x = np.array([0, 1, 2, 2**4]) xi = np.array([0+1.j, 1, 2+0.j, 4.j]) np.log2(xi) prob1 = np.log(1e-50) prob2 = np.log(2.5e-50) prob12 = np.logaddexp(prob1, prob2) prob12 np.exp(prob12) prob1 = np.log2(1e-50) prob2 = np.log2(2.5e-50) prob12 = np.logaddexp2(prob1, prob2) prob1, prob2, prob12 2**prob12 np.log1p(1e-99) np.log(1 + 1e-99) # np.logical_and(True, False) # np.logical_and([True, False], [False, False]) x = np.arange(5) # np.logical_and(x>1, x<4) # np.logical_not(3) # np.logical_not([True, False, 0, 1]) x = np.arange(5) # np.logical_not(x<3) # np.logical_or(True, False) # np.logical_or([True, False], [False, False]) x = np.arange(5) # np.logical_or(x < 1, x > 3) # np.logical_xor(True, False) # np.logical_xor([True, True, False, False], [True, False, True, False]) x = np.arange(5) # np.logical_xor(x < 1, x > 3) # np.logical_xor(0, np.eye(2)) np.maximum([2, 3, 4], [1, 5, 2]) # np.maximum([np.nan, 0, np.nan], [0, np.nan, np.nan]) # np.maximum(np.Inf, 1) np.minimum([2, 3, 4], [1, 5, 2]) # np.minimum([np.nan, 0, np.nan],[0, np.nan, np.nan]) # np.minimum(-np.Inf, 1) np.fmax([2, 3, 4], [1, 5, 2]) np.fmax(np.eye(2), [0.5, 2]) # np.fmax([np.nan, 0, np.nan],[0, np.nan, np.nan]) np.fmin([2, 3, 4], [1, 5, 2]) np.fmin(np.eye(2), [0.5, 2]) # np.fmin([np.nan, 0, np.nan],[0, np.nan, np.nan]) np.modf([0, 3.5]) np.modf(-0.5) np.multiply(2.0, 4.0) x1 = np.arange(9.0).reshape((3, 3)) x2 = np.arange(3.0) np.multiply(x1, x2) np.negative([1.,-1.]) np.not_equal([1.,2.], [1., 3.]) np.not_equal([1, 2], [[1, 3],[1, 4]]) x1 = range(6) np.power(x1, 3) x2 = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0] np.power(x1, x2) x2 = np.array([[1, 2, 3, 3, 2, 1], [1, 2, 3, 3, 2, 1]]) np.power(x1, x2) deg = np.arange(12.) * 30. np.radians(deg) out = np.zeros((deg.shape)) ret = np.radians(deg, out) ret is out np.deg2rad(180) np.reciprocal(2.) 
np.reciprocal([1, 2., 3.33])
np.remainder([4, 7], [2, 3])
np.remainder(np.arange(7), 5)
# np.binary_repr(10)
np.right_shift(10, 1)
# np.binary_repr(5)
np.right_shift(10, [1,2,3])
a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
np.rint(a)
np.sign([-5., 4.5])
np.sign(0)
# np.sign(5-2j)
# np.signbit(-1.2)
np.signbit(np.array([1, -2.3, 2.1]))
np.copysign(1.3, -1)
np.copysign([-1, 0, 1], -1.1)
np.copysign([-1, 0, 1], np.arange(3)-1)
np.sin(np.pi/2.)
np.sin(np.array((0., 30., 45., 60., 90.)) * np.pi / 180.)
x = np.linspace(-np.pi, np.pi, 201)
np.sinh(0)
# np.sinh(np.pi*1j/2)
np.sqrt([1,4,9])
np.sqrt([4, -1, -3+4J])
np.cbrt([1,8,27])
np.square([-1j, 1])
np.subtract(1.0, 4.0)
x1 = np.arange(9.0).reshape((3, 3))
x2 = np.arange(3.0)
np.subtract(x1, x2)
np.tan(np.array([-np.pi, np.pi/2, np.pi]))  # bare `pi` is undefined here
np.tanh((0, np.pi*1j, np.pi*1j/2))
x = np.arange(5)
np.true_divide(x, 4)
x = np.arange(9)
y1, y2 = np.frexp(x)
y1 * 2**y2
np.ldexp(5, np.arange(4))
x = np.arange(6)
np.ldexp(*np.frexp(x))
def test_numeric(): # 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc', # 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast', # 'dtype', 'fromstring', 'fromfile', 'frombuffer', 'int_asbuffer', # 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', # 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types', # 'min_scalar_type', 'result_type', 'asarray', 'asanyarray', # 'ascontiguousarray', 'asfortranarray', 'isfortran', 'empty_like', # 'zeros_like', 'ones_like', 'correlate', 'convolve', 'inner', 'dot', # 'einsum', 'outer', 'vdot', 'alterdot', 'restoredot', 'roll', # 'rollaxis', 'moveaxis', 'cross', 'tensordot', 'array2string', # 'get_printoptions', 'set_printoptions', 'array_repr', 'array_str', # 'set_string_function', 'little_endian', 'require', 'fromiter', # 'array_equal', 'array_equiv', 'indices', 'fromfunction', 'isclose', 'load', # 'loads', 'isscalar', 'binary_repr', 'base_repr', 'ones', 'identity', # 'allclose', 'compare_chararrays', 'putmask', 'seterr', 'geterr', # 'setbufsize', 'getbufsize', 'seterrcall', 'geterrcall', 'errstate', # 'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_', # 'True_', 'bitwise_not', 'full', 'full_like', 'matmul' x = np.arange(6) x = x.reshape((2, 3)) np.zeros_like(x) y = np.arange(3, dtype=np.float) np.zeros_like(y) np.ones(5) np.ones((5,), dtype=np.int) np.ones((2, 1)) s = (2,2) np.ones(s) x = np.arange(6) x = x.reshape((2, 3)) np.ones_like(x) y = np.arange(3, dtype=np.float) np.ones_like(y) np.full((2, 2), np.inf) x = np.arange(6, dtype=np.int) np.full_like(x, 1) np.full_like(x, 0.1) np.full_like(y, 0.1) np.count_nonzero(np.eye(4)) np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]]) np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=0) np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=1) a = [1, 2] np.asarray(a) a = np.array([1, 2]) np.asarray(a) is a a = np.array([1, 2], dtype=np.float32) np.asarray(a, dtype=np.float32) is a np.asarray(a, dtype=np.float64) is a np.asarray(a) is a np.asanyarray(a) is a a = [1, 2] np.asanyarray(a) np.asanyarray(a) is a x = np.arange(6).reshape(2,3) np.ascontiguousarray(x, dtype=np.float32) x = np.arange(6).reshape(2,3) y = np.asfortranarray(x) x = np.arange(6).reshape(2,3) y = np.require(x, dtype=np.float32, requirements=['A', 'O', 'W', 'F']) a = np.array([[1, 2, 3], [4, 5, 6]], order='C') np.isfortran(a) b = np.array([[1, 2, 3], [4, 5, 6]], order='FORTRAN') np.isfortran(b) a = np.array([[1, 2, 3], [4, 5, 6]], order='C') np.isfortran(a) b = a.T np.isfortran(b) np.isfortran(np.array([1, 2], order='FORTRAN')) x = np.arange(6).reshape(2,3) np.argwhere(x>1) x = np.arange(-2, 3) np.flatnonzero(x) np.correlate([1, 2, 3], [0, 1, 0.5]) np.correlate([1, 2, 3], [0, 1, 0.5], "same") np.correlate([1, 2, 3], [0, 1, 0.5], "full") np.correlate([1+1j, 2, 3-1j], [0, 1, 0.5j], 'full') np.correlate([0, 1, 0.5j], [1+1j, 2, 3-1j], 'full') np.convolve([1, 2, 3], [0, 1, 0.5]) np.convolve([1,2,3],[0,1,0.5], 'same') np.convolve([1,2,3],[0,1,0.5], 'valid') rl = np.outer(np.ones((5,)), np.linspace(-2, 2, 5)) # im = np.outer(1j*np.linspace(2, -2, 5), np.ones((5,))) # grid = rl + im x = np.array(['a', 'b', 'c'], dtype=object) np.outer(x, [1, 2, 3]) a = np.arange(60.).reshape(3,4,5) b = np.arange(24.).reshape(4,3,2) c = np.tensordot(a,b, axes=([1,0],[0,1])) c.shape # A slower but equivalent way of computing the same... 
d = np.zeros((5,2)) a = np.array(range(1, 9)) A = np.array(('a', 'b', 'c', 'd'), dtype=object) x = np.arange(10) np.roll(x, 2) x2 = np.reshape(x, (2,5)) np.roll(x2, 1) np.roll(x2, 1, axis=0) np.roll(x2, 1, axis=1) a = np.ones((3,4,5,6)) np.rollaxis(a, 3, 1).shape np.rollaxis(a, 2).shape np.rollaxis(a, 1, 4).shape x = np.zeros((3, 4, 5)) np.moveaxis(x, 0, -1).shape np.moveaxis(x, -1, 0).shape np.transpose(x).shape np.moveaxis(x, [0, 1], [-1, -2]).shape np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape x = [1, 2, 3] y = [4, 5, 6] np.cross(x, y) x = [1, 2] y = [4, 5, 6] np.cross(x, y) x = [1, 2, 0] y = [4, 5, 6] np.cross(x, y) x = [1,2] y = [4,5] np.cross(x, y) x = np.array([[1,2,3], [4,5,6]]) y = np.array([[4,5,6], [1,2,3]]) np.cross(x, y) np.cross(x, y, axisc=0) x = np.array([[1,2,3], [4,5,6], [7, 8, 9]]) y = np.array([[7, 8, 9], [4,5,6], [1,2,3]]) np.cross(x, y) np.cross(x, y, axisa=0, axisb=0) # np.array_repr(np.array([1,2])) # np.array_repr(np.ma.array([0.])) # np.array_repr(np.array([], np.int32)) x = np.array([1e-6, 4e-7, 2, 3]) # np.array_repr(x, precision=6, suppress_small=True) # np.array_str(np.arange(3)) a = np.arange(10) x = np.arange(4) np.set_string_function(lambda x:'random', repr=False) grid = np.indices((2, 3)) grid.shape grid[0] # row indices grid[1] # column indices x = np.arange(20).reshape(5, 4) row, col = np.indices((2, 3)) x[row, col] np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int) np.fromfunction(lambda i, j: i + j, (3, 3), dtype=int) np.isscalar(3.1) np.isscalar([3.1]) np.isscalar(False) # np.binary_repr(3) # np.binary_repr(-3) # np.binary_repr(3, width=4) # np.binary_repr(-3, width=3) # np.binary_repr(-3, width=5) # np.base_repr(5) # np.base_repr(6, 5) # np.base_repr(7, base=5, padding=3) # np.base_repr(10, base=16) # np.base_repr(32, base=16) np.identity(3) np.allclose([1e10,1e-7], [1.00001e10,1e-8]) np.allclose([1e10,1e-8], [1.00001e10,1e-9]) np.allclose([1e10,1e-8], [1.0001e10,1e-9]) # np.allclose([1.0, np.nan], [1.0, np.nan]) # np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) np.isclose([1e10,1e-7], [1.00001e10,1e-8]) np.isclose([1e10,1e-8], [1.00001e10,1e-9]) np.isclose([1e10,1e-8], [1.0001e10,1e-9]) # np.isclose([1.0, np.nan], [1.0, np.nan]) # np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) np.array_equal([1, 2], [1, 2]) np.array_equal(np.array([1, 2]), np.array([1, 2])) np.array_equal([1, 2], [1, 2, 3]) np.array_equal([1, 2], [1, 4]) np.array_equiv([1, 2], [1, 2]) np.array_equiv([1, 2], [1, 3]) np.array_equiv([1, 2], [[1, 2], [1, 2]]) np.array_equiv([1, 2], [[1, 2, 1, 2], [1, 2, 1, 2]]) np.array_equiv([1, 2], [[1, 2], [1, 3]])
def test_fromnumeric(): # Functions # 'alen', 'all', 'alltrue', 'amax', 'amin', 'any', 'argmax', # 'argmin', 'argpartition', 'argsort', 'around', 'choose', 'clip', # 'compress', 'cumprod', 'cumproduct', 'cumsum', 'diagonal', 'mean', # 'ndim', 'nonzero', 'partition', 'prod', 'product', 'ptp', 'put', # 'rank', 'ravel', 'repeat', 'reshape', 'resize', 'round_', # 'searchsorted', 'shape', 'size', 'sometrue', 'sort', 'squeeze', # 'std', 'sum', 'swapaxes', 'take', 'trace', 'transpose', 'var', a = [4, 3, 5, 7, 6, 8] indices = [0, 1, 4] np.take(a, indices) a = np.array(a) # a[indices] np.take(a, [[0, 1], [2, 3]]) a = np.zeros((10, 2)) b = a.T a = np.arange(6).reshape((3, 2)) np.reshape(a, (2, 3)) # C-like index ordering np.reshape(np.ravel(a), (2, 3)) # equivalent to C ravel then C reshape np.reshape(a, (2, 3), order='F') # Fortran-like index ordering np.reshape(np.ravel(a, order='F'), (2, 3), order='F') a = np.array([[1,2,3], [4,5,6]]) np.reshape(a, 6) np.reshape(a, 6, order='F') np.reshape(a, (3,-1)) # the unspecified value is inferred to be 2 choices = [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33]] np.choose([2, 3, 1, 0], choices) np.choose([2, 4, 1, 0], choices, mode='clip') # 4 goes to 3 (4-1) np.choose([2, 4, 1, 0], choices, mode='wrap') # 4 goes to (4 mod 4) a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]] choices = [-10, 10] np.choose(a, choices) a = np.array([0, 1]).reshape((2,1,1)) c1 = np.array([1, 2, 3]).reshape((1,3,1)) c2 = np.array([-1, -2, -3, -4, -5]).reshape((1,1,5)) np.choose(a, (c1, c2)) # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2 np.repeat(3, 4) x = np.array([[1,2],[3,4]]) np.repeat(x, 2) np.repeat(x, 3, axis=1) np.repeat(x, [1, 2], axis=0) a = np.arange(5) np.put(a, [0, 2], [-44, -55]) a = np.arange(5) np.put(a, 22, -5, mode='clip') x = np.array([[1,2,3]]) np.swapaxes(x,0,1) x = np.array([[[0,1],[2,3]],[[4,5],[6,7]]]) np.swapaxes(x,0,2) x = np.arange(4).reshape((2,2)) np.transpose(x) x = np.ones((1, 2, 3)) np.transpose(x, (1, 0, 2)).shape a = np.array([3, 4, 2, 1]) np.partition(a, 3) np.partition(a, (1, 3)) x = np.array([3, 4, 2, 1]) x[np.argpartition(x, 3)] x[np.argpartition(x, (1, 3))] x = [3, 4, 2, 1] np.array(x)[np.argpartition(x, 3)] a = np.array([[1,4],[3,1]]) np.sort(a) # sort along the last axis np.sort(a, axis=None) # sort the flattened array np.sort(a, axis=0) # sort along the first axis dtype = [('name', 'S10'), ('height', float), ('age', int)] values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38), ('Galahad', 1.7, 38)] a = np.array(values, dtype=dtype) # create a structured array np.sort(a, order='height') # doctest: +SKIP np.sort(a, order=['age', 'height']) # doctest: +SKIP x = np.array([3, 1, 2]) np.argsort(x) x = np.array([[0, 3], [2, 2]]) np.argsort(x, axis=0) np.argsort(x, axis=1) x = np.array([(1, 0), (0, 1)], dtype=[('x', '<i4'), ('y', '<i4')]) np.argsort(x, order=('x','y')) np.argsort(x, order=('y','x')) a = np.arange(6).reshape(2,3) np.argmax(a) np.argmax(a, axis=0) np.argmax(a, axis=1) b = np.arange(6) b[1] = 5 np.argmax(b) # Only the first occurrence is returned. a = np.arange(6).reshape(2,3) np.argmin(a) np.argmin(a, axis=0) np.argmin(a, axis=1) b = np.arange(6) b[4] = 0 np.argmin(b) # Only the first occurrence is returned. 
np.searchsorted([1,2,3,4,5], 3) np.searchsorted([1,2,3,4,5], 3, side='right') np.searchsorted([1,2,3,4,5], [-10, 10, 2, 3]) a=np.array([[0,1],[2,3]]) np.resize(a,(2,3)) np.resize(a,(1,4)) np.resize(a,(2,4)) x = np.array([[[0], [1], [2]]]) x.shape np.squeeze(x).shape np.squeeze(x, axis=(2,)).shape a = np.arange(4).reshape(2,2) a = np.arange(8).reshape(2,2,2); a a[:,:,0] # main diagonal is [0 6] a[:,:,1] # main diagonal is [1 7] np.trace(np.eye(3)) a = np.arange(8).reshape((2,2,2)) np.trace(a) a = np.arange(24).reshape((2,2,2,3)) np.trace(a).shape x = np.array([[1, 2, 3], [4, 5, 6]]) np.ravel(x) x.reshape(-1) np.ravel(x, order='F') np.ravel(x.T) np.ravel(x.T, order='A') a = np.arange(3)[::-1]; a # a = np.arange(12).reshape(2,3,2).swapaxes(1,2); a x = np.eye(3) np.nonzero(x) x[np.nonzero(x)] np.transpose(np.nonzero(x)) a = np.array([[1,2,3],[4,5,6],[7,8,9]]) a > 3 np.nonzero(a > 3) np.shape(np.eye(3)) np.shape([[1, 2]]) np.shape([0]) np.shape(0) a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) np.shape(a) a.shape a = np.array([[1, 2], [3, 4], [5, 6]]) np.compress([0, 1], a, axis=0) np.compress([False, True, True], a, axis=0) np.compress([False, True], a, axis=1) np.compress([False, True], a) a = np.arange(10) np.clip(a, 1, 8) np.clip(a, 3, 6, out=a) a = np.arange(10) np.clip(a, [3,4,1,1,1,4,4,4,4,4], 8) np.sum([]) np.sum([0.5, 1.5]) np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32) np.sum([[0, 1], [0, 5]]) np.sum([[0, 1], [0, 5]], axis=0) np.sum([[0, 1], [0, 5]], axis=1) # np.ones(128, dtype=np.int8).sum(dtype=np.int8) # np.any([[True, False], [True, True]]) # np.any([[True, False], [False, False]], axis=0) # np.any([-1, 0, 5]) # np.any(np.nan) # np.all([[True,False],[True,True]]) # np.all([[True,False],[True,True]], axis=0) # np.all([-1, 4, 5]) # np.all([1.0, np.nan]) a = np.array([[1,2,3], [4,5,6]]) np.cumsum(a) np.cumsum(a, dtype=float) # specifies type of output value(s) np.cumsum(a,axis=0) # sum over rows for each of the 3 columns np.cumsum(a,axis=1) # sum over columns for each of the 2 rows x = np.arange(4).reshape((2,2)) np.ptp(x, axis=0) np.ptp(x, axis=1) a = np.arange(4).reshape((2,2)) np.amax(a) # Maximum of the flattened array np.amax(a, axis=0) # Maxima along the first axis np.amax(a, axis=1) # Maxima along the second axis b = np.arange(5, dtype=np.float) # b[2] = np.NaN np.amax(b) np.nanmax(b) a = np.arange(4).reshape((2,2)) np.amin(a) # Minimum of the flattened array np.amin(a, axis=0) # Minima along the first axis np.amin(a, axis=1) # Minima along the second axis b = np.arange(5, dtype=np.float) # b[2] = np.NaN np.amin(b) np.nanmin(b) a = np.zeros((7,4,5)) a.shape[0] np.alen(a) x = np.array([536870910, 536870910, 536870910, 536870910]) np.prod(x) #random np.prod([]) np.prod([1.,2.]) np.prod([[1.,2.],[3.,4.]]) np.prod([[1.,2.],[3.,4.]], axis=1) x = np.array([1, 2, 3], dtype=np.uint8) # np.prod(x).dtype == np.uint x = np.array([1, 2, 3], dtype=np.int8) # np.prod(x).dtype == np.int a = np.array([1,2,3]) np.cumprod(a) # intermediate results 1, 1*2 a = np.array([[1, 2, 3], [4, 5, 6]]) np.cumprod(a, dtype=float) # specify type of output np.cumprod(a, axis=0) np.cumprod(a,axis=1) np.ndim([[1,2,3],[4,5,6]]) np.ndim(np.array([[1,2,3],[4,5,6]])) np.ndim(1) a = np.array([[1,2,3],[4,5,6]]) np.size(a) np.size(a,1) np.size(a,0) np.around([0.37, 1.64]) np.around([0.37, 1.64], decimals=1) np.around([.5, 1.5, 2.5, 3.5, 4.5]) # rounds to nearest even value np.around([1,2,3,11], decimals=1) # ndarray of ints is returned np.around([1,2,3,11], decimals=-1) a = np.array([[1, 2], [3, 4]]) 
np.mean(a) np.mean(a, axis=0) np.mean(a, axis=1) a = np.zeros((2, 512*512), dtype=np.float32) a[0, :] = 1.0 a[1, :] = 0.1 np.mean(a) np.mean(a, dtype=np.float64) a = np.array([[1, 2], [3, 4]]) np.std(a) np.std(a, axis=0) np.std(a, axis=1) a = np.zeros((2, 512*512), dtype=np.float32) a[0, :] = 1.0 a[1, :] = 0.1 np.std(a) np.std(a, dtype=np.float64) a = np.array([[1, 2], [3, 4]]) np.var(a) np.var(a, axis=0) np.var(a, axis=1) a = np.zeros((2, 512*512), dtype=np.float32) a[0, :] = 1.0 a[1, :] = 0.1 np.var(a) np.var(a, dtype=np.float64)
def string_to_one_hot(string, maxchar): """Converts an ASCII string to a one-of-k encoding.""" ascii = np.array([ord(c) for c in string]).T return np.array(ascii[:, None] == np.arange(maxchar)[None, :], dtype=int)
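# Usage sketch (illustrative, not from the original file): encoding "abc" over
# a 128-character ASCII alphabet yields a (3, 128) 0/1 array with a single 1
# per row at position ord(c).
def _one_hot_demo():
    onehot = string_to_one_hot("abc", 128)
    assert onehot.shape == (3, 128)
    assert onehot[0, ord('a')] == 1 and onehot.sum() == 3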
l = -np.sum(np.log(label_probabilities)) return l def training_accuracy(weights, inputs): preds = predict(weights, inputs) error = np.count_nonzero(np.argmax(preds, axis=1) - np.argmax(targets, axis=1)) return (256 - error) * 100 / 256.0 xshape = (256, 500) # wshape = (500, 250) wshape = (500, 500) tshape = (256, 250) inputs = random.rand(*xshape) - 0.5 targets = np.zeros(tshape) truth = random.randint(0, 250, 256) targets[np.arange(256), truth] = 1 weights = random.rand(*wshape) - 0.5 training_gradient_fun = grad(training_loss) def NumpyVarToMinpy(var): return minpy.array.Value.wrap(var) def MinpyVarToNumpy(var): return minpy.array.Value.wrap(var).get_data(ArrayType.NUMPY) for i in range(20): print('Trained loss accuracy #{}: {}%'.format(i, training_accuracy(weights, inputs))) gr = training_gradient_fun(weights, inputs) print('Gradient Size', gr.shape) print('Gradient example', MinpyVarToNumpy(gr[0,:10]))
caffe.TanhLayer(), caffe.FullyConnectedLayer(10), caffe.SoftMaxLayer()] # Training parameters param_scale = 0.1 learning_rate = 1e-3 momentum = 0.9 batch_size = 256 num_epochs = 50 # Load and process MNIST data (borrowing from Kayak) print("Loading training data...") import imp, urllib add_color_channel = lambda x : x.reshape((x.shape[0], 1, x.shape[1], x.shape[2])) one_hot = lambda x, K : np.array(x[:,None] == np.arange(K)[None, :], dtype=int) source, _ = urllib.urlretrieve( 'https://raw.githubusercontent.com/HIPS/Kayak/master/examples/data.py') data = imp.load_source('data', source).mnist() train_images, train_labels, test_images, test_labels = data train_images = add_color_channel(train_images) / 255.0 test_images = add_color_channel(test_images) / 255.0 train_labels = one_hot(train_labels, 10) test_labels = one_hot(test_labels, 10) N_data = train_images.shape[0] # Make neural net functions N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(input_shape, caffe_layer_specs, L2_reg) loss_grad = grad(loss_fun) # Initialize weights
def test_numeric(): # 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc', # 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast', # 'dtype', 'fromstring', 'fromfile', 'frombuffer', 'int_asbuffer', # 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', # 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types', # 'min_scalar_type', 'result_type', 'asarray', 'asanyarray', # 'ascontiguousarray', 'asfortranarray', 'isfortran', 'empty_like', # 'zeros_like', 'ones_like', 'correlate', 'convolve', 'inner', 'dot', # 'einsum', 'outer', 'vdot', 'alterdot', 'restoredot', 'roll', # 'rollaxis', 'moveaxis', 'cross', 'tensordot', 'array2string', # 'get_printoptions', 'set_printoptions', 'array_repr', 'array_str', # 'set_string_function', 'little_endian', 'require', 'fromiter', # 'array_equal', 'array_equiv', 'indices', 'fromfunction', 'isclose', 'load', # 'loads', 'isscalar', 'binary_repr', 'base_repr', 'ones', 'identity', # 'allclose', 'compare_chararrays', 'putmask', 'seterr', 'geterr', # 'setbufsize', 'getbufsize', 'seterrcall', 'geterrcall', 'errstate', # 'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_', # 'True_', 'bitwise_not', 'full', 'full_like', 'matmul' x = np.arange(6) x = x.reshape((2, 3)) np.zeros_like(x) y = np.arange(3, dtype=np.float) np.zeros_like(y) np.ones(5) np.ones((5, ), dtype=np.int) np.ones((2, 1)) s = (2, 2) np.ones(s) x = np.arange(6) x = x.reshape((2, 3)) np.ones_like(x) y = np.arange(3, dtype=np.float) np.ones_like(y) np.full((2, 2), np.inf) x = np.arange(6, dtype=np.int) np.full_like(x, 1) np.full_like(x, 0.1) np.full_like(y, 0.1) np.count_nonzero(np.eye(4)) np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]]) np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]], axis=0) np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]], axis=1) a = [1, 2] np.asarray(a) a = np.array([1, 2]) np.asarray(a) is a a = np.array([1, 2], dtype=np.float32) np.asarray(a, dtype=np.float32) is a np.asarray(a, dtype=np.float64) is a np.asarray(a) is a np.asanyarray(a) is a a = [1, 2] np.asanyarray(a) np.asanyarray(a) is a x = np.arange(6).reshape(2, 3) np.ascontiguousarray(x, dtype=np.float32) x = np.arange(6).reshape(2, 3) y = np.asfortranarray(x) x = np.arange(6).reshape(2, 3) y = np.require(x, dtype=np.float32, requirements=['A', 'O', 'W', 'F']) a = np.array([[1, 2, 3], [4, 5, 6]], order='C') np.isfortran(a) b = np.array([[1, 2, 3], [4, 5, 6]], order='FORTRAN') np.isfortran(b) a = np.array([[1, 2, 3], [4, 5, 6]], order='C') np.isfortran(a) b = a.T np.isfortran(b) np.isfortran(np.array([1, 2], order='FORTRAN')) x = np.arange(6).reshape(2, 3) np.argwhere(x > 1) x = np.arange(-2, 3) np.flatnonzero(x) np.correlate([1, 2, 3], [0, 1, 0.5]) np.correlate([1, 2, 3], [0, 1, 0.5], "same") np.correlate([1, 2, 3], [0, 1, 0.5], "full") np.correlate([1 + 1j, 2, 3 - 1j], [0, 1, 0.5j], 'full') np.correlate([0, 1, 0.5j], [1 + 1j, 2, 3 - 1j], 'full') np.convolve([1, 2, 3], [0, 1, 0.5]) np.convolve([1, 2, 3], [0, 1, 0.5], 'same') np.convolve([1, 2, 3], [0, 1, 0.5], 'valid') rl = np.outer(np.ones((5, )), np.linspace(-2, 2, 5)) # im = np.outer(1j*np.linspace(2, -2, 5), np.ones((5,))) # grid = rl + im x = np.array(['a', 'b', 'c'], dtype=object) np.outer(x, [1, 2, 3]) a = np.arange(60.).reshape(3, 4, 5) b = np.arange(24.).reshape(4, 3, 2) c = np.tensordot(a, b, axes=([1, 0], [0, 1])) c.shape # A slower but equivalent way of computing the same... 
d = np.zeros((5, 2)) a = np.array(range(1, 9)) A = np.array(('a', 'b', 'c', 'd'), dtype=object) x = np.arange(10) np.roll(x, 2) x2 = np.reshape(x, (2, 5)) np.roll(x2, 1) np.roll(x2, 1, axis=0) np.roll(x2, 1, axis=1) a = np.ones((3, 4, 5, 6)) np.rollaxis(a, 3, 1).shape np.rollaxis(a, 2).shape np.rollaxis(a, 1, 4).shape x = np.zeros((3, 4, 5)) np.moveaxis(x, 0, -1).shape np.moveaxis(x, -1, 0).shape np.transpose(x).shape np.moveaxis(x, [0, 1], [-1, -2]).shape np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape x = [1, 2, 3] y = [4, 5, 6] np.cross(x, y) x = [1, 2] y = [4, 5, 6] np.cross(x, y) x = [1, 2, 0] y = [4, 5, 6] np.cross(x, y) x = [1, 2] y = [4, 5] np.cross(x, y) x = np.array([[1, 2, 3], [4, 5, 6]]) y = np.array([[4, 5, 6], [1, 2, 3]]) np.cross(x, y) np.cross(x, y, axisc=0) x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) y = np.array([[7, 8, 9], [4, 5, 6], [1, 2, 3]]) np.cross(x, y) np.cross(x, y, axisa=0, axisb=0) # np.array_repr(np.array([1,2])) # np.array_repr(np.ma.array([0.])) # np.array_repr(np.array([], np.int32)) x = np.array([1e-6, 4e-7, 2, 3]) # np.array_repr(x, precision=6, suppress_small=True) # np.array_str(np.arange(3)) a = np.arange(10) x = np.arange(4) np.set_string_function(lambda x: 'random', repr=False) grid = np.indices((2, 3)) grid.shape grid[0] # row indices grid[1] # column indices x = np.arange(20).reshape(5, 4) row, col = np.indices((2, 3)) x[row, col] np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int) np.fromfunction(lambda i, j: i + j, (3, 3), dtype=int) np.isscalar(3.1) np.isscalar([3.1]) np.isscalar(False) # np.binary_repr(3) # np.binary_repr(-3) # np.binary_repr(3, width=4) # np.binary_repr(-3, width=3) # np.binary_repr(-3, width=5) # np.base_repr(5) # np.base_repr(6, 5) # np.base_repr(7, base=5, padding=3) # np.base_repr(10, base=16) # np.base_repr(32, base=16) np.identity(3) np.allclose([1e10, 1e-7], [1.00001e10, 1e-8]) np.allclose([1e10, 1e-8], [1.00001e10, 1e-9]) np.allclose([1e10, 1e-8], [1.0001e10, 1e-9]) # np.allclose([1.0, np.nan], [1.0, np.nan]) # np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) np.isclose([1e10, 1e-7], [1.00001e10, 1e-8]) np.isclose([1e10, 1e-8], [1.00001e10, 1e-9]) np.isclose([1e10, 1e-8], [1.0001e10, 1e-9]) # np.isclose([1.0, np.nan], [1.0, np.nan]) # np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) np.array_equal([1, 2], [1, 2]) np.array_equal(np.array([1, 2]), np.array([1, 2])) np.array_equal([1, 2], [1, 2, 3]) np.array_equal([1, 2], [1, 4]) np.array_equiv([1, 2], [1, 2]) np.array_equiv([1, 2], [1, 3]) np.array_equiv([1, 2], [[1, 2], [1, 2]]) np.array_equiv([1, 2], [[1, 2, 1, 2], [1, 2, 1, 2]]) np.array_equiv([1, 2], [[1, 2], [1, 3]])
def string_to_one_hot(string, maxchar): """Converts an ASCII string to a one-of-k encoding.""" ascii = np.array([ord(c) for c in string]).T return np.array(ascii[:,None] == np.arange(maxchar)[None, :], dtype=int)
def test_fromnumeric(): # Functions # 'alen', 'all', 'alltrue', 'amax', 'amin', 'any', 'argmax', # 'argmin', 'argpartition', 'argsort', 'around', 'choose', 'clip', # 'compress', 'cumprod', 'cumproduct', 'cumsum', 'diagonal', 'mean', # 'ndim', 'nonzero', 'partition', 'prod', 'product', 'ptp', 'put', # 'rank', 'ravel', 'repeat', 'reshape', 'resize', 'round_', # 'searchsorted', 'shape', 'size', 'sometrue', 'sort', 'squeeze', # 'std', 'sum', 'swapaxes', 'take', 'trace', 'transpose', 'var', a = [4, 3, 5, 7, 6, 8] indices = [0, 1, 4] np.take(a, indices) a = np.array(a) # a[indices] np.take(a, [[0, 1], [2, 3]]) a = np.zeros((10, 2)) b = a.T a = np.arange(6).reshape((3, 2)) np.reshape(a, (2, 3)) # C-like index ordering np.reshape(np.ravel(a), (2, 3)) # equivalent to C ravel then C reshape np.reshape(a, (2, 3), order='F') # Fortran-like index ordering np.reshape(np.ravel(a, order='F'), (2, 3), order='F') a = np.array([[1, 2, 3], [4, 5, 6]]) np.reshape(a, 6) np.reshape(a, 6, order='F') np.reshape(a, (3, -1)) # the unspecified value is inferred to be 2 choices = [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33]] np.choose([2, 3, 1, 0], choices) np.choose([2, 4, 1, 0], choices, mode='clip') # 4 goes to 3 (4-1) np.choose([2, 4, 1, 0], choices, mode='wrap') # 4 goes to (4 mod 4) a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]] choices = [-10, 10] np.choose(a, choices) a = np.array([0, 1]).reshape((2, 1, 1)) c1 = np.array([1, 2, 3]).reshape((1, 3, 1)) c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5)) np.choose(a, (c1, c2)) # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2 np.repeat(3, 4) x = np.array([[1, 2], [3, 4]]) np.repeat(x, 2) np.repeat(x, 3, axis=1) np.repeat(x, [1, 2], axis=0) a = np.arange(5) np.put(a, [0, 2], [-44, -55]) a = np.arange(5) np.put(a, 22, -5, mode='clip') x = np.array([[1, 2, 3]]) np.swapaxes(x, 0, 1) x = np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]) np.swapaxes(x, 0, 2) x = np.arange(4).reshape((2, 2)) np.transpose(x) x = np.ones((1, 2, 3)) np.transpose(x, (1, 0, 2)).shape a = np.array([3, 4, 2, 1]) np.partition(a, 3) np.partition(a, (1, 3)) x = np.array([3, 4, 2, 1]) x[np.argpartition(x, 3)] x[np.argpartition(x, (1, 3))] x = [3, 4, 2, 1] np.array(x)[np.argpartition(x, 3)] a = np.array([[1, 4], [3, 1]]) np.sort(a) # sort along the last axis np.sort(a, axis=None) # sort the flattened array np.sort(a, axis=0) # sort along the first axis dtype = [('name', 'S10'), ('height', float), ('age', int)] values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38), ('Galahad', 1.7, 38)] a = np.array(values, dtype=dtype) # create a structured array np.sort(a, order='height') # doctest: +SKIP np.sort(a, order=['age', 'height']) # doctest: +SKIP x = np.array([3, 1, 2]) np.argsort(x) x = np.array([[0, 3], [2, 2]]) np.argsort(x, axis=0) np.argsort(x, axis=1) x = np.array([(1, 0), (0, 1)], dtype=[('x', '<i4'), ('y', '<i4')]) np.argsort(x, order=('x', 'y')) np.argsort(x, order=('y', 'x')) a = np.arange(6).reshape(2, 3) np.argmax(a) np.argmax(a, axis=0) np.argmax(a, axis=1) b = np.arange(6) b[1] = 5 np.argmax(b) # Only the first occurrence is returned. a = np.arange(6).reshape(2, 3) np.argmin(a) np.argmin(a, axis=0) np.argmin(a, axis=1) b = np.arange(6) b[4] = 0 np.argmin(b) # Only the first occurrence is returned. 
    np.searchsorted([1, 2, 3, 4, 5], 3)
    np.searchsorted([1, 2, 3, 4, 5], 3, side='right')
    np.searchsorted([1, 2, 3, 4, 5], [-10, 10, 2, 3])
    a = np.array([[0, 1], [2, 3]])
    np.resize(a, (2, 3))
    np.resize(a, (1, 4))
    np.resize(a, (2, 4))
    x = np.array([[[0], [1], [2]]])
    x.shape
    np.squeeze(x).shape
    np.squeeze(x, axis=(2,)).shape
    a = np.arange(4).reshape(2, 2)
    a = np.arange(8).reshape(2, 2, 2)
    a
    a[:, :, 0]  # main diagonal is [0 6]
    a[:, :, 1]  # main diagonal is [1 7]
    np.trace(np.eye(3))
    a = np.arange(8).reshape((2, 2, 2))
    np.trace(a)
    a = np.arange(24).reshape((2, 2, 2, 3))
    np.trace(a).shape
    x = np.array([[1, 2, 3], [4, 5, 6]])
    np.ravel(x)
    x.reshape(-1)
    np.ravel(x, order='F')
    np.ravel(x.T)
    np.ravel(x.T, order='A')
    a = np.arange(3)[::-1]
    a
    # a = np.arange(12).reshape(2, 3, 2).swapaxes(1, 2); a
    x = np.eye(3)
    np.nonzero(x)
    x[np.nonzero(x)]
    np.transpose(np.nonzero(x))
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    a > 3
    np.nonzero(a > 3)
    np.shape(np.eye(3))
    np.shape([[1, 2]])
    np.shape([0])
    np.shape(0)
    a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
    np.shape(a)
    a.shape
    a = np.array([[1, 2], [3, 4], [5, 6]])
    np.compress([0, 1], a, axis=0)
    np.compress([False, True, True], a, axis=0)
    np.compress([False, True], a, axis=1)
    np.compress([False, True], a)
    a = np.arange(10)
    np.clip(a, 1, 8)
    np.clip(a, 3, 6, out=a)
    a = np.arange(10)
    np.clip(a, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
    np.sum([])
    np.sum([0.5, 1.5])
    np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
    np.sum([[0, 1], [0, 5]])
    np.sum([[0, 1], [0, 5]], axis=0)
    np.sum([[0, 1], [0, 5]], axis=1)
    # np.ones(128, dtype=np.int8).sum(dtype=np.int8)
    # np.any([[True, False], [True, True]])
    # np.any([[True, False], [False, False]], axis=0)
    # np.any([-1, 0, 5])
    # np.any(np.nan)
    # np.all([[True, False], [True, True]])
    # np.all([[True, False], [True, True]], axis=0)
    # np.all([-1, 4, 5])
    # np.all([1.0, np.nan])
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.cumsum(a)
    np.cumsum(a, dtype=float)  # specifies type of output value(s)
    np.cumsum(a, axis=0)  # sum over rows for each of the 3 columns
    np.cumsum(a, axis=1)  # sum over columns for each of the 2 rows
    x = np.arange(4).reshape((2, 2))
    np.ptp(x, axis=0)
    np.ptp(x, axis=1)
    a = np.arange(4).reshape((2, 2))
    np.amax(a)  # maximum of the flattened array
    np.amax(a, axis=0)  # maxima along the first axis
    np.amax(a, axis=1)  # maxima along the second axis
    b = np.arange(5, dtype=np.float)
    # b[2] = np.NaN
    np.amax(b)
    np.nanmax(b)
    a = np.arange(4).reshape((2, 2))
    np.amin(a)  # minimum of the flattened array
    np.amin(a, axis=0)  # minima along the first axis
    np.amin(a, axis=1)  # minima along the second axis
    b = np.arange(5, dtype=np.float)
    # b[2] = np.NaN
    np.amin(b)
    np.nanmin(b)
    a = np.zeros((7, 4, 5))
    a.shape[0]
    np.alen(a)
    x = np.array([536870910, 536870910, 536870910, 536870910])
    np.prod(x)  # random
    np.prod([])
    np.prod([1., 2.])
    np.prod([[1., 2.], [3., 4.]])
    np.prod([[1., 2.], [3., 4.]], axis=1)
    x = np.array([1, 2, 3], dtype=np.uint8)
    # np.prod(x).dtype == np.uint
    x = np.array([1, 2, 3], dtype=np.int8)
    # np.prod(x).dtype == np.int
    a = np.array([1, 2, 3])
    np.cumprod(a)  # intermediate results 1, 1*2
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.cumprod(a, dtype=float)  # specify type of output
    np.cumprod(a, axis=0)
    np.cumprod(a, axis=1)
    np.ndim([[1, 2, 3], [4, 5, 6]])
    np.ndim(np.array([[1, 2, 3], [4, 5, 6]]))
    np.ndim(1)
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.size(a)
    np.size(a, 1)
    np.size(a, 0)
    np.around([0.37, 1.64])
    np.around([0.37, 1.64], decimals=1)
    np.around([.5, 1.5, 2.5, 3.5, 4.5])  # rounds to nearest even value
    np.around([1, 2, 3, 11], decimals=1)  # ndarray of ints is returned
    np.around([1, 2, 3, 11], decimals=-1)
    a = np.array([[1, 2], [3, 4]])
    np.mean(a)
    np.mean(a, axis=0)
    np.mean(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.mean(a)
    np.mean(a, dtype=np.float64)
    a = np.array([[1, 2], [3, 4]])
    np.std(a)
    np.std(a, axis=0)
    np.std(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.std(a)
    np.std(a, dtype=np.float64)
    a = np.array([[1, 2], [3, 4]])
    np.var(a)
    np.var(a, axis=0)
    np.var(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.var(a)
    np.var(a, dtype=np.float64)
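# Why dtype=np.float64 matters in the mean/std/var calls above: with a
# float32 accumulator the mean of this array comes out slightly below the
# true value 0.55 (this mirrors the NumPy docs example; exact digits may
# vary by platform).
a = np.zeros((2, 512 * 512), dtype=np.float32)
a[0, :] = 1.0
a[1, :] = 0.1
print(np.mean(a), np.mean(a, dtype=np.float64))  # ~0.54999924 vs ~0.55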
def test_ufunc():
    x = np.array([-1.2, 1.2])
    np.absolute(x)
    np.absolute(1.2 + 1j)
    x = np.linspace(start=-10, stop=10, num=101)
    np.add(1.0, 4.0)
    x1 = np.arange(9.0).reshape((3, 3))
    x2 = np.arange(3.0)
    np.add(x1, x2)
    np.arccos([1, -1])
    x = np.linspace(-1, 1, num=100)
    np.arccosh([np.e, 10.0])
    np.arccosh(1)
    np.arcsin(0)
    np.arcsinh(np.array([np.e, 10.0]))
    np.arctan([0, 1])
    np.pi / 4
    x = np.linspace(-10, 10)
    x = np.array([-1, +1, +1, -1])
    y = np.array([-1, -1, +1, +1])
    np.arctan2(y, x) * 180 / np.pi
    np.arctan2([1., -1.], [0., 0.])
    np.arctan2([0., 0., np.inf], [+0., -0., np.inf])
    np.arctanh([0, -0.5])
    np.bitwise_and(13, 17)
    np.bitwise_and(14, 13)
    # np.binary_repr(12) returns a str
    np.bitwise_and([14, 3], 13)
    np.bitwise_and([11, 7], [4, 25])
    np.bitwise_and(np.array([2, 5, 255]), np.array([3, 14, 16]))
    np.bitwise_and([True, True], [False, True])
    np.bitwise_or(13, 16)
    # np.binary_repr(29)
    np.bitwise_or(32, 2)
    np.bitwise_or([33, 4], 1)
    np.bitwise_or([33, 4], [1, 2])
    np.bitwise_or(np.array([2, 5, 255]), np.array([4, 4, 4]))
    # np.array([2, 5, 255]) | np.array([4, 4, 4])
    np.bitwise_or(np.array([2, 5, 255, 2147483647], dtype=np.int32),
                  np.array([4, 4, 4, 2147483647], dtype=np.int32))
    np.bitwise_or([True, True], [False, True])
    np.bitwise_xor(13, 17)
    # np.binary_repr(28)
    np.bitwise_xor(31, 5)
    np.bitwise_xor([31, 3], 5)
    np.bitwise_xor([31, 3], [5, 6])
    np.bitwise_xor([True, True], [False, True])
    a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
    np.ceil(a)
    a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
    np.trunc(a)
    np.cos(np.array([0, np.pi / 2, np.pi]))
    np.cosh(0)
    x = np.linspace(-4, 4, 1000)
    rad = np.arange(12.) * np.pi / 6
    np.degrees(rad)
    out = np.zeros(rad.shape)
    r = np.degrees(rad, out)
    # np.all(r == out) returns True
    np.rad2deg(np.pi / 2)
    np.divide(2.0, 4.0)
    x1 = np.arange(9.0).reshape((3, 3))
    x2 = np.arange(3.0)
    np.divide(2, 4)
    np.divide(2, 4.)
    np.equal([0, 1, 3], np.arange(3))
    np.equal(1, np.ones(1))
    x = np.linspace(-2 * np.pi, 2 * np.pi, 100)
    np.exp2([2, 3])
    np.expm1(1e-10)
    np.exp(1e-10) - 1
    np.fabs(-1)
    np.fabs([-1.2, 1.2])
    a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
    np.floor(a)
    np.floor_divide(7, 3)
    np.floor_divide([1., 2., 3., 4.], 2.5)
    np.fmod([-3, -2, -1, 1, 2, 3], 2)
    np.remainder([-3, -2, -1, 1, 2, 3], 2)
    np.fmod([5, 3], [2, 2.])
    a = np.arange(-3, 3).reshape(3, 2)
    np.fmod(a, [2, 2])
    np.greater([4, 2], [2, 2])
    a = np.array([4, 2])
    b = np.array([2, 2])
    a > b
    np.greater_equal([4, 2, 1], [2, 2, 2])
    np.hypot(3 * np.ones((3, 3)), 4 * np.ones((3, 3)))
    np.hypot(3 * np.ones((3, 3)), [4])
    np.bitwise_not is np.invert
    np.invert(np.array([13], dtype=np.uint8))
    # np.binary_repr(242, width=8)
    np.invert(np.array([13], dtype=np.uint16))
    np.invert(np.array([13], dtype=np.int8))
    # np.binary_repr(-14, width=8)
    np.invert(np.array([True, False]))
    # np.isfinite(1)
    # np.isfinite(0)
    # np.isfinite(np.nan)
    # np.isfinite(np.inf)
    # np.isfinite(np.NINF)
    x = np.array([-np.inf, 0., np.inf])
    y = np.array([2, 2, 2])
    np.isfinite(x, y)
    # np.isinf(np.inf)
    # np.isinf(np.nan)
    # np.isinf(np.NINF)
    # np.isinf([np.inf, -np.inf, 1.0, np.nan])
    x = np.array([-np.inf, 0., np.inf])
    y = np.array([2, 2, 2])
    # np.isinf(x, y)
    # np.isnan(np.nan)
    # np.isnan(np.inf)
    # np.binary_repr(5)
    np.left_shift(5, 2)
    # np.binary_repr(20)
    np.left_shift(5, [1, 2, 3])
    np.less([1, 2], [2, 2])
    np.less_equal([4, 2, 1], [2, 2, 2])
    x = np.array([0, 1, 2, 2**4])
    xi = np.array([0 + 1.j, 1, 2 + 0.j, 4.j])
    np.log2(xi)
    prob1 = np.log(1e-50)
    prob2 = np.log(2.5e-50)
    prob12 = np.logaddexp(prob1, prob2)
    prob12
    np.exp(prob12)
    prob1 = np.log2(1e-50)
    prob2 = np.log2(2.5e-50)
    prob12 = np.logaddexp2(prob1, prob2)
    prob1, prob2, prob12
    2**prob12
    np.log1p(1e-99)
    np.log(1 + 1e-99)
    # np.logical_and(True, False)
    # np.logical_and([True, False], [False, False])
    x = np.arange(5)
    # np.logical_and(x > 1, x < 4)
    # np.logical_not(3)
    # np.logical_not([True, False, 0, 1])
    x = np.arange(5)
    # np.logical_not(x < 3)
    # np.logical_or(True, False)
    # np.logical_or([True, False], [False, False])
    x = np.arange(5)
    # np.logical_or(x < 1, x > 3)
    # np.logical_xor(True, False)
    # np.logical_xor([True, True, False, False], [True, False, True, False])
    x = np.arange(5)
    # np.logical_xor(x < 1, x > 3)
    # np.logical_xor(0, np.eye(2))
    np.maximum([2, 3, 4], [1, 5, 2])
    # np.maximum([np.nan, 0, np.nan], [0, np.nan, np.nan])
    # np.maximum(np.Inf, 1)
    np.minimum([2, 3, 4], [1, 5, 2])
    # np.minimum([np.nan, 0, np.nan], [0, np.nan, np.nan])
    # np.minimum(-np.Inf, 1)
    np.fmax([2, 3, 4], [1, 5, 2])
    np.fmax(np.eye(2), [0.5, 2])
    # np.fmax([np.nan, 0, np.nan], [0, np.nan, np.nan])
    np.fmin([2, 3, 4], [1, 5, 2])
    np.fmin(np.eye(2), [0.5, 2])
    # np.fmin([np.nan, 0, np.nan], [0, np.nan, np.nan])
    np.modf([0, 3.5])
    np.modf(-0.5)
    np.multiply(2.0, 4.0)
    x1 = np.arange(9.0).reshape((3, 3))
    x2 = np.arange(3.0)
    np.multiply(x1, x2)
    np.negative([1., -1.])
    np.not_equal([1., 2.], [1., 3.])
    np.not_equal([1, 2], [[1, 3], [1, 4]])
    x1 = range(6)
    np.power(x1, 3)
    x2 = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0]
    np.power(x1, x2)
    x2 = np.array([[1, 2, 3, 3, 2, 1], [1, 2, 3, 3, 2, 1]])
    np.power(x1, x2)
    deg = np.arange(12.) * 30.
    np.radians(deg)
    out = np.zeros(deg.shape)
    ret = np.radians(deg, out)
    ret is out
    np.deg2rad(180)
    np.reciprocal(2.)
    np.reciprocal([1, 2., 3.33])
    np.remainder([4, 7], [2, 3])
    np.remainder(np.arange(7), 5)
    # np.binary_repr(10)
    np.right_shift(10, 1)
    # np.binary_repr(5)
    np.right_shift(10, [1, 2, 3])
    a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0])
    np.rint(a)
    np.sign([-5., 4.5])
    np.sign(0)
    # np.sign(5 - 2j)
    # np.signbit(-1.2)
    np.signbit(np.array([1, -2.3, 2.1]))
    np.copysign(1.3, -1)
    np.copysign([-1, 0, 1], -1.1)
    np.copysign([-1, 0, 1], np.arange(3) - 1)
    np.sin(np.pi / 2.)
    np.sin(np.array((0., 30., 45., 60., 90.)) * np.pi / 180.)
    x = np.linspace(-np.pi, np.pi, 201)
    np.sinh(0)
    # np.sinh(np.pi * 1j / 2)
    np.sqrt([1, 4, 9])
    np.sqrt([4, -1, -3 + 4J])
    np.cbrt([1, 8, 27])
    np.square([-1j, 1])
    np.subtract(1.0, 4.0)
    x1 = np.arange(9.0).reshape((3, 3))
    x2 = np.arange(3.0)
    np.subtract(x1, x2)
    np.tan(np.array([-np.pi, np.pi / 2, np.pi]))  # was bare `pi`; use np.pi
    np.tanh((0, np.pi * 1j, np.pi * 1j / 2))
    x = np.arange(5)
    np.true_divide(x, 4)
    x = np.arange(9)
    y1, y2 = np.frexp(x)
    y1 * 2**y2
    np.ldexp(5, np.arange(4))
    x = np.arange(6)
    np.ldexp(*np.frexp(x))
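# np.ldexp(*np.frexp(x)) above round-trips because frexp factors each value
# as x == mantissa * 2**exponent; a minimal check of that identity:
x = np.arange(9, dtype=np.float64)
m, e = np.frexp(x)
assert np.allclose(np.ldexp(m, e), x)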
    cumulative_update = np.zeros_like(net_params)  # initialize update values
    for ui, k_id in enumerate(kids_rank):
        np.random.seed(noise_seed[k_id])  # reconstruct noise using seed
        cumulative_update += utility[ui] * sign(k_id) * np.random.randn(
            net_params.size)
    gradients = optimizer.get_gradients(cumulative_update /
                                        (2 * N_KID * SIGMA))
    return net_params + gradients, rewards


if __name__ == "__main__":
    # utility instead of reward for updating parameters (rank transformation)
    base = N_KID * 2  # *2 for mirrored sampling
    rank = np.arange(1, base + 1)
    util_ = np.maximum(0, np.log(base / 2 + 1) - np.log(rank))  # was numpy.log
    utility = util_ / sum(util_) - 1 / base

    # training
    net_shapes, net_params = build_net()
    env = gym.make(CONFIG['game']).unwrapped
    optimizer = SGD(net_params, LR)
    pool = mp.Pool(processes=N_CORE)
    mar = None  # moving average reward
    for g in range(N_GENERATION):
        t0 = time.time()
        net_params, kid_rewards = train(net_shapes, net_params, optimizer,
                                        utility, pool)
        # test trained net without noise
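# A minimal check of the rank-based utility transform used above (a small
# stand-in value replaces N_KID for illustration): the utilities decrease
# with rank and sum to zero, so better-ranked kids push the update in their
# direction while worse-ranked kids oppose it.
base = 10 * 2  # stands in for N_KID * 2
rank = np.arange(1, base + 1)
util_ = np.maximum(0, np.log(base / 2. + 1) - np.log(rank))
utility = util_ / util_.sum() - 1. / base
assert abs(utility.sum()) < 1e-9 and utility[0] > 0 > utility[-1]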
def test_context():
    set_context(gpu(1))  # set the global context as gpu(1)

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(weights, inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    with gpu(0):
        xshape = (256, 500)
        wshape = (500, 250)
        tshape = (256, 250)
        inputs = random.rand(*xshape) - 0.5
        targets = np.zeros(tshape)
        truth = random.randint(0, 250, 256)
        targets[np.arange(256), truth] = 1
        weights = random.rand(*wshape) - 0.5

        training_gradient_fun = grad(training_loss)

        for i in range(20):
            print('Trained loss accuracy #{}: {}%'.format(
                i, training_accuracy(weights, inputs)))
            gr = training_gradient_fun(weights, inputs)
            weights -= gr * 0.01
        print("\nff and bp on {0}".format(weights.context))

    print("\nexecute on cpu")
    with cpu():
        x_cpu = random.rand(32, 64) - 0.5
        y_cpu = random.rand(64, 32) - 0.5
        z_cpu = np.dot(x_cpu, y_cpu)
        print('z_cpu.context = {0}'.format(z_cpu.context))

    print("\nexecute on gpu(0)")
    with gpu(0):
        x_gpu0 = random.rand(32, 64) - 0.5
        y_gpu0 = random.rand(64, 32) - 0.5
        z_gpu0 = np.dot(x_gpu0, y_gpu0)
        z_gpu0.asnumpy()
        print('z_gpu0.context = {0}'.format(z_gpu0.context))

    print("\n[use global context] execute on gpu(1)")
    x_gpu1 = random.rand(32, 64) - 0.5
    y_gpu1 = random.rand(64, 32) - 0.5
    z_gpu1 = np.dot(x_gpu1, y_gpu1)
    z_gpu1.asnumpy()
    print('z_gpu1.context = {0}'.format(z_gpu1.context))
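# For reference: test_context above assumes MinPy-style imports for np,
# random, grad, set_context, cpu, and gpu. A plausible header is sketched
# below (an assumption based on the MinPy examples; verify the exact module
# paths against the MinPy project before use).
# import minpy.numpy as np
# import minpy.numpy.random as random
# from minpy.core import grad
# from minpy.context import set_context, cpu, gpu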