def training_accuracy(weights, inputs):
    preds = predict(weights, inputs)
    # Count rows where the predicted class differs from the target class.
    # Note: `targets` is a global here, and 256 is the hard-coded size of
    # the training set.
    error = np.count_nonzero(
        np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0
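# The hard-coded 256 above only works when the training set has exactly 256
# rows. A minimal generalized sketch in plain NumPy (helper name hypothetical):
import numpy as np

def accuracy_percent(preds, targets):
    # Percentage of rows whose argmax matches the target argmax,
    # for any batch size.
    correct = np.argmax(preds, axis=1) == np.argmax(targets, axis=1)
    return 100.0 * np.mean(correct)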
def check_accuracy(self, dataiter, num_samples=None):
    """ Check accuracy of the model on the provided data.

    Inputs:
    - dataiter: data iterator that can produce batches.
    - num_samples: If not None and dataiter has more than num_samples
      datapoints, subsample the data and only test the model on
      num_samples datapoints.

    Returns:
    - acc: Scalar giving the fraction of instances that were correctly
      classified by the model.
    """
    # Maybe subsample the data
    N = dataiter.num_data
    check_dataiter = dataiter
    if num_samples is not None and N > num_samples:
        # Sample a sub iter
        check_dataiter = dataiter.getsubiter(num_samples)
    else:
        # Use the entire dataiter otherwise.
        check_dataiter.reset()
    acc_count = 0
    num_samples = 0
    for each_batch in check_dataiter:
        predict = self.model.forward_batch(each_batch, mode='test').asnumpy()
        # TODO(minjie): multiple labels.
        acc_count += np.sum(np.argmax(predict, axis=1) == each_batch.label[0])
        num_samples += check_dataiter.batch_size
    return float(acc_count.asnumpy()) / num_samples
def predict(x, wh, bh, w_out, b_out, alpha):
    """
    Predicts the classes of the given input observations.

    :param x: The input observations to classify.
    :param wh: The weights of each hidden layer connection as array. Each
        weight is a matrix of form (H_{i-1} x H_i), where H_{i-1} is the size
        of the previous hidden layer (or the input layer) and H_i is the size
        of the corresponding hidden layer.
    :param bh: The biases of each hidden layer as array. Each bias is a
        vector of the same length as the corresponding hidden layer.
    :param w_out: The weight of the output layer as matrix of form
        (H x out_size), where H is the size of the last hidden layer.
    :param b_out: The bias of the output layer as vector of length out_size.
    :param alpha: The factor by which negative inputs are scaled in ReLU
        activations. Set to 0 to disable leaky ReLU.
    :return: The indices of the predicted classes.
    """
    hidden_layers = []
    for h in range(len(wh)):
        if h == 0:
            hidden = x.dot(wh[h]) + bh[h]
        else:
            hidden = hidden_layers[h - 1].dot(wh[h]) + bh[h]
        hidden = calculate_activation(hidden, alpha)
        hidden_layers.append(hidden)
    outs = hidden_layers[-1].dot(w_out) + b_out
    return np.argmax(outs, axis=1)
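# A minimal usage sketch with a two-hidden-layer net on random data.
# `calculate_activation` is defined elsewhere in the original; a leaky-ReLU
# stand-in is assumed here so the sketch runs on its own.
import numpy as np

def calculate_activation(z, alpha):
    # Leaky ReLU stand-in: scale negative inputs by alpha.
    return np.where(z > 0, z, alpha * z)

rng = np.random.RandomState(0)
x = rng.randn(8, 4)                              # 8 observations, 4 features
wh = [rng.randn(4, 16), rng.randn(16, 16)]       # two hidden layers
bh = [np.zeros(16), np.zeros(16)]
w_out, b_out = rng.randn(16, 3), np.zeros(3)     # 3 output classes
print(predict(x, wh, bh, w_out, b_out, alpha=0.01))  # 8 predicted class indices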
def classification_accuracy(model, X, Y):
    import minpy.numpy as np
    N = X.shape[0]
    predictions = model.forward(X, 'test')
    predicted_Y = np.argmax(predictions, axis=1)
    errors = np.count_nonzero(predicted_Y - Y)
    return 1 - errors / float(N)
def get_action(params, x, continuous_a):
    x = x[np.newaxis, :]
    x = np.tanh(x.dot(params[0]) + params[1])
    x = np.tanh(x.dot(params[2]) + params[3])
    x = x.dot(params[4]) + params[5]
    if not continuous_a[0]:
        return np.argmax(x, axis=1)[0]          # for discrete action
    else:
        return continuous_a[1] * np.tanh(x)[0]  # for continuous action
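# A minimal sketch of calling get_action with a hypothetical two-hidden-layer
# parameter list (all shapes invented for illustration):
import numpy as np

rng = np.random.RandomState(0)
obs_dim, hidden, n_actions = 4, 10, 2
params = [rng.randn(obs_dim, hidden), np.zeros(hidden),       # layer 1
          rng.randn(hidden, hidden), np.zeros(hidden),        # layer 2
          rng.randn(hidden, n_actions), np.zeros(n_actions)]  # output layer
x = rng.randn(obs_dim)
print(get_action(params, x, continuous_a=[False]))      # discrete: argmax index
print(get_action(params, x, continuous_a=[True, 2.0]))  # continuous: bounded vector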
def check_accuracy(self, dataiter, num_samples=None):
    """ Check accuracy of the model on the provided data.

    Parameters
    ----------
    dataiter
        data iterator that can produce batches.
    num_samples
        If not None and dataiter has more than num_samples datapoints,
        subsample the data and only test the model on num_samples
        datapoints.

    Returns
    -------
    acc
        Scalar giving the fraction of instances that were correctly
        classified by the model.
    """
    # Maybe subsample the data
    N = dataiter.num_data
    check_dataiter = dataiter
    if num_samples is not None and N > num_samples:
        # Sample a sub iter
        check_dataiter = dataiter.getsubiter(num_samples)
    else:
        # Use the entire dataiter otherwise.
        check_dataiter.reset()
    # Compare strings with ==, not `is`: identity is not guaranteed for str.
    if self.task_type == 'classification':
        acc_count = 0
        num_samples = 0
        for each_batch in check_dataiter:
            predict = self.model.forward_batch(each_batch, mode='test').asnumpy()
            # TODO(minjie): multiple labels.
            acc_count += np.sum(np.argmax(predict, axis=1) == each_batch.label[0])
            num_samples += check_dataiter.batch_size
        return float(acc_count.asnumpy()) / num_samples
    elif self.task_type == 'regression':
        loss = 0
        batch_count = 0
        for each_batch in check_dataiter:
            predict = self.model.forward_batch(each_batch, mode='test').asnumpy()
            loss += self.model.loss(predict, each_batch.label[0])
            batch_count += 1
        return float(loss.asnumpy()) / batch_count
    else:
        raise ValueError('Task type is either classification or regression.')
def predict(models, img, t=0):
    img = np.clip(img, 0, 1) * 255
    img = extend_data(config['permutation'], np.array([img]))
    scores = np.hstack([m.predict(img) for m in models])[0]
    # Threshold the per-bit sigmoid outputs into a binary codeword.
    nat_labels = np.zeros(scores.shape).astype(np.float32)
    nat_labels[scores >= 0.5] = 1.
    rep = rep_labels[:len(scores)].T
    # Hamming distance between the predicted codeword and each class codeword.
    tmp = np.repeat([nat_labels], rep.shape[0], axis=0)
    dists = np.sum(np.absolute(tmp - rep), axis=-1)
    min_dist = np.min(dists)
    pred_labels = np.arange(len(dists))[dists == min_dist]
    # Break ties by the summed confidence of the matching bits.
    pred_scores = [np.sum([scores[k] if rep[j][k] == 1 else 1 - scores[k]
                           for k in np.arange(len(scores))])
                   for j in pred_labels]
    pred_label = pred_labels[np.argmax(pred_scores)]
    # Only accept an exact codeword match; otherwise reject with -1.
    if min_dist <= 0:
        return pred_label
    else:
        return -1
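# The core of the function above is error-correcting-code decoding: threshold
# the per-bit scores, then pick the class whose codeword is nearest in Hamming
# distance. A self-contained sketch with a hypothetical 3-class, 4-bit code:
import numpy as np

codes = np.array([[1, 0, 1, 0],    # codeword for class 0
                  [0, 1, 1, 0],    # codeword for class 1
                  [1, 1, 0, 1]])   # codeword for class 2
scores = np.array([0.9, 0.2, 0.8, 0.1])        # per-bit sigmoid outputs
bits = (scores >= 0.5).astype(np.float32)      # -> [1, 0, 1, 0]
dists = np.sum(np.abs(codes - bits), axis=1)   # Hamming distances per class
print(np.argmin(dists))                        # 0: exact match to class 0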
iterations = 10000
interval = 10
validation_interval = 1000
validation_X, validation_Y = data[2:4]
validation_X = validation_X[:1024]
validation_Y = validation_Y[:1024]
settings = {'learning_rate': 0.01}
initialize(model)
updater = Updater(model, 'sgd', settings)
# Assumed defined earlier in the script: model, data, batch_size.
batch_index = 0
batches = data[0].shape[0] // batch_size
for i in range(iterations):
    X_batch = data[0][batch_index * batch_size:(batch_index + 1) * batch_size]
    Y_batch = data[1][batch_index * batch_size:(batch_index + 1) * batch_size]
    batch_index = (batch_index + 1) % batches
    gradients, loss = stochastic_gradient_loss(model, X_batch, Y_batch, 0.000)
    loss = loss.asnumpy()[0]
    updater.update(gradients)
    if (i + 1) % interval == 0:
        print 'iteration %d loss %f' % (i + 1, loss)
    if (i + 1) % validation_interval == 0:
        outputs = model.forward(validation_X, 'train')
        predicted_Y = np.argmax(outputs, axis=1)
        errors = np.count_nonzero(predicted_Y - validation_Y)
        accuracy = 1 - errors / float(validation_Y.shape[0])
        print accuracy
def train(x, y, epochs, wh, bh, w_out, b_out, learning_rate, p, alpha,
          beta1, beta2, eps, lambda_, batch_size=0):
    """
    Trains a neural network. The learnable parameters `wh`, `bh`, `w_out`
    and `b_out` are optimized for as long as `epochs` indicates and then
    returned.

    :param x: The input data of form (N x D), where N is the number of
        observations and D is the dimensionality.
    :param y: The ground truth labels for each observation.
    :param epochs: The number of training epochs to run. (Note: The number
        of iterations is 2 * N * epochs, where N is the number of inputs.)
    :param wh: The initialized weights of each hidden layer connection as
        array. Each weight is a matrix of form (H_{i-1} x H_i), where H_{i-1}
        is the size of the previous hidden layer (or the input layer) and H_i
        is the size of the corresponding hidden layer.
    :param bh: The initialized biases of each hidden layer as array. Each
        bias is a vector of the same length as the corresponding hidden
        layer.
    :param w_out: The initialized weight of the output layer as matrix of
        form (H x K), where H is the size of the last hidden layer and K is
        the number of classes.
    :param b_out: The initialized bias of the output layer as vector of
        length K, where K is the number of classes.
    :param learning_rate: Indicates the step size of each learning epoch.
        High values lead to faster training but risk overshooting; low
        values take longer to train but will eventually reach the desired
        effect.
    :param p: The keep probability for dropout (each neuron is retained
        with probability `p`). Set to 1 to disable dropout.
    :param alpha: The factor by which negative inputs are scaled in ReLU
        activations. Set to 0 to disable leaky ReLU.
    :param beta1: Hyperparameter for the Adam parameter update.
        Recommended to be .9.
    :param beta2: Hyperparameter for the Adam parameter update.
        Recommended to be .999.
    :param eps: Hyperparameter for the Adam parameter update.
        Recommended to be 1e-8.
    :param lambda_: The L2 regularization strength.
    :param batch_size: The number of input observations to use in each
        epoch. If this parameter is set to a positive value, it enables
        minibatch gradient descent, whereby only a random subset of the
        input observations is used to optimize parameters. This greatly
        improves performance and yields about the same accuracy as using
        all input observations.
    :return: A tuple containing the optimized learnable parameters `wh`,
        `bh`, `w_out` and `b_out`.
    """
    m_wh = [.0 for _ in range(len(wh))]
    v_wh = [.0 for _ in range(len(wh))]
    m_bh = [.0 for _ in range(len(bh))]
    v_bh = [.0 for _ in range(len(bh))]
    m_w_out = .0
    v_w_out = .0
    m_b_out = .0
    v_b_out = .0
    losses = []
    accuracies = []
    for epoch in range(1, epochs + 1):
        if batch_size > 0:
            random_indices = np.random.randint(x.shape[0], size=batch_size)
            batch_x = x[random_indices, :]
            batch_y = np.array([y[int(i)] for i in random_indices])
        else:
            batch_x = x
            batch_y = y

        # Feed-forward the network (`hidden_sizes` comes from the
        # enclosing scope).
        hidden_layers, outs, w_out = forward_pass(batch_x, hidden_sizes, wh,
                                                  bh, w_out, b_out, alpha, p)

        # Calculate loss and training accuracy
        loss = calculate_cross_entropy_loss(outs, batch_y, w_out, lambda_)
        losses.append(loss)
        predicted_classes = np.argmax(outs, axis=1)
        correct_classes = np.count_nonzero(predicted_classes == batch_y)
        accuracy = correct_classes / float(predicted_classes.shape[0])
        accuracies.append(accuracy)
        print(epoch, loss, accuracy)

        # Backpropagation
        dwh, dbh, dw_out, db_out = backpropagation(
            batch_x, outs, batch_y, hidden_layers, wh, bh, w_out, b_out, alpha)

        # Update parameters using gradients of backpropagation
        for h in range(len(hidden_layers)):
            wh[h], m_wh[h], v_wh[h] = update_parameter(
                wh[h], dwh[h], epoch, learning_rate, m_wh[h], v_wh[h],
                beta1, beta2, eps)
            bh[h], m_bh[h], v_bh[h] = update_parameter(
                bh[h], dbh[h], epoch, learning_rate, m_bh[h], v_bh[h],
                beta1, beta2, eps)
        w_out, m_w_out, v_w_out = update_parameter(
            w_out, dw_out, epoch, learning_rate, m_w_out, v_w_out,
            beta1, beta2, eps)
        b_out, m_b_out, v_b_out = update_parameter(
            b_out, db_out, epoch, learning_rate, m_b_out, v_b_out,
            beta1, beta2, eps)

    with open('losses.txt', 'w') as file:
        for loss in losses:
            file.write(str(loss) + '\n')
    with open('accuracies.txt', 'w') as file:
        for accuracy in accuracies:
            file.write(str(accuracy) + '\n')
    return wh, bh, w_out, b_out
t1 = time.time()
resnet.forward(data)  # TODO only forward once
forward_time += time.time() - t1
t2 = time.time()
grad_dict, loss = resnet.grad_and_loss(data, labels)
backward_time += time.time() - t2
t3 = time.time()
updater(grad_dict)
updating_time += time.time() - t3
if (iteration + 1) % 100 == 0:
    print 'epoch %d iteration %d loss %f' % (epoch, iteration + 1, loss[0])
print 'epoch %d %f seconds consumed' % (epoch, time.time() - t0)
print 'forward %f' % forward_time
print 'backward %f' % backward_time
print 'updating %f' % updating_time

# validation
val_data_iter.reset()  # data iterator must be reset every epoch
n_errors, n_samples = 0.0, 0.0
for batch in val_data_iter:
    data, labels = unpack_batch(batch)
    probs = resnet.forward(data, True)
    preds = np.argmax(probs, axis=1)
    n_errors += np.count_nonzero(preds - labels)
    n_samples += len(data)
print 'epoch %d validation error %f' % (epoch, n_errors / n_samples)
# TODO dump model
train_data_iter.reset()
for iteration, batch in enumerate(train_data_iter):
    iteration_number += 1
    if iteration_number > 64000:
        terminated = True
        break
    if iteration_number in (32000, 48000):
        updater.learning_rate *= 0.1
    data, labels = unpack_batch(batch)
    loss = model(data, labels=labels)
    grad_dict = model.backward()
    updater(grad_dict)
    if iteration_number % 100 == 0:
        print 'iteration %d loss %f' % (iteration_number, loss)

# validation
val_data_iter.reset()
errors, samples = 0.0, 0.0
for batch in val_data_iter:
    data, labels = unpack_batch(batch)
    scores = model.forward(data, 'inference')
    predictions = np.argmax(scores, axis=1)
    errors += np.count_nonzero(predictions - labels)
    samples += len(data)
print 'epoch %d validation error %f' % (epoch_number, errors / samples)
def test_fromnumeric():
    # Functions
    # 'alen', 'all', 'alltrue', 'amax', 'amin', 'any', 'argmax',
    # 'argmin', 'argpartition', 'argsort', 'around', 'choose', 'clip',
    # 'compress', 'cumprod', 'cumproduct', 'cumsum', 'diagonal', 'mean',
    # 'ndim', 'nonzero', 'partition', 'prod', 'product', 'ptp', 'put',
    # 'rank', 'ravel', 'repeat', 'reshape', 'resize', 'round_',
    # 'searchsorted', 'shape', 'size', 'sometrue', 'sort', 'squeeze',
    # 'std', 'sum', 'swapaxes', 'take', 'trace', 'transpose', 'var',
    a = [4, 3, 5, 7, 6, 8]
    indices = [0, 1, 4]
    np.take(a, indices)
    a = np.array(a)
    # a[indices]
    np.take(a, [[0, 1], [2, 3]])
    a = np.zeros((10, 2))
    b = a.T
    a = np.arange(6).reshape((3, 2))
    np.reshape(a, (2, 3))  # C-like index ordering
    np.reshape(np.ravel(a), (2, 3))  # equivalent to C ravel then C reshape
    np.reshape(a, (2, 3), order='F')  # Fortran-like index ordering
    np.reshape(np.ravel(a, order='F'), (2, 3), order='F')
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.reshape(a, 6)
    np.reshape(a, 6, order='F')
    np.reshape(a, (3, -1))  # the unspecified value is inferred to be 2
    choices = [[0, 1, 2, 3], [10, 11, 12, 13],
               [20, 21, 22, 23], [30, 31, 32, 33]]
    np.choose([2, 3, 1, 0], choices)
    np.choose([2, 4, 1, 0], choices, mode='clip')  # 4 goes to 3 (4-1)
    np.choose([2, 4, 1, 0], choices, mode='wrap')  # 4 goes to (4 mod 4)
    a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
    choices = [-10, 10]
    np.choose(a, choices)
    a = np.array([0, 1]).reshape((2, 1, 1))
    c1 = np.array([1, 2, 3]).reshape((1, 3, 1))
    c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5))
    np.choose(a, (c1, c2))  # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2
    np.repeat(3, 4)
    x = np.array([[1, 2], [3, 4]])
    np.repeat(x, 2)
    np.repeat(x, 3, axis=1)
    np.repeat(x, [1, 2], axis=0)
    a = np.arange(5)
    np.put(a, [0, 2], [-44, -55])
    a = np.arange(5)
    np.put(a, 22, -5, mode='clip')
    x = np.array([[1, 2, 3]])
    np.swapaxes(x, 0, 1)
    x = np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]])
    np.swapaxes(x, 0, 2)
    x = np.arange(4).reshape((2, 2))
    np.transpose(x)
    x = np.ones((1, 2, 3))
    np.transpose(x, (1, 0, 2)).shape
    a = np.array([3, 4, 2, 1])
    np.partition(a, 3)
    np.partition(a, (1, 3))
    x = np.array([3, 4, 2, 1])
    x[np.argpartition(x, 3)]
    x[np.argpartition(x, (1, 3))]
    x = [3, 4, 2, 1]
    np.array(x)[np.argpartition(x, 3)]
    a = np.array([[1, 4], [3, 1]])
    np.sort(a)  # sort along the last axis
    np.sort(a, axis=None)  # sort the flattened array
    np.sort(a, axis=0)  # sort along the first axis
    dtype = [('name', 'S10'), ('height', float), ('age', int)]
    values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
              ('Galahad', 1.7, 38)]
    a = np.array(values, dtype=dtype)  # create a structured array
    np.sort(a, order='height')  # doctest: +SKIP
    np.sort(a, order=['age', 'height'])  # doctest: +SKIP
    x = np.array([3, 1, 2])
    np.argsort(x)
    x = np.array([[0, 3], [2, 2]])
    np.argsort(x, axis=0)
    np.argsort(x, axis=1)
    x = np.array([(1, 0), (0, 1)], dtype=[('x', '<i4'), ('y', '<i4')])
    np.argsort(x, order=('x', 'y'))
    np.argsort(x, order=('y', 'x'))
    a = np.arange(6).reshape(2, 3)
    np.argmax(a)
    np.argmax(a, axis=0)
    np.argmax(a, axis=1)
    b = np.arange(6)
    b[1] = 5
    np.argmax(b)  # Only the first occurrence is returned.
    a = np.arange(6).reshape(2, 3)
    np.argmin(a)
    np.argmin(a, axis=0)
    np.argmin(a, axis=1)
    b = np.arange(6)
    b[4] = 0
    np.argmin(b)  # Only the first occurrence is returned.
    np.searchsorted([1, 2, 3, 4, 5], 3)
    np.searchsorted([1, 2, 3, 4, 5], 3, side='right')
    np.searchsorted([1, 2, 3, 4, 5], [-10, 10, 2, 3])
    a = np.array([[0, 1], [2, 3]])
    np.resize(a, (2, 3))
    np.resize(a, (1, 4))
    np.resize(a, (2, 4))
    x = np.array([[[0], [1], [2]]])
    x.shape
    np.squeeze(x).shape
    np.squeeze(x, axis=(2,)).shape
    a = np.arange(4).reshape(2, 2)
    a = np.arange(8).reshape(2, 2, 2)
    a
    a[:, :, 0]  # main diagonal is [0 6]
    a[:, :, 1]  # main diagonal is [1 7]
    np.trace(np.eye(3))
    a = np.arange(8).reshape((2, 2, 2))
    np.trace(a)
    a = np.arange(24).reshape((2, 2, 2, 3))
    np.trace(a).shape
    x = np.array([[1, 2, 3], [4, 5, 6]])
    np.ravel(x)
    x.reshape(-1)
    np.ravel(x, order='F')
    np.ravel(x.T)
    np.ravel(x.T, order='A')
    a = np.arange(3)[::-1]
    a
    # a = np.arange(12).reshape(2,3,2).swapaxes(1,2); a
    x = np.eye(3)
    np.nonzero(x)
    x[np.nonzero(x)]
    np.transpose(np.nonzero(x))
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    a > 3
    np.nonzero(a > 3)
    np.shape(np.eye(3))
    np.shape([[1, 2]])
    np.shape([0])
    np.shape(0)
    a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
    np.shape(a)
    a.shape
    a = np.array([[1, 2], [3, 4], [5, 6]])
    np.compress([0, 1], a, axis=0)
    np.compress([False, True, True], a, axis=0)
    np.compress([False, True], a, axis=1)
    np.compress([False, True], a)
    a = np.arange(10)
    np.clip(a, 1, 8)
    np.clip(a, 3, 6, out=a)
    a = np.arange(10)
    np.clip(a, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
    np.sum([])
    np.sum([0.5, 1.5])
    np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
    np.sum([[0, 1], [0, 5]])
    np.sum([[0, 1], [0, 5]], axis=0)
    np.sum([[0, 1], [0, 5]], axis=1)
    # np.ones(128, dtype=np.int8).sum(dtype=np.int8)
    # np.any([[True, False], [True, True]])
    # np.any([[True, False], [False, False]], axis=0)
    # np.any([-1, 0, 5])
    # np.any(np.nan)
    # np.all([[True, False], [True, True]])
    # np.all([[True, False], [True, True]], axis=0)
    # np.all([-1, 4, 5])
    # np.all([1.0, np.nan])
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.cumsum(a)
    np.cumsum(a, dtype=float)  # specifies type of output value(s)
    np.cumsum(a, axis=0)  # sum over rows for each of the 3 columns
    np.cumsum(a, axis=1)  # sum over columns for each of the 2 rows
    x = np.arange(4).reshape((2, 2))
    np.ptp(x, axis=0)
    np.ptp(x, axis=1)
    a = np.arange(4).reshape((2, 2))
    np.amax(a)  # Maximum of the flattened array
    np.amax(a, axis=0)  # Maxima along the first axis
    np.amax(a, axis=1)  # Maxima along the second axis
    b = np.arange(5, dtype=np.float)
    # b[2] = np.NaN
    np.amax(b)
    np.nanmax(b)
    a = np.arange(4).reshape((2, 2))
    np.amin(a)  # Minimum of the flattened array
    np.amin(a, axis=0)  # Minima along the first axis
    np.amin(a, axis=1)  # Minima along the second axis
    b = np.arange(5, dtype=np.float)
    # b[2] = np.NaN
    np.amin(b)
    np.nanmin(b)
    a = np.zeros((7, 4, 5))
    a.shape[0]
    np.alen(a)
    x = np.array([536870910, 536870910, 536870910, 536870910])
    np.prod(x)  # random
    np.prod([])
    np.prod([1., 2.])
    np.prod([[1., 2.], [3., 4.]])
    np.prod([[1., 2.], [3., 4.]], axis=1)
    x = np.array([1, 2, 3], dtype=np.uint8)
    # np.prod(x).dtype == np.uint
    x = np.array([1, 2, 3], dtype=np.int8)
    # np.prod(x).dtype == np.int
    a = np.array([1, 2, 3])
    np.cumprod(a)  # intermediate results 1, 1*2
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.cumprod(a, dtype=float)  # specify type of output
    np.cumprod(a, axis=0)
    np.cumprod(a, axis=1)
    np.ndim([[1, 2, 3], [4, 5, 6]])
    np.ndim(np.array([[1, 2, 3], [4, 5, 6]]))
    np.ndim(1)
    a = np.array([[1, 2, 3], [4, 5, 6]])
    np.size(a)
    np.size(a, 1)
    np.size(a, 0)
    np.around([0.37, 1.64])
    np.around([0.37, 1.64], decimals=1)
    np.around([.5, 1.5, 2.5, 3.5, 4.5])  # rounds to nearest even value
    np.around([1, 2, 3, 11], decimals=1)  # ndarray of ints is returned
    np.around([1, 2, 3, 11], decimals=-1)
    a = np.array([[1, 2], [3, 4]])
    np.mean(a)
    np.mean(a, axis=0)
    np.mean(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.mean(a)
    np.mean(a, dtype=np.float64)
    a = np.array([[1, 2], [3, 4]])
    np.std(a)
    np.std(a, axis=0)
    np.std(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.std(a)
    np.std(a, dtype=np.float64)
    a = np.array([[1, 2], [3, 4]])
    np.var(a)
    np.var(a, axis=0)
    np.var(a, axis=1)
    a = np.zeros((2, 512 * 512), dtype=np.float32)
    a[0, :] = 1.0
    a[1, :] = 0.1
    np.var(a)
    np.var(a, dtype=np.float64)
import logging

import minpy.dispatch.policy as ply
from minpy.core import grad_and_loss, function
import util
import mxnet as mx

logging.getLogger('minpy.array').setLevel(logging.WARN)

x = mx.symbol.Variable('x')
sm = mx.symbol.SoftmaxOutput(data=x, name='softmax', grad_scale=1 / 10000.0)
softmax = function(sm, [('x', (10000, 5)), ('softmax_label', (10000,))])
x, t = util.get_data()
# w = np.random.randn(500, 5)
w = util.get_weight()
softmax_label = np.argmax(t, axis=1)


def predict(w, x):
    '''
    Equivalent manual softmax:
    a = np.exp(np.dot(x, w))
    a_sum = np.sum(a, axis=1, keepdims=True)
    prob = a / a_sum
    '''
    y = np.dot(x, w)
    prob = softmax(x=y, softmax_label=softmax_label)
    return prob

# util.plot_data(x, t)
# util.plot_data(x, predict(w, x))
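# The docstring above shows the direct softmax computation. A numerically
# stable NumPy variant subtracts the per-row max before exponentiating;
# this is a standard trick, not part of the original file:
import numpy as np

def stable_softmax(logits):
    # Subtracting the row max keeps exp() from overflowing for large logits.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)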
def one_hot_to_string(one_hot_matrix):
    return "".join([chr(np.argmax(c)) for c in one_hot_matrix])
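# A quick round-trip check; the encoding helper here is hypothetical,
# defined only so the example runs on its own:
import numpy as np

def string_to_one_hot(s, alphabet_size=128):
    # One row per character, with the ASCII code as the hot index.
    m = np.zeros((len(s), alphabet_size))
    m[np.arange(len(s)), [ord(c) for c in s]] = 1.0
    return m

print(one_hot_to_string(string_to_one_hot("hello")))  # -> "hello"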
def train(self):
    x_train = self.data[0]
    y_train = self.data[1]
    x_val = self.data[2]
    y_val = self.data[3]
    x_test = self.data[4]
    y_test = self.data[5]
    N = x_train.shape[0]
    if not N % self.batch_size == 0:
        print 'Illegal Batch Size'
        return
    num_batch = N // self.batch_size
    optimize = getattr(__import__('optimizer'), self.optimizer)
    accuracy_record = [0.0]
    loss_record = []
    param = []
    validation_flag = x_val.shape[0] > 0
    flags.RECORD_FLAG = False
    for epoch in range(self.epochs):
        flags.EPOCH = epoch
        flags.MODE = 'Train'
        for batch in range(num_batch):
            data = x_train[batch * self.batch_size:(batch + 1) * self.batch_size]
            label = y_train[batch * self.batch_size:(batch + 1) * self.batch_size]
            gradient, loss = self.model.loss(data, label)
            for p in range(len(self.model.param)):
                self.model.param[p] = optimize(
                    self.model.param[p], gradient[p], **self.update_setting)
            loss_record.append(loss.asnumpy())
            if batch % self.batch_size == 0:
                print 'epoch %d batch %d loss: %f' % (epoch, batch, loss.val)
        flags.MODE = 'Test'
        if validation_flag:
            flags.RECORD_FLAG = True
            validation_accuracy = utils.get_accuracy(
                np.argmax(self.model.loss(x_val), axis=1), y_val)
            print 'validation accuracy: %f' % validation_accuracy
            if validation_accuracy > max(accuracy_record):
                param = [np.copy(p) for p in self.model.param]
            accuracy_record.append(validation_accuracy)
            self.model.param = [np.copy(p) for p in param]
            flags.RECORD_FLAG = False
            test_accuracy = utils.get_accuracy(
                np.argmax(self.model.loss(x_test), axis=1), y_test)
        else:
            test_accuracy = utils.get_accuracy(
                np.argmax(self.model.loss(x_test), axis=1), y_test)
            if test_accuracy > max(accuracy_record):
                param = [np.copy(p) for p in self.model.param]
            accuracy_record.append(test_accuracy)
            self.model.param = [np.copy(p) for p in param]
        print 'test accuracy: %f' % test_accuracy
        print 'optimal accuracy: %f' % max(accuracy_record)
        if (epoch + 1) % self.decay_interval == 0:
            self.update_setting['learning_rate'] *= self.decay_rate
            print 'learning rate decayed to %f' % self.update_setting['learning_rate']
    utils.record_loss(loss_record)
    return accuracy_record[1:]
def frac_err(W_vect, X, T):
    return np.mean(np.argmax(T, axis=1) !=
                   np.argmax(pred_fun(W_vect, X), axis=1))
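# A minimal sketch of evaluating frac_err with a linear pred_fun stand-in.
# In the original, pred_fun comes from the surrounding scope; both the model
# and the data below are invented for illustration:
import numpy as np

def pred_fun(W_vect, X):
    # Stand-in linear model: reshape the flat weight vector and project.
    W = W_vect.reshape(X.shape[1], -1)
    return X.dot(W)

rng = np.random.RandomState(0)
X = rng.randn(100, 10)
T = np.eye(3)[rng.randint(3, size=100)]   # one-hot targets
W_vect = rng.randn(10 * 3)
print(frac_err(W_vect, X, T))             # fraction misclassified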