def attention_parameters(network_outputs):
    g_x = 0.5 * (A + 1) * (network_outputs[0] + 1)  # grid centre - x (cols)
    g_y = 0.5 * (B + 1) * (network_outputs[1] + 1)  # grid centre - y (rows)
    sigma2 = C.exp(network_outputs[2])  # isotropic variance
    delta = (max(A, B) - 1) / (n - 1) * C.exp(network_outputs[3])  # stride
    gamma = C.exp(network_outputs[4])  # intensity
    return g_x, g_y, sigma2, delta, gamma
def true_density(z):
    z1, z2 = z[0], z[1]
    norm = C.sqrt(C.square(z1) + C.square(z2))
    exp1 = C.exp(-0.5 * C.square((z1 - 2) / 0.8))
    exp2 = C.exp(-0.5 * C.square((z1 + 2) / 0.8))
    u = 0.5 * C.square((norm - 4) / 0.4) - C.log(exp1 + exp2)
    return C.exp(-u)
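A minimal usage sketch for the snippet above (the calling convention is an assumption, not taken from the original source): feed a 2-dimensional input_variable and evaluate the unnormalised density at a few points.

import numpy as np
import cntk as C

z = C.input_variable(2)
density = true_density(z)  # builds the CNTK graph defined above

# illustrative points: one on the ring of radius 4, one at the origin
points = np.array([[4., 0.], [0., 0.]], dtype=np.float32)
print(density.eval({z: points}))  # density is much higher on the ring than at the origin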
def forward_network(cls, input_dim: int):  # , batch_norm: bool = False):
    chunk = {}
    log_det_J = 0
    chunk['input_dim'] = input_dim

    _out = _ph = C.placeholder(input_dim, name='place_holder')
    _half_dim = input_dim // 2

    _x1, _x2 = _out[:_half_dim], _out[_half_dim:]
    chunk['log_s_func'] = _log_s_func = cls.basic_network(_half_dim, 'log_s_func')
    chunk['t_func'] = _t_func = cls.basic_network(_half_dim, 't_func')

    _log_s, _t = _log_s_func(_x1), _t_func(_x1)
    _x2 = _t + _x2 * C.exp(_log_s)
    log_det_J += C.reduce_sum(_log_s)
    _out = C.splice(_x1, _x2)

    # ====
    _x1, _x2 = _out[:_half_dim], _out[_half_dim:]
    chunk['log_s_func2'] = _log_s_func2 = cls.basic_network(_half_dim, 'log_s_func2')
    chunk['t_func2'] = _t_func2 = cls.basic_network(_half_dim, 't_func2')

    _log_s2, _t2 = _log_s_func2(_x2), _t_func2(_x2)
    _x1 = _x1 * C.exp(_log_s2) + _t2
    log_det_J += C.reduce_sum(_log_s2)
    _out = _Y = C.splice(_x1, _x2)

    # _out = C.as_block(_out, [(_ph,_ph)], 'asdf1', 'zxcv1')
    return _out, log_det_J, chunk
def test_Exp(tmpdir):
    data = np.asarray([0., 1.], dtype=np.float32)
    model = C.exp(data)
    verify_no_input(model, tmpdir, 'Exp_0')

    x = C.input_variable(data.shape)
    model = C.exp(x)
    verify_one_input(model, data, tmpdir, 'Exp_1')
def model(seq_image, decoded):
    params = dense(decoded)
    g_x, g_y, sigma2, delta, gamma = attention_parameters(params)

    i = C.Constant(np.arange(n) + 1)  # col of patch
    j = C.Constant(np.arange(n) + 1)  # row of patch
    mu_x = g_x + (i - n / 2 - 0.5) * delta
    mu_y = g_y + (j - n / 2 - 0.5) * delta
    mu_x = C.expand_dims(mu_x, axis=-1)
    mu_y = C.expand_dims(mu_y, axis=-1)
    # mu_x: [#, *] [n, 1]
    # mu_y: [#, *] [n, 1]

    image = C.sequence.unpack(seq_image, padding_value=0, no_mask_output=True)
    # image: [#] [*image_width, filters, image_height]

    width_pos = Cx.sequence.position(seq_image)
    # width_pos: [#, *] [1]

    width_pos_unpacked = C.sequence.unpack(width_pos, padding_value=999_999, no_mask_output=True)
    # width_pos: [#] [*image_width, 1]

    a = C.sequence.broadcast_as(C.swapaxes(width_pos_unpacked), mu_x)
    # a: [#, *] [1, *image_width]
    # x pos index of image (width)

    b = C.Constant(np.arange(image_height).reshape((1, -1)))
    # b: [] [1, image_height]
    # y pos index of image (height)

    # calculate which portion of the image is attended to by the gaussian filter
    f_xi = C.exp(-0.5 * C.square(a - mu_x) / sigma2)
    f_yj = C.exp(-0.5 * C.square(b - mu_y) / sigma2)
    # f_xi: [#, *] [n, *image_width]
    # f_yj: [#, *] [n, image_height]

    z_x = C.reduce_sum(f_xi, axis=1)
    z_y = C.reduce_sum(f_yj, axis=1)
    # z_x: [#, *] [n]
    # z_y: [#, *] [n]

    f_xi = f_xi / z_x
    f_yj = f_yj / z_y
    # f_xi: [#, *] [n, *image_width]
    # f_yj: [#, *] [n, image_height]

    # combine filters from x and y
    image_broadcasted = C.sequence.broadcast_as(image, f_yj)
    attended = gamma * C.times(f_xi, C.times_transpose(image_broadcasted, f_yj), output_rank=2)
    # attended: [#, *] [n, filters, n]
    attended = C.swapaxes(attended)
    # attended: [#, *] [filters, n (x), n (y)]
    return attended
def test_Exp(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([0., 1.], dtype=dtype)
        model = C.exp(data)
        verify_no_input(model, tmpdir, 'Exp_0')

        x = C.input_variable(data.shape)
        model = C.exp(x)
        verify_one_input(model, data, tmpdir, 'Exp_1')
def test_Exp(tmpdir, dtype):
    with C.default_options(dtype = dtype):
        data = np.asarray([0., 1.], dtype=dtype)
        model = C.exp(data)
        verify_no_input(model, tmpdir, 'Exp_0')

        x = C.input_variable(data.shape)
        model = C.exp(x)
        verify_one_input(model, data, tmpdir, 'Exp_1')
def gaussian_windows_attention_coefficients(abk, nb_mixtures):
    """ Split into 3 equal tensors of dim nb_mixtures """
    a = C.exp(C.slice(abk, 0, 0, nb_mixtures))
    b = C.exp(C.slice(abk, 0, nb_mixtures, 2 * nb_mixtures))
    k = C.exp(C.slice(abk, 0, 2 * nb_mixtures, 0))
    k = Recurrence(C.plus)(k)

    a = C.expand_dims(a, axis=-1)
    b = C.expand_dims(b, axis=-1)
    k = C.expand_dims(k, axis=-1)
    return a, b, k
def window_weight(a, b, k, u):
    """
    Calculates phi, the window weight of the character sequence at position u at time t.
    Function tested to be correct on 2018-25-02 using numpy equivalent math:

        phi = summation over mixtures { a * exp(-b * (k - u) ^ 2) }

    Args:
        a: importance of window within the mixture. Not normalised and doesn't sum to one.
        b: width of attention window
        k: location of window
        u: integer position of each item in sequence. Value from 1 to seq_length. (rank 2 tensor) [-3, 1]

    Returns:
        :class:`~cntk.ops.functions.Function`
    """
    # print(f"k shape: {k.shape}, u shape: {u.shape}")
    phi = a * C.exp(-1 * b * C.square(k - u))
    # print("internal phi shape:", phi.shape)
    phi = C.swapaxes(C.reduce_sum(phi, axis=0))  # reduce-sum over the mixture axis
    # phi: [#, n] [*-c, 1]
    return phi
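The docstring mentions a NumPy check of the same math; the sketch below is one possible version of that check. The mixture count and sequence length are illustrative assumptions, not values from the original code.

import numpy as np

# hypothetical shapes: 3 mixtures, sequence of length 5
a = np.random.rand(3, 1)           # importance of each mixture
b = np.random.rand(3, 1)           # width of each attention window
k = np.random.rand(3, 1)           # location of each window
u = np.arange(1, 6).reshape(1, 5)  # positions 1..seq_length

phi = (a * np.exp(-b * (k - u) ** 2)).sum(axis=0)  # sum over the mixture axis
print(phi.shape)  # (5,) -- one window weight per character position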
def gaussian_mdn_coeff(x, nmix: int, ndim: int):
    """
    Extracts the coefficients for a gaussian mixture density network.
    Assumes independence between gaussian dimensions.

    Example:
        ndim, nmix = 1, 3
        a = C.input_variable(ndim)
        prediction = Dense((ndim + 2) * nmix)(a)
        coeffs = C.combine(gaussian_mdn_coeff(prediction_tensor, nmix=nmix, ndim=ndim)).eval({a: x})
        alpha, mu, sigma = coeffs.values()

    Arguments:
        x: input tensor
        nmix (int): number of mixtures
        ndim (int): number of dimensions of the gaussian

    Returns:
        tuple
    """
    if len(x.shape) != 1:
        raise ValueError("Must be a 1d tensor, but input has shape {0}".format(x.shape))

    alpha = C.softmax(C.slice(x, 0, 0, nmix), name='alpha')
    sigma = C.exp(C.slice(x, 0, nmix, 2 * nmix), name='sigma')  # common variance for all components in a single gaussian kernel
    mu = C.reshape(C.slice(x, 0, 2 * nmix, (ndim + 2) * nmix), shape=(nmix, ndim), name='mu')

    return alpha, mu, sigma
def flow_reverse(chunk):
    input_dim = chunk['input_dim']
    log_det_J = 0
    _half_dim = input_dim // 2

    _ph = C.placeholder(input_dim, name='place_holder')
    _log_s_func = chunk['log_s_func']
    _t_func = chunk['t_func']

    _y1, _y2 = _ph[:_half_dim], _ph[_half_dim:]
    _log_s = _log_s_func(_y2)
    _t = _t_func(_y2)
    _s = C.exp(_log_s)
    _x1 = (_y1 - _t) / _s
    _x2 = _y2
    _X = C.splice(_x1, _x2)
    log_det_J += C.reduce_sum(C.log(C.abs(_s)))

    _w = chunk['W_rot_mat']
    chunk['W_rot_mat_inv'] = _inv_w = C.Constant(np.linalg.inv(_w.value), name='inv_W')
    _out = _X @ _inv_w
    log_det_J += input_dim * C.log(C.det(_inv_w))

    # if 'scale' in chunk:
    #     _out -= chunk['bias']
    #     _out /= chunk['scale']
    #     log_det_J += input_dim * C.reduce_sum(C.log(C.abs(chunk['scale'])))

    # _out -= chunk['b']
    # _out @= _inv_w
    return _out, log_det_J
def test_exp_2():
    cntk_op = C.exp([0.])
    cntk_ret = cntk_op.eval()

    ng_op, _ = CNTKImporter().import_model(cntk_op)
    ng_ret = ng.transformers.make_transformer().computation(ng_op)()

    assert np.isclose(cntk_ret, ng_ret).all()
def test_exp_3():
    cntk_op = C.exp([-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0.])
    cntk_ret = cntk_op.eval()

    ng_op, _ = CNTKImporter().import_model(cntk_op)
    ng_ret = ng.transformers.make_transformer().computation(ng_op)()

    assert np.isclose(cntk_ret, ng_ret).all()
def __local_response_normalization(self, k, n, alpha, beta, name=''):
    x = cntk.placeholder(name='lrn_arg')
    x2 = cntk.square(x)
    x2s = cntk.reshape(x2, (1, cntk.InferredDimension), 0, 1)
    W = cntk.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1), name='W')
    y = cntk.convolution(W, x2s)
    b = cntk.reshape(y, cntk.InferredDimension, 0, 2)
    den = cntk.exp(beta * cntk.log(k + b))
    apply_x = cntk.element_divide(x, den)
    return apply_x
def g(self, input_dim):
    x = C.input_variable(input_dim, name='data_input')
    for i in range(len(self.t)):
        x_ = x * self.mask[i]
        s = self.s[i](x_) * (1 - self.mask[i])
        t = self.t[i](x_) * (1 - self.mask[i])
        x = x_ + (1 - self.mask[i]) * (x * C.exp(s) + t)
    x = C.squeeze(x)
    return x
def inner(a):
    not_negative = C.greater_equal(a, 0)
    sign = C.element_select(not_negative, not_negative, -1)
    abs_x = C.abs(a)

    # A&S formula 7.1.26
    t = 1.0 / (1.0 + p * abs_x)  # use |x| so the rational approximation stays in its valid range
    y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * C.exp(-abs_x * abs_x)
    return C.element_times(sign, y)
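For reference, a plain-NumPy rendering of the same Abramowitz & Stegun 7.1.26 approximation, with the standard published constants filled in (the CNTK snippet above takes p and a1..a5 from enclosing scope, which is not shown here). It can be checked against math.erf.

import math
import numpy as np

# standard A&S 7.1.26 constants
p = 0.3275911
a1, a2, a3, a4, a5 = 0.254829592, -0.284496736, 1.421413741, -1.453152027, 1.061405429

def erf_approx(x):
    sign = np.where(x >= 0, 1.0, -1.0)
    ax = np.abs(x)
    t = 1.0 / (1.0 + p * ax)
    y = 1.0 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * np.exp(-ax * ax)
    return sign * y

x = np.linspace(-3, 3, 7)
print(np.max(np.abs(erf_approx(x) - np.vectorize(math.erf)(x))))  # on the order of 1e-7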
def lrn(x, depth_radius, bias, alpha, beta, name=''):
    x2 = C.square(x)
    # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
    # Note that Python and BrainScript axis orders are reversed.
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha / (2 * depth_radius + 1), shape=(1, 2 * depth_radius + 1, 1, 1), dtype=dtype, name='W')
    # 3D convolution with a filter that has a non-1 size only in the 3rd axis, and does not reduce
    # since the reduction dimension is fake and of size 1
    y = C.convolution(W, x2s)
    # reshape back to remove the fake singleton reduction dimension
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(bias + b))
    return C.element_divide(x, den)
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1), name='W')
    y = C.convolution(W, x2s)
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(k + b))
    apply_x = C.element_divide(x, den)
    return apply_x
def true_density(z):
    z1, z2 = z[0], z[1]
    w1 = lambda x: C.sin(2 * np.pi * x / 4)
    u = 0.5 * C.square((z2 - w1(z1)) / 0.4)
    dummy = C.ones_like(u) * 1e7

    # u = C.element_select(C.less_equal(z1, 4), u, dummy)
    cond = C.less_equal(z1, 4)
    u = C.element_select(cond, u, dummy)
    # u = cond * u + (1 - cond) * dummy

    return C.exp(-u)
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
    # Note that Python and BrainScript axis orders are reversed.
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1), name='W')
    # 3D convolution with a filter that has a non-1 size only in the 3rd axis, and does not reduce
    # since the reduction dimension is fake and of size 1
    y = C.convolution(W, x2s)
    # reshape back to remove the fake singleton reduction dimension
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(k + b))
    apply_x = C.element_divide(x, den)
    return apply_x
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
    # Note that Python and BrainScript axis orders are reversed.
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha/(2*n+1), (1, 2*n+1, 1, 1), name='W')
    # 3D convolution with a filter that has a non-1 size only in the 3rd axis, and does not reduce
    # since the reduction dimension is fake and of size 1
    y = C.convolution(W, x2s)
    # reshape back to remove the fake singleton reduction dimension
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(k + b))
    apply_x = C.element_divide(x, den)
    return apply_x
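A small usage sketch for the placeholder-style LocalResponseNormalization helper above. The feature-map shape and hyperparameter values here are illustrative assumptions; the key point is that the returned CNTK Function is applied to a real tensor by calling it, which substitutes the placeholder.

import cntk as C

# hypothetical feature map: 8 channels, 16x16 spatial
feat = C.input_variable((8, 16, 16))
lrn_fn = LocalResponseNormalization(k=1.0, n=2, alpha=1e-4, beta=0.75)
z = lrn_fn(feat)   # clones the placeholder graph onto `feat`
print(z.shape)     # same shape as the input feature map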
def f(self, input_dim):
    x = C.input_variable(input_dim, needs_gradient=True, name='input')
    z, sum_log_det_jacob = x, C.Constant(0, name='log_det_zero')
    for i in reversed(range(len(self.t))):
        z_ = self.mask[i] * z
        s = self.s[i](z_) * (1 - self.mask[i])
        t = self.t[i](z_) * (1 - self.mask[i])
        z = z_ + (1 - self.mask[i]) * (z - t) * C.exp(-s)
        sum_log_det_jacob -= C.reduce_sum(s)
    z = C.squeeze(z)
    return z, sum_log_det_jacob
def reverse_network(cls, chunk):
    input_dim = chunk['input_dim']
    log_det_J = 0
    _half_dim = input_dim // 2

    _out = _ph = C.placeholder(input_dim, name='place_holder')
    _log_s_func, _t_func = chunk['log_s_func'], chunk['t_func']
    _log_s_func2, _t_func2 = chunk['log_s_func2'], chunk['t_func2']

    # invert the second coupling layer of forward_network
    _y1, _y2 = _ph[:_half_dim], _ph[_half_dim:]
    _log_s2, _t2 = _log_s_func2(_y2), _t_func2(_y2)
    _y1 = (_y1 - _t2) / C.exp(_log_s2)
    log_det_J += C.reduce_sum(_log_s2)

    # the original snippet is truncated here; the remaining steps below are an assumed
    # completion that mirrors forward_network: invert the first coupling layer, splice, return
    _log_s, _t = _log_s_func(_y1), _t_func(_y1)
    _y2 = (_y2 - _t) / C.exp(_log_s)
    log_det_J += C.reduce_sum(_log_s)
    _out = C.splice(_y1, _y2)
    return _out, log_det_J
def exp(x, name=''):
    '''
    Computes the element-wise exponential of `x`:

    :math:`exp(x) = {e^x}`

    Example:
        >>> C.eval(C.exp([0., 1.]))
        [array([[ 1.      ,  2.718282]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    from cntk import exp
    x = sanitize_input(x)
    return exp(x, name).output()
def Loss(self):
    # Evaluating old actions and values:
    logprobs, state_value, dist_entropy = self.policy.evaluate()

    # Finding the ratio (pi_theta / pi_theta__old): (importance sampling)
    c_old_logprobs = C.input_variable(logprobs.shape, name='old_log_probs')
    ratios = C.exp(logprobs - C.stop_gradient(c_old_logprobs))

    c_rewards = C.input_variable(1, name='rewards')
    advantages = c_rewards - C.stop_gradient(state_value)

    # Finding Surrogate Loss:
    surr1 = ratios * advantages
    surr2 = C.clip(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages
    neglog_loss = -C.element_min(surr1, surr2)
    entropy_loss = -0.01 * dist_entropy
    actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
    critic_loss = 0.5 * C.reduce_mean(C.square(state_value - c_rewards))
    loss = actor_loss + critic_loss

    chunk = {
        'neglog_loss': neglog_loss,
        'entropy_loss': entropy_loss,
        'actor_loss': actor_loss,
        'critic_loss': critic_loss
    }

    trainer = C.Trainer(
        loss, (loss, None),
        C.adam(loss.parameters,
               C.learning_parameter_schedule_per_sample(self.lr),
               C.momentum_schedule_per_sample(self.betas[0]),
               variance_momentum=C.momentum_schedule_per_sample(self.betas[1])))
    # trainer = C.Trainer(loss, (loss, None), C.adam(loss.parameters, C.learning_parameter_schedule(10), C.momentum_schedule(0.9), variance_momentum=C.momentum_schedule(0.999)))  # higher learning rate

    return loss, chunk, trainer
def gaussian_mdn_phi(target, mu, sigma, ndim: int):
    """
    Calculates phi between the target tensor and the network prediction.
    Does not assume independence between components of the target.

    Arguments:
        target: target tensor with shape (ndim, )
        mu: means of gaussian mdn with shape (nmix, ndim)
        sigma: sigma of gaussian mdn
        ndim (int): number of dimensions in gaussian

    Returns:
        :class:`~cntk.ops.functions.Function`
    """
    if not len(mu.shape) == 2:
        raise ValueError("mu {0} must have shape (nmix, ndim)".format(mu.shape))

    t = C.expand_dims(target, axis=0)
    exp_term = C.exp(C.negate(C.square(C.reduce_l2(t - mu, axis=-1)) / (2 * C.square(sigma))))
    factor = C.reciprocal((2 * pi) ** (ndim / 2) * C.pow(sigma, ndim))
    return factor * exp_term
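For intuition, a NumPy rendering of the same spherical-gaussian likelihood term. The shapes (nmix=3, ndim=2) are illustrative assumptions, not taken from the snippet above.

import numpy as np

nmix, ndim = 3, 2
target = np.random.rand(ndim)
mu = np.random.rand(nmix, ndim)
sigma = np.random.rand(nmix) + 0.5

dist = np.linalg.norm(target[None, :] - mu, axis=-1)        # (nmix,) distance to each mixture mean
exp_term = np.exp(-dist ** 2 / (2 * sigma ** 2))            # (nmix,)
factor = 1.0 / ((2 * np.pi) ** (ndim / 2) * sigma ** ndim)  # (nmix,) normalising constant
phi = factor * exp_term                                     # per-mixture density at `target`
print(phi.shape)  # (3,)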
def main():
    data_matrix = load_data('.\\visceral-fat-rating.data')
    checked_data_matrix = check_for_NaN(data_matrix)
    sorted_data_matrix = sort_data_by_column(checked_data_matrix, 13)
    # save_data(sorted_data_matrix, 'sorted_visceral-fat-rating.data')

    # features matrix
    unnorm_features_matrix = sorted_data_matrix[:, 0:13]
    min_max_scaler = preprocessing.MinMaxScaler()
    features_matrix = min_max_scaler.fit_transform(unnorm_features_matrix)

    # labels matrix
    labels_matrix = np.reshape(sorted_data_matrix[:, 13], (-1, 1))

    print(' Training data:')
    combined_matrix = np.concatenate((features_matrix, labels_matrix), axis=1)
    print(combined_matrix)

    features_dimension = 13
    labels_dimension = 1

    X = C.input_variable(features_dimension, np.float32)
    y = C.input_variable(labels_dimension, np.float32)

    z, W, b = linear_layer(X, features_dimension, labels_dimension)
    p = 1.0 / (1.0 + C.exp(-z))
    model = p

    ###
    cee = C.cross_entropy_with_softmax(model, y)
    eval_error = C.classification_error(model, y)

    learning_rate = 0.1
    learner = C.sgd(model.parameters, learning_rate)
    ###
    trainer = C.Trainer(model, (cee, eval_error), [learner])
    max_iterations = 8000
    ###
    np.random.seed(4)
    N = len(features_matrix)

    for i in range(0, max_iterations):
        row = np.random.choice(N, 1)
        trainer.train_minibatch({X: features_matrix[row], y: labels_matrix[row]})
        if i % 1000 == 0 and i > 0:
            mcee = trainer.previous_minibatch_loss_average
            print(str(i) + ' Cross entropy error on current item = %0.4f ' % mcee)

    # print out results - weights and bias
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)

    # save results
    print('\nSaving files:')
    weights_file_name = str(learning_rate) + '-' + str(max_iterations) + '_' + 'weights' + '.txt'
    bias_file_name = str(learning_rate) + '-' + str(max_iterations) + '_' + 'bias' + '.txt'
    print(weights_file_name)
    print(bias_file_name)
    np.savetxt(weights_file_name, W.value)
    np.savetxt(bias_file_name, b.value)
    print('Saving complete')

    ##########################
    print('\n ### End training\n')
def softmax(x):
    e = C.exp(x)
    s = C.reduce_sum(e, axis=0)
    return e / s
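A quick sanity check of the hand-rolled softmax above against CNTK's built-in C.softmax; the test vector is an illustrative assumption.

import numpy as np
import cntk as C

v = np.array([1.0, 2.0, 3.0], dtype=np.float32)
manual = softmax(C.constant(v)).eval()
builtin = C.softmax(C.constant(v)).eval()
print(np.allclose(manual, builtin))  # expected: True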
def main():
    # HEADERS
    print('\n Begin logistic regression on breast-cancer-wisconsin data training')
    ver = C.__version__
    print('(Using CNTK version ' + str(ver) + ')')

    # LOADING DATA
    data_file = '.\\breast-cancer-wisconsin.data'
    print('\nLoading data from ' + data_file + '\n')
    data_matrix = np.genfromtxt(data_file, dtype=np.float32, delimiter=',', usecols=range(1, 11))

    # checking for NaNs and filtering data
    for i in range(699):
        for j in range(10):
            if np.isnan(data_matrix[i, j]):
                location = str(i) + ', ' + str(j)
    filtered_data_matrix = data_matrix[~np.isnan(data_matrix).any(axis=1)]
    sorted_by_label_data_matrix = filtered_data_matrix[filtered_data_matrix[:, 9].argsort()]
    np.savetxt('sorted-breast-cancer-wisconsin.data', sorted_by_label_data_matrix, delimiter=',', newline='\n')

    # features matrix
    unnorm_features_matrix = sorted_by_label_data_matrix[:, 0:9]
    min_max_scaler = preprocessing.MinMaxScaler()
    features_matrix = min_max_scaler.fit_transform(unnorm_features_matrix)
    # print(features_matrix)

    # labels matrix - sorted and encoded to 0 or 1
    unshaped_labels_matrix = sorted_by_label_data_matrix[:, 9]
    uncoded_labels_matrix = np.reshape(unshaped_labels_matrix, (-1, 1))
    labels_logic_matrix = uncoded_labels_matrix > 2
    labels_matrix = labels_logic_matrix.astype(np.float32)
    # print(labels_logic_matrix)
    # print(labels_matrix)
    # print(labels_matrix.shape)

    # making training data
    print('Training data:')
    combined_matrix = np.concatenate((features_matrix, labels_matrix), axis=1)
    # print(combined_matrix)

    # create a model
    features_dimension = 9  # x1, x2, x3, x4, x5, x6, x7, x8, x9
    labels_dimension = 1  # always 1 for logistic regression, y
    X = C.input_variable(features_dimension, np.float32)  # cntk.Variable
    y = C.input_variable(labels_dimension, np.float32)  # correct class value
    W = C.parameter(shape=(features_dimension, 1))  # trainable cntk.Parameter
    b = C.parameter(shape=(labels_dimension))
    z = C.times(X, W) + b  # or z = C.plus(C.times(X, W), b)
    p = 1.0 / (1.0 + C.exp(-z))  # or p = C.sigmoid(z)
    model = p  # create 'model' alias

    # create learner
    cross_entropy_error = C.binary_cross_entropy(model, y)
    learning_rate = 0.01
    learner = C.sgd(model.parameters, learning_rate)

    # create trainer
    trainer = C.Trainer(model, (cross_entropy_error), [learner])
    max_iterations = 5000

    # train
    print('Start training')
    print('Iterations: ' + str(max_iterations))
    print('Learning Rate (LR): ' + str(learning_rate))
    print('Mini-batch = 1')
    np.random.seed(4)
    N = len(features_matrix)
    for i in range(0, max_iterations):
        row = np.random.choice(N, 1)
        trainer.train_minibatch({X: features_matrix[row], y: labels_matrix[row]})
        if i % 1000 == 0 and i > 0:
            mcee = trainer.previous_minibatch_loss_average
            print(str(i) + ' Cross entropy error on current item = %0.4f ' % mcee)
    print('Training complete')

    # print out results - weights and bias
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)

    # save results
    print('\nSaving files:')
    weights_file_name = str(learning_rate) + '-' + str(max_iterations) + '_' + 'weights' + '.txt'
    bias_file_name = str(learning_rate) + '-' + str(max_iterations) + '_' + 'bias' + '.txt'
    print(weights_file_name)
    print(bias_file_name)
    np.savetxt(weights_file_name, W.value)
    np.savetxt(bias_file_name, b.value)
    print('Saving complete')
    print('\n End training\n')
def test_exp():
    assert_cntk_ngraph_isclose(C.exp([-2, -1., 0., 1., 2.]))
    assert_cntk_ngraph_isclose(C.exp([0.]))
    assert_cntk_ngraph_isclose(C.exp([-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0.]))
def main():
    print('\nBegin logistic regression training demo')
    ver = C.__version__
    print('(Using CNTK version ' + str(ver) + ')')

    # training data format:
    # 4.0, 3.0, 1
    # 9.0, 5.0, 1
    # . . .

    data_file = '.\\age_edu_sex.txt'
    print('\nLoading data from ' + data_file + '\n')

    features_matrix = np.loadtxt(data_file, dtype=np.float32, delimiter=',', skiprows=0, usecols=[0, 1])
    print(features_matrix)

    labels_matrix = np.loadtxt(data_file, dtype=np.float32, delimiter=',', skiprows=0, usecols=[2], ndmin=2)
    print(labels_matrix)
    print(labels_matrix.shape)

    print('Training data:')
    combined_matrix = np.concatenate((features_matrix, labels_matrix), axis=1)
    print(combined_matrix)

    # create model
    features_dimension = 2  # x1, x2
    labels_dimension = 1  # always 1 for logistic regression
    X = C.input_variable(features_dimension, np.float32)  # cntk.Variable
    y = C.input_variable(labels_dimension, np.float32)  # correct class value
    W = C.parameter(shape=(features_dimension, 1))  # trainable cntk.Parameter
    b = C.parameter(shape=(labels_dimension))
    z = C.times(X, W) + b  # or z = C.plus(C.times(X, W), b)
    p = 1.0 / (1.0 + C.exp(-z))  # or p = C.sigmoid(z)
    model = p  # create an alias

    # create Learner and Trainer
    cross_entropy_error = C.binary_cross_entropy(model, y)  # cross entropy is a bit more principled for the learning rate
    # squared_error = C.squared_error(model, y)
    learning_rate = 0.010
    learner = C.sgd(model.parameters, learning_rate)  # stochastic gradient descent; alternatives: adadelta, adam, nesterov
    trainer = C.Trainer(model, (cross_entropy_error), [learner])
    max_iterations = 4000

    # train
    print('Start training')
    print('Iterations: ' + str(max_iterations))
    print('Learning Rate (LR): ' + str(learning_rate))
    print('Mini-batch = 1')
    np.random.seed(4)
    N = len(features_matrix)
    for i in range(0, max_iterations):
        row = np.random.choice(N, 1)  # pick a random row from training items
        trainer.train_minibatch({X: features_matrix[row], y: labels_matrix[row]})
        if i % 1000 == 0 and i > 0:
            mcee = trainer.previous_minibatch_loss_average
            print(str(i) + ' Cross entropy error on current item = %0.4f ' % mcee)
    print('Training complete')

    # print out results
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)
def flow_forward(input_dim: int, act_func_pair: tuple = (None, None), batch_norm: bool = False):
    chunk = {}
    log_det_J = 0
    chunk['input_dim'] = input_dim

    _ph = C.placeholder(input_dim, name='place_holder')
    _out = _ph

    if batch_norm:
        # _bn = C.layers.BatchNormalization(name='batch_norm')(_ph)
        # chunk['scale'] = _bn.parameters[0]
        # chunk['bias'] = _bn.parameters[1]
        chunk['mu'] = C.Constant(np.zeros(shape=input_dim))
        chunk['var'] = C.Constant(np.ones(shape=input_dim))

        _eps = C.Constant(1e-7)
        _mu = C.reduce_mean(_ph, axis=C.Axis.default_batch_axis())
        _var = C.reduce_mean(C.square(_ph - _mu), axis=C.Axis.default_batch_axis())

        chunk['muB'] = _mu
        chunk['varB'] = _var

        # _bn = (_ph - chunk['mu']) / C.sqrt(chunk['var'] + _eps)
        _bn = C.sqrt(chunk['var'] + _eps) * _ph + chunk['mu']
        _ph = _bn

        log_det_J += -0.5 * C.reduce_sum(C.log(_var + _eps))
        # log_det_J += C.reduce_sum(C.log())

    chunk['W_rot_mat'] = _W = C.parameter((input_dim, input_dim))
    _W.value = random_rotation_matrix = special_ortho_group.rvs(input_dim)
    # _W.value = np.roll(np.eye(input_dim), input_dim//2, axis=0)
    _out = _ph @ _W
    log_det_J += C.log(C.abs(C.det(_W)))
    # or
    # log_det_J += C.slogdet(_W)[1]

    _half_dim = input_dim // 2
    _x1 = _out[:_half_dim]
    _x2 = _out[_half_dim:]

    _log_s_func, _t_func = act_func_pair
    if _log_s_func is None:  # basic network
        _log_s_func = C.layers.Sequential([
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(_half_dim, C.tanh),
        ])  # (C.placeholder(input_dim, name='place_holder'))
    if _t_func is None:  # basic network
        _t_func = C.layers.Sequential([
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(256, C.leaky_relu),
            C.layers.Dense(_half_dim),
        ])  # (C.placeholder(input_dim, name='place_holder'))

    chunk['log_s_func'] = _log_s_func
    chunk['t_func'] = _t_func

    _log_s, _t = _log_s_func(_x2), _t_func(_x2)

    _s = C.exp(_log_s)
    _y1 = _s * _x1 + _t
    _y2 = _x2
    _Y = C.splice(_y1, _y2)
    chunk['output'] = _Y

    log_det_J += C.reduce_sum(_log_s)

    return _Y, log_det_J, chunk