def test_binary_focal_loss_image():
    output = C.input_variable((5, 5))
    target = C.input_variable((5, 5))

    o = np.random.random((1, 5, 5)).astype(np.float32)
    t = (np.random.random((1, 5, 5)) < 0.5).astype(np.float32)  # random binary targets (original compared < 0, which is always False)

    # with alpha=1 and gamma=0, focal loss reduces to binary cross entropy
    bce = C.binary_cross_entropy(output, target).eval({output: o, target: t})
    bfl = Cx.binary_focal_loss(output, target, alpha=1, gamma=0).eval({output: o, target: t})
    np.testing.assert_almost_equal(bce, bfl, decimal=3)

    # gamma > 0 down-weights easy examples, so focal loss is strictly smaller
    bce = C.binary_cross_entropy(output, target).eval({output: o, target: t})
    bfl = Cx.binary_focal_loss(output, target, alpha=1, gamma=2).eval({output: o, target: t})
    np.testing.assert_array_less(bfl, bce)

    o = np.random.random((1, 5, 5)).astype(np.float32)
    t = np.zeros((1, 5, 5)).astype(np.float32)

    bce = C.binary_cross_entropy(output, target).eval({output: o, target: t})
    bfl = Cx.binary_focal_loss(output, target, alpha=1, gamma=0).eval({output: o, target: t})
    np.testing.assert_almost_equal(bce, bfl, decimal=2)
def test_binary_focal_loss():
    output = C.input_variable(1)
    target = C.input_variable(1)

    o = np.array([[0.5]], dtype=np.float32)
    t = np.array([[1.]], dtype=np.float32)

    # with alpha=1 and gamma=0, focal loss reduces to binary cross entropy
    bce = C.binary_cross_entropy(output, target).eval({output: o, target: t})
    bfl = Cx.binary_focal_loss(output, target, alpha=1, gamma=0).eval({output: o, target: t})
    np.testing.assert_almost_equal(bce, bfl, decimal=5)

    # gamma > 0 down-weights easy examples, so focal loss is smaller
    bce = C.binary_cross_entropy(output, target).eval({output: o, target: t})
    bfl = Cx.binary_focal_loss(output, target, alpha=1, gamma=2).eval({output: o, target: t})
    np.testing.assert_array_less(bfl, bce)

    # for a badly mis-classified (hard) example the modulating factor is ~1,
    # so focal loss approaches binary cross entropy
    o = np.array([[0.00001]], dtype=np.float32)
    t = np.array([[1.]], dtype=np.float32)

    bce = C.binary_cross_entropy(output, target).eval({output: o, target: t})
    bfl = Cx.binary_focal_loss(output, target, alpha=1, gamma=2).eval({output: o, target: t})
    bfl0 = Cx.binary_focal_loss(output, target, alpha=1, gamma=0).eval({output: o, target: t})
    np.testing.assert_almost_equal(bfl, bce, decimal=0)
    np.testing.assert_almost_equal(bfl0, bfl, decimal=0)
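The two tests above assume the usual aliases for CNTK and the cntkx extension package (which provides binary_focal_loss). A minimal sketch of the surrounding boilerplate, with the import names inferred from the aliases:

import numpy as np
import cntk as C
import cntkx as Cx  # assumption: Cx is the cntkx extension package

if __name__ == '__main__':
    test_binary_focal_loss()
    test_binary_focal_loss_image()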
def implementing_1d_convnet_cntk():
    max_features = 10000  # number of words to consider as features
    max_len = 500  # cut texts after this number of words (among top max_features most common words)
    x_train, y_train, x_test, y_test = load_data(max_features, max_len)
    model = build_model_cntk(max_features, max_len)
    x = cntk.input_variable(shape=(max_len,), dtype=np.float32)
    y = cntk.input_variable(shape=(1,), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)
    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator)
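Several examples in this section hand the minibatch loop off to a cntk_train helper that is not shown. A minimal sketch consistent with the call signature used above; the per-epoch shuffling, and leaving the evaluator unused, are assumptions:

def cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator):
    # hypothetical reconstruction of the helper the examples call;
    # assumes y_train already matches the shape of the label variable y
    num_samples = len(x_train)
    for epoch in range(max_epochs):
        perm = np.random.permutation(num_samples)  # reshuffle each epoch (assumption)
        for i in range(0, num_samples, batch_size):
            idx = perm[i:i + batch_size]
            trainer.train_minibatch({x: x_train[idx], y: y_train[idx]})
        trainer.summarize_training_progress()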
def learning_word_embeddings_with_the_embedding_layer_cntk():
    x_train, y_train, x_test, y_test = load_from_files()
    max_features = 10000
    maxlen = 20
    embedding_dim = 8

    x = cntk.input_variable(shape=(maxlen,), dtype=np.float32)
    y = cntk.input_variable(shape=(1,), dtype=np.float32)
    model = cntk.one_hot(x, num_classes=max_features, sparse_output=True)
    model = cntk.layers.Embedding(embedding_dim)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 30
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)
    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator)
def cost_func(prediction, target):
    '''
    Images can contain multiple FACS AUs, with every label weighted equally,
    so we use binary cross entropy as the multi-label loss.
    '''
    return C.binary_cross_entropy(prediction, target)
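For intuition, the loss can be evaluated directly on a tiny hand-made batch; the three-AU shapes and values below are hypothetical:

# hypothetical smoke test for cost_func: three AUs, multi-hot labels
p = C.input_variable(3)
t = C.input_variable(3)
loss = cost_func(p, t)
pred = np.array([[0.9, 0.2, 0.7]], dtype=np.float32)    # per-AU probabilities
labels = np.array([[1.0, 0.0, 1.0]], dtype=np.float32)  # multi-hot AU labels
print(loss.eval({p: pred, t: labels}))  # aggregate binary cross entropy over the labels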
def create_trainer():
    # Takes the model and the batch generator and creates a Trainer.
    # Returns the input variables, trainer, model and the embedding layer.

    ################### Inputs #######################
    word_one_hot = C.input_variable((G.embedding_vocab_size), np.float32,
                                    is_sparse=True, name='word_input')
    context_one_hots = [C.input_variable((G.embedding_vocab_size), np.float32,
                                         is_sparse=True, name='context_input{}'.format(i))
                        for i in range(context_size)]
    negative_one_hots = [C.input_variable((G.embedding_vocab_size), np.float32,
                                          is_sparse=True, name='negative_input{}'.format(i))
                         for i in range(G.negative)]

    # The target labels should have the first element as 1 and the rest as 0
    target = C.input_variable((G.negative + 1), np.float32)

    word_negative_context_product, embedding_layer = create_word2vec_cbow_model(
        word_one_hot, context_one_hots, negative_one_hots)
    loss = C.binary_cross_entropy(word_negative_context_product, target)
    eval_loss = C.binary_cross_entropy(word_negative_context_product, target)

    lr_schedule = learning_rate_schedule(G.learning_rate, UnitType.minibatch)
    learner = adam_sgd(word_negative_context_product.parameters,
                       lr=lr_schedule,
                       momentum=momentum_as_time_constant_schedule(700))
    trainer = Trainer(word_negative_context_product, (loss, eval_loss), learner)

    return (word_one_hot, context_one_hots, negative_one_hots, target, trainer,
            word_negative_context_product, embedding_layer)
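The target variable above expects the positive word in the first slot and zeros for the G.negative sampled words. A sketch of how such a label batch could be built; batch_size is an assumed context variable:

# hypothetical label construction for the trainer above
target_row = np.array([1.0] + [0.0] * G.negative, dtype=np.float32)
target_batch = np.tile(target_row, (batch_size, 1))  # one identical row per sample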
def train(self, X1_train, X2_train, Y_train, X1_val, X2_val, Y_val,
          batch_size=128, epochs=10):
    assert X1_train.shape == X2_train.shape
    assert len(X1_train) == len(Y_train)
    assert X1_val.shape == X2_val.shape
    assert len(X1_val) == len(Y_val)

    if cntk.try_set_default_device(cntk.gpu(0)):
        print("GPU Training enabled")
    else:
        print("CPU Training :(")

    input_shape = (X1_train.shape[1], X1_train.shape[2], X1_train.shape[3])
    self.siamese_net = self.build_network(input_shape)

    lr_per_minibatch = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    pp = cntk.logging.ProgressPrinter()

    out = cntk.input_variable((1))  # label variable for the pair similarity target
    loss = cntk.binary_cross_entropy(self.out, out)
    learner = cntk.adam(self.out.parameters, lr=lr_per_minibatch, momentum=0.9)
    trainer = cntk.Trainer(self.out, (loss, loss), [learner], [pp])
    cntk.logging.log_number_of_parameters(self.out)

    for epoch in range(epochs):
        # perm = np.random.permutation(len(Y_train))
        for i in range(0, len(Y_train), batch_size):
            max_n = min(i + batch_size, len(Y_train))
            # x1 = X1_train[perm[i:max_n]]
            # x2 = X2_train[perm[i:max_n]]
            # y = Y_train[perm[i:max_n]]
            x1 = X1_train[i:max_n]
            x2 = X2_train[i:max_n]
            y = Y_train[i:max_n]
            trainer.train_minibatch({
                self.left_input: x1,
                self.right_input: x2,
                out: y
            })
            pp.update_with_trainer(trainer, with_metric=True)
        print('.')
        pp.epoch_summary(with_metric=False)
def test(streamf):
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    minibatch_size = 512
    loss = cntk.binary_cross_entropy(net, label_var)
    progress_printer = cntk.logging.ProgressPrinter(tag='Evaluation', num_epochs=0)
    evaluator = cntk.eval.Evaluator(loss, progress_printer)
    while True:
        dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
        if not dat1:
            break
        evaluator.test_minibatch(dat1)
    evaluator.summarize_test_progress()
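The streamf argument is a MinibatchSource whose streams are named features and labels. A sketch of how it might be built from a CTF-format file; the file name, field names and input_dim are placeholders:

streamf = cntk.io.MinibatchSource(
    cntk.io.CTFDeserializer('test.ctf', cntk.io.StreamDefs(
        features=cntk.io.StreamDef(field='features', shape=input_dim),
        labels=cntk.io.StreamDef(field='labels', shape=1))),
    randomize=False, max_sweeps=1)  # single pass, no shuffling, for evaluation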
def use_glove_word_embeddings_cntk(preload_weights=False):
    tokenizer, x_train, y_train, x_val, y_val = from_raw_text_to_word_embeddings()

    x = cntk.input_variable(shape=(Constants.maxlen,), dtype=np.float32)
    y = cntk.input_variable(shape=(1,), dtype=np.float32)
    model = cntk.one_hot(x, num_classes=Constants.max_words, sparse_output=True)
    if preload_weights:
        embedding_matrix = compute_embedding_matrix(tokenizer)
        assert (Constants.embedding_dim == embedding_matrix.shape[0]) or \
               (Constants.embedding_dim == embedding_matrix.shape[1])
        model = cntk.layers.Embedding(weights=embedding_matrix)(model)
    else:
        model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Dense(32, activation=cntk.relu)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)
    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator)
def model(self):
    token_axis = C.Axis.new_unique_dynamic_axis('token_axis')
    b = C.Axis.default_batch_axis()
    token = C.input_variable(self.word_dim, dynamic_axes=[b, token_axis], name='token')
    # 8 classes
    emotion = C.input_variable(self.num_emotions, dynamic_axes=[b], name='emotion')

    processed_token = self.input_layer(token)
    att = self.attention_layer(processed_token, processed_token, 'attention')

    test = C.sequence.last(att)
    test = C.layers.Stabilizer()(test)
    test_w = C.parameter(shape=(2 * self.hidden_dim, self.num_emotions), init=C.glorot_uniform())
    test_v = C.parameter(shape=(self.num_emotions), init=C.glorot_uniform())
    out = C.softmax(C.times(test, test_w) + test_v)

    loss = C.binary_cross_entropy(out, emotion)
    f1 = C.losses.fmeasure(C.hardmax(out), emotion)
    return out, loss, f1
def TrainAndValidate(trainfile):
    # ***** Hyper-Parameters ******
    q_max_words = 12
    p_max_words = 50
    emb_dim = 50
    num_classes = 2
    minibatch_size = 32
    epoch_size = 500000  # No. of samples in training set
    total_epochs = 5  # Total number of epochs to run
    query_total_dim = q_max_words * emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words * emb_dim

    # ****** Create placeholders for reading Training Data ***********
    query_input_var = C.ops.input_variable((1, q_max_words, emb_dim), np.float32, is_sparse=False)
    passage_input_var = C.ops.input_variable((1, p_max_words, emb_dim), np.float32, is_sparse=False)
    output_var = C.input_variable(num_classes, np.float32, is_sparse=False)
    train_reader = create_reader(trainfile, True, query_total_dim, passage_total_dim, label_total_dim)
    input_map = {
        query_input_var: train_reader.streams.queryfeatures,
        passage_input_var: train_reader.streams.passagefeatures,
        output_var: train_reader.streams.labels
    }

    # ********* Model configuration *******
    model_output = cnn_network(query_input_var, passage_input_var, num_classes)
    loss = C.binary_cross_entropy(model_output, output_var)
    pe = C.classification_error(model_output, output_var)
    lr_per_minibatch = C.learning_rate_schedule(0.03, C.UnitType.minibatch)
    learner = C.adagrad(model_output.parameters, lr=lr_per_minibatch)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=total_epochs)

    # ************ Create Trainer with model_output object, learner and loss parameters *************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output)
    print()

    # **** Train the model in batchwise mode *****
    for epoch in range(total_epochs):  # loop over epochs
        print("Epoch : ", epoch)
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = train_reader.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # training step
            sample_count += data[output_var].num_samples  # count samples processed so far
        trainer.summarize_training_progress()
        model_output.save("data/models/CNN_{}.dnn".format(epoch))  # save the model after every epoch

        # *** Find metrics on validation set after every epoch ***
        # (Note: to save time, you can skip doing this every epoch and do it every k epochs instead)
        predicted_labels = []
        for i in range(len(validation_query_vectors)):
            queryVec = np.array(validation_query_vectors[i], dtype="float32").reshape(1, q_max_words, emb_dim)
            passageVec = np.array(validation_passage_vectors[i], dtype="float32").reshape(1, p_max_words, emb_dim)
            scores = model_output(queryVec, passageVec)[0]  # do forward-prop on model to get scores
            predictLabel = 1 if scores[1] >= scores[0] else 0
            predicted_labels.append(predictLabel)
        metrics = precision_recall_fscore_support(np.array(validation_labels),
                                                  np.array(predicted_labels), average='binary')
        # print("precision : "+str(metrics[0])+" recall : "+str(metrics[1])+" f1 : "+str(metrics[2])+"\n")

    return model_output
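The create_reader helper used here (and by the later TrainAndValidate variants) is not shown. A sketch consistent with the stream names the input_map expects; the CTF field names are placeholders:

def create_reader(path, is_training, query_dim, passage_dim, label_dim):
    # hypothetical reconstruction; the 'qfeatures'/'pfeatures'/'labels' field names are assumptions
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            queryfeatures=C.io.StreamDef(field='qfeatures', shape=query_dim),
            passagefeatures=C.io.StreamDef(field='pfeatures', shape=passage_dim),
            labels=C.io.StreamDef(field='labels', shape=label_dim))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)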
def run_experiment_cntk():
    if os.path.isfile('x_train_imdb.bin'):
        print('Loading from .bin files')
        x_train, y_train, x_test, y_test = load_from_files(x_shape=(25000, 500), y_shape=(25000,))
    else:
        print('Loading data...')
        (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=Constants.max_words)
        print(len(x_train), 'train sequences')
        print(len(x_test), 'test sequences')
        print('Pad sequences (samples x time)')
        x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=Constants.maxlen)
        x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=Constants.maxlen)
        print('x_train shape:', x_train.shape)
        print('x_test shape:', x_test.shape)
        print('Saving to .bin files')
        save_to_files(x_train, y_train, x_test, y_test)

    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)

    x_placeholder = cntk.placeholder(shape=(),
                                     dynamic_axes=[cntk.Axis.default_batch_axis(),
                                                   cntk.Axis.default_dynamic_axis()])
    model = cntk.one_hot(x_placeholder, num_classes=Constants.max_words, sparse_output=True)
    model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Recurrence(cntk.layers.LSTM(32))(model)
    model = cntk.sequence.last(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    model.save('ch6-2.cntk.model')
    model = None
    model = cntk.load_model('ch6-2.cntk.model')
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=cntk.Axis.all_static_axes())

    max_epochs = 10
    batch_size = 128
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.01),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)
    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator)
def main():
    print('\nBegin logistic regression training demo')
    ver = C.__version__
    print('(Using CNTK version ' + str(ver) + ')')

    # training data format:
    # 4.0, 3.0, 1
    # 9.0, 5.0, 1
    # . . .
    data_file = '.\\age_edu_sex.txt'
    print('\nLoading data from ' + data_file + '\n')
    features_matrix = np.loadtxt(data_file, dtype=np.float32, delimiter=',',
                                 skiprows=0, usecols=[0, 1])
    print(features_matrix)
    labels_matrix = np.loadtxt(data_file, dtype=np.float32, delimiter=',',
                               skiprows=0, usecols=[2], ndmin=2)
    print(labels_matrix)
    print(labels_matrix.shape)
    print('Training data:')
    combined_matrix = np.concatenate((features_matrix, labels_matrix), axis=1)
    print(combined_matrix)

    # create model
    features_dimension = 2  # x1, x2
    labels_dimension = 1  # always 1 for logistic regression
    X = C.input_variable(features_dimension, np.float32)  # cntk.Variable
    y = C.input_variable(labels_dimension, np.float32)  # correct class value
    W = C.parameter(shape=(features_dimension, 1))  # trainable cntk.Parameter
    b = C.parameter(shape=(labels_dimension))
    z = C.times(X, W) + b  # or z = C.plus(C.times(X, W), b)
    p = 1.0 / (1.0 + C.exp(-z))  # or p = C.sigmoid(z)
    model = p  # create an alias

    # create Learner and Trainer
    cross_entropy_error = C.binary_cross_entropy(model, y)  # cross entropy is a bit more principled than squared error here
    # squared_error = C.squared_error(model, y)
    learning_rate = 0.010
    learner = C.sgd(model.parameters, learning_rate)  # stochastic gradient descent; alternatives: adadelta, adam, nesterov
    trainer = C.Trainer(model, (cross_entropy_error), [learner])
    max_iterations = 4000

    # train
    print('Start training')
    print('Iterations: ' + str(max_iterations))
    print('Learning Rate (LR): ' + str(learning_rate))
    print('Mini-batch = 1')
    np.random.seed(4)
    N = len(features_matrix)
    for i in range(0, max_iterations):
        row = np.random.choice(N, 1)  # pick a random row from training items
        trainer.train_minibatch({X: features_matrix[row], y: labels_matrix[row]})
        if i % 1000 == 0 and i > 0:
            mcee = trainer.previous_minibatch_loss_average
            print(str(i) + ' Cross entropy error on current item = %0.4f ' % mcee)
    print('Training complete')

    # print out results
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)
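Once trained, the model function can score a new (age, education) pair directly; a hypothetical continuation inside main(), with made-up input values:

# hypothetical prediction after training; input values are made up and
# assumed to be scaled the same way as the training data
unknown = np.array([[9.5, 4.5]], dtype=np.float32)
predicted = model.eval({X: unknown})
print('Predicted probability of class 1 = %0.4f' % predicted[0][0])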
def TrainAndValidate(trainfile):
    # ***** Hyper-Parameters ******
    q_max_words = 12
    p_max_words = 50
    emb_dim = 50
    num_classes = 2
    minibatch_size = 13100
    epoch_size = 5241880  # No. of samples in training set
    total_epochs = 145  # Total number of epochs to run
    query_total_dim = q_max_words * emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words * emb_dim

    # ****** Create placeholders for reading Training Data ***********
    query_input_var = C.ops.input_variable((1, q_max_words, emb_dim), np.float32, is_sparse=False)
    passage_input_var = C.ops.input_variable((1, p_max_words, emb_dim), np.float32, is_sparse=False)
    output_var = C.input_variable(num_classes, np.float32, is_sparse=False)
    train_reader = create_reader(trainfile, True, query_total_dim, passage_total_dim, label_total_dim)
    input_map = {
        query_input_var: train_reader.streams.queryfeatures,
        passage_input_var: train_reader.streams.passagefeatures,
        output_var: train_reader.streams.labels
    }

    # ********* Model configuration *******
    model_output = cnn_network(query_input_var, passage_input_var, num_classes)
    model_output.restore('google_54.dnn')
    loss = C.binary_cross_entropy(model_output, output_var)
    pe = C.classification_error(model_output, output_var)
    lr_per_minibatch = C.learning_rate_schedule(0.03, C.UnitType.minibatch)
    learner = C.adam(model_output.parameters, lr=lr_per_minibatch,
                     momentum=C.learners.momentum_schedule(0.9, minibatch_size))
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=total_epochs)

    # ************ Create Trainer with model_output object, learner and loss parameters *************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output)
    print()

    # **** Train the model in batchwise mode *****
    for epoch in range(total_epochs):  # loop over epochs
        print("Epoch : ", epoch)
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = train_reader.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # training step
            sample_count += data[output_var].num_samples  # count samples processed so far
        trainer.summarize_training_progress()
        model_output.save("google_{}.dnn".format(epoch + 55))  # save the model after every epoch

    return model_output
def main():
    # HEADERS
    print('\n Begin logistic regression on breast-cancer-wisconsin data training')
    ver = C.__version__
    print('(Using CNTK version ' + str(ver) + ')')

    # LOADING DATA
    data_file = '.\\breast-cancer-wisconsin.data'
    print('\nLoading data from ' + data_file + '\n')
    data_matrix = np.genfromtxt(data_file, dtype=np.float32, delimiter=',',
                                usecols=range(1, 11))

    # checking for NaNs and filtering data
    for i in range(699):
        for j in range(10):
            if np.isnan(data_matrix[i, j]):
                location = str(i) + ', ' + str(j)  # record location of the missing value
    filtered_data_matrix = data_matrix[~np.isnan(data_matrix).any(axis=1)]
    sorted_by_label_data_matrix = filtered_data_matrix[filtered_data_matrix[:, 9].argsort()]
    np.savetxt('sorted-breast-cancer-wisconsin.data', sorted_by_label_data_matrix,
               delimiter=',', newline='\n')

    # features matrix
    unnorm_features_matrix = sorted_by_label_data_matrix[:, 0:9]
    min_max_scaler = preprocessing.MinMaxScaler()
    features_matrix = min_max_scaler.fit_transform(unnorm_features_matrix)
    # print(features_matrix)

    # labels matrix - sorted and encoded to 0 or 1
    unshaped_labels_matrix = sorted_by_label_data_matrix[:, 9]
    uncoded_labels_matrix = np.reshape(unshaped_labels_matrix, (-1, 1))
    labels_logic_matrix = uncoded_labels_matrix > 2
    labels_matrix = labels_logic_matrix.astype(np.float32)
    # print(labels_logic_matrix)
    # print(labels_matrix)
    # print(labels_matrix.shape)

    # making training data
    print('Training data:')
    combined_matrix = np.concatenate((features_matrix, labels_matrix), axis=1)
    # print(combined_matrix)

    # create a model
    features_dimension = 9  # x1 through x9
    labels_dimension = 1  # always 1 for logistic regression, y
    X = C.input_variable(features_dimension, np.float32)  # cntk.Variable
    y = C.input_variable(labels_dimension, np.float32)  # correct class value
    W = C.parameter(shape=(features_dimension, 1))  # trainable cntk.Parameter
    b = C.parameter(shape=(labels_dimension))
    z = C.times(X, W) + b  # or z = C.plus(C.times(X, W), b)
    p = 1.0 / (1.0 + C.exp(-z))  # or p = C.sigmoid(z)
    model = p  # create 'model' alias

    # create learner
    cross_entropy_error = C.binary_cross_entropy(model, y)
    learning_rate = 0.01
    learner = C.sgd(model.parameters, learning_rate)

    # create trainer
    trainer = C.Trainer(model, (cross_entropy_error), [learner])
    max_iterations = 5000

    # train
    print('Start training')
    print('Iterations: ' + str(max_iterations))
    print('Learning Rate (LR): ' + str(learning_rate))
    print('Mini-batch = 1')
    np.random.seed(4)
    N = len(features_matrix)
    for i in range(0, max_iterations):
        row = np.random.choice(N, 1)
        trainer.train_minibatch({X: features_matrix[row], y: labels_matrix[row]})
        if i % 1000 == 0 and i > 0:
            mcee = trainer.previous_minibatch_loss_average
            print(str(i) + ' Cross entropy error on current item = %0.4f ' % mcee)
    print('Training complete')

    # print out results - weights and bias
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)

    # save results
    print('\nSaving files:')
    weights_file_name = str(learning_rate) + '-' + str(max_iterations) + '_' + 'weights' + '.txt'
    bias_file_name = str(learning_rate) + '-' + str(max_iterations) + '_' + 'bias' + '.txt'
    print(weights_file_name)
    print(bias_file_name)
    np.savetxt(weights_file_name, W.value)
    np.savetxt(bias_file_name, b.value)
    print('Saving complete')
    print('\n End training\n')
def TrainAndValidate(trainfile):
    # ***** Hyper-Parameters ******
    global tf, l, a, r
    q_max_words = 15
    p_max_words = 120
    emb_dim = 50
    num_classes = 2
    minibatch_size = 13100
    epoch_size = 5241880  # No. of samples in training set
    total_epochs = 20  # Total number of epochs to run
    query_total_dim = q_max_words * emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words * emb_dim

    # ****** Create placeholders for reading Training Data ***********
    # axis_qry = C.Axis.new_unique_dynamic_axis('axis_qry')
    query_input_var = C.sequence.input_variable((1, q_max_words, emb_dim), np.float32, is_sparse=False)
    # axis_ans = C.Axis.new_unique_dynamic_axis('axis_ans')
    passage_input_var = C.sequence.input_variable((1, p_max_words, emb_dim), np.float32, is_sparse=False)
    output_var = C.input_variable(num_classes, np.float32, is_sparse=False)
    train_reader = create_reader(trainfile, True, query_total_dim, passage_total_dim, label_total_dim)
    input_map = {
        query_input_var: train_reader.streams.queryfeatures,
        passage_input_var: train_reader.streams.passagefeatures,
        output_var: train_reader.streams.labels
    }

    # ********* Model configuration *******
    model_output = cnn_network(query_input_var, passage_input_var, num_classes)
    # model_output = C.combine(network['query_vector'], network['answer_vector'])
    # query_reconciled = C.reconcile_dynamic_axes(network['query_vector'], network['answer_vector'])
    # x = np.array(cosine(network['query_vector'], network['answer_vector']))
    # l[tf-1] = np.sum(x)
    # if output_var[1] == '1':
    #     a = 1
    # else:
    #     a = 0
    loss = C.binary_cross_entropy(model_output, output_var)
    pe = C.classification_error(model_output, output_var)

    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20 + \
                    [0.000046785] * 10 + [0.000015625]
    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mms = [0] * 20 + [0.9200444146293233] * 20 + [0.9591894571091382]
    mm_schedule = C.learners.momentum_schedule(mms, epoch_size=epoch_size, minibatch_size=minibatch_size)
    l2_reg_weight = 0.0002  # note: defined but never passed to the learner
    dssm_learner = C.learners.momentum_sgd(model_output.parameters, lr_schedule, mm_schedule)
    learner = dssm_learner
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=total_epochs)

    # ************ Create Trainer with model_output object, learner and loss parameters *************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output)

    # **** Train the model in batchwise mode *****
    for epoch in range(total_epochs):  # loop over epochs
        print("Epoch : ", epoch)
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = train_reader.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # training step
            sample_count += data[output_var].num_samples  # count samples processed so far
        trainer.summarize_training_progress()
        model_output.save("RAJHIBHAGWAN_{}.dnn".format(epoch))

        # *** Find metrics on validation set after every epoch ***
        # (Note: to save time, you can skip doing this every epoch and do it every k epochs instead)
        # predicted_labels = []
        # for i in range(len(validation_query_vectors)):
        #     queryVec = np.array(validation_query_vectors[i], dtype="float32").reshape(1, q_max_words, emb_dim)
        #     passageVec = np.array(validation_passage_vectors[i], dtype="float32").reshape(1, p_max_words, emb_dim)
        #     scores = model_output(queryVec, passageVec)[0]  # do forward-prop on model to get scores
        #     predictLabel = 1 if scores[1] >= scores[0] else 0
        #     predicted_labels.append(predictLabel)
        # metrics = precision_recall_fscore_support(np.array(validation_labels),
        #                                           np.array(predicted_labels), average='binary')
        # print("precision : "+str(metrics[0])+" recall : "+str(metrics[1])+" f1 : "+str(metrics[2])+"\n")

    return model_output
def main(base_folder, model_folder, ft_model, model_name='VGG13', max_epochs=300):
    # create needed folders
    output_model_path = os.path.join(model_folder, R'train_results')
    output_model_folder = os.path.join(output_model_path, model_name)
    if not os.path.exists(output_model_folder):
        os.makedirs(output_model_folder)

    # create logging file
    logging.basicConfig(filename=os.path.join(output_model_folder, "train.log"),
                        filemode='w', level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info("Starting training using {} model and max epochs {}.".format(model_name, max_epochs))

    # create the model
    num_classes = len(FACS_table)
    model = build_model(num_classes, model_name, ft_model)

    # set the input variables
    input_var = C.input_variable((1, model.input_height, model.input_width), np.float32)
    label_var = C.input_variable((num_classes), np.float32)

    # read FACS dataset
    logging.info("Loading data...")
    train_params = FACSParameters(num_classes, model.input_height, model.input_width, False)
    test_params = FACSParameters(num_classes, model.input_height, model.input_width, True)
    train_data_reader = FACSReader.create(base_folder, train_folders, "train_label.csv", train_params)
    test_data_reader = FACSReader.create(base_folder, test_folders, "test_label.csv", test_params)

    # print summary of the data
    display_summary(train_data_reader, test_data_reader)

    # get the probabilistic output of the model
    z = model.model(input_var)
    pred = z

    epoch_size = train_data_reader.size()
    minibatch_size = 32

    # training config
    lr_per_minibatch = [model.learning_rate] * 20 + [model.learning_rate / 2.0] * 20 + \
                       [model.learning_rate / 10.0]
    mm_time_constant = -minibatch_size / np.log(0.9)
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, unit=C.UnitType.minibatch,
                                           epoch_size=epoch_size)
    mm_schedule = C.momentum_as_time_constant_schedule(mm_time_constant)
    epoch = 0

    # loss and error cost
    train_loss = cost_func(pred, label_var)
    pe = C.binary_cross_entropy(z, label_var)

    # construct the trainer
    learner = C.adam(z.parameters, lr_schedule, mm_schedule)
    trainer = C.Trainer(z, (train_loss, pe), learner)

    # Get minibatches of images to train with and perform model training.
    # Make sure to set the initial minimum test loss sufficiently high.
    min_test_sample_loss = 1e15
    logging.info("Start training...")
    best_epoch = 0
    while epoch < max_epochs:
        train_data_reader.reset()
        test_data_reader.reset()

        # training
        start_time = time.time()
        training_loss = 0
        training_sample_loss = 0
        test_sample_loss = 0
        while train_data_reader.has_more():
            images, labels, current_batch_size = train_data_reader.next_minibatch(minibatch_size)
            # specify the mapping of input variables in the model to actual minibatch data to be trained with
            trainer.train_minibatch({input_var: images, label_var: labels})

            # keep track of statistics
            training_loss += trainer.previous_minibatch_loss_average * current_batch_size
            training_sample_loss += trainer.previous_minibatch_evaluation_average * current_batch_size
        training_sample_loss /= train_data_reader.size()

        while test_data_reader.has_more():
            images, labels, current_batch_size = test_data_reader.next_minibatch(minibatch_size)
            test_sample_loss += trainer.test_minibatch({input_var: images,
                                                        label_var: labels}) * current_batch_size
        test_sample_loss /= test_data_reader.size()

        if test_sample_loss < min_test_sample_loss:
            min_test_sample_loss = test_sample_loss
            best_epoch = epoch  # assumed fix: best_epoch was otherwise never updated before checkpointing
            trainer.save_checkpoint(os.path.join(output_model_folder, "model_{}".format(best_epoch)))

        logging.info("Epoch {}: took {:.3f}s".format(epoch, time.time() - start_time))
        logging.info(" batch training loss:\t{:e}".format(training_loss))
        logging.info(" average training sample loss:\t\t{:.4f}".format(training_sample_loss))
        logging.info(" average test sample loss:\t\t{:.4f}".format(test_sample_loss))

        # append to a csv file to keep track of training progress
        with open(os.path.join(output_model_folder) + '/progress.csv', 'a+', newline='') as csvFile:
            writer = csv.writer(csvFile)
            if not epoch:
                writer.writerow(['epoch', 'batch training_loss',
                                 'avg training sample loss', 'avg test sample loss'])
            writer.writerow([epoch, training_loss, training_sample_loss, test_sample_loss])

        epoch += 1

    logging.info("")
    logging.info("Final test loss:\t\t{:.2f}".format(min_test_sample_loss))