def build_graph(noise_shape, image_shape, G_progress_printer, D_progress_printer):
    input_dynamic_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=input_dynamic_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=input_dynamic_axes)
    X_real_scaled = 2 * (X_real / 255.0) - 1.0

    # Create the model function for the generator and discriminator models
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method='share',
        substitutions={X_real_scaled.output: X_fake.output}
    )

    # Create loss functions and configure optimization algorithms
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    G_learner = C.fsadagrad(
        parameters=X_fake.parameters,
        lr=C.learning_parameter_schedule_per_sample(lr),
        momentum=C.momentum_schedule_per_sample(0.9985724484938566)
    )
    D_learner = C.fsadagrad(
        parameters=D_real.parameters,
        lr=C.learning_parameter_schedule_per_sample(lr),
        momentum=C.momentum_schedule_per_sample(0.9985724484938566)
    )

    DistG_learner = C.train.distributed.data_parallel_distributed_learner(G_learner)
    # The following API marks a learner as the metric aggregator, which is used by
    # the trainer to determine the training progress.
    # It is required only when more than one learner is provided to a *single* trainer.
    # In this example, we use two trainers, each with a single learner, so it is not
    # required and is set automatically by CNTK for each single learner. However, if
    # you plan to use both learners with a single trainer, it needs to be called
    # before creating the trainer.
    #DistG_learner.set_as_metric_aggregator()
    DistD_learner = C.train.distributed.data_parallel_distributed_learner(D_learner)

    # Instantiate the trainers
    G_trainer = C.Trainer(
        X_fake,
        (G_loss, None),
        DistG_learner,
        G_progress_printer
    )
    D_trainer = C.Trainer(
        D_real,
        (D_loss, None),
        DistD_learner,
        D_progress_printer
    )

    return X_real, X_fake, Z, G_trainer, D_trainer
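# --- A minimal usage sketch (not from the original source) of the graph built above.
# It assumes a hypothetical `noise_sample(n)` helper that draws an n-sample batch of
# noise and a reader exposing a `features` stream; the alternating k-discriminator-
# steps-per-generator-step loop is the conventional GAN recipe, shown for illustration.
def train_gan_sketch(reader, noise_sample, minibatch_size=128, num_minibatches=500, k=2):
    X_real, X_fake, Z, G_trainer, D_trainer = build_graph(
        noise_shape=(100,), image_shape=(784,),
        G_progress_printer=C.logging.ProgressPrinter(50),
        D_progress_printer=C.logging.ProgressPrinter(50))
    for _ in range(num_minibatches):
        # first update the discriminator k times on real and generated images
        for _ in range(k):
            Z_data = noise_sample(minibatch_size)
            X_data = reader.next_minibatch(minibatch_size)
            batch_inputs = {X_real: X_data[reader.streams.features].data, Z: Z_data}
            D_trainer.train_minibatch(batch_inputs)
        # then update the generator on fresh noise only
        G_trainer.train_minibatch({Z: noise_sample(minibatch_size)})
    return X_fake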
def train(streamf):
    global net
    net = nn(input_var)
    loss = cntk.losses.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)

    learning_rate = 0.01
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(140 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters,
                             lr=lr_schedule,
                             momentum=momentum_time_constant,
                             unit_gain=True)
    progress = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progress)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    minibatch_size = 1024
    max_epochs = 500
    epoch_size = 48985

    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
        trainer.summarize_training_progress()
    return trainer
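# A side note (a sketch, not in the original source) on the time-constant expression
# above: momentum_as_time_constant_schedule(tc) corresponds to a per-sample momentum
# of exp(-1/tc), so tc = 140 / -log(0.9) yields an effective momentum of 0.9 over a
# window of 140 samples.
import numpy as np
tc = 140 / -np.math.log(0.9)
per_sample_momentum = np.exp(-1.0 / tc)
assert abs(per_sample_momentum ** 140 - 0.9) < 1e-9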
def train(streamf):
    global net
    minibatch_size = 1024
    max_epochs = 2000
    epoch_size = 48985

    net = nn(input_var)
    loss = cntk.losses.binary_cross_entropy(net, label_var)
    error = cntk.classification_error(net, label_var)

    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch, cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(200)
    learner = cntk.fsadagrad(net.parameters, lr_schedule, momentum_as_time_constant)
    progress = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progress)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
        trainer.summarize_training_progress()
    return trainer
def train(streamf):
    input_var = cntk.input_variable(45, np.float32, name='features',
                                    dynamic_axes=cntk.axis.Axis.default_input_variable_dynamic_axes())
    label_var = cntk.input_variable(3, np.float32, name='labels')
    net = nn(input_var)
    loss = cntk.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)

    learning_rate = 0.02
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(5000 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters,
                             lr=lr_schedule,
                             momentum=momentum_time_constant,
                             unit_gain=True)
    progress = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progress)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    minibatch_size = 5000
    num_samples_per_sweep = 2000
    for i in range(0, num_samples_per_sweep):
        dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(dat1)
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if training_loss < 0.002:
            break
    return trainer
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))
    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.sequence.input_variable(((2 * context + 1) * feature_dim))
    labels = C.sequence.input_variable((num_classes))

    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.fsadagrad(z.parameters,
                          lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                          momentum=C.momentum_as_time_constant_schedule(1000),
                          gradient_clipping_threshold_per_sample=15,
                          gradient_clipping_with_truncation=True)
    progress_printer = C.logging.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True
    os.chdir(abs_path)
def train(self):
    tmp_d = {"x": [], "y": []}
    num_list = []
    count = 0
    for idx, value in enumerate(self.series):
        if idx % self.h_dims == 0:
            num_list = []
            count += 1
            if (self.h_dims * count) > len(self.series):
                break
        num_list.append(np.float32(value))
        increment_list = []
        for num in num_list:
            increment_list.append(num)
            tmp_d["x"].append(np.array(increment_list))
            tmp_d["y"].append(
                np.array([np.float32(self.series[self.h_dims * count])]))

    x = {"train": tmp_d["x"]}
    y = {"train": np.array(tmp_d["y"])}

    z = self.create_model(self.input_node, self.h_dims)
    var_l = cntk.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

    learning_rate = 0.005
    lr_schedule = cntk.learning_parameter_schedule(learning_rate)
    loss = cntk.squared_error(z, var_l)
    error = cntk.squared_error(z, var_l)
    momentum_schedule = cntk.momentum_schedule(0.9, minibatch_size=self.batch_size)
    learner = cntk.fsadagrad(z.parameters,
                             lr=lr_schedule,
                             momentum=momentum_schedule)
    trainer = cntk.Trainer(z, (loss, error), [learner])

    # training
    loss_summary = []
    start = time.time()
    for epoch in range(0, self.epochs):
        for x_batch, l_batch in self.next_batch(x, y, "train", self.batch_size):
            trainer.train_minibatch({
                self.input_node: x_batch,
                var_l: l_batch
            })
        if epoch % (self.epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss, time.time() - start))
    return z
def lstm_basic(x, y, epochs=1000, batch_size=100, input_dim=5):
    x_axes = [C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()]
    C.input_variable(1, dynamic_axes=x_axes)

    # input sequences
    input_seq = C.sequence.input_variable(1)

    # create the model
    z = create_model(input_seq, input_dim)

    # expected output (label); the dynamic axes of the model output
    # are also specified as the dynamic axes of the label input
    lb = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, lb)

    # use squared error to determine error for now
    error = C.squared_error(z, lb)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)
    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(x, y, "train", batch_size):
            trainer.train_minibatch({input_seq: x1, lb: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss, time.time() - start))

    print("training took {0:.1f} sec".format(time.time() - start))
    return z, trainer, input_seq
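# A minimal sketch (an assumption, not from the original source) of the next_batch
# helper that lstm_basic relies on: it walks the chosen split ds in minibatch-sized
# slices and yields (sequences, labels) pairs.
def next_batch(x, y, ds, batch_size):
    def as_batch(data, start, count):
        return data[start:start + count]
    for i in range(0, len(x[ds]) - batch_size, batch_size):
        yield as_batch(x[ds], i, batch_size), as_batch(y[ds], i, batch_size)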
def train(self, train_file, output_resources_pickle_file,
          network_type='unidirectional',
          num_epochs=1, batch_size=50,
          dropout=0.2, reg_alpha=0.0,
          num_hidden_units=150, num_layers=1):
    train_X, train_Y = self.reader.read_and_parse_training_data(train_file, output_resources_pickle_file)

    print("Data Shape: ")
    print(train_X.shape)  # (15380, 613)
    print(train_Y.shape)  # (15380, 613, 8)
    # self.wordvecs.shape (66962, 50)

    print("Hyperparameters:")
    print("output_resources_pickle_file = {}".format(output_resources_pickle_file))
    print("network_type = {}".format(network_type))
    print("num_epochs = {}".format(num_epochs))
    print("batch_size = {}".format(batch_size))
    print("dropout = {}".format(dropout))
    print("reg_alpha = {}".format(reg_alpha))
    print("num_hidden_units = {}".format(num_hidden_units))
    print("num_layers = {}".format(num_layers))

    # Instantiate the model function
    features = C.sequence.input_variable(self.wordvecs.shape[0])
    labels = C.input_variable(train_Y.shape[2], dynamic_axes=[C.Axis.default_batch_axis()])
    self.model = self.__create_model(features, train_Y.shape[2], num_hidden_units, dropout)

    plot_path = "./lstm_model.png"
    plot(self.model, plot_path)

    # Instantiate the loss and error function
    loss = C.cross_entropy_with_softmax(self.model, labels)
    error = C.classification_error(self.model, labels)

    # LR schedule
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(self.model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)

    # Set up the progress updater
    progress_printer = C.logging.ProgressPrinter(freq=100, first=10, tag='Training',
                                                 num_epochs=num_epochs)

    # Instantiate the trainer. We have all data in memory.
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_feed_data.ipynb
    print('Start training')
    train_summary = loss.train((train_X.astype('float32'), train_Y.astype('float32')),
                               parameter_learners=[learner],
                               callbacks=[progress_printer])
def train(create_model, X, Y, epochs=500, batch_size=10, N=1):
    dim = Y.shape[1]

    # input sequences
    x = C.sequence.input_variable(dim)

    # create the model
    z = create_model(x, N=N, outputs=dim)

    # expected output (label); the dynamic axes of the model output
    # are also specified as the dynamic axes of the label input
    l = C.input_variable(dim, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, l)

    # use squared error to determine error for now
    error = C.squared_error(z, l)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)
    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(X, Y, batch_size):
            trainer.train_minibatch({x: x1, l: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.5f}".format(epoch, training_loss))

    print("training took {0:.1f} sec".format(time.time() - start))
    return z
def main():
    window_len = int(input("window_len: "))
    word_len = int(input("word_len: "))
    alphabet_len = int(input("alphabet_len: "))
    alpha_to_num_step = float(1 / alphabet_len)
    alpha_to_num_map = float(alpha_to_num_step / 2)

    source = "weather_JAN.csv"
    ts_data = pd.read_csv(source, index_col="date", parse_dates=["date"], dtype=np.float32)

    sax_ret = sax_via_window(ts_data["temp"].values,
                             window_len,
                             word_len,
                             alphabet_size=alphabet_len,
                             nr_strategy="none",
                             z_threshold=0.01)

    my_sax = dict()
    for k, v in sax_ret.items():
        for i in v:
            my_sax[i] = k

    tmp_d = {"x": [], "y": []}
    for k, v in my_sax.items():
        num_list = [
            np.float32(((ord(char) - 96) * alpha_to_num_step) - alpha_to_num_map)
            for char in v[:-1]
        ]
        increment_list = []
        for num in num_list:
            increment_list.append(num)
            tmp_d["x"].append(np.array(increment_list))
            tmp_d["y"].append(
                np.array([
                    np.float32("".join([
                        str(((ord(char) - 96) * alpha_to_num_step) - alpha_to_num_map)
                        for char in v[-1]
                    ]))
                ]))

    # FORMAT:
    # result_x[0] = [1]            result_y[0] = 3
    # result_x[1] = [1,4]          result_y[1] = 3
    # result_x[2] = [1,4,2]        result_y[2] = 3
    # result_x[3] = [1,4,2,2]      result_y[3] = 3
    # result_x[4] = [1,4,2,2,4]    result_y[4] = 3
    #####
    result_x = dict()
    result_x["train"] = tmp_d["x"][:len(tmp_d["x"]) - 2000]
    result_x["test"] = tmp_d["x"][len(tmp_d["x"]) - 2000:len(tmp_d["x"]) - 1000]
    result_x["val"] = tmp_d["x"][len(tmp_d["x"]) - 1000:len(tmp_d["x"])]

    result_y = dict()
    result_y["train"] = np.array(tmp_d["y"][:len(tmp_d["y"]) - 2000])
    result_y["test"] = np.array(tmp_d["y"][len(tmp_d["y"]) - 2000:len(tmp_d["y"]) - 1000])
    result_y["val"] = np.array(tmp_d["y"][len(tmp_d["y"]) - 1000:len(tmp_d["y"])])

    batch_size = window_len * (word_len - 1)
    h_dims = word_len

    epochs = input("Epochs: ")
    if not epochs == "":
        epochs = int(epochs)
    else:
        epochs = 100

    start_time = time.time()
    model_file = "{}_epochs.model".format(epochs)
    if not os.path.exists(model_file):
        x = C.sequence.input_variable(1)
        z = create_model(x, h_dims)
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)
        loss = C.squared_error(z, var_l)
        error = C.squared_error(z, var_l)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
        learner = C.fsadagrad(z.parameters,
                              lr=lr_schedule,
                              momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []
        start = time.time()
        for epoch in range(0, epochs):
            for x_batch, l_batch in next_batch(result_x, result_y, "train", batch_size):
                trainer.train_minibatch({x: x_batch, var_l: l_batch})
            if epoch % (epochs / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))
        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for label_txt in ["train", "val", "test"]:
            print("mse for {}: {:.6f}".format(
                label_txt,
                get_mse(trainer, x, result_x, result_y, batch_size, var_l, label_txt)))

        z.save(model_file)
    else:
        z = C.load_model(model_file)
        x = C.logging.find_all_with_name(z, "")[-1]

        # Print out all layers in the model
        print("Loading {} and printing all nodes:".format(model_file))
        node_outputs = C.logging.find_all_with_name(z, "")
        for n in node_outputs:
            print("  {}".format(n))

    results = []
    # predict
    # f, a = plt.subplots(2, 1, figsize=(12, 8))
    for j, ds in enumerate(["val", "test"]):
        fig = plt.figure()
        a = fig.add_subplot(2, 1, 1)
        results = []
        for x_batch, y_batch in next_batch(result_x, result_y, ds, batch_size):
            pred = z.eval({x: x_batch})
            results.extend(pred[:, 0])
        # because we normalized the input data we need to multiply the prediction
        # with SCALER to get the real values
        a.plot((result_y[ds]).flatten(), label=ds + " raw")
        a.plot(np.array(results), label=ds + " pred")
        a.legend()
        fig.savefig("{}_chart_{}_epochs.jpg".format(ds, epochs))

    print("Delta: ", time.time() - start_time)
    return result_x, result_y, results
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # test new API: learning_parameter_schedule

    # explicitly specify the reference minibatch size; the learning rate is a number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch
    # with a plain learning rate number, the schedule gets its reference
    # minibatch size from the learner:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    # no explicit reference minibatch size; the learning rate is a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch
    # the schedule's own reference minibatch size takes precedence:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # no explicit reference minibatch size; the learning rate is a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # explicit minibatch_size=IGNORE turns on compatible mode:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9,
                    variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9,
                              variance_momentum=0.9, minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsprop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2,
                          dec=0.7, max=10, min=1e-8, minibatch_size=32)
    assert myrmsprop.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule.minibatch_size == 32
    assert myrmsprop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32,
                  epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                                momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                          minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    # note: the original asserted `myadagrad` here, a copy-paste slip; the
    # just-created myfsadagrad is what these assertions are meant to check
    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                              momentum=[0.9], variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsprop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5,
                          inc=1.2, dec=0.7, max=10, min=1e-8, minibatch_size=32,
                          epoch_size=512)
    assert myrmsprop.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule[0] == 0.4
    assert myrmsprop._learning_rate_schedule[512] == 0.1
    assert myrmsprop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum,
               unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3],
                                                minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                                minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1,
                                                epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
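# A compact illustration (not part of the original test) of the schedule behavior
# the assertions above exercise: a learning_parameter_schedule carries its own
# reference minibatch size and is indexable by sample count.
s = C.learning_parameter_schedule([0.1, 0.01], minibatch_size=32, epoch_size=512)
assert s.minibatch_size == 32
assert s[0] == 0.1     # rate for the first 512 samples
assert s[512] == 0.01  # rate afterwards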
def test_learner_init_legacy():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # for backward-compatibility test
    # this will be deprecated in a future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per-sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backward-compatibility test
    # this will be deprecated in a future version
    # The UnitType provides a per-minibatch instruction for the learner
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backward-compatibility test: in reset_learning_rate, the learner won't receive
    # the reference minibatch size from the schedule; the user needs to specify it explicitly
    # this will be deprecated in a future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backward-compatibility test
    # this will be deprecated in a future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in a future version: this is a logically invalid combination,
    # but it was the only way to use mean gradient and set the learning rate in the past
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    # test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # for backward-compatibility test
    # this will be deprecated in a future version
    # The UnitType provides a per-minibatch instruction for the learner
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backward-compatibility test: in reset_learning_rate, the learner won't receive
    # the reference minibatch size from the schedule; the user needs to specify it explicitly
    # this will be deprecated in a future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # backward-compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                           unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample,
                                           epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
def train(config, model, args, enable_eval=False):
    max_epoch = config['max_epoch']
    batchsize = config['batchsize']
    epoch_size = config['epoch_size']
    lr = config['lr']
    save_freq = config['save_freq']

    # create models
    embed_layer = GloveEmbed()
    inp_ph, train_model, loss_errs = create_train_model(model, embed_layer)
    train_reader, input_map = create_reader('aq_train.ctf', inp_ph, config)
    if enable_eval:
        inp_ph2, pred_sym, gt_sym = create_eval_model(model, embed_layer)
        eval_reader, input_map2 = create_reader('aq_dev.ctf', inp_ph2, config, True)
        i2w = get_i2w(vocabs)

    # create loggers
    progress_printer = C.logging.ProgressPrinter(freq=500, tag="Train")
    tensorboard_writer = C.logging.TensorBoardProgressWriter(
        500, 'tensorlog/{}'.format(args.tensorboard), model=train_model)

    lrs = [(1, lr), (5000, lr * 0.1), (10000, lr * 0.01), (30000, lr * 0.001)]
    learner = C.fsadagrad(
        train_model.parameters,
        # apply the learning rate as if it is a minibatch of size 1
        lr=C.learning_parameter_schedule(lrs),
        momentum=C.momentum_schedule(0.9, minibatch_size=batchsize),
        gradient_clipping_threshold_per_sample=2,
        gradient_clipping_with_truncation=True)
    trainer = C.Trainer(train_model, loss_errs, [learner],
                        [progress_printer, tensorboard_writer])

    total_samples = 0
    for epoch in range(max_epoch):
        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(batchsize, input_map=input_map)
            # do the training
            trainer.train_minibatch(mb_train)
            total_samples += mb_train[list(mb_train.keys())[0]].num_sequences
        trainer.summarize_training_progress()

        # note: the original wrote `epoch + 1 % save_freq`, which parses as
        # `epoch + (1 % save_freq)`; the parenthesized form below is the intent
        if (epoch + 1) % save_freq == 0:
            save_name = '{}_{}.model'.format(config['save_name'], epoch + 1)
            print('save {} in {}'.format(save_name, config['output_dir']))
            trainer.save_checkpoint('output/{}/{}'.format(config['output_dir'], save_name))

        if enable_eval:
            vis_mb = eval_reader.next_minibatch(1, input_map=input_map2)
            pred = pred_sym.eval(vis_mb)[0]
            gt = gt_sym.eval(vis_mb)[0]
            print(pred.shape, gt.shape)
            res = visualize(pred, i2w)
            print("predict res: {}".format(res))
            res = visualize(gt, i2w)
            print("ground truth: {}".format(res))

            pres = 0.0
            count = 1
            while count < 5:
                mb_eval = eval_reader.next_minibatch(512, input_map=input_map2)
                pred = pred_sym.eval(mb_eval)
                gt = gt_sym.eval(mb_eval)
                pres += report_classification_info(pred, gt)
                count += 1
                if not mb_eval:
                    break
            print('average precision:{}'.format(pres / count))
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_test(s2smodel)

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005
    learner = C.fsadagrad(model_train.parameters,
                          # apply the learning rate as if it is a minibatch of size 1
                          lr=C.learning_parameter_schedule_per_sample(
                              [lr] * 2 + [lr / 2] * 3 + [lr / 4], epoch_size),
                          momentum=C.momentum_schedule(0.9366416204111472,
                                                       minibatch_size=minibatch_size),
                          gradient_clipping_threshold_per_sample=2.3,
                          gradient_clipping_with_truncation=True)
    trainer = C.Trainer(None, criterion, learner)

    # records
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    C.logging.log_number_of_parameters(model_train)
    print()
    progress_printer = C.logging.ProgressPrinter(freq=30, tag='Training')

    # a hack to allow us to print sparse vectors
    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)

            # do the training
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # visualizing attention window
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    model_path = "model_%d.cmf" % epoch
    print("Saving final model to '%s'" % model_path)
    s2smodel.save(model_path)
    print("%d epochs complete." % max_epochs)
print(l)

# the learning rate
learning_rate = 0.02
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use fsadagrad optimizer
momentum_time_constant = C.momentum_as_time_constant_schedule(BATCH_SIZE / -math.log(0.9))
learner = C.fsadagrad(z.parameters,
                      lr=lr_schedule,
                      momentum=momentum_time_constant,
                      unit_gain=True)
trainer = C.Trainer(z, (loss, error), [learner])

# train
loss_summary = []
start = time.time()
for epoch in range(0, EPOCHS):
    for x1, y1 in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x1, l: y1})
    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
        loss_summary.append(training_loss)
        print("epoch: {}, loss: {:.5f}".format(epoch, training_loss))
def train_and_test(reader_train, reader_test, model_func):
    ###############################
    # Training the model
    ###############################

    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    model = model_func(input)

    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    epoch_size = 30000
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) // minibatch_size

    lr_per_sample = [3e-4]
    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size)

    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric * 100) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from the training data
    #############################################################################

    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):
        data = reader_test.next_minibatch(test_minibatch_size, input_map=test_input_map)
        eval_error = trainer.test_minibatch(data)
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size

    test_error = (metric_numer * 100) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
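# A hypothetical reader factory (an assumption, not from the original excerpt)
# matching the `reader_train.streams.features` usage above, for an MNIST-style
# CTF file with a dense 'features' stream of width input_dim:
def create_reader(path, is_training, input_dim):
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            features=C.io.StreamDef(field='features', shape=input_dim, is_sparse=False))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)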
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner.learning_rate() == 0.1

    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.nesterov(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant,
               unit_gain=unit_gain_value)

    lr_per_sample = learning_rate_schedule([0.1] * 3 + [0.2] * 2 + [0.3], UnitType.sample)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.1] * 5
    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample, 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.set_default_use_mean_gradient_value(False)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert not use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)

    C.set_default_use_mean_gradient_value(True)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
    (([0.2, 0.4], 0, 5), [0.2] * 5 + [0.4] * 20, 0),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 0, 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20, 0),
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                          momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                               momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                              momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                             gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                                  momentum=C.momentum_schedule(0.9))]


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
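# A sketch (an assumption, not in the original file) of how the LEARNER_LAMBDAS
# table could be consumed: each entry is a factory that receives a parameter list
# and must construct a working learner.
@pytest.mark.parametrize("learner_factory", LEARNER_LAMBDAS)
def test_learner_lambdas_construct(learner_factory):
    p = C.input_variable(shape=(1,), needs_gradient=True, name='a') * C.parameter(shape=(1,))
    learner = learner_factory(p.parameters)
    assert isinstance(learner, C.learners.Learner)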
def train(self, save_intermediates=False, save_prefix='', num_epochs=None):
    print("Setting up model for training...")
    x = self.x_seq
    labels = C.input(shape=(self.batch_size, 3), name="y")
    #labels = C.input_variable(3)

    # the learning rate
    learning_rate = 0.001
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    # loss function
    loss = C.squared_error(self._model, labels)

    # use squared error to determine error for now
    error = C.squared_error(self._model, labels)

    # use fsadagrad optimizer
    momentum_time_constant = C.momentum_as_time_constant_schedule(
        LSTMRegressionModel.BATCH_SIZE_DEFAULT / -math.log(0.9))
    learner = C.fsadagrad(self._model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_time_constant,
                          unit_gain=True)
    trainer = C.Trainer(self._model, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    f, a = plt.subplots(3, 2, figsize=(100, 12))
    x_axis = list(range(len(self.train_x)))
    if num_epochs is None:
        epoch_list = list(range(0, LSTMRegressionModel.EPOCHS_DEFAULT))
    else:
        epoch_list = list(range(0, num_epochs))
    train_loss_epochs = [0.0] * len(epoch_list)
    val_loss_epochs = [0.0] * len(epoch_list)
    test_loss_epochs = [0.0] * len(epoch_list)
    print("done. Starting training.")

    for epoch in epoch_list:
        print("Training on epoch {}".format(epoch))
        for x1, y1 in self._next_batch():
            trainer.train_minibatch({x: x1, labels: y1})

        val_loss = 0.0
        for idx, example in enumerate(self.val_x):
            y = np.array([self.val_y[idx]])
            example = np.array([example])
            loss_amt = loss.eval({x: example, labels: y})[0]
            val_loss += loss_amt
        #print("val loss is {}".format(val_loss))
        val_loss_epochs[epoch] = val_loss

        test_loss = 0.0
        for idx, example in enumerate(self.test_x):
            y = np.array([self.test_y[idx]])
            example = np.array([example])
            loss_amt = loss.eval({x: example, labels: y})[0]
            test_loss += loss_amt
        #print("test loss is {}".format(test_loss))
        test_loss_epochs[epoch] = test_loss

        training_loss = trainer.previous_minibatch_loss_average
        train_loss_epochs[epoch] = training_loss

        if epoch % 100 == 0:
            if save_intermediates:
                self.save(save_prefix + 'WIP-training-epoch-{}.dat'.format(epoch))
            loss_summary.append(training_loss)

            evaluation = []
            for example in self.train_x:
                prediction = self._model.eval({x: example})[0].tolist()
                evaluation.append(prediction)
            evaluation = np.array(evaluation)

            a[0][0].cla()
            a[1][0].cla()
            a[2][0].cla()
            a[0][1].cla()
            a[1][1].cla()
            a[2][1].cla()

            a[0][0].plot(x_axis, evaluation[:, 0], label='approval')
            a[0][0].plot(x_axis, self.train_y[:, 0], label='approval actual')
            a[0][0].set_title("Approval rating prediction over training set")
            a[0][0].legend()

            a[1][0].plot(x_axis, evaluation[:, 1], label='disapproval')
            a[1][0].plot(x_axis, self.train_y[:, 1], label='disapproval actual')
            a[1][0].set_title("Disapproval rating prediction over training set")
            a[1][0].legend()

            a[2][0].plot(x_axis, evaluation[:, 2], label='neutral')
            a[2][0].plot(x_axis, self.train_y[:, 2], label='neutral actual')
            a[2][0].set_title("Neutral rating prediction over training set")
            a[2][0].legend()

            a[0][1].plot(epoch_list, train_loss_epochs)
            a[0][1].set_title("Training loss vs. epochs")
            a[1][1].plot(epoch_list, val_loss_epochs)
            a[1][1].set_title("Validation loss vs. epochs")
            a[2][1].plot(epoch_list, test_loss_epochs)
            a[2][1].set_title("Test loss vs. epochs")

            for axes in a:
                axes[0].set_xlabel('training example number/idx')
                axes[0].set_ylabel('rating (% represented as fraction)')
                axes[1].set_xlabel('epoch number')
                axes[1].set_ylabel('MSE loss')

            display.clear_output(wait=True)
            display.display(plt.gcf())

    print("training took {0:.1f} sec".format(time.time() - start))
def build_graph(noise_shape, image_shape, G_progress_printer, D_progress_printer):
    '''
    The rest of the computational graph is mostly responsible for coordinating
    the training algorithms and parameter updates, which is particularly tricky
    with GANs for a couple of reasons.

    First, the discriminator must be used on both the real MNIST images and the
    fake images generated by the generator function. One way to represent this
    in the computational graph is to create a clone of the output of the
    discriminator function, but with substituted inputs. Setting method=share
    in the clone function ensures that both paths through the discriminator
    model use the same set of parameters.

    Second, we need to update the parameters of the generator and discriminator
    models separately, using the gradients from different loss functions. We can
    get the parameters for a Function in the graph with the parameters attribute.
    However, when updating the model parameters, we update only the parameters of
    the respective model while keeping the other parameters unchanged. In other
    words, when updating the generator we update only the parameters of the
    G function while keeping the parameters of the D function fixed, and vice versa.
    '''
    input_dynamic_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=input_dynamic_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=input_dynamic_axes)
    X_real_scaled = 2 * (X_real / 255.0) - 1.0

    # Create the model function for the generator and discriminator models
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method='share',
        substitutions={X_real_scaled.output: X_fake.output}
    )

    # Create loss functions and configure optimization algorithms
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    G_learner = C.fsadagrad(
        parameters=X_fake.parameters,
        lr=C.learning_parameter_schedule_per_sample(lr),
        momentum=C.momentum_schedule_per_sample(0.9985724484938566)
    )
    D_learner = C.fsadagrad(
        parameters=D_real.parameters,
        lr=C.learning_parameter_schedule_per_sample(lr),
        momentum=C.momentum_schedule_per_sample(0.9985724484938566)
    )

    # Instantiate the trainers
    G_trainer = C.Trainer(
        X_fake,
        (G_loss, None),
        G_learner,
        G_progress_printer
    )
    D_trainer = C.Trainer(
        D_real,
        (D_loss, None),
        D_learner,
        D_progress_printer
    )

    return X_real, X_fake, Z, G_trainer, D_trainer
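# A tiny demonstration (a sketch, not from the original source) of the
# clone(method='share') semantics the docstring describes: the cloned function
# reuses the original parameter objects, so gradients through either path
# update the same weights.
def _clone_share_demo():
    x = C.input_variable(2)
    d = C.layers.Dense(1)(x)
    y = C.input_variable(2)
    d_clone = d.clone(method='share', substitutions={x: y})
    # both paths expose the identical parameter objects
    assert {p.uid for p in d.parameters} == {p.uid for p in d_clone.parameters}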
# expected output (label); the dynamic axes of the model output
# are also specified as the dynamic axes of the label input
l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

# the learning rate
learning_rate = 0.005
lr_schedule = C.learning_parameter_schedule(learning_rate)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use fsadagrad optimizer
momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
learner = C.fsadagrad(z.parameters,
                      lr=lr_schedule,
                      momentum=momentum_schedule)
trainer = C.Trainer(z, (loss, error), [learner])

# training
loss_summary = []

# time to start training
start = time.time()
for epoch in range(0, EPOCHS):
    for x_batch, l_batch in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x_batch, l: l_batch})
    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
        loss_summary.append(training_loss)
        print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))
net = create_model(input/255.0)

# loss and error calculations
target_normalized = target/255.0
loss = -(target_normalized * C.log(net) + (1 - target_normalized) * C.log(1 - net))
label_error = C.classification_error(net, target_normalized)

# Instantiate the trainer object to drive the model training
lr_per_sample = [0.0001]
learning_rate_schedule = C.learning_rate_schedule(lr_per_sample, C.UnitType.sample,
                                                  epoch_size=int(num_training_samples/2.0))

# Momentum
momentum_as_time_constant = C.momentum_as_time_constant_schedule(200)

# Define the learner
learner = C.fsadagrad(net.parameters,
                      lr=learning_rate_schedule,
                      momentum=momentum_as_time_constant)

# Instantiate the trainer
progress_printer = C.logging.ProgressPrinter(0)
train_op = C.Trainer(net, (loss, label_error), learner, progress_printer)

###############################
########## Training ###########
###############################

# Plot data dictionary
plotdata = {"iteration": [], "loss": [], "error": []}

# Initialize the parameters for the trainer
num_iterations = (num_training_samples * num_epochs) / batch_size
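Two momentum idioms recur across these snippets: momentum_as_time_constant_schedule(tau), as in the snippet above, and a raw per-sample constant such as 0.9985724484938566, as in the GAN example earlier. They are related by m = exp(-1/tau). A quick check of both directions, using numpy only:

import numpy as np

# A time constant tau corresponds to a per-sample momentum of exp(-1/tau),
# so momentum_as_time_constant_schedule(200) means momentum ~0.99501 per sample.
tau = 200
momentum_per_sample = np.exp(-1.0 / tau)        # ~0.995012

# Going the other way, the constant 0.9985724484938566 used above is exp(-1/700),
# i.e. a time constant of roughly 700 samples.
tau_back = -1.0 / np.log(0.9985724484938566)    # ~700.0
print(momentum_per_sample, tau_back)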
def train(i2w, data_path, model_path, log_file, config_file, restore=True,
          profiling=False, gen_heartbeat=False):
    polymath = PolyMath(config_file)
    z, loss = polymath.model()
    training_config = importlib.import_module(config_file).training_config

    max_epochs = training_config['max_epochs']
    log_freq = training_config['log_freq']

    progress_writers = [
        C.logging.ProgressPrinter(num_epochs=max_epochs,
                                  freq=log_freq,
                                  tag='Training',
                                  log_to_file=log_file,
                                  metric_is_pct=False,
                                  rank=C.Communicator.rank(),
                                  gen_heartbeat=gen_heartbeat)
    ]

    lr = C.learning_parameter_schedule(training_config['lr'],
                                       minibatch_size=None,
                                       epoch_size=None)

    ema = {}
    dummies = []
    for p in z.parameters:
        ema_p = C.constant(0, shape=p.shape, dtype=p.dtype, name='ema_%s' % p.uid)
        ema[p.uid] = ema_p
        dummies.append(C.reduce_sum(C.assign(ema_p, 0.999 * ema_p + 0.001 * p)))
    dummy = C.combine(dummies)

    # learner = C.adadelta(z.parameters, lr)
    learner = C.fsadagrad(
        z.parameters,
        # apply the learning rate as if it is a minibatch of size 1
        lr,
        momentum=C.momentum_schedule(0.9366416204111472,
                                     minibatch_size=training_config['minibatch_size']),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)

    if C.Communicator.num_workers() > 1:
        learner = C.data_parallel_distributed_learner(learner)

    trainer = C.Trainer(z, loss, learner, progress_writers)

    if profiling:
        C.debugging.start_profiler(sync_gpu=True)

    train_data_file = os.path.join(data_path, training_config['train_data'])
    train_data_ext = os.path.splitext(train_data_file)[-1].lower()

    model_file = os.path.join(model_path, model_name)
    model = C.combine(z.outputs + loss.outputs)  # this is for validation only

    epoch_stat = {'best_val_err': 1000, 'best_since': 0, 'val_since': 0}
    print(restore, os.path.isfile(model_file))
    if restore and os.path.isfile(model_file):
        z.restore(model_file)
        # after restore, always re-evaluate
        # TODO replace with ROUGE-L with external script (possibly)
        # epoch_stat['best_val_err'] = validate_model(i2w, os.path.join(data_path, training_config['val_data']), model, polymath)

    def post_epoch_work(epoch_stat):
        trainer.summarize_training_progress()
        epoch_stat['val_since'] += 1

        if epoch_stat['val_since'] == training_config['val_interval']:
            epoch_stat['val_since'] = 0
            temp = dict((p.uid, p.value) for p in z.parameters)
            for p in trainer.model.parameters:
                p.value = ema[p.uid].value
            # TODO replace with ROUGE-L with external script (possibly)
            val_err = validate_model(
                i2w, os.path.join(data_path, training_config['val_data']),
                model, polymath)
            # if epoch_stat['best_val_err'] > val_err:
            #     epoch_stat['best_val_err'] = val_err
            #     epoch_stat['best_since'] = 0
            #     trainer.save_checkpoint(model_file)
            #     for p in trainer.model.parameters:
            #         p.value = temp[p.uid]
            # else:
            #     epoch_stat['best_since'] += 1
            #     if epoch_stat['best_since'] > training_config['stop_after']:
            #         return False
            z.save(model_file)
            epoch_stat['best_since'] += 1
            if epoch_stat['best_since'] > training_config['stop_after']:
                return False

        if profiling:
            C.debugging.enable_profiler()

        return True

    init_pointer_importance = polymath.pointer_importance

    if train_data_ext == '.ctf':
        mb_source, input_map = create_mb_and_map(loss, train_data_file, polymath)

        minibatch_size = training_config['minibatch_size']  # number of samples
        epoch_size = training_config['epoch_size']

        for epoch in range(max_epochs):
            num_seq = 0
            while True:
                if trainer.total_number_of_samples_seen >= training_config['distributed_after']:
                    data = mb_source.next_minibatch(
                        minibatch_size * C.Communicator.num_workers(),
                        input_map=input_map,
                        num_data_partitions=C.Communicator.num_workers(),
                        partition_index=C.Communicator.rank())
                else:
                    data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
                trainer.train_minibatch(data)
                num_seq += trainer.previous_minibatch_sample_count
                dummy.eval()
                if num_seq >= epoch_size:
                    break
            if not post_epoch_work(epoch_stat):
                break
            print('Before Pointer_importance:', polymath.pointer_importance)
            if polymath.pointer_importance > 0.1 * init_pointer_importance:
                polymath.pointer_importance = polymath.pointer_importance * 0.9
                print('Pointer_importance:', polymath.pointer_importance)
    else:
        if train_data_ext != '.tsv':
            raise Exception("Unsupported format")

        minibatch_seqs = training_config['minibatch_seqs']  # number of sequences

        for epoch in range(max_epochs):  # loop over epochs
            tsv_reader = create_tsv_reader(loss, train_data_file, polymath,
                                           minibatch_seqs, C.Communicator.num_workers())
            minibatch_count = 0
            for data in tsv_reader:
                if (minibatch_count % C.Communicator.num_workers()) == C.Communicator.rank():
                    trainer.train_minibatch(data)  # update the model with it
                    dummy.eval()
                minibatch_count += 1
            if not post_epoch_work(epoch_stat):
                break

    if profiling:
        C.debugging.stop_profiler()
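In the '.ctf' branch above, once distributed_after samples have been seen, each worker requests its own partition of a num_workers-times larger global minibatch. A minimal sketch of that reader pattern, with a hypothetical CTF file data.ctf exposing one dense stream (the file name and stream layout are illustrative):

import cntk as C

# Hypothetical CTF source with one dense 'features' stream of dim 3.
mb_source = C.io.MinibatchSource(C.io.CTFDeserializer('data.ctf', C.io.StreamDefs(
    features=C.io.StreamDef(field='x', shape=3))))

minibatch_size = 64
num_workers = C.Communicator.num_workers()
rank = C.Communicator.rank()

# Each worker reads only its slice of the global minibatch.
data = mb_source.next_minibatch(minibatch_size * num_workers,
                                num_data_partitions=num_workers,
                                partition_index=rank)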
def forecast(self):
    # Map each letter to a number between 0 and 1
    alpha_to_num_step = float(1 / self.alphabet_size)
    alpha_to_num_shift = float(alpha_to_num_step / 2)

    # Dict = [floor, point, ceiling]
    alpha_to_num = dict()
    for i in range(self.alphabet_size):
        step = (alpha_to_num_step * i)
        alpha_to_num[chr(97 + i)] = [
            step, step + alpha_to_num_shift, step + alpha_to_num_step
        ]

    x, y, last_sax_word = self._prepare_data(alpha_to_num)

    response = {
        "last_sax_word": "Fail",
        "forecast_sax_letter": "Fail",
        "position_in_sax_interval": -1
    }

    if not x or not y or not last_sax_word:
        error_msg = "Error while preparing data!"
        log.error(error_msg)
        response["error"] = error_msg
        return response

    # Try to optimize settings for training.
    # Forbid heavy training (max 200k input rows from CSV or financial data).
    batch_size = self.window_len * self.word_len
    if batch_size != 1152:
        batch_size = 1152
    h_dims = self.word_len + 1
    epochs = 100

    if len(x["train"]) > 200000:
        error_msg = "Configured data set too large (max: 200k): {}".format(
            len(x["train"]))
        log.error(error_msg)
        response["error"] = error_msg
        return response
    if len(x["train"]) < 100000:
        epochs = 250
    if len(x["train"]) < 40000:
        epochs = 500
    if len(x["train"]) < 20000:
        epochs = 1000

    log.debug("Training Info:")
    log.debug("len(x[train]): {}".format(len(x["train"])))
    log.debug("Epochs : {}".format(epochs))
    log.debug("Batch Size : {}".format(batch_size))

    if x and y:
        input_node = C.sequence.input_variable(1)
        z = self._create_model(input_node, h_dims)
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)
        loss = C.squared_error(z, var_l)
        error = C.squared_error(z, var_l)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
        learner = C.fsadagrad(z.parameters, lr=lr_schedule, momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []
        start = time.time()
        for epoch in range(0, epochs):
            for x_batch, l_batch in self._next_batch(x, y, "train", batch_size):
                trainer.train_minibatch({input_node: x_batch, var_l: l_batch})
            if epoch % (epochs / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                log.debug("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                    epoch, training_loss, time.time() - start))

        pred, position_in_sax_interval = self._get_pred(
            z, input_node, last_sax_word, alpha_to_num)
        forecast_sax_letter = self._get_letter(pred, alpha_to_num)

        log.debug("================= PRED =====================")
        log.debug("last_sax_word : {}".format(last_sax_word))
        log.debug("pred : {}".format(pred))
        log.debug("forecast_sax_letter : {}".format(forecast_sax_letter))
        log.debug("position_in_sax_interval: {}".format(position_in_sax_interval))
        log.debug("============================================")

        response["last_sax_word"] = last_sax_word
        response["forecast_sax_letter"] = forecast_sax_letter
        response["position_in_sax_interval"] = position_in_sax_interval
    else:
        error_msg = "X and/or Y with no length: {} and {}".format(len(x), len(y))
        log.error(error_msg)
        response["error"] = error_msg

    return response
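For concreteness, here is the [floor, midpoint, ceiling] mapping that _prepare_data consumes, worked out for alphabet_size = 4 (step = 0.25, shift = 0.125):

# Worked example of the letter-to-number mapping above, for alphabet_size = 4.
alphabet_size = 4
step = 1.0 / alphabet_size
shift = step / 2
alpha_to_num = {chr(97 + i): [step * i, step * i + shift, step * (i + 1)]
                for i in range(alphabet_size)}
print(alpha_to_num)
# {'a': [0.0, 0.125, 0.25], 'b': [0.25, 0.375, 0.5],
#  'c': [0.5, 0.625, 0.75], 'd': [0.75, 0.875, 1.0]}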
def test_learner_init_legacy():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))
    res = i * w

    # back-compatibility test; this will be deprecated in a future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per-sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # back-compatibility test; this will be deprecated in a future version.
    # The UnitType provides a per-minibatch instruction for the learner.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # back-compatibility test: when resetting the learning rate, the learner won't receive
    # the reference minibatch size from the schedule; the user needs to specify it explicitly.
    # This will be deprecated in a future version.
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # back-compatibility test; this will be deprecated in a future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # This will be deprecated in a future version: this is a logically invalid combination,
    # but it was the only way to use the mean gradient and set the learning rate in the past.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    # test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # back-compatibility test; this will be deprecated in a future version.
    # The UnitType provides a per-minibatch instruction for the learner.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # back-compatibility test: when resetting the learning rate, the learner won't receive
    # the reference minibatch size from the schedule; the user needs to specify it explicitly.
    # This will be deprecated in a future version.
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back-compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
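These legacy checks pin down how the old UnitType schedules map onto the new minibatch_size parameter: per-sample means a reference minibatch of 1, while per-minibatch means "do not rescale" (C.learners.IGNORE). A minimal side-by-side sketch of the correspondence the asserts above exercise:

import cntk as C
from cntk.learners import learning_rate_schedule, learning_parameter_schedule, UnitType

# Legacy: the rate is interpreted per sample.
old_per_sample = learning_rate_schedule(0.1, UnitType.sample)
# New: the same intent, via an explicit reference minibatch size of 1.
new_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)

# Legacy: the rate is applied to whatever minibatch arrives, unscaled.
old_per_minibatch = learning_rate_schedule(0.1, UnitType.minibatch)
# New: minibatch_size=C.learners.IGNORE means no rescaling.
new_per_minibatch = learning_parameter_schedule(0.1, minibatch_size=C.learners.IGNORE)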
def main():
    window_len = int(input("window_len: "))
    word_len = int(input("word_len: "))
    alphabet_len = int(input("alphabet_len: "))
    epochs = input("Epochs: ")
    if not epochs == "":
        epochs = int(epochs)
    else:
        epochs = 100
    # batch_size = window_len * (word_len - 1)
    batch_size = int(input("Batch size: "))
    h_dims = word_len + 1

    alpha_to_num_step = float(1 / alphabet_len)
    alpha_to_num_shift = float(alpha_to_num_step / 2)

    # Dict = [floor, point, ceiling]
    alpha_to_num = dict()
    for i in range(alphabet_len):
        step = (alpha_to_num_step * i)
        alpha_to_num[chr(97 + i)] = [
            step, step + alpha_to_num_shift, step + alpha_to_num_step
        ]

    model_file = "{}_{}_{}_{}.model".format(window_len, word_len, alphabet_len, epochs)
    opt_model_file = input("Change model name [{}]? ".format(model_file))
    if opt_model_file != "" and opt_model_file != "n":
        model_file = opt_model_file

    x, y = prepare_data(window_len, word_len, alphabet_len, alpha_to_num)

    if input("Training? ") == "y":
        input_node = C.sequence.input_variable(1)
        z = create_model(input_node, h_dims)
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)
        loss = C.squared_error(z, var_l)
        error = C.squared_error(z, var_l)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
        learner = C.fsadagrad(z.parameters, lr=lr_schedule, momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []
        start = time.time()
        for epoch in range(0, epochs):
            for x_batch, l_batch in next_batch(x, y, "train", batch_size):
                trainer.train_minibatch({input_node: x_batch, var_l: l_batch})
            if epoch % (epochs / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                    epoch, training_loss, time.time() - start))
        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for label_txt in ["train", "val", "test"]:
            if label_txt in x:
                print("mse for {}: {:.6f}".format(
                    label_txt,
                    get_mse(trainer, input_node, x, y, batch_size, var_l, label_txt)))

        z.save(model_file)
    else:
        z = C.load_model(model_file)
        input_node = C.logging.find_all_with_name(z, "")[-1]

    # Print out all layers in the model
    print("Loading {} and printing all nodes:".format(model_file))
    node_outputs = C.logging.find_all_with_name(z, "")
    for n in node_outputs:
        print("  {}".format(n))

    # predict
    last_p_result = []
    last_p_y = []
    for j, ds in enumerate(["val", "test"]):
        if ds in x:
            fig = plt.figure()
            chart = fig.add_subplot(2, 1, 1)
            results = []
            for x_batch, _ in next_batch(x, y, ds, batch_size):
                pred = z.eval({input_node: x_batch})
                results.extend(pred[:, 0])
            if results:
                print("LAST_PRED({}): {}".format(ds, results[-1]))

            last_p_y = []
            for idx, i in enumerate(y[ds]):
                if (idx + 1) % (word_len + 1) == 0:
                    last_p_y.append(i)
            chart.plot(np.array(last_p_y).flatten(), label=ds + " raw")

            last_p_result = []
            for idx, i in enumerate(results):
                if (idx + 1) % (word_len + 1) == 0:
                    alpha_list = sorted(alpha_to_num)
                    a = "a"
                    for a in alpha_list[::-1]:
                        if i >= alpha_to_num[a][0]:
                            break
                    last_p_result.append(alpha_to_num[a][1])
            chart.plot(np.array(last_p_result), label=ds + " pred")
            chart.legend()
            fig.savefig("{}_chart_{}_epochs.jpg".format(ds, epochs))

            correct_pred = dict()
            for idx, _ in enumerate(last_p_y):
                print("{}: {} == {} ({})".format(
                    idx, last_p_result[idx], float(last_p_y[idx][0]),
                    last_p_result[idx] - float(last_p_y[idx][0])))
                alpha_list = sorted(alpha_to_num)
                for pred_a in alpha_list[::-1]:
                    if last_p_result[idx] >= alpha_to_num[pred_a][0]:
                        pred_l_num = ord(pred_a)
                        for y_a in alpha_list[::-1]:
                            if float(last_p_y[idx][0]) >= alpha_to_num[y_a][0]:
                                stp = abs(ord(y_a) - pred_l_num)
                                print("stp: ", stp)
                                if stp not in correct_pred:
                                    correct_pred[stp] = 1
                                else:
                                    correct_pred[stp] += 1
                                break
                        break

            for k, v in correct_pred.items():
                print("Set({}) Delta[{}]: {}/{} = {:.4f}".format(
                    ds, k, v, len(last_p_y), float(v / len(last_p_y))))
            print("len(last_p_y): ", len(last_p_y))
            print("len(last_p_result): ", len(last_p_result))

    word_pred = input("Word to get pred (cedcaadc): ")
    if word_pred == "":
        word_pred = "cedcaadc"
    r, perc = get_pred(z, input_node, word_pred, window_len, alpha_to_num)
    print("================= PRED =====================")
    print("r = ", r)
    print("perc = ", perc)
    print("============================================")
    for k, v in alpha_to_num.items():
        print(k, v)

    return x, y, last_p_result, last_p_y
def train_and_test(reader_train, reader_test, model_func):

    ###############################################
    # Training the model
    ###############################################

    # Instantiate the input and the label variables
    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    # Create the model function
    model = model_func(input)

    # The labels for this network are the same as the input MNIST images.
    # Note: inside the model we scale the input to the 0-1 range, hence we
    # rescale the label to the same range.
    # We show how one can use a custom loss function:
    # loss = -(y * log(p) + (1-y) * log(1-p)) where p = model output and y = target
    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    # training config
    epoch_size = 30000  # 30000 samples is half the dataset size
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) // minibatch_size

    # Instantiate the trainer object to drive the model training
    lr_per_sample = [0.00003]
    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size)

    # Momentum which is applied on every minibatch_size = 64 samples
    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    # We use a variant of the Adam optimizer which is known to work well on this dataset.
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    # Instantiate the trainer
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # Map the data streams to the input and labels.
    # Note: for autoencoders input == label
    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        # Read a minibatch from the training data file
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)

        # Run the trainer and perform model training
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric * 100.0) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from the training data
    #############################################################################

    # Test data for the trained model
    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):
        # We are loading test data in batches specified by test_minibatch_size.
        # Each data point in the minibatch is an MNIST digit image of 784 dimensions
        # with one pixel per dimension that we will encode / decode with the
        # trained model.
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be tested with
        eval_error = trainer.test_minibatch(data)
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size

    # Average of evaluation errors of all test minibatches
    test_error = (metric_numer * 100.0) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
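The hand-rolled loss in train_and_test is per-pixel binary cross-entropy, which CNTK also exposes directly as C.binary_cross_entropy (used in an earlier snippet). A small sketch comparing the two on dummy data; the names p and t are placeholders introduced here, not from the original, and the built-in is expected to match the summed form:

import numpy as np
import cntk as C

p = C.input_variable(3)   # stand-in for sigmoid model outputs
t = C.input_variable(3)   # stand-in for targets already scaled to [0, 1]

manual = C.reduce_sum(-(t * C.log(p) + (1 - t) * C.log(1 - p)))
builtin = C.binary_cross_entropy(p, t)

probs = np.array([[0.2, 0.7, 0.9]], dtype=np.float32)
targets = np.array([[0.0, 1.0, 1.0]], dtype=np.float32)

# Both should print the same summed cross-entropy value.
print(manual.eval({p: probs, t: targets}))
print(builtin.eval({p: probs, t: targets}))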
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))
    res = i * w

    # test the new API: learning_parameter_schedule

    # explicit reference minibatch size; the learning rate is given as a number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch size
    # with a plain-number learning rate, the schedule takes its reference minibatch size from the learner:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    # no explicit reference minibatch size; the learning rate is given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # the schedule's own reference minibatch size takes precedence over the learner's:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # no explicit reference minibatch size; the learning rate is given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # minibatch_size=C.learners.IGNORE turns on compatible mode:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9,
                    variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9,
                              variance_momentum=0.9, minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2,
                         dec=0.7, max=10, min=1e-8, minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                  minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                                momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                          minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                              momentum=[0.9], variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5,
                         inc=1.2, dec=0.7, max=10, min=1e-8,
                         minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1]*3 + [0.2]*2 + [0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
def main():
    # We keep up to 14 inputs from a day
    TIMESTEPS = int(input("TIMESTEPS: "))

    # 20000 is the maximum total output in our dataset. We normalize all values
    # with this so our inputs are in the 0.0 to 1.0 range.
    NORMALIZE = int(input("NORMALIZE: "))

    # process batches of 10 days
    BATCH_SIZE = int(input("BATCH_SIZE: "))
    BATCH_SIZE_TEST = int(input("BATCH_SIZE_TEST: "))

    # Specify the internal-state dimensions of the LSTM cell
    H_DIMS = int(input("H_DIMS: "))

    data_source = input("Source(1=solar,2=local,3=sin,4=my): ")
    if data_source == "1" or data_source == "":
        X, Y = get_solar_old(TIMESTEPS, NORMALIZE)
    elif data_source == "2":
        X, Y = get_solar(TIMESTEPS, NORMALIZE)
    elif data_source == "3":
        X, Y = get_sin(5, 5, input("Data length: "))
    else:
        X, Y = get_my_data(H_DIMS, H_DIMS)

    epochs = input("Epochs: ")
    if epochs == "":
        EPOCHS = 100
    else:
        EPOCHS = int(epochs)

    start_time = time.time()

    # input sequences
    x = C.sequence.input_variable(1)

    model_file = "{}_epochs.model".format(EPOCHS)
    if not os.path.exists(model_file):
        print("Training model {}...".format(model_file))

        # create the model
        z = create_model(x, H_DIMS)

        # expected output (label); the dynamic axes of the model output are
        # specified as the dynamic axes of the label input
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

        # the learning rate
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)

        # loss function
        loss = C.squared_error(z, var_l)

        # use squared error to determine error for now
        error = C.squared_error(z, var_l)

        # use the fsadagrad optimizer
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
        learner = C.fsadagrad(z.parameters, lr=lr_schedule, momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []
        start = time.time()
        for epoch in range(0, EPOCHS):
            for x_batch, l_batch in next_batch(X, Y, "train", BATCH_SIZE):
                trainer.train_minibatch({x: x_batch, var_l: l_batch})
            if epoch % (EPOCHS / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))
        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for labeltxt in ["train", "val", "test"]:
            print("mse for {}: {:.6f}".format(
                labeltxt, get_mse(trainer, x, X, Y, BATCH_SIZE, var_l, labeltxt)))

        z.save(model_file)
    else:
        z = C.load_model(model_file)
        x = C.logging.find_all_with_name(z, "")[-1]

    # Print out all layers in the model
    print("Loading {} and printing all nodes:".format(model_file))
    node_outputs = C.logging.find_all_with_name(z, "")
    for n in node_outputs:
        print("  {}".format(n))

    # predict
    # f, a = plt.subplots(2, 1, figsize=(12, 8))
    for j, ds in enumerate(["val", "test"]):
        fig = plt.figure()
        a = fig.add_subplot(2, 1, 1)
        results = []
        for x_batch, y_batch in next_batch(X, Y, ds, BATCH_SIZE_TEST):
            pred = z.eval({x: x_batch})
            results.extend(pred[:, 0])
        # because we normalized the input data, we need to multiply the prediction
        # by NORMALIZE to get the real values
        a.plot((Y[ds] * NORMALIZE).flatten(), label=ds + " raw")
        a.plot(np.array(results) * NORMALIZE, label=ds + " pred")
        a.legend()
        fig.savefig("{}_chart_{}_epochs.jpg".format(ds, EPOCHS))

    print("Delta: ", time.time() - start_time)
    (([0.2, 0.4], 0, 5), [0.2]*5 + [0.4]*20, 0),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 0, 5), [0.2]*15 + [0.4]*10 + [0.8]*20, 0),
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2]*5 + [0.4]*20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2]*15 + [0.4]*10 + [0.8]*20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
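The parameter tables above encode the expansion rule for piecewise schedules: each (count, value) pair holds for count * epoch_size units, and the last value then holds indefinitely. A quick check of that expansion for the [(3, 0.2), (2, 0.4), (1, 0.8)] case with epoch_size=5:

from cntk.learners import learning_parameter_schedule

s = learning_parameter_schedule([(3, 0.2), (2, 0.4), (1, 0.8)], epoch_size=5)
assert [s[i] for i in range(15)] == [0.2] * 15       # first 3 * 5 units
assert [s[i] for i in range(15, 25)] == [0.4] * 10   # next 2 * 5 units
assert s[25] == s[1000] == 0.8                       # the last value holds forever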