# NOTE (assumption): the imports below reconstruct what this script needs to run.
# `read_data_validation`, `criteria`, `EncoderRegressionModel` and `Discriminator`
# are project-local modules/classes; adjust their import paths to your own layout.
import math

import numpy as np
import pandas
import chainer
import chainer.functions as f
from chainer import serializers
from chainer.dataset import convert

import criteria
import read_data_validation
from model import EncoderRegressionModel, Discriminator  # assumed module name


def main(encoder_n_units=32, common_size=16, regression_n_units=32, discriminator_n_units=32,
         batch_size_def=None, epoch=3000, data_set_name=None, validation_patience_original=1000,
         train_size=0.7, save_code=False):
    if batch_size_def is None:
        batch_size_def = [6, 20]
    if data_set_name is None:
        data_set_name = ['china', 'kitchenham']

    print("--------------------------------------------- Reading data... ---------------------------------------------")
    train = []
    validation = []
    test = []
    in_size = []
    x_train = []
    y_train = []
    x_validation = []
    y_validation = []
    x_test = []
    y_test = []
    for i in range(len(data_set_name)):
        a_name = data_set_name[i]
        a_train, a_validation, a_test, a_in_size, a_x_train, a_y_train, a_x_validation, a_y_validation, a_x_test, \
            a_y_test = read_data_validation.get_train_and_test(dataset=a_name, train_size=train_size,
                                                               validation_size=0.5)
        train.append(a_train)
        validation.append(a_validation)
        test.append(a_test)
        in_size.append(a_in_size)
        x_train.append(a_x_train)
        y_train.append(a_y_train)
        # Bug fix: the original appended the accumulator lists to themselves
        # instead of the per-dataset values returned above.
        x_validation.append(a_x_validation)
        y_validation.append(a_y_validation)
        x_test.append(a_x_test)
        y_test.append(a_y_test)

    # Prepare one training iterator per dataset.
    train_iter = []
    for i in range(len(data_set_name)):
        a_train_iter = chainer.iterators.SerialIterator(train[i], batch_size_def[i])
        train_iter.append(a_train_iter)

    # Build the encoder/regression model (the "generator").
    print("--------------------------------------- Building model... ---------------------------------------------------")
    model = EncoderRegressionModel(in_size=in_size, encoder_n_units=encoder_n_units,
                                   regression_n_units=regression_n_units, common_out_size=common_size)
    model_optimizer = chainer.optimizers.Adam()
    model_optimizer.setup(model)

    # Build the discriminator.
    discriminator = Discriminator(common_size, discriminator_n_units)
    discriminator_optimizer = chainer.optimizers.SGD(lr=0.001)
    discriminator_optimizer.setup(discriminator)

    # -------------------------------- Measures: Pred(25), MdAE, SA, RE* --------------------------------------------
    def test_measure(data_index, which_measure):
        x_array, t_array = convert.concat_examples(test[data_index])
        x = chainer.Variable(x_array)
        y_test_predict = model.forward_2(x).data
        # These datasets use log-transformed effort, so undo the transform before scoring.
        if data_set_name[data_index] in ['cocnas', 'maxwell', 'opens']:
            y_test_predict = np.power(math.e, y_test_predict)
            t_array = np.power(math.e, t_array)
        if which_measure == 'Pred(25)':
            return criteria.pred25(t_array, y_test_predict)
        elif which_measure == 'MdAE':
            return criteria.mae(t_array, y_test_predict)
        elif which_measure == 'SA':
            return criteria.sa(t_array, y_test_predict)
        elif which_measure == 'RE*':
            return criteria.re(t_array, y_test_predict)
        else:
            return None

    def validation_measure(data_index, which_measure):
        x_array, t_array = convert.concat_examples(validation[data_index])
        x = chainer.Variable(x_array)
        y_validation_predict = model.forward_2(x).data
        if data_set_name[data_index] in ['cocnas', 'maxwell', 'opens']:
            y_validation_predict = np.power(math.e, y_validation_predict)
            t_array = np.power(math.e, t_array)
        if which_measure == 'Pred(25)':
            return criteria.pred25(t_array, y_validation_predict)
        elif which_measure == 'MdAE':
            return criteria.mae(t_array, y_validation_predict)
        elif which_measure == 'SA':
            return criteria.sa(t_array, y_validation_predict)
        elif which_measure == 'RE*':
            return criteria.re(t_array, y_validation_predict)
        else:
            return None

    # Training losses
    def discriminator_loss_fun(x1, x2, y1, y2):
        # Classify codes from encoder 1 (labels y1) and encoder 2 (labels y2).
        y1_hat = discriminator(x1)
        y1_hat = y1_hat.reshape(len(y1_hat))
        loss1 = f.sigmoid_cross_entropy(y1_hat, y1)
        y2_hat = discriminator(x2)
        y2_hat = y2_hat.reshape(len(y2_hat))
        loss2 = f.sigmoid_cross_entropy(y2_hat, y2)
        loss = loss1 + loss2
        dis_loss.append(loss.data)
        return loss

    def loss_fun(x1, x2, y2, label1, label2):
        encoder_1_output_def, encoder_2_output_def, decoder_1_output, regression_2_output_def = model(x1, x2)
        regression_2_output_def = regression_2_output_def.reshape((len(regression_2_output_def), 1))
        decoder_1_loss = f.mean_absolute_error(decoder_1_output, x1)
        regression_2_loss = f.mean_absolute_error(regression_2_output_def, y2)
        # Generator loss: the encoders try to fool the discriminator.
        y1_hat = discriminator(encoder_1_output_def)
        y1_hat = y1_hat.reshape(len(y1_hat))
        encoder_1_loss = f.sigmoid_cross_entropy(y1_hat, label1)
        y2_hat = discriminator(encoder_2_output_def)
        y2_hat = y2_hat.reshape(len(y2_hat))
        encoder_2_loss = f.sigmoid_cross_entropy(y2_hat, label2)
        loss = decoder_1_loss + regression_2_loss * 2 + encoder_1_loss + encoder_2_loss
        loss_all.append(loss.data)
        # print("Generator loss = ", loss.data)
        return loss

    print("---------------------------------------------------- Training... --------------------------------------------")
    # Bug fix: chainer.using_config() is a context manager and has no effect when called
    # outside a `with` block, so set the global training flag directly.
    chainer.config.train = True
    running = True
    # Store the losses.
    loss_all = []
    dis_loss = []
    validation_frequency = 1
    validation_patience = validation_patience_original
    # Measures: [Pred(25), MdAE, SA, RE*]
    best_validation = [0, 0, 0, 0]
    best_test = [0, 0, 0, 0]

    while running:
        # Stop once every dataset's iterator has reached the epoch limit.
        running_count = 0
        for i in range(len(data_set_name)):
            if train_iter[i].epoch < epoch:
                running_count += 1
        if running_count == 0:
            running = False

        # Get one batch from each dataset.
        batch1 = train_iter[0].next()
        x_array, t_array = convert.concat_examples(batch1)
        input_x1 = chainer.Variable(x_array)
        input_y1 = chainer.Variable(t_array)  # kept for symmetry; not used in the losses
        batch2 = train_iter[1].next()
        x_array, t_array = convert.concat_examples(batch2)
        input_x2 = chainer.Variable(x_array)
        input_y2 = chainer.Variable(t_array)

        # Train the discriminator on the current encoder outputs.
        encoder_1_output, encoder_2_output, decoder_1_output, regression_2_output = model(input_x1, input_x2)
        zeros = np.zeros(len(encoder_1_output), dtype=np.int32)
        ones = np.ones(len(encoder_2_output), dtype=np.int32)
        discriminator_optimizer.update(discriminator_loss_fun, encoder_1_output, encoder_2_output, zeros, ones)

        # Train the generator (encoders, decoder and regressor) with flipped labels.
        zeros = np.zeros(len(encoder_2_output), dtype=np.int32)
        ones = np.ones(len(encoder_1_output), dtype=np.int32)
        model_optimizer.update(loss_fun, input_x1, input_x2, input_y2, ones, zeros)

        # Validation with early stopping.
        validation_patience -= 1
        if train_iter[1].epoch % validation_frequency == 0:
            # Compute Pred(25) on the validation split of the target dataset.
            validation_pred25 = validation_measure(1, "Pred(25)")
            if validation_pred25 >= best_validation[0]:
                best_validation[0] = validation_pred25
                # Evaluate on the test dataset.
                test_pred25 = test_measure(1, "Pred(25)")
                if test_pred25 > best_test[0]:
                    best_test[0] = test_pred25
                # Save the model and reset the patience counter.
                serializers.save_npz('../models/multi_' + data_set_name[1] + '.model', model)
                validation_patience = validation_patience_original
        if validation_patience == 0:
            break

    chainer.config.train = False
    print("-------------------------------------- Train finished -----------------------------------------------------")

    # Save the codes (encoder outputs) for the train, validation and test splits.
    if save_code is True:
        # Codes from encoder 1 for dataset 0.
        data_index = 0
        x_array, t_array = convert.concat_examples(train[data_index])
        x = chainer.Variable(x_array)
        code_train = model.encoder1_forward(x)
        data1_train_code = code_train.data
        x_array, t_array = convert.concat_examples(validation[data_index])
        x = chainer.Variable(x_array)
        code_validation = model.encoder1_forward(x)
        data1_validation_code = code_validation.data
        x_array, t_array = convert.concat_examples(test[data_index])
        x = chainer.Variable(x_array)
        code_test = model.encoder1_forward(x)
        data1_test_code = code_test.data

        # Codes from encoder 2 for dataset 1.
        data_index = 1
        x_array, t_array = convert.concat_examples(train[data_index])
        x = chainer.Variable(x_array)
        code_train = model.encoder2_forward(x)
        data2_train_code = code_train.data
        x_array, t_array = convert.concat_examples(validation[data_index])
        x = chainer.Variable(x_array)
        code_validation = model.encoder2_forward(x)
        data2_validation_code = code_validation.data
        x_array, t_array = convert.concat_examples(test[data_index])
        x = chainer.Variable(x_array)
        code_test = model.encoder2_forward(x)
        data2_test_code = code_test.data

        code = np.vstack((data1_train_code, data1_validation_code, data1_test_code,
                          data2_train_code, data2_validation_code, data2_test_code))
        print("Code.shape is ", code.shape)
        gen_data = pandas.DataFrame(code)
        gen_data.to_csv('./data/prevModel.csv')

    print('--------------------------------------------- Criteria Test ------------------------------------------------')
    # Final evaluation of the trained model on the target dataset's test split.
    data_index = 1
    x_array, t_array = convert.concat_examples(test[data_index])
    x = chainer.Variable(x_array)
    y_predict_data = model.forward_2(x).data
    best_test[0] = criteria.pred25(t_array, y_predict_data)
    best_test[1] = criteria.mae(t_array, y_predict_data)
    best_test[2] = criteria.sa(t_array, y_predict_data)
    best_test[3] = criteria.re(t_array, y_predict_data)
    return best_test
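

# Usage sketch (assumption): run this file as a script with the default hyper-parameters.
# The dataset names and batch sizes below mirror the defaults of main() and are
# illustrative, not values prescribed elsewhere in the project.
if __name__ == '__main__':
    best_test_measures = main(data_set_name=['china', 'kitchenham'], batch_size_def=[6, 20])
    print('Best test measures [Pred(25), MdAE, SA, RE*]:', best_test_measures)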