def __init__(self, sess, args):
    """Build the classifier graph, its summaries/saver and the augmented-data losses.

    Args:
        sess: TensorFlow session. Only ``sess.graph`` is read here, to attach
            the graph to the three summary writers.
        args: Namespace-like object providing ``net1_h1``, ``net1_h2``,
            ``lr``, ``reg_param1`` and ``reg_param2``.

    ``feature_dim``, ``output_dim`` and ``output_dir`` are not parameters;
    they are resolved from the enclosing scope.
    """
    # Inputs of the plain (non-augmented) network.
    self.x = tf.placeholder(tf.float32, [None, feature_dim], name="input")
    self.y_ = tf.placeholder(tf.float32, [None, output_dim], name="output")
    self.is_training = tf.placeholder(tf.bool)

    # Inputs for the augmented data.
    # NOTE(review): this placeholder reuses the name "input"; TensorFlow will
    # uniquify it, so look it up through ``self.x1`` rather than by name.
    self.x1 = tf.placeholder(tf.float32, [None, feature_dim], name="input")
    self.class_label = tf.placeholder(tf.float32, [None], name="condition_checking")

    self.layer_sizes = [args.net1_h1, args.net1_h2]

    # Build the model. The last argument is the reuse flag: None here so that
    # the variables are created.
    self.y = am_util.build_model(self.x, self.layer_sizes, self.is_training, output_dim, None)
    self.prob = tf.nn.softmax(self.y, name='prob')
    # tf.argmax replaces the deprecated tf.arg_max alias.
    self.pred = tf.argmax(self.prob, 1, name='pred')

    # Accuracy
    self.correct_predictions = tf.equal(self.pred, tf.argmax(self.y_, 1))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, "float"), name="accuracy")

    # Loss and optimizer of the plain network.
    self.loss_f = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_))
    self.optimizer_f = tf.train.AdamOptimizer(args.lr, name="opt1").minimize(self.loss_f)

    # Summaries and one writer per data split.
    self.summaries = tf.summary.merge(self.get_summaries())
    self.train_summary_writer = tf.summary.FileWriter(
        "%s/logs/train" % output_dir, sess.graph, flush_secs=60)
    self.val_summary_writer = tf.summary.FileWriter(
        "%s/logs/val" % output_dir, sess.graph, flush_secs=60)
    self.test_summary_writer = tf.summary.FileWriter(
        "%s/logs/test" % output_dir, sess.graph, flush_secs=60)

    # Saver over the variables that exist at this point.
    # NOTE(review): variables created further down (e.g. by the second
    # optimizer) are not in this list -- confirm that is intended before
    # moving this statement.
    self.saver = tf.train.Saver(tf.global_variables() + tf.local_variables())

    # Second tower for the augmented data. Reuse flag True so that the
    # variables are shared with the network built above.
    self.y1 = am_util.build_model(self.x1, self.layer_sizes, self.is_training, output_dim, True)
    self.cond = tf.reduce_sum(tf.squared_difference(self.y1, self.y), 1)

    # Rows whose class_label is positive. class_label is 1-D, so tf.where
    # yields indices of shape [n_selected, 1] and tf.gather therefore returns
    # [n_selected, 1, output_dim]. Squeeze only that inserted axis: an
    # unrestricted squeeze would also drop the batch axis when exactly one row
    # is selected (and the class axis when output_dim == 1).
    self.row_index = tf.where(self.class_label > 0)
    self.y1_filterred = tf.squeeze(tf.gather(self.y1, self.row_index), axis=[1])
    self.y_filtered = tf.squeeze(tf.gather(self.y, self.row_index), axis=[1])
    self.is_empty = tf.equal(tf.size(self.row_index), 0)

    # Consistency loss between the two towers on the selected rows.
    self.loss_y_y1 = output_dim * tf.reduce_mean(
        tf.squared_difference(self.y_filtered, self.y1_filterred), name="loss_f_G")
    # With no selected rows the mean is taken over nothing; use 0 instead so
    # the loss does not become NaN.
    self.loss_y_y1_filtered = tf.cond(
        tf.cast(self.is_empty, tf.bool),
        lambda: tf.constant(0, tf.float32),
        lambda: self.loss_y_y1)

    # Cross-entropy of the augmented tower against the labels, selected rows only.
    self.y__filtered = tf.squeeze(tf.gather(self.y_, self.row_index), axis=[1])
    self.loss_fx_y = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=self.y1_filterred, labels=self.y__filtered),
        name="filtered_reg")
    # Same empty-selection guard as above.
    self.loss_fx_y_filtered = tf.cond(
        tf.cast(self.is_empty, tf.bool),
        lambda: tf.constant(0, tf.float32),
        lambda: self.loss_fx_y)

    # Final loss = weighted regularizers + plain classification loss.
    # NOTE(review): both ops below are named "loss_final" (the second one gets
    # uniquified by TensorFlow). Left unchanged so existing by-name lookups
    # keep resolving the same way.
    self.final_reg = tf.add(
        args.reg_param1 * self.loss_y_y1_filtered,
        args.reg_param2 * self.loss_fx_y_filtered,
        name="loss_final")
    self.loss_final = tf.add(self.final_reg, self.loss_f, name="loss_final")
    self.optimizer_final = tf.train.AdamOptimizer(args.lr, name="opt2").minimize(self.loss_final)
def run():
    """Load the price series, scale it, train the model and reload saved weights.

    Steps, in order: fetch the dataset through ``util.get_dataset``, take its
    ``Weighted_Price`` column as a float32 column vector, scale it to [0, 1],
    split it 85/15 into train and test parts, build windowed samples with
    ``util.create_labels``, train the model, plot the training history and
    finally load weights from ``saved_models/weights.best.lstm.hdf5``.

    Returns nothing.
    """
    # Window length handed to util.create_labels for both splits.
    look_back = 5

    # Load data in a dataframe and keep the price column as an (n, 1) array.
    data = util.get_dataset()
    weighted_price = data.Weighted_Price.values.astype('float32').reshape(-1, 1)

    # Scale data
    scaler = MinMaxScaler(feature_range=(0, 1))
    data_scaled = scaler.fit_transform(weighted_price)

    train_set, test_set = util.split_data(data_scaled, train_percentage=0.85)
    x_train, y_train = util.create_labels(train_set, look_back=look_back)
    # The test samples are built here but not used further in this function.
    x_test, y_test = util.create_labels(test_set, look_back=look_back)

    model = util.build_model()
    history = util.train_model(model, x_train, y_train)
    util.plot_training_history(history)

    # NOTE(review): presumably this checkpoint is written during
    # util.train_model -- confirm against that helper.
    model.load_weights('saved_models/weights.best.lstm.hdf5')
def cnn_regression(train_set, test_set, train_labels, test_labels, step=0.001,
                   max_epochs=100, batch_size=64, activation=tf.nn.tanh,
                   tolerance=0, reg_coeff=0, is_initial=True):
    """Train a one-conv-layer, two-dense-layer regressor and predict on ``test_set``.

    Args:
        train_set: Training features; must expose ``.columns``, ``.index`` and
            ``.iloc`` (pandas-DataFrame-like).
        test_set: Features fed for the early-stopping loss and final prediction.
        train_labels: Training targets, sliced with ``.iloc`` in step with
            ``train_set``.
        test_labels: Targets for ``test_set``; must expose ``.index`` and
            ``.values``.
        step: Learning rate passed to ``tf.train.RMSPropOptimizer``.
        max_epochs: Upper bound on the number of epochs.
        batch_size: Number of rows per optimisation step.
        activation: Non-linearity applied after the conv/pool stage and the
            first dense layer.
        tolerance: Training stops once the loss on ``test_set`` has risen for
            more than this many consecutive epochs.
        reg_coeff: Weight of the L2 penalty on ``W1``, ``W2`` and ``W3``.
        is_initial: Forwarded unchanged to ``build_model``.

    Returns:
        1-D numpy array with one prediction per row of ``test_labels``.
    """
    tf.reset_default_graph()
    sample_len = len(train_set.columns)
    half_len = sample_len // 2
    quarter_len = sample_len // 4

    W1, b1 = get_variables(kind="conv")
    W2, b2 = get_variables(kind="FC", size_in=half_len, size_out=quarter_len)
    W3, b3 = get_variables(kind="FC", size_in=quarter_len, size_out=1)
    reg_term = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3)

    x = tf.placeholder(tf.float32, [None, sample_len])
    y = tf.placeholder(tf.float32, [None, 1])

    # Each sample is treated as a 1 x sample_len single-channel image.
    x_reshaped = tf.reshape(x, [-1, 1, sample_len, 1])
    x1 = activation(max_pool(conv2d(x_reshaped, W1, stride=1) + b1))
    # NOTE(review): assumes the conv/pool stage leaves sample_len // 2 values
    # per sample -- confirm against conv2d / max_pool / get_variables.
    x1 = tf.reshape(x1, [-1, half_len])
    x2 = activation(tf.matmul(x1, W2) + b2)
    x3 = tf.matmul(x2, W3) + b3

    loss = tf.reduce_mean(tf.square(x3 - y)) + reg_coeff * reg_term
    train = tf.train.RMSPropOptimizer(step).minimize(
        loss, var_list=[W1, W2, W3, b1, b2, b3])

    saver = tf.train.Saver()
    save_path = 'path'
    init = tf.global_variables_initializer()

    n_train = len(train_set.index)
    n_test = len(test_labels.index)

    with tf.Session() as sess:
        # NOTE(review): presumably initialises the variables or restores them
        # from save_path depending on is_initial -- confirm in build_model.
        build_model(sess, saver, init, is_initial, save_path)

        test_loss, count = np.inf, 0
        for epoch in range(max_epochs):
            # Early stopping: count consecutive epochs in which the loss on
            # test_set (including the regularisation term) went up.
            new_test_loss = loss.eval(feed_dict={x: test_set, y: test_labels})
            count = count + 1 if new_test_loss > test_loss else 0
            if count > tolerance:
                print("Overfitting, early stopping...")
                break
            test_loss = new_test_loss

            # Step through *all* training rows. The previous
            # range(int(n / batch_size)) loop skipped the trailing partial
            # batch and did no training at all when n < batch_size.
            for start in range(0, n_train, batch_size):
                stop = start + batch_size
                sess.run(train, feed_dict={x: train_set.iloc[start:stop],
                                           y: train_labels.iloc[start:stop]})

        saver.save(sess, save_path=save_path)
        print("Training finished and saved. Calculating results...")

        pred = np.reshape(x3.eval(feed_dict={x: test_set, y: test_labels}), n_test)
        score = mean_squared_error(pred, np.reshape(test_labels.values, n_test))
        print("Done. Averaged test loss: %f" % score)
    return pred
def FC_autoencoder(train_set, test_set, encoding_len=None, step=0.001,
                   max_epochs=100, batch_size=64, activation=tf.nn.tanh,
                   tolerance=0, reg_coeff=0, is_initial=True):
    """Train a single-hidden-layer autoencoder and encode/reconstruct ``test_set``.

    Args:
        train_set: Training data; must expose ``.columns``, ``.index`` and
            ``.iloc`` (pandas-DataFrame-like).
        test_set: Data used for the early-stopping loss and for the returned
            encoding/reconstruction; must expose ``.index``, ``.columns`` and
            ``.values``.
        encoding_len: Width of the encoding. Defaults to
            ``int(sqrt(number of columns))`` when None.
        step: Learning rate passed to ``tf.train.RMSPropOptimizer``.
        max_epochs: Upper bound on the number of epochs.
        batch_size: Number of rows per optimisation step.
        activation: Non-linearity applied to the encoding layer; the decoder
            output is linear.
        tolerance: Training stops once the loss on ``test_set`` has risen for
            more than this many consecutive epochs.
        reg_coeff: Weight of the L2 penalty on ``W1`` and ``W2``.
        is_initial: Forwarded unchanged to ``build_model``.

    Returns:
        Tuple ``(encoded, reconstructed)`` of DataFrames, both indexed like
        ``test_set``; ``reconstructed`` also carries ``test_set``'s columns.
    """
    tf.reset_default_graph()
    sample_len = len(train_set.columns)
    if encoding_len is None:
        encoding_len = int(np.sqrt(sample_len))

    W1, b1 = get_variables(kind="FC", size_in=sample_len, size_out=encoding_len)
    W2, b2 = get_variables(kind="FC", size_in=encoding_len, size_out=sample_len)
    reg_term = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)

    x = tf.placeholder(tf.float32, [None, sample_len])
    x_encoded = activation(tf.matmul(x, W1) + b1)
    x_decoded = tf.matmul(x_encoded, W2) + b2

    # Reconstruction error plus weighted L2 penalty.
    loss = tf.reduce_mean(tf.square(x_decoded - x)) + reg_coeff * reg_term
    train = tf.train.RMSPropOptimizer(step).minimize(loss, var_list=[W1, W2, b1, b2])

    saver = tf.train.Saver()
    save_path = 'path'
    init = tf.global_variables_initializer()

    n_train = len(train_set.index)
    n_test = len(test_set.index)

    with tf.Session() as sess:
        # NOTE(review): presumably initialises the variables or restores them
        # from save_path depending on is_initial -- confirm in build_model.
        build_model(sess, saver, init, is_initial, save_path)

        test_loss, count = np.inf, 0
        for epoch in range(max_epochs):
            # Early stopping: count consecutive epochs in which the loss on
            # test_set (including the regularisation term) went up.
            new_test_loss = loss.eval(feed_dict={x: test_set})
            count = count + 1 if new_test_loss > test_loss else 0
            if count > tolerance:
                print("Overfitting, early stopping...")
                break
            test_loss = new_test_loss

            # Step through *all* training rows. The previous
            # range(int(n / batch_size)) loop skipped the trailing partial
            # batch and did no training at all when n < batch_size.
            for start in range(0, n_train, batch_size):
                sess.run(train, feed_dict={x: train_set.iloc[start:start + batch_size]})

        saver.save(sess, save_path=save_path)
        print("Training finished and saved. Calculating results...")

        # One pass over the graph yields both the encoding and the reconstruction.
        encoded_raw, decoded_raw = sess.run([x_encoded, x_decoded], feed_dict={x: test_set})
        pred = np.reshape(decoded_raw, [n_test, sample_len])
        score = mean_squared_error(pred, np.reshape(test_set.values, [n_test, sample_len]))
        print("Done. Averaged test loss: %f" % score)
        encoded = np.reshape(encoded_raw, [n_test, encoding_len])

    return (pd.DataFrame(data=encoded, index=test_set.index),
            pd.DataFrame(data=pred, index=test_set.index, columns=test_set.columns))