def main(_):
    """Run exactly one of the train / predict / eval modes chosen in FLAGS.

    The checkpoint and log directories are created when missing, GPU '1' is
    selected through CUDA_VISIBLE_DEVICES, and a DNN is built inside a fresh
    TensorFlow session.

    * train   -- fit the network.
    * predict -- load the saved network, classify every row of
      'dataset/gen_samples.csv' (last column is the label) and overwrite the
      file with only the rows whose prediction differs from the label.
    * eval    -- load the saved network and evaluate it.

    Args:
        _: unused positional argument.
    """
    # Exactly one mode flag must be set.
    mode_flags = [FLAGS.train, FLAGS.predict, FLAGS.eval]
    assert sum(mode_flags) == 1

    for directory in (FLAGS.checkpoint_dir, FLAGS.log_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    os.environ["CUDA_VISIBLE_DEVICES"] = '1'

    with tf.Session() as sess:
        dnn = DNN(sess, FLAGS)
        if FLAGS.train:
            dnn.fit()
        elif FLAGS.predict:
            dnn.load_network()
            csv_path = 'dataset/gen_samples.csv'
            table = np.array(pd.read_csv(csv_path, header=None))
            labels = table[:, -1]
            # Feed every column except the trailing label column.
            predicted = dnn.predict(np.delete(table, -1, 1))
            # Keep only the rows the network got wrong.
            misclassified = table[labels != predicted]
            pd.DataFrame(misclassified).to_csv(csv_path,
                                               index=False,
                                               header=None)
        elif FLAGS.eval:
            dnn.load_network()
            dnn.eval()
def main():
    """Build a three-layer DNN on the loaded data set, report it and train it.

    Prints the shapes of the training/test inputs and training labels, the
    first ten training labels, the number of layers, the parameter names, the
    network itself and its loss on the test set before training, then calls
    ``dnn.train()``.
    """
    options = {
        'learning_rate': 0.1,
        'beta1': 0.9,
        'optimizer': 'gd',
        'loss': 'crossentropy',
    }

    # load_data() returns seven values; only four are used here. The unused
    # ones are still unpacked so the arity check is preserved.
    (train_x, test_x, _, train_set_y_orig,
     _, test_set_y_orig, _) = load_data()

    print(train_x.shape)
    print(test_x.shape)
    print(train_set_y_orig.shape)
    print(train_set_y_orig[0, 0:10])

    layers = [
        Dense(32, activation='relu'),
        Dense(5, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    print(len(layers))

    dnn = DNN(train_x, train_set_y_orig, layers, options)
    print(dnn.params.keys())
    print(dnn)
    # Loss of the untrained network on the test set.
    print(dnn.loss(dnn.predict(test_x), test_set_y_orig))
    dnn.train()
#print "mrse mean {0}".format(mrse(mean, target)) #print "rmse mean {0}".format(rmse(mean, target)) #for i in range(10): # d = matrix[i] # t = target[i] # pred = full.activate(d) #print "Prediction: {0} for data {1} target: {2}".format(pred, d, t) # print "Prediction: {0} for target: {1}".format(pred, t) print "\n" for i in range(10): d = matrix[i] t = target[i] pred = dnn.predict(d) #print "Prediction: {0} for data {1} target: {2}".format(pred, d, t) print "Prediction: {0} for target: {1}".format(pred, t) ##### r = LinearRegression() r.fit(matrix, target) preds = [r.predict(d) for d in matrix] print "mrse preds {0}".format(mrse(preds, target)) print "rmse preds {0}".format(rmse(preds, target)) for i in range(10): d = matrix[i] t = target[i] pred = r.predict(d) #print "Prediction: {0} for data {1} target: {2}".format(pred, d, t)
class MountainCar:
    """Neural-network model of the ``MountainCarContinuous-v0`` dynamics.

    The class wraps the gym environment, records (position, target) samples
    from it, fits ``self.net`` to those samples and offers ``step`` /
    ``step_true`` to integrate the learned and the analytic model.

    Targets are multiplied by ``self.ratio`` before training and network
    outputs are divided by it again when they are used.
    """

    # Constants of the analytic model used by step_true()/_get_target().
    # NOTE(review): presumably these mirror the gym environment's own
    # constants -- confirm against the installed gym version.
    _FORCE_GAIN = 0.0015     # acceleration per unit of (clipped) action
    _SLOPE_GAIN = 0.0025     # amplitude of the -cos(3x) position term
    _MAX_SPEED = 0.07
    _MIN_POSITION = -1.2
    _MAX_POSITION = 0.6
    _GOAL_POSITION = 0.45

    def __init__(self, name='Goodone', net=None, train=0):
        """Create the gym environment and the network that models it.

        Args:
            name: name passed to the DNN when one is created here.
            net: an existing network to use; when falsy a new
                ``DNN(1, 1, self.units, ...)`` is built.
            train: forwarded to the DNN constructor as ``train``.
        """
        self.env = gym.make("MountainCarContinuous-v0")
        self.name = name
        self.simulation_step = 0.1
        self.units = 50
        self.ratio = 200
        self.reset()
        if net:
            self.net = net
        else:
            self.net = DNN(1, 1, self.units, train=train, name=self.name)

    def _memory_path(self):
        """Return the path of the sample file inside the network's model dir."""
        return os.path.join(self.net.model_path0, 'memory.npy')

    def save_samples(self, big_epis=100):
        """Roll out random actions in the environment and save the samples.

        Each of the ``big_epis`` episodes runs 10000 environment steps; a new
        random action (scaled by 3) is drawn every 100 steps. One row
        ``[position, target]`` is recorded per step, so the result has
        ``big_epis * 10000`` rows.

        Returns:
            The recorded samples as a float array, also saved to 'memory.npy'.
        """
        record = []
        change = 100
        for big_epi in range(big_epis):
            observation = self.reset()
            for epi in range(10000):
                if epi % change == 0:
                    u = self.action_sample() * 3
                    print(big_epi, int(20 * epi / 3000) * '=')
                observation_old = observation.copy()
                observation, _, _, _ = self.env.step(u)
                target = self._get_target(observation_old, observation, u)
                # The target can be a 1-element array when the action is one;
                # store plain floats so the saved array is a regular (N, 2)
                # float array rather than a ragged/object one.
                record.append([float(observation_old[0]),
                               float(np.squeeze(target))])
        data = np.array(record)
        np.save(self._memory_path(), data)
        return data

    def verity_data(self):
        """Plot the recorded targets against the analytic values.

        A random tenth of the saved samples is plotted. Sample files written
        by ``save_samples`` have two columns (position, target); the analytic
        value ``-0.0025 * cos(3 * position)`` is then computed here. Files
        that already carry a third 'real_dot' column are plotted as they are.
        """
        import matplotlib.pyplot as plt
        import pandas as pd
        import seaborn as sns
        sns.set()
        self.data = self._load_data()
        data_size = len(self.data)
        indexs = np.random.choice(data_size, size=int(data_size / 10))
        sample = self.data[indexs, :]
        if sample.shape[1] == 2:
            position = sample[:, 0].astype(float)
            real_dot = -self._SLOPE_GAIN * np.cos(3 * position)
            sample = np.column_stack((sample, real_dot))
        df = pd.DataFrame(sample,
                          columns=['position', 'target_dot', 'real_dot'])
        plt.figure()
        # The target is scaled by 1.1 only so the two point clouds are
        # visually distinguishable.
        plt.scatter(df['position'], df['target_dot'] * 1.1, s=5,
                    label='target')
        plt.scatter(df['position'], df['real_dot'], s=5, label='real')
        plt.legend()
        plt.show()

    def train_model(self):
        """Scale the saved targets by ``self.ratio``, fit the net and store it."""
        data = self._load_data()
        data[:, 1:] = data[:, 1:] * self.ratio
        self.net.learn_data(data)
        self.net.store_net()

    def verity_net_1(self):
        """Plot the network output and its derivative against the analytic ones.

        The comparison is made on positions from -1.1 to 0.5 in steps of
        0.001, against ``-a*cos(3x)`` and ``3*a*sin(3x)`` with ``a = 0.0025``.
        """
        import matplotlib.pyplot as plt
        a = self._SLOPE_GAIN
        x_ = np.arange(-1.1, 0.5, 0.001)
        inputs = x_.reshape((-1, 1))

        # Fit of the value itself.
        y_tru = -a * np.cos(3 * x_)
        y_pre = self.net.predict(inputs) / self.ratio
        plt.figure()
        plt.plot(x_, y_tru, label='x_tru')
        plt.plot(x_, y_pre, label='x_pre')
        plt.legend()

        # Fit of the derivative with respect to the position.
        y_tru_dot = 3 * a * np.sin(3 * x_)
        y_pre_dot = self.net.predict_dot(inputs)[:, 0] / self.ratio
        plt.figure()
        plt.plot(x_, y_tru_dot, label='x_dot_tru')
        plt.plot(x_, y_pre_dot, label='x_dot_pre')
        plt.legend()
        plt.show()

    def verity_net_2(self):
        """Run the gym environment and the learned model side by side and plot.

        Both systems receive the same random action, redrawn every 100 steps.
        The loop ends only when the learned model (``self.step``) reports
        ``done``; the environment's own ``done`` flag is ignored.
        """
        import matplotlib.pyplot as plt
        observation_record = []
        observation_record_net = []
        time_record = []
        observation = self.reset()
        observation_net = observation
        change = 100
        elapsed = 0
        epi = 0
        while True:
            observation_record.append(observation)
            observation_record_net.append(observation_net)
            time_record.append(elapsed)
            if epi % change == 0:
                action = self.action_sample() * 3
            epi += 1
            observation, _, _, _ = self.env.step(action)
            observation_net, _, done_net, _ = self.step(action)
            elapsed += self.simulation_step
            print(observation, observation_net)
            if done_net:
                break
        observation_record = np.array(observation_record)
        observation_record_net = np.array(observation_record_net)
        time_record = np.array(time_record)

        plt.figure(1)
        plt.plot(time_record, observation_record[:, 0], label='x_ture')
        plt.plot(time_record, observation_record_net[:, 0], label='x_pre')
        plt.xlabel('Time(s)')
        plt.ylabel('Xposition')
        # Horizontal line at the position where step() reports done.
        plt.plot(time_record,
                 self._GOAL_POSITION * np.ones(len(observation_record)), 'r')
        plt.legend()

        plt.figure(2)
        plt.plot(time_record, observation_record[:, 1], label='v_ture')
        plt.plot(time_record, observation_record_net[:, 1], label='v_pre')
        plt.xlabel('Time(s)')
        plt.ylabel('Vspeed')
        plt.legend()
        plt.show()

    def _load_data(self):
        """Load the samples previously written by ``save_samples``.

        :return: the array stored in 'memory.npy'.
        """
        return np.load(self._memory_path())

    def action_sample(self):
        """Draw a random action from the environment's action space."""
        return self.env.action_space.sample()

    def reset(self):
        """Reset the gym environment and adopt its state as ``self.state``."""
        self.state = self.env.reset()
        return self.state

    def _advance(self, action, position_accel):
        """Integrate one step with the given position-dependent acceleration.

        Shared by ``step`` and ``step_true``, which differ only in where the
        acceleration term comes from.

        Returns:
            ``(state, reward, done, info)``; ``reward`` and ``info`` are empty
            dict placeholders, ``done`` is True once the position is at least
            0.45.
        """
        x, v = self.state
        v_dot = self._FORCE_GAIN * action + position_accel
        v = v + v_dot * self.simulation_step
        v = min(max(v, -self._MAX_SPEED), self._MAX_SPEED)
        # The position is updated from the already updated velocity.
        x = x + self.simulation_step * v
        x = min(max(x, self._MIN_POSITION), self._MAX_POSITION)
        X = np.array([x, v])
        # A 1-element array action makes x and v 1-element arrays; flatten
        # the resulting (2, 1) state back to shape (2,).
        if X.ndim == 2:
            X = X.reshape((2, ))
        self.state = X
        done = bool(x >= self._GOAL_POSITION)
        return self.state, {}, done, {}

    def step(self, action):
        """Advance ``self.state`` one step using the network's acceleration.

        Args:
            action: control input, clipped to [-1, 1].

        Returns:
            ``(state, reward, done, info)`` as described in ``_advance``.
        """
        action = min(max(action, -1.0), 1.0)
        dot = self.get_dot(self.state)
        return self._advance(action, dot[0])

    def step_true(self, action):
        """Advance ``self.state`` one step using the analytic acceleration.

        Same as ``step`` but with ``-0.0025 * cos(3 * x)`` in place of the
        network output.
        """
        action = min(max(action, -1.0), 1.0)
        x = self.state[0]
        return self._advance(action, -self._SLOPE_GAIN * math.cos(3 * x))

    def get_dot(self, X):
        """Return the network prediction for the position ``X[0]``, unscaled."""
        return self.net.predict(X[0:1])[0] / self.ratio

    def get_dot2(self, X):
        """Return ``net.predict_dot`` for the position ``X[0]``, unscaled."""
        return self.net.predict_dot(X[0:1])[0] / self.ratio

    def _get_target(self, X, X_new, u):
        """Compute the training target from two consecutive observations.

        The target is the finite-difference velocity change per
        ``simulation_step`` minus the contribution of the clipped action,
        i.e. the part the network has to explain from the position.
        """
        u = min(max(u, -1.0), 1.0)
        return (((X_new - X) / self.simulation_step)[1]
                - u * self._FORCE_GAIN)
class DQN:
    """Epsilon-greedy DQN agent with prioritized replay and optional Double DQN.

    ``dqn_params`` must provide the keys 'epsilon', 'gamma',
    'mini_batch_size', 'decay_rate', 'epsilon_min', 'use_ddqn', 'print_obs',
    'print_reward' and 'memory_capacity'.
    """

    def __init__(self, num_actions, observation_shape, dqn_params, cnn_params,
                 folder):
        """Store the hyper-parameters and build the network(s) and memories."""
        self.num_actions = num_actions
        self.observation_shape = observation_shape
        self.cnn_params = cnn_params
        self.folder = folder

        self.epsilon = dqn_params['epsilon']
        self.gamma = dqn_params['gamma']
        self.mini_batch_size = dqn_params['mini_batch_size']
        self.time_step = 0
        self.decay_rate = dqn_params['decay_rate']
        self.epsilon_min = dqn_params['epsilon_min']
        self.current_epsilon = self.epsilon

        self.use_ddqn = dqn_params['use_ddqn']
        self.print_obs = dqn_params['print_obs']
        self.print_reward = dqn_params['print_reward']
        # train_step() is a no-op until this flag is set.
        self.startTraining = False

        # Recent transitions, used only for reward/observation print-outs.
        self.memory = deque(maxlen=1000)
        # Prioritized experience replay memory used for training.
        self.per_memory = Memory(dqn_params['memory_capacity'])

        self.model = DNN(folder, num_actions, observation_shape, cnn_params)
        print("model initialized")

        # Extra network for Double DQN.
        if self.use_ddqn == 1:
            # NOTE(review): the online model is a DNN but the target network
            # is a CNN -- confirm this asymmetry is intended.
            self.target_model = CNN(folder, num_actions, observation_shape,
                                    cnn_params)

    def select_action(self, observation, iterations):
        """Pick an action epsilon-greedily.

        Every 1000th iteration epsilon is recomputed as an exponential decay
        from ``epsilon`` towards ``epsilon_min`` and the decay counter
        ``time_step`` is advanced.

        Returns:
            The index of the chosen action.
        """
        if iterations % 1000 == 0:
            self.current_epsilon = self.epsilon_min + (
                self.epsilon - self.epsilon_min) * np.exp(
                    -self.decay_rate * self.time_step)
            self.time_step += 1

        if random.random() < self.current_epsilon:
            # Explore: uniformly random action.
            return np.random.randint(0, self.num_actions)
        # Exploit: action with the largest predicted Q value.
        q_values = self.model.predict(np.array([observation]))
        return np.argmax(q_values)

    def update_state(self, action, observation, new_observation, reward,
                     done):
        """Record one transition in both the print-out and the PER memory."""
        transition = {
            'action': action,
            'observation': observation,
            'new_observation': new_observation,
            'reward': reward,
            'is_done': done
        }
        self.memory.append(transition)
        self.per_memory.store(transition)

    def train_step(self):
        """Update the model from one mini batch sampled from the PER memory.

        Does nothing while ``startTraining`` is false. The TD target is the
        reward plus, for non-terminal transitions, ``gamma`` times the value
        of the online network's best next action; with Double DQN that value
        is read from the target network instead. The absolute TD errors are
        written back as the new priorities.
        """
        if not self.startTraining:
            return

        batch_size = self.mini_batch_size
        tree_idx, mini_batch = self.per_memory.sample(batch_size)
        transitions = [mini_batch[i] for i in range(batch_size)]

        obs_dim = self.observation_shape[0]
        new_states = np.zeros((batch_size, obs_dim))
        old_states = np.zeros((batch_size, obs_dim))
        for i, transition in enumerate(transitions):
            new_states[i] = transition['new_observation']
            old_states[i] = transition['observation']
        action_idx = np.array([t['action'] for t in transitions], dtype=int)

        q_old = np.array(self.model.predict(old_states))
        q_next = self.model.predict(new_states)
        # The same `== 1` test that decides whether target_model exists.
        use_target = self.use_ddqn == 1
        q_bootstrap = (self.target_model.predict(new_states)
                       if use_target else q_next)

        targets = []
        for i, transition in enumerate(transitions):
            y_j = transition['reward']
            if not transition['is_done']:
                best_next = np.argmax(q_next[i])
                y_j = y_j + self.gamma * q_bootstrap[i][best_next]
            targets.append(y_j)

        Xs = np.array([t['observation'] for t in transitions])
        ys = np.array(targets)
        # One-hot encoding of the actions that were actually taken.
        actions = np.zeros((batch_size, self.num_actions))
        actions[np.arange(batch_size), action_idx] = 1

        # Update the PER priorities with the absolute TD errors.
        rows = np.arange(batch_size, dtype=np.int32)
        absolute_errors = np.abs(q_old[rows, action_idx] - ys)
        self.per_memory.batch_update(tree_idx, absolute_errors)

        # NOTE(review): the same batch is trained on twice in a row; kept
        # as-is -- confirm the second call is intentional.
        self.model.train_step(Xs, ys, actions)
        self.model.train_step(Xs, ys, actions)

    @staticmethod
    def _next_plot_index(plot_path):
        """Return the last index stored in ``plot_path`` plus one, or 1."""
        try:
            with open(plot_path, "r") as f:
                lines = f.read().splitlines()
            return int(str(lines[-1].split(",")[0])) + 1
        except (IOError, OSError, IndexError, ValueError):
            # Missing, empty or malformed file: start counting at 1.
            return 1

    def saveBatchReward(self, iterations):
        """Append the summed reward and/or the observations in memory to disk.

        With ``print_reward == 1`` a line ``index,total_reward`` is appended
        to 'plot.txt' in the model directory. With ``print_obs == 1`` the
        observations (comma-terminated values, one transition per line) are
        appended to 'observations.txt'; they are collected only when
        ``iterations > 1``.
        """
        if self.print_obs != 1 and self.print_reward != 1:
            return

        total_reward = 0
        collect_obs = self.print_obs == 1 and iterations > 1
        obs_lines = []
        for transition in self.memory:
            total_reward += transition['reward']
            if collect_obs:
                values = "".join(
                    str(obs) + "," for obs in transition['observation'])
                obs_lines.append(values + "\n")

        if self.print_reward == 1:
            plot_path = self.model.model_directory + "/plot.txt"
            index = self._next_plot_index(plot_path)
            ue.log("Saved: " + str(index) + ", Reward: " + str(total_reward) +
                   ", Epislon: " + str(self.current_epsilon))
            with open(plot_path, "a+") as f:
                f.write(str(index) + "," + str(total_reward) + "\n")

        if self.print_obs == 1:
            obs_path = self.model.model_directory + "/observations.txt"
            with open(obs_path, "a+") as f:
                f.write("".join(obs_lines))
# 训练模式 X = memory_norm[:, [0, 1]].copy() Y = memory_norm[:, 2:].copy() losses = [] for i in range(40000): sample_index = np.random.choice(len(X), size=500) batch_x = X[sample_index, :] batch_y = Y[sample_index, :] loss, mae = net.learn(batch_x, batch_y) losses.append(loss) print(i + 1, '射程预测平均误差是', mae * 100, 'km') plt.plot(losses) sample_index = np.random.choice(len(X), size=100) batch_x = X[sample_index, :] batch_y = Y[sample_index, :] batch_y_pre = net.predict(batch_x) error_y = net.unorm(np.hstack((batch_x, batch_y - batch_y_pre)))[:, -1] print(np.mean(np.abs(error_y))) net.store() plt.show() else: X = memory_norm[:, [0, 1]].copy() Y = memory_norm[:, 2:].copy() tes_num = 100 sample_index = np.random.choice(len(X), size=tes_num) batch_x = X[sample_index, :] batch_y = Y[sample_index, :] batch_y_pre = net.predict(batch_x) for i in range(tes_num): print( "第%d个数据,w = %f,v = %f,range_real = %f,range_pre = %f,delta_range = %f"
def train(data, path):
    """Train a DNN on ``data`` and keep the best checkpoint under ``path``.

    Every epoch draws ``len(x_train)`` random sequences. Each sequence is
    optimised in slices of ``batch_num`` rows, then the loss on the last
    slice is added to the epoch error. The model is saved whenever the mean
    epoch error improves on the best seen so far (initially 100).

    Args:
        data: mapping with the training inputs under 'x' and targets
            under 'y'.
        path: output location. It is used as a plain string prefix
            (``path + 'logs'``, ``path + 'test_best_model'``), so it should
            end with a path separator.
    """
    # Create the output directory without going through a shell; the former
    # "mkdir -p <path>" command broke on paths with spaces or shell
    # metacharacters.
    if path and not os.path.isdir(path):
        os.makedirs(path)

    x_train, y_train = data['x'], data['y']

    x = tf.placeholder(tf.float32, [None, 11, input_dim], name='x')
    y = tf.placeholder(tf.float32, [None, input_dim], name='y')

    model = DNN()
    loss = model.loss(x, y)
    pred = model.predict(x)
    tf.add_to_collection('pred', pred)
    tf.add_to_collection('loss', loss)

    optimize = tf.train.AdamOptimizer(learning_rate=5e-5,
                                      beta1=0.9,
                                      beta2=0.999).minimize(loss)
    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        start = time.time()
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(path + 'logs', sess.graph)

        err_old = 100
        for i in range(iter_num):
            err_new = 0
            count = 0
            for j in range(len(x_train)):
                idx = random.randint(0, len(x_train) - 1)
                xt = next_batch(x_train[idx])
                yt = y_train[idx % len(y_train)][5:-5, :]

                # k keeps the start of the last slice after the loop; it
                # stays 0 when xt is empty and the loop body never runs.
                k = 0
                for k in range(0, len(xt), batch_num):
                    xb = xt[k:k + batch_num]
                    yb = yt[k:k + batch_num]
                    sess.run([optimize], feed_dict={x: xb, y: yb})

                # Evaluate loss and summaries on the last slice only.
                xb = xt[k:]
                yb = yt[k:]
                err, result = sess.run([loss, merged],
                                       feed_dict={
                                           x: xb,
                                           y: yb
                                       })
                err_new += err
                count += 1
                if j % 100 == 0:
                    writer.add_summary(result, len(x_train) * i + j)

            mean_err = err_new / count
            elapsed = time.time() - start
            if mean_err < err_old:
                err_old = mean_err
                saver.save(sess, path + 'test_best_model')
                print('Epoch [%4d] Time [%10.4f] Loss: [%.4f]: Saved ' %
                      (i + 1, elapsed, mean_err))
            else:
                print('Epoch [%4d] Time [%5.4f] Loss: [%.4f]: No save ' %
                      (i + 1, elapsed, mean_err))
# NOTE(review): excerpt of a larger Python 2 script (print statements).
# `opts`, `W_init`, `X_train`, `Y_train`, `X_valid`, `Y_valid`, `parameters`,
# `DNN` and `dnn_save_model` are defined outside this excerpt; the nesting
# below is reconstructed -- confirm it.
opts.W_init = W_init
print "Build DNN structure..."
dnn = DNN(opts)

# training
print "Start DNN training...(lr_decay = %s)" % (str(opts.lr_decay))
acc_all = []  # validation accuracy after each epoch
lr = opts.learning_rate
ts = time.time()  # start time; not read in the visible part
for i in range(opts.epoch):
    dnn.train(X_train, Y_train, opts.batch_size, lr)
    # Accuracy: fraction of validation rows whose predicted class equals the
    # argmax of the corresponding Y_valid row.
    acc = np.mean(np.argmax(Y_valid, axis=1) == dnn.predict(X_valid))
    acc_all.append(acc)
    print "Epoch %d, lr = %.4f, accuracy = %f" % (i + 1, lr, acc)

    # dump intermediate model and log per 100 epoch
    if (np.mod((i + 1), 100) == 0):
        model_filename = os.path.join(opts.model_dir,
                                      "epoch%d.model" % (i + 1))
        dnn_save_model(model_filename, dnn)
        # The log file is rewritten with the full accuracy history so far.
        log_filename = '../log/%s.log' % parameters
        print "Save %s" % log_filename
        np.savetxt(log_filename, acc_all, fmt='%.7f')

    # Multiply the learning rate by the decay factor after every epoch.
    lr *= opts.lr_decay