def SIN(max_epochs, learning_rate, no_hidden):
    np.random.seed(213)

    # Build 200 random 4-component input vectors in [-1, 1] and the
    # target sin(x1 - x2 + x3 - x4) for each.
    inputs = []
    outputs = []
    for i in range(200):
        inputs.append(list(np.random.uniform(-1.0, 1.0, 4)))
    inputs = np.array(inputs)
    for i in range(200):
        outputs.append(np.sin([inputs[i][0] - inputs[i][1] + inputs[i][2] - inputs[i][3]]))
    outputs = np.array(outputs)

    no_in = 4
    no_out = 1
    NN = MLP(no_in, no_hidden, no_out)
    NN.randomise()

    print('\nMax Epoch:\n' + str(max_epochs), file=log)
    print('\nLearning Rate:\n' + str(learning_rate), file=log)

    # Untrained predictions on the 150 training examples.
    print('\nBefore Training:\n', file=log)
    for i in range(150):
        NN.forward(inputs[i], 'tanh')
        print('Target:\t{}\t Output:\t {}'.format(outputs[i], NN.O), file=log)

    # Training: full-batch forward/backward over the first 150 examples.
    print('Training:\n', file=log)
    for i in range(max_epochs):
        NN.forward(inputs[:150], 'tanh')
        error = NN.backward(inputs[:150], outputs[:150], 'tanh')
        NN.updateWeights(learning_rate)
        # Print the error roughly every 5% of the epochs.
        if (i + 1) % max(1, max_epochs // 20) == 0:
            print(' Error at Epoch:\t' + str(i + 1) + '\t is \t' + str(error), file=log)

    # Testing on the 50 held-out examples; accuracy = 1 - mean absolute error.
    difference = 0.0
    print('\n Testing :\n', file=log)
    for i in range(150, len(inputs)):
        NN.forward(inputs[i], 'tanh')
        print('Target:\t{}\t Output:\t {}'.format(outputs[i], NN.O), file=log)
        difference += np.abs(outputs[i][0] - NN.O[0])
    accuracy = 1 - (difference / 50)
    accuracylist.append(accuracy)
    print('\nAccuracy:{}'.format(accuracy), file=log)
    print('\ntestError:{}'.format(difference / 50), file=log)
def XOR(max_epochs, learning_rate):
    np.random.seed(1)
    inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    outputs = np.array([[0], [1], [1], [0]])

    NI = 2
    NH = 4
    NO = 1
    NN = MLP(NI, NH, NO)
    NN.randomise()

    print('\nMax Epoch:\n' + str(max_epochs), file=log)
    print('\nLearning Rate:\n' + str(learning_rate), file=log)

    # Untrained predictions on all four patterns.
    print('\nBefore Training:\n', file=log)
    for i in range(len(inputs)):
        NN.forward(inputs[i], 'sigmoid')
        print('Target:\t {} Output:\t {}'.format(outputs[i], NN.O), file=log)

    # Full-batch training.
    print('\nTraining:\n', file=log)
    for i in range(max_epochs):
        NN.forward(inputs, 'sigmoid')
        error = NN.backward(inputs, outputs, 'sigmoid')
        NN.updateWeights(learning_rate)
        if (i + 1) % max(1, max_epochs // 20) == 0:
            print(' Error at Epoch:\t' + str(i + 1) + '\t\t is \t\t' + str(error), file=log)

    # Accuracy: how close each output is to its binary target, averaged
    # over the four patterns.
    print('\n After Training :\n', file=log)
    accuracy = 0.0
    for i in range(len(inputs)):
        NN.forward(inputs[i], 'sigmoid')
        print('Target:\t {} Output:\t {}'.format(outputs[i], NN.O), file=log)
        if outputs[i][0] == 0:
            accuracy += 1 - NN.O[0]
        else:
            accuracy += NN.O[0]
    print('\nAccuracy:{}'.format(accuracy / 4), file=log)
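# --- A minimal driver sketch, not from the source: it assumes the MLP class
# --- used above plus the module-level `log` handle and `accuracylist` list
# --- that SIN() and XOR() write to. Hyperparameter values are illustrative.
accuracylist = []

if __name__ == '__main__':
    with open('results.txt', 'w') as log:
        XOR(max_epochs=10000, learning_rate=1.0)
        SIN(max_epochs=5000, learning_rate=0.05, no_hidden=8)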
# Targets: sin of the component sum for each input vector. (An unused
# per-iteration numpy.sum over the whole input array has been dropped; the
# array conversion happens once, after the loop.)
for i in range(50):
    sin_desired_output.append([numpy.sin(numpy.sum(sin_inputs[i]))])
sin_desired_output = numpy.array(sin_desired_output)

# Save this run's output to a file named after the hyperparameters.
with open('test_output/sin_output/sin_output_size_(' + str(INPUTS) + ', '
          + str(HIDDEN) + ', ' + str(OUTPUTS) + ')_learning_rate_'
          + str(LEARNING_RATE) + '_epochs_' + str(MAX_EPOCHS) + '.txt',
          'w') as f:
    print("\nPreTraining Testing:\n")
    f.write('\nPreTraining Testing:\n')
    # Untrained predictions on the last 10 (held-out) examples.
    for i in range(len(sin_inputs) - 10, len(sin_inputs)):
        mlp.forward(sin_inputs[i], True)
        print("Target:\t" + str(sin_desired_output[i]) + "\t\tOutput:\t" + str(mlp.o) + "\n")
        f.write('Target:\t' + str(sin_desired_output[i]) + '\t\tOutput:\t' + str(mlp.o) + '\n')
    f.write('MLP Size\t\t\t(' + str(INPUTS) + ', ' + str(HIDDEN) + ', ' + str(OUTPUTS) + ')\n')
    f.write('Epochs:\t\t\t\t' + str(MAX_EPOCHS) + '\n')
    f.write('Learning Rate:\t\t' + str(LEARNING_RATE) + '\n\n')

    print("Training:\n")
    f.write('Training:\n')
    for i in range(MAX_EPOCHS):
        error = 0
        mlp.forward(sin_inputs[:len(sin_inputs) - 10], True)
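# --- A self-contained sketch, not from the source, of the dataset shape the
# --- script above consumes: 50 random 4-component vectors in [-1, 1], each
# --- paired with the target sin(sum of components). The names and the seed
# --- here are illustrative assumptions.
import numpy
rng_demo = numpy.random.RandomState(42)
demo_inputs = rng_demo.uniform(-1.0, 1.0, (50, 4))
demo_targets = numpy.array([[numpy.sin(numpy.sum(v))] for v in demo_inputs])
print(demo_inputs.shape, demo_targets.shape)  # (50, 4) (50, 1)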
mlp.randomise()
# print(mlp)

xor_inputs = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])
xor_desired_output = numpy.array([[0], [1], [1], [0]])

# Save this run's output to a file named after the hyperparameters.
with open('test_output/xor_output/xor_output_size_(' + str(INPUTS) + ', '
          + str(HIDDEN) + ', ' + str(OUTPUTS) + ')_learning_rate_'
          + str(LEARNING_RATE) + '_epochs_' + str(MAX_EPOCHS) + '.txt',
          'w') as f:
    print("\nPreTraining Testing:\n")
    f.write('\nPreTraining Testing:\n')
    # Untrained predictions on all four XOR patterns.
    for i in range(len(xor_inputs)):
        mlp.forward(xor_inputs[i], False)
        print("Target:\t" + str(xor_desired_output[i]) + "\t\tOutput:\t" + str(mlp.o) + "\n")
        f.write('Target:\t' + str(xor_desired_output[i]) + '\t\tOutput:\t' + str(mlp.o) + '\n')
    f.write('MLP Size\t(' + str(INPUTS) + ', ' + str(HIDDEN) + ', ' + str(OUTPUTS) + ')\n\n')
    f.write('Epochs:\t\t' + str(MAX_EPOCHS) + '\n')
    f.write('Learning Rate:\t' + str(LEARNING_RATE) + '\n\n')

    print("Training:\n")
    f.write('Training:\n')
    for i in range(MAX_EPOCHS):
        error = 0
        mlp.forward(xor_inputs, False)
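# --- Setup sketch, not from the source: both scripts above write under
# --- test_output/, which must exist before the open(..., 'w') calls run.
import os
os.makedirs('test_output/sin_output', exist_ok=True)
os.makedirs('test_output/xor_output', exist_ok=True)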
# Imports assumed by this file; `example`, `astEncoder`, and the MLP class
# are project-local modules (import paths may differ in the actual repo).
import math
import random
from io import StringIO  # Python 3; the original used the Python 2 StringIO module
from random import choice

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable  # kept as in the original (pre-0.4 PyTorch style)
from gym import wrappers

import astEncoder
import example
from mlp import MLP  # assumed module path


class DQN():
    def __init__(self, env, alpha, gamma, episode_num, target_reward,
                 step_count, minbatch, memory_size, flag):
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.episode_num = episode_num
        self.target_reward = target_reward
        self.step_count = step_count
        # self.test_step = test_step  # used by test(); set it before calling test()
        self.minbatch = minbatch
        self.memory_size = memory_size
        self.flag = flag

        # Two-headed Q-network: one output head per action component.
        self.Q = MLP()
        self.state_dim = env.observation_space.shape[0]
        self.action_dim = env.action_space.spaces[0].n * env.action_space.spaces[1].n
        # self.action_dim = env.action_space.n
        self.Q.creat2(self.state_dim, env.action_space.spaces[0].n,
                      env.action_space.spaces[1].n)

        # Replay buffer: each row packs [state | action1 | action2 | reward | done | next_state].
        self.memory_num = 0
        self.memory = np.zeros((memory_size, self.state_dim * 2 + 4))

        self.optimizer = torch.optim.Adam(self.Q.parameters(), lr=alpha)
        self.loss_func = nn.MSELoss()

    def action2set(self, action2s):
        # Unpack the legal action2 candidates from the SWIG-wrapped list.
        actionLen = example.action2Len(action2s)
        action2Selected = []
        for index in range(actionLen):
            action2Selected.append(example.get_action2(action2s, index))
        return action2Selected

    def getAction1(self, act1Set, action1_value):
        # Restrict the argmax to the legal (non-zero) action1 indices.
        nonzeroind = np.nonzero(act1Set)[0]
        index = torch.LongTensor([nonzeroind])
        action1_values = torch.gather(action1_value.data, 1, index)
        action1 = torch.max(Variable(action1_values), 1)[1].data.numpy()[0]
        return nonzeroind[action1]

    def getAction1_random(self, act1Set):
        # Uniform choice among the legal action1 indices.
        act1Set1 = np.nonzero(act1Set)
        return choice(act1Set1[0])

    def getAction2(self, candidate, action1, action2_value):
        # Build a 50-slot mask of the action2 indices legal given action1,
        # then take the argmax over the legal ones.
        action2 = example.getLegalAction2(candidate, action1)
        action2set_ = self.action2set(action2)
        mask = [0] * 50
        for a in action2set_:
            mask[a] = 1
        nonzeroind2 = np.nonzero(mask)[0]
        index2 = torch.LongTensor([nonzeroind2])
        action2_values = torch.gather(action2_value.data, 1, index2)
        action2 = torch.max(Variable(action2_values), 1)[1].data.numpy()[0]
        return nonzeroind2[action2]

    def getAction2_random(self, candidate, action1):
        action2_ = example.getLegalAction2(candidate, action1)
        return choice(self.action2set(action2_))

    def getAction(self, action1, action2):
        return (action1, action2)

    def choose_action(self, state, episode, act1Set, candidate):
        # Epsilon-greedy with exponential decay, floored at 0.3.
        epsilon = max(0.3, 0.8 * 0.993 ** episode)
        state = Variable(torch.unsqueeze(torch.FloatTensor(state), 0))
        if np.random.uniform() > epsilon:
            # Greedy: unmasked argmax of each head.
            action1_value, action2_value = self.Q.forward(state)
            action1 = torch.max(action1_value, 1)[1].data.numpy()[0]
            action2 = torch.max(action2_value, 1)[1].data.numpy()[0]
            # Legality-masked alternative, kept from the original:
            # action1 = self.getAction1(act1Set, action1_value)
            # action2 = self.getAction2(candidate, action1, action2_value)
        else:
            # Random: uniform over the hard-coded action-space sizes (35 and 50).
            action1 = random.randint(0, 34)
            action2 = random.randint(0, 49)
            # Legality-masked alternative, kept from the original:
            # action1 = self.getAction1_random(act1Set)
            # action2 = self.getAction2_random(candidate, action1)
        return self.getAction(action1, action2)

    def select_action(self, state, act1Set, candidate):
        # Greedy, legality-masked action selection (used at test time).
        state = Variable(torch.unsqueeze(torch.FloatTensor(state), 0))
        action1_value, action2_value = self.Q.forward(state)
        action1 = self.getAction1(act1Set, action1_value)
        action2 = self.getAction2(candidate, action1, action2_value)
        return self.getAction(action1, action2)

    def store_transition(self, state, action0, action1, reward, done, next_state):
        # Ring-buffer insert.
        transition = np.hstack((state, [action0, action1, reward, done], next_state))
        index = self.memory_num % self.memory_size
        self.memory[index, :] = transition
        self.memory_num += 1

    def learn(self):
        # Sample a minibatch and slice the packed transition rows.
        sample = np.random.choice(self.memory_size, self.minbatch)
        batch = self.memory[sample, :]
        state_batch = Variable(torch.FloatTensor(batch[:, :self.state_dim]))
        action1_batch = Variable(torch.LongTensor(
            batch[:, self.state_dim:self.state_dim + 1].astype(int)))
        action2_batch = Variable(torch.LongTensor(
            batch[:, self.state_dim + 1:self.state_dim + 2].astype(int)))
        reward_batch = Variable(torch.FloatTensor(
            batch[:, self.state_dim + 2:self.state_dim + 3]))
        done_batch = Variable(torch.FloatTensor(
            batch[:, self.state_dim + 3:self.state_dim + 4].astype(int)))
        next_state_batch = Variable(torch.FloatTensor(batch[:, -self.state_dim:]))

        # Q-values of the taken actions, per head.
        q1 = self.Q(state_batch)[0].gather(1, action1_batch)
        q2 = self.Q(state_batch)[1].gather(1, action2_batch)
        # Bootstrapped targets from the next state (no gradient).
        q1_next = self.Q(next_state_batch)[0].detach()
        q2_next = self.Q(next_state_batch)[1].detach()
        q1_val = q1_next.max(1)[0].view(self.minbatch, 1)
        q2_val = q2_next.max(1)[0].view(self.minbatch, 1)
        if self.flag == 0:
            # Terminal transitions: no bootstrapping for the first head
            # (q2_val is left untouched, as in the original).
            for i in range(len(done_batch)):
                if done_batch[i].item() == 1:
                    q1_val[i] = 0
        y1 = reward_batch + self.gamma * q1_val
        y2 = reward_batch + self.gamma * q2_val
        loss = self.loss_func(q1, y1) + self.loss_func(q2, y2)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()  # was loss.data[0] in pre-0.4 PyTorch

    def train_CartPole(self, label):
        loss = []
        buf = StringIO()
        for i_episode in range(self.episode_num):
            state, info_ = self.env.reset()
            print("new episode")
            actIndex = astEncoder.setAction1s(info_)
            loss_num = 0
            epr = 0
            buf2 = StringIO()  # only populated by the commented-out logging below
            # buf2.write("episode: %d" % (i_episode))
            for t in range(self.step_count):
                action = self.choose_action(state, i_episode, actIndex, info_.candidate)
                next_state, reward, done, info_ = self.env.step(action)
                actIndex = astEncoder.setAction1s(info_)
                if done and example.get_fitness(info_.candidate) > 78.4:
                    reward = self.target_reward
                    print("i_ep" + str(i_episode) + " step:" + str(t)
                          + " fitness" + str(example.get_fitness(info_.candidate)))
                    # Model-check the candidate program and shape the reward.
                    spin_reward = example.spin_(info_.candidate)
                    if spin_reward == 20:
                        buf.write("correct program at i_ep %d: step:%d \n" % (i_episode, t))
                        fo = open("./correctProg.txt", "a+")
                        fo.write(buf.getvalue())
                        fo.close()
                    if spin_reward == 5:
                        print("liveness")
                    if spin_reward == 10:
                        print("safety")
                    reward = reward + spin_reward
                self.store_transition(state, action[0], action[1], reward, done, next_state)
                epr += reward
                # Start learning once the replay buffer has wrapped around.
                if self.memory_num > self.memory_size:
                    loss_num += self.learn()
                if done:
                    # Note: the loop does not break on done; the episode runs
                    # for the full step_count, as in the original.
                    loss.append(loss_num / (t + 1))
                state = next_state
                if t % 100 == 0:
                    print("i_ep: ", i_episode, "step: ", t, "reward: ", epr)
            fou = open("./rewards.txt", "a+")
            fou.write(buf2.getvalue())
            fou.close()
open("/Users/zhuang/workspace-gp/testSwig2/record.txt", "a+") # fou1.write(buf3.getvalue()) # fou1.close() # fo = open("/Users/zhuang/workspace-gp/testSwig2/foo.txt", "a+") # fo.write('state:\n' + str(state) + '\n' + 'action:\n' + str(action) + '\n' + 'reward:\n' + str( # reward) + '\n' + 'nextState:\n' + str(next_state) + '\n') # fo.close() def test(self, label): total_step = 0 x = [] y = [] total_reward = 0 rlist = [] for i_episode in range(1000): if i_episode == 9999: self.env = wrappers.Monitor(self.env, './video/DQN/' + label) state = self.env.reset() i_reward = 0 x.append(i_episode) for t in range(self.test_step): # self.env.render() action = self.select_action(state) next_state, reward, done, info = self.env.step(action) i_reward += reward if t == (self.test_step - 1): total_step += t + 1 y.append(i_reward) break if done: y.append(i_reward) total_step += t + 1 break state = next_state rlist.append(i_reward) total_reward += i_reward print('%d Episode finished after %f time steps' % (i_episode, t + 1)) ar = total_reward / (i_episode + 1) print('average reward:', ar) av = total_reward / (i_episode + 1) sum = 0 for count in range(len(rlist)): sum += (rlist[count] - av)**2 sr = math.sqrt(sum / len(y)) print('standard deviation:', sr) self.pic(x, y, label, 'Reward')
def letter(max_epochs, learning_rate):
    np.random.seed(1)

    # Load the UCI letter-recognition dataset: 16 integer features per
    # sample plus the target letter.
    columns = [
        "letter", "x-box", "y-box", "width", "height", "onpix", "x-bar",
        "y-bar", "x2bar", "y2bar", "xybar", "x2ybr", "xy2br", "x-ege",
        "xegvy", "y-ege", "yegvx"
    ]
    df = pd.read_csv("letter-recognition.data", names=columns)
    doutput = df["letter"]
    outputs = []
    for i in range(len(doutput)):
        outputs.append(ord(str(doutput[i])) - ord('A'))  # 'A'..'Z' -> 0..25
    inputs = np.array(df.drop(["letter"], axis=1))
    inputs = inputs / 15  # features range over 0..15; normalise to [0, 1]

    # Train set: first 16000 samples, one-hot targets.
    inputs_train = inputs[:16000]
    categorical_y = np.zeros((16000, 26))
    for i, l in enumerate(outputs[:16000]):
        categorical_y[i][l] = 1
    outputs_train = categorical_y

    # Test set: remaining 4000 samples. (One-hot test targets are not needed;
    # predictions are compared against the letters directly below.)
    inputs_test = inputs[16000:]

    # Training process.
    no_in = 16
    no_hidden = 10
    no_out = 26
    NN = MLP(no_in, no_hidden, no_out)
    NN.randomise()
    print('\nMax Epoch:\n' + str(max_epochs), file=log)
    print('\nLearning Rate:\n' + str(learning_rate), file=log)
    print('\nTraining Process:\n', file=log)
    for i in range(max_epochs):
        NN.forward(inputs_train, 'tanh')
        error = NN.backward(inputs_train, outputs_train, 'tanh')
        NN.updateWeights(learning_rate)
        if (i + 1) % max(1, max_epochs // 20) == 0:
            print(' Error at Epoch:\t' + str(i + 1) + '\t is \t' + str(error), file=log)

    # Testing: decode each output vector to the letter with the highest activation.
    def to_character0(outputvector):
        listov = list(outputvector)
        return chr(listov.index(max(listov)) + ord('A'))

    prediction = []
    for i in range(4000):
        NN.forward(inputs_test[i], 'tanh')
        prediction.append(to_character0(NN.O))

    def to_character(n):
        return chr(int(n) + ord('A'))

    # Per-letter sample and correct-prediction counts.
    correct = {to_character(i): 0 for i in range(26)}
    letter_num = {to_character(i): 0 for i in range(26)}
    print('==' * 30, file=log)
    for i, _ in enumerate(doutput[16000:]):
        letter_num[doutput[16000 + i]] += 1
        # Print a sample of predictions.
        if i % 300 == 0:
            print('Expected: {} | Output: {}'.format(doutput[16000 + i], prediction[i]), file=log)
        if doutput[16000 + i] == prediction[i]:
            correct[prediction[i]] += 1
    print('==' * 30, file=log)

    # Overall accuracy.
    accuracy = sum(correct.values()) / len(prediction)
    print('Test sample size: {} | Correctly predicted sample size: {}'.format(
        len(prediction), sum(correct.values())), file=log)
    print('Accuracy: %.3f' % accuracy, file=log)

    # Performance on each class.
    print('==' * 30, file=log)
    for k, v in letter_num.items():
        print('{} => Sample Number: {} | Correct Number: {} | Accuracy: {}'.format(
            k, v, correct[k], correct[k] / v), file=log)
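# --- A small sketch, not from the source, of the label encoding used above:
# --- letters map to indices 0..25, targets are one-hot rows, and predictions
# --- decode via argmax back to a letter. Names here are illustrative.
import numpy as np
label = 'G'
idx = ord(label) - ord('A')  # 6
onehot = np.zeros(26)
onehot[idx] = 1
decoded = chr(int(np.argmax(onehot)) + ord('A'))
assert decoded == label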