def stacking_assign_q_learning(shorter_init, longer_init):
    """Roll out a pretrained Q-table policy on a ``Stacking`` environment.

    The pretrained table is selected from the first element of each input
    (``A/U``, ``C/G``, ``G/C``, ``G/U``, ``U/A`` or ``U/G``). For any other
    leading pair no pretrained table is loaded and the agent runs with its
    freshly constructed table.

    Args:
        shorter_init: Initial "shorter" sequence handed to ``Stacking``.
        longer_init: Initial "longer" sequence handed to ``Stacking``.

    Returns:
        A ``(shorter_final, longer_final)`` tuple: the first two
        ``'_'``-separated fields of the last observation.
    """
    # Local import keeps this block self-contained; DataFrame.append was
    # removed in pandas 2.0, so pd.concat is used to merge the tables.
    import pandas as pd

    env = Stacking(shorter_init, longer_init)
    # NOTE(review): e_greedy=1 presumably makes action selection fully
    # greedy -- confirm against QLearningTable.
    RL = QLearningTable(actions=list(range(6)), e_greedy=1)

    # Pick the pretrained table matching the leading pair, if any.
    shorter_lead = shorter_init[0]
    pretrained = None
    if shorter_lead == 'A' and longer_init[0] == 'U':
        pretrained = q_table_A_U
    elif shorter_lead == 'C' and longer_init[0] == 'G':
        pretrained = q_table_C_G
    elif shorter_lead == 'G' and longer_init[0] == 'C':
        pretrained = q_table_G_C
    elif shorter_lead == 'G' and longer_init[0] == 'U':
        pretrained = q_table_G_U
    elif shorter_lead == 'U' and longer_init[0] == 'A':
        pretrained = q_table_U_A
    elif shorter_lead == 'U' and longer_init[0] == 'G':
        pretrained = q_table_U_G

    if pretrained is not None:
        # Equivalent to the former ``RL.q_table.append(pretrained)``.
        RL.q_table = pd.concat([RL.q_table, pretrained])

    # Observations are encoded as "<shorter>_<longer>".
    observation = env.shorter + "_" + env.longer
    while True:
        action = RL.choose_action(observation)
        shorter_, longer_, _reward, done = env.step(action)
        # Inference only: the table is not updated (no RL.learn call),
        # so the reward is ignored.
        observation = shorter_ + "_" + longer_
        if done:
            break

    fields = observation.split('_')
    return fields[0], fields[1]
def ubp_6_assign_q_learning(shorter_init):
    """Roll out the pretrained ``q_table_ubp_6`` policy on a ``ubp_6`` environment.

    Args:
        shorter_init: Initial "shorter" sequence handed to ``ubp_6``.

    Returns:
        The last observation produced by ``env.step`` once the environment
        reports ``done``.
    """
    # Local import keeps this block self-contained; DataFrame.append was
    # removed in pandas 2.0, so pd.concat is used to merge the tables.
    import pandas as pd

    env = ubp_6(shorter_init)
    # NOTE(review): e_greedy=1 presumably makes action selection fully
    # greedy -- confirm against QLearningTable.
    RL = QLearningTable(actions=list(range(4)), e_greedy=1)
    # Equivalent to the former ``RL.q_table.append(q_table_ubp_6)``.
    RL.q_table = pd.concat([RL.q_table, q_table_ubp_6])

    observation = env.shorter
    while True:
        action = RL.choose_action(observation)
        # Inference only: the table is not updated (no RL.learn call),
        # so the reward is ignored.
        observation, _reward, done = env.step(action)
        if done:
            break

    return observation