def main(args):
    """
    Entry point for the sequential comparison run.

    Logs the HEFT reward once per episode index, trains the DQTS model and
    our model episode by episode, plots the three reward series and then
    runs the test routine of each model.

    :param args: parsed run arguments; ``num_episodes`` is read and
        ``state_size`` is overwritten with 20 before the DQTS model is built
    :return: None
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    logger_nns, logger_dqts, logger_heft = setup_logger_all()
    model = get_model(args)

    # The single HEFT reward is logged at every episode index.
    reward_heft, _end_time = heft(config['wfs_name'], config['nodes'])
    for episode in range(args.num_episodes):
        logger_heft.log_scalar('main/reward', reward_heft, episode)

    # NOTE: `model` was created above, before this override; every call
    # below that receives `args` sees state_size == 20.
    args.state_size = 20
    dqts_model = get_dqts_model(args)
    print(dqts_model)
    print(model)

    # All DQTS episodes run first, then all episodes of our model.
    dqts_reward = []
    for episode in range(args.num_episodes):
        dqts_reward.append(
            run_dqts_episode(dqts_model, episode, args, logger_dqts)
        )

    reward = []
    for episode in range(args.num_episodes):
        reward.append(run_episode(model, episode, args, logger_nns))

    plot_heft_dqts_ours(args, reward_heft, dqts_reward, reward)
    test(model, args)
    dqts_test(dqts_model, args)
def run_dqts_episode(ei, logger, args):
    """
    Run one DQTS episode against the service at ``args.host:args.port``.

    The episode is played with ``episode_dqts``, the returned transitions
    are handed to ``remember`` and the episode reward is optionally logged.

    :param ei: episode index, also used as the logging step
    :param logger: object exposing ``log_scalar``, or None to skip logging
    :param args: run arguments providing ``host`` and ``port``
    :return: reward returned by ``episode_dqts``
    """
    URL = f"http://{args.host}:{args.port}/"
    config = parameter_setup(args, DEFAULT_CONFIG)
    # Only the workflows and their count are needed for the episode itself.
    workflows, _times, _scores, n_workflows = wf_setup(config['wfs_name'])

    reward, sars_list = episode_dqts(ei, config, workflows, n_workflows, URL)
    remember(sars_list, URL)

    if logger is None:
        return reward
    logger.log_scalar('main/reward', reward, ei)
    return reward
def interective_test(model, args):
    """
    Interactive test: schedule every configured workflow greedily with the
    current network and draw each scheduling step.

    At every step the action with the highest Q-value is applied to the
    workflow context. The chosen action and every action whose Q-value is
    non-zero are also replayed on pre-step snapshots of the context, and
    the results are passed to the plotter together with the schedule.

    :param model: network exposing ``act_q(state, mask, flag)``
    :param args: run arguments; ``run_name`` is used when writing schedules
    :return: None
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    test_wfs, test_times, test_scores, test_size = wf_setup(config['wfs_name'])
    for i in range(test_size):
        ttree, tdata, trun_times = test_wfs[i]
        wfl = ctx.Context(config['agent_task'], config['nodes'], trun_times, ttree, tdata)
        sch = ScheduleInterectivePlotter(wfl.worst_time, wfl.m, wfl.n)
        wfl.name = config['wfs_name'][i]
        is_rnn = config['actor_type'] == 'rnn'

        state = wfl.state
        if is_rnn:
            deq = RNNDeque(seq_size=config['seq_size'], size=config['state_size'])
            # Feed the initial state through the deque as well, so the first
            # network input is built the same way as all later ones (this
            # mirrors what dqts_test does).
            deq.push(state)
            state = deq.show()

        for _ in range(wfl.n):
            mask = wfl.get_mask()
            q = model.act_q(state.reshape(1, state.shape[0]), mask, False)
            q = np.squeeze(q, axis=0) if len(q.shape) > 1 else q
            action_idx = np.argmax(q)
            actions = [wfl.actions[action] for action in range(q.shape[-1])]
            best_t, best_n = actions[action_idx]

            # Only the chosen action and actions with a non-zero Q-value are
            # drawn, so only those need a snapshot. Snapshots must be taken
            # before the real context is advanced.
            shown = [
                idx for idx in range(len(actions))
                if q[idx] != 0 or idx == action_idx
            ]
            snapshots = {idx: deepcopy(wfl) for idx in shown}

            reward, wf_time = wfl.make_action(best_t, best_n)
            next_state = wfl.state

            acts = []
            for idx in shown:
                t, n = actions[idx]
                # Separate names: the candidate results must not overwrite
                # the reward / time of the action that was really taken,
                # which are recorded below once the workflow completes.
                cand_reward, _cand_time, item = snapshots[idx].make_action_item(t, n)
                acts.append((item, cand_reward, n))
            sch.draw_item(wfl.schedule, acts)

            if is_rnn:
                deq.push(next_state)
                next_state = deq.show()
            state = next_state

            if wfl.completed:
                test_scores[i].append(reward)
                test_times[i].append(wf_time)
        write_schedule(args.run_name, i, wfl)
def run_dqts_episode(model, ei, args, logger=None):
    """
    Run one learning episode: play it, remember the transitions, replay.

    :param model: DQTS model passed to ``episode_dqts``, ``remember`` and
        ``replay``
    :param ei: episode index, also used as the logging step
    :param args: run arguments used to build the configuration
    :param logger: optional object exposing ``log_scalar``
    :return: reward returned by ``episode_dqts``
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    # Only the workflows and their count are needed for the episode itself.
    workflows, _times, _scores, n_workflows = wf_setup(config['wfs_name'])

    reward, sars_list = episode_dqts(model, ei, config, workflows, n_workflows)
    remember(model, sars_list, args)
    replay(model, config['batch_size'])

    if logger is None:
        return reward
    logger.log_scalar('main/reward', reward, ei)
    return reward
def dqts_test(model, args):
    """
    Build a schedule for every configured workflow with the current network.

    Only ``model.act`` is called on the model here. For each workflow the
    final reward and time are appended to the per-workflow lists returned
    by ``wf_setup`` and the schedule is written out.

    :param model: network exposing ``act(state, mask, flag)``
    :param args: run arguments; ``run_name`` is used when writing schedules
    :return: None
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    workflows, times_log, scores_log, n_workflows = wf_setup(config['wfs_name'])

    for wf_idx in range(n_workflows):
        tree, data, run_times = workflows[wf_idx]
        wfl = dqts_ctx.Context(
            config['agent_task'], config['nodes'], run_times, tree, data
        )
        wfl.name = config['wfs_name'][wf_idx]

        use_rnn = config['actor_type'] == 'rnn'
        if use_rnn:
            deq = RNNDeque(seq_size=config['seq_size'], size=config['state_size'])
        finished = wfl.completed
        current = wfl.state
        if use_rnn:
            # In rnn mode the network input is whatever the deque shows.
            deq.push(current)
            current = deq.show()

        for _ in range(wfl.n):
            mask = wfl.get_mask()
            batch = current.reshape(1, current.shape[0])
            chosen = model.act(batch, mask, False)
            task, node = wfl.actions[chosen]
            reward, wf_time = wfl.make_action(task, node)

            following = wfl.state
            if use_rnn:
                deq.push(following)
                following = deq.show()
            finished = wfl.completed
            current = following

            if finished:
                scores_log[wf_idx].append(reward)
                times_log[wf_idx].append(wf_time)

        write_schedule(args.run_name, wf_idx, wfl)
def do_heft(args, URL, logger):
    """
    Request a HEFT schedule from the service, log and store its reward.

    Posts the workflow names and node description to ``{URL}heft``, logs
    the returned reward once per episode index when ``args.logger`` is
    truthy, writes the reward to a timestamped CSV under ``./results`` and
    prints the makespan.

    :param args: run arguments; ``run_name``, ``logger`` and
        ``num_episodes`` are read
    :param URL: base address of the service; ``heft`` is appended as-is
    :param logger: object exposing ``log_scalar``; used only when
        ``args.logger`` is truthy
    :return: decoded JSON response of the service
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    response = requests.post(
        f'{URL}heft',
        json={
            'wf_name': config['wfs_name'],
            'nodes': config['nodes'].tolist(),
        },
    ).json()

    results_dir = pathlib.Path.cwd() / 'results'
    # to_csv does not create missing directories; make sure it exists.
    results_dir.mkdir(parents=True, exist_ok=True)
    reward_path = results_dir / (
        f'{args.run_name}_{datetime.now().strftime("%d%b%y_%I%M%p")}_heft_reward.csv'
    )

    rewards = response['reward']
    if args.logger:
        # The same reward value is logged at every episode index.
        for episode in range(args.num_episodes):
            logger.log_scalar('main/reward', rewards, episode)
    makespan = response['makespan']

    # atleast_1d leaves list input unchanged and turns a scalar reward into
    # a one-element array, so the CSV gets a data row instead of only a
    # header.
    rewards = np.atleast_1d(rewards)
    result = pd.DataFrame()
    result['reward'] = rewards
    result.to_csv(reward_path, sep=',', index=False, columns=['reward'])

    print(f'Schedule makespan: {makespan}')
    return response
dtype=tf.int32) next_time_step = tf_env.step(action) traj = trajectory.from_transition(time_step, action_step_act, next_time_step) # Add trajectory to the replay buffer replay_buffer.add_batch(traj) if traj.is_boundary(): episode_counter += 1 if __name__ == '__main__': args = parser.parse_args() config = parameter_setup(args, DEFAULT_CONFIG) test_wfs, test_times, test_scores, test_size = wf_setup(config['wfs_name']) ttree, tdata, trun_times = test_wfs[0] real_env = Context(config['agent_task'], config['nodes'], trun_times, ttree, tdata) environment = tf_py_environment.TFPyEnvironment(real_env) eval_real_env = Context(config['agent_task'], config['nodes'], trun_times, ttree, tdata) eval_environment = tf_py_environment.TFPyEnvironment(eval_real_env) actor_net = actor_distribution_network.ActorDistributionNetwork( environment.observation_spec(), environment.action_spec(), fc_layer_params=(200, 100))