def rollout_based_policy_estimation_fast_scheduling(
    domain: SchedulingDomain,
    policy: DeterministicPolicies,
    nb_rollout: int = 1,
) -> Tuple[Dict[Any, Any], Dict[Any, Any], Dict[Any, Set[Any]], Dict[Any, Set[Any]]]:
    """Estimate the value of the initial state by rolling out ``policy``.

    Runs ``nb_rollout`` rollouts, each starting from a copy of the domain's
    initial state, and averages over them the difference between the ``t``
    attribute of the last visited state and that of the initial state.

    The domain is switched to in-place mode through
    ``set_inplace_environment(True)`` and is not switched back here.

    Args:
        domain: Scheduling domain to simulate.
        policy: Policy handed to ``my_custom_rollout`` to choose actions.
        nb_rollout: Number of rollouts to average over.

    Returns:
        A 4-tuple ``(final_value, policy_dict, preds, succs)``. ``final_value``
        maps the initial state to its averaged value; the three other
        dictionaries are returned empty.

    Raises:
        ZeroDivisionError: If no rollout is performed (``nb_rollout`` < 1).
    """
    initial_state = domain.get_initial_state()
    summed_value = {initial_state: 0}
    nb_visit_dict = {initial_state: 0}
    policy_dict = {}
    preds = {}
    succs = {}

    domain.set_inplace_environment(True)
    for _ in range(nb_rollout):
        # Roll out from a copy so the reference initial state is never mutated.
        visited, _total, _costs, _actions = my_custom_rollout(
            domain, initial_state.copy(), policy
        )
        elapsed = visited[-1].t - initial_state.t
        summed_value[initial_state] += elapsed
        nb_visit_dict[initial_state] += 1

    final_value = {}
    for st, accumulated in summed_value.items():
        final_value[st] = accumulated / nb_visit_dict[st]
    return final_value, policy_dict, preds, succs
def get_resource_requirements_across_duration(domain: SchedulingDomain, task_id: int, **kwargs):
    """Return the average relative resource usage of a task, one value per resource.

    For every resource name reported by the task's mode stored under key ``1``,
    the ratio ``need / available quantity`` is accumulated over each time step
    ``t`` in ``range(duration)`` and divided by ``duration``.

    Args:
        domain: Scheduling domain queried for modes, duration and quantities.
        task_id: Identifier of the task to inspect.
        **kwargs: Accepted and ignored.

    Returns:
        A list with one averaged ratio per resource name, in the order given
        by ``get_ressource_names()``; ``[0.]`` when the duration returned by
        ``get_latest_sampled_duration`` is not strictly positive.

    Raises:
        ZeroDivisionError: If ``sample_quantity_resource`` returns 0 for a
            resource at some time step.
    """
    # Only the mode stored under key 1 is considered.
    mode_consumption = domain.get_task_modes(task_id)[1]
    # NOTE(review): the arguments (1, 0.) presumably mean mode 1 and zero
    # progress -- confirm against the domain API.
    duration = domain.get_latest_sampled_duration(task_id, 1, 0.)
    if not duration > 0:
        return [0.]

    values = []
    for res in mode_consumption.get_ressource_names():
        usage = 0
        for t in range(duration):
            # The mode object is loop-invariant: reuse it instead of fetching
            # it from the domain again at every time step.
            need = mode_consumption.get_resource_need_at_time(res, t)
            total = domain.sample_quantity_resource(res, t)
            usage += need / total
        values.append(usage / duration)
    return values
def from_last_state_to_solution(state: State, domain: SchedulingDomain):
    """Convert a scheduling state into an ``RCPSPSolution``.

    Args:
        state: State whose ``tasks_mode`` and ``tasks_details`` are read.
        domain: Scheduling domain providing the task ids and passed to
            ``build_do_domain`` to build the solution's problem.

    Returns:
        An ``RCPSPSolution`` with ``rcpsp_permutation=None``, the modes of the
        tasks and a schedule holding the start and end time of every task
        present in ``state.tasks_details``.
    """
    all_modes = []
    for task_id in sorted(domain.get_tasks_ids()):
        # Tasks without a recorded mode default to mode 1.
        all_modes.append(state.tasks_mode.get(task_id, 1))
    # Drop the first and last task in sorted-id order (presumably the dummy
    # source and sink tasks -- confirm against the RCPSP model).
    inner_modes = all_modes[1:-1]

    schedule = {}
    for task_id in state.tasks_details:
        details = state.tasks_details[task_id]
        schedule[task_id] = {
            "start_time": details.start,
            "end_time": details.end,
        }

    problem = build_do_domain(domain)
    return RCPSPSolution(
        problem=problem,
        rcpsp_permutation=None,
        rcpsp_modes=inner_modes,
        rcpsp_schedule=schedule,
    )
def my_custom_rollout(domain: GoalMDPDomain, state, policy: DeterministicPolicies):
    """Roll out ``policy`` from ``state`` until a goal or terminal state is hit.

    At every step the action comes from ``policy.get_next_action`` and the
    next state from ``SchedulingDomain._state_sample``, called on the class
    with ``domain`` passed explicitly. The rollout stops as soon as the newest
    state is a goal; if it is terminal without being a goal, a penalty of
    1000 is added to the accumulated cost before stopping.

    Args:
        domain: Domain to simulate.
        state: Starting state; it becomes the first entry of the state list.
        policy: Policy queried for the action to apply in each state.

    Returns:
        A 4-tuple ``(states, summed_value, values, actions)``: the visited
        states (starting state included), the accumulated cost (penalty
        included), the per-step costs and the applied actions.
    """
    states = [state]
    actions = []
    values = []
    total_cost = 0.
    while True:
        current = states[-1]
        action = policy.get_next_action(current)
        next_state = SchedulingDomain._state_sample(domain, current, action).state
        step_cost = domain.get_transition_value(current, action, next_state).cost
        values.append(step_cost)
        total_cost += step_cost
        states.append(next_state)
        actions.append(action)
        if domain.is_goal(next_state):
            break
        if domain.is_terminal(next_state):
            total_cost += 1000  # penalty
            break
    return states, total_cost, values, actions
def feature_n_predecessors(domain: SchedulingDomain, cpm, cpm_esd, task_id: int, **kwargs):
    """Return the share of tasks that are predecessors of ``task_id``.

    Args:
        domain: Scheduling domain queried for predecessors and task ids.
        cpm: Unused.
        cpm_esd: Unused.
        task_id: Identifier of the task to inspect.
        **kwargs: Accepted and ignored.

    Returns:
        ``len(domain.get_predecessors_task(task_id))`` divided by the number
        of task ids in the domain.
    """
    predecessor_count = len(domain.get_predecessors_task(task_id))
    task_count = len(domain.get_tasks_ids())
    return predecessor_count / task_count
def feature_total_n_res(domain: SchedulingDomain, cpm, cpm_esd, task_id: int, **kwargs):
    """Return the summed resource need of a task over all its resources.

    Only the mode stored under key ``1`` of ``domain.get_task_modes(task_id)``
    is considered.

    Args:
        domain: Scheduling domain queried for the task's modes.
        cpm: Unused.
        cpm_esd: Unused.
        task_id: Identifier of the task to inspect.
        **kwargs: Accepted and ignored.

    Returns:
        The sum of ``get_resource_need(res)`` over every resource name of the
        mode; ``0`` when the mode lists no resource.
    """
    consumption = domain.get_task_modes(task_id)[1]
    total_need = 0
    for resource_name in consumption.get_ressource_names():
        total_need = total_need + consumption.get_resource_need(resource_name)
    return total_need
def feature_task_duration(domain: SchedulingDomain, cpm, cpm_esd, task_id: int, **kwargs):
    """Return the duration obtained from ``domain.sample_task_duration``.

    Args:
        domain: Scheduling domain asked for the duration.
        cpm: Unused.
        cpm_esd: Unused.
        task_id: Identifier of the task to inspect.
        **kwargs: Accepted and ignored.

    Returns:
        The value returned by ``domain.sample_task_duration(task_id)``.
    """
    sampled_duration = domain.sample_task_duration(task_id)
    return sampled_duration
def feature_precedence_done(domain: SchedulingDomain, cpm, cpm_esd, task_id: int, state: State, **kwargs):
    """Tell whether ``task_id`` is launchable in ``state`` precedence-wise.

    Args:
        domain: Scheduling domain queried for the launchable tasks.
        cpm: Unused.
        cpm_esd: Unused.
        task_id: Identifier of the task to look up.
        state: State passed to ``task_possible_to_launch_precedence``.
        **kwargs: Accepted and ignored.

    Returns:
        ``True`` when ``task_id`` belongs to the collection returned by
        ``domain.task_possible_to_launch_precedence(state=state)``, else
        ``False``.
    """
    launchable = domain.task_possible_to_launch_precedence(state=state)
    return task_id in launchable
def feature_all_descendants(domain: SchedulingDomain, cpm, cpm_esd, task_id: int, **kwargs):
    """Return the size of ``full_successors[task_id]`` relative to all tasks.

    Args:
        domain: Scheduling domain exposing ``full_successors`` and task ids.
        cpm: Unused.
        cpm_esd: Unused.
        task_id: Identifier of the task to inspect.
        **kwargs: Accepted and ignored.

    Returns:
        ``len(domain.full_successors[task_id])`` divided by the number of
        task ids in the domain.
    """
    descendant_count = len(domain.full_successors[task_id])
    task_count = len(domain.get_tasks_ids())
    return descendant_count / task_count