from introrl.utils import pickle_esp

# Driver script: wrap a CarRentalSimulation in a Model, collect transition
# data from it, then print summaries of what was recorded.
start_time = time.time()

CR = CarRentalSimulation()
get_sim = Model(CR, build_initial_model=True)
get_sim.collect_transition_data(
    num_det_calls=50,
    num_stoic_calls=100000,
)

# Report the data-point total with thousands separators.
print(
    'Total recorded actions Before:',
    format(get_sim.total_num_action_data_points(), ','),
)

CR.layout.s_hash_print()

# Both layout reports receive the same labelling options.  The row tick list
# is rebuilt for every call so the two calls never share a list object.
_layout_kwargs = dict(
    const_col_w=True,
    x_axis_label='Second Location',
    none_str='*',
)
get_sim.num_calls_layout_print(
    row_tickL=list(' First Location'), **_layout_kwargs)
get_sim.min_num_calls_layout_print(
    row_tickL=list(' First Location'), **_layout_kwargs)

# Disabled debugging aids, kept for reference:
# get_sim.est_reward_error_layout_print(
#     row_tickL=list(' First Location'), const_col_w=True,
#     x_axis_label='Second Location', none_str='*')
# get_sim.define_statesD[(20, 0)].summ_print()
# sys.exit()
# get_sim.collect_transition_data(num_det_calls=10, num_stoic_calls=100)
# The pickle file is named after this script: its base file name up to
# (not including) the first '.'.
fname = os.path.basename(__file__).partition('.')[0]
print('Pickle File Name Prefix:', fname)

# Collect a fresh batch of transition data only when read_pickle_file()
# returns a falsy result for that prefix.
if not get_sim.read_pickle_file(fname):
    get_sim.collect_transition_data(
        num_det_calls=10,
        num_stoic_calls=10000,
    )
# Disabled alternative (unconditional collection):
# get_sim.collect_transition_data(num_det_calls=10, num_stoic_calls=10000)

print(
    'Total recorded actions Before:',
    format(get_sim.total_num_action_data_points(), ','),
)

BJ.layout.s_hash_print()

# The layout reports run with their default arguments.  The labelled form
# that is currently disabled for each of them is:
#     row_tickL=list(' Player Sum'), const_col_w=True,
#     x_axis_label='Dealer Showing', none_str='*'
get_sim.num_calls_layout_print()
get_sim.min_num_calls_layout_print()
get_sim.est_reward_error_layout_print()

# sys.exit()  # disabled early-exit used while debugging

# Collect one more batch, then report the total again for comparison with
# the "Before" figure printed above.
get_sim.collect_transition_data(
    num_det_calls=10,
    num_stoic_calls=100,
)
print(
    'Total recorded actions After:',
    format(get_sim.total_num_action_data_points(), ','),
)
from introrl.dp_funcs.dp_value_iter import dp_value_iteration
from introrl.environments.env_baseline import EnvBaseline
from introrl.agent_supt.model import Model
from introrl.utils import pickle_esp
from introrl.black_box_sims.blackjack_sim import BlackJackSimulation

# Driver script: wrap a BlackJackSimulation in a Model, collect transition
# data from it, then copy the collected data into an EnvBaseline.
start_time = time.time()

BJ = BlackJackSimulation()
get_sim = Model(BJ, build_initial_model=True)
get_sim.collect_transition_data(
    num_det_calls=50,
    num_stoic_calls=100000,
)

BJ.layout.s_hash_print()
get_sim.num_calls_layout_print()
get_sim.min_num_calls_layout_print()

# Status message followed by a 55-character divider on its own line.
print('got sim data', '_' * 55, sep='\n')

# The environment reuses the simulation's state layout rows and axis labels.
env = EnvBaseline(
    s_hash_rowL=BJ.s_hash_rowL,
    x_axis_label=BJ.x_axis_label,
    y_axis_label=BJ.y_axis_label,
)
get_sim.add_all_data_to_an_environment(env)

print('built environment', '_' * 55, sep='\n')

# env.summ_print()  # disabled environment summary