Exemplo n.º 1
0
def new_stage_data(action, portfolio, old_state, new_state, portfolio_value,
                   done, episode_data):
    """Apply one trading action and return the resulting state and reward.

    The current step is priced with
    ``episodic_data.data_average_price(data_dict, episode_data)`` and the
    following step with the same call on ``new_state``.

    Args:
        action: 1 buys one unit, 2 sells one unit; any other value leaves
            the position and the cash balance unchanged.
        portfolio: Number of units held before the action. No lower bound
            is enforced, so selling can drive it negative.
        old_state: Not used by this function.
        new_state: Next observation; the updated ``portfolio`` is attached
            with ``new_state + [portfolio]``.
        portfolio_value: Running cash balance before the action.
        done: Episode-termination flag, passed through untouched.
        episode_data: Current-step data, used only for the price lookup.

    Returns:
        Tuple ``(new_state, reward, done, portfolio, portfolio_value)``
        where ``reward`` is the cash balance plus the holdings valued at
        the next step's price, doubled when that total is positive.
    """
    # Average (not normalized) prices for the current and the next step.
    # The next price must be looked up before ``portfolio`` is appended.
    current_price = episodic_data.data_average_price(data_dict, episode_data)
    upcoming_price = episodic_data.data_average_price(data_dict, new_state)

    if action == 1:
        # Buy: pay the current price, hold one more unit.
        portfolio_value -= current_price
        portfolio += 1
    elif action == 2:
        # Sell: collect the current price, hold one unit fewer.
        portfolio_value += current_price
        portfolio -= 1
    # Any other action (including 0, "hold") changes nothing.

    new_state = new_state + [portfolio]

    # Mark-to-market worth at the next price; gains are weighted double.
    net_worth = portfolio_value + portfolio * upcoming_price
    reward = 2 * net_worth if net_worth > 0 else net_worth

    return new_state, reward, done, portfolio, portfolio_value
Exemplo n.º 2
0
def new_stage_data(action, portfolio, old_state, new_state, portfolio_value, done, episode_data):
    """Apply one trading action and return the resulting state and reward.

    The current step is priced with
    ``episodic_data.data_average_price(data_dict, episode_data)`` and the
    following step with the same call on ``new_state``.

    Args:
        action: 1 buys one unit, 2 sells one unit; any other value leaves
            the position and the cash balance unchanged.
        portfolio: Number of units held before the action. No lower bound
            is enforced, so selling can drive it negative.
        old_state: Not used by this function.
        new_state: Next observation; the updated ``portfolio`` is attached
            with ``new_state + [portfolio]``.
        portfolio_value: Running cash balance before the action.
        done: Episode-termination flag, passed through untouched.
        episode_data: Current-step data, used only for the price lookup.

    Returns:
        Tuple ``(new_state, reward, done, portfolio, portfolio_value)``
        where ``reward`` is the cash balance plus the holdings valued at
        the next step's price, doubled when that total is positive.
    """
    # Average (not normalized) prices for the current and the next step.
    # The next price must be looked up before ``portfolio`` is appended.
    current_price = episodic_data.data_average_price(data_dict, episode_data)
    upcoming_price = episodic_data.data_average_price(data_dict, new_state)

    if action == 1:
        # Buy: pay the current price, hold one more unit.
        portfolio_value -= current_price
        portfolio += 1
    elif action == 2:
        # Sell: collect the current price, hold one unit fewer.
        portfolio_value += current_price
        portfolio -= 1
    # Any other action (including 0, "hold") changes nothing.

    new_state = new_state + [portfolio]

    # Mark-to-market worth at the next price; gains are weighted double.
    net_worth = portfolio_value + portfolio * upcoming_price
    reward = 2 * net_worth if net_worth > 0 else net_worth

    return new_state, reward, done, portfolio, portfolio_value
Exemplo n.º 3
0
def show_trader_path(actions, episode_data, portfolio_list, portfolio_value_list, reward_list):
    """Walk a recorded episode and return its final mark-to-market worth.

    For every action the matching entries of ``episode_data``,
    ``action_map``, ``portfolio_list``, ``portfolio_value_list`` and
    ``reward_list`` are looked up, and the step is priced with
    ``episodic_data.data_average_price``. Nothing is displayed: the debug
    print that consumed these values is disabled.

    Args:
        actions: Indexable collection of action codes, one per step; each
            code must be a key of the module-level ``action_map``.
        episode_data: Per-step data, indexed by step number. It is also
            read at the index equal to the number of actions, to price
            the step after the last action.
        portfolio_list: Units held at each step.
        portfolio_value_list: Cash balance at each step.
        reward_list: Reward recorded at each step.

    Returns:
        ``portfolio_value_list[-1] + portfolio_list[-1] * last_price``,
        where ``last_price`` is the average price of the entry following
        the last action.
    """
    steps_taken = 0
    for steps_taken, _ in enumerate(actions, start=1):
        step = steps_taken - 1
        snapshot = episode_data[step]
        # Trace row: (action name, average price, portfolio,
        # portfolio value, reward). It is built but not shown; the lookups
        # stay so malformed input still fails at the same point as before.
        trace_row = (
            action_map[actions[step]],
            episodic_data.data_average_price(data_dict, snapshot),
            portfolio_list[step],
            portfolio_value_list[step],
            reward_list[step],
        )

    # Price the entry that follows the last action taken.
    closing_price = episodic_data.data_average_price(
        data_dict, episode_data[steps_taken])
    return portfolio_value_list[-1] + portfolio_list[-1] * closing_price
Exemplo n.º 4
0
def show_trader_path(actions, episode_data, portfolio_list,
                     portfolio_value_list, reward_list):
    """Walk a recorded episode and return its final mark-to-market worth.

    For every action the matching entries of ``episode_data``,
    ``action_map``, ``portfolio_list``, ``portfolio_value_list`` and
    ``reward_list`` are looked up, and the step is priced with
    ``episodic_data.data_average_price``. Nothing is displayed: the debug
    print that consumed these values is disabled.

    Args:
        actions: Indexable collection of action codes, one per step; each
            code must be a key of the module-level ``action_map``.
        episode_data: Per-step data, indexed by step number. It is also
            read at the index equal to the number of actions, to price
            the step after the last action.
        portfolio_list: Units held at each step.
        portfolio_value_list: Cash balance at each step.
        reward_list: Reward recorded at each step.

    Returns:
        ``portfolio_value_list[-1] + portfolio_list[-1] * last_price``,
        where ``last_price`` is the average price of the entry following
        the last action.
    """
    steps_taken = 0
    for steps_taken, _ in enumerate(actions, start=1):
        step = steps_taken - 1
        snapshot = episode_data[step]
        # Trace row: (action name, average price, portfolio,
        # portfolio value, reward). It is built but not shown; the lookups
        # stay so malformed input still fails at the same point as before.
        trace_row = (
            action_map[actions[step]],
            episodic_data.data_average_price(data_dict, snapshot),
            portfolio_list[step],
            portfolio_value_list[step],
            reward_list[step],
        )

    # Price the entry that follows the last action taken.
    closing_price = episodic_data.data_average_price(
        data_dict, episode_data[steps_taken])
    return portfolio_value_list[-1] + portfolio_list[-1] * closing_price