Exemplo n.º 1
0
def sample_agent_data(N, args, env, model, obs_normalizer, customers,
                      customer_states):
    """Roll out the policy once per customer and collect the trajectories.

    For each of the ``N`` customers an initial state is drawn at random from
    ``customer_states[i]`` and handed to ``pe.sample_from_policy``. For some
    state representations the initial state is first tagged with the index
    of the closest expert, i.e. the expert with the smallest ``wd`` distance
    to ``customers[i]``.

    Besides its parameters, this function reads the names ``n_experts`` and
    ``experts`` from the enclosing (module) scope.

    Returns:
        A tuple ``(agent_states, agent_actions, closest_expert)``: the two
        NumPy arrays built from the per-customer rollouts, and a list of
        length ``N`` holding the closest-expert index for every customer
        for which it was looked up (0 otherwise).
    """
    closest_expert = [0] * N
    rollout_states = []
    rollout_actions = []

    for idx in range(N):
        # Start the rollout from a randomly chosen state of customer idx.
        start_state = random.choice(customer_states[idx])

        rep = args['state_rep']
        # NOTE(review): `and` binds tighter than `or`, so the
        # `idx >= n_experts` restriction applies to state_rep 23 only; for
        # 22 and 221 every customer takes this branch. The parentheses
        # below spell out that existing behaviour -- confirm it is intended.
        if rep in (22, 221) or (rep == 23 and idx >= n_experts):
            customer = customers[idx]
            expert_distances = [wd(customer, expert) for expert in experts]
            nearest = np.argsort(expert_distances)[0]
            closest_expert[idx] = nearest
            # NOTE(review): this writes into the object returned by
            # random.choice, which may alias a row of customer_states.
            start_state[nearest] = 1

        states, actions = pe.sample_from_policy(
            env, model, obs_normalizer, initial_state=start_state)
        rollout_states.append(states)
        rollout_actions.append(actions)

    return np.array(rollout_states), np.array(rollout_actions), closest_expert
Exemplo n.º 2
0
def earthmover_distance_categorical(p1, p2):
    """Return the ``wd`` distance between two ``Categorical`` distributions.

    The category indices ``0 .. len(probs) - 1`` of each distribution are
    used as the support points and the distributions' ``_probs`` as the
    corresponding weights.

    Raises:
        ValueError: if either argument is not a ``Categorical`` instance.
    """
    both_categorical = (isinstance(p1, Categorical)
                        and isinstance(p2, Categorical))
    if not both_categorical:
        raise ValueError('Input distributions must be Categoricals')

    weights_a, weights_b = p1._probs, p2._probs
    support_a = range(len(weights_a))
    support_b = range(len(weights_b))
    return wd(support_a, support_b, weights_a, weights_b)
Exemplo n.º 3
0
def find_distance(*argv):
    """Sum the pairwise ``wd`` distances between the samples in ``argv[0]``.

    Only the first positional argument is read; it is treated as an indexable
    collection of samples. Every unordered pair of samples contributes its
    ``wd`` distance, except that a pair containing an empty sample
    contributes a fixed penalty of 100 instead.
    """
    samples = argv[0]
    empty_penalty = 100  # finite stand-in used in place of np.inf

    def pair_cost(first, second):
        # wd cannot be evaluated on an empty sample, so charge the penalty.
        if len(first) and len(second):
            return wd(first, second)
        return empty_penalty

    index_pairs = combinations(range(len(samples)), 2)
    return sum(pair_cost(samples[i], samples[j]) for i, j in index_pairs)
Exemplo n.º 4
0
def Distance(A, trueW, plot=False):
    """Return the ``wd`` distance between two groups of ``trueW`` values.

    Element ``i`` of ``trueW`` is placed in the first group when
    ``A[i][0] == 1`` and in the second group otherwise.

    Args:
        A: indexable collection of rows; only ``A[i][0]`` is inspected.
        trueW: values to split, indexed in parallel with ``A``.
        plot: when true, draw both groups with ``sns.distplot`` (no
            histogram) before computing the distance.
    """
    flagged, unflagged = [], []
    for idx in range(len(A)):
        bucket = flagged if A[idx][0] == 1 else unflagged
        bucket.append(trueW[idx])

    if plot:
        for group in (flagged, unflagged):
            sns.distplot(group, hist=False)

    return wd(flagged, unflagged)
Exemplo n.º 5
0
def plot(data):
    """Plot agent-vs-customer Wasserstein distances against training length.

    Customer trajectories are generated from the environment described by the
    ``args.txt`` of the first run directory under ``dir_path`` whose name
    starts with ``'2020'``. For every parameter value in ``data`` and every
    stored model trained for more than 1,000,000 steps, the distance between
    each agent distribution and the matching customer distribution is
    recorded. The first ``n_experts`` customers are reported as experts and
    the remaining ones as new customers, each in its own seaborn line plot.

    Args:
        data: mapping from parameter value to an iterable of results; each
            result exposes ``models``, a mapping from the number of training
            steps to the per-customer agent distributions.

    Besides ``data`` this function reads several names from the enclosing
    (module) scope, among them ``dir_path``, ``sample_length``,
    ``n_new_customers`` and ``param``.
    """
    import seaborn as sns
    import pandas as pd

    # Load the run arguments; the context manager closes the file handle.
    run_dirs = [
        join(dir_path, x) for x in os.listdir(dir_path)
        if x.startswith('2020')
    ]
    with open(join(run_dirs[0], 'args.txt'), 'r') as args_file:
        args = json.load(args_file)
    n_experts = args['n_experts']

    # Sample customer data
    if args['state_rep'] == 71:
        env = pe.get_env_and_model(args,
                                   '',
                                   sample_length,
                                   only_env=True,
                                   n_experts_in_adam_basket=n_experts +
                                   n_new_customers)
    else:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True)

    customer_trajectories = env.generate_expert_trajectories(
        out_dir=None,
        n_demos_per_expert=1,
        n_experts=n_experts + n_new_customers,
        n_expert_time_steps=sample_length)
    customer_states = np.array(customer_trajectories['states'])
    customer_actions = np.array(customer_trajectories['actions'])
    customers = get_distribs(customer_states, customer_actions)

    # NOTE(review): avg_expert is not used further down in this function.
    expert_states = np.array(customer_trajectories['states'][:n_experts])
    expert_actions = np.array(customer_trajectories['actions'][:n_experts])
    avg_expert = pe.get_distrib(expert_states, expert_actions)

    columns = [
        'int_value', 'Parameter value', 'Number of training episodes',
        'Wasserstein distance'
    ]
    df_experts = pd.DataFrame(columns=columns)
    df_new_customers = pd.DataFrame(columns=columns)

    for param_value, results in data.items():
        print('Processing parameter value {}'.format(param_value))
        for result in results:
            for n_train_steps, agent in result.models.items():
                # Only models trained for more than 1e6 steps are compared.
                if int(n_train_steps) <= 1000000:
                    continue
                n_train_episodes = int(n_train_steps) / args['episode_length']
                for i, (a, c) in enumerate(zip(agent, customers)):
                    assert len(a) == 1
                    diff = wd(a[0], c)
                    # The first n_experts customers are the experts.
                    target = df_experts if i < n_experts else df_new_customers
                    target.loc[len(target.index)] = [
                        param_value,
                        get_label_from_param_value(param_value, param),
                        n_train_episodes, diff
                    ]

    # sort_values returns a new frame, so the result has to be kept for the
    # sort to take effect.
    df_experts = df_experts.sort_values(by=['int_value'])
    df_new_customers = df_new_customers.sort_values(by=['int_value'])

    sns.set(style='darkgrid')

    g1 = sns.relplot(x='Number of training episodes',
                     y='Wasserstein distance',
                     hue='Parameter value',
                     ci=95,
                     kind='line',
                     data=df_experts,
                     facet_kws={'legend_out': False})
    g1.fig.subplots_adjust(top=0.95)
    ax1 = g1.axes[0][0]
    ax1.set_title('Comparison with experts')

    g2 = sns.relplot(x='Number of training episodes',
                     y='Wasserstein distance',
                     hue='Parameter value',
                     ci=95,
                     kind='line',
                     data=df_new_customers,
                     facet_kws={'legend_out': False})
    g2.fig.subplots_adjust(top=0.95)
    ax2 = g2.axes[0][0]
    ax2.set_title('Comparison with new customers')

    for ax in (ax1, ax2):
        # Re-order the legend entries by the leading integer of each label;
        # the first handle/label pair is replaced by the parameter's title.
        handles, labels = ax.get_legend_handles_labels()
        labels2, handles2 = zip(*sorted(zip(labels[1:], handles[1:]),
                                        key=lambda t: int(t[0].split(' ')[0])))
        labels2 = list(labels2)
        handles2 = list(handles2)
        labels2.insert(0, get_label_from_param(param))
        handles2.insert(0, handles[0])
        ax.legend(handles2, labels2)

    plt.show()
Exemplo n.º 6
0
def save_df(dir_path, folder_name, sample_length=10000, n_new_customers=50):
    """Evaluate every checkpoint of a run and write the distances to a CSV.

    For each ``*checkpoint`` directory under ``dir_path`` that was saved after
    more than 1,000,000 steps, agents are sampled for all customers and their
    distributions are compared (via ``wd``) with:

    * the matching customer ('Experts' for the first ``n_experts`` customers,
      'New customers' for the rest),
    * the closest expert of each new customer ('Closest expert'),
    * the average expert ('Average expert').

    The rows are written to
    ``report/<folder_name>/df_<folder_name><k>.csv``, where ``k`` is one more
    than the number of CSV files already present in that folder.

    Args:
        dir_path: run directory containing ``args.txt`` and the model folders.
        folder_name: sub-folder of ``report`` that receives the CSV.
        sample_length: forwarded to the environment/model loader and to the
            customer sampler.
        n_new_customers: number of customers in addition to the experts.
    """
    # The context manager closes the args file once it has been parsed.
    with open(join(dir_path, 'args.txt'), 'r') as args_file:
        args = json.load(args_file)

    n_experts = args['n_experts']
    n_total = n_experts + n_new_customers

    # State representations 71 and 81 additionally pass the total number of
    # customers to the loader; every other representation uses the defaults.
    loader_kwargs = {}
    if args['state_rep'] == 71 or args['state_rep'] == 81:
        loader_kwargs['n_experts_in_adam_basket'] = n_total

    final_model_dir_path = next(
        (d for d in [x[0] for x in os.walk(dir_path)] if d.endswith('finish')),
        None)
    env, model, obs_normalizer = pe.get_env_and_model(
        args, final_model_dir_path, sample_length, **loader_kwargs)

    customer_states, customer_actions = sample_customer_data(
        env, n_experts, sample_length, n_new_customers)
    customers = res.get_distribs(customer_states, customer_actions)
    expert_states = customer_states[:n_experts]
    expert_actions = customer_actions[:n_experts]
    experts = customers[:n_experts]
    avg_expert = pe.get_distrib(expert_states, expert_actions)

    model_dir_paths = [
        d for d in [x[0] for x in os.walk(dir_path)]
        if d.endswith('checkpoint')
    ]
    model_dir_paths.sort(key=res.get_key_from_path)

    data = []
    for mdp in model_dir_paths:
        n_steps = res.get_key_from_path(mdp)

        print('Processing model saved after %s' % n_steps)

        # Only checkpoints saved after more than 1e6 steps are evaluated.
        if int(n_steps) <= 1000000:
            continue

        env, model, obs_normalizer = pe.get_env_and_model(
            args, mdp, sample_length, **loader_kwargs)

        agent_states, agent_actions, closest_expert = sample_agent_data(
            n_total, args, env, model, obs_normalizer, customers,
            customer_states)

        agents = res.get_distribs(agent_states, agent_actions)
        avg_agent = pe.get_distrib(agent_states[:n_experts],
                                   agent_actions[:n_experts])

        for i, (a, c) in enumerate(zip(agents, customers)):
            if i < n_experts:
                data.append([n_steps, wd(a, c), 'Experts'])
            else:
                data.append([n_steps, wd(a, c), 'New customers'])
                data.append([
                    n_steps,
                    wd(a, experts[closest_expert[i]]), 'Closest expert'
                ])

        data.append([n_steps, wd(avg_agent, avg_expert), 'Average expert'])

    df = pd.DataFrame(data,
                      columns=[
                          'Number of training steps', 'Wasserstein distance',
                          'Comparison with'
                      ])

    report_dir = join('report', folder_name)
    os.makedirs(report_dir, exist_ok=True)
    # Number the new file after the CSV files already in the report folder.
    counter = len([x for x in os.listdir(report_dir) if x.endswith('.csv')])
    df.to_csv(join(report_dir,
                   'df_' + folder_name + str(counter + 1) + '.csv'),
              index=False)
Exemplo n.º 7
0
def main():
    """Plot the mean distance to the closest expert vs. the number of experts.

    Behaviour distributions are generated for ``max(expert_ls) + n_customers``
    customers. The last ``n_customers`` of them act as new customers. For
    every expert count in ``expert_ls`` and each of ``n_runs`` repetitions, a
    random subset of the remaining customers is drawn (without replacement)
    as experts, and the mean over the new customers of the ``wd`` distance to
    their closest expert is recorded. The result is shown as a seaborn line
    plot.
    """
    import custom_gym
    import gym
    from customer_behaviour.tools.dgm import DGM
    import numpy as np
    import itertools
    import os, sys
    from os.path import join
    import pandas as pd
    tools_path = join(os.getcwd(), 'customer_behaviour/tools')
    sys.path.insert(1, tools_path)
    import policy_evaluation as pe
    import results2 as res
    from scipy.stats import wasserstein_distance as wd
    import seaborn as sns

    sample_length = 10000
    expert_ls = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 50]
    n_customers = 20
    n_runs = 3
    tot_customers = max(expert_ls) + n_customers
    n_last_days = 7
    model = DGM()
    case = Case22(model=model, n_experts=tot_customers)

    # One trajectory per customer; the customer index doubles as the seed.
    states, actions = [], []
    for seed in range(tot_customers):
        customer_states, customer_actions = get_states_actions(
            case=case,
            model=model,
            seed=seed,
            episode_length=sample_length,
            n_historical_events=1000,
            save_for_visualisation=False)
        states.append(customer_states)
        actions.append(customer_actions)
    states = np.array(states)
    actions = np.array(actions)

    customers = res.get_distribs(states, actions)

    # The trailing n_customers entries are the new customers; experts are
    # only ever drawn from the entries before them.
    new_customers = customers[-n_customers:]
    n_candidates = len(customers) - n_customers

    rows = []
    for n_experts in expert_ls:
        for _ in range(n_runs):
            picked = np.random.choice(range(n_candidates),
                                      n_experts,
                                      replace=False)
            experts = [customers[k] for k in picked]
            closest = [
                min(wd(new_customers[i], e) for e in experts)
                for i in range(n_customers)
            ]
            rows.append([n_experts, np.mean(closest)])

    x_label = 'Number of experts'
    y_label = 'Average distance to closest customer'
    df = pd.DataFrame(rows, columns=[x_label, y_label])

    sns.set(style='darkgrid')
    sns.relplot(x=x_label,
                y=y_label,
                hue=None,
                ci=95,
                kind='line',
                data=df,
                facet_kws={'legend_out': False})
    plt.show()
Exemplo n.º 8
0
def main():
    """Print the mean distance between synthetic customers and their experts.

    Behaviour distributions are generated for ``n_experts + n_customers``
    customers. For each of the last ``n_customers`` customers the closest
    expert (smallest ``wd`` distance) is located, a synthetic action sequence
    is generated from statistics of that expert's action series, and the
    ``wd`` distance between the synthetic sequence's distribution and the
    expert's distribution is collected. The mean of those distances is
    printed.
    """
    import custom_gym
    import gym
    import numpy as np
    import itertools
    import os, sys
    from os.path import join
    import pandas as pd
    tools_path = join(os.getcwd(), 'customer_behaviour/tools')
    sys.path.insert(1, tools_path)
    import policy_evaluation as pe
    import results2 as res
    from scipy.stats import wasserstein_distance as wd

    sample_length = 10000
    n_experts = 10
    n_customers = 50
    tot_customers = n_experts + n_customers
    n_last_days = 7
    model = DGM()
    case = Case22(model=model, n_experts=tot_customers)

    # One trajectory per customer; the customer index doubles as the seed.
    states, actions = [], []
    for seed in range(tot_customers):
        customer_states, customer_actions = get_states_actions(
            case=case,
            model=model,
            seed=seed,
            episode_length=sample_length,
            n_historical_events=10,
            save_for_visualisation=False)
        states.append(customer_states)
        actions.append(customer_actions)
    states = np.array(states)
    actions = np.array(actions)

    customers = res.get_distribs(states, actions)
    experts_ts = actions[:n_experts]
    new_customers_dis = customers[-n_customers:]
    experts_dis = customers[:n_experts]

    history_len = 10  # length of the sliding window used to build states
    length_subsample = int(sample_length / 10)

    closest_dist = []
    final_dist = []
    for i in range(n_customers):
        customer = new_customers_dis[i]
        distances = [wd(customer, e) for e in experts_dis]
        closest_dist.append(min(distances))
        nearest = np.argsort(distances)[0]
        expert_ts = experts_ts[nearest, :].tolist()[0]

        # Cut the expert's series into consecutive chunks of equal length.
        chunk_starts = range(0, len(expert_ts), length_subsample)
        samples = [
            expert_ts[start:start + length_subsample]
            for start in chunk_starts
        ]
        ratios = [get_purchase_ratio(chunk) for chunk in samples]
        freq_probs, bins = get_freqs_probs(ratios, length_subsample)

        # NOTE(review): the chunks are presumably plain list slices, in which
        # case `chunk * 10` repeats the list ten times rather than scaling
        # its values -- confirm that this is intended.
        observed_days = [
            day for chunk in samples
            for day in np.nonzero(chunk * 10)[0].tolist()
        ]
        purchase_histo = get_purchase_probs(observed_days, sample_length)
        purchase_days = generate_purchase_days(sample_length, bins,
                                               freq_probs, purchase_histo)

        # Binary action sequence with a 1 on every generated purchase day.
        c_actions = np.zeros((sample_length, )).astype(int)
        for day in purchase_days:
            c_actions[day] = 1

        action_list = c_actions.tolist()
        c_states = np.asarray([
            action_list[start:start + history_len]
            for start in range(len(action_list) - history_len)
        ])
        n_actions = len(action_list)
        n_states = c_states.shape[0]
        # Trim the actions so there is exactly one per state window.
        c_actions = c_actions[:n_states]
        c_dis = pe.get_distrib(c_states, c_actions)

        final_dist.append(wd(c_dis, experts_dis[nearest]))

    print(np.mean(final_dist))