示例#1
0
def get_distribs(all_states, all_actions):
    """Compute one distribution per individual.

    ``all_states`` and ``all_actions`` are each split along their first axis
    into ``len(all_states)`` equally sized sections, and every pair of
    sections is passed to ``pe.get_distrib``.

    Args:
        all_states: Array-like whose length gives the number of individuals.
        all_actions: Array-like that must be divisible into the same number
            of equal sections as ``all_states``.

    Returns:
        A list holding the ``pe.get_distrib`` result for each individual, in
        order. An empty list is returned when there are no individuals.
    """
    n_individuals = len(all_states)
    if n_individuals == 0:
        # np.split cannot split into zero sections; with no individuals
        # there is simply nothing to compute.
        return []

    state_sections = np.split(all_states, n_individuals)
    action_sections = np.split(all_actions, n_individuals)
    # Loop over individuals
    return [
        pe.get_distrib(states, actions)
        for states, actions in zip(state_sections, action_sections)
    ]
示例#2
0
def save_data(path, sample_length, n_new_customers, compare_features):
    """Sample customer data, evaluate saved checkpoints and store the result.

    Reads the JSON run arguments from ``args.txt`` in ``path``, builds an
    environment through ``pe.get_env_and_model`` and generates trajectories
    for ``n_experts + n_new_customers`` customers. Every sub-directory of
    ``path`` whose name ends with ``'checkpoint'`` and whose training-step key
    is above 1,000,000 is then evaluated, and the collected results are
    wrapped in ``Result`` and passed to ``save_result``.

    Args:
        path: Directory containing ``args.txt`` and the checkpoint folders.
        sample_length: Number of time steps requested per customer trajectory.
        n_new_customers: Number of customers sampled in addition to the
            ``args['n_experts']`` experts.
        compare_features: If true, customers are summarised with
            ``get_features``; otherwise with ``pe.get_distrib`` /
            ``get_distribs``.
    """
    # Use a context manager so the args file handle is closed deterministically.
    with open(join(path, 'args.txt'), 'r') as args_file:
        args = json.load(args_file)

    n_experts = args['n_experts']
    n_customers = n_experts + n_new_customers
    # State representation 71 needs the total customer count when building
    # the environment and when evaluating on new customers.
    uses_adam_basket = args['state_rep'] == 71

    # Sample customer data
    if uses_adam_basket:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True,
                                   n_experts_in_adam_basket=n_customers)
    else:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True)

    customer_trajectories = env.generate_expert_trajectories(
        out_dir=None,
        n_demos_per_expert=1,
        n_experts=n_customers,
        n_expert_time_steps=sample_length
        )
    # The first n_experts trajectories belong to the experts, the rest to the
    # new customers.
    expert_states = np.array(customer_trajectories['states'][:n_experts])
    expert_actions = np.array(customer_trajectories['actions'][:n_experts])
    new_states = np.array(customer_trajectories['states'][n_experts:])
    new_actions = np.array(customer_trajectories['actions'][n_experts:])

    if compare_features:
        avg_expert = get_features(expert_actions, average=True)
        experts = get_features(expert_actions)
        new_customers = get_features(new_actions)
    else:
        avg_expert = pe.get_distrib(expert_states, expert_actions)
        experts = get_distribs(expert_states, expert_actions)
        new_customers = get_distribs(new_states, new_actions)

    models = {}

    model_paths = [d for d, _, _ in os.walk(path) if d.endswith('checkpoint')]
    model_paths.sort(key=get_key_from_path)

    for mp in model_paths:
        n_train_steps = get_key_from_path(mp)
        # Checkpoints saved at or before 1,000,000 steps are not evaluated.
        if int(n_train_steps) <= 1000000:
            continue

        print('Collecting data from model saved after %s steps' % n_train_steps)

        if uses_adam_basket:
            agent, abs_diffs, errors = evaluate_on_new_customers(
                args, mp, experts, new_customers, compare_features,
                n_new_customers)
        else:
            agent, abs_diffs, errors = evaluate_on_new_customers(
                args, mp, experts, new_customers, compare_features)
        avg_dist = evaluate_on_pop_level(args, mp, avg_expert, compare_features)

        models[n_train_steps] = (agent, abs_diffs, errors, avg_dist)

    # NOTE: evaluation of the final model (directory ending with 'finish')
    # is currently disabled, so no 'final' entry is stored in `models`.

    result = Result(models)
    save_result(result, path)
示例#3
0
def evaluate_on_pop_level(args, model_path, avg_expert, compare_features):
    """Return the distance between the average agent and the average expert.

    For each of the ``args['n_experts']`` experts the environment spawns the
    corresponding customer, a random window of historical events is cut from
    a freshly drawn sample and used to build the initial state, and the
    policy loaded from ``model_path`` is sampled from that state. The pooled
    agent data is summarised (``get_features`` when ``compare_features`` is
    true, ``pe.get_distrib`` otherwise) and compared with ``avg_expert``
    using ``ed`` or ``wd`` respectively.

    Note: ``sample_length`` is read from module scope, not passed in.

    Args:
        args: Run arguments; reads ``n_experts``, ``n_historical_events`` and
            ``state_rep``.
        model_path: Location of the model to load.
        avg_expert: Summary of the expert population to compare against.
        compare_features: Selects feature-based or distribution-based
            comparison.

    Returns:
        The value returned by the selected metric.
    """
    n_experts = args['n_experts']

    env, model, obs_normalizer = pe.get_env_and_model(
        args, model_path, sample_length, only_env=False)

    if compare_features:
        metric = ed
    else:
        metric = wd

    state_runs = []
    action_runs = []
    for expert_idx in range(n_experts):
        # Initialize the agent with data from this expert
        env.model.spawn_new_customer(expert_idx)
        sample = env.case.get_sample(
            n_demos_per_expert=1,
            n_historical_events=args['n_historical_events'],
            n_time_steps=1000,
        )
        all_data = np.hstack(sample[0])  # history, data = sample[0]

        # Pick a random window of n_historical_events columns as the history.
        n_hist = args['n_historical_events']
        start = np.random.randint(0, all_data.shape[1] - n_hist)
        history = all_data[:, start:start + n_hist]

        if args['state_rep'] == 71:
            adam_basket = np.random.permutation(
                env.case.adam_baskets[expert_idx])
            env.case.i_expert = expert_idx
            identity = adam_basket[0]
        else:
            identity = expert_idx
        initial_state = env.case.get_initial_state(history, identity)

        states, actions = pe.sample_from_policy(
            env, model, obs_normalizer, initial_state=initial_state)
        state_runs.append(states)
        action_runs.append(actions)

    agent_states = np.array(state_runs)
    agent_actions = np.array(action_runs)

    if compare_features:
        avg_agent = get_features(agent_actions, average=True)
    else:
        avg_agent = pe.get_distrib(agent_states, agent_actions)

    return metric(avg_agent, avg_expert)
示例#4
0
def plot(data):
    """Plot agent-vs-customer Wasserstein distances per parameter value.

    Customer data is re-sampled using the arguments stored in the first
    directory under ``dir_path`` whose name starts with ``'2020'``. For every
    stored model with more than 1,000,000 training steps, each agent
    distribution is compared (``wd``) with the matching customer
    distribution. Rows for the first ``n_experts`` customers go to the
    "experts" figure, the remaining rows to the "new customers" figure.

    Relies on the module-level names ``dir_path``, ``sample_length``,
    ``n_new_customers`` and ``param``.

    Args:
        data: Mapping from parameter value to a list of results, each exposing
            a ``models`` mapping from training-step key to per-customer agent
            data.
    """
    # Sample customer data
    run_dir = [
        join(dir_path, x) for x in os.listdir(dir_path)
        if x.startswith('2020')
    ][0]
    # Use a context manager so the args file handle is closed deterministically.
    with open(join(run_dir, 'args.txt'), 'r') as args_file:
        args = json.load(args_file)
    n_experts = args['n_experts']

    if args['state_rep'] == 71:
        env = pe.get_env_and_model(args,
                                   '',
                                   sample_length,
                                   only_env=True,
                                   n_experts_in_adam_basket=n_experts +
                                   n_new_customers)
    else:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True)

    customer_trajectories = env.generate_expert_trajectories(
        out_dir=None,
        n_demos_per_expert=1,
        n_experts=n_experts + n_new_customers,
        n_expert_time_steps=sample_length)
    customer_states = np.array(customer_trajectories['states'])
    customer_actions = np.array(customer_trajectories['actions'])
    customers = get_distribs(customer_states, customer_actions)

    expert_states = np.array(customer_trajectories['states'][:n_experts])
    expert_actions = np.array(customer_trajectories['actions'][:n_experts])
    avg_expert = pe.get_distrib(expert_states, expert_actions)

    import seaborn as sns
    import pandas as pd

    columns = [
        'int_value', 'Parameter value', 'Number of training episodes',
        'Wasserstein distance'
    ]
    # Collect rows in plain lists and build each frame once; appending row by
    # row with .loc re-allocates the frame on every insertion.
    expert_rows = []
    new_customer_rows = []

    for param_value, results in data.items():
        print('Processing parameter value {}'.format(param_value))
        label = get_label_from_param_value(param_value, param)
        for result in results:
            for n_train_steps, agent in result.models.items():
                if int(n_train_steps) <= 1000000:
                    continue
                n_train_episodes = int(n_train_steps) / args['episode_length']
                for i, (a, c) in enumerate(zip(agent, customers)):
                    assert len(a) == 1
                    row = [param_value, label, n_train_episodes, wd(a[0], c)]
                    # The first n_experts customers are the experts.
                    if i < n_experts:
                        expert_rows.append(row)
                    else:
                        new_customer_rows.append(row)

    df_experts = pd.DataFrame(expert_rows, columns=columns)
    df_new_customers = pd.DataFrame(new_customer_rows, columns=columns)

    # sort_values returns a new frame, so the result must be assigned for the
    # sort to take effect. A stable sort keeps row order within each value.
    df_experts = df_experts.sort_values(by=['int_value'], kind='mergesort')
    df_new_customers = df_new_customers.sort_values(by=['int_value'],
                                                    kind='mergesort')

    sns.set(style='darkgrid')

    g1 = sns.relplot(x='Number of training episodes',
                     y='Wasserstein distance',
                     hue='Parameter value',
                     ci=95,
                     kind='line',
                     data=df_experts,
                     facet_kws={'legend_out': False})
    g1.fig.subplots_adjust(top=0.95)
    ax1 = g1.axes[0][0]
    ax1.set_title('Comparison with experts')

    g2 = sns.relplot(x='Number of training episodes',
                     y='Wasserstein distance',
                     hue='Parameter value',
                     ci=95,
                     kind='line',
                     data=df_new_customers,
                     facet_kws={'legend_out': False})
    g2.fig.subplots_adjust(top=0.95)
    ax2 = g2.axes[0][0]
    ax2.set_title('Comparison with new customers')

    for ax in (ax1, ax2):
        handles, labels = ax.get_legend_handles_labels()
        # The first legend entry is treated as the title; the remaining
        # entries are ordered by the integer that starts each label.
        labels2, handles2 = zip(*sorted(zip(labels[1:], handles[1:]),
                                        key=lambda t: int(t[0].split(' ')[0])))
        labels2 = list(labels2)
        handles2 = list(handles2)
        labels2.insert(0, get_label_from_param(param))
        handles2.insert(0, handles[0])
        ax.legend(handles2, labels2)

    plt.show()
示例#5
0
def evaluate(args, model_path, n_new_customers, sample_length, N,
             customer_states):
    """Sample ``N`` agent distributions for every customer.

    The environment and policy are loaded from ``model_path``. For each of
    the ``n_experts + n_new_customers`` customers, ``N`` roll-outs are started
    from an initial state drawn at random from that customer's recorded
    states, and each roll-out is summarised with ``pe.get_distrib``.

    Args:
        args: Run arguments; reads ``n_experts`` and ``state_rep``.
        model_path: Location of the model to load.
        n_new_customers: Number of customers in addition to the experts.
        sample_length: Passed through to ``pe.get_env_and_model``.
        N: Number of roll-outs per customer.
        customer_states: Indexable by customer; each entry is a sequence of
            candidate initial states.

    Returns:
        A list with one entry per customer, each a list of ``N`` distributions.

    Raises:
        NotImplementedError: For state representations 22 and 221 (any
            customer) and for state representation 23 on new customers.
    """
    n_experts = args['n_experts']
    state_rep = args['state_rep']
    n_customers = n_experts + n_new_customers

    # Only state representation 71 passes the total customer count along.
    extra_kwargs = {}
    if state_rep == 71:
        extra_kwargs['n_experts_in_adam_basket'] = n_customers
    env, model, obs_normalizer = pe.get_env_and_model(
        args, model_path, sample_length, only_env=False, **extra_kwargs)

    agents = []
    for customer_idx in range(n_customers):
        customer_agents = []
        for _ in range(N):
            # NOTE(review): `and` binds tighter than `or`, so 22 and 221 are
            # rejected for every customer while 23 is rejected only for new
            # customers. Confirm this is the intended grouping.
            unsupported = state_rep in (22, 221) or (
                state_rep == 23 and customer_idx >= n_experts)
            if unsupported:
                raise NotImplementedError

            initial_state = random.choice(customer_states[customer_idx])
            states, actions = pe.sample_from_policy(
                env, model, obs_normalizer, initial_state=initial_state)
            distrib = pe.get_distrib(np.array(states), np.array(actions))
            customer_agents.append(distrib)

        agents.append(customer_agents)

    # NOTE: an alternative strategy (spawning each customer and building the
    # initial state from a random history window of a fresh sample) used to be
    # sketched here in commented-out form; it is not active.

    return agents
示例#6
0
def evaluate_on_new_customers(args, model_path, experts, new_customers, compare_features, n_new_customers=None):
    """Sample agents for each new customer and score them.

    For every new customer the ``k`` closest experts are determined with the
    selected metric (``ed`` when ``compare_features`` is true, ``wd``
    otherwise). The customer is spawned in the environment, and ``N``
    roll-outs are sampled from initial states built from random history
    windows. Each roll-out is compared with the customer itself and
    classified against all experts; a roll-out whose nearest expert is not
    among the ``k`` closest counts as an error.

    Reads ``k``, ``N`` and ``sample_length`` from module scope.

    Args:
        args: Run arguments; reads ``n_experts``, ``n_historical_events`` and
            ``state_rep``.
        model_path: Location of the model to load.
        experts: Per-expert summaries (features or distributions).
        new_customers: Per-new-customer summaries of the same kind.
        compare_features: Selects feature-based or distribution-based
            comparison.
        n_new_customers: When given, ``n_experts + n_new_customers`` is passed
            to ``pe.get_env_and_model`` as ``n_experts_in_adam_basket``.

    Returns:
        A tuple ``(agents, abs_diffs, errors)`` with one entry per new
        customer: the sampled summaries, their distances to the customer, and
        the fraction of mis-classified roll-outs.
    """
    n_experts = args['n_experts']

    if n_new_customers is None:
        env, model, obs_normalizer = pe.get_env_and_model(
            args, model_path, sample_length, only_env=False)
    else:
        env, model, obs_normalizer = pe.get_env_and_model(
            args, model_path, sample_length, only_env=False,
            n_experts_in_adam_basket=n_experts + n_new_customers)

    agents, abs_diffs, errors = [], [], []

    if compare_features:
        metric = ed
    else:
        metric = wd

    for offset, customer in enumerate(new_customers):
        expert_distances = [metric(customer, e) for e in experts]
        closest_experts = np.argsort(expert_distances)[:k]
        dummy = closest_experts[0]

        customer_agents = []
        customer_abs_diffs = []
        n_errors = 0

        # New customers are seeded after the experts.
        seed = n_experts + offset

        is_rep_71 = args['state_rep'] == 71
        if is_rep_71:
            adam_basket = np.random.permutation(env.case.adam_baskets[seed])
            env.case.i_expert = seed

        env.model.spawn_new_customer(seed)
        sample = env.case.get_sample(
            n_demos_per_expert=1,
            n_historical_events=args['n_historical_events'],
            n_time_steps=1000,
        )
        all_data = np.hstack(sample[0])  # history, data = sample[0]

        for draw in range(N):
            n_hist = args['n_historical_events']
            start = np.random.randint(0, all_data.shape[1] - n_hist)
            history = all_data[:, start:start + n_hist]

            # Without an adam basket, the closest expert stands in as dummy.
            identity = adam_basket[draw] if is_rep_71 else dummy
            initial_state = env.case.get_initial_state(history, identity)

            states, actions = pe.sample_from_policy(
                env, model, obs_normalizer, initial_state=initial_state)
            states = np.array(states)
            actions = np.array(actions)

            if compare_features:
                agent = get_features([actions])
            else:
                agent = pe.get_distrib(states, actions)

            customer_agents.append(agent)
            customer_abs_diffs.append(metric(agent, customer))

            agent_distances = [metric(agent, e) for e in experts]
            if np.argmin(agent_distances) not in closest_experts:
                n_errors += 1

        agents.append(customer_agents)
        abs_diffs.append(customer_abs_diffs)
        errors.append(n_errors / N)

    return agents, abs_diffs, errors
示例#7
0
def load_data():
    """Load or compute results for every run folder under ``dir_path``.

    Each directory in ``dir_path`` whose name starts with ``'2020'`` is
    processed in sorted order. A stored ``result.pkl`` is loaded unless
    ``resample`` is set; otherwise, unless ``plot_only`` is set, the result
    is computed by evaluating the folder's checkpoints and then saved. Results
    are grouped by the folder's value of ``args[param]``.

    Relies on the module-level names ``dir_path``, ``plot_only``,
    ``update_classification``, ``resample``, ``sample_length``,
    ``n_new_customers``, ``compare_features``, ``k`` and ``param``.

    Returns:
        A dict mapping each parameter value to the list of results collected
        for it.
    """
    data = {}

    # Load data
    data_paths = sorted(
        join(dir_path, x) for x in os.listdir(dir_path)
        if x.startswith('2020'))

    for i, path in enumerate(data_paths):
        print('Processing folder {} of {}'.format(i + 1, len(data_paths)))

        content = os.listdir(path)

        # Use a context manager so a handle is not leaked for every folder.
        with open(join(path, 'args.txt'), 'r') as args_file:
            args = json.load(args_file)

        # Customer data is sampled once, from the first folder's arguments,
        # and reused for all later folders.
        if not (plot_only and not update_classification) and i == 0:
            env = pe.get_env_and_model(args, '', sample_length, only_env=True)

            # Sample customer data
            n_experts = args['n_experts']
            customer_trajectories = env.generate_expert_trajectories(
                out_dir=None,
                n_demos_per_expert=1,
                n_experts=n_experts+n_new_customers,
                n_expert_time_steps=sample_length
                )
            expert_states = np.array(customer_trajectories['states'][:n_experts])
            expert_actions = np.array(customer_trajectories['actions'][:n_experts])
            new_states = np.array(customer_trajectories['states'][n_experts:])
            new_actions = np.array(customer_trajectories['actions'][n_experts:])

            if compare_features:
                avg_expert = get_features(expert_actions, average=True)
                experts = get_features(expert_actions)
                new_customers = get_features(new_actions)
            else:
                avg_expert = pe.get_distrib(expert_states, expert_actions)
                experts = get_distribs(expert_states, expert_actions)
                new_customers = get_distribs(new_states, new_actions)

        if 'result.pkl' in content and not resample:
            print('Loading saved result')
            result = load_result(path)

            if update_classification:
                print('Updating classification with k = %d' % k)

                for n_train_steps, t in result.models.items():
                    agent = t[0]
                    errors = compare_with_new_customers(agent, experts, new_customers, compare_features)

                    # Replace the errors entry (index 2) of the stored tuple.
                    # Only the value of an existing key changes, which is safe
                    # while iterating over the dict.
                    updated = list(t)
                    updated[2] = errors
                    result.models[n_train_steps] = tuple(updated)
        else:
            if plot_only:
                print('Ignoring')
                continue

            print('Collecting result by sampling from model')

            models = {}

            model_paths = [d for d, _, _ in os.walk(path) if d.endswith('checkpoint')]
            model_paths.sort(key=get_key_from_path)

            for mp in model_paths:
                n_train_steps = get_key_from_path(mp)
                # Cast like the sibling functions do, so the comparison also
                # works when the key is returned as a string.
                # NOTE(review): the threshold here is `<` while the other
                # functions in this file use `<=`; confirm which is intended.
                if int(n_train_steps) < 1000000:
                    continue

                agent, abs_diffs, errors = evaluate_on_new_customers(args, mp, experts, new_customers, compare_features)
                avg_dist = evaluate_on_pop_level(args, mp, avg_expert, compare_features)

                models[n_train_steps] = (agent, abs_diffs, errors, avg_dist)

            # NOTE: evaluation of the final model (directory ending with
            # 'finish') is currently disabled, so no 'final' entry is stored.

            result = Result(models)
            save_result(result, path)

        data.setdefault(args[param], []).append(result)

    return data
示例#8
0
def save_df(dir_path, folder_name, sample_length=10000, n_new_customers=50):
    """Evaluate all checkpoints of a run and write the distances to a CSV.

    Customer data is sampled once with the environment built for the final
    model (directory ending with ``'finish'``). Every directory ending with
    ``'checkpoint'`` whose step key is above 1,000,000 is then loaded, agent
    data is sampled for each customer, and Wasserstein distances (``wd``) are
    recorded against the matching customer, against the closest expert (new
    customers only) and against the average expert. The table is written to
    ``report/<folder_name>/df_<folder_name><k>.csv``, where ``k`` is one more
    than the number of CSV files already in that folder.

    Args:
        dir_path: Run directory containing ``args.txt`` and the model folders.
        folder_name: Name of the sub-folder of ``report`` and part of the
            output file name.
        sample_length: Passed to ``pe.get_env_and_model`` and
            ``sample_customer_data``.
        n_new_customers: Number of customers in addition to the experts.
    """
    # Use a context manager so the args file handle is closed deterministically.
    with open(join(dir_path, 'args.txt'), 'r') as args_file:
        args = json.load(args_file)

    n_experts = args['n_experts']
    n_customers = n_experts + n_new_customers

    def load_env_and_model(model_dir_path):
        # State representations 71 and 81 also get the total customer count.
        if args['state_rep'] == 71 or args['state_rep'] == 81:
            return pe.get_env_and_model(
                args,
                model_dir_path,
                sample_length,
                n_experts_in_adam_basket=n_customers)
        return pe.get_env_and_model(args, model_dir_path, sample_length)

    # Walk the directory tree once and reuse the listing for both lookups.
    sub_dirs = [x[0] for x in os.walk(dir_path)]

    final_model_dir_path = next(
        (d for d in sub_dirs if d.endswith('finish')), None)
    env, model, obs_normalizer = load_env_and_model(final_model_dir_path)

    customer_states, customer_actions = sample_customer_data(
        env, n_experts, sample_length, n_new_customers)
    customers = res.get_distribs(customer_states, customer_actions)
    expert_states = customer_states[:n_experts]
    expert_actions = customer_actions[:n_experts]
    experts = customers[:n_experts]
    avg_expert = pe.get_distrib(expert_states, expert_actions)

    model_dir_paths = [d for d in sub_dirs if d.endswith('checkpoint')]
    model_dir_paths.sort(key=res.get_key_from_path)

    data = []
    for mdp in model_dir_paths:
        n_steps = res.get_key_from_path(mdp)

        print('Processing model saved after %s' % n_steps)

        # Checkpoints saved at or before 1,000,000 steps are not evaluated.
        if int(n_steps) <= 1000000:
            continue

        env, model, obs_normalizer = load_env_and_model(mdp)

        agent_states, agent_actions, closest_expert = sample_agent_data(
            n_customers, args, env, model, obs_normalizer, customers,
            customer_states)

        agents = res.get_distribs(agent_states, agent_actions)
        avg_agent = pe.get_distrib(agent_states[:n_experts],
                                   agent_actions[:n_experts])

        for i, (a, c) in enumerate(zip(agents, customers)):
            if i < n_experts:
                data.append([n_steps, wd(a, c), 'Experts'])
            else:
                data.append([n_steps, wd(a, c), 'New customers'])
                data.append([
                    n_steps,
                    wd(a, experts[closest_expert[i]]), 'Closest expert'
                ])

        data.append([n_steps, wd(avg_agent, avg_expert), 'Average expert'])

    df = pd.DataFrame(data,
                      columns=[
                          'Number of training steps', 'Wasserstein distance',
                          'Comparison with'
                      ])

    report_dir = join('report', folder_name)
    os.makedirs(report_dir, exist_ok=True)
    # Number the output after the CSV files already present in the folder.
    counter = len([x for x in os.listdir(report_dir) if x.endswith('.csv')])
    df.to_csv(join(report_dir,
                   'df_' + folder_name + str(counter + 1) + '.csv'),
              index=False)
def main():
    """Print the mean distance of synthetic customers to their closest expert.

    Trajectories are generated for 10 experts and 50 new customers. For each
    new customer the closest expert (by Wasserstein distance between their
    distributions) is found, a synthetic purchase sequence is generated from
    that expert's purchase statistics, and the distribution of the synthetic
    sequence is compared with the expert's distribution. The mean of these
    distances is printed.
    """
    import custom_gym
    import gym
    import numpy as np
    import itertools
    import os
    import sys
    from os.path import join
    import pandas as pd
    tools_path = join(os.getcwd(), 'customer_behaviour/tools')
    sys.path.insert(1, tools_path)
    import policy_evaluation as pe
    import results2 as res
    from scipy.stats import wasserstein_distance as wd

    sample_length = 10000
    n_experts = 10
    n_customers = 50
    tot_customers = n_experts + n_customers
    model = DGM()
    case = Case22(model=model, n_experts=tot_customers)

    # One trajectory per customer; the customer index doubles as the seed.
    state_list = []
    action_list = []
    for seed in range(tot_customers):
        customer_states, customer_actions = get_states_actions(
            case=case,
            model=model,
            seed=seed,
            episode_length=sample_length,
            n_historical_events=10,
            save_for_visualisation=False)
        state_list.append(customer_states)
        action_list.append(customer_actions)
    states = np.array(state_list)
    actions = np.array(action_list)

    customers = res.get_distribs(states, actions)
    experts_ts = actions[:n_experts]
    new_customers_dis = customers[-n_customers:]
    experts_dis = customers[:n_experts]

    length_subsample = int(sample_length / 10)

    dist = []
    final_dist = []
    for c in new_customers_dis:
        distances = [wd(c, e) for e in experts_dis]
        dist.append(min(distances))
        dummy = np.argsort(distances)[0]  # index of the closest expert
        expert_ts = experts_ts[dummy, :].tolist()[0]

        # Cut the expert's action sequence into consecutive sub-samples.
        samples = [
            expert_ts[start:start + length_subsample]
            for start in range(0, len(expert_ts), length_subsample)
        ]
        ratios = [get_purchase_ratio(sample) for sample in samples]
        freq_probs, bins = get_freqs_probs(ratios, length_subsample)

        observed_days = []
        for sample in samples:
            # NOTE(review): `sample` appears to be a slice of a Python list,
            # so `* 10` repeats the slice ten times rather than scaling its
            # values; confirm this is intended.
            observed_days += np.nonzero(sample * 10)[0].tolist()

        purchase_histo = get_purchase_probs(observed_days, sample_length)
        generated_days = generate_purchase_days(sample_length, bins,
                                                freq_probs, purchase_histo)

        # Binary action sequence with a 1 on every generated purchase day.
        c_actions = np.zeros((sample_length, )).astype(int)
        for day in generated_days:
            c_actions[day] = 1

        # Each state is the window of the next 10 actions from that index.
        action_seq = c_actions.tolist()
        c_states = np.asarray([
            action_seq[start:start + 10]
            for start in range(len(action_seq) - 10)
        ])
        c_actions = c_actions[:c_states.shape[0]]
        c_dis = pe.get_distrib(c_states, c_actions)

        final_dist.append(wd(c_dis, experts_dis[dummy]))

    print(np.mean(final_dist))