Example #1
    def save_checkpoint(self):
        """Persist checkpoint information"""
        # plot the actor loss history
        utils.plot_scores(self.checkpoint_prefix + "_actor_loss.png", self.actor_loss_episodes, label="loss")

        # save the actor network weights
        torch.save(self.actor.state_dict(), self.checkpoint_prefix + "_actor.pth")
Example #2
def ubm(features, label_dict):
    runs_eer = []
    runs_hter = []
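    # Run 5 independent random splits: fit per-client GMMs and universal
    # background models (UBMs) on the training split, pick client-specific
    # thresholds at the EER on the development split, then refit on train+dev
    # and report the HTER on the test split, averaged over the runs.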

    for experiment_i in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)
        nb_of_components = 11

        nb_of_components_background = 15

        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        all_ubms = build_UBMs(train_set, nb_of_components_background,
                              label_dict)
        dist_matrix = compute_dist_matrix_with_ubm(development_x, all_gmms,
                                                   all_ubms, label_dict)
        cur_eers, cur_thresholds = compute_eer_client_threshold(
            dist_matrix, development_y, label_dict)
        runs_eer.append(np.mean(cur_eers))

        if experiment_i == 0:
            utils.plot_scores(dist_matrix, development_y, "Second Section",
                              "e2", label_dict)
            frr_list, far_list, threshold_list = compute_frr_far_list(
                dist_matrix, development_y, label_dict)
            utils.plot_far_frr(frr_list, far_list, threshold_list,
                               "Second Section", "e2")

        print(f"Client thresholds:{np.array(cur_thresholds)}")

        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        all_ubms = build_UBMs(train_dev_set, nb_of_components_background,
                              label_dict)
        dist_matrix = compute_dist_matrix_with_ubm(test_x, all_gmms, all_ubms,
                                                   label_dict)

        client_hters = []
        for i in range(len(label_dict)):
            cur_dm = dist_matrix[:, i]
            genuine_indexes = (test_y == i)
            client_threshold = cur_thresholds[i]
            cur_frr, cur_far = compute_frr_far_client(cur_dm, genuine_indexes,
                                                      client_threshold)
            client_hters.append((cur_frr + cur_far) / 2)

        cur_hter = np.mean(client_hters)
        runs_hter.append(cur_hter)

        print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")

    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
Example #3
def gmm_global_threshold(features, label_dict):
    runs_eer = []
    runs_hter = []
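    # Same protocol as ubm() above, but scores come from the client GMMs alone
    # and a single global threshold, chosen at the EER on the development
    # split, is applied to every client when computing the test-split HTER.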

    for experiment_i in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)
        nb_of_components = 11

        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(development_x, all_gmms, label_dict)
        cur_eer, cur_threshold = compute_eer(dist_matrix, development_y,
                                             label_dict)
        runs_eer.append(cur_eer)

        if experiment_i == 0:
            utils.plot_scores(dist_matrix, development_y, "First Section",
                              "e1", label_dict)
            frr_list, far_list, threshold_list = compute_frr_far_list(
                dist_matrix, development_y, label_dict)
            utils.plot_far_frr(frr_list, far_list, threshold_list,
                               "First Section", "e1")

        print(f"Threshold:{cur_threshold}")

        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(test_x, all_gmms, label_dict)
        cur_frr, cur_far = compute_frr_far(dist_matrix, test_y, cur_threshold,
                                           label_dict)
        cur_hter = (cur_frr + cur_far) / 2
        runs_hter.append(cur_hter)

        print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")

    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
Example #4
def train_agent():
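    # Training loop for the snake agent: observe the state, pick a move, learn
    # from the single transition (short memory), store it for experience replay,
    # and after each game train on the long-term memory and update the plots.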
    scores = []        # renamed from plot_scores so the list does not shadow the plot_scores() helper
    mean_scores = []   # renamed from plot_mean_scores for the same reason
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGame()
    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new,
                                 done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            game.reset()
            agent.num_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print(f'Game: {agent.num_games}, Score: {score}, Record: {record}')

            scores.append(score)
            total_score += score
            mean_score = total_score / agent.num_games
            mean_scores.append(mean_score)
            plot_scores(scores, mean_scores)
Example #5
def train(
    n_episodes,
    max_t,
    env_fp,
    no_graphics,
    seed,
    save_every_nth,
    buffer_size,
    batch_size,
    gamma,
    tau,
    lr_actor,
    lr_critic,
    weight_decay,
    log,
):
    log.info("#### Initializing environment...")
    # init environment
    env = UnityEnvironment(file_name=env_fp, no_graphics=no_graphics)

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    log.info(f"Number of agents: {num_agents}")

    # size of each action
    action_size = brain.vector_action_space_size
    log.info(f"Size of each action: {action_size}")

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    log.info(
        f"There are {states.shape[0]} agents. Each observes a state with length: {state_size}"
    )
    log.info(f"The state for the first agent looks like: {states[0]}")

    agent = Agent(
        num_agents=len(env_info.agents),
        state_size=state_size,
        action_size=action_size,
        buffer_size=buffer_size,
        batch_size=batch_size,
        gamma=gamma,
        tau=tau,
        lr_actor=lr_actor,
        lr_critic=lr_critic,
        weight_decay=weight_decay,
        random_seed=seed,
    )

    log.info("#### Training...")

    scores_deque = deque(maxlen=100)
    scores = []
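    # scores_deque keeps only the last 100 episode means; its average drives the
    # progress logging and the >= 30 "solved" stopping criterion below.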
    for i_episode in range(1, n_episodes + 1):
        brain_name = env.brain_names[0]
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
        agent.reset()
        score = np.zeros((len(env_info.agents), 1))
        for t in range(max_t):
            actions = agent.act(states)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            rewards = np.array(rewards).reshape((next_states.shape[0], 1))
            dones = env_info.local_done
            dones = np.array(dones).reshape((next_states.shape[0], 1))
            agent.step(states, actions, rewards, next_states, dones)
            score += rewards
            states = next_states
            if np.any(dones):
                break
        scores_deque.append(np.mean(score))
        scores.append(np.mean(score))
        print(
            "Episode {}\tAverage Score: {:.2f}\tScore: {:.2f}".format(
                i_episode, np.mean(scores_deque), scores[-1]),
            end="\r",
        )

        if i_episode % 100 == 0:
            print("\rEpisode {}\tAverage Score: {:.2f}".format(
                i_episode, np.mean(scores_deque)))
        if i_episode % save_every_nth == 0:
            save_checkpoint(
                state={
                    "episode": i_episode,
                    "actor_state_dict": agent.actor_local.state_dict(),
                    "critic_state_dict": agent.critic_local.state_dict(),
                    "scores_deque": scores_deque,
                    "scores": scores,
                },
                filename="checkpoint.pth",
            )
            plot_scores(
                scores=scores,
                title=f"Avg score over {len(env_info.agents)} agents",
                fname="avg_scores.png",
                savefig=True,
            )

        if np.mean(scores_deque) >= 30:
            torch.save(agent.actor_local.state_dict(), "checkpoint_actor.pth")
            torch.save(agent.critic_local.state_dict(),
                       "checkpoint_critic.pth")
            print(
                "\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}"
                .format(i_episode - 100, np.mean(scores_deque)))
            break
Example #6
    return model


def get_minibatch_grad(model, X_train, y_train):
    xs, hs, errs = [], [], []

    for x, cls_idx in zip(X_train, y_train):
        h, y_pred = forward(x, model)

        y_true = np.zeros(n_class)
        y_true[int(cls_idx)] = 1.

        err = y_true - y_pred

        # Accumulate the information for the minibatch
        # x: input
        # h: hidden state
        # err: gradient of output layer
        xs.append(x)
        hs.append(h)
        errs.append(err)

    # Backprop using the information gathered from the current minibatch
    return backward(model, np.array(xs), np.array(hs), np.array(errs))


model = make_network()
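# Train the freshly built network with minibatch SGD and plot its accuracy history.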
trained_model, accuracy_scores = SGD_Optimizer(model, X_train, y_train,
                                               minibatch_size)
utils.plot_scores(accuracy_scores)
Example #7
    batch_size = 50
    num_epochs = 100
    num_classes = 2
    hidden_units = 100
    hidden_units2 = 10
    dimensions = 2

    # PeaksData, SwissRollData, GMMData
    X_train, y_train, X_test, y_test = utils.get_data('PeaksData')
    X_train, y_train = shuffle(X_train, y_train)

    # gradient and jacobian tests
    grad_test_W(X_train, y_train)
    grad_test_b(X_train, y_train)
    jacobian_test_W(X_train, y_train)
    jacobian_test_b(X_train, y_train)
    grad_test_W_whole_network(X_train, y_train)
    grad_test_b_whole_network(X_train, y_train)

    model = models.MyNeuralNetwork()
    model.add(layers.Linear(dimensions, hidden_units))
    model.add(activations.ReLU())
    model.add(layers.Softmax(hidden_units, 5))
    optimizer = optimizers.SGD(model.parameters, lr=0.1)
    losses, train_accuracy, test_accuracy = model.fit(X_train, y_train, X_test,
                                                      y_test, batch_size,
                                                      num_epochs, optimizer)

    # plotting
    utils.plot_scores(train_accuracy, test_accuracy)
Example #8
def dqn_algorithm(agent,
                  env,
                  brain_name,
                  max_n_episodes=2000,
                  max_n_steps=1000,
                  epsilon_start=1.0,
                  epsilon_min=0.01,
                  epsilon_decay_rate=0.995):
    """Deep Q-Learning Agent.
    
    Parameters
    ----------
        max_n_episodes : int
            Maximum number of training episodes
        max_n_steps : int
            Maximum number of steps per episode
        epsilon_start : float
            Starting value of epsilon, for epsilon-greedy action selection
        epsilon_min : float
            Minimum value of epsilon
        epsilon_decay_rate : float
            Multiplicative factor (per episode) for decreasing epsilon
    """
    all_scores = []
    last_100_scores = deque(maxlen=100)
    last_100_scores_rolling_means = []
    epsilon = epsilon_start
    # loop through episodes
    is_game_over = False
    episode_count = 1
    while not is_game_over:
        # observe state and initialize score
        state = env.reset(train_mode=True)[brain_name].vector_observations[0]
        score = 0
        # loop through steps within each episode
        is_episode_over = False
        agent.t_step = 1
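        # agent.t_step doubles as the per-episode step counter that is checked
        # against max_n_steps below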
        while not is_episode_over:
            # pick action
            action = agent.act(state, epsilon)
            # observe updated environment, reward and next state
            updated_env = env.step(action)[brain_name]
            next_state = updated_env.vector_observations[0]
            reward = updated_env.rewards[0]
            is_episode_over = updated_env.local_done[0]
            # update next state and add reward from step to episode score
            agent.step(state, action, reward, next_state, is_episode_over)
            state = next_state
            score += reward
            # if episode is over or max_n_steps reached, end loop
            # otherwise, do one more step
            is_episode_over = is_episode_over or (agent.t_step >= max_n_steps)
            agent.t_step += 1
        # anneal epsilon
        epsilon = max(epsilon_min, epsilon_decay_rate * epsilon)
        # keep track of most recent score
        last_100_scores.append(score)
        all_scores.append(score)
        last_100_scores_mean = np.mean(last_100_scores)
        last_100_scores_rolling_means.append(last_100_scores_mean)
        plot_scores(all_scores, last_100_scores_rolling_means, episode_count,
                    agent.buffer_size, agent.batch_size, agent.gamma,
                    agent.tau, agent.lr, agent.update_every,
                    agent.qnetwork_local)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            episode_count, last_100_scores_mean),
              end="")
        completed_100_episodes = episode_count % 100 == 0
        if completed_100_episodes:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                episode_count, last_100_scores_mean))
        is_problem_solved = last_100_scores_mean >= 13.0
        if is_problem_solved:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'\
                  .format(episode_count, last_100_scores_mean))
            torch.save(agent.qnetwork_local.state_dict(), 'weights.pth')
        # if problem solved or max_n_episodes reached, end loop
        # otherwise, play one more episode
        is_game_over = is_problem_solved or (episode_count >= max_n_episodes)
        episode_count += 1
Example #9
File: main.py Project: maks-ym/evolclust
def main():
    args = parse_arguments(sys.argv[1:])

    print("Parameters:")
    for arg_ in args.sys_args:
        print(arg_)
    print()

    # read data
    # =========

    hapt_data = data.HAPT()
    hapt_data.load_all_data()
    hapt_data.aggregate_groups()

    exp_data = hapt_data.get_train_data()
    exp_labs = hapt_data.get_train_labels()
    exp_labels_map = hapt_data.get_labels_map()
    exp_centroids_num = len(hapt_data.get_labels_map())

    if args.data == "test":
        exp_data = hapt_data.get_test_data()
        exp_labs = hapt_data.get_test_labels()
        exp_centroids_num = len(hapt_data.get_labels_map())

    if args.aggregate:
        exp_labs = hapt_data.get_aggregated_train_labels()
        exp_labels_map = hapt_data.get_aggregated_labels_map()
        exp_centroids_num = len(hapt_data.get_aggregated_labels_map())
        if args.data == "test":
            exp_labs = hapt_data.get_aggregated_test_labels()

    # Show experiment data
    # ====================

    if args.showdata:
        utils.plot_clusters(exp_data, exp_labs, exp_labels_map, True)
        return

    # evolution
    # =========

    iterations_list, scores_list, populations_list, total_time_list, log_dir_list, best_indiv_idx_list = [], [], [], [], [], []
    best_overall = (-1, 0, 0, 0)  # score, experiment, generation (iteration), individual
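    # Repeat the SGA run args.repeat times, tracking the best-scoring individual
    # across all runs and saving a per-run score plot to each run's log directory.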

    for exp_i in range(args.repeat):
        iterations, scores, populations, total_time, log_dir, best_indiv_idx = evolution.run_SGA(
            args.iter_num,
            exp_data,
            exp_labs,
            args.pop_num,
            args.prob_cross,
            args.prob_mutation,
            exp_centroids_num,
            args.adapt_function,
            args.dist_measure,
            log_dir="logs",
            loggin_pref="exp {}/{}: ".format(exp_i + 1, args.repeat))
        cur_best_score = scores[best_indiv_idx[0], best_indiv_idx[1]]
        if best_overall[0] < cur_best_score:
            best_overall = (cur_best_score, exp_i, best_indiv_idx[0],
                            best_indiv_idx[1])

        iterations_list.append(iterations)
        scores_list.append(scores)
        populations_list.append(populations)
        total_time_list.append(total_time)
        log_dir_list.append(log_dir)
        best_indiv_idx_list.append(best_indiv_idx)

        # save plot
        plot_tuple = ("pop:" + str(args.pop_num), "p_c:" +
                      str(args.prob_cross), "p_m:" + str(args.prob_mutation),
                      "data size:" + str(len(exp_labs)), args.adapt_function,
                      args.dist_measure)
        utils.plot_scores(iterations,
                          scores,
                          args.adapt_function,
                          plot_tuple,
                          to_file=True,
                          out_dir=log_dir)

    # visualize
    # =========
    if 1 < args.repeat:
        plot_tuple = ("pop:" + str(args.pop_num), "p_c:" +
                      str(args.prob_cross), "p_m:" + str(args.prob_mutation),
                      "data size:" + str(len(exp_labs)), args.adapt_function,
                      args.dist_measure)
        utils.plot_avg_scores(iterations_list,
                              scores_list,
                              args.adapt_function,
                              best_indiv_idx_list,
                              plot_tuple,
                              to_file=True,
                              out_dirs=log_dir_list)
Example #10
        reward = torch.tensor([reward], device=device)

        # Observe new state
        if not done:
            next_state = get_screen(env).to(device)
        else:
            next_state = None

        # Store the transition in memory
        memory.push(state, action, next_state, reward)

        # Move to the next state
        state = next_state

        # Perform one step of the optimization (on the target network)
        optimize_model(device, pred_net, target_net, optimizer, memory)
        steps += 1

        if steps == TARGET_UPDATE:  # update the target net weights
            steps = 0
            target_net.load_state_dict(pred_net.state_dict())

    plot_scores(episode_rewards)

print('Done')
env.render()
env.close()
plt.ioff()
plt.show()
Example #11
def main():
    args = parse_arguments(sys.argv[1:])

    # read params
    # ===========
    # possible params:
    # iter_num, pop_num, centers_num, prob_cross, prob_mutation, data shape, labs shape,
    # adapt_function, dist_measure, log_dir, best score, best score (index), total_time

    exp_params = {}
    text_file = [f for f in os.listdir(args.path) if f.endswith(".txt")][0]
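    # Parse "key: value" lines from the log: counts become ints, probabilities
    # and the best score become floats, shapes become tuples, and the
    # "best score (index)" line becomes a (generation, individual) pair.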
    with open(os.path.join(args.path, text_file), "r") as text_f:
        for line in text_f:
            line = line.replace("\t", "").strip().split(":")
            if len(line) == 2 and line[0] != "" and line[1] != "":
                if line[0] == "iter_num" or line[0] == "pop_num" or line[
                        0] == "centers_num":
                    exp_params[line[0].replace(" ", "_")] = int(line[1])
                elif line[0] == "prob_cross" or line[
                        0] == "prob_mutation" or line[0] == "best score":
                    exp_params[line[0].replace(" ", "_")] = float(line[1])
                elif line[0] == "data shape" or line[0] == "labs shape":
                    exp_params[line[0].replace(" ", "_")] = make_tuple(line[1])
                elif line[0] == "best score (index)":
                    #best score (index):	generation 95, individual 99
                    line[1] = line[1].strip().split(",")
                    exp_params["best_index"] = (
                        int(line[1][0].strip().split(" ")[1]),
                        int(line[1][1].strip().split(" ")[1]))
                else:
                    exp_params[line[0].replace(" ", "_")] = line[1]

    print("\nexperiment parameters were:")
    for k, v in exp_params.items():
        print("{:20}: {}".format(k, v))

    # read results
    # ============

    generations = np.load(os.path.join(args.path, "generations.npy"))
    iterations = np.load(os.path.join(args.path, "iterations.npy"))
    scores = np.load(os.path.join(args.path, "scores.npy"))

    best_centers = generations[exp_params["best_index"][0],
                               exp_params["best_index"][1]]

    print("\nobtained results are:")
    print(
        "generations (total num, pop size, centrs num, feats num): {}".format(
            generations.shape))
    print(
        "iterations (iterations num, ):                            {}".format(
            iterations.shape))
    print(
        "scores (total num, pop size):                             {}".format(
            scores.shape))
    print(
        "generations total num, iterations num and scores total num must be equal!"
    )
    print("generations pop size and scores pop size must be equal too!")

    plot_tuple = ("pop:" + str(exp_params["pop_num"]),
                  "p_c:" + str(exp_params["prob_cross"]),
                  "p_m:" + str(exp_params["prob_mutation"]),
                  "data size:" + str(len(exp_params["data_shape"])),
                  exp_params["adapt_function"], exp_params["dist_measure"],
                  "best score:" + str(exp_params["best_score"])[:9] + " at " +
                  str(exp_params["best_index"]))
    utils.plot_scores(iterations,
                      scores,
                      exp_params["adapt_function"],
                      plot_tuple,
                      not args.nooutput,
                      out_dir=args.outdir)

    # read data
    # =========
    print("reading data...")
    hapt_data = data.HAPT()
    hapt_data.load_all_data()
    hapt_data.aggregate_groups()

    test_data = hapt_data.get_test_data()
    test_labs = hapt_data.get_test_labels()
    train_data = hapt_data.get_train_data()
    train_labs = hapt_data.get_train_labels()
    labs_map = hapt_data.get_labels_map()
    if exp_params["centers_num"] == 3:
        test_labs = hapt_data.get_aggregated_test_labels()
        train_labs = hapt_data.get_aggregated_train_labels()
        labs_map = hapt_data.get_aggregated_labels_map()
    centroids_num = len(labs_map)

    assert exp_params["centers_num"] == centroids_num

    # do clusterizations
    # ==================
    print("clustering...")
    labels_names = list(labs_map.values())
    # train data
    train_clust_labs = cluster.Centroids.cluster(
        train_data, best_centers, dist_func=exp_params["dist_measure"])
    train_clust_labs = cluster.Utils.adjust_labels(train_clust_labs,
                                                   train_labs)
    train_silh = cluster.Evaluate.silhouette(train_data, train_clust_labs,
                                             exp_params["dist_measure"])
    train_silh_normalized = (train_silh + 1) / 2
    train_info_gain = cluster.Evaluate.information_gain(
        train_labs, train_clust_labs)
    mapped_train_clust_labs = [labs_map[l] for l in train_clust_labs]
    mapped_train_labs = [labs_map[l] for l in train_labs]
    train_conf_mtx = confusion_matrix(mapped_train_labs,
                                      mapped_train_clust_labs,
                                      labels=labels_names)
    print("train set\tsilh: {:.6}, silh normalized: {:.6}, info gain: {:.6}".
          format(train_silh, train_silh_normalized, train_info_gain))
    # test data
    test_clust_labs = cluster.Centroids.cluster(
        test_data, best_centers, dist_func=exp_params["dist_measure"])
    test_clust_labs = cluster.Utils.adjust_labels(test_clust_labs, test_labs)
    test_silh = cluster.Evaluate.silhouette(test_data, test_clust_labs,
                                            exp_params["dist_measure"])
    test_silh_normalized = (test_silh + 1) / 2
    test_info_gain = cluster.Evaluate.information_gain(test_labs,
                                                       test_clust_labs)
    mapped_test_clust_labs = [labs_map[l] for l in test_clust_labs]
    mapped_test_labs = [labs_map[l] for l in test_labs]
    test_conf_mtx = confusion_matrix(mapped_test_labs,
                                     mapped_test_clust_labs,
                                     labels=labels_names)
    print("test set\tsilh: {:.6}, silh normalized: {:.6}, info gain: {:.6}".
          format(test_silh, test_silh_normalized, test_info_gain))

    # Show data
    # =========
    print("creating plots...")
    # clusters
    utils.plot_clusters(train_data,
                        train_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="train_orig_clusters")
    utils.plot_clusters(train_data,
                        train_clust_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="train_obtained_clusters")
    utils.plot_clusters(test_data,
                        test_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="test_orig_clusters")
    utils.plot_clusters(test_data,
                        test_clust_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="test_obtained_clusters")

    # confusion matrices
    utils.plot_confusion_matrix(
        train_conf_mtx,
        labels_names,
        normalize=False,
        title=
        'Confusion matrix\ntrain set\n(silh: {:.6}, silh normalized: {:.6}, info gain: {:.6})'
        .format(train_silh, train_silh_normalized, train_info_gain),
        cmap=plt.cm.Blues,
        out_dir=args.outdir,
        filename="train_conf_matr_silh_info_gain")
    utils.plot_confusion_matrix(
        test_conf_mtx,
        labels_names,
        normalize=False,
        title=
        'Confusion matrix\ntest set\n(silh: {:.6}, silh normalized: {:.6}, info gain: {:.6})'
        .format(test_silh, test_silh_normalized, test_info_gain),
        cmap=plt.cm.Blues,
        out_dir=args.outdir,
        filename="test_conf_matr_silh_info_gain")
    print("inference ended")
Example #12
            states = next_states
            episode_reward += rewards

            if np.any(dones):
                break

        agent1_reward.append(episode_reward[0])
        agent2_reward.append(episode_reward[1])

        if i_episode % print_every == 0:
            avg_rewards = [np.mean(agent1_reward[-100:]), np.mean(agent2_reward[-100:])]
            print("\rEpisode {} - \tAverage Score: {:.2f} {:.2f}".format(i_episode, avg_rewards[0], avg_rewards[1]),
                  end="")

            torch.save(agent1.actor_local.state_dict(), 'agent1_actor_checkpoint.pth')
            torch.save(agent1.critic_local.state_dict(), 'agent1_critic_checkpoint.pth')

            torch.save(agent2.actor_local.state_dict(), 'agent2_actor_checkpoint.pth')
            torch.save(agent2.critic_local.state_dict(), 'agent2_critic_checkpoint.pth')

    return {'agent1_scores': agent1_reward, 'agent2_scores': agent2_reward}

scores = ddpg()

env.close()

plot_scores(scores['agent1_scores'])
plot_scores(scores['agent2_scores'])

max_scores = [max(scores['agent1_scores'][i], scores['agent2_scores'][i]) for i in range(len(scores['agent1_scores']))]
Example #13
    scores, best_model, best_score = tune_learning_rate(
        X_train, Y_train, X_test, Y_test, best_model, best_score, activation)
    scores_3, best_model, best_score = tune_model(X_train, Y_train, X_test,
                                                  Y_test, best_model,
                                                  best_score, activation)
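    # A single 2x5 grid of axes is shared by the two plot_scores calls below.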

    fig, ax = plt.subplots(2, 5, figsize=(30, 10))
    fig.tight_layout(pad=5.0)
    fig.subplots_adjust(left=0.062,
                        right=0.97,
                        bottom=0.148,
                        top=0.88,
                        wspace=0.34,
                        hspace=0.383)

    plot_scores(scores, mapper, ax)
    plot_scores_3(scores_3, mapper_3, ax)

    print('\n\
        #########################################\n\
        ##                                     ##\n\
        ##          BEST MODEL FOUND           ##\n\
        ##       (HYPER PARAMETER TUNING)      ##\n\
        ##                                     ##\n\
        #########################################\n\
        \n')
    pprint(best_model.get_params())
    print('\nTrain Accuracy:\t{:0.3f}'.format(best_model.score(X_train, Y_train)))
    print('\nTest  Accuracy:\t{:0.3f}\n\n'.format(best_model.score(X_test, Y_test)))

    print("Time elapsed  =  {} s\n".format(time.time() - start))
Example #14
def save_checkpoint(agent, scores_episodes, scores_window):
    utils.plot_scores(args.checkpoint_prefix + "_reward_history_plot.png",
                      scores_episodes)
    utils.plot_scores(args.checkpoint_prefix + "_reward_plot.png",
                      scores_window)
    agent.save_checkpoint()
Example #15
            agent.actions = agent.act(add_noise=True)
            agent.rewards, agent.next_states, agent.dones = env.step(
                agent.actions)
            agent.step()
            agent.states = agent.next_states

        scores.append(agent.scores.mean())
        scores_window.append(agent.scores.mean())

        if ep % print_every == 0:
            print('Episode %d, avg score: %.2f' % (ep, agent.scores.mean()))

        if np.mean(scores_window) >= 30:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                .format(ep - 100, np.mean(scores_window)))
            torch.save(agent.actor.state_dict(),
                       'checkpoints/reacher_%s_actor_checkpoint.pth' % model)
            torch.save(agent.critic.state_dict(),
                       'checkpoints/reacher_%s_critic_checkpoint.pth' % model)

    env.close()

    return scores, agent


if __name__ == '__main__':
    model_name = 'DDPG'
    scores, agent = train_agent(300, model_name)
    plot_scores(scores, model_name)
Example #16
def by_window(config):
    result = apply_algorithm(by_window_func, config)
    plot_scores(result, config)
Example #17
# Let's explore the environment with random actions
#run_gym(env)

from agent import DQNAgent

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

# Instantiate agent
agent = DQNAgent(
    state_size=state_size,
    action_size=action_size,
    #                 use_double=True,
    #                 use_dueling=True,
    #                 use_priority=True,
    use_noise=True,
    seed=42)

agent.summary()

# Let's watch an untrained agent
#run_gym(env, get_action=lambda state: agent.act(state))

scores = train_agent(agent, env)

plot_scores(scores, 'NoisyNets Deep Q-Network', polyfit_deg=6)

#agent.load_weights('prioritized_local_weights.pth')

run_gym(env, get_action=lambda state: agent.act(state), max_t=1000)