Example #1
def prune_benchmark():
    logger = get_logger("prune_pong_agent_benchmark")
    prune_model = DQNPacman(input_size=prune_config.input_size,
                            output_size=prune_config.output_size,
                            model_path=prune_config.model_path,
                            scope=prune_config.scope,
                            epsilon_stop=prune_config.final_epsilon,
                            epsilon=prune_config.initial_epsilon,
                            pruning_end=prune_config.pruning_end,
                            pruning_freq=prune_config.pruning_freq,
                            sparsity_end=prune_config.sparsity_end,
                            target_sparsity=prune_config.target_sparsity,
                            prune_till_death=True)
    target_model = PacmanTargetNet(input_size=dense_config.input_size,
                                   output_size=dense_config.output_size)
    logger.info("loading models")
    print("loading models")
    target_model.load_model(dense_config.ready_path)
    prune_model.load_model(dense_config.ready_path)
    prune_model.change_loss_to_benchmark_loss()
    prune_model.reset_global_step()
    logger.info("Commencing iterative pruning")
    sparsity_vs_accuracy = iterative_pruning(logger,
                                             prune_model,
                                             target_model,
                                             prune_config.n_epoch,
                                             benchmarking=True)
    print("benchmark finished")
    plot_graph(sparsity_vs_accuracy,
               "sparsity_vs_accuracy_benchmark",
               figure_num=1)
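All of the examples on this page funnel their results into a plot_graph helper whose exact signature varies from project to project. Purely as an illustration, here is a minimal matplotlib-based sketch matching the (data, name, figure_num, xaxis, yaxis) call form used in Examples #1, #4 and #8; the names and defaults are assumptions, not any project's actual helper.

import matplotlib.pyplot as plt


def plot_graph(data, name, figure_num=1, xaxis='sparsity', yaxis='accuracy'):
    # data is assumed to be a pair [x_values, y_values]; the figure is saved as "<name>.png"
    x_values, y_values = data
    plt.figure(figure_num)
    plt.plot(x_values, y_values)
    plt.xlabel(xaxis)
    plt.ylabel(yaxis)
    plt.title(name)
    plt.savefig(name + ".png")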
Example #2
def main():
    prune_model = SimpleNet(input_size=prune_config.input_size,
                            output_size=prune_config.output_size,
                            model_path=FLAGS.model_path,
                            pruning_start=0,
                            pruning_end=100000,
                            target_sparsity=FLAGS.goal_sparsity,
                            dropout=FLAGS.dropout,
                            pruning_freq=FLAGS.prune_freq,
                            initial_sparsity=prune_config.initial_sparsity,
                            sparsity_start=prune_config.sparsity_start,
                            sparsity_end=FLAGS.end_sparsity,
                            scope='SimpleNetPruned')
    prune_model.load_model(FLAGS.ready_path)
    prune_model.reset_global_step()
    # important for stable pruning: the pruning mechanism takes the global step as a parameter
    sparsity_vs_accuracy = prune_model.fit(
        n_epochs=FLAGS.n_epochs,
        learning_rate_schedule=prune_config.learning_rate_schedule,
        batch_size=FLAGS.batch_size,
        prune=True,
        config=prune_config)
    # the fit function allows gradual pruning along with fine-tuning, to reach the best sparsity and accuracy possible
    plot_graph(sparsity_vs_accuracy, "sparsity_vs_accuracy_simplenet")
    prune_model.load_model(FLAGS.best_path)
    plot_conv_weights(prune_model, title='after')
    prune_model.sess.close()
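The reset_global_step() call above matters because, as the comment notes, the pruning mechanism derives its sparsity target from the global step. As a hedged illustration only (this is not the project's code), the polynomial schedule from gradual magnitude pruning (Zhu & Gupta, 2017) shows how pruning_start, pruning_end, initial_sparsity and target_sparsity typically combine with the step counter:

def sparsity_at_step(global_step, pruning_start=0, pruning_end=100000,
                     initial_sparsity=0.0, target_sparsity=0.9):
    # clamp to the schedule boundaries
    if global_step <= pruning_start:
        return initial_sparsity
    if global_step >= pruning_end:
        return target_sparsity
    progress = (global_step - pruning_start) / float(pruning_end - pruning_start)
    # cubic ramp: sparsity grows quickly at first and flattens towards target_sparsity
    return target_sparsity + (initial_sparsity - target_sparsity) * (1.0 - progress) ** 3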
Example #3
def testEnv():
    env = Env()
    channelThroughPut = 0  # fraction of time in which packets are successfully delivered over the channel,
    # i.e. no collisions or idle time slots
    for iteration in range(config.Iterations):
        for t in range(config.TimeSlots):
            initialState = env.reset()
            for user in range(config.N):
                action = slottedAlohaProtocol()
                env.step(action=action, user=user)
                # each user changes the inner state of the environment, which uses that inner state
                # to keep track of the channels and the ACK signals for each user
            nextStateForEachUser, rewardForEachUser = env.getNextState()
            # a reward of one means that a packet was successfully delivered over the channel,
            # so the sum is at most the number of channels -> config.K
            channelThroughPut = channelThroughPut + np.sum(rewardForEachUser)
    # measuring the expected value
    channelThroughPut = channelThroughPut / (config.Iterations *
                                             config.TimeSlots)
    print("Channel Utilization average {}".format(channelThroughPut))
    ToPlotX = range(config.Iterations * config.TimeSlots)
    ToPlotY = np.ones_like(ToPlotX) * channelThroughPut
    plot_graph(data=[ToPlotX, ToPlotY],
               filename="Aloha",
               title="Aloha",
               xlabel="Time slot",
               ylabel="Average channel utilization",
               legend="SlottedAloha")


#
#
# def testTimeEnv():
#     env = TimeDependentEnv()
#     channelThroughPut = 0  # fraction of time that packets are successfully delivered over the channel
#     # i.e no collisions or idle time slots
#     for iteration in range(config.Iterations):
#         TimeSPU = env.reset()
#         for t in range(config.TimeSlots):
#             env.resetTimeStep()
#             #  reset the internal state of the environment
#             #  which keep tracks of the users actions through out the time step
#             for user in range(config.N):
#                 action = slottedAlohaProtocol()
#                 env.step(action=action, user=user)
#                 # each user changes the inner state of the environment, which uses that inner state
#                 # to keep track of the channels and the ACK signals for each user
#             nextStateForEachUser, rewardForEachUser = env.tstep(timestep=t)
#             # a reward of one means that a packet was successfully delivered over the channel,
#             # so the sum is at most the number of channels -> config.K
#             channelThroughPut = channelThroughPut + np.sum(rewardForEachUser)
#     # measuring the expected value
#     channelThroughPut = channelThroughPut / (config.Iterations * config.TimeSlots)
#     print("Channel Utilization average {}".format(channelThroughPut))
#     ToPlotX = range(config.Iterations * config.TimeSlots)
#     ToPlotY = np.ones_like(ToPlotX) * channelThroughPut
#     plot_graph(data=[ToPlotX, ToPlotY], filename="Aloha", title="Aloha",
#                xlabel="Time slot", ylabel="Average channel utilization", legend="SlottedAloha")
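slottedAlohaProtocol() is called above but not shown. Below is a minimal sketch of such a policy, under the assumption that action 0 means "stay silent" and actions 1..K transmit on a randomly chosen channel; the project's real implementation (and its use of config) may differ.

import numpy as np


def slottedAlohaProtocol(num_channels=2, transmit_prob=0.5):
    # with probability transmit_prob, transmit on a uniformly chosen channel; otherwise stay silent
    if np.random.rand() < transmit_prob:
        return np.random.randint(1, num_channels + 1)
    return 0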
Example #4
def main():
    #   ----------------- Setting initial variables Section -----------------
    logger = get_logger(FLAGS.PoPS_dir + "/PoPS_ITERATIVE")
    logger.info(" ------------- START: -------------")
    logger.info("Setting initial data structures")
    accuracy_vs_size = [[], []]
    logger.info("Loading models")
    teacher = CartPoleDQNTarget(input_size=dense_config.input_size,
                                output_size=dense_config.output_size)
    teacher.load_model(path=FLAGS.teacher_path)  # load teacher
    logger.info("----- evaluating teacher -----")
    print("----- evaluating teacher -----")
    teacher_score = evaluate(agent=teacher, n_epoch=FLAGS.eval_epochs)
    logger.info("----- teacher evaluated with {} ------".format(teacher_score))
    print("----- teacher evaluated with {} -----".format(teacher_score))
    prune_step_path = FLAGS.PoPS_dir + "/prune_step_"
    policy_step_path = FLAGS.PoPS_dir + "/policy_step_"
    initial_path = policy_step_path + "0"
    logger.info(
        "creating policy step 0 model, which is identical in size to the original model"
    )
    copy_weights(
        output_path=initial_path,
        teacher_path=FLAGS.teacher_path)  # in order to create the initial model
    compressed_agent = StudentCartpole(
        input_size=student_config.input_size,
        output_size=student_config.output_size,
        model_path=initial_path,
        tau=student_config.tau,
        pruning_freq=student_config.pruning_freq,
        sparsity_end=student_config.sparsity_end,
        target_sparsity=student_config.target_sparsity)
    compressed_agent.load_model()
    initial_size = compressed_agent.get_number_of_nnz_params()
    accuracy_vs_size[0].append(initial_size)
    accuracy_vs_size[1].append(teacher_score)
    initial_number_of_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
    initial_number_of_nnz = sum(initial_number_of_params_at_each_layer)
    converge = False
    iteration = 0
    convergence_information = deque(maxlen=2)
    convergence_information.append(100)
    precent = 100
    arch_type = 0
    last_measure = initial_size
    while not converge:
        iteration += 1
        print("-----  Pruning Step {} -----".format(iteration))
        logger.info(" -----  Pruning Step {} -----".format(iteration))
        path_to_save_pruned_model = prune_step_path + str(iteration)
        #   ----------------- Pruning Section -----------------
        if arch_type == 2:
            arch_type = 3  # special arch_type for prune-oriented learning rate
        sparsity_vs_accuracy = iterative_pruning_policy_distilliation(
            logger=logger,
            agent=compressed_agent,
            target_agent=teacher,
            iterations=FLAGS.iterations,
            config=student_config,
            best_path=path_to_save_pruned_model,
            arch_type=arch_type,
            lower_bound=student_config.LOWER_BOUND,
            accumulate_experience_fn=accumulate_experience_cartpole,
            evaluate_fn=evaluate,
            objective_score=student_config.OBJECTIVE_SCORE)
        plot_graph(data=sparsity_vs_accuracy,
                   name=FLAGS.PoPS_dir +
                   "/initial size {}%,  Pruning_step number {}".format(
                       precent, iteration),
                   figure_num=iteration)

        # load the model with the highest sparsity that still achieves a reasonable score
        compressed_agent.load_model(path_to_save_pruned_model)
        #   ----------------- Measuring redundancy Section -----------------
        # the number of parameters that are not zero at each layer
        nnz_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
        # the number of parameters that are not zero
        nnz_params = sum(nnz_params_at_each_layer)
        # redundancy is the fraction of parameters we do not need; nnz_params / initial_number_of_nnz is the fraction we do need
        redundancy = (1 - nnz_params / initial_number_of_nnz) * 100
        print(
            "-----  Pruning Step {} finished, got {}% redundancy in net params -----"
            .format(iteration, redundancy))
        logger.info(
            "-----  Pruning Step {} finished , got {}% redundancy in net params -----"
            .format(iteration, redundancy))
        logger.info(
            "-----  Pruning Step {} finished with {} NNZ params at each layer".
            format(iteration, nnz_params_at_each_layer))
        print(
            " -----  Evaluating redundancy at each layer Step {}-----".format(
                iteration))
        logger.info(
            " -----  Evaluating redundancy at each layer Step {} -----".format(
                iteration))
        redundancy_at_each_layer = calculate_redundancy(
            initial_nnz_params=initial_number_of_params_at_each_layer,
            next_nnz_params=nnz_params_at_each_layer)
        logger.info(
            "----- redundancy for each layer at step {} is {} -----".format(
                iteration, redundancy_at_each_layer))
        if iteration == 1:
            redundancy_at_each_layer = [
                0.83984375, 0.8346405029296875, 0.83795166015625, 0.83984375
            ]
        #   ----------------- Policy distillation Section -----------------
        print(
            " -----  Creating Model with size according to the redundancy at each layer ----- "
        )
        logger.info(
            "----- Creating Model with size according to the redundancy at each layer -----"
        )
        policy_distilled_path = policy_step_path + str(iteration)
        # creating the compact model where every layer size is determined by the redundancy measure
        compressed_agent = StudentCartpole(
            input_size=student_config.input_size,
            output_size=student_config.output_size,
            model_path=policy_distilled_path,
            tau=student_config.tau,
            redundancy=redundancy_at_each_layer,
            pruning_freq=student_config.pruning_freq,
            sparsity_end=student_config.sparsity_end,
            target_sparsity=student_config.target_sparsity,
            last_measure=last_measure)
        nnz_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
        logger.info(
            "-----  Step {} ,Created Model with {} NNZ params at each layer".
            format(iteration, nnz_params_at_each_layer))
        iterative_size = compressed_agent.get_number_of_nnz_params()
        last_measure = iterative_size
        precent = (iterative_size / initial_size) * 100
        convergence_information.append(precent)
        print(
            " ----- Step {}, Created Model with size {} which is {}% from original size ----- "
            .format(iteration, iterative_size, precent))
        logger.info(
            "----- Created Model with size {} which is {}% from original size -----"
            .format(iterative_size, precent))
        # scheduling the right learning rate for the size of the model
        if precent > 40:
            arch_type = 0
        elif 10 <= precent <= 40:
            arch_type = 1
        else:
            arch_type = 2
        print(" -----  policy distilling Step {} ----- ".format(iteration))
        logger.info("----- policy distilling Step {} -----".format(iteration))
        fit_supervised(logger=logger,
                       arch_type=arch_type,
                       student=compressed_agent,
                       teacher=teacher,
                       n_epochs=FLAGS.n_epoch,
                       evaluate_fn=evaluate,
                       accumulate_experience_fn=accumulate_experience_cartpole,
                       lower_score_bound=student_config.LOWER_BOUND,
                       objective_score=student_config.OBJECTIVE_SCORE)

        policy_distilled_score = evaluate(agent=compressed_agent,
                                          n_epoch=FLAGS.eval_epochs)
        compressed_agent.reset_global_step()
        print(
            " -----  policy distilling Step {} finished  with score {} ----- ".
            format(iteration, policy_distilled_score))
        logger.info(
            "----- policy distilling Step {} finished with score {}  -----".
            format(iteration, policy_distilled_score))
        # checking convergence
        converge = check_convergence(convergence_information)
        # for debugging purposes
        accuracy_vs_size[0].append(iterative_size)
        accuracy_vs_size[1].append(policy_distilled_score)

    plot_graph(data=accuracy_vs_size,
               name=FLAGS.PoPS_dir + "/accuracy_vs_size",
               figure_num=iteration + 1,
               xaxis='NNZ params',
               yaxis='Accuracy')
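The loop above leans on calculate_redundancy and check_convergence, which are not shown. Below are hedged sketches consistent with how they are called here (per-layer redundancy as the fraction of parameters pruned away, convergence once the compressed size stops shrinking); the real implementations may differ.

def calculate_redundancy(initial_nnz_params, next_nnz_params):
    # per-layer fraction of parameters that were pruned away relative to the initial model
    return [1.0 - float(nxt) / float(init)
            for init, nxt in zip(initial_nnz_params, next_nnz_params)]


def check_convergence(convergence_information, tolerance=1.0):
    # convergence_information holds the last two size measurements (as % of the original);
    # stop once the model size changes by less than `tolerance` percent between steps
    if len(convergence_information) < 2:
        return False
    previous, current = convergence_information
    return abs(previous - current) < tolerance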
Example #5
def main():
    # variables used for the model's logic
    raw_scores, states, actions_booleans = [BEGINING_SCORE], [], []
    episode_number = 0
    update_weights = False  # if too much time has passed, update the weights even if the game is not finished
    grads_sums = get_empty_grads_sums()  # initialize the gradients holder for the trainable variables

    # variables for debugging:
    manual_prob_use = 0  # consider using the differences from 1
    prob_deviation_sum = 0
    default_data_counter = 0  # counts the number of exceptions in reading the observations' file (and getting default data)
    step_counter = 0  # for tests

    #variables for evaluation:
    best_average_score = 0
    current_average_score = 0
    average_scores_along_the_game = []

    with tf.Session() as sess:
        sess.run(init)
        # sess.run(init2)  # check if this is necessary

        # load saved weights if the file exists and loading is enabled
        if (os.path.isfile(WEIGHTS_FILE) and LOAD_WEIGHTS):
            # load weights with pickle
            with open(WEIGHTS_FILE, 'rb') as f:  # BEST_WEIGHTS
                for var, val in zip(tvars, pkl.load(f)):
                    sess.run(tf.assign(var, val))
            print("loaded weights successfully!")

        # create the weights files if they don't exist:
        if (not os.path.isfile(WEIGHTS_FILE)):
            open(WEIGHTS_FILE, 'a').close()
        if (not os.path.isfile(BEST_WEIGHTS)):
            open(BEST_WEIGHTS, 'a').close()
            print("created weights file successfully!")

        while episode_number < MAX_GAMES:
            start_time = time.time()
            # get data and process the score into a reward
            obsrv, score, bonus, is_dead, request_id, default_obsrv, AI_action, AI_accel = get_observation(
            )  # get observation

            # if for some reason the bot died and the message about it got lost, we check it here
            if (not is_dead):
                is_dead = check_if_died(raw_scores[-1], score)

            # if default data was returned, take the score from the last step
            if (score == 0):
                score = raw_scores[-1]

            raw_scores.append(score)

            #FOR DEBUGGING
            #is_dead = False
            default_data_counter += default_obsrv
            #vars = sess.run(tvars)

            # append the observation that corresponds to the following action to states
            states.append(obsrv)
            # Run the policy network and get a distribution over actions
            action_probs = sess.run(actions_probs,
                                    feed_dict={observations: [obsrv]})

            # if - exploration, else - exploitation
            if (np.random.binomial(1, EPSILON_FOR_EXPLORATION, 1)[0]):
                chosen_actions = pick_action_uniformly(action_probs[0])
                logger.write_to_log('Tried exploration!')
            else:
                # np.random.multinomial can fail when the probabilities do not sum exactly to 1
                try:
                    chosen_actions = np.random.multinomial(1, action_probs[0])
                except ValueError:
                    chosen_actions = pick_random_action_manually(
                        action_probs[0])
                    manual_prob_use += 1
                    prob_deviation_sum += np.abs(np.sum(action_probs) - 1)

            # save the selected action for later use
            actions_booleans.append(chosen_actions)
            # index of the selected action
            action = np.argmax(actions_booleans[-1])

            #FOR DEBUGGING
            '''
            #print("action_probs: " + str(action_probs))
            print("observation got: " + str(obsrv))
            print("action chosen: " + str(action))
            print("manual_prob_use: " + str(manual_prob_use))
            print("prob_deviation_sum: " + str(prob_deviation_sum))
            print("default_data_counter: " + str(default_data_counter))
            print("step_counter: "+str(step_counter))
            '''

            if (step_counter % WRITE_TO_LOG == 0):
                #logger.write_to_log("observation got: " + str(obsrv))
                logger.write_to_log("action_probs: " + str(action_probs))
                logger.write_to_log("action chosen: " + str(action))

            # step the environment and get new measurements
            send_action(action, request_id)
            # add reward to rewards for a later use in the training step
            #rewards.append(reward)
            step_counter += 1  #this is for tests

            if (step_counter % STEPS_UNTIL_BACKPROP == 0):
                update_weights = True
            #So the model won't read the same frame many times
            time.sleep(0.25)

            if (is_dead or update_weights) and len(raw_scores) > 2:
                #UPDATE MODEL:

                #calculate rewards from raw scores:
                #processed_rewards = calc_reward_from_raw(raw_scores,is_dead)
                processed_rewards = get_reward(raw_scores, is_dead)
                #processed_rewards = raw_score_reward(raw_scores,is_dead)

                # FOR DEBUGGING:
                if (is_dead):
                    print('just died!')
                # print("processed_rewards: " + str(processed_rewards))
                # logger.write_to_log("raw_score: " + str(raw_scores))
                logger.write_to_log("processed_rewards: " +
                                    str(processed_rewards))

                #'''
                # create the reward sums of the reversed rewards array
                rewards_sums = np.cumsum(processed_rewards[::-1])
                # apply decay to the reward sums, reverse back and normalize
                rewards_sums = decrese_rewards(rewards_sums[::-1])
                rewards_sums -= np.mean(rewards_sums)
                rewards_sums = np.divide(rewards_sums, np.std(rewards_sums))
                #logger.write_to_log("rewards_sums: " + str(rewards_sums))
                #'''

                modified_rewards_sums = np.reshape(
                    rewards_sums, [1, len(processed_rewards)])
                # modify actions_booleans to be an array of booleans
                actions_booleans = np.array(actions_booleans)
                actions_booleans = actions_booleans == 1

                #FOR DEBUGGING:
                '''
                fa_res = sess.run(filtered_actions, feed_dict={observations: states, actions_mask: actions_booleans,
                                       rewards_arr: modified_rewards_sums})
                pi_res = sess.run(pi, feed_dict={observations: states, actions_mask: actions_booleans,
                                                       rewards_arr: modified_rewards_sums})
                loss_res = sess.run(loss, feed_dict={observations: states, actions_mask: actions_booleans,
                                                       rewards_arr: modified_rewards_sums})

                logger.write_to_log("filtered_actions: "+ str(fa_res))
                '''

                # gradients for current episode
                grads = sess.run(Gradients,
                                 feed_dict={
                                     observations: states,
                                     actions_mask: actions_booleans,
                                     rewards_arr: modified_rewards_sums
                                 })
                grads_sums += np.array(grads)

                episode_number += 1
                update_weights = False

                #evaluation:
                current_average_score = np.average(raw_scores)
                average_scores_along_the_game.append(current_average_score)
                logger.write_to_log("average score after " +
                                    str(step_counter) + ' steps: ' +
                                    str(current_average_score))

                # reset step_counter:
                step_counter = 0

                # Do the training step
                if (episode_number % BATCH_SIZE == 0):
                    #if (episode_number % WRITE_TO_LOG == 0):
                    #logger.write_to_log("learned variables: "+str(vars[0]))
                    print("taking the update step")
                    grad_dict = {
                        Gradients_holder[i]: grads_sums[i]
                        for i in range(VAR_NO)
                    }
                    #TODO choose learning rate?
                    # take the train step
                    sess.run(train_step, feed_dict=grad_dict)
                    # reset grads_sums
                    grads_sums = get_empty_grads_sums()
                # evaluate and save:
                if (best_average_score < current_average_score):
                    best_average_score = current_average_score
                    # save with pickle
                    with open(BEST_WEIGHTS, 'wb') as f:
                        pkl.dump(sess.run(tvars), f, protocol=2)
                    print('Saved best weights successfully!')
                    # print('Current best result for %d episodes: %f.' % (episode_number, best_average_score))
                    logger.write_to_log('Saved best weights successfully!')
                    logger.write_to_log('Current best result for ' +
                                        str(episode_number) + ' episodes: ' +
                                        str(best_average_score))

                # manual save
                with open(WEIGHTS_FILE, 'wb') as f:
                    pkl.dump(sess.run(tvars), f, protocol=2)
                #print('auto-saved weights successfully.')

                # reset the relevant variables for the next episode
                raw_scores, states, actions_booleans = [BEGINING_SCORE], [], []
                manual_prob_use = 0

                wait_for_game_to_start()

                logger.write_spacer()

    plot_graph(average_scores_along_the_game,
               "Policy Gradient Average Score During Game",
               "PG_avg_score_during_game.png", "Epsisodes No.",
               "Average Score")
Example #6
                env.step(action=action, user=user)
                # each user changes the inner state of the environment, which uses that inner state
                # to keep track of the channels and the ACK signals for each user
            nextStateForEachUser, rewardForEachUser = env.getNextState()
            # a reward of one means that a packet was successfully delivered over the channel,
            # so the sum is at most the number of channels -> config.K
            channelThroughPut = channelThroughPut + np.sum(rewardForEachUser)
    # measuring the expected value
    channelThroughPut = channelThroughPut / (config.Iterations *
                                             config.TimeSlots)
    print("Channel Utilization average {}".format(channelThroughPut))
    ToPlotX = range(config.Iterations * config.TimeSlots)
    ToPlotY = np.ones_like(ToPlotX) * channelThroughPut
    plot_graph(data=[ToPlotX, ToPlotY],
               filename="Aloha",
               title="Aloha",
               xlabel="Time slot",
               ylabel="Average channel utilization",
               legend="SlottedAloha")


def testEnv():
    env = Env()
    channelThroughPut = 0  # fraction of time in which packets are successfully delivered over the channel,
    # i.e. no collisions or idle time slots
    for iteration in range(config.Iterations):
        for t in range(config.TimeSlots):
            initialState = env.reset()
            for user in range(config.N):
                action = slottedAlohaProtocol()
                env.step(action=action, user=user)
                # each user changes the inner state of the environment where the environment uses the inner state
Example #7
        # Loop over k steps
        while agent.step_number < MAX_STEPS:
            agent.take_one_step()
            #write to log and add to plot array:
            if (agent.step_number % agent.WRITE_TO_LOG_EVERY == 0):
                avg_scores_per_step.append(np.average(agent.last_raw_scores))
                logger.write_to_log("avg_scores_per_step" +
                                    str(avg_scores_per_step))

        # Save weights and best weights:
        # the last loaded weights will be overridden on the first save
        if (not agent.TEST_MODE):
            agent.save_weights(WEIGHTS_FILE)
            if (avg_scores_per_step[-1] > best_avg_per_step):
                best_avg_per_step = avg_scores_per_step[-1]
                agent.save_weights(BEST_WEIGHTS)

        # Evaluation and plotting
        plot_graph(avg_scores_per_step, "Average Score Per 100 Steps",
                   "DQN_avg_score_per_step_by_epoch_" + str(agent.epoch_no),
                   "Step No.", "Average score")
        avg_scores_per_epoch.append(np.average(avg_scores_per_step))
        logger.write_to_log("avg_scores_per_epoch" + str(avg_scores_per_epoch))
        logger.write_spacer()
        agent.epoch_no += 1

    plot_graph(avg_scores_per_epoch, "Average Score Per Epoch",
               "DQN_avg_score_per_epoch_for_experiment", "Epoch No.",
               "Average score")
    print("finished experiement")
Example #8
def main():
    logger = get_logger(FLAGS.PoPS_dir + "/PoPS_ITERATIVE")
    logger.info(" ------------- START: -------------")
    logger.info("Setting initial data structures")
    accuracy_vs_size = [[], []]
    logger.info("Loading models")
    teacher = PongTargetNet(input_size=dense_config.input_size, output_size=dense_config.output_size)
    teacher.load_model(path=FLAGS.teacher_path)  # load teacher
    logger.info("----- evaluating teacher -----")
    print("----- evaluating teacher -----")
    teacher_score = evaluate(agent=teacher, n_epoch=FLAGS.eval_epochs)
    logger.info("----- teacher evaluated with {} ------".format(teacher_score))
    print("----- teacher evaluated with {} -----".format(teacher_score))
    prune_step_path = FLAGS.PoPS_dir + "/prune_step_"
    policy_step_path = FLAGS.PoPS_dir + "/policy_step_"
    initial_path = policy_step_path + "0"
    copy_weights(output_path=initial_path, teacher_path=FLAGS.teacher_path)  # in order to create the initial model
    compressed_agent = StudentPong(input_size=student_config.input_size,
                                   output_size=student_config.output_size,
                                   model_path=initial_path,
                                   tau=student_config.tau, prune_till_death=True,
                                   pruning_freq=prune_config.pruning_freq,
                                   sparsity_end=prune_config.sparsity_end,
                                   target_sparsity=prune_config.target_sparsity)
    compressed_agent.load_model()
    initial_size = compressed_agent.get_number_of_nnz_params()
    accuracy_vs_size[0].append(initial_size)
    accuracy_vs_size[1].append(teacher_score)
    initial_number_of_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
    initial_number_of_nnz = sum(initial_number_of_params_at_each_layer)
    converge = False
    iteration = 0
    convergence_information = deque(maxlen=2)
    convergence_information.append(100)
    precent = 100
    arch_type = 0
    last_measure = initial_size
    while not converge:
        iteration += 1
        print("-----  Pruning Step {} -----".format(iteration))
        logger.info(" -----  Pruning Step {} -----".format(iteration))
        path_to_save_pruned_model = prune_step_path + str(iteration)
        sparsity_vs_accuracy = iterative_pruning_policy_distilliation(logger=logger, agent=compressed_agent,
                                                                          target_agent=teacher,
                                                                          iterations=FLAGS.iterations,
                                                                          config=student_config,
                                                                          best_path=path_to_save_pruned_model,
                                                                          arch_type=arch_type)
        plot_graph(data=sparsity_vs_accuracy, name=FLAGS.PoPS_dir + "/initial size {}%,  Pruning_step number {}"
                   .format(precent, iteration), figure_num=iteration)

        # load the model with the highest sparsity that still achieves a reasonable score
        compressed_agent.load_model(path_to_save_pruned_model)
        # the number of parameters that are not zero at each layer
        nnz_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
        # the number of parameters that are not zero
        nnz_params = sum(nnz_params_at_each_layer)
        # redundancy is the fraction of parameters we do not need; nnz_params / initial_number_of_nnz is the fraction we do need
        redundancy = (1 - nnz_params / initial_number_of_nnz) * 100
        print("-----  Pruning Step {} finished, got {}% redundancy in net params -----"
              .format(iteration, redundancy))
        logger.info("-----  Pruning Step {} finished , got {}% redundancy in net params -----"
                    .format(iteration, redundancy))
        logger.info("-----  Pruning Step {} finished with {} NNZ params at each layer"
                    .format(iteration, nnz_params_at_each_layer))
        print(" -----  Evaluating redundancy at each layer Step {}-----".format(iteration))
        logger.info(" -----  Evaluating redundancy at each layer Step {} -----".format(iteration))
        redundancy_at_each_layer = calculate_redundancy(initial_nnz_params=initial_number_of_params_at_each_layer,
                                                        next_nnz_params=nnz_params_at_each_layer)
        logger.info("----- redundancy for each layer at step {} is {} -----".format(iteration, redundancy_at_each_layer))

        print(" -----  Creating Model with size according to the redundancy at each layer ----- ")
        logger.info("----- Creating Model with size according to the redundancy at each layer -----")
        policy_distilled_path = policy_step_path + str(iteration)
        # creating the compact model where every layer size is determined by the redundancy measure
        compressed_agent = StudentPong(input_size=student_config.input_size,
                                       output_size=student_config.output_size,
                                       model_path=policy_distilled_path,
                                       tau=student_config.tau,
                                       redundancy=redundancy_at_each_layer,
                                       pruning_freq=prune_config.pruning_freq,
                                       sparsity_end=prune_config.sparsity_end,
                                       target_sparsity=prune_config.target_sparsity,
                                       prune_till_death=True,
                                       last_measure=last_measure)
        nnz_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
        logger.info("-----  Step {} ,Created Model with {} NNZ params at each layer"
                    .format(iteration, nnz_params_at_each_layer))
        iterative_size = compressed_agent.get_number_of_nnz_params()
        precent = (iterative_size / initial_size) * 100
        convergence_information.append(precent)
        print(" ----- Step {}, Created Model with size {} which is {}% from original size ----- "
              .format(iteration, iterative_size, precent))
        logger.info("----- Created Model with size {} which is {}% from original size -----"
                    .format(iterative_size, precent))
        if precent > 10:
            arch_type = 0
        else:
            arch_type = 1
        print(" -----  policy distilling Step {} ----- ".format(iteration))
        logger.info("----- policy distilling Step {} -----".format(iteration))
        fit_supervised(logger=logger, arch_type=arch_type, student=compressed_agent, teacher=teacher,
                           n_epochs=FLAGS.n_epoch)
        compressed_agent.load_model(path=policy_distilled_path)
        policy_distilled_score = evaluate(agent=compressed_agent, n_epoch=FLAGS.eval_epochs)
        compressed_agent.reset_global_step()
        print(" -----  policy distilling Step {} finished  with score {} ----- "
              .format(iteration, policy_distilled_score))
        logger.info("----- policy distilling Step {} finished with score {}  -----"
                    .format(iteration, policy_distilled_score))
        converge = check_convergence(convergence_information)

        accuracy_vs_size[0].append(iterative_size)
        accuracy_vs_size[1].append(policy_distilled_score)

    plot_graph(data=accuracy_vs_size, name=FLAGS.PoPS_dir + "/accuracy_vs_size", figure_num=iteration + 1, xaxis='NNZ params', yaxis='Accuracy')
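The comment "every layer size is determined by the redundancy measure" summarizes the PoPS idea. As a hedged illustration only (the actual StudentPong/StudentCartpole constructors may compute this differently), each layer could keep roughly the fraction of units that was not found redundant:

def compact_layer_sizes(original_sizes, redundancy_at_each_layer, minimum=1):
    # keep the non-redundant fraction of each layer, never dropping below `minimum` units
    return [max(minimum, int(round(size * (1.0 - redundancy))))
            for size, redundancy in zip(original_sizes, redundancy_at_each_layer)]


# e.g. with ~84% redundancy everywhere, four 512-unit layers shrink to roughly 82 units each
print(compact_layer_sizes([512, 512, 512, 512], [0.84, 0.84, 0.84, 0.84]))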