Example #1
class ACCovWorker:
    def __init__(self, name, trainer, model_path, global_episodes, global_rewards):
        self.name = "worker_" + str(name)
        self.number = name
        self.model_path = model_path
        self.trainer = trainer
        self.global_rewards = global_rewards
        self.global_episodes = global_episodes
        self.increment = self.global_episodes.assign_add(1)
        self.episode_rewards = []
        self.episode_coverages = []
        self.episode_lengths = []
        self.episode_mean_values = []
        self.batcher = Batcher()
        self.summary_writer = tf.summary.FileWriter("logs/train_" + str(self.number))

        with open('vectors_cov.pkl', 'rb') as fh:
            self.embeddings, self.embed_lookup = pickle.load(fh)
            self.num_feats = len(self.embeddings[0])

        # Create the local copy of the network and the TensorFlow op to copy global parameters to the local network
        self.local_AC = AConv_Network(self.name, trainer, self.num_feats)
        self.update_local_ops = update_target_graph('global', self.name)
        self.avgFunctions = {}

        self.sleep_time = 0.028
        self.env = Enviroment()

    def train(self, global_AC, rollout, sess, gamma, bootstrap_value):
        rollout = np.array(rollout)

        self.batcher.pad(rollout[:, 0], self.num_feats)
        self.batcher.init_child()
        self.batcher.pad_child(rollout[:, 1])

        nodes_observations = rollout[:, 0]
        children_observations = rollout[:, 1]
        actions = rollout[:, 2]
        rewards = rollout[:, 3]
        values = rollout[:, 5]

        # Here we take the rewards and values from the rollout, and use them to
        # generate the advantage and discounted returns.
        # The advantage function uses "Generalized Advantage Estimation"
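        # Concretely: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t), and the advantage for
        # step t is the gamma-discounted sum of the deltas from t onward.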
        self.rewards_plus = np.asarray(rewards.tolist() + [bootstrap_value])
        discounted_rewards = discount(self.rewards_plus, gamma)[:-1]
        self.value_plus = np.asarray(values.tolist() + [bootstrap_value])
        advantages = rewards + gamma * self.value_plus[1:] - self.value_plus[:-1]
        advantages = discount(advantages, gamma)

        # Update the global network using gradients from loss
        # Generate network statistics to periodically save
        rnn_state = self.local_AC.state_init
        feed_dict = {self.local_AC.target_v: discounted_rewards,
                     self.local_AC.nodes: np.vstack(nodes_observations),
                     self.local_AC.children: np.vstack(children_observations),
                     self.local_AC.actions: actions,
                     self.local_AC.advantages: advantages,
                     self.local_AC.state_in[0]: self.batch_rnn_state[0],
                     self.local_AC.state_in[1]: self.batch_rnn_state[1]}
        v_l, p_l, e_l, g_n, v_n, adv, apl_g, self.batch_rnn_state = sess.run([self.local_AC.value_loss,
                                                        self.local_AC.policy_loss,
                                                        self.local_AC.entropy,
                                                        self.local_AC.grad_norms,
                                                        self.local_AC.var_norms,
                                                        self.local_AC.adv_sum,
                                                        self.local_AC.apply_grads,
                                                        self.local_AC.state_out],
                                                       feed_dict=feed_dict)
        return v_l / len(rollout), p_l / len(rollout), e_l / len(rollout), g_n, v_n, adv / len(rollout)

    def work(self, max_episode_length, gamma, global_AC, sess, coord, saver):
        episode_count = sess.run(self.global_episodes)
        total_steps = 0
        print("Starting worker " + str(self.number))
        with sess.as_default(), sess.graph.as_default():
            while not coord.should_stop():
                sess.run(self.update_local_ops)
                self.env.reset()
                self.batcher = Batcher()
                episode_buffer = []
                episode_values = []
                episode_reward = 0
                episode_coverage = 0
                episode_step_count = 0
                m = 0
                rnn_state = self.local_AC.state_init
                self.batch_rnn_state = rnn_state

                while m < len(self.env.listOfFiles):
                    m += 1
                    self.env.prepareNextFileConvWithCov(self.number)
                    self.env.currentNumOfTable = 0
                    while self.env.currentNumOfTable < 1:
                        totalRows = 0
                        for table in self.env.listOfTables:
                            totalRows += len(table)
                        self.env.startTable()
                        self.env.currentNumOfRow = 0
                        for currentRow in self.env.listOfTableVectors[:1]:
                            numOfTimes = 0
                            d = False
                            self.env.initializeArgumentValuesCov()
                            complexity = getNumOfReasonableNodes(currentRow)
                            #complexity = int(complexity ** (1 / 3) * (len(self.env.listOfTables[0]) ** (1 / 3)))
                            complexity = int(complexity ** (1 / 3))
                            if complexity == 0:
                                complexity = 1
                            total = len(self.env.arguments) * totalRows * complexity
                            while numOfTimes < total:
                                if d:
                                    break
                                # self.env.initializeArgumentValues()
                                batches = list(
                                    enumerate(
                                        batch_samples(gen_samples(currentRow, self.embeddings, self.embed_lookup), 1)))
                                iterator = iter(batches)
                                batch = next(iterator, None)
                                while batch is not None:
                                    # self.env.initializeArgumentValues()
                                    if isinstance(batch[0], int):
                                        num, batch = batch
                                    nodes, children = batch
                                    self.batcher.checkMaxDim(nodes)
                                    a_dist, v, rnn_state = sess.run(
                                        [self.local_AC.policy, self.local_AC.value, self.local_AC.state_out],
                                        feed_dict={self.local_AC.nodes: nodes, self.local_AC.children: children,
                                                   self.local_AC.state_in[0]: rnn_state[0],
                                                   self.local_AC.state_in[1]: rnn_state[1]})
                                    # Sample an action index from the current policy distribution.
                                    a = np.random.choice(a_dist[0], p=a_dist[0])
                                    a = np.argmax(a_dist == a)

                                    r, d, c, _ = self.env.step_cov(a, self.number)
                                    # nextBatch = next(iterator, None)
                                    total_steps += 1
                                    episode_step_count += 1

                                    batch = next(iterator, None)

                                    # batch = nextBatch
                                    episode_buffer.append([nodes, children, a, r, d, v[0, 0]])

                                    if d or numOfTimes + 1 == total:
                                        # Since we don't know what the true final return is, we "bootstrap" from our current
                                        # value estimation.
                                        v1 = sess.run(self.local_AC.value,
                                                      feed_dict={self.local_AC.nodes: nodes,
                                                                 self.local_AC.children: children,
                                                                 self.local_AC.state_in[0]: rnn_state[0],
                                                                 self.local_AC.state_in[1]: rnn_state[1]})[0, 0]
                                        v_l, p_l, e_l, g_n, v_n, adv = self.train(global_AC, episode_buffer, sess,
                                                                                  gamma,
                                                                                  v1)
                                        episode_buffer = []
                                        sess.run(self.update_local_ops)
                                    # if episode_step_count >= max_episode_length - 1 or d or nextBatch is None:
                                    if numOfTimes + 1 == total or d:
                                        episode_reward += r
                                        episode_coverage += c
                                        if self.env.rootTreeAdtNode.name not in self.avgFunctions:
                                            self.avgFunctions[self.env.rootTreeAdtNode.name] = [c]
                                        else:
                                            self.avgFunctions[self.env.rootTreeAdtNode.name].append(c)
                                        break
                                numOfTimes += 1
                                print(
                                    "Worker: " + str(self.number) + ", with number of times: " + str(
                                        numOfTimes) + ", for file: " + str(m))

                print("Worker: " + str(self.number) + ", with number of episodes: " + str(episode_count))
                episode_count += 1
                self.episode_rewards.append(episode_reward)
                self.episode_coverages.append(episode_coverage)
                self.episode_lengths.append(episode_step_count)
                self.episode_mean_values.append(np.mean(episode_values))

                # Update the network using the experience buffer at the end of the episode.
                if len(episode_buffer) != 0:
                    v_l, p_l, e_l, g_n, v_n, adv = self.train(global_AC, episode_buffer, sess, gamma, 0.0)

                # Periodically save model parameters and summary statistics.
                if episode_count % 2 == 0 and episode_count != 0:
                    if episode_count % 100 == 0 and self.name == 'worker_0':
                        saver.save(sess, self.model_path + '/model-' + str(episode_count) + '.cptk')
                        print("Saved Model")

                    mean_reward = np.mean(self.episode_rewards[-2:])
                    mean_length = np.mean(self.episode_lengths[-2:])
                    mean_value = np.mean(self.episode_mean_values[-2:])
                    mean_coverage = np.mean(self.episode_coverages[-2:])
                    summary = tf.Summary()
                    summary.value.add(tag='Perf/Reward', simple_value=float(mean_reward))
                    summary.value.add(tag='Perf/Coverage', simple_value=float(mean_coverage))
                    summary.value.add(tag='Perf/Length', simple_value=float(mean_length))
                    summary.value.add(tag='Perf/Value', simple_value=float(mean_value))
                    summary.value.add(tag='Losses/Value Loss', simple_value=float(v_l))
                    summary.value.add(tag='Losses/Policy Loss', simple_value=float(p_l))
                    summary.value.add(tag='Losses/Entropy', simple_value=float(e_l))
                    summary.value.add(tag='Losses/Advantage', simple_value=float(adv))
                    summary.value.add(tag='Losses/Grad Norm', simple_value=float(g_n))
                    summary.value.add(tag='Losses/Var Norm', simple_value=float(v_n))
                    #for key in self.env.dict_of_max_r.keys():
                    #    summary.value.add(tag='Functions/Max coverage for function: ' + str(key),
                    #                     simple_value=float(self.env.dict_of_max_r[key]))
                    for key in self.avgFunctions.keys():
                        summary.value.add(tag='Avg Functions/Avg coverage for function: ' + str(key), simple_value=float(np.mean(self.avgFunctions[key][-2:])))
                    self.summary_writer.add_summary(summary, episode_count)

                    self.summary_writer.flush()
                sess.run(self.increment)
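
ACCovWorker above depends on two helpers, discount and update_target_graph, that are defined elsewhere in the project. The following is only a sketch of compatible implementations, assuming the usual A3C-style definitions; it is not the project's own code:

import numpy as np
import scipy.signal
import tensorflow as tf


def discount(x, gamma):
    # Discounted cumulative sum: out[t] = x[t] + gamma * x[t + 1] + gamma^2 * x[t + 2] + ...
    return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]


def update_target_graph(from_scope, to_scope):
    # Ops that copy the trainable variables of `from_scope` into `to_scope`,
    # used to sync a worker's local network with the global one.
    from_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, from_scope)
    to_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, to_scope)
    return [to_var.assign(from_var) for from_var, to_var in zip(from_vars, to_vars)]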
Example #2
class ACCovContWorker(object):
    def __init__(self, name, globalAC, sess, global_rewards, global_episodes,
                 model_path):
        self.number = str(name)
        self.summary_writer = tf.summary.FileWriter("logs/train_" + str(name))
        self.name = "worker_" + str(name)
        self.global_rewards = global_rewards
        self.global_episodes = global_episodes
        self.increment = self.global_episodes.assign_add(1)

        self.model_path = model_path

        with open('vectors_cov.pkl', 'rb') as fh:
            self.embeddings, self.embed_lookup = pickle.load(fh)
            self.num_feats = len(self.embeddings[0])

        self.AC = ACNet(self.name, sess, self.num_feats,
                        globalAC)  # create ACNet for each worker
        self.sess = sess
        self.episode_rewards = []
        self.episode_coverages = []
        self.episode_lengths = []
        self.a_loss = []
        self.c_loss = []

        self.batcher = Batcher()
        self.env = Enviroment()
        self.avgFunctions = {}

    def getCovVector(self, c):
        # Build the coverage feature vector [argument column index, argument key index, coverage]
        # from the environment's current argument/column pointers.
        num_args = len(list(self.env.arguments.keys()))
        num_cols = len(self.env.listOfTables[0])
        if self.env.argumentChangedVal % num_args == 0 and self.env.argumentChangedVal != 0:
            argColVal = (self.env.argumentColumnValue + 1) % num_cols
            keyOfArg = (self.env.argumentChangedVal + 1) % num_args
        else:
            argColVal = self.env.argumentColumnValue % num_cols
            keyOfArg = self.env.argumentChangedVal % num_args
        return [argColVal, keyOfArg, c]

    def work(self, sess, coord, saver):
        episode_count = sess.run(self.global_episodes)
        total_step = 1
        buffer_s, buffer_a = [], []
        print("Starting " + str(self.name))
        with sess.as_default(), sess.graph.as_default():
            while not coord.should_stop():
                self.env.reset()
                self.batcher = Batcher()
                episode_buffer = []
                buffer_r = []
                buffer_v_target = []
                episode_reward = 0
                episode_coverage = 0
                episode_step_count = 0
                m = 0
                rnn_state = self.AC.state_init
                self.batch_rnn_state = rnn_state
                while m < len(self.env.listOfFiles):
                    m += 1
                    self.env.prepareNextFileConvWithCov(self.number)
                    self.env.currentNumOfTable = 0
                    while self.env.currentNumOfTable < 1:
                        totalRows = 0
                        for table in self.env.listOfTables:
                            totalRows += len(table)
                        self.env.startTable()
                        self.env.currentNumOfRow = 0
                        for currentRow in self.env.listOfTableVectors[:1]:
                            numOfTimes = 0
                            d = False
                            self.env.initializeArgumentValuesCov()
                            complexity = getNumOfReasonableNodes(currentRow)
                            #complexity = int(complexity ** (1/3) * (len(self.env.listOfTables[0]) ** (1/3)))
                            complexity = int(complexity**(1 / 1))
                            c = 0
                            if complexity == 0:
                                complexity = 1
                            total = len(
                                self.env.arguments) * totalRows * complexity
                            while numOfTimes < total:
                                if d:
                                    break
                                # self.env.initializeArgumentValues()
                                batches = list(
                                    enumerate(
                                        batch_samples(
                                            gen_samples(
                                                currentRow, self.embeddings,
                                                self.embed_lookup), 1)))
                                iterator = iter(batches)
                                batch = next(iterator, None)
                                while batch is not None:
                                    # self.env.initializeArgumentValues()
                                    if isinstance(batch[0], int):
                                        num, batch = batch
                                    nodes, children = batch
                                    self.batcher.checkMaxDim(nodes)
                                    vectorMatrixWithCov = [self.getCovVector(c)]
                                    a, rnn_state = self.AC.choose_action(
                                        nodes, children, rnn_state,
                                        vectorMatrixWithCov)

                                    self.env.step_cov_continuos_without_reward(
                                        a, self.number)
                                    episode_buffer.append([
                                        nodes, children, a, vectorMatrixWithCov
                                    ])
                                    #buffer_r.append(r)
                                    # After a full argument/row matrix has been generated, evaluate its
                                    # coverage once and assign that reward to every step that produced it.
                                    matrix_size = len(self.env.arguments) * totalRows
                                    if len(episode_buffer) % matrix_size == 0 and len(episode_buffer) != 0:
                                        r, d, c, _ = self.env.step_cov_continuos_entire_matrix(self.number)
                                        for _ in range(matrix_size):
                                            buffer_r.append(r)
                                    #nextBatch = next(iterator, None)
                                    total_step += 1
                                    episode_step_count += 1

                                    batch = next(iterator, None)

                                    if d or numOfTimes + 1 == total:
                                        # Since we don't know what the true final return is, we "bootstrap" from our current
                                        # value estimation.
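                                        # n-step return computed backwards below: R_t = r_t + GAMMA * R_{t+1},
                                        # bootstrapped from v_s_ (0 if terminal, V(s_last) otherwise).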
                                        if d:
                                            v_s_ = 0  # terminal
                                        else:
                                            v_s_ = self.sess.run(
                                                self.AC.v,
                                                {self.AC.nodes: nodes,
                                                 self.AC.children: children,
                                                 self.AC.matrixWithCov: vectorMatrixWithCov,
                                                 self.AC.state_in[0]: rnn_state[0],
                                                 self.AC.state_in[1]: rnn_state[1]})[0, 0]
                                        buffer_v_target = []

                                        rollout = np.array(episode_buffer)

                                        self.batcher.pad(
                                            rollout[:, 0], self.num_feats)
                                        self.batcher.init_child()
                                        self.batcher.pad_child(rollout[:, 1])

                                        for r in buffer_r[::-1]:  # reverse buffer r
                                            v_s_ = r + GAMMA * v_s_
                                            buffer_v_target.append(v_s_)
                                        buffer_v_target.reverse()
                                        buffer_s = np.vstack(rollout[:, 0])
                                        buffer_a = np.vstack(rollout[:, 2])
                                        buffer_c = np.vstack(rollout[:, 1])
                                        buffer_v_target = np.vstack(buffer_v_target)
                                        buffer_matrix_cov = np.vstack(rollout[:, 3])
                                        feed_dict = {
                                            self.AC.nodes: buffer_s,
                                            self.AC.children: buffer_c,
                                            self.AC.a_his: buffer_a,
                                            self.AC.v_target: buffer_v_target,
                                            self.AC.state_in[0]: self.batch_rnn_state[0],
                                            self.AC.state_in[1]: self.batch_rnn_state[1],
                                            self.AC.matrixWithCov: buffer_matrix_cov
                                        }
                                        # Actual training step: update the global ACNet with this rollout.
                                        _, _, self.batch_rnn_state, a_loss, c_loss = self.AC.update_global(feed_dict)
                                        self.a_loss.append(a_loss)
                                        self.c_loss.append(c_loss)
                                        buffer_s, buffer_a, buffer_r, buffer_c, buffer_matrix_cov = [], [], [], [], []
                                        episode_buffer = []
                                        self.AC.pull_global()  # get global parameters to local ACNet
                                    # if episode_step_count >= max_episode_length - 1 or d or nextBatch is None:
                                    if numOfTimes + 1 == total or d:
                                        episode_reward += r
                                        episode_coverage += c
                                        if self.env.rootTreeAdtNode.name not in self.avgFunctions:
                                            self.avgFunctions[self.env.rootTreeAdtNode.name] = [c]
                                        else:
                                            self.avgFunctions[self.env.rootTreeAdtNode.name].append(c)
                                        break
                                numOfTimes += 1
                                print("Worker: " + str(self.number) +
                                      ", with number of times: " +
                                      str(numOfTimes) + ", for file: " +
                                      str(m))

                episode_count += 1
                print("Worker: " + str(self.number) +
                      ", with number of episodes: " + str(episode_count))
                self.episode_rewards.append(episode_reward)
                self.episode_coverages.append(episode_coverage)
                self.episode_lengths.append(episode_step_count)

                if episode_count % 2 == 0 and episode_count != 0:
                    if episode_count % 100 == 0 and self.name == 'worker_0':
                        saver.save(
                            sess, self.model_path + '/model-' +
                            str(episode_count) + '.cptk')
                        print("Saved Model")

                    mean_reward = np.mean(self.episode_rewards[-2:])
                    mean_length = np.mean(self.episode_lengths[-2:])
                    mean_coverage = np.mean(self.episode_coverages[-2:])
                    mean_a_loss = np.mean(self.a_loss[-2:])
                    mean_c_loss = np.mean(self.c_loss[-2:])
                    summary = tf.Summary()
                    summary.value.add(tag='Perf/Reward',
                                      simple_value=float(mean_reward))
                    summary.value.add(tag='Perf/Length',
                                      simple_value=float(mean_length))
                    summary.value.add(tag='Perf/Coverage',
                                      simple_value=float(mean_coverage))
                    summary.value.add(tag='Loss/A_Loss',
                                      simple_value=float(mean_a_loss))
                    summary.value.add(tag='Loss/C_loss',
                                      simple_value=float(mean_c_loss))
                    for key in self.env.dict_of_max_r.keys():
                        summary.value.add(
                            tag='Max Functions/Max coverage for function: ' +
                            str(key),
                            simple_value=float(self.env.dict_of_max_r[key]))
                    for key in self.avgFunctions.keys():
                        summary.value.add(
                            tag='Avg Functions/Avg coverage for function: ' +
                            str(key),
                            simple_value=float(
                                np.mean(self.avgFunctions[key][-2:])))
                    self.summary_writer.add_summary(summary, episode_count)

                    self.summary_writer.flush()

                sess.run(self.increment)
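
Both worker classes are meant to be driven with one thread per worker under a tf.train.Coordinator, as in typical A3C setups. The snippet below is only an illustrative launch sketch for ACCovWorker; the hyperparameters and the way the global network and optimizer are built are assumptions, not taken from the original project:

import pickle
import threading
import tensorflow as tf

tf.reset_default_graph()
model_path = './model'
max_episode_length, gamma, num_workers = 300, 0.99, 4

# Shared counters and optimizer for all workers (illustrative values).
global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
global_rewards = tf.Variable(0., dtype=tf.float32, name='global_rewards', trainable=False)
trainer = tf.train.AdamOptimizer(learning_rate=1e-4)

# Build the global network the workers sync from, using the same embeddings file the workers load.
with open('vectors_cov.pkl', 'rb') as fh:
    embeddings, _ = pickle.load(fh)
global_AC = AConv_Network('global', trainer, len(embeddings[0]))

workers = [ACCovWorker(i, trainer, model_path, global_episodes, global_rewards)
           for i in range(num_workers)]

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    saver = tf.train.Saver(max_to_keep=5)
    sess.run(tf.global_variables_initializer())
    threads = []
    for worker in workers:
        t = threading.Thread(target=lambda w=worker: w.work(max_episode_length, gamma, global_AC, sess, coord, saver))
        t.start()
        threads.append(t)
    coord.join(threads)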