Example No. 1
    def __init__(self, cluster, server, worker_tasks):
        self.cluster = cluster
        self.server = server
        self.worker_tasks = worker_tasks
        self.num_workers = len(worker_tasks)
        self.aggregated_states = []
        self.aggregated_actions = []
        self.max_eps = 1000
        self.checkpoint_delta = 10
        self.checkpoint = self.checkpoint_delta
        self.learn_rate = 0.01
        self.regularization_lambda = 1e-4
        self.train_step = 0

        self.state_dim = Sender.state_dim
        self.action_cnt = Sender.action_cnt
        self.aug_state_dim = self.state_dim + self.action_cnt

        # Create the master network and training/sync queues
        with tf.variable_scope('global'):
            self.global_network = DaggerLSTM(state_dim=self.aug_state_dim,
                                             action_cnt=self.action_cnt)

        self.leader_device_cpu = '/job:ps/task:0/cpu:0'
        with tf.device(self.leader_device_cpu):
            with tf.variable_scope('global_cpu'):
                self.global_network_cpu = DaggerLSTM(
                    state_dim=self.aug_state_dim, action_cnt=self.action_cnt)

        cpu_vars = self.global_network_cpu.trainable_vars
        gpu_vars = self.global_network.trainable_vars
        self.sync_op = tf.group(
            *[v1.assign(v2) for v1, v2 in zip(cpu_vars, gpu_vars)])

        self.default_batch_size = 300
        self.default_init_state = self.global_network.zero_init_state(
            self.default_batch_size)

        # Each element is [[aug_state]], [action]
        self.train_q = tf.FIFOQueue(self.num_workers, [tf.float32, tf.int32],
                                    shared_name='training_feed')

        # Keys: worker indices; values: TensorFlow message queues
        # Queue elements: status messages
        self.sync_queues = {}
        for idx in worker_tasks:
            queue_name = 'sync_q_%d' % idx
            self.sync_queues[idx] = tf.FIFOQueue(3, [tf.int16],
                                                 shared_name=queue_name)

        self.setup_tf_ops(server)

        self.sess = tf.Session(
            server.target, config=tf.ConfigProto(allow_soft_placement=True))
        self.sess.run(tf.global_variables_initializer())
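
A minimal usage sketch for this constructor, assuming it belongs to the DaggerLeader class referenced in Example No. 2; the cluster addresses and worker indices are placeholders:

import tensorflow as tf

# Hypothetical two-worker cluster; the host:port pairs are placeholders.
cluster = tf.train.ClusterSpec({
    'ps': ['localhost:2222'],
    'worker': ['localhost:2223', 'localhost:2224'],
})

# The leader runs on the parameter-server task so that '/job:ps/task:0'
# (self.leader_device_cpu above) resolves to this process.
server = tf.train.Server(cluster, job_name='ps', task_index=0)

leader = DaggerLeader(cluster, server, worker_tasks=[0, 1])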
Example No. 2
    def setup_tf_ops(self):
        """Sets up the shared TensorFlow operators and structures.
        Refer to DaggerLeader for more information.
        """

        # Set up the shared global network and local network.
        with tf.device(self.leader_device):
            with tf.variable_scope('global_cpu'):
                self.global_network_cpu = DaggerLSTM(
                    state_dim=self.aug_state_dim, action_cnt=self.action_cnt)

        with tf.device(self.worker_device):
            with tf.variable_scope('local'):
                self.local_network = DaggerLSTM(state_dim=self.aug_state_dim,
                                                action_cnt=self.action_cnt)

        self.init_state = self.local_network.zero_init_state(1)
        self.lstm_state = self.init_state

        # Build shared queues for training data and synchronization
        self.train_q = tf.FIFOQueue(self.num_workers, [tf.float32, tf.int32],
                                    shared_name='training_feed')

        self.sync_q = tf.FIFOQueue(3, [tf.int16],
                                   shared_name=('sync_q_%d' % self.task_idx))

        # Training data is [[aug_state]], [action]
        self.state_data = tf.placeholder(tf.float32,
                                         shape=(None, self.aug_state_dim))
        # shape=(None,) is a 1-D vector; a bare (None) would leave the
        # shape fully unspecified
        self.action_data = tf.placeholder(tf.int32, shape=(None,))
        self.enqueue_train_op = self.train_q.enqueue(
            [self.state_data, self.action_data])

        # Sync local network to global network (CPU)
        local_vars = self.local_network.trainable_vars
        global_vars = self.global_network_cpu.trainable_vars
        self.sync_op = tf.group(
            *[v1.assign(v2) for v1, v2 in zip(local_vars, global_vars)])
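
A minimal sketch of feeding one rollout through enqueue_train_op, assuming the worker object (worker) also holds a tf.Session in self.sess as the leader does; the rollout arrays are dummies:

import numpy as np

# Dummy rollout: T augmented states and the expert action at each step.
T = 300
states = np.zeros((T, worker.aug_state_dim), dtype=np.float32)
actions = np.zeros(T, dtype=np.int32)

# Blocks if the shared queue is full (its capacity is num_workers).
worker.sess.run(worker.enqueue_train_op,
                feed_dict={worker.state_data: states,
                           worker.action_data: actions})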
Example No. 3
    def __init__(self, sender, state_dim, restore_vars):
        self.aug_state_dim = state_dim + 1  # one extra slot for the previous action
        self.prev_action = 0
        self.sender = sender
        with tf.variable_scope('global'):
            self.model = DaggerLSTM(state_dim=self.aug_state_dim,
                                    dwnd=Sender.dwnd)

        self.lstm_state = self.model.zero_init_state(1)

        self.sess = tf.Session()

        # restore saved variables
        saver = tf.train.Saver(self.model.trainable_vars)
        saver.restore(self.sess, restore_vars)

        # init the remaining vars, especially those created by optimizer
        uninit_vars = set(tf.global_variables())
        uninit_vars -= set(self.model.trainable_vars)
        self.sess.run(tf.variables_initializer(uninit_vars))
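
A minimal sketch of assembling the augmented input for one step under this layout, where the single extra slot (aug_state_dim = state_dim + 1) is assumed to carry the previous action; raw_state and the dimensions are dummies, and the inference call itself is omitted because DaggerLSTM's output ops are not shown here:

import numpy as np

state_dim = 4                                  # placeholder size
raw_state = np.zeros(state_dim, dtype=np.float32)

# Append the previous action as one extra feature, matching
# aug_state_dim = state_dim + 1 above.
prev_action = 0
aug_state = np.append(raw_state, np.float32(prev_action))
assert aug_state.shape == (state_dim + 1,)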
Example No. 4
    def __init__(self, state_dim, action_cnt, restore_vars):
        self.aug_state_dim = state_dim + action_cnt
        self.action_cnt = action_cnt
        self.prev_action = action_cnt - 1

        with tf.variable_scope('global'):
            self.model = DaggerLSTM(
                state_dim=self.aug_state_dim, action_cnt=action_cnt)

        self.lstm_state = self.model.zero_init_state(1)

        self.sess = tf.Session()

        logging.basicConfig(level=logging.WARNING, filename="/home/zyk/state.log")
        self.logger = logging.getLogger("state")

        # restore saved variables
        saver = tf.train.Saver(self.model.trainable_vars)
        saver.restore(self.sess, restore_vars)

        # init the remaining vars, especially those created by optimizer
        uninit_vars = set(tf.global_variables())
        uninit_vars -= set(self.model.trainable_vars)
        self.sess.run(tf.variables_initializer(uninit_vars))
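
Here aug_state_dim = state_dim + action_cnt, which suggests the previous action enters the input as a one-hot vector; a minimal sketch of that encoding, with placeholder sizes:

import numpy as np

state_dim, action_cnt = 4, 5                   # placeholder sizes
raw_state = np.zeros(state_dim, dtype=np.float32)

# One-hot encode the previous action into action_cnt extra features;
# prev_action starts at action_cnt - 1, as in the constructor above.
prev_action = action_cnt - 1
one_hot = np.zeros(action_cnt, dtype=np.float32)
one_hot[prev_action] = 1.0
aug_state = np.concatenate([raw_state, one_hot])
assert aug_state.shape == (state_dim + action_cnt,)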
Example No. 5
    def __init__(self, state_dim, action_cnt, restore_vars):
        self.aug_state_dim = state_dim + action_cnt
        self.action_cnt = action_cnt
        self.prev_action = action_cnt - 1

        with tf.variable_scope('global'):
            self.model = DaggerLSTM(state_dim=self.aug_state_dim,
                                    action_cnt=action_cnt)

        self.lstm_state = self.model.zero_init_state(1)

        self.sess = tf.Session()

        # restore saved variables
        saver = tf.train.Saver(self.model.trainable_vars)
        saver.restore(self.sess, restore_vars)

        # init the remaining vars, especially those created by optimizer
        uninit_vars = set(tf.global_variables())
        uninit_vars -= set(self.model.trainable_vars)
        self.sess.run(tf.variables_initializer(uninit_vars))

        self.log = open(
            '/home/eric/Dev/DRL-IL/pantheon/third_party/indigo/logs.txt', 'w')
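
A usage sketch for this constructor, with a hypothetical class name (Policy) and a placeholder checkpoint prefix; the prefix must point at a checkpoint saved from the same 'global' DaggerLSTM variable scope:

# Hypothetical instantiation; 'ckpt/model' is a placeholder path.
policy = Policy(state_dim=Sender.state_dim,
                action_cnt=Sender.action_cnt,
                restore_vars='ckpt/model')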
Example No. 6
    def setup_tf_ops(self):  # called in __init__()
        """ Sets up the shared Tensorflow operators and structures
        Refer to DaggerLeader for more information
        """

        # Set up the shared global network and local network.
        with tf.device(self.leader_device):
            with tf.variable_scope('global_cpu'):
                self.global_network_cpu = DaggerLSTM(
                    state_dim=self.aug_state_dim, action_cnt=self.action_cnt)

        with tf.device(self.worker_device):
            with tf.variable_scope('local'):
                """
                Modify : change DaggerLSTM to DQN
                """
                self.local_network = DaggerLSTM(state_dim=self.aug_state_dim,
                                                action_cnt=self.action_cnt)

        self.init_state = self.local_network.zero_init_state(1)
        self.lstm_state = self.init_state

        # Build shared queues for training data and synchronization
        # Four components per element, matching enqueue_train_op below:
        # (prev_state, reward, action, cur_state)
        self.train_q = tf.FIFOQueue(
            self.num_workers,
            [tf.float32, tf.float32, tf.int32, tf.float32],
            shared_name='training_feed')

        self.sync_q = tf.FIFOQueue(3, [tf.int16],
                                   shared_name=('sync_q_%d' % self.task_idx))

        # Training data is one transition: (prev_state, reward, action, cur_state)
        #
        # Modifications relative to the DAgger worker (Example No. 2):
        #   Removed: self.action_data
        #   Added:   self.prev_state_ph  (float32, [None, aug_state_dim])
        #            self.reward_ph      (float32, scalar)
        #            self.action_ph      (int32, scalar)
        #            self.cur_state_ph   (float32, [None, aug_state_dim])
        #   Changed: self.enqueue_train_op now enqueues the full transition
        self.prev_state_ph = tf.placeholder(tf.float32,
                                            shape=(None, self.aug_state_dim))
        # self.action_data = tf.placeholder(tf.int32, shape=(None))
        self.reward_ph = tf.placeholder(tf.float32, shape=())
        self.action_ph = tf.placeholder(tf.int32, shape=())
        self.cur_state_ph = tf.placeholder(tf.float32,
                                           shape=(None, self.aug_state_dim))
        self.enqueue_train_op = self.train_q.enqueue([
            self.prev_state_ph, self.reward_ph, self.action_ph,
            self.cur_state_ph
        ])

        # Sync local network to global network (CPU)
        local_vars = self.local_network.trainable_vars
        global_vars = self.global_network_cpu.trainable_vars
        self.sync_op = tf.group(
            *[v1.assign(v2) for v1, v2 in zip(local_vars, global_vars)])
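
On the leader side, the shared queue would need the same four-component signature; a minimal sketch of declaring and draining it, with num_workers as a placeholder and a fresh session standing in for the distributed one from Example No. 1:

import tensorflow as tf

num_workers = 2                                # placeholder worker count

# Leader-side counterpart of the worker's transition queue; the dtypes
# must match the enqueue order (prev_state, reward, action, cur_state).
train_q = tf.FIFOQueue(num_workers,
                       [tf.float32, tf.float32, tf.int32, tf.float32],
                       shared_name='training_feed')
dequeue_op = train_q.dequeue()

with tf.Session() as sess:
    # Blocks until some worker has pushed a transition.
    prev_state, reward, action, cur_state = sess.run(dequeue_op)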