Example #1
    def build_model(self, dev):
        with tf.variable_scope(self.name), tf.device(dev):
            ## inputs of networks
            self.screen = tf.placeholder(
                tf.float32,
                [None, PP.screen_channel(), self.ssize, self.ssize],
                name='screen')

            ## build networks
            net = build_net(self.screen)
            self.spatial_action, self.value = net

            ## targets & masks
            self.valid_spatial_action = tf.placeholder(
                tf.float32, [None], name='valid_spatial_action')
            self.spatial_action_selected = tf.placeholder(
                tf.float32, [None, self.ssize**2],
                name='spatial_action_selected')
            self.value_target = tf.placeholder(tf.float32, [None],
                                               name='value_target')

            ## compute log probability
            spatial_action_prob = tf.reduce_sum(self.spatial_action *
                                                self.spatial_action_selected,
                                                axis=1)
            spatial_action_log_prob = tf.log(
                tf.clip_by_value(spatial_action_prob, 1e-10, 1.))

            ## policy loss & value loss
            action_log_prob = self.valid_spatial_action * spatial_action_log_prob
            advantage = tf.stop_gradient(self.value_target - self.value)
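            # stop_gradient treats the advantage as a constant, so the policy
            # loss below does not backpropagate into the value head.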
            policy_loss = -tf.reduce_mean(action_log_prob * advantage)
            value_loss = -tf.reduce_mean(self.value * advantage)
            self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
            self.summary.append(tf.summary.scalar('value_loss', value_loss))
            loss = policy_loss + value_loss

            ## RMSProp optimizer
            self.learning_rate = tf.placeholder(tf.float32,
                                                None,
                                                name='learning_rate')
            opt = tf.train.RMSPropOptimizer(self.learning_rate,
                                            decay=0.99,
                                            epsilon=1e-10)
            grads = opt.compute_gradients(loss)
            clipped_grads = []
            for grad, var in grads:
                self.summary.append(tf.summary.histogram(var.op.name, var))
                self.summary.append(
                    tf.summary.histogram(var.op.name + '/grad', grad))
                grad = tf.clip_by_norm(grad, 10.0)
                clipped_grads.append([grad, var])
            self.train_op = opt.apply_gradients(clipped_grads)
            self.summary_op = tf.summary.merge(self.summary)

            self.saver = tf.train.Saver(max_to_keep=100)
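
A usage sketch (not part of the original example): assuming a hypothetical `Agent` class whose constructor sets `self.name`, `self.ssize`, and an empty `self.summary` list, one training step could be fed and run roughly as follows. Batch contents and hyperparameters are illustrative only.

import numpy as np
import tensorflow as tf

agent = Agent(name='a3c_agent', ssize=64)    # hypothetical wrapper around build_model
agent.build_model('/cpu:0')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    n = 4                                    # illustrative batch size
    feed = {
        agent.screen: np.zeros((n, PP.screen_channel(), 64, 64), np.float32),
        agent.valid_spatial_action: np.ones(n, np.float32),
        agent.spatial_action_selected: np.eye(64 * 64, dtype=np.float32)[:n],
        agent.value_target: np.zeros(n, np.float32),
        agent.learning_rate: 1e-4,
    }
    _, summary = sess.run([agent.train_op, agent.summary_op], feed_dict=feed)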
Example #2
File: train.py    Project: daxiafresh/plate
            # Annotation arrays are turned into tensors and kept in a list
            # (their first dimension varies per image); the image tensors are
            # stacked into a single batch below.
            elif isinstance(tup, np.ndarray):
                annos = torch.from_numpy(tup).float()
                targets.append(annos)

    return (torch.stack(imgs, 0), targets)


USE_PRE_TRAIN = False
pre_train_path = ''
IS_TRAIN = True
MAX_ITER = 15000
BATCH = 3
ANCHORS = gen_anchors()
load_data = VOC_load()

net = build_net('train', 304)
Loss_function = MultiBoxLoss()

if USE_PRE_TRAIN:
    net.base.load_state_dict(torch.load(pre_train_path))

op = optim.SGD(net.parameters(), lr=4e-3, momentum=0.9, weight_decay=5e-4)

# Alternatively a data.TensorDataset(...) could be used as the dataset.
# collate_fn: the function that assembles a batch. The default collate cannot
# be used here because the first dimension of the targets differs from image
# to image, so a custom detection_collate is required to avoid an error.
train_data = data.DataLoader(load_data,
                             batch_size=BATCH,
                             shuffle=True,
                             collate_fn=detection_collate)

for i in range(len(load_data)):
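    # The original example is truncated at this loop. A hypothetical loop body
    # is sketched below; the forward and loss call signatures are assumptions,
    # not taken from the daxiafresh/plate project.
    imgs, targets = next(iter(train_data))   # simplistic: rebuilds the iterator each step
    op.zero_grad()
    predictions = net(imgs)                                 # assumed forward signature
    loss = Loss_function(predictions, ANCHORS, targets)     # assumed loss signature
    loss.backward()
    op.step()
    if i % 100 == 0:
        print('iter %d, loss %.4f' % (i, loss.item()))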
Example #3
    def build_model(self, reuse, dev, ntype):
        with tf.variable_scope(self.name), tf.device(dev):
            if reuse:
                tf.get_variable_scope().reuse_variables()
                assert tf.get_variable_scope().reuse

            # Set the inputs of the networks
            # minimap: feature minimap from the observation
            # screen : feature screen from the observation
            # info   : available actions for the current observation
            self.minimap = tf.placeholder(
                tf.float32,
                [None, U.minimap_channel(), self.msize, self.msize],
                name='minimap')
            self.screen = tf.placeholder(
                tf.float32,
                [None, U.screen_channel(), self.ssize, self.ssize],
                name='screen')
            self.info = tf.placeholder(tf.float32, [None, self.isize],
                                       name='info')

            # Build networks
            net = build_net(self.minimap, self.screen, self.info, self.msize,
                            self.ssize, self.isize, ntype)
            # net output
            self.spatial_action, self.non_spatial_action, self.value = net

            # Set targets and masks
            self.valid_spatial_action = tf.placeholder(
                tf.float32, [None], name='valid_spatial_action')
            self.spatial_action_selected = tf.placeholder(
                tf.float32, [None, self.ssize**2],
                name='spatial_action_selected')
            self.valid_non_spatial_action = tf.placeholder(
                tf.float32, [None, len(actions.FUNCTIONS)],
                name='valid_non_spatial_action')
            self.non_spatial_action_selected = tf.placeholder(
                tf.float32, [None, len(actions.FUNCTIONS)],
                name='non_spatial_action_selected')
            self.value_target = tf.placeholder(tf.float32, [None],
                                               name='value_target')

            # Compute log probabilities
            spatial_action_prob = tf.reduce_sum(self.spatial_action *
                                                self.spatial_action_selected,
                                                axis=1)
            spatial_action_log_prob = tf.log(
                tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
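            # The selected non-spatial action's probability is renormalized by
            # the total probability mass of the actions that were actually
            # available (the valid_non_spatial_action mask), so unavailable
            # actions do not distort the log-probability used in the loss.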
            non_spatial_action_prob = tf.reduce_sum(
                self.non_spatial_action * self.non_spatial_action_selected,
                axis=1)
            valid_non_spatial_action_prob = tf.reduce_sum(
                self.non_spatial_action * self.valid_non_spatial_action,
                axis=1)
            valid_non_spatial_action_prob = tf.clip_by_value(
                valid_non_spatial_action_prob, 1e-10, 1.)
            non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
            non_spatial_action_log_prob = tf.log(
                tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
            self.summary.append(
                tf.summary.histogram('spatial_action_prob',
                                     spatial_action_prob))
            self.summary.append(
                tf.summary.histogram('non_spatial_action_prob',
                                     non_spatial_action_prob))

            # Compute losses, more details in https://arxiv.org/abs/1602.01783
            # Policy loss and value loss
            action_log_prob = self.valid_spatial_action * spatial_action_log_prob + non_spatial_action_log_prob
            advantage = tf.stop_gradient(self.value_target - self.value)
            policy_loss = -tf.reduce_mean(action_log_prob * advantage)
            value_loss = -tf.reduce_mean(self.value * advantage)
            self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
            self.summary.append(tf.summary.scalar('value_loss', value_loss))

            # TODO: policy penalty
            loss = policy_loss + value_loss

            # Build the optimizer
            self.learning_rate = tf.placeholder(tf.float32,
                                                None,
                                                name='learning_rate')
            opt = tf.train.RMSPropOptimizer(self.learning_rate,
                                            decay=0.99,
                                            epsilon=1e-10)
            grads = opt.compute_gradients(loss)
            clipped_grads = []
            for grad, var in grads:
                self.summary.append(tf.summary.histogram(var.op.name, var))
                self.summary.append(
                    tf.summary.histogram(var.op.name + '/grad', grad))
                grad = tf.clip_by_norm(grad, 10.0)
                clipped_grads.append([grad, var])
            self.train_op = opt.apply_gradients(clipped_grads)
            self.summary_op = tf.summary.merge(self.summary)

            self.saver = tf.train.Saver(max_to_keep=100)
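
A note on how the mask placeholders above are typically filled, per sample (an illustrative sketch, not code from the example; the sampled action id, target pixel, and available-action list are hypothetical, and each array below is one row of the batched placeholder):

import numpy as np
from pysc2.lib import actions

ssize = 64
act_id = 331                      # hypothetical sampled action id (e.g. a *_screen function)
target = (12, 27)                 # hypothetical (x, y) screen argument
available = [0, 1, act_id]        # hypothetical ids, e.g. from obs.observation['available_actions']

valid_non_spatial_action = np.zeros(len(actions.FUNCTIONS), dtype=np.float32)
valid_non_spatial_action[available] = 1.0
non_spatial_action_selected = np.zeros(len(actions.FUNCTIONS), dtype=np.float32)
non_spatial_action_selected[act_id] = 1.0

# The screen argument is flattened into an index on the ssize*ssize map; whether
# the layout is y*ssize + x or x*ssize + y must match how build_net flattens its
# spatial output.
spatial_action_selected = np.zeros(ssize**2, dtype=np.float32)
spatial_action_selected[target[1] * ssize + target[0]] = 1.0
valid_spatial_action = 1.0        # 1.0 only when the action takes a screen argument, else 0.0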