Example #1
    def forward_message_embedding(self, inputs, loss, training):
        results = []
        for i in range(self.slots):
            results.append(self.local_trans(inputs[i], training=training))
        results.append(self.global_trans(inputs[-1], training=training))

        if self.use_comm:
            for i in range(self.slots):
                tmp = nd.zeros_like(results[i])
                for j in range(self.slots):
                    msg = nd.softmax(
                        self.local2local_msg_encode(inputs[j],
                                                    training=training))
                    tmp = tmp + self.local2local_extract(
                        self.local2local_embedding(msg, training=training),
                        training=training)
                msg = nd.softmax(
                    self.global2local_msg_encode(inputs[-1],
                                                 training=training))
                tmp = tmp + self.global2local_extract(
                    self.global2local_embedding(msg, training=training),
                    training=training)
                results[i] = results[i] + (tmp / float(self.slots))
            tmp = nd.zeros_like(results[-1])
            for i in range(self.slots):
                msg = nd.softmax(
                    self.local2global_msg_encode(inputs[i], training=training))
                tmp = tmp + self.local2global_extract(
                    self.local2global_embedding(msg, training=training),
                    training=training)
            results[-1] = results[-1] + (tmp / float(self.slots))
        return results
Example #2
    def hybrid_forward(self,
                       F,
                       input_logits,
                       target_logits,
                       sample_weight=None):
        input_softmax = F.softmax(input_logits, axis=1)
        target_softmax = F.softmax(target_logits, axis=1)

        loss = F.square(input_softmax - target_softmax)

        return F.mean(loss, axis=self._batch_axis, exclude=True)
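The hybrid_forward above computes the mean squared error between two softmax distributions, a common consistency loss between student and teacher logits. A minimal sketch, assuming the method sits in a gluon Loss subclass; the class name SoftmaxMSELoss, the constructor, and the dummy inputs below are assumptions, not part of the original project:

from mxnet import nd
from mxnet.gluon.loss import Loss

class SoftmaxMSELoss(Loss):
    """Mean squared difference between softmax(input_logits) and softmax(target_logits)."""
    def __init__(self, weight=None, batch_axis=0, **kwargs):
        super(SoftmaxMSELoss, self).__init__(weight, batch_axis, **kwargs)

    def hybrid_forward(self, F, input_logits, target_logits, sample_weight=None):
        input_softmax = F.softmax(input_logits, axis=1)
        target_softmax = F.softmax(target_logits, axis=1)
        loss = F.square(input_softmax - target_softmax)
        return F.mean(loss, axis=self._batch_axis, exclude=True)

loss_fn = SoftmaxMSELoss()
student_logits = nd.random.normal(shape=(4, 10))   # dummy logits, batch of 4
teacher_logits = nd.random.normal(shape=(4, 10))
print(loss_fn(student_logits, teacher_logits))      # shape (4,): one loss value per sample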
Example #3
 def check_KL(self):
     ph_act = nd.dot(self.enum_states, self.W) + self.hb
     vt = nd.dot(self.enum_states, self.vb)
     ht = nd.sum(-nd.log(nd.sigmoid(-ph_act)), axis=1)
     p_th = nd.softmax(vt + ht)
     KL = nd.sum(self.prob_states * nd.log(self.prob_states / p_th))
     return KL.asnumpy()[0]
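A reading of check_KL, inferred from the variable names (so treat it as an assumption): enum_states appears to enumerate every visible configuration and prob_states their empirical probabilities. Since -log(sigmoid(-x)) = log(1 + e^x), the code evaluates the RBM's normalized distribution over visible states and its KL divergence from the data distribution:

p_\theta(v) = \mathrm{softmax}\Big( v\,b_v + \sum_j \log\big(1 + e^{(vW + b_h)_j}\big) \Big),
\qquad
\mathrm{KL}(p_{\mathrm{data}} \,\|\, p_\theta) = \sum_v p_{\mathrm{data}}(v)\,\log\frac{p_{\mathrm{data}}(v)}{p_\theta(v)}

where b_v corresponds to self.vb and b_h to self.hb.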
Example #4
    def inference(self):
        # self-attention
        x = self.embedding(1).reshape(-3, 0)  # .squeeze() # b x action x h
        kshape = (1, self.num_total_tokens, self.hidden_size)
        vshape = (1, self.num_total_tokens, 1)
        querry = self.querry(x).reshape(*kshape)  # b x actions x h
        key = self.key(x).reshape(*kshape)  # b x actions x h
        value = self.value(x).reshape(*vshape)  # b x actions x 1
        atten = mx.nd.linalg_gemm2(querry, key,
                                   transpose_b=True).softmax(axis=1)
        alphas = mx.nd.linalg_gemm2(atten, value).squeeze(axis=-1)

        actions = []
        for idx in range(len(self.num_tokens)):
            i0 = sum(self.num_tokens[:idx])
            i1 = sum(self.num_tokens[:idx + 1])
            logits = alphas[:, i0:i1]
            probs = F.softmax(logits, axis=-1)
            action = mx.nd.argmax(probs, 1)
            actions.append(action)

        config = {}
        for i, action in enumerate(actions):
            choice = action.asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)

        return config
Example #5
def generate_text(model, seed, length=512, top_n=10):
    """
    generates text of specified length from trained model
    with given seed character sequence.
    """
    logger.info("generating %s characters from top %s choices.", length, top_n)
    logger.info('generating with seed: "%s".', seed)
    generated = seed
    encoded = mx.nd.array(encode_text(seed))
    seq_len = encoded.shape[0]

    x = F.expand_dims(encoded[:seq_len - 1], 1)
    # input shape: [seq_len, 1]
    state = model.begin_state()
    # get rnn state due to seed sequence
    _, state = model(x, state)

    next_index = encoded[seq_len - 1].asscalar()
    for i in range(length):
        x = mx.nd.array([[next_index]])
        # input shape: [1, 1]
        logit, state = model(x, state)
        # output shape: [1, vocab_size]
        probs = F.softmax(logit)
        next_index = sample_from_probs(probs.asnumpy().squeeze(), top_n)
        # append to sequence
        generated += ID2CHAR[next_index]

    logger.info("generated text: \n%s\n", generated)
    return generated
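generate_text calls sample_from_probs, which is not shown in this snippet. A minimal sketch of a top-n sampler, under the assumption that it draws the next character index from the top_n most probable entries; the signature and exact behavior are guesses, not the original helper:

import numpy as np

def sample_from_probs(probs, top_n=10):
    """Sample an index from the top_n highest-probability entries of a 1-D probability vector."""
    probs = np.asarray(probs, dtype=np.float64)
    top_idx = np.argsort(probs)[-top_n:]               # indices of the n largest probabilities
    top_probs = probs[top_idx] / probs[top_idx].sum()  # renormalize over the kept entries
    return int(np.random.choice(top_idx, p=top_probs))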
Example #6
def test(ctx, val_data, opt, net):
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    for i, batch in enumerate(val_data):
        data, label = batch_fn(batch, ctx)
        outputs = []
        for _, X in enumerate(data):
            X = X.reshape((-1, ) + X.shape[2:])
            pred = net(X.astype(opt.dtype, copy=False))
            if opt.use_softmax:
                pred = F.softmax(pred, axis=1)
            outputs.append(pred)

        acc_top1.update(label, outputs)
        acc_top5.update(label, outputs)
        mx.ndarray.waitall()

        _, cur_top1 = acc_top1.get()
        _, cur_top5 = acc_top5.get()

        if i > 0 and i % opt.log_interval == 0:
            print('%04d/%04d is done: acc-top1=%f acc-top5=%f' %
                  (i, len(val_data), cur_top1 * 100, cur_top5 * 100))

    _, top1 = acc_top1.get()
    _, top5 = acc_top5.get()
    return (top1, top5)
Example #7
    def softmax_viterbi_decode(self, feats):
        feats = nd.softmax(feats).asnumpy()
        transitions = self.transitions.asnumpy()

        label_size = len(self.tag_dictionary)
        sent_len = len(feats)
        pre_matrix = np.zeros((sent_len, label_size), dtype=int)
        score_matrix = np.zeros((2, label_size))
        score_matrix[0] = feats[0]
        for i in range(1, sent_len):
            _i = i & 1
            _i_1 = 1 - _i
            for cur_label in range(label_size):
                max_score = -sys.float_info.max
                for pre_label in range(label_size):
                    score = feats[i, cur_label]
                    cur_score = score_matrix[_i_1][pre_label] * transitions[pre_label, cur_label] * score
                    if max_score < cur_score:
                        max_score = cur_score
                        pre_matrix[i, cur_label] = pre_label
                        score_matrix[_i][cur_label] = max_score

        last_time = (sent_len - 1) & 1
        max_score = score_matrix[last_time].max()
        max_index = np.argmax(score_matrix[last_time])

        labels = []
        for i in range(sent_len - 1, -1, -1):
            labels.insert(0, max_index)
            max_index = pre_matrix[i, max_index]
        return labels
Example #8
File: actor.py  Project: leferrad/powrl3
    def forward(self, x):
        x = self.embedding(nd.array(x))
        x = self.bn(x)

        x = self.pool(self.conv2(self.conv1(x)))

        x = self.h2(self.h1(x))

        return F.softmax(self.output(x))
Example #9
 def sample_v_given_h(self, h0):
     v1_prob = self.propdown(h0).reshape([-1, self.n_val])
     v1_prob = nd.softmax(v1_prob)
     v1_args = nd.sample_multinomial(v1_prob)
     v1 = nd.one_hot(v1_args, self.n_val)
     return [
         v1_prob.reshape([-1, self.n_node]),
         v1.reshape([-1, self.n_node])
     ]
Example #10
    def forward(self, x):
        embed = self.embed(x)

        xs = []
        for i in range(self.C * 2):
            x = self.outs[i](embed)
            x = F.softmax(x)
            xs.append(x)

        return xs
Example #11
 def forward(self, x):
     #import pdb
     #pdb.set_trace()
     X_ = self.attn(x) # (n, w) -> (n,num_hidden)
     # should be dot(X_, W)
     E = self.attn(X_)  # (n, hidden) -> (n, hidden)
     attn_weights = F.softmax(E, axis=1) # (n, hidden)
     attn_applied = F.elemwise_mul(attn_weights, X_) #(n,hidden)
     output = self.c*(F.elemwise_mul(X_, attn_weights)) + (1-self.c)*X_
     output = self.out(output) #(n,hidden) -> (n,output_size)
     return output
Example #12
    def forward(self, x):
        x = x / 255.

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.l1(x)

        value = self.value(x)
        logits = F.softmax(self.logits(x))
        return value, logits
Example #13
    def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
        batch_size = s_batch.shape[0]
        action_indx = np.argmax(a_batch_one_hot,axis=1).tolist()
        action_stats = [action_indx.count(action_indx[i]) for i in range(batch_size)]
        action_bp_rate = (1 - np.array(action_stats)/float(batch_size))**2

        s_batch = copy.deepcopy(s_batch)
        a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
        V_trace_batch = copy.deepcopy(V_trace)
        advantage_batch = copy.deepcopy(advantage)

        s_batch = nd.array(s_batch, ctx=CTX)
        a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
        V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
        advantage_batch = nd.array(advantage_batch, ctx=CTX)
        action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))

        self.actorcritic.collect_params().zero_grad()
        self.reset_noise()
        with mx.autograd.record():
            loss_vec = []
            probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)
            loss = 0.
            for element in loss_vec:
                loss = loss + element
            # print 'loss_dropout:', loss
            logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1)+1e-5)
            entropy = -nd.sum(nd.sum(data=probs*nd.log(probs+1e-5), axis=1), axis=0)
            top_decision_entropy = -nd.sum(nd.sum(data=top_decisions*nd.log(top_decisions+1e-5), axis=1), axis=0)
            entropy_loss = - entropy
            top_decision_entropy_loss = - top_decision_entropy
            actorloss = -nd.sum(action_bp_rate*(logprob*advantage_batch), axis=0) 
            criticloss = nd.sum(action_bp_rate*nd.square(values-V_trace_batch), axis=0)
            # actorloss = -nd.sum(logprob*advantage_batch, axis=0) 
            # criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
            loss = actorloss + 0.3*criticloss + 0.001*entropy_loss
            
            # loss = actorloss + 0.3*criticloss + 0.0001*top_decision_entropy_loss
        loss.backward()

        # CTname = threading.currentThread().getName()

        # print(CTname + ' actorloss : '+str(actorloss))
        # print(CTname + ' criticloss : '+str(criticloss))
        # print(CTname + ' entropy_loss : '+str(entropy_loss))

        grads_list = []
        for name, value in self.actorcritic.collect_params().items():
            if name.find('batchnorm') < 0:
                # grads_list.append(mx.nd.array(value.grad().asnumpy()))
                grads_list.append(value.grad())

        return grads_list, batch_size
Example #14
    def sample(self, batch_size=1, with_details=False, with_entropy=False):
        """
        Returns
        -------
        configs : list of dict
            list of configurations
        """
        inputs = self.static_inputs[batch_size]
        hidden = self.static_init_hidden[batch_size]

        actions = []
        entropies = []
        log_probs = []

        for idx in range(len(self.num_tokens)):
            logits, hidden = self.forward(inputs,
                                          hidden,
                                          idx,
                                          is_embed=(idx == 0))

            probs = F.softmax(logits, axis=-1)
            log_prob = F.log_softmax(logits, axis=-1)
            entropy = -(log_prob *
                        probs).sum(1, keepdims=False) if with_entropy else None

            action = mx.random.multinomial(probs, 1)
            ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                              action.astype('float32'))
            selected_log_prob = F.gather_nd(log_prob, ind)

            actions.append(action[:, 0])
            entropies.append(entropy)
            log_probs.append(selected_log_prob)

            inputs = action[:, 0] + sum(self.num_tokens[:idx])
            inputs.detach()

        configs = []
        for idx in range(batch_size):
            config = {}
            for i, action in enumerate(actions):
                choice = action[idx].asscalar()
                k, space = self.spaces[i]
                config[k] = int(choice)
            configs.append(config)

        if with_details:
            entropies = F.stack(*entropies,
                                axis=1) if with_entropy else entropies
            return configs, F.stack(*log_probs, axis=1), entropies
        else:
            return configs
Example #15
 def forward(self, x):
     x = F.relu(self.conv1(x))
     x = self.pool2(F.relu(self.conv2(x)))
     x = self.drop2D(x)
     # 0 means copy over size from corresponding dimension.
     # -1 means infer size from the rest of dimensions.
     # Essentially flattens to 1D.
     x = x.reshape((0, -1))
     x = F.relu(self.fc1(x))
     x = self.drop1D(x)
     x = F.relu(self.fc2(x))
     x = F.softmax(x)
     return x
Example #16
    def _update(self):
        # Train
        if self.frame_counter > self.opt.replay_start_size and \
           self.frame_counter % self.opt.learning_frequency == 0:
            batch_reward, batch_action, batch_done = self.replay_memory.sample(
                self.opt, self.batch_state, self.batch_state_next)

            batch_reward, batch_action, batch_done = batch_reward.asnumpy(), \
                                                     batch_action.asnumpy(), batch_done.asnumpy()
            targets_q = self.dqn(self.batch_state_next).asnumpy()
            targets_q = np.reshape(
                targets_q, (targets_q.shape[0], self.num_action, self.atoms))
            q_values = np.dot(targets_q, self.z_values)
            target_actions = np.argmax(q_values, axis=1).astype('int32')

            value_eval = self.target_dqn(self.batch_state_next).asnumpy()
            value_eval = np.reshape(
                value_eval, (value_eval.shape[0], self.num_action, self.atoms))

            distributed_q = value_eval[:, target_actions, :]
            m = np.zeros((self.opt.batch_size, self.z_values.size))

            for j in range(self.atoms):
                tzj = np.fmax(
                    np.fmin(
                        batch_reward -
                        batch_done * self.opt.gamma * self.z_values[j],
                        self.v_max), self.v_min)
                bj = ((tzj - self.z_values[0]) /
                      (self.z_values[1] - self.z_values[0]))
                u = np.ceil(bj).astype('int32')
                l = np.floor(bj).astype('int32')

                m[:,
                  l] = m[:, l] + distributed_q[:, target_actions, j] * (u - bj)
                m[:,
                  u] = m[:, u] + distributed_q[:, target_actions, j] * (bj - l)

            m = F.softmax(nd.array(m, self.opt.ctx))
            with autograd.record():
                TD_targets = nd.reshape(
                    self.dqn(self.batch_state),
                    (self.opt.batch_size, self.num_action, self.atoms))
                TD_targets_action = TD_targets[self.batches, batch_action]
                loss = self.cross_ent_loss(TD_targets_action, m)

            loss.backward()
            self.trainer.step(self.opt.batch_size)

            if self.frame_counter % 800 == 0:
                print('Loss is', nd.sum(loss).asscalar())
Example #17
    def sample(self, batch_size=1, with_details=False, with_entropy=False):
        # self-attention
        x = self.embedding(batch_size).reshape(
            -3, 0)  # .squeeze() # b x action x h
        kshape = (batch_size, self.num_total_tokens, self.hidden_size)
        vshape = (batch_size, self.num_total_tokens, 1)
        querry = self.querry(x).reshape(*kshape)  # b x actions x h
        key = self.key(x).reshape(*kshape)  # b x actions x h
        value = self.value(x).reshape(*vshape)  # b x actions x 1
        atten = mx.nd.linalg_gemm2(querry, key,
                                   transpose_b=True).softmax(axis=1)
        alphas = mx.nd.linalg_gemm2(atten, value).squeeze(axis=-1)

        actions = []
        entropies = []
        log_probs = []
        for idx in range(len(self.num_tokens)):
            i0 = sum(self.num_tokens[:idx])
            i1 = sum(self.num_tokens[:idx + 1])
            logits = alphas[:, i0:i1]

            probs = F.softmax(logits, axis=-1)
            log_prob = F.log_softmax(logits, axis=-1)

            entropy = -(log_prob *
                        probs).sum(1, keepdims=False) if with_entropy else None

            action = mx.random.multinomial(probs, 1)
            ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                              action.astype('float32'))
            selected_log_prob = F.gather_nd(log_prob, ind)

            actions.append(action[:, 0])
            entropies.append(entropy)
            log_probs.append(selected_log_prob)

        configs = []
        for idx in range(batch_size):
            config = {}
            for i, action in enumerate(actions):
                choice = action[idx].asscalar()
                k, space = self.spaces[i]
                config[k] = int(choice)
            configs.append(config)

        if with_details:
            entropies = F.stack(*entropies,
                                axis=1) if with_entropy else entropies
            return configs, F.stack(*log_probs, axis=1), entropies
        else:
            return configs
Example #18
    def GRU_Cell(input, state):
        for x in input:
            z_t = nd.Activation(nd.FullyConnected(data=x,weight=wxz,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=state,weight=whz,no_bias=True,num_hidden=num_hidden)+bz,act_type="sigmoid")
            r_t = nd.Activation(nd.FullyConnected(data=x,weight=wxr,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=state,weight=whr,no_bias=True,num_hidden=num_hidden)+br,act_type="sigmoid")
            g_t = nd.Activation(nd.FullyConnected(data=x,weight=wxh,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=r_t*state,weight=whh,no_bias=True,num_hidden=num_hidden)+bh,act_type="tanh")

            state = nd.multiply(z_t,state) + nd.multiply(1-z_t,g_t)

        output = nd.FullyConnected(data=state, weight=why, bias=by, num_hidden=num_outputs)
        output = nd.softmax(data=output)
        return output, state
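GRU_Cell closes over wxz/whz/bz, wxr/whr/br, wxh/whh/bh, why/by, num_hidden, and num_outputs from its enclosing scope. A minimal sketch of how that scope might set them up, with shapes derived from the nd.FullyConnected calls above (weight shape is (num_hidden, input_dim)); the sizes and initialization are assumptions:

from mxnet import nd

num_inputs, num_hidden, num_outputs = 28, 64, 10        # assumed sizes

def rand_param(shape):
    return nd.random.normal(scale=0.01, shape=shape)     # assumed initialization

wxz, wxr, wxh = (rand_param((num_hidden, num_inputs)) for _ in range(3))
whz, whr, whh = (rand_param((num_hidden, num_hidden)) for _ in range(3))
bz, br, bh = (nd.zeros(num_hidden) for _ in range(3))
why, by = rand_param((num_outputs, num_hidden)), nd.zeros(num_outputs)

state = nd.zeros((1, num_hidden))                                       # batch of 1
inputs = [nd.random.normal(shape=(1, num_inputs)) for _ in range(5)]    # 5 time steps
# output, state = GRU_Cell(inputs, state)   # output: (1, num_outputs), already softmax-normalized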
Example #19
def transform_fn(model, request_body, content_type, accept_type):
    try:
        input_object=json.loads(request_body)
        board=input_object["board"]
        count=input_object["session"].get("count",0)
        input_object["session"]["count"]=count+1

        if count>10:
            board=nd.array(board)
            board=nd.concat(
                (board==1).expand_dims(axis=0),
                (board==2).expand_dims(axis=0),dim=0
            )

            board=board.expand_dims(axis=0)

            mask=board.clip(0,1)
            mask=-(mask-1)
            mask=mask.reshape((2,-1)) 
            
            p=nd.softmax(model(board).reshape((-1,)))
            p=p*mask[0]*mask[1]
            
            while True:
                loc=int(p.argmax(axis=0).asscalar())
                y=loc//board.shape[2]
                x=loc%board.shape[2]
                if input_object["board"][y][x]==0:
                    break
                else:
                    p[loc]=0
        else:
            while True:
                x=random.randint(0,len(input_object["board"][0])-1)
                y=random.randint(0,len(input_object["board"])-1)
                if input_object["board"][y][x]==0:
                    break

        input_object["session"]["shootType"]="CNNNet"
        return bytearray(json.dumps({
            "shot":{
                "x":x,
                "y":y
            },
            "session":input_object["session"]
        }),'utf-8'),accept_type
    except Exception as e:
        print(traceback.format_exc())
        print(e)
Example #20
    def inference(self):
        actions = []

        for idx in range(len(self.num_tokens)):
            logits = self.decoders[idx](1)
            probs = F.softmax(logits, axis=-1)
            action = mx.nd.argmax(probs, 1)
            actions.append(action)

        config = {}
        for i, action in enumerate(actions):
            choice = action.asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)

        return config
Example #21
 def forward(self, x):
     #x: 'nwc'
     #import pdb
     #pdb.set_trace()
     x = F.transpose(x, axes=(0, 2, 1))  # (nwc) -> (ncw)
     X_ = F.batch_dot(self.w1.data(ctx), x)  # (n,c,w) -> (n,c,w)
     # E =  dot(X_, W)
     E = F.batch_dot(X_, self.w.data(ctx))  # (n,c,w) -> (n,c,w)
     attn_weights = F.softmax(E, axis=2)  # (n, c, w)
     attn_applied = F.elemwise_mul(attn_weights, X_)  #(n,c,w)
     output = self.c.data(ctx) * (attn_applied) + (
         1 - self.c.data(ctx)) * X_  # (n,c,w)
     output = F.batch_dot(output, self.w2.data(ctx)) + self.b.data(
         ctx)  # (n, c,w)
     output = F.transpose(output, axes=(0, 2, 1))  # (ncw) -> (nwc)
     return output
Example #22
 def forward(self, x):
     # x=self.fc1(x)
     #print(x.shape)
     #print(self.l.shape)
     out = broad_multiply(x, self.l, self.ctx)
     # print(out.shape)
     out = self.avgpool(out)
     # print(out.shape)
     out2 = self.fc(out)
     # print(out2.shape)
     out3 = nd.softmax(out2)
     out = out2 * out3
     # print(out.shape)
     # print(x.shape[0],self.l.shape[1])
     out = out.reshape((x.shape[0], self.l.shape[1], -1))
     # print(out.shape)
     out = nd.sum(out, axis=1)
     # print(out.shape)
     return out
Example #23
    def inference(self):
        inputs = self.static_inputs[1]
        hidden = self.static_init_hidden[1]
        actions = []
        for block_idx in range(len(self.num_tokens)):
            logits, hidden = self.forward(inputs, hidden,
                                          block_idx, is_embed=(block_idx==0))
            probs = F.softmax(logits, axis=-1)
            action = mx.nd.argmax(probs, 1)
            actions.append(action)
            inputs = action + sum(self.num_tokens[:block_idx])
            inputs.detach()

        config = {}
        for i, action in enumerate(actions):
            choice = action.asscalar()
            k, space = self.spaces[i]
            config[k] = int(choice)

        return config
Example #24
    def forward(self, X, stride=1):
        filters = []
        for i in range(self._n_scales):
            kernel = (i * 2 + 1, ) * 2
            pad = (i, ) * 2
            f = nd.Pooling(data=X,
                           pool_type='max',
                           kernel=kernel,
                           stride=(stride, stride),
                           pad=pad,
                           cudnn_off=True)
            f = nd.reshape(f, (f.shape[0], 1) + f.shape[1:])
            filters.append(f)

        filters = nd.concat(*filters, dim=1)
        weight = nd.softmax(self._get_param(self.weight), axis=1)
        filters = nd.mean(filters, axis=1)
        #   filters = nd.sum(filters * weight, axis=1)

        return filters
Example #25
File: hack_strac.py  Project: WowCZ/strac
    def train_update(self, s_batch, a_batch_one_hot, V_trace, advantage):
        batch_size = s_batch.shape[0]
        action_indx = np.argmax(a_batch_one_hot, axis=1).tolist()
        action_stats = [action_indx.count(action_indx[i]) for i in range(batch_size)]
        action_bp_rate = (1 - np.array(action_stats) / float(batch_size)) ** 2

        s_batch = copy.deepcopy(s_batch)
        a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
        V_trace_batch = copy.deepcopy(V_trace)
        advantage_batch = copy.deepcopy(advantage)

        s_batch = nd.array(s_batch, ctx=CTX)
        a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
        V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
        advantage_batch = nd.array(advantage_batch, ctx=CTX)
        action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))

        self.actorcritic.collect_params().zero_grad()
        self.reset_noise()
        with mx.autograd.record():
            loss_vec = []
            probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)
            loss = 0.
            for element in loss_vec:
                loss = loss + element
            # print 'loss_dropout:', loss
            logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1) + 1e-5)
            entropy = -nd.sum(nd.sum(data=probs * nd.log(probs + 1e-5), axis=1), axis=0)
            top_decision_entropy = -nd.sum(nd.sum(data=top_decisions * nd.log(top_decisions + 1e-5), axis=1), axis=0)
            entropy_loss = - entropy
            top_decision_entropy_loss = - top_decision_entropy
            actorloss = -nd.sum(action_bp_rate * (logprob * advantage_batch), axis=0)
            criticloss = nd.sum(action_bp_rate * nd.square(values - V_trace_batch), axis=0)
            # actorloss = -nd.sum(logprob*advantage_batch, axis=0)
            # criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
            loss = actorloss + 0.3 * criticloss + 0.001 * entropy_loss

            # loss = actorloss + 0.3*criticloss + 0.0001*top_decision_entropy_loss
        loss.backward()

        self.trainer.step(batch_size=batch_size, ignore_stale_grad=True)
Example #26
    def forward(self, input, hidden, encoder_outputs):
        #input shape, (1,)
        embedded = self.embedding(input)
        if self.dropout_p > 0:
            embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(F.concat(embedded, hidden[0].flatten(), dim=1)))
        attn_applied = F.batch_dot(attn_weights.expand_dims(0),
                                 encoder_outputs.expand_dims(0))

        output = F.concat(embedded.flatten(), attn_applied.flatten(), dim=1)
        output = self.attn_combine(output).expand_dims(0)

        for i in range(self.n_layers):
            output = F.relu(output)
            output, hidden = self.gru(output, hidden)

        output = self.out(output)

        return output, hidden, attn_weights
Example #27
    def forward(self, input, hidden, encoder_outputs):
        #input shape, (1,)
        embedded = self.embedding(input)
        if self.dropout_p > 0:
            embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(F.concat(embedded, hidden[0].flatten(), dim=1)))
        attn_applied = F.batch_dot(attn_weights.expand_dims(0),
                                   encoder_outputs.expand_dims(0))

        output = F.concat(embedded.flatten(), attn_applied.flatten(), dim=1)
        output = self.attn_combine(output).expand_dims(0)

        for i in range(self.n_layers):
            output = F.relu(output)
            output, hidden = self.gru(output, hidden)

        output = self.out(output)

        return output, hidden, attn_weights
Example #28
def test(ctx, val_data, opt, net):
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    true_labels = []
    predictions = []

    for i, batch in enumerate(val_data):
        data, label = batch_fn(batch, ctx)
        outputs = []
        for _, X in enumerate(data):
            X = X.reshape((-1, ) + X.shape[2:])
            # pred = net(X.astype(opt.dtype, copy=False))
            pred = net(X)
            if opt.use_softmax:
                pred = F.softmax(pred, axis=1)
            outputs.append(pred)

        predictions.append(outputs)
        true_labels.append(label)

        acc_top1.update(label, outputs)
        acc_top5.update(label, outputs)
        mx.ndarray.waitall()

        _, cur_top1 = acc_top1.get()
        _, cur_top5 = acc_top5.get()

        if i > 0 and i % opt.log_interval == 0:
            print('%04d/%04d is done: acc-top1=%f acc-top5=%f' %
                  (i, len(val_data), cur_top1 * 100, cur_top5 * 100))

    _, top1 = acc_top1.get()
    _, top5 = acc_top5.get()
    #save true_labels, predictions
    predictions = _list_to_numpy(predictions)
    true_labels = _list_to_numpy(true_labels)
    np.save(os.path.join(opt.save_dir, "labels"), true_labels)
    np.save(os.path.join(opt.save_dir, "predictions"), predictions)

    return top1, top5, true_labels, predictions
Example #29
    def LSTM_Cell(input, h_state, c_state):
        for x in input:
            f_t = nd.Activation(nd.FullyConnected(
                data=x, weight=wxhf, no_bias=True, num_hidden=num_hidden) +
                                nd.FullyConnected(data=h_state,
                                                  weight=whhf,
                                                  no_bias=True,
                                                  num_hidden=num_hidden) + bhf,
                                act_type="sigmoid")
            i_t = nd.Activation(nd.FullyConnected(
                data=x, weight=wxhi, no_bias=True, num_hidden=num_hidden) +
                                nd.FullyConnected(data=h_state,
                                                  weight=whhi,
                                                  no_bias=True,
                                                  num_hidden=num_hidden) + bhi,
                                act_type="sigmoid")
            o_t = nd.Activation(nd.FullyConnected(
                data=x, weight=wxho, no_bias=True, num_hidden=num_hidden) +
                                nd.FullyConnected(data=h_state,
                                                  weight=whho,
                                                  no_bias=True,
                                                  num_hidden=num_hidden) + bho,
                                act_type="sigmoid")
            g_t = nd.Activation(nd.FullyConnected(
                data=x, weight=wxhg, no_bias=True, num_hidden=num_hidden) +
                                nd.FullyConnected(data=h_state,
                                                  weight=whhg,
                                                  no_bias=True,
                                                  num_hidden=num_hidden) + bhg,
                                act_type="tanh")
            c_state = nd.multiply(f_t, c_state) + nd.multiply(i_t, g_t)
            h_state = nd.multiply(o_t, nd.tanh(c_state))

        output = nd.FullyConnected(data=h_state,
                                   weight=why,
                                   bias=by,
                                   num_hidden=num_outputs)
        output = nd.softmax(data=output)
        return output, h_state, c_state
Example #30
    def forward(self, is_train, req, in_data, out_data, aux):
        arm_cls_preds = in_data[0]
        odm_cls_target = in_data[1]
        odm_loc_target_mask = in_data[2]

        arm_cls_preds = nd.softmax(data=arm_cls_preds)
        arm_cls_preds_classes = nd.split(data=arm_cls_preds,axis=1,num_outputs=2)
        # arm_cls_preds_bg shape: (batch, h*w*num_anchors[:layers]); class 0 is the negative/background class
        arm_cls_preds_bg = nd.reshape(data=arm_cls_preds_classes[0],shape=(0,-1))
        prob_temp = nd.ones_like(arm_cls_preds_bg)*0.99
        cond1 = arm_cls_preds_bg >= prob_temp # > 0.99 idx is 1
        # print('negative cond1 ------- :',heapq.nlargest(2,arm_cls_preds_bg[0]))
        temp1 = nd.ones_like(odm_cls_target)*(-1) ### TODO: should background be 0 or -1?
        # If the ARM predicts the negative class with confidence > 0.99, drop that anchor from the
        # ODM class targets (replace with -1), i.e. turn the negative into background
        odm_cls_target_mask = nd.where(condition=cond1,x=temp1,y=odm_cls_target)

        # apply filtering to odm_loc_target_mask
        # odm_loc_target_mask_shape: (batch, num_anchors, 4)

        arm_cls_preds_bg = nd.reshape(data=arm_cls_preds_bg,shape=(0,-1,1))#(batch , h*w*num_anchors[:layers],1)
        # (batch , h*w*num_anchors[:layers] , 4 )
        odm_loc_target_mask = nd.reshape(data=odm_loc_target_mask,shape=(0,-1,4))
        odm_loc_target_mask = odm_loc_target_mask[:,:,0] #(batch , h*w*num_anchors[:layers])
        #(batch , h*w*num_anchors[:layers], 1)
        ## Take the first column of every row across the batch, so the four identical labels
        ## [0 0 0 0], [1 1 1 1] collapse to [0], [1]
        odm_loc_target_mask = nd.reshape(data=odm_loc_target_mask,shape=(0,-1,1))
        loc_temp = nd.ones_like(odm_loc_target_mask)*0.99
        cond2 = arm_cls_preds_bg >= loc_temp
        temp2 = nd.zeros_like(odm_loc_target_mask)  # use 0
        # If the ARM predicts the negative class with confidence > 0.99, zero that anchor's entry in the ODM location mask
        ## In effect, regardless of the IoU-based positive/negative assignment, any anchor the ARM rejects
        ## with confidence > 0.99 is treated as background
        odm_loc_target_bg_mask = nd.where(cond2,temp2,odm_loc_target_mask)
        odm_loc_target_bg_mask = nd.concat(*[odm_loc_target_bg_mask]*4,dim=2)
        # restore the original shape
        odm_loc_target_bg_mask = nd.reshape(odm_loc_target_bg_mask,shape=(0,-1))

        for ind, val in enumerate([odm_cls_target_mask, odm_loc_target_bg_mask]):
            self.assign(out_data[ind], req[ind], val)
Example #31
    def sample(self, batch_size=1, with_details=False, with_entropy=False):
        actions = []
        entropies = []
        log_probs = []

        for idx in range(len(self.num_tokens)):
            logits = self.decoders[idx](batch_size)

            probs = F.softmax(logits, axis=-1)
            log_prob = F.log_softmax(logits, axis=-1)

            entropy = -(log_prob *
                        probs).sum(1, keepdims=False) if with_entropy else None

            action = mx.random.multinomial(probs, 1)
            ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                              action.astype('float32'))
            selected_log_prob = F.gather_nd(log_prob, ind)

            actions.append(action[:, 0])
            entropies.append(entropy)
            log_probs.append(selected_log_prob)

        configs = []
        for idx in range(batch_size):
            config = {}
            for i, action in enumerate(actions):
                choice = action[idx].asscalar()
                k, space = self.spaces[i]
                config[k] = int(choice)
            configs.append(config)

        if with_details:
            entropies = F.stack(*entropies,
                                axis=1) if with_entropy else entropies
            return configs, F.stack(*log_probs, axis=1), entropies
        else:
            return configs
Example #32
 def forward(self, x):
     x = self.dense(x)
     probs = self.action_pred(x)
     values = self.value_pred(x)
     return F.softmax(probs), values
# 5. Test the networks
total_batch = int(np.ceil(X_test.shape[0] / batch_size))
correct_counts = [0 for i in range(num_models)]
ensemble_correct_count = 0
total_num = 0
for i in range(total_batch):
    num_valid = batch_size if (i + 1) * batch_size <= X_test.shape[0]\
        else X_test.shape[0] - i * batch_size
    data_npy, label_npy, num_valid = get_batch(i, batch_size, X_test, y_test)
    prob_ensemble = nd.zeros(shape=(label_npy.shape[0], 10), ctx=mx.gpu())
    for j, test_net in enumerate(test_nets):
        test_net.forward(data_batch=mx.io.DataBatch(data=[nd.array(data_npy)],
                                                    label=None),
                         is_train=False)
        logits_nd = test_net.get_outputs()[0]
        prob_nd = nd.softmax(logits_nd)
        prob_ensemble += prob_nd
        pred_cls = nd.argmax(prob_nd, axis=-1).asnumpy()
        correct_counts[j] += (pred_cls[:num_valid] == label_npy[:num_valid]).sum()
    prob_ensemble /= num_models
    ensemble_pred_cls = nd.argmax(prob_ensemble, axis=-1).asnumpy()
    ensemble_correct_count += (ensemble_pred_cls[:num_valid] == label_npy[:num_valid]).sum()
for i in range(num_models):
    print(i, 'Accuracy:', correct_counts[i] / float(X_test.shape[0]))
print('Ensemble accuracy:', ensemble_correct_count / float(X_test.shape[0]))
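The evaluation loop above relies on get_batch, which is not shown here. A minimal sketch under the assumption that it returns the i-th slice of the test set plus the number of valid rows (matching the num_valid computed just before the call); the original helper's behavior may differ:

def get_batch(i, batch_size, X, y):
    """Return the i-th batch of (data, labels) and how many rows are valid."""
    start = i * batch_size
    stop = min(start + batch_size, X.shape[0])
    return X[start:stop], y[start:stop], stop - start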
'''
Learning Started!
Epoch: 0001 cost = [ 0.23813407  0.23717315]
Epoch: 0002 cost = [ 0.07455271  0.07434764]
Epoch: 0003 cost = [ 0.05925059  0.06024327]
Epoch: 0004 cost = [ 0.05032205  0.04895757]