Пример #1
0
def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10,
             eps=1E-6):
    """One Hamiltonian Monte Carlo transition (L leapfrog steps + Metropolis test).

    exe: executor whose forward/backward yields gradients of the potential energy.
    exe_params / exe_grads: dicts of parameter and gradient NDArrays.
    label_key, noise_precision, prior_precision: forwarded to calc_potential.
    L: number of leapfrog steps; eps: leapfrog step size.
    Returns (end_params, 1) on acceptance, (init_params, 0) on rejection.
    """
    # Snapshot the starting position twice: one frozen copy for a possible
    # rejection, one working copy advanced by the leapfrog integration.
    init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    # Fresh standard-normal momenta for this transition.
    init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
    end_momentums = {k: v.copyto(v.context) for k, v in init_momentums.items()}
    init_potential = calc_potential(exe, init_params, label_key, noise_precision, prior_precision)

    # 0. Calculate Initial Energy and Kinetic
    init_kinetic = sum([nd.sum(nd.square(momentum)) / 2.0
                        for momentum in init_momentums.values()]).asscalar()
    # 1. Make a half step for momentum at the beginning
    exe.copy_params_from(end_params)
    exe.forward(is_train=True)
    exe.backward()
    for k, v in exe_grads.items():
        v.wait_to_read()
    for k, momentum in end_momentums.items():
        momentum[:] = momentum - (eps / 2) * exe_grads[k]
    # 2. Alternate full steps for position and momentum
    for i in range(L):
        # 2.1 Full step for position
        # NOTE(review): positions are advanced through exe_params (the
        # executor's live buffers), not end_params -- confirm they alias the
        # same NDArrays, otherwise end_params never moves.
        for k, param in exe_params.items():
            param[:] = param + eps * end_momentums[k]
        # 2.2 Full step for the momentum, except at the end of trajectory we perform a half step
        exe.forward(is_train=True)
        exe.backward()
        for v in exe_grads.values():
            v.wait_to_read()
        if i != L - 1:
            for k, momentum in end_momentums.items():
                momentum[:] = momentum - eps * exe_grads[k]
        else:
            for k, momentum in end_momentums.items():
                # We should reverse the sign of the momentum at the end
                momentum[:] = -(momentum - eps / 2.0 * exe_grads[k])
    copy_param(exe, end_params)
    # 3. Calculate acceptance ratio and accept/reject the move
    end_potential = calc_potential(exe, end_params, label_key, noise_precision, prior_precision)
    end_kinetic = sum([nd.sum(nd.square(momentum)) / 2.0
                       for momentum in end_momentums.values()]).asscalar()
    # print init_potential, init_kinetic, end_potential, end_kinetic
    # Metropolis acceptance: accept when a uniform draw falls under exp(-dH).
    r = numpy.random.rand(1)
    if r < numpy.exp(-(end_potential + end_kinetic) + (init_potential + init_kinetic)):
        exe.copy_params_from(end_params)
        return end_params, 1
    else:
        exe.copy_params_from(init_params)
        return init_params, 0
Пример #2
0
 def newgradfun(g):
     """Reduce the upstream gradient over broadcast axes, then reshape to x's shape."""
     grad = gradfun(g)
     # Any axis where g's shape differs from the padded shape was broadcast;
     # sum it back down (keeping the dim so later axes stay aligned).
     for axis, (got, want) in enumerate(zip(g.shape, padded_shape)):
         if got != want:
             grad = ndarray.sum(grad, axis=axis, keepdims=True)
     return grad if grad.shape == x.shape else grad.reshape(x.shape)
Пример #3
0
 def saturation_aug(self, src, x):
     """Jitter image saturation in place by blending src with its grayscale version."""
     alpha = 1.0 + random.uniform(-x, x)
     # ITU-R BT.601 luma weights over the channel axis.
     weights = nd.array([[[0.299, 0.587, 0.114]]])
     luma = nd.sum(src * weights, axis=2, keepdims=True)
     luma *= (1.0 - alpha)
     src *= alpha
     src += luma
     return src
Пример #4
0
def f(a):
    """Double b until its L2 norm reaches 1000; scale by 100 if its sum is non-positive."""
    b = a * 2
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    return b if nd.sum(b).asscalar() > 0 else 100 * b
Пример #5
0
def grad_clipping(params, theta, ctx):
    """Rescale all gradients in place so their global L2 norm does not exceed theta."""
    if theta is None:
        return
    total = nd.array([0.0], ctx)
    for param in params:
        total += nd.sum(param.grad * param.grad)
    global_norm = nd.sqrt(total).asscalar()
    if global_norm > theta:
        scale = theta / global_norm
        for param in params:
            param.grad[:] *= scale
 def contrast_aug(self, src, x):
     """Jitter image contrast in place around the mean gray level.

     src: HxW x3 image NDArray -- assumed RGB in 0-255 range, TODO confirm.
     x: maximum relative contrast jitter.
     Returns the modified src, clipped back to [0, 255].
     """
     alpha = 1.0 + random.uniform(-x, x)
     # Bug fix: the coefficients must be an NDArray -- multiplying an mxnet
     # NDArray by a numpy array raises TypeError. The sibling saturation_aug
     # already uses nd.array for the same BT.601 luma weights.
     coef = nd.array([[[0.299, 0.587, 0.114]]])
     gray = src * coef
     # Mean gray level scaled so the overall brightness is preserved.
     gray = (3.0 * (1.0 - alpha) / gray.size) * nd.sum(gray)
     src *= alpha
     src += gray
     src = nd.clip(src, 0, 255)
     return src
Пример #7
0
def _get_gaussian_initialization(num_features, neighborhood_size, num_data):
    """Initializes permutohedral filter as Gaussian kernel."""
    path = ('./experiments/gaussian_initializations/'
            'gaussian_filter_neighborhood{}_features{}'
            '.npy').format(neighborhood_size, num_features)
    kernel = nd.array(np.load(path))
    # Normalize so the filter sums to one -- a better starting point.
    kernel = kernel / nd.sum(kernel)
    return kernel.repeat(repeats=num_data, axis=0)
Пример #8
0
def grad_clipping(params, clipping_norm, ctx):
    """Gradient clipping."""
    if clipping_norm is None:
        return
    sq_sum = nd.array([0.0], ctx)
    for p in params:
        sq_sum += nd.sum(p.grad ** 2)
    total_norm = nd.sqrt(sq_sum).asscalar()
    if total_norm > clipping_norm:
        ratio = clipping_norm / total_norm
        for p in params:
            p.grad[:] *= ratio
Пример #9
0
    def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
        """One actor-critic training step with running return statistics.

        Returns (grads_list, batch_size): gradients of all non-batchnorm
        parameters plus the batch size for the caller's trainer step.
        """
        batch_size = s_batch.shape[0]
        # Defensive copies so the caller's arrays are not mutated.
        s_batch = copy.deepcopy(s_batch)
        a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
        V_trace_batch = copy.deepcopy(V_trace)
        advantage_batch = copy.deepcopy(advantage)
        sigma_prime = copy.deepcopy(self.sigma)
        mu_prime = copy.deepcopy(self.mu)
        # EMA (rate beta) of the V-trace targets' moments.
        # NOTE(review): ``presigma`` accumulates the mean while ``mu``
        # accumulates the mean of squares, yet sigma is computed as
        # sqrt(presigma - mu**2); relative to sigma = sqrt(E[x^2] - E[x]^2)
        # the two accumulators look swapped -- confirm.
        self.presigma = (1-self.beta)*self.presigma + self.beta*np.sum(np.array(V_trace))/(np.array(V_trace).shape[0])
        self.mu = (1-self.beta)*self.mu + self.beta*np.sum((np.array(V_trace))**2)/(np.array(V_trace).shape[0])
        self.sigma = math.sqrt(self.presigma-self.mu**2)

        # Old and new statistics are passed to the network together.
        pop_art_hyper = self.sigma, sigma_prime, self.mu, mu_prime

        s_batch = nd.array(s_batch, ctx=CTX)
        a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
        V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
        advantage_batch = nd.array(advantage_batch, ctx=CTX)
        self.reset_noise()

        self.actorcritic.collect_params().zero_grad()
        with mx.autograd.record():
            loss_vec = []
            probs, values, = self.actorcritic.forward(s_batch, pop_art_hyper, loss_vec)
            # Sum auxiliary losses the network appended into loss_vec.
            loss = 0.
            for element in loss_vec:
                loss = loss + element
            # print 'loss_dropout:', loss
            logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1))
            entropyloss = -nd.sum(nd.sum(data=probs*nd.log(probs), axis=1), axis=0)
            actorloss = -nd.sum(logprob*advantage_batch, axis=0)
            criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)

            # NOTE(review): this overwrites the aux-loss sum above and drops
            # entropyloss -- confirm whether they were meant to be added.
            loss = actorloss + criticloss
        loss.backward()

        grads_list = []
        # Collect gradients of every parameter except batchnorm ones.
        for name, value in self.actorcritic.collect_params().items():
            if name.find('batchnorm') < 0:
                # grads_list.append(mx.nd.array(value.grad().asnumpy()))
                grads_list.append(value.grad())

        return grads_list, batch_size
Пример #10
0
def test_on_LFW(model,ctx=mx.gpu()):
    """Evaluate ``model`` on the LFW verification pairs and return a summary string.

    Reads pair definitions from /home1/LFW/pairs.txt, scores each of the 6000
    pairs by cosine similarity of the two face embeddings, then 10-fold
    cross-validates a similarity threshold and reports mean accuracy.
    """
    with open('/home1/LFW/pairs.txt', 'rt') as f:
        pairs_lines = f.readlines()[1:]
    sims = []
    # Switch the model into embedding-output mode.
    model.get_feature=True
    normalize = transforms.Normalize(mean=0.5, std=0.25)
    transform = transforms.Compose([
        transforms.Resize((96, 112)),
        transforms.ToTensor(),
        normalize,
        # mTransform,
    ])
    start = time.time()
    forward_time = 0
    for i in range(6000):
        p = pairs_lines[i].replace('\n', '').split('\t')

        # 3 fields: same-identity pair; 4 fields: two different identities.
        if 3 == len(p):
            sameflag = 1
            name1 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[1]))
            name2 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[2]))
        if 4 == len(p):
            sameflag = 0
            name1 = p[0] + '/' + p[0] + '_' + '{:04}.jpg'.format(int(p[1]))
            name2 = p[2] + '/' + p[2] + '_' + '{:04}.jpg'.format(int(p[3]))

        img1 = nd.array(Image.open('/home1/LFW/aligned_lfw-112X96/' + name1))
        img2 = nd.array(Image.open('/home1/LFW/aligned_lfw-112X96/' + name2))
        img1 = transform(img1)
        img2 = transform(img2)
        # Stack the two faces so one forward pass embeds both.
        img = nd.stack(img1, img2)

        img = img.as_in_context(ctx)
        fstart = time.time()
        output = model(img)
        forward_time += time.time() - fstart
        f1, f2 = output[0], output[1]
        # Cosine similarity with a small epsilon guarding zero norms.
        cosdistance = nd.sum(f1 * f2) / (f1.norm() * f2.norm() + 1e-5)
        sims.append('{}\t{}\t{}\t{}\n'.format(name1, name2, cosdistance.asscalar(), sameflag))

    accuracy = []
    thd = []
    folds = KFold(n=6000, n_folds=10, shuffle=False)
    thresholds = np.arange(0, 1.0, 0.005)
    # NOTE(review): np.array(map(...)) only yields an array of rows under
    # Python 2, where map returns a list -- confirm the intended runtime.
    predicts = np.array(map(lambda line: line.strip('\n').split(), sims))

    # Pick the best threshold on each training fold, evaluate on the test fold.
    for idx, (train, test) in enumerate(folds):
        best_thresh = find_best_threshold(thresholds, predicts[train])
        accuracy.append(eval_acc(best_thresh, predicts[test]))
        thd.append(best_thresh)
    # print time.time() - start-cost # single 1080Ti about 100s
    msg = 'LFWACC={:.4f} std={:.4f} thd={:.4f}, model forward test time:{:.4f}, total time: {:.4f}'.format(
        np.mean(accuracy), np.std(accuracy),np.mean(thd),forward_time, time.time()-start)

    return msg
Пример #11
0
def cal_my_acc(test_files, target_files):
    '''
    this method is deprecated
    Computes 1:N identification accuracy: each test image is matched against
    every target (gallery) embedding by cosine similarity and counted correct
    when the best match shares the identity prefix of its filename.
    :param test_files: list of probe image paths
    :param target_files: list of gallery image paths
    :return: (accuracy, test_embeddings, target_embeddings)
    '''
    mTransform = MTransform()
    normalize = transforms.Normalize(mean=0.5, std=0.5)
    transform = transforms.Compose([
        # transforms.Resize((96, 112)),
        transforms.ToTensor(),
        normalize,
        # mTransform,
    ])
    model = sphere_net.SphereNet20()
    model.load_params("log_bn_dy/spherenet.model", ctx=mx.gpu())
    correct = 0
    total = 0
    # Embed the gallery once, keyed by the file stem (identity_index).
    target_emb = {}
    for target_file in target_files:
        target_image = transform(nd.array(
            Image.open(target_file))).as_in_context(mx.gpu())
        target_image = nd.expand_dims(target_image, axis=0)
        target_label = ''.join(target_file.split('/')[-1].split('.')[:-1])
        target_out = model(target_image)
        target_emb[target_label] = target_out
    test_emb = {}
    for test_file in test_files:
        test_image = Image.open(test_file)
        test_image = nd.expand_dims(transform(nd.array(test_image)),
                                    axis=0).as_in_context(mx.gpu())
        test_label = ''.join(test_file.split('/')[-1].split('.')[:-1])
        test_out = model(test_image)
        max_s = mx.nd.zeros(1, ctx=mx.gpu())
        max_label = ''
        sims = {}
        # Find the gallery entry with the highest cosine similarity.
        for target_label, target_out in target_emb.items():
            similarity = nd.sum(test_out * target_out) / \
                         (nd.norm(test_out) * nd.norm(target_out))
            sims[target_label] = similarity.asscalar()
            if max_s < similarity:
                max_s = similarity
                max_label = target_label
        # Identity = file stem minus its trailing _index component.
        if ''.join(max_label.split('_')[:-1]) == ''.join(
                test_label.split('_')[:-1]):
            correct += 1
        else:
            # NOTE(review): Python 2 print statement -- this function cannot
            # run unmodified under Python 3.
            print test_label, max_s.asscalar(), max_label
        total += 1
        test_emb[test_label] = test_out
        # print correct, total, float(correct)/total

    return float(correct) / total, test_emb, target_emb
Пример #12
0
 def accuracy(data):
     """Fraction of samples whose argmax prediction equals the label."""
     correct = 0
     seen = 0
     for X, Y in data:
         features = X.as_in_context(model_ctx)
         labels = Y.as_in_context(model_ctx).reshape(Y.size, -1)
         preds = nd.argmax(net(features), axis=1).reshape(Y.size, -1)
         correct += nd.sum(preds == labels).asscalar()
         seen += len(X)
     return correct / seen
Пример #13
0
    def edge_func(self, edges):
        """ComplEx-style edge score: four real-valued products summed over the embedding dim."""
        h_re, h_im = nd.split(edges.src['emb'], num_outputs=2, axis=-1)
        t_re, t_im = nd.split(edges.dst['emb'], num_outputs=2, axis=-1)
        r_re, r_im = nd.split(edges.data['emb'], num_outputs=2, axis=-1)

        score = (h_re * t_re * r_re
                 + h_im * t_im * r_re
                 + h_re * t_im * r_im
                 - h_im * t_re * r_im)
        # TODO: check if there exists minus sign and if gamma should be used here(jin)
        return {'score': nd.sum(score, -1)}
Пример #14
0
    def infer(self, head_emb, rel_emb, tail_emb):
        """Score every (head, rel, tail) combination.

        Assumes 2-D (count, emb_dim) inputs -- TODO confirm. Each embedding is
        split into real/imaginary halves on the last axis; the expand_dims
        calls broadcast the three factors into a
        (num_head, num_rel, num_tail, emb_dim/2) tensor whose last axis is
        summed, giving a (num_head, num_rel, num_tail) score grid.
        """
        real_head, img_head = nd.split(head_emb, num_outputs=2, axis=-1)
        real_tail, img_tail = nd.split(tail_emb, num_outputs=2, axis=-1)
        real_rel, img_rel = nd.split(rel_emb, num_outputs=2, axis=-1)

        # Same four-term product as edge_func, broadcast over all triples.
        score = (real_head.expand_dims(axis=1) * real_rel.expand_dims(axis=0)).expand_dims(axis=2) * real_tail.expand_dims(axis=0).expand_dims(axis=0) \
                + (img_head.expand_dims(axis=1) * real_rel.expand_dims(axis=0)).expand_dims(axis=2) * img_tail.expand_dims(axis=0).expand_dims(axis=0) \
                + (real_head.expand_dims(axis=1) * img_rel.expand_dims(axis=0)).expand_dims(axis=2) * img_tail.expand_dims(axis=0).expand_dims(axis=0) \
                - (img_head.expand_dims(axis=1) * img_rel.expand_dims(axis=0)).expand_dims(axis=2) * real_tail.expand_dims(axis=0).expand_dims(axis=0)

        return nd.sum(score, -1)
Пример #15
0
 def hybrid_forward(self, F, data1, data2, **kwargs):
     """Score each pair with a set of bilinear basis functions, then mix them."""
     # One column per basis: sum over features of (data1 . W_i) * data2.
     per_basis = [
         F.sum(F.dot(data1, kwargs["weight{}".format(i)]) * data2,
               axis=1,
               keepdims=True)
         for i in range(self._num_basis_functions)
     ]
     stacked = F.concat(*per_basis, dim=1)
     return self.rate_out(stacked)
Пример #16
0
def gradient_clipping(parameters, threshold, ctx):
    """Scale all gradients in place when their joint L2 norm exceeds threshold."""
    if threshold is None:
        return

    accum = nd.array([0.0], ctx)
    for param in parameters:
        accum += nd.sum(param.grad ** 2)
    joint_norm = nd.sqrt(accum).asscalar()

    if joint_norm > threshold:
        for param in parameters:
            param.grad[:] *= (threshold / joint_norm)
Пример #17
0
def f(a):
    """Double b until its L2 norm reaches 1000, then branch on the sign of its sum."""
    b = a *2
    # scalar L2 norm of b
    while nd.norm(b).asscalar() < 1000:
        b = b *2
    # scalar sum over all of b's elements
    if nd.sum(b).asscalar() > 0:
        return b
    return 100 * b
Пример #18
0
 def _evaluate_accuracy(self, data_iterator, net, layer_params):
     """Classification accuracy of ``net`` (with ``layer_params`` installed) over the iterator."""
     correct = 0.
     seen = 0.
     for data, label in data_iterator:
         data = data.as_in_context(self._context_bnn).reshape((-1, data.shape[1]))
         label = label.as_in_context(self._context_bnn)
         # Install the sampled layer parameters into the network before scoring.
         replace_params_net(layer_params, net, self._context_bnn)
         predictions = nd.argmax(net(data), axis=1)
         correct += nd.sum(predictions == label)
         seen += data.shape[0]
     return (correct / seen).asscalar()
Пример #19
0
def f(a):
    """Trace variant of f: logs norms, doubles b past norm 1000, branches on sum sign."""
    b = a * 2
    print('a', a)
    print('nd.norm(a).asscalar()', nd.norm(a).asscalar())
    print('nd.norm(b).asscalar()', nd.norm(b).asscalar())
    while nd.norm(b).asscalar() < 1000:
        b = b * 2
    return b if nd.sum(b).asscalar() > 0 else 100 * b
Пример #20
0
def grad_clipping(params, theta, ctx):
    """Clip all parameter gradients in place to a global L2 norm of theta."""
    if theta is None:
        return
    squared = nd.array([0.0], ctx)
    for p in params:
        squared += nd.sum(p.grad ** 2)
    grad_norm = nd.sqrt(squared).asscalar()
    if grad_norm > theta:
        for p in params:
            p.grad[:] *= theta / grad_norm
Пример #21
0
    def check_acc(self, data_iterator):
        """Classification accuracy over ``data_iterator`` using ``self.loss`` outputs."""
        numerator = 0.
        denominator = 0.
        for batch_i, (data, label) in enumerate(data_iterator):
            # self.loss returns (loss_value, network_output); only the output
            # is needed for accuracy.
            _, output = self.loss(data, label, train = False)
            predictions = nd.argmax(output, axis=1).as_in_context(ctx)
            numerator += nd.sum(predictions == label.as_in_context(ctx))
            denominator += data.shape[0]
            # Progress line is rewritten in place via the carriage return below.
            print('Evaluating accuracy. (complete percent: {:.2f}/100)'.format(1.0 * batch_i / (self.train_size/self.batch_size) * 100 /2)+' '*20, end='')
            sys.stdout.write("\r")

        return (numerator / denominator).asscalar()
def evaluate_accuracy(data_iterator, net, ctx=[mx.cpu()]):
    """Accuracy of ``net`` over an MXNet data iterator with multi-device batches."""
    correct = nd.array([0])
    count = 0.
    data_iterator.reset()
    for batch in data_iterator:
        for X, y in zip(batch.data, batch.label):
            y = y.astype('float32')
            correct += nd.sum(net(X).argmax(axis=1) == y).copyto(mx.cpu())
            count += y.size
        correct.wait_to_read()  # don't push too many operators into backend
    return correct.asscalar() / count
Пример #23
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Center-loss forward pass for a custom operator.

        in_data: [features, labels, class_centers].
        out_data[0] receives the scalar loss; out_data[1] the per-sample
        feature-to-center diffs (reusable by backward).
        """
        data_input = in_data[0]
        batch_size = data_input.shape[0]
        label_input = in_data[1]
        center_input = in_data[2]

        # Map each label to its row in the center matrix, then diff features
        # against their class centers.
        label_index = self.class_index[label_input]
        batch_center = center_input[label_index]
        batch_diff = data_input - batch_center

        # Mean squared distance to class centers, halved.
        loss = nd.sum(nd.square(batch_diff)) / batch_size / 2
        self.assign(out_data[0], req[0], loss)
        # NOTE(review): req[0] is reused for the second output; the write
        # request for out_data[1] is normally req[1] -- confirm intended.
        self.assign(out_data[1], req[0], batch_diff)
Пример #24
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Standardize the input feature map to zero mean / unit std.

        in_data[0]: feature tensor of shape (N, C, H, W); statistics are
        computed over the whole tensor. Writes the result to out_data[0].
        """
        fea = in_data[0]

        N, C, H, W = fea.shape
        count = N * C * H * W

        mean = nd.sum(fea) / count
        # Bug fixes vs. the original:
        #  * ``nd.squre(nd.fea-mean)`` referenced two non-existent names
        #    (``nd.squre``, ``nd.fea``) and would crash; the variance is
        #    sum((fea - mean)^2) / count.
        #  * std must be the square root of that variance.
        #  * standardization is (fea - mean) / std, not fea / std - mean.
        std = nd.sqrt(nd.sum(nd.square(fea - mean)) / count)
        fea = (fea - mean) / std

        self.assign(out_data[0], req[0], fea)
Пример #25
0
    def rbf_kernels(self, x: NDArray, y: NDArray):
        """
        Computes exp(-c ||x - y||^2).
        ||x - y||^2 = x . x + y . y - 2 x . y
        Compute each term separately. x is are original features, y are features used for similarity
        """
        # Pairwise inner products; assumes x is (n, d) and y is (d, m), so the
        # result is (n, m) -- TODO confirm against callers.
        cross_products = nd.dot(x, y)

        # Row-wise squared norms of x, broadcast across y's columns.
        x_products = nd.sum(sqr(x), axis=1, keepdims=True)
        x_products = nd.broadcast_axis(x_products, axis=1, size=y.shape[1])

        # Column-wise squared norms of y, broadcast across x's rows.
        y_products = nd.sum(sqr(y), axis=0, keepdims=True)
        y_products = nd.broadcast_axis(y_products, axis=0, size=x.shape[0])

        sqr_difs = x_products + y_products - 2 * cross_products
        # Debug output left in by the author.
        print(nd.mean(x_products), nd.mean(y_products),
              nd.mean(cross_products))
        print(nd.mean(sqr_difs))
        # NOTE(review): the docstring's ``c`` is hard-coded to 0.05 here.
        res = nd.exp(-0.05 * sqr_difs)
        print(res.shape)
        return res
Пример #26
0
def f(a):
    """Double b (logging each doubling) until norm >= 1000; scale by 100 when sum <= 0."""
    b = a * 2
    doublings = 0
    while nd.norm(b).asscalar() < 1000:
        doublings += 1
        print(doublings)
        b = b * 2
    if nd.sum(b).asscalar() > 0:
        return b
    print('100')
    return 100 * b
Пример #27
0
 def train_model(self):
     """Run five epochs of minibatch SGD over the stored data iterator."""
     self.__epochs = 5
     for epoch in range(self.__epochs):
         epoch_loss = 0
         for self.__batch_X, self.__batch_y in self.__data_iter:
             with autograd.record():
                 self.__batch_y_hat = self.__net(self.__batch_X)
                 loss = self.__loss_function(self.__batch_y_hat, self.__batch_y)
             loss.backward()
             self.__trainer.step(self.__batch_size)
             epoch_loss += nd.sum(loss).asscalar()
         print("Epoch %d, average loss: %f" % (epoch, epoch_loss))
    def edge_func(self, edges):
        """ComplEx-style edge score: real part of <head, rel, conj(tail)> summed over dims."""
        h_re, h_im = nd.split(edges.src["emb"], num_outputs=2, axis=-1)
        t_re, t_im = nd.split(edges.dst["emb"], num_outputs=2, axis=-1)
        r_re, r_im = nd.split(edges.data["emb"], num_outputs=2, axis=-1)

        score = (h_re * t_re * r_re
                 + h_im * t_im * r_re
                 + h_re * t_im * r_im
                 - h_im * t_re * r_im)
        # TODO: check if there exists minus sign and if gamma should be used here(jin)
        return {"score": nd.sum(score, -1)}
Пример #29
0
    def train(self,
              epoch_cnt,
              learning_method,
              learning_params,
              verbose,
              model,
              is_random=True,
              progress=False):
        """Train ``model`` for ``epoch_cnt`` epochs of minibatch SGD.

        Weight decay is pulled out of the trainer and applied manually through
        the model's regularization output ``reg``. After ``verbose`` epochs the
        model is evaluated each epoch via self.test.
        """
        # Build the trainer; wd is zeroed because the regularizer is added to
        # the loss explicitly below.
        wd = learning_params['wd']
        learning_params['wd'] = 0
        trainer = gluon.Trainer(model.collect_params(), learning_method,
                                learning_params)
        # dense_trainer = gluon.Trainer(model.collect_params(select='.*_(mlp[0-9]|y)'),
        #                               learning_method, learning_params)
        # svd_trainer = gluon.Trainer(model.collect_params(select='.*_(q|p|alpha|b)(_|$)'),
        #                             learning_method, learning_params)
        # alpha = nd.array([alpha]).reshape((1, 1))

        # Training loop
        for epoch in range(epoch_cnt):
            total_loss = 0
            trained_cnt = 0
            data = gdata.DataLoader(self.train_dataset,
                                    batch_size=self.batch_size,
                                    shuffle=is_random)
            # Iterate over every rating record
            if progress is True:
                data = tqdm(data)
            for u, R_u, i, t, bint, dev, r in data:
                trained_cnt += self.batch_size
                # Prediction plus explicit L2 regularization term
                with mxnet.autograd.record():
                    r_hat, reg = model(u, i, t, R_u, dev, bint)
                    loss = (r_hat - r)**2 + wd * reg
                loss.backward()
                # Parameter update
                # dense_trainer.step(self.batch_size)
                # svd_trainer.step(1)
                trainer.step(self.batch_size)

                total_loss += nd.sum(loss).asscalar()
                cur_loss = total_loss / trained_cnt
                if progress is True:
                    data.set_description('MSE=%.6f' % cur_loss)
            # # Print results
            # print('Epoch', epoch, 'finished, Loss =',
            #       total_loss[0].asscalar() / self.rating_cnt)
            # Evaluate on the test set
            if epoch >= verbose:
                self.test(progress, model)
Пример #30
0
    def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
        """Actor-critic step that re-weights samples by inverse action frequency.

        Returns (grads_list, batch_size) for the caller's trainer update.
        """
        batch_size = s_batch.shape[0]
        # Down-weight actions that dominate the batch: weight = (1 - freq)^2,
        # softmax-normalized over the batch below.
        action_indx = np.argmax(a_batch_one_hot,axis=1).tolist()
        action_stats = [action_indx.count(action_indx[i]) for i in range(batch_size)]
        action_bp_rate = (1 - np.array(action_stats)/float(batch_size))**2

        # Defensive copies so the caller's arrays are not mutated.
        s_batch = copy.deepcopy(s_batch)
        a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
        V_trace_batch = copy.deepcopy(V_trace)
        advantage_batch = copy.deepcopy(advantage)

        s_batch = nd.array(s_batch, ctx=CTX)
        a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
        V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
        advantage_batch = nd.array(advantage_batch, ctx=CTX)
        action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))

        self.actorcritic.collect_params().zero_grad()
        self.reset_noise()
        with mx.autograd.record():
            loss_vec = []
            probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)
            # Sum auxiliary losses the network appended into loss_vec.
            loss = 0.
            for element in loss_vec:
                loss = loss + element
            # print 'loss_dropout:', loss
            # The 1e-5 terms guard the logs against zero probabilities.
            logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1)+1e-5)
            entropy = -nd.sum(nd.sum(data=probs*nd.log(probs+1e-5), axis=1), axis=0)
            top_decision_entropy = -nd.sum(nd.sum(data=top_decisions*nd.log(top_decisions+1e-5), axis=1), axis=0)
            entropy_loss = - entropy
            # NOTE(review): top_decision_entropy_loss is only used by the
            # commented-out loss variant below -- confirm it should stay.
            top_decision_entropy_loss = - top_decision_entropy
            actorloss = -nd.sum(action_bp_rate*(logprob*advantage_batch), axis=0)
            criticloss = nd.sum(action_bp_rate*nd.square(values-V_trace_batch), axis=0)
            # actorloss = -nd.sum(logprob*advantage_batch, axis=0)
            # criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
            # NOTE(review): this overwrites the aux-loss sum accumulated above
            # -- confirm whether it was meant to be added in.
            loss = actorloss + 0.3*criticloss + 0.001*entropy_loss

            # loss = actorloss + 0.3*criticloss + 0.0001*top_decision_entropy_loss
        loss.backward()

        # CTname = threading.currentThread().getName()

        # print(CTname + ' actorloss : '+str(actorloss))
        # print(CTname + ' criticloss : '+str(criticloss))
        # print(CTname + ' entropy_loss : '+str(entropy_loss))

        grads_list = []
        # Collect gradients of every parameter except batchnorm ones.
        for name, value in self.actorcritic.collect_params().items():
            if name.find('batchnorm') < 0:
                # grads_list.append(mx.nd.array(value.grad().asnumpy()))
                grads_list.append(value.grad())

        return grads_list, batch_size
Пример #31
0
    def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
        """Actor-critic step with separate forward/backward passes per head.

        Returns (grads_list, batch_size) for the caller's trainer update.
        """
        batch_size = s_batch.shape[0]
        # Defensive copies so the caller's arrays are not mutated.
        s_batch = copy.deepcopy(s_batch)
        a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
        V_trace_batch = copy.deepcopy(V_trace)
        advantage_batch = copy.deepcopy(advantage)

        s_batch = nd.array(s_batch, ctx=CTX)
        a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
        V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
        advantage_batch = nd.array(advantage_batch, ctx=CTX)

        self.actorcritic.collect_params().zero_grad()
        with mx.autograd.record():
            loss_vec = []
            probs, _ = self.actorcritic(s_batch, loss_vec)
            # Policy-gradient loss: -sum(log pi(a|s) * advantage).
            logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1))
            actorloss = -nd.sum(logprob*advantage_batch, axis=0)

        actorloss.backward()
        # self.actortrainer.step(batch_size=batch_size, ignore_stale_grad=True)

        # NOTE(review): grads are zeroed only once above, so the critic's
        # backward below accumulates onto the actor's gradients -- confirm
        # joint accumulation is intended.
        with mx.autograd.record():
            loss_vec = []
            _, values = self.actorcritic(s_batch, loss_vec)
            criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)

            # print loss
        criticloss.backward()
        # self.critictrainer.step(batch_size=batch_size, ignore_stale_grad=True)

        grads_list = []
        # Collect gradients of every parameter except batchnorm ones.
        for name, value in self.actorcritic.collect_params().items():
            if name.find('batchnorm') < 0:
                # grads_list.append(mx.nd.array(value.grad().asnumpy()))
                grads_list.append(value.grad())

        return grads_list, batch_size
Пример #32
0
    def check_status(self, input, epoch):
        """Print training diagnostics: reconstruction error, cross-entropy, free energy."""
        n_sample = input.shape[0]

        # One Gibbs round trip: hidden sample from the data, then a reconstruction.
        ph_prob, ph_sample = self.sample_h_given_v(input)
        nv_prob, nv_sample, nh_prob, nh_sample = self.gibbs_hvh(ph_sample)
        # Mean squared reconstruction error per sample.
        error = nd.sum((input - nv_sample)**2) / n_sample
        # use logsoftmax if nan
        cross = -nd.mean(nd.sum(input * nd.log(nv_prob), axis=1))
        freeE = self.get_free_energy(input)

        sys.stdout.write("Training: ")
        sys.stdout.write("epoch= %d " % epoch)
        sys.stdout.write("cross= %f " % cross.asnumpy()[0])
        sys.stdout.write("error= %f " % error.asnumpy()[0])
        sys.stdout.write("freeE= %f " % freeE.asnumpy()[0])

        # Optional divergence diagnostics, only when reference data is set.
        if self.enum_states is not None:
            sys.stdout.write("KL= %f " % self.check_KL())
        if self.prob_RGs is not None:
            sys.stdout.write("rgKL= %f " % self.check_rgKL(nv_sample))

        sys.stdout.write("\n")
        return
Пример #33
0
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     """Softmax cross-entropy with an ignorable label and optional size averaging.

     pred: logits (or log-probabilities when self._from_logits is True).
     label: class indices when self._sparse_label, else dense targets.
     """
     if not self._from_logits:
         pred = F.log_softmax(pred, axis=self._axis)
     if self._sparse_label:
         if self._size_average:
             # Mask out positions whose label equals the ignore label.
             valid_label_map = (label !=
                                self._ignore_label).astype('float32')
             loss = -(F.pick(pred, label, axis=self._axis, keepdims=True) *
                      valid_label_map)
         else:
             # Zero the loss wherever the label is the ignore label.
             loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
             loss = F.where(
                 label.expand_dims(axis=self._axis) == self._ignore_label,
                 F.zeros_like(loss), loss)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     if self._size_average:
         # Rescale the mean so ignored positions do not dilute the loss.
         # NOTE(review): valid_label_map is only bound in the sparse-label
         # branch; size_average with dense labels would raise
         # UnboundLocalError here -- confirm.
         return F.mean(loss, axis=self._batch_axis, exclude=True) * \
             valid_label_map.size / F.sum(valid_label_map)
     else:
         return F.mean(loss, axis=self._batch_axis, exclude=True)
Пример #34
0
def evaluate_accuracy(data_iterator, net, ctx=[mx.cpu()]):
    """Accuracy of ``net`` across devices; resets MXDataIter iterators first."""
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    correct = nd.array([0])
    total = 0.
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    for batch in data_iterator:
        data, label, batch_size = _get_batch(batch, ctx)
        for X, y in zip(data, label):
            correct += nd.sum(net(X).argmax(axis=1)==y).copyto(mx.cpu())
            total += y.size
        correct.wait_to_read() # don't push too many operators into backend
    return correct.asscalar() / total
Пример #35
0
def log_sum_exp(vec):
    """Numerically stable log(sum(exp(vec))) via the max-shift trick."""
    shift = nd.max(vec).asscalar()
    return nd.log(nd.sum(nd.exp(vec - shift))) + shift
# Train the Gluon net with SGD and report the per-epoch average loss.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})


### Training
epochs = 5  # number of passes over the training data
batch_size = 10  # samples per minibatch
# learning_rate = .001  # manual learning rate (superseded by the trainer above)
for e in range(epochs):
    total_loss = 0
    for data, label in data_iter:
        with autograd.record():  # record ops for automatic differentiation
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()  # backpropagate
        # SGD(params, learning_rate)  # manual update, replaced by trainer.step
        trainer.step(batch_size)
        total_loss += nd.sum(loss).asscalar()
    # num_examples is defined elsewhere in the script -- presumably the
    # training-set size; verify before reuse.
    print("Epoch %d, average loss: %f" % (e, total_loss/num_examples))

## Inspect the trained parameters
dense = net[0]  # take the first layer from net, then read its weight and bias
# NOTE(review): the two lines below are Python 2 print statements.
print true_w, dense.weight.data()
print true_b, dense.bias.data()







def get_rmse_log(net, X_train, y_train):
    """Gets root mse between the logarithms of the prediction and the truth."""
    n = X_train.shape[0]
    # Clamp predictions at 1 so the log is defined and non-negative.
    preds = nd.clip(net(X_train), 1, float('inf'))
    total = nd.sum(square_loss(nd.log(preds), nd.log(y_train))).asscalar()
    return np.sqrt(2 * total / n)
Пример #38
0
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                          learning_rate, clipping_theta, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    """Train a character-level RNN with SGD plus gradient clipping, printing
    sample predictions every ``pred_period`` epochs.

    is_random_iter selects random vs consecutive minibatch sampling; is_lstm
    threads an extra cell state through the network.
    NOTE(review): the prediction printout at the bottom is a Python 2 print
    statement while ``print()`` calls are also used -- the snippet mixes
    Python 2/3 syntax.
    """
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1):
        # With consecutive sampling the hidden state is initialized only once
        # per epoch, at its start.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                # Cell state, only used when the RNN is an LSTM.
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps,
                                     ctx):
            # With random sampling the hidden state must be reinitialized
            # before every minibatch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    # Cell state, only used when the RNN is an LSTM.
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    # The LSTM carries the extra cell state through the step.
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_i b_j denote element j of the batch at time step i:
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]
                label = label.T.reshape((-1,))
                # Concatenate outputs, shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # After the steps above, outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()

            grad_clipping(params, clipping_theta, ctx)
            utils.SGD(params, learning_rate)

            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size

        if e % pred_period == 0:
            print("Epoch %d. Perplexity %f" % (e,
                                               exp(train_loss/num_examples)))
            for seq in seqs:
                print' - ', predict_rnn(rnn, seq, pred_len, params,
                      hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs,
                      is_lstm)
            print()
Пример #39
0
def main():
    """Train a Nature-style DQN agent on an Atari game.

    Parses command-line options, builds the game environment and the
    online/target Q-networks, then runs epsilon-greedy training with
    experience replay and a periodically-synced target network
    (optionally Double DQN).  Model parameters are checkpointed after
    every epoch.
    """
    # NOTE(review): the description string says "test", but this script trains.
    parser = argparse.ArgumentParser(description='Script to test the trained network on a game.')
    parser.add_argument('-r', '--rom', required=False, type=str,
                        default=os.path.join('roms', 'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-v', '--visualization', action='store_true',
                        help='Visualize the runs.')
    parser.add_argument('--lr', required=False, type=float, default=0.01,
                        help='Learning rate of the AdaGrad optimizer')
    parser.add_argument('--eps', required=False, type=float, default=0.01,
                        help='Eps of the AdaGrad optimizer')
    parser.add_argument('--clip-gradient', required=False, type=float, default=None,
                        help='Clip threshold of the AdaGrad optimizer')
    parser.add_argument('--double-q', action='store_true',
                        help='Use Double DQN only if specified')
    parser.add_argument('--wd', required=False, type=float, default=0.0,
                        help='Weight of the L2 Regularizer')
    parser.add_argument('-c', '--ctx', required=False, type=str, default='gpu',
                        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument('-d', '--dir-path', required=False, type=str, default='',
                        help='Saving directory of model files.')
    parser.add_argument('--start-eps', required=False, type=float, default=1.0,
                        help='Eps of the epsilon-greedy policy at the beginning')
    parser.add_argument('--replay-start-size', required=False, type=int, default=50000,
                        help='The step that the training starts')
    parser.add_argument('--kvstore-update-period', required=False, type=int, default=1,
                        help='The period that the worker updates the parameters from the sever')
    parser.add_argument('--kv-type', required=False, type=str, default=None,
                        help='type of kvstore, default will not use kvstore, could also be dist_async')
    parser.add_argument('--optimizer', required=False, type=str, default="adagrad",
                        help='type of optimizer')
    args = parser.parse_args()

    # Default checkpoint directory encodes the ROM name and the learning rate.
    if args.dir_path == '':
        rom_name = os.path.splitext(os.path.basename(args.rom))[0]
        args.dir_path = 'dqn-%s-lr%g' % (rom_name, args.lr)
    replay_start_size = args.replay_start_size
    max_start_nullops = 30          # random no-op steps at episode start
    replay_memory_size = 1000000    # capacity of the experience-replay buffer
    history_length = 4              # number of stacked frames per network input
    rows = 84                       # resized frame height
    cols = 84                       # resized frame width

    # parse_ctx is a project helper; only the first returned device is used
    # for the Q-network here — TODO confirm multi-device intent.
    ctx = parse_ctx(args.ctx)
    q_ctx = mx.Context(*ctx[0])

    game = AtariGame(rom_path=args.rom, resize_mode='scale', replay_start_size=replay_start_size,
                     resized_rows=rows, resized_cols=cols, max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size, display_screen=args.visualization,
                     history_length=history_length)

    ##RUN NATURE
    freeze_interval = 10000     # target-network sync period, in environment steps
    epoch_num = 200
    steps_per_epoch = 250000
    update_interval = 4         # one training update per 4 environment steps
    discount = 0.99

    eps_start = args.start_eps
    eps_min = 0.1
    # Linearly anneal exploration epsilon from eps_start to eps_min over 1M decisions.
    eps_decay = (eps_start - eps_min) / 1000000
    eps_curr = eps_start
    # Convert the sync period from environment steps to training updates.
    # NOTE(review): under Python 3 true division makes this a float (2500.0);
    # `training_steps % freeze_interval == 0` still behaves, but floor
    # division would be cleaner — confirm the intended Python version.
    freeze_interval /= update_interval
    minibatch_size = 32
    action_num = len(game.action_set)

    data_shapes = {'data': (minibatch_size, history_length) + (rows, cols),
                   'dqn_action': (minibatch_size,), 'dqn_reward': (minibatch_size,)}
    dqn_sym = dqn_sym_nature(action_num)
    qnet = Base(data_shapes=data_shapes, sym_gen=dqn_sym, name='QNet',
                initializer=DQNInitializer(factor_type="in"),
                ctx=q_ctx)
    # The target network starts as an exact copy of the online network.
    target_qnet = qnet.copy(name="TargetQNet", ctx=q_ctx)

    use_easgd = False  # NOTE(review): appears unused in this function
    optimizer = mx.optimizer.create(name=args.optimizer, learning_rate=args.lr, eps=args.eps,
                    clip_gradient=args.clip_gradient,
                    rescale_grad=1.0, wd=args.wd)
    updater = mx.optimizer.get_updater(optimizer)

    qnet.print_stat()
    target_qnet.print_stat()

    # Begin Playing Game
    training_steps = 0   # number of gradient updates performed so far
    total_steps = 0      # number of environment steps taken so far
    for epoch in range(epoch_num):
        # Run Epoch
        steps_left = steps_per_epoch
        episode = 0
        epoch_reward = 0
        start = time.time()
        game.start()
        while steps_left > 0:
            # Running New Episode
            episode += 1
            episode_loss = 0.0
            episode_q_value = 0.0
            episode_update_step = 0
            episode_action_step = 0
            time_episode_start = time.time()
            game.begin_episode(steps_left)
            while not game.episode_terminate:
                # 1. We need to choose a new action based on the current game status
                if game.state_enabled and game.replay_memory.sample_enabled:
                    # Epsilon-greedy: explore with probability eps_curr, which
                    # is annealed after every decision.
                    do_exploration = (npy_rng.rand() < eps_curr)
                    eps_curr = max(eps_curr - eps_decay, eps_min)
                    if do_exploration:
                        action = npy_rng.randint(action_num)
                    else:
                        # TODO Here we can in fact play multiple gaming instances simultaneously and make actions for each
                        # We can simply stack the current_state() of gaming instances and give prediction for all of them
                        # We need to wait after calling calc_score(.), which makes the program slow
                        # TODO Profiling the speed of this part!
                        current_state = game.current_state()
                        # Add a batch axis and scale pixel values from [0, 255] to [0, 1].
                        state = nd.array(current_state.reshape((1,) + current_state.shape),
                                         ctx=q_ctx) / float(255.0)
                        qval_npy = qnet.forward(is_train=False, data=state)[0].asnumpy()
                        action = numpy.argmax(qval_npy)
                        episode_q_value += qval_npy[0, action]
                        episode_action_step += 1
                else:
                    # Replay memory not warmed up yet: act uniformly at random.
                    action = npy_rng.randint(action_num)

                # 2. Play the game for a single mega-step (Inside the game, the action may be repeated for several times)
                game.play(action)
                total_steps += 1

                # 3. Update our Q network if we can start sampling from the replay memory
                #    Also, we update every `update_interval`
                if total_steps % update_interval == 0 and game.replay_memory.sample_enabled:
                    # 3.1 Draw sample from the replay_memory
                    training_steps += 1
                    episode_update_step += 1
                    states, actions, rewards, next_states, terminate_flags \
                        = game.replay_memory.sample(batch_size=minibatch_size)
                    states = nd.array(states, ctx=q_ctx) / float(255.0)
                    next_states = nd.array(next_states, ctx=q_ctx) / float(255.0)
                    actions = nd.array(actions, ctx=q_ctx)
                    rewards = nd.array(rewards, ctx=q_ctx)
                    terminate_flags = nd.array(terminate_flags, ctx=q_ctx)

                    # 3.2 Use the target network to compute the scores and
                    #     get the corresponding target rewards
                    if not args.double_q:
                        # Standard DQN target: r + gamma * max_a' Q_target(s', a'),
                        # zeroed on terminal transitions.
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(target_qval))\
                                           * (1.0 - terminate_flags) * discount
                    else:
                        # Double DQN: the online network selects the action,
                        # the target network evaluates it.
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        qval = qnet.forward(is_train=False, data=next_states)[0]

                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(qval))\
                                           * (1.0 - terminate_flags) * discount
                    outputs = qnet.forward(is_train=True,
                                           data=states,
                                           dqn_action=actions,
                                           dqn_reward=target_rewards)
                    qnet.backward()
                    qnet.update(updater=updater)

                    # 3.3 Calculate Loss
                    # Huber-style decomposition (delta=1): quadratic for
                    # |diff| <= 1, linear beyond.  Computed for logging only —
                    # the gradient comes from qnet.backward() above.
                    diff = nd.abs(nd.choose_element_0index(outputs[0], actions) - target_rewards)
                    quadratic_part = nd.clip(diff, -1, 1)
                    loss = 0.5 * nd.sum(nd.square(quadratic_part)).asnumpy()[0] +\
                           nd.sum(diff - quadratic_part).asnumpy()[0]
                    episode_loss += loss

                    # 3.3 Update the target network every freeze_interval
                    if training_steps % freeze_interval == 0:
                        qnet.copy_params_to(target_qnet)
            steps_left -= game.episode_step
            time_episode_end = time.time()
            # Update the statistics
            epoch_reward += game.episode_reward
            info_str = "Epoch:%d, Episode:%d, Steps Left:%d/%d, Reward:%f, fps:%f, Exploration:%f" \
                        % (epoch, episode, steps_left, steps_per_epoch, game.episode_reward,
                           game.episode_step / (time_episode_end - time_episode_start), eps_curr)
            if episode_update_step > 0:
                info_str += ", Avg Loss:%f/%d" % (episode_loss / episode_update_step,
                                                  episode_update_step)
            if episode_action_step > 0:
                info_str += ", Avg Q Value:%f/%d" % (episode_q_value / episode_action_step,
                                                  episode_action_step)
            # Log only every 100th episode to keep output volume manageable.
            if episode % 100 == 0:
                logging.info(info_str)
        end = time.time()
        fps = steps_per_epoch / (end - start)
        # Checkpoint the online network after every epoch.
        qnet.save_params(dir_path=args.dir_path, epoch=epoch)
        logging.info("Epoch:%d, FPS:%f, Avg Reward: %f/%d"
                 % (epoch, fps, epoch_reward / float(episode), episode))
Пример #40
0
    return nd.dot(X, w) + b # return the prediction value

# loss
def square_loss(yhat, y):
    """Element-wise squared error between predictions and targets.

    The target is reshaped to the prediction's shape first, so e.g. a
    column-vector label lines up with a flat prediction vector.
    """
    residual = yhat - y.reshape(yhat.shape)
    return residual ** 2

# optimization
def SGD(params, lr):
    """Vanilla stochastic gradient descent: param <- param - lr * param.grad.

    The slice assignment updates each parameter array in place, so external
    references (and any attached gradient buffers) remain valid.
    """
    for p in params:
        p[:] = p - lr * p.grad


# training
epochs = 5  # scan 5 times for raw data
learning_rate = 0.001
for e in range(epochs):
    total_loss = 0
    for data, label in data_iter():
        # Record the forward pass so autograd can compute parameter gradients.
        with ag.record():
            output = net(data)
            loss = square_loss(output, label)  # label is the ground-truth value from the training set
        loss.backward()
        # In-place SGD step using the gradients populated by backward().
        SGD(params, learning_rate)

        total_loss += nd.sum(loss).asscalar() # accumulate as a Python float
    # NOTE(review): num_examples is defined elsewhere in the file — presumably
    # the dataset size, so this prints the mean per-sample loss; verify.
    print("Epoch %d, average loss: %f" % (e, total_loss/num_examples))

# Compare the learned parameters against the ground-truth ones.
print(true_b, b);
print(true_w, w);

Пример #41
0
 def newgradfun(g):
     """Wrap the enclosing `gradfun`, collapsing its output to a scalar sum."""
     return ndarray.sum(gradfun(g))
Пример #42
0
def accuracy(output, label):
    """Return the number of rows whose argmax prediction equals the label."""
    predicted = output.argmax(axis=1)
    return nd.sum(predicted == label).asscalar()