Example #1
        def log_softmax_likelihood(yhat_linear, y):
            """ 
				Likelihood of output yhat_linear, given the label y
				yhat_linear, y: ndarray
			"""
            return nd.nansum(y * nd.log_softmax(yhat_linear),
                             axis=0,
                             exclude=True)
Example #2
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [
            _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1:
            # no positive samples found, return dummy losses
            return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

        # compute element-wise cross entropy loss and sort, then perform
        # negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where(
                (pos + hard_negative) > 0,
                cls_loss,
                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(
                    cls_loss,
                    axis=0,
                    exclude=True) /
                num_pos_all)

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(
                box_loss > self._rho,
                box_loss - 0.5 * self._rho,
                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(
                    box_loss,
                    axis=0,
                    exclude=True) /
                num_pos_all)
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
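
This forward() appears to match GluonCV's SSDMultiBoxLoss (an assumption based on the code, not stated by this listing); if so, a toy call would look like the sketch below, with all shapes and values purely illustrative:

    from mxnet import nd
    from gluoncv.loss import SSDMultiBoxLoss  # assumed origin of the snippet above

    mbox_loss = SSDMultiBoxLoss()
    # Batch of 2, 4 anchors, 3 classes (class 0 = background), 4 box offsets.
    cls_pred = nd.random.normal(shape=(2, 4, 3))
    box_pred = nd.random.normal(shape=(2, 4, 4))
    cls_target = nd.array([[0, 1, 2, 0], [0, 0, 1, 0]])
    box_target = nd.random.normal(shape=(2, 4, 4))

    # Each return value is a list with one entry per device (a single entry here).
    sum_loss, cls_loss, box_loss = mbox_loss(cls_pred, box_pred, cls_target, box_target)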
Example #3
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        pos_ct = [ct > 0 for ct in cls_target]
        num_pos = [ct.sum() for ct in pos_ct]
        num_pos_all = sum([p.asscalar() for p in num_pos])
        # print ('num_pos_all: {}'.format(num_pos_all))
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [
                nd.sum(cp * 0) + nd.sum(bp * 0)
                for cp, bp in zip(cls_pred, box_pred)
            ]
            return sum_losses, cls_losses, box_losses

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            # print ('cp shape: {}'.format(cp.shape))
            # print ('bp shape: {}'.format(bp.shape))
            # print ('ct shape: {}'.format(ct.shape))
            # print ('bt shape: {}'.format(bt.shape))
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(
                self._min_hard_negatives,
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho,
                                box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #4
File: loss.py  Project: xcgoner/gluon-exp
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        # synchronize across different machines
        # print('before sync:', num_pos_all)
        if self._distributed:
            num_pos_out = nd.zeros(1, mx.cpu())
            num_pos_in = nd.zeros(1, mx.cpu()) + num_pos_all
            # allreduce only supports pushpull
            if 'allreduce' in self._kv_store_type:
                self._kv_store.pushpull(self._num_pos_key, num_pos_in, num_pos_out)
            else:
                self._kv_store.push(self._num_pos_key, num_pos_in)
                # self._kv_store._barrier()
                self._kv_store.pull(self._num_pos_key, out=num_pos_out)
            num_pos_all = num_pos_out.asscalar()
        # print('after sync:', num_pos_all)
        if num_pos_all < 1:
            # no positive samples found, return dummy losses
            return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #5
    def forward(self, context):

        shared = self.hidden(context)
        if self.sm_out:
            preds = nd.softmax(
                nd.stack(*[l(shared)[:, 1] for l in self.actions]).T, axis=1)
        elif self.log_sm_out:
            preds = nd.log_softmax(
                nd.stack(*[l(shared)[:, 1] for l in self.actions]).T, axis=1)
        else:
            preds = nd.stack(*[l(shared)[:, 1] for l in self.actions]).T

        return preds
Example #6
    def retrain_enc(self, l2_alpha=0.1):
        docs = self.data.get_documents(key='train')
        with autograd.record():
            ### reconstruction phase ###
            y_onehot_u = self.Enc(docs)
            y_onehot_u_softmax = nd.softmax(y_onehot_u)
            x_reconstruction_u = self.Dec(y_onehot_u_softmax)

            logits = nd.log_softmax(x_reconstruction_u)
            loss_reconstruction = nd.mean(nd.sum(- docs * logits, axis=1))
            loss_reconstruction = loss_reconstruction + l2_alpha * nd.mean(nd.norm(y_onehot_u, ord=1, axis=1))
            loss_reconstruction.backward()

        self.optimizer_enc.step(1)
        return loss_reconstruction.asscalar()
Example #7
def total_loss(output, params, mus, sigmas, label_one_hot, log_prior):
    log_likelihood_s = nd.sum(
        nd.nansum(label_one_hot * nd.log_softmax(output), axis=0,
                  exclude=True))
    log_prior_pre_sum = []
    for param in params:
        log_prior_pre_sum.append(nd.sum(log_prior(param)))
    log_prior_sum = sum(log_prior_pre_sum)
    log_var_posterior_pre_sum = []
    for i in range(len(params)):
        log_var_posterior_pre_sum.append(
            nd.sum(log_gaussian(params[i], mus[i], sigmas[i])))
    log_var_posterior_sum = sum(log_var_posterior_pre_sum)
    total_loss = 1.0 / num_batches * (log_var_posterior_sum -
                                      log_prior_sum) - log_likelihood_s
    return total_loss
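
total_loss above relies on a log_gaussian helper (and a global num_batches) that are not shown here. The sketch below gives the usual element-wise Gaussian log-density such a helper typically computes; it is an assumption, not code from the original file:

    import numpy as np
    from mxnet import nd

    def log_gaussian(x, mu, sigma):
        # Element-wise log N(x | mu, sigma^2); assumed definition, not the original.
        return -0.5 * np.log(2.0 * np.pi) - nd.log(sigma) - (x - mu) ** 2 / (2.0 * sigma ** 2)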
Example #8
def eval_step(data_tr, data_te, data_type="valid"):

    running_loss = 0.0
    eval_idxlist = list(range(data_tr.shape[0]))
    eval_N = data_tr.shape[0]
    eval_steps = len(range(0, eval_N, args.batch_size))

    n100_list, r20_list, r50_list = [], [], []

    with trange(eval_steps) as t:
        for batch_idx, start_idx in zip(t, range(0, eval_N, args.batch_size)):
            t.set_description(data_type)

            end_idx = min(start_idx + args.batch_size, eval_N)
            X_tr = data_tr[eval_idxlist[start_idx:end_idx]]
            X_te = data_te[eval_idxlist[start_idx:end_idx]]
            X_tr_inp = nd.array(X_tr.toarray()).as_in_context(ctx)

            with autograd.predict_mode():
                if model.__class__.__name__ == "MultiVAE":
                    X_out, mu, logvar = model(X_tr_inp)
                    loss = vae_loss_fn(X_tr_inp, X_out, mu, logvar, train_step.anneal)
                elif model.__class__.__name__ == "MultiDAE":
                    X_out = model(X_tr_inp)
                    loss = -nd.mean(nd.sum(nd.log_softmax(X_out) * X_tr_inp, -1))

            running_loss += loss.asscalar()
            avg_loss = running_loss / (batch_idx + 1)

            # Exclude examples from training set
            X_out = X_out.asnumpy()
            X_out[X_tr.nonzero()] = -np.inf

            n100 = NDCG_binary_at_k_batch(X_out, X_te, k=100)
            r20 = Recall_at_k_batch(X_out, X_te, k=20)
            r50 = Recall_at_k_batch(X_out, X_te, k=50)
            n100_list.append(n100)
            r20_list.append(r20)
            r50_list.append(r50)

            t.set_postfix(loss=avg_loss)

        n100_list = np.concatenate(n100_list)
        r20_list = np.concatenate(r20_list)
        r50_list = np.concatenate(r50_list)

    return avg_loss, np.mean(n100_list), np.mean(r20_list), np.mean(r50_list)
Example #9
def get_loss(pred, label, trg_vocab_size, trg_pad, epsilon=0.1):
    labelprob = nd.one_hot(label, trg_vocab_size)

    # Label smoothing
    smoothed_labelprob = (1 - epsilon) * labelprob + epsilon / trg_vocab_size

    logprob = nd.log_softmax(pred)

    loss = -nd.sum(logprob * smoothed_labelprob, axis=-1, keepdims=False)

    # mask PAD
    mask = label != trg_pad
    loss = loss * mask

    # batch_axis = 0
    loss = nd.mean(loss, axis=0, exclude=True)

    return loss
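
A toy call for the label-smoothed loss above, with illustrative shapes and a hypothetical PAD id of 0:

    from mxnet import nd

    pred = nd.random.normal(shape=(2, 3, 5))   # (batch, seq_len, vocab)
    label = nd.array([[1, 4, 0], [2, 0, 0]])   # 0 marks padding positions
    print(get_loss(pred, label, trg_vocab_size=5, trg_pad=0))  # per-sequence loss, shape: (2,)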
Example #10
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0) for cp, bp in zip(cls_pred, box_pred)]
            return sum_losses, cls_losses, box_losses


        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(self._min_hard_negatives, pos.sum(axis=1)
                                              * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #11
    def decode(self, x):
        batch_size = x.shape[0]
        state = self.init_hidden(batch_size, self.ctx)
        outputs_pgm = []
        outputs_param = []

        for i in range(self.seq_length):
            if i == 0:
                xt = x
            else:
                prob_pre = nd.exp(outputs_pgm[-1])
                it1 = nd.argmax(prob_pre, axis=1)
                #print("it1 decode:",it1)
                xt = self.pgm_embed(it1)
            #print("xt decode:",xt)
            output, state = self.core(xt.expand_dims(axis=0), state)

            pgm_feat1 = nd.relu(self.logit1(output.squeeze(0)))
            pgm_feat2 = self.logit2(pgm_feat1)
            pgm_score = nd.log_softmax(pgm_feat2, axis=1)

            trans_prob = nd.softmax(pgm_feat2, axis=1).detach()
            param_feat1 = nd.relu(self.regress1(output.squeeze(0)))
            param_feat2 = nd.concat(trans_prob, param_feat1, dim=1)
            param_score = self.regress2(param_feat2)
            param_score = param_score.reshape(batch_size, self.vocab_size + 1,
                                              self.max_param)

            index = nd.argmax(trans_prob, axis=1)
            index = index.expand_dims(axis=1).expand_dims(axis=2).broadcast_to(
                shape=(batch_size, 1, self.max_param)).detach()  ##
            param_score = nd.pick(param_score, index, 1)

            outputs_pgm.append(pgm_score)
            outputs_param.append(param_score)
        outputs_pgm = [_.expand_dims(axis=1) for _ in outputs_pgm]
        outputs_param = [_.expand_dims(axis=1) for _ in outputs_param]
        pgms = outputs_pgm[0]
        params = outputs_param[0]
        for i in range(1, len(outputs_pgm)):
            pgms = nd.concat(pgms, outputs_pgm[i], dim=1)
            params = nd.concat(params, outputs_param[i], dim=1)
        return [pgms, params]
Example #12
def get_smoothed_loss(pred, label, num_classes, trg_pad, smooth_alpha=0.1):
    pred = nd.maximum(pred, 1e-10)
    logprob = nd.log_softmax(pred)

    # cross entropy
    ce = -nd.pick(logprob, label)

    pre_class_gain = smooth_alpha / (num_classes - 1)

    # loss = (1 - smooth_alpha - pre_class_gain) * ce - pre_class_gain * sum(logprob)
    loss = (1 - smooth_alpha - pre_class_gain) * ce - nd.sum(
        pre_class_gain * logprob, axis=-1, keepdims=False)

    mask = label != trg_pad
    loss = loss * mask

    loss = nd.sum(loss) / mask.sum()

    return loss
Example #13
def train_step(model, optimizer, data, epoch):

    running_loss = 0.0
    global update_count
    N = data.shape[0]
    idxlist = list(range(N))
    np.random.shuffle(idxlist)
    training_steps = len(range(0, N, args.batch_size))

    with trange(training_steps) as t:
        for batch_idx, start_idx in zip(t, range(0, N, args.batch_size)):
            t.set_description("epoch: {}".format(epoch + 1))

            end_idx = min(start_idx + args.batch_size, N)
            X_inp = data[idxlist[start_idx:end_idx]]
            X_inp = nd.array(X_inp.toarray()).as_in_context(ctx)

            if args.constant_anneal:
                anneal = args.anneal_cap
            elif args.anneal_epochs is not None:
                anneal = min(
                    args.anneal_cap,
                    args.anneal_cap * (update_count / total_anneal_steps),
                )
            else:
                anneal = min(args.anneal_cap, update_count / total_anneal_steps)
            update_count += 1

            with autograd.record():
                if model.__class__.__name__ == "MultiVAE":
                    X_out, mu, logvar = model(X_inp)
                    loss = vae_loss_fn(X_inp, X_out, mu, logvar, anneal)
                    train_step.anneal = anneal
                elif model.__class__.__name__ == "MultiDAE":
                    X_out = model(X_inp)
                    loss = -nd.mean(nd.sum(nd.log_softmax(X_out) * X_inp, -1))
            loss.backward()
            trainer.step(X_inp.shape[0])
            running_loss += loss.asscalar()
            avg_loss = running_loss / (batch_idx + 1)

            t.set_postfix(loss=avg_loss)
Example #14
def softmax_cross_entropy(yhat_linear, y):
    return -nd.nansum(y * nd.log_softmax(yhat_linear), axis=0, exclude=True)
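
This is just the negative of the per-sample log-likelihood helper in Example #1; a quick illustrative check with toy values:

    from mxnet import nd

    yhat_linear = nd.array([[2.0, 0.5, -1.0]])
    y = nd.array([[1.0, 0.0, 0.0]])
    # For a one-hot label this equals -log_softmax(yhat_linear) at the labeled class.
    print(softmax_cross_entropy(yhat_linear, y))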
Example #15
    def unlabeled_train_op_mmd_combine(self, update_enc=True):
        '''
        Trains the MMD model
        '''
        batch_size = self.args['batch_size']
        model_ctx = self.model_ctx
        eps = 1e-10

        # Retrieve data
        docs = self.data.get_documents(key='train')
        if self.args['use_kd']:
            split_on = docs.shape[1] // 2
            docs, bert_logits = docs[:,:split_on], docs[:,split_on:]
            t = self.args['kd_softmax_temp']
            kd_docs = nd.softmax(bert_logits / t) * nd.sum(docs, axis=1, keepdims=True)
            kd_docs = kd_docs * (kd_docs > self.args['kd_min_count'])

        y_true = np.random.dirichlet(np.ones(self.ndim_y) * self.args['dirich_alpha'], size=batch_size)
        y_true = nd.array(y_true, ctx=model_ctx)

        with autograd.record():
            ### reconstruction phase ###
            y_onehot_u = self.Enc(docs)
            y_onehot_u_softmax = nd.softmax(y_onehot_u)
            if self.args['latent_noise'] > 0:
                y_noise = np.random.dirichlet(np.ones(self.ndim_y) * self.args['dirich_alpha'], size=batch_size)
                y_noise = nd.array(y_noise, ctx=model_ctx)
                y_onehot_u_softmax = (1 - self.args['latent_noise']) * y_onehot_u_softmax + self.args['latent_noise'] * y_noise
            x_reconstruction_u = self.Dec(y_onehot_u_softmax)

            if self.args['use_kd']:
                kd_logits = nd.log_softmax(x_reconstruction_u / t)
                logits = nd.log_softmax(x_reconstruction_u)

                kd_loss_reconstruction = nd.mean(nd.sum(- kd_docs * kd_logits, axis=1))
                loss_reconstruction = nd.mean(nd.sum(- docs * logits, axis=1))

                loss_total = self.args['recon_alpha'] * (
                    self.args['kd_loss_alpha'] * t * t * (kd_loss_reconstruction) +
                    (1 - self.args['kd_loss_alpha']) * loss_reconstruction
                )
            else: 
                logits = nd.log_softmax(x_reconstruction_u)
                loss_reconstruction = nd.mean(nd.sum(- docs * logits, axis=1))
                loss_total = loss_reconstruction * self.args['recon_alpha']

            ### mmd phase ###
            if self.args['adverse']:
                y_fake = self.Enc(docs)
                y_fake = nd.softmax(y_fake)
                loss_mmd = mmd_loss(y_true, y_fake, ctx_model=model_ctx, t=self.args['kernel_alpha'])
                loss_total = loss_total + loss_mmd

            if self.args['l2_alpha'] > 0:
                loss_total = loss_total + self.args['l2_alpha'] * nd.mean(nd.sum(nd.square(y_onehot_u), axis=1))

            loss_total.backward()

        self.optimizer_enc.step(1)
        self.optimizer_dec.step(1)  # self.m.args['batch_size']

        latent_max = nd.zeros(self.args['ndim_y'], ctx=model_ctx)
        for max_ind in nd.argmax(y_onehot_u, axis=1):
            latent_max[max_ind] += 1.0
        latent_max /= batch_size
        latent_entropy = nd.mean(nd.sum(- y_onehot_u_softmax * nd.log(y_onehot_u_softmax + eps), axis=1))
        latent_v = nd.mean(y_onehot_u_softmax, axis=0)
        dirich_entropy = nd.mean(nd.sum(- y_true * nd.log(y_true + eps), axis=1))

        if self.args['adverse']:
            loss_mmd_return = loss_mmd.asscalar()
        else:
            loss_mmd_return = 0.0
        return nd.mean(loss_reconstruction).asscalar(), loss_mmd_return, latent_max.asnumpy(), latent_entropy.asscalar(), latent_v.asnumpy(), dirich_entropy.asscalar()
Example #16
    def test_op(self, num_samples=None, num_epochs=None, reset=True, dataset='test'):
        '''
        Evaluates the model using num_samples.

        Args
        ----
        num_samples: integer, default None
          The number of samples to evaluate on. This is converted to
          evaluating on (num_samples // batch_size) minibatches.
        num_epochs: integer, default None
          The number of epochs to evaluate on. This used if num_samples
          is not specified. If neither is specified, defaults to 1 epoch.
        reset: bool, default True
          Whether to reset the test data index to 0 before iterating
          through and evaluating on minibatches.
        dataset: string, default 'test':
          Which dataset to evaluate on: 'valid' or 'test'.

        Returns
        -------
        Loss_u: float
          The loss on the unlabeled data.
        Loss_l: float
          The loss on the labeled data.
        Eval_u: list of floats
          A list of evaluation metrics on the unlabeled data.
        Eval_l: list of floats
          A list of evaluation metrics on the labeled data.
        '''
        batch_size = self.args['batch_size']
        model_ctx = self.model_ctx

        if num_samples is None and num_epochs is None:
            # assume full dataset evaluation
            num_epochs = 1

        if reset:
            # Reset Data to Index Zero
            if self.data.data[dataset] is not None:
                self.data.force_reset_data(dataset)
            if self.data.data[dataset + '_with_labels'] is not None:
                self.data.force_reset_data(dataset+'_with_labels')

        # Unlabeled Data
        u_loss = 'NA'
        u_eval = []
        if self.data.data[dataset] is not None:
            u_loss = 0
            if num_samples is None:
                num_samps = self.data.data[dataset].shape[0] * num_epochs
            else:
                num_samps = num_samples
            batches = int(np.ceil(num_samps / self.args['batch_size']))
            batch_iter = range(batches)
            if batches > 1: batch_iter = tqdm(batch_iter, desc='unlabeled')
            for batch in batch_iter:
                # 1. Retrieve data
                docs = self.data.get_documents(key=dataset)
                if self.args['use_kd']:
                    split_on = docs.shape[1] // 2
                    docs, bert_logits = docs[:,:split_on], docs[:,split_on:]
                    # TODO: below is not used, but also may not be necessary
                    t = self.args['kd_softmax_temp']
                    kd_docs = nd.softmax(bert_logits / t) * nd.sum(docs, axis=1, keepdims=True)

                # 2. Compute loss
                y_u = self.Enc(docs)
                y_onehot_u_softmax = nd.softmax(y_u)
                x_reconstruction_u = self.Dec(y_onehot_u_softmax)

                logits = nd.log_softmax(x_reconstruction_u)
                loss_recon_unlabel = nd.sum(- docs * logits, axis=1)

                # 3. Convert to numpy
                u_loss += nd.mean(loss_recon_unlabel).asscalar()
            u_loss /= batches

        # Labeled Data
        l_loss = 0.0
        l_acc = 0.0
        if self.data.data[dataset+'_with_labels'] is not None:
            l_loss = 0
            if num_samples is None:
                num_samps = self.data.data[dataset+'_with_labels'].shape[0] * num_epochs
            else:
                num_samps = num_samples
            batches = int(np.ceil(num_samps / self.args['batch_size']))
            batch_iter = range(batches)
            if batches > 1: batch_iter = tqdm(batch_iter, desc='labeled')
            softmaxCEL = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)
            for batch in batch_iter:
                # 1. Retrieve data
                labeled_docs, labels = self.data.get_documents(key=dataset+'_with_labels', split_on=self.data.data_dim)
                # 2. Compute loss
                y_u = self.Enc(docs)
                y_onehot_u_softmax = nd.softmax(y_u)
                class_pred = nd.argmax(y_onehot_u_softmax, axis=1)
                l_a = labels[list(range(labels.shape[0])), class_pred]
                l_acc += nd.mean(l_a).asscalar()
                labels = labels / nd.sum(labels, axis=1, keepdims=True)
                l_l = softmaxCEL(y_onehot_u_softmax, labels)

                # 3. Convert to numpy
                l_loss += nd.mean(l_l).asscalar()
            l_loss /= batches
            l_acc /= batches

        return u_loss, l_loss, l_acc
Example #17
    def unlabeled_train_op_adv_combine_add(self, update_enc=True):
        '''
        Trains the GAN model
        '''
        batch_size = self.args['batch_size']
        model_ctx = self.model_ctx
        eps = 1e-10
        ##########################
        ### unsupervised phase ###
        ##########################
        # Retrieve data
        docs = self.data.get_documents(key='train')

        class_true = nd.zeros(batch_size, dtype='int32', ctx=model_ctx)
        class_fake = nd.ones(batch_size, dtype='int32', ctx=model_ctx)
        loss_reconstruction = nd.zeros((1,), ctx=model_ctx)

        ### adversarial phase ###
        discriminator_z_confidence_true = nd.zeros(shape=(1,), ctx=model_ctx)
        discriminator_z_confidence_fake = nd.zeros(shape=(1,), ctx=model_ctx)
        discriminator_y_confidence_true = nd.zeros(shape=(1,), ctx=model_ctx)
        discriminator_y_confidence_fake = nd.zeros(shape=(1,), ctx=model_ctx)
        loss_discriminator = nd.zeros(shape=(1,), ctx=model_ctx)
        dirich_entropy = nd.zeros(shape=(1,), ctx=model_ctx)

        ### generator phase ###
        loss_generator = nd.zeros(shape=(1,), ctx=model_ctx)

        ### reconstruction phase ###
        with autograd.record():
            y_u = self.Enc(docs)
            y_onehot_u_softmax = nd.softmax(y_u)
            x_reconstruction_u = self.Dec(y_onehot_u_softmax)

            logits = nd.log_softmax(x_reconstruction_u)
            loss_reconstruction = nd.sum(- docs * logits, axis=1)
            loss_total = loss_reconstruction * self.args['recon_alpha']

            if self.args['adverse']: #and np.random.rand()<0.8:
                y_true = np.random.dirichlet(np.ones(self.ndim_y) * self.args['dirich_alpha'], size=batch_size)
                y_true = nd.array(y_true, ctx=model_ctx)
                dy_true = self.Dis_y(y_true)
                dy_fake = self.Dis_y(y_onehot_u_softmax)
                discriminator_y_confidence_true = nd.mean(nd.softmax(dy_true)[:, 0])
                discriminator_y_confidence_fake = nd.mean(nd.softmax(dy_fake)[:, 1])
                softmaxCEL = gluon.loss.SoftmaxCrossEntropyLoss()
                loss_discriminator = softmaxCEL(dy_true, class_true) + \
                                       softmaxCEL(dy_fake, class_fake)
                loss_generator = softmaxCEL(dy_fake, class_true)
                loss_total = loss_total + loss_discriminator + loss_generator
                dirich_entropy = nd.mean(nd.sum(- y_true * nd.log(y_true + eps), axis=1))

        loss_total.backward()

        self.optimizer_enc.step(batch_size)
        self.optimizer_dec.step(batch_size)
        self.optimizer_dis_y.step(batch_size)

        latent_max = nd.zeros(self.args['ndim_y'], ctx=model_ctx)
        for max_ind in nd.argmax(y_onehot_u_softmax, axis=1):
            latent_max[max_ind] += 1.0
        latent_max /= batch_size
        latent_entropy = nd.mean(nd.sum(- y_onehot_u_softmax * nd.log(y_onehot_u_softmax + eps), axis=1))
        latent_v = nd.mean(y_onehot_u_softmax, axis=0)

        return nd.mean(loss_discriminator).asscalar(), nd.mean(loss_generator).asscalar(), nd.mean(loss_reconstruction).asscalar(), \
               nd.mean(discriminator_z_confidence_true).asscalar(), nd.mean(discriminator_z_confidence_fake).asscalar(), \
               nd.mean(discriminator_y_confidence_true).asscalar(), nd.mean(discriminator_y_confidence_fake).asscalar(), \
               latent_max.asnumpy(), latent_entropy.asscalar(), latent_v.asnumpy(), dirich_entropy.asscalar()
Example #18
def train(epoch, train_loader, model, loss, optimizer, opt, ctx, train_loss, train_iou):
    """
    one epoch training for program executor
    """
    loss_sum,iou_sum,n = 0.0,0.0,0
    for idx, data in enumerate(train_loader):
        start_t = time.time()

        shape, label, param = data
        bsz = shape.shape[0]
        n_step = label.shape[1]
        #print("label.shape:",label)
        #print("n_step:",n_step,"bsz:",bsz,"stop_id:",stop_id)
        
        index = np.array(list(map(lambda x: n_step, label)))-1
        #index = label
        
        # add noise during training, making the executor accept
        # continuous output from program generator
        label = label.reshape(-1,1).asnumpy()
        pgm_vector = 0.2 * np.random.uniform(0,1,(bsz * n_step, stop_id))
        pgm_noise = 0.2 *np.random.uniform(0,1,label.shape)
        pgm_value = 1 - pgm_noise
        #print('pgm_val.shape:',pgm_value.shape,'label.shape:',label.shape,'label.shape:',label.shape)
        pgm_vector = scatter_numpy(pgm_vector,1,label,pgm_value).reshape(bsz,n_step,stop_id)
        
        
        param_noise = nd.random_uniform(0,1,shape=param.shape)
        param_vector = param + 0.6 * (param_noise - 0.5)
        #print("param_vector.shape:",param_vector.shape)
        gt = shape.as_in_context(ctx)
        #print(pgm_vector.dtype)
        index = nd.from_numpy(index).astype('int64').as_in_context(ctx)
        pgm_vector = nd.from_numpy(pgm_vector).astype('float32').as_in_context(ctx)
        param_vector = param_vector.as_in_context(ctx)


        with autograd.record():
            pred = model(pgm_vector, param_vector, index)
            scores = nd.log_softmax(pred,axis=1)
            pred0 = scores[:,0].squeeze()*opt.n_weight
            pred1 = scores[:,1].squeeze()*opt.p_weight
            l = -nd.where(gt, pred1, pred0).mean((1,2,3))
            #l = -(nd.pick(scores1, gt, axis=1, keepdims=True)*opt.n_weight
            #    +nd.pick(scores2,(1-gt), axis=1, keepdims=True)*opt.p_weight).mean((1,2,3,4))
        l.backward()
                                        
        #clip_gradient(optimizer, opt.grad_clip)
        #optimizer._allreduce_grads();

        optimizer.step(l.shape[0],ignore_stale_grad=True)
        
        l = l.mean().asscalar()
        
        pred = nd.softmax(pred,axis = 1)
        pred = pred[:, 1, :, :, :]
        s1 = gt.reshape(-1, 32, 32, 32).astype('float32').as_in_context(mx.cpu())
        s2 = pred.squeeze().as_in_context(mx.cpu())
        #print(s2.shape)
        s2 = (s2 > 0.5)

        batch_iou = BatchIoU(s1, s2)
        iou = batch_iou.mean()
        end_t = time.time()
        loss_sum+=l
        n+=1
        iou_sum+=iou

        if idx % (opt.info_interval * 10) == 0:
            print("Train: epoch {} batch {}/{}, loss13 = {:.3f}, iou = {:.3f}, time = {:.3f}"
                  .format(epoch, idx, len(train_loader), l, iou, end_t - start_t))
            sys.stdout.flush()
        
    train_loss.append(loss_sum/n)
    train_iou.append(iou_sum/n)
Example #19
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices.
        
        Parameters
        ----------
        cls_pred : mxnet.nd.NDArray
        Predicted classes.
        box_pred : mxnet.nd.NDArray
        Predicted bounding-boxes.
        cls_target : mxnet.nd.NDArray
        Ground-truth classes.
        box_target : mxnet.nd.NDArray
        Ground-truth bounding-boxes.
        
        Returns
        -------
        tuple of NDArrays
            sum_losses : array with containing the sum of class prediction and bounding-box regression loss.
            cls_losses : array of class prediction loss.
            box_losses : array of box regression L1 loss.
        
        """
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [
                nd.sum(cp * 0) + nd.sum(bp * 0)
                for cp, bp in zip(cls_pred, box_pred)
            ]
            return sum_losses, cls_losses, box_losses

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(
                self._min_hard_negatives,
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho,
                                box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #20
 def forward(self, x):
     return nd.log_softmax(self.proj(x), axis=-1)
Example #21
 def forward(self, enc1, enc2):
     x = nd.concat(enc1, enc2)
     x = self.dense(x)
     x = nd.log_softmax(x)
     return x
Example #22
def vae_loss_fn(inp, out, mu, logvar, anneal):
    neg_ll = -nd.mean(nd.sum(nd.log_softmax(out) * inp, -1))
    KLD = -0.5 * nd.mean(nd.sum(1 + logvar - nd.power(mu, 2) - nd.exp(logvar), axis=1))
    return neg_ll + anneal * KLD
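
A toy call for vae_loss_fn, with illustrative shapes (2 users, 4 items, a 3-dimensional latent space) and an arbitrary anneal factor:

    from mxnet import nd

    inp = nd.array([[1, 0, 1, 0], [0, 1, 1, 1]])   # binary interaction rows
    out = nd.random.normal(shape=(2, 4))           # decoder logits
    mu = nd.random.normal(shape=(2, 3))
    logvar = nd.random.normal(shape=(2, 3))
    print(vae_loss_fn(inp, out, mu, logvar, anneal=0.2).asscalar())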
Example #23
 def _decode_step_CGED(self, step_input, state):
 
   step_output, state, _ = self.decoder(self.encoder.word_embed(step_input), state)
   step_output = self.fc_error(step_output)
   return nd.log_softmax(step_output), state
Example #24
    def forward(self, x, y, sample_prob=None):
        if sample_prob is not None:
            self.sample_prob = sample_prob
        batch_size = x.shape[0]
        state = self.init_hidden(batch_size, self.ctx)
        outputs_pgm = []
        outputs_param = []
        seq = y
        for i in range(seq.shape[1]):
            if i == 0:
                xt = x
            else:
                if i >= 1 and self.sample_prob > 0:
                    #print("x.shape:",x.shape)
                    sample_prob = nd.uniform(
                        0, 1, shape=(batch_size),
                        ctx=self.ctx)  #sample_prob.shape (10,)
                    sample_mask = sample_prob < self.sample_prob
                    #print("sample_mask:",sample_mask)
                    #print("sample_mask.sum:",sample_mask.sum().asscalar())
                    if sample_mask.sum() == 0:
                        it1 = seq[:, i - 1]
                    else:
                        sample_ind = sample_mask != 0
                        #print("sample_ind:",sample_ind)
                        it1 = seq[:, i - 1]  #it1.shape : (10,)
                        #print("it1:",it1.shape)
                        #print("output_prog:",outputs_pgm[-1])
                        prob_prev = nd.exp(outputs_pgm[-1])
                        #print("prob_pre:",prob_prev)
                        temp = nd.random.multinomial(
                            prob_prev, 1).reshape(-1).astype('int64')
                        #print("prob_prev:",nd.argmax(prob_prev,axis=1).astype('int64')==temp)
                        #print("temp",temp,"\n it1:",it1)
                        it1 = nd.where(sample_ind, temp, it1).astype('float32')
                else:
                    #print("obtain last ground truth")
                    it1 = seq[:, i - 1].copy()
                xt = self.pgm_embed(it1)
                #print("xt after embed:",xt)

            #print("xt                      :",xt)
            output, state = self.core(xt.expand_dims(axis=0), state)

            pgm_feat1 = nd.relu(self.logit1(output.squeeze(0)))
            pgm_feat2 = self.logit2(pgm_feat1)
            pgm_score = nd.log_softmax(pgm_feat2, axis=1)

            trans_prob = nd.softmax(pgm_feat2, axis=1).detach()
            param_feat1 = nd.relu(self.regress1(output.squeeze(0)))
            param_feat2 = nd.concat(trans_prob, param_feat1, dim=1)

            param_score = self.regress2(param_feat2)
            param_score = param_score.reshape(batch_size, self.vocab_size + 1,
                                              self.max_param)
            #index = nd.argmax(trans_prob, axis = 1)
            index = seq[:, i]
            index = index.expand_dims(axis=1).expand_dims(axis=2).broadcast_to(
                shape=(batch_size, 1, self.max_param)).detach()
            param_score = nd.pick(param_score, index, 1)

            outputs_pgm.append(pgm_score)
            outputs_param.append(param_score)

        outputs_pgm = [_.expand_dims(axis=1) for _ in outputs_pgm]
        outputs_param = [_.expand_dims(axis=1) for _ in outputs_param]
        pgms = outputs_pgm[0]
        params = outputs_param[0]
        for i in range(1, len(outputs_pgm)):
            pgms = nd.concat(pgms, outputs_pgm[i], dim=1)
            params = nd.concat(params, outputs_param[i], dim=1)
        #print("params", params.shape)
        #rint("pgm", pgms.shape)

        return [pgms, params]
Example #25
 def log_softmax(self, x):
     return nd.log_softmax(x, axis=1)
Example #26
 def softmax_cross_entropy(self, yhat_linear, y):
     return (-nd.nansum(y * nd.log_softmax(yhat_linear)))
Example #27
 def forward(self, edges):
     score_pred = nd.log_softmax(edges.data['preds'])[:,1:].max(axis=1)
     score_phr = score_pred + edges.src['node_class_logit'] + edges.dst['node_class_logit']
     return {'score_pred': score_pred,
             'score_phr': score_phr}
Example #28
 def forward(self, x):
     x = self.embed(x)
     x = x.reshape([x.shape[0], -1])
     x = nd.relu(self.hidden(x))
     out = nd.log_softmax(self.out(x))
     return out
Example #29
 def log_softmax_likelihood(self, yhat_linear, y):
     return nd.nansum(y * nd.log_softmax(yhat_linear), axis=0, exclude=True)
Example #30
def validate(epoch, val_loader, model, loss, opt, ctx, val_loss, val_iou, gen_shape=False):

    # load pre-fixed randomization
    try:
        rand1 = np.load(opt.rand1)
        rand2 = np.load(opt.rand2)
        rand3 = np.load(opt.rand3)
    except:
        rand1 = np.random.rand(opt.batch_size * opt.seq_length, stop_id).astype(np.float32)
        rand2 = np.random.rand(opt.batch_size * opt.seq_length, 1).astype(np.float32)
        rand3 = np.random.rand(opt.batch_size, opt.seq_length, max_param - 1).astype(np.float32)
        np.save(opt.rand1, rand1)
        np.save(opt.rand2, rand2)
        np.save(opt.rand3, rand3)

    generated_shapes = None
    original_shapes = None
    
    loss_sum,iou_sum,n = 0.0,0.0,0
    for idx, data in enumerate(val_loader):
        start_t = time.time()

        shape, label, param = data

        bsz = shape.shape[0]
        n_step = label.shape[1]
        index = np.array(list(map(lambda x: n_step, label)))
        index = index - 1

        # add noise during training, making the executor accept
        # continuous output from program generator
        
        label = label.reshape(-1,1).asnumpy()
        pgm_vector = 0.1*rand1
        pgm_noise = 0.1*rand2
        pgm_value = np.ones(label.shape) - pgm_noise
        #print('pgm_val.shape:',pgm_value.shape,'label.shape:',label.shape,'label.shape:',label.shape)
        pgm_vector = scatter_numpy(pgm_vector,1,label,pgm_value).reshape(bsz,n_step,stop_id)

        param_noise = nd.from_numpy(rand3)
        #print(param.shape,param_noise.shape)
        param_vector = param + 0.6 * (param_noise - 0.5)
        
        
        gt = shape.astype('float32').as_in_context(ctx)
        index = nd.from_numpy(index).astype('int64').as_in_context(ctx)
        pgm_vector = nd.from_numpy(pgm_vector).as_in_context(ctx)
        param_vector = param_vector.as_in_context(ctx)
        #prediction
        pred = model(pgm_vector, param_vector, index)
        scores = nd.log_softmax(pred,axis=1)
        pred0 = scores[:,0].squeeze()*opt.p_weight
        pred1 = scores[:,1].squeeze()*opt.n_weight
        l = -nd.where(gt, pred1, pred0).mean((1,2,3))
        #print(pred2.dtype,gt.dtype)
        #l = loss(pred,gt,sample_weight = nd.array([opt.n_weight,opt.p_weight]))

        l = l.mean().asscalar()
        pred = nd.softmax(pred,axis=1)
        pred = pred[:, 1, :, :, :]
        s1 = gt.reshape(-1, 32, 32, 32).as_in_context(mx.cpu())
        s2 = pred.squeeze().as_in_context(mx.cpu())
        s2 = (s2 > 0.5)

        batch_iou = BatchIoU(s1, s2)
        iou = batch_iou.mean()
        loss_sum+=l
        n+=1
        iou_sum+=iou
        
        if(idx+1)%5==0 and gen_shape:
            if original_shapes is None:
                original_shapes = s1.expand_dims(axis=0)
                generated_shapes = s2.expand_dims(axis=0)
            else:
                original_shapes = nd.concat(original_shapes,s1.expand_dims(axis=0),dim=0)
                generated_shapes = nd.concat(generated_shapes,s2.expand_dims(axis=0),dim=0)
        end_t = time.time()

        if (idx + 1) % opt.info_interval == 0:
            print("Test: epoch {} batch {}/{}, loss13 = {:.3f}, iou = {:.3f}, time = {:.3f}"
                  .format(epoch, idx + 1, len(val_loader), l, iou, end_t - start_t))
            sys.stdout.flush()
        if (idx + 1) > len(val_loader) / 10:
            break
    val_loss.append(loss_sum/n)
    val_iou.append(iou_sum/n)

    return generated_shapes, original_shapes