Example #1
def train_model():
    global best_acc, best_epoch
    batch_idx = 0
    model.train()
    N = len(train_loader.dataset)
    train_loss, all_preds, all_targets = 0., [], []
    val_preds, val_targets = [], []

    for batch in train_loader:
        optimizer.zero_grad()
        loss, output = model(batch)
        # select the target labels and the train/validation masks for the current task
        if params.task == '1':
            target = batch['labels'].numpy()
            valid_mask = batch['valid_mask'].numpy()
            test_mask = batch['test_mask'].numpy()
            validation_flag = (1 - valid_mask) * test_mask
            training_flag = test_mask * valid_mask
        elif params.task == '2':
            target = batch['ans'].numpy()
            valid_mask = batch['valid_mask'].numpy()
            test_mask = batch['test_mask'].numpy()
            validation_flag = (1 - valid_mask) * test_mask
            training_flag = test_mask * valid_mask
        loss.backward()
        optimizer.step()
        all_preds.append(output[training_flag == 1])
        all_targets.append(target[training_flag == 1])
        val_preds.append(output[validation_flag == 1])
        val_targets.append(target[validation_flag == 1])
        train_loss += float(loss.detach().cpu().numpy())
        batch_idx += 1

    all_pred = np.concatenate(all_preds, axis=0)
    all_target = np.concatenate(all_targets, axis=0)
    val_pred = np.concatenate(val_preds, axis=0)
    val_target = np.concatenate(val_targets, axis=0)
    #model.eval()
    if params.task == '1':
        train_auc = compute_auc(all_target, all_pred)
        val_auc = compute_auc(val_target, val_pred)
        train_accuracy = compute_accuracy(all_target, all_pred)
        val_accuracy = compute_accuracy(val_target, val_pred)
        print(
            'Train Epoch {} Loss: {} train auc: {} train acc: {} val auc: {} val accuracy: {} n_validation : {}'
            .format(epoch, train_loss / batch_idx, train_auc, train_accuracy,
                    val_auc, val_accuracy, val_target.shape))
    if params.task == '2':
        train_accuracy = np.mean(all_target == all_pred)
        val_accuracy = np.mean(val_target == val_pred)
        print('Train Epoch {} Loss: {} train acc: {} val accuracy: {}'.format(
            epoch, train_loss / batch_idx, train_accuracy, val_accuracy))
    if best_acc is None or val_accuracy > best_acc:
        best_acc = val_accuracy
        best_epoch = epoch
    print('Train Epoch {} best val accuracy: {} best epoch: {}'.format(
        epoch, best_acc, best_epoch))
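The example relies on compute_auc and compute_accuracy helpers that are not shown. A minimal sketch of what they might look like, assuming binary labels, continuous scores, and sklearn.metrics (the 0.5 threshold is an assumption, not taken from the original project):

import numpy as np
from sklearn.metrics import roc_auc_score

def compute_auc(target, pred):
    # ROC AUC over binary ground truth and continuous scores
    return roc_auc_score(target, pred)

def compute_accuracy(target, pred):
    # threshold the scores at 0.5 (assumed convention), then compare to the labels
    return float(np.mean((pred >= 0.5) == (target == 1)))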
Example #2
def compute_bpsn_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup negative examples and the background positive examples."""
    subgroup_negative_examples = df[df[subgroup] & ~df[label]]
    non_subgroup_positive_examples = df[~df[subgroup] & df[label]]
    # DataFrame.append was removed in pandas 2.0; concatenate the two slices instead
    examples = pd.concat(
        [subgroup_negative_examples, non_subgroup_positive_examples])
    return compute_auc(examples[label], examples[model_name])
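For illustration only, with made-up column names (subgroup_a, toxic, and my_model are not from the original project), the metric can be called on a small frame of boolean subgroup/label columns and one score column per model:

import pandas as pd

df = pd.DataFrame({
    'subgroup_a': [True, True, False, False],   # subgroup membership
    'toxic':      [False, True, True, False],   # ground-truth label
    'my_model':   [0.2, 0.9, 0.8, 0.1],         # model scores
})
# AUC over within-subgroup negatives vs. background positives
bpsn_auc = compute_bpsn_auc(df, 'subgroup_a', 'toxic', 'my_model')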
Example #3
def test_model(id_):
    valid_dataset.seed = id_
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               collate_fn=collate_fn,
                                               batch_size=64,
                                               num_workers=num_workers,
                                               shuffle=False,
                                               drop_last=False)
    # note: total_loss is never accumulated in the loop below, so the returned
    # average loss is always 0; only auc and accuracy carry information
    total_loss, all_preds, all_targets = 0., [], []
    n_batch = 0
    for batch in valid_loader:
        with torch.no_grad():
            output = model.test(batch)
        target = batch['output_labels'].float().numpy()
        mask = batch['output_mask'].numpy() == 1
        all_preds.append(output[mask])
        all_targets.append(target[mask])
        n_batch += 1
    all_pred = np.concatenate(all_preds, axis=0)
    all_target = np.concatenate(all_targets, axis=0)
    auc = compute_auc(all_target, all_pred)
    accuracy = compute_accuracy(all_target, all_pred)
    return total_loss / n_batch, auc, accuracy
Example #4
def compute_subset_auc(indices, pred_set, y):
    subset = [vect for i, vect in enumerate(pred_set) if i in indices]
    # sp.mean (scipy.mean) is deprecated in favour of numpy; np.mean is the direct equivalent
    mean_preds = np.mean(subset, axis=0)
    mean_auc = compute_auc(y, mean_preds)

    return mean_auc, indices
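A hypothetical call, where pred_set holds one prediction vector per model and indices selects which models to average (the values below are made up):

import numpy as np

y = np.array([0, 1, 1, 0])
pred_set = [np.array([0.1, 0.8, 0.7, 0.3]),
            np.array([0.4, 0.6, 0.9, 0.2]),
            np.array([0.2, 0.7, 0.6, 0.1])]
auc, idx = compute_subset_auc([0, 2], pred_set, y)  # average models 0 and 2, then score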
Example #5
    def fit_predict(self, y, train=None, predict=None, show_steps=True):
        """
        Fit each model on the appropriate dataset, then return the average
        of their individual predictions. If train is specified, use a subset
        of the training set to train the models, then predict the outcome of
        either the remaining samples or (if given) those specified in predict.
        If train is omitted, train the models on the full training set, then
        predict the outcome of the full test set.

        Options:
        ------------------------------
        - y: numpy array. The full vector of the ground truths.
        - train: list. The indices of the elements to be used for training.
            If None, take the entire training set.
        - predict: list. The indices of the elements to be predicted.
        - show_steps: boolean. Whether to compute metrics after each stage
            of the computation.
        """
        y_train = y[train] if train is not None else y
        if train is not None and predict is None:
            predict = [i for i in range(len(y)) if i not in train]

        stage0_train = []
        stage0_predict = []
        for model, feature_set in self.models:
            X_train, X_predict = get_dataset(feature_set, train, predict)

            identifier = train[0] if train is not None else -1
            cache_file = stringify(model, feature_set) + str(identifier)

            model_preds = self._get_model_preds(
                model, X_train, X_predict, y_train, cache_file)
            stage0_predict.append(model_preds)

            # if stacking, compute cross-validated predictions on the train set
            if self.stack:
                model_cv_preds = self._get_model_cv_preds(
                    model, X_train, y_train, cache_file)
                stage0_train.append(model_cv_preds)

            # verbose mode: compute metrics after every model computation
            if show_steps:
                if train is not None:
                    mean_preds, stack_preds, fwls_preds = self._combine_preds(
                        np.array(stage0_train).T, np.array(stage0_predict).T,
                        y_train, train, predict,
                        stack=self.stack, fwls=self.fwls)

                    model_auc = compute_auc(y[predict], stage0_predict[-1])
                    mean_auc = compute_auc(y[predict], mean_preds)
                    stack_auc = compute_auc(y[predict], stack_preds) \
                        if self.stack else 0
                    fwls_auc = compute_auc(y[predict], fwls_preds) \
                        if self.fwls else 0

                    logger.info(
                        "> AUC: %.4f (%.4f, %.4f, %.4f) [%s]", model_auc,
                        mean_auc, stack_auc, fwls_auc,
                        stringify(model, feature_set))
                else:
                    logger.info("> used model %s:\n%s", stringify(
                        model, feature_set), model.get_params())

        if self.model_selection and predict is not None:
            best_subset = self._find_best_subset(y[predict], stage0_predict)
            stage0_train = [pred for i, pred in enumerate(stage0_train)
                            if i in best_subset]
            stage0_predict = [pred for i, pred in enumerate(stage0_predict)
                              if i in best_subset]

        mean_preds, stack_preds, fwls_preds = self._combine_preds(
            np.array(stage0_train).T, np.array(stage0_predict).T,
            y_train, stack=self.stack, fwls=self.fwls)

        if self.stack:
            selected_preds = stack_preds if not self.fwls else fwls_preds
        else:
            selected_preds = mean_preds

        return selected_preds
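When self.stack is False, selected_preds is simply the element-wise average of the per-model prediction vectors; a rough sketch of that default combination (not the project's actual _combine_preds):

import numpy as np

# stage0_predict: one prediction vector per model, all over the same predict indices
mean_preds = np.mean(np.array(stage0_predict), axis=0)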
Example #7
    def fit_predict(self, y, train=None, predict=None, show_steps=True):
        """
        Fit each model on the appropriate dataset, then return the average
        of their individual predictions. If train is specified, use a subset
        of the training set to train the models, then predict the outcome of
        either the remaining samples or (if given) those specified in predict.
        If train is omitted, train the models on the full training set, then
        predict the outcome of the full test set.

        Options:
        ------------------------------
        - y: numpy array. The full vector of the ground truths.
        - train: list. The indices of the elements to be used for training.
            If None, take the entire training set.
        - predict: list. The indices of the elements to be predicted.
        - show_steps: boolean. Whether to compute metrics after each stage
            of the computation.
        """
        y_train = y[train] if train is not None else y
        if train is not None and predict is None:
            predict = [i for i in range(len(y)) if i not in train]

        stage0_train = []
        stage0_predict = []
        for model, feature_set in self.models:
            X_train, X_predict = get_dataset(feature_set, train, predict)

            identifier = train[0] if train is not None else -1
            cache_file = stringify(model, feature_set) + str(identifier)

            model_preds = self._get_model_preds(model, X_train, X_predict,
                                                y_train, cache_file)
            stage0_predict.append(model_preds)

            # if stacking, compute cross-validated predictions on the train set
            if self.stack:
                model_cv_preds = self._get_model_cv_preds(
                    model, X_train, y_train, cache_file)
                stage0_train.append(model_cv_preds)

            # verbose mode: compute metrics after every model computation
            if show_steps:
                if train is not None:
                    mean_preds, stack_preds, fwls_preds = self._combine_preds(
                        np.array(stage0_train).T,
                        np.array(stage0_predict).T,
                        y_train,
                        train,
                        predict,
                        stack=self.stack,
                        fwls=self.fwls)

                    model_auc = compute_auc(y[predict], stage0_predict[-1])
                    mean_auc = compute_auc(y[predict], mean_preds)
                    stack_auc = compute_auc(y[predict], stack_preds) \
                        if self.stack else 0
                    fwls_auc = compute_auc(y[predict], fwls_preds) \
                        if self.fwls else 0

                    logger.info("> AUC: %.4f (%.4f, %.4f, %.4f) [%s]",
                                model_auc, mean_auc, stack_auc, fwls_auc,
                                stringify(model, feature_set))
                else:
                    logger.info("> used model %s:\n%s",
                                stringify(model, feature_set),
                                model.get_params())

        if self.model_selection and predict is not None:
            best_subset = self._find_best_subset(y[predict], stage0_predict)
            stage0_train = [
                pred for i, pred in enumerate(stage0_train) if i in best_subset
            ]
            stage0_predict = [
                pred for i, pred in enumerate(stage0_predict)
                if i in best_subset
            ]

        mean_preds, stack_preds, fwls_preds = self._combine_preds(
            np.array(stage0_train).T,
            np.array(stage0_predict).T,
            y_train,
            stack=self.stack,
            fwls=self.fwls)

        if self.stack:
            selected_preds = stack_preds if not self.fwls else fwls_preds
        else:
            selected_preds = mean_preds

        return selected_preds
Example #8
random.shuffle(trainset1)
random.shuffle(testset1)
random.shuffle(trainset2)
random.shuffle(testset2)
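# Each split is trimmed to a whole multiple of batch_size so every batch is full,
# e.g. with batch_size = 64, a split of 1003 examples keeps 1003 // 64 * 64 = 960.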
trainset1 = trainset1[:len(trainset1)//batch_size*batch_size]
testset1 = testset1[:len(testset1)//batch_size*batch_size]
trainset2 = trainset2[:len(trainset2)//batch_size*batch_size]
testset2 = testset2[:len(testset2)//batch_size*batch_size]

gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    model = Model(user_count, item_count, batch_size)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    print('Domain_A_Initialized_AUC: %.4f\tDomain_B_Initialized_AUC: %.4f' % compute_auc(sess, model, testset1, testset2))
    sys.stdout.flush()
    start_time = time.time()
    last_auc = 0.0
    
    for _ in range(1000):
        loss_sum = 0.0
        for uij in DataInput(trainset1, batch_size):
            loss = model.train_1(sess, uij, lr)
            loss_sum += loss
        for uij in DataInput(trainset2, batch_size):
            loss = model.train_2(sess, uij, lr)
            loss_sum += loss
        model.train_orth(sess, uij[0], lr)
        test_auc_1, test_auc_2 = compute_auc(sess, model, testset1, testset2)
        train_auc_1, train_auc_2 = compute_auc(sess, model, trainset1, trainset2)