示例#1
0
    def transform(self, X_df):
        """Turn ``X_df`` into a sparse feature matrix in CSR format.

        The complete train + test data is reloaded on every call and passed
        as the reference frame to ``labelEncoder``, ``OneHot`` and
        ``Vectorize`` together with the frame being transformed (presumably
        so the encoders see every category -- confirm against those helpers).

        Parameters
        ----------
        X_df : DataFrame-like
            Rows to transform. A copy is taken before any processing.

        Returns
        -------
        The result of calling ``.tocsr()`` on the output of ``Vectorize``.
        """
        # Columns carrying a single scalar value per row.
        scalar_columns = [
            'LBS', 'age', 'carrier', 'consumptionAbility', 'education',
            'gender', 'house', 'os', 'ct', 'marriageStatus', 'advertiserId',
            'campaignId', 'creativeId', 'adCategoryId', 'productId',
            'productType',
        ]
        # Columns carrying vector features.
        vector_columns = [
            'appIdAction', 'appIdInstall', 'interest1', 'interest2',
            'interest3', 'interest4', 'interest5', 'kw1', 'kw2', 'kw3',
            'topic1', 'topic2', 'topic3',
        ]

        features = X_df.copy()
        train_frame, _ = get_train_data()
        test_frame, _ = get_test_data()

        # Missing values (NaN) become the string '-1' in both frames.
        features = features.fillna('-1')
        reference = pd.concat([train_frame, test_frame]).fillna('-1')

        # Order matters: the input frame is encoded against the raw reference
        # frame before the reference frame itself gets re-encoded.
        features = labelEncoder(reference, features, scalar_columns)
        reference = labelEncoder(reference, reference, scalar_columns)

        sparse_features = OneHot(reference, features, scalar_columns)
        sparse_features = Vectorize(
            reference, features, vector_columns, sparse_features)

        return sparse_features.tocsr()
示例#2
0
def load_train_test_prediction(submission_name):
    """Fetch ground-truth and predicted labels for one submission.

    Predictions are read from the ``training_output`` folder of the
    submission, located under ``../submissions/<submission_name>/``
    (files ``y_pred_train.npy`` and ``y_pred_test.npy``). The true labels are
    the second item returned by ``get_train_data('..')`` and
    ``get_test_data('..')``.

    Parameters
    ----------
    submission_name : str
        The name of the submission (e.g. 'abethe_anatomy').

    Returns
    -------
    y_true_train : ndarray, shape (n_train_samples, )
        The true labels on the training set.
    y_pred_train : ndarray, shape (n_train_samples, )
        The predicted labels on the training set.
    y_true_test : ndarray, shape (n_test_samples, )
        The true labels on the testing set.
    y_pred_test : ndarray, shape (n_test_samples, )
        The predicted labels on the testing set.

    """
    output_dir = os.path.join(
        '../submissions', submission_name, 'training_output')

    # Stored predictions: training split first, then testing split.
    train_pred_file = os.path.join(output_dir, 'y_pred_train.npy')
    y_pred_train = np.load(train_pred_file)
    test_pred_file = os.path.join(output_dir, 'y_pred_test.npy')
    y_pred_test = np.load(test_pred_file)

    # Only the labels are needed from the loaders; the data part is dropped.
    y_true_train = get_train_data('..')[1]
    y_true_test = get_test_data('..')[1]

    return (y_true_train, y_pred_train, y_true_test, y_pred_test)
示例#3
0
def _get_data_rdb_out():
    """Re-split the data so that the test set contains only the RDB subjects.

    The index labels listed in ``rdb_idx.npy`` select rows of the original
    test set. Those rows become the new test set; every other test row is
    appended to the original training set.

    Returns
    -------
    X_train : pandas object, n_train_samples rows
        The original training data followed by the non-RDB test rows.
    X_test : pandas object, n_test_samples rows
        The test rows whose index label is listed in ``rdb_idx.npy``.
    y_train : ndarray, shape (n_train_samples, )
        The labels matching ``X_train``.
    y_test : array-like, shape (n_test_samples, )
        The labels matching ``X_test`` (a boolean-mask selection of the
        original test labels, presumably an ndarray).

    """
    rdb_idx = np.load('rdb_idx.npy')
    X_test, y_test = get_test_data('..')
    X_train, y_train = get_train_data('..')

    # Boolean mask over the test rows: True where the row's index label is
    # one of the RDB ids. np.isin avoids building an (n_rdb, n_test)
    # comparison matrix and also works when rdb_idx is empty, where stacking
    # a list of per-id comparisons would raise.
    is_rdb = np.isin(X_test.index.values, rdb_idx)

    return (pd.concat([X_train, X_test[~is_rdb]], axis=0),
            X_test[is_rdb],
            np.concatenate([y_train, y_test[~is_rdb]]),
            y_test[is_rdb])
示例#4
0
import problem
import torch
from torch.nn.functional import relu
from torch.nn.utils.rnn import pad_sequence

# Load the train and test splits at module level through the `problem` module;
# each loader returns a pair that is unpacked into data and target.
X_train, y_train = problem.get_train_data()
X_test, y_test = problem.get_test_data()


class Regressor():
    """A PyTorch model made of one MLP per module type.

    The model is learnt only on single modules.
    It takes as input the input power and the meta data of the
    corresponding cascade. To predict the output power, the model
    simply chains the MLPs matching the input module cascade.
    """
    def __init__(self):
        """Create an unfitted regressor; the network is built later in fit."""
        super().__init__()
        # The model needs meta data that is only available in the data,
        # so it is instantiated when `fit` is called, not here.
        self.Model = PyTorchModel  # PyTorch model class
        self.model = None  # PyTorch model instance
        self.mod_id = None  # Mapping from module type to integer id

    def fit(self, X, y):
        """Fit the regressor; begins by deriving module ids from ``X``.

        ``X`` is iterated as 3-tuples whose first element is a sequence of
        modules. Each module is indexed as ``mod[0]`` (its type) and
        ``mod[1]`` (an object whose length is recorded as ``nb_feat``).
        """
        # Collect the distinct (type, number of features) pairs seen in the
        # data. Each pair is stored as a tuple of key/value pairs so that it
        # is hashable and can be turned back into a dict below.
        all_mods = set([(("type", mod[0]), ("nb_feat", len(mod[1])))
                        for seq, _, _ in X for mod in seq])
        mod_info = [dict(m) for m in all_mods]
        # NOTE(review): ids follow the iteration order of a set, so the
        # type -> id assignment is not tied to the order of the data. If one
        # type appears with two different nb_feat values, the later entry
        # overwrites the earlier id.
        self.mod_id = {mod["type"]: i for i, mod in enumerate(mod_info)}

        # Instantiate the PyTorch model
示例#5
0
from nilearn import datasets, input_data, plotting, image
from sklearn.svm import SVC

import seaborn as sns
import numpy as np
import pandas as pd
import scipy as sp
from nilearn import datasets, input_data, plotting, image
from sklearn.preprocessing import QuantileTransformer
from sklearn.metrics import roc_auc_score

import matplotlib.colors as colors
from problem import get_train_data, get_test_data

# Load both data splits at module level. ".." is the argument handed to each
# loader (presumably the path to the challenge root -- confirm in `problem`).
X_train, y_train = get_train_data("..")
X_test, y_test = get_test_data("..")

# Names of the submissions to process, in processing order.
all_submissions = [
    "abethe_functional_blast",
    "amicie_functional_blast",
    "ayoub.ghriss_functional_blast",
    "mk_functional_blast",
    "nguigui_functional_blast",
    "pearrr_functional_blast",
    "Slasnista_functional_blast",
    "vzantedeschi_functional_blast",
    "wwwwmmmm_functional_blast",
]

# One independent, initially empty list per percentage label.
results = {
    percent_label: [] for percent_label in ("0%", "25%", "50%", "75%")
}
for submission_name in all_submissions:
示例#6
0
'''


#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from problem import get_train_data

# Training split, unpacked into data and labels.
data_train, labels_train = get_train_data()
from problem import get_test_data

# Test split, unpacked the same way.
data_test, labels_test = get_test_data()


# In[ ]:


import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from nilearn.connectome import ConnectivityMeasure

def _load_fmri(fmri_filenames):
    return np.array([pd.read_csv(subject_filename, header=None).values for subject_filename in fmri_filenames])