def transform(self, X_df):
    """Encode ``X_df`` into a sparse feature matrix.

    The train and test frames are reloaded through ``get_train_data`` and
    ``get_test_data`` on every call and concatenated into a reference frame.
    Both ``X_df`` (a copy of it) and the reference frame have their missing
    values replaced by the string ``'-1'``, after which the project helpers
    ``labelEncoder``, ``OneHot`` and ``Vectorize`` are applied in that order.

    Parameters
    ----------
    X_df : object exposing ``copy()`` and ``fillna()``
        The data to encode (presumably a pandas DataFrame holding the
        columns listed below -- confirm against the caller). It is copied
        first, so the argument itself is not rebound here.

    Returns
    -------
    The result of calling ``.tocsr()`` on the matrix returned by
    ``Vectorize`` (presumably a scipy CSR matrix -- confirm against the
    helper).
    """
    # Features with only one scalar per row.
    scalar_columns = [
        'LBS', 'age', 'carrier', 'consumptionAbility', 'education',
        'gender', 'house', 'os', 'ct', 'marriageStatus', 'advertiserId',
        'campaignId', 'creativeId', 'adCategoryId', 'productId',
        'productType',
    ]
    # Vector features.
    vector_columns = [
        'appIdAction', 'appIdInstall', 'interest1', 'interest2',
        'interest3', 'interest4', 'interest5', 'kw1', 'kw2', 'kw3',
        'topic1', 'topic2', 'topic3',
    ]

    encoded = X_df.copy()

    # The reference frame is the train set followed by the test set.
    train_frame, _ = get_train_data()
    test_frame, _ = get_test_data()
    reference = pd.concat([train_frame, test_frame])

    # Replace missing values (NaN) with the placeholder string.
    encoded = encoded.fillna('-1')
    reference = reference.fillna('-1')

    # The input is label-encoded against the reference frame *before* the
    # reference frame itself is encoded; this order is kept as-is.
    encoded = labelEncoder(reference, encoded, scalar_columns)
    reference = labelEncoder(reference, reference, scalar_columns)

    # One-hot features first, then the vector features are added to the
    # matrix produced by the previous step.
    sparse_features = OneHot(reference, encoded, scalar_columns)
    sparse_features = Vectorize(reference, encoded, vector_columns,
                                sparse_features)
    return sparse_features.tocsr()
def load_train_test_prediction(submission_name):
    """Fetch ground-truth and predicted labels of one submission.

    Predictions are read from the ``training_output`` folder of the
    submission, located under ``../submissions``. The true labels come from
    ``get_train_data('..')`` and ``get_test_data('..')``.

    Parameters
    ----------
    submission_name : str
        The name of the submission (e.g. 'abethe_anatomy').

    Returns
    -------
    y_true_train : ndarray, shape (n_train_samples, )
        The true labels on the training set.
    y_pred_train : ndarray, shape (n_train_samples, )
        The predicted labels on the training set.
    y_true_test : ndarray, shape (n_test_samples, )
        The true labels on the testing set.
    y_pred_test : ndarray, shape (n_test_samples, )
        The predicted labels on the testing set.

    """
    output_dir = os.path.join('../submissions', submission_name,
                              'training_output')

    # Stored predictions: the train file is read first, then the test file.
    y_pred_train, y_pred_test = (
        np.load(os.path.join(output_dir, filename))
        for filename in ('y_pred_train.npy', 'y_pred_test.npy')
    )

    # Only the labels (second item) of each data split are needed here.
    _, y_true_train = get_train_data('..')
    _, y_true_test = get_test_data('..')

    return (y_true_train, y_pred_train, y_true_test, y_pred_test)
def _get_data_rdb_out():
    """Split the data so that the test set contains only the RDB subjects.

    The subject identifiers listed in ``rdb_idx.npy`` (read from the current
    working directory) are looked up in the index of the testing data. The
    matching rows form the new testing set; every other testing row is
    appended to the training set.

    Returns
    -------
    X_train : pandas object, shape (n_train_samples, ...)
        The training data followed by the testing data of the non-RDB
        subjects, as returned by ``pd.concat``.
    X_test : pandas object, shape (n_test_samples, ...)
        The testing data corresponding to the RDB subjects.
    y_train : ndarray, shape (n_train_samples, )
        The labels of the training set, in the same order as ``X_train``.
    y_test : array-like, shape (n_test_samples, )
        The labels of the testing set (the RDB subjects).

    """
    rdb_idx = np.load('rdb_idx.npy')
    X_test, y_test = get_test_data('..')
    X_train, y_train = get_train_data('..')

    # True for every testing row whose index value is one of the RDB ids.
    # ``np.isin`` avoids materialising one boolean row per id and also
    # handles an empty id list (all rows are then kept for training).
    is_rdb = np.isin(X_test.index.values, rdb_idx)

    return (pd.concat([X_train, X_test[~is_rdb]], axis=0),
            X_test[is_rdb],
            np.concatenate([y_train, y_test[~is_rdb]]),
            y_test[is_rdb])
import problem
import torch
from torch.nn.functional import relu
from torch.nn.utils.rnn import pad_sequence

# The train and test splits are loaded at import time through the project's
# `problem` module.
X_train, y_train = problem.get_train_data()
X_test, y_test = problem.get_test_data()


class Regressor():
    """A PyTorch MLP model consisting of an MLP for each module type.

    The model is learnt only on single module. The model takes as input the
    input power and the meta data of the corresponding cascade. To predict
    the output power the model simply cascades the different MLPs matching
    the input module cascade.
    """

    def __init__(self):
        """Store the model class; the model instance is created later."""
        super().__init__()
        # Since the model need meta data present in the data
        # we will only instantiate the model when calling the fit function
        # NOTE(review): `PyTorchModel` is not defined in the visible part of
        # the file -- it is expected to be provided elsewhere.
        self.Model = PyTorchModel  # PyTorch model class
        self.model = None  # PyTorch model instance
        self.mod_id = None  # Module IDs

    def fit(self, X, y):
        """Collect the module meta data of ``X`` and assign module IDs.

        ``X`` is iterated as 3-tuples whose first item is a sequence of
        modules; for each module, item 0 is used as its type and the length
        of item 1 as its number of features. ``y`` is not used in the
        visible part of this method.

        NOTE(review): only the meta-data step is visible here; the model
        instantiation announced by the last comment is not present.
        """
        # Retrieve some information about the modules from the data
        # Each distinct (type, number of features) pair is kept once.
        all_mods = set([(("type", mod[0]), ("nb_feat", len(mod[1])))
                        for seq, _, _ in X for mod in seq])
        mod_info = [dict(m) for m in all_mods]
        # Map each module type to its position in `mod_info`. The positions
        # follow the iteration order of the set above; if one type appears
        # with several feature counts, the last position wins.
        self.mod_id = {mod["type"]: i for i, mod in enumerate(mod_info)}
        # Instantiate the PyTorch model
from nilearn import datasets, input_data, plotting, image from sklearn.svm import SVC import seaborn as sns import numpy as np import pandas as pd import scipy as sp from nilearn import datasets, input_data, plotting, image from sklearn.preprocessing import QuantileTransformer from sklearn.metrics import roc_auc_score import matplotlib.colors as colors from problem import get_train_data, get_test_data X_train, y_train = get_train_data("..") X_test, y_test = get_test_data("..") all_submissions = [ "abethe_functional_blast", "amicie_functional_blast", "ayoub.ghriss_functional_blast", "mk_functional_blast", "nguigui_functional_blast", "pearrr_functional_blast", "Slasnista_functional_blast", "vzantedeschi_functional_blast", "wwwwmmmm_functional_blast", ] results = {"0%": [], "25%": [], "50%": [], "75%": []} for submission_name in all_submissions:
''' #!/usr/bin/env python # coding: utf-8 # In[ ]: from problem import get_train_data data_train, labels_train = get_train_data() from problem import get_test_data data_test, labels_test = get_test_data() # In[ ]: import numpy as np import pandas as pd from sklearn.base import BaseEstimator, TransformerMixin from sklearn.pipeline import make_pipeline from sklearn.preprocessing import FunctionTransformer from nilearn.connectome import ConnectivityMeasure def _load_fmri(fmri_filenames): return np.array([pd.read_csv(subject_filename, header=None).values for subject_filename in fmri_filenames])