from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
# feature selection
from sklearn.feature_selection import RFE


if __name__ == '__main__':
    # Script settings; the input path is read from `snakemake.input[0]`.
    save_figs = True
    filename = snakemake.input[0]

    # To print the available access variables, uncomment:
    # print(pyh22.load_car22.__doc__)

    # Read in data -------------------------------------------------------------
    data = pyh22.load_car22(
        filename,
        access_var="all",
        drop=True,
        version=1,
    )
    cytokines = Cytokines(data.cytokines).days_to_int()

    # Plot missing data (currently disabled)
    # msno.matrix(cytokines.df)

    # Get days relative to CRS
    # NOTE: crs_transform does not work on new dataset
    days_to_index = (
        data.cytokines_days_num
        .unstack()
        .unstack(level=1)
    )
    # Name the outermost index level 'date' (modifies the index in place).
    days_to_index.index.rename(names='date', level=0, inplace=True)

    # Merge with clinical data: both tables have their index moved into
    # columns first, then are joined on the 'patient_id' column.
    days_outcome = pd.merge(
        left=data.secondary_outcome.reset_index(),
        right=days_to_index.reset_index(),
        on='patient_id',
    )
from sklearn.feature_selection import RFE


if __name__ == '__main__':
    # Script settings: figure flag, input workbook and figure output folder.
    save_figs = True
    #filename_old = 'revisions/data/Full Cytokine_De-identified CD22 Data for Bioinformatics_3-30-20_v1.xlsx'
    filename = 'revisions/data/clinical_data_05-12-20_v1.xlsx'
    figs_path = 'datasets/CAR-T/new_data/'  # choose path

    # Print available access variables (the loader's docstring)
    print(pyh22.load_car22.__doc__)

    # Read in data -------------------------------------------------------------
    data = pyh22.load_car22(
        filename,
        access_var="all",
        drop=True,
        version=1,
    )
    cytokines = Cytokines(data.cytokines).days_to_int()

    # Define features and target variable for baseline -------------------------
    # The feature matrix is a column-wise concatenation of the clinical table
    # and the entries keyed 0 / '0' of each measurement table.
    # NOTE(review): `cytokines` is already the result of days_to_int(); the
    # second call below looks redundant -- confirm it is idempotent before
    # removing it.
    X = pd.concat(
        objs=[
            data.clinical,
            cytokines.days_to_int().df[0],
            data.pb_tbnk['0'],
            data.bm_tbnk['0'],
            data.inflammatory['0'],
        ],
        axis=1,
        sort=False,
    )
    # Target: the 'HLH' entry of the outcome table.
    y = data.outcome['HLH']

    # Try building predictor for CRS+2 -----------------------------------------