def test_unique_values(self):
    """Check that impact coding does not increase the number of distinct values.

    The number of distinct values in the generated ``DRG_impact_coded``
    column must be less than or equal to the number of distinct values in
    the original ``DRG`` column.
    """
    frame = pd.read_csv(fixture('iris_classification.csv'), na_values=['None'])

    # Count the source values before the frame is handed to the coding helper.
    source_value_count = len(frame.DRG.unique())

    coded_frame = impact_coding_on_a_single_column(frame, 'species', 'DRG')
    coded_value_count = len(coded_frame.DRG_impact_coded.unique())

    self.assertLessEqual(coded_value_count, source_value_count)
def test_column_renaming(self):
    """Check that impact coding adds a column named ``<column>_impact_coded``.

    Runs ``impact_coding_on_a_single_column`` on the ``DRG`` column and
    verifies that the returned frame has a ``DRG_impact_coded`` column.
    """
    df = pd.read_csv(fixture('iris_classification.csv'), na_values=['None'])
    code_column_name = 'DRG'

    test_df = impact_coding_on_a_single_column(df, 'species', code_column_name)

    # assertIn reports both the missing name and the actual columns on
    # failure, where assertTrue would only say "False is not true".
    self.assertIn(code_column_name + '_impact_coded', test_df.columns)
def setUp(self):
    """Create the classification model under test from three fixture columns.

    Only ``ThirtyDayReadmitFLG``, ``SystolicBPNBR`` and ``LDLNBR`` are read
    from the CSV. The resulting model object is stored on ``self.o``.
    """
    wanted_columns = ['ThirtyDayReadmitFLG', 'SystolicBPNBR', 'LDLNBR']
    csv_path = fixture('HCPyDiabetesClinical.csv')
    frame = pd.read_csv(csv_path, na_values=['None'], usecols=wanted_columns)

    # Seed NumPy's global RNG before the model object is constructed.
    np.random.seed(42)
    self.o = DevelopSupervisedModel(
        modeltype='classification',
        df=frame,
        predictedcol='ThirtyDayReadmitFLG',
        impute=True,
    )
def setUp(self):
    """Create a classification model and call ``linear`` on it.

    Loads the diabetes fixture, removes the ``PatientID`` and
    ``InTestWindowFLG`` columns, builds the model with a fixed NumPy seed,
    and stores it on ``self.o`` before invoking ``linear(cores=1)``.
    """
    raw_frame = pd.read_csv(fixture('HCPyDiabetesClinical.csv'), na_values=['None'])

    # These columns are treated as uninformative for the model.
    model_frame = raw_frame.drop(['PatientID', 'InTestWindowFLG'], axis=1)

    # Seed NumPy's global RNG before the model object is constructed.
    np.random.seed(42)
    self.o = DevelopSupervisedModel(
        modeltype='classification',
        df=model_frame,
        predictedcol='ThirtyDayReadmitFLG',
        impute=True,
    )
    self.o.linear(cores=1)
def setUp(self):
    """Create a classification model and call ``random_forest`` on it.

    Loads the diabetes sample-data fixture, removes the ``PatientID`` and
    ``InTestWindowFLG`` columns, builds the model with a fixed NumPy seed,
    and stores it on ``self.o`` before invoking ``random_forest(cores=1)``.
    """
    raw_frame = pd.read_csv(fixture('DiabetesClinicalSampleData.csv'), na_values=['None'])

    # These columns are treated as uninformative for the model.
    model_frame = raw_frame.drop(['PatientID', 'InTestWindowFLG'], axis=1)

    # Seed NumPy's global RNG before the model object is constructed.
    np.random.seed(42)
    self.o = DevelopSupervisedModel(
        modeltype='classification',
        df=model_frame,
        predictedcol='ThirtyDayReadmitFLG',
        impute=True,
    )
    self.o.random_forest(cores=1)
def setUp(self):
    """Create a deployment object for classification and call ``deploy``.

    Loads the diabetes fixture, removes the ``PatientID`` column, builds a
    ``DeploySupervisedModel`` with a fixed NumPy seed, stores it on
    ``self.o``, and invokes ``deploy`` with the linear method against the
    ``localhost`` server.
    """
    raw_frame = pd.read_csv(fixture('HCPyDiabetesClinical.csv'), na_values=['None'])

    # PatientID is treated as uninformative for the model.
    deploy_frame = raw_frame.drop('PatientID', axis=1)

    # Seed NumPy's global RNG before the deployment object is constructed.
    np.random.seed(42)
    self.o = DeploySupervisedModel(
        modeltype='classification',
        df=deploy_frame,
        graincol='PatientEncounterID',
        windowcol='InTestWindowFLG',
        predictedcol='ThirtyDayReadmitFLG',
        impute=True,
    )
    self.o.deploy(
        method='linear',
        cores=1,
        server='localhost',
        dest_db_schema_table='[SAM].[dbo].[HCPyDeployClassificationBASE]',
        use_saved_model=False,
    )