df['log_CT_CS'] = np.log(df['CT_CS']) df['log_CT_MCR'] = np.log(df['CT_MCR']) df['LMP_Model'] = df.apply(lambda x: 1e-3 * x['CT_Temp'] * (np.log(x['CT_RT']) + 25), axis=1) features = [i for i in df.columns if i not in ['CT_RT', 'CT_Temp', 'ID', 'CT_CS', 'LMP_Model', 'CT_MCR']] df = df[df['CT_RT'] < 200000] X = df[features].to_numpy(np.float32) y = df['LMP_Model'].to_numpy(np.float32) y2 = df[['ID', 'CT_RT', 'CT_Temp', 'CT_CS']].values.tolist() pdata = ProcessData(X=X, y=y, y2=y2, features=features) pdata.clean_data() data = pdata.get_data() scale = pdata.scale del pdata CT_RT = np.array([i[1] for i in data['y2']]) CT_Temp = np.array([i[2] for i in data['y2']]) CT_CS = np.array([i[3] for i in data['y2']]) ID = [i[0] for i in data['y2']] C = np.array([25 for i in ID]) skreg = SKREG(X=data['X'], y=data['y'], estimator="LR", validation="3-Fold", CT_Temp=CT_Temp, CT_RT=CT_RT,
# Numeric suffixes of the entries to keep; the ID column is matched against
# zero-padded strings of the form '9Cr-001'.
selected_numbers = [
    1, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
    43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
    58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
    76, 77, 78, 79, 82,
]
ID = ['9Cr-{}'.format(str(number).zfill(3)) for number in selected_numbers]

# .copy() so the column assignments below write to an independent frame
# instead of a slice of the original (avoids SettingWithCopyWarning).
df = df[df.ID.isin(ID)].copy()
df['log_CT_CS'] = np.log(df['CT_CS'])

# Fix the feature-column list once, BEFORE the 'LMP' target is added to the
# frame, so the names handed to ProcessData line up one-to-one with the
# columns of X. (Recomputing the list after adding 'LMP' yields one name
# more than X has columns.)
feature_columns = [
    col for col in df.columns
    if col not in ('ID', 'CT_RT', 'CT_Temp', 'CT_CS')
]
# np.float was removed in NumPy 1.24; np.float64 is the same dtype.
X = df[feature_columns].to_numpy(dtype=np.float64)

# Target: 1e-3 * CT_Temp * (ln(CT_RT) + C), where C is looked up per row in
# the externally defined `data` mapping by ID. The lambda keeps the original
# behaviour of raising KeyError for an ID that is missing from `data`.
per_row_c = df['ID'].map(lambda key: data[key]['C']).astype(float)
df['LMP'] = 1e-3 * df['CT_Temp'] * (np.log(df['CT_RT']) + per_row_c)
y = df['LMP'].to_numpy()

# A fresh list is passed so ProcessData cannot alter feature_columns.
dprocess = ProcessData(X=X, y=y, metadata=list(feature_columns))
dprocess.clean_data()
X, y, metadata = dprocess.get_data()
del dprocess

# Rebuild a frame from the cleaned arrays and re-attach the target column.
df = pd.DataFrame(X, columns=metadata)
df['LMP'] = y

# Initialize the model
bframe = BayesFrame(
    df=df,
    target="LMP",
    val_scheme='5-Fold',
    bic_scheme="per_n",
    model_scheme=["selection"],
)

# Print the best model
print(bframe.zoo)
# NOTE(review): if bframe.zoo is not a plain ndarray, np.save stores it via
# pickle and np.load will need allow_pickle=True to read it back.
np.save('best_model.npy', bframe.zoo)