def gen_data(self, debug=False, version='v0'):
    """Assemble the joined feature table and write it out as CSV files.

    The base table comes from ``self.application_train_test``. Each auxiliary
    table is then loaded, left-joined onto it on ``SK_ID_CURR`` and released
    before the next one is loaded. Finally the columns listed in
    ``features_with_no_imp_at_least_twice`` are dropped and three CSVs are
    written: the full table, the rows whose ``TARGET`` is not null, and the
    rows whose ``TARGET`` is null.

    Args:
        debug: When true, ``10000`` is passed to every loader instead of
            ``None``.
        version: Forwarded to ``get_file`` for every output location.

    Returns:
        The joined table after the column drop.
    """
    os.makedirs(get_file('curated', version), exist_ok=True)
    row_limit = 10000 if debug else None
    frame = self.application_train_test(row_limit)

    # (timer label, loader, label printed in front of the loaded shape)
    stages = (
        ("Process bureau and bureau_balance",
         self.bureau_and_balance,
         "Bureau df shape:"),
        ("Process previous_applications",
         self.previous_applications,
         "Previous applications df shape:"),
        ("Process POS-CASH balance",
         self.pos_cash,
         "Pos-cash balance df shape:"),
        ("Process installments payments",
         self.installments_payments,
         "Installments payments df shape:"),
        ("Process credit card balance",
         self.credit_card_balance,
         "Credit card balance df shape:"),
    )
    for timer_label, loader, shape_label in stages:
        with timer(timer_label):
            extra = loader(row_limit)
            print(shape_label, extra.shape)
            frame = frame.join(extra, how='left', on='SK_ID_CURR')
            # Release the auxiliary table before the next one is loaded.
            del extra
            gc.collect()

    with timer("Saving data"):
        print(frame.shape)
        print('Dropping unimportant features')
        frame.drop(features_with_no_imp_at_least_twice, axis=1, inplace=True)
        gc.collect()
        print(frame.shape)
        frame.to_csv(get_file('all_data', version), index=False)
        # Rows with a TARGET value go to the train file, the rest to the test file.
        has_target = frame['TARGET'].notnull()
        frame[has_target].to_csv(get_file('org_train', version), index=False)
        frame[~has_target].to_csv(get_file('org_test', version), index=False)
    return frame
"""Command-line entry point that runs ``kfold_lightgbm`` under a timer.

Usage:
    python train.py -c /home/zoguntim/dev/home_credit_ml/runs/configs/cfg-2.json
"""
import argparse
import sys

# The project checkout has to be on sys.path before the ``credit`` imports.
sys.path.append('/home/zoguntim/dev/home_credit_ml')

from credit.utils import timer
from credit.models import kfold_lightgbm, TrainConfig


def main():
    """Parse the command line and run the k-fold LightGBM training call."""
    cli = argparse.ArgumentParser('Home Credit')
    cli.add_argument(
        "-c",
        "--config",
        help="path to configuration file",
        # Without -c, a default-constructed TrainConfig is handed on as-is.
        default=TrainConfig(),
    )
    options = cli.parse_args(sys.argv[1:])

    with timer("Train model"):
        data = kfold_lightgbm(tc=options.config)

    print('Finished!')


if __name__ == '__main__':
    main()
"""Script entry point that runs ``CurateData.gen_data`` under a timer."""
import sys

# The project checkout has to be on sys.path before the ``credit`` imports.
sys.path.append('/home/zoguntim/dev/home_credit_ml')

from credit.data import CurateData
from credit.utils import timer


def main():
    """Construct ``CurateData`` and call ``gen_data`` with its defaults."""
    with timer("Generate curated data"):
        curator = CurateData()
        curator.gen_data()

    print('Finished!')


if __name__ == "__main__":
    main()
"""Command-line entry point that fits a ``BoardModel`` under a timer.

Usage:
    python train_board_model.py -c /home/zoguntim/dev/home_credit_ml/runs/configs/cfg-2.json
"""
import argparse
import sys

# The project checkout has to be on sys.path before the ``credit`` imports.
sys.path.append('/home/zoguntim/dev/home_credit_ml')

from credit.utils import timer
from credit.models import BoardModel, TrainConfig


def main():
    """Parse the command line, configure a ``BoardModel`` and fit it."""
    cli = argparse.ArgumentParser('Home Credit')
    cli.add_argument(
        "-c",
        "--config",
        help="path to configuration file",
        default=TrainConfig(),
    )
    # NOTE(review): the parsed options are never read afterwards; the model is
    # always configured from the fresh TrainConfig below. Confirm whether
    # -c/--config is meant to be honoured here.
    parsed = cli.parse_args(sys.argv[1:])

    tc = TrainConfig()
    board_overrides = {
        'layers': [50],
        'dropouts': [.4],
        'use_multi_gpu': False,
    }
    for option, value in board_overrides.items():
        tc.board_model[option] = value

    model = BoardModel(tc=tc)
    with timer("Train Board model"):
        model.fit()

    print('Finished!')


if __name__ == '__main__':
    main()
"""Script entry point that constructs ``BoardProbabilities`` under a timer."""
import sys

# The project checkout has to be on sys.path before the ``credit`` imports.
sys.path.append('/home/zoguntim/dev/home_credit_ml')

from credit.data import BoardProbabilities
from credit.utils import timer


def main():
    """Instantiate ``BoardProbabilities``; no further method is called on it."""
    with timer("Generate board probabilities"):
        # presumably the constructor performs the generation itself - verify
        # against credit.data.BoardProbabilities
        probabilities = BoardProbabilities()

    print('Finished!')


if __name__ == "__main__":
    main()