Пример #1
0
def level_two_wrapper():
    """Train the level-two stacking models on the outputs of level one.

    As configured, this runs the "partial stacking" variant: the training
    data comes from ``Load_train_data.level_data_part()`` for a single
    level-one model (``'_xgb2000'``), and one ridge model plus five
    logistic-regression variants are wrapped in ``Level_train_thread``
    workers.  Alternative configurations are kept as comments next to the
    place where they would be enabled.

    Takes no arguments and returns ``None``.
    """
    ftype = ''
    config_instance = Config(ftype)
    level = 'level_two'  # stacking level whose models are trained here

    # Level-one model names for the *normal* stacking variant (level_data()).
    # Only the disabled alternative below reads this list.
    clf_name_level_data = [
        ftype + suffix
        for suffix in (
            '_lr_sag',
            '_lr_newton',
            '_lr_lbfgs',
            '_lr_liblinear',
            '_rf100',
            '_rf200',
            '_rf500',
            '_rf1000',
            '_gbdt20',
            '_gbdt50',
            '_gbdt100',
            '_ada20',
            '_ada50',
            '_ada100',
            '_xgb2000',
            '_xgb2500',
            '_xgb2000_2',
            '_xgb2500_2',
        )
    ]

    # Level-one model used by the *partial* stacking variant
    # (level_data_part()); the original note calls it the "BBM".
    # Other level-one names (lr_*, rf*, gbdt*, ada*, bag*, xgb*) were
    # previously listed here as commented-out alternatives.
    clf_name = [ftype + '_xgb2000']

    # ---------------- choose ONE of the two data sources ----------------
    # Normal stacking:
    # loader = load_train_data.Load_train_data(config_instance, 'level_one', clf_name_level_data)
    # X_0, X_1, uid_0, uid_1 = loader.level_data()
    #
    # Partial stacking (active).  Per the original note, this yields the
    # subsample whose default risk is supposed to be overestimated by the BBM.
    loader = load_train_data.Load_train_data(config_instance, 'level_one', clf_name)
    X_0, X_1, uid_0, uid_1 = loader.level_data_part()

    threads = []

    def add_model(estimator, name):
        # Queue one training worker for the given estimator.
        threads.append(
            Level_train_thread(config_instance, estimator, level, name, X_0, X_1, uid_0, uid_1)
        )

    add_model(Ridge(), ftype + '_ridge')
    add_model(LogisticRegression(max_iter=1000, solver='sag'), ftype + '_lr_sag_1000')
    for solver, suffix in (
        ('sag', '_lr_sag'),
        ('newton-cg', '_lr_newton'),
        ('lbfgs', '_lr_lbfgs'),
        ('liblinear', '_lr_liblinear'),
    ):
        add_model(LogisticRegression(solver=solver), ftype + suffix)

    # Disabled scikit-learn candidates (uncomment to enable):
    # add_model(RandomForestClassifier(n_estimators=100, max_depth=2, min_samples_split=10), ftype + '_rf100')
    # add_model(RandomForestClassifier(n_estimators=200, max_depth=3, min_samples_split=10), ftype + '_rf200')
    # add_model(RandomForestClassifier(n_estimators=500, max_depth=3, min_samples_split=10), ftype + '_rf500')
    # add_model(RandomForestClassifier(n_estimators=1000, max_depth=8, min_samples_split=9), ftype + '_rf1000')
    #
    # add_model(GradientBoostingClassifier(n_estimators=200, max_depth=3, min_samples_split=15, learning_rate=0.005, subsample=0.7), ftype + '_gbdt20')
    # add_model(GradientBoostingClassifier(n_estimators=50, max_depth=3, min_samples_split=15, learning_rate=0.01, subsample=0.7), ftype + '_gbdt50')
    # add_model(GradientBoostingClassifier(n_estimators=100, max_depth=3, min_samples_split=15, learning_rate=0.01, subsample=0.7), ftype + '_gbdt100')
    #
    # add_model(AdaBoostClassifier(base_estimator=RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10), n_estimators=20, learning_rate=0.001), ftype + '_ada20')
    # add_model(AdaBoostClassifier(base_estimator=RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10), n_estimators=50, learning_rate=0.02), ftype + '_ada50')
    # add_model(AdaBoostClassifier(base_estimator=RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10), n_estimators=100, learning_rate=0.02), ftype + '_ada100')
    #
    # add_model(BaggingClassifier(base_estimator=RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=9), n_estimators=20), ftype + '_bag20')
    # add_model(BaggingClassifier(base_estimator=RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=9), n_estimators=50), ftype + '_bag50')

    # XGBoost settings; only the disabled XGBoost candidates below use them.
    class_ratio = 40920/3080.0
    params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'scale_pos_weight': class_ratio,
        'eval_metric': 'auc',
        'gamma': 0,
        'max_depth': 8,
        'lambda': 700,
        'subsample': 0.7,
        'colsample_bytree': 1,
        'min_child_weight': 5,
        'eta': 0.001,
        'seed': 1,
        'nthread': 10,
    }

    # Disabled XGBoost candidates.  Each group trained 1000/2000/2500 rounds:
    #   threads.append(Xgb_level_train_thread(config_instance, level, ftype + NAME,
    #                                         X_0, X_1, uid_0, uid_1, params, ROUNDS))
    # with the following in-place changes to `params` made before each group
    # (changes accumulate from one group to the next):
    #   '_xgb{1000,2000,2500}'    : params as defined above
    #   '_xgb{1000,2000,2500}_2'  : params['scale_pos_weight'] = 40920/2000.0
    #   '_xgb{1000,2000,2500}_3'  : params['eta'] = 0.001
    #   '_xgb{1000,2000,2500}_4'  : params['eta'] = 0.005
    #   '_xgb{1000,2000,2500}_5'  : params['eta'] = 0.002; params['max_depth'] = 4
    #   '_xgb{1000,2000,2500}_6'  : params['max_depth'] = 5
    # NOTE(review): every group would receive the same dict object, so unless
    # the worker copies it, later edits are visible to earlier workers by the
    # time run() is called -- confirm before re-enabling.

    # run() is called directly on each worker, one after another.
    # NOTE(review): if the workers subclass threading.Thread this executes
    # them sequentially in the calling thread -- confirm that is intended.
    for worker in threads:
        worker.run()
Пример #2
0
def level_two_predict():
    """Build the level-two models and run their prediction workers.

    Loads the level-one outputs for the models named in ``clf_name`` (both
    the training split and the prediction split), wraps each level-two
    estimator in a ``Level_predict_thread`` / ``Xgb_level_predict_thread``
    worker, and then calls ``run()`` on each worker in turn.

    Fix relative to the previous version: the model labelled ``'_rf1000'``
    was built with ``n_estimators=100``, making it a duplicate of
    ``'_rf100'``; it now uses 1000 trees as its name states.

    Takes no arguments and returns ``None``.
    """
    ftype = ''
    config_instance = Config(ftype)
    level = 'level_two'

    # Level-one models whose outputs are the input features of this level.
    clf_name = [
        ftype + '_lr_sag',
        ftype + '_lr_newton',
        ftype + '_lr_lbfgs',
        ftype + '_lr_liblinear',
        ftype + '_xgb1000',
        ftype + '_xgb2000',
        ftype + '_xgb2500',
        ftype + '_xgb1000_2',
        ftype + '_xgb2000_2',
        ftype + '_xgb2500_2',
        ftype + '_xgb1000_3',
        ftype + '_xgb2000_3',
        ftype + '_xgb2500_3',
        ftype + '_xgb1000_4',
        ftype + '_xgb2000_4',
        ftype + '_xgb2500_4',
        ftype + '_xgb1000_5',
        ftype + '_xgb2000_5',
        ftype + '_xgb2500_5',
        ftype + '_xgb1000_6',
        ftype + '_xgb2000_6',
        ftype + '_xgb2500_6',
    ]

    # Both loaders take (config, level, clf_name) and read 'level_one' output.
    load_data_instance = load_train_data.Load_train_data(config_instance, 'level_one', clf_name)
    predict_data_instance = load_predict_data.Load_predict_data(config_instance, 'level_one', clf_name)

    # The uid arrays of the training split are not needed by the workers.
    X_0, X_1, _uid_0, _uid_1 = load_data_instance.level_data()
    predict_X, predict_uid = predict_data_instance.level_data()

    threads = []

    def add_model(estimator, suffix):
        # Queue a prediction worker for a scikit-learn style estimator.
        threads.append(
            Level_predict_thread(config_instance, estimator, level, ftype + suffix,
                                 X_0, X_1, predict_X, predict_uid)
        )

    def add_xgb(suffix, xgb_params, rounds):
        # Queue a prediction worker for an XGBoost model with `rounds` rounds.
        threads.append(
            Xgb_level_predict_thread(config_instance, level, ftype + suffix,
                                     X_0, X_1, predict_X, predict_uid, xgb_params, rounds)
        )

    # Logistic regression: three 'sag' variants, then the other solvers.
    add_model(LogisticRegression(solver='sag'), '_lr_sag')
    add_model(LogisticRegression(max_iter=1000, solver='sag'), '_lr_sag_1000')
    add_model(LogisticRegression(max_iter=1500, solver='sag'), '_lr_sag_1500')
    for solver, suffix in (
        ('newton-cg', '_lr_newton'),
        ('lbfgs', '_lr_lbfgs'),
        ('liblinear', '_lr_liblinear'),
    ):
        add_model(LogisticRegression(solver=solver), suffix)

    # Random forests; the tree count matches the number in each name.
    for n_trees, suffix in ((100, '_rf100'), (200, '_rf200'), (500, '_rf500'), (1000, '_rf1000')):
        add_model(
            RandomForestClassifier(n_estimators=n_trees, max_depth=3, min_samples_split=10),
            suffix,
        )

    # Gradient boosting.
    for n_stages, suffix in ((20, '_gbdt20'), (50, '_gbdt50'), (100, '_gbdt100')):
        add_model(
            GradientBoostingClassifier(n_estimators=n_stages, max_depth=3, min_samples_split=15,
                                       learning_rate=0.01, subsample=0.7),
            suffix,
        )

    # AdaBoost over small random forests; each model gets its own base forest.
    # NOTE(review): the base forest uses 5 trees here but 50 in
    # level_three_predict -- confirm which is intended.
    for n_rounds, suffix in ((20, '_ada20'), (50, '_ada50'), (100, '_ada100')):
        base_forest = RandomForestClassifier(n_estimators=5, max_depth=3, min_samples_split=10)
        add_model(
            AdaBoostClassifier(base_estimator=base_forest, n_estimators=n_rounds, learning_rate=0.02),
            suffix,
        )

    # First XGBoost configuration.
    params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'scale_pos_weight': 40920/3080.0,
        'eval_metric': 'auc',
        'gamma': 15,
        'max_depth': 3,
        'lambda': 600,
        'subsample': 0.40,
        'colsample_bytree': 0.3,
        'min_child_weight': 10,
        'eta': 0.002,  # 0.0005 was also tried
        'seed': 1,
        'nthread': 8,
    }
    add_xgb('_xgb2000', params, 2000)
    add_xgb('_xgb2500', params, 2500)

    # Second XGBoost configuration (a separate dict, so the workers above
    # keep their own settings).
    params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'scale_pos_weight': 40920/3080.0,
        'eval_metric': 'auc',
        'gamma': 0,
        'max_depth': 3,
        'lambda': 700,
        'subsample': 0.6,
        'colsample_bytree': 0.3,
        'min_child_weight': 5,
        'eta': 0.01,
        'seed': 1,
        'nthread': 8,
    }
    add_xgb('_xgb2000_2', params, 2000)
    add_xgb('_xgb2500_2', params, 2500)

    # run() is called directly on each worker, one after another.
    for worker in threads:
        worker.run()
Пример #3
0
def level_three_predict():
    """Build the level-three models and run their prediction workers.

    Loads the 'level_two' outputs of the models listed in ``clf_name`` (for
    both the training and the prediction split), wraps every level-three
    estimator in a ``Level_predict_thread`` / ``Xgb_level_predict_thread``
    worker, and then calls ``run()`` on each worker in turn.

    Takes no arguments and returns ``None``.
    """
    config_instance = Config('')
    level = 'level_three'

    # Level-two models whose outputs are the input features of this level.
    clf_name = [
        'log_move_lr_sag',
        'log_move_lr_newton',
        'log_move_lr_lbfgs',
        'log_move_lr_liblinear',
        'log_move_rf100',
        'log_move_rf200',
        'log_move_rf500',
        'log_move_rf1000',
        'log_move_gbdt20',
        'log_move_gbdt50',
        'log_move_gbdt100',
        'log_move_ada20',
        'log_move_ada50',
        'log_move_ada100',
        'log_move_xgb2000',
        'log_move_xgb2500',
        'log_move_xgb2000_2',
        'log_move_xgb2500_2',
    ]

    train_loader = load_train_data.Load_train_data(config_instance, 'level_two', clf_name)
    predict_loader = load_predict_data.Load_predict_data(config_instance, 'level_two', clf_name)
    X_0, X_1, uid_0, uid_1 = train_loader.level_data()
    predict_X, predict_uid = predict_loader.level_data()

    threads = []

    def add_model(estimator, name):
        # Queue a prediction worker for a scikit-learn style estimator.
        threads.append(
            Level_predict_thread(config_instance, estimator, level, name,
                                 X_0, X_1, predict_X, predict_uid)
        )

    def add_xgb(name, xgb_params, rounds):
        # Queue a prediction worker for an XGBoost model with `rounds` rounds.
        threads.append(
            Xgb_level_predict_thread(config_instance, level, name,
                                     X_0, X_1, predict_X, predict_uid, xgb_params, rounds)
        )

    # Logistic regression, one model per solver.
    for solver, name in (
        ('sag', 'log_move_lr_sag'),
        ('newton-cg', 'log_move_lr_newton'),
        ('lbfgs', 'log_move_lr_lbfgs'),
        ('liblinear', 'log_move_lr_liblinear'),
    ):
        add_model(LogisticRegression(solver=solver), name)

    # Random forests of increasing size.
    for n_trees, name in (
        (100, 'log_move_rf100'),
        (200, 'log_move_rf200'),
        (500, 'log_move_rf500'),
        (1000, 'log_move_rf1000'),
    ):
        add_model(
            RandomForestClassifier(n_estimators=n_trees, max_depth=3, min_samples_split=10),
            name,
        )

    # Gradient boosting.
    for n_stages, name in (
        (20, 'log_move_gbdt20'),
        (50, 'log_move_gbdt50'),
        (100, 'log_move_gbdt100'),
    ):
        add_model(
            GradientBoostingClassifier(n_estimators=n_stages, max_depth=3, min_samples_split=15,
                                       learning_rate=0.01, subsample=0.7),
            name,
        )

    # AdaBoost over random forests; each model gets its own base forest.
    for n_rounds, name in (
        (20, 'log_move_ada20'),
        (50, 'log_move_ada50'),
        (100, 'log_move_ada100'),
    ):
        base_forest = RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10)
        add_model(
            AdaBoostClassifier(base_estimator=base_forest, n_estimators=n_rounds, learning_rate=0.02),
            name,
        )

    # XGBoost settings for the first pair of models.
    first_params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'scale_pos_weight': 28000/4000.0,
        'eval_metric': 'auc',
        'gamma': 0,
        'max_depth': 3,
        'lambda': 700,
        'subsample': 0.9,
        'colsample_bytree': 0.3,
        'min_child_weight': 5,
        'eta': 0.0005,
        'seed': 1,
        'nthread': 8,
    }
    for name, rounds in (('log_move_xgb2000', 2000), ('log_move_xgb2500', 2500)):
        add_xgb(name, first_params, rounds)

    # The "_2" pair uses the same values but its own dict object, so the two
    # pairs never share a params dict.
    second_params = dict(first_params)
    for name, rounds in (('log_move_xgb2000_2', 2000), ('log_move_xgb2500_2', 2500)):
        add_xgb(name, second_params, rounds)

    # run() is called directly on each worker, one after another.
    for worker in threads:
        worker.run()