示例#1
0
	def main(self):
		
		# Step 1 --> Get and sample data from the original dataset
		print("[AVAZU]\tCreating Data Handler for train and test files...")
		if self.validation_file == None:
			data = DataHandler(self.train_file,self.train_file)
		else:
			data = DataHandler(self.train_file,self.validation_file)

		
 		# you can then get train and test files by doing data._train or data._test
		data.create_train_and_test(self.sampling)
		print("[AVAZU]\tData read and split successfully.")


 		# Step 2 --> Feature Engineering
		feat_eng = FeatureEngineering()
		if need_feateng == True:
			print("[AVAZU]\tStarting feature engineering operations...")
			#data.transform_data(feat_eng.append_hours,feat_eng.append_counters_uniques)
			#data.transform_data(feat_eng.append_days,feat_eng.)
			print("[AVAZU]\tFinished feature engineering operations.")
		
		#print("[AVAZU]\tSaving intermediary dataframe...")
		#data.save("feat_eng_")
		#print("[AVAZU]\tFile saved...")		

		data._train.drop(['click','id'], axis=1, inplace=True)
		data.drop(['C1','device_conn_type','device_type','banner_pos','C15','C16','C17','C18','C19','C20','C21','device_model','site_category','site_domain','site_id','app_category','app_domain','app_id','hours','device_id','device_ip'])
		
		print("[AVAZU]\tSelecting K-Best Features...")
		idxs = feat_eng.select_best_k_features(data._train,data._ytrain,5)
		print(idxs)
		print("[AVAZU]\tFeatures selected.")
 		
 		# Step 3 --> Train model and test it
		print("Training and testing the model...")
		m = Model(idxs)
		m.train(data)
		m.predict(data)

		if self.validation_file != None:
			print(type(data._test['id'].ravel()))
			df = pd.DataFrame({'id':data._test['id'].ravel(),'click':m._ypred}, columns=['id', 'click'])
			df.to_csv('output.csv', index=False)
			print("Output in the file: output.csv")
		else:
			print("Log Loss Result: " + str(m.log_loss(data)))

		def print_help():
			print("python avazu.py <args>\n\n")
			print("------------------------------------------------------------------")
			print("List of arguments:")
			print("\t -t: Specifies a training file to read from a csv file\t[REQUIRED]")
			print("\t -v: Specifies a validation/test file to read from a csv file. If the file is not specified, 30\% of the training samples will be used to build the validation dataset (Default: Not specified)")
			print("\t -s: Specifies a sampling ratio to be performed over the training data. If none is specified, it will use the whole dataset. (Default: Entire dataset)")
			print("\t -f: (To be inproved) Specifies if one desires to apply feature engineering over the dataset or not. (Default: No transformation applied)")
			print("\t -h: Used to print this menu")
			print("------------------------------------------------------------------")
示例#2
0
def page():
    x = request.args['x']
    y = request.args['y']
    model = Model()
    model.train()
    n  = model.predict([int(x),int(y)])
    return jsonify({ 'data' : str(n)})
示例#3
0
def teste():
    logicals  = request.json['logicals']
    model = Model()
    model.train()
    n  = model.predict(logicals)
    return  jsonify({ 'data' : str(n) })
    def main(self):

        # Step 1 --> Get and sample data from the original dataset
        print("[AVAZU]\tCreating Data Handler for train and test files...")
        if self.validation_file == None:
            data = DataHandler(self.train_file, self.train_file)
        else:
            data = DataHandler(self.train_file, self.validation_file)

# you can then get train and test files by doing data._train or data._test
        data.create_train_and_test(self.sampling)
        print("[AVAZU]\tData read and split successfully.")

        # Step 2 --> Feature Engineering
        feat_eng = FeatureEngineering()
        if need_feateng == True:
            print("[AVAZU]\tStarting feature engineering operations...")
            #data.transform_data(feat_eng.append_hours,feat_eng.append_counters_uniques)
            #data.transform_data(feat_eng.append_days,feat_eng.)
            print("[AVAZU]\tFinished feature engineering operations.")

        #print("[AVAZU]\tSaving intermediary dataframe...")
        #data.save("feat_eng_")
        #print("[AVAZU]\tFile saved...")

        data._train.drop(['click', 'id'], axis=1, inplace=True)
        data.drop([
            'C1', 'device_conn_type', 'device_type', 'banner_pos', 'C15',
            'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'device_model',
            'site_category', 'site_domain', 'site_id', 'app_category',
            'app_domain', 'app_id', 'hours', 'device_id', 'device_ip'
        ])

        print("[AVAZU]\tSelecting K-Best Features...")
        idxs = feat_eng.select_best_k_features(data._train, data._ytrain, 5)
        print(idxs)
        print("[AVAZU]\tFeatures selected.")

        # Step 3 --> Train model and test it
        print("Training and testing the model...")
        m = Model(idxs)
        m.train(data)
        m.predict(data)

        if self.validation_file != None:
            print(type(data._test['id'].ravel()))
            df = pd.DataFrame(
                {
                    'id': data._test['id'].ravel(),
                    'click': m._ypred
                },
                columns=['id', 'click'])
            df.to_csv('output.csv', index=False)
            print("Output in the file: output.csv")
        else:
            print("Log Loss Result: " + str(m.log_loss(data)))

        def print_help():
            print("python avazu.py <args>\n\n")
            print(
                "------------------------------------------------------------------"
            )
            print("List of arguments:")
            print(
                "\t -t: Specifies a training file to read from a csv file\t[REQUIRED]"
            )
            print(
                "\t -v: Specifies a validation/test file to read from a csv file. If the file is not specified, 30\% of the training samples will be used to build the validation dataset (Default: Not specified)"
            )
            print(
                "\t -s: Specifies a sampling ratio to be performed over the training data. If none is specified, it will use the whole dataset. (Default: Entire dataset)"
            )
            print(
                "\t -f: (To be inproved) Specifies if one desires to apply feature engineering over the dataset or not. (Default: No transformation applied)"
            )
            print("\t -h: Used to print this menu")
            print(
                "------------------------------------------------------------------"
            )