import warnings
from collections import Counter

import numpy as np
import pandas as pd
import pymysql
from tpot import TPOTClassifier
from sklearn.metrics import precision_score, f1_score, recall_score

from data_treatment import load_data_new

if __name__ == '__main__':
    warnings.filterwarnings('ignore')

    # Load data
    sql = "SELECT * from bidata.trail_pigeon_wdf1"
    df = load_data_new(sql, filename="drumping.csv")
    df_btest = load_data_new(sql, filename="btest.csv")

    label_by_contract = "target_is_DD_ACTIVE"
    labels = label_by_contract

    select_columns = [
        "CAFE20_gender",
        "CAFE20_region",
        "CAFE20_levels",
        "is_festival_user",
        "level_use",
        "is_LAST_2YEAR_DD_ACTIVE",
        "cafe_tag_is_mop_available",
        "is_merch_user",
        "p4week_active",
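# --- Hedged sketch (not part of the original script) ------------------------
# The script above is truncated inside select_columns. Assuming the list is
# completed, the selected features are numeric, and the label column is
# target_is_DD_ACTIVE, one plausible continuation of the TPOT search looks like
# the helper below; all parameter values are illustrative only.
from sklearn.model_selection import train_test_split
from tpot import TPOTClassifier


def run_tpot(df, select_columns, labels="target_is_DD_ACTIVE"):
    X = df[select_columns].fillna(0)
    y = df[labels]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y)

    # Small search budget for illustration; real runs would use larger values.
    automl = TPOTClassifier(generations=5, population_size=20,
                            scoring="f1", n_jobs=-1, random_state=42, verbosity=2)
    automl.fit(X_train, y_train)
    print("hold-out score:", automl.score(X_test, y_test))
    automl.export("tpot_best_pipeline.py")  # dump the winning sklearn pipeline
    return automl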
import warnings

import numpy as np
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import make_scorer, precision_score, f1_score, recall_score
from xgboost import XGBClassifier

from data_treatment import load_data_new
from model_evalu import evalution_model, plot_importance
from models import rf_mdoel, gbdt_mdoel, xgb_model, cat_boost_model

if __name__ == '__main__':
    warnings.filterwarnings('ignore')

    # Load data
    sql = "SELECT * from bidata.trail_pigeon_wdf1"
    df = load_data_new(sql, filename="drumping.csv")

    label_by_contract = "target_is_DD_ACTIVE"
    labels = label_by_contract

    select_columns = [
        "CAFE20_gender",
        "CAFE20_region",
        "CAFE20_levels",
        "is_festival_user",
        "level_use",
        "is_LAST_2YEAR_DD_ACTIVE",
        "cafe_tag_is_mop_available",
        "is_merch_user",
        "p4week_active",
        "is_LAST_1YEAR_DD_ACTIVE",
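# --- Hedged sketch (not part of the original script) ------------------------
# The script above imports make_scorer and clone but is cut off before they are
# used. A plausible continuation is a cross-validated comparison of the
# candidate classifiers; this helper is an assumption for illustration, not the
# original training code.
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import make_scorer, f1_score
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier


def compare_models(X, y, cv=5):
    scorer = make_scorer(f1_score)
    candidates = {
        "rf": RandomForestClassifier(n_estimators=200, random_state=42),
        "gbdt": GradientBoostingClassifier(random_state=42),
        "xgb": XGBClassifier(n_estimators=200, random_state=42),
    }
    for name, model in candidates.items():
        # clone() passes an unfitted copy of each estimator into the CV loop
        scores = cross_val_score(clone(model), X, y, scoring=scorer, cv=cv)
        print(f"{name}: f1 = {scores.mean():.3f} +/- {scores.std():.3f}")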
import joblib
import pandas as pd
import matplotlib.pyplot as plt

from data_treatment import data_clean2, load_data_new

if __name__ == '__main__':
    sql = " "
    df = load_data_new(sql, filename="btest.csv")
    df = data_clean2(df)

    labels = "target_is_DD_ACTIVE"
    select_columns = [
        'is_festival_user',
        'is_LAST_2YEAR_DD_ACTIVE',
        'cafe_tag_is_mop_available',
        'IS_SR_KIT_USER',
        'level_use',
        'skr_rate',
        'merch_rate',
        'active_index',
        'cafe_tag_p6m_food_qty',
        'DD_rev',
        'svc_revenue',
        'SR_KIT_NUM',
        'cafe_tag_p3m_merch_party_size',
        'CAFE20_VISIT_MERCH',
        'CAFE20_AMT',
        'cafe_tag_p3m_food_qty',
        'p3m_weekday_trans',
        'max_DD_rev',
        'DD_end_gap',
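# --- Hedged sketch (not part of the original script) ------------------------
# This back-test script imports joblib but is truncated before the model is
# loaded and scored on the btest data. A minimal sketch of that step, assuming
# a model previously saved with joblib.dump; the model path and metric choices
# are illustrative assumptions.
import joblib
from sklearn.metrics import precision_score, recall_score, f1_score


def backtest(df, select_columns, labels, model_path="model.pkl"):
    model = joblib.load(model_path)              # estimator saved by the training script
    X, y = df[select_columns].fillna(0), df[labels]
    pred = model.predict(X)
    print("precision:", precision_score(y, pred))
    print("recall:   ", recall_score(y, pred))
    print("f1:       ", f1_score(y, pred))
    return pred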
import warnings
from collections import Counter

import numpy as np
import pandas as pd
import pymysql
from tpot import TPOTClassifier
from sklearn.metrics import precision_score, f1_score, recall_score

from data_treatment import load_data_new

if __name__ == '__main__':
    warnings.filterwarnings('ignore')

    # Load data
    sql = "SELECT * from bidata.trail_pigeon_wdf1"
    df = load_data_new(sql, filename="df_20190226.csv")

    label_by_contract = "is_pigeon"
    labels = label_by_contract

    select_columns = [
        # "sale_id",
        # "teacher_id",
        "know_origin",
        "grade_subject",
        "student_city_class",
        "student_province",
        "grade_rank",
        "class_rank_fillna",
        "student_province_byphone",
        "subject_ids",
import pickle
import time

import pandas as pd

from data_treatment import load_data_new
from model_evalu import evalution_model

if __name__ == '__main__':
    labels = "attend"
    file = "load_data/audio_data_with_success_attend_cutwords.pkl"
    with open(file, 'rb') as f:
        data = pickle.load(f)

    # student_intention_id
    sql_student = """SELECT v.student_id,v.student_intention_id,v.student_no,lpo.apply_time AS order_apply_time,lpo.order_id ,v.submit_time
                     from hfjydb.view_student v
                     LEFT JOIN hfjydb.lesson_plan_order lpo on lpo.student_intention_id = v.student_intention_id"""
    df_student = load_data_new(sql_student, filename="df_students_s.csv")
    df_student = df_student[~df_student["order_id"].isnull()]

    df_concat = pd.merge(data, df_student, on="student_intention_id", how="left")
    df_concat = df_concat[~df_concat["order_id"].isnull()]

    # Keep only student_intention_ids that appear exactly once
    conts = df_concat["student_intention_id"].value_counts()
    conts = conts[conts == 1]
    df_concat = df_concat[df_concat["student_intention_id"].isin(list(conts.index))]

    df_voice = df_concat[["order_id", "order_apply_time", "attend", "content"]]
    print(1)

    # Word segmentation
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
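    # --- Hedged sketch (not part of the original script) --------------------
    # The script is truncated right after the word-segmentation marker. A
    # minimal continuation, assuming jieba is the segmenter (the original tool
    # is not shown) and that the `content` column holds the raw transcripts:
    import jieba
    df_voice["cut_words"] = df_voice["content"].astype(str).apply(
        lambda text: " ".join(jieba.cut(text)))
    print("segmentation took %.1f s" % (time.perf_counter() - start))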
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from sklearn.feature_selection import SelectKBest, chi2

from data_treatment import load_data_new, data_clean2

if __name__ == '__main__':
    sql = "select * from bidata.trail_pigeon"
    df = load_data_new(sql, filename="df_20190215.csv")

    # Keep only students that appear exactly once in trail_pigeon
    student_sql = """SELECT student_id,count(student_id) from trail_pigeon GROUP BY student_id"""
    student_mul = load_data_new(student_sql, filename="student_mul.csv")
    student_ids = student_mul[student_mul["count(student_id)"] == 1]
    df = df[df["student_id"].isin(student_ids["student_id"])]

    label_by_contract = "is_pigeon"
    labels = label_by_contract

    # Data preprocessing
    df = data_clean2(df, min_date="2018-05-01", mid_date="2018-09-15",
                     max_date="2018-09-30", label=labels)
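    # --- Hedged sketch (not part of the original script) --------------------
    # SelectKBest/chi2 and ols/anova_lm are imported above but never used in the
    # truncated script. One plausible continuation, assuming the cleaned df
    # exposes non-negative numeric feature columns with identifier-safe names:
    feature_cols = [c for c in df.select_dtypes("number").columns
                    if c not in ("student_id", labels)]
    X = df[feature_cols].fillna(0).clip(lower=0)   # chi2 requires non-negative values
    y = df[labels]

    selector = SelectKBest(score_func=chi2, k=10).fit(X, y)
    top_features = [c for c, keep in zip(feature_cols, selector.get_support()) if keep]
    print("chi2 top-10 features:", top_features)

    # One-way ANOVA of a single candidate feature against the label
    model = ols(f"{top_features[0]} ~ C({labels})", data=df.fillna(0)).fit()
    print(anova_lm(model))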
import warnings

import numpy as np
import pandas as pd
import pymysql
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import make_scorer, precision_score, f1_score, recall_score
from xgboost import XGBClassifier
from catboost import CatBoostClassifier, CatBoostRegressor, Pool

from data_treatment import load_data_new
from model_evalu import evalution_model, plot_importance

if __name__ == '__main__':
    warnings.filterwarnings('ignore')

    # Load data
    sql = "select * from bidata.trail_boost"
    df = load_data_new(sql, filename="df_201810166.csv")

    # Load additional teacher-history features (disabled)
    # sql_new = "select DISTINCT teacher_id,history_trail_cnt,history_trail_suc_cnt_bycontract,history_trail_suc_cnt_bystudent,first_tkod_tifl_count from trail_boost"
    # conn = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com", port=3306, user="******",
    #                        passwd="******", db="bidata", charset="utf8")
    # df_new = pd.read_sql(sql_new, conn)
    # conn.close()
    # df = pd.merge(df, df_new, how="left", on=["teacher_id"])

    label_by_contract = "is_sucess_by_contract"
    label_by_pay = "is_sucess_by_pay"
    label_by_official_course = "is_sucess_by_official_course"
    labels = label_by_contract

    select_columns = [
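# --- Hedged sketch (not part of the original script) ------------------------
# select_columns is truncated above. Once it is filled in, fitting CatBoost with
# explicit categorical columns is one plausible next step; cat_features and the
# parameter values below are illustrative assumptions, not the original setup.
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split


def fit_catboost(df, select_columns, labels, cat_features=None):
    X, y = df[select_columns], df[labels]
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y)

    # Pool lets CatBoost handle the listed categorical columns natively
    train_pool = Pool(X_train, y_train, cat_features=cat_features)
    val_pool = Pool(X_val, y_val, cat_features=cat_features)

    model = CatBoostClassifier(iterations=500, learning_rate=0.05,
                               eval_metric="F1", verbose=100)
    model.fit(train_pool, eval_set=val_pool, early_stopping_rounds=50)
    return model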