def _save_flag(flag_list):
    """Persist commodity flags into MySQL (table ``r_commodity_flag``).

    Each element of *flag_list* is a dict keyed by the string digits
    "0".."5", in the column order of the target table — TODO confirm the
    schema against the producer of these dicts.

    :param flag_list: list of flag dicts to upsert via ``REPLACE INTO``
    :return: None
    """
    try:
        conn = pymysql.connect(host=get_config("recommend", "host"),
                               port=int(get_config("recommend", "port")),
                               db=get_config("recommend", "db"),
                               user=get_config("recommend", "user"),
                               passwd=get_config("recommend", "passwd"),
                               charset='utf8')
        cursor = conn.cursor()
        try:
            # SECURITY FIX: the original interpolated values with
            # str.format inside quoted SQL, which breaks on embedded
            # quotes and is SQL-injectable.  Use driver parameters.
            replace_sql = ("replace into r_commodity_flag "
                           "values(%s, %s, %s, %s, %s, %s)")
            rows = [(d["0"], d["1"], d["2"], d["3"], d["4"], d["5"])
                    for d in flag_list]
            cursor.executemany(replace_sql, rows)
            # One commit for the whole batch instead of per-row commits.
            conn.commit()
        except Exception as ex:
            logger.exception(ex)
            conn.rollback()
        finally:
            # Always release the connection, even when the batch fails.
            cursor.close()
            conn.close()
    except Exception as ex:
        logger.exception(ex)
def _delete_all_data():
    """Remove all rows from the flag table before a fresh import.

    NOTE(review): the original statement was the bare string "delete",
    which is not valid SQL — it always raised and rolled back, making the
    function a silent no-op.  The table written by the rest of this module
    is ``r_commodity_flag``; confirm that is the intended target.

    :return: None
    """
    try:
        conn = pymysql.connect(host=get_config("recommend", "host"),
                               port=int(get_config("recommend", "port")),
                               db=get_config("recommend", "db"),
                               user=get_config("recommend", "user"),
                               passwd=get_config("recommend", "passwd"),
                               charset='utf8')
        cursor = conn.cursor()
        try:
            cursor.execute("delete from r_commodity_flag")
            conn.commit()
        except Exception as ex:
            logger.exception(ex)
            conn.rollback()
        finally:
            # Always release the connection, even on failure.
            cursor.close()
            conn.close()
    except Exception as ex:
        logger.exception(ex)
def impala_db(hive_sql):
    """Execute *hive_sql* on Impala and return the result as a DataFrame.

    The original leaked the connection and cursor on every call; both are
    now released in ``finally`` blocks.  ``as_pandas`` materializes the
    full result set before we return, so closing afterwards is safe.

    :param hive_sql: SQL text to execute
    :return: pandas.DataFrame with the query result
    """
    conn = connect(host=get_config("impala", "host"),
                   port=get_config("impala", "port"),
                   database=get_config("impala", "database"),
                   auth_mechanism='PLAIN')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(hive_sql)
            return as_pandas(cursor)
        finally:
            cursor.close()
    finally:
        conn.close()
def _get_checkup_data_from_impala():
    """Fetch all health-checkup commodities from Impala.

    NOTE(review): the query literal below reads ``select `` and appears
    truncated at extraction time — restore the full statement before use.

    :return: list of result tuples; an empty list on failure.  (The
             original implicitly returned None on error, which would
             break callers that iterate the result.)
    """
    try:
        conn = connect(host=get_config("impala", "host"),
                       port=get_config("impala", "port"),
                       database=get_config("impala", "database"),
                       auth_mechanism="PLAIN")
        try:
            cursor = conn.cursor()
            try:
                cursor.execute('''select ''')
                return cursor.fetchall()
            finally:
                # Close even when execute/fetch raises (original leaked
                # the cursor and connection on the error path).
                cursor.close()
        finally:
            conn.close()
    except Exception as ex:
        logger.exception(ex)
        return []
def main():
    """Run the full train/predict pipeline.

    The best submission was a single LightGBM model.  Flexibility is
    provided for various model and feature configurations; see README.md
    for details.
    """
    # Config
    config_data, config_features, config_params = get_config()
    # BUG FIX: the original call ended with a stray ':' after the closing
    # parenthesis, which is a syntax error.
    clf = get_model(model="LGBM", param=config_params)

    # Load data
    (train_current, train_previous,
     test_current, test_previous, mapping_dict) = get_processed_data(
        config_data["path_current_train"],
        config_data["path_previous_train"],
        config_data["path_current_test"],
        config_data["path_previous_test"])

    # Create features for the training months
    (train_label, train_data, train_previous_products,
     train_user_ids, train_new_products) = create_data(
        feature_msg=config_features, translation_dict=mapping_dict,
        train_current=train_current, train_previous=train_previous,
        test_current=test_current, test_previous=test_previous,
        return_labels=True, return_prev_prod=True,
        return_user_id=True, return_new_products=True,
        MONTHS=config_features['train_month'], is_train=True)

    # Create features for the test months
    (test_label, test_data, test_previous_products,
     test_user_ids, test_new_products) = create_data(
        feature_msg=config_features, translation_dict=mapping_dict,
        train_current=train_current, train_previous=train_previous,
        test_current=test_current, test_previous=test_previous,
        return_labels=True, return_prev_prod=True,
        return_user_id=True, return_new_products=True,
        MONTHS=config_features['test_month'], is_train=False)

    # Train
    clf, unique_labels = ml.train_model(y=train_label, X=train_data,
                                        model=clf)

    # Predict
    predictions = ml.create_prediction(model=clf, X=test_data,
                                       previous_products=test_previous_products,
                                       unq_lb=unique_labels)

    # Write the submission file
    create_submission(predictions=predictions, test_id=test_user_ids,
                      filename=config_data["filename"])
# Author zhen.ma """ 体检商品标签提取 """ import sys sys.path.append("../") import logbook import jieba import jieba.posseg as pseg from impala.dbapi import connect import pymysql from helper.config import get_config logger = logbook.Logger("checkup_flag_extract") jieba_path = get_config("jieba", "path") def extract_checkup_flag(): """ 提取体检商品标签 :return: None """ # checkup_list = _get_checkup_data_from_mysql() checkup_list = _get_checkup_data_from_impala() flag_list = _extract_flag(checkup_list) _save_flag(flag_list) print "extract_checkup_flag end." def _get_checkup_data_from_impala():
def save_to_mysql(df, table='r_commodity_flag'):
    """Append *df* to a MySQL table.

    :param df: pandas.DataFrame whose columns match the target table —
               TODO confirm column names against the table schema
    :param table: destination table name
    :return: None
    """
    engine = create_engine(get_config("recommend", "full_path"))
    # DataFrame.to_sql is the public API; pd.io.sql.to_sql is an internal
    # module path that pandas does not guarantee.
    df.to_sql(table, con=engine, index=False, if_exists='append')
import jieba
import jieba.posseg as pseg
from impala.dbapi import connect
from impala.util import as_pandas
import pandas as pd
from collections import defaultdict
import re
from extract_checkup_flag import extract_checkup_flag
from sqlalchemy import create_engine
import pymysql
import time
import logbook
from helper.config import get_config

# Log to stderr and to a timed-rotating file (7 backups kept); bubble so
# both handlers see every record.
logbook.StderrHandler(bubble=True).push_application()
logbook.TimedRotatingFileHandler(get_config("log", "extract_flag"),
                                 backup_count=7,
                                 bubble=True).push_application()
logger = logbook.Logger("flag_extract")
# Location of the custom jieba dictionary, read from configuration.
jieba_path = get_config("jieba", "path")


def save_to_mysql(df, table='r_commodity_flag'):
    """Append *df* to a MySQL table.

    :param df: pandas.DataFrame whose columns match the target table
    :param table: destination table name
    :return: None
    """
    engine = create_engine(get_config("recommend", "full_path"))
    pd.io.sql.to_sql(df, table, con=engine, index=False, if_exists='append')


def impala_db(hive_sql):
    conn = connect(host=get_config("impala", "host"), port=get_config("impala", "port"),