示例#1
0
def _save_flag(flag_list):
    """
    Save extracted flags into the ``r_commodity_flag`` table.

    Every row is written with ``REPLACE INTO`` inside a single transaction:
    either all rows are committed, or the transaction is rolled back.
    Failures are logged and printed; they are not re-raised.

    :param flag_list: iterable of dicts, each holding the six column values
                      under the string keys "0" through "5"
    :return: None
    """
    # Values are bound by the driver instead of being formatted into the
    # statement, so quotes or backslashes inside a value can no longer break
    # (or inject into) the SQL text.
    insert_sql = 'replace into r_commodity_flag() values(%s, %s, %s, %s, %s, %s)'
    try:
        conn = pymysql.connect(host=get_config("recommend", "host"),
                               port=int(get_config("recommend", "port")),
                               db=get_config("recommend", "db"),
                               user=get_config("recommend", "user"),
                               passwd=get_config("recommend", "passwd"),
                               charset='utf8')
        try:
            cursor = conn.cursor()
            try:
                for flag_dict in flag_list:
                    row = (flag_dict["0"], flag_dict["1"], flag_dict["2"],
                           flag_dict["3"], flag_dict["4"], flag_dict["5"])
                    cursor.execute(insert_sql, row)
                conn.commit()
            except Exception as ex:
                logger.exception(ex)
                print(ex)
                conn.rollback()
            finally:
                # Runs even when rollback() itself fails.
                cursor.close()
        finally:
            conn.close()
    except Exception as ex:
        # Connection / cleanup failures end up here.
        logger.exception(ex)
        print(ex)
示例#2
0
def _delete_all_data():
    """
    Clear data from the "recommend" MySQL database.

    Executes the delete statement in a transaction, committing on success
    and rolling back on failure. Failures are logged and printed; they are
    not re-raised.

    :return: None
    """
    try:
        conn = pymysql.connect(host=get_config("recommend", "host"),
                               port=int(get_config("recommend", "port")),
                               db=get_config("recommend", "db"),
                               user=get_config("recommend", "user"),
                               passwd=get_config("recommend", "passwd"),
                               charset='utf8')
        try:
            cursor = conn.cursor()
            try:
                # NOTE(review): the statement text looks truncated in this
                # source ("delete" alone is not valid SQL) - confirm the
                # intended target table.
                cursor.execute("delete")
                conn.commit()
            except Exception as ex:
                logger.exception(ex)
                print(ex)
                conn.rollback()
            finally:
                # Release the cursor even if rollback() raised.
                cursor.close()
        finally:
            # Release the connection even if cursor cleanup raised.
            conn.close()
    except Exception as ex:
        # Connection / cleanup failures end up here.
        logger.exception(ex)
        print(ex)
示例#3
0
def impala_db(hive_sql):
    """
    Run a query against Impala and return its result via ``as_pandas``.

    Connection settings are read from the "impala" config section. Any error
    raised while connecting or querying propagates to the caller.

    :param hive_sql: SQL text, executed as-is
    :return: whatever ``as_pandas(cursor)`` returns for the executed query
    """
    conn = connect(host=get_config("impala", "host"),
                   port=get_config("impala", "port"),
                   database=get_config("impala", "database"),
                   auth_mechanism='PLAIN')
    try:
        curl = conn.cursor()
        try:
            curl.execute(hive_sql)
            # as_pandas is called before the cursor is closed, so the result
            # is built while the cursor is still open.
            return as_pandas(curl)
        finally:
            curl.close()
    finally:
        # Previously the connection was never closed.
        conn.close()
示例#4
0
def _get_checkup_data_from_impala():
    """
    Fetch all health-checkup commodities from Impala.

    :return: the rows returned by ``cursor.fetchall()``, or None when
             connecting or querying fails (the error is logged, not raised)
    """
    try:
        conn = connect(host=get_config("impala", "host"),
                       port=get_config("impala", "port"),
                       database=get_config("impala", "database"),
                       auth_mechanism="PLAIN")
        try:
            cursor = conn.cursor()
            try:
                # NOTE(review): the query text looks truncated in this source.
                cursor.execute('''select ''')
                return cursor.fetchall()
            finally:
                # Close on both the success and the failure path.
                cursor.close()
        finally:
            conn.close()
    except Exception as ex:
        logger.exception(ex)
        # Explicit: callers receive None on failure, same as before.
        return None
def main():
    """
    Run the pipeline end to end: config, data loading, feature creation,
    training, prediction and submission writing.

    The best submission was a single LightGBM model.
    Model and feature configuration are flexible; see README.md for details.
    """
    # Config
    CONFIG_DATA, CONFIG_FEATURES, CONFIG_PARAMS = get_config()
    # The original line ended with a stray ':' which is a syntax error.
    CLF = get_model(model="LGBM", param=CONFIG_PARAMS)

    # Load data
    (train_current, train_previous,
     test_current, test_previous, mapping_dict) = get_processed_data(
        CONFIG_DATA["path_current_train"], CONFIG_DATA["path_previous_train"],
        CONFIG_DATA["path_current_test"], CONFIG_DATA["path_previous_test"])

    # Create features. Both calls take the same inputs and differ only in the
    # month selection and the is_train switch.
    shared_kwargs = dict(
        feature_msg=CONFIG_FEATURES, translation_dict=mapping_dict,
        train_current=train_current, train_previous=train_previous,
        test_current=test_current, test_previous=test_previous,
        return_labels=True, return_prev_prod=True,
        return_user_id=True, return_new_products=True,
    )

    (train_label, train_data, train_previous_products,
     train_user_ids, train_new_products) = create_data(
        MONTHS=CONFIG_FEATURES['train_month'], is_train=True, **shared_kwargs)

    (test_label, test_data, test_previous_products,
     test_user_ids, test_new_products) = create_data(
        MONTHS=CONFIG_FEATURES['test_month'], is_train=False, **shared_kwargs)

    # Train
    CLF, unique_labels = ml.train_model(y=train_label, X=train_data, model=CLF)

    # Test: generate predictions and write the submission file
    predictions = ml.create_prediction(model=CLF, X=test_data,
                                       previous_products=test_previous_products,
                                       unq_lb=unique_labels)
    create_submission(predictions=predictions, test_id=test_user_ids,
                      filename=CONFIG_DATA["filename"])
示例#6
0
# Author zhen.ma
"""
体检商品标签提取
"""
import sys

sys.path.append("../")
import logbook
import jieba
import jieba.posseg as pseg
from impala.dbapi import connect
import pymysql
from helper.config import get_config

# Module-level logger; records are emitted under the "checkup_flag_extract" channel.
logger = logbook.Logger("checkup_flag_extract")
# Value of the "path" option in the "jieba" config section.
# NOTE(review): presumably a jieba dictionary path - its use is not visible here.
jieba_path = get_config("jieba", "path")


def extract_checkup_flag():
    """
    Extract flags for health-checkup commodities and persist them.

    Pipeline: load the commodities from Impala, extract the flags, save them.

    :return: None
    """
    # Alternative source (disabled): _get_checkup_data_from_mysql()
    _save_flag(_extract_flag(_get_checkup_data_from_impala()))
    print("extract_checkup_flag end.")


def _get_checkup_data_from_impala():
示例#7
0
def save_to_mysql(df, table='r_commodity_flag'):
    """
    Append a DataFrame to a MySQL table.

    The database URL is read from the "full_path" option of the "recommend"
    config section. Rows are appended (``if_exists='append'``) and the
    DataFrame index is not written.

    :param df: DataFrame to store
    :param table: target table name
    :return: None
    """
    engine = create_engine(get_config("recommend", "full_path"))
    try:
        pd.io.sql.to_sql(df, table, con=engine, index=False, if_exists='append')
    finally:
        # A new Engine is built on every call; dispose it so its pooled
        # connections are closed instead of lingering until garbage collection.
        engine.dispose()
示例#8
0
import jieba
import jieba.posseg as pseg
from impala.dbapi import connect
from impala.util import as_pandas
import pandas as pd
from collections import defaultdict
import re
from extract_checkup_flag import extract_checkup_flag
from sqlalchemy import create_engine
import pymysql
import time
import logbook
from helper.config import get_config

# Logging setup: install two application-wide handlers. bubble=True is set on
# both so a record handled by one is still passed on to the other.
logbook.StderrHandler(bubble=True).push_application()
# File handler; the log file path comes from the "extract_flag" option of the
# "log" config section, and backup_count=7 bounds how many rotated files are kept.
logbook.TimedRotatingFileHandler(get_config("log", "extract_flag"),
                                 backup_count=7,
                                 bubble=True).push_application()
# Module-level logger; records are emitted under the "flag_extract" channel.
logger = logbook.Logger("flag_extract")

# Value of the "path" option in the "jieba" config section.
# NOTE(review): presumably a jieba dictionary path - its use is not visible here.
jieba_path = get_config("jieba", "path")


def save_to_mysql(df, table='r_commodity_flag'):
    """
    Append a DataFrame to a MySQL table.

    The database URL is read from the "full_path" option of the "recommend"
    config section. Rows are appended (``if_exists='append'``) and the
    DataFrame index is not written.

    :param df: DataFrame to store
    :param table: target table name
    :return: None
    """
    engine = create_engine(get_config("recommend", "full_path"))
    try:
        pd.io.sql.to_sql(df, table, con=engine, index=False, if_exists='append')
    finally:
        # A new Engine is built on every call; dispose it so its pooled
        # connections are closed instead of lingering until garbage collection.
        engine.dispose()


def impala_db(hive_sql):
    conn = connect(host=get_config("impala", "host"),
                   port=get_config("impala", "port"),