Exemplo n.º 1
0
def generatedata(lag_day=7):
    exec_start_time=time.time()
    today_string          = dt.date.today().strftime("%Y%m%d")
    yesterday_string      = (dt.date.today() - dt.timedelta(1)).strftime("%Y%m%d")
    n_date_string = (dt.date.today() - dt.timedelta(lag_day)).strftime("%Y%m%d")
    
    from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py
    import jaydebeapi
    _DEFAULT_CONVERTERS.update({'BIGINT':_java_to_py('longValue')})

    conn = jaydebeapi.connect('com.ingres.jdbc.IngresDriver', 
                              ['jdbc:ingres://192.168.6.199:vw7/compass_v1', 'mintel', 'mintel'], 
                              "lib/iijdbc.jar")
    curs = conn.cursor()
    curs.execute('''
                 select 
                 TOP 10000
                 a.egoodsid,
                 a.c34 as c34a,
                 b.c34 as c34b,
                 a.goodsid,
                 a.currentprice, 
                 a.allcomments,
                 a.c34, 
                 a.c1,
                 a.item_rank, 
                 a.agg1, 
                 a.agg2, 
                 a.agg3, 
                 a.agg5, 
                 a.agg7, 
                 a.is_promo
                 from mv_simtxn a, mv_simtxn b
                 where a.etype='JD' AND a.ref_date='%s'
                 AND b.etype='JD' AND b.ref_date='%s'
                 AND a.egoodsid = b.egoodsid
                 ''' % (yesterday_string,n_date_string))
    rows = curs.fetchall()
    print "query read time: %.1f seconds" % (time.time() - exec_start_time)
    columnNames = [curs.description[i][0] for i in range(len(curs.description))]
    df_test = pd.DataFrame(data=rows, columns=columnNames)
    df_test.index = df_test['egoodsid'].values

    df_test['relative_item_rank']= divide(df_test['item_rank'],df_test['agg1'])
    df_test['brand_share']       = divide(df_test['agg5'],df_test['agg2'])
    df_test['log1p(RIR)']        = log(divide(df_test['item_rank'],df_test['agg1'])) #log(df_test['relative_item_rank'])
    df_test['log(c34)']          = log(df_test['c34b'])
    df_test['log(allcomments)']  = log(df_test['allcomments'])
    df_test['is_promo']          = pd.to_numeric(df_test['is_promo'])
    df_test['c1']                = pd.to_numeric(df_test['c1'])
    #df_test['AC_updated']        = df_test['agg7']>0

    df_test["case_label"] = 0
    df_test.loc[df_test.item_rank.notnull(),"case_label"]=1
    #df_test.loc[df_test.item_rank.notnull() & df_test.c34.isnull(),"case_label"]=2
    df_test.loc[df_test.item_rank.isnull() & df_test.c34.notnull(),"case_label"]=3
    df_test.loc[df_test.item_rank.isnull() & df_test.c34.isnull(),"case_label"]=4
    return df_test
Exemplo n.º 2
0
#%pylab inline

# ####Design:
#
# 1. Add the following features {relative_item_rank: item_rank/agg1, brandshare: agg5/agg2}
# 2. Filter out all non-zero c30s, run them through:
#   - p(c30==0|D1):D1=allcomments, is_in_stock, item_rank, agg7, agg8,
#   - p(q30==0|D2):D2=p0c30, allcomments, is_in_stock, relative_item_rank, is_promo
# 3. Generate a case number for each row, based on rules in documentation
#
#

# In[3]:

from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py
_DEFAULT_CONVERTERS.update({'BIGINT': _java_to_py('longValue')})

conn = jaydebeapi.connect(
    'com.ingres.jdbc.IngresDriver',
    ['jdbc:ingres://192.168.6.199:vw7/compass_v1', 'mintel', 'mintel'],
    "lib/iijdbc.jar")
curs = conn.cursor()
curs.execute('''
             SELECT 
             a.egoodsid,
             a.goodsid,
             a.currentprice, 
             a.allcomments,
             a.c34, 
             a.c1, 
             a.item_rank, 
Exemplo n.º 3
0
import datetime
import jaydebeapi
from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py
import sqlite3
from config import *

_DEFAULT_CONVERTERS.update({'BIGINT': _java_to_py('longValue')})

class openfireMySQL:

    def __init__(self):
        self.mysqlOFCon = jaydebeapi.connect('com.mysql.jdbc.Driver',
                                             ['jdbc:mysql://' + destDBHost + ':' + destDBPort + '/' + destDBname,
                                              destUser, destPw], ['hsqldb.jar', 'mysql.jar'], )
        self.mysqlOFCur = self.mysqlOFCon.cursor()

    def getMySQLTableNames(self):
        self.mysqlCon = jaydebeapi.connect('com.mysql.jdbc.Driver',
                                 ['jdbc:mysql://' + destDBHost + ':' + destDBPort,
                                     destUser, destPw], ['hsqldb.jar', 'mysql.jar'],)
        self.mysqlCur = self.mysqlCon.cursor()
        self.mysqlCur.execute("SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = '%s'" % destDBname)
        self.tables = self.mysqlCur.fetchall()
        self.mysqlCon.close()
        self.tableList = []
        for table in self.tables:
            self.tableList.append(str(table[0]))
        return self.tableList

    def addToMySQL(self, table, columns, data):
        data = str(data)
Exemplo n.º 4
0
def lag(lag_day=7, until_day='yesterday'):

    ### you can put until_day as m days ago, where m must <=n
    ### one greater than 1 integer number is expected of until_day.
    ### example: m=3, means you generate df is data of 3 days ago, df_lag is data of "lag_day" days ago.

    import datetime as dt
    import time
    import pandas as pd
    exec_start_time = time.time()
    yesterday_string = (dt.date.today() - dt.timedelta(1)).strftime("%Y%m%d")
    today_string = dt.date.today().strftime("%Y%m%d")
    n_date_string = (dt.date.today() -
                     dt.timedelta(lag_day)).strftime("%Y%m%d")

    if until_day == 'yesterday': until_day = 1
    if until_day > lag_day:
        raise IOError("until_day: %d must be no greater than lag_day: %d!" %
                      (until_day, lag_day))
    if until_day < 1:
        raise IOError("until_day: %d and lag_day: %d must be no less than 1!" %
                      (until_day, lag_day))
    if type(until_day) != int or type(lag_day) != int:
        raise IOError("until_day and lag_day must be integer number!")

    yesterday_string = (dt.date.today() -
                        dt.timedelta(until_day)).strftime("%Y%m%d")

    from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py
    import jaydebeapi
    _DEFAULT_CONVERTERS.update({'BIGINT': _java_to_py('longValue')})

    conn = jaydebeapi.connect(
        'com.ingres.jdbc.IngresDriver',
        ['jdbc:ingres://192.168.6.199:vw7/compass_v1', 'mintel', 'Passwd'],
        "lib/iijdbc.jar")
    curs = conn.cursor()
    curs.execute('''
                 SELECT 
                 a.egoodsid,
                 a.goodsid,
                 a.currentprice, 
                 a.allcomments,
                 a.c30, 
                 a.c1, 
                 a.item_rank, 
                 a.agg1, 
                 a.agg2, 
                 a.agg3, 
                 a.agg5, 
                 a.agg7, 
                 a.is_promo
                 FROM mv_simtxn a
                 WHERE a.etype='JD' AND a.ref_date='%s'
                 ''' % (yesterday_string))
    rows = curs.fetchall()
    columnNames = [
        curs.description[i][0] for i in range(len(curs.description))
    ]
    df = pd.DataFrame(data=rows, columns=columnNames)

    curs = conn.cursor()
    curs.execute('''
                 select 
                 a.egoodsid,
                 b.c30,
                 a.goodsid,
                 a.currentprice, 
                 a.allcomments,
                 a.c1,
                 a.item_rank, 
                 a.agg1, 
                 a.agg2, 
                 a.agg3, 
                 a.agg5, 
                 a.agg7, 
                 a.is_promo
                 from mv_simtxn a, mv_simtxn b
                 where a.etype='JD' AND a.ref_date='%s'
                 AND b.etype='JD' AND b.ref_date='%s'
                 AND a.egoodsid = b.egoodsid
                 ''' % (yesterday_string, n_date_string))
    rows = curs.fetchall()
    print "query read time: %.1f seconds" % (time.time() - exec_start_time)
    columnNames = [
        curs.description[i][0] for i in range(len(curs.description))
    ]
    df_lag = pd.DataFrame(data=rows, columns=columnNames)
    return df, df_lag
Exemplo n.º 5
0
    def dbapi(cls):
        import jaydebeapi
        from jaydebeapi import _DEFAULT_CONVERTERS, _java_to_py
        _DEFAULT_CONVERTERS.update({'BIGINT': _java_to_py('longValue')})

        return jaydebeapi