Example #1
def record_device_id(deviceid, platform, original_deviceid):

    tnow = timeHelper.getNowLong()
    if platform not in [notification_config.CONST_DEVICE_PLATFORM_ANDROID, notification_config.CONST_DEVICE_PLATFORM_IOS]:
        platform = notification_config.CONST_DEVICE_PLATFORM_IOS

    if len(deviceid) < 5:
        return {
            'status': -1,
            'msg': 'invalid device_id = %s' % deviceid,
        }

    # record in db
    vlist = [
        [deviceid, platform, tnow, original_deviceid, ""]
    ]

    ret = crawler_helper.persist_db_history_and_latest(
        table_name='user_notification_device',
        num_cols=len(vlist[0]),
        value_list=vlist,
        is_many=True,
        need_history=True,
        need_flow=False,
    )

    return ret
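
A minimal usage sketch for the function above, assuming the notification_config constants shown in the example; the import paths are hypothetical and the commented return value reflects only the error branch visible in the code:

# Hypothetical usage sketch; the import paths below are placeholders, not the
# actual module layout of the source repository.
from notification import notification_config
from notification.device import record_device_id

ret = record_device_id(
    deviceid="a1b2c3d4e5",                                   # must be at least 5 characters
    platform=notification_config.CONST_DEVICE_PLATFORM_IOS,  # unknown platforms fall back to iOS
    original_deviceid="",
)
# On validation failure this returns {'status': -1, 'msg': 'invalid device_id = ...'};
# otherwise it returns whatever persist_db_history_and_latest returns.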
Example #2
def record_device_id(deviceid, platform, original_deviceid):

    tnow = timeHelper.getNowLong()
    if platform not in [
            notification_config.CONST_DEVICE_PLATFORM_ANDROID,
            notification_config.CONST_DEVICE_PLATFORM_IOS
    ]:
        platform = notification_config.CONST_DEVICE_PLATFORM_IOS

    if len(deviceid) < 5:
        return {
            'status': -1,
            'msg': 'invalid device_id = %s' % deviceid,
        }

    # record in db
    vlist = [[deviceid, platform, tnow, original_deviceid, ""]]

    ret = crawler_helper.persist_db_history_and_latest(
        table_name='user_notification_device',
        num_cols=len(vlist[0]),
        value_list=vlist,
        is_many=True,
        need_history=True,
        need_flow=False,
    )

    return ret
Example #3
def match_discounts():

    debug_sku_str = "" if not IF_DEBUG_SKU else " where sku_id=%s" %DEBUG_SKU_ID
    print('>>> 1/8 >>> Reading jd_price_temp_latest...')
    sql_price = 'select * from jd_price_temp_latest %s' %debug_sku_str
    retrows_price = dbhelper.executeSqlRead(sql_price, is_dirty=True)
    print('rows read: %s' %len(retrows_price))

    print('>>> 2/8 >>> Reading strongest deductions of each sku...')
    deduction_dict = _get_deduction_dict()
    print('rows read: %s' %len(deduction_dict))

    print('>>> 3/8 >>> Reading discounts of each sku...')
    discount_dict = _get_discount_dict()
    print('rows read: %s' %len(discount_dict))

    print('>>> 4/8 >>> Reading gifts of each sku...')
    gift_dict = _get_gift_dict()
    print('rows read: %s' %len(gift_dict))

    print('>>> 5/8 >>> Reading first seen date of each sku...')
    first_seen_dict = _get_item_firstseen_dict()
    print('rows read: %s' %len(first_seen_dict))

    print('>>> 6/8 >>> Reading ratings of each sku...')
    rating_dict = _get_rating_dict()
    print('rows read: %s' %len(rating_dict))

    print('>>> 7/8 >>> Joining results in memory...')

    _merge_dict_under_key(
        deduction_dict,
        [
            discount_dict,
            gift_dict,
            first_seen_dict,
            rating_dict,
        ]
    )

    tlist = _memory_left_join(retrows_price, deduction_dict,
                              col_name_list_left=cols_left,
                              col_name_list_right=cols_deduction)
    print('rows generated: %s' % len(tlist))

    print('>>> 8/8 >>> Calculating worthy_values...')
    _calculate_worthy_values(tlist)
    print('num cols = %s ' % len(tlist[0]))

    print('>>> 9/9 >>> Saving to DB...')
    ret = crawler_helper.persist_db_history_and_latest(
        table_name='jd_worthy',
        num_cols=len(tlist[0]),
        value_list=tlist,
        is_many=True,
        need_history=False
    )
    return ret
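
Both variants of match_discounts lean on _merge_dict_under_key, whose body is not shown in these examples. A minimal sketch of such a merge, assuming every argument maps sku_id to a dict of column values and that only SKUs already present in the deduction dict are enriched:

# Hypothetical sketch of _merge_dict_under_key; the signature and merge rule
# are assumptions inferred from how it is called above.
def _merge_dict_under_key(base_dict, other_dicts):
    """Fold the columns of every dict in other_dicts into base_dict, keyed by sku_id."""
    for other in other_dicts:
        for sku_id, cols in other.items():
            if sku_id in base_dict:
                base_dict[sku_id].update(cols)
    return base_dict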
Example #4
def match_discounts():

    debug_sku_str = "" if not IF_DEBUG_SKU else " where sku_id=%s" % DEBUG_SKU_ID
    print('>>> 1/8 >>> Reading jd_price_temp_latest...')
    sql_price = 'select * from jd_price_temp_latest %s' % debug_sku_str
    retrows_price = dbhelper.executeSqlRead(sql_price, is_dirty=True)
    print('rows read: %s' % len(retrows_price))

    print('>>> 2/8 >>> Reading strongest deductions of each sku...')
    deduction_dict = _get_deduction_dict()
    print('rows read: %s' % len(deduction_dict))

    print('>>> 3/8 >>> Reading discounts of each sku...')
    discount_dict = _get_discount_dict()
    print('rows read: %s' % len(discount_dict))

    print('>>> 4/8 >>> Reading gifts of each sku...')
    gift_dict = _get_gift_dict()
    print('rows read: %s' % len(gift_dict))

    print('>>> 5/8 >>> Reading first seen date of each sku...')
    first_seen_dict = _get_item_firstseen_dict()
    print('rows read: %s' % len(first_seen_dict))

    print('>>> 6/8 >>> Reading ratings of each sku...')
    rating_dict = _get_rating_dict()
    print('rows read: %s' % len(rating_dict))

    print('>>> 7/8 >>> Joining results in memory...')

    _merge_dict_under_key(deduction_dict, [
        discount_dict,
        gift_dict,
        first_seen_dict,
        rating_dict,
    ])

    tlist = _memory_left_join(retrows_price,
                              deduction_dict,
                              col_name_list_left=cols_left,
                              col_name_list_right=cols_deduction)
    print('rows generated: %s' % len(tlist))

    print('>>> 8/8 >>> Calculating worthy_values...')
    _calculate_worthy_values(tlist)
    print('num cols = %s ' % len(tlist[0]))

    print('>>> 9/9 >>> Saving to DB...')
    ret = crawler_helper.persist_db_history_and_latest(table_name='jd_worthy',
                                                       num_cols=len(tlist[0]),
                                                       value_list=tlist,
                                                       is_many=True,
                                                       need_history=False)
    return ret
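
_memory_left_join is likewise referenced but not defined here. A rough sketch of an in-memory left join, under the assumptions that the left side is a list of row dicts, the right side is a dict keyed by sku_id, and missing right-hand columns are filled with None:

# Hypothetical sketch of _memory_left_join; the join key and the None fill
# value are assumptions, not the actual helper from the source repository.
def _memory_left_join(left_rows, right_dict,
                      col_name_list_left, col_name_list_right,
                      join_key='sku_id'):
    joined = []
    for row in left_rows:
        right = right_dict.get(row[join_key], {})
        out = [row[col] for col in col_name_list_left]
        out += [right.get(col) for col in col_name_list_right]  # left-join fill
        joined.append(out)
    return joined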
Example #5
def genPropertyTable():

    print("reading...")
    sql = 'select * from jd_item_property_latest'
    retrows = dbhelper.executeSqlRead(sql, is_dirty=True)

    pdict = {}

    for row in retrows:

        p_key = row['p_key']
        if p_key is None:
            continue
        if p_key == '__DEFAULT__' or p_key == u'__DEFAULT__':
            continue
        if len(p_key) > 60:
            # print p_key
            continue

        p_value = row['p_value']
        if (p_value is None):
            continue
        if p_value == u'无':
            # print p_value
            continue
        p_value_nf = multi_replace(p_value, PROPERTY_SPLITTER_LIST, ' ')
        lendiff = len(p_value) - len(p_value_nf)
        if lendiff > 5:
            # print p_value
            continue

        sku_id = row['sku_id']
        if sku_id in pdict:
            pold = pdict[sku_id]
            pdict[sku_id] = "%s %s" % (pold, p_value_nf)
        else:
            pdict[sku_id] = p_value_nf

    vlist = []
    for key in pdict:
        vlist.append([key, pdict[key]])

    print("writing to db...")
    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_index_property',
        num_cols=len(vlist[0]),
        value_list=vlist,
        is_many=True,
        need_history=False,
        need_flow=False,
    )
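
genPropertyTable normalizes each property value with multi_replace before concatenating values per SKU. Assuming the helper simply substitutes every splitter token with the given replacement, a sketch could be (the token list is a placeholder; the real PROPERTY_SPLITTER_LIST is not shown in these examples):

# Hypothetical sketch of multi_replace; the token list below is a placeholder
# chosen so that the length-difference check above stays meaningful.
PROPERTY_SPLITTER_LIST = ['<br>', '\r\n', '\t']

def multi_replace(text, old_list, new):
    """Replace every occurrence of each token in old_list with new."""
    for old in old_list:
        text = text.replace(old, new)
    return text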
Example #6
def genPropertyTable():

    print("reading...")
    sql = 'select * from jd_item_property_latest'
    retrows = dbhelper.executeSqlRead(sql, is_dirty=True)

    pdict = {}

    for row in retrows:

        p_key = row['p_key']
        if p_key is None:
            continue
        if p_key == '__DEFAULT__' or p_key == u'__DEFAULT__':
            continue
        if len(p_key) > 60:
            # print p_key
            continue

        p_value = row['p_value']
        if (p_value is None):
            continue
        if p_value == u'无':
            # print p_value
            continue
        p_value_nf = multi_replace(p_value, PROPERTY_SPLITTER_LIST, ' ')
        lendiff = len(p_value) - len(p_value_nf)
        if lendiff > 5:
            # print p_value
            continue

        sku_id = row['sku_id']
        if sku_id in pdict:
            pold = pdict[sku_id]
            pdict[sku_id] = "%s %s" %(pold,p_value_nf)
        else:
            pdict[sku_id] = p_value_nf

    vlist = []
    for key in pdict:
        vlist.append([key, pdict[key]])

    print("writing to db...")
    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_index_property',
        num_cols=len(vlist[0]),
        value_list=vlist,
        is_many=True,
        need_history=False,
        need_flow=False,
    )
Example #7
def calculate_base_rating_for_categories():

    today = timeHelper.getNow()
    sql = getSqlCatRating()
    retrows = dbhelper.executeSqlRead(sql, is_dirty=True)
    # print sql
    print "rows of data selected for insert: %s" %len(retrows)
    # print len(retrows[0])
    # print retrows[0]
    vlist = []
    for row in retrows:
        tp = []
        tp.append(row['category_id'])
        tp.append(row['sample_count'])
        tp.append(row['sum_1'])
        tp.append(row['sum_2'])
        tp.append(row['sum_3'])
        tp.append(row['sum_4'])
        tp.append(row['sum_5'])
        tp.append(row['comment_count'])
        tp.append(row['rating_score'])
        tp.append(row['rate_1'])
        tp.append(row['rate_2'])
        tp.append(row['rate_3'])
        tp.append(row['rate_4'])
        tp.append(row['rate_5'])
        tp.append(row['rate_good'])
        tp.append(row['rate_bad'])
        tp.append(row['origin_dt'])
        tp.append(row['dt'])
        tp.append(row['name'])
        # print row['category_id']
        vlist.append(tp)

    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_analytic_category_rating',
        num_cols=len(vlist[0]),
        value_list=vlist,
        is_many=True
    )
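
The nineteen tp.append calls above can be collapsed into a loop over a column-name list without changing the inserted values; the column order below is taken directly from the example:

# Behavior-equivalent construction of vlist, provided the column order matches
# the append-per-column version above.
RATING_COLUMNS = [
    'category_id', 'sample_count', 'sum_1', 'sum_2', 'sum_3', 'sum_4', 'sum_5',
    'comment_count', 'rating_score', 'rate_1', 'rate_2', 'rate_3', 'rate_4',
    'rate_5', 'rate_good', 'rate_bad', 'origin_dt', 'dt', 'name',
]
vlist = [[row[col] for col in RATING_COLUMNS] for row in retrows]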
Example #8
def generate_worthy_mix_main():

    t1 = time.time()
    logging.debug(
        "1/4 >>> Join all related tables: price_temp, dynamic, deduction, discount, gift, rating, last-seen, etc..."
    )
    worthy_rows = _get_merged_tables()
    t2 = time.time()
    logging.debug("Done, rows read: %s, using seconds: %s\n" % (len(worthy_rows), (t2 - t1)))

    logging.debug("2/4 >>> Calculating worthy scores and final price")
    _calculate_worthy_values(worthy_rows)
    t3 = time.time()
    logging.debug("Done, using seconds: %0.1f\n" % (t3 - t2))

    logging.debug("3/4 >>> Generating data for db insert")
    insert_list = rows_helper.generate_list_for_db_write(worthy_rows, worthy_columns)
    t4 = time.time()
    logging.debug("Done, using seconds: %0.1f\n" % (t4 - t3))

    logging.debug("4/4 >>> Now writing to db, rows = %s" % len(insert_list))

    tbl_name = "jd_worthy"
    # tbl_name = 'zz_worthy_%s' %int(time.time())
    # tbl_name_latest = "%s_latest" %tbl_name

    # ret = crawler_helper.persist_db_history_and_lastest_empty_first(

    WRITE_STEP = 10000
    wtimes = len(insert_list) // WRITE_STEP
    wremaining = len(insert_list) - wtimes * WRITE_STEP
    print "%s,%s,%s" % (len(insert_list), wtimes, wremaining)

    total_written = 0
    for i in range(wtimes):
        tt1 = time.time()
        partlist = insert_list[i * WRITE_STEP : (i + 1) * WRITE_STEP]

        ret = crawler_helper.persist_db_history_and_latest(
            table_name=tbl_name, num_cols=len(insert_list[0]), value_list=partlist, is_many=True, need_history=False
        )
        afr = ret["affected_rows_latest"]
        total_written += afr

        tt2 = time.time()
        logging.debug("Written %s/%s: affected rows = %s, using seconds: %s" % (i, wtimes + 1, afr, int(tt2 - tt1)))

    partlist = insert_list[wtimes * WRITE_STEP : len(insert_list)]
    ret = crawler_helper.persist_db_history_and_latest(
        table_name=tbl_name, num_cols=len(insert_list[0]), value_list=partlist, is_many=True, need_history=False
    )
    afr = ret["affected_rows_latest"]
    total_written += afr

    logging.debug("Written %s/%s: affected rows = %s" % (wtimes + 1, wtimes + 1, afr))
    logging.debug(">>> Done, total affected rows = %s >>>" % total_written)

    # logging.debug('Now altering table name...')
    # afr = dbhelper.rename_table(tbl_name_latest, 'jd_worthy_latest', if_delete_duplicate=True)

    t5 = time.time()
    logging.debug("Done, using seconds: %0.1f\n" % (t5 - t4))

    return ret
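
The chunked-write pattern in generate_worthy_mix_main (write WRITE_STEP rows at a time, then flush the remainder) can be factored into a small helper. A sketch that assumes only the keyword arguments and the 'affected_rows_latest' return key already used in these examples; the helper name and default chunk size are illustrative:

# Hypothetical helper wrapping the chunked-write loop above; not part of the
# original module.
def persist_in_chunks(table_name, insert_list, chunk_size=10000):
    total_written = 0
    for start in range(0, len(insert_list), chunk_size):
        part = insert_list[start:start + chunk_size]
        ret = crawler_helper.persist_db_history_and_latest(
            table_name=table_name,
            num_cols=len(insert_list[0]),
            value_list=part,
            is_many=True,
            need_history=False,
        )
        total_written += ret["affected_rows_latest"]
    return total_written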