Пример #1
0
def crawl_sku_comment_count(sku_list):
    """Fetch comment counts for ``sku_list`` and pass them on for persistence.

    Returns a ``{'status': -1, 'msg': ...}`` dict when the API yields no
    records, or when the number of records differs from the number of
    distinct entries in ``sku_list``. Otherwise returns whatever
    ``crawler_helper.persist_db_history_and_latest`` returns.
    """
    records = jd_API.getCommentCount_JD(sku_list)
    if len(records) == 0:
        return {'status': -1, 'msg': 'jd api returned no result for sku_list'}

    distinct_skus = len(set(sku_list))
    if len(records) != distinct_skus:
        mismatch = 'jd api return size mismatch, size of sku:%s, size of api:%s' % (
            distinct_skus, len(records))
        return {'status': -1, 'msg': mismatch}

    stamp = timeHelper.getNow()
    rows = []
    for record in records:
        # The record is stamped in place, so 'dt' becomes one of its columns.
        record['dt'] = stamp
        # Values are emitted in the record's own key iteration order.
        rows.append(list(record.values()))

    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_item_comment_count',
        num_cols=len(records[0]),
        value_list=rows,
        is_many=True)
Пример #2
0
def configLogging(log_name, log_level=data_config.LOGGING_LEVEL):
    """Point the root logger at ``/tmp/<log_name>_worthy.log`` (append mode).

    Args:
        log_name: Prefix of the log file name; also echoed in the first
            log line written after configuration.
        log_level: Level handed to ``logging.basicConfig``. The default,
            ``data_config.LOGGING_LEVEL``, is read once when this function
            is defined.
    """
    # The file name no longer carries a timestamp, so the unused
    # timeHelper.getNow() call that used to live here has been dropped.
    filename = '/tmp/%s_worthy.log' % (log_name)

    logging.basicConfig(level=log_level,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=filename,
                        filemode='a')

    logging.info('START logging : %s' % log_name)
Пример #3
0
 def __get_task_already_done__(self):
     """Return the task ids recorded in ``task_status`` for this job.

     For daily jobs (``self.is_daily``) the cut-off is the value of
     ``timeHelper.getNow()`` followed by `` 0:00:00``; otherwise it is the
     timestamp ``self.interval_hours`` hours before now. Each task id is
     returned as a string. The query and the resulting count are printed,
     and the count is also logged.
     """
     if self.is_daily:
         since = '%s 0:00:00' % timeHelper.getNow()
     else:
         since = timeHelper.getTimeAheadOfNowHours(
             self.interval_hours, format='%Y-%m-%d %H:%M:%S')
     sql = 'select task_id from task_status where job_name="%s" and update_time>="%s" group by task_id' % (
         self.job_name, since)
     print(sql)

     rows = dbhelper.executeSqlRead(sql, is_dirty=True)
     done_ids = ["%s" % row['task_id'] for row in rows]

     summary = "Task already done: %s" % len(done_ids)
     logging.info(summary)
     print(summary)
     return done_ids
Пример #4
0
def crawl_sku_price(sku_list, sleep_time):
    """Fetch prices for ``sku_list`` and persist them to ``jd_item_price``.

    Args:
        sku_list: SKU ids forwarded to ``jd_API.getPrices_JD``; per design
            no more than 5000 items.
        sleep_time: Forwarded to ``jd_API.getPrices_JD`` as ``sleep_time``.

    Returns:
        ``{'status': -1, 'msg': ...}`` when the API yields no prices,
        otherwise the result of
        ``crawler_helper.persist_db_history_and_latest``.
    """
    # no more than 5000 items here per design
    rdict = jd_API.getPrices_JD(sku_list,sleep_time=sleep_time)
    if not rdict:
        # An empty result used to crash on vlist[0] with IndexError; report
        # it the same way crawl_sku_comment_count reports an empty result.
        return {'status': -1, 'msg': 'jd api returned no price for sku_list'}

    dt = timeHelper.getNow()
    dtlong = timeHelper.getNowLong()
    # Each API entry is indexed as (price, price_m, price_pcp).
    vlist = [[key, dt, dtlong, tp[0], tp[1], tp[2]]
             for key, tp in rdict.items()]
    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_item_price',
        num_cols=len(vlist[0]),
        value_list=vlist,
        is_many=True,
        need_history=True,
        need_flow=True,
    )
Пример #5
0
def crawl_category_promo(category_id):
    """Fetch promotion data for a category and persist one row for it.

    Returns ``{'status': 0, 'msg': ...}`` when the API result is empty;
    otherwise returns whatever
    ``crawler_helper.persist_db_history_and_latest`` returns.
    """
    promo = jd_API.get_Promo_Category(category_id)
    stamp = timeHelper.getNow()
    if len(promo) == 0:
        return {
            'status': 0,
            'msg': 'empty in return, category_id=%s' % category_id
        }

    row = [category_id, stamp]
    for field in ('quan', 'ads', 'prom'):
        encoded = json.dumps(promo[field])
        # A section that serialises to an empty JSON list is stored as None.
        row.append(None if encoded == '[]' else encoded)

    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_promo_category',
        num_cols=len(row),
        value_list=[row],
        is_many=True)
Пример #6
0
def crawl_sku_price(sku_list, sleep_time):
    """Fetch prices for ``sku_list`` and persist them to ``jd_item_price``.

    Args:
        sku_list: SKU ids forwarded to ``jd_API.getPrices_JD``; per design
            no more than 5000 items.
        sleep_time: Forwarded to ``jd_API.getPrices_JD`` as ``sleep_time``.

    Returns:
        ``{'status': -1, 'msg': ...}`` when the API yields no prices,
        otherwise the result of
        ``crawler_helper.persist_db_history_and_latest``.
    """
    # no more than 5000 items here per design
    rdict = jd_API.getPrices_JD(sku_list, sleep_time=sleep_time)
    if not rdict:
        # Without this guard an empty result raised IndexError on vlist[0];
        # the status dict mirrors how crawl_sku_comment_count reports an
        # empty API result.
        return {'status': -1, 'msg': 'jd api returned no price for sku_list'}

    dt = timeHelper.getNow()
    dtlong = timeHelper.getNowLong()
    vlist = []
    for key, tp in rdict.items():
        # Each API entry is indexed as (price, price_m, price_pcp).
        vlist.append([key, dt, dtlong, tp[0], tp[1], tp[2]])
    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_item_price',
        num_cols=len(vlist[0]),
        value_list=vlist,
        is_many=True,
        need_history=True,
        need_flow=True,
    )
Пример #7
0
def crawl_sku_comment_count(sku_list):
    """Fetch comment counts for ``sku_list`` and pass them on for persistence.

    A ``{'status': -1, 'msg': ...}`` dict is returned when the API yields
    nothing, or when its record count differs from the number of distinct
    SKUs requested. Otherwise the return value of
    ``crawler_helper.persist_db_history_and_latest`` is passed through.
    """
    api_rows = jd_API.getCommentCount_JD(sku_list)
    if len(api_rows) == 0:
        return {'status':-1,'msg':'jd api returned no result for sku_list'}

    wanted = len(set(sku_list))
    received = len(api_rows)
    if received != wanted:
        return {
            'status': -1,
            'msg': 'jd api return size mismatch, size of sku:%s, size of api:%s' % (wanted, received),
        }

    today = timeHelper.getNow()
    for entry in api_rows:
        # Stamp every record in place; 'dt' becomes one of its columns.
        entry['dt'] = today
    # One value list per record, in the record's own key iteration order.
    value_rows = [[entry[field] for field in entry] for entry in api_rows]

    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_item_comment_count',
        num_cols=len(api_rows[0]),
        value_list=value_rows,
        is_many=True
    )
def crawl_detail_images(sku_id):
    """Resolve the images of a SKU's detail page and store them.

    Returns ``{'status': -1}`` when no image is resolved. Otherwise returns
    a dict with ``status`` (0 if the write affected at least one row, else
    -1) and ``affected_rows2`` (the value returned by the write helper).
    """
    page = __get_detail_page_content__(sku_id)
    images = jd_detail_resolver.resolve_Images(page)
    if len(images) == 0:
        return {'status':-1}

    stamp = timeHelper.getNow()
    rows = [(sku_id, stamp, image) for image in images]

    # Only the *_latest table is written; the write to jd_item_images is
    # disabled.
    written = dbhelper.executeSqlWriteMany(
        'replace into jd_item_images_latest values(%s,%s,%s)', rows)
    return {
        'status': 0 if written > 0 else -1,
        'affected_rows2': written
    }
def crawl_detail_property(sku_id):
    """Resolve the property map of a SKU's detail page and store it.

    Returns ``{'status': 0}`` when no property is resolved. Otherwise
    returns a dict with ``status`` (0 if the write affected at least one
    row, else -1) and ``affected_rows2`` (the value returned by the write
    helper).
    """
    page = __get_detail_page_content__(sku_id)
    properties = jd_detail_resolver.resolve_Properties(page)
    stamp = timeHelper.getNow()
    if len(properties) == 0:
        return {'status':0}

    rows = [(sku_id, stamp, name, value) for name, value in properties.items()]

    # Only the *_latest table is written; the write to jd_item_property is
    # disabled.
    written = dbhelper.executeSqlWriteMany(
        'replace into jd_item_property_latest values(%s,%s,%s,%s)', rows)
    return {
        'status': 0 if written > 0 else -1,
        'affected_rows2': written
    }
Пример #10
0
def crawl_category_promo(category_id):
    """Fetch promotion data for a category and persist one row for it.

    An empty API result short-circuits with ``{'status': 0, 'msg': ...}``;
    otherwise the return value of
    ``crawler_helper.persist_db_history_and_latest`` is passed through.
    """
    def _json_or_none(value):
        # Serialise a section; an empty JSON list is stored as None.
        text = json.dumps(value)
        return text if text != '[]' else None

    api_result = jd_API.get_Promo_Category(category_id)
    today = timeHelper.getNow()
    if len(api_result) == 0:
        return {
            'status':0,
            'msg':'empty in return, category_id=%s' %category_id
        }

    record = [
        category_id,
        today,
        _json_or_none(api_result['quan']),
        _json_or_none(api_result['ads']),
        _json_or_none(api_result['prom']),
    ]
    return crawler_helper.persist_db_history_and_latest(
        table_name='jd_promo_category',
        num_cols=len(record),
        value_list=[record],
        is_many=True
    )
Пример #11
0
def resolveProductListFromPage(html):
    """Parse one category listing page into a list of product tuples.

    Every ``<div data-sku=...>`` node is re-parsed as its own document.
    Nodes whose sku id exceeds 99999999 are skipped (the original comments
    mark them as not sold by JD itself). A node that fails to parse is
    logged and skipped, so one bad node no longer aborts the page.

    Args:
        html: Markup of a listing page.

    Returns:
        A list of tuples ``(sku_id, nowdate, nowtime, sku_title, sku_url,
        sku_thumnail_url, sku_stock, comment_count, is_global,
        is_pay_on_delivery, is_free_gift, sku_icon_url)``.
    """
    product_list = []
    nowtime = timeHelper.getNowLong()
    nowdate = timeHelper.getNow()
    # Parse outside the try block: if parsing fails there is nothing to free,
    # and the cleanup must not mask the real error with UnboundLocalError.
    doc = libxml2.htmlReadDoc(html, None, 'utf8', PARSE_OPTIONS)
    try:
        sku_docs = doc.xpathEval('//div[@data-sku]')
        for sku in sku_docs:
            # Reset per node so the cleanup never frees the previous node's
            # document a second time when this node fails to parse.
            sku_doc = None
            try:
                sku_doc = libxml2.htmlReadDoc('%s' % sku, None, 'utf8',
                                              PARSE_OPTIONS)

                sku_id = int(sku_doc.xpathEval('//@data-sku')[0].content)
                # Decide whether the item is sold by JD itself.
                if sku_id > 99999999:
                    # Not a JD self-operated product.
                    continue

                sku_url = sku_doc.xpathEval(
                    '//div[@class="p-img"]/a/@href')[0].content
                try:
                    sku_thumnail_url = sku_doc.xpathEval(
                        '//div[@class="p-img"]/a/img/@data-lazy-img'
                    )[0].content
                except Exception:
                    # No lazy-load attribute: fall back to the plain src.
                    sku_thumnail_url = sku_doc.xpathEval(
                        '//div[@class="p-img"]/a/img/@src')[0].content

                sku_title = ""
                try:
                    sku_title = sku_doc.xpathEval(
                        '//div[@class="p-name"]/a/@title')[0].content
                except Exception:
                    # Best effort: the <em> fallback below covers this case.
                    pass

                if len(sku_title) == 0:
                    sku_title = sku_doc.xpathEval(
                        '//div[@class="p-name"]/a/em')[0].content
                comment_count = int(
                    sku_doc.xpathEval('//div[@class="p-commit"]/strong/a')
                    [0].content)

                sku_icon_url = ""
                icon_doc = sku_doc.xpathEval(
                    '//div[@class="p-img"]/a/div/@style')
                if len(icon_doc) > 0:
                    sku_icon_url = url_utils.getStringBetween(
                        icon_doc[0].content, 'url("', '")')

                is_global = is_free_gift = is_pay_on_delivery = 0
                price_items = sku_doc.xpathEval(
                    '//div[@class="p-price"]/div/i')
                for pitem in price_items:
                    txt = pitem.content
                    if '全球购' in txt:
                        is_global = 1
                    elif '货到付款' in txt:
                        is_pay_on_delivery = 1
                    elif '赠品' in txt:
                        is_free_gift = 1
                    else:
                        # Unknown badge text: surface it for inspection.
                        print('new-mark found:')
                        print(txt)

                sku_stock = -1
                try:
                    sku_stock = int(
                        sku_doc.xpathEval('//div[@data-stock_v]/@data-stock_v')
                        [0].content)
                except Exception:
                    # Stock marker missing or not numeric: keep the -1 default.
                    pass

                sku_url = __makeUrl__(sku_url)
                sku_thumnail_url = __makeUrl__(sku_thumnail_url)

                tp = (sku_id, nowdate, nowtime, sku_title, sku_url,
                      sku_thumnail_url, sku_stock, comment_count, is_global,
                      is_pay_on_delivery, is_free_gift, sku_icon_url)
                product_list.append(tp)

            except Exception as e:
                # Pass the values as logging arguments. The old code applied
                # `%` to the return value of logging.error() (None), which
                # raised TypeError and aborted the whole page.
                logging.error('resolveProductListError: %s, error = %s',
                              sku, e)
            finally:
                if sku_doc is not None:
                    sku_doc.freeDoc()

        return product_list
    finally:
        doc.freeDoc()
Пример #12
0
def crawl_category(category_id):
    """Crawl all listing pages of a category and persist the products found.

    When the first page yields no products, the crawl is retried with the
    id returned by ``__up_roll_category_id__``. A ``None`` category id ends
    the crawl before any page is requested.

    Args:
        category_id: Category to crawl, or ``None``.

    Returns:
        ``{'status': -1, 'msg': ...}`` when no category with products is
        found. Otherwise a dict with ``status`` (0 only if the item_dynamic
        persist reported status 0 and the item_category write affected
        rows, else -1), ``item_dynamic``, ``item_category`` and
        ``item_first_seen``.
    """
    if category_id is None:
        # Checked first so that no page is fetched or parsed for a missing
        # category; the result is the same one the old code produced later.
        return {'status':-1, 'msg': 'No item in category product list'}

    logging.debug('category_id = %s -- page 1' %(category_id))
    url = __get_category_page_url__(category_id,1)
    # Retry with other encodings while the response comes back empty.
    html = url_utils.getWebResponse(url,'utf-8')
    if html == "":
        html = url_utils.getWebResponse(url,'gb18030')
    if html == "":
        html = url_utils.getWebResponse(url, 'gbk')
    total_pages = jd_list_resolver.resolveTotalPageNum(html)

    product_list = jd_list_resolver.resolveProductListFromPage(html)

    if not product_list:
        # Formerly a `while` loop that always returned on its first pass.
        return crawl_category(__up_roll_category_id__(category_id))

    for page_iter in range(2,total_pages+1):
        logging.debug('category_id = %s -- page %s' %(category_id,page_iter))
        url = __get_category_page_url__(category_id,page_iter)
        html = url_utils.getWebResponse(url,'utf-8')
        product_list = product_list + jd_list_resolver.resolveProductListFromPage(html)
        time.sleep(SLEEP_TIME)

    total_goods_num = len(product_list)

    # The price lookup is disabled; three zeros are appended where its three
    # values used to go, so each row keeps its full width.
    product_list = [product + (0, 0, 0) for product in product_list]

    # persist in database
    ret = crawler_helper.persist_db_history_and_latest(
        table_name='jd_item_dynamic',
        num_cols=len(product_list[0]),
        value_list=product_list,
        is_many=True,
        need_history=False, # was True - changed 01/03
        need_flow=False,    # was True - changed 12/23
    )

    logging.debug('Saved to DB -- category_id = %s -- sku_count=%s' %(category_id,total_goods_num))
    logging.debug('%s' %ret)

    # HANDLE JD_ITEM_CATEGORY
    item_cat_list = [(prod[0], category_id) for prod in product_list]
    sql2 = 'replace into jd_item_category values (%s,%s)'
    affected_rows2 = dbhelper.executeSqlWriteMany(sql2,item_cat_list)
    logging.debug('Saved to DB - item_category - affected rows = %s' %affected_rows2)
    if affected_rows2<=0:
        logging.error('Saving to item_category error, category_id = %s' %category_id)

    # HANDLE JD_ITEM_FIRSTSEEN
    nowtime = timeHelper.getNowLong()
    nowdate = timeHelper.getNow()
    # NOTE(review): the last two placeholders are wrapped in double quotes;
    # if executeSqlWriteMany binds parameters through the DB driver this may
    # quote the values twice -- confirm against dbhelper.
    sql3 = 'insert ignore into jd_item_firstseen values(%s,"%s","%s")'
    ftlist = [[item[0], nowtime, nowdate] for item in product_list]
    affected_rows3 = dbhelper.executeSqlWriteMany(sql3,ftlist)

    return {
        'status': 0 if ret['status']==0 and affected_rows2>0 else -1,
        'item_dynamic': ret,
        'item_category': affected_rows2,
        'item_first_seen': affected_rows3,
    }
Пример #13
0
def processRecord(consentJsonStreamTable, requestDict, consentJson):
    """Validate, transform and store one consent request, then refresh its trace.

    The record is inserted with the original request, the transformed data
    (only when validation and transformation both succeed) and a trace
    dictionary. After a successful insert the trace is written again so its
    timing includes the insert itself; a failure of that update is ignored.

    Args:
        consentJsonStreamTable: Table wrapper exposing ``TABLE``,
            ``insert``, ``update`` and ``composeResult``.
        requestDict: Incoming request; must contain the key named by
            ``TABLE.INCOMING_DATA.CUSTOMER_MK``.
        consentJson: Passed to ``transformConsentJson.validate`` and
            ``transformConsentJson.transform``.

    Returns:
        The insert result, or ``composeResult(API.STATUS_CODE.FAILED)`` when
        the insert produced ``None``.

    Raises:
        KeyError: If ``requestDict`` lacks the customer key (assuming it is
            a plain dict).
    """
    # requestId and localId are never assigned anything but None in this
    # function; they are passed to the trace as-is.
    requestId = localId = status = errorMessage = None

    baseTable = consentJsonStreamTable.TABLE
    baseTableAttr = baseTable.ATTRIBUTE
    print(f"CustomerMK: '{baseTable.INCOMING_DATA.CUSTOMER_MK}', requestDict: {requestDict}")
    customerMK = requestDict[baseTable.INCOMING_DATA.CUSTOMER_MK]
    sortKey = timeHelper.getUTCDateTimeString()

    # ----- INSERT the record -----
    insertRecordDict = {}
    # Tag Start DateTime
    startTime = timeHelper.getNow()
    # Set Primary and Sort Key
    addPrimaryAndSortKey(insertRecordDict, baseTableAttr, customerMK, sortKey)
    # Set Original Json
    insertRecordDict[baseTableAttr.ORIGINAL_DATA] = requestDict
    validationResult = transformConsentJson.validate(requestDict, consentJson)
    if API.isResultFailure(validationResult):
        status = API.getResultStatus(validationResult)
        errorMessage = API.getErrorPrintMessage(validationResult)
    else:
        # Transform and set new Json
        transformResult = transformConsentJson.transform(requestDict, consentJson)
        if API.isResultFailure(transformResult):
            status = API.getResultStatus(transformResult)
            errorMessage = API.getErrorPrintMessage(transformResult)
        else:
            insertRecordDict[baseTableAttr.TRANSFORMED_DATA] = API.getResultData(transformResult)

    # Set Trace and Stats dictionary
    timeLogTuple = timeHelper.getTimeLogTupleString(startTime)
    insertRecordDict[baseTableAttr.TRACE] = helper.getTraceDict(originId=requestId, localId=localId, status=status, message=errorMessage, startDateTime=timeLogTuple[0], endDateTime=timeLogTuple[1], duration=timeLogTuple[2])
    # Insert into the table
    insertResult = consentJsonStreamTable.insert(insertRecordDict)

    # TODO What do we do when the insert fails? For now the failed result is
    # simply returned below.
    if not API.isResultFailure(insertResult):
        # ----- UPDATE the record trace (overall processing time until insert DB operation was successful) -----
        # The trace status now reflects the insert, replacing any validation
        # or transformation status; the error message is kept.
        status = API.getResultStatus(insertResult)

        updateRecordDict = {}
        addPrimaryAndSortKey(updateRecordDict, baseTableAttr, customerMK, sortKey)
        # Update the End DateTime
        timeLogTuple = timeHelper.getTimeLogTupleString(startTime)
        updateRecordDict[baseTableAttr.TRACE] = helper.getTraceDict(originId=requestId, localId=localId, status=status, message=errorMessage, startDateTime=timeLogTuple[0], endDateTime=timeLogTuple[1], duration=timeLogTuple[2])
        # Ignore the update outcome: the insert (the key operation) succeeded.
        consentJsonStreamTable.update(updateRecordDict)

    # ----- RETURN the insertResult -----
    if insertResult is not None:
        return insertResult
    return consentJsonStreamTable.composeResult(API.STATUS_CODE.FAILED)
Пример #14
0
def resolveProductListFromPage(html):
    """Parse one category listing page into a list of product tuples.

    Every ``<div data-sku=...>`` node is re-parsed as its own document.
    Nodes whose sku id exceeds 99999999 are skipped (the original comments
    mark them as not sold by JD itself). A node that fails to parse is
    logged and skipped, so one bad node no longer aborts the page.

    Args:
        html: Markup of a listing page.

    Returns:
        A list of tuples ``(sku_id, nowdate, nowtime, sku_title, sku_url,
        sku_thumnail_url, sku_stock, comment_count, is_global,
        is_pay_on_delivery, is_free_gift, sku_icon_url)``.
    """
    product_list = []
    nowtime = timeHelper.getNowLong()
    nowdate = timeHelper.getNow()
    # Parse outside the try block: if parsing fails there is nothing to free,
    # and the cleanup must not mask the real error with UnboundLocalError.
    doc = libxml2.htmlReadDoc(html, None, "utf8", PARSE_OPTIONS)
    try:
        sku_docs = doc.xpathEval("//div[@data-sku]")
        for sku in sku_docs:
            # Reset per node so the cleanup never frees the previous node's
            # document a second time when this node fails to parse.
            sku_doc = None
            try:
                sku_doc = libxml2.htmlReadDoc("%s" % sku, None, "utf8", PARSE_OPTIONS)

                sku_id = int(sku_doc.xpathEval("//@data-sku")[0].content)
                # Decide whether the item is sold by JD itself.
                if sku_id > 99999999:
                    # Not a JD self-operated product.
                    continue

                sku_url = sku_doc.xpathEval('//div[@class="p-img"]/a/@href')[0].content
                try:
                    sku_thumnail_url = sku_doc.xpathEval('//div[@class="p-img"]/a/img/@data-lazy-img')[0].content
                except Exception:
                    # No lazy-load attribute: fall back to the plain src.
                    sku_thumnail_url = sku_doc.xpathEval('//div[@class="p-img"]/a/img/@src')[0].content

                sku_title = ""
                try:
                    sku_title = sku_doc.xpathEval('//div[@class="p-name"]/a/@title')[0].content
                except Exception:
                    # Best effort: the <em> fallback below covers this case.
                    pass

                if len(sku_title) == 0:
                    sku_title = sku_doc.xpathEval('//div[@class="p-name"]/a/em')[0].content
                comment_count = int(sku_doc.xpathEval('//div[@class="p-commit"]/strong/a')[0].content)

                sku_icon_url = ""
                icon_doc = sku_doc.xpathEval('//div[@class="p-img"]/a/div/@style')
                if len(icon_doc) > 0:
                    sku_icon_url = url_utils.getStringBetween(icon_doc[0].content, 'url("', '")')

                is_global = is_free_gift = is_pay_on_delivery = 0
                price_items = sku_doc.xpathEval('//div[@class="p-price"]/div/i')
                for pitem in price_items:
                    txt = pitem.content
                    if "全球购" in txt:
                        is_global = 1
                    elif "货到付款" in txt:
                        is_pay_on_delivery = 1
                    elif "赠品" in txt:
                        is_free_gift = 1
                    else:
                        # Unknown badge text: surface it for inspection.
                        print("new-mark found:")
                        print(txt)

                sku_stock = -1
                try:
                    sku_stock = int(sku_doc.xpathEval("//div[@data-stock_v]/@data-stock_v")[0].content)
                except Exception:
                    # Stock marker missing or not numeric: keep the -1 default.
                    pass

                sku_url = __makeUrl__(sku_url)
                sku_thumnail_url = __makeUrl__(sku_thumnail_url)

                tp = (
                    sku_id,
                    nowdate,
                    nowtime,
                    sku_title,
                    sku_url,
                    sku_thumnail_url,
                    sku_stock,
                    comment_count,
                    is_global,
                    is_pay_on_delivery,
                    is_free_gift,
                    sku_icon_url,
                )
                product_list.append(tp)

            except Exception as e:
                # Pass the values as logging arguments. The old code applied
                # `%` to the return value of logging.error() (None), which
                # raised TypeError and aborted the whole page.
                logging.error("resolveProductListError: %s, error = %s", sku, e)
            finally:
                if sku_doc is not None:
                    sku_doc.freeDoc()

        return product_list
    finally:
        doc.freeDoc()
Пример #15
0
 def __get_task_already_done__(self):
     """Collect the task ids this job already has rows for in ``task_status``.

     Daily jobs (``self.is_daily``) look back to the value of
     ``timeHelper.getNow()`` followed by `` 0:00:00``; other jobs look back
     ``self.interval_hours`` hours. The ids are returned as strings. The
     query and the resulting count are printed, and the count is logged.
     """
     if not self.is_daily:
         cutoff = timeHelper.getTimeAheadOfNowHours(self.interval_hours,format='%Y-%m-%d %H:%M:%S')
     else:
         cutoff = '%s 0:00:00' % timeHelper.getNow()
     query = 'select task_id from task_status where job_name="%s" and update_time>="%s" group by task_id' %(self.job_name,cutoff)
     print(query)

     finished = []
     for record in dbhelper.executeSqlRead(query,is_dirty=True):
         finished.append("%s" % record['task_id'])

     report = "Task already done: %s" % len(finished)
     logging.info(report)
     print(report)
     return finished
Пример #16
0
def crawl_category(category_id):
    """Crawl all listing pages of a category and persist the products found.

    When the first page yields no products, the crawl is retried with the
    id returned by ``__up_roll_category_id__``. A ``None`` category id ends
    the crawl before any page is requested.

    Args:
        category_id: Category to crawl, or ``None``.

    Returns:
        ``{'status': -1, 'msg': ...}`` when no category with products is
        found. Otherwise a dict with ``status`` (0 only if the item_dynamic
        persist reported status 0 and the item_category write affected
        rows, else -1), ``item_dynamic``, ``item_category`` and
        ``item_first_seen``.
    """
    if category_id is None:
        # Checked first so that no page is fetched or parsed for a missing
        # category; the result is the same one the old code produced later.
        return {'status': -1, 'msg': 'No item in category product list'}

    logging.debug('category_id = %s -- page 1' % (category_id))
    url = __get_category_page_url__(category_id, 1)
    # Retry with other encodings while the response comes back empty.
    html = url_utils.getWebResponse(url, 'utf-8')
    if html == "":
        html = url_utils.getWebResponse(url, 'gb18030')
    if html == "":
        html = url_utils.getWebResponse(url, 'gbk')
    total_pages = jd_list_resolver.resolveTotalPageNum(html)

    product_list = jd_list_resolver.resolveProductListFromPage(html)

    if not product_list:
        # Formerly a `while` loop that always returned on its first pass.
        return crawl_category(__up_roll_category_id__(category_id))

    for page_iter in range(2, total_pages + 1):
        logging.debug('category_id = %s -- page %s' % (category_id, page_iter))
        url = __get_category_page_url__(category_id, page_iter)
        html = url_utils.getWebResponse(url, 'utf-8')
        product_list = product_list + jd_list_resolver.resolveProductListFromPage(
            html)
        time.sleep(SLEEP_TIME)

    total_goods_num = len(product_list)

    # The price lookup is disabled; three zeros are appended where its three
    # values used to go, so each row keeps its full width.
    product_list = [product + (0, 0, 0) for product in product_list]

    # persist in database
    ret = crawler_helper.persist_db_history_and_latest(
        table_name='jd_item_dynamic',
        num_cols=len(product_list[0]),
        value_list=product_list,
        is_many=True,
        need_history=False,  # was True - changed 01/03
        need_flow=False,  # was True - changed 12/23
    )

    logging.debug('Saved to DB -- category_id = %s -- sku_count=%s' %
                  (category_id, total_goods_num))
    logging.debug('%s' % ret)

    # HANDLE JD_ITEM_CATEGORY
    item_cat_list = [(prod[0], category_id) for prod in product_list]
    sql2 = 'replace into jd_item_category values (%s,%s)'
    affected_rows2 = dbhelper.executeSqlWriteMany(sql2, item_cat_list)
    logging.debug('Saved to DB - item_category - affected rows = %s' %
                  affected_rows2)
    if affected_rows2 <= 0:
        logging.error('Saving to item_category error, category_id = %s' %
                      category_id)

    # HANDLE JD_ITEM_FIRSTSEEN
    nowtime = timeHelper.getNowLong()
    nowdate = timeHelper.getNow()
    # NOTE(review): the last two placeholders are wrapped in double quotes;
    # if executeSqlWriteMany binds parameters through the DB driver this may
    # quote the values twice -- confirm against dbhelper.
    sql3 = 'insert ignore into jd_item_firstseen values(%s,"%s","%s")'
    ftlist = [[item[0], nowtime, nowdate] for item in product_list]
    affected_rows3 = dbhelper.executeSqlWriteMany(sql3, ftlist)

    return {
        'status': 0 if ret['status'] == 0 and affected_rows2 > 0 else -1,
        'item_dynamic': ret,
        'item_category': affected_rows2,
        'item_first_seen': affected_rows3,
    }