Exemplo n.º 1
0
def update_price():
    """Run the JD price task a single time with M = N = 1.

    Logging is configured under the name built from M and N before the
    task object is created, configured and executed via ``doTaskOnce``.
    """
    M, N = 1, 1
    mylog.configLogging('price_task_%s_%s' % (M, N))

    price_task = Jd_Price_Task()
    price_task.configTask(
        is_daily=False,
        interval_hours=0,
        sleep_time=0.1,
        group_num=2000,
    )
    price_task.doTaskOnce(M, N)
Exemplo n.º 2
0
# encoding: utf-8

import sys
import dbhelper
import time
from task_class import DataTask
import jd_api_crawler
import mylog
import category_helper

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding available again
# so the process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Configure logging for this module under the 'promo_category_task' name.
mylog.configLogging('promo_category_task')


class Jd_Promo_Category_DataTask(DataTask):
    """DataTask that crawls promo data for each category task id."""

    # VIRTUAL
    def __load_all_tasks__(self):
        """Return the task list supplied by ``category_helper.load_all_white_sub_categories``."""
        all_tasks = category_helper.load_all_white_sub_categories()
        return all_tasks

    # VIRTUAL
    def __task_order__(self, task_id):
        """Crawl the promo data for ``task_id`` and hand back the crawler's result."""
        crawl_result = jd_api_crawler.crawl_category_promo(task_id)
        return crawl_result


#
# ==================================================================================
#
Exemplo n.º 3
0
# encoding: utf-8

import sys
import dbhelper
import time
from task_class import DataTask
import jd_detail_crawler
import mylog
import timeHelper

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding available again
# so the process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Configure logging for this module under the 'property_task' name.
mylog.configLogging('property_task')


class Jd_Property_DataTask(DataTask):
    """DataTask whose task ids are SKU ids read from ``jd_item_dynamic_latest``."""

    def __init__(self, job_name):
        # NOTE(review): the DataTask initializer is not invoked here;
        # confirm that the base class needs no set-up of its own.
        self.job_name = job_name

    # VIRTUAL
    def __load_all_tasks__(self):
        """Return the distinct ``sku_id`` values updated on or after the cutoff.

        The cutoff comes from ``timeHelper.getTimeAheadOfNowDays(3)``
        (presumably three days before now -- verify against timeHelper).
        """
        cutoff = timeHelper.getTimeAheadOfNowDays(3)
        query = 'select distinct sku_id from jd_item_dynamic_latest where update_date >= "%s"' % cutoff
        rows = dbhelper.executeSqlRead2(query, is_dirty=True)
        # Only the first column (sku_id) of every row is kept.
        return [record[0] for record in rows]

    # VIRTUAL
# encoding: utf-8

import sys
import dbhelper
import time
from task_class import DataTask
import jd_api_crawler
import mylog
import timeHelper

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding available again
# so the process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Configure logging for this module under the 'comment_stats_task' name.
mylog.configLogging('comment_stats_task')


class Jd_Comment_Stats_DataTask(DataTask):
    """DataTask that crawls the comment count of each SKU task id."""

    # VIRTUAL
    def __load_all_tasks__(self):
        """Return the distinct ``sku_id`` values updated on or after the cutoff.

        The cutoff comes from ``timeHelper.getTimeAheadOfNowDays(3)``
        (presumably three days before now -- verify against timeHelper).
        """
        since = timeHelper.getTimeAheadOfNowDays(3)
        statement = 'select distinct sku_id from jd_item_dynamic_latest where update_date >= "%s"' % since
        result_rows = dbhelper.executeSqlRead2(statement, is_dirty=True)
        # Each row carries a single column; unwrap it into a flat list.
        return [entry[0] for entry in result_rows]

    # VIRTUAL
    def __task_order__(self, task_id):
        """Crawl the comment count for ``task_id`` and return the crawler's result."""
        outcome = jd_api_crawler.crawl_sku_comment_count(task_id)
        return outcome
Exemplo n.º 5
0
# encoding: utf-8

import sys
import dbhelper
import time
from task_class import DataTask
import jd_api_crawler
import mylog
import timeHelper

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding available again
# so the process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Configure logging for this module under the 'stock_task' name.
mylog.configLogging('stock_task')


class Jd_Stock_DataTask(DataTask):
    """DataTask that crawls the stock status of each SKU task id."""

    # VIRTUAL
    def __load_all_tasks__(self):
        """Return the distinct ``sku_id`` values updated on or after the cutoff.

        The cutoff comes from ``timeHelper.getTimeAheadOfNowDays(3)``
        (presumably three days before now -- verify against timeHelper).
        """
        daysago3 = timeHelper.getTimeAheadOfNowDays(3)
        sql = 'select distinct sku_id from jd_item_dynamic_latest where update_date >= "%s"' % daysago3
        retrows = dbhelper.executeSqlRead2(sql, is_dirty=True)
        # Only the first column (sku_id) of every row is kept.
        return [row[0] for row in retrows]

    # VIRTUAL
    def __task_order__(self, task_id):
        """Crawl the stock status for ``task_id`` and return the crawler's result."""
        # This must stay the only __task_order__ in the class: a second
        # definition further down would silently replace this one.
        return jd_api_crawler.crawl_sku_stock_status(task_id)

#
# ==================================================================================
#

if __name__ == "__main__":
    # Optional command-line form: <script> M N, where both are integers.
    # Any other argument shape keeps the defaults M = N = 1.
    argv = sys.argv

    M = N = 1
    if len(argv) == 2:
        # Only one of the two expected values was supplied.
        print('Error in arguments')
    elif len(argv) == 3:
        try:
            # Convert both values before binding them, so a bad second
            # argument cannot leave M overridden while N stays at 1.
            M, N = int(argv[1]), int(argv[2])
        except ValueError:
            # int() raises ValueError for a non-numeric string; report it
            # and carry on with the defaults, as before.
            print('Error in arguments')

    mylog.configLogging('promo_item_task_%s_%s' % (M, N))

    data_task = Jd_Promo_item_DataTask()
    data_task.configTask(is_daily=False, interval_hours=4, sleep_time=0.08)
    data_task.doTask(M, N)


Exemplo n.º 7
0
    mylog.configLogging('price_task_%s_%s' %(M,N))

    data_task = Jd_Price_Task()
    data_task.configTask(is_daily=False,interval_hours=0,sleep_time=0.1, group_num=2000)
    data_task.doTaskOnce(M,N)

#
# ==================================================================================
#

if __name__ == "__main__":
    # Optional command-line form: <script> M N, where both are integers.
    # Any other argument shape keeps the defaults M = N = 1.
    argv = sys.argv

    M = N = 1
    if len(argv) == 2:
        # Only one of the two expected values was supplied.
        print('Error in arguments')
    elif len(argv) == 3:
        try:
            # Convert both values before binding them, so a bad second
            # argument cannot leave M overridden while N stays at 1.
            M, N = int(argv[1]), int(argv[2])
        except ValueError:
            # int() raises ValueError for a non-numeric string; report it
            # and carry on with the defaults, as before.
            print('Error in arguments')

    mylog.configLogging('price_task_%s_%s' % (M, N))

    data_task = Jd_Price_Task()
    data_task.configTask(is_daily=False, interval_hours=0, sleep_time=0.1, group_num=2000)
    data_task.doTask(M, N)


Exemplo n.º 8
0
# encoding: utf-8

import sys
import dbhelper
import time
from task_class import DataTask
import jd_list_crawler
import mylog

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding available again
# so the process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Configure logging for this module under the 'list_task' name.
mylog.configLogging('list_task')


class Jd_List_DataTask(DataTask):
    """DataTask that runs ``jd_list_crawler.crawl_category`` for each task id."""

    # VIRTUAL
    def __load_all_tasks__(self):
        """Build the task list from the module-level category helpers.

        Every id returned by ``__load_white_categories___`` is expanded with
        ``__expand_to_sub_categories__``; the concatenated result is then
        passed through ``__remove_duplicate_categories__``.
        """
        expanded = []
        for white_cat in __load_white_categories___():
            expanded = expanded + __expand_to_sub_categories__(white_cat)
        return __remove_duplicate_categories__(expanded)

    # VIRTUAL
    def __task_order__(self, task_id):
        """Crawl the category identified by ``task_id`` and return the crawler's result."""
        crawl_result = jd_list_crawler.crawl_category(task_id)
        return crawl_result

Exemplo n.º 9
0
                  affected_rows2)
    if affected_rows2 <= 0:
        logging.error('Saving to item_category error, category_id = %s' %
                      category_id)

    # HANDLE JD_ITEM_FIRSTSEEN
    nowtime = timeHelper.getNowLong()
    nowdate = timeHelper.getNow()
    sql3 = 'insert ignore into jd_item_firstseen values(%s,"%s","%s")'
    ftlist = []
    for item in product_list:
        ftlist.append([item[0], nowtime, nowdate])
    affected_rows3 = dbhelper.executeSqlWriteMany(sql3, ftlist)

    ret_obj = {
        'status': 0 if ret['status'] == 0 and affected_rows2 > 0 else -1,
        'item_dynamic': ret,
        'item_category': affected_rows2,
        'item_first_seen': affected_rows3,
    }

    return ret_obj


if __name__ == '__main__':
    # Manual run: configure logging, crawl one hard-coded category id and
    # print whatever crawl_category returns.
    import mylog
    mylog.configLogging('test_list_crawler')
    sample_category = '12218-12219'
    crawl_outcome = crawl_category(sample_category)
    print(crawl_outcome)
    # print __up_roll_category_id__('652')