Exemplo n.º 1
0
 def download_pic_url(self, img_url=None, file_name=None):
     """Download one image into the script download directory.

     The query string is stripped from ``img_url``. The path segments that
     follow the host part of the URL (everything except the file name) are
     mirrored as sub-directories of ``XKCCommonParam.script_download_path``.

     :param img_url: absolute URL of the image.
     :param file_name: optional base name for the saved file; the extension
         of the URL's own file name is appended to it. When ``None`` the
         file name taken from the URL is used unchanged.
     :return: ``True`` when the image was written to disk, ``False`` on any
         error, including an HTTP error status.
     """
     try:
         # Drop the query string so it does not end up in the file name.
         if '?' in img_url:
             img_url = img_url[:img_url.index('?')]
         split_file_name = img_url.split('/')[-1]
         # 'scheme://host/a/b/pic.jpg'.split('/')[3:-1] -> ['a', 'b']
         path_arr = img_url.split('/')[3:-1]
         if file_name is None:
             file_name = split_file_name
         else:
             file_name = str(file_name) + os.path.splitext(
                 split_file_name)[-1]
         if img_url.startswith('/'):
             img_url = img_url[1:]
         r = requests.get(img_url, stream=True, timeout=60 * 5)
         # Without this check a 4xx/5xx error page would be saved as if it
         # were the image and the download reported as successful.
         r.raise_for_status()
         download_path = os.path.join(XKCCommonParam.script_download_path,
                                      *path_arr)
         if not os.path.exists(download_path):
             os.makedirs(download_path)
         abs_file_path = os.path.join(download_path, file_name)
         with open(abs_file_path, 'wb') as f:
             f.write(r.content)
         return True
     except Exception:
         # Best effort: a failed image must not abort the surrounding crawl.
         traceback.print_exc()
         Logger.log('img %s download err' % img_url, this_dir=self.log_path)
         return False
Exemplo n.º 2
0
 def download_trade_type_goods(self,
                               soup,
                               target_title,
                               activiy_code,
                               limit_page_num=None):
     """Store the nav tab titled ``target_title`` as an activity and crawl it.

     :param soup: parsed page in which the ``.nav-tabs`` items are looked up.
     :param target_title: text the nav item must contain; also used as the
         activity name.
     :param activiy_code: activity code stored on the record and forwarded
         to ``download_search_good_list``.
     :param limit_page_num: forwarded unchanged to
         ``download_search_good_list``.
     """
     Logger.log('开始下载 ' + target_title)
     tab_selector = '.nav-tabs .nav-item:contains("%s")' % (target_title)
     nav_tab = soup.select_one(tab_selector)

     record = ActivtiyBO()
     record.activity_code = activiy_code
     record.name = target_title
     site_root = self.base_url
     tab_href = nav_tab.select_one('a')['href']
     record.url = site_root + tab_href

     record_pid = ActivtiyDao.insert(record)
     self.download_search_good_list(record.url,
                                    record_pid,
                                    activiy_code,
                                    limit_page_num=limit_page_num)
Exemplo n.º 3
0
 def download_pic_url(self, img_url=None, file_name=None):
     """Download a site-relative image into the script download directory.

     The image is fetched from ``self.base_url + img_url`` and stored under
     ``HNCommonParam.script_download_path``, mirroring the directory part
     of ``img_url``.

     :param img_url: image path relative to ``self.base_url``.
     :param file_name: optional base name for the saved file; the extension
         of the URL's own file name is appended to it. When ``None`` the
         file name taken from the URL is used unchanged.
     :return: ``True`` when the image was written to disk, ``False`` on any
         error, including an HTTP error status.
     """
     try:
         full_img_url = self.base_url + img_url
         split_file_name = img_url.split('/')[-1]
         if file_name is None:
             file_name = split_file_name
         else:
             # str() so that non-string identifiers can be used as names.
             file_name = str(file_name) + os.path.splitext(
                 split_file_name)[-1]
         if img_url.startswith('/'):
             img_url = img_url[1:]
         # Cut only the trailing file name. A plain str.replace() would also
         # remove the same text wherever it occurs inside a directory name.
         relative_dir = img_url[:len(img_url) - len(split_file_name)]
         download_path = os.path.join(HNCommonParam.script_download_path,
                                      relative_dir)
         r = requests.get(full_img_url, stream=True, timeout=60 * 5)
         # Without this check a 4xx/5xx error page would be saved as if it
         # were the image and the download reported as successful.
         r.raise_for_status()
         if not os.path.exists(download_path):
             os.makedirs(download_path)
         abs_file_path = os.path.join(download_path, file_name)
         with open(abs_file_path, 'wb') as f:
             f.write(r.content)
         return True
     except Exception:
         # Best effort: a failed image must not abort the surrounding crawl.
         Logger.log('img %s download err' % img_url)
         return False
Exemplo n.º 4
0
def rebate():
    """Run one HouNiao crawl, logging its start, end and any failure.

    Anything raised by the crawl itself (``BaseException`` included) is
    caught here: the failure date is logged and the traceback printed.
    """
    spider = HouNiaoScript()
    try:
        Logger.log('spider task start')
        spider.download_goods()
        Logger.log('Spider task end')
    except BaseException:
        failed_on = time.strftime('%Y-%m-%d', time.localtime())
        Logger.log('Spider task failed: ' + failed_on)
        traceback.print_exc()
Exemplo n.º 5
0
def rebate():
    """Run one XXKuCun crawl, logging start, end and failure to its log dir.

    Anything raised by the crawl itself (``BaseException`` included) is
    caught here: the failure date is logged and the traceback printed.
    """
    script = XXKuCunScript()

    def log(message):
        # log_path is looked up on every call, as each log statement did.
        Logger.log(message, this_dir=script.log_path)

    try:
        log('spider task start')
        script.download_goods()
        log('Spider task end')
    except BaseException:
        today = time.strftime('%Y-%m-%d', time.localtime())
        log('Spider task failed: ' + today)
        traceback.print_exc()
Exemplo n.º 6
0
 def login(self, driver):
     """Log in through ``driver`` and answer the arithmetic verify code.

     Returns ``False`` without doing anything when ``driver`` is ``None`` or
     when its current URL neither contains ``home/User/login`` nor equals
     ``data:,``. On every other path the method returns ``True``, including
     after the exception handler at the bottom has run.

     :param driver: Selenium WebDriver used to drive the login page.
     :return: ``False`` for the early exit described above, else ``True``.
     """
     # Only proceed from the login page or from 'data:,' (presumably the
     # blank page of a freshly started browser -- TODO confirm).
     if driver is None or ('home/User/login' not in driver.current_url
                           and 'data:,' != driver.current_url):
         return False
     try:
         driver.get(self.login_url)
         # Wait for the user-name field; the arguments after the driver are
         # the timeout (60 * 5 seconds) and the poll interval (3 seconds).
         WebDriverWait(driver, 60 * 5, 3).until(
             EC.presence_of_all_elements_located(
                 (By.CSS_SELECTOR, '.userName')))
         # Short pauses between the individual form interactions.
         time.sleep(1.6)
         driver.find_element_by_class_name('userName').send_keys(
             HNCommonParam.hn_username)
         time.sleep(1.6)
         driver.find_element_by_class_name('password').send_keys(
             HNCommonParam.hn_password)
         time.sleep(1.6)
         cnt = 0
         # Keep clicking the submit button for as long as it is displayed.
         while driver.find_element_by_class_name(
                 'submit-form').is_displayed():
             if cnt > 10:
                 # NOTE(review): the recursive call is not returned, so once
                 # it finishes this loop carries on clicking -- confirm that
                 # this is intended.
                 self.login(driver)
             cnt = cnt + 1
             driver.find_element_by_class_name('submit-form').click()
             time.sleep(1.8)
         # The submit button is hidden now; handle the verify-code step.
         if not driver.find_element_by_class_name(
                 'submit-form').is_displayed():
             WebDriverWait(driver, 60 * 1, 3).until(
                 EC.presence_of_all_elements_located(
                     (By.CSS_SELECTOR, '.verify-code')))
             # Turn the challenge text into an expression: '=' and '?' are
             # removed and the multiplication sign is replaced by '*'.
             verify_exp = driver.find_element_by_class_name(
                 'verify-code').text.replace('=', '').replace('?',
                                                              '').replace(
                                                                  '×', '*')
             # SECURITY NOTE(review): eval() runs text read from the remote
             # page; a restricted arithmetic evaluator would be safer.
             verify_exp_result = eval(verify_exp)
             Logger.log('验证码为:%s, 计算结果为:%s' %
                        (verify_exp, verify_exp_result))
             driver.find_element_by_class_name(
                 'varify-input-code').send_keys(verify_exp_result)
             time.sleep(0.6)
             driver.find_element_by_class_name('cerify-code-button').click()
             # The '.personal' element appearing is treated as a successful
             # login.
             WebDriverWait(driver, 60 * 2, 3).until(
                 EC.presence_of_all_elements_located(
                     (By.CSS_SELECTOR, '.personal')))
             Logger.log('登录成功!')
     except Exception as err:
         traceback.print_exc()
         Logger.log('登录失败,重新登录: %r ' % (err))
         # NOTE(review): retries by recursion with no attempt limit, and the
         # result of the retry is discarded (True is returned below).
         self.login(driver)
     return True
Exemplo n.º 7
0
 def request_get_proxy(self, url, params=None):
     """GET ``url`` with the current headers, trying at most ten times.

     A response with status 200 is returned immediately. After the tenth
     non-200 response the give-up message is logged and that last response
     is returned as it is. Between attempts the method sleeps five seconds.

     :param url: URL to request.
     :param params: optional query parameters passed to ``requests.get``.
     :return: the last response received (its status may be other than 200).
     """
     Logger.log('request get:' + url, this_dir=self.log_path)
     Logger.log(params)
     response = None
     attempts_made = 0
     while True:
         response = requests.get(url=url,
                                 params=params,
                                 headers=self.get_now_header(),
                                 timeout=60 * 5,
                                 verify=False)
         attempts_made += 1
         if response.status_code == 200:
             break
         if attempts_made >= 10:
             Logger.log('%s 重试超过10次,跳出' % (url), this_dir=self.log_path)
             break
         # The number logged is the ordinal of the attempt about to be made.
         Logger.log('%s请求失败:%d,重试第%d次' %
                    (url, response.status_code, attempts_made + 1))
         time.sleep(5)
     return response
Exemplo n.º 8
0
 def request_proxy(self, url, params=None, headers=None, cookies=None):
     """GET ``url`` and keep retrying every five seconds until status 200.

     Every response, successful or not, is first handed to
     ``self.download_file``. There is no limit on the number of attempts.

     :param url: URL to request.
     :param params: optional query parameters.
     :param headers: optional request headers.
     :param cookies: optional cookies.
     :return: the first response whose status code is 200.
     """
     Logger.log('request get:' + url)
     Logger.log(params)
     request_kwargs = dict(url=url,
                           params=params,
                           headers=headers,
                           cookies=cookies,
                           timeout=60 * 5,
                           verify=False)
     while True:
         reply = requests.get(**request_kwargs)
         self.download_file(reply)
         if reply.status_code == 200:
             return reply
         Logger.log('%s请求失败:%d,重试' % (url, reply.status_code))
         time.sleep(5)
Exemplo n.º 9
0
 def request_get_proxy(self, url, params=None):
     """GET ``url`` with the instance cookie jar until the status is 200.

     Before the first request two cookies are set on ``self.cookies_jar``:
     ``productHuodongClose`` and a current-timestamp value stored under
     ``self.cookie_ts_key``. Every response is handed to
     ``self.download_file``; non-200 responses are logged and retried after
     five seconds, without any limit on the number of attempts.

     :param url: URL to request.
     :param params: optional query parameters.
     :return: the first response whose status code is 200.
     """
     Logger.log('request get:' + url)
     Logger.log(params)
     self.cookies_jar.set('productHuodongClose', 'yes')
     self.cookies_jar.set(self.cookie_ts_key, str(int(time.time())))

     def fetch():
         # One request plus the per-response hook, in the original order.
         resp = requests.get(url=url,
                             params=params,
                             cookies=self.cookies_jar,
                             timeout=60 * 5,
                             verify=False)
         self.download_file(resp)
         return resp

     response = fetch()
     while response.status_code != 200:
         Logger.log('%s请求失败:%d,重试' % (url, response.status_code))
         time.sleep(5)
         response = fetch()
     return response
Exemplo n.º 10
0
    def download_goods(self):
        """Crawl every storefront section and persist activities and goods.

        Sections are processed in this order:

        1. App carousel banners (``wxapi/index/carousel``).
        2. App country pavilions (``wxapi/index/country``) and the first two
           pages of goods of each pavilion.
        3. Home-page nav tabs: the five trade types, the trending tab and
           the new-arrivals tab.
        4. Home-page card activities.
        5. The active flash-sale tab.
        6. The first result page of every third-level goods category.

        Activities are stored through ``ActivtiyDao`` and goods through
        ``GoodDao``; images are fetched with ``download_pic_url``.
        """
        # App banner page
        app_banner = 'https://www.houniao.hk/wxapi/index/carousel'
        app_banner_res = self.request_proxy(url=app_banner,
                                            headers=self.headers).json()
        if app_banner_res['code'] == 200:
            for data in app_banner_res['data']:
                ad_name = data['adName']
                ad_img_file = "/" + data['adFile']
                activity = ActivtiyBO()
                activity.type = 'search'
                activity.activity_code = 'TOP'
                activity.name = ad_name
                activity.pic_url = self.base_url + ad_img_file
                activity_pid = ActivtiyDao.insert(activity)
                self.download_pic_url(img_url=ad_img_file,
                                      file_name='TOP#顶幅banner#' +
                                      str(activity_pid))
                # Download the goods found by searching for the banner name.
                self.download_search_app_good_list(keyword=ad_name,
                                                   activity_pid=activity_pid,
                                                   activity_code='TOP')

        # App country pavilions
        coutry_url = 'https://www.houniao.hk/wxapi/index/country'
        coutry_res = self.request_proxy(url=coutry_url,
                                        headers=self.headers).json()
        if coutry_res['code'] == 200:
            for data in coutry_res['data']:
                ad_name = data['adName']
                # adURL is sent as the 'originId' filter of the goods query.
                ad_url = data['adURL']
                ad_img_file = '/' + data['adFile']
                activity = ActivtiyBO()
                activity.activity_code = 'COUNTRY'
                activity.name = ad_name
                activity_pid = ActivtiyDao.insert(activity)
                self.download_pic_url(img_url=ad_img_file,
                                      file_name='COUNTRY#' + ad_name + "#" +
                                      str(activity_pid))
                # Only the first two pages (8 goods each) are fetched.
                for page in [1, 2]:
                    country_good_url = 'https://www.houniao.hk/wxapi/goods/selectGoods'
                    country_good_params = {
                        'originId': ad_url,
                        'page': page,
                        'limit': 8
                    }
                    country_good_res = self.request_proxy(
                        url=country_good_url,
                        params=country_good_params,
                        headers=self.headers).json()
                    if country_good_res['code'] == 200:
                        for good_list_item in country_good_res['data']['list']:
                            good_bo = GoodBo()
                            good_bo.activity_code = 'COUNTRY'
                            good_bo.activity_pid = activity_pid
                            good_bo.code = good_list_item['goodsSku']
                            good_id = good_list_item['goodsId']
                            good_bo.name = good_list_item['goodsName']
                            good_img = '/' + good_list_item['goodsImg']
                            # pic_url is only stored when the image could
                            # actually be downloaded.
                            if self.download_pic_url(good_img,
                                                     file_name=good_bo.code):
                                good_bo.pic_url = self.base_url + good_img
                            GoodDao.insert(good_bo)
        else:
            Logger.log('国际馆下载失败')
        self.init_cookies()
        res = self.request_get_proxy('http://www.houniao.hk/')
        soup = BeautifulSoup(res.text)

        # Trade-type nav tabs, limited to one page each.
        self.download_trade_type_goods(soup, '保税直供', 'TRADETYPE', 1)
        self.download_trade_type_goods(soup, '完税进口', 'TRADETYPE', 1)
        self.download_trade_type_goods(soup, '国内贸易', 'TRADETYPE', 1)
        self.download_trade_type_goods(soup, '香港直邮', 'TRADETYPE', 1)
        self.download_trade_type_goods(soup, '海外直邮', 'TRADETYPE', 1)

        # Trending hits + new arrivals
        Logger.log('开始下载 网红爆品')
        hot_good = soup.select_one('.nav-tabs .nav-item:contains("网红爆品")')
        activity = ActivtiyBO()
        activity.activity_code = 'HOT'
        activity.name = '网红爆品'
        activity.url = self.base_url + hot_good.select_one('a')['href']
        activity_pid = ActivtiyDao.insert(activity)
        self.download_good_item_lists(activity.url, activity_pid, 'HOT')

        self.download_trade_type_goods(soup, '新品上市', 'NEW')

        # The PC top-banner crawl is deprecated; the App carousel at the top
        # of this method is used instead.

        # Card activities
        Logger.log('开始下载 卡片活动')
        floor_items = soup.select('.floor-items  .floor-item')
        for floor_item in floor_items:
            href = floor_item.select_one('a')['href']
            name = floor_item.select_one('p.name').text.strip()
            desc = floor_item.select_one('p.desc').text.strip()
            img_url = floor_item.select_one('img')['src']
            activity = ActivtiyBO()
            activity.activity_code = 'CARD'
            activity.desc = desc
            activity.name = '卡片活动-' + name
            activity.url = self.base_url + href
            activity.pic_url = self.base_url + img_url
            activity_pid = ActivtiyDao.insert(activity)
            if not self.download_pic_url(img_url,
                                         file_name='CARD#' + activity.name +
                                         '#' + str(activity_pid)):
                # The image could not be saved: clear pic_url on the record
                # that was already inserted.
                activity.pid = activity_pid
                activity.pic_url = None
                ActivtiyDao.update_room_detail(activity)
            self.download_card_good_lists(activity.url, activity_pid, 'CARD')
        # Flash sale
        Logger.log('开始下载 抢购')
        activity = ActivtiyBO()
        activity.activity_code = 'COUNTDOWN'
        activity.name = '抢购'
        activity.url = self.base_url + soup.select_one(
            '#flashsale .navbox.active')['href']
        activity_pid = ActivtiyDao.insert(activity)
        self.download_countdown_good_lists(activity.url,
                                           activity_pid=activity_pid,
                                           activity_code='COUNTDOWN')

        # Crawl the first page of every goods category
        Logger.log('开始爬取 商品分类')
        list_items = soup.select('.site-category .catlist li.list-item')
        for list_item in list_items:
            # First-level category name
            item_name_1 = list_item.select_one(
                'a.item span.catname').text.strip()
            for list_item2 in list_item.select('div.sub-list dl.slblock'):
                # Second-level category name
                item_name_2 = list_item2.select_one('dt.li-title').text.strip()
                for list_item3 in list_item2.select('dd.li-item'):
                    # Third-level category name
                    item_name_3 = list_item3.text.strip()
                    activity = ActivtiyBO()
                    activity.activity_code = 'CATEGORY'
                    activity.name = item_name_1 + '-' + item_name_2 + '-' + item_name_3
                    activity.url = 'http:' + list_item3.select_one('a')['href']
                    activity_pid = ActivtiyDao.insert(activity)
                    self.download_search_good_list(activity.url,
                                                   activity_pid,
                                                   'CATEGORY',
                                                   limit_page_num=1)
Exemplo n.º 11
0
# Module-level scheduler; the `rebate` job below is registered on it through
# the `scheduled_job` decorator, and the script entry point starts it.
scheduler = BlockingScheduler()


@scheduler.scheduled_job("cron",
                         day_of_week='*',
                         hour=HNCommonParam.job_hour,
                         minute=HNCommonParam.job_min,
                         second='00')
def rebate():
    """Cron job: run one HouNiao crawl and log its start, end and failure.

    Anything raised by the crawl itself (``BaseException`` included) is
    caught here: the failure date is logged and the traceback printed.
    """
    crawler = HouNiaoScript()
    try:
        Logger.log('spider task start')
        crawler.download_goods()
        Logger.log('Spider task end')
    except BaseException:
        date_stamp = time.strftime('%Y-%m-%d', time.localtime())
        Logger.log('Spider task failed: %s' % date_stamp)
        traceback.print_exc()


if __name__ == '__main__':
    # Script entry point: start the module-level scheduler and shut it down
    # when the process is interrupted or asked to exit.
    try:
        Logger.log("statistic scheduler start")
        # NOTE(review): if `scheduler` is APScheduler's BlockingScheduler,
        # start() blocks until the scheduler stops, so the "start success"
        # line below would only be logged after it has stopped -- confirm.
        scheduler.start()
        # HouNiaoScript().download_goods()
        Logger.log("statistic scheduler start success")
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
        # NOTE(review): this handler runs on KeyboardInterrupt / SystemExit,
        # so the message is also logged on a normal Ctrl-C stop, not only on
        # a failed start-up.
        Logger.log("statistic scheduler start-up fail")
Exemplo n.º 12
0
    def download_city_goods(self, city):
        """Crawl and persist all product listings of one city.

        Steps, in order: districts, top-level and child categories, the
        paged district x category search, banners, today's recommendations,
        products about to go offline, hot products and the per-category
        columns. Product rows are stored through ``ProductListDao`` and
        product images are fetched with ``download_pic_url``.

        :param city: indexable city record; ``city[1]`` is used as the city
            id and ``city[2]`` as the city name.
        """
        city_id = city[1]
        city_name = city[2]
        Logger.log('开始下载城市:' + city_name, this_dir=self.log_path)

        # Districts: insert the ones that are not stored yet.
        Logger.log('开始下载区域, 条目基础数据', this_dir=self.log_path)
        district_url = 'https://api.xxkucun.com/v1/common/district/list?city_id=' + str(
            city_id)
        district_res = self.request_get_proxy(district_url).json()
        if 0 == district_res['err_code']:
            for district_data in district_res['data']:
                if DistrictDao.select_by_district_id(
                        district_data['ID']) is None:
                    district_bo = DistrictBO()
                    district_bo.city_id = city_id
                    district_bo.district_id = district_data['ID']
                    district_bo.district_name = district_data['Name']
                    DistrictDao.insert(district_bo)

        # Categories: level 0 for the top level, level 1 for their children.
        catetory_url = 'https://api.xxkucun.com/v1/product/category/list?num=8&city_id=' + str(
            city_id)
        catetory_res = self.request_get_proxy(catetory_url).json()
        if 0 == catetory_res['err_code']:
            for catetory_data in catetory_res['data']:
                db_category = CategoryDao.select_by_city_id_categoryid(
                    city_id, catetory_data['ID'])
                if db_category is None:
                    category_bo = CategoryBo()
                    category_bo.city_id = city_id
                    category_bo.level = 0
                    category_bo.category_id = catetory_data['ID']
                    category_bo.category_name = catetory_data['Name']
                    category_id = CategoryDao.insert(category_bo)
                else:
                    category_id = db_category.pid
                child_category_url = 'https://api.xxkucun.com/v1/product/catechild/list?category_id=%d&city_id=%d' % (
                    catetory_data['ID'], city_id)
                child_catetory_res = self.request_get_proxy(
                    child_category_url).json()
                if 0 == child_catetory_res['err_code']:
                    # Separate names for the child loop so that the parent
                    # loop variables are not overwritten.
                    for child_data in child_catetory_res['data']:
                        db_child = CategoryDao.select_by_city_id_categoryid(
                            city_id, child_data['ID'])
                        if db_child is None:
                            category_bo = CategoryBo()
                            category_bo.city_id = city_id
                            category_bo.level = 1
                            category_bo.parent_category_pid = category_id
                            category_bo.category_id = child_data['ID']
                            category_bo.category_name = child_data['Name']
                            CategoryDao.insert(category_bo)

        # Goods of every district x category combination, page by page.
        Logger.log('开始下载条目商品数据', this_dir=self.log_path)
        distircts = DistrictDao.select_by_city_id(city_id)
        # NOTE(review): categories are inserted above with level 0 and 1,
        # but level 2 is queried here -- confirm this is intended.
        categories = CategoryDao.select_by_city_id_level(city_id, 2)
        for district in distircts:
            for category in categories:
                page_index = 1
                while True:
                    category_district_search_url = 'https://api.xxkucun.com/v1/product/search/by/category?page_index=%d&district_id=%d&px=0&category_id=%d&type=0&lng=103.56358166666665&lat=33.00125&actiontype=&city_id=%d' % (
                        page_index, district.district_id, category.category_id,
                        city_id)
                    category_district_search_res = self.request_get_proxy(
                        category_district_search_url).json()
                    # Stop paging on an API error or on the first empty page.
                    if 0 != category_district_search_res['err_code']:
                        break
                    if not category_district_search_res['data']:
                        break
                    for rec_data in category_district_search_res['data']:
                        product_list_bo = ProductListBO()
                        product_list_bo.city_id = city_id
                        product_list_bo.district_id = district.district_id
                        product_list_bo.category_pid = category.pid
                        product_list_bo.product_id = rec_data['ID']
                        product_list_bo.name = rec_data['Name']
                        product_list_bo.img_url = rec_data['Img']
                        self.download_pic_url(rec_data['Img'],
                                              rec_data['ID'])
                        product_list_bo.brand_name = rec_data['BrandName']
                        product_list_bo.price = rec_data['Price']
                        product_list_bo.market_price = rec_data[
                            'MarketPrice']
                        product_list_bo.sale_qty = rec_data['SaleQty']
                        product_list_bo.total_qty = rec_data['TotalQty']
                        product_list_bo.discount = rec_data['Discount']
                        product_list_bo.commission = rec_data['Commission']
                        product_list_bo.group_type = 'SEARCH'
                        product_list_bo.sale_status = rec_data.get(
                            'SaleStatus', None)
                        ProductListDao.insert(product_list_bo)
                    page_index = page_index + 1

        # Banners
        Logger.log('开始下载BANNER数据', this_dir=self.log_path)
        banner_url = 'https://api.xxkucun.com/v1/product/banner/list?ver=1&city_id=' + str(
            city_id)
        banner_res = self.request_get_proxy(banner_url).json()
        if 0 == banner_res['err_code']:
            for banner_data in banner_res['data']:
                product_list_bo = ProductListBO()
                product_list_bo.city_id = city_id
                product_list_bo.product_id = banner_data['ProdID']
                product_list_bo.img_url = banner_data['Img']
                self.download_pic_url(banner_data['Img'])
                product_list_bo.group_type = banner_data['type']
                product_list_bo.sale_status = banner_data.get(
                    'SaleStatus', None)
                ProductListDao.insert(product_list_bo)

        # Today's recommendations
        Logger.log('开始下载今日推荐', this_dir=self.log_path)
        rec_url = 'https://api.xxkucun.com/v1/product/today/rec/list?lng=103.56358166666665&lat=33.00125&city_id=' + str(
            city_id)
        rec_res = self.request_get_proxy(rec_url).json()
        if 0 == rec_res['err_code']:
            for rec_data in rec_res['data']:
                product_list_bo = ProductListBO()
                product_list_bo.city_id = city_id
                product_list_bo.product_id = rec_data['ID']
                product_list_bo.name = rec_data['Name']
                product_list_bo.img_url = rec_data['Img']
                self.download_pic_url(rec_data['Img'], rec_data['ID'])
                product_list_bo.brand_name = rec_data['BrandName']
                product_list_bo.price = rec_data['Price']
                product_list_bo.market_price = rec_data['MarketPrice']
                product_list_bo.commission = rec_data['Commission']
                product_list_bo.pay_count = rec_data['PayCount']
                product_list_bo.group_type = 'REC'
                product_list_bo.sale_status = rec_data.get('SaleStatus', None)
                ProductListDao.insert(product_list_bo)
                self.down_good_detail(rec_data['ID'], city_id)

        # Products about to go offline
        Logger.log('开始下载即将下线', this_dir=self.log_path)
        down_url = 'https://api.xxkucun.com/v1/product/pro/GetDownLineList?user_id=null&lng=103.56358166666665&lat=33.00125&city_id=' + str(
            city_id)
        down_res = self.request_get_proxy(down_url).json()
        if 0 == down_res['err_code']:
            for rec_data in down_res['data']:
                product_list_bo = ProductListBO()
                product_list_bo.city_id = city_id
                product_list_bo.product_id = rec_data['ID']
                product_list_bo.name = rec_data['Name']
                product_list_bo.img_url = rec_data['Img']
                self.download_pic_url(rec_data['Img'], rec_data['ID'])
                product_list_bo.brand_name = rec_data['BrandName']
                product_list_bo.price = rec_data['Price']
                product_list_bo.market_price = rec_data['MarketPrice']
                product_list_bo.sale_qty = rec_data['SaleQty']
                product_list_bo.total_qty = rec_data['TotalQty']
                product_list_bo.discount = rec_data['Discount']
                product_list_bo.commission = rec_data['Commission']
                product_list_bo.group_type = 'DOWN'
                product_list_bo.sale_status = rec_data.get('SaleStatus', None)
                ProductListDao.insert(product_list_bo)
                self.down_good_detail(rec_data['ID'], city_id)

        # Hot products (first page only)
        Logger.log('开始下载爆款推荐', this_dir=self.log_path)
        hot_url = 'https://api.xxkucun.com/v1/product/hot/list?page_index=1&lng=103.56358166666665&lat=33.00125&city_id=' + str(
            city_id)
        hot_res = self.request_get_proxy(hot_url).json()
        if 0 == hot_res['err_code']:
            for rec_data in hot_res['data']:
                product_list_bo = ProductListBO()
                product_list_bo.city_id = city_id
                product_list_bo.product_id = rec_data['ID']
                product_list_bo.name = rec_data['Name']
                product_list_bo.img_url = rec_data['Img']
                self.download_pic_url(rec_data['Img'], rec_data['ID'])
                product_list_bo.brand_name = rec_data['BrandName']
                product_list_bo.price = rec_data['Price']
                product_list_bo.market_price = rec_data['MarketPrice']
                product_list_bo.sale_qty = rec_data['SaleQty']
                product_list_bo.total_qty = rec_data['TotalQty']
                product_list_bo.discount = rec_data['Discount']
                product_list_bo.commission = rec_data['Commission']
                product_list_bo.group_type = 'HOT'
                product_list_bo.sale_status = rec_data.get('SaleStatus', None)
                ProductListDao.insert(product_list_bo)
                self.down_good_detail(rec_data['ID'], city_id)

        # Per-category columns: each entry carries a name and a product list.
        Logger.log('开始下载专栏', this_dir=self.log_path)
        # NOTE(review): lng/lat values are swapped here compared with every
        # other URL in this method -- confirm before changing.
        category_url = 'https://api.xxkucun.com/v1/product/GetListArrayByCategory?lng=33.00125&lat=103.56358166666665&city_id=' + str(
            city_id)
        category_res = self.request_get_proxy(category_url).json()
        if 0 == category_res['err_code']:
            for rec_data in category_res['data']:
                type_name = rec_data['Name']
                db_category = CategoryDao.select_by_city_id_categoryid(
                    city_id, rec_data['ID'])
                # The category may not be stored yet; keep crawling its
                # products without a category link in that case.
                category_pid = None
                if db_category is not None:
                    category_pid = db_category.pid
                for item in rec_data['List']:
                    product_list_bo = ProductListBO()
                    product_list_bo.city_id = city_id
                    product_list_bo.product_id = item['ID']
                    product_list_bo.name = item['Name']
                    product_list_bo.img_url = item['Img']
                    self.download_pic_url(item['Img'], item['ID'])
                    product_list_bo.brand_name = item['BrandName']
                    product_list_bo.price = item['Price']
                    product_list_bo.market_price = item['MarketPrice']
                    product_list_bo.sale_qty = item['SaleQty']
                    product_list_bo.total_qty = item['TotalQty']
                    product_list_bo.discount = item['Discount']
                    product_list_bo.commission = item['Commission']
                    product_list_bo.group_type = 'CATEGORY_' + type_name
                    product_list_bo.category_pid = category_pid
                    # The sale status belongs to the product, not to the
                    # enclosing category entry.
                    product_list_bo.sale_status = item.get('SaleStatus', None)
                    ProductListDao.insert(product_list_bo)
                    self.down_good_detail(item['ID'], city_id)
Exemplo n.º 13
0
@scheduler.scheduled_job("cron",
                         day_of_week='*',
                         hour=XKCCommonParam.job_hour,
                         minute=XKCCommonParam.job_min,
                         second='00')
def rebate():
    """Cron job: run one XXKuCun crawl, logging to the script's log dir.

    Anything raised by the crawl itself (``BaseException`` included) is
    caught here: the failure date is logged and the traceback printed.
    """
    crawler = XXKuCunScript()
    try:
        Logger.log('spider task start', this_dir=crawler.log_path)
        crawler.download_goods()
        Logger.log('Spider task end', this_dir=crawler.log_path)
    except BaseException:
        # strftime() formats the current local time when no time is given.
        failure_line = 'Spider task failed: ' + time.strftime('%Y-%m-%d')
        Logger.log(failure_line, this_dir=crawler.log_path)
        traceback.print_exc()


if __name__ == '__main__':
    # Script entry point. The cron scheduler start is currently disabled
    # (see the commented-out call); the crawl is run once, directly.
    try:
        Logger.log("statistic scheduler start",
                   this_dir=XXKuCunScript.log_path)
        DBUtil.param = XKCCommonParam
        # scheduler.start()
        XXKuCunScript().download_goods()
    except (KeyboardInterrupt, SystemExit):
        # shutdown() is only valid on a running scheduler. Since start() is
        # not called above, an unconditional shutdown() would raise here and
        # hide the log line below.
        if scheduler.running:
            scheduler.shutdown()
        Logger.log("statistic scheduler start-up fail",
                   this_dir=XXKuCunScript.log_path)