示例#1
0
 def brandHourList(self):
     # 查找需要每小时统计的列表
     # 得到需要的时间段
     val = (Common.add_hours(self.begin_time), Common.add_hours(self.begin_time, -1))
     print '# hour crawler time:',val
     
     # 商品默认信息列表
     all_item_num = 0
     hour_val_list = []
     act_items = {}
     item_results = self.mysqlAccess.selectJhsItemsHouralive(val)
     if item_results:
         for item in item_results:
             if act_items.has_key(str(item[0])):
                 act_items[str(item[0])]["items"].append(item[2:])
             else:
                 act_items[str(item[0])] = {'act_name':item[1],'items':[]}
                 act_items[str(item[0])]["items"].append(item[2:])
             all_item_num += 1
         for key in act_items.keys():
             hour_val_list.append((key,act_items[key]["act_name"],act_items[key]["items"]))
     else:
         print '# not find need hour items...'
         
     print '# hour all item nums:',all_item_num
     print '# hour all acts nums:',len(hour_val_list)
     # 清空每小时抓取redis队列
     self.item_queue.clearQ()
     # 保存每小时抓取redis队列
     self.item_queue.putlistQ(hour_val_list)
     print '# item queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
示例#2
0
    def antPage(self):
        try:
            # 主机器需要配置redis队列
            if self.m_type == 'm':
                category_list = self.mysqlAccess.selectJhsGroupItemCategory()
                if not category_list or len(category_list) == 0:
                    category_list = self.category_list
                if category_list and len(category_list) > 0:
                    cate_val_list = []
                    for cate in category_list:
                        cate_val_list.append((cate[0],cate[2],cate[1],Config.ju_home_today,Config.JHS_GroupItem))
                    # 清空category redis队列
                    self.cat_queue.clearQ()
                    # 保存category redis队列
                    self.cat_queue.putlistQ(cate_val_list)

                    # 清空act redis队列
                    self.act_queue.clearQ()
                    print '# category queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                else:
                    print '# not find category...'

            # 类目的活动Json
            obj = 'cat'
            crawl_type = 'main'
            # 获取还没有开团的活动id
            val = (Common.time_s(Common.now()),)
            acts = self.mysqlAccess.selectJhsActNotStart(val)
            brandact_id_list = []
            if acts:
                for act in acts:
                    brandact_id_list.append(str(act[1]))
            _val = (self.begin_time, brandact_id_list)
            self.work.process(obj,crawl_type,_val)

            # 活动数据
            act_val_list = self.work.items
            print '# act nums:', len(act_val_list)

            # 保存到redis队列
            self.act_queue.putlistQ(act_val_list)
            print '# act queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

            if self.m_type == 'm':
                val = (Common.add_hours(self.begin_time, -2),Common.add_hours(self.begin_time, -2),Common.add_hours(self.begin_time, -1))
                # 删除Redis中上个小时结束的活动
                _acts = self.mysqlAccess.selectJhsActEndLastOneHour(val)
                print '# end acts num:',len(_acts)
                self.work.delAct(_acts)
                # 删除Redis中上个小时结束的商品
                _items = self.mysqlAccess.selectJhsItemEndLastOneHour(val)
                print '# end items num:',len(_items)
                self.work.delItem(_items)
        except Exception as e:
            print '# antpage error :',e
            Common.traceback_log()
示例#3
0
 def scanEndItemsLasthour(self):
     val = (Common.add_hours(self.crawling_time, -2),Common.add_hours(self.crawling_time, -2),Common.add_hours(self.crawling_time, -1))
     _items = self.mysqlAccess.selectJhsGroupItemEndLastOneHour(val)
     end_items = []
     # 遍历商品
     for _item in _items:
         item_juid = _item[0]
         end_items.append({"item_juId":str(item_juid)})
     print '# del item nums for last hour end:',len(end_items)
     # 删除已经结束的商品
     self.delItem(end_items)
示例#4
0
 def scanAliveItems(self):
     # 到结束时间后的一个小时
     val = (Common.time_s(self.crawling_time), Common.add_hours(self.crawling_time, -1))
     # 查找已经开团但是没有结束的商品
     _items = self.mysqlAccess.selectJhsGroupItemAlive(val)
     print "# hour all item nums:",len(_items)
     return _items
示例#5
0
 def brandDayList(self):
     # 查找需要每天统计的活动列表
     # 当前时刻减去24小时
     val = (Common.today_s()+" 00:00:00",Common.add_hours(self.begin_time, -24))
     print '# day crawler time:',val
     # 商品默认信息列表
     all_item_num = 0
     day_val_list = []
     act_items = {}
     item_results = self.mysqlAccess.selectJhsItemsDayalive(val)
     if item_results:
         for item in item_results:
             if act_items.has_key(str(item[0])):
                 act_items[str(item[0])]["items"].append(item[2:])
             else:
                 act_items[str(item[0])] = {'act_name':item[1],'items':[]}
                 act_items[str(item[0])]["items"].append(item[2:])
             all_item_num += 1
         for key in act_items.keys():
             day_val_list.append((key,act_items[key]["act_name"],act_items[key]["items"]))
     else:
         print '# not find need day items...'
         
     print '# day all item nums:',all_item_num
     print '# need update all acts nums:',len(day_val_list)
     # 清空每天抓取redis队列
     self.item_queue.clearQ()
     # 保存每天抓取redis队列
     self.item_queue.putlistQ(day_val_list)
     print '# item queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
示例#6
0
    def antPage(self):
        try:
            # 更新即将开团活动的商品信息
            # 主机器需要配置redis队列
            if self.m_type == 'm':
                # 一个小时即将开团
                val = (Common.time_s(self.begin_time),Common.add_hours(self.begin_time, self.min_hourslot))
                print '# update time:',val

                # 商品默认信息列表
                all_item_num = 0
                update_val_list = []
                act_items = {}
                item_results = self.mysqlAccess.selectJhsItemsForUpdate(val)
                if item_results:
                    for item in item_results:
                        if act_items.has_key(str(item[0])):
                            act_items[str(item[0])]["items"].append(item[2:]) 
                        else:
                            act_items[str(item[0])] = {'act_name':item[1],'items':[]}
                            act_items[str(item[0])]["items"].append(item[2:])
                        all_item_num += 1
                    for key in act_items.keys():
                        update_val_list.append((key,act_items[key]["act_name"],act_items[key]["items"]))
                else:
                    print '# not find need update items...'
                print '# need update all items nums:',all_item_num
                print '# need update all acts nums:',len(update_val_list)

                # 清空redis队列
                self.item_queue.clearQ()
                # 保存到redis队列
                self.item_queue.putlistQ(update_val_list)
                print '# item queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

            """
            # 附加的信息
            a_val = (self.begin_time,)
            self.work.process(self._obj, self._crawl_type, a_val)
            """
            
        except Exception as e:
            print '# exception err in antPage info:',e
            Common.traceback_log()