Пример #1
0
    def process(self, params):
        """Insert or update one record through the model in ``self._class``.

        A ``uniqueid`` derived from ``wechat_id`` and ``title`` is first
        written back into ``params`` (the caller's dict is modified). A copy
        of ``params`` is then completed with ``self.defaults``, given a new
        ``uniqueid`` computed from the fields named in ``self.unique_key``
        plus an ``update_time`` stamp, stripped of the ``seed_id``,
        ``rule_id`` and ``detail_multi`` keys, and handed to
        ``update_or_create``.

        Exceptions raised by the database call are logged, not re-raised.

        :param params: dict containing at least ``wechat_id``, ``title`` and
            every ``self.unique_key`` field not supplied by ``self.defaults``.
        :raises KeyError: if ``wechat_id``, ``title`` or a unique-key field
            is missing.
        """
        C = self._class
        params['uniqueid'] = get_uniqueid(
            '%s:%s' % (params['wechat_id'], params['title']))

        # Apply defaults without overriding values that were supplied.
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        data = params.copy()
        for k, v in self.defaults.items():
            data.setdefault(k, v)

        # Build the unique key from the configured fields.
        unique_value = ':'.join(['%s' % data[k] for k in self.unique_key])
        data['uniqueid'] = get_uniqueid(unique_value)
        data['update_time'] = str(datetime.now())

        # Drop the keys that are not passed on to the model.
        for name in ('seed_id', 'rule_id', 'detail_multi'):
            data.pop(name, None)

        # Update the existing row or insert a new one.
        try:
            C.objects.update_or_create(uniqueid=data['uniqueid'],
                                       defaults=data)
        except Exception as e:
            logger.exception(e)
        finally:
            # .get() so that a record without 'link' cannot raise KeyError
            # from the finally clause just because of a debug log line.
            logger.debug(data.get('link'))
Пример #2
0
    def process(self, params, filters=None):
        """Update the row matching the record's ``uniqueid``, or create it.

        A copy of ``params`` is completed with ``self.defaults``, given a
        ``uniqueid`` computed from the fields named in ``self.unique_key``
        plus an ``update_time`` stamp, and stripped of the ``seed_id``,
        ``rule_id`` and ``detail_multi`` keys. ``self.update`` is tried
        first; when it reports zero affected rows a ``create_time`` stamp is
        added and ``self.create`` is called instead.

        Exceptions raised by ``self.update`` / ``self.create`` are logged,
        not re-raised.

        :param params: dict containing every ``self.unique_key`` field not
            supplied by ``self.defaults``; it is not modified.
        :param filters: not used by this method.
        :raises KeyError: if a unique-key field is missing.
        """
        # Apply defaults without overriding values that were supplied.
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        data = params.copy()
        for k, v in self.defaults.items():
            data.setdefault(k, v)

        # Build the unique key from the configured fields.
        unique_value = ':'.join(['%s' % data[k] for k in self.unique_key])
        data['uniqueid'] = get_uniqueid(unique_value)
        data['update_time'] = str(datetime.now())

        # Drop the keys that are not stored.
        for name in ('seed_id', 'rule_id', 'detail_multi'):
            data.pop(name, None)

        # Update the existing row or insert a new one.
        try:
            # Try the update first.
            affected = self.update(data, {'uniqueid': data['uniqueid']})
            if affected == 0:
                # No row matched, so create it.
                data['create_time'] = str(datetime.now())
                self.create(data)
        except Exception as e:
            logger.exception(e)
        finally:
            # .get() so that a record without 'url' cannot raise KeyError
            # from the finally clause just because of a debug log line.
            logger.debug(data.get('url'))
Пример #3
0
 def check_detail_fresh_time(self, unique_url, data):
     """Report whether a freshness marker for ``unique_url`` exists in redis.

     Returns ``False`` straight away when ``data["detail_fresh_time"]`` is
     zero or negative. Otherwise a key is built from ``data["rule_id"]`` and
     ``get_uniqueid(unique_url)``: if redis already holds that key the
     result is ``True``; if not, the key is stored with ``setex`` and the
     result is ``False``.
     """
     ttl = data["detail_fresh_time"]
     rule = data["rule_id"]
     if ttl <= 0:
         return False

     marker = 'unicrawler:detail_fresh_time:%s:%s' % (rule, get_uniqueid(unique_url))
     if self.redis.exists(marker):
         return True

     # fresh_time is passed as both remaining setex arguments
     # (presumably value and expiry -- confirm against the redis client).
     self.redis.setex(marker, ttl, ttl)
     return False
Пример #4
0
 def check_detail_fresh_time(self, unique_url, data):
     """Check redis for a freshness marker belonging to ``unique_url``.

     A non-positive ``data["detail_fresh_time"]`` yields ``False`` without
     touching redis. Otherwise the marker key combines ``data["rule_id"]``
     with ``get_uniqueid(unique_url)``; ``True`` is returned when the key
     is already present, and when it is absent it is written with ``setex``
     before ``False`` is returned.
     """
     fresh_time = data["detail_fresh_time"]
     rule_id = data["rule_id"]
     if fresh_time <= 0:
         return False

     digest = get_uniqueid(unique_url)
     redis_key = 'unicrawler:detail_fresh_time:%s:%s' % (rule_id, digest)
     seen = self.redis.exists(redis_key)
     if not seen:
         # Same number for both remaining setex arguments, as in the
         # original call.
         self.redis.setex(redis_key, fresh_time, fresh_time)
         return False
     return True
Пример #5
0
 def check_detail_fresh_time(self, data):
     """Report whether a freshness marker for this record exists in redis.

     Returns ``False`` straight away when ``data["detail_fresh_time"]`` is
     zero or negative. Otherwise the values of the fields listed in
     ``data["unique_key"]`` are concatenated, hashed with ``get_uniqueid``
     and combined with ``data["rule_id"]`` into a redis key. If the key
     already exists the result is ``True``; if not, the key is stored with
     ``setex`` and the result is ``False``.

     :param data: dict with ``unique_key`` (iterable of field names),
         ``detail_fresh_time``, ``rule_id`` and, normally, the named fields.
     :raises KeyError: if ``unique_key``, ``detail_fresh_time`` or
         ``rule_id`` is missing.
     """
     unique_key, fresh_time, rule_id = data['unique_key'], data["detail_fresh_time"], data["rule_id"]
     if fresh_time <= 0:
         return False

     # str.join() rejects None and non-string items, so a missing field or a
     # numeric value used to raise TypeError here. Coerce every value to
     # text and let a missing/None field contribute an empty string; string
     # values produce exactly the same result as before.
     parts = []
     for item in unique_key:
         value = data.get(item)
         parts.append('' if value is None else '%s' % (value,))
     unique_value = ''.join(parts)

     key = 'unicrawler:detail_fresh_time:%s:%s' % (rule_id, get_uniqueid(unique_value))
     if self.redis.exists(key):
         return True

     # fresh_time is passed as both remaining setex arguments
     # (presumably value and expiry -- confirm against the redis client).
     self.redis.setex(key, fresh_time, fresh_time)
     return False
Пример #6
0
    def process(self, params):
        """Insert or update one record through the model in ``self._class``.

        A ``uniqueid`` derived from ``wechat_id`` and ``title`` is first
        written back into ``params`` (the caller's dict is modified). A copy
        of ``params`` is then completed with ``self.defaults``, given a new
        ``uniqueid`` computed from the fields named in ``self.unique_key``
        plus an ``update_time`` stamp, stripped of the ``seed_id``,
        ``rule_id`` and ``detail_multi`` keys, and handed to
        ``update_or_create``.

        Exceptions raised by the database call are logged, not re-raised.

        :param params: dict containing at least ``wechat_id``, ``title`` and
            every ``self.unique_key`` field not supplied by ``self.defaults``.
        :raises KeyError: if ``wechat_id``, ``title`` or a unique-key field
            is missing.
        """
        C = self._class
        params['uniqueid'] = get_uniqueid('%s:%s' % (params['wechat_id'], params['title']))

        # Apply defaults without overriding values that were supplied.
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        data = params.copy()
        for k, v in self.defaults.items():
            data.setdefault(k, v)

        # Build the unique key from the configured fields.
        unique_value = ':'.join(['%s' % data[k] for k in self.unique_key])
        data['uniqueid'] = get_uniqueid(unique_value)
        data['update_time'] = str(datetime.now())

        # Drop the keys that are not passed on to the model.
        for name in ('seed_id', 'rule_id', 'detail_multi'):
            data.pop(name, None)

        # Update the existing row or insert a new one.
        try:
            C.objects.update_or_create(uniqueid=data['uniqueid'], defaults=data)
        except Exception as e:
            logger.exception(e)
        finally:
            # .get() so that a record without 'url' cannot raise KeyError
            # from the finally clause just because of a debug log line.
            logger.debug(data.get('url'))