def process(self, params):
    """Insert or update one record via ``self._class.objects``.

    The record is ``params`` completed with ``self.defaults``, keyed by a
    ``uniqueid`` derived from the fields named in ``self.unique_key``.
    Failures of the database call are logged and swallowed.

    Args:
        params: Mapping describing the record. Must contain ``wechat_id``
            and ``title``, plus every field listed in ``self.unique_key``
            that ``self.defaults`` does not supply.

    Raises:
        KeyError: If ``wechat_id``/``title`` or a ``self.unique_key`` field
            is missing. These lookups happen before the guarded database
            call, so they are not swallowed.
    """
    C = self._class

    # NOTE: this writes ``uniqueid`` into the caller's dict. It is kept
    # as-is because callers may read it back, and because ``uniqueid`` may
    # itself be one of the fields named in ``self.unique_key``.
    params['uniqueid'] = get_uniqueid(
        '%s:%s' % (params['wechat_id'], params['title']))

    # Add default values without overriding anything the caller supplied.
    data = params.copy()
    # ``items()`` rather than the Python-2-only ``iteritems()`` so the loop
    # runs unchanged on both Python 2 and Python 3.
    for k, v in self.defaults.items():
        data.setdefault(k, v)

    # Set the unique key.
    unique_value = ':'.join('%s' % data[k] for k in self.unique_key)
    data['uniqueid'] = get_uniqueid(unique_value)
    data['update_time'] = str(datetime.now())

    # Clean up fields that must not be passed to the database call.
    for field in ('seed_id', 'rule_id', 'detail_multi'):
        data.pop(field, None)

    # Update or insert into the database.
    try:
        C.objects.update_or_create(uniqueid=data['uniqueid'], defaults=data)
    except Exception as e:
        logger.exception(e)
    finally:
        # ``get`` so that a record without ``link`` cannot raise KeyError
        # from the ``finally`` clause of an otherwise best-effort method.
        logger.debug(data.get('link'))
def process(self, params, filters=None):
    """Update the row matching the record's ``uniqueid``, or create it.

    The record is ``params`` completed with ``self.defaults``. Its
    ``uniqueid`` is derived from the fields named in ``self.unique_key``.
    ``self.update`` is tried first; when it reports 0 affected rows, a
    ``create_time`` is stamped and ``self.create`` is called instead.
    Failures of either call are logged and swallowed.

    Args:
        params: Mapping describing the record; it is copied, not modified.
        filters: Unused by this method; kept for interface compatibility.

    Raises:
        KeyError: If a field named in ``self.unique_key`` is present in
            neither ``params`` nor ``self.defaults``.
    """
    # Add default values without overriding anything the caller supplied.
    data = params.copy()
    # ``items()`` rather than the Python-2-only ``iteritems()`` so the loop
    # runs unchanged on both Python 2 and Python 3.
    for k, v in self.defaults.items():
        data.setdefault(k, v)

    # Set the unique key.
    unique_value = ':'.join('%s' % data[k] for k in self.unique_key)
    data['uniqueid'] = get_uniqueid(unique_value)
    data['update_time'] = str(datetime.now())

    # Clean up fields that must not be passed to update/create.
    for field in ('seed_id', 'rule_id', 'detail_multi'):
        data.pop(field, None)

    # Update or insert into the database.
    try:
        # Try an update first.
        affected = self.update(data, {'uniqueid': data['uniqueid']})
        if affected == 0:
            # Row does not exist yet (presumably; 0 rows were affected),
            # so create it.
            data['create_time'] = str(datetime.now())
            self.create(data)
    except Exception as e:
        logger.exception(e)
    finally:
        # ``get`` so that a record without ``url`` cannot raise KeyError
        # from the ``finally`` clause of an otherwise best-effort method.
        logger.debug(data.get('url'))
def check_detail_fresh_time(self, unique_url, data):
    """Report whether ``unique_url`` already has a freshness marker in redis.

    Args:
        unique_url: Value identifying the detail page; it is hashed with
            ``get_uniqueid`` to build the redis key.
        data: Mapping providing ``detail_fresh_time`` and ``rule_id``.

    Returns:
        ``True`` when the marker key already exists. ``False`` when
        ``detail_fresh_time`` is zero or negative (redis is not consulted),
        or when the key was absent, in which case it is stored first.
    """
    ttl = data["detail_fresh_time"]
    rule = data["rule_id"]

    # A non-positive freshness window disables the check entirely.
    if ttl <= 0:
        return False

    url_digest = get_uniqueid(unique_url)
    marker = 'unicrawler:detail_fresh_time:%s:%s' % (rule, url_digest)

    if not self.redis.exists(marker):
        # The same number is passed as both remaining ``setex`` arguments,
        # so the call does not depend on the client's (time, value) order.
        self.redis.setex(marker, ttl, ttl)
        return False
    return True
def check_detail_fresh_time(self, unique_url, data):
    """Tell whether a freshness marker for ``unique_url`` exists in redis.

    Args:
        unique_url: Value identifying the detail page; its
            ``get_uniqueid`` digest becomes part of the redis key.
        data: Mapping that must contain ``detail_fresh_time`` and
            ``rule_id``.

    Returns:
        ``False`` if ``detail_fresh_time`` is not positive, without
        touching redis. Otherwise ``True`` if the marker key exists, and
        ``False`` after storing the marker if it did not.
    """
    window = data["detail_fresh_time"]
    rule_ident = data["rule_id"]

    # Zero or negative means no freshness tracking for this rule.
    if window <= 0:
        return False

    redis_key = 'unicrawler:detail_fresh_time:%s:%s' % (
        rule_ident, get_uniqueid(unique_url))

    already_marked = self.redis.exists(redis_key)
    if already_marked:
        return True

    # ``window`` is sent as both remaining arguments, so the call is
    # indifferent to the client's (time, value) argument order.
    self.redis.setex(redis_key, window, window)
    return False
def check_detail_fresh_time(self, data):
    """Report whether the record in ``data`` has a freshness marker in redis.

    The record's identity is the concatenation (no separator) of the values
    of the fields listed in ``data['unique_key']``.

    Args:
        data: Mapping providing ``unique_key`` (an iterable of field
            names), ``detail_fresh_time`` and ``rule_id``, plus the fields
            that ``unique_key`` names.

    Returns:
        ``False`` if ``detail_fresh_time`` is not positive, without
        touching redis. Otherwise ``True`` if the marker key exists, and
        ``False`` after storing the marker if it did not.

    Raises:
        TypeError: If a field named in ``unique_key`` is missing from
            ``data`` or is not a string, because the values are joined
            as-is.
    """
    key_fields = data['unique_key']
    ttl = data["detail_fresh_time"]
    rule = data["rule_id"]

    # A non-positive freshness window disables the check entirely.
    if ttl <= 0:
        return False

    identity = ''.join(data.get(field) for field in key_fields)
    marker = 'unicrawler:detail_fresh_time:%s:%s' % (rule, get_uniqueid(identity))

    if not self.redis.exists(marker):
        # The same number is passed as both remaining ``setex`` arguments,
        # so the call does not depend on the client's (time, value) order.
        self.redis.setex(marker, ttl, ttl)
        return False
    return True
def process(self, params):
    """Insert or update one record via ``self._class.objects``.

    The record is ``params`` completed with ``self.defaults``, keyed by a
    ``uniqueid`` derived from the fields named in ``self.unique_key``.
    Failures of the database call are logged and swallowed.

    Args:
        params: Mapping describing the record. Must contain ``wechat_id``
            and ``title``, plus every field listed in ``self.unique_key``
            that ``self.defaults`` does not supply.

    Raises:
        KeyError: If ``wechat_id``/``title`` or a ``self.unique_key`` field
            is missing. These lookups happen before the guarded database
            call, so they are not swallowed.
    """
    C = self._class

    # NOTE: this writes ``uniqueid`` into the caller's dict. It is kept
    # as-is because callers may read it back, and because ``uniqueid`` may
    # itself be one of the fields named in ``self.unique_key``.
    params['uniqueid'] = get_uniqueid(
        '%s:%s' % (params['wechat_id'], params['title']))

    # Add default values without overriding anything the caller supplied.
    data = params.copy()
    # ``items()`` rather than the Python-2-only ``iteritems()`` so the loop
    # runs unchanged on both Python 2 and Python 3.
    for k, v in self.defaults.items():
        data.setdefault(k, v)

    # Set the unique key.
    unique_value = ':'.join('%s' % data[k] for k in self.unique_key)
    data['uniqueid'] = get_uniqueid(unique_value)
    data['update_time'] = str(datetime.now())

    # Clean up fields that must not be passed to the database call.
    for field in ('seed_id', 'rule_id', 'detail_multi'):
        data.pop(field, None)

    # Update or insert into the database.
    try:
        C.objects.update_or_create(uniqueid=data['uniqueid'], defaults=data)
    except Exception as e:
        logger.exception(e)
    finally:
        # ``get`` so that a record without ``url`` cannot raise KeyError
        # from the ``finally`` clause of an otherwise best-effort method.
        logger.debug(data.get('url'))