async def async_visit_target(self, db, url, proxy, bullet, sem, session, scan=True):
    """
    Validate a single proxy against a target URL and record the outcome.

    A HEAD request is sent to ``url`` through the proxy while ``sem`` is
    held. When the reported status code is 200 the running-average score
    is recomputed as
    ``(old_score * old_count + (1 - cost / 15) * 100) / new_count``
    and the record is handed to ``self.update`` (scan) or ``self.success``
    (first-time validation). For any other code, a scan reports the record
    through ``self.fail``; a first-time validation does nothing.

    :param db: database handle forwarded to update/success/fail
    :param url: target website url
    :param proxy: proxy to validate, dict; its ``'http'`` entry is used
    :param bullet: stored record of the proxy (ip, port, score, ...)
    :param sem: semaphore limiting the number of concurrent requests
    :param session: async HTTP session passed to ``send_async_http``
    :param scan: True when re-scanning an existing target collection,
        False when validating a proxy before its first insertion
    """
    # Normalise the counter once so the stored value and the score
    # arithmetic agree even if the record carries it as a string.
    previous_count = int(bullet['test_count'])
    data = {
        'ip': bullet['ip'],
        'port': bullet['port'],
        'anony_type': bullet['anony_type'],
        'address': bullet['address'],
        'createdTime': bullet['createdTime'],
        'score': bullet['score'],
        'test_count': previous_count + 1,
        'url': url,
    }
    db_name = gen_target_db_name(url)
    async with sem:
        ret = await send_async_http(
            session,
            'head',
            url,
            retries=RETRIES,
            headers=headers,
            proxy=proxy['http'],
            timeout=TIMEOUT,
        )
        t, code = ret['cost'], ret['code']
        if code == 200:
            # Score of this single probe; faster responses score higher.
            # NOTE(review): 15 looks like a worst-case response time in
            # seconds - confirm whether it should follow TIMEOUT.
            probe_score = round((1 - t / 15) * 100, 2)
            data['score'] = round(
                (bullet['score'] * previous_count + probe_score)
                / data['test_count'],
                2,
            )
            data['total'] = round(data['score'] * data['test_count'], 2)
            data['resp_time'] = str(t) + 's'
            data['valid_time'] = time_to_date(int(time.time()))
            if scan:
                self.update(db, data, db_name)
            else:
                self.success(db, data, db_name)
        elif scan:
            self.fail(db, data, db_name)
def clean_expired_targets(self):
    """
    Remove every expired target together with its proxy collection.

    A target counts as expired when its ``validTime`` is older than
    ``TARGET_EXPIRE`` days. For each expired target the collection named
    by ``gen_target_db_name(url)`` is dropped and the target's entry is
    deleted from ``TARGETS_DB``. Returns immediately when the database is
    not connected.
    """
    if not self.db.connected:
        return
    now = datetime.datetime.now()
    # Compared as strings: the cutoff is ISO formatted, and validTime is
    # compared against it directly.
    cutoff = (now - datetime.timedelta(days=TARGET_EXPIRE)).isoformat()
    for tar in self.db.all(tname=TARGETS_DB):
        if tar['validTime'] < cutoff:
            db_name = gen_target_db_name(tar['url'])
            self.db.handler[db_name].drop()
            self.db.delete({'url': tar['url']}, tname=TARGETS_DB)
            logger.info(
                'Deleted expired target website proxy collection:(%s)',
                db_name,
            )
def save_targets(self):
    """
    Persist the targets configured in config to the targets collection.

    A target that is already stored only has its ``validTime`` refreshed.
    A new target is inserted with its url, creation/valid timestamps, the
    name of its proxy collection and a generated ``_id``.
    """
    now_iso = datetime.datetime.now().isoformat()
    for url in targets:
        if self.db.select({'url': url}, tname=TARGETS_DB):
            self.db.update({'url': url}, {'validTime': now_iso}, tname=TARGETS_DB)
            continue
        # A fresh dict per target: a record already handed to the database
        # layer must not be mutated by a later iteration.
        data = {
            'url': url,
            'createdTime': now_iso,
            'validTime': now_iso,
            'db': gen_target_db_name(url),
            # _id = random number + random letter + millisecond timestamp.
            # random.choice is used because random.randint(0, 52) is
            # inclusive and ascii_letters has only 52 characters, so
            # indexing with it could raise IndexError.
            '_id': (
                str(random.randint(0, 100000))
                + random.choice(ascii_letters)
                + str(int(time.time() * 1000))
            ),
        }
        self.db.save(data, tname=TARGETS_DB)
def load_target_db(self) -> dict:
    """
    Load all records from every target collection that is to be validated.

    The collections of ``self.targets`` are always loaded. When ``AGO`` is
    truthy, urls stored in ``TARGETS_DB`` that are non-empty and not in
    ``self.targets`` are loaded as well.

    :return: dict mapping each target url to the data read from its
        collection
    """
    stored_only_urls = set()
    if AGO:
        for record in self.db.all(tname=TARGETS_DB):
            stored_url = record['url']
            if stored_url not in self.targets and stored_url:
                stored_only_urls.add(stored_url)

    loaded = {}
    # Configured targets first, then the extra ones found in the database.
    for target_url in [*self.targets, *stored_only_urls]:
        collection_name = gen_target_db_name(target_url)
        records = self.db.all(tname=collection_name)
        loaded[target_url] = records
        logger.info('Loaded %d proxies from db: %s ' % (len(records), collection_name))
    return loaded