def _distribute(ip):
    """Initialise the result hash for ``ip`` and enqueue all of its lookups.

    Everything is sent through a single Redis pipeline:

    * the result hash ``rbl:search:ret:<ip>`` has its ``score`` incremented
      by 100, gets a ``created`` timestamp and a one hour expiry;
    * the IP is pushed onto the ``rbl:search:hostname`` list;
    * one JSON job per entry of ``rbl_settings.RBL_SEARCH_DOMAINS`` is pushed
      onto ``rbl:search:domain`` and one per entry of
      ``rbl_settings.RBL_SEARCH_DOMAINS2`` onto ``rbl:search:domain:2``.

    Each settings entry is unpacked as ``(tagid, domain, answers, desc)``.
    """
    log.info("search start IP: {}".format(ip))
    result_key = "rbl:search:ret:{}".format(ip)

    pipe = redis_db.pipeline()
    pipe.hincrby(result_key, "score", 100)
    pipe.hset(result_key, "created", time.time())
    pipe.expire(result_key, 3600)
    pipe.lpush("rbl:search:hostname", ip)

    # (queue key, settings entries) pairs, processed in this order.
    job_sources = (
        ("rbl:search:domain", rbl_settings.RBL_SEARCH_DOMAINS),
        ("rbl:search:domain:2", rbl_settings.RBL_SEARCH_DOMAINS2),
    )
    for queue_key, entries in job_sources:
        for tagid, domain, answers, desc in entries:
            job = {
                "ip": ip,
                "tagid": tagid,
                "domain": domain,
                "answers": answers,
                "desc": desc,
            }
            pipe.lpush(queue_key, json.dumps(job))

    pipe.execute()
def init_routine():
    """Call ``init()`` every two hours until ``signal_stop`` is set.

    A failing ``init()`` never terminates the loop: the error is logged
    (now including the traceback, which was previously discarded) and the
    next attempt happens after the usual 7200 second sleep.

    The stop flag is only checked at the top of each iteration, i.e. once
    per sleep period.
    """
    # Function-scope import so the block does not depend on the module
    # header already importing traceback.
    import traceback

    while not signal_stop:
        try:
            log.info('init routine...')
            init()
            log.info('init routine finish...')
        except BaseException:
            # BaseException is kept from the original handler; presumably it
            # is intentional so that gevent.Timeout (a BaseException subclass)
            # raised inside init() is also survived -- TODO confirm.
            log.error('init routine error...')
            log.error(traceback.format_exc())
        gevent.sleep(7200)
def _search(d, dbl_err_key, j):
    """Look one domain up on the Spamhaus DBL query page.

    Args:
        d: Raw queue payload for this job; pushed unchanged onto
            ``dbl_err_key`` when the domain turns out to be listed.
        dbl_err_key: Redis list key that collects listed domains.
        j: Mapping with at least ``'customer_id'`` and ``'domain'``.

    Raises:
        Exception: ``"query dbl with no response"`` when the page contains
            neither the "is not listed" nor the "is listed" phrase.
    """
    customer_id, domain = j['customer_id'], j['domain']
    log.info("start customer_id: {}, domain: {}".format(customer_id, domain))

    # The same address serves as request URL and as Referer header.
    query_url = "http://www.spamhaus.org/query/domain/{}".format(domain)

    # The context manager closes the session (and its pooled connections)
    # on every exit path; previously the session was never closed.
    with requests.session() as s:
        proxies = next(sf_settings.CYCLE_PROXIES)
        log.info("domain: {}, proxies: {}".format(domain, proxies))
        s.proxies = proxies
        s.headers.update(sf_settings.HEADERS)
        s.headers.update({'User-Agent': random.choice(sf_settings.UA)})
        s.headers.update({'Referer': query_url})

        # NOTE(review): none of these requests sets a timeout, so a stalled
        # proxy can block this call indefinitely.
        r = s.get(query_url)
        jschl_vc, passwd, jschl_answer = get_js_return(r.content)
        log.info("jschl_vc: {}, passwd: {}, jschl_answer: {}".format(jschl_vc, passwd, jschl_answer))
        if jschl_answer is not None:
            # A challenge answer was extracted from the first response:
            # submit it to the chk_jschl endpoint after a pause, then
            # request the query page again.
            payload = {'jschl_vc': jschl_vc, 'pass': passwd, 'jschl_answer': jschl_answer}
            time.sleep(4)
            s.get("https://www.spamhaus.org/cdn-cgi/l/chk_jschl?", params=payload)
            r = s.get(query_url)
        content = r.content

    # Membership tests instead of ``find(...) > 0`` so that a phrase at
    # offset 0 is recognised as well.
    # NOTE(review): ``content`` is ``r.content``; matching it against str
    # literals assumes Python 2 (on Python 3 it is bytes) -- confirm.
    answered = False
    if "is not listed in the DBL" in content:
        answered = True
        log.info("{} is not listed in the DBL".format(domain))
    if "is listed in the DBL" in content:
        answered = True
        log.error("{} is listed in the DBL".format(domain))
        redis.lpush(dbl_err_key, d)
    if not answered:
        raise Exception("query dbl with no response")
def hostname():
    """Worker loop: resolve the hostname of each IP queued for lookup.

    IPs are taken with a blocking pop from the ``rbl:search:hostname`` list.
    The value returned by ``rblsearch.get_hostname`` is stored in the
    ``domain`` field of ``rbl:search:ret:<ip>``; a falsy result costs the IP
    5 points of ``score`` and is stored as ``"No Reverse DNS"``.

    On any failure the IP is pushed back onto the list and the traceback is
    logged. The loop ends once ``signal_stop`` is true when checked at the
    top of an iteration.
    """
    queue_key = "rbl:search:hostname"
    while not signal_stop:
        _, ip = redis_db.brpop(queue_key)
        try:
            resolved = rblsearch.get_hostname(ip)
            result_key = "rbl:search:ret:{}".format(ip)
            if not resolved:
                redis_db.hincrby(result_key, "score", -5)
                resolved = "No Reverse DNS"
            redis_db.hset(result_key, "domain", resolved)
            log.info("search hostname: {}".format(resolved))
        except BaseException:
            # Requeue first so the IP is not lost, then report the failure.
            redis_db.lpush(queue_key, ip)
            log.error(traceback.format_exc())
def _search(d):
    """Run one RBL query described by the JSON job ``d`` and store the result.

    ``d`` decodes to a mapping with the keys ``ip``, ``tagid``, ``domain``,
    ``answers`` and ``desc``. The query runs under a 5 second gevent timeout.
    Its outcome is written to the hash ``rbl:search:ret:<ip>``:

    * ``searched`` is incremented by one;
    * when ``rblsearch.show_ret`` reports a truthy status, ``score`` is
      decreased by 10 and the message is stored under
      ``domain:error:<tagid>``, otherwise under ``domain:success:<tagid>``.

    Once ``searched`` reaches 28 the time elapsed since ``created`` is stored
    in ``total`` and the end of the search is logged.
    """
    job = json.loads(d)
    ip = job["ip"]
    tagid = job["tagid"]
    domain = job["domain"]
    answers = job["answers"]
    desc = job["desc"]
    log.info("search IP: {}, domain: {}".format(ip, domain))

    with gevent.Timeout(5):
        ret, T = rblsearch.RDnsQuery(ip=ip, domain=domain, answers=answers)
    status, msg = rblsearch.show_ret(ip, ret, T, desc)
    log.info("search msg: {}, domain: {}".format(msg, domain))

    key = "rbl:search:ret:{}".format(ip)
    pipe = redis_db.pipeline()
    pipe.hincrby(key, "searched", 1)
    if status:
        pipe.hincrby(key, "score", -10)
        field = "domain:error:{}".format(tagid)
    else:
        field = "domain:success:{}".format(tagid)
    pipe.hset(key, field, msg)
    pipe.execute()

    done = redis_db.hget(key, "searched")
    if int(done) < 28:
        return

    # Threshold reached: record how long the whole search took.
    started = float(redis_db.hget(key, "created"))
    redis_db.hset(key, "total", time.time() - started)
    log.info("search finish IP: {}".format(ip))
def signal_handle(mode):
    """Log the caught signal ``mode`` and set the module-level stop flag."""
    global signal_stop
    log.info("catch signal: %s" % mode)
    # The worker loops read this flag and exit when it is true.
    signal_stop = True
def save():
    """Worker loop: persist every payload queued on ``domain:dbl:error``.

    Each payload is taken with a blocking pop, decoded from JSON and handed
    to ``_save``. On any failure the raw payload is pushed back onto the list
    and the traceback is logged. The loop ends once ``signal_stop`` is true
    when checked at the top of an iteration.
    """
    dbl_err_key = "domain:dbl:error"
    while not signal_stop:
        _, payload = redis.brpop(dbl_err_key)
        try:
            record = json.loads(payload)
            _save(record)
        except BaseException:
            # Requeue first so the payload is not lost, then report.
            redis.lpush(dbl_err_key, payload)
            log.error(traceback.format_exc())


############################################################
# Signal handling

def signal_handle(mode):
    """Log the caught signal ``mode`` and set the module-level stop flag."""
    global signal_stop
    log.info("catch signal: %s" % mode)
    signal_stop = True


def main():
    """Install the signal handler, start the workers and wait for them."""
    init_gevent_signal(signal_handle)
    workers = [
        gevent.spawn(routine)
        for routine in (init_routine, search, save)
    ]
    gevent.joinall(workers)


if __name__ == "__main__":
    log.info("program start...")
    main()
    log.info("program quit...")