def apache_param_analysis():
    """Parse the command-line arguments for the apache log analyzer.

    :return: dict of the three positional arguments, e.g.::

        {
            'report_type': 'ip',
            'log_file_path': '/sf/log',
            'server_ip': '200.200.1.35',
        }

    Exits the process when the supplied server ip is malformed.
    """
    parser = argparse.ArgumentParser()
    parser.description = 'apache-log-analysis'
    parser.add_argument('server_ip', help='apache server ip')
    parser.add_argument('log_file_path', help='apache log file path')
    parser.add_argument('report_type', help='report type:article,ip,full,all')
    args = parser.parse_args()
    # Validate the ip before doing any work; abort early on a bad address.
    if check_ip(args.server_ip):
        logger.info("server_ip is true")
    else:
        logger.info("server_ip is false")
        exit("server_ip is false")
    # NOTE: removed the commented-out self-call that trailed this function.
    return {
        "server_ip": args.server_ip,
        "log_file_path": args.log_file_path,
        "report_type": args.report_type,
    }
def web_insert(self, domainitem):
    """Insert one whois record for *domainitem* into Website_Information.

    :param domainitem: object exposing the whois fields read below
                       (domain_name, ip, area, registrar_*, ...).
    Logs success or failure; never raises to the caller.
    """
    count = 1
    # Parameterized query: the driver escapes the values, fixing the SQL
    # injection / broken-statement risk of the old "%s"-interpolated string
    # (whois text routinely contains quotes).
    sql = ("INSERT INTO Website_Information(domain_name, "
           "ip, area, registrar_domain_id, registrar_whois_server, registrar_url, updated_date, creation_date, "
           "registry_expiry_date, registrar, registrar_iana_id, registrar_abuse_contact_email, "
           "registrar_abuse_contact_phone) "
           "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
    params = (domainitem.domain_name, domainitem.ip, domainitem.area,
              domainitem.registrar_domain_id, domainitem.registrar_whois_server,
              domainitem.registrar_url, domainitem.updated_date,
              domainitem.creation_date, domainitem.registry_expiry_date,
              domainitem.registrar, domainitem.registrar_iana_id,
              domainitem.registrar_abuse_contact_email,
              domainitem.registrar_abuse_contact_phone)
    self.lock.acquire()
    try:
        self.db.ping(reconnect=True)
        cursor = self.db.cursor()
        try:
            cursor.execute(sql, params)
            self.db.commit()
        finally:
            cursor.close()
    except Exception:
        self.db.rollback()
        count = 0
    finally:
        # The original released the lock only on the success path, which
        # deadlocked every later caller after the first failed insert.
        self.lock.release()
    if count == 1:
        logger.info("插入新的网站信息:{}".format(domainitem))
    else:
        logger.warning("已经存在的网站信息:{}".format(domainitem))
def wrapper(*args):
    # Time the wrapped call and log pass/fail; a failure is swallowed
    # (logged, implicit None return) rather than propagated.
    began = time.time()
    try:
        result = func(*args)
    except Exception as err:
        logger.info(func.__name__ + "测试失败,原因:" + str(err))
    else:
        elapsed = str(round(time.time() - began, 3))
        logger.info(func.__name__ + "测试通过,用时:" + elapsed)
        return result
def update_score(self, ipItem):
    """Persist *ipItem*'s current score into IP_Pool.

    :param ipItem: object exposing ``score`` and ``ip``.
    Logs success or failure; never raises to the caller.
    """
    count = 1
    # Parameterized UPDATE instead of "%s"-interpolation: prevents SQL
    # injection and breakage on quotes in the values.
    sql = "UPDATE IP_Pool SET IP_Score = %s where IP_Address = %s "
    self.lock.acquire()
    try:
        cursor = self.db.cursor()
        try:
            cursor.execute(sql, (ipItem.score, ipItem.ip))
            self.db.commit()
        finally:
            cursor.close()
    except Exception:
        self.db.rollback()
        count = 0
    finally:
        # The original released the lock only on success, so one failed
        # UPDATE would deadlock every later pool operation.
        self.lock.release()
    if count == 1:
        logger.info("修改代理成功:{}".format(ipItem))
    else:
        logger.warning("修改代理失败:{}".format(ipItem))
def _test_proxy(self):
    """Take one proxy off the work queue, re-check it and persist the result."""
    candidate = self.queue.get()
    try:
        checked = check_proxy(candidate)
        if checked.speed != -1:
            # Still responsive: restore the full score.
            checked.score = settings.MAX_SCORE
            self.proxy_pool.update_score(checked)
        else:
            # Unresponsive: decay the score; drop the proxy once it hits zero.
            checked.score -= 1
            if checked.score == 0:
                self.proxy_pool.delete(checked)
                logger.info('删除代理:{}'.format(checked))
            else:
                self.proxy_pool.update_score(checked)
    except Exception as ex:
        logger.exception(ex)
    self.queue.task_done()
def insert(self, ipItem):
    """Insert one proxy record for *ipItem* into IP_Pool.

    :param ipItem: object exposing ip, port, protocol, nick_type, speed,
                   area and score.
    Logs success or failure; never raises to the caller.
    """
    count = 1
    # Parameterized INSERT instead of "%s"-interpolation: the driver
    # escapes the values (no SQL injection / quote breakage).
    sql = ("INSERT INTO IP_Pool(IP_Address, "
           "IP_Port, IP_Protocol, IP_Nick, IP_Speed, IP_Area, IP_Score) "
           "VALUES (%s, %s, %s, %s, %s, %s, %s)")
    params = (ipItem.ip, ipItem.port, ipItem.protocol, ipItem.nick_type,
              ipItem.speed, ipItem.area, ipItem.score)
    self.lock.acquire()
    try:
        self.db.ping(reconnect=True)
        cursor = self.db.cursor()
        try:
            cursor.execute(sql, params)
            self.db.commit()
        finally:
            cursor.close()
    except Exception:
        self.db.rollback()
        count = 0
    finally:
        # The original released the lock only on success, deadlocking the
        # pool after the first failed insert; always release it.
        self.lock.release()
    if count == 1:
        logger.info("插入新的代理:{}".format(ipItem))
    else:
        logger.warning("已经存在的代理:{}".format(ipItem))
def get_html_title(self, url):
    """Fetch *url* and return the text of its <title> element.

    :param url: e.g. http://200.200.1.35//coding/miniprj/material.html
    :return: the title string (e.g. 训练素材); the raw title node when its
             text cannot be extracted; None when the page fetch fails.
    """
    page = self.get_url_html(url)
    if page is None:
        return None
    node = getattr(BeautifulSoup(page, 'html.parser'), 'title')
    try:
        return node.string
    except Exception as ex:
        logger.info(" title to string err:%(err)s" % {'err': ex})
        # Cannot extract the text — fall back to the node itself.
        return node
def get_url_html(self, url):
    """Download *url* (1s timeout) and return its raw html, or None on failure."""
    logger.info("start get %(url)s html" % {'url': url})
    response = None
    try:
        response = urllib2.urlopen(url, timeout=1)
        body = response.read()
    except Exception as ex:
        logger.info("start get %(url)s html err:%(err)s" % {
            'url': url,
            'err': ex
        })
        # Swallow the error so one bad url does not abort the caller.
        return None
    finally:
        if response:
            response.close()
    return body