示例#1
0
def main():
    """Crawl the nginx module documentation and store it in Elasticsearch.

    Steps, in order:

    1. Build a ``BaseInit`` and call its ``es_index_init()``.
    2. Read the documentation URL, timeout, retry count and sleep interval
       from the ``common`` config section.
    3. Download the module index page (retried up to ``retry`` times) and
       extract the module page links from it.
    4. For every link whose module name starts with ``ngx_`` and does not
       contain ``http_api``, process the module, save it, and collect its
       keyword records, sleeping ``interval`` seconds after each module.
    5. Bulk-insert all collected keyword records into the ``keyword`` index.
    """
    base_init = BaseInit()
    base_init.es_index_init()

    nginx_doc_url = get_config("common", "nginx_doc_url")
    timeout = int(get_config("common", "timeout"))
    retry = int(get_config("common", "retry"))
    interval = int(get_config("common", "interval"))

    logger.info("Start get url {}".format(nginx_doc_url))
    index_html = retry_call(
        get_request_text,
        fargs=[nginx_doc_url, timeout],
        tries=retry,
    )
    module_links = NginxPage(index_html).get_module_names()

    # Variable and directive names gathered from every module; they back
    # the keyword search index.
    collected_keywords = []

    for link in module_links:
        # Last path component without its extension, e.g.
        # ".../ngx_http_core_module.html" -> "ngx_http_core_module".
        name = link.rsplit("/", 1)[-1].split(".", 1)[0]
        if not name.startswith("ngx_") or "http_api" in name:
            continue

        module = NginxModule(link)
        module.handle_module_direct_info()
        module.handle_module_vars_info()
        module.save_module_info_to_es()
        collected_keywords.extend(module.keyword_info)
        # Pause between module pages.
        time.sleep(interval)

    base_init.es_ins.insert_mul_index_data("keyword", collected_keywords)
示例#2
0
    def get_module_variable_location(self):
        """Return the ``<a>`` elements whose ``name`` attribute is "variables".

        The result is used as an anchor for locating the variables section.

        :return: the filtered selection produced by ``self.d("a").filter``
        """
        logger.info("获取variables指令的a标签,方便定位操作")

        def _is_variables_anchor(i, this):
            # Keep only anchors named exactly "variables".
            return pq(this).attr("name") == "variables"

        anchors = self.d("a")
        return anchors.filter(_is_variables_anchor)
示例#3
0
 def delete_index_data_by_id(self, index_name, id, index_type="_doc"):
     """
     Delete a single document from an index.

     :param index_name: name of the index to delete from
     :param id: identifier of the document to delete
     :param index_type: forwarded to the client as ``doc_type``
     :return: None; the client's response is written to the log
     """
     logger.info(
         self.es.delete(index=index_name, doc_type=index_type, id=id)
     )
示例#4
0
    def create_index(self, index_name, map_body=None):
        """Create ``index_name`` unless it already exists.

        :param index_name: name of the index to create
        :param map_body: optional request body passed to the create call;
            omitted from the call when falsy
        :return: None; the client's response is written to the log
        """
        if self.es.indices.exists(index=index_name):
            # Nothing to do for an existing index.
            return

        create_kwargs = {"index": index_name}
        if map_body:
            create_kwargs["body"] = map_body

        response = self.es.indices.create(**create_kwargs)
        logger.info(response)
示例#5
0
    def get_module_item_location(self):
        """Return every ``<a>`` element that carries a ``name`` attribute.

        The result is used as a set of anchors for locating each menu item.

        :return: the filtered selection produced by ``self.d("a").filter``
        """
        logger.info("获取每个菜单指令的a标签,方便定位操作")

        def _has_name(i, this):
            # Anchors without a ``name`` attribute are dropped.
            return pq(this).attr("name") is not None

        anchors = self.d("a")
        return anchors.filter(_has_name)
示例#6
0
 def insert_one_index_data(self, index_name, index_data, index_type="_doc"):
     """
     Store a single document in Elasticsearch.

     :param index_name: name of the target index
     :param index_data: document body to index
     :param index_type: forwarded to the client as ``doc_type``
     :return: None; the client's response is written to the log
     """
     response = self.es.index(
         index=index_name, doc_type=index_type, body=index_data
     )
     logger.info(response)
示例#7
0
 def delete_index_data_by_query(self, index_name, query_body):
     """
     Delete everything in an index that matches ``query_body``.

     :param index_name: name of the index to delete from
     :param query_body: Elasticsearch query passed as the request body
     :return: None; the client's response is written to the log
     """
     logger.info(
         self.es.delete_by_query(index=index_name, body=query_body)
     )
示例#8
0
    def get_module_names(self):
        """Return the ``href`` of every module page listed on the index page.

        Finds the ``<center>`` element whose ``<h4>`` child reads
        "Modules reference", then collects the links inside the ``<ul>``
        siblings that follow it.

        :return: list of ``href`` attribute values, one per ``<a>`` found
        """
        logger.info("获取所有模块页面列表")

        def _is_modules_heading(i, this):
            return pq(this).children("h4").text() == "Modules reference"

        heading = self.d("center").filter(_is_modules_heading)
        module_links = heading.nextAll("ul").find("a")
        return [anchor.attr("href") for anchor in module_links.items()]
示例#9
0
    def get_data_by_id(self, index_name, id, index_type="_doc"):
        """Fetch one document by id, log its source, and return it.

        A get-by-id response carries the stored document under ``_source``;
        it has no ``hits`` section (that belongs to search responses), so
        the document is read from ``_source`` only.

        :param index_name: name of the index to read from
        :param id: identifier of the document to fetch
        :param index_type: forwarded to the client as ``doc_type``
        :return: the ``_source`` of the fetched document
        """
        res = self.es.get(index=index_name, doc_type=index_type, id=id)

        source = res["_source"]
        logger.info(source)
        return source
示例#10
0
 def insert_mul_index_data(self,
                           index_name,
                           mul_index_data,
                           index_type="_doc"):
     """
     Store a batch of documents in Elasticsearch with the bulk helper.

     :param index_name: name of the target index
     :param mul_index_data: iterable of document bodies to index
     :param index_type: written into each action as ``_type``
     :return: None; the number of performed actions is written to the log
     """
     # One bulk action per document.
     actions = [
         {"_index": index_name, "_type": index_type, "_source": document}
         for document in mul_index_data
     ]
     # raise_on_error=True makes the helper raise instead of returning
     # the failed items.
     performed, _ = bulk(
         self.es, actions, index=index_name, raise_on_error=True
     )
     logger.info("Performed %d actions" % performed)
示例#11
0
def get_request_text(url, timeout):
    """Download ``url`` and return the response body as text.

    :param url: address to fetch with an HTTP GET
    :param timeout: timeout passed to ``requests.get``
    :return: the decoded response body
    :raises requests.HTTPError: when the server answers with a 4xx/5xx status
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses so an error page is never reported as a
    # success or handed to the HTML parser; raising also gives a retrying
    # caller something to retry on.
    r.raise_for_status()
    logger.info("get url: {} successfully...".format(url))
    return r.text
示例#12
0
    def __init__(self):
        """Define the index mappings and create the Elasticsearch client.

        Sets:

        * ``keyword_map`` - mapping body with the keyword fields
        * ``direct_retain_keyword`` - list of section names
        * ``module_map`` - mapping body with the module fields
        * ``es_ins`` - ``ElasticSearch`` instance built from the
          ``elasticsearch`` config section (``ips`` and ``port``)
        """
        keyword_fields = ("keyword", "module_name")
        self.keyword_map = {
            "mappings": {
                "properties": {
                    field: {"type": "text"} for field in keyword_fields
                }
            }
        }

        # NOTE(review): "compatibility" is listed twice; kept as-is because
        # the consumers of this list are not visible here - confirm whether
        # the duplicate is intentional.
        self.direct_retain_keyword = [
            "endpoints", "arguments", "summary", "issues", "example",
            "directives", "compatibility", "definitions", "protocol",
            "variables", "commands", "data", "compatibility", "properties",
        ]

        # (field name, mapping type) pairs, in the order they appear in the
        # mapping body.
        module_fields = (
            ("module_name", "text"),
            ("compatibility", "text"),
            ("properties", "text"),
            ("arguments", "text"),
            ("definitions", "text"),
            ("protocol", "text"),
            ("commands", "text"),
            ("data", "text"),
            ("summary", "text"),
            ("variables", "object"),
            ("issues", "text"),
            ("example", "text"),
            ("endpoints", "nested"),
            ("directives", "text"),
            ("directive_info", "nested"),
        )
        self.module_map = {
            "mappings": {
                "properties": {
                    field: {"type": field_type}
                    for field, field_type in module_fields
                }
            }
        }

        logger.info("初始化es实例")
        es_ips = get_config("elasticsearch", "ips")
        es_port = get_config("elasticsearch", "port")
        self.es_ins = ElasticSearch(ips=es_ips, port=es_port)
示例#13
0
    def delete_index(self, index_name):
        """Delete ``index_name`` if it exists; do nothing otherwise.

        :param index_name: name of the index to delete
        :return: None; a confirmation is logged after a deletion
        """
        index_exists = self.es.indices.exists(index=index_name)
        if not index_exists:
            return

        self.es.indices.delete(index=index_name)
        logger.info("删除索引{}成功".format(index_name))