Example #1
 def delete_project(self, project_id):
     """Delete the whole index"""
     es_index_name = self.get_index_name(project_id)
     if self.index_exists(es_index_name):
         try:
             self.es_client.indices.delete(index=es_index_name + "*")
             delete_template_response = requests.delete(
                 f"{self.host}/_index_template/{self.get_template_name(es_index_name)}",
                 headers={"Content-type": "application/json", "Accept": "text/plain"}
             ).__dict__
             delete_policy_response = requests.delete(
                 f"{self.host}/_ilm/policy/{self.get_policy_name(es_index_name)}",
                 headers={"Content-type": "application/json", "Accept": "text/plain"}
             ).__dict__
             logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
             self.log_response(
                 response=delete_template_response,
                 success_message=f"Deleted template for index {es_index_name}",
                 error_message=f"Error while deleting template for index {es_index_name}"
             )
             self.log_response(
                 response=delete_policy_response,
                 success_message=f"Deleted policy for index {es_index_name}",
                 error_message=f"Error while deleting policy for index {es_index_name}"
             )
             logger.debug("Deleted index %s", es_index_name)
             return 1
         except Exception as err:
             logger.error("Not found %s for deleting", es_index_name)
             logger.error("ES Url %s", utils.remove_credentials_from_url(self.host))
             logger.error(err)
             return 0
     return 0
Example #2
 def delete_index(self, index_name):
     """Delete the whole index"""
     try:
         self.es_client.indices.delete(index=str(index_name))
         logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
         logger.debug("Deleted index %s", str(index_name))
         return True
     except Exception as err:
         logger.error("Not found %s for deleting", str(index_name))
         logger.error("ES Url %s", utils.remove_credentials_from_url(self.host))
         logger.error(err)
         return False
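The methods above assume a thin wrapper class that holds self.host and self.es_client and exposes an index_exists helper. A minimal sketch of what such a wrapper could look like (the class name, constructor signature and logger setup are assumptions for illustration, not part of the original snippets):

import logging

import elasticsearch

logger = logging.getLogger("esClient")


class EsClientSketch:
    """Hypothetical wrapper matching the attributes the snippets rely on."""

    def __init__(self, host="http://localhost:9200"):
        self.host = host
        self.es_client = elasticsearch.Elasticsearch([host])

    def index_exists(self, index_name, print_error=True):
        """Return True if the index exists; swallow errors like the snippets do."""
        try:
            return self.es_client.indices.exists(index=str(index_name))
        except Exception as err:
            if print_error:
                logger.error("Error while checking index %s", str(index_name))
                logger.error(err)
            return False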
Example #3
 def _bulk_index(self, bodies, host=None, es_client=None, refresh=True):
     if host is None:
         host = self.host
     if es_client is None:
         es_client = self.es_client
     if not bodies:
         return commons.launch_objects.BulkResponse(took=0, errors=False)
     logger.debug("Indexing %d logs...", len(bodies))
     try:
         try:
             success_count, errors = elasticsearch.helpers.bulk(es_client,
                                                                bodies,
                                                                chunk_size=1000,
                                                                request_timeout=30,
                                                                refresh=refresh)
         except Exception as err:
             logger.error(err)
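             # a failed bulk call here often means the index was switched to read-only
             # (e.g. by a disk watermark); assume the next call lifts that block, then retry once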
             self.update_settings_after_read_only(host)
             success_count, errors = elasticsearch.helpers.bulk(es_client,
                                                                bodies,
                                                                chunk_size=1000,
                                                                request_timeout=30,
                                                                refresh=refresh)
         logger.debug("Processed %d logs", success_count)
         if errors:
             logger.debug("Occured errors %s", errors)
         return commons.launch_objects.BulkResponse(took=success_count, errors=len(errors) > 0)
     except Exception as err:
         logger.error("Error in bulk")
         logger.error("ES Url %s", utils.remove_credentials_from_url(host))
         logger.error(err)
         return commons.launch_objects.BulkResponse(took=0, errors=True)
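Each entry in the bodies list passed to elasticsearch.helpers.bulk is a plain action dictionary. A hedged sketch of one indexing action; the index name and field names below are illustrative and not taken from the snippets:

def sample_bulk_action(index_name, log_id, message):
    """Illustrative only: builds one action for elasticsearch.helpers.bulk."""
    return {
        "_index": index_name,           # target index, e.g. the project id used above
        "_id": log_id,                  # document id
        "_source": {"message": message},
    }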
Example #4
    def search_logs(self, search_req):
        """Get all logs similar to given logs"""
        similar_log_ids = set()
        logger.info("Started searching by request %s", search_req.json())
        logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
        t_start = time()
        if not self.index_exists(str(search_req.projectId)):
            return []
        searched_logs = set()
        for message in search_req.logMessages:
            if not message.strip():
                continue
            cleaned_message = self.clean_message(message)
            sanitized_msg = utils.leave_only_unique_lines(
                utils.sanitize_text(
                    utils.first_lines(cleaned_message, search_req.logLines)))

            msg_words = " ".join(utils.split_words(sanitized_msg))
            if msg_words in searched_logs:
                continue
            searched_logs.add(msg_words)
            query = self.build_search_query(search_req, sanitized_msg)
            res = self.es_client.search(index=str(search_req.projectId),
                                        body=query)
            similar_log_ids = similar_log_ids.union(
                self.find_similar_logs_by_cosine_similarity(
                    msg_words, message, res))

        logger.info(
            "Finished searching by request %s with %d results. It took %.2f sec.",
            search_req.json(), len(similar_log_ids),
            time() - t_start)
        return list(similar_log_ids)
Example #5
    def index_logs(self, launches):
        """Index launches to the index with project name"""
        logger.info("Indexing logs for %d launches", len(launches))
        logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
        t_start = time()
        bodies = []
        test_item_ids = []
        project = None
        for launch in launches:
            self.create_index_if_not_exists(str(launch.project))
            project = str(launch.project)

            for test_item in launch.testItems:
                logs_added = False
                for log in test_item.logs:

                    if log.logLevel < ERROR_LOGGING_LEVEL or not log.message.strip():
                        continue

                    bodies.append(self._prepare_log(launch, test_item, log))
                    logs_added = True
                if logs_added:
                    test_item_ids.append(str(test_item.testItemId))
        result = self._bulk_index(bodies)
        self._merge_logs(test_item_ids, project)
        logger.info(
            "Finished indexing logs for %d launches. It took %.2f sec.",
            len(launches),
            time() - t_start)
        return result
Example #6
 def create_index(self, index_name):
     """Create index in elasticsearch"""
     logger.debug("Creating '%s' Elasticsearch index", str(index_name))
     logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
     try:
         response = self.es_client.indices.create(index=str(index_name), body={
             'settings': utils.read_json_file("", "index_settings.json", to_json=True),
             'mappings': utils.read_json_file("", "index_mapping_settings.json", to_json=True)
         })
         logger.debug("Created '%s' Elasticsearch index", str(index_name))
         return commons.launch_objects.Response(**response)
     except Exception as err:
         logger.error("Couldn't create index")
         logger.error("ES Url %s", utils.remove_credentials_from_url(self.host))
         logger.error(err)
         return commons.launch_objects.Response()
Example #7
    def delete_logs(self, clean_index):
        """Delete logs from elasticsearch"""
        logger.info("Delete logs %s for the project %s",
                    clean_index.ids, clean_index.project)
        logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
        t_start = time()
        if not self.index_exists(clean_index.project):
            return 0
        test_item_ids = set()
        try:
            search_query = self.build_search_test_item_ids_query(
                clean_index.ids)
            for res in elasticsearch.helpers.scan(self.es_client,
                                                  query=search_query,
                                                  index=clean_index.project,
                                                  scroll="5m"):
                test_item_ids.add(res["_source"]["test_item"])
        except Exception as err:
            logger.error("Couldn't find test items for logs")
            logger.error(err)

        bodies = []
        for _id in clean_index.ids:
            bodies.append({
                "_op_type": "delete",
                "_id":      _id,
                "_index":   clean_index.project,
            })
        result = self._bulk_index(bodies)
        self._merge_logs(list(test_item_ids), clean_index.project)
        logger.info("Finished deleting logs %s for the project %s. It took %.2f sec",
                    clean_index.ids, clean_index.project, time() - t_start)
        return result.took
Example #8
 def create_ampq_connection(amqpUrl):
     """Creates AMQP client"""
     amqp_full_url = amqpUrl.rstrip("\\").rstrip(
         "/") + "/analyzer?heartbeat=600"
     logger.info("Try connect to %s" %
                 utils.remove_credentials_from_url(amqp_full_url))
     return pika.BlockingConnection(
         pika.connection.URLParameters(amqp_full_url))
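A possible way to use the returned connection; the AMQP URL, queue name and callback below are placeholders, and the "/analyzer" vhost appended by the function is assumed to exist on the broker:

connection = create_ampq_connection("amqp://user:password@rabbitmq:5672")
channel = connection.channel()
channel.queue_declare(queue="analyzer", durable=True)
channel.basic_consume(queue="analyzer",
                      on_message_callback=lambda ch, method, properties, body: print(body))
try:
    channel.start_consuming()
finally:
    connection.close()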
Example #9
 def create_index(self, index_name):
     """Create index in elasticsearch"""
     logger.debug("Creating '%s' Elasticsearch index", str(index_name))
     logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
     try:
         response = self.es_client.indices.create(
             index=str(index_name),
             body={
                 'settings': DEFAULT_INDEX_SETTINGS,
                 'mappings': DEFAULT_MAPPING_SETTINGS,
             })
         logger.debug("Created '%s' Elasticsearch index", str(index_name))
         return commons.launch_objects.Response(**response)
     except Exception as err:
         logger.error("Couldn't create index")
         logger.error("ES Url %s",
                      utils.remove_credentials_from_url(self.host))
         logger.error(err)
         return commons.launch_objects.Response()
Example #10
 def create_grafana_data_source(self, esHostGrafanaDatasource, index_name,
                                time_field):
     index_exists = False
     index_properties = utils.read_json_file("",
                                             "%s_mappings.json" %
                                             index_name,
                                             to_json=True)
     if not self.index_exists(index_name, print_error=False):
         response = self.create_index(index_name, index_properties)
         if len(response):
             index_exists = True
     else:
         index_exists = True
     if index_exists:
         self.delete_grafana_datasource_by_name(index_name)
         es_user, es_pass = utils.get_credentials_from_url(
             esHostGrafanaDatasource)
         try:
             requests.post("%s/api/datasources" % self.grafanaHost,
                           data=json.dumps({
                               "name":
                               index_name,
                               "type":
                               "elasticsearch",
                               "url":
                               utils.remove_credentials_from_url(
                                   esHostGrafanaDatasource),
                               "access":
                               "proxy",
                               "basicAuth":
                               len(es_user) > 0,
                               "basicAuthUser":
                               es_user,
                               "secureJsonData": {
                                   "basicAuthPassword": es_pass
                               },
                               "database":
                               index_name,
                               "jsonData": {
                                   "esVersion": 70,
                                   "maxConcurrentShardRequests": "1",
                                   "timeField": time_field
                               }
                           }),
                           headers={
                               "content-type": "application/json"
                           }).raise_for_status()
             return True
         except Exception as err:
             logger.error("Can't create grafana datasource")
             logger.error(err)
             return False
     return False
Example #11
 def send_request(url, method, username, password):
     """Send request with specified url and http method"""
     try:
         if username.strip() and password.strip():
             response = requests.get(
                 url, auth=(username, password)) if method == "GET" else {}
         else:
             response = requests.get(url) if method == "GET" else {}
         data = response._content.decode("utf-8")
         content = json.loads(data, strict=False)
         return content
     except Exception as err:
         logger.error("Error with loading url: %s",
                      utils.remove_credentials_from_url(url))
         logger.error(err)
     return []
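Example call, assuming an Elasticsearch root endpoint at a placeholder URL; with empty credentials the request is sent without basic auth:

cluster_info = send_request("http://localhost:9200", "GET", "", "")
if cluster_info:
    print(cluster_info["version"]["number"])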
Example #12
    def index_logs(self, launches):
        """Index launches to the index with project name"""
        cnt_launches = len(launches)
        logger.info("Indexing logs for %d launches", cnt_launches)
        logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
        t_start = time()
        bodies = []
        test_item_ids = []
        project = None
        test_item_queue = Queue()
        for launch in launches:
            project = str(launch.project)
            test_items = launch.testItems
            launch.testItems = []
            self.create_index_if_not_exists(str(launch.project))
            for test_item in test_items:
                test_item_queue.put((launch, test_item))
        del launches
        while not test_item_queue.empty():
            launch, test_item = test_item_queue.get()
            logs_added = False
            for log in test_item.logs:
                if log.logLevel < utils.ERROR_LOGGING_LEVEL or not log.message.strip():
                    continue

                bodies.append(self.log_preparation._prepare_log(launch, test_item, log))
                logs_added = True
            if logs_added:
                test_item_ids.append(str(test_item.testItemId))

        logs_with_exceptions = utils.extract_all_exceptions(bodies)
        result = self._bulk_index(bodies)
        result.logResults = logs_with_exceptions
        _, num_logs_with_defect_types = self._merge_logs(test_item_ids, project)
        try:
            if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip():
                AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue(
                    self.app_config["exchangeName"], "train_models", json.dumps({
                        "model_type": "defect_type",
                        "project_id": project,
                        "num_logs_with_defect_types": num_logs_with_defect_types
                    }))
        except Exception as err:
            logger.error(err)
        logger.info("Finished indexing logs for %d launches. It took %.2f sec.",
                    cnt_launches, time() - t_start)
        return result
Example #13
 def create_index(self, index_name, index_properties):
     logger.debug("Creating '%s' Elasticsearch index", str(index_name))
     try:
         response = self.es_client.indices.create(
             index=str(index_name),
             body={
                 'settings': {
                     "number_of_shards": 1
                 },
                 'mappings': index_properties
             })
         logger.debug("Created '%s' Elasticsearch index", str(index_name))
         return response
     except Exception as err:
         logger.error("Couldn't create index")
         logger.error("ES Url %s",
                      utils.remove_credentials_from_url(self.esHost))
         logger.error(err)
         return {}
Example #14
 def _bulk_index(self, bodies, refresh=True, es_chunk_number=1000):
     if not bodies:
         return 0
     start_time = time()
     logger.debug("Indexing %d logs...", len(bodies))
     try:
         success_count, errors = elasticsearch.helpers.bulk(self.es_client,
                                                            bodies,
                                                            chunk_size=es_chunk_number,
                                                            request_timeout=30,
                                                            refresh=refresh)
         logger.debug("Processed %d logs", success_count)
         if errors:
             logger.debug("Occurred errors %s", errors)
         logger.debug("Finished indexing for %.2f s", time() - start_time)
         return success_count
     except Exception as err:
         logger.error("Error in bulk")
         logger.error("ES Url %s", utils.remove_credentials_from_url(self.host))
         logger.error(err)
         return 0
Example #15
    def suggest_items(self, test_item_info, num_items=5):
        logger.info("Started suggesting test items")
        logger.info("ES Url %s",
                    utils.remove_credentials_from_url(self.es_client.host))
        if not self.es_client.index_exists(str(test_item_info.project)):
            logger.info("Project %d doesn't exist", test_item_info.project)
            logger.info("Finished suggesting for test item with 0 results.")
            return []

        t_start = time()
        results = []
        unique_logs = utils.leave_only_unique_logs(test_item_info.logs)
        prepared_logs = [
            self.log_preparation._prepare_log_for_suggests(
                test_item_info, log) for log in unique_logs
            if log.logLevel >= utils.ERROR_LOGGING_LEVEL
        ]
        logs = LogMerger.decompose_logs_merged_and_without_duplicates(
            prepared_logs)
        searched_res = self.query_es_for_suggested_items(test_item_info, logs)

        boosting_config = self.get_config_for_boosting_suggests(
            test_item_info.analyzerConfig)
        boosting_config["chosen_namespaces"] = \
            self.namespace_finder.get_chosen_namespaces(test_item_info.project)

        _boosting_data_gatherer = SuggestBoostingFeaturizer(
            searched_res,
            boosting_config,
            feature_ids=self.suggest_decision_maker.get_feature_ids(),
            weighted_log_similarity_calculator=self.weighted_log_similarity_calculator)
        defect_type_model_to_use = self.choose_model(test_item_info.project,
                                                     "defect_type_model/")
        if defect_type_model_to_use is None:
            _boosting_data_gatherer.set_defect_type_model(
                self.global_defect_type_model)
        else:
            _boosting_data_gatherer.set_defect_type_model(
                defect_type_model_to_use)
        feature_data, test_item_ids = _boosting_data_gatherer.gather_features_info()
        scores_by_test_items = _boosting_data_gatherer.scores_by_issue_type
        model_info_tags = _boosting_data_gatherer.get_used_model_info() +\
            self.suggest_decision_maker.get_model_info()

        if feature_data:
            predicted_labels, predicted_labels_probability = self.suggest_decision_maker.predict(
                feature_data)
            sorted_results = self.sort_results(scores_by_test_items,
                                               test_item_ids,
                                               predicted_labels_probability)

            logger.debug("Found %d results for test items ",
                         len(sorted_results))
            for idx, prob, _ in sorted_results:
                test_item_id = test_item_ids[idx]
                issue_type = scores_by_test_items[test_item_id]["mrHit"][
                    "_source"]["issue_type"]
                logger.debug(
                    "Test item id %d with issue type %s has probability %.2f",
                    test_item_id, issue_type, prob)

            global_idx = 0
            for idx, prob, _ in sorted_results[:num_items]:
                if prob >= self.suggest_threshold:
                    test_item_id = test_item_ids[idx]
                    issue_type = scores_by_test_items[test_item_id]["mrHit"][
                        "_source"]["issue_type"]
                    relevant_log_id = utils.extract_real_id(
                        scores_by_test_items[test_item_id]["mrHit"]["_id"])
                    test_item_log_id = utils.extract_real_id(
                        scores_by_test_items[test_item_id]["compared_log"]
                        ["_id"])
                    analysis_result = SuggestAnalysisResult(
                        testItem=test_item_info.testItemId,
                        testItemLogId=test_item_log_id,
                        issueType=issue_type,
                        relevantItem=test_item_id,
                        relevantLogId=relevant_log_id,
                        matchScore=round(prob * 100, 2),
                        esScore=round(
                            scores_by_test_items[test_item_id]["mrHit"]
                            ["_score"], 2),
                        esPosition=scores_by_test_items[test_item_id]["mrHit"]
                        ["es_pos"],
                        modelFeatureNames=";".join([
                            str(feature) for feature in
                            self.suggest_decision_maker.get_feature_ids()
                        ]),
                        modelFeatureValues=";".join(
                            [str(feature) for feature in feature_data[idx]]),
                        modelInfo=";".join(model_info_tags),
                        resultPosition=global_idx,
                        usedLogLines=test_item_info.analyzerConfig.
                        numberOfLogLines,
                        minShouldMatch=self.find_min_should_match_threshold(
                            test_item_info.analyzerConfig))
                    results.append(analysis_result)
                    logger.debug(analysis_result)
                global_idx += 1
        else:
            logger.debug("There are no results for test item %s",
                         test_item_info.testItemId)
        results_to_share = {
            test_item_info.launchId: {
                "not_found": int(len(results) == 0),
                "items_to_process": 1,
                "processed_time": time() - t_start,
                "found_items": len(results),
                "launch_id": test_item_info.launchId,
                "launch_name": test_item_info.launchName,
                "project_id": test_item_info.project,
                "method": "suggest",
                "gather_date": datetime.now().strftime("%Y-%m-%d"),
                "gather_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "number_of_log_lines": test_item_info.analyzerConfig.numberOfLogLines,
                "model_info": model_info_tags,
                "module_version": [self.app_config["appVersion"]],
                "min_should_match": self.find_min_should_match_threshold(
                    test_item_info.analyzerConfig)
            }
        }
        if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip():
            AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue(
                self.app_config["exchangeName"], "stats_info",
                json.dumps(results_to_share))

        logger.debug("Stats info %s", results_to_share)
        logger.info("Processed the test item. It took %.2f sec.",
                    time() - t_start)
        logger.info("Finished suggesting for test item with %d results.",
                    len(results))
        return results
Example #16
    def analyze_logs(self, launches, timeout=300):
        global EARLY_FINISH
        cnt_launches = len(launches)
        logger.info("Started analysis for %d launches", cnt_launches)
        logger.info("ES Url %s",
                    utils.remove_credentials_from_url(self.es_client.host))
        self.queue = Queue()
        self.finished_queue = Queue()
        defect_type_model_to_use = {}
        es_query_thread = Thread(target=self._query_elasticsearch,
                                 args=(launches, ))
        es_query_thread.daemon = True
        es_query_thread.start()
        try:
            results = []
            t_start = time()
            del launches

            cnt_items_to_process = 0
            results_to_share = {}
            chosen_namespaces = {}
            while self.finished_queue.empty() or not self.queue.empty():
                if (timeout - (time() - t_start)) <= 5:  # check whether we are running out of time
                    EARLY_FINISH = True
                    break
                if self.queue.empty():
                    sleep(0.1)
                    continue
                else:
                    item_to_process = self.queue.get()
                analyzer_config, test_item_id, searched_res, time_processed = item_to_process
                launch_id = searched_res[0][0]["_source"]["launch_id"]
                launch_name = searched_res[0][0]["_source"]["launch_name"]
                project_id = searched_res[0][0]["_index"]
                if launch_id not in results_to_share:
                    results_to_share[launch_id] = {
                        "not_found": 0,
                        "items_to_process": 0,
                        "processed_time": 0,
                        "launch_id": launch_id,
                        "launch_name": launch_name,
                        "project_id": project_id,
                        "method": "auto_analysis",
                        "gather_date": datetime.now().strftime("%Y-%m-%d"),
                        "gather_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        "number_of_log_lines": analyzer_config.numberOfLogLines,
                        "min_should_match": self.find_min_should_match_threshold(analyzer_config),
                        "model_info": set(),
                        "module_version": [self.app_config["appVersion"]]
                    }

                t_start_item = time()
                cnt_items_to_process += 1
                results_to_share[launch_id]["items_to_process"] += 1
                results_to_share[launch_id]["processed_time"] += time_processed
                boosting_config = self.get_config_for_boosting(analyzer_config)
                if project_id not in chosen_namespaces:
                    chosen_namespaces[project_id] = \
                        self.namespace_finder.get_chosen_namespaces(project_id)
                boosting_config["chosen_namespaces"] = chosen_namespaces[project_id]

                boosting_data_gatherer = boosting_featurizer.BoostingFeaturizer(
                    searched_res,
                    boosting_config,
                    feature_ids=self.boosting_decision_maker.get_feature_ids(),
                    weighted_log_similarity_calculator=self.weighted_log_similarity_calculator)
                if project_id not in defect_type_model_to_use:
                    defect_type_model_to_use[project_id] = self.choose_model(
                        project_id, "defect_type_model/")
                if defect_type_model_to_use[project_id] is None:
                    boosting_data_gatherer.set_defect_type_model(
                        self.global_defect_type_model)
                else:
                    boosting_data_gatherer.set_defect_type_model(
                        defect_type_model_to_use[project_id])
                feature_data, issue_type_names = boosting_data_gatherer.gather_features_info()
                model_info_tags = boosting_data_gatherer.get_used_model_info() +\
                    self.boosting_decision_maker.get_model_info()
                results_to_share[launch_id]["model_info"].update(
                    model_info_tags)

                if len(feature_data) > 0:

                    predicted_labels, predicted_labels_probability =\
                        self.boosting_decision_maker.predict(feature_data)

                    scores_by_issue_type = boosting_data_gatherer.scores_by_issue_type

                    for i in range(len(issue_type_names)):
                        logger.debug(
                            "Most relevant item with issue type %s has id %s",
                            issue_type_names[i],
                            boosting_data_gatherer.scores_by_issue_type[
                                issue_type_names[i]]["mrHit"]["_id"])
                        logger.debug(
                            "Issue type %s has label %d and probability %.3f for features %s",
                            issue_type_names[i], predicted_labels[i],
                            predicted_labels_probability[i][1],
                            feature_data[i])

                    predicted_issue_type = utils.choose_issue_type(
                        predicted_labels, predicted_labels_probability,
                        issue_type_names,
                        boosting_data_gatherer.scores_by_issue_type)

                    if predicted_issue_type:
                        chosen_type = scores_by_issue_type[predicted_issue_type]
                        relevant_item = chosen_type["mrHit"]["_source"]["test_item"]
                        analysis_result = AnalysisResult(
                            testItem=test_item_id,
                            issueType=predicted_issue_type,
                            relevantItem=relevant_item)
                        results.append(analysis_result)
                        logger.debug(analysis_result)
                    else:
                        results_to_share[launch_id]["not_found"] += 1
                        logger.debug("Test item %s has no relevant items",
                                     test_item_id)
                else:
                    results_to_share[launch_id]["not_found"] += 1
                    logger.debug("There are no results for test item %s",
                                 test_item_id)
                results_to_share[launch_id]["processed_time"] += (time() -
                                                                  t_start_item)
            if "amqpUrl" in self.app_config and self.app_config[
                    "amqpUrl"].strip():
                for launch_id in results_to_share:
                    results_to_share[launch_id]["model_info"] = list(
                        results_to_share[launch_id]["model_info"])
                AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue(
                    self.app_config["exchangeName"], "stats_info",
                    json.dumps(results_to_share))
        except Exception as err:
            logger.error(err)
        es_query_thread.join()
        EARLY_FINISH = False
        self.queue = Queue()
        self.finished_queue = Queue()
        logger.debug("Stats info %s", results_to_share)
        logger.info("Processed %d test items. It took %.2f sec.",
                    cnt_items_to_process,
                    time() - t_start)
        logger.info("Finished analysis for %d launches with %d results.",
                    cnt_launches, len(results))
        return results
Example #17
    def search_logs(self, search_req):
        """Get all logs similar to given logs"""
        similar_log_ids = set()
        logger.info("Started searching by request %s", search_req.json())
        logger.info("ES Url %s",
                    utils.remove_credentials_from_url(self.es_client.host))
        t_start = time()
        if not self.es_client.index_exists(str(search_req.projectId)):
            return []
        searched_logs = set()
        test_item_info = {}

        for message in search_req.logMessages:
            if not message.strip():
                continue

            queried_log = self.log_preparation._create_log_template()
            queried_log = self.log_preparation._fill_log_fields(
                queried_log, Log(logId=0, message=message),
                search_req.logLines)

            msg_words = " ".join(
                utils.split_words(queried_log["_source"]["message"]))
            if not msg_words.strip() or msg_words in searched_logs:
                continue
            searched_logs.add(msg_words)
            query = self.build_search_query(search_req,
                                            queried_log["_source"]["message"])
            res = self.es_client.es_client.search(index=str(search_req.projectId),
                                                  body=query)
            for es_res in res["hits"]["hits"]:
                test_item_info[es_res["_id"]] = es_res["_source"]["test_item"]

            _similarity_calculator = similarity_calculator.SimilarityCalculator(
                {
                    "max_query_terms": self.search_cfg["MaxQueryTerms"],
                    "min_word_length": self.search_cfg["MinWordLength"],
                    "min_should_match": "90%",
                    "number_of_log_lines": search_req.logLines
                },
                weighted_similarity_calculator=self.weighted_log_similarity_calculator)
            _similarity_calculator.find_similarity([(queried_log, res)],
                                                   ["message"])

            for group_id, similarity_obj in \
                    _similarity_calculator.similarity_dict["message"].items():
                log_id, _ = group_id
                similarity_percent = similarity_obj["similarity"]
                logger.debug(
                    "Log with id %s has %.3f similarity with the queried log '%s'",
                    log_id, similarity_percent,
                    queried_log["_source"]["message"])
                if similarity_percent >= self.search_cfg["SearchLogsMinSimilarity"]:
                    similar_log_ids.add((utils.extract_real_id(log_id),
                                         int(test_item_info[log_id])))

        logger.info(
            "Finished searching by request %s with %d results. It took %.2f sec.",
            search_req.json(), len(similar_log_ids),
            time() - t_start)
        return [
            SearchLogInfo(logId=log_info[0], testItemId=log_info[1])
            for log_info in similar_log_ids
        ]
Example #18
                _es_client.rp_model_remove_stats_index
        ]:
            date_field = "gather_date"
            if index == _es_client.rp_suggest_metrics_index:
                date_field = "savedDate"
            data_source_created.append(
                int(
                    _es_client.create_grafana_data_source(
                        APP_CONFIG["esHostGrafanaDataSource"], index,
                        date_field)))
        if sum(data_source_created) == len(data_source_created):
            for dashboard_id in ["X-WoMD5Mz", "7po7Ga1Gz", "OM3Zn8EMz"]:
                _es_client.import_dashboard(dashboard_id)
                logger.info("Imported dashboard '%s' into Grafana %s" %
                            (dashboard_id,
                             utils.remove_credentials_from_url(
                                 APP_CONFIG["grafanaHost"])))
            break
    except Exception as e:
        logger.error(e)
        logger.error(
            "Can't import dashboard into Grafana %s" %
            utils.remove_credentials_from_url(APP_CONFIG["grafanaHost"]))
        time.sleep(10)


@application.route('/', methods=['GET'])
def get_health_status():
    _es_client = es_client.EsClient(esHost=APP_CONFIG["esHost"],
                                    grafanaHost=APP_CONFIG["grafanaHost"],
                                    app_config=APP_CONFIG)
    _postgres_dao = postgres_dao.PostgresDAO(APP_CONFIG)
Example #19
    def analyze_logs(self, launches):
        logger.info("Started analysis for %d launches", len(launches))
        logger.info("ES Url %s", utils.remove_credentials_from_url(self.host))
        results = []

        t_start = time()
        es_results = self.get_bulk_search_results(launches)
        logger.debug("Searched ES for all test items for %.2f sec.",
                     time() - t_start)

        t = time()
        es_results_to_process, process_results = self.prepare_features_for_analysis(
            es_results)
        logger.debug("Prepared features for all test items for %.2f sec.",
                     time() - t)

        for idx, features_gathered in process_results:
            for i in range(len(features_gathered)):
                analyzer_config, test_item_id, searched_res = es_results_to_process[
                    idx][i]
                feature_data, issue_type_names, boosting_data_gatherer = features_gathered[
                    i]

                if feature_data:

                    predicted_labels, predicted_labels_probability =\
                        self.boosting_decision_maker.predict(feature_data)

                    for c in range(len(issue_type_names)):
                        logger.debug(
                            "Most relevant item with issue type %s has id %s",
                            issue_type_names[c],
                            boosting_data_gatherer.scores_by_issue_type[
                                issue_type_names[c]]["mrHit"]["_id"])
                        logger.debug(
                            "Most relevant item with issue type %s with info %s",
                            issue_type_names[c],
                            boosting_data_gatherer.scores_by_issue_type[
                                issue_type_names[c]]["mrHit"]["_source"])
                        logger.debug(
                            "Issue type %s has label %d and probability %.3f for features %s",
                            issue_type_names[c], predicted_labels[c],
                            predicted_labels_probability[c][1],
                            feature_data[c])

                    predicted_issue_type = self.choose_issue_type(
                        predicted_labels, predicted_labels_probability,
                        issue_type_names, boosting_data_gatherer)

                    if predicted_issue_type:
                        chosen_type =\
                            boosting_data_gatherer.scores_by_issue_type[predicted_issue_type]
                        relevant_item = chosen_type["mrHit"]["_source"][
                            "test_item"]
                        analysis_result = AnalysisResult(
                            testItem=test_item_id,
                            issueType=predicted_issue_type,
                            relevantItem=relevant_item)
                        results.append(analysis_result)
                        logger.debug(analysis_result)
                    else:
                        logger.debug("Test item %s has no relevant items",
                                     test_item_id)
                else:
                    logger.debug("There are no results for test item %s",
                                 test_item_id)
        logger.info("Processed %d test items. It took %.2f sec.",
                    len(es_results),
                    time() - t_start)
        logger.info("Finished analysis for %d launches with %d results.",
                    len(launches), len(results))
        return results