def __init__(self, app_config=None, search_cfg=None):
    """Store the configuration and build the helper objects.

    Args:
        app_config: application settings; passed to the namespace finder,
            the defect-type retraining trigger and the ES client. A fresh
            empty dict is used when omitted.
        search_cfg: search settings passed to the ES client. A fresh empty
            dict is used when omitted.
    """
    # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
    # signature is created once and would be shared by every instance.
    self.app_config = {} if app_config is None else app_config
    self.search_cfg = {} if search_cfg is None else search_cfg
    self.namespace_finder = namespace_finder.NamespaceFinder(self.app_config)
    # Retraining triggers keyed by model type.
    self.model_training_triggering = {
        "defect_type": RetrainingDefectTypeTriggering(self.app_config)
    }
    self.es_client = EsClient(app_config=self.app_config,
                              search_cfg=self.search_cfg)
def __init__(self, app_config=None, search_cfg=None):
    """Store the configuration and build the helper objects.

    Args:
        app_config: application settings passed to the ES client. A fresh
            empty dict is used when omitted.
        search_cfg: search settings. When it has a non-blank
            ``"SimilarityWeightsFolder"`` entry, a weighted similarity
            calculator is created from that folder; otherwise the
            calculator stays ``None``. A fresh empty dict is used when
            omitted.
    """
    # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
    # signature is created once and would be shared by every instance.
    self.app_config = {} if app_config is None else app_config
    self.search_cfg = {} if search_cfg is None else search_cfg
    self.es_client = EsClient(app_config=self.app_config,
                              search_cfg=self.search_cfg)
    self.log_preparation = LogPreparation()
    self.weighted_log_similarity_calculator = None
    # ``.get`` so that a config without the key (including the default empty
    # one) means "no weighted calculator" instead of raising KeyError.
    weights_folder = self.search_cfg.get("SimilarityWeightsFolder", "")
    if weights_folder.strip():
        self.weighted_log_similarity_calculator = \
            weighted_similarity_calculator.WeightedSimilarityCalculator(
                folder=weights_folder)
def __init__(self, app_config, search_cfg):
    """Keep the configuration, create the ES client and load the baseline model.

    Args:
        app_config: application settings passed to the ES client.
        search_cfg: search settings; must contain
            ``"GlobalDefectTypeModelFolder"``, the folder the baseline
            defect type model is created from.
    """
    # Mapping from label prefix to its index.
    self.label2inds = dict(ab=0, pb=1, si=2)
    self.app_config, self.search_cfg = app_config, search_cfg
    self.es_client = EsClient(search_cfg=search_cfg, app_config=app_config)
    baseline_folder = search_cfg["GlobalDefectTypeModelFolder"]
    self.baseline_model = defect_type_model.DefectTypeModel(
        folder=baseline_folder)
def __init__(self, app_config=None, search_cfg=None):
    """Store the configuration, build the helpers and load the common models.

    Args:
        app_config: application settings; passed to the ES client, the
            namespace finder and the object saver. A fresh empty dict is
            used when omitted.
        search_cfg: search settings passed to the ES client. A fresh empty
            dict is used when omitted.
    """
    # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
    # signature is created once and would be shared by every instance.
    self.app_config = {} if app_config is None else app_config
    self.search_cfg = {} if search_cfg is None else search_cfg
    self.es_client = EsClient(app_config=self.app_config,
                              search_cfg=self.search_cfg)
    self.log_preparation = LogPreparation()
    # Both model attributes start empty and exist before
    # initialize_common_models() runs at the end of this constructor.
    self.weighted_log_similarity_calculator = None
    self.global_defect_type_model = None
    self.namespace_finder = namespace_finder.NamespaceFinder(self.app_config)
    self.object_saver = ObjectSaver(self.app_config)
    self.initialize_common_models()
class DeleteIndexService:
    """Deletes an index together with the per-project data kept next to it."""

    def __init__(self, app_config=None, search_cfg=None):
        """Store the configuration and build the helper objects.

        Args:
            app_config: application settings; passed to the namespace
                finder, the defect-type retraining trigger and the ES
                client. A fresh empty dict is used when omitted.
            search_cfg: search settings passed to the ES client. A fresh
                empty dict is used when omitted.
        """
        # ``None`` sentinels instead of ``{}`` defaults: a dict literal in
        # the signature is created once and shared by every instance.
        self.app_config = {} if app_config is None else app_config
        self.search_cfg = {} if search_cfg is None else search_cfg
        self.namespace_finder = namespace_finder.NamespaceFinder(
            self.app_config)
        # Retraining triggers keyed by model type.
        self.model_training_triggering = {
            "defect_type": RetrainingDefectTypeTriggering(self.app_config)
        }
        self.es_client = EsClient(app_config=self.app_config,
                                  search_cfg=self.search_cfg)

    @utils.ignore_warnings
    def delete_index(self, index_name):
        """Delete the index and the namespace/triggering data stored for it.

        Args:
            index_name: name of the index to delete; the same value is used
                as the project id when removing the retraining info.

        Returns:
            The result of the ES client's ``delete_index`` converted to int.
        """
        logger.info("Started deleting index")
        t_start = time()
        is_index_deleted = self.es_client.delete_index(index_name)
        self.namespace_finder.remove_namespaces(index_name)
        # The related data is removed regardless of the ES deletion result.
        for triggering in self.model_training_triggering.values():
            triggering.remove_triggering_info({"project_id": index_name})
        logger.info("Finished deleting index %.2f s", time() - t_start)
        return int(is_index_deleted)
class SearchService:
    """Finds stored logs that are similar to the messages of a search request."""

    def __init__(self, app_config=None, search_cfg=None):
        """Store the configuration and build the helper objects.

        Args:
            app_config: application settings passed to the ES client. A
                fresh empty dict is used when omitted.
            search_cfg: search settings. When it has a non-blank
                ``"SimilarityWeightsFolder"`` entry, a weighted similarity
                calculator is created from that folder; otherwise the
                calculator stays ``None``. A fresh empty dict is used when
                omitted.
        """
        # ``None`` sentinels instead of ``{}`` defaults: a dict literal in
        # the signature is created once and shared by every instance.
        self.app_config = {} if app_config is None else app_config
        self.search_cfg = {} if search_cfg is None else search_cfg
        self.es_client = EsClient(app_config=self.app_config,
                                  search_cfg=self.search_cfg)
        self.log_preparation = LogPreparation()
        self.weighted_log_similarity_calculator = None
        # ``.get`` so that a config without the key (including the default
        # empty one) means "no weighted calculator" instead of a KeyError.
        weights_folder = self.search_cfg.get("SimilarityWeightsFolder", "")
        if weights_folder.strip():
            self.weighted_log_similarity_calculator = \
                weighted_similarity_calculator.WeightedSimilarityCalculator(
                    folder=weights_folder)

    def build_search_query(self, search_req, message):
        """Build search query

        Args:
            search_req: request object; ``itemId`` is excluded from the
                results and ``filteredLaunchIds`` restricts the launches.
            message: text used for the ``more_like_this`` clause on the
                ``message`` field.

        Returns:
            The Elasticsearch query body as a dict.
        """
        return {
            "_source": ["message", "test_item", "detected_message",
                        "stacktrace"],
            "size": 10000,
            "query": {
                "bool": {
                    "filter": [
                        {"range": {"log_level": {
                            "gte": utils.ERROR_LOGGING_LEVEL}}},
                        {"exists": {"field": "issue_type"}},
                        {"term": {"is_merged": False}},
                    ],
                    # Never return logs of the item being searched for.
                    "must_not": {
                        "term": {
                            "test_item": {
                                "value": search_req.itemId,
                                "boost": 1.0
                            }
                        }
                    },
                    "must": [
                        # Issue types starting with "TI" in either case.
                        {
                            "bool": {
                                "should": [
                                    {"wildcard": {"issue_type": "TI*"}},
                                    {"wildcard": {"issue_type": "ti*"}},
                                ]
                            }
                        },
                        {"terms": {
                            "launch_id": search_req.filteredLaunchIds}},
                        {
                            "more_like_this": {
                                "fields": ["message"],
                                "like": message,
                                "min_doc_freq": 1,
                                "min_term_freq": 1,
                                "minimum_should_match": "5<90%",
                                "max_query_terms":
                                    self.search_cfg["MaxQueryTerms"],
                                "boost": 1.0
                            }
                        },
                    ],
                    "should": [
                        {"term": {"is_auto_analyzed": {
                            "value": "false", "boost": 1.0}}},
                    ]
                }
            }
        }

    def search_logs(self, search_req):
        """Get all logs similar to given logs

        Args:
            search_req: request object providing ``projectId`` (used as the
                index name), ``logMessages``, ``logLines``, ``itemId`` and
                ``filteredLaunchIds``.

        Returns:
            A list of ``SearchLogInfo`` objects, one per distinct
            (log id, test item id) pair whose similarity reached
            ``search_cfg["SearchLogsMinSimilarity"]``; an empty list when
            the project index does not exist.
        """
        similar_log_ids = set()
        logger.info("Started searching by request %s", search_req.json())
        logger.info("ES Url %s",
                    utils.remove_credentials_from_url(self.es_client.host))
        t_start = time()
        if not self.es_client.index_exists(str(search_req.projectId)):
            return []
        searched_logs = set()   # normalized messages already queried
        test_item_info = {}     # ES document id -> test item of that log
        for message in search_req.logMessages:
            if not message.strip():
                continue

            queried_log = self.log_preparation._create_log_template()
            queried_log = self.log_preparation._fill_log_fields(
                queried_log,
                Log(logId=0, message=message),
                search_req.logLines)

            # Skip messages that are empty after preparation or that repeat
            # an already searched message.
            msg_words = " ".join(
                utils.split_words(queried_log["_source"]["message"]))
            if not msg_words.strip() or msg_words in searched_logs:
                continue
            searched_logs.add(msg_words)

            query = self.build_search_query(
                search_req, queried_log["_source"]["message"])
            res = self.es_client.es_client.search(
                index=str(search_req.projectId), body=query)
            for es_res in res["hits"]["hits"]:
                test_item_info[es_res["_id"]] = es_res["_source"]["test_item"]

            _similarity_calculator = similarity_calculator.SimilarityCalculator(
                {
                    "max_query_terms": self.search_cfg["MaxQueryTerms"],
                    "min_word_length": self.search_cfg["MinWordLength"],
                    "min_should_match": "90%",
                    "number_of_log_lines": search_req.logLines
                },
                weighted_similarity_calculator=self.weighted_log_similarity_calculator)
            _similarity_calculator.find_similarity(
                [(queried_log, res)], ["message"])

            for group_id, similarity_obj in _similarity_calculator.similarity_dict[
                    "message"].items():
                log_id, _ = group_id
                similarity_percent = similarity_obj["similarity"]
                logger.debug(
                    "Log with id %s has %.3f similarity with the queried log '%s'",
                    log_id, similarity_percent,
                    queried_log["_source"]["message"])
                if similarity_percent >= self.search_cfg[
                        "SearchLogsMinSimilarity"]:
                    similar_log_ids.add((utils.extract_real_id(log_id),
                                         int(test_item_info[log_id])))

        logger.info(
            "Finished searching by request %s with %d results. It took %.2f sec.",
            search_req.json(), len(similar_log_ids), time() - t_start)
        return [
            SearchLogInfo(logId=log_info[0], testItemId=log_info[1])
            for log_info in similar_log_ids
        ]
def create_es_client():
    """Creates Elasticsearch client

    The client is built from ``APP_CONFIG["esHost"]`` and ``SEARCH_CONFIG``
    and gets a boosting decision maker created from
    ``APP_CONFIG["boostModelFolder"]`` before it is returned.
    """
    client = EsClient(APP_CONFIG["esHost"], SEARCH_CONFIG)
    client.set_boosting_decision_maker(
        boosting_decision_maker.BoostingDecisionMaker(
            APP_CONFIG["boostModelFolder"]))
    return client
def __init__(self, app_config=None, search_cfg=None):
    """Store the configuration and build the ES client and log preparation.

    Args:
        app_config: application settings passed to the ES client. A fresh
            empty dict is used when omitted.
        search_cfg: search settings passed to the ES client. A fresh empty
            dict is used when omitted.
    """
    # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
    # signature is created once and would be shared by every instance.
    self.app_config = {} if app_config is None else app_config
    self.search_cfg = {} if search_cfg is None else search_cfg
    self.es_client = EsClient(app_config=self.app_config,
                              search_cfg=self.search_cfg)
    self.log_preparation = LogPreparation()
class ClusterService:
    """Groups the logs of a launch into clusters of similar messages."""

    def __init__(self, app_config=None, search_cfg=None):
        """Store the configuration and build the ES client and log preparation.

        Args:
            app_config: application settings passed to the ES client. A
                fresh empty dict is used when omitted.
            search_cfg: search settings passed to the ES client. A fresh
                empty dict is used when omitted.
        """
        # ``None`` sentinels instead of ``{}`` defaults: a dict literal in
        # the signature is created once and shared by every instance.
        self.app_config = {} if app_config is None else app_config
        self.search_cfg = {} if search_cfg is None else search_cfg
        self.es_client = EsClient(app_config=self.app_config,
                                  search_cfg=self.search_cfg)
        self.log_preparation = LogPreparation()

    def build_search_similar_items_query(self, launch_id, test_item, message):
        """Build search query

        Args:
            launch_id: launch the matching logs must belong to.
            test_item: test item whose logs are excluded from the results.
            message: text used for the ``more_like_this`` clause on the
                ``whole_message`` field.

        Returns:
            The Elasticsearch query body as a dict.
        """
        return {
            "_source": [
                "whole_message", "test_item", "detected_message",
                "stacktrace", "launch_id", "cluster_id"
            ],
            "size": 10000,
            "query": {
                "bool": {
                    "filter": [
                        {"range": {"log_level": {
                            "gte": utils.ERROR_LOGGING_LEVEL}}},
                        {"exists": {"field": "issue_type"}},
                        {"term": {"is_merged": False}},
                    ],
                    "must_not": {
                        "term": {
                            "test_item": {
                                "value": test_item,
                                "boost": 1.0
                            }
                        }
                    },
                    "must": [
                        {"term": {"launch_id": launch_id}},
                        {
                            "more_like_this": {
                                "fields": ["whole_message"],
                                "like": message,
                                "min_doc_freq": 1,
                                "min_term_freq": 1,
                                "minimum_should_match": "5<98%",
                                "max_query_terms":
                                    self.search_cfg["MaxQueryTerms"],
                                "boost": 1.0
                            }
                        }
                    ]
                }
            }
        }

    def find_similar_items_from_es(self, groups, log_dict, log_messages,
                                   log_ids, number_of_lines):
        """Extend each group with similar logs that are already stored in ES.

        For every group, the first log of the group is used to query ES; the
        hits are clustered together with that log, and the hits that land in
        the same cluster as it are returned.

        Args:
            groups: mapping of group key to a list of indexes into
                ``log_dict`` / ``log_messages``.
            log_dict: mapping of index to the ES-style log document.
            log_messages: prepared messages, addressed by the same indexes.
            log_ids: set of integer log ids already handled; hits with these
                ids are skipped and the ids of accepted hits are added to it
                (the set is mutated in place).
            number_of_lines: passed to
                ``utils.prepare_message_for_clustering`` for each hit.

        Returns:
            Mapping of group key to a list of ``ClusterResult`` objects
            (possibly empty) for the additional logs found in ES.
        """
        new_clusters = {}
        _clusterizer = clusterizer.Clusterizer()
        for global_group in groups:
            first_item_ind = groups[global_group][0]
            query = self.build_search_similar_items_query(
                log_dict[first_item_ind]["_source"]["launch_id"],
                log_dict[first_item_ind]["_source"]["test_item"],
                log_messages[first_item_ind])
            search_results = self.es_client.es_client.search(
                index=log_dict[first_item_ind]["_index"],
                body=query)

            # Position 0 is always the group's own first log; ES hits follow.
            log_messages_part = [log_messages[first_item_ind]]
            log_dict_part = {0: log_dict[first_item_ind]}
            ind = 1
            for res in search_results["hits"]["hits"]:
                if int(res["_id"]) in log_ids:
                    continue
                # Stored before the emptiness check; when the hit is skipped
                # below, ``ind`` is not advanced and the slot is reused.
                log_dict_part[ind] = res
                log_message = utils.prepare_message_for_clustering(
                    res["_source"]["whole_message"], number_of_lines)
                if not log_message.strip():
                    continue
                log_messages_part.append(log_message)
                ind += 1

            groups_part = _clusterizer.find_clusters(log_messages_part)
            new_group = []
            for group in groups_part:
                # Only the cluster that contains the queried log (index 0)
                # and at least one ES hit is of interest.
                if 0 in groups_part[group] and len(groups_part[group]) > 1:
                    # Reuse a cluster id already present on a member (the
                    # last non-blank one wins), else generate a new one.
                    cluster_id = ""
                    for ind in groups_part[group]:
                        if log_dict_part[ind]["_source"]["cluster_id"].strip():
                            cluster_id = log_dict_part[ind]["_source"][
                                "cluster_id"].strip()
                    if not cluster_id.strip():
                        cluster_id = str(uuid.uuid1())
                    for ind in groups_part[group]:
                        if ind == 0:
                            continue
                        log_ids.add(int(log_dict_part[ind]["_id"]))
                        new_group.append(
                            ClusterResult(
                                logId=log_dict_part[ind]["_id"],
                                testItemId=log_dict_part[ind]["_source"]["test_item"],
                                project=log_dict_part[ind]["_index"],
                                launchId=log_dict_part[ind]["_source"]["launch_id"],
                                clusterId=cluster_id))
                    break
            new_clusters[global_group] = new_group
        return new_clusters

    def gather_cluster_results(self, groups, additional_results, log_dict):
        """Turn the groups into ``ClusterResult`` objects and count clusters.

        Args:
            groups: mapping of group key to a list of indexes into
                ``log_dict``.
            additional_results: mapping of group key to extra
                ``ClusterResult`` objects for that group.
            log_dict: mapping of index to the ES-style log document.

        Returns:
            A tuple ``(results, cluster_num)``: the list of
            ``ClusterResult`` objects and the number of groups that have
            more than one item, counting the additional results.
        """
        results_to_return = []
        cluster_num = 0
        for group in groups:
            cnt_items = len(groups[group])
            cluster_id = ""
            if group in additional_results:
                cnt_items += len(additional_results[group])
                # Take the cluster id of the first additional result, if any.
                for item in additional_results[group]:
                    cluster_id = item.clusterId
                    break
            if cnt_items > 1:
                cluster_num += 1
            if not cluster_id:
                cluster_id = str(uuid.uuid1())
            for ind in groups[group]:
                results_to_return.append(
                    ClusterResult(
                        logId=log_dict[ind]["_id"],
                        testItemId=log_dict[ind]["_source"]["test_item"],
                        project=log_dict[ind]["_index"],
                        launchId=log_dict[ind]["_source"]["launch_id"],
                        clusterId=cluster_id))
            if cnt_items > 1 and group in additional_results:
                results_to_return.extend(additional_results[group])
        return results_to_return, cluster_num

    @utils.ignore_warnings
    def find_clusters(self, launch_info):
        """Cluster the logs of a launch and save the cluster ids to ES.

        Args:
            launch_info: request object providing ``launch`` (with
                ``project``, ``launchId`` and ``launchName``),
                ``numberOfLogLines`` and ``for_update``. When ``for_update``
                is truthy, similar logs already stored in ES are added to
                the clusters.

        Returns:
            The list of ``ClusterResult`` objects; an empty list when the
            project index does not exist.
        """
        logger.info("Started clusterizing logs")
        if not self.es_client.index_exists(str(launch_info.launch.project)):
            logger.info("Project %d doesn't exist", launch_info.launch.project)
            logger.info("Finished clustering log with 0 clusters.")
            return []
        t_start = time()
        _clusterizer = clusterizer.Clusterizer()
        log_messages, log_dict = self.log_preparation.prepare_logs_for_clustering(
            launch_info.launch, launch_info.numberOfLogLines)
        log_ids = {int(log["_id"]) for log in log_dict.values()}
        groups = _clusterizer.find_clusters(log_messages)
        additional_results = {}
        if launch_info.for_update:
            additional_results = self.find_similar_items_from_es(
                groups, log_dict, log_messages, log_ids,
                launch_info.numberOfLogLines)

        results_to_return, cluster_num = self.gather_cluster_results(
            groups, additional_results, log_dict)
        if results_to_return:
            # One partial-update action per log, setting its cluster id.
            bodies = [{
                "_op_type": "update",
                "_id": result.logId,
                "_index": result.project,
                "doc": {
                    "cluster_id": result.clusterId
                }
            } for result in results_to_return]
            self.es_client._bulk_index(bodies)

        # Single timestamp so that the date and the datetime fields always
        # describe the same moment.
        gathered_at = datetime.now()
        results_to_share = {
            launch_info.launch.launchId: {
                "not_found": int(cluster_num == 0),
                "items_to_process": len(log_ids),
                "processed_time": time() - t_start,
                "found_clusters": cluster_num,
                "launch_id": launch_info.launch.launchId,
                "launch_name": launch_info.launch.launchName,
                "project_id": launch_info.launch.project,
                "method": "find_clusters",
                "gather_date": gathered_at.strftime("%Y-%m-%d"),
                "gather_datetime": gathered_at.strftime("%Y-%m-%d %H:%M:%S"),
                "module_version": [self.app_config["appVersion"]],
                "model_info": []
            }
        }
        # Statistics are published only when an AMQP url is configured.
        if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip():
            AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue(
                self.app_config["exchangeName"], "stats_info",
                json.dumps(results_to_share))

        logger.debug("Stats info %s", results_to_share)
        logger.info("Processed the launch. It took %.2f sec.", time() - t_start)
        logger.info("Finished clustering for the launch with %d clusters.",
                    cluster_num)
        return results_to_return
def create_es_client():
    """Creates Elasticsearch client

    The client is built from the module-level ``APP_CONFIG`` and
    ``SEARCH_CONFIG``.
    """
    client = EsClient(APP_CONFIG, SEARCH_CONFIG)
    return client
def init_amqp(_amqp_client):
    """Declare the exchange and create the threads that consume the queues.

    Every queue gets its own ``AmqpClient`` whose ``receive`` is wrapped
    with ``create_thread``. Which queues are consumed depends on
    ``APP_CONFIG["instanceTaskType"]``: ``"train"`` consumes only
    ``train_models``; any other value consumes the request queues.

    Args:
        _amqp_client: client whose ``connection.channel()`` is used to
            declare the exchange.

    Returns:
        The list of objects returned by ``create_thread``, or ``None`` when
        declaring the exchange fails (the error is logged).
    """
    with _amqp_client.connection.channel() as channel:
        try:
            declare_exchange(channel, APP_CONFIG)
        except Exception as err:
            logger.error("Failed to declare amqp objects")
            logger.error(err)
            # Bare return: the caller receives None, not an empty list.
            return
    threads = []
    # One shared ES client serves the handlers that call it directly
    # (index, clean, stats_info).
    es_client = EsClient(APP_CONFIG, SEARCH_CONFIG)
    if APP_CONFIG["instanceTaskType"] == "train":
        # The service objects in the lambdas below are constructed inside
        # the lambda body, i.e. each time the callback is invoked.
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "train_models", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_inner_amqp_request(channel, method, props, body,
                                                               RetrainingService(
                                                                   APP_CONFIG,
                                                                   SEARCH_CONFIG).train_models))))
    else:
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "index", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_amqp_request(channel, method, props, body,
                                                         es_client.index_logs,
                                                         prepare_response_data=amqp_handler.
                                                         prepare_index_response_data))))
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "analyze", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_amqp_request(channel, method, props, body,
                                                         AutoAnalyzerService(
                                                             APP_CONFIG,
                                                             SEARCH_CONFIG).analyze_logs,
                                                         prepare_response_data=amqp_handler.
                                                         prepare_analyze_response_data))))
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "delete", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_amqp_request(channel, method, props, body,
                                                         DeleteIndexService(
                                                             APP_CONFIG,
                                                             SEARCH_CONFIG).delete_index,
                                                         prepare_data_func=amqp_handler.
                                                         prepare_delete_index,
                                                         prepare_response_data=amqp_handler.
                                                         output_result))))
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "clean", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_amqp_request(channel, method, props, body,
                                                         es_client.delete_logs,
                                                         prepare_data_func=amqp_handler.
                                                         prepare_clean_index,
                                                         prepare_response_data=amqp_handler.
                                                         output_result))))
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "search", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_amqp_request(channel, method, props, body,
                                                         SearchService(APP_CONFIG,
                                                                       SEARCH_CONFIG).search_logs,
                                                         prepare_data_func=amqp_handler.
                                                         prepare_search_logs,
                                                         prepare_response_data=amqp_handler.
                                                         prepare_analyze_response_data))))
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "suggest", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_amqp_request(channel, method, props, body,
                                                         SuggestService(
                                                             APP_CONFIG,
                                                             SEARCH_CONFIG).suggest_items,
                                                         prepare_data_func=amqp_handler.
                                                         prepare_test_item_info,
                                                         prepare_response_data=amqp_handler.
                                                         prepare_analyze_response_data))))
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "cluster", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_amqp_request(channel, method, props, body,
                                                         ClusterService(
                                                             APP_CONFIG,
                                                             SEARCH_CONFIG).find_clusters,
                                                         prepare_data_func=amqp_handler.
                                                         prepare_launch_info,
                                                         prepare_response_data=amqp_handler.
                                                         prepare_analyze_response_data))))
        # "stats_info" goes through handle_inner_amqp_request, like
        # "train_models" above; the other queues use handle_amqp_request.
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "stats_info", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_inner_amqp_request(channel, method, props, body,
                                                               es_client.send_stats_info))))
        threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive,
                       (APP_CONFIG["exchangeName"], "namespace_finder", True, False,
                        lambda channel, method, props, body:
                        amqp_handler.handle_amqp_request(channel, method, props, body,
                                                         NamespaceFinderService(
                                                             APP_CONFIG,
                                                             SEARCH_CONFIG).update_chosen_namespaces))))
    return threads
SEARCH_CONFIG["SuggestBoostModelFolder"] = model_settings["SUGGEST_BOOST_MODEL_FOLDER"] SEARCH_CONFIG["SimilarityWeightsFolder"] = model_settings["SIMILARITY_WEIGHTS_FOLDER"] SEARCH_CONFIG["GlobalDefectTypeModelFolder"] = model_settings["GLOBAL_DEFECT_TYPE_MODEL_FOLDER"] log_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logging.conf') logging.config.fileConfig(log_file_path) if APP_CONFIG["logLevel"].lower() == "debug": logging.disable(logging.NOTSET) elif APP_CONFIG["logLevel"].lower() == "info": logging.disable(logging.DEBUG) else: logging.disable(logging.INFO) logger = logging.getLogger("analyzerApp") APP_CONFIG["appVersion"] = read_version() es_client = EsClient(APP_CONFIG, SEARCH_CONFIG) read_model_settings() application = create_application() CORS(application) threads = [] @application.route('/', methods=['GET']) def get_health_status(): status = "" if not es_client.is_healthy(APP_CONFIG["esHost"]): status += "Elasticsearch is not healthy;" if status: logger.error("Analyzer health check status failed: %s", status) return Response(json.dumps({"status": status}), status=503, mimetype='application/json')