def output_block_pattern_list(block_pattern_list, input_file, prefix=""): #{{{
    file_mode = global_APIs.get_file_mode(input_file)
    block_pattern_list_file_path = "sql_database/" + file_mode
    if prefix != "":
        block_pattern_list_file_path += "/" + str(prefix)
    block_pattern_list_file_path += "/block_pattern_list.txt"
    fl = open(block_pattern_list_file_path, "w")
    for block in block_pattern_list:
        fl.write(block)
        fl.write(" \n")
        pattern = block_pattern_list[block]["pattern"]
        tmp = ""
        for message in pattern:
            tmp += message + " "
        fl.write(tmp)
        fl.write("\n")
        happen_count = block_pattern_list[block]["happen_count"]
        tmp = "correct " + str(happen_count["correct"])
        tmp += " incorrect " + str(happen_count["incorrect"])
        fl.write(tmp)
        fl.write("\n")
    fl.close()
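# Illustrative on-disk record written by output_block_pattern_list above,
# one block per three lines (block and pattern names are hypothetical):
#
#   block_3
#   pattern_12 pattern_40 pattern_41
#   correct 17 incorrect 2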
def output_message_closest_message_list(message_closest_message_list, input_file, prefix=""): #{{{
    file_mode = global_APIs.get_file_mode(input_file)
    message_closest_message_file_path = "sql_database/" + file_mode
    if prefix != "":
        message_closest_message_file_path += "/" + str(prefix)
    message_closest_message_file_path += "/message_closest_message_list.txt"
    fl = open(message_closest_message_file_path, "w")
    for message_pattern in message_closest_message_list:
        fl.write(message_pattern)
        fl.write(" \n")
        closest_message_list = message_closest_message_list[message_pattern]
        tmp = ""
        if "prior" in closest_message_list:
            tmp += "prior "
            for message in closest_message_list["prior"]:
                tmp += message + " "
        fl.write(tmp)
        fl.write("\n")
        tmp = ""
        if "follow" in closest_message_list:
            tmp += "follow "
            for message in closest_message_list["follow"]:
                tmp += message + " "
        fl.write(tmp)
        fl.write("\n")
    fl.close()
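# Illustrative record written by output_message_closest_message_list above,
# one message per three lines (names are hypothetical):
#
#   pattern_12
#   prior pattern_7 pattern_9
#   follow pattern_40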
def output_single_line_pattern_db(single_line_pattern_db, input_file, prefix=""): #{{{
    file_mode = global_APIs.get_file_mode(input_file)
    # Renamed from node_id_last_file_path: this path is the single-line
    # pattern db, not the node-id file.
    single_line_pattern_file_path = "sql_database/" + file_mode
    if prefix != "":
        single_line_pattern_file_path += "/" + str(prefix)
    single_line_pattern_file_path += "/single_line_pattern_db.txt"
    fl = open(single_line_pattern_file_path, "w")
    for line_pattern_name in single_line_pattern_db:
        fl.write(line_pattern_name)
        fl.write(" \n")
        line_pattern = single_line_pattern_db[line_pattern_name]["pattern"]
        tmp = ""
        for element in line_pattern:
            pos = element[0]
            word = element[1]
            tmp += str(pos) + " " + str(word) + " "
        fl.write(tmp)
        fl.write("\n")
        tmp = "belong_block " + single_line_pattern_db[line_pattern_name]["belong_block"]
        fl.write(tmp)
        fl.write("\n")
    fl.close()
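# Illustrative record written by output_single_line_pattern_db above, one
# pattern per three lines; the second line alternates word position and
# word (positions and words are hypothetical):
#
#   pattern_12
#   0 Interface 1 eth0 3 down
#   belong_block block_3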
def read_message_closest_message_list(input_file, prefix=""): #{{{
    message_closest_message_list = {}
    file_mode = global_APIs.get_file_mode(input_file)
    message_closest_message_file_path = "sql_database/" + file_mode
    if prefix != "":
        message_closest_message_file_path += "/" + str(prefix)
    message_closest_message_file_path += "/message_closest_message_list.txt"
    if os.path.isfile(message_closest_message_file_path):
        fl = open(message_closest_message_file_path, "r")
        while True:
            line_1 = fl.readline()
            line_2 = fl.readline()
            line_3 = fl.readline()
            if not line_1 or not line_2 or not line_3:
                break
            message_pattern = line_1.split()[0]
            prior_pattern_list = line_2.split()
            follow_pattern_list = line_3.split()
            tmp = {}
            # Skip the leading "prior"/"follow" tag word on each line.
            tmp["prior"] = prior_pattern_list[1:]
            tmp["follow"] = follow_pattern_list[1:]
            message_closest_message_list[message_pattern] = tmp
        fl.close()
    return message_closest_message_list
def read_single_line_pattern_db(input_file, prefix=""): #{{{
    single_line_pattern_db = {}
    file_mode = global_APIs.get_file_mode(input_file)
    single_line_pattern_file_path = "sql_database/" + file_mode
    if prefix != "":
        single_line_pattern_file_path += "/" + str(prefix)
    single_line_pattern_file_path += "/single_line_pattern_db.txt"
    if os.path.isfile(single_line_pattern_file_path):
        fl = open(single_line_pattern_file_path, "r")
        while True:
            line_1 = fl.readline()
            line_2 = fl.readline()
            line_3 = fl.readline()
            if not line_1 or not line_2 or not line_3:
                break
            pattern_name = line_1.split()[0]
            pattern = convert_string_into_pattern(line_2)
            tmp = {}
            tmp["pattern"] = pattern
            belong_block = line_3.split()
            if len(belong_block) == 1:
                tmp["belong_block"] = ""
            else:
                tmp["belong_block"] = belong_block[1]
            single_line_pattern_db[pattern_name] = tmp
        fl.close()
    return single_line_pattern_db
def output_happen_matrix(happen_matrix, input_file, prefix="", file_name_prefix=""): #{{{
    file_mode = global_APIs.get_file_mode(input_file)
    happen_matrix_path = "sql_database/" + file_mode + "/"
    if prefix != "":
        happen_matrix_path += str(prefix) + "/"
    if file_name_prefix != "":
        happen_matrix_path += file_name_prefix + "_"
    happen_matrix_path += "happen_matrix.txt"
    fl = open(happen_matrix_path, "w")
    for line_pattern_name in happen_matrix:
        fl.write(line_pattern_name)
        fl.write(" \n")
        line_pattern_happen_list = happen_matrix[line_pattern_name]
        tmp = "happen_time " + str(line_pattern_happen_list["happen_time"]) + " "
        for next_pattern in line_pattern_happen_list:
            if next_pattern == "happen_time":
                continue
            next_pattern_happen_time = line_pattern_happen_list[next_pattern]
            tmp += next_pattern + " " + str(next_pattern_happen_time) + " "
        fl.write(tmp)
        fl.write("\n")
    fl.close()
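# Illustrative record written by output_happen_matrix above, one pattern per
# two lines: the pattern's own happen_time first, then (next_pattern, count)
# pairs (names and counts are hypothetical):
#
#   pattern_12
#   happen_time 120 pattern_40 118 pattern_9 2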
def output_this_round_affect_message_list(sub_folder, new_found_single_line_pattern,
        need_update_single_line_pattern_list, left_new_found_single_line_pattern_list,
        affected_message_list, prefix): #{{{
    file_mode = global_APIs.get_file_mode(sub_folder)
    affect_message_file_path = "sql_database/" + file_mode
    if prefix != "":
        affect_message_file_path += "/" + str(prefix)
    affect_message_file_path += "/affected_message.txt"
    fl = open(affect_message_file_path, "w")
    # Four sections, each a header line followed by one line of messages.
    section_list = [
        ("new_found_single_line_pattern", new_found_single_line_pattern),
        ("need_update_single_line_pattern_list", need_update_single_line_pattern_list),
        ("left_new_found_single_line_pattern_list", left_new_found_single_line_pattern_list),
        ("affected_message_list", affected_message_list),
    ]
    for header, message_list in section_list:
        fl.write(header + "\n")
        tmp = ""
        for message in message_list:
            tmp += message + " "
        fl.write(tmp + "\n")
    fl.close()
def read_single_line_pattern_range_list(input_file, prefix=""): #{{{
    single_line_pattern_range_line_pattern_list = {}
    file_mode = global_APIs.get_file_mode(input_file)
    range_pattern_list_path = "sql_database/" + file_mode
    if prefix != "":
        range_pattern_list_path += "/" + str(prefix)
    range_pattern_list_path += "/range_pattern_list.txt"
    if os.path.isfile(range_pattern_list_path):
        fl = open(range_pattern_list_path, "r")
        while True:
            line_1 = fl.readline()
            line_2 = fl.readline()
            if not line_1 or not line_2:
                break
            pattern_name = line_1.split()[0]
            range_pattern_file_list = line_2.split()
            range_pattern_list = {}
            i = 0
            while i < len(range_pattern_file_list):
                range_pattern_list[range_pattern_file_list[i]] = int(range_pattern_file_list[i + 1])
                i = i + 2
            single_line_pattern_range_line_pattern_list[pattern_name] = range_pattern_list
        fl.close()
    return single_line_pattern_range_line_pattern_list
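# read_single_line_pattern_range_list is the inverse of
# output_single_line_pattern_range_list below: each two-line record
# round-trips into {pattern_name: {range_pattern: count}}. For example the
# hypothetical record
#
#   pattern_12
#   pattern_40 3 pattern_9 1
#
# loads as {"pattern_12": {"pattern_40": 3, "pattern_9": 1}}.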
def output_done_sub_folder_list(done_sub_folder_list, sub_folder): #{{{
    file_mode = global_APIs.get_file_mode(sub_folder)
    done_sub_folder_list_file_path = "sql_database/" + file_mode
    done_sub_folder_list_file_path += "/done_sub_folder_list.txt"
    fl = open(done_sub_folder_list_file_path, "w")
    # Loop variable renamed so it no longer shadows the sub_folder argument.
    for done_folder in done_sub_folder_list:
        fl.write(done_folder + " \n")
    fl.close()
def block_pattern_list_summary(input_file, show_report=0): #this input_file is just a file mode trigger
    file_mode = global_APIs.get_file_mode(input_file)
    block_pattern_list_file_path = "sql_database/" + file_mode
    file_list = global_APIs.get_folder_file_list(block_pattern_list_file_path)
    last_sub_folder = global_APIs.get_latest_sql_db_path(input_file)
    block_pattern_list = read_block_pattern_list(input_file, last_sub_folder)
    single_line_pattern_db = read_single_line_pattern_db(input_file, last_sub_folder)
    block_covered_message_list = {}
    block_covered_message_num = 0
    for block in block_pattern_list:
        block_pattern = block_pattern_list[block]["pattern"]
        block_covered_message_num += len(block_pattern)
        for message in block_pattern:
            if message not in block_covered_message_list:
                block_covered_message_list[message] = [block]
            else:
                # debug: this message is claimed by more than one block
                print(message)
                block_covered_message_list[message].append(block)
    message_have_block_info_list = []
    message_have_block_info_num = 0
    for single_line_pattern in single_line_pattern_db:
        if single_line_pattern_db[single_line_pattern]["belong_block"] != "":
            message_have_block_info_num += 1
            if single_line_pattern not in message_have_block_info_list:
                message_have_block_info_list.append(single_line_pattern)
    if show_report == 1:
        print("===================================================")
        print("EBD summary report")
        print(" block_covered_message_num: " + str(block_covered_message_num))
        print(" message_have_block_info_num: " + str(message_have_block_info_num))
        print(" total_single_line_message: " + str(len(single_line_pattern_db)))
        print(" total_block: " + str(len(block_pattern_list)))
        print("===================================================")
    # Report every message that ended up in more than one block.
    error_list = {}
    for message in block_covered_message_list:
        if len(block_covered_message_list[message]) > 1:
            error_list[message] = block_covered_message_list[message]
    return error_list
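# Illustrative return value of block_pattern_list_summary: a message claimed
# by more than one block is reported as (hypothetical names)
#
#   {"pattern_7": ["block_2", "block_5"]}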
def output_node_id_last_list_file(node_id_last_list, input_file, prefix=""): #{{{
    file_mode = global_APIs.get_file_mode(input_file)
    node_id_last_file_path = "sql_database/" + file_mode
    if prefix != "":
        node_id_last_file_path += "/" + str(prefix)
    node_id_last_file_path += "/node_id_last_file.txt"
    fl = open(node_id_last_file_path, "w")
    for node_id in node_id_last_list:
        fl.write(node_id + " " + str(node_id_last_list[node_id]) + "\n")
    fl.close()
def read_done_sub_folder_list(sub_folder): #{{{
    done_sub_folder_list = []
    file_mode = global_APIs.get_file_mode(sub_folder)
    done_sub_folder_list_file_path = "sql_database/" + file_mode
    done_sub_folder_list_file_path += "/done_sub_folder_list.txt"
    if os.path.isfile(done_sub_folder_list_file_path):
        fl = open(done_sub_folder_list_file_path, "r")
        for line in fl.readlines():
            line = line.replace("\n", "")
            line = line.split(" ")
            done_sub_folder_list.append(line[0])
        fl.close()
    return done_sub_folder_list
def read_this_round_affect_message_list(sub_folder, prefix):
    file_mode = global_APIs.get_file_mode(sub_folder)
    affect_message_file_path = "sql_database/" + file_mode
    if prefix != "":
        affect_message_file_path += "/" + str(prefix)
    affect_message_file_path += "/affected_message.txt"
    if not os.path.isfile(affect_message_file_path):
        return []
    fl = open(affect_message_file_path, "r")
    line_list = fl.readlines()
    fl.close()
    # affected_message.txt holds four header/list line pairs; see the layout
    # sketch below. split() never yields "\n", so no extra filtering is needed.
    new_found_single_line_pattern = line_list[1].split()
    need_update_single_line_pattern_list = line_list[3].split()
    left_new_found_single_line_pattern_list = line_list[5].split()
    affected_message_list = line_list[7].split()
    return [new_found_single_line_pattern,
            need_update_single_line_pattern_list,
            left_new_found_single_line_pattern_list,
            affected_message_list]
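# Layout of affected_message.txt assumed by the index positions above
# (headers on even lines, space-separated message lists on odd lines):
#
#   line 0: new_found_single_line_pattern
#   line 1: <messages>
#   line 2: need_update_single_line_pattern_list
#   line 3: <messages>
#   line 4: left_new_found_single_line_pattern_list
#   line 5: <messages>
#   line 6: affected_message_list
#   line 7: <messages>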
def read_node_id_last_list_file(input_file, prefix=""): #{{{
    node_id_last_list = {}
    file_mode = global_APIs.get_file_mode(input_file)
    node_id_last_file_path = "sql_database/" + file_mode
    if prefix != "":
        node_id_last_file_path += "/" + str(prefix)
    node_id_last_file_path += "/node_id_last_file.txt"
    if os.path.isfile(node_id_last_file_path):
        fl = open(node_id_last_file_path, "r")
        for line in fl.readlines():
            line = line.replace("\n", "")
            line = line.split(" ")
            node_id_last_list[line[0]] = line[1]
        fl.close()
    return node_id_last_list
def whole_daily_folder_block_extract(folder_name):
    total_error_report = "extract_error_report.txt"
    error_report_fl = open(total_error_report, "w")
    sub_folder_list = global_APIs.get_folder_file_list(folder_name)
    done_example_list = []
    done_example_report_fl = open("example_report.txt", "w")
    folder_num = -1
    for sub_folder in sub_folder_list:
        folder_num += 1
        if folder_num < folder_control_lower_band:
            continue
        if folder_num >= folder_control_upper_band:
            break
        print("block extract " + sub_folder)
        file_mode = global_APIs.get_file_mode(sub_folder)
        if file_mode == "":
            print("can't detect mode " + sub_folder)
            continue
        folder_block_extract(sub_folder, error_report_fl, done_example_list,
                             done_example_report_fl)
    error_report_fl.close()
    done_example_report_fl.close()
def read_block_pattern_list(input_file, prefix=""): #{{{
    block_pattern_list = {}
    file_mode = global_APIs.get_file_mode(input_file)
    block_pattern_list_file_path = "sql_database/" + file_mode
    if prefix != "":
        block_pattern_list_file_path += "/" + str(prefix)
    block_pattern_list_file_path += "/block_pattern_list.txt"
    if os.path.isfile(block_pattern_list_file_path):
        fl = open(block_pattern_list_file_path, "r")
        while True:
            line_1 = fl.readline()
            line_2 = fl.readline()
            line_3 = fl.readline()
            if not line_1 or not line_2 or not line_3:
                break
            block_name = line_1.split()[0]
            block_pattern_list_tmp = {}
            block_pattern_list_tmp["pattern"] = line_2.split()
            happen_count_list = line_3.split()
            happen_count_tmp = {}
            i = 0
            while i < len(happen_count_list):
                # Convert counts back to int so the read value matches the
                # int counts the write side serializes.
                happen_count_tmp[happen_count_list[i]] = int(happen_count_list[i + 1])
                i = i + 2
            block_pattern_list_tmp["happen_count"] = happen_count_tmp
            block_pattern_list[block_name] = block_pattern_list_tmp
        fl.close()
    return block_pattern_list
def output_single_line_pattern_range_list(single_line_pattern_range_line_pattern_list, input_file, prefix=""): #{{{
    file_mode = global_APIs.get_file_mode(input_file)
    range_pattern_list_path = "sql_database/" + file_mode
    if prefix != "":
        range_pattern_list_path += "/" + str(prefix)
    range_pattern_list_path += "/range_pattern_list.txt"
    fl = open(range_pattern_list_path, "w")
    for line_pattern_name in single_line_pattern_range_line_pattern_list:
        fl.write(line_pattern_name)
        fl.write(" \n")
        range_pattern_list = single_line_pattern_range_line_pattern_list[line_pattern_name]
        tmp = ""
        for pattern in range_pattern_list:
            tmp += pattern + " " + str(range_pattern_list[pattern]) + " "
        fl.write(tmp)
        fl.write("\n")
    fl.close()
def read_happen_matrix(input_file, prefix="", file_name_prefix=""): #{{{
    happen_matrix = {}
    file_mode = global_APIs.get_file_mode(input_file)
    happen_matrix_path = "sql_database/" + file_mode + "/"
    if prefix != "":
        happen_matrix_path += str(prefix) + "/"
    if file_name_prefix != "":
        happen_matrix_path += file_name_prefix + "_"
    happen_matrix_path += "happen_matrix.txt"
    if os.path.isfile(happen_matrix_path):
        fl = open(happen_matrix_path, "r")
        while True:
            line_1 = fl.readline()
            line_2 = fl.readline()
            if not line_1 or not line_2:
                break
            pattern_name = line_1.split()[0]
            next_pattern_list = {}
            next_pattern_file_list = line_2.split()
            i = 0
            while i < len(next_pattern_file_list):
                next_pattern_name = next_pattern_file_list[i]
                next_pattern_happen_time = next_pattern_file_list[i + 1]
                i = i + 2
                next_pattern_list[next_pattern_name] = int(next_pattern_happen_time)
            happen_matrix[pattern_name] = next_pattern_list
        fl.close()
    return happen_matrix
def whole_daily_folder_block_learning(folder_name):
    sub_folder_list = global_APIs.get_folder_file_list(folder_name)
    done_sub_folder_list = []
    folder_num = -1
    report_fl = open("total_progress.txt", "w")
    last_prefix = -1  #this last prefix is the last successful folder number
    for sub_folder in sub_folder_list:
        report_tmp = ""
        report_tmp += "block learning " + sub_folder + "\n"
        print("block learning " + sub_folder + "\n")
        folder_num += 1
        file_mode = global_APIs.get_file_mode(sub_folder)
        if file_mode == "":
            print("Can't detect file mode from folder " + sub_folder)
            continue
        if folder_num < folder_control_lower_band:
            last_prefix = folder_num
            continue
        if folder_num >= folder_control_upper_band:
            break
        #done_sub_folder
        done_sub_folder_list = database_opt.read_done_sub_folder_list(sub_folder)
        if sub_folder in done_sub_folder_list:
            last_prefix = folder_num
            continue
        prefix_path = str(folder_num)
        global_APIs.sql_prefix_folder_initializer(sub_folder, prefix_path)
        #previous result read from database #{{{
        previous_happen_matrix = database_opt.read_happen_matrix(sub_folder, last_prefix, "total")
        new_found_single_line_pattern = []
        single_line_pattern_db = database_opt.read_single_line_pattern_db(sub_folder, last_prefix)
        single_line_pattern_range_line_pattern_list = database_opt.read_single_line_pattern_range_list(sub_folder, last_prefix)
        message_closest_message_list = database_opt.read_message_closest_message_list(sub_folder, last_prefix)
        block_pattern_list = database_opt.read_block_pattern_list(sub_folder, last_prefix)
        previous_node_id_last_list = database_opt.read_node_id_last_list_file(sub_folder, last_prefix)
        #}}}
        #generate this matrix #{{{
        ignore_previous_file = 0
        this_happen_matrix = {}
        result = folder_happen_matrix_analyze(sub_folder, this_happen_matrix,
                single_line_pattern_db, new_found_single_line_pattern,
                single_line_pattern_range_line_pattern_list,
                previous_node_id_last_list, ignore_previous_file)
        this_happen_matrix = result[0]
        single_line_pattern_db = result[1]
        new_found_single_line_pattern = result[2]
        single_line_pattern_range_line_pattern_list = result[3]
        node_id_last_list = result[4]
        previous_node_id_last_list = database_opt.read_node_id_last_list_file(sub_folder, last_prefix)
        total_happen_matrix = happen_matrix_merge(this_happen_matrix,
                previous_happen_matrix, previous_node_id_last_list, sub_folder)
        if global_APIs.invalid_message == 'invalid_message':
            global_APIs.single_line_db_invalid_message_assign(single_line_pattern_db)
        global_APIs.generate_single_pattern_dynamic_similarity_threshold(total_happen_matrix)
        #store recent record
        database_opt.output_happen_matrix(total_happen_matrix, sub_folder, folder_num, "total")
        database_opt.output_happen_matrix(this_happen_matrix, sub_folder, folder_num, "this")
        database_opt.output_single_line_pattern_range_list(single_line_pattern_range_line_pattern_list, sub_folder, folder_num)
        database_opt.output_node_id_last_list_file(node_id_last_list, sub_folder, folder_num)
        #}}}
        #new_found_single_line_pattern #{{{
        report_tmp += " new found single line " + str(len(new_found_single_line_pattern)) + "\n"
        print(" new found single line " + str(len(new_found_single_line_pattern)))
        orig_new_found_single_line_pattern_list = []
        for message in new_found_single_line_pattern:
            orig_new_found_single_line_pattern_list.append(message)
        previous_happen_matrix = database_opt.read_happen_matrix(sub_folder, last_prefix, "total")
        need_update_single_line_pattern_list = anomaly_detection.this_happen_matrix_anomaly_detection(
                previous_happen_matrix, total_happen_matrix,
                new_found_single_line_pattern, single_line_pattern_db,
                message_closest_message_list)
        new_found_single_line_num = 0
        left_new_found_single_line_pattern_list = []
        for message in orig_new_found_single_line_pattern_list:
            if message in need_update_single_line_pattern_list:
                new_found_single_line_num += 1
            else:
                left_new_found_single_line_pattern_list.append(message)
        update_length = len(need_update_single_line_pattern_list)
        report_tmp += " new_found_single_line " + str(new_found_single_line_num) + "\n"
        print(" new_found_single_line " + str(new_found_single_line_num))
        report_tmp += " need update previous single line " + str(update_length - new_found_single_line_num) + "\n"
        print(" need update previous single line " + str(update_length - new_found_single_line_num))
        #}}}
        # Initialize here so the affected-message record below is written
        # even when nothing needs updating this round.
        affected_message_list = []
        if len(need_update_single_line_pattern_list) > 0:
            result = folder_block_learning(sub_folder, total_happen_matrix,
                    single_line_pattern_db, need_update_single_line_pattern_list,
                    single_line_pattern_range_line_pattern_list,
                    block_pattern_list, message_closest_message_list)
            total_happen_matrix = result[0]
            single_line_pattern_range_line_pattern_list = result[1]
            message_closest_message_list = result[2]
            block_pattern_list = result[3]
            single_line_pattern_db = result[4]
            affected_message_list = result[5]
            report_tmp += " affected_message_list_length: " + str(len(affected_message_list)) + "\n"
            print(" affected_message_list_length: " + str(len(affected_message_list)))
            # Compare int to int; the original compared against str() and so
            # never matched.
            if len(affected_message_list) == new_found_single_line_num:
                report_tmp += " previous block list has no change" + "\n"
        database_opt.output_message_closest_message_list(message_closest_message_list, sub_folder, folder_num)
        database_opt.output_block_pattern_list(block_pattern_list, sub_folder, folder_num)
        database_opt.output_single_line_pattern_db(single_line_pattern_db, sub_folder, folder_num)
        done_sub_folder_list.append(sub_folder)
        #ARES
        #database_opt.output_done_sub_folder_list(done_sub_folder_list, sub_folder)
        database_opt.output_this_round_affect_message_list(sub_folder,
                orig_new_found_single_line_pattern_list,
                need_update_single_line_pattern_list,
                left_new_found_single_line_pattern_list,
                affected_message_list, folder_num)
        last_prefix = folder_num
        block_merge_error_list = database_opt.block_pattern_list_summary(sub_folder)
        if len(block_merge_error_list) != 0:
            report_tmp += str(block_merge_error_list)
        #print report_tmp
        report_fl.write(report_tmp)
    report_fl.close()
    return folder_num
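# A minimal usage sketch of the learning pass, assuming "daily_logs" is a
# hypothetical folder of per-day sub-folders; whole_daily_folder_block_learning
# returns the number of the last folder it visited:
#
#   last_folder_num = whole_daily_folder_block_learning("daily_logs")
#   print("stopped at folder " + str(last_folder_num))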
def affected_message_pattern_history_analyze(folder_name):
    #this is testing cutoff
    sub_folder_list = global_APIs.get_folder_file_list(folder_name)
    folder_num = -1
    prefix = -1
    last_prefix = -1
    each_message_record_dict = {}
    cutoff_message_list = []
    each_interval_record = {}
    cut_off_happen_time_threshold = 100
    cut_off_folder_num_threshold = 10
    daily_num = 2
    interval_num = 0
    interval_control_num = 10
    total_day_num = 0
    this_interval_need_update_list = []
    this_interval_affected_list = []
    this_interval_new_found_num = 0
    this_interval_new_found_update_num = 0
    for sub_folder in sub_folder_list:
        folder_num += 1
        prefix = folder_num
        file_mode = global_APIs.get_file_mode(sub_folder)
        if file_mode == "":
            continue
        if folder_num < multi_file_folder.folder_control_lower_band:
            last_prefix = folder_num
            continue
        if folder_num >= multi_file_folder.folder_control_upper_band:
            break
        affect_record = database_opt.read_this_round_affect_message_list(sub_folder, prefix)
        if len(affect_record) == 0:
            # No affected-message record for this folder; skip it instead of
            # crashing on the empty result.
            continue
        new_found_single_line_pattern = affect_record[0]
        need_update_single_line_pattern_list = affect_record[1]
        left_new_found_single_line_pattern_list = affect_record[2]
        affected_message_list = affect_record[3]
        for need_update_pattern in need_update_single_line_pattern_list:
            if need_update_pattern not in this_interval_need_update_list:
                this_interval_need_update_list.append(need_update_pattern)
        for affected_message in affected_message_list:
            if affected_message not in this_interval_affected_list:
                this_interval_affected_list.append(affected_message)
        this_interval_new_found_num += len(new_found_single_line_pattern)
        this_interval_new_found_update_num += len(new_found_single_line_pattern) - len(left_new_found_single_line_pattern_list)
        total_day_num = (folder_num + 1) * daily_num
        if total_day_num % interval_control_num == 0:
            # Close out this interval and reset the per-interval counters.
            tmp = []
            tmp.append(this_interval_new_found_num)
            tmp.append(this_interval_new_found_update_num)
            tmp.append(len(this_interval_need_update_list))
            tmp.append(len(this_interval_affected_list))
            print(total_day_num)
            print(tmp)
            each_interval_record[interval_num] = tmp
            this_interval_need_update_list = []
            this_interval_affected_list = []
            this_interval_new_found_num = 0
            this_interval_new_found_update_num = 0
            interval_num += 1
            continue
        for message in new_found_single_line_pattern:
            tmp = {}
            tmp["first_seen"] = folder_num
            tmp["last_need_update"] = -1
            tmp["last_change"] = -1
            each_message_record_dict[message] = tmp
        for message in need_update_single_line_pattern_list:
            each_message_record_dict[message]["last_need_update"] = folder_num
        # These snapshots depend only on the folder, so read them once here
        # instead of once per affected message.
        previous_block_pattern_list = database_opt.read_block_pattern_list(sub_folder, last_prefix)
        previous_single_line_pattern_db = database_opt.read_single_line_pattern_db(sub_folder, last_prefix)
        this_block_pattern_list = database_opt.read_block_pattern_list(sub_folder, prefix)
        this_single_line_pattern_db = database_opt.read_single_line_pattern_db(sub_folder, prefix)
        for message in affected_message_list:
            if message not in previous_single_line_pattern_db or previous_single_line_pattern_db[message]["belong_block"] == "":
                each_message_record_dict[message]["last_change"] = folder_num
                continue
            previous_belong_block = previous_single_line_pattern_db[message]["belong_block"]
            previous_belong_block_pattern = previous_block_pattern_list[previous_belong_block]
            this_belong_block = this_single_line_pattern_db[message]["belong_block"]
            if this_belong_block == "":
                #previously belonged to a block, now disconnected
                if message in cutoff_message_list:
                    print("cutoff message update " + message + " " + str(each_message_record_dict[message]["last_change"]) + " " + str(folder_num))
                each_message_record_dict[message]["last_change"] = folder_num
                continue
            this_belong_block_pattern = this_block_pattern_list[this_belong_block]
            #now this message has a belong block
            same_result = judge_two_block_pattern_list_same(previous_belong_block_pattern, this_belong_block_pattern)
            if same_result == 0:
                if message in cutoff_message_list:
                    print("cutoff message update " + message + " " + str(each_message_record_dict[message]["last_change"]) + " " + str(folder_num))
                each_message_record_dict[message]["last_change"] = folder_num
                continue
        #here make cut off decision
        this_happen_matrix = database_opt.read_happen_matrix(sub_folder, prefix, "total")
        #happen_matrix is used to count each message's happen time
        for message in each_message_record_dict:
            if message in cutoff_message_list:
                continue
            message_happen_time = this_happen_matrix[message]["happen_time"]
            if message_happen_time < cut_off_happen_time_threshold:
                continue
            last_change_folder_num = each_message_record_dict[message]["last_change"]
            if last_change_folder_num == -1:
                #this means this message never merged or dismerged with any other messages
                first_seen_folder_num = each_message_record_dict[message]["first_seen"]
                if folder_num - first_seen_folder_num > cut_off_folder_num_threshold:
                    cutoff_message_list.append(message)
            else:
                interval_to_last_change = folder_num - last_change_folder_num
                if interval_to_last_change >= cut_off_folder_num_threshold:
                    cutoff_message_list.append(message)
        #final test
        #for message in cutoff_message_list:
        #    last_change_folder_num = each_message_record_dict[message]["last_change"]
        #    interval_to_last_change = folder_num - last_change_folder_num
        #    if interval_to_last_change < cut_off_folder_num_threshold:
        #        print("error " + message + " " + str(folder_num))
        last_prefix = folder_num