def test_insert_data_in_text(self):
    """The expected text carries the data list in parentheses right after 'list'."""
    values = [1111, 11, 1111, 11, 111, 1]
    source_text = "silt kilst sliks klast list tilst liist"
    writer = LogWriter(values, source_text)
    # Look-alike words ("kilst", "liist", ...) stay untouched in the
    # expected output; only the exact word "list" is followed by the data.
    expected = (
        "silt kilst sliks klast list ([1111, 11, 1111, 11, 111, 1]) tilst liist"
    )
    self.assertEqual(writer.insert_data_in_text(source_text, values), expected)
def split_logs_to_two_and_write(traces):
    """Split ``traces`` in two by time and write each half as a label log.

    The two halves come from ``split_trace_in_half_by_time``. Every trace is
    reduced to the list of its events' ``label`` values before being handed
    to ``LogWriter.write_log``.
    """
    earlier, later = split_trace_in_half_by_time(traces)
    earlier_labels = [[event.label for event in trace] for trace in earlier]
    later_labels = [[event.label for event in trace] for trace in later]

    from log_writer import LogWriter

    LogWriter.write_log(earlier_labels, '../../data/bear/first_half_traces.log')
    LogWriter.write_log(later_labels, '../../data/bear/last_half_traces.log')
def split_traces_by_months_and_write(traces):
    """Group traces by the (year, month) of their first event and write them.

    Each trace is stored as the list of its events' ``label`` values. One
    log file named ``<year>_<month>.log`` is written per group, and a
    summary line is printed for each group before it is written.
    """
    labels_by_month = {}
    for trace in traces:
        first_time = trace[0].time
        key = (first_time.year, first_time.month)
        labels_by_month.setdefault(key, []).append(
            [event.label for event in trace])

    from log_writer import LogWriter

    for (year, month), month_traces in labels_by_month.items():
        print('month/year', (year, month), 'had', len(month_traces), 'traces')
        LogWriter.write_log(
            month_traces, '../../data/bear/{}_{}.log'.format(year, month))
def split_traces_by_quarters_and_write(traces):
    """Bucket traces into calendar quarters by first-event month and write them.

    Traces whose first event is dated 2011 are skipped. Every remaining
    trace is stored as the list of its events' ``label`` values under
    'Q1'..'Q4', and one ``<quarter>.log`` file is written per bucket.
    """
    quarters = {'Q1': [], 'Q2': [], 'Q3': [], 'Q4': []}
    # (quarter name, last month that still belongs to it), checked in order.
    last_month_of_quarter = (('Q1', 3), ('Q2', 6), ('Q3', 9), ('Q4', 12))

    for trace in traces:
        first_time = trace[0].time
        if first_time.year == 2011:
            continue
        labels = [event.label for event in trace]
        month = first_time.month
        for name, last_month in last_month_of_quarter:
            if month <= last_month:
                quarters[name].append(labels)
                break

    from log_writer import LogWriter

    for name, quarter_traces in quarters.items():
        # The 'month/year' label is kept verbatim although the key printed
        # next to it is a quarter name.
        print('month/year', name, 'had', len(quarter_traces), 'traces')
        LogWriter.write_log(quarter_traces, '../../data/bear/' + name + '.log')
class MyTest(unittest.TestCase):
    """Checks ``LogWriter.combining_method`` against a fixed expected report."""

    def setUp(self):
        """Create the fixture: a data list, a headline text and a LogWriter."""
        self.list_data = [1, 2, 3, 4]
        # NOTE(review): the expected string in test_combining_method starts
        # with "\n\t\t", which suggests this literal originally spanned
        # tab-indented lines - confirm the whitespace inside it.
        self.head_text = """ The following list represents the total number of invisible unicorns in classroom. """
        self.test_instance = LogWriter(self.list_data, self.head_text)

    def test_combining_method(self):
        """``combining_method`` must return exactly the expected full text."""
        expected = (
            "\n\t\tThe following list represents the total number of "
            "invisible unicorns in classroom.\n\t\t_________\n After change: \n"
            "\n\t\tThe following list ([1, 2, 3, 4]) represents the total number of "
            "invisible unicorns in classroom.\n\t\t"
            "0 O 0 O 0 O 0 O 0 O 0 O7.483314773547883\nTo seek the holy grail\n"
            "2218.473985099097"
        )
        self.assertEqual(expected, self.test_instance.combining_method())
def __build_sentence_list(id: int, caption_list: list, f_name: str,
                          source_film: str = None, target_audio: str = None):
    """Turn captions into ``ModelSentence`` objects, numbering from ``id + 1``.

    For every caption the id is advanced first and the difficulty level is
    taken from ``CaptionFactory.__judge_level(caption.english)``.

    When ``source_film`` is truthy the caption is also cut out of the film
    with ``Spliter.split_to_mp3`` and the sentence receives
    ``target_audio + "\\" + <audio file>``. If the cut raises ``ValueError``
    or ``IOError`` a warning is logged and the audio file name is left empty.

    A ``ValueError`` raised anywhere else while handling a caption is logged
    and that caption is skipped; its id stays consumed.

    Returns the list of sentences that were built.
    """
    sentences = []
    for caption in caption_list:
        try:
            id += 1
            level = CaptionFactory.__judge_level(caption.english)

            if not source_film:
                # No film available: sentence without an audio path.
                sentences.append(
                    data_connector.model_sentence.ModelSentence(
                        id, caption, f_name, level))
                continue

            # Default used when cutting the audio fails.
            audio_file = ""
            try:
                audio_file = film_spliter.spliter.Spliter.split_to_mp3(
                    source_film, target_audio, caption)
            except ValueError as e:
                LogWriter.write_warning(e, "视频裁切模块错误")
            except IOError as e:
                LogWriter.write_warning(e, "读取视频文件或写入音频文件失败")

            audio_path = target_audio + "\\" + audio_file
            sentences.append(
                data_connector.model_sentence.ModelSentence(
                    id, caption, f_name, level, audio_path))
        except ValueError as e:
            LogWriter.write_warning(e, "字符串分割错误:原句为" + caption.english)
    return sentences
def produce_logs():
    """Generate six logs from two ``ProtocolModel`` instances and write them.

    Both models are built from ``MODELS_PATH`` with index 0 and
    ``assign_transtion_probs=True``. Instances 0-2 are generated from the
    first model and instances 3-5 from the second. For every instance the
    model's transition probabilities are written to ``LOGS_OUTPUT_PATH`` and
    the generated log is written to ``l<instance_id>.log`` under that path.
    """
    instance_count = 6
    traces_per_log = 100000

    first_model = ProtocolModel(MODELS_PATH, 0, assign_transtion_probs=True)
    second_model = ProtocolModel(MODELS_PATH, 0, assign_transtion_probs=True)

    for instance_id in range(instance_count):
        print('processing instance:', instance_id, MODELS_PATH)

        # Pick the pre-built model instead of loading one per instance.
        if instance_id < 3:
            source_model = first_model
        else:
            source_model = second_model

        generated_log = LogGenerator.produce_log_from_model(
            source_model.graph,
            transition_probability_attribute=TRANSITION_PROBABILITY_ATTRIBUTE,
            traces2produce=traces_per_log)

        # Store the transition probabilities of the model that was used.
        source_model.write_transitions_probabilities(LOGS_OUTPUT_PATH)

        log_path = LOGS_OUTPUT_PATH + 'l' + str(instance_id) + ".log"
        LogWriter.write_log(generated_log, log_path)
def load_dir(path: str, id: int, audio_path: str):
    """Build sentence models for every ``.srt`` file listed in ``path``.

    For each subtitle file the captions are loaded with
    ``CaptionFactory.load_srt_file``. When ``CaptionFactory.__find_film``
    reports a film for the file, the sentences are built together with the
    ``<name>.mp4`` source and ``audio_path``; otherwise they are built
    without audio. After each file ``id`` is advanced by the number of
    captions it contained.

    ``IOError`` and ``ValueError`` raised while handling a file are logged
    as warnings and the file is skipped.

    Returns the accumulated list of sentences.
    """
    sentences = []
    for file in os.listdir(path):
        if not file.endswith(".srt"):
            continue

        stem = file[:-4]  # file name without the ".srt" suffix
        has_film = CaptionFactory.__find_film(path, file)
        try:
            caption_list = CaptionFactory.load_srt_file(path + '\\' + file)
            if has_film:
                film_source = path + '\\' + stem + ".mp4"
                built = CaptionFactory.__build_sentence_list(
                    id, caption_list, stem, film_source, audio_path)
            else:
                built = CaptionFactory.__build_sentence_list(
                    id, caption_list, stem)
            sentences += built
            # The next file continues numbering after this file's captions.
            id += len(caption_list)
        except IOError as e:
            LogWriter.write_warning(e, "字幕文件读取失败,文件名:" + str(file))
        except ValueError as e:
            LogWriter.write_warning(e, "字幕文件解析失败,文件名:" + str(file))
    return sentences
def __init__(self, start_id: int = 0):
    """Run the whole import job: read settings, parse captions, upload results.

    Stage 1 decodes the XML settings and loads every subtitle file through
    ``CaptionFactory.load_dir``, numbering sentences from ``start_id``.
    Stage 2 opens a ``DataManager`` connection, splits each sentence into
    words, and executes the SQL of every sentence and then of every word.

    Args:
        start_id: id value handed to ``load_dir`` as the numbering base.

    Raises:
        Exception: any error from either stage is logged once through
            ``LogWriter.write_warning`` and then re-raised unchanged.
    """
    # Stage 1: settings and subtitle processing. This is kept separate from
    # the database stage so a database error is not caught here a second
    # time and reported as a configuration failure.
    try:
        self.__db_setting = self.__decode_xml()
        print('分解字幕文件并分割视频....')
        self.__sentence_list = analyser.caption_factory.CaptionFactory.load_dir(
            self.__caption_path, start_id, self.__audio_path)
        self.__word_list = []
    except Exception as e:
        LogWriter.write_warning(e, "读取配置文件失败")
        raise

    # Stage 2: database upload.
    try:
        dm = data_connector.data_manager.DataManager(self.__db_setting)
        try:
            print('分析例句并上传....')
            for sentence in self.__sentence_list:
                self.__split_word(sentence)
                dm.execute_sql(sentence.to_sql())
            print('上传分析结果....')
            for word in self.__word_list:
                dm.execute_sql(word.to_sql())
        finally:
            # Release the connection on failure as well as on success.
            # NOTE(review): assumes close_connection() is safe to call after
            # a failed statement - confirm against DataManager.
            dm.close_connection()
    except Exception as e:
        LogWriter.write_warning(e, "连接数据库失败")
        raise
    print('作业结束。')
def test_computation(self):
    """``computation(3)`` must equal sqrt(3) + 9 + sqrt(sqrt(3))."""
    root = math.sqrt(3)
    expected = root + 9 + math.sqrt(root)
    self.assertEqual(expected, LogWriter.computation(3))
def setUp(self):
    """Create the fixture: a data list, a headline text and a LogWriter."""
    self.list_data = [1, 2, 3, 4]
    # NOTE(review): whitespace inside this literal is part of the fixture;
    # confirm it matches the layout the assertions using head_text expect.
    self.head_text = """ The following list represents the total number of invisible unicorns in classroom. """
    self.test_instance = LogWriter(self.list_data, self.head_text)
def test_get_second_word(self):
    """``get_second_word`` returns the second word of each sample sentence."""
    samples = (
        ("To seek the holy grail", "seek"),
        ("answers to questions", "to"),
    )
    for sentence, second_word in samples:
        self.assertEqual(LogWriter.get_second_word(sentence), second_word)
def test_what_is_your_quest(self):
    """Check the quest sentence without an argument and with 'answers'."""
    cases = (
        ((), "To seek the holy grail"),
        (("answers",), "To seek the answers"),
    )
    for arguments, sentence in cases:
        self.assertEqual(LogWriter.what_is_your_quest(*arguments), sentence)
def test_what_is_added_the_meaning_of_life(self):
    """No argument must give sqrt(42); passing -6 must give 6.0."""
    without_argument = LogWriter.what_is_added_the_meaning_of_life()
    self.assertEqual(without_argument, math.sqrt(42))

    with_minus_six = LogWriter.what_is_added_the_meaning_of_life(-6)
    self.assertEqual(with_minus_six, 6.0)
class MyTest(unittest.TestCase):
    """Unit tests for ``LogWriter`` built around one small shared fixture."""

    def setUp(self):
        """Create the fixture: a data list, a headline text and a LogWriter."""
        self.list_data = [1, 2, 3, 4]
        # NOTE(review): the expected strings below start with "\n\t\t", which
        # suggests this literal originally spanned tab-indented lines -
        # confirm the whitespace inside it.
        self.head_text = """ The following list represents the total number of invisible unicorns in classroom. """
        self.test_instance = LogWriter(self.list_data, self.head_text)

    def test_init(self):
        """The constructor stores both arguments on the instance."""
        writer = self.test_instance
        self.assertEqual(writer.list_data, self.list_data)
        self.assertEqual(writer.head_text, self.head_text)

    def test_avg_every_second_element(self):
        """For [1, 2, 3, 4] the expected average is 3."""
        average = LogWriter.avg_every_second_element([1, 2, 3, 4])
        self.assertEqual(average, 3)

    def test_get_every_second_element(self):
        """For [1, 2, 34, 3] the expected selection is [2, 3]."""
        selected = LogWriter.get_every_second_element([1, 2, 34, 3])
        self.assertEqual(selected, [2, 3])

    def test_insert_data_in_text(self):
        """The expected text carries the data list in parentheses after 'list'."""
        values = [1111, 11, 1111, 11, 111, 1]
        source_text = "silt kilst sliks klast list tilst liist"
        writer = LogWriter(values, source_text)
        expected = (
            "silt kilst sliks klast list ([1111, 11, 1111, 11, 111, 1]) tilst liist"
        )
        self.assertEqual(
            writer.insert_data_in_text(source_text, values), expected)

    def test_count_o(self):
        """Check the expected counts for two sample strings."""
        samples = (
            ("000ooo00ooo0o0", 7),
            ("oOooO", 5),  # mixes lower- and upper-case letters
        )
        for text, expected_count in samples:
            self.assertEqual(LogWriter.count_o(text), expected_count)

    def test_get_first_part(self):
        """``get_first_part`` returns the first text part and a count of 14."""
        text_part, count = self.test_instance.get_first_part()
        self.assertEqual(count, 14)
        expected_text = (
            "\n\t\tThe following list represents the total number of "
            "invisible unicorns in classroom.\n\t\t_________\n After change: \n"
            "\n\t\tThe following list ([1, 2, 3, 4]) represents the total number of "
            "invisible unicorns in classroom.\n\t\t"
        )
        self.assertEqual(expected_text, text_part)

    def test_what_is_added_the_meaning_of_life(self):
        """No argument must give sqrt(42); passing -6 must give 6.0."""
        without_argument = LogWriter.what_is_added_the_meaning_of_life()
        self.assertEqual(without_argument, math.sqrt(42))
        with_minus_six = LogWriter.what_is_added_the_meaning_of_life(-6)
        self.assertEqual(with_minus_six, 6.0)

    def test_what_is_your_quest(self):
        """Check the quest sentence without an argument and with 'answers'."""
        cases = (
            ((), "To seek the holy grail"),
            (("answers",), "To seek the answers"),
        )
        for arguments, sentence in cases:
            self.assertEqual(
                LogWriter.what_is_your_quest(*arguments), sentence)

    def test_get_second_word(self):
        """``get_second_word`` returns the second word of each sample."""
        samples = (
            ("To seek the holy grail", "seek"),
            ("answers to questions", "to"),
        )
        for sentence, second_word in samples:
            self.assertEqual(LogWriter.get_second_word(sentence), second_word)

    def test_o_count_is_even(self):
        """The fixture's ``o_count`` is odd and becomes even after adding one."""
        writer = self.test_instance
        # Attribute read kept from the original test; the value is not used.
        tmp = writer.o_count
        self.assertEqual(False, writer.o_count_is_even())
        writer.o_count += 1
        self.assertEqual(True, writer.o_count_is_even())
        writer.o_count -= 1

    def test_get_movie_reference(self):
        """Check the reference for the fixture's ``o_count`` and for 6 and 8."""
        writer = self.test_instance
        self.assertEqual(writer.get_movie_reference(), "To seek the following")

        writer.o_count = 6
        self.assertEqual(
            writer.get_movie_reference(), str(math.sqrt(42 + 6)))

        writer.o_count = 8
        self.assertEqual(
            writer.get_movie_reference(),
            str(math.sqrt(42 + 8)) + "\n" + "To seek the holy grail")

        writer.o_count = 7

    def test_computation(self):
        """``computation(3)`` must equal sqrt(3) + 9 + sqrt(sqrt(3))."""
        root = math.sqrt(3)
        expected = root + 9 + math.sqrt(root)
        self.assertEqual(expected, LogWriter.computation(3))

    def test_get_second_part(self):
        """Second part equals the movie reference, plus a computed line if given."""
        writer = self.test_instance
        reference = writer.get_movie_reference()
        self.assertEqual(writer.get_second_part(), reference)
        self.assertEqual(
            writer.get_second_part(computation=lambda x: x + 3),
            reference + "\n" + str(50))

    def test_combining_method(self):
        """``combining_method`` must return exactly the expected full text."""
        # NOTE(review): the two float renderings below carry 12 significant
        # digits, which is what Python 2's str(float) prints; Python 3 prints
        # more digits - confirm the target interpreter.
        expected = (
            "\n\t\tThe following list represents the total number of "
            "invisible unicorns in classroom.\n\t\t_________\n After change: \n"
            "\n\t\tThe following list ([1, 2, 3, 4]) represents the total number of "
            "invisible unicorns in classroom.\n\t\t"
            "0 O 0 O 0 O 0 O 0 O 0 O7.48331477355\nTo seek the holy grail\n"
            "2218.4739851"
        )
        self.assertEqual(expected, self.test_instance.combining_method())
def test_count_o(self):
    """Check the expected counts for two sample strings."""
    samples = (
        ("000ooo00ooo0o0", 7),
        ("oOooO", 5),  # mixes lower- and upper-case letters
    )
    for text, expected_count in samples:
        self.assertEqual(LogWriter.count_o(text), expected_count)
def test_get_every_second_element(self):
    """For [1, 2, 34, 3] the expected selection is [2, 3]."""
    selected = LogWriter.get_every_second_element([1, 2, 34, 3])
    self.assertEqual(selected, [2, 3])
def test_avg_every_second_element(self):
    """For [1, 2, 3, 4] the expected average is 3."""
    average = LogWriter.avg_every_second_element([1, 2, 3, 4])
    self.assertEqual(average, 3)
def bear_based_experiments():
    """Run the k-Tails experiments on the desktop/mobile split of the bear log.

    Steps, in the order they appear below:
      1. Parse ``LOG_PATH`` with ``BearLogParser`` and split the traces with
         ``get_desktop_traces`` / ``get_mobile_traces``.
      2. Rename events through ``new_name_mapping`` (``abstract_events``),
         convert the traces to lists of event labels and write both logs
         under ``LOG_OUT_PATH``.
      3. For every ``k`` in ``ks`` run ``kTailsRunner`` twice per log (once
         with ``graph_simplification=1``) and print the four node counts.

    NOTE(review): the ``continue`` after the print ends each iteration, so
    the model-writing statements at the bottom of the loop never run.
    """
    ## read log
    # k = 11
    # ks = [20, 40, 80]
    # ks = [1, 2, 3, 4, 6, 8, 10]
    LOG_SUFFIX = '.log'
    # NOTE(review): MODEL_SUFFIX is never read in this function.
    MODEL_SUFFIX = '_model.dot'
    LOG_PATH = '../../data/bear/findyourhouse_long.log'
    LOG_OUT_PATH = '../../data/bear/filtered_logs/'
    # Only referenced by the unreachable write2file calls at the end.
    GRAPH_OUTPUT = "../../data/bear_models/bear_models"
    ks = [1, 2, 3, 4]
    log_parser = BearLogParser(LOG_PATH)
    traces = log_parser.process_log(True)
    # Earlier variant: split by browser version instead of device class.
    # log1_traces = log_parser.get_traces_of_browser(traces, "Mozilla/4.0")
    # log2_traces = log_parser.get_traces_of_browser(traces, "Mozilla/5.0")
    # log1_filename = 'mozzila4'
    # log2_filename = 'mozzila5'
    log1_filename = 'desktop'
    log2_filename = 'mobile'
    log1_traces = log_parser.get_desktop_traces(traces)
    log2_traces = log_parser.get_mobile_traces(traces)
    # Earlier variant: keep only a fixed set of events.
    # events2keep = set(['search','sales_anncs',
    #                    'sales_page, facebook',
    #                    'sales_page, page_1',
    #                    'sales_page, page_2',
    #                    'sales_page, page_3',
    #                    'sales_page, page_4',
    #                    'sales_page, page_5',
    #                    'sales_page, page_6',
    #                    'sales_page, page_7',
    #                    'sales_page, page_8',
    #                    'sales_page, page_9',
    #                    ])
    # filter_traces_mozilla4 = log_parser.filter_events(events2keep, mozilla4_traces, True)
    # filter_traces_mozilla5 = log_parser.filter_events(events2keep, mozilla5_traces, True)

    # Collapse the numbered page events into one label per page kind and
    # map 'contacts_requested' onto 'contact_requested'.
    new_name_mapping = {
        'sales_page, page_1': 'sales_page',
        'sales_page, page_2': 'sales_page',
        'sales_page, page_3': 'sales_page',
        'sales_page, page_4': 'sales_page',
        'sales_page, page_5': 'sales_page',
        'sales_page, page_6': 'sales_page',
        'sales_page, page_7': 'sales_page',
        'sales_page, page_8': 'sales_page',
        'sales_page, page_9': 'sales_page',
        'renting_page, page_1': 'renting_page',
        'renting_page, page_2': 'renting_page',
        'contacts_requested': 'contact_requested'
    }
    filter_traces_log1 = log_parser.abstract_events(new_name_mapping, log1_traces)
    filter_traces_log2 = log_parser.abstract_events(new_name_mapping, log2_traces)
    log1_traces = log_parser.get_traces_as_lists_of_event_labels(
        filter_traces_log1)
    log2_traces = log_parser.get_traces_as_lists_of_event_labels(
        filter_traces_log2)

    # Write the two abstracted logs before building any model.
    from log_writer import LogWriter
    LogWriter.write_log(log1_traces, LOG_OUT_PATH + log1_filename + LOG_SUFFIX)
    LogWriter.write_log(log2_traces, LOG_OUT_PATH + log2_filename + LOG_SUFFIX)
    # mozilla4_traces = change_tuples_to_list(mozilla4_traces)
    # mozilla5_traces = change_tuples_to_list(mozilla5_traces)
    # traces = log_parser.get_traces_as_lists_of_event_labels

    # Convert every trace to a tuple before handing it to kTailsRunner.
    log1_traces_tups = []
    for tr in log1_traces:
        log1_traces_tups.append(tuple(tr))
    log1_traces = log1_traces_tups
    log2_traces_tups = []
    for tr in log2_traces:
        log2_traces_tups.append(tuple(tr))
    log2_traces = log2_traces_tups

    for k in ks:
        # The "_4"/"_5" suffixes are left over from the Mozilla/4.0 vs
        # Mozilla/5.0 variant; here they hold the log1 and log2 runners.
        ktail_runner_4 = kTailsRunner(log1_traces, k)
        ktail_runner_5 = kTailsRunner(log2_traces, k)
        ktail_runner_4_past = kTailsRunner(log1_traces, k)
        ktail_runner_5_past = kTailsRunner(log2_traces, k)
        ktail_runner_4.run_ktails(add_dummy_init=False, add_dummy_terminal=False)
        ktail_runner_5.run_ktails(add_dummy_init=False, add_dummy_terminal=False)
        # The "_past" runners differ only by graph_simplification=1.
        ktail_runner_4_past.run_ktails(add_dummy_init=False, add_dummy_terminal=False, graph_simplification=1)
        ktail_runner_5_past.run_ktails(add_dummy_init=False, add_dummy_terminal=False, graph_simplification=1)
        g4 = ktail_runner_4.get_graph()
        g5 = ktail_runner_5.get_graph()
        g4_past = ktail_runner_4_past.get_graph()
        g5_past = ktail_runner_5_past.get_graph()
        # Node counts: log1, log1 simplified, log2, log2 simplified.
        print(len(g4.nodes()), len(g4_past.nodes()), len(g5.nodes()), len(g5_past.nodes()))
        # NOTE(review): this continue makes the rest of the loop body
        # unreachable; no model file is written while it is in place.
        continue
        filtering_str = ""
        low_probability_filter = None  ## 0.05
        # if low_probability_filter:
        #     print("FILTER APPLIED: low prob filter!")
        #     g4 = graph_filtering.filter_low_probability_transitions(g4, low_probability_filter)
        #     g5 = graph_filtering.filter_low_probability_transitions(g5, low_probability_filter)
        #     filtering_str += "_lp_" + str(low_probability_filter)
        #
        # simple_filter = 20
        # if simple_filter:
        #     print("FILTER APPLIED: simple filter!")
        #     g4 = graph_filtering.simple_filter_graph(g4, simple_filter, False)
        #     g5 = graph_filtering.simple_filter_graph(g5, simple_filter, False)
        #     filtering_str += "_sim_" + str(simple_filter)
        # NOTE(review): DOT_SUFFIX is not defined in this function -
        # presumably a module-level constant; confirm it exists.
        ktail_runner_4.write2file(GRAPH_OUTPUT + log1_filename + filtering_str + '_k' + str(k) + DOT_SUFFIX)
        ktail_runner_5.write2file(GRAPH_OUTPUT + log2_filename + filtering_str + '_k' + str(k) + DOT_SUFFIX)
        print("done running with k=", k)