def test_insert_data_in_text(self):
    """Check ``LogWriter.insert_data_in_text`` on a text of near-anagrams.

    The expected string has the parenthesised data list placed directly
    after the word "list" and leaves the look-alike words untouched.
    """
    source_text = "silt kilst sliks klast list tilst liist"
    values = [1111, 11, 1111, 11, 111, 1]
    expected = (
        "silt kilst sliks klast list "
        "([1111, 11, 1111, 11, 111, 1]) tilst liist"
    )
    writer = LogWriter(values, source_text)
    actual = writer.insert_data_in_text(source_text, values)
    self.assertEqual(actual, expected)
Пример #2
0
def split_logs_to_two_and_write(traces):
    """Split ``traces`` into two halves and write each half as a label log.

    The split itself is delegated to ``split_trace_in_half_by_time``; every
    trace in each half is reduced to the list of its events' ``label``
    values before being handed to ``LogWriter.write_log``.
    """
    earlier, later = split_trace_in_half_by_time(traces)
    earlier_labels = [[event.label for event in trace] for trace in earlier]
    later_labels = [[event.label for event in trace] for trace in later]

    from log_writer import LogWriter
    LogWriter.write_log(earlier_labels,
                        '../../data/bear/first_half_traces.log')
    LogWriter.write_log(later_labels,
                        '../../data/bear/last_half_traces.log')
Пример #3
0
def split_traces_by_months_and_write(traces):
    """Group traces by the (year, month) of their first event and write them.

    Each trace is stored as the list of its events' ``label`` values. One
    log file named ``<year>_<month>.log`` is written per group, and a
    summary line with the group size is printed for each.
    """
    labels_by_month = {}
    for trace in traces:
        started = trace[0].time
        month_key = (started.year, started.month)
        labels = [event.label for event in trace]
        labels_by_month.setdefault(month_key, []).append(labels)

    from log_writer import LogWriter
    for month_key, month_traces in labels_by_month.items():
        year, month = month_key
        print('month/year', month_key, 'had', len(month_traces), 'traces')
        target = '../../data/bear/' + str(year) + '_' + str(month) + '.log'
        LogWriter.write_log(month_traces, target)
Пример #4
0
def split_traces_by_quarters_and_write(traces):
    """Bucket traces by calendar quarter of their first event and write them.

    Traces whose first event is dated 2011 are skipped. Every remaining
    trace is reduced to the list of its events' ``label`` values and added
    to Q1..Q4 according to the month of its first event. One log file per
    quarter (``Q1.log`` .. ``Q4.log``) is written, including empty quarters.
    """
    # (quarter name, last month that still belongs to it), checked in order.
    quarter_limits = (('Q1', 3), ('Q2', 6), ('Q3', 9), ('Q4', 12))
    quarters = {name: [] for name, _ in quarter_limits}

    for trace in traces:
        if trace[0].time.year == 2011:
            continue
        labels = [event.label for event in trace]
        month = trace[0].time.month
        for name, last_month in quarter_limits:
            if month <= last_month:
                quarters[name].append(labels)
                break

    from log_writer import LogWriter
    for name, quarter_traces in quarters.items():
        # NOTE(review): the printed label reads 'month/year' although the
        # key is a quarter name; kept as-is to preserve the output.
        print('month/year', name, 'had', len(quarter_traces), 'traces')
        LogWriter.write_log(quarter_traces,
                            '../../data/bear/' + str(name) + '.log')
class MyTest(unittest.TestCase):
    """Test of ``LogWriter.combining_method`` against a fixed expected text."""

    def setUp(self):
        """Build a ``LogWriter`` from a four-element list and a header text."""
        self.list_data = [1, 2, 3, 4]
        # The header starts with a newline and two tabs, and ends with a
        # newline and two tabs.
        self.head_text = (
            "\n\t\tThe following list represents the total number of "
            "invisible unicorns in classroom.\n\t\t"
        )
        self.test_instance = LogWriter(self.list_data, self.head_text)

    def test_combining_method(self):
        """The combined text must match the expected output exactly."""
        expected_parts = (
            "\n\t\tThe following list represents the total number of "
            "invisible unicorns in classroom.\n\t\t",
            "_________\n After change: \n",
            "\n\t\tThe following list ([1, 2, 3, 4]) represents the total "
            "number of invisible unicorns in classroom.\n\t\t",
            "0 O 0 O 0 O 0 O 0 O 0 O",
            "7.483314773547883\n",
            "To seek the holy grail\n",
            "2218.473985099097",
        )
        total_text = "".join(expected_parts)
        combined_text = self.test_instance.combining_method()
        self.assertEqual(total_text, combined_text)
Пример #6
0
 def __build_sentence_list(id: int,
                           caption_list: list,
                           f_name: str,
                           source_film: str = None,
                           target_audio: str = None):
     """Turn captions into ``ModelSentence`` objects, optionally with audio.

     ``id`` is incremented before each caption is processed, so the first
     sentence receives ``id + 1``; the increment also happens for captions
     that are later skipped because of a ``ValueError``.

     When ``source_film`` is truthy, an mp3 clip is cut for every caption
     and its location (``target_audio`` + backslash + clip name) is passed
     to the sentence. A failed cut is logged and the clip name falls back
     to an empty string. Without ``source_film`` the sentence is built
     without an audio path.

     Returns the list of sentences that were built successfully.
     """
     sentences = []
     for caption in caption_list:
         try:
             id += 1
             difficulty = CaptionFactory.__judge_level(caption.english)
             if not source_film:
                 sentences.append(
                     data_connector.model_sentence.ModelSentence(
                         id, caption, f_name, difficulty))
                 continue
             try:
                 clip_name = film_spliter.spliter.Spliter.split_to_mp3(
                     source_film, target_audio, caption)
             except ValueError as e:
                 # Message: "video cutting module error".
                 LogWriter.write_warning(e, "视频裁切模块错误")
                 clip_name = ""
             except IOError as e:
                 # Message: "failed to read the video file or write the
                 # audio file".
                 LogWriter.write_warning(e, "读取视频文件或写入音频文件失败")
                 clip_name = ""
             audio_location = target_audio + "\\" + clip_name
             sentences.append(
                 data_connector.model_sentence.ModelSentence(
                     id, caption, f_name, difficulty, audio_location))
         except ValueError as e:
             # Message: "string split error: the original sentence is ...".
             LogWriter.write_warning(e, "字符串分割错误:原句为" + caption.english)
     return sentences
Пример #7
0
def produce_logs():
    """Generate six logs of 100000 traces each and write them to disk.

    Two ``ProtocolModel`` objects are created up front, both from instance
    0 with transition probabilities assigned. Logs 0-2 are produced from
    the first model and logs 3-5 from the second. For every log the
    model's transition probabilities are written to ``LOGS_OUTPUT_PATH``
    and the log itself goes to ``LOGS_OUTPUT_PATH + 'l<instance_id>.log'``.
    """
    model_count = 6
    traces_per_log = 100000

    first_model = ProtocolModel(MODELS_PATH, 0, assign_transtion_probs=True)
    second_model = ProtocolModel(MODELS_PATH, 0, assign_transtion_probs=True)

    for instance_id in range(model_count):
        print('processing instance:', instance_id, MODELS_PATH)
        # The first three instances share one model, the rest the other.
        if instance_id < 3:
            source_model = first_model
        else:
            source_model = second_model
        generated_log = LogGenerator.produce_log_from_model(
            source_model.graph,
            transition_probability_attribute=TRANSITION_PROBABILITY_ATTRIBUTE,
            traces2produce=traces_per_log)
        source_model.write_transitions_probabilities(LOGS_OUTPUT_PATH)
        log_file = LOGS_OUTPUT_PATH + 'l' + str(instance_id) + ".log"
        LogWriter.write_log(generated_log, log_file)
Пример #8
0
 def load_dir(path: str, id: int, audio_path: str):
     """Build sentences from every ``.srt`` file directly inside ``path``.

     For each subtitle file the captions are loaded and converted with
     ``__build_sentence_list``. If ``__find_film`` returns a truthy value,
     the film ``<path>\\<name>.mp4`` and ``audio_path`` are passed along so
     audio can be produced; otherwise only the captions are used. ``id`` is
     advanced by the number of captions after each successfully processed
     file. Files that raise ``IOError`` or ``ValueError`` are logged and
     skipped.

     Returns the combined list of sentences from all processed files.
     """
     collected = []
     for srt_name in os.listdir(path):
         if not srt_name.endswith(".srt"):
             continue
         has_film = CaptionFactory.__find_film(path, srt_name)
         try:
             captions = CaptionFactory.load_srt_file(path + '\\' + srt_name)
             # File name without its ".srt" extension.
             title = srt_name[:-4]
             if has_film:
                 film_source = path + '\\' + title + ".mp4"
                 collected += CaptionFactory.__build_sentence_list(
                     id, captions, title, film_source, audio_path)
             else:
                 collected += CaptionFactory.__build_sentence_list(
                     id, captions, title)
             id += len(captions)
         except IOError as e:
             # Message: "failed to read the subtitle file, file name: ...".
             LogWriter.write_warning(e, "字幕文件读取失败,文件名:" + str(srt_name))
         except ValueError as e:
             # Message: "failed to parse the subtitle file, file name: ...".
             LogWriter.write_warning(e, "字幕文件解析失败,文件名:" + str(srt_name))
     return collected
Пример #9
0
 def __init__(self, start_id: int = 0):
     """Run the whole pipeline: load settings, parse captions, upload.

     Steps, in order:

     1. Decode the XML settings and load all sentences from the caption
        directory, numbering them from ``start_id``.
     2. Open a database connection, split every sentence into words and
        upload the sentence, then upload the collected words.

     Any exception is logged through ``LogWriter.write_warning`` and
     re-raised. The two phases are guarded separately so that a database
     failure is reported once, under the database message, instead of
     being logged a second time as a configuration failure. The database
     connection is closed even when an upload fails.
     """
     try:
         self.__db_setting = self.__decode_xml()
         # Message: "splitting subtitle files and cutting videos".
         print('分解字幕文件并分割视频....')
         self.__sentence_list = analyser.caption_factory.CaptionFactory.load_dir(
             self.__caption_path, start_id, self.__audio_path)
         self.__word_list = []
     except Exception as e:
         # Message: "failed to read the configuration file".
         LogWriter.write_warning(e, "读取配置文件失败")
         raise

     try:
         dm = data_connector.data_manager.DataManager(self.__db_setting)
         try:
             # Message: "analysing example sentences and uploading".
             print('分析例句并上传....')
             for sentence in self.__sentence_list:
                 self.__split_word(sentence)
                 dm.execute_sql(sentence.to_sql())
             # Message: "uploading analysis results".
             print('上传分析结果....')
             for word in self.__word_list:
                 dm.execute_sql(word.to_sql())
         finally:
             # Release the connection on both the success and error path.
             dm.close_connection()
     except Exception as e:
         # Message: "failed to connect to the database".
         LogWriter.write_warning(e, "连接数据库失败")
         raise
     # Message: "job finished".
     print('作业结束。')
	def test_computation(self):
		"""``LogWriter.computation(3)`` must equal sqrt(3) + 9 + sqrt(sqrt(3))."""
		comp_res = LogWriter.computation(3)
		root = math.sqrt(3)
		expected = root + 9 + math.sqrt(root)
		self.assertEqual(expected, comp_res)
	def setUp(self):
		"""Build a ``LogWriter`` from a four-element list and a header text."""
		self.list_data = [1, 2, 3, 4]
		# The header starts with a newline and two tabs, and ends with a
		# newline and two tabs.
		self.head_text = (
			"\n\t\tThe following list represents the total number of "
			"invisible unicorns in classroom.\n\t\t"
		)
		self.test_instance = LogWriter(self.list_data, self.head_text)
	def test_get_second_word(self):
		"""``LogWriter.get_second_word`` must return the second word."""
		cases = (
			("To seek the holy grail", "seek"),
			("answers to questions", "to"),
		)
		for sentence, expected in cases:
			self.assertEqual(LogWriter.get_second_word(sentence), expected)
	def test_what_is_your_quest(self):
		"""Check the default quest text and one with a custom ending."""
		default_quest = LogWriter.what_is_your_quest()
		self.assertEqual(default_quest, "To seek the holy grail")
		custom_quest = LogWriter.what_is_your_quest("answers")
		self.assertEqual(custom_quest, "To seek the answers")
	def test_what_is_added_the_meaning_of_life(self):
		"""Expect sqrt(42) with no argument and 6.0 for an argument of -6."""
		without_argument = LogWriter.what_is_added_the_meaning_of_life()
		self.assertEqual(without_argument, math.sqrt(42))
		with_minus_six = LogWriter.what_is_added_the_meaning_of_life(-6)
		self.assertEqual(with_minus_six, 6.0)
class MyTest(unittest.TestCase):
    """Unit tests for ``LogWriter``.

    Every test gets a fresh ``LogWriter`` built in ``setUp`` from the list
    ``[1, 2, 3, 4]`` and a one-sentence header text.
    """

    def setUp(self):
        """Create the header text, the data list and the instance under test."""
        # The header starts with a newline and two tabs, and ends with a
        # newline and two tabs.
        self.head_text = (
            "\n\t\tThe following list represents the total number of "
            "invisible unicorns in classroom.\n\t\t"
        )
        self.list_data = [1, 2, 3, 4]
        self.test_instance = LogWriter(self.list_data, self.head_text)

    def test_init(self):
        """The constructor must store both arguments on the instance."""
        self.assertEqual(self.test_instance.list_data, self.list_data)
        self.assertEqual(self.test_instance.head_text, self.head_text)

    def test_avg_every_second_element(self):
        """For [1, 2, 3, 4] the expected average is 3."""
        self.assertEqual(
            LogWriter.avg_every_second_element([1, 2, 3, 4]), 3)

    def test_get_every_second_element(self):
        """For [1, 2, 34, 3] the expected selection is [2, 3]."""
        self.assertEqual(
            LogWriter.get_every_second_element([1, 2, 34, 3]), [2, 3])

    def test_insert_data_in_text(self):
        """The data list must be inserted right after the word "list"."""
        examplary_text = "silt kilst sliks klast list tilst liist"
        list_data = [1111, 11, 1111, 11, 111, 1]
        instance = LogWriter(list_data, examplary_text)
        new_text = instance.insert_data_in_text(examplary_text, list_data)
        self.assertEqual(
            new_text,
            "silt kilst sliks klast list "
            "([1111, 11, 1111, 11, 111, 1]) tilst liist")

    def test_count_o(self):
        """Check the expected counts for two sample strings."""
        self.assertEqual(LogWriter.count_o("000ooo00ooo0o0"), 7)
        self.assertEqual(LogWriter.count_o("oOooO"), 5)

    def test_get_first_part(self):
        """The first part returns the text before/after change and a count."""
        first_text_part, count = self.test_instance.get_first_part()
        self.assertEqual(count, 14)
        result_text = (
            "\n\t\tThe following list represents the total number of "
            "invisible unicorns in classroom.\n\t\t"
            "_________\n After change: \n"
            "\n\t\tThe following list ([1, 2, 3, 4]) represents the total "
            "number of invisible unicorns in classroom.\n\t\t"
        )
        self.assertEqual(result_text, first_text_part)

    def test_what_is_added_the_meaning_of_life(self):
        """Expect sqrt(42) with no argument and 6.0 for an argument of -6."""
        self.assertEqual(
            LogWriter.what_is_added_the_meaning_of_life(), math.sqrt(42))
        self.assertEqual(
            LogWriter.what_is_added_the_meaning_of_life(-6), 6.0)

    def test_what_is_your_quest(self):
        """Check the default quest text and one with a custom ending."""
        self.assertEqual(
            LogWriter.what_is_your_quest(), "To seek the holy grail")
        self.assertEqual(
            LogWriter.what_is_your_quest("answers"), "To seek the answers")

    def test_get_second_word(self):
        """``get_second_word`` must return the second word of a sentence."""
        self.assertEqual(
            LogWriter.get_second_word("To seek the holy grail"), "seek")
        self.assertEqual(
            LogWriter.get_second_word("answers to questions"), "to")

    def test_o_count_is_even(self):
        """The fixture's ``o_count`` is expected to be odd; +1 makes it even."""
        self.assertEqual(False, self.test_instance.o_count_is_even())
        self.test_instance.o_count += 1
        self.assertEqual(True, self.test_instance.o_count_is_even())
        self.test_instance.o_count -= 1

    def test_get_movie_reference(self):
        """The movie reference must change with the value of ``o_count``."""
        reference1 = self.test_instance.get_movie_reference()
        actual_reference1 = "To seek the following"
        self.assertEqual(reference1, actual_reference1)

        self.test_instance.o_count = 6
        reference2 = self.test_instance.get_movie_reference()
        actual_reference2 = str(math.sqrt(42 + 6))
        self.assertEqual(reference2, actual_reference2)

        self.test_instance.o_count = 8
        reference3 = self.test_instance.get_movie_reference()
        actual_reference3 = (
            str(math.sqrt(42 + 8)) + "\n" + "To seek the holy grail")
        self.assertEqual(reference3, actual_reference3)

        self.test_instance.o_count = 7

    def test_computation(self):
        """``computation(3)`` must equal sqrt(3) + 9 + sqrt(sqrt(3))."""
        comp_res = LogWriter.computation(3)
        self.assertEqual(
            math.sqrt(3) + 9 + math.sqrt(math.sqrt(3)), comp_res)

    def test_get_second_part(self):
        """The second part is the movie reference, plus an optional result."""
        reference = self.test_instance.get_movie_reference()
        self.assertEqual(self.test_instance.get_second_part(), reference)
        # With a custom computation the expected text gains a line "50".
        self.assertEqual(
            self.test_instance.get_second_part(computation=lambda x: x + 3),
            reference + "\n" + str(50)
        )

    def test_combining_method(self):
        """The combined text must match the expected output exactly."""
        combined_text = self.test_instance.combining_method()
        # NOTE(review): the two numbers below are 12-significant-digit
        # float strings, which is what Python 2 ``str(float)`` produces.
        # Python 3 prints the shortest repr (7.483314773547883 and
        # 2218.473985099097), so confirm which interpreter this suite
        # targets before relying on this expectation.
        total_text = (
            "\n\t\tThe following list represents the total number of "
            "invisible unicorns in classroom.\n\t\t"
            "_________\n After change: \n"
            "\n\t\tThe following list ([1, 2, 3, 4]) represents the total "
            "number of invisible unicorns in classroom.\n\t\t"
            "0 O 0 O 0 O 0 O 0 O 0 O"
            "7.48331477355\n"
            "To seek the holy grail\n"
            "2218.4739851"
        )
        self.assertEqual(total_text, combined_text)
Пример #19
0
def bear_based_experiments(*, write_graphs=False):
    """Compare k-Tails models built from desktop and mobile BEAR traces.

    The log at ``LOG_PATH`` is parsed and split into desktop and mobile
    traces. Paged event names are collapsed through ``new_name_mapping``,
    both label logs are written to ``LOG_OUT_PATH``, and for every ``k`` in
    ``ks`` four k-Tails runs are performed: each log once with default
    settings and once with ``graph_simplification=1``. The node counts of
    the four resulting graphs are printed per ``k``.

    Args:
        write_graphs: When true, also write the two default-settings models
            to files under ``GRAPH_OUTPUT`` after each ``k``. Defaults to
            ``False``, which matches the previous behaviour where the
            writing step was skipped by an unconditional ``continue``.
    """
    LOG_SUFFIX = '.log'
    LOG_PATH = '../../data/bear/findyourhouse_long.log'
    LOG_OUT_PATH = '../../data/bear/filtered_logs/'
    GRAPH_OUTPUT = "../../data/bear_models/bear_models"
    ks = [1, 2, 3, 4]

    log_parser = BearLogParser(LOG_PATH)
    traces = log_parser.process_log(True)

    log1_filename = 'desktop'
    log2_filename = 'mobile'
    log1_traces = log_parser.get_desktop_traces(traces)
    log2_traces = log_parser.get_mobile_traces(traces)

    # Collapse the per-page event names into one name per page type and
    # map 'contacts_requested' to 'contact_requested'.
    new_name_mapping = {
        'sales_page, page_1': 'sales_page',
        'sales_page, page_2': 'sales_page',
        'sales_page, page_3': 'sales_page',
        'sales_page, page_4': 'sales_page',
        'sales_page, page_5': 'sales_page',
        'sales_page, page_6': 'sales_page',
        'sales_page, page_7': 'sales_page',
        'sales_page, page_8': 'sales_page',
        'sales_page, page_9': 'sales_page',
        'renting_page, page_1': 'renting_page',
        'renting_page, page_2': 'renting_page',
        'contacts_requested': 'contact_requested'
    }

    filter_traces_log1 = log_parser.abstract_events(new_name_mapping,
                                                    log1_traces)
    filter_traces_log2 = log_parser.abstract_events(new_name_mapping,
                                                    log2_traces)

    log1_traces = log_parser.get_traces_as_lists_of_event_labels(
        filter_traces_log1)
    log2_traces = log_parser.get_traces_as_lists_of_event_labels(
        filter_traces_log2)

    from log_writer import LogWriter
    LogWriter.write_log(log1_traces, LOG_OUT_PATH + log1_filename + LOG_SUFFIX)
    LogWriter.write_log(log2_traces, LOG_OUT_PATH + log2_filename + LOG_SUFFIX)

    # The runners below are fed tuples rather than lists.
    log1_traces = [tuple(tr) for tr in log1_traces]
    log2_traces = [tuple(tr) for tr in log2_traces]

    for k in ks:
        runner_log1 = kTailsRunner(log1_traces, k)
        runner_log2 = kTailsRunner(log2_traces, k)
        runner_log1_past = kTailsRunner(log1_traces, k)
        runner_log2_past = kTailsRunner(log2_traces, k)
        runner_log1.run_ktails(add_dummy_init=False,
                               add_dummy_terminal=False)
        runner_log2.run_ktails(add_dummy_init=False,
                               add_dummy_terminal=False)
        runner_log1_past.run_ktails(add_dummy_init=False,
                                    add_dummy_terminal=False,
                                    graph_simplification=1)
        runner_log2_past.run_ktails(add_dummy_init=False,
                                    add_dummy_terminal=False,
                                    graph_simplification=1)
        g1 = runner_log1.get_graph()
        g2 = runner_log2.get_graph()
        g1_past = runner_log1_past.get_graph()
        g2_past = runner_log2_past.get_graph()
        print(len(g1.nodes()), len(g1_past.nodes()), len(g2.nodes()),
              len(g2_past.nodes()))

        if write_graphs:
            # No graph filters are applied, so the filter part of the
            # file name stays empty.
            filtering_str = ""
            # NOTE(review): DOT_SUFFIX is not defined in this function; it
            # is expected to exist at module level - confirm before
            # enabling write_graphs.
            runner_log1.write2file(GRAPH_OUTPUT + log1_filename +
                                   filtering_str + '_k' + str(k) + DOT_SUFFIX)
            runner_log2.write2file(GRAPH_OUTPUT + log2_filename +
                                   filtering_str + '_k' + str(k) + DOT_SUFFIX)
            print("done running with k=", k)