def test_response_time_high_cutoff_percentile(self):
    """Verify response times and RT cutoffs for the 5% and 10% percentiles.

    For every scenario the stored expectation is loaded from
    ``self.test_data_dir`` and compared with both values returned by
    ``channel.response_time``.
    """
    # this test assumes config.CUTOFF_TIME_STRATEGY = "TWO_SIGMA"
    scenarios = (
        (5.0, "/channel/truncated_rt_5percent", 2,
         "Error in computing response time with 5% cutoff percentile.",
         "Error in computing RT cutoff with 5% cutoff percentile."),
        (10.0, "/channel/truncated_rt_10percent", 3,
         "Error in computing response time with 10% cutoff percentile.",
         "Error in computing RT cutoff with 10% cutoff percentile."),
    )

    for percentile, fixture, wanted_cutoff, rt_error, cutoff_error in scenarios:
        wanted_rt = util.load_from_disk(self.test_data_dir + fixture)
        got_rt, got_cutoff = channel.response_time(
            self.log_data, self.nicks, self.nick_same_list, percentile)

        assert got_rt == wanted_rt, rt_error
        assert got_cutoff == wanted_cutoff, cutoff_error
def test_response_time_from_channel_lib(self, log_data, nicks, nick_same_list):
    """Compare ``channel.response_time`` output with ``resp_time.csv``.

    Args:
        log_data: log data handed to ``channel.response_time``.
        nicks: nick collection handed to ``channel.response_time``.
        nick_same_list: nick alias structure handed to
            ``channel.response_time``.
    """
    # Runs before ``expected_output_directory`` is read below (presumably it
    # points that module-level path at the fixtures for this log data).
    update_expected_output_directory(log_data)
    actual = channel.response_time(log_data, nicks, nick_same_list)

    reference_rows = []
    reference_csv = expected_output_directory + 'resp_time.csv'
    # The list is passed in empty and compared afterwards; the helper's
    # return value is ignored.
    csv_to_list(reference_csv, reference_rows)

    self.assertListEqual(actual, reference_rows, msg=None)
def test_conversation_characteristics_cutoff_1(self, mock_savefig):
    """Check the curve-fit parameters produced for ``cutoff = 1``.

    ``channel.response_time`` and ``channel.conv_len_conv_refr_time`` are
    run on ``self.log_data_kubuntu_devel``; each resulting series is fitted
    through ``vis`` and the returned parameters are compared, element by
    element, with the stored reference values.

    Args:
        mock_savefig: injected mock, unused in the body (presumably supplied
            by a patch decorator outside this view).
    """
    cutoff = 1
    expected_result_conv_len = [
        0.45678248067618998, 1.9431782685053713,
        0.0030314547153581827, 3.3570362370587976e-05
    ]
    expected_result_resp_time = [
        0.26876242441433712, 0.38822996056503406,
        0.0001355301591146847, 1.4291783519203551e-05
    ]
    expected_result_conv_ref = [
        0.0031066946048193583, 0.0089793356687177077,
        2.3338045062882878e-05, 8.2373085916393017e-08, 66
    ]

    truncated_rt, rt_cutoff_time = channel.response_time(
        self.log_data_kubuntu_devel, self.nicks, self.nick_same_list, cutoff)
    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
        self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
        rt_cutoff_time, cutoff)

    conv_len_fit = vis.exponential_curve_fit_and_plot(
        conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
    resp_time_fit = vis.exponential_curve_fit_and_plot(
        truncated_rt, self.current_directory,
        "resp_time_cutoff" + str(cutoff))
    conv_ref_time_fit = vis.exponential_curve_fit_and_plot_x_shifted(
        conv_ref_time, self.current_directory,
        "conv_ref_time_cutoff" + str(cutoff))

    # assertTrue(actual, expected) only tests the truthiness of `actual`
    # (the second argument is the failure message), so the values are
    # compared explicitly here.
    comparisons = [
        ("conv_len", conv_len_fit, expected_result_conv_len),
        ("resp_time", resp_time_fit, expected_result_resp_time),
        ("conv_ref_time", conv_ref_time_fit, expected_result_conv_ref),
    ]
    for label, actual, expected in comparisons:
        self.assertEqual(
            len(actual), len(expected),
            "Unexpected number of {0} fit parameters.".format(label))
        for index, (got, want) in enumerate(zip(actual, expected)):
            # Relative tolerance: the parameters span several orders of
            # magnitude and come from a numerical fit.
            self.assertAlmostEqual(
                got, want, delta=max(abs(want) * 1e-6, 1e-12),
                msg="{0} fit parameter {1} differs.".format(label, index))
def test_conversation_characteristics_cutoff_0(self, mock_savefig):
    """Check the curve-fit parameters produced for ``cutoff = 0``.

    ``channel.response_time`` and ``channel.conv_len_conv_refr_time`` are
    run on ``self.log_data_kubuntu_devel``; each resulting series is fitted
    through ``vis`` and the returned parameters are compared, element by
    element, with the stored reference values.

    Args:
        mock_savefig: injected mock, unused in the body (presumably supplied
            by a patch decorator outside this view).
    """
    cutoff = 0
    expected_result_conv_len = [
        0.46025248575487415, 1.8745480617100398,
        0.0014084453709393393, 1.8113237256968182e-05
    ]
    expected_result_resp_time = [
        0.26599443483759627, 0.38817554962605116,
        0.00012042990450484642, 1.1831364434688785e-05
    ]
    expected_result_conv_ref = [
        0.004067315269095536, 0.01296093837728012,
        1.761952783942606e-05, 5.6259486000435723e-08, 60
    ]

    truncated_rt, rt_cutoff_time = channel.response_time(
        self.log_data_kubuntu_devel, self.nicks, self.nick_same_list, cutoff)
    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
        self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
        rt_cutoff_time, cutoff)

    conv_len_fit = vis.exponential_curve_fit_and_plot(
        conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
    resp_time_fit = vis.exponential_curve_fit_and_plot(
        truncated_rt, self.current_directory,
        "resp_time_cutoff" + str(cutoff))
    conv_ref_time_fit = vis.exponential_curve_fit_and_plot_x_shifted(
        conv_ref_time, self.current_directory,
        "conv_ref_time_cutoff" + str(cutoff))

    # assertTrue(actual, expected) only tests the truthiness of `actual`
    # (the second argument is the failure message), so the values are
    # compared explicitly here.
    comparisons = [
        ("conv_len", conv_len_fit, expected_result_conv_len),
        ("resp_time", resp_time_fit, expected_result_resp_time),
        ("conv_ref_time", conv_ref_time_fit, expected_result_conv_ref),
    ]
    for label, actual, expected in comparisons:
        self.assertEqual(
            len(actual), len(expected),
            "Unexpected number of {0} fit parameters.".format(label))
        for index, (got, want) in enumerate(zip(actual, expected)):
            # Relative tolerance: the parameters span several orders of
            # magnitude and come from a numerical fit.
            self.assertAlmostEqual(
                got, want, delta=max(abs(want) * 1e-6, 1e-12),
                msg="{0} fit parameter {1} differs.".format(label, index))
def test_conversation_characteristics_cutoff_5(self, mock_savefig):
    """Check the curve-fit parameters produced for ``cutoff = 5``.

    ``channel.response_time`` and ``channel.conv_len_conv_refr_time`` are
    run on ``self.log_data_kubuntu_devel``; each resulting series is fitted
    through ``vis`` and the returned parameters are compared, element by
    element, with the stored reference values.

    Args:
        mock_savefig: injected mock, unused in the body (presumably supplied
            by a patch decorator outside this view).
    """
    cutoff = 5
    expected_result_conv_len = [
        0.44916983849233633, 1.9156349592761313,
        0.0048790728866266418, 4.3411589194639429e-05
    ]
    expected_result_resp_time = [
        0.28001731891457893, 0.38845839930487419,
        0.00020016446653020847, 2.896185549800808e-05
    ]
    expected_result_conv_ref = [
        0.0019379564807119043, 0.0048940078069499857,
        4.2070926227686924e-05, 1.7671895171226243e-07, 90
    ]

    truncated_rt, rt_cutoff_time = channel.response_time(
        self.log_data_kubuntu_devel, self.nicks, self.nick_same_list, cutoff)
    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
        self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
        rt_cutoff_time, cutoff)

    conv_len_fit = vis.exponential_curve_fit_and_plot(
        conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
    resp_time_fit = vis.exponential_curve_fit_and_plot(
        truncated_rt, self.current_directory,
        "resp_time_cutoff" + str(cutoff))
    conv_ref_time_fit = vis.exponential_curve_fit_and_plot_x_shifted(
        conv_ref_time, self.current_directory,
        "conv_ref_time_cutoff" + str(cutoff))

    # assertTrue(actual, expected) only tests the truthiness of `actual`
    # (the second argument is the failure message), so the values are
    # compared explicitly here.
    comparisons = [
        ("conv_len", conv_len_fit, expected_result_conv_len),
        ("resp_time", resp_time_fit, expected_result_resp_time),
        ("conv_ref_time", conv_ref_time_fit, expected_result_conv_ref),
    ]
    for label, actual, expected in comparisons:
        self.assertEqual(
            len(actual), len(expected),
            "Unexpected number of {0} fit parameters.".format(label))
        for index, (got, want) in enumerate(zip(actual, expected)):
            # Relative tolerance: the parameters span several orders of
            # magnitude and come from a numerical fit.
            self.assertAlmostEqual(
                got, want, delta=max(abs(want) * 1e-6, 1e-12),
                msg="{0} fit parameter {1} differs.".format(label, index))
def test_response_time_high_cutoff_percentile(self,mock_to_graph, mock_connected_nick_list, mock_nick_sen,\
        mock_correct_last_char_list,mock_correct_last_char_CR, mock_rec_list_splice,\
        mock_msg_line,mock_splice_find, mock_stat_dist, mock_truncate_table):
    """Verify response times and RT cutoffs at 5% and 10% with mocked helpers.

    The injected mocks are wired to the stand-ins defined on the test case,
    and ``mock_truncate_table`` yields one stored table per
    ``channel.response_time`` call. ``mock_stat_dist`` is accepted but not
    configured here.
    """
    # this test assumes config.CUTOFF_TIME_STRATEGY = "TWO_SIGMA"
    mock_to_graph.return_value = self.to_graph

    mock_connected_nick_list.side_effect = self.mock_create_connected_nick_list
    mock_nick_sen.side_effect = self.mock_get_nick_sen_rec
    mock_correct_last_char_list.side_effect = self.mock_correct_last_char_list
    mock_correct_last_char_CR.side_effect = self.mock_correctLastCharCR
    mock_rec_list_splice.side_effect = self.mock_rec_list_splice
    mock_msg_line.side_effect = self.mock_check_if_msg_line
    mock_splice_find.side_effect = self.mock_splice_find

    # Both tables are loaded up front; a list side_effect hands them out in
    # order, one per call of the mocked function.
    table_for_5_percent = util.load_from_disk(
        self.test_data_dir + "/channel/rt_high_5_percent")
    table_for_10_percent = util.load_from_disk(
        self.test_data_dir + "/channel/rt_high_10_percent")
    mock_truncate_table.side_effect = [table_for_5_percent, table_for_10_percent]

    scenarios = (
        (5.0, "/channel/truncated_rt_5percent", 2,
         "Error in computing response time with 5% cutoff percentile.",
         "Error in computing RT cutoff with 5% cutoff percentile."),
        (10.0, "/channel/truncated_rt_10percent", 3,
         "Error in computing response time with 10% cutoff percentile.",
         "Error in computing RT cutoff with 10% cutoff percentile."),
    )

    for percentile, fixture, wanted_cutoff, rt_error, cutoff_error in scenarios:
        wanted_rt = util.load_from_disk(self.test_data_dir + fixture)
        got_rt, got_cutoff = channel.response_time(
            self.log_data, self.nicks, self.nick_same_list, percentile)

        self.assertEqual(got_rt, wanted_rt, rt_error)
        self.assertEqual(got_cutoff, wanted_cutoff, cutoff_error)
def test_response_time_low_cutoff_percentile(self):
    """Verify response times and RT cutoffs for the 0% and 1% percentiles.

    Expectations are loaded from files under ``current_directory`` and
    compared with both values returned by ``channel.response_time``; the
    expected cutoff time is 1 in both scenarios.
    """
    # this test assumes config.CUTOFF_TIME_STRATEGY = "TWO_SIGMA"
    scenarios = (
        (0.0, "/data/resp_time", 1,
         "Error in computing response time with 0% cutoff percentile.",
         "Error in computing RT cutoff with 0% cutoff percentile."),
        (1.0, "/data/truncated_rt_1percent", 1,
         "Error in computing response time with 1% cutoff percentile.",
         "Error in computing RT cutoff with 1% cutoff percentile."),
    )

    for percentile, fixture, wanted_cutoff, rt_error, cutoff_error in scenarios:
        wanted_rt = util.load_from_disk(current_directory + fixture)
        got_rt, got_cutoff = channel.response_time(
            self.log_data, self.nicks, self.nick_same_list, percentile)

        assert got_rt == wanted_rt, rt_error
        assert got_cutoff == wanted_cutoff, cutoff_error
# Analysis stage of the script: run the network, user and channel analyses
# over the already-loaded `log_data`, `nicks` and `nick_same_list`.
# NOTE(review): `message_number_graph` is read below but not assigned in this
# span; presumably it is built just before this point - confirm.
message_number_graph_day_list = network.message_number_graph(
    log_data, nicks, nick_same_list, True)
degree_anal_message_numder = network.degree_analysis_on_graph(
    message_number_graph)
# Same call with the last flag set to True and then False (presumably a list
# of graphs versus a single aggregate graph, going by the target names).
message_time_graph_list = network.message_time_graph(log_data, nicks,
                                                     nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

# These two calls discard their return values; the second repeats the degree
# analysis whose result is already bound above.
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")

# ============== OUTPUT ================
# Draw the message-number graph and write the three degree tables
# (the "formatted_for_csv" entries) to `output_directory`.
saver.draw_nx_graph(message_number_graph, output_directory,
                    "message_number_graph")
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"],
               output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"],
               output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"],
               output_directory, "total_degree")
# ============== ANALYSIS =============
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
degree_anal_message_number = network.degree_analysis_on_graph(message_number_graph)

bin_matrix, total_messages = network.message_number_bins_csv(log_data, nicks, nick_same_list)
# Row 0: the column indices of bin_matrix; row 1: each column summed over all rows.
data = [list(range(len(bin_matrix[0])))]
data.append([sum(i) for i in zip(*bin_matrix)])

# Sweep several cutoff percentiles. The module-level config value is
# overridden for each pass, so remember the original and restore it even when
# one of the analysis, plotting or saving steps raises.
default_cutoff = config.CUTOFF_PERCENTILE
percentiles = [0, 1, 5, 10, 20]

try:
    for cutoff in percentiles:
        config.CUTOFF_PERCENTILE = cutoff
        truncated_rt, rt_cutoff_time = channel.response_time(
            log_data, nicks, nick_same_list, config.CUTOFF_PERCENTILE)
        conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
            log_data, nicks, nick_same_list, rt_cutoff_time, config.CUTOFF_PERCENTILE)

        # Raw series for this cutoff.
        saver.save_csv(conv_len, output_directory, "conv_len-cutoff-" + str(cutoff))
        saver.save_csv(truncated_rt, output_directory, "resp_time-cutoff-" + str(cutoff))
        saver.save_csv(conv_ref_time, output_directory, "conv_ref_time-cutoff-" + str(cutoff))

        # Fit and plot each series.
        conv_len_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
            conv_len, output_directory, "conv_len_cutoff" + str(cutoff))
        resp_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
            truncated_rt, output_directory, "resp_time_cutoff" + str(cutoff))
        conv_ref_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, output_directory, "conv_ref_time_cutoff" + str(cutoff))

        # Fit parameters for this cutoff, one file per series.
        saver.save_csv(
            [["a", "b", "c", "MSE"], [conv_len_curve_fit_parameters]],
            output_directory, "conv_len_curve_fit_parameters-cutoff-" + str(cutoff))
        saver.save_csv(
            [["a", "b", "c", "MSE"], [resp_time_curve_fit_parameters]],
            output_directory, "resp_time_curve_fit_parameters-cutoff-" + str(cutoff))
        # NOTE(review): the x-shifted fit may return more values than the
        # four named in this header - confirm against vis.
        saver.save_csv(
            [["a", "b", "c", "MSE"], [conv_ref_time_curve_fit_parameters]],
            output_directory, "conv_ref_time_curve_fit_parameters-cutoff-" + str(cutoff))
finally:
    config.CUTOFF_PERCENTILE = default_cutoff  # revert back to default

user.keywords_clusters(log_data, nicks, nick_same_list, output_directory, "keywords")
network.degree_analysis_on_graph(message_number_graph)
# ============== INPUT==================
# Read the logs for `channel_name` between `starting_date` and `ending_date`,
# then derive the nick structures passed to every analysis call below.
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# Same call with the last flag set to False and then True (presumably a single
# aggregate graph versus a per-day list, going by the target names).
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(log_data, nicks, nick_same_list, True)
degree_anal_message_numder = network.degree_analysis_on_graph(message_number_graph)
message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(log_data, nicks, nick_same_list)
# Within this span the results of the next two calls are not used again.
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

# These two calls discard their return values; the second repeats the degree
# analysis whose result is already bound above.
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")

# ============== OUTPUT ================
# Draw the message-number graph and write the degree tables
# (the "formatted_for_csv" entries) to `output_directory`.
saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"], output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"], output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"], output_directory, "total_degree")
# Per-node degree tables; the file names embed the analysed date range.
saver.save_csv(out_degree_node_number, output_directory, "node_out_degree" + starting_date +'-'+ending_date)
saver.save_csv(in_degree_node_number, output_directory, "node_in_degree"+ starting_date +'-'+ending_date)
saver.save_csv(total_degree_node_number, output_directory, "node_total_degree"+ starting_date +'-'+ending_date)
# Message-count bins; the file name embeds config.BIN_LENGTH_MINS.
saver.save_csv(bin_matrix, output_directory, "MessageNumber_binsize_"+str(config.BIN_LENGTH_MINS))
def _coerce_to_datetime(value, date_format='%Y-%m-%d'):
    """Return ``value`` as a ``datetime.datetime``.

    ``datetime`` instances are returned unchanged, plain ``date`` instances
    are promoted to midnight of that day, and anything else is parsed as a
    string in ``date_format`` (``ValueError`` on a malformed string,
    ``TypeError`` on a non-string).
    """
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime.combine(value, datetime.time())
    return datetime.datetime.strptime(value, date_format)


def correlational_CL_RT_CRT(log_directory, output_directory, start_date, end_date):
    """
        Correlational : statistical distribution as illustrated by box plot
        for RT, CL, CRT parameters.

        The function walks the given time duration one month at a time. For
        each month, cutoff percentile and channel it fits the conversation
        length, response time and conversation refresh time series, collects
        the fit parameters for all months, and then produces a box plot and
        a CSV file separately for each of the parameters a, b and c.

        The analysed channels (#kubuntu-devel, #ubuntu-devel, #kubuntu) and
        the cutoff percentiles (0, 1, 5, 10, 20) are fixed inside the
        function. config.CUTOFF_PERCENTILE is overridden while the function
        runs and restored to its original value on exit, including when an
        error is raised.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are
            to be stored
        start_date(datetime or str): starting date for the logs to be
            analysed, either a datetime/date or a 'YYYY-MM-DD' string.
            This has to be the beginning of the month.
        end_date(datetime or str): ending date for which the logs are to be
            analysed, either a datetime/date or a 'YYYY-MM-DD' string.
            This has to be the end of the month.

    Returns:
        None

    Raises:
        ValueError: if a date string is not in 'YYYY-MM-DD' format.
    """
    # Calling `.strptime('%Y-%m-%d')` on the argument itself fails for both
    # strings and datetimes, so normalise the inputs explicitly.
    start_date = _coerce_to_datetime(start_date)
    end_date = _coerce_to_datetime(end_date)

    percentiles = [0, 1, 5, 10, 20]
    parameters = ['a', 'b', 'c']

    # Capture the real default once, before any override. Capturing it
    # inside the month loop would record an already overridden value from
    # the second month onwards.
    default_cutoff = config.CUTOFF_PERCENTILE

    try:
        for channel_name_iter in [["#kubuntu-devel"], ["#ubuntu-devel"], ["#kubuntu"]]:
            for cutoff in percentiles:
                # One row per calendar month (indexed by dt.month - 1), so a
                # range longer than a year overwrites earlier rows.
                conv_len_curve_fit_parameters = np.zeros((12, 4))
                resp_time_curve_fit_parameters = np.zeros((12, 4))
                conv_ref_time_curve_fit_parameters = np.zeros((12, 5))

                for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
                    last_day_of_the_month = dt + relativedelta(
                        months=1) - datetime.timedelta(days=1)

                    log_data = reader.linux_input(
                        log_directory, channel_name_iter,
                        dt.strftime("%Y-%m-%d"),
                        last_day_of_the_month.strftime("%Y-%m-%d"))
                    nicks, nick_same_list = nickTracker.nick_tracker(log_data)

                    config.CUTOFF_PERCENTILE = cutoff
                    truncated_rt, rt_cutoff_time = channel.response_time(
                        log_data, nicks, nick_same_list,
                        config.CUTOFF_PERCENTILE)
                    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
                        log_data, nicks, nick_same_list, rt_cutoff_time,
                        config.CUTOFF_PERCENTILE)

                    conv_len_curve_fit_parameters[
                        dt.month - 1] = vis.exponential_curve_fit_and_plot(
                            conv_len, output_directory,
                            "conv_len_cutoff" + str(cutoff))
                    resp_time_curve_fit_parameters[
                        dt.month - 1] = vis.exponential_curve_fit_and_plot(
                            truncated_rt, output_directory,
                            "resp_time_cutoff" + str(cutoff))
                    conv_ref_time_curve_fit_parameters[
                        dt.month - 1] = vis.exponential_curve_fit_and_plot_x_shifted(
                            conv_ref_time, output_directory,
                            "conv_ref_time_cutoff" + str(cutoff))

                # NOTE(review): the year in the output names is hard-coded
                # to 2013 regardless of the requested date range.
                file_suffix = ("_2013_" + channel_name_iter[0] + "_cut_" +
                               str(cutoff))
                fitted_series = [
                    ("conv_len_", conv_len_curve_fit_parameters),
                    ("resp_time_", resp_time_curve_fit_parameters),
                    ("conv_refr_", conv_ref_time_curve_fit_parameters),
                ]

                for para_ind, para_name in enumerate(parameters):
                    # All box plots first, then all CSV files, per parameter.
                    for prefix, fits in fitted_series:
                        vis.box_plot(fits[:, para_ind], output_directory,
                                     prefix + para_name + file_suffix)
                    for prefix, fits in fitted_series:
                        saver.save_csv([fits[:, para_ind].tolist()],
                                       output_directory,
                                       prefix + para_name + file_suffix)
    finally:
        config.CUTOFF_PERCENTILE = default_cutoff