示例#1
0
    def test_response_time_high_cutoff_percentile(self):
        """Check channel.response_time at the 5% and 10% cutoff percentiles.

        For each percentile the returned response-time data is compared with
        a fixture loaded from ``self.test_data_dir`` and the returned cutoff
        time is compared with a fixed expected value.
        """
        # this test assumes config.CUTOFF_TIME_STRATEGY = "TWO_SIGMA"

        # (percentile, fixture path, expected cutoff time,
        #  response-time failure message, cutoff failure message)
        cases = (
            (5.0, "/channel/truncated_rt_5percent", 2,
             "Error in computing response time with 5% cutoff percentile.",
             "Error in computing RT cutoff with 5% cutoff percentile."),
            (10.0, "/channel/truncated_rt_10percent", 3,
             "Error in computing response time with 10% cutoff percentile.",
             "Error in computing RT cutoff with 10% cutoff percentile."),
        )

        for percentile, fixture, want_cutoff, rt_msg, cutoff_msg in cases:
            want_rt = util.load_from_disk(self.test_data_dir + fixture)
            got_rt, got_cutoff = channel.response_time(
                self.log_data, self.nicks, self.nick_same_list, percentile)
            assert got_rt == want_rt, rt_msg
            assert got_cutoff == want_cutoff, cutoff_msg
    def test_response_time_from_channel_lib(self, log_data, nicks, nick_same_list):
        """Compare channel.response_time output with the stored resp_time.csv.

        Args:
            log_data: log data handed to channel.response_time.
            nicks: nick collection handed to channel.response_time.
            nick_same_list: nick alias list handed to channel.response_time.
        """
        update_expected_output_directory(log_data)
        actual = channel.response_time(log_data, nicks, nick_same_list)

        # csv_to_list presumably fills the list it is given with the rows of
        # the CSV file -- confirm against its definition.
        expected = []
        csv_to_list(expected_output_directory + 'resp_time.csv', expected)
        self.assertListEqual(actual, expected)
示例#3
0
    def test_response_time_from_channel_lib(self, log_data, nicks, nick_same_list):
        """Compare channel.response_time output with the stored resp_time.csv.

        Args:
            log_data: log data handed to channel.response_time.
            nicks: nick collection handed to channel.response_time.
            nick_same_list: nick alias list handed to channel.response_time.
        """
        update_expected_output_directory(log_data)
        actual = channel.response_time(log_data, nicks, nick_same_list)

        # csv_to_list presumably fills the list it is given with the rows of
        # the CSV file -- confirm against its definition.
        expected = []
        csv_to_list(expected_output_directory + 'resp_time.csv', expected)
        self.assertListEqual(actual, expected)
    def test_conversation_characteristics_cutoff_1(self, mock_savefig):
        """Check the curve-fit parameters obtained with a cutoff percentile of 1.

        Runs channel.response_time and channel.conv_len_conv_refr_time on
        ``self.log_data_kubuntu_devel``, fits the three resulting series with
        the ``vis`` curve-fit helpers and compares every fitted parameter with
        its stored reference value.

        Args:
            mock_savefig: mock supplied by patching that is set up outside
                this method (presumably stubs out figure saving -- confirm);
                it is not used directly here.
        """
        cutoff = 1
        expected_result_conv_len = [
            0.45678248067618998, 1.9431782685053713, 0.0030314547153581827,
            3.3570362370587976e-05
        ]
        expected_result_resp_time = [
            0.26876242441433712, 0.38822996056503406, 0.0001355301591146847,
            1.4291783519203551e-05
        ]
        expected_result_conv_ref = [
            0.0031066946048193583, 0.0089793356687177077,
            2.3338045062882878e-05, 8.2373085916393017e-08, 66
        ]

        truncated_rt, rt_cutoff_time = channel.response_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            cutoff)
        conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            rt_cutoff_time, cutoff)

        fitted_conv_len = vis.exponential_curve_fit_and_plot(
            conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
        fitted_resp_time = vis.exponential_curve_fit_and_plot(
            truncated_rt, self.current_directory,
            "resp_time_cutoff" + str(cutoff))
        fitted_conv_ref = vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, self.current_directory,
            "conv_ref_time_cutoff" + str(cutoff))

        # assertTrue(actual, expected) would treat `expected` as the failure
        # message and never compare anything, so compare value by value.
        # The parameters span many orders of magnitude, hence a relative
        # tolerance rather than a fixed number of decimal places.
        rel_tol = 1e-6
        comparisons = (
            ("conv_len", fitted_conv_len, expected_result_conv_len),
            ("resp_time", fitted_resp_time, expected_result_resp_time),
            ("conv_ref_time", fitted_conv_ref, expected_result_conv_ref),
        )
        for label, actual, expected in comparisons:
            self.assertEqual(
                len(actual), len(expected),
                "%s: unexpected number of curve-fit parameters" % label)
            for index, (got, want) in enumerate(zip(actual, expected)):
                self.assertAlmostEqual(
                    got, want, delta=rel_tol * abs(want),
                    msg="%s: curve-fit parameter %d differs" % (label, index))
    def test_conversation_characteristics_cutoff_0(self, mock_savefig):
        """Check the curve-fit parameters obtained with a cutoff percentile of 0.

        Runs channel.response_time and channel.conv_len_conv_refr_time on
        ``self.log_data_kubuntu_devel``, fits the three resulting series with
        the ``vis`` curve-fit helpers and compares every fitted parameter with
        its stored reference value.

        Args:
            mock_savefig: mock supplied by patching that is set up outside
                this method (presumably stubs out figure saving -- confirm);
                it is not used directly here.
        """
        cutoff = 0
        expected_result_conv_len = [
            0.46025248575487415, 1.8745480617100398, 0.0014084453709393393,
            1.8113237256968182e-05
        ]
        expected_result_resp_time = [
            0.26599443483759627, 0.38817554962605116, 0.00012042990450484642,
            1.1831364434688785e-05
        ]
        expected_result_conv_ref = [
            0.004067315269095536, 0.01296093837728012, 1.761952783942606e-05,
            5.6259486000435723e-08, 60
        ]

        truncated_rt, rt_cutoff_time = channel.response_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            cutoff)
        conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            rt_cutoff_time, cutoff)

        fitted_conv_len = vis.exponential_curve_fit_and_plot(
            conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
        fitted_resp_time = vis.exponential_curve_fit_and_plot(
            truncated_rt, self.current_directory,
            "resp_time_cutoff" + str(cutoff))
        fitted_conv_ref = vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, self.current_directory,
            "conv_ref_time_cutoff" + str(cutoff))

        # assertTrue(actual, expected) would treat `expected` as the failure
        # message and never compare anything, so compare value by value.
        # The parameters span many orders of magnitude, hence a relative
        # tolerance rather than a fixed number of decimal places.
        rel_tol = 1e-6
        comparisons = (
            ("conv_len", fitted_conv_len, expected_result_conv_len),
            ("resp_time", fitted_resp_time, expected_result_resp_time),
            ("conv_ref_time", fitted_conv_ref, expected_result_conv_ref),
        )
        for label, actual, expected in comparisons:
            self.assertEqual(
                len(actual), len(expected),
                "%s: unexpected number of curve-fit parameters" % label)
            for index, (got, want) in enumerate(zip(actual, expected)):
                self.assertAlmostEqual(
                    got, want, delta=rel_tol * abs(want),
                    msg="%s: curve-fit parameter %d differs" % (label, index))
    def test_conversation_characteristics_cutoff_5(self, mock_savefig):
        """Check the curve-fit parameters obtained with a cutoff percentile of 5.

        Runs channel.response_time and channel.conv_len_conv_refr_time on
        ``self.log_data_kubuntu_devel``, fits the three resulting series with
        the ``vis`` curve-fit helpers and compares every fitted parameter with
        its stored reference value.

        Args:
            mock_savefig: mock supplied by patching that is set up outside
                this method (presumably stubs out figure saving -- confirm);
                it is not used directly here.
        """
        cutoff = 5
        expected_result_conv_len = [
            0.44916983849233633, 1.9156349592761313, 0.0048790728866266418,
            4.3411589194639429e-05
        ]
        expected_result_resp_time = [
            0.28001731891457893, 0.38845839930487419, 0.00020016446653020847,
            2.896185549800808e-05
        ]
        expected_result_conv_ref = [
            0.0019379564807119043, 0.0048940078069499857,
            4.2070926227686924e-05, 1.7671895171226243e-07, 90
        ]

        truncated_rt, rt_cutoff_time = channel.response_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            cutoff)
        conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            rt_cutoff_time, cutoff)

        fitted_conv_len = vis.exponential_curve_fit_and_plot(
            conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
        fitted_resp_time = vis.exponential_curve_fit_and_plot(
            truncated_rt, self.current_directory,
            "resp_time_cutoff" + str(cutoff))
        fitted_conv_ref = vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, self.current_directory,
            "conv_ref_time_cutoff" + str(cutoff))

        # assertTrue(actual, expected) would treat `expected` as the failure
        # message and never compare anything, so compare value by value.
        # The parameters span many orders of magnitude, hence a relative
        # tolerance rather than a fixed number of decimal places.
        rel_tol = 1e-6
        comparisons = (
            ("conv_len", fitted_conv_len, expected_result_conv_len),
            ("resp_time", fitted_resp_time, expected_result_resp_time),
            ("conv_ref_time", fitted_conv_ref, expected_result_conv_ref),
        )
        for label, actual, expected in comparisons:
            self.assertEqual(
                len(actual), len(expected),
                "%s: unexpected number of curve-fit parameters" % label)
            for index, (got, want) in enumerate(zip(actual, expected)):
                self.assertAlmostEqual(
                    got, want, delta=rel_tol * abs(want),
                    msg="%s: curve-fit parameter %d differs" % (label, index))
示例#7
0
    def test_response_time_high_cutoff_percentile(
            self, mock_to_graph, mock_connected_nick_list, mock_nick_sen,
            mock_correct_last_char_list, mock_correct_last_char_CR,
            mock_rec_list_splice, mock_msg_line, mock_splice_find,
            mock_stat_dist, mock_truncate_table):
        """Check channel.response_time at the 5% and 10% cutoff percentiles.

        The mock arguments are wired to the stand-ins defined on the test
        case, and ``mock_truncate_table`` is primed with one fixture per call.
        ``mock_stat_dist`` is accepted but not configured here.
        """
        # this test assumes config.CUTOFF_TIME_STRATEGY = "TWO_SIGMA"
        mock_to_graph.return_value = self.to_graph

        # Route every patched helper to its stand-in on this test case.
        stand_ins = (
            (mock_connected_nick_list, self.mock_create_connected_nick_list),
            (mock_nick_sen, self.mock_get_nick_sen_rec),
            (mock_correct_last_char_list, self.mock_correct_last_char_list),
            (mock_correct_last_char_CR, self.mock_correctLastCharCR),
            (mock_rec_list_splice, self.mock_rec_list_splice),
            (mock_msg_line, self.mock_check_if_msg_line),
            (mock_splice_find, self.mock_splice_find),
        )
        for patched, replacement in stand_ins:
            patched.side_effect = replacement

        # One return value per successive call: 5% first, then 10%.
        mock_truncate_table.side_effect = [
            util.load_from_disk(
                self.test_data_dir + "/channel/rt_high_5_percent"),
            util.load_from_disk(
                self.test_data_dir + "/channel/rt_high_10_percent"),
        ]

        # (percentile, fixture path, expected cutoff time,
        #  response-time failure message, cutoff failure message)
        cases = (
            (5.0, "/channel/truncated_rt_5percent", 2,
             "Error in computing response time with 5% cutoff percentile.",
             "Error in computing RT cutoff with 5% cutoff percentile."),
            (10.0, "/channel/truncated_rt_10percent", 3,
             "Error in computing response time with 10% cutoff percentile.",
             "Error in computing RT cutoff with 10% cutoff percentile."),
        )

        for percentile, fixture, want_cutoff, rt_msg, cutoff_msg in cases:
            want_rt = util.load_from_disk(self.test_data_dir + fixture)
            got_rt, got_cutoff = channel.response_time(
                self.log_data, self.nicks, self.nick_same_list, percentile)
            self.assertEqual(got_rt, want_rt, rt_msg)
            self.assertEqual(got_cutoff, want_cutoff, cutoff_msg)
    def test_response_time_low_cutoff_percentile(self):
        """Check channel.response_time at the 0% and 1% cutoff percentiles.

        For each percentile the returned response-time data is compared with
        a fixture loaded from under ``current_directory`` and the returned
        cutoff time is compared with a fixed expected value.
        """
        # this test assumes config.CUTOFF_TIME_STRATEGY = "TWO_SIGMA"

        # (percentile, fixture path, expected cutoff time,
        #  response-time failure message, cutoff failure message)
        cases = (
            (0.0, "/data/resp_time", 1,
             "Error in computing response time with 0% cutoff percentile.",
             "Error in computing RT cutoff with 0% cutoff percentile."),
            (1.0, "/data/truncated_rt_1percent", 1,
             "Error in computing response time with 1% cutoff percentile.",
             "Error in computing RT cutoff with 1% cutoff percentile."),
        )

        for percentile, fixture, want_cutoff, rt_msg, cutoff_msg in cases:
            want_rt = util.load_from_disk(current_directory + fixture)
            got_rt, got_cutoff = channel.response_time(
                self.log_data, self.nicks, self.nick_same_list, percentile)
            assert got_rt == want_rt, rt_msg
            assert got_cutoff == want_cutoff, cutoff_msg
示例#9
0
# Analysis stage of a script: builds message-number / message-time graphs and
# per-channel statistics from log_data, then writes part of them to disk.
# NOTE(review): log_data, nicks, nick_same_list, message_number_graph and
# output_directory are not assigned in this excerpt; they are presumably
# defined earlier in the script.
message_number_graph_day_list = network.message_number_graph(
    log_data, nicks, nick_same_list, True)
# NOTE(review): "numder" looks like a typo for "number"; the name is used
# consistently in the OUTPUT section below, so it is left unchanged.
degree_anal_message_numder = network.degree_analysis_on_graph(
    message_number_graph)
# Same call made twice with the last argument set to True and then False.
message_time_graph_list = network.message_time_graph(log_data, nicks,
                                                     nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

# The return values of these two calls are discarded.
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")

# ============== OUTPUT ================
# Draw the message-number graph and save the three degree tables as CSV.
saver.draw_nx_graph(message_number_graph, output_directory,
                    "message_number_graph")
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"],
               output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"],
               output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"],
               output_directory, "total_degree")
示例#10
0
# ============== ANALYSIS =============
# Graph and degree statistics for the whole log range.
message_number_graph = network.message_number_graph(
    log_data, nicks, nick_same_list, False)

degree_anal_message_number = network.degree_analysis_on_graph(
    message_number_graph)

bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
# Row 0 holds the bin indices, row 1 the column-wise sums of bin_matrix.
data = [list(range(len(bin_matrix[0])))]
data.append([sum(column) for column in zip(*bin_matrix)])

default_cutoff = config.CUTOFF_PERCENTILE
percentiles = [0, 1, 5, 10, 20]

# For every cutoff percentile: compute the three series, save them, fit
# them, and save the fitted parameters.
for cutoff in percentiles:
    config.CUTOFF_PERCENTILE = cutoff
    cutoff_label = str(cutoff)

    truncated_rt, rt_cutoff_time = channel.response_time(
        log_data, nicks, nick_same_list, config.CUTOFF_PERCENTILE)
    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
        log_data, nicks, nick_same_list, rt_cutoff_time,
        config.CUTOFF_PERCENTILE)

    saver.save_csv(
        conv_len, output_directory, "conv_len-cutoff-" + cutoff_label)
    saver.save_csv(
        truncated_rt, output_directory, "resp_time-cutoff-" + cutoff_label)
    saver.save_csv(
        conv_ref_time, output_directory,
        "conv_ref_time-cutoff-" + cutoff_label)

    conv_len_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
        conv_len, output_directory, "conv_len_cutoff" + cutoff_label)
    resp_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
        truncated_rt, output_directory, "resp_time_cutoff" + cutoff_label)
    conv_ref_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot_x_shifted(
        conv_ref_time, output_directory,
        "conv_ref_time_cutoff" + cutoff_label)

    saver.save_csv(
        [["a", "b", "c", "MSE"], [conv_len_curve_fit_parameters]],
        output_directory,
        "conv_len_curve_fit_parameters-cutoff-" + cutoff_label)
    saver.save_csv(
        [["a", "b", "c", "MSE"], [resp_time_curve_fit_parameters]],
        output_directory,
        "resp_time_curve_fit_parameters-cutoff-" + cutoff_label)
    saver.save_csv(
        [["a", "b", "c", "MSE"], [conv_ref_time_curve_fit_parameters]],
        output_directory,
        "conv_ref_time_curve_fit_parameters-cutoff-" + cutoff_label)

# Put the configured percentile back to the value it had before the sweep.
config.CUTOFF_PERCENTILE = default_cutoff

user.keywords_clusters(
    log_data, nicks, nick_same_list, output_directory, "keywords")
network.degree_analysis_on_graph(message_number_graph)
示例#11
0
# ============== INPUT==================
# Read the logs for the configured channel and date range, then derive the
# nick list and the nick alias list from them.
# NOTE(review): log_directory, channel_name, starting_date, ending_date and
# output_directory are not assigned in this excerpt; they are presumably
# defined earlier in the script.
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# message_number_graph and message_time_graph are each called twice, with the
# last argument set to False and to True.
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(log_data, nicks, nick_same_list, True)
# NOTE(review): "numder" looks like a typo for "number"; the name is used
# consistently in the OUTPUT section below, so it is left unchanged.
degree_anal_message_numder = network.degree_analysis_on_graph(message_number_graph)
message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(log_data, nicks, nick_same_list)
nick_change_graph_list =  user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

# The return values of these two calls are discarded.
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")

# ============== OUTPUT ================
# Draw the message-number graph, then save the degree tables, the per-node
# degree tables (file names carry the date range) and the bin matrix as CSV.
saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"], output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"], output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"], output_directory, "total_degree")
saver.save_csv(out_degree_node_number, output_directory, "node_out_degree" + starting_date +'-'+ending_date)
saver.save_csv(in_degree_node_number, output_directory, "node_in_degree"+ starting_date +'-'+ending_date)
saver.save_csv(total_degree_node_number, output_directory, "node_total_degree"+ starting_date +'-'+ending_date)
saver.save_csv(bin_matrix, output_directory, "MessageNumber_binsize_"+str(config.BIN_LENGTH_MINS))
示例#12
0
def correlational_CL_RT_CRT(log_directory, output_directory, start_date,
                            end_date):
    """
        Correlational : statistical distribution as illustrated by box plot for RT, CL, CRT parameters.

        For each of the hard-coded channels (#kubuntu-devel, #ubuntu-devel,
        #kubuntu) and each cutoff percentile (0, 1, 5, 10, 20) the function
        walks the given period one month at a time, computes the response
        time (RT), conversation length (CL) and conversation refresh time
        (CRT) series for that month and fits a curve to each of them. The
        parameters a, b and c of every monthly fit are collected, and a box
        plot and a CSV file are produced separately for each parameter.

        Fit results are stored by calendar month (``dt.month - 1``) in arrays
        of 12 rows, so a period longer than twelve months overwrites the rows
        of earlier years. Output names contain a hard-coded "_2013_".

        ``config.CUTOFF_PERCENTILE`` is changed while the function runs and is
        restored to its initial value on exit, including when an error occurs.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str):  path to the location where the results are to be stored
        start_date(str or datetime): starting date for the logs to be analysed, either a
            '%Y-%m-%d' string or a date/datetime object. This has to be the beginning of the month.
        end_date(str or datetime): ending date for which the logs are to be analysed, either a
            '%Y-%m-%d' string or a date/datetime object. This has to be the end of the month.

    Returns:
       None

    """
    date_format = '%Y-%m-%d'
    # strptime must be called on the class with both the text and the format;
    # calling it on the value itself with only a format always raised.
    # date/datetime objects are passed through unchanged.
    if not isinstance(start_date, datetime.date):
        start_date = datetime.datetime.strptime(start_date, date_format)
    if not isinstance(end_date, datetime.date):
        end_date = datetime.datetime.strptime(end_date, date_format)

    percentiles = [0, 1, 5, 10, 20]
    parameters = ['a', 'b', 'c']

    # Remember the real default once, before anything overwrites it.
    default_cutoff = config.CUTOFF_PERCENTILE
    try:
        for channel_name_iter in [["#kubuntu-devel"], ["#ubuntu-devel"],
                                  ["#kubuntu"]]:
            for cutoff in percentiles:
                # One row per calendar month; columns hold the fit results.
                conv_len_curve_fit_parameters = np.zeros((12, 4))
                resp_time_curve_fit_parameters = np.zeros((12, 4))
                conv_ref_time_curve_fit_parameters = np.zeros((12, 5))

                for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
                    last_day_of_the_month = dt + relativedelta(
                        months=1) - datetime.timedelta(days=1)

                    log_data = reader.linux_input(
                        log_directory, channel_name_iter,
                        dt.strftime(date_format),
                        last_day_of_the_month.strftime(date_format))
                    nicks, nick_same_list = nickTracker.nick_tracker(log_data)

                    config.CUTOFF_PERCENTILE = cutoff
                    truncated_rt, rt_cutoff_time = channel.response_time(
                        log_data, nicks, nick_same_list,
                        config.CUTOFF_PERCENTILE)
                    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
                        log_data, nicks, nick_same_list, rt_cutoff_time,
                        config.CUTOFF_PERCENTILE)

                    month_row = dt.month - 1
                    conv_len_curve_fit_parameters[month_row] = \
                        vis.exponential_curve_fit_and_plot(
                            conv_len, output_directory,
                            "conv_len_cutoff" + str(cutoff))
                    resp_time_curve_fit_parameters[month_row] = \
                        vis.exponential_curve_fit_and_plot(
                            truncated_rt, output_directory,
                            "resp_time_cutoff" + str(cutoff))
                    conv_ref_time_curve_fit_parameters[month_row] = \
                        vis.exponential_curve_fit_and_plot_x_shifted(
                            conv_ref_time, output_directory,
                            "conv_ref_time_cutoff" + str(cutoff))

                # Shared tail of every output name for this channel/cutoff.
                name_suffix = ("_2013_" + channel_name_iter[0] + "_cut_" +
                               str(cutoff))
                for para_ind, para_name in enumerate(parameters):
                    conv_len_name = "conv_len_" + para_name + name_suffix
                    resp_time_name = "resp_time_" + para_name + name_suffix
                    conv_refr_name = "conv_refr_" + para_name + name_suffix

                    conv_len_column = conv_len_curve_fit_parameters[:, para_ind]
                    resp_time_column = resp_time_curve_fit_parameters[:, para_ind]
                    conv_refr_column = conv_ref_time_curve_fit_parameters[:, para_ind]

                    vis.box_plot(conv_len_column, output_directory,
                                 conv_len_name)
                    vis.box_plot(resp_time_column, output_directory,
                                 resp_time_name)
                    vis.box_plot(conv_refr_column, output_directory,
                                 conv_refr_name)

                    saver.save_csv([conv_len_column.tolist()],
                                   output_directory, conv_len_name)
                    saver.save_csv([resp_time_column.tolist()],
                                   output_directory, resp_time_name)
                    saver.save_csv([conv_refr_column.tolist()],
                                   output_directory, conv_refr_name)
    finally:
        # Restore the configured percentile even if a step above failed.
        config.CUTOFF_PERCENTILE = default_cutoff