Example #1
0
    def test_keyword_digest(self):
        """keywords_clusters must write a keyword digest identical to the
        stored reference file."""
        # Resolve nick aliases from the raw log before clustering keywords.
        tracked_nicks, alias_groups = nickTracker.nick_tracker(self.log_data)

        user.keywords_clusters(self.log_data, tracked_nicks, alias_groups,
                               "./", "temp_keywords")

        # Compare the generated digest with the reference copy, then tidy up.
        digest = "temp_keywords.txt"
        self.assertTrue(filecmp.cmp(self.out_dir + digest, digest))
        os.remove(digest)
Example #2
0
    def test_keywords_clusters_expected_failure(self, mock_time,
                                                mock_extended_stop_words,
                                                mock_keywords):
        """With keywords, extended stop words and time all mocked out,
        keywords_clusters must reproduce both the reference stdout transcript
        and the reference output file."""
        deep_data = self.current_directory + "/../../../data/user_test/"
        local_data = self.current_directory + "/data/user_test/"

        # Pre-computed fixtures routed through the mocked keywords() call.
        keywords_filtered = util.load_from_disk(
            deep_data + "keywords/keywords_filtered")
        user_keyword_freq_dict = util.load_from_disk(
            deep_data + "user_keyword_freq_dict")
        user_words_dict = util.load_from_disk(
            deep_data + "keywords/user_words_dict")
        nicks_for_stop_words = util.load_from_disk(
            deep_data + "keywords/nicks_for_stop_words")
        sorted_keywords_for_channels = util.load_from_disk(
            deep_data + "keywords/sorted_keywords_for_channels")

        mock_keywords.return_value = (keywords_filtered,
                                      user_keyword_freq_dict,
                                      user_words_dict,
                                      nicks_for_stop_words,
                                      sorted_keywords_for_channels)
        mock_extended_stop_words.return_value = util.load_from_disk(
            local_data + "extended_stop_words")
        mock_time.return_value = 0
        expected_stdout = util.load_from_disk(
            local_data + "stdout_captured_output_keywords_clusters")

        # Swap stdout for an in-memory buffer while the function runs.
        stdout_buffer = StringIO.StringIO()
        sys.stdout = stdout_buffer
        user.keywords_clusters(self.log_data, self.nicks, self.nick_same_list,
                               local_data,
                               "temp_output_keywords_clusters")
        sys.stdout = sys.__stdout__
        printed = stdout_buffer.getvalue()
        stdout_buffer.close()

        self.assertEqual(expected_stdout, printed)
        # The generated file must match the stored reference; clean up after.
        self.assertTrue(
            filecmp.cmp(local_data + "output_keywords_clusters.txt",
                        local_data + "temp_output_keywords_clusters.txt"))
        os.remove(local_data + "temp_output_keywords_clusters.txt")
Example #3
0
    def test_keywords_clusters_expected_failure(self, mock_time):
        """keywords_clusters with time mocked to 0 must reproduce both the
        reference stdout transcript and the reference output file."""
        mock_time.return_value = 0
        expected_stdout = util.load_from_disk(
            self.test_data_dir + "stdout_captured_output_keywords_clusters")

        # Swap stdout for an in-memory buffer while the function runs.
        stdout_buffer = StringIO.StringIO()
        sys.stdout = stdout_buffer
        user.keywords_clusters(self.log_data, self.nicks, self.nick_same_list,
                               self.current_directory,
                               "temp_output_keywords_clusters")
        sys.stdout = sys.__stdout__
        printed = stdout_buffer.getvalue()
        stdout_buffer.close()

        self.assertEqual(expected_stdout, printed)

        # The generated file must match the stored reference; clean up after.
        temp_file = (self.current_directory +
                     "/temp_output_keywords_clusters.txt")
        self.assertTrue(
            filecmp.cmp(self.test_data_dir + "output_keywords_clusters.txt",
                        temp_file))
        os.remove(temp_file)
Example #4
0
# Degree statistics derived from the message-number graph.
# NOTE(review): "numder" looks like a typo for "number"; left as-is because
# code outside this excerpt may reference the name.
degree_anal_message_numder = network.degree_analysis_on_graph(
    message_number_graph)
# The trailing True/False flag presumably selects a per-day list vs. one
# aggregated graph (mirrors the variable names) -- TODO confirm.
message_time_graph_list = network.message_time_graph(log_data, nicks,
                                                     nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list, False)
# Per-node degree tables prepared for CSV export.
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
# Message counts bucketed into time bins, plus the overall message total.
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
# Conversation-length / conversation-refresh-time and response-time stats.
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")

# ============== OUTPUT ================
# Persist the graph drawing and the degree tables as CSV files.
saver.draw_nx_graph(message_number_graph, output_directory,
                    "message_number_graph")
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"],
               output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"],
               output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"],
               output_directory, "total_degree")
# The node-degree CSV filename embeds the analysed date range.
saver.save_csv(out_degree_node_number, output_directory,
               "node_out_degree" + starting_date + '-' + ending_date)
Example #5
0
    # Body of a per-cutoff sweep (the loop header lies above this excerpt):
    # recompute response-time and conversation statistics at this cutoff
    # percentile and save CSVs plus exponential curve fits, each filename
    # tagged with the cutoff value.
    config.CUTOFF_PERCENTILE = cutoff
    truncated_rt, rt_cutoff_time = channel.response_time(log_data, nicks, nick_same_list, config.CUTOFF_PERCENTILE)
    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list, rt_cutoff_time, config.CUTOFF_PERCENTILE)
    saver.save_csv(conv_len, output_directory, "conv_len-cutoff-" + str(cutoff))
    saver.save_csv(truncated_rt, output_directory, "resp_time-cutoff-" + str(cutoff))
    saver.save_csv(conv_ref_time, output_directory, "conv_ref_time-cutoff-" + str(cutoff))
    # Exponential curve fits (the *_x_shifted variant presumably shifts the
    # x-axis before fitting -- TODO confirm in the vis module).
    conv_len_curve_fit_parameters = vis.exponential_curve_fit_and_plot(conv_len, output_directory, "conv_len_cutoff" + str(cutoff))
    resp_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot(truncated_rt, output_directory, "resp_time_cutoff" + str(cutoff))
    conv_ref_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot_x_shifted(conv_ref_time, output_directory, "conv_ref_time_cutoff" + str(cutoff))
    # Each parameter CSV carries a header row: a, b, c and mean squared error.
    saver.save_csv( [["a","b","c", "MSE"], [conv_len_curve_fit_parameters]], output_directory,"conv_len_curve_fit_parameters-cutoff-" + str(cutoff))
    saver.save_csv( [["a","b","c", "MSE"], [resp_time_curve_fit_parameters]], output_directory,"resp_time_curve_fit_parameters-cutoff-" + str(cutoff))
    saver.save_csv( [["a","b","c", "MSE"], [conv_ref_time_curve_fit_parameters]], output_directory,"conv_ref_time_curve_fit_parameters-cutoff-"+str(cutoff))

config.CUTOFF_PERCENTILE = default_cutoff #revert back to default

user.keywords_clusters(log_data, nicks, nick_same_list, output_directory, "keywords")
network.degree_analysis_on_graph(message_number_graph)

# Sweep the message-number threshold used when identifying hubs/experts,
# drawing one HITS graph per cutoff value.
threshold = config.THRESHOLD_MESSAGE_NUMBER_GRAPH #store original default config
cutoffs = [0, 10, 20]

for cutoff in cutoffs:
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = cutoff
    msg_graph_experts, top_hub, top_keyword_overlap, top_auth = network.identify_hubs_and_experts(log_data, nicks, nick_same_list)
    saver.draw_nx_graph (msg_graph_experts, output_directory, "hits-cutoff-"+str(cutoff))

config.THRESHOLD_MESSAGE_NUMBER_GRAPH = threshold #revert to default config

# ============== OUTPUT ================
saver.save_net_nx_graph (message_number_graph, output_directory, "message_number_graph")
saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")
Example #6
0
# Parse the channel logs for the requested date range, then build the nick
# alias-tracking structures used by every subsequent analysis.
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# The trailing True/False flag presumably selects a per-day list vs. one
# aggregated graph (mirrors the variable names) -- TODO confirm.
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(log_data, nicks, nick_same_list, True)
# NOTE(review): "numder" looks like a typo for "number"; left as-is because
# code outside this excerpt may reference the name.
degree_anal_message_numder = network.degree_analysis_on_graph(message_number_graph)
message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)
# Per-node degree tables prepared for CSV export.
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(log_data, nicks, nick_same_list)
nick_change_graph_list =  user.nick_change_graph(log_data, True)
# Message counts bucketed into time bins, plus the overall message total.
bin_matrix, total_messages = network.message_number_bins_csv(log_data, nicks, nick_same_list)
# Conversation-length / conversation-refresh-time and response-time stats.
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")

# ============== OUTPUT ================
# Persist graphs and CSV tables; filenames embed the date range or the bin
# size where applicable.
saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"], output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"], output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"], output_directory, "total_degree")
saver.save_csv(out_degree_node_number, output_directory, "node_out_degree" + starting_date +'-'+ending_date)
saver.save_csv(in_degree_node_number, output_directory, "node_in_degree"+ starting_date +'-'+ending_date)
saver.save_csv(total_degree_node_number, output_directory, "node_total_degree"+ starting_date +'-'+ending_date)
saver.save_csv(bin_matrix, output_directory, "MessageNumber_binsize_"+str(config.BIN_LENGTH_MINS))
# One drawing per day for the day-wise message-number graphs; each entry is
# indexed [i][0] here -- presumably (graph, date) tuples, TODO confirm.
for i in range(len(message_number_graph_day_list)):
    saver.draw_nx_graph(message_number_graph_day_list[i][0], output_directory, "mng" + str(i+1))