def test_keyword_digest(self):
    """End-to-end check: keywords_clusters writes a keyword digest file
    identical to the stored reference copy in self.out_dir."""
    tracked_nicks, alias_groups = nickTracker.nick_tracker(self.log_data)
    user.keywords_clusters(self.log_data, tracked_nicks, alias_groups,
                           "./", "temp_keywords")
    # Compare the freshly generated digest against the golden file.
    self.assertTrue(
        filecmp.cmp(self.out_dir + "temp_keywords.txt", "temp_keywords.txt"))
    # Clean up the generated artefact so reruns start fresh.
    os.remove("temp_keywords.txt")
def test_keywords_clusters_expected_failure(self, mock_time, mock_extended_stop_words, mock_keywords):
    """keywords_clusters (with keyword pipeline and stop-word helpers mocked)
    must reproduce both the stored stdout transcript and the reference
    output file."""
    # Pre-computed fixtures that the mocked keyword pipeline returns.
    keywords_filtered = util.load_from_disk(
        self.current_directory + "/../../../data/user_test/keywords/keywords_filtered")
    user_keyword_freq_dict = util.load_from_disk(
        self.current_directory + "/../../../data/user_test/user_keyword_freq_dict")
    user_words_dict = util.load_from_disk(
        self.current_directory + "/../../../data/user_test/keywords/user_words_dict")
    nicks_for_stop_words = util.load_from_disk(
        self.current_directory + "/../../../data/user_test/keywords/nicks_for_stop_words")
    sorted_keywords_for_channels = util.load_from_disk(
        self.current_directory + "/../../../data/user_test/keywords/sorted_keywords_for_channels")
    mock_keywords.return_value = (keywords_filtered, user_keyword_freq_dict,
                                  user_words_dict, nicks_for_stop_words,
                                  sorted_keywords_for_channels)
    # NOTE(review): this fixture lives under "/data/user_test/" while the ones
    # above use "/../../../data/user_test/" — confirm both roots are intended.
    mock_extended_stop_words.return_value = util.load_from_disk(
        self.current_directory + "/data/user_test/extended_stop_words")
    mock_time.return_value = 0

    expected_captured_output = util.load_from_disk(
        self.current_directory + "/data/user_test/stdout_captured_output_keywords_clusters")

    # Run the analysis while capturing everything it prints.
    captured = StringIO.StringIO()
    sys.stdout = captured
    user.keywords_clusters(self.log_data, self.nicks, self.nick_same_list,
                           self.current_directory + "/data/user_test/",
                           "temp_output_keywords_clusters")
    sys.stdout = sys.__stdout__
    printed = captured.getvalue()
    captured.close()

    self.assertEqual(expected_captured_output, printed)
    self.assertTrue(
        filecmp.cmp(
            self.current_directory + "/data/user_test/output_keywords_clusters.txt",
            self.current_directory + "/data/user_test/temp_output_keywords_clusters.txt"))
    # Remove the generated file so the test leaves no residue on success.
    os.remove(self.current_directory + "/data/user_test/temp_output_keywords_clusters.txt")
def test_keywords_clusters_expected_failure(self, mock_time):
    """keywords_clusters (time mocked to a fixed value) must reproduce the
    stored stdout transcript and the reference output file."""
    mock_time.return_value = 0
    expected_stdout = util.load_from_disk(
        self.test_data_dir + "stdout_captured_output_keywords_clusters")

    # Capture stdout for the duration of the analysis call.
    buffer = StringIO.StringIO()
    sys.stdout = buffer
    user.keywords_clusters(self.log_data, self.nicks, self.nick_same_list,
                           self.current_directory,
                           "temp_output_keywords_clusters")
    sys.stdout = sys.__stdout__
    actual_stdout = buffer.getvalue()
    buffer.close()

    self.assertEqual(expected_stdout, actual_stdout)
    self.assertTrue(
        filecmp.cmp(
            self.test_data_dir + "output_keywords_clusters.txt",
            self.current_directory + "/temp_output_keywords_clusters.txt"))
    # Clean up the generated artefact.
    os.remove(self.current_directory + "/temp_output_keywords_clusters.txt")
# ---- analysis phase: build graphs/statistics from the parsed channel logs ----
# (fragment: log_data / nicks / nick_same_list / message_number_graph are
# defined earlier, outside this excerpt)
degree_anal_message_numder = network.degree_analysis_on_graph(message_number_graph)
# Per-day list vs. single aggregated graph (the boolean toggles day-wise mode).
message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)
# NOTE(review): keywords_clusters is called with 5 arguments elsewhere in this
# file (output directory + filename) — confirm this 3-argument call is valid.
user.keywords_clusters(log_data, nicks, nick_same_list)
# NOTE(review): degree_analysis_on_graph was already computed above and its
# result stored in degree_anal_message_numder; this repeat discards its result.
network.degree_analysis_on_graph(message_number_graph)
# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")
# ============== OUTPUT ================
saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")
# Persist the per-direction degree tables produced by degree_analysis_on_graph.
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"], output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"], output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"], output_directory, "total_degree")
# Filename embeds the analysed date range.
saver.save_csv(out_degree_node_number, output_directory, "node_out_degree" + starting_date + '-' + ending_date)
# ---- response-time / conversation analysis at a given percentile cutoff ----
# NOTE(review): `cutoff` is used before the visible `for cutoff in cutoffs`
# below — this opening section presumably sat inside an enclosing per-cutoff
# loop that starts before this excerpt; confirm indentation against the
# original file.
config.CUTOFF_PERCENTILE = cutoff
truncated_rt, rt_cutoff_time = channel.response_time(log_data, nicks, nick_same_list, config.CUTOFF_PERCENTILE)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list, rt_cutoff_time, config.CUTOFF_PERCENTILE)
# Raw distributions, one CSV per cutoff value.
saver.save_csv(conv_len, output_directory, "conv_len-cutoff-" + str(cutoff))
saver.save_csv(truncated_rt, output_directory, "resp_time-cutoff-" + str(cutoff))
saver.save_csv(conv_ref_time, output_directory, "conv_ref_time-cutoff-" + str(cutoff))
# Fit exponential curves to each distribution and save plots + parameters.
conv_len_curve_fit_parameters = vis.exponential_curve_fit_and_plot(conv_len, output_directory, "conv_len_cutoff" + str(cutoff))
resp_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot(truncated_rt, output_directory, "resp_time_cutoff" + str(cutoff))
conv_ref_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot_x_shifted(conv_ref_time, output_directory, "conv_ref_time_cutoff" + str(cutoff))
saver.save_csv(
    [["a","b","c", "MSE"], [conv_len_curve_fit_parameters]],
    output_directory, "conv_len_curve_fit_parameters-cutoff-" + str(cutoff))
saver.save_csv(
    [["a","b","c", "MSE"], [resp_time_curve_fit_parameters]],
    output_directory, "resp_time_curve_fit_parameters-cutoff-" + str(cutoff))
saver.save_csv(
    [["a","b","c", "MSE"], [conv_ref_time_curve_fit_parameters]],
    output_directory, "conv_ref_time_curve_fit_parameters-cutoff-" + str(cutoff))
config.CUTOFF_PERCENTILE = default_cutoff  # revert back to default
user.keywords_clusters(log_data, nicks, nick_same_list, output_directory, "keywords")
network.degree_analysis_on_graph(message_number_graph)
# ---- hubs/experts analysis swept over message-number thresholds ----
threshold = config.THRESHOLD_MESSAGE_NUMBER_GRAPH  # store original default config
cutoffs = [0, 10, 20]
for cutoff in cutoffs:
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = cutoff
    msg_graph_experts, top_hub, top_keyword_overlap, top_auth = network.identify_hubs_and_experts(log_data, nicks, nick_same_list)
    saver.draw_nx_graph(msg_graph_experts, output_directory, "hits-cutoff-" + str(cutoff))
config.THRESHOLD_MESSAGE_NUMBER_GRAPH = threshold #revert to default config # ============== OUTPUT ================ saver.save_net_nx_graph (message_number_graph, output_directory, "message_number_graph") saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")
# ---- ingest: parse the channel logs for the requested date range ----
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)
# ============== ANALYSIS =============
# Aggregated graph vs. per-day list (the boolean toggles day-wise mode).
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(log_data, nicks, nick_same_list, True)
degree_anal_message_numder = network.degree_analysis_on_graph(message_number_graph)
message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)
# NOTE(review): keywords_clusters is called with 5 arguments elsewhere in this
# file (output directory + filename) — confirm this 3-argument call is valid.
user.keywords_clusters(log_data, nicks, nick_same_list)
# NOTE(review): result already held in degree_anal_message_numder; this repeat
# discards its return value.
network.degree_analysis_on_graph(message_number_graph)
# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")
# ============== OUTPUT ================
saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")
# Per-direction degree tables from degree_analysis_on_graph.
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"], output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"], output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"], output_directory, "total_degree")
# Filenames embed the analysed date range.
saver.save_csv(out_degree_node_number, output_directory, "node_out_degree" + starting_date + '-' + ending_date)
saver.save_csv(in_degree_node_number, output_directory, "node_in_degree" + starting_date + '-' + ending_date)
saver.save_csv(total_degree_node_number, output_directory, "node_total_degree" + starting_date + '-' + ending_date)
saver.save_csv(bin_matrix, output_directory, "MessageNumber_binsize_" + str(config.BIN_LENGTH_MINS))
# One drawn graph per day in the range.
for i in range(len(message_number_graph_day_list)):
    saver.draw_nx_graph(message_number_graph_day_list[i][0], output_directory, "mng" + str(i + 1))