def correlational_activity(log_directory, output_directory, channel_name): pearson = [] for month in xrange(1, 12): log_data_m1 = reader.linux_input(log_directory, channel_name, "2013-" + str(month) + "-1", "2013-" + str(month) + "-31") nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) bin_matrix_m1, total_messages_m1 = network.message_number_bins_csv( log_data_m1, nicks_m1, nick_same_list_m1) monthly_sum_bins_m1 = [sum(i) for i in zip(*bin_matrix_m1)] log_data_m2 = reader.linux_input(log_directory, channel_name, "2013-" + str(month + 1) + "-1", "2013-" + str(month + 1) + "-31") nicks_m2, nick_same_list_m2 = nickTracker.nick_tracker(log_data_m2) bin_matrix_m2, total_messages_m2 = network.message_number_bins_csv( log_data_m2, nicks_m2, nick_same_list_m2) monthly_sum_bins_m2 = [sum(i) for i in zip(*bin_matrix_m2)] corr = np.corrcoef(monthly_sum_bins_m1, monthly_sum_bins_m2)[0, 1] print "\n----------------------------------" print "For months", month, "and", month + 1 print "Bins for M1:", monthly_sum_bins_m1 print "Bins for M2:", monthly_sum_bins_m2 print "Pearson correlation:", corr pearson.append(corr) vis.box_plot(pearson, output_directory, "pearson2013") saver.save_csv([pearson], output_directory, "pearson2013")
def correlational_activity(log_directory, output_directory, channel_name, start_date, end_date): """ The function selects a month in the given date range and creates heatmap bins for the current month and the next month. It then calculates the correlational calculates the correlational vectors between the two heatmaps and then produces a box plot for all the correlational coefficients of the months in the given date range. Args: log_directory(str): path to the location of Logs output_directory(str): path to the location where the results are to be stored channel_name(list): channels for which the analysis is to be done start_date(datetime): starting date for the logs to be analysed. This has to be the beginning of the month. end_date(datetime): ending date for which the logs are to be analysed. This has to be the end of the month. Returns: null """ start_date = start_date.strptime('%Y-%m-%d') end_date = end_date.strptime('%Y-%m-%d') pearson = [] for dt in rrule(MONTHLY, dtstart=start_date, until=end_date): last_day_of_the_month1 = dt + relativedelta( months=1) - datetime.timedelta(days=1) log_data_m1 = reader.linux_input( log_directory, channel_name, dt.strftime("%Y-%m-%d"), last_day_of_the_month1.strftime("%Y-%m-%d")) nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) bin_matrix_m1, total_messages_m1 = network.message_number_bins_csv( log_data_m1, nicks_m1, nick_same_list_m1) monthly_sum_bins_m1 = [sum(i) for i in zip(*bin_matrix_m1)] next_month_dt = dt + relativedelta(months=1) last_day_of_the_month2 = next_month_dt + relativedelta( months=1) - datetime.timedelta(days=1) log_data_m2 = reader.linux_input( log_directory, channel_name, next_month_dt.strftime("%Y-%m-%d"), last_day_of_the_month2.strftime("%Y-%m-%d")) nicks_m2, nick_same_list_m2 = nickTracker.nick_tracker(log_data_m2) bin_matrix_m2, total_messages_m2 = network.message_number_bins_csv( log_data_m2, nicks_m2, nick_same_list_m2) monthly_sum_bins_m2 = [sum(i) for i in zip(*bin_matrix_m2)] 
corr = np.corrcoef(monthly_sum_bins_m1, monthly_sum_bins_m2)[0, 1] print "\n----------------------------------" print "For months", dt.month, "and", dt.month + 1 print "Bins for M1:", monthly_sum_bins_m1 print "Bins for M2:", monthly_sum_bins_m2 print "Pearson correlation:", corr pearson.append(corr) vis.box_plot(pearson, output_directory, "pearson2013") saver.save_csv([pearson], output_directory, "pearson2013")
def test_save_csv(self):
    """save_csv writes a readable CSV whose rows round-trip as floats."""
    rows = [[1, 2, 3], [1, 2, 3], [1, 2, 3]]
    saver.save_csv(rows, self.current_directory, 'test_save_csv')

    filename = self.current_directory + '/test_save_csv.csv'
    assert os.path.exists(filename)
    assert os.path.isfile(filename)

    with open(filename, 'rb') as f:
        csv_reader = csv.reader(f, delimiter=',',
                                quoting=csv.QUOTE_NONNUMERIC)
        try:
            # QUOTE_NONNUMERIC makes the reader hand back floats.
            for record in csv_reader:
                assert record == [1.0, 2.0, 3.0]
        except csv.Error as e:
            sys.exit('file %s, line %d: %s'
                     % (filename, csv_reader.line_num, e))

    os.remove(filename)
def codelengths(log_directory, output_directory, channel_name): codelengths = [] for month in xrange(1, 13): log_data_m1 = reader.linux_input(log_directory, channel_name, "2013-" + str(month) + "-1", "2013-" + str(month) + "-31") nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) message_number_graph_m1 = network.message_number_graph( log_data_m1, nicks_m1, nick_same_list_m1, False) try: #FOS is a reserved word in igraph and if 'fos' is a username in the nx graph, it generates an error saver.save_net_nx_graph(message_number_graph_m1, output_directory, "message-exchange-" + str(month)) msg_igraph, msg_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + str(month) + '.net') codelengths.append(msg_community.codelength) except: node_labels = message_number_graph_m1.nodes() labels = {} for label in node_labels: if label == "fos": labels[label] = "fos_" else: labels[label] = label message_number_graph_m1 = nx.relabel_nodes(message_number_graph_m1, labels) saver.save_net_nx_graph(message_number_graph_m1, output_directory, "message-exchange-" + str(month)) print "error in", month msg_igraph, msg_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + str(month) + '.net') codelengths.append(msg_community.codelength) vis.box_plot(codelengths, output_directory, "codelengths2013") saver.save_csv([codelengths], output_directory, "codelengths2013")
def box_plot_for_degree(log_directory, output_directory, channel_name):
    """Box-plot degree-distribution curve-fit parameters for 2013.

    For each channel and each month of 2013, fits the out-/in-/total-
    degree distributions of the message-exchange graph and collects the
    fit parameters (slope, intercept, r_square).  One box plot and one
    CSV are produced per (degree type, parameter) pair.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are
            to be stored
        channel_name(list): channels for which the analysis is to be done

    Returns:
        null
    """
    cutoff = 0
    degree_types = ["out_degree", "in_degree", "total_degree"]
    for channel_name_iter in channel_name:
        # One (12 months x 4 params) matrix per degree type; previously the
        # identical collect/plot/save logic was written out three times.
        fit_parameters = dict((deg, np.zeros((12, 4))) for deg in degree_types)

        for month in range(1, 13):
            log_data = reader.linux_input(log_directory, channel_name_iter,
                                          "2013-" + str(month) + "-1",
                                          "2013-" + str(month) + "-31")
            nicks, nick_same_list = nickTracker.nick_tracker(log_data)
            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            degree_anal_message_number = network.degree_analysis_on_graph(
                message_number_graph)
            for deg in degree_types:
                fit_parameters[deg][month - 1] = vis.generate_log_plots(
                    degree_anal_message_number[deg]["raw_for_vis"],
                    output_directory, channel_name_iter[0])

        parameters = ['slope', 'intercept', 'r_square']
        for para_ind in range(len(parameters)):
            for deg in degree_types:
                file_name = (deg + "_" + str(parameters[para_ind]) +
                             "_2013_" + channel_name_iter[0] +
                             "_cut_" + str(cutoff))
                vis.box_plot(fit_parameters[deg][:, para_ind],
                             output_directory, file_name)
                saver.save_csv([fit_parameters[deg][:, para_ind].tolist()],
                               output_directory, file_name)
# Fragment of a larger analysis routine whose `def` is outside this view;
# `log_data`, `nicks`, `nick_same_list`, `message_number_graph`,
# `starting_date`, `ending_date` and the *_node_number lists are bound
# earlier in that routine.
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)
# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")
# ============== OUTPUT ================
saver.draw_nx_graph(message_number_graph, output_directory,
                    "message_number_graph")
# NOTE(review): `degree_anal_message_numder` looks like a typo for
# `degree_anal_message_number` -- confirm against where it is assigned.
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"],
               output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"],
               output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"],
               output_directory, "total_degree")
saver.save_csv(out_degree_node_number, output_directory,
               "node_out_degree" + starting_date + '-' + ending_date)
saver.save_csv(in_degree_node_number, output_directory,
               "node_in_degree" + starting_date + '-' + ending_date)
saver.save_csv(total_degree_node_number, output_directory,
               "node_total_degree" + starting_date + '-' + ending_date)
saver.save_csv(bin_matrix, output_directory,
               "MessageNumber_binsize_" + str(config.BIN_LENGTH_MINS))
# One drawing per daily message-exchange graph.
for i in range(len(message_number_graph_day_list)):
    saver.draw_nx_graph(message_number_graph_day_list[i][0],
                        output_directory, "mng" + str(i + 1))
# Fragment of a larger analysis routine whose `def` is outside this view;
# `log_data`, `nicks`, `nick_same_list`, `output_directory` are bound
# earlier in that routine.
message_number_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list, False)
degree_anal_message_number = network.degree_analysis_on_graph(
    message_number_graph)
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
# data[0] = bin indices, data[1] = per-bin message totals across nicks.
data = [[i for i in range(len(bin_matrix[0]))]]
data.append([sum(i) for i in zip(*bin_matrix)])
# Sweep cutoff percentiles, restoring the config default afterwards.
default_cutoff = config.CUTOFF_PERCENTILE
percentiles = [0, 1, 5, 10, 20]
for cutoff in percentiles:
    config.CUTOFF_PERCENTILE = cutoff
    truncated_rt, rt_cutoff_time = channel.response_time(
        log_data, nicks, nick_same_list, config.CUTOFF_PERCENTILE)
    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
        log_data, nicks, nick_same_list, rt_cutoff_time,
        config.CUTOFF_PERCENTILE)
    saver.save_csv(conv_len, output_directory,
                   "conv_len-cutoff-" + str(cutoff))
    saver.save_csv(truncated_rt, output_directory,
                   "resp_time-cutoff-" + str(cutoff))
    saver.save_csv(conv_ref_time, output_directory,
                   "conv_ref_time-cutoff-" + str(cutoff))
    conv_len_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
        conv_len, output_directory, "conv_len_cutoff" + str(cutoff))
    resp_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
        truncated_rt, output_directory, "resp_time_cutoff" + str(cutoff))
    conv_ref_time_curve_fit_parameters = \
        vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, output_directory,
            "conv_ref_time_cutoff" + str(cutoff))
    saver.save_csv(
        [["a","b","c", "MSE"], [conv_len_curve_fit_parameters]],
        output_directory,
        "conv_len_curve_fit_parameters-cutoff-" + str(cutoff))
    saver.save_csv(
        [["a","b","c", "MSE"], [resp_time_curve_fit_parameters]],
        output_directory,
        "resp_time_curve_fit_parameters-cutoff-" + str(cutoff))
    saver.save_csv(
        [["a","b","c", "MSE"], [conv_ref_time_curve_fit_parameters]],
        output_directory,
        "conv_ref_time_curve_fit_parameters-cutoff-" + str(cutoff))
config.CUTOFF_PERCENTILE = default_cutoff  #revert back to default
user.keywords_clusters(log_data, nicks, nick_same_list, output_directory,
                       "keywords")
network.degree_analysis_on_graph(message_number_graph)
threshold = config.THRESHOLD_MESSAGE_NUMBER_GRAPH  #store original default config
# Fragment of a larger analysis routine whose `def` is outside this view;
# `log_data`, `nicks`, `nick_same_list`, `message_number_graph`,
# `starting_date`, `ending_date`, `message_number_graph_day_list` are
# bound earlier in that routine.
message_time_graph_list = network.message_time_graph(log_data, nicks,
                                                     nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = \
    network.degree_node_number_csv(log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)
# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")
# ============== OUTPUT ================
saver.draw_nx_graph(message_number_graph, output_directory,
                    "message_number_graph")
# NOTE(review): `degree_anal_message_numder` looks like a typo for
# `degree_anal_message_number` -- confirm against where it is assigned.
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"],
               output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"],
               output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"],
               output_directory, "total_degree")
saver.save_csv(out_degree_node_number, output_directory,
               "node_out_degree" + starting_date + '-' + ending_date)
saver.save_csv(in_degree_node_number, output_directory,
               "node_in_degree" + starting_date + '-' + ending_date)
saver.save_csv(total_degree_node_number, output_directory,
               "node_total_degree" + starting_date + '-' + ending_date)
saver.save_csv(bin_matrix, output_directory,
               "MessageNumber_binsize_" + str(config.BIN_LENGTH_MINS))
# Per-day message graphs ("mng1", "mng2", ...).
for i in range(len(message_number_graph_day_list)):
    saver.draw_nx_graph(message_number_graph_day_list[i][0],
                        output_directory, "mng" + str(i + 1))
# Per-event nick-change graphs ("ncg1", "ncg2", ...).
for i in range(len(nick_change_graph_list)):
    saver.draw_nx_graph(nick_change_graph_list[i],
                        output_directory, "ncg" + str(i + 1))
# Aggregate graphs and conversation characteristics.
saver.draw_nx_graph(message_number_graph, output_directory, "mnagg")
saver.draw_nx_graph(message_time_graph, output_directory, "mtgagg")
saver.save_csv(conv_len, output_directory, "conv_len")
saver.save_csv(resp_time, output_directory, "resp_time")
def box_plot_for_degree(log_directory, output_directory, channel_name,
                        start_date, end_date):
    """Box-plot degree-distribution curve-fit parameters over a date range.

    Correlational: statistical distribution of curve fit parameters
    generated for degree distribution.  One month at a time is selected
    from the given duration; each month's out-/in-/total-degree
    distributions are fitted, yielding slope, intercept & r_square.  The
    function collects these parameters for all months and produces a box
    plot separately for each parameter.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are
            to be stored
        channel_name(list): channels for which the analysis is to be done.
        start_date(str): '%Y-%m-%d' date the analysis starts at. This has
            to be the beginning of a month.
        end_date(str): '%Y-%m-%d' date the analysis ends at. This has to
            be the end of a month.

    Returns:
        null
    """
    # Fix: strptime is a classmethod taking (date_string, format); the old
    # instance-style call `start_date.strptime('%Y-%m-%d')` raised TypeError.
    start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')
    cutoff = 0
    degree_types = ["out_degree", "in_degree", "total_degree"]
    for channel_name_iter in channel_name:
        # One (12 x 4) parameter matrix per degree type, row = dt.month - 1.
        # NOTE(review): a range longer than 12 months would overwrite rows
        # -- confirm intended use is at most one calendar year.
        fit_parameters = dict((deg, np.zeros((12, 4))) for deg in degree_types)

        for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
            last_day_of_the_month = dt + relativedelta(
                months=1) - datetime.timedelta(days=1)
            log_data = reader.linux_input(
                log_directory, channel_name_iter, dt.strftime("%Y-%m-%d"),
                last_day_of_the_month.strftime("%Y-%m-%d"))
            nicks, nick_same_list = nickTracker.nick_tracker(log_data)
            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            degree_anal_message_number = network.degree_analysis_on_graph(
                message_number_graph)
            for deg in degree_types:
                fit_parameters[deg][dt.month - 1] = vis.generate_log_plots(
                    degree_anal_message_number[deg]["raw_for_vis"],
                    output_directory, channel_name_iter[0])

        parameters = ['slope', 'intercept', 'r_square']
        for para_ind in range(len(parameters)):
            for deg in degree_types:
                file_name = (deg + "_" + str(parameters[para_ind]) +
                             "_2013_" + channel_name_iter[0] +
                             "_cut_" + str(cutoff))
                vis.box_plot(fit_parameters[deg][:, para_ind],
                             output_directory, file_name)
                saver.save_csv([fit_parameters[deg][:, para_ind].tolist()],
                               output_directory, file_name)
def correlational_CL_RT_CRT(log_directory, output_directory, start_date,
                            end_date):
    """Box-plot curve-fit parameters for CL, RT and CRT distributions.

    Correlational: statistical distribution as illustrated by box plot for
    RT (response time), CL (conversation length), CRT (conversation
    refresh time).  One month at a time is selected from the given
    duration; each month's distributions are curve-fitted, yielding
    parameters a, b & c, collected across months and across the hardcoded
    channel list (#kubuntu-devel, #ubuntu-devel, #kubuntu) and cutoff
    percentiles.  A box plot is produced separately for each parameter.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are
            to be stored
        start_date(str): '%Y-%m-%d' date the analysis starts at. This has
            to be the beginning of a month.
        end_date(str): '%Y-%m-%d' date the analysis ends at. This has to
            be the end of a month.

    Returns:
        null
    """
    # Fix: strptime is a classmethod taking (date_string, format); the old
    # instance-style call `start_date.strptime('%Y-%m-%d')` raised TypeError.
    start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')
    # Fix: save the default exactly once.  Previously it was re-saved inside
    # the month loop *after* the first month had already overwritten the
    # config, so the final restore leaked the last cutoff into config.
    default_cutoff = config.CUTOFF_PERCENTILE
    percentiles = [0, 1, 5, 10, 20]
    for channel_name_iter in [["#kubuntu-devel"], ["#ubuntu-devel"],
                              ["#kubuntu"]]:
        for cutoff in percentiles:
            conv_len_curve_fit_parameters = np.zeros((12, 4))
            resp_time_curve_fit_parameters = np.zeros((12, 4))
            conv_ref_time_curve_fit_parameters = np.zeros((12, 5))
            for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
                last_day_of_the_month = dt + relativedelta(
                    months=1) - datetime.timedelta(days=1)
                log_data = reader.linux_input(
                    log_directory, channel_name_iter,
                    dt.strftime("%Y-%m-%d"),
                    last_day_of_the_month.strftime("%Y-%m-%d"))
                nicks, nick_same_list = nickTracker.nick_tracker(log_data)

                config.CUTOFF_PERCENTILE = cutoff
                truncated_rt, rt_cutoff_time = channel.response_time(
                    log_data, nicks, nick_same_list,
                    config.CUTOFF_PERCENTILE)
                conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
                    log_data, nicks, nick_same_list, rt_cutoff_time,
                    config.CUTOFF_PERCENTILE)

                conv_len_curve_fit_parameters[
                    dt.month - 1] = vis.exponential_curve_fit_and_plot(
                        conv_len, output_directory,
                        "conv_len_cutoff" + str(cutoff))
                resp_time_curve_fit_parameters[
                    dt.month - 1] = vis.exponential_curve_fit_and_plot(
                        truncated_rt, output_directory,
                        "resp_time_cutoff" + str(cutoff))
                conv_ref_time_curve_fit_parameters[
                    dt.month - 1] = vis.exponential_curve_fit_and_plot_x_shifted(
                        conv_ref_time, output_directory,
                        "conv_ref_time_cutoff" + str(cutoff))

            parameters = ['a', 'b', 'c']
            for para_ind in range(len(parameters)):
                vis.box_plot(
                    conv_len_curve_fit_parameters[:, para_ind],
                    output_directory,
                    "conv_len_" + str(parameters[para_ind]) + "_2013_" +
                    channel_name_iter[0] + "_cut_" + str(cutoff))
                vis.box_plot(
                    resp_time_curve_fit_parameters[:, para_ind],
                    output_directory,
                    "resp_time_" + str(parameters[para_ind]) + "_2013_" +
                    channel_name_iter[0] + "_cut_" + str(cutoff))
                vis.box_plot(
                    conv_ref_time_curve_fit_parameters[:, para_ind],
                    output_directory,
                    "conv_refr_" + str(parameters[para_ind]) + "_2013_" +
                    channel_name_iter[0] + "_cut_" + str(cutoff))
                saver.save_csv(
                    [conv_len_curve_fit_parameters[:, para_ind].tolist()],
                    output_directory,
                    "conv_len_" + str(parameters[para_ind]) + "_2013_" +
                    channel_name_iter[0] + "_cut_" + str(cutoff))
                saver.save_csv(
                    [resp_time_curve_fit_parameters[:, para_ind].tolist()],
                    output_directory,
                    "resp_time_" + str(parameters[para_ind]) + "_2013_" +
                    channel_name_iter[0] + "_cut_" + str(cutoff))
                saver.save_csv(
                    [conv_ref_time_curve_fit_parameters[:, para_ind].tolist()],
                    output_directory,
                    "conv_refr_" + str(parameters[para_ind]) + "_2013_" +
                    channel_name_iter[0] + "_cut_" + str(cutoff))
            config.CUTOFF_PERCENTILE = default_cutoff
def codelengths(log_directory, output_directory, channel_name, start_date, end_date): """ The function iterate through the months in the given date range and computes the infomap number. It then plots a box plot for the infomap numbers of all the whole months in the given time period. Args: log_directory(str): path to the location of Logs output_directory(str): path to the location where the results are to be stored channel_name(list): channels for which the analysis is to be done start_date(datetime): starting date for the logs to be analysed. This has to be the beginning of the month. end_date(datetime): ending date for which the logs are to be analysed. This has to be the end of the month. Returns: null """ start_date = start_date.strptime('%Y-%m-%d') end_date = end_date.strptime('%Y-%m-%d') codelengths = [] for dt in rrule(MONTHLY, dtstart=start_date, until=end_date): last_day_of_the_month1 = dt + relativedelta( months=1) - datetime.timedelta(days=1) log_data_m1 = reader.linux_input( log_directory, channel_name, dt.strftime("%Y-%m-%d"), last_day_of_the_month1.strftime("%Y-%m-%d")) nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) message_number_graph_m1 = network.message_number_graph( log_data_m1, nicks_m1, nick_same_list_m1, False) try: #FOS is a reserved word in igraph and if 'fos' is a username in the nx graph, it generates an error saver.save_net_nx_graph(message_number_graph_m1, output_directory, "message-exchange-" + str(dt.month)) msg_igraph, msg_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + str(dt.month) + '.net') codelengths.append(msg_community.codelength) except: node_labels = message_number_graph_m1.nodes() labels = {} for label in node_labels: if label == "fos": labels[label] = "fos_" else: labels[label] = label message_number_graph_m1 = nx.relabel_nodes(message_number_graph_m1, labels) saver.save_net_nx_graph(message_number_graph_m1, output_directory, 
"message-exchange-" + str(dt.month)) print "error in", dt.month msg_igraph, msg_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + str(dt.month) + '.net') codelengths.append(msg_community.codelength) vis.box_plot(codelengths, output_directory, "codelengths2013") saver.save_csv([codelengths], output_directory, "codelengths2013")
exec_times_file.flush() # ============== INPUT================== log_data = reader.linux_input_slack(log_directory, starting_date, ending_date) nicks, nick_same_list = nickTracker.nick_tracker(log_data) print("reading log files completed at: ", datetime.datetime.now(), file=exec_times_file) exec_times_file.flush() # ============== MESSAGE BINS HEATMAP ============= bin_matrix, total_messages = network.message_number_bins_csv( log_data, nicks, nick_same_list) data = [[i for i in range(len(bin_matrix[0]))]] data.append([sum(i) for i in zip(*bin_matrix)]) saver.save_csv(bin_matrix, output_directory, "MessageNumber_binsize_" + str(config.BIN_LENGTH_MINS)) vis.plot_data(data, output_directory, "bins") print("msg bins completed at: ", datetime.datetime.now(), file=exec_times_file) exec_times_file.flush() del bin_matrix, total_messages, data gc.collect() print("msg bins gc completed at: ", datetime.datetime.now(), file=exec_times_file) exec_times_file.flush() # ============== CONVERSATION CHARACTERISTICS ============= default_cutoff = config.CUTOFF_PERCENTILE #percentiles = [0, 1, 5, 10, 20]