def main(): print("Loading...") fullset = common.load_data(FULLSET_PATH, sep=',') types = get_types(fullset) print("Predicting...") uncertain_mask = (types == UNCERTAIN_LABEL) uncertainset = fullset[uncertain_mask] probs = get_probs_for_uncertain(uncertainset) linenum_to_probs = { idx: prob for idx, prob in zip(np.nonzero(uncertain_mask)[0], probs) } print("Deciding...") probs_and_predictions = [] for i, (row, type_) in enumerate(zip(fullset, types)): if type_ == UNCERTAIN_LABEL: probs = linenum_to_probs[i].tolist() prediction, order = check_and_decide(row[:common.N_DISASTER], probs) probs_and_predictions.append(probs + [prediction] + [order + 1]) elif type_ == -99: probs_and_predictions.append([-99] * (common.N_CLASS + 2)) else: probs = [0.0] * common.N_CLASS probs[type_] = 1.0 probs_and_predictions.append(probs + [type_] + [0]) print("Saving...") common.save_data( np.concatenate((types[:, np.newaxis], probs_and_predictions), axis=1), OUTPUT_PATH)
def main():
    matrices = [correct_nodata(common.load_data(path, skip=6))
                for path in INPUT_PATHS]
    matrix = np.dstack(matrices)
    rows = matrix.reshape(-1, len(MAP_NAMES))
    common.save_data(rows, OUTPUT_PATH)
def get_fires():
    """! Copy active fires (those without an 'OUT' status) from the remote
    database into the local ACTIVEFIRE table
    @return None
    """
    stmt_select = r'''SELECT f.FIREID,
       c.ORGUNITCODE || TO_CHAR(f.FIRESEQ, '000') AS FIRENAME,
       f.LATITUDE,
       f.LONGITUDE
  FROM (SELECT *
          FROM FIRE f2
         WHERE f2.FIREID NOT IN (SELECT DISTINCT FIREID
                                   FROM FIRESTATUS s
                                  WHERE CONDITION = 'OUT')) f
  LEFT JOIN CT_ORGUNIT c ON c.ORGUNIT=f.ORGUNIT
'''
    local_table = r'[ACTIVEFIRE]'
    index = ['FIREID']
    delete_all = True
    # need to fix longitude because the remote values are all positive
    # even though they are in the western hemisphere
    result = read_remote(stmt_select)
    if result is None:
        return
    df = result.set_index(index)
    df['LONGITUDE'] = df['LONGITUDE'].apply(lambda x: -abs(x))
    common.save_data(local_table, df, delete_all, DBNAME)
async def _shutdown(self, ctx):
    """Shut down the bot."""
    user = ctx.message.author
    if str(user) in common.admins:
        common.save_data()
        await self.bot.say("Goodbye!\nVocamon has shut down.")
        import sys
        sys.exit(0)
def main():
    print('Loading...')
    fullset = common.load_data(FULLSET_PATH, sep=',')

    print('Processing...')
    trainset = get_trainset(fullset, upward=True)

    print('Saving...')
    common.save_data(trainset, OUTPUT_PATH)
    print('Done!')
def run_convbase_on_images(src_folder, dst_file, image_shape, force_all=False):
    """
    Run the imported convolutional base on the cropped images.
    Save the result in a file.

    Parameters
    ----------
    src_folder (str): The folder of the images which have been cropped.
    dst_file (str): The destination file for images which have been processed
        by the convolutional base.
    image_shape (tuple): The input shape expected by the convolutional base.
    force_all (bool), default False. If False, process only images which are
        present in src_folder but not yet stored in dst_file. If True,
        discard everything previously stored and process every image in
        src_folder.

    Returns
    -------
    XData: the updated store of convolutional-base outputs.
    """
    if force_all:
        xd = XData()
    else:
        try:
            with open(dst_file, "rb") as f:
                xd = pickle.load(f)
        except FileNotFoundError:
            xd = XData()

    src_images = os.listdir(src_folder)
    dst_images = xd.get_stored_filenames()
    missing_in_dst = list(set(src_images).difference(set(dst_images)))

    convbase = get_VGG16_convbase(image_shape)
    for i, filename in enumerate(missing_in_dst):
        print(f"{i + 1}/{len(missing_in_dst)}: Processing {filename}")
        src_path = os.path.join(src_folder, filename)
        im = cv2.imread(src_path)
        im = normalize_image(im)
        single_image_batch = np.array([im])
        after_convbase_single = convbase.predict(single_image_batch)
        after_convbase_single = after_convbase_single.flatten()
        xd.add_image(filename, after_convbase_single, assert_dimensions=1)
        if i % 100 == 0:
            print("Saving to avoid losing work if interrupted...")
            save_data(xd, dst_file)
    save_data(xd, dst_file)
    return xd
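# A minimal usage sketch for run_convbase_on_images. The folder, file name and
# the 150x150 RGB shape below are illustrative assumptions, not values taken
# from the original project.
if __name__ == "__main__":
    features = run_convbase_on_images(
        src_folder="data/cropped",              # hypothetical input folder
        dst_file="data/convbase_features.pkl",  # hypothetical output pickle
        image_shape=(150, 150, 3),
    )
    print(len(features.get_stored_filenames()), "images stored")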
async def _restart(self, ctx):
    """Restart the bot."""
    user = ctx.message.author
    if str(user) in common.admins:
        print("Restart command received. Restarting...")
        await self.bot.say("Bot restarting...")
        common.save_data()
        import sys
        python = sys.executable
        sys.stdout.flush()
        # Replace the current process with a fresh interpreter running bot.py
        # (os.execl expects the executable first, then the argv it should see).
        os.execl(python, python, "bot.py")
    else:
        await self.bot.say("{0} is not an administrator".format(user))
def copy_table(stmt_select, local_table, index, delete_all=False):
    """! Copy contents of remote table to local database
    @param stmt_select SQL statement for reading data
    @param local_table Table in local database to insert into
    @param index Index for data that gets read
    @param delete_all Whether or not to clear entire table before adding data
    @return None
    """
    # insert_data(read_remote(stmt_select), local_table)
    # need to set the index, otherwise an extra 'Index' column gets inserted
    result = read_remote(stmt_select)
    if result is not None:
        common.save_data(SCHEMA + '.' + local_table, result.set_index(index),
                         delete_all, DBNAME)
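# A usage sketch mirroring the commented-out call in copy_ct_wstn below: this
# would copy the weather-station code table as-is (without the longitude fix
# that copy_ct_wstn applies by hand).
copy_table(r'select * from ct_wstn', r'[CT_WSTN]', ['WSTNCODE'],
           delete_all=True)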
def main():
    fullset = common.load_data(FULLSET_PATH, sep=',')
    clust_samples = get_clust_samples(fullset)

    km = KModes(n_clusters=N_CLUST, n_init=N_INIT, init='Huang', verbose=True)
    clust_labels = km.fit_predict(clust_samples)

    label_to_codes = get_label_to_codes(clust_samples, clust_labels)
    with open(JSON_PATH, 'w') as f:
        json.dump(label_to_codes, f, sort_keys=True)

    common.save_data([[km.cost_]] + km.cluster_centroids_.tolist(), RESULT_PATH)
def copy_ct_wstn():
    """! Copy CT_WSTN (code table for weather stations)
    @return None
    """
    stmt_select = r'select * from ct_wstn'
    local_table = r'[CT_WSTN]'
    index = ['WSTNCODE']
    delete_all = True
    # copy_table(stmt_select, local_table, index, True)
    # need to fix longitude because they're all positive even though they're in the west
    result = read_remote(stmt_select)
    if result is None:
        return
    df = result.set_index(index)
    df['LONGITUDE'] = df['LONGITUDE'].apply(lambda x: -abs(x))
    common.save_data(SCHEMA + '.' + local_table, df, delete_all, DBNAME)
def run(self):
    cm.create_all_directories(
        [os.path.join(cm.checkpoint_path, 'feature_creation')])
    df = self.read_data()
    df = self.create_features(df)
    cm.save_data(
        df,
        os.path.join(
            cm.cleaned_data_path, 'regression',
            'features_extractor_sample_rate_' +
            str(self.sample_size).replace('.', '') + '_version_' +
            str(self.version) + '.csv'))
    pd.DataFrame().to_csv(
        os.path.join(
            cm.checkpoint_path, 'feature_creation',
            'success_sample_rate_' + str(self.sample_size).replace('.', '') +
            '_version_' + str(self.version) + '.csv'))
def process_trace(pcap_filepath, graph_dir_exp, stat_dir_exp, failed_conns_dir_exp,
                  acksize_tcp_dir_exp, tcpcsm, mptcp_connections=None,
                  print_out=sys.stdout):
    """ Process a tcp pcap file and generate stats of its connections """
    cmd = ['tstat', '-s', os.path.basename(pcap_filepath[:-5]), pcap_filepath]

    try:
        connections = process_tstat_cmd(cmd, pcap_filepath, keep_log=True,
                                        graph_dir_exp=graph_dir_exp)
    except TstatError as e:
        print(str(e) + ": skip process", file=sys.stderr)
        return

    # Dictionary containing all TCPConnections that tried to be MPTCP subflows, but failed
    failed_conns = {}

    if tcpcsm:
        retransmissions_tcpcsm(pcap_filepath, connections)

    inverse_conns = create_inverse_tcp_dictionary(connections)

    acksize_all = compute_tcp_acks_retrans(pcap_filepath, connections, inverse_conns)

    acksize_all_mptcp = {co.C2S: {}, co.S2C: {}}

    if mptcp_connections:
        for flow_id in connections:
            # Copy info to mptcp connections
            copy_info_to_mptcp_connections(connections, mptcp_connections, failed_conns,
                                           acksize_all, acksize_all_mptcp, flow_id)

    # Save connections info
    if mptcp_connections:
        co.save_data(pcap_filepath, acksize_tcp_dir_exp, acksize_all_mptcp)
        # Also save TCP connections that failed to be MPTCP subflows
        co.save_data(pcap_filepath, failed_conns_dir_exp, failed_conns)
    else:
        co.save_data(pcap_filepath, acksize_tcp_dir_exp, acksize_all)
        co.save_data(pcap_filepath, stat_dir_exp, connections)
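# A minimal usage sketch for the TCP process_trace above. The trace path and
# directory names are illustrative assumptions; in the original tool chain they
# come from the command-line options of the analysis script.
process_trace('traces/capture_00.pcap',
              graph_dir_exp='graphs',
              stat_dir_exp='stats',
              failed_conns_dir_exp='failed_conns',
              acksize_tcp_dir_exp='acksize_tcp',
              tcpcsm=False)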
def update_file(childs, last_upd):
    """Update the data file and the log file.

    The log file keeps a history for future use.
    """
    if common.DATA_FORMAT == common.JSON:
        last_upd_str = str(last_upd)  # ISO format
        common.save_data([childs, last_upd_str])
    elif common.DATA_FORMAT == common.PKL:
        common.save_data([childs, last_upd])
    else:  # error
        pass
    if common.LOG:
        if common.DATA_FORMAT == common.JSON:
            last_upd_str = str(last_upd)
            common.save_data([childs, last_upd_str], common.LOG_FILE)
        elif common.DATA_FORMAT == common.PKL:
            common.save_data([childs, last_upd], common.LOG_FILE)
        else:  # error
            pass
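# Hypothetical usage sketch: the payload below is illustrative only;
# update_file just needs the current data structure and the timestamp of the
# last update.
from datetime import datetime
update_file({'child_1': {'status': 'ok'}}, datetime.now())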
def create_setup():
    """Copy files from template and update them with user input."""
    global app_name, app_version, app_license, app_author, app_email, \
        app_url, app_keywords, DEFAULT_AUTHOR, DEFAULT_EMAIL, \
        DEFAULT_LICENSE, DEFAULT_URL, DEFAULT_VERSION

    data_lst = common.load_data()
    if data_lst:
        (DEFAULT_AUTHOR, DEFAULT_EMAIL, DEFAULT_LICENSE, DEFAULT_URL,
         DEFAULT_VERSION) = data_lst

    while not app_name:
        app_name = input(lcl.Q_APP_NAME).decode(lcl.INPUT_ENC)
    app_version = input(lcl.Q_APP_VERSION + '[' + DEFAULT_VERSION +
                        '] ').decode(lcl.INPUT_ENC)
    if not app_version:
        app_version = DEFAULT_VERSION
    app_license = input(lcl.Q_APP_LICENSE + '[' + DEFAULT_LICENSE +
                        '] ').decode(lcl.INPUT_ENC)
    if not app_license:
        app_license = DEFAULT_LICENSE
    app_author = input(lcl.Q_APP_AUTHOR + '[' + DEFAULT_AUTHOR +
                       '] ').decode(lcl.INPUT_ENC)
    if not app_author:
        app_author = DEFAULT_AUTHOR
    app_email = input(lcl.Q_APP_EMAIL + '[' + DEFAULT_EMAIL +
                      '] ').decode(lcl.INPUT_ENC)
    if not app_email:
        app_email = DEFAULT_EMAIL
    app_url = input(lcl.Q_APP_URL + '[' + DEFAULT_URL +
                    '] ').decode(lcl.INPUT_ENC)
    if not app_url:
        app_url = DEFAULT_URL
    app_keywords = input(lcl.Q_APP_KEYWORDS).decode(lcl.INPUT_ENC)
    if not app_keywords:
        app_keywords = app_name

    data_lst = [app_author, app_email, app_license, app_url, app_version]
    common.save_data(data_lst)

    app_url += app_name

    # backup existing files
    backup = False
    filenames = glob.glob('*')
    filenames += glob.glob('.*')
    if filenames:
        backup = True
        os.mkdir(BAK_DIR)
        for filename in filenames:
            dest = BAK_DIR + '/' + filename.split(os.sep)[-1]
            shu.move(filename, dest)

    filenames = glob.glob(common.DATA_PATH + 'template/*')
    filenames += glob.glob(common.DATA_PATH + 'template/.*')

    # remove doc dir
    filenames = [filename for filename in filenames
                 if 'template' + os.sep + 'doc' not in filename]

    # copy files and dirs
    for filename in filenames:
        if os.path.isfile(filename):
            shu.copyfile(filename, filename.split(os.sep)[-1])
        else:
            shu.copytree(filename, filename.split(os.sep)[-1])
    common.sleep(2)
    os.rename('APPLICATION_NAME', app_name)  # rename application dir

    # collect all filenames, including from 1st level subdirs
    filenames = glob.glob('*')
    filenames = [filename for filename in filenames if BAK_DIR not in filename]
    filenames += glob.glob('.*')
    new_filenames = []
    for filename in filenames:
        if os.path.isdir(filename):
            new_filenames += glob.glob(filename + '/*')
    filenames += new_filenames

    exceptions = ['__init__.py', 'build.cmd', 'requirements.txt',
                  'requirements-dev.txt', 'setup.py', 'setup_py2exe.py',
                  'setup_utils.py']

    # delete .pyc files and update files
    for filename in filenames:
        if os.path.isfile(filename):
            if '.pyc' in filename:
                os.remove(filename)
            else:
                if filename.split(os.sep)[-1] not in exceptions:
                    update_file(filename)

    create_redir2rtd_zip()

    if backup:
        os.remove(app_name + APPLICATION_TEMPLATE_FILE)  # remove app template
        # restore files from backup, but only if they don't already exist
        filenames = glob.glob(BAK_DIR + '/*')
        for filename in filenames:
            dest = app_name + '/' + filename.split(os.sep)[-1]
            if not os.path.isfile(dest):
                shu.copyfile(filename, dest)
    else:
        os.rename(app_name + APPLICATION_TEMPLATE_FILE,
                  app_name + '/' + app_name + '.py')  # rename app template

    print(lcl.REMINDERS)
import os
import csv

import numpy as np

from a3_1 import run_kmeans
from a3_2 import run_mog
from common import save_data

if __name__ == '__main__':
    data = np.load('data100D.npy')

    # Run 2.2.3
    for k in [5, 10, 15, 20, 30]:
        result = run_kmeans(k, data, epochs=1000, tol=1e-6)
        save_data(result, '2.2.3', 'kmeans-%s' % str(k))

    for k in [5, 10, 15, 20, 30]:
        result = run_mog(k, data, epochs=1000, tol=1e-8)
        save_data(result, '2.2.3', 'mog-%s' % str(k))
        # index = (CD.data[:, D.RT_fast[PLAYER]] is True) | (CD.data[:, D.RT_slow[PLAYER]] is True)
        # CD.cursor_xy2[:, :, index, PLAYER] = np.nan
        # CD.gaze[:, :, index, :] = np.NaN
        return CD

    CD_results[SESSION] = [None] * common.Number.control
    for CONTROL in range(0, common.Number.control):
        CD_results[SESSION][CONTROL] = limit_to_control_condition(
            CONTROL, data, cursor_xy2, target_position,
            gaze[:, :, :, :, SESSION])

    ## save results
    common.save_data(CD_results, output_directory + "CD_results.bin")
    test = common.load_data(output_directory + "CD_results.bin")
else:
    test = common.load_data(output_directory + "CD_results.bin")

## psychopy setup
is_play_movies = True
if is_play_movies:
    import psychopy.visual
    #import psychopy.event
    win = psychopy.visual.Window(size=[1920, 1080],
def main():
    fullset = common.load_data(FULLSET_PATH, sep=',')
    codes = get_codes(fullset)
    uncertain_mask = (codes == common.N_CLASS)
    uncertain_set = fullset[uncertain_mask]
    uncertain_features = common.onehot_encode(
        uncertain_set[:, common.N_DISASTER:], 0)

    trainset = common.load_data(TRAINSET_PATH, sep=',')
    trainset = common.onehot_encode(trainset, 0)

    prob_sum = np.zeros((uncertain_features.shape[0], common.N_CLASS))
    for i in range(N_MODEL):
        x_train, _, _, _ = common.split(trainset, i)
        _, normalized_features = common.normalize(x_train, uncertain_features)
        prob_sum += tf.keras.models.load_model(common.numbering(
            MODEL_PATH, i)).predict(normalized_features)
        print(i, ' is done.')
    probs = prob_sum / N_MODEL
    linenum_to_prob = {
        idx: prob
        for idx, prob in zip(np.nonzero(uncertain_mask)[0], probs)
    }

    # unpredicted map
    common.save_map(codes.reshape(common.N_ROWS, -1), UNPRED_OUTPUT_PATH)

    # predicted map
    counter = [0] * common.N_CLASS
    predicted_map = codes.copy()
    for i, (row, code) in enumerate(zip(fullset, codes)):
        if code == common.N_CLASS:
            predicted_map[i], order = check_and_decide(row[:common.N_DISASTER],
                                                       linenum_to_prob[i])
            counter[order] += 1
    common.save_map(predicted_map.reshape(common.N_ROWS, -1), PRED_OUTPUT_PATH)
    print(counter)

    # full_probs
    encoded_codes = get_encoded_codes(fullset)
    certain_mask = (codes < common.N_CLASS) & (codes >= 0)
    certain_set = codes[certain_mask]
    certain_probs = get_hunnit_prob_vecs(certain_set)
    linenum_to_certain_prob = {
        idx: prob
        for idx, prob in zip(np.nonzero(certain_mask)[0], certain_probs)
    }
    full_probs = []
    for i, code in enumerate(encoded_codes):
        if i in linenum_to_prob:
            _, order = check_and_decide(fullset[i][:common.N_DISASTER],
                                        linenum_to_prob[i])
            full_probs.append([code] + linenum_to_prob[i].tolist() + [order + 1])
        elif i in linenum_to_certain_prob:
            full_probs.append([code] + linenum_to_certain_prob[i].tolist() + [0])
        else:
            full_probs.append([0])

    cur_id = 0
    reversed_full_probs = []
    for row in np.flipud(np.array(full_probs).reshape(common.N_ROWS,
                                                      -1)).reshape(-1):
        if len(row) == 1:
            continue
        reversed_full_probs.append([cur_id] + row)
        cur_id += 1
    common.save_data(reversed_full_probs, PROBS_OUTPUT_PATH)
            # print question_action
            # print replacement_action_conjugated
            # print editable_question
            data = {}
            data['image_file'] = image_file
            data['original_question'] = question
            data['question'] = editable_question
            data['answer'] = question_action
            # data['replacement_action'] = replacement_action_conjugated
            data['relevant'] = 0
            data['image_id'] = row['image_id']
            data['qa_id'] = -1 * row['qa_id']
            data['image_actions'] = ','.join(image_actions)
            editable_questions.append(data)

            # noedit_data = {}
            # noedit_data['image_file'] = image_file
            # noedit_data['original_question'] = question
            # noedit_data['question'] = question
            # noedit_data['answer'] = question_action
            # noedit_data['replacement_action'] = question_action
            # noedit_data['relevant'] = 1
            # noedit_data['image_id'] = row['image_id']
            # noedit_data['qa_id'] = row['qa_id']
            # noedit_data['image_actions'] = row['image_actions']
            # editable_questions.append(noedit_data)

editable_df = save_data(editable_questions, editable_dataset_output_file)
# print editable_df
def plot(connections, multiflow_connections, sums_dir_exp):
    threshold_handover = 1.0
    syn_first_additional_sf = []
    syn_additional_sfs = []
    time_handover = []
    time_handover_conn = []
    time_handover_conn_info = []
    react_handover = []
    handover_conns = {}
    second_sf_handover = []
    log_file = sys.stdout
    less_200ms = 0
    less_1s = 0
    more_60s = 0
    more_3600s = 0
    less_200ms_second = 0
    less_1s_second = 0
    more_60s_second = 0
    more_3600s_second = 0

    # Look only at multiple subflows connections
    for fname, conns in multiflow_connections.iteritems():
        handover_conns[fname] = {}
        for conn_id, conn in conns.iteritems():
            # First find initial subflow timestamp
            initial_sf_ts = float('inf')
            initial_sf_id = None
            last_acks = []
            min_time_last_ack = float('inf')
            for flow_id, flow in conn.flows.iteritems():
                if co.START not in flow.attr or flow.attr[co.SADDR] in co.IP_PROXY:
                    continue
                if (flow.attr[co.START] - conn.attr[co.START]).total_seconds() < -30:
                    continue
                if flow.attr[co.START].total_seconds() < initial_sf_ts:
                    initial_sf_ts = flow.attr[co.START].total_seconds()
                    initial_sf_id = flow_id

                flow_bytes = 0
                for direction in co.DIRECTIONS:
                    flow_bytes += flow.attr[direction].get(co.BYTES_DATA, 0)
                if (flow_bytes > 0
                        and co.TIME_LAST_ACK_TCP in flow.attr[co.S2C]
                        and flow.attr[co.S2C][co.TIME_LAST_ACK_TCP].total_seconds() > 0.0
                        and co.TIME_LAST_ACK_TCP in flow.attr[co.C2S]
                        and flow.attr[co.C2S][co.TIME_LAST_ACK_TCP].total_seconds() > 0.0):
                    last_acks.append(flow.attr[co.S2C][co.TIME_LAST_ACK_TCP].total_seconds())
                    min_time_last_ack = min(
                        min_time_last_ack,
                        flow.attr[co.S2C][co.TIME_LAST_ACK_TCP].total_seconds())

            if initial_sf_ts == float('inf'):
                continue

            # Now store the delta and record connections with handover
            handover_detected = False
            count_flows = 0
            min_delta = float('inf')
            flow_id_min_delta = None
            for flow_id, flow in conn.flows.iteritems():
                if co.START not in flow.attr or flow.attr[co.SADDR] in co.IP_PROXY:
                    continue
                if (co.TIME_LAST_ACK_TCP not in flow.attr[co.S2C]
                        or flow.attr[co.S2C][co.TIME_LAST_ACK_TCP].total_seconds() == 0
                        or co.TIME_LAST_ACK_TCP not in flow.attr[co.C2S]
                        or flow.attr[co.C2S][co.TIME_LAST_ACK_TCP].total_seconds() == 0):
                    # RST, don't consider as valid MP_JOIN
                    continue
                if (flow.attr[co.START] - conn.attr[co.START]).total_seconds() < -30:
                    continue
                if (flow.attr[co.START] - conn.attr[co.START]).total_seconds() > conn.attr[co.DURATION]:
                    # This subflow is maybe wrongly attributed
                    continue

                delta = flow.attr[co.START].total_seconds() - initial_sf_ts
                min_last_acks = float('inf')
                if len(last_acks) >= 1:
                    min_last_acks = min(last_acks)

                max_last_payload = 0 - float('inf')
                if flow.attr[co.C2S].get(co.BYTES, 0) > 0 or flow.attr[co.S2C].get(co.BYTES, 0) > 0:
                    max_last_payload = max([flow.attr[direction][co.TIME_LAST_PAYLD]
                                            for direction in co.DIRECTIONS])
                handover_delta = flow.attr[co.START].total_seconds() + max_last_payload - min_last_acks

                if delta > 0.0:
                    min_delta = min(min_delta, delta)
                    if min_delta == delta:
                        flow_id_min_delta = flow_id
                    if delta < 0.01:
                        print(fname, conn_id, flow_id, delta)
                    syn_additional_sfs.append(delta)

                    if handover_delta > 0.0:
                        # A subflow is established after the last ack of the client seen --> Handover
                        time_handover.append(min_last_acks - initial_sf_ts)
                        react_handover.append(handover_delta)
                        last_acks.remove(min_last_acks)
                        if not handover_detected:
                            handover_detected = True
                            time_handover_conn.append(delta)
                            time_handover_conn_info.append(
                                (min_last_acks - initial_sf_ts, delta, fname, conn_id))
                            handover_conns[fname][conn_id] = conn

                    if delta >= 50000:
                        print("HUGE DELTA", fname, conn_id, flow_id, delta, file=log_file)
                    if delta <= 0.2:
                        less_200ms += 1
                    if delta <= 1:
                        less_1s += 1
                    if delta >= 60:
                        more_60s += 1
                    if delta >= 3600:
                        more_3600s += 1

            if flow_id_min_delta:
                syn_first_additional_sf.append(min_delta)
                if conn.flows[initial_sf_id].attr[co.S2C][co.TIME_LAST_ACK_TCP].total_seconds() < conn.flows[flow_id_min_delta].attr[co.START].total_seconds():
                    # Handover between initial and second subflow
                    second_sf_handover.append(min_delta)
                if delta <= 0.2:
                    less_200ms_second += 1
                if delta <= 1:
                    less_1s_second += 1
                if delta >= 60:
                    more_60s_second += 1
                if delta >= 3600:
                    more_3600s_second += 1

    # Do a first CDF plot of the delta between initial SYN and additional ones
    base_graph_path = os.path.join(sums_dir_exp, 'cdf_delta_addtitional_syns')
    color = 'red'
    graph_fname = os.path.splitext(base_graph_path)[0] + "_cdf.pdf"
    graph_fname_log = os.path.splitext(base_graph_path)[0] + "_cdf_log.pdf"

    sample = np.array(sorted(syn_additional_sfs))
    sorted_array = np.sort(sample)
    yvals = np.arange(len(sorted_array)) / float(len(sorted_array))

    sample_2 = np.array(sorted(syn_first_additional_sf))
    sorted_array_2 = np.sort(sample_2)
    yvals_2 = np.arange(len(sorted_array_2)) / float(len(sorted_array_2))

    if len(sorted_array) > 0:
        # Add a last point
        sorted_array = np.append(sorted_array, sorted_array[-1])
        yvals = np.append(yvals, 1.0)
        sorted_array_2 = np.append(sorted_array_2, sorted_array_2[-1])
        yvals_2 = np.append(yvals_2, 1.0)

        # Log plot
        plt.figure()
        plt.clf()
        fig, ax = plt.subplots()
        ax.plot(sorted_array, yvals, color=color, linewidth=2,
                label="Additional subflows")
        ax.plot(sorted_array_2, yvals_2, color='blue', linestyle='--', linewidth=2,
                label="Second subflows")

        # Shrink current axis's height by 10% on the top
        # box = ax.get_position()
        # ax.set_position([box.x0, box.y0,
        #                  box.width, box.height * 0.9])
        ax.set_xscale('log')

        # Put a legend above current axis
        # ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1.05), fancybox=True, shadow=True, ncol=ncol)
        ax.legend(loc='lower right')

        plt.xlim(xmin=0.01)
        plt.xlabel('Time between MP_JOIN and MP_CAP [s]', fontsize=24, labelpad=-2)
        plt.ylabel("CDF", fontsize=24)
        plt.savefig(graph_fname_log)
        plt.close('all')

        # # Normal plot
        # plt.figure()
        # plt.clf()
        # fig, ax = plt.subplots()
        # ax.plot(sorted_array, yvals, color=color, linewidth=2, label="MP_JOIN - MP_CAP")
        #
        # # Shrink current axis's height by 10% on the top
        # # box = ax.get_position()
        # # ax.set_position([box.x0, box.y0,
        # #                  box.width, box.height * 0.9])
        # # ax.set_xscale('log')
        #
        # # Put a legend above current axis
        # # ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1.05), fancybox=True, shadow=True, ncol=ncol)
        # ax.legend(loc='lower right')
        #
        # plt.xlabel('Time [s]', fontsize=18)
        # plt.ylabel("CDF", fontsize=18)
        # plt.savefig(graph_fname)
        # plt.close('all')

    # Now quantify in handover connections the amount of data not on the initial subflows
    bytes_init_sf = 0.0
    bytes_init_sfs = 0.0
    bytes_total = 0.0
    for fname, conns in handover_conns.iteritems():
        for conn_id, conn in conns.iteritems():
            # First find initial subflow timestamp
            initial_sf_ts = float('inf')
            for flow_id, flow in conn.flows.iteritems():
                if co.START not in flow.attr:
                    continue
                if flow.attr[co.START].total_seconds() < initial_sf_ts:
                    initial_sf_ts = flow.attr[co.START].total_seconds()

            min_delta = float('inf')
            for flow_id, flow in conn.flows.iteritems():
                if co.START not in flow.attr:
                    continue
                delta = flow.attr[co.START].total_seconds() - initial_sf_ts
                if delta > 0.0:
                    min_delta = min(min_delta, delta)

            # Now collect the amount of data on all subflows
            for flow_id, flow in conn.flows.iteritems():
                if co.START not in flow.attr:
                    continue
                delta = flow.attr[co.START].total_seconds() - initial_sf_ts
                for direction in co.DIRECTIONS:
                    bytes_total += flow.attr[direction].get(co.BYTES, 0)
                    if flow.attr[direction].get(co.BYTES, 0) >= 1000000000:
                        print("WARNING!!!", fname, conn_id, flow_id, bytes_total, file=log_file)
                    if delta <= min_delta:
                        # Initial subflows
                        bytes_init_sfs += flow.attr[direction].get(co.BYTES, 0)
                    if delta == 0.0:
                        # Initial subflow
                        bytes_init_sf += flow.attr[direction].get(co.BYTES, 0)

    # Log those values in the log file
    print("DELTA HANDOVER IN FILE delta_handover")
    co.save_data("delta_handover", sums_dir_exp, time_handover)
    print("REACT HANDOVER IN FILE react_handover")
    co.save_data("react_handover", sums_dir_exp, react_handover)
    print("REACT HANDOVER IN FILE time_handover_conn")
    co.save_data("time_handover_conn", sums_dir_exp, time_handover_conn)
    print("REACT HANDOVER IN FILE time_handover_conn_info")
    co.save_data("time_handover_conn_info", sums_dir_exp, time_handover_conn_info)
    print("SECOND SF HANDOVER IN FILE second_sf_handover")
    co.save_data("second_sf_handover", sums_dir_exp, second_sf_handover)
    print("QUANTIFY HANDOVER", file=log_file)
    print(bytes_init_sf, "BYTES ON INIT SF", bytes_init_sf * 100 / bytes_total, "%", file=log_file)
    print(bytes_init_sfs, "BYTES ON INIT SFS", bytes_init_sfs * 100 / bytes_total, "%", file=log_file)
    print("TOTAL BYTES", bytes_total, file=log_file)
    print("<= 200ms", less_200ms, less_200ms * 100.0 / len(syn_additional_sfs), "%")
    print("<= 1s", less_1s, less_1s * 100.0 / len(syn_additional_sfs), "%")
    print(">= 60s", more_60s, more_60s * 100.0 / len(syn_additional_sfs), "%")
    print(">= 3600s", more_3600s, more_3600s * 100.0 / len(syn_additional_sfs), "%")
    print("<= 200ms second", less_200ms_second, less_200ms_second * 100.0 / len(syn_first_additional_sf), "%")
    print("<= 1s second", less_1s_second, less_1s_second * 100.0 / len(syn_first_additional_sf), "%")
    print(">= 60s second", more_60s_second, more_60s_second * 100.0 / len(syn_first_additional_sf), "%")
    print(">= 3600s second", more_3600s_second, more_3600s_second * 100.0 / len(syn_first_additional_sf), "%")
def get_ip_address(cmd):
    return (str(ord(cmd[1])) + '.' + str(ord(cmd[2])) + '.' +
            str(ord(cmd[3])) + '.' + str(ord(cmd[4])))


def process_pcap(pcap_filepath, ports):
    # condition = "tcp.len==7"
    # tshark_filter(condition, pcap_filepath, pcap_filtered_filepath)
    # dpkt needs the capture opened in binary mode
    file = open(pcap_filepath, 'rb')
    try:
        pcap = dpkt.pcap.Reader(file)
        for ts, data in pcap:
            eth = dpkt.ethernet.Ethernet(data)
            ip = eth.data
            tcp = ip.data
            if len(tcp.data) == 7:
                crypted_socks_cmd = tcp.data
                decrypted_socks_cmd = decode(crypted_socks_cmd)
                if decrypted_socks_cmd[0] == b'\x01':  # Connect
                    add_port(decrypted_socks_cmd, ports)
    except Exception as e:
        print(e)

    file.close()


if __name__ == "__main__":
    for pcap_filepath in pcap_list:
        ports = {}
        process_pcap(pcap_filepath, ports)
        co.save_data(pcap_filepath, ports_dir_exp, ports)
            'X': X_valid,
            'y': y_valid,
            'Mu': c_valid,
            'loss': valid_loss,
        },
    })

    return result


if __name__ == '__main__':
    data = np.load('data2D.npy')

    # Remove comment to run each problem

    # Run 1.1
    result = run_kmeans(3, data)
    save_data(result, '1.1')

    # Run 1.2
    for k in range(1, 6):
        result = run_kmeans(k, data)
        save_data(result, '1.2', str(k))

    # Run 1.3
    for k in range(1, 6):
        result = run_kmeans(k, data, with_valid=True)
        save_data(result, '1.3', str(k))
def process_trace(pcap_filepath, graph_dir_exp, stat_dir_exp, aggl_dir_exp, rtt_dir_exp,
                  rtt_subflow_dir_exp, failed_conns_dir_exp, acksize_dir_exp,
                  acksize_tcp_dir_exp, plot_cwin, tcpcsm, min_bytes=0, light=False,
                  return_dict=False):
    """ Process a mptcp pcap file and generate graphs of its subflows
        Notice that we can't change dir per thread, we should use processes
    """
    # if not check_mptcp_joins(pcap_filepath):
    #     print("WARNING: no mptcp joins on " + pcap_filepath, file=sys.stderr)
    csv_tmp_dir = tempfile.mkdtemp(dir=os.getcwd())
    connections = None
    do_tcp_processing = False
    try:
        with co.cd(csv_tmp_dir):
            # If segmentation faults, remove the -S option
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-s', '-S', '-t', '5000', '-w', '0']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3', '-a']
            # connections = process_mptcptrace_cmd(cmd, pcap_filepath)
            #
            # # Useful to count the number of reinjected bytes
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-s', '-a', '-t', '5000', '-w', '2']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3']
            # devnull = open(os.devnull, 'w')
            # if subprocess.call(cmd, stdout=devnull) != 0:
            #     raise MPTCPTraceError("Error of mptcptrace with " + pcap_filepath)
            # devnull.close()
            #
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-r', '2', '-t', '5000', '-w', '2']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3']
            # devnull = open(os.devnull, 'w')
            # if subprocess.call(cmd, stdout=devnull) != 0:
            #     raise MPTCPTraceError("Error of mptcptrace with " + pcap_filepath)
            # devnull.close()
            cmd = ['mptcptrace', '-f', pcap_filepath, '-s', '-S', '-a', '-A', '-R',
                   '-r', '2', '-t', '5000', '-w', '2']
            connections = process_mptcptrace_cmd(cmd, pcap_filepath)

            # The mptcptrace call will generate .xpl files to cope with
            # First see all xpl files, to detect the relative 0 of all connections
            # Also, compute the duration and number of bytes of the MPTCP connection
            first_pass_on_files(connections)
            rtt_all = {co.C2S: {}, co.S2C: {}}
            acksize_all = {co.C2S: {}, co.S2C: {}}

            # Then really process xpl files
            if return_dict:
                for xpl_fname in glob.glob(os.path.join('*.xpl')):
                    try:
                        os.remove(xpl_fname)
                    except IOError as e:
                        print(str(e), file=sys.stderr)
            else:
                for xpl_fname in glob.glob(os.path.join('*.xpl')):
                    try:
                        directory = co.DEF_RTT_DIR if MPTCP_RTT_FNAME in xpl_fname else co.TSG_THGPT_DIR
                        shutil.move(xpl_fname, os.path.join(
                            graph_dir_exp, directory,
                            os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(xpl_fname)))
                    except IOError as e:
                        print(str(e), file=sys.stderr)

            # And by default, save only seq csv files
            for csv_fname in glob.glob(os.path.join('*.csv')):
                if not light:
                    if MPTCP_GPUT_FNAME in os.path.basename(csv_fname):
                        process_gput_csv(csv_fname, connections)
                try:
                    if os.path.basename(csv_fname).startswith(MPTCP_ADDADDR_FNAME):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue
                        process_add_addr_csv(csv_fname, connections, conn_id)
                        os.remove(csv_fname)
                    elif os.path.basename(csv_fname).startswith(MPTCP_RMADDR_FNAME):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue
                        process_rm_addr_csv(csv_fname, connections, conn_id)
                        os.remove(csv_fname)
                    elif MPTCP_RTT_FNAME in os.path.basename(csv_fname):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue
                        is_reversed = is_reverse_connection(os.path.basename(csv_fname))
                        process_rtt_csv(csv_fname, rtt_all, connections, conn_id, is_reversed)
                        os.remove(csv_fname)
                        # co.move_file(csv_fname, os.path.join(
                        #     graph_dir_exp, co.DEF_RTT_DIR, os.path.basename(pcap_filepath[:-5]) + "_" + csv_fname))
                    elif MPTCP_SEQ_FNAME in os.path.basename(csv_fname):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue
                        is_reversed = is_reverse_connection(os.path.basename(csv_fname))
                        process_csv(csv_fname, connections, conn_id, is_reversed)
                        if return_dict:
                            try:
                                os.remove(csv_fname)
                            except Exception:
                                pass
                        else:
                            co.move_file(csv_fname, os.path.join(
                                graph_dir_exp, co.TSG_THGPT_DIR,
                                os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(csv_fname)))
                    elif MPTCP_ACKSIZE_FNAME in os.path.basename(csv_fname):
                        collect_acksize_csv(csv_fname, connections, acksize_all)
                        os.remove(csv_fname)
                    else:
                        if not light and not return_dict:
                            co.move_file(csv_fname, os.path.join(
                                graph_dir_exp, co.TSG_THGPT_DIR,
                                os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(csv_fname)))
                        else:
                            os.remove(csv_fname)
                except IOError as e:
                    print(str(e), file=sys.stderr)

            do_tcp_processing = True

    except MPTCPTraceError as e:
        print(str(e) + "; skip mptcp process", file=sys.stderr)

    shutil.rmtree(csv_tmp_dir)

    # This will save the mptcp connections
    if connections and do_tcp_processing:
        dicts = tcp.process_trace(pcap_filepath, graph_dir_exp, stat_dir_exp,
                                  failed_conns_dir_exp, acksize_tcp_dir_exp, tcpcsm,
                                  mptcp_connections=connections, light=light,
                                  return_dict=return_dict)
        if return_dict:
            tcp_connections, acksize_all_tcp = dicts
            return connections, tcp_connections, rtt_all, acksize_all, acksize_all_tcp
        else:
            co.save_data(pcap_filepath, acksize_dir_exp, acksize_all)
            co.save_data(pcap_filepath, rtt_dir_exp, rtt_all)
            co.save_data(pcap_filepath, stat_dir_exp, connections)
            data['original_question'] = original_question
            data['question'] = question
            data['original_answer'] = original_answer
            data['answer'] = answer
            data['image_file'] = url
            data['qa_id'] = q['qa_id']
            data['question_type'] = question_type
            data['image_id'] = image_id
            data['question_action'] = o[0]
            data['matched_image_action'] = o[1]
            data['original_question_action'] = verb_lemma_to_full_verb[o[1]]
            data['image_actions'] = ','.join(image_actions)
            question_actions_data.append(data)

print "Finished questions"
# print add_actions

question_actions_df = save_data(question_actions_data, questions_output_file)
print "Question actions: [%d]" % (len(question_actions_df))

if not os.path.exists(output_actions_file):
    print 'Adding in new actions...'
    add_df = pd.DataFrame(add_actions)
    add_df = add_df.loc[:, ~add_df.columns.str.contains('^Unnamed')]
    # print add_df
    # print actions_df
    actions_df = actions_df.append(add_df)
    actions_df = actions_df.loc[:, ~actions_df.columns.str.contains('^Unnamed')]
    actions_df = actions_df[actions_df['image_id'].isin(found_image_ids)]
    actions_df.to_csv(output_actions_file)
        # # print relevant
        irrelevant = {}
        irrelevant.update(relevant)
        irrelevant['relevant'] = 0
        irrelevant['original_answer'] = 'no ' + ' or '.join(question_actions) + ' found'
        irrelevant['answer'] = irrelevant['original_answer']
        irrelevant['image_file'] = random_image_without_actions
        irrelevant['qa_id'] = -1 * irrelevant['qa_id']
        new_questions.append(irrelevant)
        # print irrelevant

print 'Finished adding questions'

new_questions_df = save_data(new_questions, dataset_output_file)
# print new_questions_df
print len(new_questions_df)

if filter_infrequent:
    print 'Filtering dataset to remove infrequent answers...'
    irrelevant_df = new_questions_df[new_questions_df['relevant'] == 0]
    grouped_df = irrelevant_df.groupby('answer', as_index=False).count().sort_values(['image_file'])
    # print grouped_df
    # print grouped_df[grouped_df['image_file'] < 30]
    remove_answers = grouped_df[grouped_df['image_file'] < 5]['answer'].tolist()
    remove_qa_ids = new_questions_df[new_questions_df['answer'].isin(remove_answers)]['qa_id'].tolist()
    remove_qa_ids += [(-1 * qa) for qa in remove_qa_ids]
    remove_qa_ids = set(remove_qa_ids)
    new_questions_df = new_questions_df[~new_questions_df['qa_id'].isin(remove_qa_ids)]
import os
import csv

import numpy as np

from a3_1 import run_kmeans
from a3_2 import run_mog
from common import save_data

if __name__ == '__main__':
    data = np.load('data100D.npy')

    # Run 2.2.3
    for i in range(10):
        for k in [5, 10, 15, 20, 30]:
            result = run_kmeans(k, data, epochs=1000, tol=1e-6)
            save_data(result, '2.2.3-silhouette-%s' % i, 'kmeans-%s' % str(k))

        for k in [5, 10, 15, 20, 30]:
            result = run_mog(k, data, epochs=1000, tol=1e-8)
            save_data(result, '2.2.3-silhouette-%s' % i, 'mog-%s' % str(k))
def mnist_classifier_tanh():
    # paths
    path = dict()
    path['project'] = os.path.dirname(os.path.abspath(__file__))
    path['state'] = os.path.join(path['project'], 'epoch')
    path['dataset'] = os.path.join(path['project'], 'dataset')
    path['graph'] = os.path.join(path['project'], 'graph')
    path['array'] = os.path.join(path['project'], 'array')
    for key, value in path.items():
        if not os.path.exists(path[key]):
            os.mkdir(path[key])

    # parameters
    batch_size = 1000
    number_of_epochs = 20
    learning_rate = 1e-3
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    mean = 0.1307
    std = 0.3081
    loss = nn.CrossEntropyLoss()
    train_info_per_batch = 6
    validation_info_per_batch = 3
    test_info_per_batch = 5
    validation_ratio = 0.1

    # transform
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(mean, ), std=(std, ))
    ])

    # dataset
    train_dataset = torchvision.datasets.MNIST(root=path['dataset'], train=True,
                                               transform=transform, download=True)
    test_dataset = torchvision.datasets.MNIST(root=path['dataset'], train=False,
                                              transform=transform, download=True)

    # validation dataset
    validation_limit = int((1 - validation_ratio) * len(train_dataset))
    index_list = list(range(len(train_dataset)))
    train_indexes, validation_indexes = index_list[:validation_limit], index_list[validation_limit:]
    train_sampler = SubsetRandomSampler(train_indexes)
    validation_sampler = SequentialSampler(validation_indexes)

    # dataset loaders
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                    batch_size=batch_size,
                                                    sampler=validation_sampler)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size)

    # model
    model = MnistClassifierTanh().to(device)

    # optimizer
    optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

    epochs = np.arange(start=1, stop=(number_of_epochs + 1), step=1, dtype=int)

    print('Mnist Classifier Tanh')
    train_losses = []
    train_accuracies = []
    validation_losses = []
    validation_accuracies = []
    test_losses = []
    test_accuracies = []
    for epoch in epochs:
        info = 'Epoch {epoch_index}/{number_of_epochs}'
        print(info.format(epoch_index=epoch, number_of_epochs=number_of_epochs))

        # train
        train_loss, train_accuracy = train(model=model, device=device,
                                           loader=train_loader, optimizer=optimizer,
                                           loss=loss,
                                           info_per_batch=train_info_per_batch)
        info = 'Train: Average Loss: {train_loss:.5f}, Accuracy: % {train_accuracy:.2f}'
        print(info.format(train_loss=train_loss,
                          train_accuracy=(100 * train_accuracy)))
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # validation
        validation_loss, validation_accuracy = test(
            model=model, loader=validation_loader, device=device, loss=loss,
            info_per_batch=validation_info_per_batch, info_name='Validation')
        info = 'Validation: Average Loss: {validation_loss:.5f}, Accuracy: % {validation_accuracy:.2f}'
        print(info.format(validation_loss=validation_loss,
                          validation_accuracy=(100 * validation_accuracy)))
        validation_losses.append(validation_loss)
        validation_accuracies.append(validation_accuracy)

        # test
        test_loss, test_accuracy = test(model=model, loader=test_loader,
                                        device=device, loss=loss,
                                        info_per_batch=test_info_per_batch,
                                        info_name='Test')
        info = 'Test: Average Loss: {test_loss:.5f}, Accuracy: % {test_accuracy:.2f}'
        print(info.format(test_loss=test_loss, test_accuracy=(100 * test_accuracy)))
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

        # epoch state
        state_file_name = 'mnist_classifier_tanh_epoch_{epoch_index}.pkl'.format(epoch_index=epoch)
        save_state(model=model, directory=path['state'], file_name=state_file_name)

    # train loss
    save_data(array=train_losses, directory=path['array'],
              file_name='mnist_classifier_tanh_train_loss.npy')
    draw_line_graph(x=epochs, y=train_losses, x_label='Epoch', y_label='Loss',
                    title='Mnist Classifier Tanh Train Loss',
                    directory=path['graph'],
                    file_name='mnist_classifier_tanh_train_loss.png')

    # train accuracy
    save_data(array=train_accuracies, directory=path['array'],
              file_name='mnist_classifier_tanh_train_accuracy.npy')
    draw_line_graph(x=epochs, y=train_accuracies, x_label='Epoch', y_label='Accuracy',
                    title='Mnist Classifier Tanh Train Accuracy',
                    directory=path['graph'],
                    file_name='mnist_classifier_tanh_train_accuracy.png')

    # validation loss
    save_data(array=validation_losses, directory=path['array'],
              file_name='mnist_classifier_tanh_validation_loss.npy')
    draw_line_graph(x=epochs, y=validation_losses, x_label='Epoch', y_label='Loss',
                    title='Mnist Classifier Tanh Validation Loss',
                    directory=path['graph'],
                    file_name='mnist_classifier_tanh_validation_loss.png')

    # validation accuracy
    save_data(array=validation_accuracies, directory=path['array'],
              file_name='mnist_classifier_tanh_validation_accuracy.npy')
    draw_line_graph(x=epochs, y=validation_accuracies, x_label='Epoch',
                    y_label='Accuracy',
                    title='Mnist Classifier Tanh Validation Accuracy',
                    directory=path['graph'],
                    file_name='mnist_classifier_tanh_validation_accuracy.png')

    # test loss
    save_data(array=test_losses, directory=path['array'],
              file_name='mnist_classifier_tanh_test_loss.npy')
    draw_line_graph(x=epochs, y=test_losses, x_label='Epoch', y_label='Loss',
                    title='Mnist Classifier Tanh Test Loss',
                    directory=path['graph'],
                    file_name='mnist_classifier_tanh_test_loss.png')

    # test accuracy
    save_data(array=test_accuracies, directory=path['array'],
              file_name='mnist_classifier_tanh_test_accuracy.npy')
    draw_line_graph(x=epochs, y=test_accuracies, x_label='Epoch', y_label='Accuracy',
                    title='Mnist Classifier Tanh Test Accuracy',
                    directory=path['graph'],
                    file_name='mnist_classifier_tanh_test_accuracy.png')

    # loss
    draw_multi_lines_graph(lines=[
        dict(label='Train', data=dict(x=epochs, y=train_losses)),
        dict(label='Validation', data=dict(x=epochs, y=validation_losses)),
        dict(label='Test', data=dict(x=epochs, y=test_losses))
    ], x_label='Epoch', y_label='Loss', title='Mnist Classifier Tanh Loss',
        directory=path['graph'], file_name='mnist_classifier_tanh_loss.png')

    # accuracy
    draw_multi_lines_graph(lines=[
        dict(label='Train', data=dict(x=epochs, y=train_accuracies)),
        dict(label='Validation', data=dict(x=epochs, y=validation_accuracies)),
        dict(label='Test', data=dict(x=epochs, y=test_accuracies))
    ], x_label='Epoch', y_label='Accuracy', title='Mnist Classifier Tanh Accuracy',
        directory=path['graph'], file_name='mnist_classifier_tanh_accuracy.png')