def main():
    """Tally message types per node from ``data.csv`` and print a ranking.

    For every node ID a two-element counter ``[a, b]`` is kept:

    * ``a`` counts messages whose type is 6, 7 or 9;
    * ``b`` counts messages whose type is 1, 2, 3, 4, 5 or 8.

    Any other message type is reported with ``"Message type invalid"`` and
    does not create or change a counter. After the pass, the number of
    distinct nodes is printed, the ratio ``a / b`` (``0`` when ``b`` is zero)
    is appended to each counter, and the ``(node, [a, b, ratio])`` pairs are
    pretty-printed in ascending order of ratio.
    """
    # NOTE(review): the meaning of the two type groups is not visible here;
    # the dict name in the original suggests an authentication outcome.
    second_slot_types = (1, 2, 3, 4, 5, 8)
    first_slot_types = (6, 7, 9)

    per_node = {}
    for msg in Message.dataparser("data.csv"):
        kind = msg.messagetype
        if kind in second_slot_types:
            slot = 1
        elif kind in first_slot_types:
            slot = 0
        else:
            print("Message type invalid")
            continue
        # Create the counter lazily so that nodes seen only with invalid
        # message types never get an entry.
        per_node.setdefault(msg.nodeID, [0, 0])[slot] += 1

    print(len(per_node))

    for counts in per_node.values():
        first, second = counts
        counts.append(first / second if second != 0 else 0)

    ranked = sorted(per_node.items(), key=lambda pair: pair[1][2])
    pprint.pprint(ranked)
def main():
    """Print how many messages of each type (1 through 9) are in ``data.csv``.

    The printed list has nine entries; entry ``i`` holds the number of
    messages whose ``messagetype`` equals ``i + 1``. Messages whose type is
    outside ``1..9`` are reported with ``"Message type invalid"`` and are not
    counted.
    """
    type_count = 9
    msg_type_counts = [0] * type_count

    for message in Message.dataparser("data.csv"):
        msg_type = message.messagetype
        # Guard the index: without this check a type of 0 (or a small
        # negative value) wraps around to a negative list index and is
        # silently added to the wrong bucket, while a type above 9 raises
        # IndexError and aborts the whole count.
        if not 1 <= msg_type <= type_count:
            print("Message type invalid")
            continue
        msg_type_counts[msg_type - 1] += 1

    print(msg_type_counts)
def main():
    """Parse ``data.csv`` and print the size of ``username_target``'s result.

    The parsed messages are handed to ``username_target`` and the length of
    whatever collection it returns is printed.
    """
    parsed_messages = Message.dataparser("data.csv")

    # Alternative groupings that can be swapped in here:
    #   node_target(parsed_messages)
    #   IPaddress_target(parsed_messages)
    # NOTE(review): presumably each returns k-grams of message types for the
    # chosen key; confirm against the helpers' definitions.
    grams_by_username = username_target(parsed_messages)

    print(len(grams_by_username))
def main():
    """Report heavily accessed nodes and plot the stored clustering result.

    Two independent steps are performed:

    1. ``data.csv`` is parsed and passed to ``count_usage``, whose result is
       used as a nested mapping ``node id -> {IP address -> access count}``.
       For every node the per-IP counts are summed, and each node whose
       total exceeds 40000 is printed as the node id, a tab, and the total.
    2. ``gonzalez.csv`` is read as rows of ``x, y, cluster`` (two floats and
       an integer) and shown as a scatter plot coloured by cluster index.
    """
    access_threshold = 40000
    cluster_file = "gonzalez.csv"

    # Load the data from the CSV file.
    messages = Message.dataparser("data.csv")

    # Count number of accesses to a node from each IP address accessing it.
    node_access_set = count_usage(messages)

    for nodeid, per_ip_counts in node_access_set.items():
        total_accesses = sum(per_ip_counts.values())
        if total_accesses > access_threshold:
            print(nodeid + "\t" + str(total_accesses))

    # NOTE(review): gonzalez.csv is not produced by this function. A
    # previously disabled section here clustered the (node index, total
    # accesses) points and wrote this file; confirm it is still up to date
    # before trusting the plot.
    x = []
    y = []
    z = []
    with open(cluster_file, "r", newline="") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # Tolerate blank lines (e.g. a trailing newline) instead of
                # failing with IndexError.
                continue
            x.append(float(row[0]))
            y.append(float(row[1]))
            z.append(int(row[2]))

    # Points are coloured by their integer cluster index via the colormap.
    plt.scatter(x, y, c=z, cmap="Accent", s=6)

    # NOTE(review): the legend colours are hard-coded and are not derived
    # from the "Accent" colormap used above; confirm they match the plot.
    grey_patch = mpatches.Patch(color='grey', label='High risk nodes')
    green_patch = mpatches.Patch(color='green', label='Moderate risk nodes')
    black_patch = mpatches.Patch(color='black', label='Low risk nodes')
    plt.legend(handles=[grey_patch, green_patch, black_patch])

    plt.xlabel('#IPaddress')
    plt.ylabel('#Access')
    plt.title('Plot of #accesses vs #IPaddresses')
    plt.show()