def main():
    """Tally message types per node in ``data.csv`` and print a ranked report.

    Each node ID maps to a list ``[n_a, n_b, ratio]`` where ``n_a`` counts
    messages of type 6, 7 or 9, ``n_b`` counts messages of type 1, 2, 3, 4, 5
    or 8, and ``ratio`` is ``n_a / n_b`` (0 when ``n_b`` is zero). Messages of
    any other type are reported as invalid and are not counted.

    Prints the number of distinct nodes seen, then pretty-prints the
    ``(nodeID, [n_a, n_b, ratio])`` pairs in ascending order of ratio.
    """
    # NOTE(review): the dict name suggests these two groups are authentication
    # outcomes (e.g. failures vs. successes) -- confirm against the data spec.
    slot_zero_types = (6, 7, 9)
    slot_one_types = (1, 2, 3, 4, 5, 8)

    authentication_result = {}
    for msg in Message.dataparser("data.csv"):
        node = msg.nodeID
        tally = authentication_result.get(node)
        kind = msg.messagetype

        if kind in slot_one_types:
            slot = 1
        elif kind in slot_zero_types:
            slot = 0
        else:
            print("Message type invalid")
            continue

        # A node only gets an entry once it has produced a valid message.
        if tally is None:
            tally = authentication_result[node] = [0, 0]
        tally[slot] += 1

    print(len(authentication_result))

    # Append the ratio as a third element so it can serve as the sort key.
    for tally in authentication_result.values():
        numerator, denominator = tally[0], tally[1]
        tally.append(numerator / denominator if denominator != 0 else 0)

    ranked = sorted(authentication_result.items(), key=lambda entry: entry[1][2])
    pprint.pprint(ranked)
Exemplo n.º 2
0
def main():
    """Print how many messages of each type (1 through 9) occur in ``data.csv``.

    The printed list has nine entries; entry ``i`` holds the count for message
    type ``i + 1``. Messages whose type lies outside 1..9 are reported with
    "Message type invalid" and are left out of the counts.
    """
    messages = Message.dataparser("data.csv")
    type_count = 9
    msg_type_counts = [0] * type_count
    for message in messages:
        msg_type = message.messagetype
        if not 1 <= msg_type <= type_count:
            # Without this guard a type of 0 (or below) would index from the
            # end of the list and be counted as a different type, while a type
            # above 9 would abort the whole run with IndexError.
            print("Message type invalid")
            continue
        msg_type_counts[msg_type - 1] += 1
    print(msg_type_counts)
def main():
    """Print the number of entries ``username_target`` builds from ``data.csv``.

    NOTE(review): judging by the original variable name the result is a list
    of k-grams of message types grouped by username -- confirm against
    ``username_target``.
    """
    parsed_messages = Message.dataparser("data.csv")
    # Alternative groupings that were tried here: node_target(messages) and
    # IPaddress_target(messages).
    grams_by_username = username_target(parsed_messages)
    print(len(grams_by_username))
def _print_heavily_accessed_nodes(node_access_set, threshold=40000):
    """Print each node whose total access count exceeds *threshold*.

    ``node_access_set`` is indexed as ``node_access_set[nodeid][ip] -> count``.
    Every qualifying node is printed as its ID and its total, tab-separated.
    """
    for nodeid, accesses_by_ip in node_access_set.items():
        total_accesses = sum(accesses_by_ip.values())
        if total_accesses > threshold:
            print(f"{nodeid}\t{total_accesses}")


def _read_cluster_points(path):
    """Read ``x, y, cluster`` rows from the CSV file at *path*.

    Returns three parallel lists: x and y coordinates as floats and the
    cluster label of each point as an int. Blank rows are skipped.
    """
    xs, ys, labels = [], [], []
    # The csv module asks for newline="" so it can handle line endings itself.
    with open(path, "r", newline="") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue
            xs.append(float(row[0]))
            ys.append(float(row[1]))
            labels.append(int(row[2]))
    return xs, ys, labels


def main():
    """Report heavily accessed nodes and plot the clustered node points.

    Two independent steps:

    1. Parse ``data.csv``, count accesses per node and IP address with
       ``count_usage``, and print every node with more than 40000 accesses.
    2. Read pre-computed ``x, y, cluster`` rows from ``gonzalez.csv`` and show
       them as a scatter plot coloured by cluster label.

    NOTE(review): ``gonzalez.csv`` is not produced here. An earlier, disabled
    revision of this function wrote it from a Lloyd/Gonzalez clustering run,
    so it must already exist before this is called.
    """
    # Load the data from CSV file
    messages = Message.dataparser("data.csv")

    # Count number of accesses to a node from each IPAddress accessing that node
    node_access_set = count_usage(messages)
    _print_heavily_accessed_nodes(node_access_set)

    x, y, z = _read_cluster_points("gonzalez.csv")

    plt.scatter(x, y, c=z, cmap="Accent", s=6)
    # NOTE(review): the legend colours are hard-coded and assume they match
    # what the "Accent" colormap assigns to the three cluster labels -- verify.
    grey_patch = mpatches.Patch(color='grey', label='High risk nodes')
    green_patch = mpatches.Patch(color='green', label='Moderate risk nodes')
    black_patch = mpatches.Patch(color='black', label='Low risk nodes')
    plt.legend(handles=[grey_patch, green_patch, black_patch])
    plt.xlabel('#IPaddress')
    plt.ylabel('#Access')
    plt.title('Plot of #accesses vs #IPaddresses')
    plt.show()