# sorting the two vectors by pid, but it does NOT make much sense...
# pid_vector, object_vector = zip(*sorted(zip(pids, system_objects))) # unpacking arguments by using `*`

# Convert every raw event record into a fixed-length numeric feature tuple
# and collect the tuples in `vectorized_dataset`.
for record in raw_dataset:
    pid, date, obj = record["pid"], record["date"], record["object"]

    vector = (
        objects.index(obj),    # position of the object in `objects`
        pids.index(pid),       # position of the event pid in `pids`
        dates.index(date),     # position of the date in `dates`
        system_objects[obj],   # value stored for this object (occurrence count, per the original comment)
        process_ids[pid],      # value stored for this pid (occurrence count, per the original comment)
    )
    vectorized_dataset.append(vector)

# Build the self-organising map over the vectorized events.
# NOTE(review): the SOM class is not visible here; the first two arguments
# are presumably the grid dimensions and the meaning of the trailing 100
# should be confirmed against its constructor.
ksom = SOM(10, 10, vectorized_dataset, 100)

# Train on at most 100 randomly chosen vectors. random.sample() raises
# ValueError when asked for more items than the population contains, so the
# sample size is capped at the size of the dataset.
sample_size = min(100, len(vectorized_dataset))
for iv in random.sample(vectorized_dataset, sample_size):
    # Each `iv` is one tuple taken from vectorized_dataset.
    ksom.epoch(iv)

line_width = 10
for index, node in enumerate(ksom.nodes):
    print ("(%d,%d):%d " % (node.x, node.y, node.bmu_count)),
    if (index + 1) % line_width is 0:
        print ""
        pass
Exemplo n.º 2
0
        vector_dict['3'], # token index of activity
        vector_dict['4'], # number of database operations
        vector_dict['5'], # number of content provider queries
        vector_dict['6'], # number of network operations
    ]'''
    dataset.append(vector)

# dimension of the map (from config file)
# Every line of the config file is parsed as `key=value` after its final
# character has been dropped (presumably a trailing ';' -- confirm against
# som_config.js).
# The `with` block guarantees the file handle is closed after reading.
with open('./som_config.js') as config_file:
    config_lines = config_file.read().strip().split('\n')
config_pairs = [line[0:-1].split('=') for line in config_lines]
configs = dict(config_pairs)
# Take the values in file order rather than from configs.values(): dict
# ordering is arbitrary in Python 2, so indexing values() could silently swap
# the two dimensions.
# NOTE(review): assumes the file lists the width first and the height second.
config_values = [pair[1] for pair in config_pairs]
width = int(config_values[0])
height = int(config_values[1])

# Create the self-organising map with the configured dimensions.
ksom = SOM(width, height, vector_dataset)

# Training: visit every entry of activity_dataset exactly once, in random
# order (a sample as large as the population is a shuffled copy).
training_order = random.sample(activity_dataset, len(activity_dataset))
for activity_vector in training_order:
    ksom.epoch(
        activity_vector['vector'],
        activity_vector['name'],
        activity_vector['start_date'],
    )

# get SOM nodes collection in database
collection = db.som_nodes
# store the map to database
for node in ksom.nodes:
    map_node = dict()
    map_node['x'] = node.x
    map_node['y'] = node.y
    map_node['features'] = node.weights_vector
    map_node['extra_data'] = node.extra_data