def create(neo4j: Neo4JConnection):
    """Connect every Event node to the Entity node it refers to.

    Runs a single Cypher statement that pairs each ``Event`` with the
    ``Entity`` carrying the same ``IDraw`` and ``EntityType`` values, creates
    an ``E_EN`` relationship from the event to that entity, and copies the
    entity's ``EntityType`` onto the new relationship.

    Args:
        neo4j: Connection used to execute the statement.
    """
    link_events_to_entities = """
        MATCH (ev:Event)
        MATCH (en:Entity {IDraw: ev.IDraw, EntityType:ev.EntityType})
        CREATE (ev)-[r:E_EN]->(en)
        SET r.EntityType = en.EntityType
        """
    neo4j.query(link_events_to_entities, 'Creating E_EN relations')
def __cleanup_temp(neo4j: Neo4JConnection):
    """Remove the temporary event nodes from the graph.

    Deletes every ``TempEvent`` node together with any outgoing ``Source``
    relationship it has.

    Args:
        neo4j: Connection used to execute the statement.
    """
    remove_temp_events = """
        MATCH (e:TempEvent)
        OPTIONAL MATCH (e)-[k:Source]->()
        DELETE e, k
        """
    neo4j.query(remove_temp_events, 'Cleaning up temp nodes')
def calculate(neo4j: Neo4JConnection, config: Config):
    """Compute basic size statistics of the event graph and write them out.

    Four statistics are collected and handed to ``csv.write`` under the name
    ``"basic"``:

    * ``volume``      - number of relationships of type E_C, DF, E_EN or L_E
    * ``nr_vertices`` - number of nodes labelled Event, Common, Entity or Log
    * ``size``        - ``volume + nr_vertices``
    * ``fill``        - ``volume / nr_vertices^2`` (0 for an empty graph)

    Args:
        neo4j: Connection used to run the counting queries. The first column
            of the first returned row is read as the count.
        config: Not used by this function; kept for interface compatibility.
    """
    nodetypes = ["Event", "Common", "Entity", "Log"]
    relationtypes = ["E_C", "DF", "E_EN", "L_E"]

    # "E_C|DF|E_EN|L_E": a relationship of any of the listed types.
    relation_pattern = "|".join(relationtypes)
    # "n:Event OR n:Common OR n:Entity OR n:Log". Each label gets exactly one
    # "n:" prefix. The earlier reduce() re-prefixed the accumulated string on
    # every step and produced "n:n:n:Event OR ...", so the first predicate
    # required a label literally named "n" and Event nodes were not counted.
    label_predicate = " OR ".join(f"n:{nodetype}" for nodetype in nodetypes)

    volume = neo4j.query(
        f"""
        MATCH()-[e:{relation_pattern}]->()
        return count(e)
        """, "Calculating Volume")[0][0]
    nr_vertices = neo4j.query(f"""
        MATCH (n)
        WHERE {label_predicate}
        return count(n)
        """)[0][0]

    # A graph without vertices has no meaningful density; report 0 instead of
    # failing with ZeroDivisionError.
    fill = volume / (nr_vertices * nr_vertices) if nr_vertices else 0

    results = [
        ["volume", volume, "edges"],
        ["nr_vertices", nr_vertices, "vertices"],
        ["size", volume + nr_vertices, "vertices + edges"],
        ["fill", fill, "edges/vertices^2"],
    ]
    # NOTE(review): `csv` is presumably a project helper module (the stdlib
    # csv module has no write()) - confirm against the file's imports.
    csv.write(results, ["Statistic", "Value", "Unit"], "basic")
def create(neo4j: Neo4JConnection, log_id: str = 'BPI14'):
    """Create the Log node and attach every Event node to it.

    One ``Log`` node is created with its ``ID`` property set to ``log_id``,
    and an ``L_E`` relationship is created from that node to each ``Event``.

    Args:
        neo4j: Connection used to execute the statement.
        log_id: Value stored in the ``ID`` property of the Log node. Defaults
            to ``'BPI14'``, the value that used to be hard-coded, so existing
            callers keep their behavior.
    """
    # The id is embedded in a single-quoted Cypher string literal, so
    # backslashes and single quotes have to be escaped.
    escaped_id = log_id.replace('\\', '\\\\').replace("'", "\\'")
    neo4j.query(
        f"""
        CREATE (l:Log {{ID: '{escaped_id}'}})
        WITH l
        Match (e:Event)
        CREATE (l)-[r:L_E]->(e)
        """, 'Creating Log node with L_E relations')
def calculate(neo4j: Neo4JConnection):
    """Count relationships per type and nodes per label set, and export both.

    Each report is queried and then immediately passed to ``csv.write``; the
    edge report is produced first, the node report second.

    Args:
        neo4j: Connection used to run the counting queries.
    """
    # (cypher, log message, csv header, csv name) for each report, in order.
    reports = (
        (
            """
            match ()-[n]->()
            return type(n), count(n)
            """,
            "calculating counts per edge type",
            ["type", "count"],
            "counts-per-edge-type",
        ),
        (
            """
            match (n)
            return labels(n), count(n)
            """,
            "calculating counts per node type",
            ["labels", "count"],
            "counts-per-node-type",
        ),
    )
    for cypher, message, header, target in reports:
        rows = neo4j.query(cypher, message)
        csv.write(rows, header, target)
def create(neo4j: Neo4JConnection):
    """Create the directly-follows (DF) relationships per entity.

    For every ``Entity`` node, the nodes connected to it are ordered by
    ``Start`` and then ``commonID``. Each node is paired with the one that
    follows it via ``apoc.coll.pairsMin``, and a ``DF`` relationship is
    created from the first to the second of every pair. The relationship
    records the entity's ``EntityType`` and ``ID`` (as ``EntityId``).

    Args:
        neo4j: Connection used to execute the statement.
    """
    directly_follows = """
        MATCH (n:Entity)
        MATCH (n)-[]-(ev)
        WITH n, ev as nodes ORDER BY ev.Start, ev.commonID
        WITH n, collect(nodes) as nodeList
        WITH n, apoc.coll.pairsMin(nodeList) as pairs
        UNWIND pairs as pair
        WITH n, pair[0] as first, pair[1] as second
        CREATE (first)-[df:DF]->(second)
        SET df.EntityType = n.EntityType
        SET df.EntityId = n.ID
        """
    neo4j.query(directly_follows, 'Creating DF relations')
def __create_temp_events(neo4j: Neo4JConnection, entity_config: dict,
                         entities_config: dict):
    """Create the temporary event nodes for one entity type.

    A first query creates temp events for the target entity type itself; one
    further query is then issued for each entity listed under
    ``entity_config['event']['related_entities']``.

    Args:
        neo4j: Connection used to execute the statements.
        entity_config: Configuration of the target entity; ``'label'`` and
            ``'event' -> 'related_entities'`` are read here.
        entities_config: Passed through unchanged to the query builder.
    """
    # Entities whose events should also be related to the target entity type.
    linked_entities = entity_config['event']['related_entities']

    # Temp events of the target entity type itself.
    own_query = __create_temp_events_query(entity_config, entities_config)
    neo4j.query(
        own_query,
        f'Creating temp events for {entity_config["label"]} entities')

    # Temp events of every entity related to the target entity type.
    for related_entity in linked_entities:
        related_query = __create_temp_events_query(
            entity_config, entities_config, related_entity)
        neo4j.query(
            related_query,
            f'Creating temp events for {related_entity} entities related to {entity_config["label"]}'
        )
def __histogram(neo4j: Neo4JConnection, label: str):
    """Return how many entities of one type have each number of events.

    For every ``Entity`` whose ``EntityType`` equals ``label``, the number of
    ``Event`` nodes pointing at it through ``E_EN`` is counted (the query
    takes the maximum of that count and 0). The entities are then grouped by
    this value.

    Args:
        neo4j: Connection used to run the query.
        label: ``EntityType`` value selecting the entities.

    Returns:
        Whatever ``neo4j.query`` returns for the statement; the query yields
        ``path_length`` and the number of entities with that length.
    """
    cypher = f"""
        MATCH (n:Entity {{EntityType: '{label}'}})
        OPTIONAL MATCH (n)<-[:E_EN]-(e:Event)
        WITH n, count(e) as nr_events
        unwind [nr_events,0] as path_length
        with n, max(path_length) as path_length
        RETURN path_length, count(n)
        """
    message = f"Calculating histogram of lengths of df paths for entities with EntityType: {label}"
    return neo4j.query(cypher, message)
def create_indexes(neo4j_conn: Neo4JConnection):
    """Create the indexes used while building the event graph.

    Indexes are created, in this order, on ``:Entity(EntityType)``,
    ``:Event(Start)`` and ``:TempEvent(originID)``.

    Args:
        neo4j_conn: Connection used to execute the statements.
    """
    # Cypher property keys are case-sensitive. Events store their timestamp
    # as `Start`, so the index has to name that key; an index on `start`
    # would cover a property that is never written.
    # NOTE(review): `originID` is not referenced by the queries visible in
    # this file - confirm that TempEvent nodes carry that exact key.
    indexed_properties = (
        ("Entity", "EntityType"),
        ("Event", "Start"),
        ("TempEvent", "originID"),
    )
    for node_label, property_key in indexed_properties:
        neo4j_conn.query(
            f"CREATE INDEX ON :{node_label}({property_key})",
            f'Creating index on :{node_label}({property_key})')
def create(neo4j: Neo4JConnection, config: Config):
    """Create one Entity node for every node of each configured entity type.

    For each entry of ``config['entity']``, all nodes carrying the entry's
    ``label`` are matched and an ``Entity`` node is created per match through
    ``apoc.create.node``. The new node gets ``EntityType``, ``IDLog``,
    ``IDraw``, ``Log`` and ``uID`` properties derived from the label, the log
    name and the node's id column, after which the properties of the matched
    node are added with ``SET node+=n``.

    Args:
        neo4j: Connection used to execute the statements.
        config: Configuration; ``config['entity']`` (entries with ``label``
            and ``id_column``) and ``config['log']['name']`` are read.
    """
    entity_definitions = config['entity']
    log_name = config['log']['name']

    for definition in entity_definitions:
        label = definition['label']
        id_column = definition["id_column"]
        create_entities = f"""
            MATCH (n:{label})
            CALL apoc.create.node(
                ['Entity'],
                {{
                    EntityType:'{label}',
                    IDLog:'{log_name}' + n.{id_column},
                    IDraw: n.{id_column},
                    Log:'{log_name}',
                    uID:'{label}{log_name}'+ n.{id_column}
                }}) yield node
            SET node+=n
            """
        neo4j.query(create_entities,
                    f"Creating entity nodes with EntityType:{label}")
def __create_events(neo4j: Neo4JConnection, create_from: dict,
                    entity_config: dict):
    """Create Event nodes, and the Common nodes grouping them, for one column.

    TempEvent nodes of the current entity type whose linked source node has
    the start column are selected. For each of them, an Event is created for
    every TempEvent sharing its ``commonID``: the event copies the
    TempEvent's properties and gets ``Activity``, ``Start`` and ``End`` set.
    One Common node is then created per selected TempEvent, and every event
    collected for it is linked to that node with an ``E_C`` relationship.

    Args:
        neo4j: Connection used to execute the statement.
        create_from: Event definition. ``'start_column'`` is required;
            ``'end_column'`` defaults to the start column; ``'activity'``,
            when present, is turned into a Cypher expression by
            ``__form_activity``.
        entity_config: Configuration of the current entity; only ``'label'``
            is read.
    """
    entity_label = entity_config['label']  # label of the current entity
    start_column = create_from['start_column']
    # The end column falls back to the start column when not specified.
    end_column = create_from.get('end_column', start_column)

    if 'activity' in create_from:
        activity = __form_activity(create_from['activity'])
    else:
        # The activity falls back to the start column's name. It is inserted
        # into the query as an expression, so it must be a quoted string
        # literal: a bare column name is an unbound identifier in Cypher.
        activity = f"'{start_column}'"

    # NOTE(review): the E_C relationship reads `event.entityType` (lowercase
    # "e") while TempEvents are matched on `EntityType`. Property keys are
    # case-sensitive - confirm which key is intended.
    neo4j.query(
        f"""
        // Find Incident TempEvents that should generate this event
        MATCH (temp:TempEvent {{EntityType:'{entity_label}'}})-->(source)
        WHERE '{start_column}' in keys(source)
        // Find other matching TempEvent
        MATCH (t:TempEvent {{commonID: temp.commonID}})
        WITH temp, source, t
        CREATE (event:Event)
        SET event = t
        SET event.Activity = {activity}
        SET event.Start = source.{start_column}
        SET event.End = source.{end_column}
        WITH temp, collect(event) as events
        CREATE (co:Common)
        WITH co, events
        UNWIND events as event
        WITH co, event
        // Create relations between events and common nodes
        CREATE (event)-[ec:E_C {{entityType: event.entityType}}]->(co)
        """, f'Creating event nodes for {entity_label}.{start_column}')
def __retrieve_relationship_data(neo4j: Neo4JConnection):
    """Export every relationship of the graph as one CSV row.

    Each row holds the source node id, the first label of the source node,
    the target node id, the first label of the target node and the
    relationship type. The rows are written through ``csv.write`` under the
    name ``'relationships'``.

    Args:
        neo4j: Connection used to run the query.
    """
    relationships = neo4j.query("""
        match (s)-[r]->(t)
        return ID(s) as sourceID, labels(s) as sourceLabels,
               ID(t) as targetID, labels(t) as targetLabels,
               type(r) as relationType
        """)

    # Only the first label of each endpoint is kept; the label lists are
    # replaced in place so the rows keep their column order.
    labels_columns = (1, 3)
    for row in relationships:
        for column in labels_columns:
            row[column] = row[column][0]

    header = [
        'source_id', 'source_label', 'target_id', 'target_label', 'rel_type'
    ]
    csv.write(relationships, header, 'relationships')
def __simple(neo4j: Neo4JConnection, label: str):
    """Return summary statistics of the number of events per entity.

    For every ``Entity`` whose ``EntityType`` equals ``label``, the number of
    ``Event`` nodes pointing at it through ``E_EN`` is counted (the query
    takes the maximum of that count and 0). Average, standard deviation,
    maximum and minimum of these values are returned.

    Args:
        neo4j: Connection used to run the query.
        label: ``EntityType`` value selecting the entities.

    Returns:
        Whatever ``neo4j.query`` returns for the statement; the query yields
        the columns ``average``, ``stdev``, ``max`` and ``min``.
    """
    cypher = f"""
        MATCH (n:Entity {{EntityType: '{label}'}})
        OPTIONAL MATCH (n)<-[:E_EN]-(e:Event)
        WITH n, count(e) as nr_events
        unwind [nr_events,0] as path_length
        with n, max(path_length) as path_length
        RETURN avg(path_length) as average,
               stdev(path_length) as stdev,
               max(path_length) as max,
               min(path_length) as min
        """
    message = f"Calculating lengths of df paths for entities with EntityType: {label}"
    return neo4j.query(cypher, message)
def __histogram_query_data(neo4j: Neo4JConnection,
                           nodetype: str,
                           dir: str,
                           entity_label: str = None):
    """Return the degree histogram of the nodes with a given label.

    Args:
        neo4j: Connection used to run the query.
        nodetype: Node label to select.
        dir: Direction argument handed verbatim to ``apoc.node.degree``.
            ``"<"`` is reported as in-degree in the log message, any other
            value as out-degree.
        entity_label: When given, only nodes whose ``EntityType`` equals this
            value are considered.

    Returns:
        Whatever ``neo4j.query`` returns for the statement; the query yields
        each degree together with the number of nodes having it.
    """
    if dir == "<":
        direction = "in"
    else:
        direction = "out"

    message = f"Calculating histogram of {direction} degree of {nodetype} nodes"
    if entity_label is None:
        property_filter = ''
    else:
        property_filter = f'{{EntityType: "{entity_label}"}}'
        message = f"{message} with EntityType: {entity_label}"
    match = f'MATCH (u:{nodetype}{property_filter})'

    return neo4j.query(
        f"""
        {match}
        RETURN apoc.node.degree(u,'{dir}'), count(u)
        """, message)
def __simple(neo4j: Neo4JConnection,
             nodetype: str,
             dir: str,
             entity_label: str = None):
    """Return summary statistics of the degree of nodes with a given label.

    Args:
        neo4j: Connection used to run the query.
        nodetype: Node label to select.
        dir: Direction argument handed verbatim to ``apoc.node.degree``.
            ``"<"`` is reported as in-degree in the log message, any other
            value as out-degree.
        entity_label: When given, only nodes whose ``EntityType`` equals this
            value are considered.

    Returns:
        Whatever ``neo4j.query`` returns for the statement; the query yields
        the columns ``average``, ``stdev``, ``max`` and ``min``.
    """
    if dir == "<":
        direction = "in"
    else:
        direction = "out"

    message = f"Calculating {direction} degree of {nodetype} nodes"
    if entity_label is None:
        property_filter = ''
    else:
        property_filter = f'{{EntityType: "{entity_label}"}}'
        message = f"{message} with EntityType: {entity_label}"
    match = f'MATCH (u:{nodetype}{property_filter})'

    return neo4j.query(
        f"""
        {match}
        RETURN avg(apoc.node.degree(u,'{dir}')) as average,
               stdev(apoc.node.degree(u,'{dir}')) as stdev,
               max(apoc.node.degree(u,'{dir}')) as max,
               min(apoc.node.degree(u,'{dir}')) as min
        """, message)
# Conversion script: runs the conversion steps against Neo4j in a fixed order.
from config.config import Config
from database.neo4j_connection import Neo4JConnection
from log.logger import Logger, INFO
from conversion import (
    cleanup,
    indexes,
    entities,
    events,
    e_en,
    df,
    log as lognode,
)

config = Config()
log = Logger.instance()
# Optional: uncomment to set the logger's level to INFO.
# log.set_log_level(INFO)
neo4j = Neo4JConnection(config)

# Each step is one call into the corresponding conversion module; the order
# below is the order in which the steps are executed.
cleanup.event_graph(neo4j)
indexes.create_indexes(neo4j)
entities.create(neo4j, config)
events.create(neo4j, config)
e_en.create(neo4j)
df.create(neo4j)
lognode.create(neo4j)
cleanup.temp_variables(neo4j)