def to_dict(self):
    """Return a plain-dict representation of this path.

    The ``start`` and ``end`` entries are copied from the stored ``data``
    blob; ``nodes`` is the result of passing the stored ``paths`` through
    ``unwind`` (presumably flattening the nested per-path lists -- confirm
    against the helper's definition).
    """
    data = self.data
    payload = {}
    payload['start'] = data['start']
    payload['end'] = data['end']
    payload['length'] = self.length
    payload['types'] = self.types
    payload['labels'] = self.labels
    payload['start_entity_id'] = self.start_entity_id
    payload['end_collection_id'] = self.end_collection_id
    payload['nodes'] = unwind(data['paths'])
    return payload
def from_data(cls, start_entity, end_collection_id, paths, types, labels,
              start, end):
    """Build a path record from query results and add it to the session.

    NOTE(review): this is invoked as ``Path.from_data(...)`` elsewhere in
    this file, so it is expected to be bound as a classmethod -- confirm the
    decorator is present above this definition.

    Args:
        start_entity: entity the paths originate from; only ``.id`` is read.
        end_collection_id: identifier(s) of the collection the paths end in.
        paths: non-empty sequence of paths, each itself a sized sequence
            (node ids, as supplied by ``generate_paths``).
        types: relationship types seen along the paths.
        labels: node labels seen along the paths.
        start: serialised start node, stored verbatim in ``data``.
        end: serialised end node, stored verbatim in ``data``.

    Returns:
        The newly created object, already added to ``db.session`` (the
        caller is responsible for committing).

    Raises:
        ValueError: if ``paths`` is empty (raised by ``min``).
    """
    obj = cls()
    obj.start_entity_id = start_entity.id
    obj.end_collection_id = end_collection_id
    obj.labels = unwind(labels)
    obj.types = unwind(types)

    # Materialise the lengths once: on Python 3 ``map`` returns a one-shot
    # iterator, so reusing it for min(), sum() and len() would silently
    # produce wrong numbers and then raise TypeError.
    lengths = [len(path) for path in paths]
    path_count = len(lengths)

    # A path of N nodes has N - 1 hops; record the shortest one.
    obj.length = min(lengths) - 1

    # Mean hop count across all paths (sum of (len - 1) over the count).
    average = (float(sum(lengths)) - path_count) / float(path_count)

    # More distinct paths raise the weight, longer ones lower it; the
    # divisor is clamped to 1.0 so very short averages cannot inflate it.
    obj.weight = path_count * (1.0 / max(1.0, average))

    obj.data = {
        'start': start,
        'end': end,
        'paths': paths
    }
    db.session.add(obj)
    return obj
def generate_paths(graph, entity, ignore_types=SKIP_TYPES):
    """Rebuild the stored paths leading from ``entity`` into other collections.

    Any paths previously stored for the entity are deleted first. If no
    graph is available, or the entity is not in its active state, nothing
    further happens. Otherwise the graph is queried for paths of one to
    three relationships between the entity and entities that belong to a
    different collection, ignoring relationships whose type is listed in
    ``ignore_types``. One ``Path`` is created per result row, except rows
    whose only relationship type is ``AKA``; the session is committed once
    at the end.
    """
    Path.delete_by_entity(entity.id)
    if graph is None or entity.state != entity.STATE_ACTIVE:
        return

    log.info("Generating graph path cache: %r", entity)

    # TODO: should max path length be configurable?
    query = (
        "MATCH pth = (start:Aleph:Entity)-[*1..3]-(end:Aleph:Entity) "
        "MATCH (start)-[startpart:PART_OF]->(startcoll:Collection) "
        "MATCH (end)-[endpart:PART_OF]->(endcoll:Collection) "
        "WHERE start.fingerprint = {entity_fp} AND "
        "startpart.alephCanonical = {entity_id} AND "
        "startcoll.alephCollection <> endcoll.alephCollection AND "
        "all(r IN relationships(pth) WHERE NOT type(r) IN {ignore_types}) "
        "WITH DISTINCT start, end, "
        " COLLECT(DISTINCT extract(x IN nodes(pth) | x.id)) AS paths, "
        " COLLECT(DISTINCT extract(x IN nodes(pth) | labels(x))) AS labels, "
        " COLLECT(DISTINCT extract(r IN relationships(pth) | type(r))) AS types, "
        " COLLECT(DISTINCT endcoll.alephCollection) AS end_collection_id "
        "RETURN start, end, paths, types, labels, end_collection_id "
    )
    params = {
        'entity_id': entity.id,
        'entity_fp': entity.fingerprint,
        'ignore_types': ignore_types,
    }

    generated = 0
    for row in graph.run(query, **params):
        # Drop the base label that every node carries.
        node_labels = [label for label in unwind(row.get('labels'))
                       if label != BASE_NODE]
        rel_types = unwind(row.get('types'))

        # Rows connected purely through AKA links are not stored.
        if len(rel_types) == 1 and 'AKA' in rel_types:
            continue

        end_collection_id = row.get('end_collection_id')
        node_paths = row.get('paths')
        start_node = NodeType.dict(row.get('start'))
        end_node = NodeType.dict(row.get('end'))
        Path.from_data(entity, end_collection_id, node_paths, rel_types,
                       node_labels, start_node, end_node)
        generated += 1

    db.session.commit()
    # TODO: send email to collection owners?
    log.info("Generated %s paths for %r", generated, entity)