def qryChunkInWorker(self, offset, limit) -> List[LiveDbDisplayValueTuple]:
    """ Query Chunk

    Load one page of LiveDB items from the database, ordered by id, and
    return them as display value tuples.

    :param self: A celery reference to this task
    :param offset: The number of rows to skip before the chunk starts
    :param limit: The maximum number of rows to return in the chunk
    :returns: A LiveDbDisplayValueTuple for each row in the chunk

    """
    liveDbTable = LiveDbItem.__table__
    wantedColumns = [
        liveDbTable.c.key,
        liveDbTable.c.dataType,
        liveDbTable.c.rawValue,
        liveDbTable.c.displayValue,
    ]

    dbSession = CeleryDbConn.getDbSession()
    try:
        stmt = select(wantedColumns).order_by(liveDbTable.c.id)
        stmt = stmt.offset(offset).limit(limit)
        rows = dbSession.execute(stmt).fetchall()

        chunk = []
        for row in rows:
            chunk.append(
                LiveDbDisplayValueTuple(key=row.key,
                                        dataType=row.dataType,
                                        rawValue=row.rawValue,
                                        displayValue=row.displayValue))
        return chunk

    finally:
        dbSession.close()
def createOrUpdateBranches(self, importBranchesEncodedPayload: bytes) -> None:
    """ Create or Update Branches

    Decode the import branch tuples, validate them, convert them to the
    branch format used by the diagram plugin, and then for each group of
    branches: store the branches, bulk insert their disps and queue those
    disps for compiling.

    Each group is committed in its own transaction. If anything fails, the
    open transaction is rolled back and the celery task is retried.

    :param self: A celery reference to this task
    :param importBranchesEncodedPayload: An encoded payload containing the
        list of ImportBranchTuple to import.
    :returns: None

    """
    # Decode importBranches payload
    importBranches: List[ImportBranchTuple] = (
        Payload().fromEncodedPayload(importBranchesEncodedPayload).tuples)

    # Validate the input importBranches
    _validateNewBranchIndexs(importBranches)

    # Do the import
    groupedBranches = _convertImportBranchTuples(importBranches)

    startTime = datetime.now(pytz.utc)

    dbSession = CeleryDbConn.getDbSession()

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        for (modelSetKey, modelSetId,
             coordSetId), branches in groupedBranches.items():
            _insertOrUpdateBranches(conn, modelSetKey, modelSetId, branches)

            newDisps, dispIdsToCompile = _convertBranchDisps(branches)

            # NO TRANSACTION
            # Bulk load the Disps
            _bulkInsertDisps(engine, newDisps)

            # Queue the compiler
            DispCompilerQueueController.queueDispIdsToCompileWithSession(
                dispIdsToCompile, conn)

            transaction.commit()
            dbSession.commit()

            # A committed transaction can not be committed again, so start a
            # new one for the next group (and for the rollback on failure).
            transaction = conn.begin()

            logger.debug(
                "Completed importing %s branches for coordSetId %s in %s",
                len(branches), coordSetId,
                (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        dbSession.rollback()
        transaction.rollback()
        logger.debug("Retrying createOrUpdateBranches, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        dbSession.close()
        # Closing the connection discards the trailing, empty transaction
        conn.close()
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    """ Build Index

    Load the documents for the given chunk keys and pack each chunk into an
    encoded payload.

    :param chunkKeys: The chunk keys to build the encoded chunks for
    :returns: A dict of chunkKey -> encoded payload, where the payload
        carries a JSON object string of document key -> document JSON,
        dumped with sorted keys.

    """
    dbSession = CeleryDbConn.getDbSession()

    try:
        docQry = dbSession.query(DocDbDocument.chunkKey,
                                 DocDbDocument.key,
                                 DocDbDocument.documentJson)
        docQry = docQry.filter(DocDbDocument.chunkKey.in_(chunkKeys))
        docQry = docQry.order_by(DocDbDocument.key)
        docRows = docQry.yield_per(1000).all()

        # Group the rows as chunkKey -> {document key -> document json}
        docJsonByKeyByChunkKey = defaultdict(dict)
        for row in docRows:
            docsInChunk = docJsonByKeyByChunkKey[row.chunkKey]
            docsInChunk[row.key] = row.documentJson

        # Dump each chunk with its keys sorted and wrap it in a payload
        return {
            chunkKey: Payload(
                tuples=json.dumps(docJsonByKey, sort_keys=True)
            ).toEncodedPayload()
            for chunkKey, docJsonByKey in docJsonByKeyByChunkKey.items()
        }

    finally:
        dbSession.close()
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    """ Build Index

    Load the branch index rows for the given chunk keys and pack each chunk
    into an encoded payload.

    :param chunkKeys: The chunk keys to build the encoded chunks for
    :returns: A dict of chunkKey -> encoded payload, where the payload
        carries a JSON object string of branch key -> list of packed JSON
        strings, dumped with sorted keys.

    """
    dbSession = CeleryDbConn.getDbSession()

    try:
        branchQry = dbSession.query(BranchIndex.chunkKey,
                                    BranchIndex.key,
                                    BranchIndex.packedJson)
        branchQry = branchQry.filter(BranchIndex.chunkKey.in_(chunkKeys))
        branchQry = branchQry.order_by(BranchIndex.key)
        branchRows = branchQry.yield_per(1000).all()

        # Group the rows as chunkKey -> {key -> [packedJson, packedJson, ...]}
        packedJsonsByKeyByChunkKey = defaultdict(lambda: defaultdict(list))
        for row in branchRows:
            packedJsonsForKey = packedJsonsByKeyByChunkKey[row.chunkKey][row.key]
            packedJsonsForKey.append(row.packedJson)

        # Dump each chunk with its keys sorted and wrap it in a payload
        return {
            chunkKey: Payload(
                tuples=json.dumps(packedJsonsByKey, sort_keys=True)
            ).toEncodedPayload()
            for chunkKey, packedJsonsByKey
            in packedJsonsByKeyByChunkKey.items()
        }

    finally:
        dbSession.close()
def _loadCoordSet(modelSetKey, coordSetKey):
    """ Load Coord Set

    Get (or create) the coord set for the given keys and return it after
    expunging everything from the database session.

    :param modelSetKey: The key of the model set the coord set belongs to
    :param coordSetKey: The key of the coord set
    :returns: The result of getOrCreateCoordSet

    """
    dbSession = CeleryDbConn.getDbSession()
    try:
        loadedCoordSet = getOrCreateCoordSet(dbSession,
                                             modelSetKey,
                                             coordSetKey)
        # Expunge before the session is closed
        dbSession.expunge_all()

    finally:
        dbSession.close()

    return loadedCoordSet
def _convertImportBranchTuples(
        importBranches: List[ImportBranchTuple]
) -> Dict[typing.Tuple[str, int, int], List[BranchTuple]]:
    """ Convert Import Branch Tuples

    This method takes import branch tuples, and converts them to the
    branch format used throughout the diagram plugin.

    (That's the packed JSON wrapped by an accessor class)

    :param importBranches: The import branch tuples to convert
    :returns: The converted branches, grouped in a dict keyed by
        (modelSetKey, modelSetId, coordSetId)

    """

    # Get a map for the coordSetIds
    modelKeyCoordKeyTuples = [(b.modelSetKey, b.coordSetKey)
                              for b in importBranches]

    coordSetIdByModelKeyCoordKeyTuple = getModelSetIdCoordSetId(
        modelKeyCoordKeyTuples)

    # Sort out the importBranches by modelSetKey, then coordSetKey
    branchByModelKeyByCoordKey = defaultdict(lambda: defaultdict(list))
    for importBranch in importBranches:
        branchByModelKeyByCoordKey[importBranch.modelSetKey][
            importBranch.coordSetKey].append(importBranch)

    # Define the converted importBranches
    convertedBranchesByCoordSetId: Dict[typing.Tuple[str, int, int],
                                        List[BranchTuple]] = {}

    dbSession = CeleryDbConn.getDbSession()
    try:
        # Iterate through the importBranches and convert them
        for modelSetKey, branchesByCoordKey in branchByModelKeyByCoordKey.items():
            # Iterate the (key, value) pairs; iterating the dict itself
            # only yields the coordSetKey strings.
            for coordSetKey, groupBranches in branchesByCoordKey.items():
                modelSetId, coordSetId = coordSetIdByModelKeyCoordKeyTuple[(
                    modelSetKey, coordSetKey)]

                lookupHashConverter = LookupHashConverter(
                    dbSession, modelSetId, coordSetId)

                convertedBranchesByCoordSetId[
                    (modelSetKey, modelSetId, coordSetId)] = [
                    BranchTuple.loadFromImportTuple(
                        importBranch,
                        coordSetId,
                        lookupHashConverter=lookupHashConverter)
                    for importBranch in groupBranches
                ]

    finally:
        dbSession.close()

    return convertedBranchesByCoordSetId
def _makeModelSet(modelSetKey: str) -> int:
    """ Make Model Set

    Insert a new GraphDbModelSet that uses the key as both its key and name.

    :param modelSetKey: The key of the model set to create
    :returns: The id of the new model set

    """
    session = CeleryDbConn.getDbSession()
    try:
        modelSet = GraphDbModelSet(key=modelSetKey, name=modelSetKey)
        session.add(modelSet)
        session.commit()

        # Read the id while the session is still open
        modelSetId = modelSet.id
        return modelSetId

    finally:
        session.close()
def _makeCoordSet(modelSetId: int, coordSetKey: str) -> int:
    """ Make Coord Set

    Insert a new ModelCoordSet in the given model set, using the key as
    both its key and name.

    :param modelSetId: The id of the model set that owns the coord set
    :param coordSetKey: The key of the coord set to create
    :returns: The id of the new coord set

    """
    session = CeleryDbConn.getDbSession()
    try:
        coordSet = ModelCoordSet(modelSetId=modelSetId,
                                 key=coordSetKey,
                                 name=coordSetKey)
        session.add(coordSet)
        session.commit()

        # Read the id while the session is still open
        coordSetId = coordSet.id
        return coordSetId

    finally:
        session.close()
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    """ Build Index

    Load the item key index rows for the given chunk keys and pack each
    chunk into an encoded payload.

    :param chunkKeys: The chunk keys to build the encoded chunks for
    :returns: A dict of chunkKey -> encoded payload, where the payload
        carries a JSON object string of item key -> JSON string of the list
        of segment keys for that item, dumped with sorted keys.

    """
    session = CeleryDbConn.getDbSession()

    try:
        # Only the columns read below are selected
        indexQry = (
            session.query(ItemKeyIndex.chunkKey,
                          ItemKeyIndex.itemKey,
                          ItemKeyIndex.segmentKey)
            .filter(ItemKeyIndex.chunkKey.in_(chunkKeys))
            .order_by(ItemKeyIndex.itemKey, ItemKeyIndex.segmentKey)
            .yield_per(1000)
            .all()
        )

        # Create the ChunkKey -> {itemKey -> [segmentKey, segmentKey, ...]}
        segmentKeysByItemKeyByChunkKey = defaultdict(
            lambda: defaultdict(list))

        for item in indexQry:
            segmentKeysByItemKeyByChunkKey[item.chunkKey][
                item.itemKey].append(item.segmentKey)

        encPayloadByChunkKey = {}

        for chunkKey, segmentKeysByItemKey in \
                segmentKeysByItemKeyByChunkKey.items():
            # Convert the list to a json string, this reduces the memory
            # footprint when searching the index.
            packedJsonByKey = {
                itemKey: json.dumps(segmentKeys)
                for itemKey, segmentKeys in segmentKeysByItemKey.items()
            }

            tuples = json.dumps(packedJsonByKey, sort_keys=True)

            # Create the blob data for this index.
            encPayloadByChunkKey[chunkKey] = Payload(
                tuples=tuples).toEncodedPayload()

        return encPayloadByChunkKey

    finally:
        session.close()
def updateValues(self, payloadEncodedArgs: bytes) -> None:
    """ Update Values Task

    Apply a batch of queued LiveDB raw value updates, one model set at a
    time, then delete the processed rows from the raw value queue.

    :param self: A celery reference to this task
    :param payloadEncodedArgs: The updates from the queue controller, an
        encoded payload whose tuples are [updates, queueItemIds]
    :returns: None

    """
    startTime = datetime.now(pytz.utc)

    decodedArgs = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    allModelUpdates: List[LiveDbRawValueQueue] = decodedArgs[0]
    queueItemIds = decodedArgs[1]

    # Bucket the updates per model set, keeping their arrival order
    updatesByModelSetId = {}
    for queuedUpdate in allModelUpdates:
        bucket = updatesByModelSetId.setdefault(queuedUpdate.modelSetId, [])
        bucket.append(queuedUpdate)

    session = CeleryDbConn.getDbSession()
    try:
        for modelSetId in updatesByModelSetId:
            _updateValuesForModelSet(modelSetId,
                                     updatesByModelSetId[modelSetId],
                                     session)

        # ---------------
        # The updates are applied, remove them from the queue
        queueTable = LiveDbRawValueQueue.__table__
        deleteStmt = queueTable.delete(queueTable.c.id.in_(queueItemIds))
        session.execute(deleteStmt)

        session.commit()

        # ---------------
        # Finally, log some statistics
        elapsed = datetime.now(pytz.utc) - startTime
        logger.info("Updated %s raw values in %s",
                    len(allModelUpdates), elapsed)

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        session.close()
def _insertOrUpdateObjects(newTraceConfigs: List[GraphDbTraceConfigTuple],
                           modelSetId: int) -> None:
    """ Insert or Update Objects

    Replace the stored trace configs with the imported ones.

    1) Delete any existing trace configs that have the same keys
    2) Insert the imported trace configs

    Both steps are committed together; on any error the session is rolled
    back and the exception is re-raised.

    :param newTraceConfigs: The trace config tuples to store
    :param modelSetId: The id of the model set the trace configs belong to
    :returns: None

    """

    traceConfigTable = GraphDbTraceConfig.__table__

    startTime = datetime.now(pytz.utc)

    dbSession = CeleryDbConn.getDbSession()

    try:
        keysToDelete = {i.key for i in newTraceConfigs}

        dbSession.execute(
            traceConfigTable.delete(
                traceConfigTable.c.key.in_(keysToDelete)))

        # Create the DB Orm objects to insert
        for importTraceConfig in newTraceConfigs:
            dbSession.add(GraphDbTraceConfig().fromTuple(importTraceConfig,
                                                         modelSetId))

        dbSession.commit()

        # Report the number of objects that were actually added above
        logger.info("Inserted %s trace configs in %s",
                    len(newTraceConfigs),
                    (datetime.now(pytz.utc) - startTime))

    except Exception:
        dbSession.rollback()
        raise

    finally:
        dbSession.close()
def _updateCoordSetPosition(coordSet: ModelCoordSet, disps: List):
    """ Update CoordSet Position

    Give the coord set an initial pan and zoom if it doesn't have one yet.
    The pan is taken from the first two values in the geomJson of the first
    disp that has a geomJson attribute, the zoom is set to 0.05.

    :param coordSet: The coord set to initialise. Nothing is done if any of
        its initialPanX, initialPanY or initialZoom values are already set.
    :param disps: An array of disp objects to take the position from
    :return: None

    """
    alreadyPositioned = (coordSet.initialPanX
                         or coordSet.initialPanY
                         or coordSet.initialZoom)
    if alreadyPositioned:
        return

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()

    try:
        # Only the first disp that has geometry is used
        firstDispWithGeom = next(
            (d for d in disps if hasattr(d, 'geomJson')), None)

        if firstDispWithGeom is not None:
            coords = json.loads(firstDispWithGeom.geomJson)
            coordSet.initialPanX = coords[0]
            coordSet.initialPanY = coords[1]
            coordSet.initialZoom = 0.05
            session.merge(coordSet)

        session.commit()

        elapsed = datetime.now(pytz.utc) - startTime
        logger.info("Updated coordset position in %s", elapsed)

    finally:
        session.close()
def importInternalUsers(self, importHash: str,
                        usersVortexMsg: bytes) -> InternalUserImportResultTuple:
    """ Import Internal Users

    Synchronise the internal users of one import group with the imported
    tuples: matching users are updated, new users are inserted and users
    loaded for this import that are not in the import are deleted.

    :param self: A celery reference to this task
    :param importHash: An unique string of this group of items being imported.
    :param usersVortexMsg: A vortexMsg containing the user tuples
    :returns: An InternalUserImportResultTuple with the added, updated and
        deleted ids, the count of unchanged users and the integrity errors.

    """
    importUsers: List[ImportInternalUserTuple] = (
        Payload().fromEncodedPayload(usersVortexMsg).tuples
    )

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()

    try:
        same, updates, inserts = [], [], []
        deleteIds, errors = [], []

        allNames = [i.userName for i in importUsers]

        # Load the users from this import that match the imported names
        existingUsersByName = {}
        if allNames:
            existingQry = (
                session.query(InternalUserTuple)
                .filter(InternalUserTuple.userName.in_(allNames))
                .filter(InternalUserTuple.importHash == importHash)
                .options(subqueryload(InternalUserTuple.groups))
            )
            for existing in existingQry.all():
                existingUsersByName[existing.userName] = existing

        groupsByName = {}
        for group in session.query(InternalGroupTuple).all():
            groupsByName[group.groupName] = group

        # Each user is committed individually, so that an integrity error
        # is recorded against that user and the import carries on.
        for importUser in importUsers:
            try:
                existingUser = existingUsersByName.pop(importUser.userName,
                                                       None)
                if not existingUser:
                    _insertUser(session, groupsByName, importUser,
                                importHash, inserts)
                else:
                    _updateUser(existingUser, groupsByName, importUser,
                                same, updates)

                session.commit()

            except IntegrityError as e:
                errors.append(str(e))
                session.rollback()

        # Whatever is left was loaded for this import but not imported again
        for oldUser in existingUsersByName.values():
            deleteIds.append(oldUser.id)
            session.delete(oldUser)

        session.commit()

        elapsed = datetime.now(pytz.utc) - startTime
        logger.info("Inserted %s, Updated %s, Deleted %s, Same %s, in %s",
                    len(inserts), len(updates), len(deleteIds), len(same),
                    elapsed)

        addedIds = [o.id for o in inserts]
        updatedIds = [o.id for o in updates]
        return InternalUserImportResultTuple(
            addedIds=addedIds,
            updatedIds=updatedIds,
            deletedIds=deleteIds,
            sameCount=len(same),
            errors=errors
        )

    except Exception as e:
        session.rollback()
        logger.debug("Task failed, but it will retry. %s", e)
        raise self.retry(exc=e, countdown=2)

    finally:
        session.close()
def compileDisps(self, payloadEncodedArgs: bytes):
    """ Compile Disps

    This function takes a list of Disp IDs and compiles them.

    The processing is as follows (more or less)

    0) Load lookups

    ----

    1) DispGroupPointers, copy disps from group to pointer

    ----

    2) Load the Disps from the DB

    3) Apply the LiveDB values to the Disp attributes

    4) Scale the Disp geomJson to match the coord set scaling

    5) DispGroups, take Disps as part of a disp group and load them into JSON in the
        DispGroup. PreparedDisp????

    6) Extract any new LocationIndex entries, of the Disp has a key

    7) Determine which grids this disp will live in, and create GridKeyIndex entries
        for those grid keys for this disp.

    8) Write the Disp JSON back to the disp

    ormSession.commit() here.
        This stores the following updates that have been made into the disp:
        * dispJson,
        * locationJson,
        * livedb attribute updates

    ----

    9) Write the calculated data to tables

    NOTE: Disps that belong to a DispGroup will not be queued for compile by
    ImportDispTask.

    Any failure in a step is logged and the celery task is retried.

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload whose tuples are
        [queue items (each with a dispId), queueItemIds]

    """
    # Time the whole compile, this is reported by the final log message
    compileStartTime = datetime.now(pytz.utc)

    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    dispIds = [o.dispId for o in argData[0]]
    queueItemIds: List[int] = argData[1]

    # ==========================
    # 0) Load the lookups
    ormSession = CeleryDbConn.getDbSession()
    try:
        # ---------------
        # Load Coord Sets
        coordSets = (ormSession.query(ModelCoordSet).options(
            subqueryload(ModelCoordSet.modelSet),
            subqueryload(ModelCoordSet.gridSizes)).all())

        # Get the Coord Set map
        coordSetById = {o.id: o for o in coordSets}

        # ---------------
        # Load Text Styles
        textStyleById = {
            ts.id: ts for ts in ormSession.query(DispTextStyle).all()
        }

        ormSession.expunge_all()

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        ormSession.close()

    # ==========================
    # This method will create new disps that will be compiled later.
    try:
        # ---------------
        # 1) Clone the disps for the group instances
        dispIdsIncludingClones = _cloneDispsForDispGroupPointer(dispIds)

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    # ==========================
    # Run all the ORM Session update methods
    ormSession = CeleryDbConn.getDbSession()
    try:
        with ormSession.no_autoflush:
            # ---------------
            # 2) Load the Disps from the DB
            disps = _loadDisps(ormSession, dispIdsIncludingClones)

            # ---------------
            # 3) Apply the LiveDB Attribute updates
            _applyLiveDbAttributes(ormSession, disps, coordSetById)

            # ---------------
            # 4) Scale the Disp geomJson to match the coord set scaling
            preparedDisps = _scaleDisp(disps, coordSetById)

            # 5) DispGroups, take Disps as part of a disp group and load them
            # into JSON in the DispGroup. PreparedDisp????
            _compileDispGroups(ormSession, preparedDisps)

            # ---------------
            # 6) Extract any new LocationIndex entries, of the Disp has a key
            locationCompiledQueueItems, locationIndexByDispId = _indexLocation(
                preparedDisps, coordSetById)

            # ---------------
            # 7) Determine which grids this disp will live in, and create
            # GridKeyIndex entries for those grid keys for this disp.
            gridCompiledQueueItems, gridKeyIndexesByDispId = _calculateGridKeys(
                preparedDisps, coordSetById, textStyleById)

            # ---------------
            # 8) Write the Disp JSON back to the disp
            _updateDispsJson(preparedDisps)

        # ---------------
        # Commit the updates, the commit is timed on its own
        commitStartTime = datetime.now(pytz.utc)
        ormSession.commit()
        logger.debug("Committed %s disp objects in %s", len(disps),
                     (datetime.now(pytz.utc) - commitStartTime))

    except Exception as e:
        ormSession.rollback()
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        ormSession.close()

    # ==========================
    # 9) Run the bulk DB delete/insert methods
    try:
        _insertToDb(dispIdsIncludingClones, gridCompiledQueueItems,
                    gridKeyIndexesByDispId, locationCompiledQueueItems,
                    locationIndexByDispId, queueItemIds)

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    logger.info("Compiled %s disp objects in %s", len(dispIds),
                (datetime.now(pytz.utc) - compileStartTime))
def importDispLinks(coordSet: ModelCoordSet,
                    importGroupHash: str,
                    importDispLinks: List[ImportLiveDbDispLinkTuple]
                    ) -> List[ImportLiveDbItemTuple]:
    """ Import Disps Links

    1) Drop all disp links with matching importGroupHash

    2) Convert the import tuples to disp links, with preallocated ids

    3) Bulk insert the disp links

    :param coordSet: The coord set the disp links are converted for
    :param importGroupHash: The hash of the import group being replaced
    :param importDispLinks: An array of import LiveDB Disp Links to import

    :return: The LiveDB items that need importing for these disp links,
        one for each distinct LiveDB key.

    """
    dispLinkTable = LiveDbDispLink.__table__
    dispLinkIdIterator = prefetchDeclarativeIds(LiveDbDispLink,
                                                len(importDispLinks))

    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()

    try:
        ormSession.execute(
            dispLinkTable.delete()
            .where(dispLinkTable.c.importGroupHash == importGroupHash))

        # NOTE(review): Returning here skips the commit below, so the delete
        # above is not committed when there is nothing to import.
        # Confirm that this is intended.
        if not importDispLinks:
            return []

        liveDbItemsToImportByKey = {}

        dispLinkInserts = []

        for importDispLink in importDispLinks:
            dispLink = _convertImportDispLinkTuple(coordSet, importDispLink)
            dispLink.id = next(dispLinkIdIterator)

            liveDbItem = _makeImportLiveDbItem(
                importDispLink, liveDbItemsToImportByKey
            )

            dispLink.liveDbKey = liveDbItem.key
            dispLinkInserts.append(dispLink.tupleToSqlaBulkInsertDict())

        ormSession.commit()

        if dispLinkInserts:
            # The bulk insert runs on its own raw connection, which must be
            # closed when we're done with it.
            rawConn = CeleryDbConn.getDbEngine().raw_connection()
            try:
                pgCopyInsert(rawConn, LiveDbDispLink.__table__,
                             dispLinkInserts)
                rawConn.commit()

            finally:
                rawConn.close()

        logger.info(
            "Inserted %s LiveDbDispLinks in %s",
            len(dispLinkInserts), (datetime.now(pytz.utc) - startTime)
        )

        return list(liveDbItemsToImportByKey.values())

    finally:
        ormSession.close()
def compileLocationIndex(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Location Index Task

    Rebuild the compiled location index for the index buckets in the queue
    items. The old compiled rows are deleted and committed first, then the
    new rows are inserted and the processed queue rows are deleted in a
    second transaction.

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of the index buckets that have been compiled.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    indexBuckets = list(set([i.indexBucket for i in queueItems]))
    modelSetIdByIndexBucket = {i.indexBucket: i.modelSetId
                               for i in queueItems}

    queueTable = LocationIndexCompilerQueue.__table__
    compiledTable = LocationIndexCompiled.__table__

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        logger.debug("Staring compile of %s queueItems in %s",
                     len(queueItems), (datetime.now(pytz.utc) - startTime))

        # Get Model Sets
        modelSetIds = list(set(modelSetIdByIndexBucket.values()))
        modelSetQry = (
            session.query(ModelSet.key, ModelSet.id)
            .filter(ModelSet.id.in_(modelSetIds))
        )

        modelSetKeyByModelSetId = {o.id: o.key for o in modelSetQry}

        total = 0
        dispData = _buildIndex(session, indexBuckets)

        # Delete the old compiled rows and commit that on its own
        conn.execute(compiledTable.delete(
            makeCoreValuesSubqueryCondition(engine,
                                            compiledTable.c.indexBucket,
                                            indexBuckets)
        ))

        transaction.commit()
        transaction = conn.begin()

        inserts = []
        for indexBucket, jsonStr in dispData.items():
            modelSetId = modelSetIdByIndexBucket[indexBucket]
            modelSetKey = modelSetKeyByModelSetId[modelSetId]

            # The hash of the model set key and the json is stored as the
            # lastUpdate value of the bucket.
            m = hashlib.sha256()
            m.update(modelSetKey.encode())
            m.update(jsonStr.encode())
            dataHash = b64encode(m.digest()).decode()

            locationIndexTuple = LocationIndexTuple(
                modelSetKey=modelSetKey,
                indexBucket=indexBucket,
                jsonStr=jsonStr,
                lastUpdate=dataHash
            )

            blobData = Payload(tuples=[locationIndexTuple]).toEncodedPayload()

            inserts.append(dict(modelSetId=modelSetId,
                                indexBucket=indexBucket,
                                lastUpdate=dataHash,
                                blobData=blobData))

        if inserts:
            conn.execute(compiledTable.insert(), inserts)

        logger.debug("Compiled %s LocationIndexes, %s missing, in %s",
                     len(inserts),
                     len(indexBuckets) - len(inserts),
                     (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        # The queue items are processed, remove them from the queue
        conn.execute(queueTable.delete(
            makeCoreValuesSubqueryCondition(engine,
                                            queueTable.c.id,
                                            queueItemIds)
        ))

        transaction.commit()
        logger.info("Compiled and Comitted %s LocationIndexCompileds in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return indexBuckets

    except Exception as e:
        transaction.rollback()
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        conn.close()
        session.close()
def _cloneDispsForDispGroupPointer(dispIds: List[int]):
    """ Clone Disps for DispGroupPointer

    This method will clone "instances" of the disps in the disp groups for the
    DispGroupPointer.

    For each DispGroupPointer in dispIds that has a target disp group, the
    disps already assigned to the pointer are deleted, then the child disps
    of the target group (and their LiveDB disp links) are cloned, offset by
    the pointers geometry, and saved against the pointer.

    :param dispIds: The ids of the disps being compiled
    :returns: dispIds unchanged if there are no pointers with targets,
        otherwise dispIds plus the ids of the newly cloned disps.

    """
    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()
    try:

        # -----
        # Load the disp group pointers
        qry = ormSession.query(DispGroupPointer) \
            .filter(DispGroupPointer.targetDispGroupId != None) \
            .filter(DispGroupPointer.id.in_(dispIds))

        dispGroupPointers: List[DispGroupPointer] = qry.all()

        # If there are no DispGroupPointers that need cloning, then return.
        if not dispGroupPointers:
            logger.debug(
                "Cloning skipped,"
                " there are no disp group ptrs with targets, in %s",
                (datetime.now(pytz.utc) - startTime))
            return dispIds

        dispGroupPointerTargetIds = [
            o.targetDispGroupId for o in dispGroupPointers
        ]

        del qry

        # -----
        # Delete any existing disps that are in these pointers
        # This delete is committed before the clones are created.
        ormSession.query(DispBase) \
            .filter(DispBase.groupId.in_([o.id for o in dispGroupPointers])) \
            .delete(synchronize_session=False)

        ormSession.commit()

        # -----
        # Query for the disp groups we'll need
        dispGroupChildsByGroupId = _queryDispsForGroup(
            ormSession, dispGroupPointerTargetIds)

        # -----
        # Query for the disp groups names
        dispBaseTable = DispBase.__table__
        dispGroupTable = DispGroup.__table__

        qry = ormSession.execute(
            select(columns=[
                dispBaseTable.c.id, dispBaseTable.c.coordSetId,
                dispGroupTable.c.name
            ],
                whereclause=dispBaseTable.c.id.in_(
                    dispGroupPointerTargetIds)).select_from(
                join(dispGroupTable, dispBaseTable,
                     dispGroupTable.c.id == dispBaseTable.c.id)))

        # The name stored on the pointer is "<coordSetId>|<group name>"
        dispGroupNameByGroupId = {
            o.id: '%s|%s' % (o.coordSetId, o.name)
            for o in qry.fetchall()
        }

        del qry

        # -----
        # Clone the child disps
        cloneDisps = []
        cloneLiveDbDispLinks = []

        for dispPtr in dispGroupPointers:
            if not dispPtr.targetDispGroupId:
                logger.debug("Pointer has no targetGroupId id=%s", dispPtr.id)
                continue

            dispGroupChilds = dispGroupChildsByGroupId.get(
                dispPtr.targetDispGroupId)

            if not dispGroupChilds:
                logger.warning(
                    "Pointer points to missing DispGroup,"
                    " id=%s, targetGroupId=%s", dispPtr.id,
                    dispPtr.targetDispGroupId)
                continue

            # The pointers geometry is unpacked into the x, y offset that is
            # applied to each clone below.
            x, y = json.loads(dispPtr.geomJson)
            dispPtr.targetDispGroupName = \
                dispGroupNameByGroupId[dispPtr.targetDispGroupId]

            for templateDisp in dispGroupChilds:
                # Create the clone
                cloneDisp = templateDisp.tupleClone()
                cloneDisps.append(cloneDisp)

                cloneDisp.coordSetId = dispPtr.coordSetId

                # Offset the geometry
                geom = json.loads(cloneDisp.geomJson)
                geom = _scaleDispGeom(geom, 1, 1, x, y)
                cloneDisp.geomJson = json.dumps(geom)

                # Assign the clone to the DispGroupPointer
                cloneDisp.groupId = dispPtr.id

                for dispLink in templateDisp.liveDbLinks:
                    cloneDispLink = dispLink.tupleClone()
                    cloneLiveDbDispLinks.append(cloneDispLink)

                    # Keep a reference to the cloned disp for now, it's
                    # swapped for the disps id once the ids are allocated.
                    cloneDispLink.id = None
                    cloneDispLink.disp = cloneDisp
                    cloneDispLink.coordSetId = dispPtr.coordSetId

        # -----
        # Preallocate the IDs for performance on PostGreSQL
        dispIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DispBase, len(cloneDisps))
        for cloneDisp in cloneDisps:
            cloneDisp.id = next(dispIdGen)

        # Preallocate the IDs for performance on PostGreSQL
        dispLinkIdGen = CeleryDbConn.prefetchDeclarativeIds(
            LiveDbDispLink, len(cloneLiveDbDispLinks))
        for cloneDispLink in cloneLiveDbDispLinks:
            cloneDispLink.id = next(dispLinkIdGen)
            cloneDispLink.dispId = cloneDispLink.disp.id
            cloneDispLink.disp = None

        # -----
        # Create the new list of IDs to compile
        # Do this here, otherwise it will cause a DB refresh if it's after the commit.
        dispIdsIncludingClones = dispIds + [o.id for o in cloneDisps]

        ormSession.bulk_save_objects(cloneDisps, update_changed_only=False)
        ormSession.bulk_save_objects(cloneLiveDbDispLinks,
                                     update_changed_only=False)

        ormSession.commit()

        logger.debug("Cloned %s disp group objects in %s", len(cloneDisps),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        ormSession.rollback()
        raise

    finally:
        ormSession.close()

    return dispIdsIncludingClones
def _prepareLookups(newDocuments: List[ImportDocumentTuple],
                    modelSetId: int) -> Dict[str, int]:
    """ Prepare Lookups

    Make sure the document properties and the document types used by the
    imported documents exist for the model set, inserting the missing ones.

    As a side effect, each imported document has its key copied into its
    document dict and its documentTypeKey converted to lower case.

    :param newDocuments: The documents being imported
    :param modelSetId: The id of the model set the documents belong to
    :returns: A dict of document type name -> document type id, for the
        document types of this model set.

    """

    dbSession = CeleryDbConn.getDbSession()

    startTime = datetime.now(pytz.utc)

    try:

        docTypeNames = set()
        propertyNames = set()

        for o in newDocuments:
            o.document["key"] = o.key
            o.documentTypeKey = o.documentTypeKey.lower()
            docTypeNames.add(o.documentTypeKey)

            if o.document:
                propertyNames.update([s.lower() for s in o.document])

        # Prepare Properties
        dbProps = (dbSession.query(DocDbPropertyTuple)
                   .filter(DocDbPropertyTuple.modelSetId == modelSetId)
                   .all())

        propertyNames -= set([o.name for o in dbProps])

        if propertyNames:
            for newPropName in propertyNames:
                dbSession.add(
                    DocDbPropertyTuple(name=newPropName,
                                       title=newPropName,
                                       modelSetId=modelSetId))

            dbSession.commit()

        del dbProps
        del propertyNames

        # Prepare Object Types
        dbObjectTypes = (
            dbSession.query(DocDbDocumentTypeTuple)
            .filter(DocDbDocumentTypeTuple.modelSetId == modelSetId)
            .all())

        docTypeNames -= set([o.name for o in dbObjectTypes])

        if docTypeNames:
            for newType in docTypeNames:
                dbSession.add(
                    DocDbDocumentTypeTuple(name=newType,
                                           title=newType,
                                           modelSetId=modelSetId))

            dbSession.commit()

            # Reload the types to get the new ids. This must stay limited to
            # this model set, otherwise a type with the same name in another
            # model set could replace the id in the map.
            dbObjectTypes = (
                dbSession.query(DocDbDocumentTypeTuple)
                .filter(DocDbDocumentTypeTuple.modelSetId == modelSetId)
                .all())

        docTypeIdsByName = {o.name: o.id for o in dbObjectTypes}

        logger.debug("Prepared lookups in %s",
                     (datetime.now(pytz.utc) - startTime))

        return docTypeIdsByName

    except Exception:
        dbSession.rollback()
        raise

    finally:
        dbSession.close()
def importLiveDbItems(self, modelSetKey: str,
                      newItems: List[ImportLiveDbItemTuple]) -> List[str]:
    """ Import LiveDB Items Task

    Insert the LiveDB items whose keys don't already exist in the model
    set. Items with existing keys are left untouched.

    :param self: A celery reference to this task
    :param modelSetKey: The model set name
    :param newItems: The list of new items
    :returns: The keys of the items that were inserted.
    """

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    liveDbTable = LiveDbItem.__table__
    try:

        liveDbModelSet = getOrCreateLiveDbModelSet(session, modelSetKey)

        # Keying the items by their key drops the duplicates
        itemsByKey = {}
        for item in newItems:
            itemsByKey[item.key] = item

        allKeys = list(itemsByKey)
        existingKeys = set()

        # Look up the keys that already exist, 1000 keys at a time
        chunkSize = 1000
        for offset in range(0, len(allKeys), chunkSize):
            keysChunk = allKeys[offset:offset + chunkSize]

            keyCondition = makeCoreValuesSubqueryCondition(
                engine, liveDbTable.c.key, keysChunk)
            stmt = select([liveDbTable.c.key])
            stmt = stmt.where(liveDbTable.c.modelSetId == liveDbModelSet.id)
            stmt = stmt.where(keyCondition)

            for row in conn.execute(stmt).fetchall():
                existingKeys.add(row[0])

        missingItems = [i for i in itemsByKey.values()
                        if i.key not in existingKeys]

        inserts = [
            dict(modelSetId=liveDbModelSet.id,
                 key=missing.key,
                 dataType=missing.dataType,
                 rawValue=missing.rawValue,
                 displayValue=missing.displayValue,
                 importHash=missing.importHash)
            for missing in missingItems
        ]
        newKeys = [missing.key for missing in missingItems]

        if not inserts:
            return []

        conn.execute(LiveDbItem.__table__.insert(), inserts)

        transaction.commit()

        elapsed = datetime.now(pytz.utc) - startTime
        logger.info("Inserted %s LiveDbItems, %s already existed, in %s",
                    len(inserts), len(existingKeys), elapsed)

        return newKeys

    except Exception as e:
        transaction.rollback()
        logger.debug("Task failed, but it will retry. %s", e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
        session.close()
def compileGrids(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Grids Task

    Rebuild the compiled grids for the grid keys in the queue items. The
    old compiled grids are deleted and committed first, then the new grids
    are inserted and the processed queue rows are deleted in a second
    transaction.

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of grid keys that have been updated.
    """
    decodedArgs = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = decodedArgs[0]
    queueItemIds: List[int] = decodedArgs[1]

    gridKeys = list(set([i.gridKey for i in queueItems]))

    coordSetIdByGridKey = {}
    for queueItem in queueItems:
        coordSetIdByGridKey[queueItem.gridKey] = queueItem.coordSetId

    queueTable = GridKeyCompilerQueue.__table__
    gridTable = GridKeyIndexCompiled.__table__

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        logger.debug("Staring compile of %s queueItems in %s",
                     len(queueItems), (datetime.now(pytz.utc) - startTime))

        dispData = _qryDispData(session, gridKeys)

        # Drop the old compiled grids and commit that on its own
        deleteOldGrids = gridTable.delete(gridTable.c.gridKey.in_(gridKeys))
        conn.execute(deleteOldGrids)

        transaction.commit()
        transaction = conn.begin()

        inserts = []
        for gridKey, dispJsonStr in dispData.items():
            # The hash of the grid key and its json is stored as the
            # lastUpdate value of the grid.
            hasher = hashlib.sha256(gridKey.encode())
            hasher.update(dispJsonStr.encode())
            gridTupleHash = b64encode(hasher.digest()).decode()

            encodedGridTuple = Payload(tuples=[
                GridTuple(gridKey=gridKey,
                          dispJsonStr=dispJsonStr,
                          lastUpdate=gridTupleHash)
            ]).toEncodedPayload()

            inserts.append(dict(coordSetId=coordSetIdByGridKey[gridKey],
                                gridKey=gridKey,
                                lastUpdate=gridTupleHash,
                                encodedGridTuple=encodedGridTuple))

        if inserts:
            conn.execute(gridTable.insert(), inserts)

        logger.debug("Compiled %s gridKeys, %s missing, in %s",
                     len(inserts),
                     len(gridKeys) - len(inserts),
                     (datetime.now(pytz.utc) - startTime))

        total = len(inserts)

        # The queue items are processed, remove them from the queue
        deleteQueueItems = queueTable.delete(queueTable.c.id.in_(queueItemIds))
        conn.execute(deleteQueueItems)

        transaction.commit()
        logger.info("Compiled and Committed %s GridKeyIndexCompileds in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return gridKeys

    except NotAllDispsCompiledException as e:
        # Retry sooner, this is expected to clear once the disps compile
        logger.warning("Retrying, Not all disps for gridKey %s are compiled",
                       gridKeys)
        raise self.retry(exc=e, countdown=1)

    except Exception as e:
        transaction.rollback()
        logger.debug("Compile of grids failed, retrying : %s", gridKeys)
        raise self.retry(exc=e, countdown=2)

    finally:
        conn.close()
        session.close()
def importInternalGroups(
        self, importHash: str,
        groupsVortexMsg: bytes) -> InternalGroupImportResultTuple:
    """ Import Internal Groups

    Loads the supplied import group tuples into the InternalGroupTuple table.
    Stored groups are matched to import groups on groupName, restricted to
    rows carrying the same importHash:

    * a matched group has each import field copied onto it, and is counted as
      updated if any field changed, otherwise as "same",
    * an unmatched import group is inserted as a new InternalGroupTuple,
    * any queried group that was not claimed by an import group is deleted.

    :param self: A celery reference to this task
    :param importHash: An unique string of this group of items being imported.
    :param groupsVortexMsg: An encoded payload containing the
        ImportInternalGroupTuple tuples.
    :returns: An InternalGroupImportResultTuple holding the added, updated and
        deleted ids, the count of unchanged groups and a list of errors.
    :raises: Any failure is rolled back and re-raised via self.retry, so the
        task is retried after 2 seconds.

    """
    importGroups: List[ImportInternalGroupTuple] = (
        Payload().fromEncodedPayload(groupsVortexMsg).tuples)

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    try:
        sameCount = 0
        updates = []
        deleteIds = []
        inserts = []
        # Nothing in this task appends to errors, it is returned empty
        errors = []

        allNames = [i.groupName for i in importGroups]

        # Keying by name collapses any duplicate rows to one entry.
        # Skip the query for an empty import, an empty IN () clause is
        # pointless and can't match anything.
        existingGroupsByName = {}
        if allNames:
            # The names being matched are group names and the map is keyed
            # by groupName, so the filter must be on groupName as well.
            existingGroupsByName = {
                g.groupName: g
                for g in session.query(InternalGroupTuple)
                .filter(InternalGroupTuple.groupName.in_(allNames))
                .filter(InternalGroupTuple.importHash == importHash)
                .all()
            }

        for importGroup in importGroups:
            # pop() claims the existing row, so what remains afterwards are
            # the rows that no import group matched.
            existingGroup = existingGroupsByName.pop(importGroup.groupName, None)

            if existingGroup:
                updated = False
                for fieldName in ImportInternalGroupTuple.tupleFieldNames():
                    newVal = getattr(importGroup, fieldName)
                    if getattr(existingGroup, fieldName) != newVal:
                        setattr(existingGroup, fieldName, newVal)
                        updated = True

                if updated:
                    updates.append(existingGroup)
                else:
                    sameCount += 1

            else:
                newGroup = InternalGroupTuple()
                for fieldName in ImportInternalGroupTuple.tupleFieldNames():
                    setattr(newGroup, fieldName, getattr(importGroup, fieldName))

                session.add(newGroup)
                inserts.append(newGroup)

        # NOTE(review): the query above only loads rows whose name is in the
        # import, so this loop only sees rows that were not popped above.
        # Confirm whether groups missing from the import should be removed too.
        for oldGroup in existingGroupsByName.values():
            deleteIds.append(oldGroup.id)
            session.delete(oldGroup)

        session.commit()

        logger.info("Inserted %s, Updated %s, Deleted %s, Same %s, in %s",
                    len(inserts), len(updates), len(deleteIds), sameCount,
                    (datetime.now(pytz.utc) - startTime))

        # The ids of the inserted rows are read after the commit
        return InternalGroupImportResultTuple(
            addedIds=[o.id for o in inserts],
            updatedIds=[o.id for o in updates],
            deletedIds=deleteIds,
            sameCount=sameCount,
            errors=errors)

    except Exception as e:
        session.rollback()
        logger.debug("Task failed, but it will retry. %s", e)
        raise self.retry(exc=e, countdown=2)

    finally:
        session.close()
def updateBranches(self, modelSetId: int, branchEncodedPayload: bytes) -> None:
    """ Update Branch

    This method is called from the UI to update a single branch.
    It could be called from a server API as well.

    All the branches must be for the same model set.

    The work is done in two phases, each with its own database session:

    1) Existing BranchIndex rows are updated with the new packed JSON,
       branches with no matching row are collected for insertion.
    2) The new branches are inserted, the disps of all the branches are
       replaced, and the disp, branch chunk and grid compilers are queued.

    :param self: A celery reference to this task
    :param modelSetId: The id of the model set the branches belong to.
    :param branchEncodedPayload: An encoded payload containing the
        BranchTuples to update.
    :raises: Any failure is rolled back and re-raised via self.retry, so the
        task is retried after 3 seconds.

    """
    # Decode BranchTuples payload
    updatedBranches: List[BranchTuple] = (
        Payload().fromEncodedPayload(branchEncodedPayload).tuples
    )

    startTime = datetime.now(pytz.utc)

    queueTable = BranchIndexCompilerQueue.__table__
    dispBaseTable = DispBase.__table__
    gridKeyIndexTable = GridKeyIndex.__table__
    gridKeyCompilerQueueTable = GridKeyCompilerQueue.__table__

    branchesByCoordSetId: Dict[int, List[BranchTuple]] = defaultdict(list)
    chunkKeys: Set[str] = set()
    newBranchesToInsert = []

    # Create a lookup of CoordSets by ID
    dbSession = CeleryDbConn.getDbSession()
    try:
        # Get the latest lookups
        modelSet = dbSession.query(ModelSet).filter(ModelSet.id == modelSetId).one()
        coordSetById = {i.id: i for i in dbSession.query(ModelCoordSet).all()}

        # Detach the lookups, they are used after this session is closed
        dbSession.expunge_all()

        # Update the branches
        # This will be a performance problem if lots of branches are updated,
        # however, on first writing this will just be used by the UI for updating
        # individual branches.
        for branch in updatedBranches:
            try:
                # Branches with a "NEW_" id have no database id yet, look
                # them up by their key instead.
                if str(branch.id).startswith("NEW_"):
                    branch.id = None

                if branch.id is None:
                    branchIndex = dbSession.query(BranchIndex) \
                        .filter(BranchIndex.coordSetId == branch.coordSetId) \
                        .filter(BranchIndex.key == branch.key) \
                        .one()
                else:
                    branchIndex = dbSession.query(BranchIndex) \
                        .filter(BranchIndex.id == branch.id) \
                        .one()

                branch.id = branchIndex.id
                branchIndex.packedJson = branch.packJson()
                branchIndex.updatedDate = branch.updatedDate

            except NoResultFound:
                newBranchesToInsert.append(branch)

            branchesByCoordSetId[branch.coordSetId].append(branch)
            chunkKeys.add(makeChunkKeyForBranchIndex(modelSet.key, branch.key))

        dbSession.commit()

    except Exception as e:
        dbSession.rollback()
        logger.debug("Retrying updateBranch, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        dbSession.close()

    dbSession = CeleryDbConn.getDbSession()
    try:
        if newBranchesToInsert:
            _insertOrUpdateBranches(dbSession, modelSet.key, modelSet.id,
                                    newBranchesToInsert)
            dbSession.commit()

        # Make an array of all branch IDs
        allBranchIds = []
        for branches in branchesByCoordSetId.values():
            allBranchIds.extend([b.id for b in branches])

        # Find out all the existing grids affected by these branches.
        # This must be queried before the disps are deleted below.
        gridsToRecompile = dbSession.execute(
            select(distinct=True,
                   columns=[gridKeyIndexTable.c.gridKey,
                            gridKeyIndexTable.c.coordSetId],
                   whereclause=dispBaseTable.c.branchId.in_(allBranchIds))
            .select_from(gridKeyIndexTable.join(dispBaseTable))
        ).fetchall()

        allNewDisps = []
        allDispIdsToCompile = []
        packedJsonUpdates = []

        # Recompile the BranchGridIndexes
        for coordSetId, branches in branchesByCoordSetId.items():
            coordSet = coordSetById[coordSetId]
            assert coordSet.modelSetId == modelSetId, "Branches not all from one model"

            newDisps, dispIdsToCompile = _convertBranchDisps(branches)
            allNewDisps.extend(newDisps)
            allDispIdsToCompile.extend(dispIdsToCompile)

            # The JSON is packed again after the disps have been converted
            packedJsonUpdates.extend([
                dict(b_id=b.id, b_packedJson=b.packJson()) for b in branches
            ])

        # 1) Delete the existing disps of these branches
        dbSession.execute(
            dispBaseTable.delete(dispBaseTable.c.branchId.in_(allBranchIds))
        )

        dbSession.commit()

        # NO TRANSACTION
        # Bulk load the Disps
        _bulkInsertDisps(CeleryDbConn.getDbEngine(), allNewDisps)

        # Queue the compiler
        DispCompilerQueueController.queueDispIdsToCompileWithSession(
            allDispIdsToCompile, dbSession
        )

        # 2) Update the JSON again back into the branch index.
        # Skipped when empty, executing a statement that uses bindparam()
        # with no parameter sets fails on the unbound parameters.
        if packedJsonUpdates:
            stmt = BranchIndex.__table__.update(). \
                where(BranchIndex.__table__.c.id == bindparam('b_id')) \
                .values(packedJson=bindparam('b_packedJson'))
            dbSession.execute(stmt, packedJsonUpdates)

        # 3) Queue chunks for recompile
        # Skipped when empty, an insert with no parameter sets would be
        # executed as a single insert with no values.
        if chunkKeys:
            dbSession.execute(
                queueTable.insert(),
                [dict(modelSetId=modelSetId, chunkKey=c) for c in chunkKeys]
            )

        # 4) Queue the affected grids for recompile
        if gridsToRecompile:
            dbSession.execute(
                gridKeyCompilerQueueTable.insert(),
                [dict(coordSetId=item.coordSetId, gridKey=item.gridKey)
                 for item in gridsToRecompile]
            )

        dbSession.commit()

        logger.debug("Updated %s BranchIndexes queued %s chunks in %s",
                     len(updatedBranches), len(chunkKeys),
                     (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        dbSession.rollback()
        logger.debug("Retrying updateBranch, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        dbSession.close()
def _importDisps(coordSet: ModelCoordSet, importDisps: List):
    """ Link Disps

    Converts the import disp tuples into storage (ORM) disps for one coord set.

    1) Convert each import tuple to its storage tuple, assign it a
        preallocated id and the coordSetId.
    2) Use the LookupHashConverter to convert the lookups on each disp.
    3) Link the disps to their DispGroups, and the DispGroupPointers to their
        target DispGroups, by importHash.

    This is not done in a thread because the lookups cause issues

    :param coordSet: The coord set the disps are imported into.
    :param importDisps: The import disp tuples to convert.
    :returns: A tuple of (dispIdsToCompile, importDispLinks, ormDisps)
    :raises Exception: If a disp references a group importHash that is neither
        in this import nor loaded from the database for this coord set.

    """
    dispIdGen = CeleryDbConn.prefetchDeclarativeIds(DispBase, len(importDisps))

    dispIdsToCompile = []
    importDispLinks = []
    ormDisps = []

    ormSession = CeleryDbConn.getDbSession()
    try:
        lookupConverter = LookupHashConverter(ormSession,
                                              modelSetId=coordSet.modelSetId,
                                              coordSetId=coordSet.id)

        # These hold the (disp, importHash) pairs that are resolved to ids
        # once every disp in this import has been given its id.
        dispGroupPtrWithTargetHash: List[Tuple[DispGroupPointer, str]] = []
        dispGroupChildWithTargetHash: List[Tuple[DispBase, str]] = []

        # Preload any groups our pointers may point to.

        # Pre-import any DispGroup IDs we may need
        dispGroupTargetImportHashes = [
            o.targetDispGroupHash
            for o in importDisps
            if o.tupleType() == ImportDispGroupPtrTuple.tupleType()
        ]

        # This will store DispGroup and DispGroupPointer hashes
        # It starts with the ids already stored for this coord set, the
        # groups and pointers from this import are added in the loop below.
        groupIdByImportHash: Dict[str, int] = {
            o.importHash: o.id
            for o in
            ormSession.query(DispBase.importHash, DispBase.id).filter(
                DispBase.importHash.in_(dispGroupTargetImportHashes)).filter(
                DispBase.coordSetId == coordSet.id)
        }

        del dispGroupTargetImportHashes

        # This is a list of DispGroup.id.
        # We use this to filter out disps that are part of a DispGroup,
        # they don't get compiled
        dispGroupIds = set()

        # Sort the DispGroups first, so they are created before any FK references them
        sortedImportDisps = sorted(
            importDisps, key=lambda o: IMPORT_SORT_ORDER[o.tupleType()])

        for importDisp in sortedImportDisps:
            # Convert the geometry into the internal array format
            _convertGeom(importDisp)

            # Create the storage tuple instance, and copy over the data.
            ormDisp = _convertImportTuple(importDisp)
            ormDisps.append(ormDisp)

            # Preallocate the IDs for performance on PostGreSQL
            ormDisp.id = next(dispIdGen)

            # Assign the coord set id.
            ormDisp.coordSetId = coordSet.id

            # If this is a dispGroup, index its ID
            if isinstance(ormDisp, DispGroup):
                dispGroupIds.add(ormDisp.id)
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

            # If this is a dispGroupPtr, index its targetHash so we can update it
            if isinstance(ormDisp, DispGroupPointer):
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

                # The target group name is prefixed with the coord set id
                if ormDisp.targetDispGroupName:
                    ormDisp.targetDispGroupName = '%s|%s' % (
                        coordSet.id, ormDisp.targetDispGroupName)

                # Not all DispGroupPointers have targets,
                # they can be orphaned instances
                if importDisp.targetDispGroupHash:
                    dispGroupPtrWithTargetHash.append(
                        (ormDisp, importDisp.targetDispGroupHash))

            # If this disp has a parent group hash, remember it so the disp
            # can be linked to its group after the loop
            parentDispGroupHash = getattr(importDisp, "parentDispGroupHash", None)
            if parentDispGroupHash:
                dispGroupChildWithTargetHash.append(
                    (ormDisp, parentDispGroupHash))

            # Add some interim data to the import display link, so it can be created
            # This reads the attribute before the lookups are converted below
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalRawValue = getattr(
                        ormDisp, attrName)
                    importDispLink.internalDispId = ormDisp.id
                    importDispLinks.append(importDispLink)

            # Convert the values of the liveDb attributes
            lookupConverter.convertLookups(ormDisp)

            # Add the after translate value, this is the Display Value
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalDisplayValue = getattr(
                        ormDisp, attrName)

            # Queue the Disp to be compiled into a grid.
            # Disps belonging to a DispGroup do not get compiled into grids.
            # NOTE(review): groupId is only assigned from parentDispGroupHash
            # in the linking loop further down, which runs after this check.
            # Confirm whether _convertImportTuple already populates groupId,
            # otherwise this test may not exclude the group children.
            if ormDisp.groupId not in dispGroupIds:
                dispIdsToCompile.append(ormDisp.id)

        # Link the DispGroups
        # Create the links between the Disp and DispGroup
        for ormDisp, groupHash in dispGroupChildWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist" % groupHash)

            ormDisp.groupId = groupOrmObjId

        # Link the DispGroupPtr to the DispGroup
        # This is only used when the dispGroupPtr points to a disp group
        for ormDisp, groupHash in dispGroupPtrWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist" % groupHash)

            ormDisp.targetDispGroupId = groupOrmObjId

    finally:
        ormSession.close()

    return dispIdsToCompile, importDispLinks, ormDisps
def removeBranches(self, modelSetKey: str, coordSetKey: str,
                   keys: List[str]) -> None:
    """ Remove Branches

    This worker task removes branches from the indexes.

    The BranchIndex rows matching the keys in the given coord set are deleted
    along with their disps, and the chunks they belonged to are queued for
    recompile.

    :param self: A celery reference to this task
    :param modelSetKey: The key of the model set the coord set belongs to.
    :param coordSetKey: The key of the coord set the branches belong to.
    :param keys: The keys of the branches to remove.
    :raises: A failed coord set lookup propagates as is. A failure while
        deleting is rolled back and re-raised via self.retry, so the task is
        retried after 3 seconds.

    """
    startTime = datetime.now(pytz.utc)

    branchIndexTable = BranchIndex.__table__
    queueTable = BranchIndexCompilerQueue.__table__

    # Look up the CoordSet the branches belong to
    dbSession = CeleryDbConn.getDbSession()
    try:
        # Filter on the related model set's key with has().
        # "ModelCoordSet.modelSet.key" is the name of the mapped attribute
        # itself, comparing it to modelSetKey yields a constant boolean, not
        # an SQL criterion.
        coordSet = dbSession.query(ModelCoordSet) \
            .filter(ModelCoordSet.modelSet.has(key=modelSetKey)) \
            .filter(ModelCoordSet.key == coordSetKey) \
            .one()

        # Detach the coord set, it is used after this session is closed
        dbSession.expunge_all()

    finally:
        dbSession.close()

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        items = conn.execute(select(
            distinct=True,
            columns=[branchIndexTable.c.id, branchIndexTable.c.chunkKey],
            whereclause=and_(branchIndexTable.c.key.in_(keys),
                             branchIndexTable.c.coordSetId == coordSet.id)
        )).fetchall()

        branchIndexIds = [i.id for i in items]
        chunkKeys = {i.chunkKey for i in items}

        # 1) Delete the disps of the branches
        _deleteBranchDisps(conn, branchIndexIds)

        # 2) Delete existing branches
        conn.execute(
            branchIndexTable.delete(branchIndexTable.c.id.in_(branchIndexIds))
        )

        # 3) Queue chunks for recompile
        # Skipped when no branch matched, an insert with no parameter sets
        # would be executed as a single insert with no values.
        if chunkKeys:
            conn.execute(
                queueTable.insert(),
                [dict(modelSetId=coordSet.modelSetId, chunkKey=c)
                 for c in chunkKeys]
            )

        transaction.commit()

        logger.debug("Deleted %s BranchIndexes queued %s chunks in %s",
                     len(branchIndexIds), len(chunkKeys),
                     (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        transaction.rollback()
        logger.debug("Retrying removeBranches, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        conn.close()