def reindexSearchObject(conn, objectsToIndex: List[ObjectToIndexTuple]) -> None:
    """ Reindex Search Object

    Replaces the SearchIndex rows of the given objects and queues every chunk
    touched by either the old or the new rows for recompilation.

    :param conn: The database connection the statements are executed on.
        No commit is issued in this function.
    :param objectsToIndex: Object To Index
    :returns: None
    """
    logger.debug("Starting to index %s SearchIndex", len(objectsToIndex))

    indexTable = SearchIndex.__table__
    compilerQueueTable = SearchIndexCompilerQueue.__table__

    began = datetime.now(pytz.utc)

    # Build the new index rows and collect the ids of the objects they replace
    indexRows = []
    idsOfObjects = []
    for item in objectsToIndex:
        indexRows += _indexObject(item)
        idsOfObjects.append(item.id)

    # Hand out the primary keys up front and note the chunks the new rows use
    idIter = CeleryDbConn.prefetchDeclarativeIds(SearchIndex, len(indexRows))
    chunkKeysToQueue = set()
    for row in indexRows:
        row.id = next(idIter)
        chunkKeysToQueue.add(row.chunkKey)

    # Chunks holding the rows we're about to delete need recompiling as well
    existingChunkQry = select(
        columns=[indexTable.c.chunkKey],
        whereclause=indexTable.c.objectId.in_(idsOfObjects))
    chunkKeysToQueue.update(
        found.chunkKey for found in conn.execute(existingChunkQry))

    if idsOfObjects:
        conn.execute(
            indexTable.delete(indexTable.c.objectId.in_(idsOfObjects)))

    if indexRows:
        logger.debug("Inserting %s SearchIndex", len(indexRows))
        conn.execute(indexTable.insert(),
                     [row.tupleToSqlaBulkInsertDict() for row in indexRows])

    if chunkKeysToQueue:
        conn.execute(compilerQueueTable.insert(),
                     [dict(chunkKey=key) for key in chunkKeysToQueue])

    logger.info("Inserted %s SearchIndex keywords in %s",
                len(indexRows), (datetime.now(pytz.utc) - began))
def _compileBranchIndexChunk(conn, transaction, modelSetId: int,
                             queueItems: List[BranchIndexCompilerQueue]) -> None:
    """ Compile Branch Index Chunk

    Rebuilds the encoded chunks for the chunk keys named by the queue items.
    A chunk is only replaced when its SHA-256 hash differs from the stored
    one; chunks with no freshly built payload are deleted.

    :param conn: The database connection the statements are executed on.
    :param transaction: The transaction open on ``conn``. It is committed
        part way through and replaced by a new one, which is committed at
        the end of this function.
    :param modelSetId: The model set id stored on each new encoded chunk.
    :param queueItems: The compiler queue items to process.
    :returns: None
    """
    chunkKeys = list(set(item.chunkKey for item in queueItems))

    encodedChunkTable = BranchIndexEncodedChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    began = datetime.now(pytz.utc)

    logger.debug("Staring compile of %s queueItems in %s",
                 len(queueItems), (datetime.now(pytz.utc) - began))

    compiledCount = 0

    hashByChunkKey = _loadExistingHashes(conn, chunkKeys)
    payloadByChunkKey = _buildIndex(chunkKeys)

    staleChunkKeys = []
    rowsToInsert = []

    for chunkKey, encodedPayload in payloadByChunkKey.items():
        encodedHash = b64encode(
            hashlib.sha256(encodedPayload).digest()).decode()

        # Popping the key here means whatever is left in hashByChunkKey after
        # the loop is a chunk we have no new payload for.
        if chunkKey in hashByChunkKey:
            if hashByChunkKey.pop(chunkKey) == encodedHash:
                # Unchanged, leave the stored chunk alone
                continue

            # Changed, delete + insert is quicker than an update
            staleChunkKeys.append(chunkKey)

        rowsToInsert.append({
            "modelSetId": modelSetId,
            "chunkKey": chunkKey,
            "encodedData": encodedPayload,
            "encodedHash": encodedHash,
            "lastUpdate": lastUpdate,
        })

    # Chunks that exist in the table but weren't rebuilt get deleted too
    staleChunkKeys.extend(hashByChunkKey)

    if staleChunkKeys:
        conn.execute(
            encodedChunkTable.delete(
                encodedChunkTable.c.chunkKey.in_(staleChunkKeys)))

    if rowsToInsert:
        idIter = CeleryDbConn.prefetchDeclarativeIds(BranchIndex,
                                                     len(rowsToInsert))
        for row in rowsToInsert:
            row["id"] = next(idIter)

    # Commit the deletes, then carry on in a fresh transaction
    transaction.commit()
    transaction = conn.begin()

    if rowsToInsert:
        conn.execute(encodedChunkTable.insert(), rowsToInsert)

    logger.debug("Compiled %s BranchIndexs, %s missing, in %s",
                 len(rowsToInsert),
                 len(chunkKeys) - len(rowsToInsert),
                 (datetime.now(pytz.utc) - began))

    compiledCount += len(rowsToInsert)

    transaction.commit()

    logger.debug("Compiled and Committed %s EncodedBranchIndexChunks in %s",
                 compiledCount, (datetime.now(pytz.utc) - began))
def loadItemKeys(conn, newItemKeys: List[ItemKeyImportTuple],
                 modelSetId: int, modelSetKey: str) -> None:
    """ Load Item Keys

    Replaces the ItemKeyIndex rows belonging to the import groups of the
    given item keys, then queues the affected chunks for compiling.

    1) Delete the existing rows with a matching importGroupHash
    2) Insert a new row for every item key
    3) Queue the chunk keys

    :param conn: The database connection the statements are executed on.
        No commit is issued in this function.
    :param newItemKeys: The item keys to store.
    :param modelSetId: The model set id stored on the rows and queue items.
    :param modelSetKey: The model set key, used to make the chunk keys.
    :returns: None
    """
    itemKeyTable = ItemKeyIndex.__table__
    compilerQueueTable = ItemKeyIndexCompilerQueue.__table__

    began = datetime.now(pytz.utc)

    groupHashes = set()
    queuedChunks: Set[Tuple[int, str]] = set()

    # Reserve a primary key for every row we're going to insert
    idIter = CeleryDbConn.prefetchDeclarativeIds(ItemKeyIndex,
                                                 len(newItemKeys))

    rows = []
    for itemKeyTuple in newItemKeys:
        groupHashes.add(itemKeyTuple.importGroupHash)

        ormItemKey = ItemKeyIndex(
            id=next(idIter),
            modelSetId=modelSetId,
            importGroupHash=itemKeyTuple.importGroupHash,
            itemType=itemKeyTuple.itemType,
            itemKey=itemKeyTuple.itemKey,
            segmentKey=itemKeyTuple.segmentKey,
            chunkKey=makeChunkKeyForItemKey(modelSetKey,
                                            itemKeyTuple.itemKey))

        rows.append(ormItemKey.tupleToSqlaBulkInsertDict())
        queuedChunks.add((modelSetId, ormItemKey.chunkKey))

    # Out with the old rows of these import groups
    if groupHashes:
        conn.execute(
            itemKeyTable.delete(
                itemKeyTable.c.importGroupHash.in_(groupHashes)))

    # In with the new
    if rows:
        conn.execute(itemKeyTable.insert(), rows)

    if queuedChunks:
        conn.execute(
            compilerQueueTable.insert(),
            [{"modelSetId": queuedModelSetId, "chunkKey": queuedChunkKey}
             for queuedModelSetId, queuedChunkKey in queuedChunks])

    logger.debug("Inserted %s ItemKeys queued %s chunks in %s",
                 len(rows), len(queuedChunks),
                 (datetime.now(pytz.utc) - began))
def _cloneDispsForDispGroupPointer(dispIds: List[int]) -> List[int]:
    """ Clone Disps for DispGroupPointer

    This method will clone "instances" of the disps in the disp groups for the
    DispGroupPointer.

    Disps that already belong to the matched pointers are deleted, and that
    delete is committed, before the clones are created and saved.

    :param dispIds: The IDs of the disps to consider. Only those that are
        DispGroupPointers with a targetDispGroupId set are processed.
    :returns: ``dispIds`` as passed in when there is nothing to clone,
        otherwise a new list of ``dispIds`` followed by the IDs of the clones.
    """
    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()
    try:

        # -----
        # Load the disp group pointers
        # NOTE: "!= None" is deliberate, SQLAlchemy overloads the operator to
        # build the SQL expression; "is not None" would not do that.
        qry = ormSession.query(DispGroupPointer) \
            .filter(DispGroupPointer.targetDispGroupId != None) \
            .filter(DispGroupPointer.id.in_(dispIds))

        dispGroupPointers: List[DispGroupPointer] = qry.all()

        # If there are no DispGroupPointers that need cloning, then return.
        # The finally block below still closes the session.
        if not dispGroupPointers:
            logger.debug(
                "Cloning skipped,"
                " there are no disp group ptrs with targets, in %s",
                (datetime.now(pytz.utc) - startTime))
            return dispIds

        dispGroupPointerTargetIds = [
            o.targetDispGroupId for o in dispGroupPointers
        ]

        del qry

        # -----
        # Delete any existing disps that are in these pointers,
        # they are replaced by the fresh clones created below.
        ormSession.query(DispBase) \
            .filter(DispBase.groupId.in_([o.id for o in dispGroupPointers])) \
            .delete(synchronize_session=False)

        ormSession.commit()

        # -----
        # Query for the disp groups we'll need
        # The result is used as a mapping of DispGroup id -> child disps.
        dispGroupChildsByGroupId = _queryDispsForGroup(
            ormSession, dispGroupPointerTargetIds)

        # -----
        # Query for the disp groups names
        dispBaseTable = DispBase.__table__
        dispGroupTable = DispGroup.__table__

        qry = ormSession.execute(
            select(columns=[
                dispBaseTable.c.id, dispBaseTable.c.coordSetId,
                dispGroupTable.c.name
            ],
                whereclause=dispBaseTable.c.id.in_(
                    dispGroupPointerTargetIds)).select_from(
                join(dispGroupTable, dispBaseTable,
                     dispGroupTable.c.id == dispBaseTable.c.id)))

        # The stored name is "<coordSetId>|<name>"
        dispGroupNameByGroupId = {
            o.id: '%s|%s' % (o.coordSetId, o.name)
            for o in qry.fetchall()
        }

        del qry

        # -----
        # Clone the child disps
        cloneDisps = []
        cloneLiveDbDispLinks = []

        for dispPtr in dispGroupPointers:
            if not dispPtr.targetDispGroupId:
                logger.debug("Pointer has no targetGroupId id=%s", dispPtr.id)
                continue

            dispGroupChilds = dispGroupChildsByGroupId.get(
                dispPtr.targetDispGroupId)

            if not dispGroupChilds:
                logger.warning(
                    "Pointer points to missing DispGroup,"
                    " id=%s, targetGroupId=%s", dispPtr.id,
                    dispPtr.targetDispGroupId)
                continue

            # The pointers geometry is unpacked into exactly two values, which
            # are passed to _scaleDispGeom to offset each clone.
            x, y = json.loads(dispPtr.geomJson)
            dispPtr.targetDispGroupName = \
                dispGroupNameByGroupId[dispPtr.targetDispGroupId]

            for templateDisp in dispGroupChilds:
                # Create the clone
                cloneDisp = templateDisp.tupleClone()
                cloneDisps.append(cloneDisp)

                cloneDisp.coordSetId = dispPtr.coordSetId

                # Offset the geometry
                geom = json.loads(cloneDisp.geomJson)
                geom = _scaleDispGeom(geom, 1, 1, x, y)
                cloneDisp.geomJson = json.dumps(geom)

                # Assign the clone to the DispGroupPointer
                cloneDisp.groupId = dispPtr.id

                for dispLink in templateDisp.liveDbLinks:
                    cloneDispLink = dispLink.tupleClone()
                    cloneLiveDbDispLinks.append(cloneDispLink)

                    # Hold a reference to the cloned disp for now, the link's
                    # dispId is filled in once the disp IDs are allocated.
                    cloneDispLink.id = None
                    cloneDispLink.disp = cloneDisp
                    cloneDispLink.coordSetId = dispPtr.coordSetId

        # -----
        # Preallocate the IDs for performance on PostGreSQL
        dispIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DispBase, len(cloneDisps))
        for cloneDisp in cloneDisps:
            cloneDisp.id = next(dispIdGen)

        # Preallocate the IDs for performance on PostGreSQL
        dispLinkIdGen = CeleryDbConn.prefetchDeclarativeIds(
            LiveDbDispLink, len(cloneLiveDbDispLinks))
        for cloneDispLink in cloneLiveDbDispLinks:
            cloneDispLink.id = next(dispLinkIdGen)
            # Swap the object reference for the plain id before the bulk save
            cloneDispLink.dispId = cloneDispLink.disp.id
            cloneDispLink.disp = None

        # -----
        # Create the new list of IDs to compile
        # Do this here, otherwise it will cause a DB refresh if it's after the commit.
        dispIdsIncludingClones = dispIds + [o.id for o in cloneDisps]

        ormSession.bulk_save_objects(cloneDisps, update_changed_only=False)
        ormSession.bulk_save_objects(cloneLiveDbDispLinks,
                                     update_changed_only=False)

        ormSession.commit()

        logger.debug("Cloned %s disp group objects in %s", len(cloneDisps),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        ormSession.rollback()
        raise

    finally:
        ormSession.close()

    return dispIdsIncludingClones
def _insertOrUpdateObjects(newDocuments: List[ImportDocumentTuple],
                           modelSetId: int,
                           docTypeIdsByName: Dict[str, int]) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert object if the are missing

    The chunk of every inserted or updated document is queued for compiling.
    All work is done on its own connection in a single transaction, which is
    rolled back if anything fails.

    :param newDocuments: The documents to import. Each key may only appear
        once in this list.
    :param modelSetId: The id of the model set the documents belong to.
    :param docTypeIdsByName: A lookup from documentTypeKey to document type id.
    :raises Exception: If a document key exists in the import data twice.
    :raises KeyError: If a documents documentTypeKey isn't in docTypeIdsByName.
    :returns: None
    """

    documentTable = DocDbDocument.__table__
    queueTable = DocDbCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        objectKeys = [o.key for o in newDocuments]
        # (modelSetId, chunkKey) pairs
        chunkKeysForQueue: Set[Tuple[int, str]] = set()

        # Query existing objects
        results = list(
            conn.execute(
                select(columns=[
                    documentTable.c.id, documentTable.c.key,
                    documentTable.c.chunkKey, documentTable.c.documentJson
                ],
                    whereclause=and_(
                        documentTable.c.key.in_(objectKeys),
                        documentTable.c.modelSetId == modelSetId))))

        foundObjectByKey = {o.key: o for o in results}
        del results

        # Get the IDs that we need, only the documents not found need one
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DocDbDocument,
            len(newDocuments) - len(foundObjectByKey))

        # Create state arrays
        inserts = []
        updates = []
        processedKeys = set()

        # Work out which objects have been updated or need inserting
        for importDocument in newDocuments:
            if importDocument.key in processedKeys:
                raise Exception("Key %s exists in import data twice" %
                                importDocument.key)
            processedKeys.add(importDocument.key)

            existingObject = foundObjectByKey.get(importDocument.key)
            importDocumentTypeId = docTypeIdsByName[
                importDocument.documentTypeKey]

            # Drop the None and empty string values, 0 / false are allowed.
            # This must be an equality test, "is not ''" compares identity
            # with a literal, which is not a reliable test for an empty string.
            packedJsonDict = {
                k: v
                for k, v in importDocument.document.items()
                if v is not None and v != ''
            }
            packedJsonDict['_dtid'] = importDocumentTypeId
            packedJsonDict['_msid'] = modelSetId
            documentJson = json.dumps(packedJsonDict, sort_keys=True)

            # Work out if we need to update the object type
            if existingObject is not None:
                # The b_ prefixed keys match the bindparams of the update
                # statement further down.
                updates.append(
                    dict(b_id=existingObject.id,
                         b_typeId=importDocumentTypeId,
                         b_documentJson=documentJson))

            else:
                id_ = next(newIdGen)
                existingObject = DocDbDocument(
                    id=id_,
                    modelSetId=modelSetId,
                    documentTypeId=importDocumentTypeId,
                    key=importDocument.key,
                    importGroupHash=importDocument.importGroupHash,
                    chunkKey=makeChunkKey(importDocument.modelSetKey,
                                          importDocument.key),
                    documentJson=documentJson)
                inserts.append(existingObject.tupleToSqlaBulkInsertDict())

            # Queue the chunk for both inserted and updated documents
            chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

        # Insert the DocDb Objects
        if inserts:
            conn.execute(documentTable.insert(), inserts)

        if updates:
            stmt = (documentTable.update().where(
                documentTable.c.id == bindparam('b_id')).values(
                documentTypeId=bindparam('b_typeId'),
                documentJson=bindparam('b_documentJson')))
            conn.execute(stmt, updates)

        if chunkKeysForQueue:
            conn.execute(
                queueTable.insert(),
                [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

        if inserts or updates or chunkKeysForQueue:
            transaction.commit()
        else:
            transaction.rollback()

        logger.debug("Inserted %s updated %s queued %s chunks in %s",
                     len(inserts), len(updates), len(chunkKeysForQueue),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
def compileSearchIndexChunk(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Search Index Task

    Rebuilds the encoded search index chunks for the queued chunk keys, then
    removes the processed items from the compiler queue. On any failure the
    transaction is rolled back and the task is retried after 10 seconds.

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: The distinct chunk keys named by the queue items.
    """
    decodedArgs = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = decodedArgs[0]
    queueItemIds: List[int] = decodedArgs[1]

    chunkKeys = list(set(item.chunkKey for item in queueItems))

    compilerQueueTable = SearchIndexCompilerQueue.__table__
    encodedChunkTable = EncodedSearchIndexChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    began = datetime.now(pytz.utc)

    conn = CeleryDbConn.getDbEngine().connect()
    transaction = conn.begin()
    try:
        logger.debug("Staring compile of %s queueItems in %s",
                     len(queueItems), (datetime.now(pytz.utc) - began))

        compiledCount = 0

        hashByChunkKey = _loadExistingHashes(conn, chunkKeys)
        payloadByChunkKey = _buildIndex(conn, chunkKeys)

        staleChunkKeys = []
        rowsToInsert = []

        for chunkKey, encodedPayload in payloadByChunkKey.items():
            encodedHash = b64encode(
                hashlib.sha256(encodedPayload).digest()).decode()

            # Popping the key here means whatever is left in hashByChunkKey
            # after the loop is a chunk we have no new payload for.
            if chunkKey in hashByChunkKey:
                if hashByChunkKey.pop(chunkKey) == encodedHash:
                    # Unchanged, leave the stored chunk alone
                    continue

                # Changed, delete + insert is quicker than an update
                staleChunkKeys.append(chunkKey)

            rowsToInsert.append({
                "chunkKey": chunkKey,
                "encodedData": encodedPayload,
                "encodedHash": encodedHash,
                "lastUpdate": lastUpdate,
            })

        # Chunks that exist in the table but weren't rebuilt get deleted too
        staleChunkKeys.extend(hashByChunkKey)

        if staleChunkKeys:
            conn.execute(
                encodedChunkTable.delete(
                    encodedChunkTable.c.chunkKey.in_(staleChunkKeys)))

        if rowsToInsert:
            idIter = CeleryDbConn.prefetchDeclarativeIds(SearchIndex,
                                                         len(rowsToInsert))
            for row in rowsToInsert:
                row["id"] = next(idIter)

        # Commit the deletes, then carry on in a fresh transaction
        transaction.commit()
        transaction = conn.begin()

        if rowsToInsert:
            conn.execute(encodedChunkTable.insert(), rowsToInsert)

        logger.debug("Compiled %s SearchIndexes, %s missing, in %s",
                     len(rowsToInsert),
                     len(chunkKeys) - len(rowsToInsert),
                     (datetime.now(pytz.utc) - began))

        compiledCount += len(rowsToInsert)

        # The queue items are done with once their chunks are stored
        conn.execute(
            compilerQueueTable.delete(
                compilerQueueTable.c.id.in_(queueItemIds)))

        transaction.commit()
        logger.info("Compiled and Committed %s EncodedSearchIndexChunks in %s",
                    compiledCount, (datetime.now(pytz.utc) - began))

        return chunkKeys

    except Exception as e:
        transaction.rollback()
        logger.exception(e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
def _importDisps(coordSet: ModelCoordSet, importDisps: List) -> Tuple[List, List, List]:
    """ Link Disps

    1) Use the AgentImportDispGridLookup to convert lookups from importHash to id
    2) set the coordSetId

    This is not done in a thread because the lookups cause issues

    The import tuples are converted to storage tuples, given preallocated IDs
    and linked to their DispGroups. No commit is issued in this function, the
    converted objects are handed back to the caller.

    :param coordSet: The coord set the disps are imported into.
    :param importDisps: The import disp tuples to convert.
    :raises Exception: If a disp refers to a DispGroup importHash that is
        neither in this import nor already stored for this coord set.
    :returns: A tuple of (dispIdsToCompile, importDispLinks, ormDisps)
    """

    dispIdGen = CeleryDbConn.prefetchDeclarativeIds(DispBase, len(importDisps))

    dispIdsToCompile = []
    importDispLinks = []
    ormDisps = []

    ormSession = CeleryDbConn.getDbSession()
    try:

        lookupConverter = LookupHashConverter(ormSession,
                                              modelSetId=coordSet.modelSetId,
                                              coordSetId=coordSet.id)

        # (disp, hash) pairs that are resolved to IDs after the main loop,
        # once every group in this import has been given its ID.
        dispGroupPtrWithTargetHash: List[Tuple[DispGroupPointer, str]] = []
        dispGroupChildWithTargetHash: List[Tuple[DispBase, str]] = []

        # Preload any groups our pointers may point to.

        # Pre-import any DispGroup IDs we may need
        dispGroupTargetImportHashes = [
            o.targetDispGroupHash for o in importDisps
            if o.tupleType() == ImportDispGroupPtrTuple.tupleType()
        ]

        # This will store DispGroup and DispGroupPointer hashes
        # It starts with the ones already stored for this coord set.
        groupIdByImportHash: Dict[str, int] = {
            o.importHash: o.id
            for o in ormSession.query(DispBase.importHash, DispBase.id).filter(
                DispBase.importHash.in_(dispGroupTargetImportHashes)).filter(
                DispBase.coordSetId == coordSet.id)
        }

        del dispGroupTargetImportHashes

        # This is a list of DispGroup.id.
        # We use this to filter out disps that part of a DispGroup,
        # they don't get compiled
        dispGroupIds = set()

        # Sort the DispGroups first, so they are created before any FK references them
        sortedImportDisps = sorted(
            importDisps, key=lambda o: IMPORT_SORT_ORDER[o.tupleType()])

        for importDisp in sortedImportDisps:
            # Convert the geometry into the internal array format
            _convertGeom(importDisp)

            # Create the storage tuple instance, and copy over the data.
            ormDisp = _convertImportTuple(importDisp)
            ormDisps.append(ormDisp)

            # Preallocate the IDs for performance on PostGreSQL
            ormDisp.id = next(dispIdGen)

            # Assign the coord set id.
            ormDisp.coordSetId = coordSet.id

            # If this is a dispGroup, index it's ID
            if isinstance(ormDisp, DispGroup):
                dispGroupIds.add(ormDisp.id)
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

            # If this is a dispGroupPtr, index its targetHash so we can update it
            if isinstance(ormDisp, DispGroupPointer):
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

                # The target name is stored prefixed with the coord set id
                if ormDisp.targetDispGroupName:
                    ormDisp.targetDispGroupName = '%s|%s' % (
                        coordSet.id, ormDisp.targetDispGroupName)

                # Not all DispGroupPointers have targets,
                # they can be orphaned instances
                if importDisp.targetDispGroupHash:
                    dispGroupPtrWithTargetHash.append(
                        (ormDisp, importDisp.targetDispGroupHash))

            # If this disp has a parent DispGroup, remember the parents hash
            # so the disps groupId can be set after the loop.
            parentDispGroupHash = getattr(importDisp, "parentDispGroupHash",
                                          None)
            if parentDispGroupHash:
                dispGroupChildWithTargetHash.append(
                    (ormDisp, parentDispGroupHash))

            # Add some interim data to the import display link, so it can be created
            # This captures the attribute value BEFORE the lookups are converted.
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalRawValue = getattr(
                        ormDisp, attrName)
                    importDispLink.internalDispId = ormDisp.id
                    importDispLinks.append(importDispLink)

            # Convert the values of the liveDb attributes
            lookupConverter.convertLookups(ormDisp)

            # Add the after translate value, this is the Display Value
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalDisplayValue = getattr(
                        ormDisp, attrName)

            # Queue the Disp to be compiled into a grid.
            # Disps belonging to a DispGroup do not get compiled into grids.
            # NOTE(review): groupId is only assigned in the linking loop after
            # this loop, so this check relies on _convertImportTuple having
            # already set groupId - confirm, otherwise group children are
            # queued here as well.
            if ormDisp.groupId not in dispGroupIds:
                dispIdsToCompile.append(ormDisp.id)

        # Link the DispGroups
        # Create the links between the Disp and DispGroup
        for ormDisp, groupHash in dispGroupChildWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist" %
                                groupHash)

            ormDisp.groupId = groupOrmObjId

        # Link the DispGroupPtr to the DispGroup
        # This is only used when the dispGrouPtr points to a disp group
        for ormDisp, groupHash in dispGroupPtrWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist" %
                                groupHash)

            ormDisp.targetDispGroupId = groupOrmObjId

    finally:
        ormSession.close()

    return dispIdsToCompile, importDispLinks, ormDisps
def _insertOrUpdateObjects(newSegments: List[GraphDbImportSegmentTuple],
                           modelSetId: int, modelSetKey: str) -> None:
    """ Insert or Update Objects

    Replaces the stored segments of the import groups found in newSegments.

    1) Delete the existing segments for the import group hashes
    2) Insert the new segments and queue their chunks
    3) Load the item keys for the edges and vertexes of the segments

    The delete is done by ``deleteSegment`` before this function opens its
    own connection. The inserts, queueing and item keys share one transaction
    on that connection, which is rolled back if anything fails.

    :param newSegments: The segments to import.
    :param modelSetId: The id of the model set the segments belong to.
    :param modelSetKey: The key of the model set the segments belong to.
    :returns: None
    """
    graphSegmentTable = GraphDbSegment.__table__
    compilerQueueTable = GraphDbCompilerQueue.__table__

    began = datetime.now(pytz.utc)

    groupHashes = set()
    queuedChunks: Set[Tuple[int, str]] = set()

    # Reserve a primary key for every segment
    idIter = CeleryDbConn.prefetchDeclarativeIds(GraphDbSegment,
                                                 len(newSegments))

    segmentRows = []
    itemKeys = []

    for segment in newSegments:
        groupHashes.add(segment.importGroupHash)

        packedJson = segment.packJson()
        segmentId = next(idIter)

        ormSegment = GraphDbSegment(
            id=segmentId,
            modelSetId=modelSetId,
            key=segment.key,
            importGroupHash=segment.importGroupHash,
            chunkKey=makeChunkKeyForSegmentKey(segment.modelSetKey,
                                               segment.key),
            segmentJson=packedJson)

        segmentRows.append(ormSegment.tupleToSqlaBulkInsertDict())
        queuedChunks.add((modelSetId, ormSegment.chunkKey))

        # One item key per edge, then one per vertex, of this segment
        for itemType, members in (
                (ItemKeyTuple.ITEM_TYPE_EDGE, segment.edges),
                (ItemKeyTuple.ITEM_TYPE_VERTEX, segment.vertexes)):
            itemKeys.extend(
                ItemKeyImportTuple(
                    importGroupHash=segment.importGroupHash,
                    itemKey=member.key,
                    itemType=itemType,
                    segmentKey=segment.key)
                for member in members)

    # TODO: If this fails, we could potentially delete by segment key.
    # But that seems a bit hackish, the agents should delete the old first.
    # Or should they. That might leave a temporary gap in the network.

    # Remove what was previously imported for these import groups
    if groupHashes:
        deleteSegment(modelSetKey=modelSetKey,
                      importGroupHashes=list(groupHashes))

    conn = CeleryDbConn.getDbEngine().connect()
    transaction = conn.begin()

    try:
        if segmentRows:
            conn.execute(graphSegmentTable.insert(), segmentRows)

        if queuedChunks:
            conn.execute(
                compilerQueueTable.insert(),
                [{"modelSetId": queuedModelSetId, "chunkKey": queuedChunkKey}
                 for queuedModelSetId, queuedChunkKey in queuedChunks])

        loadItemKeys(conn, itemKeys, modelSetId, modelSetKey)

        # Only commit when something was actually written
        if segmentRows or queuedChunks or itemKeys:
            transaction.commit()
        else:
            transaction.rollback()

        logger.info("Inserted %s queued %s chunks in %s",
                    len(segmentRows), len(queuedChunks),
                    (datetime.now(pytz.utc) - began))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
def _insertOrUpdateBranches(conn, modelSetKey: str, modelSetId: int,
                            newBranches: List[BranchTuple]) -> None:
    """ Insert or Update Branches

    1) Delete existing branches
    2) Insert new branches
    3) Queue chunks for recompile

    Existing branches are matched by importGroupHash, so every stored branch
    sharing an import group with one of the new branches is replaced.

    :param conn: The database connection the statements are executed on.
        No commit is issued in this function.
    :param modelSetKey: The model set key, used to make the chunk keys.
    :param modelSetId: The model set id stored on the compiler queue items.
    :param newBranches: The branches to store. Each one has its ``id``
        attribute set to its newly allocated database id.
    :returns: None
    """

    startTime = datetime.now(pytz.utc)

    branchIndexTable = BranchIndex.__table__
    queueTable = BranchIndexCompilerQueue.__table__

    importHashSet = set()

    # (modelSetId, chunkKey) pairs
    chunkKeysForQueue: Set[Tuple[int, str]] = set()

    # Get the IDs that we need
    newIdGen = CeleryDbConn.prefetchDeclarativeIds(BranchIndex,
                                                   len(newBranches))

    # Create state arrays
    inserts = []

    # Build the row for each branch, noting its import group and chunk
    for newBranch in newBranches:
        importHashSet.add(newBranch.importGroupHash)

        # The id is assigned before packing the json
        # noinspection PyTypeChecker
        newBranch.id = next(newIdGen)
        branchJson = newBranch.packJson()

        existingObject = BranchIndex(
            id=newBranch.id,
            coordSetId=newBranch.coordSetId,
            key=newBranch.key,
            updatedDate=newBranch.updatedDate,
            createdDate=newBranch.createdDate,
            importHash=newBranch.importHash,
            importGroupHash=newBranch.importGroupHash,
            chunkKey=makeChunkKeyForBranchIndex(modelSetKey, newBranch.key),
            packedJson=branchJson
        )
        inserts.append(existingObject.tupleToSqlaBulkInsertDict())

        chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

    # 1) Delete existing branches
    # The ids being deleted used to be selected here first, but the result
    # was never used, so that extra query has been dropped.
    if importHashSet:
        conn.execute(
            branchIndexTable.delete(
                branchIndexTable.c.importGroupHash.in_(importHashSet))
        )

    # 2) Insert new branches
    if inserts:
        conn.execute(branchIndexTable.insert(), inserts)

    # 3) Queue chunks for recompile
    if chunkKeysForQueue:
        conn.execute(
            queueTable.insert(),
            [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue]
        )

    logger.debug("Inserted %s queued %s chunks in %s",
                 len(inserts), len(chunkKeysForQueue),
                 (datetime.now(pytz.utc) - startTime))
def _convertBranchDisps(
        newBranches: List[BranchTuple]) -> typing.Tuple[List, List]:
    """ Convert Branch Disps

    Converts the disps of each branch into database disps with freshly
    allocated IDs, and rewrites each branch's ``disps`` json to match.
    Nothing is written to the database here.

    1) Create the new Disps
    2) Collect the disp IDs for recompile

    :param newBranches: The branches to convert. The ``disps`` attribute of
        every branch that has disps is replaced.
    :returns: A tuple of (newDisps, dispIdsToCompile)
    """
    began = datetime.now(pytz.utc)

    allNewDisps = []
    idsToCompile = []

    for branch in newBranches:
        disps = _convertJsonDispsToTuples(branch)
        if not disps:
            continue

        # Give every disp a database ID, remembering which UI temp ID it had
        newIdByOldId = {}
        idIter = CeleryDbConn.prefetchDeclarativeIds(DispBase, len(disps))
        for disp in disps:
            uiTempId = disp.id
            disp.id = next(idIter)
            newIdByOldId[uiTempId] = disp.id

            idsToCompile.append(disp.id)
            allNewDisps.append(disp)

        # Point the group IDs at the new database IDs
        for disp in disps:
            if disp.groupId in newIdByOldId:
                disp.groupId = newIdByOldId[disp.groupId]

        # Recreate the branch disp json as per the structure from the DispBase
        # tables. Just to be clear, this is converting it one way and then
        # converting it back. It ensures the data is consistent.
        # (Which it should be if all was right)
        # It also sets the "hashId"
        newHashIdByOldHashId = {}
        packedItems = []
        branch.disps = []
        for disp in disps:
            previousHashId = disp.hashId

            # This assigns the hashId to the jsonDict and disp
            packedJson = _packDispJson(disp, disp.tupleToSmallJsonDict())
            branch.disps.append(packedJson)

            newHashIdByOldHashId[previousHashId] = disp.hashId
            packedItems.append((disp, packedJson))

        # Now that all the new hashIds are known, fix up the "replaces" links
        for disp, packedJson in packedItems:
            if disp.replacesHashId in newHashIdByOldHashId:
                disp.replacesHashId = newHashIdByOldHashId[disp.replacesHashId]
                packedJson['rid'] = disp.replacesHashId

            disp.dispJson = json.dumps(packedJson)

            # AFTER the json has been dumped to the disp, convert it for
            # storage in the branch as geom JSON is not stored as a string in
            # the branch. Because it's stored in the Disp Tuple/Table "geom"
            # field as a string
            if 'g' in packedJson:
                packedJson['g'] = json.loads(packedJson['g'])

        del packedItems

    logger.debug("Converted %s disps for %s branches in %s",
                 len(allNewDisps), len(newBranches),
                 (datetime.now(pytz.utc) - began))

    return allNewDisps, idsToCompile