def createOrUpdateBranches(self, importBranchesEncodedPayload: bytes) -> None:
    """ Convert Import Branch Tuples

    This method takes import branch tuples, and converts them to
    branch format used throughout the diagram plugin.

    (Thats the packed JSON wrapped by an accessor class)

    """
    # Decode importBranches payload
    importBranches: List[ImportBranchTuple] = (
        Payload().fromEncodedPayload(importBranchesEncodedPayload).tuples)

    # Validate the input importBranches
    _validateNewBranchIndexs(importBranches)

    # Do the import
    groupedBranches = _convertImportBranchTuples(importBranches)

    startTime = datetime.now(pytz.utc)

    dbSession = CeleryDbConn.getDbSession()

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        for (modelSetKey, modelSetId,
             coordSetId), branches in groupedBranches.items():
            _insertOrUpdateBranches(conn, modelSetKey, modelSetId, branches)

            newDisps, dispIdsToCompile = _convertBranchDisps(branches)

            # NO TRANSACTION
            # Bulk load the Disps with the engine, outside this transaction
            _bulkInsertDisps(engine, newDisps)

            # Queue the compiler
            DispCompilerQueueController.queueDispIdsToCompileWithSession(
                dispIdsToCompile, conn)

            # Commit this coord set, then begin a new connection transaction
            # for the next iteration (see the sketch after this function)
            transaction.commit()
            dbSession.commit()
            transaction = conn.begin()

            logger.debug(
                "Completed importing %s branches for coordSetId %s in %s",
                len(branches), coordSetId,
                (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        dbSession.rollback()
        transaction.rollback()
        logger.debug("Retrying createOrUpdateBranches, %s", e)
        logger.exception(e)
        raise self.retry(exc=e, countdown=3)

    finally:
        dbSession.close()
        conn.close()
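# A minimal, standalone sketch (not part of the plugin) of the commit-per-coord-set
# pattern used in createOrUpdateBranches above: the connection-level transaction is
# committed after each group and a new one is begun so the next iteration does not
# run against an already-committed transaction. The SQLite engine and "demo" table
# here are assumptions for illustration only.
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine


def _sketchCommitPerGroup():
    engine = create_engine("sqlite:///:memory:")
    metadata = MetaData()
    demoTable = Table("demo", metadata,
                      Column("id", Integer, primary_key=True),
                      Column("coordSetId", Integer),
                      Column("name", String))
    metadata.create_all(engine)

    groupedRows = {1: [dict(coordSetId=1, name="branch a")],
                   2: [dict(coordSetId=2, name="branch b")]}

    conn = engine.connect()
    transaction = conn.begin()
    try:
        for coordSetId, rows in groupedRows.items():
            conn.execute(demoTable.insert(), rows)
            # Commit this group, then begin a new transaction for the next one
            transaction.commit()
            transaction = conn.begin()
    except Exception:
        transaction.rollback()
        raise
    finally:
        conn.close()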
def qryChunkInWorker(self, offset, limit) -> List[LiveDbDisplayValueTuple]:
    """ Query Chunk

    This returns a chunk of LiveDB items from the database

    :param self: A celery reference to this task
    :param offset: The offset of the chunk
    :param limit: The maximum number of items to return in the chunk
    :returns: A list of LiveDbDisplayValueTuple items
    """

    table = LiveDbItem.__table__
    cols = [
        table.c.key, table.c.dataType, table.c.rawValue, table.c.displayValue
    ]

    session = CeleryDbConn.getDbSession()
    try:
        result = session.execute(
            select(cols).order_by(table.c.id).offset(offset).limit(limit))

        return [
            LiveDbDisplayValueTuple(key=o.key,
                                    dataType=o.dataType,
                                    rawValue=o.rawValue,
                                    displayValue=o.displayValue)
            for o in result.fetchall()
        ]

    finally:
        session.close()
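# A minimal sketch of how a caller might page through all LiveDB items using the
# offset/limit chunk query above. qryChunkInWorker is called directly here for
# illustration; in the real plugin it is a celery task, so how it is dispatched is
# an assumption.
def _sketchIterateAllLiveDbItems(chunkSize: int = 1000):
    offset = 0
    while True:
        chunk = qryChunkInWorker(None, offset, chunkSize)
        if not chunk:
            break
        for item in chunk:
            yield item
        offset += chunkSize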
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    session = CeleryDbConn.getDbSession()

    try:
        indexQry = (session.query(
            DocDbDocument.chunkKey, DocDbDocument.key,
            DocDbDocument.documentJson).filter(
                DocDbDocument.chunkKey.in_(chunkKeys)).order_by(
                    DocDbDocument.key).yield_per(1000).all())

        # Create the ChunkKey -> {key -> packedJson, key -> packedJson, ...} map
        packagedJsonByObjIdByChunkKey = defaultdict(dict)

        for item in indexQry:
            packagedJsonByObjIdByChunkKey[item.chunkKey][
                item.key] = item.documentJson

        encPayloadByChunkKey = {}

        # Sort each bucket by the key
        for chunkKey, packedJsonByKey in packagedJsonByObjIdByChunkKey.items():
            tuples = json.dumps(packedJsonByKey, sort_keys=True)

            # Create the blob data for this index.
            # It will be searched with a binary sort
            encPayloadByChunkKey[chunkKey] = Payload(
                tuples=tuples).toEncodedPayload()

        return encPayloadByChunkKey

    finally:
        session.close()
def deleteTraceConfig(self, modelSetKey: str, traceConfigKeys: List[str]) -> None:
    startTime = datetime.now(pytz.utc)

    traceConfigTable = GraphDbTraceConfig.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        modelSetIdByKey = _loadModelSets()
        modelSetId = modelSetIdByKey[modelSetKey]

        conn.execute(
            traceConfigTable.delete(and_(traceConfigTable.c.key.in_(traceConfigKeys),
                                         traceConfigTable.c.modelSetId == modelSetId))
        )

        transaction.commit()

        logger.info("Deleted %s trace configs in %s",
                     len(traceConfigKeys),
                     (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        transaction.rollback()
        logger.debug("Retrying import graphDb objects, %s", e)
        raise self.retry(exc=e, countdown=3)


    finally:
        conn.close()
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    session = CeleryDbConn.getDbSession()

    try:
        indexQry = (session.query(
            BranchIndex.chunkKey,
            BranchIndex.key, BranchIndex.packedJson).filter(
                BranchIndex.chunkKey.in_(chunkKeys)).order_by(
                    BranchIndex.key).yield_per(1000).all())

        # Create the ChunkKey -> {key -> packedJson, key -> packedJson, ...} map
        packagedJsonsByObjKeyByChunkKey = defaultdict(
            lambda: defaultdict(list))

        for item in indexQry:
            packagedJsonsByObjKeyByChunkKey[item.chunkKey][item.key].append(
                item.packedJson)

        encPayloadByChunkKey = {}

        # Sort each bucket by the key
        for chunkKey, packedJsonsByKey in packagedJsonsByObjKeyByChunkKey.items():
            tuples = json.dumps(packedJsonsByKey, sort_keys=True)

            # Create the blob data for this index.
            # It will be searched with a binary sort
            encPayloadByChunkKey[chunkKey] = Payload(
                tuples=tuples).toEncodedPayload()

        return encPayloadByChunkKey

    finally:
        session.close()
def _loadCoordSet(modelSetKey, coordSetKey):
    ormSession = CeleryDbConn.getDbSession()
    try:
        coordSet = getOrCreateCoordSet(ormSession, modelSetKey, coordSetKey)
        ormSession.expunge_all()
        return coordSet

    finally:
        ormSession.close()
def _convertImportBranchTuples(
    importBranches: List[ImportBranchTuple]
) -> Dict[typing.Tuple[str, int, int], List[BranchTuple]]:
    """ Convert Import Branch Tuples

    This method takes import branch tuples, and converts them to
    branch format used throughout the diagram plugin.

    (Thats the packed JSON wrapped by an accessor class)

    """

    # Get a map for the coordSetIds
    modelKeyCoordKeyTuples = [(b.modelSetKey, b.coordSetKey)
                              for b in importBranches]

    coordSetIdByModelKeyCoordKeyTuple = getModelSetIdCoordSetId(
        modelKeyCoordKeyTuples)

    # Sort out the importBranches by coordSetKey
    branchByModelKeyByCoordKey = defaultdict(lambda: defaultdict(list))
    for importBranch in importBranches:
        branchByModelKeyByCoordKey[importBranch.modelSetKey][importBranch.coordSetKey] \
            .append(importBranch)

    # Define the converted importBranches
    convertedBranchesByCoordSetId: Dict[typing.Tuple[str, int, int], List[BranchTuple]] \
        = {}

    # Get the model set
    dbSession = CeleryDbConn.getDbSession()
    try:
        # Iterate through the importBranches and convert them
        for modelSetKey, item in branchByModelKeyByCoordKey.items():
            for coordSetKey, importBranches in item.items():
                modelSetId, coordSetId = coordSetIdByModelKeyCoordKeyTuple[(
                    modelSetKey, coordSetKey)]

                lookupHashConverter = LookupHashConverter(
                    dbSession, modelSetId, coordSetId)

                convertedBranches = []
                for importBranch in importBranches:
                    branch = BranchTuple.loadFromImportTuple(
                        importBranch,
                        coordSetId,
                        lookupHashConverter=lookupHashConverter)
                    convertedBranches.append(branch)

                convertedBranchesByCoordSetId[(modelSetKey, modelSetId, coordSetId)] \
                    = convertedBranches

    finally:
        dbSession.close()

    return convertedBranchesByCoordSetId
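# A minimal, standalone sketch of the grouping performed above: import tuples are
# bucketed by (modelSetKey, coordSetKey), those keys are resolved to database ids,
# and the result is keyed by (modelSetKey, modelSetId, coordSetId). The namedtuple
# and the id lookup dict are stand-ins, not the plugin's real types.
from collections import defaultdict, namedtuple

ImportBranchSketch = namedtuple("ImportBranchSketch", "modelSetKey coordSetKey key")


def _sketchGroupBranches(importBranches, idsByModelCoordKey):
    branchesByModelByCoord = defaultdict(lambda: defaultdict(list))
    for branch in importBranches:
        branchesByModelByCoord[branch.modelSetKey][branch.coordSetKey].append(branch)

    grouped = {}
    for modelSetKey, branchesByCoordKey in branchesByModelByCoord.items():
        for coordSetKey, branches in branchesByCoordKey.items():
            modelSetId, coordSetId = idsByModelCoordKey[(modelSetKey, coordSetKey)]
            grouped[(modelSetKey, modelSetId, coordSetId)] = branches
    return grouped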
def _makeModelSet(modelSetKey: str) -> int:
    # Get the model set
    dbSession = CeleryDbConn.getDbSession()
    try:
        newItem = GraphDbModelSet(key=modelSetKey, name=modelSetKey)
        dbSession.add(newItem)
        dbSession.commit()
        return newItem.id

    finally:
        dbSession.close()
def reindexSearchObject(conn,
                        objectsToIndex: List[ObjectToIndexTuple]) -> None:
    """ Reindex Search Object

    :param conn:
    :param objectsToIndex: Object To Index
    :returns:
    """

    logger.debug("Starting to index %s SearchIndex", len(objectsToIndex))

    searchIndexTable = SearchIndex.__table__
    queueTable = SearchIndexCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    newSearchIndexes = []
    objectIds = []
    searchIndexChunksToQueue = set()

    for objectToIndex in objectsToIndex:
        newSearchIndexes.extend(_indexObject(objectToIndex))
        objectIds.append(objectToIndex.id)

    newIdGen = CeleryDbConn.prefetchDeclarativeIds(SearchIndex,
                                                   len(newSearchIndexes))
    for newSearchIndex in newSearchIndexes:
        newSearchIndex.id = next(newIdGen)
        searchIndexChunksToQueue.add(newSearchIndex.chunkKey)

    results = conn.execute(
        select(columns=[searchIndexTable.c.chunkKey],
               whereclause=searchIndexTable.c.objectId.in_(objectIds)))

    for result in results:
        searchIndexChunksToQueue.add(result.chunkKey)

    if objectIds:
        conn.execute(
            searchIndexTable.delete(
                searchIndexTable.c.objectId.in_(objectIds)))

    if newSearchIndexes:
        logger.debug("Inserting %s SearchIndex", len(newSearchIndexes))
        inserts = [o.tupleToSqlaBulkInsertDict() for o in newSearchIndexes]
        conn.execute(searchIndexTable.insert(), inserts)

    if searchIndexChunksToQueue:
        conn.execute(queueTable.insert(),
                     [dict(chunkKey=k) for k in searchIndexChunksToQueue])

    logger.info("Inserted %s SearchIndex keywords in %s",
                len(newSearchIndexes), (datetime.now(pytz.utc) - startTime))
def _makeCoordSet(modelSetId: int, coordSetKey: str) -> int:
    # Make a coord set
    dbSession = CeleryDbConn.getDbSession()
    try:
        newItem = ModelCoordSet(modelSetId=modelSetId,
                                key=coordSetKey,
                                name=coordSetKey)
        dbSession.add(newItem)
        dbSession.commit()
        return newItem.id

    finally:
        dbSession.close()
def _loadModelSets() -> Dict[str, int]:
    # Get the model sets
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    try:
        results = list(
            conn.execute(
                select(columns=[_modelSetTable.c.id, _modelSetTable.c.key])))
        modelSetIdByKey = {o.key: o.id for o in results}
        del results

    finally:
        conn.close()
    return modelSetIdByKey
def _loadCoordSets(modelSetId: int) -> Dict[str, int]:
    # Get the coord sets for this model set
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    try:
        results = list(
            conn.execute(
                select(columns=[_coordSetTable.c.id, _coordSetTable.c.key],
                       whereclause=_coordSetTable.c.modelSetId == modelSetId)))
        coordSetIdByKey = {o.key: o.id for o in results}
        del results

    finally:
        conn.close()

    return coordSetIdByKey
def _bulkLoadDispsTask(importGroupHash: str, disps: List):
    """ Import Disps Links

    1) Drop all disps with matching importGroupHash

    2) set the  coordSetId

    :param importGroupHash:
    :param disps: An array of disp objects to import
    :return:
    """

    dispTable = DispBase.__table__
    gridKeyIndexTable = GridKeyIndex.__table__
    gridQueueTable = GridKeyCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:

        stmt = select([gridKeyIndexTable.c.coordSetId,
                       gridKeyIndexTable.c.gridKey]) \
            .where(dispTable.c.importGroupHash == importGroupHash) \
            .select_from(join(gridKeyIndexTable, dispTable,
                              gridKeyIndexTable.c.dispId == dispTable.c.id)) \
            .distinct()

        ins = gridQueueTable.insert().from_select(['coordSetId', 'gridKey'],
                                                  stmt)
        conn.execute(ins)

        conn.execute(dispTable.delete().where(
            dispTable.c.importGroupHash == importGroupHash))

        transaction.commit()

        _bulkInsertDisps(engine, disps)

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
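# A minimal, runnable sketch (SQLite in-memory) of the insert-from-select used above
# to queue the grids of the disps that are about to be dropped. The tables and
# column names are illustrative only; the legacy select([...]) form matches the
# SQLAlchemy style used in the code above.
from sqlalchemy import (Column, Integer, MetaData, String, Table, create_engine,
                        select)


def _sketchInsertFromSelect():
    engine = create_engine("sqlite:///:memory:")
    metadata = MetaData()
    gridIndex = Table("gridIndex", metadata,
                      Column("id", Integer, primary_key=True),
                      Column("coordSetId", Integer),
                      Column("gridKey", String))
    gridQueue = Table("gridQueue", metadata,
                      Column("id", Integer, primary_key=True),
                      Column("coordSetId", Integer),
                      Column("gridKey", String))
    metadata.create_all(engine)

    with engine.begin() as conn:
        conn.execute(gridIndex.insert(),
                     [dict(coordSetId=1, gridKey="1|0x0"),
                      dict(coordSetId=1, gridKey="1|0x1")])

        # Queue every distinct (coordSetId, gridKey) straight from the index table
        stmt = select([gridIndex.c.coordSetId, gridIndex.c.gridKey]).distinct()
        conn.execute(gridQueue.insert().from_select(["coordSetId", "gridKey"], stmt))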
def updateValues(self, payloadEncodedArgs: bytes) -> None:
    """ Compile Grids Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: The updates from the queue controller
    :returns: None
    """
    startTime = datetime.now(pytz.utc)

    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    allModelUpdates: List[LiveDbRawValueQueue] = argData[0]
    queueItemIds = argData[1]

    # Group the data by model set
    updatesByModelSetId = defaultdict(list)
    for update in allModelUpdates:
        updatesByModelSetId[update.modelSetId].append(update)

    ormSession = CeleryDbConn.getDbSession()
    try:

        for modelSetId, modelUpdates in updatesByModelSetId.items():
            _updateValuesForModelSet(modelSetId, modelUpdates, ormSession)

        # ---------------
        # delete the queue items
        dispQueueTable = LiveDbRawValueQueue.__table__
        ormSession.execute(
            dispQueueTable.delete(dispQueueTable.c.id.in_(queueItemIds))
        )

        ormSession.commit()

        # ---------------
        # Finally, log some statistics
        logger.info("Updated %s raw values in %s",
                    len(allModelUpdates),
                    (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        ormSession.close()
def _buildIndex(chunkKeys) -> Dict[str, bytes]:
    session = CeleryDbConn.getDbSession()

    try:
        indexQry = (
            session.query(
                ItemKeyIndex.chunkKey,
                ItemKeyIndex.itemKey,
                # ItemKeyIndex.itemType,
                ItemKeyIndex.segmentKey).filter(
                    ItemKeyIndex.chunkKey.in_(chunkKeys)).order_by(
                        ItemKeyIndex.itemKey,
                        ItemKeyIndex.segmentKey).yield_per(1000).all())

        # Create the ChunkKey -> {itemKey -> [segmentKey, ...], ...} map
        packagedJsonByObjIdByChunkKey = defaultdict(lambda: defaultdict(list))

        for item in indexQry:
            (packagedJsonByObjIdByChunkKey[item.chunkKey][item.itemKey].append(
                item.segmentKey))

        encPayloadByChunkKey = {}

        # Sort each bucket by the key
        for chunkKey, segmentKeysByItemKey in packagedJsonByObjIdByChunkKey.items():
            # Convert the list to a json string, this reduces the memory footprint when
            # searching the index.
            packedJsonByKey = {
                itemKey: json.dumps(segmentKeys)
                for itemKey, segmentKeys in segmentKeysByItemKey.items()
            }

            tuples = json.dumps(packedJsonByKey, sort_keys=True)

            # Create the blob data for this index.
            # It will be found with a binary sort
            encPayloadByChunkKey[chunkKey] = Payload(
                tuples=tuples).toEncodedPayload()

        return encPayloadByChunkKey

    finally:
        session.close()
def _insertOrUpdateObjects(newTraceConfigs: List[GraphDbTraceConfigTuple],
                           modelSetId: int) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert object if the are missing

    """

    traceConfigTable = GraphDbTraceConfig.__table__

    startTime = datetime.now(pytz.utc)

    dbSession = CeleryDbConn.getDbSession()

    try:
        keysToDelete = {i.key for i in newTraceConfigs}

        dbSession.execute(
            traceConfigTable.delete(
                traceConfigTable.c.key.in_(keysToDelete))
        )

        # Create the DB ORM objects to insert
        for importTraceConfig in newTraceConfigs:
            dbSession.add(GraphDbTraceConfig().fromTuple(importTraceConfig, modelSetId))

        dbSession.commit()

        logger.info("Inserted %s trace configs in %s",
                     len(inserts),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        dbSession.rollback()
        raise


    finally:
        dbSession.close()
def deleteSegment(self, modelSetKey: str,
                  importGroupHashes: List[str]) -> None:
    startTime = datetime.now(pytz.utc)

    segmentTable = GraphDbSegment.__table__
    queueTable = GraphDbCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        modelSetIdByKey = _loadModelSets()
        modelSetId = modelSetIdByKey[modelSetKey]

        chunkKeys = conn.execute(
            select([segmentTable.c.modelSetId, segmentTable.c.chunkKey],
                   and_(segmentTable.c.importGroupHash.in_(importGroupHashes),
                        segmentTable.c.modelSetId == modelSetId))).fetchall()

        if chunkKeys:
            conn.execute(
                segmentTable.delete(
                    and_(segmentTable.c.importGroupHash.in_(importGroupHashes),
                         segmentTable.c.modelSetId == modelSetId)))

            conn.execute(queueTable.insert(), chunkKeys)

        deleteItemKeys(conn, modelSetId, importGroupHashes)

        transaction.commit()

        logger.info("Deleted %s, queued %s chunks in %s",
                    len(importGroupHashes), len(chunkKeys),
                    (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        transaction.rollback()
        logger.debug("Retrying graphDb deleteSegment, %s", e)
        raise self.retry(exc=e, countdown=3)

    finally:
        conn.close()
def _updateCoordSetPosition(coordSet: ModelCoordSet, disps: List):
    """ Update CoordSet Position

    1) Drop all disps with matching importGroupHash

    2) set the  coordSetId

    :param coordSet:
    :param disps: An array of disp objects to import
    :return:
    """

    if coordSet.initialPanX or coordSet.initialPanY or coordSet.initialZoom:
        return

    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()

    try:

        # Initialise the ModelCoordSet initial position if it's not set
        for disp in disps:
            if not hasattr(disp, 'geomJson'):
                continue
            coords = json.loads(disp.geomJson)
            coordSet.initialPanX = coords[0]
            coordSet.initialPanY = coords[1]
            coordSet.initialZoom = 0.05
            ormSession.merge(coordSet)
            break

        ormSession.commit()

        logger.info("Updated coordset position in %s",
                    (datetime.now(pytz.utc) - startTime))

    finally:
        ormSession.close()
def compileBranchIndexChunk(self, payloadEncodedArgs: bytes) -> List[int]:
    """ Compile BranchIndex Index Task

    :param self: A bound parameter from celery
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of chunk keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        queueItemsByModelSetId = defaultdict(list)

        for queueItem in queueItems:
            queueItemsByModelSetId[queueItem.modelSetId].append(queueItem)

        for modelSetId, modelSetQueueItems in queueItemsByModelSetId.items():
            _compileBranchIndexChunk(conn, transaction, modelSetId,
                                     modelSetQueueItems)

        queueTable = BranchIndexCompilerQueue.__table__

        transaction = conn.begin()
        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))
        transaction.commit()

    except Exception as e:
        transaction.rollback()
        logger.debug("RETRYING task - %s", e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()

    return list(set([i.chunkKey for i in queueItems]))
def importLiveDbItems(self, modelSetKey: str,
                      newItems: List[ImportLiveDbItemTuple]) -> List[str]:
    """ Compile Grids Task

    :param self: A celery reference to this task
    :param modelSetKey: The model set name
    :param newItems: The list of new items
    :returns: A list of grid keys that have been updated.
    """

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    liveDbTable = LiveDbItem.__table__
    try:

        liveDbModelSet = getOrCreateLiveDbModelSet(session, modelSetKey)

        # This will remove duplicates
        itemsByKey = {i.key: i for i in newItems}

        allKeys = list(itemsByKey)
        existingKeys = set()

        # Query for the existing keys, in chunks of 1000
        # (see the sketch after this function)
        chunkSize = 1000
        offset = 0
        while True:
            chunk = allKeys[offset:offset + chunkSize]
            if not chunk:
                break
            offset += chunkSize
            stmt = (select([liveDbTable.c.key])
                    .where(liveDbTable.c.modelSetId == liveDbModelSet.id)
                    .where(makeCoreValuesSubqueryCondition(
                        engine, liveDbTable.c.key, chunk)))

            result = conn.execute(stmt)

            existingKeys.update([o[0] for o in result.fetchall()])

        inserts = []
        newKeys = []

        for newItem in itemsByKey.values():
            if newItem.key in existingKeys:
                continue

            inserts.append(dict(
                modelSetId=liveDbModelSet.id,
                key=newItem.key,
                dataType=newItem.dataType,
                rawValue=newItem.rawValue,
                displayValue=newItem.displayValue,
                importHash=newItem.importHash
            ))

            newKeys.append(newItem.key)

        if not inserts:
            return []

        conn.execute(LiveDbItem.__table__.insert(), inserts)

        transaction.commit()
        logger.info("Inserted %s LiveDbItems, %s already existed, in %s",
                    len(inserts), len(existingKeys), (datetime.now(pytz.utc) - startTime))

        return newKeys

    except Exception as e:
        transaction.rollback()
        logger.debug("Task failed, but it will retry. %s", e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
        session.close()
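# A minimal sketch of the chunked existence check used above: the candidate keys are
# sliced into batches so each IN clause stays a manageable size, and keys already in
# the database are dropped from the insert list. "queryExistingKeys" is a stand-in
# for the SELECT against the LiveDbItem table.
def _sketchFilterNewKeys(allKeys, queryExistingKeys, chunkSize: int = 1000):
    existingKeys = set()
    for offset in range(0, len(allKeys), chunkSize):
        chunk = allKeys[offset:offset + chunkSize]
        existingKeys.update(queryExistingKeys(chunk))
    return [key for key in allKeys if key not in existingKeys]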
def compileGrids(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Grids Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of grid keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    gridKeys = list(set([i.gridKey for i in queueItems]))
    coordSetIdByGridKey = {i.gridKey: i.coordSetId for i in queueItems}

    queueTable = GridKeyCompilerQueue.__table__
    gridTable = GridKeyIndexCompiled.__table__

    startTime = datetime.now(pytz.utc)

    session = CeleryDbConn.getDbSession()
    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        logger.debug("Staring compile of %s queueItems in %s",
                     len(queueItems), (datetime.now(pytz.utc) - startTime))

        total = 0
        dispData = _qryDispData(session, gridKeys)

        conn.execute(gridTable.delete(gridTable.c.gridKey.in_(gridKeys)))

        transaction.commit()
        transaction = conn.begin()

        inserts = []
        for gridKey, dispJsonStr in dispData.items():
            m = hashlib.sha256()
            m.update(gridKey.encode())
            m.update(dispJsonStr.encode())
            gridTupleHash = b64encode(m.digest()).decode()

            gridTuple = GridTuple(
                gridKey=gridKey,
                dispJsonStr=dispJsonStr,
                lastUpdate=gridTupleHash
            )

            encodedGridTuple = Payload(tuples=[gridTuple]).toEncodedPayload()

            inserts.append(dict(coordSetId=coordSetIdByGridKey[gridKey],
                                gridKey=gridKey,
                                lastUpdate=gridTupleHash,
                                encodedGridTuple=encodedGridTuple))

        if inserts:
            conn.execute(gridTable.insert(), inserts)

        logger.debug("Compiled %s gridKeys, %s missing, in %s",
                     len(inserts),
                     len(gridKeys) - len(inserts), (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))

        transaction.commit()
        logger.info("Compiled and Committed %s GridKeyIndexCompileds in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return gridKeys

    except NotAllDispsCompiledException as e:
        logger.warning("Retrying, Not all disps for gridKey %s are compiled", gridKeys)
        raise self.retry(exc=e, countdown=1)

    except Exception as e:
        transaction.rollback()
        logger.debug("Compile of grids failed, retrying : %s", gridKeys)
        raise self.retry(exc=e, countdown=2)

    finally:
        conn.close()
        session.close()
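# A minimal sketch of the change-detection hash computed above: the grid key and its
# compiled JSON are folded into a sha256 digest and base64 encoded, so clients can
# compare the short "lastUpdate" value instead of the full grid payload.
import hashlib
from base64 import b64encode


def _sketchGridHash(gridKey: str, dispJsonStr: str) -> str:
    m = hashlib.sha256()
    m.update(gridKey.encode())
    m.update(dispJsonStr.encode())
    return b64encode(m.digest()).decode()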
def _importDisps(coordSet: ModelCoordSet, importDisps: List):
    """ Link Disps

    1) Use the AgentImportDispGridLookup to convert lookups from importHash
        to id
    2) set the  coordSetId

    This is not done in a thread because the lookups cause issues

    """

    dispIdGen = CeleryDbConn.prefetchDeclarativeIds(DispBase, len(importDisps))

    dispIdsToCompile = []
    importDispLinks = []
    ormDisps = []

    ormSession = CeleryDbConn.getDbSession()
    try:

        lookupConverter = LookupHashConverter(ormSession,
                                              modelSetId=coordSet.modelSetId,
                                              coordSetId=coordSet.id)

        dispGroupPtrWithTargetHash: List[Tuple[DispGroupPointer, str]] = []
        dispGroupChildWithTargetHash: List[Tuple[DispBase, str]] = []

        # Preload any groups our pointers may point to.

        # Pre-import any DispGroup IDs we may need
        dispGroupTargetImportHashes = [
            o.targetDispGroupHash for o in importDisps
            if o.tupleType() == ImportDispGroupPtrTuple.tupleType()
        ]

        # This will store DispGroup and DispGroupPointer hashes
        groupIdByImportHash: Dict[str, int] = {
            o.importHash: o.id
            for o in ormSession.query(DispBase.importHash, DispBase.id).filter(
                DispBase.importHash.in_(dispGroupTargetImportHashes)).filter(
                    DispBase.coordSetId == coordSet.id)
        }

        del dispGroupTargetImportHashes

        # This is a list of DispGroup.id.
        # We use this to filter out disps that are part of a DispGroup,
        # they don't get compiled
        dispGroupIds = set()

        # Sort the DispGroups first, so they are created before any FK references them
        sortedImportDisps = sorted(
            importDisps, key=lambda o: IMPORT_SORT_ORDER[o.tupleType()])

        for importDisp in sortedImportDisps:
            # Convert the geometry into the internal array format
            _convertGeom(importDisp)

            # Create the storage tuple instance, and copy over the data.
            ormDisp = _convertImportTuple(importDisp)
            ormDisps.append(ormDisp)

            # Preallocate the IDs for performance on PostGreSQL
            ormDisp.id = next(dispIdGen)

            # Assign the coord set id.
            ormDisp.coordSetId = coordSet.id

            # If this is a dispGroup, index its ID
            if isinstance(ormDisp, DispGroup):
                dispGroupIds.add(ormDisp.id)
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

            # If this is a dispGroupPtr, index its targetHash so we can update it
            if isinstance(ormDisp, DispGroupPointer):
                groupIdByImportHash[ormDisp.importHash] = ormDisp.id

                if ormDisp.targetDispGroupName:
                    ormDisp.targetDispGroupName = '%s|%s' % (
                        coordSet.id, ormDisp.targetDispGroupName)

                # Not all DispGroupPointers have targets,
                # they can be orphaned instances
                if importDisp.targetDispGroupHash:
                    dispGroupPtrWithTargetHash.append(
                        (ormDisp, importDisp.targetDispGroupHash))

            # If this is a dispGroupPtr, index its targetHash so we can update it
            parentDispGroupHash = getattr(importDisp, "parentDispGroupHash",
                                          None)
            if parentDispGroupHash:
                dispGroupChildWithTargetHash.append(
                    (ormDisp, parentDispGroupHash))

            # Add some interim data to the import display link, so it can be created
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalRawValue = getattr(
                        ormDisp, attrName)
                    importDispLink.internalDispId = ormDisp.id
                    importDispLinks.append(importDispLink)

            # Convert the values of the liveDb attributes
            lookupConverter.convertLookups(ormDisp)

            # Add the after translate value, this is the Display Value
            if hasattr(importDisp, "liveDbDispLinks"):
                for importDispLink in importDisp.liveDbDispLinks:
                    attrName = importDispLink.dispAttrName
                    importDispLink.internalDisplayValue = getattr(
                        ormDisp, attrName)

            # Queue the Disp to be compiled into a grid.
            # Disps belonging to a DispGroup do not get compiled into grids.
            if ormDisp.groupId not in dispGroupIds:
                dispIdsToCompile.append(ormDisp.id)

        # Link the DispGroups
        # Create the links between the Disp and DispGroup
        for ormDisp, groupHash in dispGroupChildWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist" %
                                groupHash)

            ormDisp.groupId = groupOrmObjId

        # Link the DispGroupPtr to the DispGroup
        # This is only used when the dispGroupPtr points to a disp group
        for ormDisp, groupHash in dispGroupPtrWithTargetHash:
            groupOrmObjId = groupIdByImportHash.get(groupHash)
            if groupOrmObjId is None:
                raise Exception("DispGroup with importHash %s doesn't exist" %
                                groupHash)

            ormDisp.targetDispGroupId = groupOrmObjId

    finally:
        ormSession.close()

    return dispIdsToCompile, importDispLinks, ormDisps
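# A minimal, standalone sketch of the two-pass linking performed in _importDisps
# above: every DispGroup-like object first records its id against its importHash,
# then each child resolves its parent hash to that id. Plain dicts stand in for the
# ORM disp objects.
def _sketchLinkGroups(disps):
    groupIdByImportHash = {d["importHash"]: d["id"]
                           for d in disps if d.get("isGroup")}

    for d in disps:
        parentHash = d.get("parentDispGroupHash")
        if not parentHash:
            continue
        groupId = groupIdByImportHash.get(parentHash)
        if groupId is None:
            raise Exception("DispGroup with importHash %s doesn't exist" % parentHash)
        d["groupId"] = groupId

    return disps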
def loadItemKeys(conn, newItemKeys: List[ItemKeyImportTuple], modelSetId: int,
                 modelSetKey: str) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert object if the are missing

    """

    itemKeyIndexTable = ItemKeyIndex.__table__
    queueTable = ItemKeyIndexCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    importHashSet = set()

    chunkKeysForQueue: Set[Tuple[int, str]] = set()

    # Get the IDs that we need
    newIdGen = CeleryDbConn.prefetchDeclarativeIds(ItemKeyIndex,
                                                   len(newItemKeys))

    # Create state arrays
    inserts = []

    # Work out which objects have been updated or need inserting
    for importItemKey in newItemKeys:
        importHashSet.add(importItemKey.importGroupHash)

        # Work out if we need to update the object type

        id_ = next(newIdGen)
        insertObject = ItemKeyIndex(
            id=id_,
            modelSetId=modelSetId,
            importGroupHash=importItemKey.importGroupHash,
            itemType=importItemKey.itemType,
            itemKey=importItemKey.itemKey,
            segmentKey=importItemKey.segmentKey,
            chunkKey=makeChunkKeyForItemKey(modelSetKey,
                                            importItemKey.itemKey))
        inserts.append(insertObject.tupleToSqlaBulkInsertDict())

        chunkKeysForQueue.add((modelSetId, insertObject.chunkKey))

    if importHashSet:
        conn.execute(
            itemKeyIndexTable.delete(
                itemKeyIndexTable.c.importGroupHash.in_(importHashSet)))

    # Insert the ItemKeyIndex Objects
    if inserts:
        conn.execute(itemKeyIndexTable.insert(), inserts)

    if chunkKeysForQueue:
        conn.execute(
            queueTable.insert(),
            [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

    logger.debug("Inserted %s ItemKeys queued %s chunks in %s", len(inserts),
                 len(chunkKeysForQueue), (datetime.now(pytz.utc) - startTime))
def _insertToDb(dispIds, gridCompiledQueueItems, gridKeyIndexesByDispId,
                locationCompiledQueueItems, locationIndexByDispId, queueIds):
    """ Insert to DB

    This method provides the DB inserts and deletes after the data has been calculated.

    """
    startTime = datetime.now(pytz.utc)

    dispBaseTable = DispBase.__table__
    dispQueueTable = DispIndexerQueue.__table__

    gridKeyIndexTable = GridKeyIndex.__table__
    gridQueueTable = GridKeyCompilerQueue.__table__

    locationIndexTable = LocationIndex.__table__
    locationIndexCompilerQueueTable = LocationIndexCompilerQueue.__table__

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:
        lockedDispIds = conn.execute(
            Select(whereclause=dispBaseTable.c.id.in_(dispIds),
                   columns=[dispBaseTable.c.id],
                   for_update=True))

        lockedDispIds = [o[0] for o in lockedDispIds]

        # Ensure that the Disps exist, otherwise we get an integrity error.
        gridKeyIndexes = []
        locationIndexes = []
        for dispId in lockedDispIds:
            gridKeyIndexes.extend(gridKeyIndexesByDispId[dispId])

            if dispId in locationIndexByDispId:
                locationIndexes.append(locationIndexByDispId[dispId])

        # Delete existing items in the location and grid index

        # grid index
        conn.execute(
            gridKeyIndexTable.delete(gridKeyIndexTable.c.dispId.in_(dispIds)))

        # location index
        conn.execute(
            locationIndexTable.delete(
                locationIndexTable.c.dispId.in_(dispIds)))

        # ---------------
        # Insert the Grid Key indexes
        if gridKeyIndexes:
            conn.execute(gridKeyIndexTable.insert(), gridKeyIndexes)

        # Directly insert into the Grid compiler queue.
        if gridCompiledQueueItems:
            conn.execute(gridQueueTable.insert(), [
                dict(coordSetId=i.coordSetId, gridKey=i.gridKey)
                for i in gridCompiledQueueItems
            ])

        # ---------------
        # Insert the Location indexes
        if locationIndexes:
            conn.execute(locationIndexTable.insert(), locationIndexes)

        # Directly insert into the Location compiler queue.
        if locationCompiledQueueItems:
            conn.execute(locationIndexCompilerQueueTable.insert(), [
                dict(modelSetId=i.modelSetId, indexBucket=i.indexBucket)
                for i in locationCompiledQueueItems
            ])

        # ---------------
        # Finally, delete the disp queue items

        conn.execute(dispQueueTable.delete(dispQueueTable.c.id.in_(queueIds)))

        transaction.commit()
        logger.debug("Committed %s GridKeyIndex in %s", len(gridKeyIndexes),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
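# A minimal sketch of the row-locking SELECT used above, written with the
# Select.with_for_update() API rather than the legacy Select(..., for_update=True)
# form the code above uses; which form applies depends on the SQLAlchemy version in
# use. The table here is illustrative only, and the statement is just rendered for
# PostgreSQL to show the FOR UPDATE clause.
from sqlalchemy import Column, Integer, MetaData, Table, select
from sqlalchemy.dialects import postgresql


def _sketchLockDispRows(dispIds):
    metadata = MetaData()
    dispBase = Table("DispBase", metadata, Column("id", Integer, primary_key=True))

    stmt = (select([dispBase.c.id])
            .where(dispBase.c.id.in_(dispIds))
            .with_for_update())

    return str(stmt.compile(dialect=postgresql.dialect()))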
def compileDisps(self, payloadEncodedArgs: bytes):
    """ Compile Disps

    This function takes a list of Disp IDs and compiles them.
    The processing is as follows (more or less)

    0) Load lookups

    ----

    1) DispGroupPointers, copy disps from group to pointer

    ----

    2) Load the Disps from the DB

    3) Apply the LiveDB values to the Disp attributes

    4) Scale the Disp geomJson to match the coord set scaling

    5) DispGroups, take Disps as part of a disp group and load them into JSON in the
        DispGroup. PreparedDisp????

    6) Extract any new LocationIndex entries, if the Disp has a key

    7) Determine which grids this disp will live in, and create GridKeyIndex entries
        for those grid keys for this disp.

    8) Write the Disp JSON back to the disp

    ormSession.commit() here.
        This stores the following updates that have been made into the disp:
        * dispJson,
        * locationJson,
        * livedb attribute updates

    ----

    9) Write the calculated data to tables

    NOTE: Disps that belong to a DispGroup will not be queued for compile by
    ImportDispTask.

    """

    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    dispIds = [o.dispId for o in argData[0]]
    queueItemIds: List[int] = argData[1]

    # ==========================
    # 0) Load the lookups
    ormSession = CeleryDbConn.getDbSession()
    try:
        # ---------------
        # Load Coord Sets
        coordSets = (ormSession.query(ModelCoordSet).options(
            subqueryload(ModelCoordSet.modelSet),
            subqueryload(ModelCoordSet.gridSizes)).all())

        # Get Model Set Name Map
        coordSetById = {o.id: o for o in coordSets}

        # ---------------
        # Load Text Styles
        textStyleById = {
            ts.id: ts
            for ts in ormSession.query(DispTextStyle).all()
        }

        ormSession.expunge_all()

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        ormSession.close()

    # ==========================
    # This method will create new disps that will be compiled later.
    try:

        # ---------------
        # 1) Clone the disps for the group instances
        dispIdsIncludingClones = _cloneDispsForDispGroupPointer(dispIds)

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    # ==========================
    # Run all the ORM Session update methods
    ormSession = CeleryDbConn.getDbSession()
    try:
        with ormSession.no_autoflush:
            # ---------------
            # 2) Load the Disps from the DB
            disps = _loadDisps(ormSession, dispIdsIncludingClones)

            # ---------------
            # 3) Apply the LiveDB Attribute updates
            _applyLiveDbAttributes(ormSession, disps, coordSetById)

            # ---------------
            # 4) Scale the Disp geomJson to match the coord set scaling
            preparedDisps = _scaleDisp(disps, coordSetById)

            # 5) DispGroups, take Disps as part of a disp group and load them
            # into JSON in the DispGroup. PreparedDisp????
            _compileDispGroups(ormSession, preparedDisps)

            # ---------------
            # 6) Extract any new LocationIndex entries, of the Disp has a key
            locationCompiledQueueItems, locationIndexByDispId = _indexLocation(
                preparedDisps, coordSetById)

            # ---------------
            # 7) Determine which grids this disp will live in, and create GridKeyIndex
            # entries for those grid keys for this disp.
            gridCompiledQueueItems, gridKeyIndexesByDispId = _calculateGridKeys(
                preparedDisps, coordSetById, textStyleById)

            # ---------------
            # 8) Write the Disp JSON back to the disp
            _updateDispsJson(preparedDisps)

        # ---------------
        # Commit the updates
        startTime = datetime.now(pytz.utc)
        ormSession.commit()
        logger.debug("Committed %s disp objects in %s", len(disps),
                     (datetime.now(pytz.utc) - startTime))

    except Exception as e:
        ormSession.rollback()
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    finally:
        ormSession.close()

    # ==========================
    # 9) Run the bulk DB delete/insert methods
    try:

        _insertToDb(dispIdsIncludingClones, gridCompiledQueueItems,
                    gridKeyIndexesByDispId, locationCompiledQueueItems,
                    locationIndexByDispId, queueItemIds)

    except Exception as e:
        logger.exception(e)
        raise self.retry(exc=e, countdown=2)

    logger.info("Compiled %s disp objects in %s", len(dispIds),
                (datetime.now(pytz.utc) - startTime))
def _cloneDispsForDispGroupPointer(dispIds: List[int]):
    """ Clone Disps for DispGroupPointer

    This method will clone "instances" of the disps in the disp groups for the
    DispGroupPointer.


    """
    startTime = datetime.now(pytz.utc)

    ormSession = CeleryDbConn.getDbSession()
    try:

        # -----
        # Load the disp group pointers
        qry = ormSession.query(DispGroupPointer) \
            .filter(DispGroupPointer.targetDispGroupId != None) \
            .filter(DispGroupPointer.id.in_(dispIds))

        dispGroupPointers: List[DispGroupPointer] = qry.all()

        # If there are no DispGroupPointers that need cloning, then return.
        if not dispGroupPointers:
            logger.debug(
                "Cloning skipped,"
                " there are no disp group ptrs with targets, in %s",
                (datetime.now(pytz.utc) - startTime))
            return dispIds

        dispGroupPointerTargetIds = [
            o.targetDispGroupId for o in dispGroupPointers
        ]

        del qry

        # -----
        # Delete any existing disps are in these pointers
        ormSession.query(DispBase) \
            .filter(DispBase.groupId.in_([o.id for o in dispGroupPointers])) \
            .delete(synchronize_session=False)

        ormSession.commit()

        # -----
        # Query for the disp groups we'll need
        dispGroupChildsByGroupId = _queryDispsForGroup(
            ormSession, dispGroupPointerTargetIds)

        # -----
        # Query for the disp groups names
        dispBaseTable = DispBase.__table__
        dispGroupTable = DispGroup.__table__

        qry = ormSession.execute(
            select(columns=[
                dispBaseTable.c.id, dispBaseTable.c.coordSetId,
                dispGroupTable.c.name
            ],
                   whereclause=dispBaseTable.c.id.in_(
                       dispGroupPointerTargetIds)).select_from(
                           join(dispGroupTable, dispBaseTable,
                                dispGroupTable.c.id == dispBaseTable.c.id)))

        dispGroupNameByGroupId = {
            o.id: '%s|%s' % (o.coordSetId, o.name)
            for o in qry.fetchall()
        }

        del qry

        # -----
        # Clone the child disps
        cloneDisps = []
        cloneLiveDbDispLinks = []

        for dispPtr in dispGroupPointers:
            if not dispPtr.targetDispGroupId:
                logger.debug("Pointer has no targetGroupId id=%s", dispPtr.id)
                continue

            dispGroupChilds = dispGroupChildsByGroupId.get(
                dispPtr.targetDispGroupId)

            if not dispGroupChilds:
                logger.warning(
                    "Pointer points to missing DispGroup,"
                    " id=%s, targetGroupId=%s", dispPtr.id,
                    dispPtr.targetDispGroupId)
                continue

            x, y = json.loads(dispPtr.geomJson)
            dispPtr.targetDispGroupName = \
                dispGroupNameByGroupId[dispPtr.targetDispGroupId]

            for templateDisp in dispGroupChilds:
                # Create the clone
                cloneDisp = templateDisp.tupleClone()
                cloneDisps.append(cloneDisp)

                cloneDisp.coordSetId = dispPtr.coordSetId

                # Offset the geometry
                geom = json.loads(cloneDisp.geomJson)
                geom = _scaleDispGeom(geom, 1, 1, x, y)
                cloneDisp.geomJson = json.dumps(geom)

                # Assign the clone to the DispGroupPointer
                cloneDisp.groupId = dispPtr.id

                for dispLink in templateDisp.liveDbLinks:
                    cloneDispLink = dispLink.tupleClone()
                    cloneLiveDbDispLinks.append(cloneDispLink)

                    cloneDispLink.id = None
                    cloneDispLink.disp = cloneDisp
                    cloneDispLink.coordSetId = dispPtr.coordSetId

        # -----
        # Preallocate the IDs for performance on PostGreSQL
        dispIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DispBase, len(cloneDisps))
        for cloneDisp in cloneDisps:
            cloneDisp.id = next(dispIdGen)

        # Preallocate the IDs for performance on PostGreSQL
        dispLinkIdGen = CeleryDbConn.prefetchDeclarativeIds(
            LiveDbDispLink, len(cloneLiveDbDispLinks))
        for cloneDispLink in cloneLiveDbDispLinks:
            cloneDispLink.id = next(dispLinkIdGen)
            cloneDispLink.dispId = cloneDispLink.disp.id
            cloneDispLink.disp = None

        # -----
        # Create the new list of IDs to compile
        # Do this here, otherwise it will cause a DB refresh if it's after the commit.
        dispIdsIncludingClones = dispIds + [o.id for o in cloneDisps]

        ormSession.bulk_save_objects(cloneDisps, update_changed_only=False)
        ormSession.bulk_save_objects(cloneLiveDbDispLinks,
                                     update_changed_only=False)

        ormSession.commit()

        logger.debug("Cloned %s disp group objects in %s", len(cloneDisps),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        ormSession.rollback()
        raise

    finally:
        ormSession.close()

    return dispIdsIncludingClones
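# A minimal sketch of the geometry offset applied to each cloned child disp above.
# It assumes geomJson decodes to a flat [x0, y0, x1, y1, ...] coordinate array,
# which is what _scaleDispGeom appears to operate on; the real plugin helper may
# differ.
import json


def _sketchOffsetGeomJson(geomJson: str, offsetX: float, offsetY: float) -> str:
    coords = json.loads(geomJson)
    shifted = [value + (offsetX if index % 2 == 0 else offsetY)
               for index, value in enumerate(coords)]
    return json.dumps(shifted)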
def compileSearchIndexChunk(self, payloadEncodedArgs: bytes) -> List[str]:
    """ Compile Search Index Task

    :param self: A celery reference to this task
    :param payloadEncodedArgs: An encoded payload containing the queue tuples.
    :returns: A list of chunk keys that have been updated.
    """
    argData = Payload().fromEncodedPayload(payloadEncodedArgs).tuples
    queueItems = argData[0]
    queueItemIds: List[int] = argData[1]

    chunkKeys = list(set([i.chunkKey for i in queueItems]))

    queueTable = SearchIndexCompilerQueue.__table__
    compiledTable = EncodedSearchIndexChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()
    try:

        logger.debug("Staring compile of %s queueItems in %s", len(queueItems),
                     (datetime.now(pytz.utc) - startTime))

        # Get Model Sets

        total = 0
        existingHashes = _loadExistingHashes(conn, chunkKeys)
        encKwPayloadByChunkKey = _buildIndex(conn, chunkKeys)
        chunksToDelete = []

        inserts = []
        for chunkKey, searchIndexChunkEncodedPayload in encKwPayloadByChunkKey.items(
        ):
            m = hashlib.sha256()
            m.update(searchIndexChunkEncodedPayload)
            encodedHash = b64encode(m.digest()).decode()

            # Compare the hash, AND delete the chunk key
            if chunkKey in existingHashes:
                # At this point we could decide to do an update instead,
                # but inserts are quicker
                if encodedHash == existingHashes.pop(chunkKey):
                    continue

            chunksToDelete.append(chunkKey)
            inserts.append(
                dict(chunkKey=chunkKey,
                     encodedData=searchIndexChunkEncodedPayload,
                     encodedHash=encodedHash,
                     lastUpdate=lastUpdate))

        # Add any chunks that we need to delete but have no new data for
        chunksToDelete.extend(list(existingHashes))

        if chunksToDelete:
            # Delete the old chunks
            conn.execute(
                compiledTable.delete(
                    compiledTable.c.chunkKey.in_(chunksToDelete)))

        if inserts:
            newIdGen = CeleryDbConn.prefetchDeclarativeIds(
                SearchIndex, len(inserts))
            for insert in inserts:
                insert["id"] = next(newIdGen)

        transaction.commit()
        transaction = conn.begin()

        if inserts:
            conn.execute(compiledTable.insert(), inserts)

        logger.debug("Compiled %s SearchIndexes, %s missing, in %s",
                     len(inserts),
                     len(chunkKeys) - len(inserts),
                     (datetime.now(pytz.utc) - startTime))

        total += len(inserts)

        conn.execute(queueTable.delete(queueTable.c.id.in_(queueItemIds)))

        transaction.commit()
        logger.info("Compiled and Committed %s EncodedSearchIndexChunks in %s",
                    total, (datetime.now(pytz.utc) - startTime))

        return chunkKeys

    except Exception as e:
        transaction.rollback()
        # logger.warning(e)  # Just a warning, it will retry
        logger.exception(e)
        raise self.retry(exc=e, countdown=10)

    finally:
        conn.close()
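# A minimal, standalone sketch of the hash-reconcile step used above: chunks whose
# newly built hash matches the stored hash are skipped, changed chunks are deleted
# and re-inserted, and stale chunks with no new data are deleted. Plain dicts stand
# in for the compiled-chunk table.
def _sketchReconcileChunks(newHashByChunkKey, existingHashByChunkKey):
    existing = dict(existingHashByChunkKey)
    chunksToDelete = []
    chunksToInsert = {}

    for chunkKey, newHash in newHashByChunkKey.items():
        if chunkKey in existing and existing.pop(chunkKey) == newHash:
            continue  # Unchanged, leave the stored chunk as it is
        chunksToDelete.append(chunkKey)
        chunksToInsert[chunkKey] = newHash

    # Anything left in "existing" has no new data and is stale
    chunksToDelete.extend(existing)
    return chunksToDelete, chunksToInsert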
def _insertOrUpdateObjects(newDocuments: List[ImportDocumentTuple],
                           modelSetId: int,
                           docTypeIdsByName: Dict[str, int]) -> None:
    """ Insert or Update Objects

    1) Find objects and update them
    2) Insert objects if they are missing

    """

    documentTable = DocDbDocument.__table__
    queueTable = DocDbCompilerQueue.__table__

    startTime = datetime.now(pytz.utc)

    engine = CeleryDbConn.getDbEngine()
    conn = engine.connect()
    transaction = conn.begin()

    try:
        dontDeleteObjectIds = []
        objectIdByKey: Dict[str, int] = {}

        objectKeys = [o.key for o in newDocuments]
        chunkKeysForQueue: Set[Tuple[str, str]] = set()

        # Query existing objects
        results = list(
            conn.execute(
                select(columns=[
                    documentTable.c.id, documentTable.c.key,
                    documentTable.c.chunkKey, documentTable.c.documentJson
                ],
                       whereclause=and_(
                           documentTable.c.key.in_(objectKeys),
                           documentTable.c.modelSetId == modelSetId))))

        foundObjectByKey = {o.key: o for o in results}
        del results

        # Get the IDs that we need
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(
            DocDbDocument,
            len(newDocuments) - len(foundObjectByKey))

        # Create state arrays
        inserts = []
        updates = []
        processedKeys = set()

        # Work out which objects have been updated or need inserting
        for importDocument in newDocuments:
            if importDocument.key in processedKeys:
                raise Exception("Key %s exists in import data twice" %
                                importDocument.key)
            processedKeys.add(importDocument.key)

            existingObject = foundObjectByKey.get(importDocument.key)
            importDocumentTypeId = docTypeIdsByName[
                importDocument.documentTypeKey]

            packedJsonDict = {
                k: v
                for k, v in importDocument.document.items()
                if v is not None and v != ''
            }  # 0 / False are allowed
            packedJsonDict['_dtid'] = importDocumentTypeId
            packedJsonDict['_msid'] = modelSetId
            documentJson = json.dumps(packedJsonDict, sort_keys=True)

            # Work out if we need to update the object type
            if existingObject:
                updates.append(
                    dict(b_id=existingObject.id,
                         b_typeId=importDocumentTypeId,
                         b_documentJson=documentJson))
                dontDeleteObjectIds.append(existingObject.id)

            else:
                id_ = next(newIdGen)
                existingObject = DocDbDocument(
                    id=id_,
                    modelSetId=modelSetId,
                    documentTypeId=importDocumentTypeId,
                    key=importDocument.key,
                    importGroupHash=importDocument.importGroupHash,
                    chunkKey=makeChunkKey(importDocument.modelSetKey,
                                          importDocument.key),
                    documentJson=documentJson)
                inserts.append(existingObject.tupleToSqlaBulkInsertDict())

            objectIdByKey[existingObject.key] = existingObject.id
            chunkKeysForQueue.add((modelSetId, existingObject.chunkKey))

        # Insert the DocDb Objects
        if inserts:
            conn.execute(documentTable.insert(), inserts)

        if updates:
            stmt = (documentTable.update().where(
                documentTable.c.id == bindparam('b_id')).values(
                    documentTypeId=bindparam('b_typeId'),
                    documentJson=bindparam('b_documentJson')))
            conn.execute(stmt, updates)

        if chunkKeysForQueue:
            conn.execute(
                queueTable.insert(),
                [dict(modelSetId=m, chunkKey=c) for m, c in chunkKeysForQueue])

        if inserts or updates or chunkKeysForQueue:
            transaction.commit()
        else:
            transaction.rollback()

        logger.debug("Inserted %s updated %s queued %s chunks in %s",
                     len(inserts), len(updates), len(chunkKeysForQueue),
                     (datetime.now(pytz.utc) - startTime))

    except Exception:
        transaction.rollback()
        raise

    finally:
        conn.close()
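# A minimal, runnable sketch (SQLite in-memory) of the bindparam() bulk update used
# above. The "b_" prefixed parameter names avoid clashing with the real column
# names, matching the convention in the code above; the table here is illustrative
# only.
from sqlalchemy import (Column, Integer, MetaData, String, Table, bindparam,
                        create_engine)


def _sketchBulkUpdate():
    engine = create_engine("sqlite:///:memory:")
    metadata = MetaData()
    documentTable = Table("document", metadata,
                          Column("id", Integer, primary_key=True),
                          Column("documentJson", String))
    metadata.create_all(engine)

    with engine.begin() as conn:
        conn.execute(documentTable.insert(),
                     [dict(id=1, documentJson="{}"), dict(id=2, documentJson="{}")])

        stmt = (documentTable.update()
                .where(documentTable.c.id == bindparam("b_id"))
                .values(documentJson=bindparam("b_documentJson")))

        # executemany: one UPDATE per dict in the parameter list
        conn.execute(stmt, [dict(b_id=1, b_documentJson='{"a": 1}'),
                            dict(b_id=2, b_documentJson='{"a": 2}')])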
def _compileBranchIndexChunk(
        conn, transaction, modelSetId: int,
        queueItems: List[BranchIndexCompilerQueue]) -> None:
    chunkKeys = list(set([i.chunkKey for i in queueItems]))

    compiledTable = BranchIndexEncodedChunk.__table__
    lastUpdate = datetime.now(pytz.utc).isoformat()

    startTime = datetime.now(pytz.utc)

    logger.debug("Staring compile of %s queueItems in %s", len(queueItems),
                 (datetime.now(pytz.utc) - startTime))

    # Get Model Sets

    total = 0
    existingHashes = _loadExistingHashes(conn, chunkKeys)
    encKwPayloadByChunkKey = _buildIndex(chunkKeys)
    chunksToDelete = []

    inserts = []
    for chunkKey, diagramIndexChunkEncodedPayload in encKwPayloadByChunkKey.items(
    ):
        m = hashlib.sha256()
        m.update(diagramIndexChunkEncodedPayload)
        encodedHash = b64encode(m.digest()).decode()

        # Compare the hash, AND delete the chunk key
        if chunkKey in existingHashes:
            # At this point we could decide to do an update instead,
            # but inserts are quicker
            if encodedHash == existingHashes.pop(chunkKey):
                continue

        chunksToDelete.append(chunkKey)
        inserts.append(
            dict(modelSetId=modelSetId,
                 chunkKey=chunkKey,
                 encodedData=diagramIndexChunkEncodedPayload,
                 encodedHash=encodedHash,
                 lastUpdate=lastUpdate))

    # Add any chunks that we need to delete but have no new data for
    chunksToDelete.extend(list(existingHashes))

    if chunksToDelete:
        # Delete the old chunks
        conn.execute(
            compiledTable.delete(compiledTable.c.chunkKey.in_(chunksToDelete)))

    if inserts:
        newIdGen = CeleryDbConn.prefetchDeclarativeIds(BranchIndex,
                                                       len(inserts))
        for insert in inserts:
            insert["id"] = next(newIdGen)

    transaction.commit()
    transaction = conn.begin()

    if inserts:
        conn.execute(compiledTable.insert(), inserts)

    logger.debug("Compiled %s BranchIndexs, %s missing, in %s", len(inserts),
                 len(chunkKeys) - len(inserts),
                 (datetime.now(pytz.utc) - startTime))

    total += len(inserts)

    transaction.commit()
    logger.debug("Compiled and Committed %s EncodedBranchIndexChunks in %s",
                 total, (datetime.now(pytz.utc) - startTime))
def _prepareLookups(newDocuments: List[ImportDocumentTuple],
                    modelSetId: int) -> Dict[str, int]:
    """ Check Or Insert Search Properties

    Make sure the search properties exist.

    """

    dbSession = CeleryDbConn.getDbSession()

    startTime = datetime.now(pytz.utc)

    try:

        docTypeNames = set()
        propertyNames = set()

        for o in newDocuments:
            o.document["key"] = o.key
            o.documentTypeKey = o.documentTypeKey.lower()
            docTypeNames.add(o.documentTypeKey)

            if o.document:
                propertyNames.update([s.lower() for s in o.document])

        # Prepare Properties
        dbProps = (dbSession.query(DocDbPropertyTuple).filter(
            DocDbPropertyTuple.modelSetId == modelSetId).all())

        propertyNames -= set([o.name for o in dbProps])

        if propertyNames:
            for newPropName in propertyNames:
                dbSession.add(
                    DocDbPropertyTuple(name=newPropName,
                                       title=newPropName,
                                       modelSetId=modelSetId))

            dbSession.commit()

        del dbProps
        del propertyNames

        # Prepare Object Types
        dbObjectTypes = (dbSession.query(DocDbDocumentTypeTuple).filter(
            DocDbDocumentTypeTuple.modelSetId == modelSetId).all())
        docTypeNames -= set([o.name for o in dbObjectTypes])

        if not docTypeNames:
            docTypeIdsByName = {o.name: o.id for o in dbObjectTypes}

        else:
            for newType in docTypeNames:
                dbSession.add(
                    DocDbDocumentTypeTuple(name=newType,
                                           title=newType,
                                           modelSetId=modelSetId))

            dbSession.commit()

            dbObjectTypes = dbSession.query(DocDbDocumentTypeTuple).all()
            docTypeIdsByName = {o.name: o.id for o in dbObjectTypes}

        logger.debug("Prepared lookups in %s",
                     (datetime.now(pytz.utc) - startTime))

        return docTypeIdsByName

    except Exception:
        dbSession.rollback()
        raise

    finally:
        dbSession.close()