Example #1
File: Queuer.py  Project: wmfs/chimp
 def _queueCtreeDisable(self, settings, groupId, sourceName):        
     args = {}
     filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","indexes"), "drop_ctree_%s_closure_indexes.sql" % (sourceName))                        
     args["filename"] = filename
     self.queue.queueTask(groupId,  self.stream, "script" , "Drop %s closure indexes" %(sourceName), None, None, None, json.dumps(args), False)            
     
     args = {}
     filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","ctree"), "%s_disable.sql" % (sourceName))                        
     args["filename"] = filename
     self.queue.queueTask(groupId,  self.stream, "script" , "Disable %s closure tree" %(sourceName), None, None, None, json.dumps(args), False)            
Example #2
File: Queuer.py  Project: wmfs/chimp
 def _queueRemoveDuplicatesTask(self, groupId, stream, specificationName, toleranceLevel, commitFrequency, checkpointBehaviour, removeDuplicates, paths):
     if removeDuplicates:
         self.queue.queueCheckpoint(groupId, stream, "major", toleranceLevel, commitFrequency, checkpointBehaviour)
         args = {}
         filename = cs.getChimpScriptFilenameToUse(paths["repository"], ("specifications",specificationName,"resources", "sql","import"), "remove_%s_duplicates_from_stage.sql" % (specificationName))    
         args["filename"] = filename
         self.queue.queueTask(groupId, stream,  "script" , "Remove duplicates", None, None, None, json.dumps(args), False)
         self.queue.queueCheckpoint(groupId, stream, "major", toleranceLevel, commitFrequency, checkpointBehaviour)        
Example #3
File: Stager.py  Project: wmfs/chimp
 def __init__(self, queue, supportConnection, supportCursor, dataConnection, dataCursor, taskId, specification, paths, commitThreshold, appLogger):
     self.appLogger = appLogger
     self.commitThreshold = int(commitThreshold)
     self.queue = queue
     self.supportConnection = supportConnection
     self.supportCursor = supportCursor
     self.dataConnection = dataConnection
     self.dataCursor = dataCursor
     self.taskId = taskId
     self.specification = specification
     self.paths = paths
     
     # Prepare
     # =======
     self.lineCount = 0
     self.successCount=0
     self.exceptionCount = 0
     self.errorCount = 0
     self.warningCount = 0
     self.noticeCount = 0
     self.ignoredCount = 0
     self.action = None
     self.importData = []
     
     self.messageSql = "select shared.add_task_message(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
 
     self.transformFunctions={}
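     # Load each useful record's <table>_stage_transformer.py module and cache its
     # transformSuppliedValues function, keyed on the record's table name.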
     for thisRecord in specification.records:
         if thisRecord.useful:
             
             moduleFilename = cs.getChimpScriptFilenameToUse(paths["repository"], ("specifications",specification.name,"resources", "py","transformation","stage"), "%s_stage_transformer.py" %(thisRecord.table))
             module = imp.load_source("%s_stage_transformer.py" %(thisRecord.table), moduleFilename)
             self.transformFunctions[thisRecord.table] = module.transformSuppliedValues
          
     
     #Set simple variables for speed 
     if specification.qualifier is not None:
         self.q = str(specification.qualifier)
     else:
         self.q = None            
     self.d = str(specification.delimiter)
     if len(specification.records) == 1:
         self.onlyOneRecord = True
     else:
         self.onlyOneRecord = False      
Example #4
File: queueCustom.py  Project: wmfs/chimp
def queueTasks(queuer, settings, stream, specificationRestriction, groupId, appLogger):
    appLogger.debug("")
    appLogger.debug("  Custom column tasks")
    appLogger.debug("  -------------------")
    

    
    sql = "select specification_name, source_schema,source_name,output_column_list,seq,(select max(seq) from calc.custom_registry as m where m.specification_name=r.specification_name and m.source_schema=r.source_schema and m.source_name=r.source_name) as max_seq from calc.custom_registry as r"
    if specificationRestriction is not None:
        sql += " where specification_name in({0})".format(specificationRestriction)
    sql += " order by specification_name,seq"

    queuer.supportCursor.execute(sql)
    specificationCustomSources = queuer.supportCursor.fetchall()
       
    for custom in specificationCustomSources:
        
        specificationName = custom[0]
        inputSourceSchema = custom[1]
        inputSourceName = custom[2]
        outputCustomList = custom[3].split(",")
        seq = custom[4]
        maxSeq = custom[5]
    
        processorFilename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications", specificationName,"resources", "py","calculated"), "{0}_calculated_data_processor.py".format(inputSourceName))
        processorFilename = processorFilename.replace("\\", "\\\\") 
        
        
        args = {}
        args["inputSourceSchema"] = inputSourceSchema
        args["inputSourceName"] = inputSourceName
        args["customList"] = outputCustomList
        args["processorFilename"] = processorFilename
        args["flushQueue"] = (seq == maxSeq)      
        queuer.queue.queueTask(groupId, stream,  "syncCustomColumn", "Refresh custom columns {0} on {1}".format(outputCustomList,inputSourceName), None, None, None, json.dumps(args), False)
        appLogger.debug("      syncCustomColumn [{0}]".format(args))
        queuer.queue.queueCheckpoint(groupId, stream, "major", settings.args.tolerancelevel, queuer.commitFrequency, queuer.checkpointBehaviour)
        queuer.supportCursor.connection.commit()
        
Example #5
File: Loader.py  Project: wmfs/chimp
    def processSendToImport(
        self, loopConnection, dataConnection, dataCursor, settings, taskId, processLimit, specification, args
    ):
        def getAction(sendMode, identification):
            if sendMode == "full":
                action = "insert"
            elif sendMode == "change":
                action = identification
            elif sendMode == "sync":
                action = "merge"
            return action

        def getSendMode(importMode, fileIntent, hasData, appLogger):
            # Settle on what it is we're doing
            #
            # importMode - auto
            #            - full
            #            - change
            #            - sync
            #
            # fileIntent - undefined
            #            - full
            #            - change
            #            - mixed
            #
            #

            if importMode == "auto":
                if fileIntent == "undefined":
                    if hasData:
                        mode = "sync"
                    else:
                        mode = "full"
                elif fileIntent == "full":
                    mode = "full"
                elif fileIntent == "change":
                    mode = "change"
                elif fileIntent == "mixed":
                    print("Imports of mixed file intents not supported")
                    raise ValueError("Imports of mixed file intents not supported")

            elif importMode == "full":
                mode = "full"

            elif importMode == "change":
                mode = "change"

            elif importMode == "sync":
                mode = "sync"

            appLogger.debug(
                "|  {0} (importMode={1} fileIntent={2} hasData={3})".format(mode, importMode, fileIntent, hasData)
            )

            return mode

        appLogger = settings.appLogger
        commitThreshold = int(settings.env["dataCommitThreshold"])
        table = args["table"]
        importMode = args["importMode"]
        fileIntent = args["fileIntent"]
        strategy = args["strategy"]
        hasData = args["hasData"]
        sendMode = getSendMode(importMode, fileIntent, hasData, appLogger)

        self.queue.startTask(taskId, True)
        sql = "select count(*) from stage.{0}".format(table)
        self.supportCursor.execute(sql)
        scanCount = self.supportCursor.fetchone()[0]
        self.queue.setScanResults(taskId, scanCount)

        appLogger.debug("|  Scan count = {0}".format(scanCount))

        lineCount = 0
        successCount = 0
        exceptionCount = 0
        errorCount = 0
        warningCount = 0
        noticeCount = 0
        ignoredCount = 0

        # Grab record
        for r in specification.records:
            if r.table == table:
                record = r

        appLogger.debug("|")
        appLogger.debug("|  {0}".format(table))

        # BUILD DML STATEMENTS FOR THIS RECORD
        # ------------------------------------
        selectColumns = []
        insertPlaceholder = "select * from import.{0}_insert(".format(table)
        insertPlaceholder += "%s,%s"
        if not record.editable:
            insertPlaceholder += ",%s,%s"

        updatePlaceholder = "select * from import.{0}_update(".format(table)
        updatePlaceholder += "%s"
        if not record.editable:
            updatePlaceholder += ",%s,%s"

        mergePlaceholder = "select * from import.{0}_merge(".format(table)
        mergePlaceholder += "%s,%s"
        if not record.editable:
            mergePlaceholder += ",%s,%s"

        if record.hasPrimaryKey():
            deletePlaceholder = "select * from import.{0}_delete(%s".format(record.table)
            for column in record.primaryKeyColumns:
                deletePlaceholder += ",%s"
            deletePlaceholder += ")"
        else:
            deletePlaceholder = None

        for thisField in record.fields:
            if thisField.column is not None:
                selectColumns.append(thisField.column)
                insertPlaceholder += ",%s"
                updatePlaceholder += ",%s"
                mergePlaceholder += ",%s"

        for thisField in record.additionalFields:
            insertPlaceholder += ",%s"
            updatePlaceholder += ",%s"
            mergePlaceholder += ",%s"

        insertPlaceholder += ")"
        updatePlaceholder += ")"
        mergePlaceholder += ")"

        # Grab transformer functions
        moduleFilename = cs.getChimpScriptFilenameToUse(
            settings.paths["repository"],
            ("specifications", specification.name, "resources", "py", "transformation", "import"),
            "{0}_import_transformer.py".format(table),
        )
        module = imp.load_source("{0}_import_transformer.py".format(record.table), moduleFilename)
        transformer = module.transformSuppliedValues

        loopSql = "select id,task_id,{0},identification from stage.{1}".format(",".join(selectColumns), table)
        selectCount = 3 + len(selectColumns)

        # DEBUG:
        appLogger.debug("|   Pre-computed statements:")
        appLogger.debug("|     loopSql           : {0}".format(loopSql))
        appLogger.debug("|     insertPlaceholder : {0}".format(insertPlaceholder))
        appLogger.debug("|     updatePlaceholder : {0}".format(updatePlaceholder))
        appLogger.debug("|     mergePlaceholder  : {0}".format(mergePlaceholder))
        appLogger.debug("|     deletePlaceholder : {0}".format(deletePlaceholder))

        # Loop through all staged records
        loopCursor = loopConnection.makeCursor("loopCursor", True, True)
        loopCursor.execute(loopSql)

        for data in loopCursor:
            if lineCount % 1000 == 0:
                self.queue.setTaskProgress(
                    taskId, successCount, exceptionCount, errorCount, warningCount, noticeCount, ignoredCount
                )
            lineCount = lineCount + 1
            if lineCount % commitThreshold == 0:
                appLogger.debug("| << Transaction size threshold reached ({0}): COMMIT >>".format(lineCount))
                dataConnection.connection.commit()

            identification = data["identification"]
            workingRow = data
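            # Rows are selected as id, task_id, <selectColumns...>, identification, so
            # selectCount - 1 indexes the trailing identification column; dropping it leaves
            # the values that line up with the insert/update/merge placeholders.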
            del data[selectCount - 1]

            workingRow = transformer(dataCursor, workingRow)

            action = getAction(sendMode, identification)

            if action == "insert":
                dataCursor.execute(insertPlaceholder, tuple(workingRow))

            elif action == "update":
                del workingRow[0]
                dataCursor.execute(updatePlaceholder, tuple(workingRow))

            elif action == "delete":
                pass
            #                        deleteParams=[]
            #                        deleteParams.append(stagedRow[1])
            #                        for thisPkColumn in pkColumnLists[data[0]]:
            #                            deleteParams.append(stagedRow[thisPkColumn])
            #                        sql = deletePlaceholders[data[0]]
            #                        dataCursor.execute(sql, tuple(deleteParams))
            #
            elif action == "merge":
                dataCursor.execute(mergePlaceholder, tuple(workingRow))
            #
            warningFlag = False
            errorFlag = False
            exceptionFlag = False
            messages = dataCursor.fetchall()
            success = True

            for thisMessage in messages:
                msgLevel = thisMessage[0]
                msgCode = thisMessage[1]
                msgTitle = thisMessage[2]
                msgAffectedColumns = thisMessage[3]
                msgAffectedRowCount = thisMessage[4]
                msgContent = thisMessage[5]

                self.queue.addTaskMessage(
                    taskId,
                    record.table,
                    lineCount,
                    msgLevel,
                    msgCode,
                    msgTitle,
                    msgAffectedColumns,
                    msgAffectedRowCount,
                    "{0}: {1}".format(msgContent, data),
                )

                if msgLevel == "warning":
                    warningFlag = True
                    success = False
                elif msgLevel == "error":
                    errorFlag = True
                    success = False
                elif msgLevel == "exception":
                    exceptionFlag = True
                    success = False
                elif msgLevel == "notice":
                    noticeCount += 1

            if success:
                successCount = successCount + 1
            else:
                if exceptionFlag:
                    exceptionCount += 1
                elif errorFlag:
                    errorCount += 1
                elif warningFlag:
                    warningCount += 1

        loopCursor.close()

        return (successCount, exceptionCount, errorCount, warningCount, ignoredCount, noticeCount)
Example #6
File: Loader.py  Project: wmfs/chimp
    def processSendToEditable(
        self, loopConnection, dataConnection, dataCursor, settings, taskId, processLimit, specification, args
    ):

        commitThreshold = int(settings.env["dataCommitThreshold"])
        appLogger = settings.appLogger

        self.queue.startTask(taskId, True)

        # Get last time schemas synchronised
        sql = "select last_sent_to_editable from shared.specification_registry where name=%s"
        self.supportCursor.execute(sql, (specification.name,))
        lastImportTimestamp = self.supportCursor.fetchone()[0]

        appLogger.debug("| lastImportTimestamp      : {0}".format(lastImportTimestamp))

        # Grab record
        table = args["table"]
        for r in specification.records:
            if r.table == table:
                thisRecord = r

        # Scanning
        # ========
        affectedRecordCount = 0
        appLogger.debug("|   Scanning {0}:".format(table))

        # Count records that have been inserted/updated
        if lastImportTimestamp is None:
            sql = "select count(*) from import.%s" % (table)
            self.supportCursor.execute(sql)
        else:
            sql = "select count(*) from import.%s where modified >" % (table)
            sql = sql + "%s"
            self.supportCursor.execute(sql, (lastImportTimestamp,))
        recordsModified = self.supportCursor.fetchone()[0]
        appLogger.debug("|     {0} (modified)".format(recordsModified))
        affectedRecordCount = affectedRecordCount + recordsModified

        # Count records that have been deleted
        if lastImportTimestamp is None:
            sql = "select count(*) from history.import_%s_deletes" % (table)
            self.supportCursor.execute(sql)
        else:
            sql = "select count(*) from history.import_%s_deletes where deleted >" % (table)
            sql = sql + "%s"
            self.supportCursor.execute(sql, (lastImportTimestamp,))
        recordsModified = self.supportCursor.fetchone()[0]
        appLogger.debug("|     {0} (deleted)".format(recordsModified))
        affectedRecordCount = affectedRecordCount + recordsModified

        appLogger.debug("| affectedRecordCount  : {0} (total)".format(affectedRecordCount))
        self.queue.setScanResults(taskId, affectedRecordCount)

        lineCount = 0
        successCount = 0
        exceptionCount = 0
        errorCount = 0
        warningCount = 0
        noticeCount = 0
        ignoredCount = 0

        # Fire off the deletes
        # ====================
        appLogger.debug("|")
        appLogger.debug("| PROCESSING:")
        appLogger.debug("|")
        appLogger.debug("|   DELETES")

        appLogger.debug("|     {0}".format(thisRecord.table))
        sql = "select id from history.import_%s_deletes" % (thisRecord.table)
        if lastImportTimestamp is None:
            params = None
        else:
            sql = sql + " where deleted > %s"
            params = (lastImportTimestamp,)

        deleteDml = "delete from editable.%s" % (thisRecord.table)
        deleteDml = deleteDml + " where id = %s"

        loopCursor = loopConnection.makeCursor("loopCursor", True, True)
        loopCursor.execute(sql, params)

        for data in loopCursor:

            if lineCount % 1000 == 0:
                self.queue.setTaskProgress(
                    taskId, successCount, exceptionCount, errorCount, warningCount, noticeCount, ignoredCount
                )
            lineCount = lineCount + 1
            if lineCount % commitThreshold == 0:
                appLogger.debug("| << Transaction size threshold reached ({0}): COMMIT >>".format(lineCount))
                dataConnection.connection.commit()

            # Decision call to go here
            deleteAllowed = True
            if deleteAllowed:
                successCount = successCount + 1
                dataCursor.execute(deleteDml, (data[0],))
            else:
                warningCount = warningCount + 1

        loopCursor.connection.commit()

        # Fire off the inserts/updates
        # ============================
        appLogger.debug("|")
        appLogger.debug("|   INSERT/UPDATE")

        placeholder = "%s,%s,%s,%s"
        for thisField in thisRecord.fields:
            if thisField.column is not None:
                placeholder = placeholder + ",%s"
        for thisField in thisRecord.additionalFields:
            placeholder = placeholder + ",%s"

        appLogger.debug("|     {0}".format(thisRecord.table))

        # OPTIMISE:
        # Is there any data for this record in editable?
        # If not, then don't bother with the costly merge view.
        sql = "select exists (select 1 from editable.{0} limit 1)".format(thisRecord.table)
        self.supportCursor.execute(sql)
        dataExists = self.supportCursor.fetchone()
        dataExists = dataExists[0]
        appLogger.debug("|       dataExists: {0}".format(dataExists))

        # Build SQL statement to find
        # all affected records

        columnList = []
        columnList.append("id")

        if dataExists:
            columnList.append("editable_record_exists")
            importSliceStart = 2
        else:
            importSliceStart = 1

        importSliceEnd = importSliceStart - 1
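        # importSliceStart/importSliceEnd track which positions in each fetched row hold the
        # import-side column values; they shift by one when editable_record_exists is also selected.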

        for thisField in thisRecord.fields:
            if thisField.column is not None:
                columnList.append(thisField.column)
                importSliceEnd = importSliceEnd + 1
        for thisField in thisRecord.additionalFields:
            columnList.append(thisField.column)
            importSliceEnd = importSliceEnd + 1
        columnList.append("created")
        columnList.append("modified")

        if dataExists:
            for thisField in thisRecord.fields:
                if thisField.column is not None:
                    columnList.append("e_%s" % (thisField.column))
            for thisField in thisRecord.additionalFields:
                columnList.append("e_%s" % (thisField.column))
            columnList.append("e_visibility")
            columnList.append("e_security")

        originalEnd = len(columnList) - 1

        if dataExists:
            source = "shared.{0}_to_merge_into_editable".format(thisRecord.table)
        else:
            source = "import.{0}".format(thisRecord.table)

        sql = "select {0} from {1}".format(",".join(columnList), source)

        if lastImportTimestamp is None:
            params = None
        else:
            sql = sql + " where modified > %s::timestamp"
            params = (lastImportTimestamp,)

        # BUILD DML Statements
        placeholder = "%s,%s,%s,%s"
        for thisField in thisRecord.fields:
            if thisField.column is not None:
                placeholder = placeholder + ",%s"
        for thisField in thisRecord.additionalFields:
            placeholder = placeholder + ",%s"
        insertDml = "select * from editable.%s_insert(%s)" % (thisRecord.table, placeholder)
        updateDml = "select * from editable.%s_update(%s)" % (thisRecord.table, placeholder)

        # Grab transformer function
        moduleFilename = cs.getChimpScriptFilenameToUse(
            settings.paths["repository"],
            ("specifications", specification.name, "resources", "py", "transformation", "editable"),
            "%s_editable_transformer.py" % (thisRecord.table),
        )
        module = imp.load_source("%s_editable_transformer.py" % (thisRecord.table), moduleFilename)
        transformFunction = module.transformSuppliedValues

        # Loop through all inserted/updated records
        appLogger.debug("|       loopSql   : {0}".format(sql))
        appLogger.debug("|       insertDml : {0}".format(insertDml))
        appLogger.debug("|       updateDml : {0}".format(updateDml))

        loopCursor = loopConnection.makeCursor("loopCursor", True, True)
        loopCursor.execute(sql, params)

        for data in loopCursor:
            if lineCount % 1000 == 0:
                self.queue.setTaskProgress(
                    taskId, successCount, exceptionCount, errorCount, warningCount, noticeCount, ignoredCount
                )
            lineCount = lineCount + 1
            if lineCount % commitThreshold == 0:
                appLogger.debug("| << Transaction size threshold reached ({0}): COMMIT >>".format(lineCount))
                dataConnection.connection.commit()

            # Transform values
            transformedValues = transformFunction(dataCursor, data)

            # Assemble values to apply
            applyValues = [data[0], "import"]
            applyValues.extend(data[importSliceStart : importSliceEnd + 1])
            applyValues.extend(transformedValues[originalEnd + 1 :])

            if dataExists:
                if data["editable_record_exists"]:
                    dataCursor.execute(updateDml, applyValues)
                    messages = dataCursor.fetchall()
                else:
                    dataCursor.execute(insertDml, applyValues)
                    messages = dataCursor.fetchall()
            else:
                dataCursor.execute(insertDml, applyValues)
                messages = dataCursor.fetchall()

            success = True
            for thisMessage in messages:
                msgLevel = thisMessage[0]
                msgCode = thisMessage[1]
                msgTitle = thisMessage[2]
                msgAffectedColumns = thisMessage[3]
                msgAffectedRowCount = thisMessage[4]
                msgContent = thisMessage[5]

                self.queue.addTaskMessage(
                    taskId,
                    thisRecord.table,
                    lineCount,
                    msgLevel,
                    msgCode,
                    msgTitle,
                    msgAffectedColumns,
                    msgAffectedRowCount,
                    "{0}: {1}".format(msgContent, transformedValues),
                )

                if msgLevel == "warning":
                    warningCount += 1
                    success = False
                elif msgLevel == "error":
                    errorCount += 1
                    success = False
                elif msgLevel == "exception":
                    exceptionCount += 1
                    success = False
                elif msgLevel == "notice":
                    noticeCount += 1

            if success:
                successCount = successCount + 1

        loopCursor.close()

        return (successCount, exceptionCount, errorCount, warningCount, ignoredCount, noticeCount)
Example #7
File: Loader.py  Project: wmfs/chimp
    def makeEditableFile(
        self, loopConnection, dataConnection, dataCursor, settings, taskId, processLimit, specification, args
    ):
        table = args["table"]
        appLogger = settings.appLogger
        self.queue.startTask(taskId, True)

        appLogger.debug("| {0}:".format(table))

        # Any editable data here already?
        # ===============================
        sql = "select exists (select 1 from editable.{0} limit 1)".format(table)
        self.supportCursor.execute(sql)
        dataExists = self.supportCursor.fetchone()
        dataExists = dataExists[0]
        appLogger.debug("| dataExists: {0}".format(dataExists))

        # Get current timestamp
        # =====================
        sql = "select now()"
        self.supportCursor.execute(sql)
        thisImportStartTimestamp = self.supportCursor.fetchone()[0]
        appLogger.debug("| thisImportStartTimestamp : {0}".format(thisImportStartTimestamp))

        # Get last time schemas synchronised
        # ==================================
        sql = "select last_sent_to_editable from shared.specification_registry where name=%s"
        self.supportCursor.execute(sql, (specification.name,))
        lastImportTimestamp = self.supportCursor.fetchone()[0]
        appLogger.debug("| lastImportTimestamp      : {0}".format(lastImportTimestamp))

        # Scanning
        # ========
        appLogger.debug("|  Scanning")
        #   Modified
        scanSql = "select count(*) from import.{0}".format(table)
        if lastImportTimestamp is not None:
            scanSql += " where modified >%s"
            self.supportCursor.execute(scanSql, (lastImportTimestamp,))
        else:
            self.supportCursor.execute(scanSql)
        modifiedCount = self.supportCursor.fetchone()[0]
        appLogger.debug("|     Modified = {0}".format(modifiedCount))

        scanSql = "select count(*) from history.import_{0}_deletes".format(table)
        if lastImportTimestamp is not None:
            scanSql += " where deleted >%s"
            self.supportCursor.execute(scanSql, (lastImportTimestamp,))
        else:
            self.supportCursor.execute(scanSql)
        deletedCount = self.supportCursor.fetchone()[0]
        appLogger.debug("|     Deleted  = {0}".format(deletedCount))
        totalCount = modifiedCount + deletedCount
        appLogger.debug("|                {0}".format(totalCount))
        self.queue.setScanResults(taskId, totalCount)

        # Grab transformer function
        # =========================
        moduleFilename = cs.getChimpScriptFilenameToUse(
            settings.paths["repository"],
            ("specifications", specification.name, "resources", "py", "transformation", "editable"),
            "%s_editable_transformer.py" % (table),
        )
        module = imp.load_source("%s_editable_transformer.py" % (table), moduleFilename)
        transformFunction = module.transformSuppliedValues

        # Establish files
        # ===============
        filename = os.path.join(settings.env["tempPath"], "insert_into_editable_{0}.sql".format(table))
        appLogger.debug("|")
        appLogger.debug("| Filename: {0}".format(filename))
        insertFile = open(filename, "w")

        # Calculate DML placeholders
        # ==========================
        insertDml = "execute editable.{0}_insert(%s,%s".format(table)
        i = args["selectListLength"]
        while i > 0:
            insertDml += ",%s"
            i -= 1
        insertDml += ',"import");'
        appLogger.debug("| insertDml : {0}".format(insertDml))

        loopSql = "select {0} from import.{1}".format(args["selectList"], table)

        loopCursor = loopConnection.makeCursor("loopCursor", True, True)
        loopCursor.execute(loopSql)

        lineCount = 0
        successCount = 0
        exceptionCount = 0
        errorCount = 0
        warningCount = 0
        noticeCount = 0
        ignoredCount = 0

        if not dataExists:
            for data in loopCursor:
                if lineCount % 1000 == 0:
                    self.queue.setTaskProgress(
                        taskId, successCount, exceptionCount, errorCount, warningCount, noticeCount, ignoredCount
                    )
                lineCount = lineCount + 1

                transformedValues = transformFunction(dataCursor, data)
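                # adapt() is assumed to render the value list as b"ARRAY[...]"; the [8:-2]
                # slice strips the b'ARRAY[ prefix and ]' suffix, leaving the comma-separated
                # SQL literals to embed in the generated insert statement.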
                quoted = str(psycopg2.extensions.adapt(transformedValues).getquoted())
                quoted = quoted[8:-2]

                line = "select editable.{0}_insert({1}, 'import');\n".format(table, quoted)
                insertFile.write(line)
                successCount += 1
                # line = self.supportCursor.mogrify(insertDml,transformedValues)

        insertFile.close()
        loopCursor.close()
        appLogger.debug("| Finished.")
        self.supportConnection.connection.commit()
        return (successCount, exceptionCount, errorCount, warningCount, ignoredCount, noticeCount)
Example #8
def processSolrDocuments(queue, supportConnection, supportCursor, loopConnection, dataConnection, dataCursor, settings, taskId, processLimit, args):
    # Init
    lineCount = 0
    successCount = 0
    exceptionCount = 0
    errorCount = 0
    warningCount = 0
    noticeCount = 0
    ignoredCount = 0
    appLogger = settings.appLogger
    commitThreshold = int(settings.env["dataCommitThreshold"])

    messageSql = "select shared.add_task_message(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        
    documentName = args["documentName"]
    serverName = args["serverName"]
    fieldCount = args["fieldCount"] - 1
    filename = "{0}_document_formatter.py".format(documentName)
    moduleToUse = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ["specifications", args["specification"], "resources", "py", "solr_formatting"], filename)
    module = imp.load_source(filename, moduleToUse)
    conversionFunctions = module.DocumentFormatter()
    conversionFunction = conversionFunctions.getSolrDocument
    
    # Publish count
    queue.startTask(taskId, True)
    sql = "select count(*) from {0}.{1}_solr_document_queue_view".format(CALC_SCHEMA, documentName)
    supportCursor.execute(sql)
    documentCount = supportCursor.fetchone()[0]
    queue.setScanResults(taskId, documentCount)
    appLogger.info(" |   documentCount : {0}".format(documentCount))

    sql = "select exists(select 1 from {0}.{1} where document_type=%s limit 1)".format(SOLR_SCHEMA, serverName)
    supportCursor.execute(sql,(documentName,))
    documentsExist = supportCursor.fetchone()[0]
    appLogger.info(" |   documentsExist: {0}".format(documentsExist))


    

    # Apply 
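    # Build "select * from <SOLR_SCHEMA>.apply_<serverName>(%s,...,false)" with fieldCount
    # placeholders; the final parameter is hard-coded to false rather than using the
    # commented-out documentsExist branch below.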
    applySql = "select * from {0}.apply_{1}(".format(SOLR_SCHEMA, serverName)
    i = 0
    while i < fieldCount:
        applySql += "%s,"
        i += 1
    # if documentsExist:
    #    applySql += "true)"
    #else:
    #    applySql += "false)"
     
    applySql += "false)" 
        
    appLogger.info(" |   applySql : {0}".format(applySql))   

    # Establish main loop
    loopSql = "select * from {1}.{2}_solr_document_queue_view as a".format(None, CALC_SCHEMA, documentName)
    appLogger.info(" |   loopSql    : {0}".format(loopSql))   
    
    
    # Flushing
    if documentsExist:
        loopCursor = loopConnection.makeCursor("solrFlush", True, True)
        loopCursor.execute(loopSql)
        appLogger.info(" |   Flushing:")
        flushDml = "delete from {0}.{1} where document_type=%s and document_key=%s".format(SOLR_SCHEMA, serverName);
        appLogger.info(" |       flushDml: {0}".format(flushDml))
        for record in loopCursor:
            solrDocument = None
            solrDocument = conversionFunction(supportCursor, record)    
            appLogger.info(" |  {0}".format(solrDocument[2]));
            dataCursor.execute(flushDml, (documentName, solrDocument[2]))
        loopCursor.close()
    
    loopCursor = loopConnection.makeCursor("solr", True, True)
    loopCursor.execute(loopSql)
    lineCount = 0
    
    # Truncate table
    truncateDml = "delete from {0}.{1}_solr_document_queue".format(CALC_SCHEMA, documentName)
    appLogger.info(" |   truncateDml : {0}".format(truncateDml))
    
    for record in loopCursor:
        
        if lineCount % 1000 == 0:
            queue.setTaskProgress(taskId, successCount, 0, 0, 0, 0, 0)
        lineCount = lineCount + 1
        if lineCount % commitThreshold == 0:
            appLogger.debug("| << Transaction size threshold reached ({0}): COMMIT >>".format(lineCount))
            dataConnection.connection.commit()

        try:
            solrDocument = None
            solrDocument = conversionFunction(supportCursor, record)        
            dataCursor.execute(applySql, solrDocument)
            
    
            
            messages = dataCursor.fetchall()
            messagesFound = False
            raisedWarning = False
            raisedError = False
            raisedException = False
            
            for thisMessage in messages:
                messagesFound = True
                messageLevel = thisMessage[0]
                messageCode = thisMessage[1]
                messageTitle = thisMessage[2]
                messageAffectedColumns = thisMessage[3]
                messageAffectedRowCount = thisMessage[4]
                messageContent = "{0}\n\nDocument data being applied:\n{1}".format(thisMessage[5], solrDocument)
                supportCursor.execute(messageSql, (taskId, None, lineCount, messageLevel,  messageCode, messageTitle,  messageAffectedColumns, messageAffectedRowCount, messageContent))
    
                if messageLevel=="warning":
                    raisedWarning = True
                elif messageLevel=="error":
                    raisedError = True
                elif messageLevel=="exception":
                    raisedException = True     
                elif messageLevel=="notice":
                    noticeCount = noticeCount + 1
    
            if messagesFound:
                if raisedException:
                    exceptionCount = exceptionCount +1
                elif raisedError:
                    errorCount = errorCount +1
                elif raisedWarning:
                    warningCount = warningCount +1
                else:
                    successCount = successCount+1
            else:                                                                                    
                successCount = successCount+1
       
        except Exception as detail:
            exceptionCount = exceptionCount + 1            
            if exceptionCount < 4:
                print('Error processing Solr document (see logs)')
                appLogger.error(" |")
                appLogger.error(" |   EXCEPTION PROCESSING SOLR DOCUMENT")
                appLogger.error(" |     Filename: {0} ({1})".format(filename, moduleToUse))
                appLogger.error(" |     ConversionFunction: {0}".format(conversionFunction))
                appLogger.error(" |     ApplySql: {0}".format(applySql))
                appLogger.error(" |     {0}".format(str(detail)))
                appLogger.error(" |     Record: {0}".format(record))
                appLogger.error(" |     SolrDocument: {0}".format(solrDocument))
                appLogger.error(" |")                
            queue.addTaskMessage(taskId, None, lineCount, "exception", "EXP", "Exception processing SolrDocument", None, 1, "ERROR: {0} RECORD: {1}".format(detail, record))

    loopCursor.close()

    if (exceptionCount > 0 or errorCount > 0):
        dataConnection.connection.rollback()
    else:      
        dataCursor.execute(truncateDml)
    
    
    queue.finishTask(taskId, successCount, exceptionCount, errorCount, warningCount, noticeCount, ignoredCount)        
    return( (successCount, exceptionCount, errorCount, warningCount, ignoredCount, noticeCount) )
Example #9
File: queuePins.py  Project: hdpe/chimp
def queueTasks(queuer, settings, stream, specificationRestriction, groupId, appLogger):
    appLogger.debug("")
    appLogger.debug("  Pin tasks")
    appLogger.debug("  ---------")

    sql = "select specification_name, pin_name, input_id_column, input_x_column, input_y_column, input_schema, input_source_name, input_column_list, output_column_list, where_clause from calc.pin_registry"
    if specificationRestriction is not None:
        sql += " where specification_name in ({0})".format(specificationRestriction)
    queuer.supportCursor.execute(sql)
    specificationPins = queuer.supportCursor.fetchall()
    
    for pin in specificationPins:
        specificationName = pin[0]
        pinName = pin[1]

        
        
        appLogger.debug("  * {0}".format(pinName))
        
        sql = "select pinhead.%s_exists()" %(pinName)
        queuer.supportCursor.execute(sql)        
        pinsExist = queuer.supportCursor.fetchone()[0]
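        # If no pins exist yet, queue a script to drop the pinhead indexes first; the matching
        # create-index script is queued again after the syncPins task further down.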
        
        if not pinsExist:
            args = {}
            filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",specificationName,"resources","sql","indexes"), "drop_pinhead_%s_indexes.sql" % (pinName))
            appLogger.debug("      No pins... drop via '{0}'".format(filename))                                                                                    
            args["filename"] = filename
            queuer.queue.queueTask(groupId,  stream,  "script" , "Drop %s pin indexes" %(pinName), None, None, None, json.dumps(args), False)                                                        
            queuer.queue.queueCheckpoint(groupId, stream, "major", queuer.toleranceLevel, queuer.commitFrequency, queuer.checkpointBehaviour)                    

#        [0] specification_name
#        [1] pin_name
#        [2] input_id_column 
#        [3] input_x_column 
#        [4] input_y_column 
#        [5] input_schema
#        [6] input_source_name 
#        [7] input_column_list 
#        [8] output_column_list 
#        [9] where_clause
#        [10]processing_script_location 

        sourceName = pin[6]
    
        args = {}
        args["pinName"] = pinName
        args["inputIdColumn"] = pin[2]
        args["inputXColumn"] = pin[3]
        args["inputYColumn"] = pin[4]
        args["inputSchema"] = pin[5]
        
        args["inputSourceName"] = sourceName
        args["inputColumnList"] = pin[7]
        args["outputColumnList"] = pin[8]
        args["whereClause"] = pin[9]
        
        processorFilename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications", specificationName, "resources", "py","calculated"), "{0}_calculated_data_processor.py".format(sourceName))
        processorFilename = processorFilename.replace("\\", "\\\\") 
        args["processorFilename"] = processorFilename



        
        queuer.queue.queueTask(groupId, stream,  "syncPins", "Refresh %s pins" %(pinName), None, None, None, json.dumps(args), False)
        appLogger.debug("      syncPins [{0}]".format(args))

        if not pinsExist:
            args = {}
            filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications", specificationName, "resources", "sql","indexes"), "create_pinhead_%s_indexes.sql" % (pinName))                                                                                    
            appLogger.debug("      Rebuild pins... via '{0}'".format(filename))
            args["filename"] = filename
            queuer.queue.queueTask(groupId,  stream, "script" , "Build %s pin indexes" %(pinName), None, None, None, json.dumps(args), False)            

        queuer.queue.queueCheckpoint(groupId, stream, "major", settings.args.tolerancelevel, queuer.commitFrequency, queuer.checkpointBehaviour)

        queuer.supportCursor.connection.commit()
Example #10
File: Queuer.py  Project: wmfs/chimp
 def _queueFinishStageTask(self, groupId, stream, specificationName, paths):
     args = {}
     filename = cs.getChimpScriptFilenameToUse(paths["repository"], ("specifications",specificationName,"resources", "sql","import"), "post_%s_staging.sql" % (specificationName))    
     args["filename"] = filename
     self.queue.queueTask(groupId, stream,  "script" , "Finish stage", None, None, None, json.dumps(args), False)
Example #11
File: Queuer.py  Project: wmfs/chimp
    def queueImport(self, groupId):
    
        settings = self.settings
     
        if settings.specification.dedicatedStagingAreaName is None:
            nativeStageSchema = "stage"
        else:
            nativeStageSchema = settings.specification.dedicatedStagingAreaName 
    
    
        enableMv = False
        enableCtree = False
        
#        self.stream = settings.args.streamname
#        self.specificationName = settings.specification.name
#    
    #    supportConnection = settings.db.makeConnection("support")
    #    supportCursor = supportConnection.makeCursor("supportCursor", False, False)
    
#        self.commitFrequency = settings.args.commitfrequency 
#        self.checkpointBehaviour = settings.args.checkpointbehaviour
        self.importMode = settings.args.importmode
    
        #(supportConnection, supportCursor) = settings.db.makeConnection("support", False, False)
        self.removeDuplicates = settings.specification.autoRemoveStageDuplicates
    
        # ===============
        # [1] Queue files
        # ===============

        if settings.args.json is not None:
            (queuedTasks, minTaskId, maxTaskId) = self._queueJSON(groupId, settings.specification, self.stream, self.specificationName, settings.args.limit, settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour, settings.paths, self.removeDuplicates, self.importMode)
            fileIntent="undefined"
        elif settings.specification.sourceType=="csv":
            (queuedTasks, fileIntent, minTaskId, maxTaskId) = self._queueCsvFiles(groupId, settings.specification, self.stream, self.specificationName, settings.args.limit, settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour, settings.args.files, settings.paths, self.removeDuplicates,settings.args.recurse, settings.args.filenameregex, self.importMode)
        elif settings.specification.sourceType=="external":
            (queuedTasks, minTaskId, maxTaskId) = self._queueExternalLoaderFiles(groupId, self.stream, self.specificationName, settings.specification.externalLoaderName, nativeStageSchema, settings.specification.externalLoaderProfile, settings.specification.externalLoaderVariables, settings.args.limit,settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour, settings.args.files, settings.paths, settings.db.credentials, settings.env, self.removeDuplicates, settings.args.recurse, settings.args.filenameregex, self.importMode)
            fileIntent="full"
            
        
        args = {}
        args["specification"] = self.specificationName    
    
    # =======================
        sql = "select import.%s_exists()" %(self.specificationName)
        self.supportCursor.execute(sql)        
        hasData = self.supportCursor.fetchone()[0]
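        # import.<specification>_exists() reports whether the import schema already holds data;
        # on a first load the import/mv/ctree indexes are dropped here and recreated after the send.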
                        
        if not hasData:
        
            # ADD RECORD INDEX DROPS
            for thisRecord in settings.specification.records:
                if thisRecord.useful:
                    args = {}
                    filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications", self.specificationName, "resources", "sql", "indexes"), "drop_import_%s_indexes.sql" % (thisRecord.table))                        
                    args["filename"] = filename                               
                    self.queue.queueTask(groupId, self.stream, "script" , "Drop import.%s indexes" %(thisRecord.table), None, None, None, json.dumps(args), False)            
        
            # ADD CHECKPOINT
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
        
            # ADD ENTITY RECORD INDEX DROPS AND DISABLE                                    
            for thisEntity in settings.specification.entities:
                enableMv = True
        
                args = {}
                filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources","sql","indexes"), "drop_mv_%s_indexes.sql" % (thisEntity.name))                        
                args["filename"] = filename
                self.queue.queueTask(groupId,  self.stream, "script" , "Drop %s mv indexes" %(thisEntity.name), None, None, None, json.dumps(args), False)            
                
                args = {}
                filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql", "mv"), "%s_disable.sql" % (thisEntity.name))                        
                args["filename"] = filename
                self.queue.queueTask(groupId, self.stream,  "script" , "Disable %s mv" %(thisEntity.name), None, None, None,json.dumps(args), False)            
        
            # ADD CHECKPOINT
            if enableMv:            
                self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                
            
            # ADD CTREE INDEX DROPS AND DISABLE
            for thisRecord in settings.specification.records:
                if thisRecord.useful:
                    if thisRecord.hasCtree():
                        enableCtree = True
                        self._queueCtreeDisable(settings, groupId, thisRecord.table)
            for thisEntity in settings.specification.entities:    
                if thisEntity.hasCtree():
                    enableCtree = True
                    self._queueCtreeDisable(settings, groupId, thisEntity.name)
            
            if enableCtree:            
                self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)                    
        
        # ADD SENT TO IMPORT
        
        for record in settings.specification.records:
            if record.useful:
                args = {}
                args["specification"] = self.specificationName         
                args["importMode"] = self.importMode
                args["fileIntent"] = fileIntent
                args["strategy"] = "speed"
                args["table"] = record.table
                args["hasData"]=hasData
                self.queue.queueTask(groupId,  self.stream,  "sendtoimport" , "Send '{0}' to import".format(record.table), None, None, None, json.dumps(args), False)
                self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                self.queue.queueAVacuum(settings.args.vacuumstrategy, groupId, self.stream, "import", record.table)
                
        # If we're in sync mode then we may need to delete some things
        if self.importMode=="sync":
            for record in settings.specification.records:
                if record.useful:
                    args = {}
                    args["specification"] = self.specificationName         
                    args["importMode"] = self.importMode
                    args["fileIntent"] = fileIntent
                    args["minTaskId"] = minTaskId
                    args["maxTaskId"] = maxTaskId
                    args["table"] = record.table
                    args["hasData"]=hasData
                    self.queue.queueTask(groupId,  self.stream,  "importsyncdeletes" , "Process '{0}' sync deletes".format(record.table), None, None, None, json.dumps(args), False)
                    self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                    self.queue.queueAVacuum(settings.args.vacuumstrategy, groupId, self.stream, "import", record.table)                
        
        committedForIndexes=False
        if not hasData:
            for thisRecord in settings.specification.records:
                if thisRecord.useful:
                    
                    if not committedForIndexes:
                        committedForIndexes = True
                        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)

                    # ADD INDEXES
                    args = {}
                    filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","indexes"), "create_import_%s_indexes.sql" % (thisRecord.table))                        
                    args["filename"] = filename            
                    self.queue.queueTask(groupId,  self.stream, "script" , "Create import.%s indexes" %(thisRecord.table), None, None, None, json.dumps(args), False)            
                    self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                        
    # ================================
    
        args = None
    
        atLeastOneEditable = False
        for quickCheck in settings.specification.records:
            if quickCheck.editable:
                atLeastOneEditable = True
            
    #=================
        if atLeastOneEditable:
            sql = "select editable.%s_exists()" %(self.specificationName)
            self.supportCursor.execute(sql)        
            hasData = self.supportCursor.fetchone()[0]
        
            if not hasData:
                for thisRecord in settings.specification.records:
                    if thisRecord.useful:
                        args = {}
                        filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications", self.specificationName, "resources", "sql","indexes"), "drop_editable_%s_indexes.sql" % (thisRecord.table))                                            
                        args["filename"] = filename            
                        self.queue.queueTask(groupId,  self.stream, "script" , "Drop editable.%s indexes" %(thisRecord.table), None, None, None, json.dumps(args), False)            
            
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)

            firstEditable=True
            for record in settings.specification.records:
                if record.useful:
                    if firstEditable:
                        firstEditable = False
                        args = {}
                        self.queue.queueTask(groupId,  self.stream, "recordtimestamp" , "Record current timestamp", None, None, None, json.dumps(args), False)

                    args = {}
                    args["specification"] = self.specificationName
                    args["table"] = record.table
                    args["hasData"]=hasData                 
                    self.queue.queueTask(groupId,  self.stream, "sendtoeditable" , "Make '{0}' editable".format(record.table), None, None, None, json.dumps(args), False)
                    self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                    self.queue.queueAVacuum(settings.args.vacuumstrategy, groupId, self.stream, "editable", record.table)

            args = {}
            args["specification"] = self.specificationName
            self.queue.queueTask(groupId,  self.stream, "finisheditable" , "Finish send to editable process", None, None, None, json.dumps(args), False)

                           
            if not hasData:          
                
                for thisRecord in settings.specification.records:
                    if thisRecord.useful:
                        args = {}
                        filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","indexes"), "create_editable_%s_indexes.sql" % (thisRecord.table))                    
                        args["filename"] = filename            
                        self.queue.queueTask(groupId,  self.stream, "script" , "Create editable.%s indexes" %(thisRecord.table), None, None, None, json.dumps(args), False)            
                        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)  
    
    #======================
        if enableCtree:
            for thisRecord in settings.specification.records:
                if thisRecord.useful:
                    if thisRecord.hasCtree():
                        self._queueCtreeEnable(settings, groupId, thisRecord.table)
            for thisEntity in settings.specification.entities:    
                if thisEntity.hasCtree():
                    self._queueCtreeEnable(settings, groupId, thisEntity.name)
                     
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)                    

    #======================
        for thisRecord in settings.specification.records:
            if thisRecord.useful:
                if thisRecord.hasCtree():
                    if thisRecord.editable:
                        schemaRestriction="editable"
                    else:
                        schemaRestriction="import"
                    queueCtree.queueTasks(self, settings, schemaRestriction, self.stream, "'{0}'".format(self.specificationName), groupId, settings.appLogger)
                    
    #======================
        
        if enableMv:
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)  
            for thisEntity in settings.specification.entities:
                args = {}            
                filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","mv"), "%s_enable_and_recreate.sql" % (thisEntity.name))                                            
                args["filename"] = filename            
                self.queue.queueTask(groupId,  self.stream, "script" , "Enable %s mv" %(thisEntity.name), None, None, None, json.dumps(args), False)                
                self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                
                args = {}
                filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","indexes"), "create_mv_%s_indexes.sql" % (thisEntity.name))                                            
                args["filename"] = filename
                self.queue.queueTask(groupId,  self.stream, "script" , "Create %s indexes" %(thisEntity.name), None, None, None, json.dumps(args), False)                
                self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)

            
#        if enableCtree:    
#            for thisRecord in settings.specification.records:
#                if thisRecord.useful:
#                    if thisRecord.ancestorColumn is not None or thisRecord.descendantColumn is not None:
#                        enableCtree = True
#                        args = {}
#                        filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specification files",self.specificationName,"sql","ctree"), "%s_enable_and_recreate.sql" % (thisRecord.table))                                                                
#                        args["filename"] = filename
#                        self.queue.queueTask(groupId,  self.stream, "script" , "Build %s closure tree" %(thisRecord.table), None, None, None, json.dumps(args), False)            
#    
#            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
    
# OLD SEARCH WENT HERE                
#        if settings.args.chainsearch:
#            sql = "select domain_name,source_type,source_schema,source_name,specification_name,last_synchronized,config_location from search.active_sources where specification_name=%s"
#            self.supportCursor.execute(sql, (self.specificationName,))
#            sources = self.supportCursor.fetchall()            
#            
#            domains=[]
#            for thisSource in sources:
#                if thisSource[0] not in domains:
#                    domains.append(thisSource[0])
#    
#    
#            domainsToRebuild=[]                
#            for thisDomain in domains:
#                sql  = "select search.is_there_any_%s_data()" %(thisDomain)
#                self.supportCursor.execute(sql, (self.specificationName,))
#                hasData = self.supportCursor.fetchone()[0]
#                if not hasData:
#                    domainsToRebuild.append(thisDomain)
#                    args = {}
#                    filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("search domain files",thisDomain,"sql","indexes"), "drop_search_%s_indexes.sql" % (thisDomain))                                                                
#                    args["filename"] = filename            
#                    self.queue.queueTask(groupId,  self.stream, "script" , "Drop search.%s indexes" %(thisDomain), None, None, None, json.dumps(args), False)            
#            for thisSource in sources:
#                args = {}
#                args["domainName"] = thisSource[0]        
#                args["sourceType"] = thisSource[1]
#                args["sourceSchema"] = thisSource[2]
#                args["sourceName"] = thisSource[3]
#                args["specification"] = thisSource[4]
#                args["lastSynchronized"] = thisSource[5]
#                args["configLocation"] = thisSource[6]            
#                args["recordLimit"] = None
#                self.queue.queueTask(groupId,  self.stream, "syncSearchSource" , "Refresh %s (%s)" %(thisSource[0], thisSource[3]), None, None, None, json.dumps(args), False)
#            
#            for thisDomain in domainsToRebuild:
#                args = {}
#                filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("search domain files",thisDomain,"sql","indexes"), "create_search_%s_indexes.sql" % (thisDomain))                                                                
#                args["filename"] = filename            
#                self.queue.queueTask(groupId,  self.stream, "script" , "Create search.%s indexes" %(thisDomain), None, None, None, json.dumps(args), False)            
#        
#            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)

                        
        # OLD PINHEAD WENT HERE
        
        # =======================================================================
        # Queue calculated data tasks for this specification
        
        
        
#        for record in settings.specification.records:
#            if record.useful:        
#                record.computedData.addTasks(settings, self, groupId, self.stream)
#        for entity in settings.specification.entities:
#            entity.computedData.addTasks(settings, self, groupId, self.stream)

            
        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
        self.queue.queueAVacuum(settings.args.vacuumstrategy, groupId, self.stream, None, None)          
        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
        self.supportCursor.connection.commit()
Example #12
File: Queuer.py  Project: wmfs/chimp
 def _queueCtreeEnable(self, settings, groupId, sourceName):        
     args = {}
     filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","ctree"), "%s_enable_and_recreate.sql" % (sourceName))                                                                
     args["filename"] = filename
     self.queue.queueTask(groupId,  self.stream, "script" , "Build %s closure tree" %(sourceName), None, None, None, json.dumps(args), False)