Exemplo n.º 1
0
    def getDBStructure(self, tableName, tableSchema):
        """Describe the columns of a MongoDB collection.

        When the collection holds documents, the structure is derived from
        one sample document (``find_one``); otherwise it is read from the
        collection's ``$jsonSchema`` validator, when one is defined.

        Args:
            tableName: Collection name; normalised through ``self.setTable``.
            tableSchema: Forwarded unchanged to ``self.isExists``.

        Returns:
            OrderedDict mapping column name to a dict with the keys
            ``eJson.jSttValues.TYPE`` and ``eJson.jSttValues.ALIACE``
            (the alias is always ``None`` here). Empty when the collection
            does not exist, has no usable schema, or an error occurred
            (errors are logged, not raised).
        """
        tableName = self.setTable(tableName=tableName)

        ret = OrderedDict()
        try:
            collection = self.isExists(tableName=tableName,
                                       tableSchema=tableSchema)
            if not collection:
                return ret

            # There are rows - use the structure of an existing document.
            if self.cntRows() > 0:
                schemaObj = self.cursor[tableName].find_one()
                if schemaObj:
                    for col in schemaObj:
                        ret[uniocdeStr(col)] = {
                            # Type of the stored value; taking the type of
                            # the key would report every column as a string.
                            eJson.jSttValues.TYPE: type(schemaObj[col]),
                            eJson.jSttValues.ALIACE: None
                        }
                return ret

            # Empty collection - fall back to the declared validator schema.
            collectionInfo = self.cursor.command({
                'listCollections': 1,
                'filter': {
                    'name': collection
                }
            })

            cursorObj = collectionInfo.get('cursor') or {}
            for batch in cursorObj.get('firstBatch') or []:
                if 'options' not in batch:
                    continue
                # A collection without a validator is legitimate; skip it
                # instead of failing on a missing key.
                validator = batch['options'].get('validator') or {}
                jsonSchema = validator.get('$jsonSchema') or {}
                collectionProperties = jsonSchema.get('properties') or {}

                for col in collectionProperties:
                    ret[uniocdeStr(col)] = {
                        eJson.jSttValues.TYPE:
                        collectionProperties[col].get('bsonType'),
                        eJson.jSttValues.ALIACE: None
                    }

        except Exception as e:
            p("MONGODB-> %s ERROR:\n %s " % (tableName, str(e)), "e")

        return ret
Exemplo n.º 2
0
    def __updateSTTBySourceOrTarget(self, srcStructure, pre="[", pos="]"):
        """Drop STT columns whose declared source is absent from the source.

        Args:
            srcStructure: Mapping of source column name to its properties;
                a property dict may carry an ``eJson.stt.SOURCE`` entry.
            pre: Opening column-quoting token stripped before comparing.
            pos: Closing column-quoting token stripped before comparing.

        Side effects:
            Deletes the offending entries from ``self.stt`` and logs one
            warning per removed column.
        """
        # Normalised (unquoted, lower-cased) names known to the source.
        knownNames = set()
        # Unquoted source names exactly as written (case preserved).
        srcColumns = set()

        for col in srcStructure:
            knownNames.add(col.replace(pre, "").replace(pos, "").lower())
            colProps = srcStructure[col]
            if eJson.stt.SOURCE in colProps and colProps[eJson.stt.SOURCE]:
                srcName = colProps[eJson.stt.SOURCE].replace(pre, "").replace(
                    pos, "")
                # Lookups below are lower-cased, so store the lower-cased
                # form; a mixed-case entry could never match.
                knownNames.add(uniocdeStr(srcName).lower())
                srcColumns.add(srcName)

        if not self.stt:
            return

        removeColumnsSrc = []
        for col in self.stt:
            if eJson.stt.SOURCE not in self.stt[col]:
                continue
            sttSource = self.stt[col][eJson.stt.SOURCE]
            # An empty / None source means "no source column": nothing to
            # validate (and calling .replace on None would raise).
            if not sttSource or sttSource in srcColumns:
                continue
            if sttSource.replace(pre, "").replace(
                    pos, "").lower() not in knownNames:
                removeColumnsSrc.append(col)

        # Delete after the scan so self.stt is not mutated while iterating.
        for col in removeColumnsSrc:
            p(
                "STT TAREGT %s HAVE INVALID SOURCE %s --> ignore COLUMN " %
                (col, self.stt[col][eJson.stt.SOURCE]), "w")
            del self.stt[col]
Exemplo n.º 3
0
 def __execEachLine(connObj, sqlTxt):
     """Execute every SQL statement found in ``sqlTxt`` on ``connObj``.

     Args:
         connObj: Connection object; must provide ``setQueryWithParams``,
             ``exeSQL`` and the ``isExtractSqlIsOnlySTR`` attribute.
         sqlTxt: SQL text or file-like object, split into statements by
             ``__split_sql_expressions``.

     ``locParams`` is read from the enclosing scope; when it is non-empty
     each statement is passed through ``connObj.setQueryWithParams``.
     """
     # The splitter may return None; treat that as "nothing to run".
     sqlQuery = __split_sql_expressions(sqlTxt) or []
     isParam = len(locParams) > 0
     for sql in sqlQuery:
         # Collapse all whitespace runs (including newlines) to one space.
         sql = re.sub(r"\s+", " ", sql)
         if isParam:
             sql = connObj.setQueryWithParams(query=sql,
                                              queryParams=locParams)
         if 'PRINT' in sql:
             # Echo the first quoted literal, if any. A statement that
             # merely contains the text PRINT may have no quote at all.
             quoted = sql.split("'")
             if len(quoted) > 1:
                 p('SQL PRINT: ' + quoted[1], "i")
         if len(sql) > 1:
             sql = str(
                 sql) if connObj.isExtractSqlIsOnlySTR else uniocdeStr(sql)
             connObj.exeSQL(sql=sql)
             p(u"FINISH EXEC: %s" % uniocdeStr(sql), "i")
Exemplo n.º 4
0
def OLAP_Process(serverName, dbName, cubes=None, dims=None, fullProcess=True):
    """Process dimensions and cubes of an Analysis Services database.

    Args:
        serverName: Value passed to ``Server.Connect``.
        dbName: Database name, matched case-insensitively against the
            server's databases.
        cubes: Cubes to process; empty / None processes every cube.
        dims: Dimensions to process; empty / None processes every dimension.
        fullProcess: When true uses ``ProcessType.ProcessFull``; otherwise
            the raw value ``0`` is passed to ``Process``.

    Failures on individual dimensions / cubes are logged and do not stop
    the remaining objects from being processed.
    """
    import os
    import sys
    localPath = os.path.abspath(os.path.dirname(__file__))
    # NOTE(review): the literal ends with a stray double quote and names a
    # file rather than a directory, so this sys.path entry most likely has
    # no effect - confirm the intended path before changing it.
    sys.path.append(os.path.join(localPath, r'../dll/clrmodule.dll"'))
    import clr
    clr.AddReference(
        os.path.join(localPath, r'../dll/Microsoft.AnalysisServices.DLL'))

    from Microsoft.AnalysisServices import Server
    from Microsoft.AnalysisServices import ProcessType

    processType = ProcessType.ProcessFull if fullProcess else 0

    def _processAll(objects, wanted, okMsg, errMsg):
        # Process each object, optionally restricted to ``wanted``.
        # NOTE(review): membership compares the AMO object itself with the
        # entries of ``wanted``; if callers pass names this never matches -
        # confirm against callers.
        for obj in objects:
            if not wanted or obj in wanted:
                try:
                    obj.Process(processType)
                    p(
                        okMsg % (uniocdeStr(dbName, decode=True),
                                 uniocdeStr(obj, decode=True)), "i")
                except Exception as e:
                    p(
                        errMsg % (uniocdeStr(dbName, decode=True),
                                  uniocdeStr(obj, decode=True)), "e")
                    p(e, "e")

    # Connect to server
    amoServer = Server()
    amoServer.Connect(serverName)
    try:
        # Locate the database (case-insensitive name match)
        amoDb = None
        for d in amoServer.Databases:
            if str(d).lower() == dbName.lower():
                amoDb = amoServer.Databases.GetByName(str(d))
                break

        if not amoDb:
            p("OLAP: CANNOT FIND %s DB IN %s SERVER" % (dbName, serverName),
              "e")
            return

        _processAll(amoDb.Dimensions, dims,
                    u"OLAP DB: %s, process DIM %s finish succeffully ... ",
                    u"OLAP DB: %s, ERROR processing DIM %s ... ")
        _processAll(amoDb.Cubes, cubes,
                    u"OLAP DB: %s, CUBE %s finish succeffully ... ",
                    u"OLAP DB: %s, ERROR processing CUBE %s ... ")
    finally:
        # Always release the server connection, including on early return.
        amoServer.Disconnect()
Exemplo n.º 5
0
    def dataTransform(self, data, functionDict=None, execDict=None):
        """Apply per-column functions and template expressions to rows.

        Args:
            data: Sequence of rows (a tuple is converted to a list); a list
                is updated in place.
            functionDict: Optional mapping of column index to a list of
                objects exposing ``handler(value, index)``; the handlers are
                applied in order to the value at that index.
            execDict: Optional mapping of column index to a template string
                in which ``{N}`` is replaced by the value of column ``N`` of
                the same row (falsy values become an empty string).

        Returns:
            The transformed rows. For SQL Server connections every empty
            string is additionally replaced by ``None``.
        """
        if isinstance(data, tuple):
            data = list(data)

        # Either mapping may be omitted; normalise to empty dicts so that
        # supplying only one of them no longer fails when iterating None.
        functionDict = functionDict or {}
        execDict = execDict or {}

        if functionDict or execDict:
            placeholderRe = re.compile(r"(\{.*?\})", re.MULTILINE | re.DOTALL)
            for num, dataRow in enumerate(data):
                row = list(dataRow)
                for ind in functionDict:
                    newVal = row[ind]
                    for fn in functionDict[ind]:
                        newVal = fn.handler(newVal, ind)
                    row[ind] = newVal

                for ind in execDict:
                    newVal = execDict[ind]
                    for match in placeholderRe.finditer(execDict[ind]):
                        placeholder = match.group(1)
                        colNum = placeholder.replace('{', '').replace('}', '')
                        colVal = row[int(colNum)]
                        colVal = uniocdeStr(colVal,
                                            decode=True) if colVal else ''
                        newVal = replaceStr(sString=str(newVal),
                                            findStr=placeholder,
                                            repStr=colVal,
                                            ignoreCase=False,
                                            addQuotes=None)
                    row[ind] = newVal
                data[num] = row

        # SQL Server: send empty strings as None.
        if self.connType == eConn.types.SQLSERVER:
            for num, row in enumerate(data):
                data[num] = [i if i != '' else None for i in row]
        return data
Exemplo n.º 6
0
def __split_sql_expressions(text):
    """Strip SQL comments from ``text`` and split it into statements.

    Args:
        text: Either a string or a file-like object exposing ``read()``.

    Returns:
        For a string: a one-element list holding the text with ``/* */``
        and ``--`` comments removed (the text is not split).
        For a file-like object: the comment-free content split on
        ``"GO\\n"`` and then on ``";\\n"``; ``None`` if that list is empty.
    """
    try:
        stringTypes = basestring  # Python 2
    except NameError:
        stringTypes = str  # Python 3

    commentFlags = re.MULTILINE | re.UNICODE | re.DOTALL

    if isinstance(text, stringTypes):
        # Flags must go through ``flags=``: the 4th positional argument of
        # re.sub is ``count``. The block-comment pattern is non-greedy so
        # code between two comments is kept.
        text = re.sub(r"/\*.*?\*/|--.*?\n", "", text, flags=commentFlags)
        return [text]

    text = uniocdeStr(sObj=text.read(), decode=False)
    text = re.sub(r"/\*.*?\*/", "", text, flags=commentFlags)
    # No count limit: every line comment is removed.
    text = re.sub(r"--.*?\n", "", text)

    results = [
        stmt for chunk in text.split("GO\n") for stmt in chunk.split(";\n")
    ]
    return results if len(results) > 0 else None
Exemplo n.º 7
0
    def mappingLoadingSourceToTarget(self, srcDictStructure, src, tar):
        """Build the target-to-source column mapping for every source object.

        Args:
            srcDictStructure: Source structure. A mapping of object name to
                column structure, or a single column structure when
                ``src.isSingleObject`` is true.
            src: Source connector; ``isSingleObject`` is read, and
                ``columnFrame`` (opening / closing quoting tokens) when
                present.
            tar: Target connector; ``getStructure`` is called, and
                ``columnFrame`` is read when present.

        Returns:
            ``None`` when ``srcDictStructure`` is empty; otherwise an
            OrderedDict ``{objectName: OrderedDict{targetColumn: props}}``
            where ``props`` may hold ``eJson.stt.SOURCE`` plus whatever the
            matching ``self.stt`` entry defines.

        Side effects:
            ``self.stt`` may lose entries (see
            ``__updateSTTBySourceOrTarget``); warnings are logged for
            unmapped target and source columns.
        """
        if not srcDictStructure:
            return None

        tarToSrc = OrderedDict()

        srcPre, srcPos, tarPre, tarPos = '', '', '', ''
        if hasattr(src, 'columnFrame'):
            srcPre, srcPos = src.columnFrame[0], src.columnFrame[1]

        if hasattr(tar, 'columnFrame'):
            tarPre, tarPos = tar.columnFrame[0], tar.columnFrame[1]

        if src.isSingleObject:
            srcDictStructure = {'': srcDictStructure}

        def _srcKey(name):
            # Source column name without quoting tokens, lower-cased.
            return name.replace(srcPre, "").replace(srcPos, "").lower()

        def _tarKey(name):
            # Target column name without quoting tokens, lower-cased.
            return name.replace(tarPre, "").replace(tarPos, "").lower()

        # Remove from STT the columns that do not exist in the target or
        # source object, then build the mapping per object.
        # (Loop variable deliberately not named ``src`` so the connector
        # argument is not shadowed.)
        for objName in srcDictStructure:
            srcStructure = srcDictStructure[objName]

            if objName and len(objName) > 0:
                tarStructure = tar.getStructure(objects=objName)
            else:
                tarStructure = tar.getStructure()

            self.__updateSTTBySourceOrTarget(srcStructure=srcStructure,
                                             pre=srcPre,
                                             pos=srcPos)

            # Built from pairs so the target column order is kept even on
            # interpreters whose plain dicts are unordered.
            tarColumns = OrderedDict()
            if tarStructure and len(tarStructure) > 0:
                tarColumns = OrderedDict(
                    (_tarKey(x), x) for x in tarStructure)

            sttColumns = OrderedDict()
            if self.stt:
                for x in self.stt:
                    sttColumns[_tarKey(x)] = x

            mapping = OrderedDict()
            tarToSrc[objName] = mapping

            # {normalised source name: source column}
            srcColumns = OrderedDict()
            for col in srcStructure:
                colProps = srcStructure[col]
                # Fall back to the column itself when SOURCE is missing or
                # empty; a None value would fail on .replace further down.
                if eJson.stt.SOURCE in colProps and colProps[eJson.stt.SOURCE]:
                    srcColumns[_srcKey(col)] = colProps[eJson.stt.SOURCE]
                else:
                    srcColumns[_srcKey(col)] = col

            if self.addSourceColumn:
                # Use every source column, regardless of the target schema.
                for col in srcColumns:
                    mapping[col] = {eJson.stt.SOURCE: srcColumns[col]}
            else:
                # Keep only target columns that have a same-named source.
                for col in tarColumns:
                    if col in srcColumns:
                        mapping[tarColumns[col]] = {
                            eJson.stt.SOURCE: srcColumns[col]
                        }

            # Merge the explicit STT definitions on top of the name matches.
            tarToSrcColumns = {_tarKey(x): x for x in mapping}
            for col in sttColumns:
                sttProps = self.stt[sttColumns[col]]
                if col in tarToSrcColumns:
                    mapping[tarToSrcColumns[col]].update(sttProps)
                else:
                    mapping[sttColumns[col]] = sttProps

            # Report columns that did not make it into the mapping.
            existsTarColumns = set()
            existsSrcColumns = set()
            for col in mapping:
                existsTarColumns.add(_tarKey(col))
                if eJson.stt.SOURCE in mapping[col] and mapping[col][
                        eJson.stt.SOURCE]:
                    existsSrcColumns.add(
                        _srcKey(mapping[col][eJson.stt.SOURCE]))

            columnNotMapped = u"".join(
                uniocdeStr(tarColumns[col]) + u" ; " for col in tarColumns
                if col not in existsTarColumns)
            if len(columnNotMapped) > 0:
                p(u"TARGET COLUMN NOT MAPPED: %s" % (columnNotMapped), "w")

            columnNotMapped = u"".join(
                uniocdeStr(srcColumns[col]) + u" ; " for col in srcColumns
                if _srcKey(srcColumns[col]) not in existsSrcColumns)
            if len(columnNotMapped) > 0:
                p(u"SOURCE COLUMN NOT MAPPED: %s" % (columnNotMapped), "w")

        return tarToSrc
Exemplo n.º 8
0
    def extract(self, tar, tarToSrcDict, batchRows=None):
        """Read the source file(s) and load their rows into ``tar``.

        Args:
            tar: Target connector; ``tar.load(rows=, targetColumn=,
                objectName=)`` is called once per flushed batch.
            tarToSrcDict: Optional mapping ``{fileName: {targetColumn:
                props}}``. For a single-object source the entry keyed by
                ``''`` is re-keyed to the file name (the dict is mutated).
                Files without an entry are loaded as-is, all columns.
            batchRows: Falls back to ``self.batchSize``.
                NOTE(review): the resolved value is not used below; batches
                are flushed every ``self.maxLinesParse`` lines.

        Errors raised while reading or loading a file are logged and the
        next file is processed.
        """
        batchRows = batchRows if batchRows else self.batchSize
        startFromRow = 0 if not self.header else self.header
        fileStructureDict = self.getStructure()
        regexFlags = re.IGNORECASE | re.MULTILINE | re.UNICODE

        def _flush(rows, fnOnRowsDic, execOnRowsDic, targetColumnList,
                   fileName):
            # Transform the pending rows and hand them to the target.
            rows = self.dataTransform(data=rows,
                                      functionDict=fnOnRowsDic,
                                      execDict=execOnRowsDic)
            tar.load(rows=rows,
                     targetColumn=targetColumnList,
                     objectName=fileName)

        if self.isSingleObject:
            if len(self.objNames) > 0:
                fName = list(self.objNames.keys())[0]
                fileStructureDict = {fName: fileStructureDict}
                # The mapping is optional (see the check further down), so
                # only re-key it when the '' entry actually exists.
                if tarToSrcDict and '' in tarToSrcDict:
                    tarToSrcDict[fName] = tarToSrcDict.pop('')
            else:
                p("UNABLE TO EXTRACT FILE !!!")
                # Nothing to read: self.objNames has no file to resolve.
                return

        for fileName in fileStructureDict:
            fileStructure = fileStructureDict[fileName]
            fileFullPath = self.objNames[fileName][eObj.FILE_FULL_PATH]
            fileStructureL = OrderedDict()  # lower-cased column -> index
            listOfColumnsH = {}  # index -> column name as in the file
            targetColumnList = []
            fnOnRowsDic = {}
            execOnRowsDic = {}
            listOfColumnsL = []  # file index per output column, -1 = None

            for i, col in enumerate(fileStructure):
                fileStructureL[col.lower()] = i
                listOfColumnsH[i] = col

            ## File with header and there is target to source mapping
            if tarToSrcDict and fileName in tarToSrcDict:
                tarToSrc = tarToSrcDict[fileName]
                mappingSourceColumnNotExists = u""
                fileSourceColumnNotExists = u""

                for i, col in enumerate(tarToSrc):
                    colProps = tarToSrc[col]
                    targetColumnList.append(col)
                    if eJson.stt.SOURCE in colProps and colProps[
                            eJson.stt.SOURCE]:
                        srcColumnName = colProps[eJson.stt.SOURCE]
                        if srcColumnName.lower() in fileStructureL:
                            listOfColumnsL.append(
                                fileStructureL[srcColumnName.lower()])
                        else:
                            mappingSourceColumnNotExists += uniocdeStr(
                                srcColumnName) + u" ; "
                            # Keep one slot per target column so rows stay
                            # aligned with targetColumnList and with the
                            # index-keyed function dictionaries.
                            listOfColumnsL.append(-1)
                    else:
                        listOfColumnsL.append(-1)

                    ### ADD FUNCTION
                    if eJson.stt.FUNCTION in colProps and colProps[
                            eJson.stt.FUNCTION]:
                        # SECURITY NOTE(review): eval executes whatever the
                        # mapping contains - the mapping must be trusted.
                        fnc = eval(colProps[eJson.stt.FUNCTION])
                        fnOnRowsDic[i] = fnc if isinstance(
                            fnc, (list, tuple)) else [fnc]

                    ### ADD EXECUTION FUNCTIONS
                    if eJson.stt.EXECFUNC in colProps and len(
                            colProps[eJson.stt.EXECFUNC]) > 0:
                        newExcecFunction = colProps[eJson.stt.EXECFUNC]
                        # Swap each {column name} placeholder for the
                        # column's index in the file.
                        matches = re.finditer(r"(\{.*?\})",
                                              colProps[eJson.stt.EXECFUNC],
                                              re.MULTILINE | re.DOTALL)
                        for match in matches:
                            colName = match.group(1)
                            if colName and len(colName) > 0:
                                colName = colName.replace("{", "").replace(
                                    "}", "")
                                if colName.lower() in fileStructureL:
                                    newExcecFunction = newExcecFunction.replace(
                                        colName,
                                        str(fileStructureL[colName.lower()]))
                        execOnRowsDic[i] = newExcecFunction

                for colNum in listOfColumnsH:
                    if colNum not in listOfColumnsL:
                        fileSourceColumnNotExists += uniocdeStr(
                            listOfColumnsH[colNum]) + u" ; "

                if len(mappingSourceColumnNotExists) > 0:
                    p(
                        "SOURCE COLUMN EXISTS IN SOURCE TO TARGET MAPPING AND NOT FOUND IN SOURCE FILE: %s"
                        % (mappingSourceColumnNotExists), "w")

                if len(fileSourceColumnNotExists) > 0:
                    p(
                        "FILE COLUMN NOT FOUD IN MAPPING: %s" %
                        (fileSourceColumnNotExists), "w")
            ## There is no target to source mapping, load file as is
            else:
                for colNum in listOfColumnsH:
                    listOfColumnsL.append(colNum)

            # Load the file content and flush it to the target in batches.
            rows = []
            try:
                # Flags have to be applied at compile time: the 4th
                # positional argument of re.sub is ``count``, not ``flags``.
                # An already compiled pattern is used unchanged.
                replaceRe = self.replaceToNone
                if replaceRe and not hasattr(replaceRe, 'sub'):
                    replaceRe = re.compile(replaceRe, regexFlags)

                with codecs.open(fileFullPath, 'r', encoding=self.encode
                                 ) as textFile:  # errors=self.withCharErr
                    if self.isCsv:
                        fFile = csv.reader(textFile, delimiter=self.delimiter)
                        for i, split_line in enumerate(fFile):
                            if i >= startFromRow:
                                if replaceRe:
                                    rows.append([
                                        replaceRe.sub("", split_line[x])
                                        if x > -1
                                        and len(split_line[x]) > 0 else None
                                        for x in listOfColumnsL
                                    ])
                                else:
                                    rows.append([
                                        split_line[x] if x > -1
                                        and len(split_line[x]) > 0 else None
                                        for x in listOfColumnsL
                                    ])

                            if self.maxLinesParse and i > startFromRow and i % self.maxLinesParse == 0:
                                _flush(rows, fnOnRowsDic, execOnRowsDic,
                                       targetColumnList, fileName)
                                rows = []
                    else:
                        for i, line in enumerate(textFile):
                            line = replaceRe.sub("",
                                                 line) if replaceRe else line
                            line = line.strip(self.endOfLine)
                            split_line = line.split(self.delimiter)
                            # Rows before startFromRow are skipped.
                            if i >= startFromRow:
                                rows.append([
                                    split_line[x] if x > -1
                                    and len(split_line[x]) > 0 else None
                                    for x in listOfColumnsL
                                ])

                            if self.maxLinesParse and i > startFromRow and i % self.maxLinesParse == 0:
                                _flush(rows, fnOnRowsDic, execOnRowsDic,
                                       targetColumnList, fileName)
                                rows = []

                    # Flush whatever is left after the last full batch.
                    if len(rows) > 0:
                        _flush(rows, fnOnRowsDic, execOnRowsDic,
                               targetColumnList, fileName)
                        rows = []

            except Exception as e:
                p("ERROR LOADING FILE %s  >>>>>>" % (fileFullPath), "e")
                p(str(e), "e")