def fill_listIndexedBySchemaType(linesOfTouchedScripts):
    global g_internalSepator, g_listIndexedBySchemaType, g_schemataFound
    # _dbx( "foo" ); return
    _dbx(": %d" % (len(linesOfTouchedScripts)))
    schemaScripts = {}
    for line in linesOfTouchedScripts:
        pathNodes = line.split("/")
        _dbx(" nodes: %d" % (len(pathNodes)))
        schema = pathNodes[0]
        if len(pathNodes) <= 2:
            # skip scripts which are on top level, e.g. BASIC_DATA/master.sql
            continue
        if schema not in schemaScripts.keys():
            _dbx(line)
            _dbx(schema)
            schemaScripts[schema] = []
        relPath = "/".join(pathNodes[1:]).rstrip("\n")
        schemaScripts[schema].append(relPath)
    _infoTs("Found touched scripts for schemata:\n %s" % (",".join(schemaScripts.keys())))
    g_listIndexedBySchemaType = {}
    g_schemataFound = schemaScripts.keys()
    for schema in g_schemataFound:
        scriptList = schemaScripts[schema]
        for script in scriptList:
            fileExt = os.path.splitext(script)[1]
            if fileExt.upper() not in g_excludeTouchWithExtensions:
                scriptType = "UnknownScriptType"
                # _dbx( " ext: %s" % ( fileExt) )
                # extract subfolder name
                pathNodes = script.split("/")
                if len(pathNodes) > 1:  # pattern object_type / script_file
                    subFolder = pathNodes[0]
                else:
                    subFolder = None
                scriptType = "%s%s" % (subFolder if subFolder is not None else '', fileExt.upper())
                schemaType = schema + g_internalSepator + scriptType
                # _dbx("dbx script %s --> schemaType %s" % (script, schemaType) )
                if schemaType not in g_listIndexedBySchemaType.keys():
                    g_listIndexedBySchemaType[schemaType] = []
                _dbx(script)
                script = script.replace('/', '\\')
                script = "@@" + script
                _dbx(script)
                g_listIndexedBySchemaType[schemaType].append(script)
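
# Illustrative sketch only (example paths are made up; g_internalSepator, shown here as <sep>,
# is defined elsewhere in this module): a touched-script line such as
#   BASIC_DATA/Packages/pkg_util.pkb
# ends up in g_listIndexedBySchemaType under the key "BASIC_DATA<sep>Packages.PKB" as the
# entry "@@Packages\pkg_util.pkb", i.e. ready to be pasted after the matching
# "REM place_here_scripts_for:" sentinel by createSchemataInstallScripts().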
def persistAndPrintName(textName, textContent, baseNamePrefix):
    outPath = tempfile.mktemp()
    if baseNamePrefix is not None:
        tempDirName = os.path.dirname(outPath)
        tempBaseName = os.path.basename(outPath)
        outPath = os.path.join(tempDirName, baseNamePrefix + tempBaseName)
    _infoTs("Text named '%s' will be written to %s" % (textName, mixedDosPathToUnix(outPath)), withTs=True)
    fh = open(outPath, "w")
    fh.write("\n".join(textContent))
    fh.close()
    return outPath
def action_dbs(envCsv, objCsv):
    """ Extract DDL scripts for the objects given by cmdArgs.
    When we compare DDLs from 2 databases, the following additional tasks are performed:
    1. compute the diff grade of the original DDLs
    2. if the diff grade is zero or low, generate the HTML diff report using the original DDLs
    3. if the diff grade is high, generate the HTML diff report using the formatted DDLs
    """
    objectList = getObjectList(objCsv)
    # _errorExit( "test exit %s" % ( len( objectList ) ) )
    envList = envCsv.split(",")
    if len(envList) > 2:
        raise ValueError("diff report cannot be created for more than 2 databases. Consider action extract!")
    # regardless of whether we need to process 1 or 2 databases, we need to extract the scripts to the target location first
    action_extractScripts(objCsv=objCsv, envCsv=envCsv)
    for ix, env in enumerate(envList):
        if ix == 0:
            dbOneOriginPaths, dbOneFormattedPaths = CopyFilesForObjectListForEnv(
                envCode=env, objectList=objectList, staleMinutesOk=60)
            _dbx("dbOneOriginPaths len: %s" % (len(dbOneOriginPaths)))
        elif ix == 1:
            dbTwoOriginPaths, dbTwoFormattedPaths = CopyFilesForObjectListForEnv(
                envCode=env, objectList=objectList, staleMinutesOk=60)
    concatDiffReport = "\n"
    if len(envList) == 2:
        # _errorExit( "getHtmlDiffOutput method coded but not yet used! " )
        for i in range(len(dbOneOriginPaths)):
            fileAOrigin = dbOneOriginPaths[i]
            fileBOrigin = dbTwoOriginPaths[i]
            lnCntA, lnCntB, newCnt, delOrChgCnt, diffGrade = getDiffStatsFromFiles(
                fileA=fileAOrigin, fileB=fileBOrigin)
            if diffGrade == 0 or diffGrade == 1:
                concatDiffReport += getHtmlDiffOutput(fileA=fileAOrigin, fileB=fileBOrigin)
            else:
                fileAFormatted = dbOneFormattedPaths[i]
                fileBFormatted = dbTwoFormattedPaths[i]
                concatDiffReport += getHtmlDiffOutput(fileA=fileAFormatted, fileB=fileBFormatted)
    _dbx(len(concatDiffReport))
    diffRepFile = tempfile.mkstemp(suffix="-accu-diffs.html")[1]
    open(diffRepFile, "w").write(concatDiffReport)
    _infoTs("Diff report generated as %s " % (diffRepFile))
def findFilesToExcludeFromInstall(blacklistPath):
    """ Example content of blacklist file:
    SYS/Tables/test_only.sql
    SYSTEM/Packages/test_pkg.sql
    """
    global g_filesToExcludeFromInstall
    if os.path.exists(blacklistPath):
        lines = open(blacklistPath, "r").readlines()
        for line in lines:
            g_filesToExcludeFromInstall.append(line.strip())
    else:
        _infoTs("Ignoring file %s since it does not seem to exist" % (blacklistPath))
    _infoTs("%d touched files will be ignored due to %s" % (len(g_filesToExcludeFromInstall), blacklistPath))
def uglyFormat(inputFilePath):
    """ Read in the lines of the input SQL file and format them with the simple/ugly formatter:
    * do some QA (word counts of input vs. output)
    * dump the format result into a tempfile
    * return the temp path
    """
    inputLines = open(inputFilePath, "r").readlines()
    _dbx("read %d lines from %s" % (len(inputLines), inputFilePath))
    tree = fsm.plsqlTokenize(inputLines)
    formattedLines = tree.simpleFormatSemicolonAware()
    if True or "want to" == "QA":
        textWordCounter_a = charCounter.WordCounter(name="sql input", lines=inputLines, shortCode="sqlInput")
        textWordCounter_a.scan()
        wordCountResultLines_a = textWordCounter_a.report(printToStdout=False)
        forWordCountCheck_a = tempfile.mktemp()
        _dbx("forWordCountCheck_a: %s" % (forWordCountCheck_a))
        open(forWordCountCheck_a, "w").write("\n".join(wordCountResultLines_a))

        textWordCounter_b = charCounter.WordCounter(name="formatted result", lines=formattedLines, shortCode="sqlFormatted")
        textWordCounter_b.scan()
        wordCountResultLines_b = textWordCounter_b.report(printToStdout=False)
        forWordCountCheck_b = tempfile.mktemp()
        _dbx("forWordCountCheck_b: %s" % (forWordCountCheck_b))
        open(forWordCountCheck_b, "w").write("\n".join(wordCountResultLines_b))

        if "want see result of wordCount diff" == "which is barely useful":
            _infoTs(" ************ DIFFing WordCounts ... ")
            diffWordCountResult = genUnixDiff(forWordCountCheck_a, forWordCountCheck_b)
            diffLinesToShow = 10
            _infoTs(" ************ result of DIFFing WORD Counts, first %d lines only " % diffLinesToShow)
            print("\n".join(diffWordCountResult.split("\n")[0:diffLinesToShow]))
    inputFileBaseName = os.path.basename(inputFilePath)
    outPath = persistAndPrintName(textName="formatted %s" % inputFilePath,
                                  textContent=formattedLines,
                                  baseNamePrefix=inputFileBaseName + '-')
    return outPath
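
# Hypothetical usage sketch (the input path is a placeholder):
#   formattedPath = uglyFormat(inputFilePath="C:\\temp\\pkg_util.pkb")
#   # formattedPath points to a tempfile whose basename starts with "pkg_util.pkb-";
#   # the word-count QA reports are written to further tempfiles for manual diffing.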
def getSqlRunner(oraUser, password, host, port, service):
    """ Set up an Oracle session and return a cursor with which queries can be executed.
    The result of a query can be fetched using fetchone or fetchall.
    Why exactly we need a cursor instead of using the connection handle directly remains to be clarified.
    """
    # if needed, place an 'r' before any parameter in order to address special characters such as '\'
    myDsn = cx_Oracle.makedsn(host, port, service_name=service)
    conx = cx_Oracle.connect(user=oraUser, password=password, dsn=myDsn)
    conx.outputtypehandler = conxOutputTypeHandler
    cur = conx.cursor()  # instantiate a handle
    cur.execute("""select username, sys_context( 'userenv', 'db_name' ) from user_users""")
    connectedAs, dbName = cur.fetchone()
    _infoTs("connected as %s to %s" % (connectedAs, dbName))
    return cur
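
# Hypothetical usage sketch (connection details are placeholders, not real defaults):
#   cur = getSqlRunner(oraUser="scott", password="tiger",
#                      host="dbhost.example.com", port=1521, service="orclpdb1")
#   cur.execute("select count(*) from user_objects")
#   objCount, = cur.fetchone()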
def CopyFilesForObjectListForEnv(envCode, objectList, staleMinutesOk=20):
    dbName = g_mapDbNameOfEnvCode[envCode]
    _dbx("db %s" % (dbName))
    now = time.time()  # returns seconds since epoch
    originFilePathsInDiffArea = []
    formattedFilePaths = []
    for obj in objectList:
        originScriptPath = getDdlScriptPath(object=obj, dbName=dbName)
        if not os.path.exists(originScriptPath):
            # check existence before asking for the modification time, otherwise getmtime would raise
            _infoTs("File %s does not seem to exist!" % (originScriptPath))
        else:
            fileModTime = os.path.getmtime(originScriptPath)
            _dbx("now: %s mtime: %s" % (now, fileModTime))
            elaMinutes = (now - fileModTime) / 60
            _dbx("elaMinutes %s" % elaMinutes)
            if elaMinutes > staleMinutesOk:
                raise ValueError("file %s is %s minutes old!" % (originScriptPath, elaMinutes))
            prefix, fileExt = os.path.splitext(os.path.basename(originScriptPath))
            # also copy the original into the diff area for the user's convenience
            newBaseName = prefix + '-orgF' + fileExt
            newPathOfOriginFile = os.path.join(g_diffLocation, newBaseName)
            shutil.copy(originScriptPath, newPathOfOriginFile)
            _dbx("newPathOfOriginFile %s" % (newPathOfOriginFile))
            originFilePathsInDiffArea.append(newPathOfOriginFile)
            # create formatted copy and MOVE it to the diff area
            formattedOutPath = uglyFormat(inputFilePath=originScriptPath)
            newBaseName = prefix + '-ugly' + fileExt
            newPathOfFormattedFile = os.path.join(g_diffLocation, newBaseName)
            shutil.move(formattedOutPath, newPathOfFormattedFile)
            _infoTs("Formatted file to be found as %s " % (newPathOfFormattedFile))
            formattedFilePaths.append(newPathOfFormattedFile)
    # _errorExit( "originFilePathsInDiffArea len %s, formattedFilePaths len %s" % ( len( originFilePathsInDiffArea), len( formattedFilePaths) ) )
    return originFilePathsInDiffArea, formattedFilePaths
def action_os(inputFilePathsCsv, branchName=g_defaultBranchName, inputPathsFromJsonFile=None):
    if inputFilePathsCsv and inputPathsFromJsonFile:
        _infoTs("both inputFilePaths and inputPathsFromJsonFile have been provided. Will only consider inputFilePaths!")
        inputPathsFromJsonFile = None
    inputFilePaths = inputFilePathsCsv.split(",") if inputFilePathsCsv else []
    if inputPathsFromJsonFile:
        inputFilePaths = action_devTest(jsonFile=inputPathsFromJsonFile)
    # assert all input files exist
    for inputFilePath in inputFilePaths:
        if not os.path.exists(inputFilePath):
            raise ValueError("File %s does not seem to exist!" % (inputFilePath))
    # now we have asserted all input files ...
    for inputFilePath in inputFilePaths:
        prefix, fileExt = os.path.splitext(os.path.basename(inputFilePath))
        newBaseName = prefix + '-' + branchName + "-orgF" + fileExt
        tgtPathOfOrgFile = os.path.join(g_diffLocation, newBaseName)
        shutil.copy(inputFilePath, tgtPathOfOrgFile)
        formattedOutPath = uglyFormat(inputFilePath=inputFilePath)
        newBaseName = prefix + '-' + branchName + "-ugly" + fileExt
        tgtPathOfFormattedFile = os.path.join(g_diffLocation, newBaseName)
        shutil.move(formattedOutPath, tgtPathOfFormattedFile)
        _infoTs("Formatted file %s moved to target" % (tgtPathOfFormattedFile))
    if inputPathsFromJsonFile:
        _infoTs("Only input files specified in %s were considered " % (inputPathsFromJsonFile))
def action_extractScripts(objCsv, envCsv, executeScript=True, connData=None):
    """ Extract scripts into the expected local directory
    """
    envs = envCsv.split(",")
    connObjects = oraUtils.loadOraConnectionData()
    for envCode in envs:
        conn = oraUtils.getConnectionByNickname(nickname=envCode, nicknamedConns=connObjects)
        if conn is None:
            raise ValueError("env %s is not found in configuration!" % envCode)
        objectList = getObjectList(objCsv)
        _infoTs("fixme: make extraction of script optional!")
        sqlplusScriptPath = oraUtils.spoolScriptWithSqlplusTempClob(
            dbObjects=objectList, conn=conn, spoolDestRoot="C:\\temp\\", dirSep="\\", envCode=envCode)
        if executeScript:
            # dummyInput = input( "Hit ENTER to run SQLPLUS script" )
            _infoTs("Running sqlplus script %s..." % (sqlplusScriptPath), True)
            subprocess.call(f"sqlplus /nolog @{sqlplusScriptPath}")
            _infoTs("Executed sqlplus script.", True)
def action_createFileTree(files, targetLocation):
    """ Zip the given files:
    1. if at least 1 file starts with root, find a common root of all. In the worst case it is the root.
       For example /a/b/file1.txt and /a/foo/bar.py would have /a as common root
    2. remove the common root from all. The 2 files above become
       b/file1.txt
       foo/bar.py
    3. put the files into the zip with the remaining relative paths
    """
    if len(files) == 0:
        raise ValueError("list of files is empty")
    if files[0].startswith("/"):
        # if any file path starts with root, we strip off the common prefix
        commonRoot = os.path.commonprefix(files)
        _dbx("commonRoot %s" % commonRoot)
        pathsUsed = [os.path.relpath(file, commonRoot).rstrip("\n") for file in files]
    else:
        pathsUsed = [file.rstrip("\n") for file in files]
    _dbx("pathsUsed %s" % pathsUsed)
    zipArcPath = tempfile.mkstemp(suffix=".zip")[1]
    _dbx("zipArcPath %s" % zipArcPath)
    with zipfile.ZipFile(zipArcPath, 'w') as zipWriter:
        for filePath in pathsUsed:
            if os.path.exists(filePath):
                zipWriter.write(filePath)
            else:
                _infoTs("File at path %s does NOT exist!" % filePath)
    # for more efficiency, unzip it to the target location
    with zipfile.ZipFile(zipArcPath, 'r') as zipReader:
        _infoTs("creating file tree in %s ... " % targetLocation)
        zipReader.extractall(path=targetLocation, members=None)  # implies all members
    return zipArcPath
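
# Hypothetical usage sketch reusing the docstring example (paths are placeholders):
#   zipPath = action_createFileTree(
#       files=["/a/b/file1.txt\n", "/a/foo/bar.py\n"],
#       targetLocation=os.path.expanduser("~/Downloads/feature_x"))
# The common root "/a/" is stripped, so the tree created under targetLocation would hold
# b/file1.txt and foo/bar.py; note that the stripped relative paths must still resolve
# from the current working directory for the zip write to find the files.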
def spoolScriptWithSqlplusTempClob(spoolDestRoot, dirSep, dbObjects, conn=None, envCode=None, clobTempTable='tt_extract_ddl_clob'):
    """ Use sqlplus to spool the DDL scripts.
    This method requires a global temporary table accessible by the connecting user, into which
    the source code extracted from DBA_SOURCE is written line by line as CLOB
    """
    host, port, service, username = conn.host, conn.port, conn.service, conn.username
    spoolScriptHeader = """
WHENEVER SQLERROR EXIT
WHENEVER OSERROR EXIT
CONNECT {v_ez_connect}

column db_name new_val db_name
column spool_path_current new_val spool_path_current

ALTER SESSION SET NLS_LANGUAGE=GERMAN ;
set termout ON
SELECT sys_context( 'userenv', 'db_name' ) AS db_name , user connect_as FROM dual ;
set linesize 1000 longchunksize 9999999 long 9999999 pagesize 49999
"""
    scriptBlockFor1Object = """
WITH prep_ AS (
  SELECT 'c:\\temp\&db_name\\' as base_folder
  , UPPER( '{lv_object_name}' ) || '-'||'&db_name' as obj_name_and_db_name
  , CASE upper('{lv_object_type}')
      WHEN 'PACKAGE_BODY' THEN '.pkb'
      WHEN 'PACKAGE_SPEC' THEN '.pks'
      WHEN 'TRIGGER'      THEN '.trg'
      WHEN 'TYPE_BODY'    THEN '.tpb'
      WHEN 'TYPE_SPEC'    THEN '.tps'
      WHEN 'FUNCTION'     THEN '.fnc'
      WHEN 'PROCEDURE'    THEN '.prc'
      WHEN 'VIEW'         THEN '.vw'
      ELSE '.sql'
    END AS file_ext
  FROM DUAL
)
SELECT base_folder||obj_name_and_db_name||file_ext as spool_path_current FROM prep_ ;
PROMPT spool_path_current set to &spool_path_current

--CREATE global TEMPORARY TABLE tt_extract_ddl_clob ( owner varchar2(30), type varchar2(30), name varchar2(30), text clob ) on COMMIT preserve rows;

SET ECHO OFF VERIFY OFF
DECLARE
  lv_schema         VARCHAR2(30) := UPPER('{lv_schema}');
  lv_object_type    VARCHAR2(30) := UPPER('{lv_object_type}');
  lv_type_to_filter VARCHAR2(30) ;
  lv_object_name    VARCHAR2(30) := UPPER('{lv_object_name}');
  lv_clob           CLOB := 'CREATE OR REPLACE ';
  lv_text           LONG;
BEGIN
  lv_type_to_filter := CASE lv_object_type
    WHEN 'PACKAGE_SPEC' THEN 'PACKAGE'
    WHEN 'PACKAGE_BODY' THEN 'PACKAGE BODY'
    WHEN 'TYPE_SPEC'    THEN 'TYPE'
    WHEN 'TYPE_BODY'    THEN 'TYPE BODY'
    ELSE lv_object_type
    END;
  EXECUTE IMMEDIATE 'truncate table {clobTempTable}';
  FOR rec IN (
    SELECT line, text FROM dba_source
    WHERE owner = lv_schema AND type = lv_type_to_filter AND name = lv_object_name
    ORDER BY line
  ) LOOP
    lv_text := rec.text;
    dbms_lob.append( lv_clob, lv_text );
    -- dbms_OUTPUT.put_line( 'Ln'||$$plsql_line||': '||lv_offset );
    -- IF mod(rec.line, 13) = 1 THEN dbms_output.put_line( rec.text ); END IF;
  END LOOP;
  INSERT INTO {clobTempTable} ( text ) VALUES ( lv_clob );
  COMMIT;
END;
/
set termout off trimspool on head off
spool &spool_path_current
SELECT text FROM {clobTempTable} ;
spool off
"""
    spoolScriptTrailer = """
EXIT
"""
    password = getOraPassword(oraUser=username, oraPasswordEnvVar='ORA_SECRET', batchMode=False, envSuffix=envCode)
    ezConnect = """%s/"%s"@(DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=%s)(PORT=%s)))(CONNECT_DATA=(SERVER=DEDICATED)(SERVICE_NAME=%s)))""" % (
        username, password, host, port, service)
    spoolPath = tempfile.mktemp()
    if "want to fight" == "the DOS vs gitbash vs unix platform gap":
        sqlTermoutPath = tempfile.mktemp()
        _dbx("sqlTermoutPath %s" % sqlTermoutPath)
        sqlpTermoutFh = open(sqlTermoutPath, "w")
        sqlpJob = subprocess.Popen(["sqlplus", "/nolog"], stdin=subprocess.PIPE, stdout=sqlpTermoutFh)
        sqlpJob.stdin.write(spoolScript.encode('utf-8'))
        sqlpJob.communicate()
        sqlpTermoutFh = open(sqlTermoutPath, "r")
        _dbx(sqlpTermoutFh.readlines())
    else:
        # build one script block per DBObject
        _infoTs("The connecting user will need access to global temporary table %s !" % clobTempTable)
        scriptBlocks = []
        for obj in dbObjects:
            scriptBlocks.append(scriptBlockFor1Object.format(
                spool_dest_root=spoolDestRoot, lv_schema=obj.owner, lv_object_type=obj.type,
                lv_object_name=obj.name, dir_sep=dirSep, clobTempTable=clobTempTable))
        _dbx("len( scriptBlocks ) : %d " % len(scriptBlocks))
        headerFormatted = spoolScriptHeader.format(v_ez_connect=ezConnect)
        spoolScript = "%s \n%s \n%s" % (headerFormatted, "\n".join(scriptBlocks), spoolScriptTrailer)
        sqlplusScriptPath = tempfile.mktemp() + '.sql'
        _dbx("sqlplusScriptPath: %s" % (sqlplusScriptPath))
        open(sqlplusScriptPath, "w").write(spoolScript)
    return sqlplusScriptPath
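
# One-time setup sketch, taken from the CREATE statement kept as a comment inside the
# spool template above (column sizes are as given there and may need adjusting):
#   CREATE GLOBAL TEMPORARY TABLE tt_extract_ddl_clob
#     ( owner varchar2(30), type varchar2(30), name varchar2(30), text clob )
#     ON COMMIT PRESERVE ROWS;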
def plsqlTokenize(inpLines):
    lnCnt = len(inpLines)
    _dbx(lnCnt)
    lineNo = 0
    nodeStack = TokenStack(name='plsqlTokenize')
    curTreeId = None
    tokBuf = ""
    interceptBufferLines = []
    (interceptStartLineNo, interceptStartColNo) = (-1, -1)  # just for clarity. First reference is when we hit block_comment_begin
    # match for alphanumString OR dblQuotedAlphanumString OR assignment OR singleEqual OR doubleEqual OR dotOperator
    # match macros
    # match block_comment_begin OR lineComment
    # match single quote, assignment operator
    # match comparison operators, named param operator
    # match arithmetic operators, left or right parenthesis, comma, semicolon
    # match Q notation begin in various flavours
    eng = re.compile(
        """^(\s*)(\$\$plsql_unit|\$\$plsql_line|q\{"|[\$#a-z0-9_]+|"[\$#a-z0-9_]+"|:=|>=|<=|>|<|!=|=>|=|/\*|--|\|\||\.\.|\.|%|\(|\)|\+|-|\*|/|,|;|@)(\s*)""",
        re.IGNORECASE)
    curSta = FsmState.start
    for line in inpLines[:29999]:
        colNo = 1
        lineNo += 1
        lnBuf = line
        eoLine = False
        if None != re.search('^(\s*)$', line):  # match empty line
            pass  # _dbx( 'Line %d is empty' % lineNo )
        else:
            pass  # _dbx( "line %d len: %d. Line content >>>>>>>>>%s" % ( lineNo, len( line ), line.rstrip("\n") ) )
        i = 0
        # do we need eoLine indeed or can we just bump colNo accordingly?
        while (i < 999 and colNo < len(line) and not eoLine):  # process line with safety belt against infinite loop
            i += 1
            #_dbx( "Ln/col %d/%d curSta: '%s'" % ( lineNo, colNo, curSta ) )
            if curSta == FsmState.find_block_comment_end:
                m = re.search('^(.*)(\*/)', lnBuf)  # match end of block comment
                if m == None:
                    #_dbx( "need to cache block comment" )
                    interceptBufferLines.append(lnBuf)
                    eoLine = True
                    continue
                else:  # found end of block comment
                    interceptBufferLines.append(m.group(1) + m.group(2))
                    _dbx("group1>>%s, group2>>%s, lnBuf>>>>%s" % (m.group(1), m.group(2), lnBuf))
                    lenUptoStarSlash = len(m.group(1)) + len(m.group(2))
                    _dbx("lenUptoStarSlash:%d" % (lenUptoStarSlash))
                    colNo += lenUptoStarSlash
                    _dbx("found block comment end at col %d" % colNo)
                    lnBuf = lnBuf[lenUptoStarSlash:]
                    _dbx("stuff at comment is >>>%s" % (lnBuf.rstrip("\n")))
                    curSta = FsmState.start
                    node = TokenNode(text="".join(interceptBufferLines),
                                     type=TokenType.block_comment_begin, staAtCreation=curSta,
                                     lineNo=interceptStartLineNo, colNo=interceptStartColNo,
                                     parentId=curTreeId)
                    nodeStack.push(node)
                    continue  # while not EOL
            elif curSta == FsmState.in_single_quoted_literal:
                #_dbx( "scanning for end single quote in >>> %s " % lnBuf )
                endOfLitFound, partOfLit = scanEndOfSQLiteral(lnBuf)
                if not endOfLitFound:  # line break is part of string literal
                    interceptBufferLines.append(lnBuf)
                    eoLine = True  # line is done
                else:  # found end of literal in line, possibly with rest not belonging to literal
                    curSta = FsmState.start
                    interceptBufferLines.append(partOfLit)
                    literalText = "".join(interceptBufferLines)
                    node = TokenNode(text=literalText,
                                     type=TokenType.single_quoted_literal_begin, staAtCreation=curSta,
                                     lineNo=interceptStartLineNo, colNo=interceptStartColNo,
                                     parentId=curTreeId)
                    nodeStack.push(node)
                    colNo += len(partOfLit)
                    lnBuf = line[colNo - 1:]
                    #_dbx( "lnBuf>>>%s" % lnBuf )
                continue
            elif curSta == FsmState.in_q_notation_begin:
                #_dbx( "scanning for end q notation literal in >>> %s " % lnBuf )
                endOfLitFound, partOfLit = scanEndOfQNotationLiteral(q2and3, lnBuf)
                if not endOfLitFound:  # line break is part of string literal
                    interceptBufferLines.append(lnBuf)
                    eoLine = True  # line is done
                else:  # found end of literal in line, possibly with rest not belonging to literal
                    curSta = FsmState.start
                    interceptBufferLines.append(partOfLit)
                    literalText = "".join(interceptBufferLines)
                    node = TokenNode(text=literalText,
                                     type=TokenType.single_quoted_literal_begin, staAtCreation=curSta,
                                     lineNo=interceptStartLineNo, colNo=interceptStartColNo,
                                     parentId=curTreeId)
                    nodeStack.push(node)
                    colNo += len(partOfLit)
                    lnBuf = line[colNo - 1:]
                    # _dbx( "lnBuf>>>%s" % lnBuf )
                continue
            m = re.search('^(\s*)$', lnBuf)  # match empty rest of line
            if m != None:
                eoLine = True
            if eoLine:
                continue
            # process other types of token
            m = eng.match(lnBuf)
            # _dbx( type( m ) )
            #_dbx( 'lnBuf being parsed >>>>>> %s' % lnBuf.rstrip("\n") )
            if m == None:
                # the special scan for single quoted literal is no longer needed since we can use the triple single quotes!
                m = re.search("^(\s*)(')", lnBuf)  # match single quote
                if m != None:  # found single quote
                    # stateStack.push( curSta, curTreeId )
                    curSta = FsmState.in_single_quoted_literal
                    interceptBufferLines = []
                    (interceptStartLineNo, interceptStartColNo) = (lineNo, colNo)
                    interceptBufferLines.append(m.group(2))
                    colNo += len(m.group(1) + m.group(2))
                    lnBuf = line[colNo - 1:]
                    #_dbx( colNo )
                    continue  # we must skip the fine-grained FSM
                else:
                    _infoTs("Rest of line %d could not be tokenized. Line content follows \n%s" % (lineNo, lnBuf))
                    return nodeStack
            else:
                tok = m.group(2)  # second re group i.e. token
                # third re group i.e. optional whitespaces
                #if len( m.group(3) ) > 0: # found trailing whitespaces
                colNo += len(m.group(1)) + len(m.group(2)) + len(m.group(3))
                # _dbx( "colNo: %d" % colNo )
                #_dbx( "Ln/col %d/%d raw tok: '%s'" % ( lineNo, colNo, tok ) )
                lnBuf = line[colNo - 1:]
                # _dbx( "rest of line: %s" % lnBuf.rstrip("\n") )
                tokTyp, normed = gettokentype(tok)
                #_dbx( "tokTyp: %s normed: '%s'" % ( tokTyp, normed ) )
                if tokTyp == TokenType.block_comment_begin:
                    if curSta == FsmState.find_block_comment_end:
                        _errorExit("Encountered tokTyp %s while in state %s!" % (tokTyp, curSta))
                    else:
                        # found block_comment in the middle of somewhere, switch the parser to specifically search for end of comment
                        curSta = FsmState.find_block_comment_end
                        interceptBufferLines = []
                        (interceptStartLineNo, interceptStartColNo) = (lineNo, colNo)
                        interceptBufferLines.append(tok)
                        #_dbx( "we must skip the fine-grained FSM ")
                        continue  # we must skip the fine-grained FSM
                elif tokTyp == TokenType.single_line_comment_begin:  # found double minus
                    #_dbx( foo )
                    if curSta == FsmState.find_block_comment_end:
                        _errorExit("Encountered tokTyp %s while in state %s!" % (tokTyp, curSta))
                    else:  # not in wrong status, just push line comment node, no change of state
                        node = TokenNode(text=normed + lnBuf.rstrip("\n"),
                                         type=TokenType.single_line_comment_begin, staAtCreation=curSta,
                                         lineNo=lineNo, colNo=colNo - len(normed), parentId=curTreeId)
                        nodeStack.push(node)
                        eoLine = True
                        continue
                elif tokTyp == TokenType.q_notation_begin:
                    #_dbx( foo )
                    if curSta == FsmState.in_q_notation_begin:
                        _errorExit("Encountered tokTyp %s while in state %s!" % (tokTyp, curSta))
                    else:  # not in a wrong status; start intercepting the Q-notation literal
                        # stateStack.push( curSta, curTreeId )
                        q2and3 = normed[1:3]  # should be the open bracket and single or double quote, in any order
                        _dbx("normed>>>%s lnBuf>>> %s" % (normed, lnBuf))
                        curSta = FsmState.in_q_notation_begin
                        interceptBufferLines = []
                        (interceptStartLineNo, interceptStartColNo) = (lineNo, colNo)
                        interceptBufferLines.append(m.group(1) + m.group(2))
                        continue  # we must skip the fine-grained FSM
                else:
                    pass
                    #_dbx( "lineNo/colNo: %d/%d lnBuf >>>%s" % ( lineNo, colNo, lnBuf ))
                node = TokenNode(text=normed, type=tokTyp, staAtCreation=None,
                                 lineNo=lineNo, colNo=colNo - len(normed), parentId=curTreeId)
                nodeStack.push(node)
    return nodeStack  # plsqlTokenize
def fsmMain(preTokStack, startStatus=FsmState.start):
    """ We make the ASSUMPTION that comment tokens are in a different stack and that the main
    stack only holds non-comment tokens. Once the significant tokens have been linked properly,
    another pass re-inserts the comment tokens based on lineNo/colNo
    """
    retTokStack = TokenStack(name="fsmMain")
    stateStack = StateStack(name="main_state")
    preTokStackLen = len(preTokStack.arr)
    curTreeId = None
    curSta = startStatus
    thenComesFromStack = StateStack(name="thenComesFrom")
    while preTokStack.peekAhead() != None:
        curTokens = preTokStack.popComplexAware()
        tokId, normed, tokTyp = (curTokens[0].id, curTokens[0].text, curTokens[0].type)
        # lineNo, colNo = (curTokens[0].lineNo, curTokens[0].colNo )
        _dbx("curSta %s curTokens len %d, 1st id:%s type:%s >>>%s" % (curSta, len(curTokens), tokId, tokTyp, normed))
        if curSta == FsmState.start:
            if tokTyp == TokenType.relevant_keyword and normed == "CREATE":
                stateStack.push(curSta, curTreeId)
                newSta = FsmState.in_compilation_unit_header
                curTokens[0].state = staAtCreation = newSta
                newTreeId = curTokens[0].id
            else:
                _errorExit("Unknown token id %s type %s in state %s " % (tokId, tokTyp, curSta))
        elif curSta == FsmState.in_compilation_unit_header:
            if tokTyp == TokenType.relevant_keyword and normed == "AS":
                _dbx(foo)
                newSta = FsmState.in_declaration
                curTokens[0].state = staAtCreation = newSta
                newTreeId = curTokens[0].id
            elif tokTyp == TokenType.semicolon:  # forward declaration of function/procedure
                _dbx(foo)
                newSta, newTreeId = stateStack.pop()
            else:
                _dbx("other token type %s in state %s " % (tokTyp, curSta))
        elif curSta == FsmState.in_declaration:
            if tokTyp == TokenType.relevant_keyword and normed in ["BEGIN"]:
                _dbx(foo)
                newSta = FsmState.in_body
                newTreeId = curTokens[0].id
            else:
                _dbx("other token type %s in state %s " % (tokTyp, curSta))
                newSta = FsmState.started_declaration_entry
                newTreeId = curTokens[0].id
        elif curSta == FsmState.started_declaration_entry:
            if tokTyp == TokenType.semicolon:
                _dbx(foo)
                newSta = FsmState.in_declaration
            else:
                _dbx("other token type %s in state %s " % (tokTyp, curSta))
        elif curSta == FsmState.in_body:
            if tokTyp in [TokenType.aggEndIdentSemic, TokenType.aggEndSemic]:
                _dbx(foo)
                newSta, newTreeId = stateStack.pop()
            elif normed == "BEGIN":
                _dbx(foo)
                newSta = FsmState.in_body
                newTreeId = curTokens[0].id
                stateStack.push(curSta, curTreeId)
            elif normed in ['IF', 'WHILE']:
                newSta = FsmState.expect_bool_expression
                newTreeId = curTokens[0].id
                stateStack.push(curSta, curTreeId)
                if normed == "IF":
                    thenComesFromStack.push(FsmState.if_or_case_statement_open, None)
            elif normed == "CASE" and preTokStack.peekAhead().text == "WHEN":
                _dbx(foo)
                for nextTok in preTokStack.popComplexAware():
                    curTokens.append(nextTok)
                newSta = FsmState.expect_bool_expression
                newTreeId = curTokens[0].id
                stateStack.push(curSta, curTreeId)
                thenComesFromStack.push(FsmState.if_or_case_statement_open, None)
            elif normed == "CASE" and preTokStack.peekAhead().text != "WHEN":
                # here we must not pop the peeked token, it must go thru normal FSM
                newSta = FsmState.expect_expression
                newTreeId = curTokens[0].id
                # no pop expected!
                stateStack.push(curSta, curTreeId)
                thenComesFromStack.push(FsmState.if_or_case_statement_open)
            elif normed in ['DECLARE']:
                newSta = FsmState.in_declaration
                newTreeId = curTokens[0].id
                stateStack.push(curSta, curTreeId)
            else:
                _dbx("other token type %s in state %s " % (tokTyp, curSta))
                stateStack.push(curSta, curTreeId)
                newSta = FsmState.expect_expression
                newTreeId = curTokens[0].id
        elif curSta in [FsmState.expect_expression]:
            if tokTyp in [TokenType.semicolon, TokenType.aggEndSemic, TokenType.aggEndIfSemic,
                          TokenType.aggEndCaseSemic, TokenType.aggEndLoopSemic]:
                _dbx(foo)
                newSta, newTreeId = stateStack.pop()
            elif normed in ['THEN']:  # this is for "CASE WHEN .. THEN .."
                _dbx(foo)
                peekThenComesFrom = thenComesFromStack.peek()[0]  # we dont care about the parentId
                if peekThenComesFrom == FsmState.case_bool_expression_open:
                    newSta = FsmState.in_body
                    newTreeId = curTokens[0].id
                else:
                    _errorExit("Found THEN at %s without opening CASE token in thenComesFromStack" % tokId)
                thenComesFromStack.pop()  # ignore return values
            elif normed == "ELSE":
                _dbx(foo)
                newSta = FsmState.in_body
                newTreeId = curTokens[0].id
            elif normed == "CASE" and preTokStack.peekAhead().text == "WHEN":
                _dbx(foo)
                for nextTok in preTokStack.popComplexAware():
                    curTokens.append(nextTok)
                newSta = FsmState.expect_bool_expression
                newTreeId = curTokens[0].id
                # do not expect pop!
                stateStack.push(curSta, curTreeId)
                thenComesFromStack.push(FsmState.case_bool_expression_open, None)
            elif normed == "CASE" and preTokStack.peekAhead().text != "WHEN":
                _dbx(foo)
                # here we must not pop the peeked token, it must go thru normal FSM
                newSta = FsmState.expect_expression
                newTreeId = curTokens[0].id
                # stateStack.push( curSta, curTreeId)
                thenComesFromStack.push(FsmState.case_bool_expression_open, None)
            elif normed in ['LOOP']:  # this is for "FOR rec IN ( select * from xyz ) LOOP" or similar constructs
                newSta = FsmState.in_body
            elif tokTyp == TokenType.left_bracket:
                _dbx(foo)
                stateStack.push(curSta, curTreeId)
                newTreeId = curTokens[0].id
            elif tokTyp == TokenType.right_bracket:
                newSta, newTreeId = stateStack.pop()
            else:
                _dbx("other token type %s in state %s " % (tokTyp, curSta))
        elif curSta in [FsmState.expect_bool_expression]:
            _dbx(foo)
            if tokTyp in [TokenType.aggEndSemic]:  # this is for "CASE ... END;"
                _dbx(foo)
                newSta, newTreeId = stateStack.pop()
            elif normed in ['THEN']:  # this is for "IF x THEN .. ELSE " or "WHILE y LOOP" or "CASE WHEN .. THEN .."
                _dbx(foo)
                peekThenComesFrom = thenComesFromStack.peek()[0]  # we dont care about the parentId
                if peekThenComesFrom == FsmState.if_or_case_statement_open:
                    newSta = FsmState.in_body
                    newTreeId = curTokens[0].id
                elif peekThenComesFrom == FsmState.case_bool_expression_open:
                    newSta = FsmState.expect_expression
                    newTreeId = curTokens[0].id
                else:
                    _errorExit("No matching OPENER for THEN at %s" % tokId)
                thenComesFromStack.pop()  # ignore return values
            elif normed in ['ELSE', 'ELSIF', 'LOOP']:  # this is for "IF x THEN .. ELSE " or "WHILE y LOOP" or "CASE WHEN .. THEN .."
                _dbx(foo)
                newSta = FsmState.in_body
                newTreeId = curTokens[0].id
            elif tokTyp == TokenType.left_bracket:
                _dbx(foo)
                stateStack.push(curSta, curTreeId)
                newTreeId = curTokens[0].id
            elif tokTyp == TokenType.right_bracket:
                newSta, newTreeId = stateStack.pop()
            elif normed == "CASE" and preTokStack.peekAhead().text == "WHEN":
                for nextTok in preTokStack.popComplexAware():
                    curTokens.append(nextTok)
                newSta = FsmState.expect_bool_expression
                newTreeId = curTokens[0].id
                stateStack.push(curSta, curTreeId)
            elif normed == "CASE" and preTokStack.peekAhead().text != "WHEN":
                # here we must not pop the peeked token, it must go thru normal FSM
                newSta = FsmState.expect_expression
                newTreeId = curTokens[0].id
                stateStack.push(curSta, curTreeId)
        else:
            _errorExit("No handler for state %s with input %s " % (curSta, tokTyp))
        for ix, curTok in enumerate(curTokens):
            # _dbx( "ix: %d type %s" % (ix, type( curTok)) )
            newNode = TokenNode(text=curTok.text, type=curTok.type, staAtCreation=curSta,
                                lineNo=curTok.lineNo, colNo=curTok.colNo, parentId=curTreeId)
            retTokStack.push(newNode)
        #_dbx( "ret stack len %d" % (len( retTokStack.arr ) ) )
        _dbx("cur sta %s new sta %s" % (curSta, newSta))
        curSta, curTreeId = newSta, newTreeId
    _infoTs("final sta was %s" % (newSta))
    if preTokStackLen != len(retTokStack.arr):
        _errorExit("OOPPS preTokStackLen is %d and len( retTokStack.arr ) is %d" % (preTokStackLen, len(retTokStack.arr)))
        return retTokStack
    #
    # editing mark
    #
    return retTokStack
def main():
    global g_fsmInitStatusCode
    argParserResult = parseCmdLine()
    if True:
        tree = fsm.plsqlTokenize(g_inpLines)
        formattedLines = tree.simpleFormatSemicolonAware()
        # print( "\n".join( formattedLines ) )
        if False or "want to" == "compare output manually":
            #print( "*"*20 + "input sql" + "*"*20 )
            #print( "".join( g_inpLines))
            print("*" * 20 + "formatted" + "*" * 20)
            print("\n".join(formattedLines))
        if "want to compare" == "char count":
            forCharCountCheck_A = tempfile.mktemp()
            _dbx("forCharCountCheck_A: %s" % (forCharCountCheck_A))
            charCounter_A = charCounter.TextCharStatsIgnoreCase(textName="sql input", txt=g_inpLines)
            charCountResultLines_A = charCounter_A.report(printToStdout=False)
            open(forCharCountCheck_A, "w").write("\n".join(charCountResultLines_A))

            forCharCountCheck_B = tempfile.mktemp()
            _dbx("forCharCountCheck_B: %s" % (forCharCountCheck_B))
            charCounter_B = charCounter.TextCharStatsIgnoreCase(textName="formatted output", txt=formattedLines)
            charCountResultLines_B = charCounter_B.report(printToStdout=False)
            open(forCharCountCheck_B, "w").write("\n".join(charCountResultLines_B))

            _infoTs(" ************ DIFFing CharCounts ... ")
            diffCharCountResult = genUnixDiff(forCharCountCheck_A, forCharCountCheck_B)
            _infoTs(" ************ result of DIFFing CharCounts")
            print(diffCharCountResult)
        if True:
            textWordCounter_a = charCounter.WordCounter(name="sql input", lines=g_inpLines, shortCode="sqlInput")
            textWordCounter_a.scan()
            wordCountResultLines_a = textWordCounter_a.report(printToStdout=False)
            forWordCountCheck_a = tempfile.mktemp()
            _dbx("forWordCountCheck_a: %s" % (forWordCountCheck_a))
            open(forWordCountCheck_a, "w").write("\n".join(wordCountResultLines_a))

            textWordCounter_b = charCounter.WordCounter(name="formatted result", lines=formattedLines, shortCode="sqlFormatted")
            textWordCounter_b.scan()
            wordCountResultLines_b = textWordCounter_b.report(printToStdout=False)
            forWordCountCheck_b = tempfile.mktemp()
            _dbx("forWordCountCheck_b: %s" % (forWordCountCheck_b))
            open(forWordCountCheck_b, "w").write("\n".join(wordCountResultLines_b))

            _infoTs(" ************ DIFFing WordCounts ... ")
            diffWordCountResult = genUnixDiff(forWordCountCheck_a, forWordCountCheck_b)
            _infoTs(" ************ result of DIFFing WORD Counts")
            print(diffWordCountResult)
        persistAndPrintName(textName="formatted %s" % argParserResult.inFile,
                            textContent=formattedLines,
                            baseNamePrefix=argParserResult.inFile + '-')
    if "want to " == "use fsmMain":
        commentStack, signifStack = plstopa.separateCommentsFromSignficants(tree)
        #print( "*"*80 ); commentStack.simpleDump()
        #print( "*"*80 ); signifStack.simpleDump()
        signifStack.assembleComplexTokens()
        #signifStack.simpleDump( markComplexIdents= True )
        useStatus = fsm.kickStartStatusByCode[g_fsmInitStatusCode] if g_fsmInitStatusCode != None else plstopa.FsmState.start
        parsedTree = fsm.fsmMain(signifStack, startStatus=useStatus)
        # parsedTree.simpleDump()
        # reunitedTree = plstopa.mergeTokenTrees( commentStack, parsedTree )
        reunitedTree = plstopa.mergeSignifcantAndCommentTrees(signifTree=parsedTree, commentTree=commentStack)
        _dbx("reunitedTree len %d" % (len(reunitedTree.arr)))
        print("*" * 30 + "reunited " + "*" * 20)
        #reunitedTree.simpleDump( markComplexIdents = True )
        # reunitedTree.finalizeStats()
        # for node in reunitedTree.arr: node.showInfo()
        print(reunitedTree.formatTokenText())
    if False:
        tree.assembleComplexTokens()
        # tree.simpleDump( markComplexIdents= False )
        tree.simpleDump(markComplexIdents=False)
def main():
    global g_listIndexedBySchemaType, g_unknownFeature
    homeLocation = os.path.expanduser("~")
    _dbx(homeLocation)
    cmdLnConfig = parseCmdLine()
    setDebug(cmdLnConfig.debug)
    usedFeatureName = getGitCurrBranchName() if cmdLnConfig.featureName == g_unknownFeature else cmdLnConfig.featureName
    _infoTs("usedFeatureName: %s" % usedFeatureName)
    if cmdLnConfig.baseCommit:
        linesOfTouchedScripts = extractTouchedScripts(commitA=cmdLnConfig.baseCommit, commitB=cmdLnConfig.lastCommit)
    else:
        _infoTs("reading touched lines from stdin..")
        linesOfTouchedScripts = sys.stdin.readlines()
    if cmdLnConfig.action == "extract":
        _infoTs("scripts found: %s" % "\n".join(linesOfTouchedScripts))
    elif cmdLnConfig.action == "make":
        sqlInstallTemplateFile = cmdLnConfig.sqlScriptTemplatePath
        if sqlInstallTemplateFile is None:
            moduleSelfDir = os.path.dirname(inspect.getfile(inspect.currentframe()))
            sqlInstallTemplateFile = os.path.join(moduleSelfDir, './install_template.sql')
        _infoTs("Will use following file as SQL install template: %s" % sqlInstallTemplateFile)
        fill_listIndexedBySchemaType(linesOfTouchedScripts=linesOfTouchedScripts)
        createSchemataInstallScripts(sqlScriptTemplatePath=sqlInstallTemplateFile,
                                     baseCommit=cmdLnConfig.baseCommit,
                                     lastCommit=cmdLnConfig.lastCommit,
                                     featureName=usedFeatureName,
                                     storeReleaseMetadata=cmdLnConfig.storeRelMeta,
                                     fileSufix=usedFeatureName)
        if len(g_filesToExcludeFromInstall) > 0:
            _infoTs("Some files may have been excluded based on blacklist!")
    elif cmdLnConfig.action == "zip":
        zipFile = action_createFileTree(files=linesOfTouchedScripts,
                                        targetLocation=os.path.join(homeLocation, 'Downloads', usedFeatureName))
        _infoTs("zip file can also be viewed at %s" % zipFile)
        _infoTs("Some files may have been excluded based on blacklist!")
    elif cmdLnConfig.action == "devTest":
        _dbx("got here")
        pass
def createSchemataInstallScripts(sqlScriptTemplatePath, baseCommit, lastCommit, featureName=None, fileSufix=None, storeReleaseMetadata=True):
    """ Create SQL and BAT install scripts for the schemata with deployable scripts.
    Deployable scripts are those where:
    1) the file is not at the top level of the schema and
    2) the extension is not blacklisted
    """
    global g_internalSepator, g_listIndexedBySchemaType, g_schemataFound
    insertSqlStmtTemplate = """
--------------------------------------------------------------------------------
-- Store software release information: at this position we also record attempted
-- deployment
--------------------------------------------------------------------------------
DECLARE
  lv_rel_id NUMBER;
BEGIN
  SELECT basic_data.APPL_RELEASE_SQ.nextval INTO lv_rel_id FROM dual;
  INSERT INTO basic_data.t_applied_releases( id, release_no, creation_dt )
  VALUES( lv_rel_id, q'[{featureName}]', sysdate );
  INSERT INTO basic_data.t_applied_files( id, release_id, filename )
  SELECT appl_files_sq.nextval, lv_rel_id,
    q'[{basenameSqlScript}, git-branch: {featureName}, baseline-commit:{baselineCommit}, last-commit:{lastCommit}]'
  FROM dual;
  COMMIT;
END;
/
"""
    suffixUsed = "-" + fileSufix if fileSufix else ""
    sentinelPatPrefix = "REM place_here_scripts_for:"
    fh = open(sqlScriptTemplatePath, mode="r")
    inpTemplateLines = fh.readlines()
    _dbx("got %d lines from template" % (len(inpTemplateLines)))
    scriptTemplateText = "".join(inpTemplateLines)
    tmpDir = tempfile.mkdtemp()
    _infoTs("install scripts will be placed under %s" % (tmpDir))
    batchScriptTemplate = """
SET NLS_LANG=GERMAN_GERMANY.WE8MSWIN1252
SQLPLUS /nolog @{sqlScriptBaseName}
"""
    readmeContentHeader = """
Order to run install scripts:
"""
    readmeContentFooter = """
All processes in groups xxx, yyy must be stopped
"""
    batchScripts = []
    for schema in g_schemataFound:
        _dbx("schema %s\n" % (schema))
        script4Schema = scriptTemplateText
        script4Schema = script4Schema.replace("<TARGET_SCHEMA>", schema)
        for schemaType in g_listIndexedBySchemaType.keys():
            if schemaType.startswith(schema):
                typeOnly = schemaType.split(g_internalSepator)[1]
                if typeOnly.upper() not in ['.SQL']:  # dirty fix to filter out top-level sql scripts
                    sentinelPattern = "%s%s" % (sentinelPatPrefix, typeOnly.upper())
                    _dbx("schemaType %s, sentinel %s" % (schemaType, sentinelPattern))
                    listOfScripts = g_listIndexedBySchemaType[schemaType]
                    _dbx("cnt scripts %s" % (len(listOfScripts)))
                    # aggregate scripts of schemaType into one string
                    stringToAppend = "\n".join(listOfScripts)
                    _dbx(stringToAppend)
                    found = script4Schema.find(sentinelPattern)
                    if found > 0:
                        _dbx("found pattern")
                        script4Schema = script4Schema.replace(
                            sentinelPattern, "\n%s\n%s" % (sentinelPattern, stringToAppend))
                    else:
                        _errorExit("Sentinel '%s' not found in template!" % (sentinelPattern))  # , isWarning = True
        # print( script4Schema )
        # now remove the sentinel lines
        tempScript = script4Schema
        newLines = []
        for line in tempScript.split("\n"):
            if not line.startswith(sentinelPatPrefix):
                newLines.append(line)
        script4Schema = "\n".join(newLines)
        schemaDir = os.path.join(tmpDir, schema)
        os.mkdir(schemaDir)
        basenameSqlScript = "install_%s%s.sql" % (schema, suffixUsed)
        _dbx(basenameSqlScript)
        scriptPathSql = os.path.join(schemaDir, basenameSqlScript)
        if storeReleaseMetadata:  # for INSERT of applied release information
            insertSqlStmt = insertSqlStmtTemplate.format(
                featureName=featureName, baselineCommit=baseCommit,
                lastCommit=lastCommit, basenameSqlScript=basenameSqlScript)
            _dbx(insertSqlStmt)
            script4Schema = script4Schema.format(
                placeHolderStoreReleaseMetadata=insertSqlStmt,
                baselineCommit=baseCommit, featureName=featureName)
        else:
            script4Schema = script4Schema.format(
                placeHolderStoreReleaseMetadata="",
                baselineCommit=baseCommit, featureName=featureName)
        batScriptBaseName = "install_%s%s.bat" % (schema, suffixUsed)
        scriptPathBat = os.path.join(schemaDir, batScriptBaseName)
        fh = open(scriptPathSql, mode="w")
        fh.write(script4Schema)
        fh.close()
        sqlScriptBaseName = os.path.basename(scriptPathSql)
        batchScriptContent = batchScriptTemplate.format(sqlScriptBaseName=sqlScriptBaseName)
        fh = open(scriptPathBat, mode="w")
        fh.write(batchScriptContent)
        fh.close()
        _infoTs("output SQL script unix-style >>>>> %s \nDOS style >>>: %s" % (dosPath2Unix(scriptPathSql), scriptPathSql))
        _infoTs("output BAT script unix-style >>>>> %s \nDOS style >>>: %s" % (dosPath2Unix(scriptPathBat), scriptPathBat))
        #_errorExit( "in test - stopped after 1 schema!! " )
        batchScripts.append(os.path.basename(scriptPathBat))
    # create install readme
    readmeFile = os.path.join(tmpDir, "install%s-readme.txt" % (suffixUsed))
    items = []
    for batchScript in batchScripts:
        items.append("?. %s" % (batchScript))
        _dbx(batchScript)
    itemsText = "\n".join(items)
    readmeText = "%s\n%s\n%s\n" % (readmeContentHeader, itemsText, readmeContentFooter)
    fh = open(readmeFile, mode="w")
    fh.write(readmeText)
    fh.close()
    _infoTs("readme file unix-style >>>>> %s \nDOS style >>>: %s" % (dosPath2Unix(readmeFile), readmeFile))