def executeQueries(master, database, username, password, queryList, hostsFile, explain,adminUser, adminPassword, scale, emailAddress=""): loggerInfo = buildReportLogger("queries") reportName = loggerInfo[0] report = loggerInfo[1] header = [] explainString="" if explain: explainString = "explain analyze " startString = "Query Execution Phase" uniInfoLog(startString, report) header = "Executing HAWQ Queries for a "+str(scale)+" Data Set" uniInfoLog(header, report) hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password) queryLocations = [] if int(int(queryList[0])) <> 0: # Loop for queryNum in queryList: uniInfoLog("Running Query " + queryNum, report) if int(queryNum) < 10: queryNum = "0" + queryNum queryLocations.append('./hawq-ddl/queries/"+scale+"/query_' + str(queryNum) + '.sql') else: uniInfoLog("Running all Queries", report) queryLocations = sorted(glob.glob("./hawq-ddl/queries/" + scale + "/*.sql")) with queries.Session(hawqURI) as session: for query in queryLocations: uniInfoLog(clearBuffers(hostsFile, adminUser, adminPassword), report) ddlFile = open(query, "r") queryName = ((query.split("/")[4]).split(".")[0]).rstrip() queryString = ddlFile.read() queryString = explainString+queryString startTime = time.time() result = session.query(queryString) if explain: queryPlan="" uniInfoLog(result.query,report) for row in result: queryPlan = queryPlan+str(row['QUERY PLAN']+"\n") uniInfoLog(queryPlan,report) stopTime = time.time() queryTime = stopTime - startTime results = "Query Complete: %s Execution Time(s): %0.2f Rows Returned: %s" % ( queryName, queryTime, str(result.count())) uniInfoLog(results, report) if emailAddress: Email.sendEmail(emailAddress, results[:25], results) if (emailAddress): messageLines = [] with open(reportName, "r") as reportMsg: for line in reportMsg.readlines(): messageLines.append(line) message = " ".join(messageLines) Email.sendEmail(emailAddress, "Query Final Report: " + (reportName.split('/')[2])[:-4], message)
def analyzeHawqTables(master, database, username, password, emailAddress=""): loggerInfo = buildReportLogger("analyze") reportName = loggerInfo[0] report = loggerInfo[1] header = [] startString = "Analyze Database Tables to Generate Statistics" uniInfoLog(startString, report) header = "Analyzing HAWQ Tables" uniInfoLog(header, report) analyzeCMD = "analyzedb -d " + database + " -a -p 10" ssh.exec_command2(master, username, password, analyzeCMD) # hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password) # with queries.Session(hawqURI) as session: # uniInfoLog("Analyze Dimension Tables", report) # for table in dimensionTables: # ddlString = "Analyze " + table # startTime = datetime.datetime.now() # uniInfoLog("Start " + ddlString + ": " + str(startTime), report) # result = session.query(ddlString) # stopTime = datetime.datetime.now() # resultString = "Completed " + ddlString + ": " + str(stopTime) + " Elapsed Time: " + str( # stopTime - startTime) # uniInfoLog(resultString, report) # if emailAddress: # Email.sendEmail(emailAddress, ddlString + " Complete", resultString) # uniInfoLog("Analyze Fact Tables", report) # # for table in factTables: # ddlString = "analyze " + table # startTime = datetime.datetime.now() # uniInfoLog("Start " + ddlString + ": " + str(startTime), report) # result = session.query(ddlString) # stopTime = datetime.datetime.now() # resultString = "Completed " + ddlString + ": " + str(stopTime) + " Elapsed Time: " + str( # stopTime - startTime) # uniInfoLog(resultString, report) # if emailAddress: # Email.sendEmail(emailAddress, ddlString + " Complete", resultString) # # if (emailAddress): # messageLines = [] # with open(reportName, "r") as reportMsg: # for line in reportMsg.readlines(): # messageLines.append(line) # message = " ".join(messageLines) # Email.sendEmail(emailAddress, "Table Analyze Final Report: " + (reportName.split('/')[2])[:-4], message) if (emailAddress): Email.sendEmail(emailAddress, "Table Analyze Final Report: " + (reportName.split('/')[2])[:-4], "AnalyzeDB Complete")
def loadHawqTables(master, username, password, database, emailAddress): loggerInfo = buildReportLogger("load") reportName = loggerInfo[0] report = loggerInfo[1] uniInfoLog("Load HAWQ Internal Tables", report) hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password) loadList = sorted(glob.glob('./hawq-ddl/load/*.sql')) for load in loadList: ddlFile = open(load, "r") tableName = ((load.split("/")[3]).split(".")[0])[:-5] loadDDL = ddlFile.read() startTime = datetime.datetime.now() pxfName = tableName if tableName in factTables: tableName = tableName + "_nopart" uniInfoLog("Starting Load of " + tableName, report) with queries.Session(hawqURI) as session: result = session.query(loadDDL) stopTime = datetime.datetime.now() uniInfoLog("Completed Load of " + tableName, report) uniInfoLog("Load Time: " + str(stopTime - startTime), report) rowsPXF = rowCount(master, database, username, password, pxfName + "_pxf") rows = rowCount(master, database, username, password, tableName) uniInfoLog("Expected Rows: " + str(rowsPXF), report) uniInfoLog("Actual Rows : " + str(rows), report) if emailAddress: Email.sendEmail(emailAddress, "Completed Load of " + tableName, "Loaded " + str(rows) + " Rows") if (emailAddress): messageLines = [] with open(reportName, "r") as reportMsg: for line in reportMsg.readlines(): messageLines.append(line) message = " ".join(messageLines) Email.sendEmail(emailAddress, "Table Load Final Report: " + (reportName.split('/')[2])[:-4], message)
def analyzeHawqTables(master, database, username, password, emailAddress=""): loggerInfo = buildReportLogger("analyze") reportName = loggerInfo[0] report = loggerInfo[1] header = [] startString = "Analyze Database Tables to Generate Statistics" uniInfoLog(startString, report) header = "Analyzing HAWQ Tables" uniInfoLog(header, report) hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password) with queries.Session(hawqURI) as session: uniInfoLog("Analyze Dimension Tables", report) for table in dimensionTables: ddlString = "Analyze " + table startTime = datetime.datetime.now() uniInfoLog("Start " + ddlString + ": " + str(startTime), report) result = session.query(ddlString) stopTime = datetime.datetime.now() resultString = "Completed " + ddlString + ": " + str(stopTime) + " Elapsed Time: " + str( stopTime - startTime) uniInfoLog(resultString, report) if emailAddress: Email.sendEmail(emailAddress, ddlString + " Complete", resultString) uniInfoLog("Analyze Fact Tables", report) for table in factTables: ddlString = "analyze " + table startTime = datetime.datetime.now() uniInfoLog("Start " + ddlString + ": " + str(startTime), report) result = session.query(ddlString) stopTime = datetime.datetime.now() resultString = "Completed " + ddlString + ": " + str(stopTime) + " Elapsed Time: " + str( stopTime - startTime) uniInfoLog(resultString, report) if emailAddress: Email.sendEmail(emailAddress, ddlString + " Complete", resultString) if (emailAddress): messageLines = [] with open(reportName, "r") as reportMsg: for line in reportMsg.readlines(): messageLines.append(line) message = " ".join(messageLines) Email.sendEmail(emailAddress, "Table Analyze Final Report: " + (reportName.split('/')[2])[:-4], message)
def partitionTables(master, parts, username, password, database, orientation, byPart, compression, rowGroupSize, emailAddress=""): loggerInfo = buildReportLogger("partitioning") reportName = loggerInfo[0] report = loggerInfo[1] startString = "Partitioning Tables into " + str(parts) + " Day Partitions in " + orientation + " Format" uniInfoLog(startString, report) if orientation.upper() == "PARQUET": # orientation = "PARQUET,ROWGROUPSIZE=1073741823,COMPRESSTYPE=snappy" orientation = "PARQUET,ROWGROUPSIZE=" + rowGroupSize + ",COMPRESSTYPE=" + compression hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password) if byPart: loadList = sorted(glob.glob('./hawq-ddl/load-partbypart/*.sql')) else: loadList = sorted(glob.glob('./hawq-ddl/load-part/*.sql')) tableList = sorted(glob.glob('./hawq-ddl/hawq-part/*.sql')) with queries.Session(hawqURI) as session: for table in tableList: ddlFile = open(table, "r") tableName = (table.split("/")[3]).split(".")[0] createStatus = "Creating Table: " + tableName uniInfoLog(createStatus, report) tableDDL = ddlFile.read() tableDDL = tableDDL.replace("$PARTS", parts) tableDDL = tableDDL.replace("$ORIENTATION", orientation) result = session.query(tableDDL) createStatus = "Table Created: " + tableName uniInfoLog(createStatus, report) if emailAddress: Email.sendEmail(emailAddress, createStatus, createStatus) # Hard Coded for now because Schema is HardCoded as well startDate = 2450815 endDate = 2453005 totalDays = endDate - startDate for load in loadList: ddlFile = open(load, "r") loadDDL = ddlFile.read() if byPart: tableName = ((load.split("/")[3]).split(".")[0])[:-13] else: tableName = ((load.split("/")[3]).split(".")[0])[:-5] loadStatus = "Loading: " + tableName uniInfoLog(loadStatus, report) ddlFile = open(load, "r") loadDDL = ddlFile.read() if byPart: partCount = getPartitionCount(master, database, username, password, "inventory") partStart = startDate for partNum in range(2, partCount + 1): modDDL = loadDDL # with queries.Session(hawqURI) as session: partName = tableName + "_1_prt_" + str(partNum) # End of part is num days in the part added to the first day partEnd = partStart + (int(parts) - 1) modDDL = modDDL.replace("$PARTNAME", str(partName)) modDDL = modDDL.replace("$PARTVALUE1", str(partStart)) modDDL = modDDL.replace("$PARTVALUE2", str(partEnd)) with queries.Session(hawqURI) as session: result = session.query(modDDL) partStart = partEnd + 1 createStatus = "Table Partition Loaded: " + partName uniInfoLog(createStatus, report) createStatus = "Table Loaded: " + tableName uniInfoLog(createStatus, report) if emailAddress: Email.sendEmail(emailAddress, createStatus, createStatus) # alterniatve # SELECT partitionboundary, partitiontablename, partitionname, partitionlevel, partitionrank FROM pg_partitions WHERE tablename='catalog_returns'; else: with queries.Session(hawqURI) as session: result = session.query(loadDDL) createStatus = "Table Loaded: " + tableName uniInfoLog(createStatus, report) if emailAddress: Email.sendEmail(emailAddress, createStatus, createStatus) if (emailAddress): messageLines = [] with open(reportName, "r") as reportMsg: for line in reportMsg.readlines(): messageLines.append(line) message = " ".join(messageLines) Email.sendEmail(emailAddress, "Repartition Final Report: " + (reportName.split('/')[2])[:-4], message)