예제 #1
0
def executeQueries(master, database, username, password, queryList, hostsFile, explain,adminUser, adminPassword, scale,
                   emailAddress=""):
    loggerInfo = buildReportLogger("queries")
    reportName = loggerInfo[0]
    report = loggerInfo[1]
    header = []
    explainString=""
    if explain:
        explainString = "explain analyze "
    startString = "Query Execution Phase"
    uniInfoLog(startString, report)
    header = "Executing HAWQ Queries for a "+str(scale)+" Data Set"
    uniInfoLog(header, report)
    hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password)
    queryLocations = []

    if int(int(queryList[0])) <> 0:

        # Loop
        for queryNum in queryList:
            uniInfoLog("Running Query " + queryNum, report)

            if int(queryNum) < 10:
                queryNum = "0" + queryNum
            queryLocations.append('./hawq-ddl/queries/"+scale+"/query_' + str(queryNum) + '.sql')
    else:
        uniInfoLog("Running all Queries", report)
        queryLocations = sorted(glob.glob("./hawq-ddl/queries/" + scale + "/*.sql"))

    with queries.Session(hawqURI) as session:
        for query in queryLocations:
            uniInfoLog(clearBuffers(hostsFile, adminUser, adminPassword), report)
            ddlFile = open(query, "r")
            queryName = ((query.split("/")[4]).split(".")[0]).rstrip()
            queryString = ddlFile.read()
            queryString = explainString+queryString
            startTime = time.time()
            result = session.query(queryString)
            if explain:
                queryPlan=""
                uniInfoLog(result.query,report)
                for row in result:
                    queryPlan = queryPlan+str(row['QUERY PLAN']+"\n")
                uniInfoLog(queryPlan,report)
            stopTime = time.time()
            queryTime = stopTime - startTime
            results = "Query Complete: %s   Execution Time(s): %0.2f  Rows Returned: %s" % (
                queryName, queryTime, str(result.count()))
            uniInfoLog(results, report)
            if emailAddress:
                Email.sendEmail(emailAddress, results[:25], results)
        if (emailAddress):
            messageLines = []
            with open(reportName, "r") as reportMsg:
                for line in reportMsg.readlines():
                    messageLines.append(line)
                message = " ".join(messageLines)
                Email.sendEmail(emailAddress, "Query Final Report: " + (reportName.split('/')[2])[:-4], message)
예제 #2
0
def analyzeHawqTables(master, database, username, password, emailAddress=""):
    loggerInfo = buildReportLogger("analyze")
    reportName = loggerInfo[0]
    report = loggerInfo[1]
    header = []
    startString = "Analyze Database Tables to Generate Statistics"
    uniInfoLog(startString, report)
    header = "Analyzing HAWQ Tables"
    uniInfoLog(header, report)
    analyzeCMD = "analyzedb -d " + database + " -a -p 10"
    ssh.exec_command2(master, username, password, analyzeCMD)

    # hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password)
    # with queries.Session(hawqURI) as session:
    #     uniInfoLog("Analyze Dimension Tables", report)
    #     for table in dimensionTables:
    #         ddlString = "Analyze " + table
    #         startTime = datetime.datetime.now()
    #         uniInfoLog("Start " + ddlString + ": " + str(startTime), report)
    #         result = session.query(ddlString)
    #         stopTime = datetime.datetime.now()
    #         resultString = "Completed " + ddlString + ": " + str(stopTime) + " Elapsed Time: " + str(
    #             stopTime - startTime)
    #         uniInfoLog(resultString, report)
    #         if emailAddress:
    #             Email.sendEmail(emailAddress, ddlString + " Complete", resultString)
    #     uniInfoLog("Analyze Fact Tables", report)
    #
    #     for table in factTables:
    #         ddlString = "analyze " + table
    #         startTime = datetime.datetime.now()
    #         uniInfoLog("Start " + ddlString + ": " + str(startTime), report)
    #         result = session.query(ddlString)
    #         stopTime = datetime.datetime.now()
    #         resultString = "Completed " + ddlString + ": " + str(stopTime) + " Elapsed Time: " + str(
    #             stopTime - startTime)
    #         uniInfoLog(resultString, report)
    #         if emailAddress:
    #             Email.sendEmail(emailAddress, ddlString + " Complete", resultString)
    #
    #     if (emailAddress):
    #         messageLines = []
    #         with open(reportName, "r") as reportMsg:
    #             for line in reportMsg.readlines():
    #                 messageLines.append(line)
    #             message = " ".join(messageLines)
    #             Email.sendEmail(emailAddress, "Table Analyze Final Report: " + (reportName.split('/')[2])[:-4], message)
    if (emailAddress):
        Email.sendEmail(emailAddress, "Table Analyze Final Report: " + (reportName.split('/')[2])[:-4],
                        "AnalyzeDB Complete")
예제 #3
0
def loadHawqTables(master, username, password, database, emailAddress):
    loggerInfo = buildReportLogger("load")
    reportName = loggerInfo[0]
    report = loggerInfo[1]
    uniInfoLog("Load HAWQ Internal Tables", report)

    hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password)
    loadList = sorted(glob.glob('./hawq-ddl/load/*.sql'))

    for load in loadList:
        ddlFile = open(load, "r")
        tableName = ((load.split("/")[3]).split(".")[0])[:-5]
        loadDDL = ddlFile.read()
        startTime = datetime.datetime.now()
        pxfName = tableName
        if tableName in factTables:
            tableName = tableName + "_nopart"
        uniInfoLog("Starting Load of " + tableName, report)
        with queries.Session(hawqURI) as session:
            result = session.query(loadDDL)
        stopTime = datetime.datetime.now()
        uniInfoLog("Completed Load of " + tableName, report)
        uniInfoLog("Load Time: " + str(stopTime - startTime), report)
        rowsPXF = rowCount(master, database, username, password, pxfName + "_pxf")
        rows = rowCount(master, database, username, password, tableName)
        uniInfoLog("Expected Rows: " + str(rowsPXF), report)
        uniInfoLog("Actual Rows  : " + str(rows), report)
        if emailAddress:
            Email.sendEmail(emailAddress, "Completed Load of " + tableName, "Loaded " + str(rows) + " Rows")
    if (emailAddress):
        messageLines = []
        with open(reportName, "r") as reportMsg:
            for line in reportMsg.readlines():
                messageLines.append(line)
            message = " ".join(messageLines)
            Email.sendEmail(emailAddress, "Table Load Final Report: " + (reportName.split('/')[2])[:-4], message)
예제 #4
0
def analyzeHawqTables(master, database, username, password, emailAddress=""):
    loggerInfo = buildReportLogger("analyze")
    reportName = loggerInfo[0]
    report = loggerInfo[1]
    header = []
    startString = "Analyze Database Tables to Generate Statistics"
    uniInfoLog(startString, report)
    header = "Analyzing HAWQ Tables"
    uniInfoLog(header, report)

    hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password)
    with queries.Session(hawqURI) as session:
        uniInfoLog("Analyze Dimension Tables", report)
        for table in dimensionTables:
            ddlString = "Analyze " + table
            startTime = datetime.datetime.now()
            uniInfoLog("Start " + ddlString + ": " + str(startTime), report)
            result = session.query(ddlString)
            stopTime = datetime.datetime.now()
            resultString = "Completed " + ddlString + ": " + str(stopTime) + " Elapsed Time: " + str(
                stopTime - startTime)
            uniInfoLog(resultString, report)
            if emailAddress:
                Email.sendEmail(emailAddress, ddlString + " Complete", resultString)
        uniInfoLog("Analyze Fact Tables", report)

        for table in factTables:
            ddlString = "analyze " + table
            startTime = datetime.datetime.now()
            uniInfoLog("Start " + ddlString + ": " + str(startTime), report)
            result = session.query(ddlString)
            stopTime = datetime.datetime.now()
            resultString = "Completed " + ddlString + ": " + str(stopTime) + " Elapsed Time: " + str(
                stopTime - startTime)
            uniInfoLog(resultString, report)
            if emailAddress:
                Email.sendEmail(emailAddress, ddlString + " Complete", resultString)

        if (emailAddress):
            messageLines = []
            with open(reportName, "r") as reportMsg:
                for line in reportMsg.readlines():
                    messageLines.append(line)
                message = " ".join(messageLines)
                Email.sendEmail(emailAddress, "Table Analyze Final Report: " + (reportName.split('/')[2])[:-4], message)
예제 #5
0
def partitionTables(master, parts, username, password, database, orientation, byPart, compression, rowGroupSize,
                    emailAddress=""):
    loggerInfo = buildReportLogger("partitioning")
    reportName = loggerInfo[0]
    report = loggerInfo[1]
    startString = "Partitioning Tables into " + str(parts) + " Day Partitions in " + orientation + " Format"
    uniInfoLog(startString, report)
    if orientation.upper() == "PARQUET":
        # orientation = "PARQUET,ROWGROUPSIZE=1073741823,COMPRESSTYPE=snappy"
        orientation = "PARQUET,ROWGROUPSIZE=" + rowGroupSize + ",COMPRESSTYPE=" + compression

    hawqURI = queries.uri(master, port=5432, dbname=database, user=username, password=password)
    if byPart:
        loadList = sorted(glob.glob('./hawq-ddl/load-partbypart/*.sql'))
    else:
        loadList = sorted(glob.glob('./hawq-ddl/load-part/*.sql'))

    tableList = sorted(glob.glob('./hawq-ddl/hawq-part/*.sql'))
    with queries.Session(hawqURI) as session:
        for table in tableList:
            ddlFile = open(table, "r")
            tableName = (table.split("/")[3]).split(".")[0]
            createStatus = "Creating Table: " + tableName
            uniInfoLog(createStatus, report)
            tableDDL = ddlFile.read()
            tableDDL = tableDDL.replace("$PARTS", parts)
            tableDDL = tableDDL.replace("$ORIENTATION", orientation)
            result = session.query(tableDDL)
            createStatus = "Table Created: " + tableName
            uniInfoLog(createStatus, report)
            if emailAddress:
                Email.sendEmail(emailAddress, createStatus, createStatus)


    # Hard Coded for now because Schema is HardCoded as well
    startDate = 2450815
    endDate = 2453005
    totalDays = endDate - startDate

    for load in loadList:
        ddlFile = open(load, "r")
        loadDDL = ddlFile.read()
        if byPart:
            tableName = ((load.split("/")[3]).split(".")[0])[:-13]
        else:
            tableName = ((load.split("/")[3]).split(".")[0])[:-5]
        loadStatus = "Loading: " + tableName
        uniInfoLog(loadStatus, report)
        ddlFile = open(load, "r")
        loadDDL = ddlFile.read()
        if byPart:
            partCount = getPartitionCount(master, database, username, password, "inventory")
            partStart = startDate

            for partNum in range(2, partCount + 1):
                modDDL = loadDDL
                # with queries.Session(hawqURI) as session:

                partName = tableName + "_1_prt_" + str(partNum)
                # End of part is num days in the part added to the first day
                partEnd = partStart + (int(parts) - 1)
                modDDL = modDDL.replace("$PARTNAME", str(partName))
                modDDL = modDDL.replace("$PARTVALUE1", str(partStart))
                modDDL = modDDL.replace("$PARTVALUE2", str(partEnd))

                with queries.Session(hawqURI) as session:
                    result = session.query(modDDL)
                partStart = partEnd + 1
                createStatus = "Table Partition Loaded: " + partName
                uniInfoLog(createStatus, report)
            createStatus = "Table Loaded: " + tableName
            uniInfoLog(createStatus, report)
            if emailAddress:
                Email.sendEmail(emailAddress, createStatus, createStatus)
                # alterniatve
                # SELECT partitionboundary, partitiontablename, partitionname, partitionlevel, partitionrank FROM pg_partitions WHERE tablename='catalog_returns';

        else:
            with queries.Session(hawqURI) as session:
                result = session.query(loadDDL)
            createStatus = "Table Loaded: " + tableName
            uniInfoLog(createStatus, report)
            if emailAddress:
                Email.sendEmail(emailAddress, createStatus, createStatus)

    if (emailAddress):
        messageLines = []
        with open(reportName, "r") as reportMsg:
            for line in reportMsg.readlines():
                messageLines.append(line)
            message = " ".join(messageLines)
            Email.sendEmail(emailAddress, "Repartition Final Report: " + (reportName.split('/')[2])[:-4], message)