Пример #1
0
def generateData(scale, base, namenode, tableName=""):
    """Generate the raw dataset under ``base`` with the tpcds-gen MapReduce job.

    Steps, in order:
      1. Convert ``scale`` with ``scaleConverter`` and log/print the result.
      2. Create ``base`` through the ``Hadoop`` helper; it must not exist yet.
      3. Build the generator (``buildGen``) and change the process working
         directory to ``workingDir + "/tpcds-gen"``.
      4. Run the jar found under ``target/`` and then set the replication
         factor of ``base`` to 2.

    Args:
        scale: Requested scale; passed through ``scaleConverter`` before use.
        base: Directory the data is generated into.
        namenode: Not used by this function.
        tableName: Not used; single-table generation is disabled because it
            does not work.

    Raises:
        RuntimeError: If no jar file exists under ``target/`` after the build.

    The process is terminated with ``exit()`` when ``base`` already exists or
    cannot be created.
    """
    report = buildReportLogger("gen")[1]
    trueScale = scaleConverter(scale)
    uniInfoLog("Creating Dataset for " + str(trueScale), report)
    print(str(trueScale))

    # A status of -1 from Hadoop.ls is treated as "base does not exist yet".
    if Hadoop.ls(base)[0] != -1:
        print("Base Directory already exists.  Please change and rerun")
        exit()
    result = Hadoop.mkdir(base)
    if result[0] < 0:
        # result[1] carries the message reported by the failed mkdir.
        print(result[1])
        exit()

    buildGen()
    os.chdir(workingDir + "/tpcds-gen")

    jars = glob.glob("target/*.jar")
    if not jars:
        # Fail with a clear message instead of an unbound-variable NameError.
        raise RuntimeError(
            "No jar file found in " + os.path.join(os.getcwd(), "target")
        )
    # When several jars match, the last one returned by glob is used.
    jarFile = jars[-1]

    uniInfoLog("Data Generation MapRed Job Starting", report)
    # Per-table generation does not work, so the full generator is always run
    # and tableName is ignored.
    # NOTE(review): the value returned by Hadoop.run is not inspected here;
    # confirm its shape before adding a failure check.
    Hadoop.run(jarFile, str(trueScale), base)
    uniInfoLog("Data Generation MapRed Job Complete", report)
    uniInfoLog("Changing Replication Factor of RawData to 2", report)
    Hadoop.setrep(2, base)
Пример #2
0
def capacityReport(namenode, hdfsDir):
    """Print the second element of what ``Hadoop.size`` returns for ``hdfsDir``.

    Args:
        namenode: Not used by this function.
        hdfsDir: Path handed to ``Hadoop.size``.
    """
    print(Hadoop.size(hdfsDir)[1])