def generateData(scale, base, namenode, tableName=""):
    """Generate the raw dataset under ``base`` by running the tpcds-gen jar.

    Steps, in order:
      1. Obtain the "gen" report logger from ``buildReportLogger``.
      2. Convert ``scale`` with ``scaleConverter``.
      3. Create ``base`` via ``Hadoop.mkdir``; abort if it already exists or
         if the mkdir call reports a negative status.
      4. Run ``buildGen()``, change into ``workingDir + "/tpcds-gen"`` and
         pick a jar matching ``target/*.jar``.
      5. Run the generation job through ``Hadoop.run`` and then set the
         replication factor of ``base`` to 2 through ``Hadoop.setrep``.

    Args:
        scale: Requested scale; passed to ``scaleConverter`` and the
            stringified result is handed to ``Hadoop.run``.
        base: Target directory handed to ``Hadoop.ls`` / ``mkdir`` / ``run`` /
            ``setrep`` (presumably an HDFS path -- confirm against callers).
        namenode: Accepted for interface compatibility; not used here.
        tableName: Accepted for interface compatibility; not used because
            single-table generation is disabled (see NOTE below).

    Side effects:
        Changes the process working directory and may terminate the process
        through ``exit()`` on the error paths.
    """
    # buildReportLogger returns an indexable pair; only element 1 (the
    # logger object handed to uniInfoLog) is needed in this function.
    report = buildReportLogger("gen")[1]

    trueScale = scaleConverter(scale)
    uniInfoLog("Creating Dataset for " + str(trueScale), report)
    # Single-argument print(...) is valid both as a Python 2 print statement
    # and as a Python 3 function call.
    print(str(trueScale))

    # Hadoop.ls signals "not found" with -1 in element 0 of its result.
    if Hadoop.ls(base)[0] != -1:
        print("Base Directory already exists. Please change and rerun")
        exit()

    mkdirResult = Hadoop.mkdir(base)
    if mkdirResult[0] < 0:
        # Element 1 carries the message reported by the failed mkdir.
        print(mkdirResult[1])
        exit()

    buildGen()
    os.chdir(workingDir + "/tpcds-gen")

    # The original loop kept the last match of the glob; preserve that
    # choice, but fail with a clear message instead of a NameError when the
    # build produced no jar at all.
    jarCandidates = glob.glob("target/*.jar")
    if not jarCandidates:
        print("No jar found matching target/*.jar. Cannot run data generation")
        exit()
    jarFile = jarCandidates[-1]

    uniInfoLog("Data Generation MapRed Job Starting", report)
    # NOTE: per-table generation (Hadoop.runTable(jarFile, scale, base,
    # tableName)) does not work, so the full-dataset job is always run
    # regardless of tableName.
    # NOTE(review): the return values of Hadoop.run and Hadoop.setrep are not
    # checked; Hadoop.mkdir returns (status, message) -- confirm whether these
    # do too and whether failures should abort.
    Hadoop.run(jarFile, str(trueScale), base)
    uniInfoLog("Data Generation MapRed Job Complete", report)

    # Pass the report logger here as well, consistent with every other
    # uniInfoLog call in this function.
    uniInfoLog("Changing Replication Factor of RawData to 2", report)
    Hadoop.setrep(2, base)
def capacityReport(namenode, hdfsDir):
    """Print element 1 of the ``Hadoop.size`` result for ``hdfsDir``.

    Args:
        namenode: Accepted for interface compatibility; not used here.
        hdfsDir: Directory passed to ``Hadoop.size``.
    """
    sizeInfo = Hadoop.size(hdfsDir)
    # Single-argument print(...) behaves the same under the Python 2 print
    # statement and the Python 3 print function.
    print(sizeInfo[1])