def getMnaFeature():
    """Extract MNA features from the ranked-feature file.

    For every input line, keep the ground-truth label (column 0) plus the
    raw feature tokens (columns 2..-3, dropping column 1 and the last two
    columns), and write them space-separated to the MNA feature file.
    """
    with open(ut.getFileLocation(outputPath, featureMnaFilename), "w") as fo, \
            open(ut.getFileLocation(outputPath, featureRankFilename), "r") as fi:
        for row in fi:
            tokens = row.strip().split(" ")
            # tokens[0] is the ground-truth label; tokens[1] and the
            # trailing two columns are not part of the MNA feature set
            fo.write(" ".join([tokens[0]] + tokens[2:-2]) + "\n")
def getNmFeature():
    """Extract NM features as CSV rows.

    For every input line, take the ground-truth label (column 0) and the
    values of the two "key:value" tokens in columns 2-3, and write them
    comma-separated to the NM feature file.
    """
    with open(ut.getFileLocation(outputPath, featureNmFilename), "w") as fo, \
            open(ut.getFileLocation(outputPath, featureRankFilename), "r") as fi:
        for row in fi:
            tokens = row.strip().split(" ")
            label = tokens[0]
            # columns 2-3 are "key:value" pairs; keep only the values
            values = [tok.split(":")[1] for tok in tokens[2:4]]
            fo.write(",".join([label] + values) + "\n")
def getMnaFeature():
    """Write MNA feature rows: ground truth plus the middle feature columns.

    NOTE(review): this is a verbatim duplicate of an earlier getMnaFeature
    definition in this file; at import time this later one takes effect —
    consider removing one copy.
    """
    srcPath = ut.getFileLocation(outputPath, featureRankFilename)
    dstPath = ut.getFileLocation(outputPath, featureMnaFilename)
    with open(dstPath, "w") as fo:
        with open(srcPath, "r") as fi:
            for entry in fi:
                cols = entry.strip().split(" ")
                # label first, then columns 2..-3 unchanged
                fo.write(" ".join([cols[0]] + cols[2:-2]) + "\n")
def getNmFeature():
    """Write NM feature rows: label plus the values of columns 2-3, as CSV.

    NOTE(review): this is a verbatim duplicate of an earlier getNmFeature
    definition in this file; at import time this later one takes effect —
    consider removing one copy.
    """
    srcPath = ut.getFileLocation(outputPath, featureRankFilename)
    dstPath = ut.getFileLocation(outputPath, featureNmFilename)
    with open(dstPath, "w") as fo:
        with open(srcPath, "r") as fi:
            for entry in fi:
                cols = entry.strip().split(" ")
                picked = []
                # columns 2-3 are "key:value" tokens; keep only the value part
                for tok in cols[2:4]:
                    picked.append(tok.split(":")[1])
                fo.write(",".join([cols[0]] + picked) + "\n")
def getSvmFeature():
    """Write SVM feature rows as CSV, zero-padded to 50 feature columns.

    Each output row is: ground truth, group id (value part of column 1),
    target id (last column), then the values of the "key:value" feature
    tokens in columns 2..-3, padded with "0" up to 50 entries.

    NOTE(review): this is a verbatim duplicate of a later getSvmFeature
    definition in this file; the later one takes effect at import time —
    consider removing one copy.
    """
    with open(ut.getFileLocation(outputPath, featureSvmFilename), "w") as fo, \
            open(ut.getFileLocation(outputPath, featureRankFilename), "r") as fi:
        for row in fi:
            cols = row.strip().split(" ")
            label = cols[0]
            groupId = cols[1].split(":")[1]
            targetId = cols[-1]
            values = [tok.split(":")[1] for tok in cols[2:-2]]
            # pad short rows so every line has exactly 50 feature columns
            while len(values) < 50:
                values.append("0")
            fo.write(",".join([label, groupId, targetId] + values) + "\n")
def getSvmFeature(featureDim=50):
    """Write SVM feature rows as CSV, zero-padded to a fixed width.

    Each output row is: ground truth (column 0), group id (value part of
    the "key:value" token in column 1), target id (last column), then the
    values of the "key:value" feature tokens in columns 2..-3, padded with
    "0" until the row has *featureDim* feature columns.

    Args:
        featureDim: number of feature columns per output row. Defaults to
            50, the width the original implementation hard-coded, so
            existing callers are unaffected.
    """
    with open(ut.getFileLocation(outputPath, featureSvmFilename), "w") as fo:
        with open(ut.getFileLocation(outputPath, featureRankFilename), "r") as fi:
            for line in fi:
                data = line.strip().split(" ")
                gt = data[0]
                gid = data[1].split(":")[1]  # "key:value" -> value
                tid = data[-1]
                features = [f.split(":")[1] for f in data[2:-2]]
                # pad short rows so every SVM row has featureDim columns
                if len(features) < featureDim:
                    features += ["0"] * (featureDim - len(features))
                fo.write(",".join([gt, gid, tid] + features) + "\n")
def mnaConstraint(n=1558):
    """Load MNA origin prediction scores and run the one-to-one mapping.

    Reads the origin prediction file (skipping its first header line),
    collects the float score from the second whitespace column of each
    remaining line, then hands the scores to oneMapping together with the
    constrained-prediction output filename and n.
    """
    with open(ut.getFileLocation(predPath, predictionMnaOriginFilename), "r") as fi:
        fi.readline()  # skip the header line
        scores = [float(row.strip().split()[1]) for row in fi]
    oneMapping(scores, predictionMnaConstraintFilename, n)
def main(): spark = utility.createSparkSession("Attunity EH Archiver to Data Lake") sliceDt = sys.argv[1] data_source = sys.argv[2] file_location = utility.getFileLocation(data_source, sliceDt) print file_location base = utility.getBase(spark, file_location) ub = utility.getunpackBody(spark, base) if ub.count() > 0: # Attunity specific stuff tableList = getTableList(spark, ub) sch = getConfiguredTableList() for tnameRow in tableList: try: getunpackBodyData( spark, ub, tnameRow['tname'], getConfiguredTableSchemaDefinition(sch, tnameRow['tname'])) print "Successful: %s" % (tnameRow['tname']) except: print "Failed: %s" % (tnameRow['tname']) raise else: print "Source file %s and all subdirectories are Empty" % ( file_location) spark.stop()
def main(): sliceDt = sys.argv[1] file_location = utility.getFileLocation(DATA_SOURCE,sliceDt) print file_location spark = utility.createSparkSession("Tealium EH Archiver to Data Lake") base = utility.getBase(spark,file_location) ub = utility.getunpackBody(spark,base) if ub.count() > 0: unpackBodyData = spark.read.json((ub.rdd.map(extractData))) unpackBodyData.write.saveAsTable(TARGET_TABLE_NAME,mode="append") else: print "Source file %s and all subdirectories are Empty" %(file_location) spark.stop()