Пример #1
0
def getCoordinatesMinMax_bis(dir,sc):
    """Return the extreme L and B values found in the input CSV data.

    Every non-empty line is split on ',' and columns 6 and 9 (the ra and
    decl, per the original author's note) are parsed as floats and converted
    with ``mymath.getL`` / ``mymath.getB``.

    Args:
        dir: Input path. When it contains the substring "csv" it is handed to
            ``sc.textFile`` unchanged (treated as a single file); otherwise
            ``dir + "/*.csv"`` is read.
        sc: Object exposing ``textFile(path)`` (presumably a SparkContext).

    Returns:
        A list ``[min_l, max_l, min_b, max_b]``.

    Raises:
        IndexError: If the input holds no non-empty line, or a line has fewer
            than 10 comma-separated fields.
        ValueError: If column 6 or 9 cannot be parsed as a float.
    """
    path = dir if "csv" in dir else dir + "/*.csv"
    rdd = sc.textFile(path)

    def to_bounds(line):
        # Parse and convert each coordinate once per row; the same value
        # seeds both the running minimum and the running maximum.
        fields = line.split(',')
        ra, decl = float(fields[6]), float(fields[9])
        l_value = mymath.getL(ra, decl)
        b_value = mymath.getB(ra, decl)
        return (1, [l_value, l_value, b_value, b_value])

    def merge_bounds(x, y):
        return [min(x[0], y[0]), max(x[1], y[1]),
                min(x[2], y[2]), max(x[3], y[3])]

    # All rows share the constant key 1, so the reduction yields at most one
    # (key, bounds) pair.
    result = rdd.filter(lambda line: len(line) > 0) \
        .map(to_bounds) \
        .reduceByKey(merge_bounds) \
        .collect()

    if not result:
        # Same exception type the bare result[0] lookup used to raise, but
        # with a message that explains what went wrong.
        raise IndexError("no non-empty input line found in %r" % (path,))
    return result[0][1]
Пример #2
0
def partitioning_V3(dir,dir_result,sc,dict):
    """Assign every input line to its block(s) and save the partitioned data.

    For each non-empty line, columns 6 and 9 are parsed as floats, converted
    with ``mymath.getL`` / ``mymath.getB`` and passed, together with the raw
    line, to ``dict.get_block_number_with_margins``. The returned string is
    treated as '_'-separated entries of the form ``<int>:<payload>``; each
    entry becomes an ``(int, payload)`` pair, the pairs are partitioned into
    ``len(dict.dictOfCoord)`` partitions and written to ``dir_result``.

    Args:
        dir: Input path. When it contains the substring "csv" it is handed to
            ``sc.textFile`` unchanged; otherwise ``dir + "/*.csv"`` is read.
        dir_result: Output location passed to ``saveAsHadoopFile``.
        sc: Object exposing ``textFile(path)`` (presumably a SparkContext).
        dict: Partitioning object providing ``get_block_number_with_margins``
            and ``dictOfCoord``.

    Returns:
        Whatever ``saveAsHadoopFile`` returns.
    """
    path = dir if "csv" in dir else dir + "/*.csv"
    rdd = sc.textFile(path)

    def tag_with_blocks(line):
        # Split and parse once per line instead of once per extracted field.
        fields = line.split(",")
        ra, decl = float(fields[6]), float(fields[9])
        return dict.get_block_number_with_margins(
            mymath.getL(ra, decl), mymath.getB(ra, decl), line)

    def to_pair(entry):
        # NOTE(review): if the helper embeds the raw line verbatim, a '_' or
        # ':' inside the line would be split/truncated here - confirm the
        # input never contains those characters.
        parts = entry.split(':')
        return (int(parts[0]), parts[1])

    return rdd.filter(lambda line: len(line) > 0) \
        .map(tag_with_blocks) \
        .flatMap(lambda tagged: tagged.split("_")) \
        .map(to_pair) \
        .partitionBy(len(dict.dictOfCoord)) \
        .saveAsHadoopFile(dir_result, "org.apache.hadoop.mapred.TextOutputFormat")
Пример #3
0
def getNbLinePerPatition_V3(dir,sc,dict):
    """Count how many lines fall into each block, margins included.

    For each non-empty line, columns 6 and 9 are parsed as floats, converted
    with ``mymath.getL`` / ``mymath.getB`` and passed to
    ``dict.get_block_number_with_margins`` with the placeholder payload ``1``.
    The returned string is treated as '_'-separated entries whose text before
    the first ':' is an integer block number; one occurrence is counted per
    entry.

    Args:
        dir: Input path. When it contains the substring "csv" it is handed to
            ``sc.textFile`` unchanged; otherwise ``dir + "/*.csv"`` is read.
        sc: Object exposing ``textFile(path)`` (presumably a SparkContext).
        dict: Partitioning object providing ``get_block_number_with_margins``.

    Returns:
        A list of ``(block_number, count)`` tuples in ascending block order.
    """
    path = dir if "csv" in dir else dir + "/*.csv"
    rdd = sc.textFile(path)

    def blocks_for_line(line):
        # Split and parse once per line instead of once per extracted field.
        fields = line.split(",")
        ra, decl = float(fields[6]), float(fields[9])
        return dict.get_block_number_with_margins(
            mymath.getL(ra, decl), mymath.getB(ra, decl), 1)

    # Aggregate first, then sort: sorting before reduceByKey is an extra
    # shuffle whose ordering is lost when reduceByKey repartitions the keys
    # by hash. Sorting the (much smaller) aggregated result keeps the order.
    return rdd.filter(lambda line: len(line) > 0) \
        .map(blocks_for_line) \
        .flatMap(lambda tagged: tagged.split("_")) \
        .map(lambda entry: (int(entry.split(':')[0]), 1)) \
        .reduceByKey(lambda x, y: x + y) \
        .sortByKey() \
        .collect()