示例#1
0
def aggregate(rdd, op=None):
    """Collapse ((experiment, second), value) pairs into per-second values.

    Parameters:
        rdd: keyed RDD of ((experiment, second), value) pairs.
        op:  optional associative reduce function (e.g. min, max, add) used
             to combine values sharing a key; when None, values are averaged
             per key via average_by_key instead.

    Returns:
        The transpose of the sorted (second, value) pairs, i.e. a list of
        two tuples: (seconds..., int-truncated values...).
    """
    # Collapse duplicates of the same (experiment, second) key.
    rdd = rdd.reduceByKey(op) if op else average_by_key(rdd)
    # Re-key by second alone, dropping the experiment component.
    # Subscripting instead of tuple-parameter unpacking in the lambda keeps
    # this valid on Python 3 (PEP 3113 removed unpacking in signatures).
    rdd = rdd.map(lambda kv: (kv[0][1], kv[1]))
    # Average across experiments for each second.
    rdd = average_by_key(rdd)
    rdd = rdd.map(lambda kv: (kv[0], int(kv[1])))
    rdd = rdd.sortByKey()
    data = rdd.collect()
    # Transpose [(sec, val), ...] -> [(sec, ...), (val, ...)].
    # list() is a no-op on Python 2 (zip already returns a list) and
    # materializes the iterator on Python 3.
    return list(zip(*data))
示例#2
0
def process(sc, series_dir):
    """Parse every log file in *series_dir* and average per measurement.

    Parameters:
        sc:         SparkContext used to parallelize the file list.
        series_dir: directory handed to fetch_log_paths to enumerate logs.

    Returns:
        The transpose of the sorted (measurement, value) pairs:
        (measurements..., averaged values...).
    """
    log_paths = fetch_log_paths(series_dir)
    rdd = sc.parallelize(log_paths)
    # Each log file expands into ((experiment, measurement), value) records.
    rdd = rdd.flatMap(parse_log_file)
    # First average within each (experiment, measurement) key.
    rdd = average_by_key(rdd)
    # Re-key by measurement alone (subscript form instead of Py2-only
    # tuple-parameter unpacking, per PEP 3113).
    rdd = rdd.map(lambda kv: (kv[0][1], kv[1]))
    # Then average across experiments per measurement.
    rdd = average_by_key(rdd)
    rdd = rdd.sortByKey()
    # list() keeps the Python 2 list return while working on Python 3.
    return list(zip(*rdd.collect()))
示例#3
0
def aggregate(rdd):
    """Reduce ((experiment, second), value) pairs to per-second minima.

    Values at or below -10000 are treated as sentinels and discarded, then
    the minimum per key is taken and averaged across experiments per second.

    Returns:
        The transpose of the sorted (second, value) pairs:
        (seconds..., values...).
    """
    # Drop sentinel/garbage readings before reducing.
    # Subscript form instead of Py2-only tuple-parameter unpacking
    # (removed in Python 3 by PEP 3113).
    rdd = rdd.filter(lambda kv: kv[1] > -10000)
    rdd = rdd.reduceByKey(min)
    # Re-key by second, dropping the experiment component.
    rdd = rdd.map(lambda kv: (kv[0][1], kv[1]))
    # Average across experiments per second.
    rdd = average_by_key(rdd)
    # NOTE: the original identity map (second, value) -> (second, value)
    # was a no-op and has been removed.
    rdd = rdd.sortByKey()
    data = rdd.collect()
    # Transpose into parallel tuples; list() is Python 3 compatible.
    return list(zip(*data))
def aggregate(rdd):
    """Count values below 10 per (experiment, second), averaged per second.

    Each value is mapped to an indicator (1 if value < 10 else 0), summed
    per (experiment, second) key, then averaged across experiments.

    Returns:
        The transpose of the sorted (second, count) pairs:
        (seconds..., int-truncated counts...).
    """
    # Indicator transform: subscript form instead of Py2-only tuple-parameter
    # unpacking (removed in Python 3 by PEP 3113).
    rdd = rdd.map(lambda kv: (kv[0], 1 if kv[1] < 10 else 0))
    # Sum the indicators per (experiment, second) key.
    rdd = rdd.reduceByKey(add)
    # Re-key by second, dropping the experiment component.
    rdd = rdd.map(lambda kv: (kv[0][1], kv[1]))
    # Average counts across experiments per second.
    rdd = average_by_key(rdd)
    rdd = rdd.map(lambda kv: (kv[0], int(kv[1])))
    rdd = rdd.sortByKey()
    data = rdd.collect()
    # Transpose into parallel tuples; list() is Python 3 compatible.
    return list(zip(*data))
示例#5
0
def process(sc, logs_dir):
    """Aggregate log values per nodes_count over a measurement window.

    Logs expand into ((nodes_count, experiment, measurement), value)
    records; measurements in the open interval (50, 250) are summed per
    (nodes_count, experiment), scaled by 1/200, and averaged per
    nodes_count.

    Parameters:
        sc:       SparkContext used to parallelize the file list.
        logs_dir: directory handed to fetch_log_paths to enumerate logs.

    Returns:
        The transpose of the sorted (nodes_count, value) pairs.
    """
    log_paths = fetch_log_paths(logs_dir)
    rdd = sc.parallelize(log_paths)
    rdd = rdd.flatMap(parse_log_file)
    # Keep only the measurement window 50 < m < 250.  Subscript form
    # instead of Py2-only tuple-parameter unpacking (PEP 3113).
    rdd = rdd.filter(lambda kv: 50 < kv[0][2] < 250)
    # Re-key by (nodes_count, experiment), dropping the measurement.
    rdd = rdd.map(lambda kv: ((kv[0][0], kv[0][1]), kv[1]))
    # Sum values over the window per (nodes_count, experiment).
    rdd = rdd.reduceByKey(add)
    # Re-key by nodes_count; scale by the window width (199 samples,
    # rounded to 200).  NOTE(review): `/` floors for ints on Python 2 but
    # is true division on Python 3 — confirm which is intended if the
    # summed values are integers.
    rdd = rdd.map(lambda kv: (kv[0][0], kv[1] / 200))
    # Average across experiments per nodes_count.
    rdd = average_by_key(rdd)
    rdd = rdd.sortByKey()
    # list() keeps the Python 2 list return while working on Python 3.
    return list(zip(*rdd.collect()))