def main():
    """ Application main.

    Build one KFoldConf for every combination of K, N, P and C below
    (1 * 6 * 9 * 104 = 5616 objects), tag each with an incremental
    conf_id and distribute them as a cached RDD.  For each of the three
    point partitions, evaluate every configuration with fold(), pass the
    result through report() and save it as text under
    results/partitionNN.  The saved files are finally read back to be
    merged into results.csv.

    NOTE(review): several lines of this function are missing in this
    view (see the notes below), so the code does not parse as shown.
    """

    K = [10]                                        # folds
    N = [3, 4, 5, 6, 7, 8]                          # neighbors
    P = [1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]         # powers
    # fine grid: 0.001 .. 0.024 in steps of 0.001 (24 values)
    C = [0.001 * i for i in range(1, 25)]           # time_scales
    # coarse grid: 0.025 .. 2.0 in steps of 0.025 (80 values)
    C.extend([0.025 * i for i in range(1, 81)])

    # build the list and then RDD of KFoldConf objects under analysis
    # (the fourth constructor argument is always None here; its meaning is
    # defined by kfold.KFoldConf, which is not visible in this file view)
    conf_list = [kfold.KFoldConf(k, n, p, None, c)
                 for k in K
                 for n in N
                 for p in P
                 for c in C]
    # add incremental "conf_id" attribute to each KFoldConf object
    for i, conf in enumerate(conf_list):
        conf.conf_id = i
    # SC is presumably the module-level SparkContext (defined elsewhere);
    # 150 would then be the number of slices the list is split into.
    conf_rdd = SC.parallelize(conf_list, 150).cache()

    # load radius_table and broadcast it
    # NOTE(review): pickle files are normally opened in binary mode ('rb');
    # text mode 'r' fails under Python 3 -- confirm how the file was written.
    # pickle.load can execute arbitrary code, so the file must be trusted.
    with open('radius_table.pkl', 'r') as f:
        radius_table = pickle.load(f)
    # radius_table_brd is not referenced in the visible lines; presumably it
    # is passed to the statistic calls whose arguments are cut off below.
    radius_table_brd = SC.broadcast(radius_table)

    # run learning tasks for each partition
    # (this loop reuses the name `i` from the conf_id loop above)
    for i in range(3):
        point_list = load_partition(i)
        point_list_brd = SC.broadcast(point_list)

        # fold() closes over the loop variable `i` to tag each result with
        # the partition it was computed on.
        # NOTE(review): in this view the kfold.mare(...) and kfold.rmspe(...)
        # calls stop after their first argument and the tuple is never
        # closed -- the remaining arguments must be restored from the
        # original file.
        def fold(conf):
            """ Return a result tuple for the given configuration. """
            return (i,                                  # partition_id
                    conf,                               # KFoldConf object
                    kfold.mare(conf,                    # MARE statistic
                    kfold.rmspe(conf,                   # RMSPE statistic

        report_rdd = conf_rdd.map(fold).map(report)
        # one output directory per partition: results/partition00 .. 02
        report_rdd.saveAsTextFile('results/partition%02d' % i)

    # collect all results into one rdd, then into one file
    # ('partition0%d' yields the same paths as '%02d' above for i in 0..2)
    # NOTE(review): the line continued by the trailing backslash below is
    # missing from this view, as is the statement that writes `results`
    # to `output` after the newline is appended to each line.
    result_rdds = [SC.textFile('results/partition0%d/' % i) for i in range(3)]
    results = result_rdds[0].\
    with open('results.csv', 'w') as output:
        results = map(lambda line: line + '\n', results)
def main():
    """ Application main.

    Build one KFoldConf per (n, p) pair from N and P below, with the
    first constructor argument fixed at 10 and the last at 0.1086, and
    map each configuration to its MARE and RMSPE statistics computed
    against the broadcast PM2.5 point list.

    NOTE(review): no action on report_rdd is visible in this view, so
    the function presumably continues past the last line shown.
    """

    # presumably neighbor counts and powers (2nd and 3rd KFoldConf
    # arguments) -- confirm against kfold.KFoldConf
    N = [3, 4, 5, 6, 7]
    P = [1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]

    # 5 values of n times 9 values of p = 45 configurations, matching the
    # 45 passed to parallelize below
    conf_list = [kfold.KFoldConf(10, n, p, None, 0.1086) for n in N for p in P]
    conf_rdd = SC.parallelize(conf_list, 45).cache()

    point_list = point.load_pm25_file('../../data/pm25_2009_measured.csv')
    # The following was used to test execution of this script locally.
    # point_list = point_list[:250]
    point_list_brd = SC.broadcast(point_list)

    # Return (conf, MARE, RMSPE) for one configuration; the broadcast
    # handle itself (not its .value) is handed to the kfold functions.
    def fold(conf):
        return (conf, kfold.mare(conf, point_list_brd),
                kfold.rmspe(conf, point_list_brd))

    # `report` is defined outside this view; this only builds the mapped
    # RDD and does not itself call an action on it.
    report_rdd = conf_rdd.map(fold).map(report)