def main():
    """Parse the command-line options and pass them to ``set_aside_queries``.

    Options:
        --in_root: input directory; defaults to
            ``data/learnlarge/merged_parametrized`` below ``fs_root()``.
        --folds: one or more fold names; defaults to
            ``train val test full``.
        --query_dates: one or more date strings; defaults to the five
            dates listed below.

    The parsed options are printed via ``flags_to_args`` before
    ``set_aside_queries`` is called.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--in_root',
        default=os.path.join(fs_root(), 'data/learnlarge/merged_parametrized'))
    # nargs='+' makes values given on the command line arrive as a list of
    # strings, i.e. the same shape as the default. Without it a single
    # string would be passed on instead of a list.
    parser.add_argument('--folds', nargs='+',
                        default=['train', 'val', 'test', 'full'])
    # Previously type=list, which argparse applies to the raw argument
    # string and therefore split a date into single characters.
    parser.add_argument('--query_dates', nargs='+', default=[
        '2015-08-14-14-54-57',  # roadworks, overcast
        '2014-11-18-13-20-12',  # sun, clouds
        '2014-12-17-18-18-43',  # night, rain
        '2015-02-03-08-45-10',  # snow
        '2014-06-26-09-24-58',  # overcast, alternate-route (validation area)
    ])
    args = parser.parse_args()
    print(flags_to_args(args))

    set_aside_queries(args.in_root, args.folds, args.query_dates)
                        default=os.path.join(fs_root(),
                                             'data/learnlarge/shuffled'))
    # NOTE(review): this is the tail of an argparse-based entry point; the
    # enclosing ``def`` and the earlier ``add_argument`` calls are not part of
    # the visible lines.

    # Output directory; defaults to data/learnlarge/clusters below fs_root().
    parser.add_argument('--out_root',
                        default=os.path.join(fs_root(),
                                             'data/learnlarge/clusters'))
    # NOTE(review): argparse calls ``type`` on the raw command-line string, and
    # dict('<text>') fails for ordinary strings, so this option looks usable
    # only through its default -- confirm whether it is ever passed explicitly.
    parser.add_argument('--num_clusters',
                        type=dict,
                        default={
                            'train': 7000,
                            'test': 2000,
                            'val': 1000
                        })
    parser.add_argument('--r', type=int, default=5)
    args = parser.parse_args()

    # The return value of flags_to_args is discarded here.
    flags_to_args(args)

    in_root = args.in_root
    # NOTE(review): num_clusters and the three *_ref_date values below are
    # assigned but not used in the visible lines.
    num_clusters = args.num_clusters
    out_root = args.out_root
    r = args.r
    # NOTE(review): --test_ref_date / --train_ref_date / --val_ref_date are not
    # registered in the visible lines; presumably they are added in the missing
    # part of this function -- otherwise these lookups raise AttributeError.
    test_ref_date = args.test_ref_date
    train_ref_date = args.train_ref_date
    val_ref_date = args.val_ref_date

    # Create the output directory if it does not exist yet.
    if not os.path.exists(out_root):
        os.makedirs(out_root)

    # Call cluster() once per split; 'ref' is the only mode iterated here.
    for mode in ['ref']:
        for s in ['train', 'val', 'test']:
            cluster(in_root, out_root, s, mode, r)