def preprocess_data(target, feature_pipeline, submission=False):
    """Run ``feature_pipeline`` over the data files of ``target`` in parallel.

    The paths yielded by ``generate_mat_cvs(target)`` are filtered first:
    with ``submission`` set they go through
    ``mask_for_state(paths, state='test')``, otherwise through
    ``mask_for_random_sample(paths)``. ``FeaturePlumbing(feature_pipeline).run``
    is then mapped over the remaining paths on a pool of
    ``SETTINGS.N_jobs`` workers.

    Args:
        target: Passed unchanged to ``generate_mat_cvs``.
        feature_pipeline: Passed unchanged to ``FeaturePlumbing``.
        submission: Selects which of the two path filters is applied.

    Returns:
        Whatever ``wrap_preprocess_to_data`` returns for the accumulated
        results and the filtered path list.
    """
    pool = Pool(SETTINGS.N_jobs)
    try:
        paths = list(generate_mat_cvs(target))

        if submission:
            paths = mask_for_state(paths, state='test')
        else:
            paths = mask_for_random_sample(paths)

        feature_plumbing = FeaturePlumbing(feature_pipeline)
        results = pool.map(feature_plumbing.run, paths)
    finally:
        # Release the workers even when a step above raises; otherwise every
        # call leaves a pool behind. NOTE(review): assumes Pool exposes the
        # multiprocessing.Pool close()/join() interface - confirm at import.
        pool.close()
        pool.join()

    accumulated = generate_accumulate_results(results)
    return wrap_preprocess_to_data(accumulated, paths)
def do_transformation_pipeline(target, transformations):
    """Apply ``transformations`` to every data file of ``target`` in parallel.

    ``TransformationPipeline(transformations).run`` is mapped over all paths
    yielded by ``generate_mat_cvs(target)`` on a pool of ``SETTINGS.N_jobs``
    workers.

    Args:
        target: Passed unchanged to ``generate_mat_cvs``.
        transformations: Passed unchanged to ``TransformationPipeline``.

    Returns:
        A ``(results, paths)`` tuple: the list returned by ``pool.map`` and
        the list of paths it was mapped over.
    """
    pipeline = TransformationPipeline(transformations)
    pool = Pool(SETTINGS.N_jobs)
    try:
        paths = list(generate_mat_cvs(target))
        results = pool.map(pipeline.run, paths)
    finally:
        # Release the workers even when a step above raises; otherwise every
        # call leaves a pool behind. NOTE(review): assumes Pool exposes the
        # multiprocessing.Pool close()/join() interface - confirm at import.
        pool.close()
        pool.join()

    return results, paths