def run_job_with_params(repo, job_params):
    """Reset the repository and run the whole pipeline for one parameter set.

    The repository is cleared with ``drop_all_collections``, ``job_params``
    is written to the ``nikolaj.params`` collection via ``store_json``, and
    then every pipeline stage's ``run()`` is invoked in a fixed order.

    Args:
        repo: Repository handle passed through to ``drop_all_collections``
            and ``store_json``.
        job_params: Parameters persisted under ``nikolaj.params`` before the
            stages run.
    """
    drop_all_collections(repo)
    store_json(repo, 'nikolaj.params', job_params)

    # Order matters: the stages are executed strictly in this sequence.
    pipeline = (storeT, storeBus, combine_t_bus, geoagg, pagerank)
    for stage in pipeline:
        stage.run()
def run_job_with_params(repo, job_params, doc):
    """Run the derived stages for one parameter set and record provenance.

    Derived collections are dropped, ``job_params`` is written to the
    ``nikolaj.params`` collection, and then ``geoagg`` and ``pagerank`` are
    run in that order. After each stage, the result of its ``to_prov`` call
    is merged into ``doc`` with ``doc.update``.

    Args:
        repo: Repository handle passed to ``drop_derived_collections`` and
            ``store_json``.
        job_params: Indexable parameter collection; element 0 is handed to
            ``geoagg.to_prov`` and element 1 to ``pagerank.to_prov``.
        doc: Provenance document that is updated in place.
    """
    drop_derived_collections(repo)
    store_json(repo, 'nikolaj.params', job_params)

    # Each stage's run() yields a 3-tuple; only the first and last items
    # (start and end time) are forwarded to to_prov, together with the
    # positionally matching entry of job_params.
    for position, stage in enumerate((geoagg, pagerank)):
        began, _, finished = stage.run()
        doc.update(stage.to_prov(began, finished, job_params[position]))


if __name__ == "__main__":
    repo = get_auth_repo('nikolaj', 'nikolaj')
    doc = prov.model.ProvDocument()

    # Run the two raw-data stages and fold their provenance into doc.
    startTime, _, endTime = storeT.run()
    doc.update(storeT.to_prov(startTime, endTime))

    startTime, _, endTime = storeBus.run()
    doc.update(storeBus.to_prov(startTime, endTime))

    # Parameter set: first entry configures geoagg, second configures
    # pagerank (matching the positional use in run_job_with_params).
    t_only_params = [
        {
            "id": "geoagg_params",
            "maxDistance": 0,
            "output_col_name": "nikolaj.stops_with_neighs_t_only",
            "input_cols": ["nikolaj.raw_t_stops"],
            "routeUnion": ["$routes", "$geo_neigh_routes"],
            "neighUnion": ["$neighs", "$geo_neighs"],
        },
        {
            "id": "pagerank_params",
            "input_col_name": "nikolaj.stops_with_neighs_t_only",
            "output_col_name": "nikolaj.pagerank_result_t_only",
        },
    ]
def run_job_with_params(repo, job_params, doc): drop_derived_collections(repo) store_json(repo, 'nikolaj.params', job_params) startTime, _, endTime = geoagg.run() doc.update(geoagg.to_prov(startTime, endTime, job_params[0])) startTime, _, endTime = pagerank.run() doc.update(pagerank.to_prov(startTime, endTime, job_params[1])) if __name__ == "__main__": repo = get_auth_repo('nikolaj', 'nikolaj') doc = prov.model.ProvDocument() startTime, _, endTime = storeT.run() doc.update(storeT.to_prov(startTime, endTime)) startTime, _, endTime = storeBus.run() doc.update(storeBus.to_prov(startTime, endTime)) t_only_params = [ { "id" : "geoagg_params", "maxDistance" : 0, "output_col_name": "nikolaj.stops_with_neighs_t_only", "input_cols": [ "nikolaj.raw_t_stops" ], "routeUnion" : [ "$routes", "$geo_neigh_routes" ], "neighUnion" : [ "$neighs", "$geo_neighs" ] }, { "id" : "pagerank_params", "input_col_name": "nikolaj.stops_with_neighs_t_only", "output_col_name" : "nikolaj.pagerank_result_t_only" } ] t_500walk_params = [ { "id" : "geoagg_params", "maxDistance" : 500, "output_col_name": "nikolaj.stops_with_neighs_t_500walk", "input_cols": [ "nikolaj.raw_t_stops" ], "routeUnion" : [ "$routes", "$geo_neigh_routes" ], "neighUnion" : [ "$neighs", "$geo_neighs" ] }, { "id" : "pagerank_params", "input_col_name": "nikolaj.stops_with_neighs_t_500walk", "output_col_name" : "nikolaj.pagerank_result_t_500walk" } ] t_500walk_bus_params = [ { "id" : "geoagg_params", "maxDistance" : 500, "output_col_name": "nikolaj.stops_with_neighs_t_500walk_bus", "input_cols": [ "nikolaj.raw_t_stops", "nikolaj.raw_bus_stops" ], "routeUnion" : [ "$routes", "$geo_neigh_routes" ], "neighUnion" : [ "$neighs", "$geo_neighs" ] }, { "id" : "pagerank_params", "input_col_name": "nikolaj.stops_with_neighs_t_500walk_bus", "output_col_name" : "nikolaj.pagerank_result_t_500walk_bus" }