import os
import json
import gzip
import hashlib

import synpp
from pytest import raises

# Local helper that generates the synthetic input data used by the integration
# tests below (the exact import path depends on how the test suite is laid out).
import testdata


def test_data(tmpdir):
    data_path = str(tmpdir.mkdir("data"))
    testdata.create(data_path)

    cache_path = str(tmpdir.mkdir("cache"))
    output_path = str(tmpdir.mkdir("output"))

    config = dict(
        data_path=data_path, output_path=output_path,
        regions=[10, 11], hts="entd"
    )

    stages = [
        dict(descriptor="data.spatial.iris"),
        dict(descriptor="data.spatial.codes"),
        dict(descriptor="data.spatial.population"),
        dict(descriptor="data.bpe.cleaned"),
        dict(descriptor="data.income.municipality"),
        dict(descriptor="data.hts.entd.cleaned"),
        dict(descriptor="data.hts.egt.cleaned"),
        dict(descriptor="data.census.cleaned"),
        dict(descriptor="data.od.cleaned"),
        dict(descriptor="data.hts.output"),
        dict(descriptor="data.sirene.output"),
    ]

    synpp.run(stages, config, working_directory=cache_path)

    assert os.path.isfile("%s/ile_de_france_hts_households.csv" % output_path)
    assert os.path.isfile("%s/ile_de_france_hts_persons.csv" % output_path)
    assert os.path.isfile("%s/ile_de_france_hts_trips.csv" % output_path)
    assert os.path.isfile("%s/ile_de_france_sirene.gpkg" % output_path)

def test_visualize_stages_io(tmpdir):
    flowchart_path = tmpdir.mkdir("sub") + "/data.json"

    synpp.run([
        { "descriptor": "tests.fixtures.visualization.stage_3" }
    ], dryrun=True, flowchart_path=flowchart_path)

    with open(flowchart_path) as json_file:
        data = json.load(json_file)

    assert data['nodes'] == [
        { "id": "tests.fixtures.visualization.stage_3" },
        { "id": "tests.fixtures.visualization.stage_1" },
        { "id": "tests.fixtures.visualization.stage_2" }
    ]

    assert data['links'] == [
        { 'source': 'tests.fixtures.visualization.stage_1', 'target': 'tests.fixtures.visualization.stage_3', 'key': 0 },
        { 'source': 'tests.fixtures.visualization.stage_1', 'target': 'tests.fixtures.visualization.stage_2', 'key': 0 },
        { 'source': 'tests.fixtures.visualization.stage_2', 'target': 'tests.fixtures.visualization.stage_3', 'key': 0 }
    ]

def test_devalidate_descendants(tmpdir):
    working_directory = tmpdir.mkdir("sub")

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.C" }
    ], config={"a": 1}, working_directory=working_directory, verbose=True)

    assert "tests.fixtures.devalidation.A1__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]
    assert "tests.fixtures.devalidation.A2" in result["stale"]
    assert "tests.fixtures.devalidation.B__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]
    assert "tests.fixtures.devalidation.C__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.C" },
        { "descriptor": "tests.fixtures.devalidation.A2" }
    ], config={"a": 1}, working_directory=working_directory, verbose=True)

    assert "tests.fixtures.devalidation.A1__42b7b4f2921788ea14dac5566e6f06d0" not in result["stale"]
    assert "tests.fixtures.devalidation.A2" in result["stale"]
    assert "tests.fixtures.devalidation.B__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]
    assert "tests.fixtures.devalidation.C__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]

def test_simulation(tmpdir):
    data_path = str(tmpdir.mkdir("data"))
    testdata.create(data_path)

    cache_path = str(tmpdir.mkdir("cache"))
    output_path = str(tmpdir.mkdir("output"))

    config = dict(
        data_path=data_path, output_path=output_path,
        regions=[10, 11], sampling_rate=1.0, hts="entd",
        random_seed=1000, processes=1
    )

    stages = [dict(descriptor="matsim.output")]

    synpp.run(stages, config, working_directory=cache_path)

    assert os.path.isfile("%s/ile_de_france_population.xml.gz" % output_path)
    assert os.path.isfile("%s/ile_de_france_network.xml.gz" % output_path)
    assert os.path.isfile("%s/ile_de_france_transit_schedule.xml.gz" % output_path)
    assert os.path.isfile("%s/ile_de_france_transit_vehicles.xml.gz" % output_path)
    assert os.path.isfile("%s/ile_de_france_households.xml.gz" % output_path)
    assert os.path.isfile("%s/ile_de_france_facilities.xml.gz" % output_path)

def run_population(tmpdir, hts):
    data_path = str(tmpdir.mkdir("data"))
    testdata.create(data_path)

    cache_path = str(tmpdir.mkdir("cache"))
    output_path = str(tmpdir.mkdir("output"))

    config = dict(
        data_path=data_path, output_path=output_path,
        regions=[10, 11], sampling_rate=1.0, hts=hts,
        random_seed=1000, processes=1,
        secloc_maximum_iterations=10, maven_skip_tests=True
    )

    stages = [
        dict(descriptor="synthesis.output"),
    ]

    synpp.run(stages, config, working_directory=cache_path)

    assert os.path.isfile("%s/activities.csv" % output_path)
    assert os.path.isfile("%s/persons.csv" % output_path)
    assert os.path.isfile("%s/households.csv" % output_path)
    assert os.path.isfile("%s/activities.gpkg" % output_path)
    assert os.path.isfile("%s/trips.gpkg" % output_path)
    assert os.path.isfile("%s/meta.json" % output_path)

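# run_population is a shared helper rather than a test case itself. A minimal,
# hypothetical example of how a concrete test might invoke it (the test name
# below is illustrative and not taken from the original suite):
def test_population_with_entd(tmpdir):
    run_population(tmpdir, "entd")
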
def test_ephemeral_BD(tmpdir):
    working_directory = tmpdir.mkdir("cache")

    result = synpp.run([
        { "descriptor": "tests.fixtures.ephemeral.D" },
        { "descriptor": "tests.fixtures.ephemeral.B" }
    ], working_directory=working_directory, verbose=True)

    assert "tests.fixtures.ephemeral.A" in result["stale"]
    assert "tests.fixtures.ephemeral.B" in result["stale"]
    assert "tests.fixtures.ephemeral.C" in result["stale"]
    assert "tests.fixtures.ephemeral.D" in result["stale"]

    result = synpp.run([
        { "descriptor": "tests.fixtures.ephemeral.D" }
    ], working_directory=working_directory, verbose=True)

    assert "tests.fixtures.ephemeral.A" not in result["stale"]
    assert "tests.fixtures.ephemeral.B" not in result["stale"]
    assert "tests.fixtures.ephemeral.C" in result["stale"]
    assert "tests.fixtures.ephemeral.D" in result["stale"]

def test_determinism(tmpdir):
    data_path = str(tmpdir.mkdir("data"))
    testdata.create(data_path)

    md5sums = []

    for index in range(2):
        cache_path = str(tmpdir.mkdir("cache_%d" % index))
        output_path = str(tmpdir.mkdir("output_%d" % index))

        config = dict(
            data_path=data_path, output_path=output_path,
            regions=[10, 11], sampling_rate=1.0, hts="entd",
            random_seed=1000, processes=1
        )

        stages = [
            dict(descriptor="synthesis.output"),
            dict(descriptor="matsim.output"),
        ]

        synpp.run(stages, config, working_directory=cache_path)

        files = [
            "%s/activities.csv" % output_path,
            "%s/persons.csv" % output_path,
            "%s/households.csv" % output_path,
            #"%s/activities.gpkg" % output_path,
            #"%s/trips.gpkg" % output_path,
            #"%s/meta.json" % output_path,
            "%s/ile_de_france_population.xml.gz" % output_path,
            "%s/ile_de_france_network.xml.gz" % output_path,
            #"%s/ile_de_france_transit_schedule.xml.gz" % output_path,
            #"%s/ile_de_france_transit_vehicles.xml.gz" % output_path,
            "%s/ile_de_france_households.xml.gz" % output_path,
            "%s/ile_de_france_facilities.xml.gz" % output_path,
            "%s/ile_de_france_config.xml" % output_path
        ]

        hash = hashlib.md5()

        for file in files:
            # Gzip saves time stamps, so the gzipped files are NOT byte-identical;
            # hash the decompressed content instead.
            opener = lambda: open(file, "rb")
            if file.endswith(".gz"):
                opener = lambda: gzip.open(file)

            with opener() as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    hash.update(chunk)

        md5sums.append(hash.hexdigest())

    for index in range(1, len(md5sums)):
        assert md5sums[0] == md5sums[index]

def test_devalidate_token(tmpdir):
    working_directory = tmpdir.mkdir("sub")

    path = "%s/test.fixture" % working_directory
    with open(path, "w+") as f:
        f.write("abcdef")

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.token_b" }
    ], config={"path": path}, working_directory=working_directory, verbose=True)

    name_a, name_b = None, None
    for name in result["stale"]:
        if "token_a" in name:
            name_a = name
        if "token_b" in name:
            name_b = name

    assert name_a in result["stale"]
    assert name_b in result["stale"]
    assert result["results"][0] == "abcdef"

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.token_b" }
    ], config={"path": path}, working_directory=working_directory, verbose=True)

    assert name_a not in result["stale"]
    assert name_b in result["stale"]
    assert result["results"][0] == "abcdef"

    with open(path, "w+") as f:
        f.write("uvwxyz")

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.token_b" }
    ], config={"path": path}, working_directory=working_directory, verbose=True)

    assert name_a in result["stale"]
    assert name_b in result["stale"]
    assert result["results"][0] == "uvwxyz"

def _test_determinism(index, data_path, tmpdir):
    print("Running index %d" % index)

    cache_path = str(tmpdir.mkdir("cache_%d" % index))
    output_path = str(tmpdir.mkdir("output_%d" % index))

    config = dict(
        data_path=data_path, output_path=output_path,
        regions=[10, 11], sampling_rate=1.0, hts="entd",
        random_seed=1000, processes=1,
        secloc_maximum_iterations=10, maven_skip_tests=True
    )

    stages = [
        dict(descriptor="synthesis.output"),
        dict(descriptor="matsim.output"),
    ]

    synpp.run(stages, config, working_directory=cache_path)

    REFERENCE_HASHES = {
        "activities.csv": "4ad84a3d7741ef6182457acecc5b7ea5",
        "persons.csv": "ed87e2b6dfd2a9914d5fc7b2bf6d52d3",
        "households.csv": "882ce7dc1a44403d12c5aa10709c0d5b",
        #"ile_de_france_population.xml.gz": "e1407f918cb92166ebf46ad769d8d085",
        "ile_de_france_network.xml.gz": "594f427690bb5a7fad001fc2d5e31497",
        "ile_de_france_households.xml.gz": "cdbd6ed5b175328861f237dc58dee1ff",
        #"ile_de_france_facilities.xml.gz": "5ad41afff9ae5c470082510b943e6778",
        "ile_de_france_config.xml": "4f44821d6162dad1928a75e5c0b14f68"
    }

    # Not covered: activities.gpkg, trips.gpkg, meta.json,
    # ile_de_france_transit_schedule.xml.gz, ile_de_france_transit_vehicles.xml.gz
    # TODO: Output of the Java part is not deterministic, probably because of
    # the ordering of persons / facilities. Fix that! The same is true for GPKG.
    # A detailed inspection of meta.json would make sense!

    generated_hashes = {
        file: hash_file("%s/%s" % (output_path, file))
        for file in REFERENCE_HASHES.keys()
    }

    print("Generated hashes: ", generated_hashes)

    for file in REFERENCE_HASHES.keys():
        assert REFERENCE_HASHES[file] == generated_hashes[file]

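# hash_file is referenced above but defined elsewhere in the test suite. The
# sketch below is an assumption about its behavior, consistent with how it is
# used here and with the gzip-aware hashing in test_determinism: gzip archives
# embed time stamps, so the decompressed content is hashed for *.gz files.
def hash_file(path):
    digest = hashlib.md5()

    # Open gzipped files through gzip so only the payload is hashed.
    opener = (lambda: gzip.open(path)) if path.endswith(".gz") else (lambda: open(path, "rb"))

    with opener() as f:
        for chunk in iter(lambda: f.read(4096), b""):
            digest.update(chunk)

    return digest.hexdigest()
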
def test_cache_path(tmpdir):
    result = synpp.run([
        { "descriptor": "tests.fixtures.cache_path_read" }
    ], working_directory=tmpdir.mkdir("sub"))

    assert result[0] == "abc_uvw"

def test_visualize_stages_results():
    result = synpp.run([
        { "descriptor": "tests.fixtures.visualization.stage_3" }
    ], dryrun=True)

    assert result['nodes'] == [
        { "id": "tests.fixtures.visualization.stage_3" },
        { "id": "tests.fixtures.visualization.stage_1" },
        { "id": "tests.fixtures.visualization.stage_2" }
    ]

    assert result['links'] == [
        { 'source': 'tests.fixtures.visualization.stage_1', 'target': 'tests.fixtures.visualization.stage_3', 'key': 0 },
        { 'source': 'tests.fixtures.visualization.stage_1', 'target': 'tests.fixtures.visualization.stage_2', 'key': 0 },
        { 'source': 'tests.fixtures.visualization.stage_2', 'target': 'tests.fixtures.visualization.stage_3', 'key': 0 }
    ]

def test_devalidate_by_downstream_config(tmpdir):
    working_directory = tmpdir.mkdir("sub")

    result = synpp.run([
        { "descriptor": "tests.fixtures.downstream.chain_d" }
    ], working_directory=working_directory, verbose=True)

    for x in result["stale"]:
        print(x)

    assert "tests.fixtures.downstream.chain_a__b1d43cd340a6b095b41ad645446b6800" in result["stale"]
    assert "tests.fixtures.downstream.chain_a__798cc71deef8c6835483eb116d0ce9bd" in result["stale"]
    assert "tests.fixtures.downstream.chain_b__b1d43cd340a6b095b41ad645446b6800" in result["stale"]
    assert "tests.fixtures.downstream.chain_b__798cc71deef8c6835483eb116d0ce9bd" in result["stale"]
    assert "tests.fixtures.downstream.chain_c__b1d43cd340a6b095b41ad645446b6800" in result["stale"]
    assert "tests.fixtures.downstream.chain_c__798cc71deef8c6835483eb116d0ce9bd" in result["stale"]
    assert "tests.fixtures.downstream.chain_d" in result["stale"]

def test_devalidate_by_passed_on_config(tmpdir):
    working_directory = tmpdir.mkdir("sub")

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.A1" }
    ], config={ "a": 1, "d": 5 }, working_directory=working_directory, verbose=True)

    assert "tests.fixtures.devalidation.A1__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.D" }
    ], config={ "a": 1, "d": 5 }, working_directory=working_directory, verbose=True)

    assert "tests.fixtures.devalidation.A1__b1d43cd340a6b095b41ad645446b6800" in result["stale"]
    assert "tests.fixtures.devalidation.D__2ea707fadc0d136c95611cd3de856f0a" in result["stale"]

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.D" }
    ], config={ "a": 1, "d": 10 }, working_directory=working_directory, verbose=True)

    assert "tests.fixtures.devalidation.A1__798cc71deef8c6835483eb116d0ce9bd" in result["stale"]
    assert "tests.fixtures.devalidation.D__7532252d06e50cdf1ddbfe8269a47aa8" in result["stale"]

def test_sum_config():
    result = synpp.run([
        { "descriptor": "tests.fixtures.sum_config" }
    ], config={ "a": 5, "b": 11 })

    assert result[0] == 16

def test_sum_params():
    result = synpp.run([
        { "descriptor": "tests.fixtures.sum_config", "config": { "a": 5, "b": 9 } }
    ])

    assert result[0] == 14

def test_devalidate_by_config(tmpdir):
    working_directory = tmpdir.mkdir("sub")

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.B" }
    ], config={"a": 1}, working_directory=working_directory, verbose=True)

    assert "tests.fixtures.devalidation.A1__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]
    assert "tests.fixtures.devalidation.A2" in result["stale"]
    assert "tests.fixtures.devalidation.B__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.B" }
    ], config={ "a": 1, "b": 1 }, working_directory=working_directory, verbose=True)

    assert "tests.fixtures.devalidation.A1__42b7b4f2921788ea14dac5566e6f06d0" not in result["stale"]
    assert "tests.fixtures.devalidation.A2" not in result["stale"]
    assert "tests.fixtures.devalidation.B__42b7b4f2921788ea14dac5566e6f06d0" in result["stale"]

    result = synpp.run([
        { "descriptor": "tests.fixtures.devalidation.B" }
    ], config={"a": 2}, working_directory=working_directory, verbose=True)

    assert "tests.fixtures.devalidation.A1__9f8a8e5ba8c70c774d410a9107e2a32b" in result["stale"]
    assert "tests.fixtures.devalidation.A2" not in result["stale"]
    assert "tests.fixtures.devalidation.B__9f8a8e5ba8c70c774d410a9107e2a32b" in result["stale"]

def test_sum_stages_in_memory():
    result = synpp.run([
        { "descriptor": "tests.fixtures.sum_stages" }
    ], config={ "a": 5, "b": 11 })

    assert result[0] == 11 + 16 + 10

def test_recursive(tmpdir):
    result = synpp.run([
        { "descriptor": "tests.fixtures.recursive", "config": { "a": 5 } }
    ], working_directory=tmpdir.mkdir("sub"))

    assert result[0] == 15

def test_data(tmpdir):
    data_path = str(tmpdir.mkdir("data"))
    testdata.create(data_path)

    cache_path = str(tmpdir.mkdir("cache"))

    config = dict(data_path=data_path, regions=[10, 11])

    stages = [
        dict(descriptor="data.spatial.iris"),
        dict(descriptor="data.spatial.codes"),
        dict(descriptor="data.spatial.population"),
        dict(descriptor="data.bpe.cleaned"),
        dict(descriptor="data.income.municipality"),
        dict(descriptor="data.hts.entd.cleaned"),
        dict(descriptor="data.hts.egt.cleaned"),
        dict(descriptor="data.census.cleaned"),
        dict(descriptor="data.od.cleaned")
    ]

    synpp.run(stages, config, working_directory=cache_path)

def test_sum_stages_with_files(tmpdir):
    result = synpp.run([
        { "descriptor": "tests.fixtures.sum_stages" }
    ], config={ "a": 5, "b": 11 }, working_directory=tmpdir.mkdir("sub"))

    assert result[0] == 11 + 16 + 10

def test_info(tmpdir):
    working_directory = tmpdir.mkdir("sub")

    result = synpp.run([
        { "descriptor": "tests.fixtures.info2" }
    ], working_directory=working_directory, verbose=True)

    assert "tests.fixtures.info1" in result["stale"]
    assert "tests.fixtures.info2" in result["stale"]
    assert result["info"]["abc"] == "123"
    assert result["info"]["uvw"] == "789"
    assert result["info"]["concat"] == "123789"

    result = synpp.run([
        { "descriptor": "tests.fixtures.info2" }
    ], working_directory=working_directory, verbose=True)

    assert "tests.fixtures.info1" not in result["stale"]
    assert "tests.fixtures.info2" in result["stale"]
    assert result["info"]["abc"] == "123"
    assert result["info"]["uvw"] == "789"
    assert result["info"]["concat"] == "123789"

def test_parallel_stage():
    result = synpp.run([{ "descriptor": "tests.fixtures.parallel_stage" }])[0]
    assert result == [1321, 2321, 3321, 4321, 5321]

def test_progress_stage():
    return synpp.run([{ "descriptor": "tests.fixtures.progress_stage" }])

def test_parallel_progress_stage():
    synpp.run([{ "descriptor": "tests.fixtures.parallel_progress_stage" }])

def test_cycle():
    with raises(synpp.PipelineError):
        synpp.run([{ "descriptor": "tests.fixtures.cycle_stage_a" }])