def synthesis(params):
    """Run the hash-partitioning checks, then the chaining-option checks.

    Everything is driven through the module-level helpers ``write`` and
    ``verify`` (defined elsewhere in this file), using ``params.slices`` as
    the slice count handed to ``verify``.

    Steps, in order:

    1. Write ``data`` and verify it once per column, using that column as
       the hashlabel.
    2. Write ``bonus_data`` chained after the first dataset and verify it
       with no chaining options, with ``length=1``, with ``as_chain=True``
       and with an explicit ``previous``.
    3. Verify that ``as_chain=True`` on an empty dataset gives a chain of
       length one.
    4. Verify that identical rows written to slices 0 and 2 give a chain of
       length two with ``as_chain=True``.
    """
    base_ds = write(data)
    # Every column in turn gets to be the hashlabel.
    for label in data[0]:
        verify(params.slices, data, base_ds, hashlabel=label)

    # ok, all the hashing stuff works out, let's test the chaining options.
    extended_ds = write(bonus_data, name="bonus", previous=base_ds)
    # no chaining options - full chain
    verify(params.slices, data + bonus_data, extended_ds, hashlabel="date")
    # just the bonus ds
    verify(
        params.slices,
        bonus_data,
        extended_ds,
        hashlabel="date",
        length=1,
    )
    # built as a chain
    verify(
        params.slices,
        data + bonus_data,
        extended_ds,
        hashlabel="date",
        as_chain=True,
    )

    # normal chaining
    first_part = verify(params.slices, data, base_ds, hashlabel="date")
    second_part = verify(
        params.slices,
        data + bonus_data,
        extended_ds,
        hashlabel="date",
        previous=first_part,
    )
    assert second_part.chain() == [first_part, second_part], (
        "chain of %s is not [%s, %s] as expected"
        % (second_part, first_part, second_part)
    )

    # as_chain sparseness
    empty_writer = DatasetWriter(columns=columns, name="empty")
    empty_writer.get_split_write()
    empty_result = verify(
        params.slices,
        [],
        empty_writer.finish(),
        hashlabel="date",
        as_chain=True,
    )
    assert len(empty_result.chain()) == 1, empty_result + ": dataset_hashpart on empty dataset with as_chain=True did not produce a single dataset"

    # two populated slices with the same data, should end up in two datasets.
    sparse_writer = DatasetWriter(columns=columns, name="0 and 2")
    for populated_candidate in (0, 1, 2):
        sparse_writer.set_slice(populated_candidate)
        # Slice 1 is selected but deliberately left without rows.
        if populated_candidate != 1:
            sparse_writer.write_dict(data[0])
    # All remaining slices are selected but stay empty.
    for empty_slice in range(3, params.slices):
        sparse_writer.set_slice(empty_slice)
    sparse_result = verify(
        params.slices,
        [data[0]],
        sparse_writer.finish(),
        hashlabel="date",
        as_chain=True,
    )
    chain_length = len(sparse_result.chain())
    assert chain_length == 2, (
        "%s (built with as_chain=True) has %d datasets in chain, expected 2."
        % (sparse_result, chain_length)
    )
def analysis(sliceno, prepare_res, job):
    """Write this slice's rows to the default, named and passed-in writers.

    Args:
        sliceno: Slice number; written as a value, used to derive a
            day-of-month (capped at 31) and used as an index into the
            ``test_data`` columns.
        prepare_res: Two-item result unpacked as ``(writer, num)``.
        job: Object whose ``datasetwriter()`` supplies the default writer.
    """
    default_writer = job.datasetwriter()
    named_writer = DatasetWriter(name="named")
    passed_writer, num = prepare_res

    # Two rows for the default writer: one by keyword, one as a list.
    default_writer.write(a=sliceno, b="a")
    default_writer.write_list([num, str(sliceno)])

    # Cap the day at 31 so high slice numbers still give a valid date.
    day_of_month = min(sliceno + 1, 31)
    named_writer.write(True, date(1536, 12, day_of_month))
    named_row = {"c": False, "d": date(2236, 5, day_of_month)}
    named_writer.write_dict(named_row)

    # slice 0 is written in synthesis
    if not (0 < sliceno < test_data.value_cnt):
        return
    passed_row = {}
    for column, values in test_data.data.items():
        passed_row[column] = values[sliceno]
    passed_writer.write_dict(passed_row)