Exemplo n.º 1
0
def prepare(params):
    """Create one DatasetWriter per slice, chained together, mirroring
    the columns of datasets.source."""
    src = datasets.source
    caption = options.caption % dict(caption=src.caption,
                                     hashlabel=options.hashlabel)
    prev_params = job_params(datasets.previous, default_empty=True)
    prev_source = prev_params.datasets.source
    # Only a single-dataset chain can keep the original filename.
    chain = src.chain(stop_jobid=prev_source, length=options.length)
    filename = src.filename if len(chain) == 1 else None
    dws = []
    previous = datasets.previous
    for sliceno in range(params.slices):
        # The final slice becomes "default" when writing as a chain.
        if options.as_chain and sliceno == params.slices - 1:
            name = "default"
        else:
            name = str(sliceno)
        dw = DatasetWriter(
            caption="%s (slice %d)" % (caption, sliceno),
            hashlabel=options.hashlabel,
            filename=filename,
            previous=previous,
            name=name,
            for_single_slice=sliceno,
        )
        previous = (params.jobid, name)
        dws.append(dw)
    # names has to be in the same order as the add calls
    # so the iterator returns the same order the writer expects.
    names = []
    for colname, col in src.columns.items():
        names.append(colname)
        for dw in dws:
            dw.add(colname, col.type)
    return dws, names, prev_source, caption, filename
def synthesis(params):
	# Exercise column name mangling: every column name must end up as a
	# valid, unique python identifier in the written dataset.
	parent_writer = DatasetWriter(name="parent")
	in_parent = [ # list because order matters
		"-",      # becomes _ because everything must be a valid python identifier.
		"a b",    # becomes a_b because everything must be a valid python identifier.
		"42",     # becomes _42 because everything must be a valid python identifier.
		"print",  # becomes print_ because print is a keyword (in py2).
		"print@", # becomes print__ because print_ is taken.
		"None",   # becomes None_ because None is a keyword (in py3).
	]
	for name in in_parent:
		parent_writer.add(name, "unicode")
	write = parent_writer.get_split_write()
	write(_="- 1", a_b="a b 1", _42="42 1", print_="print 1", None_="None 1", print__="Will be overwritten 1")
	write(_="- 2", a_b="a b 2", _42="42 2", print_="print 2", None_="None 2", print__="Will be overwritten 2")
	parent = parent_writer.finish()
	child_writer = DatasetWriter(name="child", parent=parent)
	in_child = [ # order still matters
		"print_*", # becomes print___ because print__ is taken.
		"print_",  # becomes print____ because all shorter are taken.
		"normal",  # no collision.
		"Normal",  # no collision.
		"print@",  # re-uses print__ from the parent dataset.
	]
	for name in in_child:
		child_writer.add(name, "unicode")
	write = child_writer.get_split_write()
	write(print__="print@ 1", print___="print_* 1", print____="print_ 1", normal="normal 1", Normal="Normal 1")
	write(print__="print@ 2", print___="print_* 2", print____="print_ 2", normal="normal 2", Normal="Normal 2")
	child = child_writer.finish()
	# Every original name (parent and child) should round-trip as
	# "<name> 1" and "<name> 2" when read through the child dataset.
	for colname in in_parent + in_child:
		data = set(child.iterate(None, colname))
		assert data == {colname + " 1", colname + " 2"}, "Bad data for %s: %r" % (colname, data)
Exemplo n.º 3
0
def synthesis(prepare_res, params):
    """Write datasets in synthesis: via set_slice, via the three split
    write flavours, and a per-slice None-capability test dataset."""
    dw_passed, _ = prepare_res
    # Using set_slice on a dataset that was written in analysis is not
    # actually supported, but since it currently works (as long as that
    # particular slice wasn't written in analysis) let's test it.
    dw_passed.set_slice(0)
    first_values = {k: v[0] for k, v in test_data.data.items()}
    dw_passed.write(**first_values)
    # A split writer hashed on "a", fed through all three write styles.
    dw_synthesis_split = DatasetWriter(name="synthesis_split", hashlabel="a")
    dw_synthesis_split.add("a", "int32")
    dw_synthesis_split.add("b", "unicode")
    dw_synthesis_split.get_split_write()(1, "a")
    dw_synthesis_split.get_split_write_list()([2, "b"])
    dw_synthesis_split.get_split_write_dict()({"a": 3, "b": "c"})
    dw_synthesis_manual = DatasetWriter(name="synthesis_manual",
                                        columns={"sliceno": "int32"})
    dw_nonetest = DatasetWriter(name="nonetest",
                                columns={t: t for t in test_data.data})
    for sliceno in range(params.slices):
        # One manually placed row per slice.
        dw_synthesis_manual.set_slice(sliceno)
        dw_synthesis_manual.write(sliceno)
        dw_nonetest.set_slice(sliceno)
        # None for every column that can't hold a real value.
        row = {
            k: v[0] if k in test_data.not_none_capable else None
            for k, v in test_data.data.items()
        }
        dw_nonetest.write(**row)
Exemplo n.º 4
0
def prepare(params):
    """Set up the default, named and passed DatasetWriters for the test."""
    # Need at least one slice per distinct test value.
    assert params.slices >= test_data.value_cnt
    dw_default = DatasetWriter()
    dw_default.add("a", "number")
    dw_default.add("b", "ascii")
    # Created for the side effect only; looked up by name later.
    DatasetWriter(name="named", columns={"c": "bool", "d": "date"})
    passed_columns = {t: t for t in test_data.data}
    dw_passed = DatasetWriter(name="passed", columns=passed_columns)
    return dw_passed, 42
Exemplo n.º 5
0
def synthesis(jobid):
    """Exercise Dataset.link_to_here and chain caching.

    Links datasets from jobids.selfchain into this job with overridden
    previous-links, verifies the relinked chains iterate the same data,
    then grows a local chain until a chain cache appears and checks
    that relinking keeps or drops the cache as expected.
    """
    manual_chain = [Dataset(jobids.selfchain, name) for name in "abcdefgh"]
    manual_abf = [manual_chain[0], manual_chain[1], manual_chain[5]]
    # build a local abf chain
    prev = None
    for ix, ds in enumerate(manual_abf):
        name = "abf%d" % (ix, )
        ds.link_to_here(name, override_previous=prev)
        prev = (
            jobid,
            name,
        )
    # The locally linked chain must iterate the same rows as the
    # manually assembled list of source datasets.
    manual_abf_data = list(Dataset.iterate_list(None, None, manual_abf))
    local_abf_data = list(Dataset(jobid, "abf2").iterate_chain(None, None))
    assert manual_abf_data == local_abf_data
    # disconnect h, verify there is no chain
    manual_chain[-1].link_to_here("alone", override_previous=None)
    assert len(Dataset(jobid, "alone").chain()) == 1
    # check that the original chain is unhurt
    assert manual_chain == manual_chain[-1].chain()

    # So far so good, now make a chain long enough to have a cache.
    prev = None
    ix = 0
    going = True
    while going:
        # Stop one dataset AFTER the first one whose _data contains a
        # "cache" entry (the loop body still runs once after going=False).
        if prev and "cache" in Dataset(prev)._data:
            going = False
        name = "longchain%d" % (ix, )
        dw = DatasetWriter(name=name, previous=prev)
        dw.add("ix", "number")
        dw.get_split_write()(ix)
        dw.finish()
        prev = (
            jobid,
            name,
        )
        ix += 1
    # we now have a chain that goes one past the first cache point
    full_chain = Dataset(prev).chain()
    assert "cache" in full_chain[
        -2]._data  # just to check the above logic is correct
    assert "cache" not in full_chain[-1]._data  # just to be sure..
    # Relinking with no previous must drop the cache; relinking with an
    # earlier previous must keep it.
    full_chain[-2].link_to_here("nocache", override_previous=None)
    full_chain[-1].link_to_here("withcache", override_previous=full_chain[-3])
    assert "cache" not in Dataset(jobid, "nocache")._data
    assert "cache" in Dataset(jobid, "withcache")._data
    # And make sure they both get the right data too.
    assert list(Dataset(prev).iterate_chain(None, "ix")) == list(range(ix))
    assert list(Dataset(jobid, "nocache").iterate_chain(None,
                                                        "ix")) == [ix - 2]
    assert list(Dataset(jobid, "withcache").iterate_chain(
        None, "ix")) == list(range(ix - 2)) + [ix - 1]
Exemplo n.º 6
0
def prepare(params):
    """Create writers a..h, each chained to the one before it."""
    writers = {}
    prev = None
    for name in "abcdefgh":
        writer = DatasetWriter(name=name, previous=prev)
        writer.add("ds", "ascii")
        writer.add("num", "number")
        writers[name] = writer
        prev = "%s/%s" % (params.jobid, name)
    return writers
def prepare():
	from dataset import DatasetWriter
	# previous allows chaining this method, should you wish to do so
	dw = DatasetWriter(previous=datasets.previous)
	# (name, type) pairs; add() order decides the write order.
	columns = (
		('a string', 'ascii'),       # ascii is not "any string", use 'unicode' for that
		('large number', 'number'),  # number is any (real) number, a float or int of any size
		('small number', 'number'),
		('small integer', 'int32'),  # int32 is a signed 32 bit number
		('gauss number', 'number'),
		('gauss float', 'float64'),  # float64 is what many other languages call double
	)
	for colname, coltype in columns:
		dw.add(colname, coltype)
	return dw
Exemplo n.º 8
0
def prepare(params):
    """Create one DatasetWriter per hashing strategy under test."""
    assert params.slices >= 2, "Hashing won't do anything with just one slice"
    dws = DotDict()
    configs = (
        ("unhashed_manual", None),        # manually interleaved
        ("unhashed_split", None),         # split_write interleaved
        ("up_checked", "up"),             # hashed on up using dw.hashcheck
        ("up_split", "up"),               # hashed on up using split_write
        ("down_checked", "down"),         # hashed on down using dw.hashcheck
        ("down_discarded", "down"),       # hashed on down using discarding writes
        ("down_discarded_list", "down"),  # hashed on down using discarding list writes
        ("down_discarded_dict", "down"),  # hashed on down using discarding dict writes
    )
    for name, hashlabel in configs:
        dw = DatasetWriter(name=name, hashlabel=hashlabel)
        dw.add("up", "int32")
        dw.add("down", "int32")
        dws[name] = dw
    return dws
Exemplo n.º 9
0
def prepare():
    """Writer on the default dataset: one text, one numeric column."""
    writer = DatasetWriter()
    for colname, coltype in (("str", "ascii"), ("num", "number")):
        writer.add(colname, coltype)
    return writer
def prepare():
    """Extend datasets.source with a computed 'prod' column."""
    writer = DatasetWriter(parent=datasets.source)
    writer.add('prod', 'number')  # works for float as well as int
    return writer
Exemplo n.º 11
0
def prepare():
    """Chainable writer with one float and one int column."""
    writer = DatasetWriter(previous=datasets.previous)
    for colname, coltype in (('rflt', 'float64'), ('rint', 'int64')):
        writer.add(colname, coltype)
    return writer