def gen_data(size, seed):
    """Build a DataFrame with three generated float columns 'a', 'b', 'c'.

    Each column is produced by ``generate_uniform_float_column`` with the
    bounds ``0.`` and ``1.`` and its own seed: ``seed + 1`` for 'a',
    ``seed + 2`` for 'b' and ``seed + 3`` for 'c'.

    Args:
        size: Forwarded as the first argument of the column generator
            (presumably the number of rows -- confirm against the helper).
        seed: Base seed; every column uses a distinct offset from it.

    Returns:
        The result of ``DataFrame.from_dict`` applied to the column mapping.
    """
    # NOTE(review): this file contains another ``gen_data`` definition with a
    # different column layout; if both live in the same module, the later one
    # rebinds the name.
    columns = {
        name: generate_uniform_float_column(size, 0.0, 1.0, seed + offset)
        for offset, name in enumerate('abc', start=1)
    }
    return DataFrame.from_dict(columns)
def gen_data(size, seed):
    """Build a DataFrame with two float columns and two string columns.

    Columns 'a' and 'b' come from ``generate_uniform_float_column`` with the
    bounds ``0.`` and ``1.`` (seeds ``seed + 1`` and ``seed + 2``). Columns
    'c' and 'd' come from ``generate_uniform_string_column`` with the value
    lists ``['a', 'b', 'c']`` and ``['e', 'f', 'g']`` (seeds ``seed + 3`` and
    ``seed + 4``).

    Args:
        size: Forwarded as the first argument of every column generator
            (presumably the number of rows -- confirm against the helpers).
        seed: Base seed; every column uses a distinct offset from it.

    Returns:
        The result of ``DataFrame.from_dict`` applied to the column mapping.
    """
    columns = {}

    # Float columns first, so seed offsets 1 and 2 are consumed in order.
    for offset, name in enumerate(('a', 'b'), start=1):
        columns[name] = generate_uniform_float_column(
            size, 0.0, 1.0, seed + offset
        )

    # String columns, each drawing from its own list of candidate values.
    columns['c'] = generate_uniform_string_column(
        size, ['a', 'b', 'c'], seed + 3
    )
    columns['d'] = generate_uniform_string_column(
        size, ['e', 'f', 'g'], seed + 4
    )

    return DataFrame.from_dict(columns)
def add_noise_to_target(target, seed, threshold=0.05):
    """Replace selected entries of ``target`` with ``1 - target``.

    A column of ``len(target)`` values is generated with
    ``generate_uniform_float_column`` (bounds ``0.`` and ``1.``, seed
    ``seed + 1``) and compared against ``threshold``. Where the comparison is
    true the result is ``1 - target``; elsewhere it is ``target`` unchanged.

    Args:
        target: Values to perturb; must support ``len`` and arithmetic with
            the comparison mask (presumably a 0/1 label array -- confirm
            against callers).
        seed: Base seed; the mask is generated with ``seed + 1``.
        threshold: Cut-off the generated values are compared against.

    Returns:
        ``target * (1 - mask) + (1 - target) * mask``.
    """
    draws = generate_uniform_float_column(len(target), 0.0, 1.0, seed + 1)
    flip = draws < threshold
    # Complement of the mask: 1 where the entry is kept, 0 where it is flipped.
    keep = 1 - flip
    return target * keep + (1 - target) * flip