def test_engine_urandom(df, df_das_stub):
    """Run the engine twice with urandom-seeded noise.

    Verifies row counts, that noise is present but bounded relative to the
    true tables, and that two independent runs produce different noise.
    """
    config = ConfigParser()
    config.read_file(io.StringIO(engineurandom + confstringddict + laplacedp))

    def total_abs_diff(m1, m2):
        # Sum of absolute element-wise differences between two matrices.
        return sum(sum(abs(m1 - m2)))

    # First engine run; e.run() populates "true_tables" in its input dict.
    engine_a = spark_sql_das_engine.engine(
        config=config, name='engine', das=df_das_stub)
    data_a = {'original_data': {'inputtable': df}}
    assert engine_a.willRun()
    private_tables = engine_a.run(data_a)[0]
    true_tables = data_a["true_tables"]

    # Second, independent engine run on the same input.
    engine_b = spark_sql_das_engine.engine(
        config=config, name='engine', das=df_das_stub)
    data_b = {'original_data': {'inputtable': df}}
    assert engine_b.willRun()
    private_tables1 = engine_b.run(data_b)[0]

    for tname in private_tables:
        noisy = private_tables[tname].as_matrix()
        other = private_tables1[tname].as_matrix()
        truth = true_tables[tname].as_matrix()
        # Row-count check.
        assert noisy.shape[0] == 4
        # Noise exists but is bounded versus the true table.
        assert 0.1 < total_abs_diff(noisy, truth) < 50
        # The two independent runs differ from each other, within bounds.
        assert 0.1 < total_abs_diff(noisy, other) < 50
        # Relative error on the first column stays small.
        assert sum((abs(noisy - truth) / noisy)[:, 0]) < 0.1
def test_engine_create_noisifiers():
    """Check the noisifier structure the engine builds and its epsilon math."""
    config = ConfigParser()
    config.read_file(io.StringIO(variosalgos + config4engine))
    e = spark_sql_das_engine.engine(config=config)

    # Table 't': noisifiers appear at the expected (group, position) slots.
    expected_t = {
        (0, 0): algreprs[1],
        (0, 1): algreprs[2],
        (1, 0): algreprs[3],
    }
    for (grp, pos), want in expected_t.items():
        assert repr(e.noisifiers['t'][grp][pos]) == want

    # Table 't2': variable 'a' gets a SmoothLaplace with the re-assigned
    # epsilon of 0.4 (see epsilon notes below); 'b' matches algreprs[1].
    assert repr(
        e.noisifiers['t2'][0][0]
    ) == "SmoothLaplaceAlgorithm:{'varname': 'a', 'alpha': 0.05, 'algorithm': 'SmoothLaplace', 'delta': 0.05, 'epsilon': 0.4}"
    assert repr(e.noisifiers['t2'][0][1]) == algreprs[1]

    # Master-epsilon bookkeeping:
    # [engine] epsilon is 1.0; every variable gets a 0.4 fraction ('a' in t2
    # was set to 0.3 but is re-assigned 0.4 because it composes with 'b').
    # Table 't': b and c are composable, so one 0.4 (no summing), plus 0.4
    # for d — 0.8 total.
    assert e.table_epsilons['t'] == 0.8
    # Table 't2': a and b are composable, so a single 0.4.
    assert e.table_epsilons['t2'] == 0.4
    # Grand total: 0.8 + 0.4 = 1.2.
    assert abs(e.epsilon_effective - 1.2) < 1e-5
def test_engine_reproducible(df, df_das_stub):
    """Seeded engine run: counts are right and the noisy tables match the
    values that the fixed seed is known to produce."""
    config = ConfigParser()
    config.read_file(io.StringIO(engineseeded + confstringddict + laplacedp))
    original_data = {'original_data': {'inputtable': df}}

    engine = spark_sql_das_engine.engine(
        config=config, name='engine', das=df_das_stub)
    assert engine.willRun()

    # Seed NumPy from the engine config so the noise draw is deterministic.
    np.random.seed(engine.getint("seed", default=101, section="engine"))
    private_tables = engine.run(original_data)[0]

    # Reference values produced by this seed (golden output).
    expected = np.array([[100.03334714, 1.46291515],
                         [800.15237662, 9.10199919],
                         [797.13439125, 7.51212961],
                         [898.93010144, 10.54752546]])

    for table in private_tables.values():
        matrix = table.as_matrix()
        # Row-count check.
        assert matrix.shape[0] == 4
        # The noisy table reproduces the golden output to numerical precision.
        assert sum(sum(abs(matrix - expected))) < 1e-7
def test_noisealg_repr(df_das_stub):
    """Exercise __repr__ and noisify() for each supported noise algorithm,
    as created by the engine from the 'variosalgos' config."""
    config = ConfigParser()
    config.read_file(io.StringIO(variosalgos))

    # Dummy engine used only as a factory for noise algorithms.
    engine = spark_sql_das_engine.engine(
        config=config, name='engine', das=df_das_stub)
    algs = [
        engine.create_noise_algorithm_by_name("t", var) for var in "abcdef"
    ]

    # Each algorithm's repr matches the reference string for its index.
    for idx, alg in enumerate(algs):
        assert repr(alg) == algreprs[idx]

    # noisify() spot checks; expected values were derived from the algorithm
    # descriptions, not from the implementation.
    assert algs[0].noisify([{'a': 100}]) == 100
    assert abs(
        algs[1].noisify([{'b': 100, 'ssmax': 200}, {'b': .816062}])
        - 150) < 1e-3
    assert abs(
        algs[2].noisify([{'c': 100, 'ssmax': 200}, {'c': .10972505}])
        - 1406.3203865411335) < 1e-3
    assert abs(
        algs[3].noisify([{'d': 100, 'ssmax': 200}, {'d': .75}])
        - 122.10774911533412) < 1e-3
    assert abs(
        algs[4].noisify([{'e': 100}, {'e': .816062}])
        - 101) < 1e-3
    assert abs(
        algs[5].noisify(
            [{'f': 100, 'ssmax': 200, 'sectop': 150, 'numret': 50}])
        - 0) < 1e-3
    assert abs(
        algs[5].noisify(
            [{'f': 1000, 'ssmax': 20, 'sectop': 15, 'numret': 50}])
        - 1000) < 1e-3
def test_engine_nonoise(df):
    """Engine run with noise disabled: counts are right and the private
    tables come out exactly equal to the true tables."""
    config = ConfigParser()
    config.read_file(io.StringIO(engineurandom + confstringddict + nonoise))

    engine = spark_sql_das_engine.engine(config=config)
    original_data = {'original_data': {'inputtable': df}}
    assert engine.willRun()

    # run() returns the private tables and fills in "true_tables".
    private_tables = engine.run(original_data)[0]
    true_tables = original_data["true_tables"]

    for tname, noisy in private_tables.items():
        noisy_matrix = noisy.as_matrix()
        # Row-count check.
        assert noisy_matrix.shape[0] == 4
        # With no noise configured, private and true tables are identical.
        assert (true_tables[tname].as_matrix() == noisy_matrix).all()