def initialize():
    """Build an ``RCITester`` over a freshly generated, seeded skeleton.

    This function takes no arguments; it reads ``idx``, ``schema``,
    ``sizing_method``, ``base_size``, ``rcm`` and ``n_jobs`` from the
    surrounding scope (not visible in this block).  Every seeded step uses
    the same seed, ``idx + 1``.

    Returns:
        An ``RCITester`` wrapping an ``RBFKernelComputer`` that is built on a
        ``DataCenter`` over the generated skeleton.
    """
    seed = idx + 1
    np.random.seed(seed)

    # Skeleton sizes come from the sizing method applied to the base size.
    sizes = sizing_method(base_size, schema)
    skeleton = sized_random_skeleton(schema, sizes, seed=seed)

    # Populate the skeleton with values drawn from the parametrized model.
    parametrized_rcm = linear_gaussians_rcm(rcm, seed=seed)
    generate_values_for_skeleton(parametrized_rcm, skeleton, seed=seed)

    kernel_computer = RBFKernelComputer(
        DataCenter(skeleton),
        additive=1e-2,
        n_jobs=n_jobs,
        eqsize_only=False,
        k_cache_max_size=128,
    )
    return RCITester(kernel_computer, n_jobs=n_jobs)
def test_company():
    """Generate and normalize linear-Gaussian values for the company model.

    Builds one canonical relational variable per attribute of the company
    schema, gives each a linear-Gaussian function whose coefficient is 1.0
    for every parent (average aggregation, ``normal_sampler(0, 0.3)`` noise),
    then generates values on a skeleton created with ``n = 400`` and
    ``max_degree=2`` and normalizes that skeleton.

    As visible here the function makes no assertions; it only exercises the
    generation pipeline end to end.
    """
    n = 400
    schema, rcm = company_schema(), company_rcm()

    # One canonical variable (single-item path) for every attribute.
    effects = {
        RelationalVariable(RelationalPath(rcm.schema.item_class_of(attr)), attr)
        for attr in rcm.schema.attrs
    }
    skeleton = generate_skeleton(schema, n, max_degree=2)

    # Each effect depends on all of its parents with unit coefficient.
    functions = {
        effect: linear_gaussian(
            {cause: 1.0 for cause in rcm.pa(effect)},
            average_agg(),
            normal_sampler(0, 0.3),
        )
        for effect in effects
    }

    param_rcm = ParamRCM(rcm.schema, rcm.directed_dependencies, functions)
    generate_values_for_skeleton(param_rcm, skeleton)
    normalize_skeleton(skeleton)
def generate_values(independent, mu, sd, skeleton, slope, seed=None):
    """Populate ``skeleton`` with values from a fixed four-variable model.

    The structure has three dependencies: ``[A, AC, C].U --> [A].X``,
    ``[B, BD, D].V --> [B].Y`` and ``[B, AB, A].X --> [B].Y``.  The item
    classes, attributes and ``schema`` are read from the surrounding scope
    (not visible in this block).

    Args:
        independent: When truthy, the X --> Y coefficient is forced to 0.0.
            The dependency itself remains in the model structure.
        mu: Mean passed to the samplers of ``U`` and ``V``.
        sd: Standard deviation passed to every sampler.
        skeleton: Skeleton handed to ``generate_values_for_skeleton``.
        slope: Coefficient of X in the function for Y (unless ``independent``).
        seed: If not ``None``, used to seed ``np.random`` first.

    Returns:
        None.
    """
    if seed is not None:
        np.random.seed(seed)
    coefficient = 0.0 if independent else slope

    def rvar(path, attr):
        # Shorthand for a relational variable along ``path`` ending in ``attr``.
        return RelationalVariable(RelationalPath(path), attr)

    var_X = rvar(A, X)
    var_Y = rvar(B, Y)
    var_U = rvar(C, U)
    var_V = rvar(D, V)

    dependencies = {
        RelationalDependency(rvar([A, AC, C], U), var_X),
        RelationalDependency(rvar([B, BD, D], V), var_Y),
        RelationalDependency(rvar([B, AB, A], X), var_Y),  # X-->Y
    }
    rcm = RCM(schema, dependencies)

    # U and V have no parents: their values come from the sampler alone.
    functions = {}
    functions[var_U] = linear_gaussian(
        dict(), average_agg(), normal_sampler(mu, sd))
    functions[var_V] = linear_gaussian(
        dict(), average_agg(), normal_sampler(mu, sd))
    functions[var_X] = linear_gaussian(
        {rvar([A, AC, C], U): 1.0},
        sum_agg(),
        normal_sampler(0, sd))
    functions[var_Y] = linear_gaussian(
        {
            rvar([B, BD, D], V): 1.0,
            rvar([B, AB, A], X): coefficient,
        },
        sum_agg(),
        normal_sampler(0, sd))

    # Parametrize RCM and generate values
    parametrized = ParamRCM(rcm.schema, rcm.directed_dependencies, functions)
    generate_values_for_skeleton(parametrized, skeleton)
def generate_values(seed, schema, skeleton, null_hypothesis=True, mu=0.0, sd=0.1):
    """Pick a three-variable model over X, Y, Z and generate values for it.

    A template of two attribute-level arrows is chosen with ``pick_one``:

    * ``null_hypothesis`` truthy (X _||_ Y | Z): ``X --> Z --> Y`` or
      ``X <-- Z --> Y``.
    * otherwise (not X _||_ Y | Z): ``X --> Z <-- Y``.

    Each arrow becomes a relational dependency whose cause path is chosen
    with ``pick_one`` among the enumerated paths from the effect's item
    class that terminate at the cause's item class.  Every canonical variable
    then gets a linear-Gaussian function with coefficient 1 per parent,
    sum aggregation and ``normal_sampler(mu, sd)``.

    Args:
        seed: If not ``None``, used to seed ``np.random`` first.
        schema: Schema providing items ``'A'``, ``'B'``, ``'C'``,
            attributes ``'X'``, ``'Y'``, ``'Z'`` and relationships
            ``'R_AB'``, ``'R_AC'``, ``'R_BC'``.
        skeleton: Skeleton handed to ``generate_values_for_skeleton``.
        null_hypothesis: Selects which template family is used and from
            which perspective the returned variables are expressed.
        mu: Mean passed to every sampler (acts as the bias term).
        sd: Standard deviation passed to every sampler.

    Returns:
        ``(U, V, W, rcm, param_rcm)``, where ``U``, ``V``, ``W`` are
        relational variables from perspective ``B`` under the null
        hypothesis and from perspective ``A`` otherwise.
    """
    if seed is not None:
        np.random.seed(seed)

    A = schema['A']
    Z = schema['Z']
    B = schema['B']
    Y = schema['Y']
    C = schema['C']
    X = schema['X']
    R_AB = schema['R_AB']
    R_AC = schema['R_AC']
    R_BC = schema['R_BC']

    # 2-hop here = 1 hop in an entity-only graph
    rpaths_by_base = {}
    for ent_class in (A, B, C):
        rpaths_by_base[ent_class] = list(enumerate_rpaths(schema, 2, ent_class))

    # if X _||_Y | Z:      (X --> Z --> Y) or (X <-- Z --> Y)
    # if not X _||_Y | Z:  (X --> Z <-- Y)
    independents = [[(X, Z), (Z, Y)], [(Z, X), (Z, Y)]]
    dependents = [[(X, Z), (Y, Z)]]
    templates = independents if null_hypothesis else dependents

    # Model structure specification
    rdeps = []
    for cause_attr, effect_attr in pick_one(templates):
        # cause_attr --> effect_attr
        cause_class = schema.item_class_of(cause_attr)
        effect_class = schema.item_class_of(effect_attr)
        candidates = [
            rp for rp in rpaths_by_base[effect_class]
            if rp.terminal == cause_class
        ]
        cause_rvar = RelationalVariable(pick_one(candidates), cause_attr)
        effect_rvar = RelationalVariable(RelationalPath(effect_class), effect_attr)
        rdeps.append(RelationalDependency(cause_rvar, effect_rvar))
    rcm = RCM(schema, set(rdeps))

    # Model 'function' specification
    # noise = bias + noise (with 0 mean)
    functions = {
        canonical: linear_gaussian(
            {parent: 1 for parent in rcm.pa(canonical)},
            sum_agg(),
            normal_sampler(mu, sd),
        )
        for canonical in enumerate_rvars(schema, 0)
    }

    param_rcm = ParamRCM(rcm.schema, rcm.directed_dependencies, functions)
    generate_values_for_skeleton(param_rcm, skeleton)

    if null_hypothesis:
        U = RelationalVariable(RelationalPath([B, R_BC, C]), X)
        V = RelationalVariable(RelationalPath(B), Y)  # canonical
        W = RelationalVariable(RelationalPath([B, R_AB, A]), Z)
    else:
        W = RelationalVariable(RelationalPath(A), Z)
        U = RelationalVariable(RelationalPath([A, R_AB, B]), Y)
        V = RelationalVariable(RelationalPath([A, R_AC, C]), X)

    return U, V, W, rcm, param_rcm