# The snippets below assume the following imports. The helper-module paths
# are best guesses for the LogicalFactorisationMachines package
# (https://github.com/TammoR/LogicalFactorisationMachines) and may differ
# across versions; test helpers such as generate_orm_product and
# generate_random_2D_data are assumed to be in scope.
import itertools

import numpy as np
import pandas as pd

import lom
import lom.auxiliary_functions as aux
from lom._numba import lambda_updates_numba


def tensorm_reconstruct(X, L, hyperparms=[.5, 1.0], return_layer=False):

    if not X.dtype == np.int8:
        X = np.array(X, dtype=np.int8)

    if np.all([y in [0, 1] for y in np.unique(X)]):
        X = 2 * X - 1  # map {0, 1} data to the {-1, 1} coding used by lom

    p_init = hyperparms[0]     # unused here; kept for interface parity
    lbda_init = hyperparms[1]  # with tensorm_reconstruct_indp below

    orm = lom.Machine()
    data = orm.add_matrix(X, fixed=True)
    layer = orm.add_layer(child=data, latent_size=L, model='OR-AND')

    orm.infer(burn_in_min=100, no_samples=50)

    X_recon = layer.output(technique='factor_mean', lazy=False)
    X_recon_plugin = layer.output(technique='factor_map', lazy=False)
    f_tensorm = (layer.z.mean(), layer.u.mean(), layer.v.mean())

    if return_layer is True:
        return layer
    else:
        return X_recon, f_tensorm, X_recon_plugin
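# Illustrative usage sketch (added; not from the source): factorise a small
# synthetic Boolean tensor. The OR-AND generative step via einsum is an
# assumption about the intended data model, and all shapes are arbitrary.
def _demo_tensorm_reconstruct():
    np.random.seed(1)
    Z = (np.random.rand(20, 2) > .5).astype(int)
    U = (np.random.rand(15, 2) > .5).astype(int)
    V = (np.random.rand(10, 2) > .5).astype(int)
    # Boolean OR over the latent dimension of the AND of the three factors
    X = (np.einsum('il,jl,kl->ijk', Z, U, V) > 0).astype(np.int8)
    X_recon, f_tensorm, X_plugin = tensorm_reconstruct(X, L=2)
    print('posterior-mean accuracy:', np.mean((X_recon > .5) == (X == 1)))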
def generate_data(N=100, D=100):
    np.random.seed(2)

    L = 5
    U = np.array(2 * (np.random.rand(D, L) > .5) - 1, dtype=np.int8)
    Z = np.array(2 * (np.random.rand(N, L) > .5) - 1, dtype=np.int8)
    X = np.array(2 * np.dot(Z == 1, U.transpose() == 1) - 1, dtype=np.int8)
    X[int(X.shape[0] / 2):, :] *= -1  # flip the bottom half to plant a second pattern

    orm = lom.Machine()
    data = orm.add_matrix(val=X, sampling_indicator=False)
    # latent size must match the planted factors assigned below
    layer = orm.add_layer(size=L, child=data, lbda_init=2., noise_model='or-link')
    layer.z.val = Z
    layer.u.val = U

    return layer
def test_orm():
    np.random.seed(3)

    N = 10
    M = 5
    L = 3

    X = 2 * np.array([M * [0, 0, 1, 1, 0, 0],
                      M * [1, 1, 0, 0, 0, 0],
                      M * [0, 0, 0, 0, 1, 1]]) - 1
    X = np.concatenate(N * [X])
    N, D = X.shape

    orm = lom.Machine()
    orm.framework = 'numba'
    data = orm.add_matrix(val=X, fixed=True)  # , sampling_indicator=False)

    layer1 = orm.add_layer(latent_size=L, child=data, model='OR-AND')
    layer1.lbda.val = 2.0
    # layer1.factors[0].val = np.array(2 * np.ones([N, L]) - 1, dtype=np.int8)
    # layer1.factors[1].val = np.array(2 * np.ones([D, L]) - 1, dtype=np.int8)

    orm.infer(convergence_window=20, no_samples=20, convergence_eps=1e-3,
              burn_in_min=20, burn_in_max=1000)

    assert abs(1 / (1 + np.exp(-orm.layers[0].lbda())) - 1.) < 1e-2
def test_lambda_update_or():

    model = 'OR-AND'

    U, Z, X = generate_orm_product()

    orm = lom.Machine()
    data = orm.add_matrix(val=X, fixed=True)
    layer = orm.add_layer(latent_size=3, child=data, model=model)
    layer.factors[0].val = Z
    layer.factors[1].val = U

    # with the planted factors the reconstruction of the data is exact
    assert np.all(np.dot(Z == 1, U.transpose() == 1) == (data() == 1))

    lbda_update_fct = lambda_updates_numba.make_lbda_update_fct(model, 2)
    lbda_update_fct(layer.lbda)

    ND = np.prod(X.shape)
    assert layer.lbda() == -np.log(((ND + 2) / (ND + 1)) - 1)
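# Why the asserted value holds (sketch): the expression
#   -log((ND + 2) / (ND + 1) - 1) = -log(1 / (ND + 1)) = log(ND + 1)
# corresponds to sigmoid(lbda) = (ND + 1) / (ND + 2), i.e. a Laplace-smoothed
# accuracy (P + 1) / (ND + 2) with P = ND correctly reconstructed cells,
# which is what the exact-reconstruction assert above guarantees.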
def LOM_predictive(experiment, return_machine=True):
    """Experiment is a tuple with all relevant settings."""

    # unpack experiment parameters
    X, X_train, train_mask, machine, L, random_idx, lbda_init, anneal = experiment

    orm = lom.Machine()
    data = orm.add_matrix(X_train, fixed=True)
    layer = orm.add_layer(latent_size=L, child=data, model=machine)
    layer.lbda.val = lbda_init
    # layer.auto_reset = True

    if anneal is True:
        orm.anneal = True
        orm.infer(burn_in_min=600, fix_lbda_iters=0, convergence_window=50,
                  burn_in_max=1000, no_samples=10)
    else:
        orm.infer(burn_in_min=100, fix_lbda_iters=50, convergence_window=10,
                  burn_in_max=150, no_samples=10)

    out = layer.output(technique='factor_mean')[train_mask] > .5
    truth = (-2 * layer.invert_data + 1) * X[train_mask] == 1

    if return_machine is False:
        return [np.mean(out == truth), machine, layer.size]
    else:
        return ([np.mean(out == truth), machine, layer.size],
                [x.mean() for x in layer.factors])
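# Illustrative call (added; not from the source). Assumptions: X is coded in
# {-1, 1}, zeros in X_train mark the held-out cells, and train_mask selects
# the positions that are scored after inference.
def _demo_LOM_predictive():
    np.random.seed(0)
    X = np.array(2 * (np.random.rand(50, 20) > .5) - 1, dtype=np.int8)
    train_mask = np.random.rand(*X.shape) > .9
    X_train = X.copy()
    X_train[train_mask] = 0  # hide the cells to be predicted
    experiment = (X, X_train, train_mask, 'OR-AND', 3, 0, 2.0, False)
    (acc, machine, size), factor_means = LOM_predictive(experiment)
    print(machine, 'predictive accuracy on held-out cells:', acc)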
def tensorm_reconstruct_indp(X, L, hyperparms=[0.5, 1.0]):

    if not X.dtype == np.int8:
        X = np.array(X, dtype=np.int8)

    if np.all([y in [0, 1] for y in np.unique(X)]):
        X = 2 * X - 1

    p_init = hyperparms[0]
    lbda_init = hyperparms[1]

    orm = lom.Machine()
    data = orm.add_matrix(X, sampling_indicator=False)
    layer = orm.add_tensorm_layer(
        child=data, size=L, lbda_init=lbda_init,
        inits=3 * [p_init], noise_model='tensorm-link-indp')

    # assign the correct updating functions (`wrappers` and `sampling` are
    # modules from an older version of the library)
    for factor_matrix in data.parents:
        factor_matrix.sampling_fct = \
            wrappers.draw_tensorm_indp_noparents_onechild_wrapper
    layer.lbda_p.sampling_fct = sampling.draw_lbda_tensorm_indp_p
    layer.lbda_m.sampling_fct = sampling.draw_lbda_tensorm_indp_m
    layer.lbda = (layer.lbda_p, layer.lbda_m)

    orm.infer(burn_in_min=1000, no_samples=50)

    X_recon = layer.output(recon_model='mc', force_computation=True)
    X_recon_plugin = layer.output(recon_model='plugin', force_computation=True)
    f_tensorm = (layer.z.mean(), layer.u.mean(), layer.v.mean())

    return X_recon, f_tensorm, X_recon_plugin
def test_ibp():
    X = generate_random_2D_data()

    orm = lom.Machine()
    data = orm.add_matrix(X, fixed=True)
    layer = orm.add_layer(latent_size=1, child=data, model='OR-AND-IBP')
    orm.infer(burn_in_min=200)

    assert np.mean((2 * layer.output() - 1) == X) > .9
def test_all_3D_LOMs():

    operators = ['AND', 'NAND', 'OR', 'NOR', 'XOR', 'NXOR']
    # operators = ['OR', 'AND']
    machines = [x[0] + '-' + x[1]
                for x in list(itertools.product(operators, repeat=2))]

    for machine in aux.canonical_loms():  # machines:

        N = 50
        D = 10
        L = 3

        Z = np.array(np.random.rand(N, L) > .5, dtype=np.int8)
        U = np.array(np.random.rand(D, L) > .5, dtype=np.int8)
        V = np.array(np.random.rand(D, L) > .5, dtype=np.int8)

        # generate_data_fast is not available for all machines
        X = aux.lom_generate_data([2 * Z - 1, 2 * U - 1, 2 * V - 1],
                                  model=machine)

        orm = lom.Machine()
        data = orm.add_matrix(X, fixed=True)
        layer = orm.add_layer(latent_size=L, child=data, model=machine)
        layer.z.val = (1 - 2 * layer.invert_factors) * (2 * Z - 1)
        layer.u.val = (1 - 2 * layer.invert_factors) * (2 * U - 1)
        layer.v.val = (1 - 2 * layer.invert_factors) * (2 * V - 1)

        # we initialise with ground truth, hence set lbda large to avoid
        # an effectively random initialisation
        layer.lbda.val = 3.0

        orm.infer(burn_in_min=10, fix_lbda_iters=2)

        try:
            assert np.mean((2 * (layer.output(technique='factor_map') > .5) - 1)
                           == data()) > .98
            assert np.mean((2 * (layer.output(technique='factor_mean') > .5) - 1)
                           == data()) > .98
        except AssertionError:
            acc = np.mean((2 * (layer.output(technique='factor_mean') > .5) - 1)
                          == data())
            raise ValueError(machine + ' failed with reconstruction accuracy of '
                             + str(acc))
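# For orientation (an assumption about the data model, not the library
# implementation): for the canonical 'OR-AND' machine, aux.lom_generate_data
# should reduce to the Boolean OR over latent dimensions of the AND of the
# three factors, in {-1, 1} coding.
def _or_and_tensor(Z, U, V):
    on = np.einsum('il,jl,kl->ijk',
                   (Z == 1).astype(int),
                   (U == 1).astype(int),
                   (V == 1).astype(int)) > 0
    return np.array(2 * on - 1, dtype=np.int8)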
def test_maxmachine():

    # generate toy data
    A = 2 * np.array([[0, 0, 0, 0, 0, 1, 1]]) - 1
    B = 2 * np.array([[0, 0, 1, 1, 1, 1, 0]]) - 1
    C = 2 * np.array([[1, 1, 1, 1, 0, 0, 0]]) - 1
    X = np.concatenate(100 * [C] + 100 * [B] + 100 * [A])
    # + 100 * [2 * ((A == 1) + (B == 1)) - 1])
    # X = np.concatenate(100 * [C] + 50 * [2 * np.array([[0, 0, 0, 1, 1, 1, 1]]) - 1])

    for i in range(X.shape[0]):
        for j in range(0, X.shape[1]):
            if np.random.rand() > .98:  # .9
                X[i, j] = -X[i, j]
        # heteroscedastic noise on the last few columns
        if True:
            for j in range(4, X.shape[1]):
                if np.random.rand() > .95:  # .75
                    X[i, j] = -X[i, j]

    machine = 'MAX-AND'
    orm = lom.Machine()
    L = 3
    data = orm.add_matrix(X, fixed=True)
    layer = orm.add_layer(latent_size=L, child=data, model=machine)

    # we initialise with ground truth, hence set lbda large to avoid
    # an effectively random initialisation
    layer.u.val = np.array([[1, -1, -1],
                            [1, -1, -1],
                            [1, 1, -1],
                            [1, 1, -1],
                            [-1, 1, 1],
                            [-1, 1, 1],
                            [-1, -1, 1]], dtype=np.int8)
    layer.z.val = np.array(np.concatenate([100 * [[1, -1, -1]],
                                           100 * [[-1, 1, -1]],
                                           100 * [[-1, -1, 1]]]),
                           dtype=np.int8)
    layer.lbda.val = np.array([.99 for x in range(L + 1)])

    orm.infer(burn_in_min=50, burn_in_max=200, fix_lbda_iters=20, no_samples=100)

    assert np.mean((layer.output(technique='mc') > .5) == (X == 1)) > .8
def generate_data(N=100, D=100, L=10):
    np.random.seed(2)

    U = np.array(2 * (np.random.rand(D, L) > .5) - 1, dtype=np.int8)
    Z = np.array(2 * (np.random.rand(N, L) > .5) - 1, dtype=np.int8)
    X = np.array(2 * np.dot(Z == 1, U.transpose() == 1) - 1, dtype=np.int8)
    X[int(X.shape[0] / 2):, :] *= -1  # flip the bottom half to plant a second pattern

    orm = lom.Machine()
    data = orm.add_matrix(val=X, fixed=True)
    # latent size must match the planted factors assigned below
    layer = orm.add_layer(latent_size=L, child=data, model='OR-AND')
    layer.factors[0].val = Z
    layer.factors[1].val = U

    return layer
reDataMin = reDataNum.min(axis=0)
reScDataNum = ((reDataNum - reDataMin) * (dataNumMax - dataNumMin)
               / (reDataMax - reDataMin) + dataNumMin)

# insert reconstructed data in missing values
re2DataNum = dataNum.copy()
for idx in randIdxNum:
    re2DataNum.loc[idx[0], idx[1]] = reScDataNum.loc[idx[0], idx[1]]

#------------------------------------------------------------------------------
#--------------------------Boolean matrix factorization------------------------
#------------------------------------------------------------------------------
# BMF with missing values, see
# https://github.com/TammoR/LogicalFactorisationMachines

dataBinVal = dataBin.copy()

orm = lom.Machine()
data = orm.add_matrix(dataBinVal.values, fixed=True)
layer1 = orm.add_layer(latent_size=10, child=data, model='OR-AND')
layer2 = orm.add_layer(latent_size=3, child=layer1.z, model='OR-AND')
orm.infer(burn_in_min=100, burn_in_max=1000, no_samples=1000)

reDataBin = pd.DataFrame(layer1.output(technique='factor_map'),
                         columns=dataBinVal.columns)

# turn into boolean again
reDataBin[reDataBin == -1] = 0
reDataBool = reDataBin.astype(bool)

rssBMF = sum([abs(reDataBool.loc[idx[0], idx[1]] ^ dataBool.loc[idx[0], idx[1]])
              for idx in randIdxBool])
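# Follow-up sketch (added): report the mismatch rate instead of the raw sum,
# which is easier to compare across hold-out masks of different sizes.
errBMF = rssBMF / len(randIdxBool)
print('BMF imputation error rate: {:.3f}'.format(errBMF))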