def run(model, model_updater, params_true, time=100):
    """Drive ``model_updater`` for a time budget and tabulate the trace.

    One trace row is recorded before any update (at elapsed time 0.0) and one
    after every update. Only the ``model_updater.update`` call sits between
    ``Timer.start`` and ``Timer.stop``, so building trace rows is not timed.

    Args:
        model: Model that is updated in place. NOTE: on return its ``params``
            attribute has been replaced by ``params_true``.
        model_updater: Object whose ``update(model)`` performs one update.
        params_true: Reference parameters; forwarded to ``_get_trace_row`` and
            used to evaluate the ``log_p_true`` column.
        time: Budget compared against ``Timer.elapsed`` (presumably seconds --
            confirm against ``Timer``). The budget is checked before each
            update, so the final update may overshoot it.

    Returns:
        pandas.DataFrame with the columns ``time``, ``num_features``,
        ``log_p``, ``log_p_true``, ``rel_log_p``, ``b_cubed_f``,
        ``b_cubed_p``, ``b_cubed_r`` and ``max_particles``, in that order.
    """
    clock = Timer()
    rows = []

    def record(elapsed):
        rows.append(_get_trace_row(model, model_updater, params_true, elapsed))

    record(0.0)

    while clock.elapsed < time:
        clock.start()
        model_updater.update(model)
        clock.stop()
        record(clock.elapsed)

    frame = pd.DataFrame(rows)

    # Evaluate the model density at the reference parameters; the model is
    # left holding ``params_true`` afterwards.
    model.params = params_true
    frame["log_p_true"] = model.log_p

    gap = frame["log_p"] - frame["log_p_true"]
    frame["rel_log_p"] = gap / frame["log_p_true"].abs()

    columns = [
        "time",
        "num_features",
        "log_p",
        "log_p_true",
        "rel_log_p",
        "b_cubed_f",
        "b_cubed_p",
        "b_cubed_r",
        "max_particles",
    ]
    return frame[columns]
def run(data_true, model, model_updater, params_true, time=100):
    """Run timed updates of ``model`` and return the trace as a DataFrame.

    A row is taken from ``_get_trace_row`` before the first update (elapsed
    time 0.0) and again after each update. The timer brackets only the
    ``model_updater.update`` call, so trace bookkeeping is excluded from the
    reported time.

    Args:
        data_true: Forwarded unchanged to ``_get_trace_row``.
        model: Model updated in place. NOTE: on return its ``params``
            attribute has been replaced by ``params_true``.
        model_updater: Object whose ``update(model)`` performs one update.
        params_true: Reference parameters; forwarded to ``_get_trace_row`` and
            used to evaluate the ``log_p_true`` column.
        time: Budget compared against ``Timer.elapsed`` (presumably seconds --
            confirm against ``Timer``). Checked before each update, so the
            last update may run past the budget.

    Returns:
        pandas.DataFrame with the columns ``time``, ``num_features``,
        ``log_p``, ``log_p_true``, ``rel_log_p``, ``b_cubed_f``,
        ``b_cubed_p``, ``b_cubed_r`` and ``error``, in that order.
    """
    output_columns = [
        'time',
        'num_features',
        'log_p',
        'log_p_true',
        'rel_log_p',
        'b_cubed_f',
        'b_cubed_p',
        'b_cubed_r',
        'error',
    ]

    stopwatch = Timer()
    history = [_get_trace_row(data_true, model, params_true, 0.0)]

    while stopwatch.elapsed < time:
        stopwatch.start()
        model_updater.update(model)
        stopwatch.stop()

        snapshot = _get_trace_row(data_true, model, params_true, stopwatch.elapsed)
        history.append(snapshot)

    table = pd.DataFrame(history)

    # Swap in the reference parameters to read off the density they attain.
    model.params = params_true
    table['log_p_true'] = model.log_p

    difference = table['log_p'] - table['log_p_true']
    table['rel_log_p'] = difference / table['log_p_true'].abs()

    return table[output_columns]
def run(model, model_updater, time=100):
    """Update ``model`` repeatedly for a time budget and return the trace.

    A trace row is recorded before the first update (elapsed time 0.0) and
    after every update. Only ``model_updater.update`` is bracketed by the
    timer's ``start``/``stop`` calls.

    Args:
        model: Model updated in place by ``model_updater``.
        model_updater: Object whose ``update(model)`` performs one update.
        time: Budget compared against ``Timer.elapsed`` (presumably seconds --
            confirm against ``Timer``). Checked before each update, so the
            last update may overshoot.

    Returns:
        pandas.DataFrame with the columns ``time``, ``num_features``,
        ``num_features_used`` and ``log_p``, in that order.
    """
    clock = Timer()
    rows = [_get_trace_row(model, 0.0)]

    while clock.elapsed < time:
        clock.start()
        model_updater.update(model)
        clock.stop()

        rows.append(_get_trace_row(model, clock.elapsed))

    wanted = ["time", "num_features", "num_features_used", "log_p"]
    return pd.DataFrame(rows)[wanted]
def run(data_true, model, model_updater, trace_writer, time=100):
    """Run timed updates, streaming each state to ``trace_writer``.

    The initial state is recorded in the returned trace (elapsed time 0.0) but
    is not sent to ``trace_writer``. At least one update is always performed:
    the budget is only checked after an update has completed. After each
    update the state is first written via ``trace_writer.write_row`` and then
    appended to the in-memory trace.

    Args:
        data_true: Forwarded unchanged to ``_get_trace_row``.
        model: Model updated in place by ``model_updater``.
        model_updater: Object whose ``update(model)`` performs one update.
        trace_writer: Object whose ``write_row(model, elapsed)`` is called
            once per update.
        time: Budget compared against ``Timer.elapsed`` (presumably seconds --
            confirm against ``Timer``).

    Returns:
        pandas.DataFrame with the columns ``time``, ``num_features``,
        ``num_features_used``, ``log_p`` and ``rmse``, in that order.
    """
    clock = Timer()
    rows = [_get_trace_row(data_true, model, 0.0)]

    budget_spent = False
    while not budget_spent:
        clock.start()
        model_updater.update(model)
        clock.stop()

        trace_writer.write_row(model, clock.elapsed)
        rows.append(_get_trace_row(data_true, model, clock.elapsed))

        budget_spent = clock.elapsed >= time

    wanted = ["time", "num_features", "num_features_used", "log_p", "rmse"]
    frame = pd.DataFrame(rows)
    return frame[wanted]
def main(args):
    """Simulate PyClone binomial data and run a timed, verbose sampling loop.

    Parameters and data are simulated under ``args.data_seed``, a model is
    built under ``args.param_seed``, and updates then run under
    ``args.run_seed`` until the accumulated ``Timer.elapsed`` reaches
    ``args.time``. Diagnostics are printed whenever more than
    ``args.print_freq`` has elapsed since the previous report; the first
    report is printed before any update.

    Args:
        args: Namespace-like object. Attributes read here: ``ibp``,
            ``data_seed``, ``num_dims``, ``num_data_points``,
            ``num_features``, ``alpha``, ``annealing_power``, ``sampler``,
            ``mixture_prob``, ``num_particles``, ``test_path``,
            ``param_seed``, ``run_seed``, ``time`` and ``print_freq``.
    """
    if args.ibp:
        print(
            'Warning: IBP sampling for the PyClone model is not properly supported.'
        )

    set_seed(args.data_seed)

    params = pgfa.models.pyclone.binomial.simulate_params(
        args.num_dims,
        args.num_data_points,
        K=args.num_features,
        alpha=args.alpha)

    data = pgfa.models.pyclone.binomial.simulate_data(params)

    model_updater = get_model_updater(
        annealing_power=args.annealing_power,
        feat_alloc_updater_type=args.sampler,
        ibp=args.ibp,
        mixture_prob=args.mixture_prob,
        num_particles=args.num_particles,
        test_path=args.test_path)

    set_seed(args.param_seed)

    # With IBP the number of features is not fixed, so K is passed as None.
    model_K = None if args.ibp else args.num_features

    model = pgfa.models.pyclone.binomial.get_model(data, K=model_K)

    set_seed(args.run_seed)

    # Temporarily swap in the simulated parameters to obtain the reference log
    # density, then restore the model's own initial parameters.
    old_params = model.params.copy()
    model.params = params.copy()
    log_p_true = model.log_p
    model.params = old_params.copy()

    print('Arguments')
    print('-' * 100)
    for key, value in sorted(vars(args).items()):
        print('{0}: {1}'.format(key, value))

    print('@' * 100)
    print('True feature counts (sorted): {}'.format(
        sorted(np.sum(params.Z, axis=0))))
    print('True log density: {}'.format(log_p_true))
    print('@' * 100)

    timer = Timer()
    i = 0

    # Builtin float is used because the ``np.float`` alias was removed from
    # NumPy. Starting at -inf forces a report on the first pass.
    last_print_time = float('-inf')

    while timer.elapsed < args.time:
        if (timer.elapsed - last_print_time) > args.print_freq:
            last_print_time = timer.elapsed

            # Read the density once so both lines below report the same value.
            log_p = model.log_p

            print('Iteration: {}'.format(i))
            print('Log density: {}'.format(log_p))
            print('Relative log density: {}'.format(
                (log_p - log_p_true) / abs(log_p_true)))

            if args.ibp:
                print('Num features: {}'.format(model.params.K))

            print('B-Cube scores: {}'.format(
                get_b_cubed_score(params.Z, model.params.Z)))
            print('Feature counts (sorted): {}'.format(
                sorted(np.sum(model.params.Z, axis=0))))
            print('#' * 100)

        # Only the update itself is timed; reporting above is excluded.
        timer.start()
        model_updater.update(model)
        timer.stop()

        i += 1
def run(data_true, model, model_updater, params_true, time=100):
    """Run an annealing phase followed by a timed main phase; return the trace.

    Phase 1 keeps updating while the feature-allocation updater's
    ``annealing_schedule`` evaluated at its current ``iter`` is below 1.0;
    rows from this phase are recorded with ``annealed=True``. Phase 2 uses a
    fresh timer and updates until that timer's ``elapsed`` reaches ``time``;
    its rows are recorded with ``annealed=False``. The initial row (elapsed
    time 0.0) is recorded without passing ``annealed``.

    Args:
        data_true: Forwarded unchanged to ``_get_trace_row``.
        model: Model updated in place. NOTE: on return its ``params``
            attribute has been replaced by ``params_true``.
        model_updater: Object providing ``update(model)`` and a
            ``feat_alloc_updater`` with ``annealing_schedule`` and ``iter``.
        params_true: Reference parameters; forwarded to ``_get_trace_row`` and
            used to evaluate the ``log_p_true`` column.
        time: Budget for the main phase only, compared against
            ``Timer.elapsed`` (presumably seconds -- confirm against
            ``Timer``). Annealing time does not count toward it.

    Returns:
        pandas.DataFrame with the columns ``annealed``, ``time``,
        ``annealing_time``, ``num_features``, ``log_p``, ``log_p_true``,
        ``rel_log_p``, ``b_cubed_f``, ``b_cubed_p``, ``b_cubed_r`` and
        ``rmse``, in that order.
    """
    rows = []

    def record(elapsed, **extra):
        rows.append(
            _get_trace_row(data_true, model, params_true, elapsed, **extra))

    def still_annealing():
        fa_updater = model_updater.feat_alloc_updater
        return fa_updater.annealing_schedule(fa_updater.iter) < 1.0

    def timed_update(clock):
        clock.start()
        model_updater.update(model)
        clock.stop()

    record(0.0)

    # Annealing phase (skipped entirely if the schedule is already at 1.0).
    anneal_clock = Timer()
    while still_annealing():
        timed_update(anneal_clock)
        record(anneal_clock.elapsed, annealed=True)
    annealing_time = anneal_clock.elapsed

    # Main phase, timed from zero with its own timer.
    main_clock = Timer()
    while main_clock.elapsed < time:
        timed_update(main_clock)
        record(main_clock.elapsed, annealed=False)

    frame = pd.DataFrame(rows)

    # Evaluate the density at the reference parameters; the model keeps them.
    model.params = params_true

    frame["annealing_time"] = annealing_time
    frame["log_p_true"] = model.log_p

    gap = frame["log_p"] - frame["log_p_true"]
    frame["rel_log_p"] = gap / frame["log_p_true"].abs()

    columns = [
        "annealed",
        "time",
        "annealing_time",
        "num_features",
        "log_p",
        "log_p_true",
        "rel_log_p",
        "b_cubed_f",
        "b_cubed_p",
        "b_cubed_r",
        "rmse",
    ]
    return frame[columns]
def main(args):
    """Simulate linear-Gaussian data and run a timed, verbose sampling loop.

    Parameters and (possibly partially missing) data are simulated under
    ``args.data_seed``, a model is built under ``args.param_seed``, and
    updates then run under ``args.run_seed`` until the accumulated
    ``Timer.elapsed`` reaches ``args.time``. Diagnostics are printed whenever
    more than ``args.print_freq`` has elapsed since the previous report; the
    first report is printed before any update.

    Args:
        args: Namespace-like object. Attributes read here: ``data_seed``,
            ``alpha``, ``tau_v``, ``tau_x``, ``num_dims``, ``num_features``,
            ``num_data_points``, ``prop_missing``, ``annealing_power``,
            ``sampler``, ``ibp``, ``mixture_prob``, ``num_particles``,
            ``test_path``, ``param_seed``, ``run_seed``, ``time`` and
            ``print_freq``.

    Raises:
        AssertionError: If a whole row or a whole column of the simulated
            data is NaN.
    """
    set_seed(args.data_seed)

    params = pgfa.models.linear_gaussian.simulate_params(
        alpha=args.alpha,
        tau_v=args.tau_v,
        tau_x=args.tau_x,
        D=args.num_dims,
        K=args.num_features,
        N=args.num_data_points)

    data, data_true = pgfa.models.linear_gaussian.simulate_data(
        params, prop_missing=args.prop_missing)

    # Make sure we do not have rows/columns of data that are all missing
    for d in range(params.D):
        assert not np.all(np.isnan(data[:, d]))

    for n in range(params.N):
        assert not np.all(np.isnan(data[n]))

    model_updater = get_model_updater(
        annealing_power=args.annealing_power,
        feat_alloc_updater_type=args.sampler,
        ibp=args.ibp,
        mixture_prob=args.mixture_prob,
        num_particles=args.num_particles,
        test_path=args.test_path)

    set_seed(args.param_seed)

    # With IBP the number of features is not fixed, so K is passed as None.
    model_K = None if args.ibp else args.num_features

    model = pgfa.models.linear_gaussian.get_model(data, K=model_K)

    set_seed(args.run_seed)

    # Temporarily swap in the simulated parameters to obtain the reference log
    # density, then restore the model's own initial parameters.
    old_params = model.params.copy()
    model.params = params.copy()
    log_p_true = model.log_p
    model.params = old_params.copy()

    print('Arguments')
    print('-' * 100)
    for key, value in sorted(vars(args).items()):
        print('{0}: {1}'.format(key, value))

    print('@' * 100)
    print('True feature counts (sorted): {}'.format(
        sorted(np.sum(params.Z, axis=0))))
    print('True log density: {}'.format(log_p_true))
    print('@' * 100)

    timer = Timer()
    i = 0

    # Builtin float is used because the ``np.float`` alias was removed from
    # NumPy. Starting at -inf forces a report on the first pass.
    last_print_time = float('-inf')

    while timer.elapsed < args.time:
        if (timer.elapsed - last_print_time) > args.print_freq:
            last_print_time = timer.elapsed

            # Read the density once so both lines below report the same value.
            log_p = model.log_p

            print('Iteration: {}'.format(i))
            print('Log density: {}'.format(log_p))
            print('Relative log density: {}'.format(
                (log_p - log_p_true) / abs(log_p_true)))

            # The L2 error is only reported when some entries were held out.
            if args.prop_missing > 0:
                print('L2 error: {}'.format(
                    compute_l2_error(data, data_true, model.params)))

            if args.ibp:
                print('Num features: {}'.format(model.params.K))

            print('B-Cube scores: {}'.format(
                get_b_cubed_score(params.Z, model.params.Z)))
            print('Feature counts (sorted): {}'.format(
                sorted(np.sum(model.params.Z, axis=0))))
            print('#' * 100)

        # Only the update itself is timed; reporting above is excluded.
        timer.start()
        model_updater.update(model)
        timer.stop()

        i += 1
def main(args):
    """Simulate LFRM data and run a timed, verbose sampling loop.

    Parameters and data are simulated under ``args.data_seed`` and a model is
    built under ``args.param_seed``. Updates then run until the accumulated
    ``Timer.elapsed`` reaches ``args.time``. Diagnostics are printed whenever
    more than ``args.print_freq`` has elapsed since the previous report; the
    first report is printed before any update.

    Args:
        args: Namespace-like object. Attributes read here: ``data_seed``,
            ``num_data_points``, ``num_features``, ``alpha``, ``tau``,
            ``prop_missing``, ``symmetric``, ``annealing_power``, ``sampler``,
            ``ibp``, ``mixture_prob``, ``num_particles``, ``test_path``,
            ``param_seed``, ``time`` and ``print_freq``.
    """
    set_seed(args.data_seed)

    params = pgfa.models.lfrm.simulate_params(
        args.num_data_points,
        K=args.num_features,
        alpha=args.alpha,
        tau=args.tau
    )

    data, data_true = pgfa.models.lfrm.simulate_data(
        params, prop_missing=args.prop_missing, symmetric=args.symmetric
    )

    model_updater = get_model_updater(
        annealing_power=args.annealing_power,
        feat_alloc_updater_type=args.sampler,
        ibp=args.ibp,
        mixture_prob=args.mixture_prob,
        num_particles=args.num_particles,
        test_path=args.test_path
    )

    set_seed(args.param_seed)

    # With IBP the number of features is not fixed, so K is passed as None.
    model_K = None if args.ibp else args.num_features

    model = pgfa.models.lfrm.get_model(data, K=model_K, symmetric=args.symmetric)

    # Temporarily swap in the simulated parameters to obtain the reference log
    # density, then restore the model's own initial parameters.
    old_params = model.params.copy()
    model.params = params.copy()
    log_p_true = model.log_p
    model.params = old_params.copy()

    print('Arguments')
    print('-' * 100)
    for key, value in sorted(vars(args).items()):
        print('{0}: {1}'.format(key, value))

    print('@' * 100)
    print('True feature counts (sorted): {}'.format(sorted(np.sum(params.Z, axis=0))))
    print('True log density: {}'.format(log_p_true))
    print('@' * 100)

    timer = Timer()
    i = 0

    # Builtin float is used because the ``np.float`` alias was removed from
    # NumPy. Starting at -inf forces a report on the first pass.
    last_print_time = float('-inf')

    while timer.elapsed < args.time:
        if (timer.elapsed - last_print_time) > args.print_freq:
            last_print_time = timer.elapsed

            # Read the density once so both lines below report the same value.
            log_p = model.log_p

            print('Iteration: {}'.format(i))
            print('Log density: {}'.format(log_p))
            print('Relative log density: {}'.format((log_p - log_p_true) / abs(log_p_true)))
            print('Error: {}'.format(np.sum(np.abs(model.predict(method='max') - data_true))))

            if args.ibp:
                print('Num features: {}'.format(model.params.K))

            print('B-Cube scores: {}'.format(get_b_cubed_score(params.Z, model.params.Z)))
            print('Feature counts (sorted): {}'.format(sorted(np.sum(model.params.Z, axis=0))))
            print('#' * 100)

        # Only the update itself is timed; reporting above is excluded.
        timer.start()
        model_updater.update(model)
        timer.stop()

        i += 1