def simulate_data(method='nonlinear', sem_type='mlp', n_nodes=6, n_edges=15,
                  n=1000, weight_range=(0.5, 2.0), seed=1):
    """
    Simulate an IID dataset from a random Erdos-Renyi weighted DAG.

    Parameters
    ----------
    method: str
        Forwarded to ``IIDSimulation`` as ``method``.
    sem_type: str
        Forwarded to ``IIDSimulation`` as ``sem_type``.
    n_nodes: int
        Number of nodes of the random DAG.
    n_edges: int
        Number of edges of the random DAG.
    n: int
        Forwarded to ``IIDSimulation`` as ``n``.
    weight_range: tuple
        Forwarded to ``DAG.erdos_renyi`` as ``weight_range``.
        Defaults to the previously hard-coded ``(0.5, 2.0)``.
    seed: int
        Forwarded to ``DAG.erdos_renyi`` as ``seed``.
        Defaults to the previously hard-coded ``1``.

    Returns
    -------
    out: tuple
        (X, true_dag), i.e. ``dataset.X`` and ``dataset.B`` of the
        generated ``IIDSimulation``.
    """
    weighted_random_dag = DAG.erdos_renyi(n_nodes=n_nodes,
                                          n_edges=n_edges,
                                          weight_range=weight_range,
                                          seed=seed)
    dataset = IIDSimulation(W=weighted_random_dag, n=n,
                            method=method, sem_type=sem_type)
    true_dag, X = dataset.B, dataset.X

    return X, true_dag
def run_simulate(config):
    """
    Generate simulated data as described by a task configuration.

    Parameters
    ----------
    config: dict
        Configuration info. ``config['task_params']['algorithm']`` selects
        the simulator ('EVENT' uses ``THPSimulation``, anything else uses
        ``IIDSimulation``) and ``config['algorithm_params']`` supplies the
        simulator arguments.

    Returns
    -------
    out: tuple
        (X, true_dag) or (X, true_dag, topology_matrix)
    """
    params = config['algorithm_params']
    is_event_task = config['task_params']['algorithm'] == 'EVENT'

    # Both simulators start from the same kind of random weighted DAG.
    random_dag = DAG.erdos_renyi(n_nodes=params['n_nodes'],
                                 n_edges=params['n_edges'],
                                 weight_range=params['weight_range'],
                                 seed=params['seed'])

    if not is_event_task:
        iid_data = IIDSimulation(W=random_dag,
                                 n=params['n'],
                                 method=params['method'],
                                 sem_type=params['sem_type'],
                                 noise_scale=params['noise_scale'])
        return pd.DataFrame(iid_data.X), iid_data.B

    topology_matrix = Topology.erdos_renyi(
        n_nodes=params['Topology_n_nodes'],
        n_edges=params['Topology_n_edges'],
        seed=params['Topology_seed'])
    thp = THPSimulation(random_dag, topology_matrix,
                        mu_range=params['mu_range'],
                        alpha_range=params['alpha_range'])
    events = thp.simulate(
        T=params['THPSimulation_simulate_T'],
        max_hop=params['THPSimulation_simulate_max_hop'],
        beta=params['THPSimulation_simulate_beta'])

    return events, random_dag, topology_matrix
`networkx` package, then like the following import method. Warnings: This script is used only for demonstration and cannot be directly imported. """ from castle.common import GraphDAG from castle.metrics import MetricsDAG from castle.datasets import DAG, IIDSimulation from castle.algorithms import DirectLiNGAM ####################################### # DirectLiNGAM used simulate data ####################################### # simulate data for DirectLiNGAM weighted_random_dag = DAG.erdos_renyi(n_nodes=10, n_edges=20, weight_range=(0.5, 2.0), seed=1) dataset = IIDSimulation(W=weighted_random_dag, n=2000, method='linear', sem_type='gauss') true_dag, X = dataset.B, dataset.X # DirectLiNGAM learn g = DirectLiNGAM() g.learn(X) # plot est_dag and true_dag GraphDAG(g.causal_matrix, true_dag) # calculate accuracy met = MetricsDAG(g.causal_matrix, true_dag) print(met.metrics)
# `pd.DataFrame` is used below, so pandas has to be imported by this script.
import pandas as pd

from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import PC
from castle.common.priori_knowledge import PrioriKnowledge

# Simulation settings.
method = 'linear'
sem_type = 'gauss'
n_nodes = 10
n_edges = 15
n = 2000

# simulation for pc
weighted_random_dag = DAG.erdos_renyi(n_nodes=n_nodes,
                                      n_edges=n_edges,
                                      weight_range=(0.5, 2.0),
                                      seed=1)
dataset = IIDSimulation(W=weighted_random_dag, n=n,
                        method=method, sem_type=sem_type)
true_dag, X = dataset.B, dataset.X

# PC learn, constrained by prior knowledge about edges.
# The edges are given as pairs of node indices, so n_nodes must be at
# least 10 for the indices used here to be valid.
priori = PrioriKnowledge(X.shape[1])
priori.add_required_edges([(3, 9),
                           (4, 9),
                           (5, 9),
                           (8, 5),
                           (4, 3)])
priori.add_forbidden_edges([(8, 1),
                            (9, 5)])
pc = PC(variant='original', priori_knowledge=priori)

# The ten column labels 'a'..'j' match n_nodes = 10; keep both in sync.
X = pd.DataFrame(X, columns=list('abcdefghij'))
pc.learn(X)
from castle.common import GraphDAG
from castle.metrics import MetricsDAG
from castle.datasets import DAG, IIDSimulation
from castle.algorithms import MCSL

# ---------------------------------------------------------------------
# MCSL on simulated data
# ---------------------------------------------------------------------

# Draw a random weighted DAG and sample nonlinear (MLP) data from it.
weighted_random_dag = DAG.erdos_renyi(n_nodes=10,
                                      n_edges=20,
                                      weight_range=(0.5, 2.0),
                                      seed=1)
dataset = IIDSimulation(W=weighted_random_dag,
                        n=2000,
                        method='nonlinear',
                        sem_type='mlp')
true_dag = dataset.B
X = dataset.X

# Keyword arguments handed to MCSL.learn, gathered in one place.
learn_kwargs = {
    'iter_step': 1000,
    'rho_thres': 1e14,
    'init_rho': 1e-5,
    'rho_multiply': 10,
    'graph_thres': 0.5,
    'l1_graph_penalty': 2e-3,
    'degree': 2,
    'use_float64': False,
}

# Fit MCSL on the sampled data.
mc = MCSL()
mc.learn(X, **learn_kwargs)
If you want to plot causal graph, please make sure you have already install `networkx` package, then like the following import method. Warnings: This script is used only for demonstration and cannot be directly imported. """ from castle.common import GraphDAG from castle.metrics import MetricsDAG from castle.datasets import DAG, IIDSimulation from castle.algorithms import ANMNonlinear weighted_random_dag = DAG.erdos_renyi(n_nodes=6, n_edges=10, weight_range=(0.5, 2.0), seed=1) dataset = IIDSimulation(W=weighted_random_dag, n=1000, method='nonlinear', sem_type='gp-add') true_dag, X = dataset.B, dataset.X anm = ANMNonlinear(alpha=0.05) anm.learn(data=X) # plot predict_dag and true_dag GraphDAG(anm.causal_matrix, true_dag) mm = MetricsDAG(anm.causal_matrix, true_dag) print(mm.metrics)
def simulate_data(data, alg, task_id, parameters):
    """
    Simulation Data Generation Entry.

    Generates a random weighted DAG, samples data from it, writes the
    results below ``data`` and reports progress through ``TaskApi``.

    Parameters
    ----------
    data: str
        Path for storing generated data files.
    alg: str
        Generating Operator Strings. ``"EVENT"`` selects ``THPSimulation``
        (which additionally uses a topology matrix); any other value
        selects ``IIDSimulation``.
    task_id: int
        task key in the database.
    parameters: dict
        Data generation parameters. They are passed through
        ``translation_parameters`` before being used.

    Returns
    -------
    bool
        True if the data was generated and saved; False if the generation
        step raised an exception (the error text is then written as the
        task status and any existing output files of the task are removed).
    """

    def _report_progress(status):
        # Every progress update also refreshes the consumed time.
        task_api.update_task_status(task_id, status)
        task_api.update_consumed_time(task_id, start_time)

    parameters = translation_parameters(parameters)
    task_api = TaskApi()
    start_time = datetime.datetime.now()
    _report_progress(0.1)
    task_api.update_update_time(task_id, start_time)

    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(data, exist_ok=True)

    task_name = task_api.get_task_name(task_id)
    file_stem = str(task_id) + "_" + task_name
    sample_path = os.path.join(data, "datasets", file_stem + ".csv")
    true_dag_path = os.path.join(data, "true", file_stem + ".npz")
    node_relationship_path = os.path.join(
        data, "node_relationship_" + file_stem + ".csv")
    topo_path = os.path.join(data, "topo_" + file_stem + ".npz")
    _report_progress(0.2)

    topo = None
    try:
        # Both simulators start from the same random weighted DAG.
        weighted_random_dag = DAG.erdos_renyi(
            n_nodes=parameters['n_nodes'],
            n_edges=parameters['n_edges'],
            weight_range=parameters['weight_range'],
            seed=parameters['seed'])
        if alg == "EVENT":
            true_dag = weighted_random_dag
            topo = Topology.erdos_renyi(
                n_nodes=parameters['Topology_n_nodes'],
                n_edges=parameters['Topology_n_edges'],
                seed=parameters['Topology_seed'])
            simulator = THPSimulation(true_dag, topo,
                                      mu_range=parameters['mu_range'],
                                      alpha_range=parameters['alpha_range'])
            sample = simulator.simulate(
                T=parameters['THPSimulation_simulate_T'],
                max_hop=parameters['THPSimulation_simulate_max_hop'],
                beta=parameters['THPSimulation_simulate_beta'])
        else:
            dataset = IIDSimulation(W=weighted_random_dag,
                                    n=parameters['n'],
                                    method=parameters['method'],
                                    sem_type=parameters['sem_type'],
                                    noise_scale=parameters['noise_scale'])
            true_dag = dataset.B
            sample = pd.DataFrame(dataset.X)
        _report_progress(0.5)
    except Exception as error:
        # Task boundary: record the failure instead of propagating it.
        _report_progress(str(error))
        logger.warning('Generating simulation data failed, exp=%s', error)
        # Remove whatever output files of this task already exist.
        for stale_path in (sample_path, true_dag_path,
                           node_relationship_path, topo_path):
            if os.path.exists(stale_path):
                os.remove(stale_path)
        return False

    # Drop an existing topology file; it is rewritten below only when this
    # run produced a topology matrix.
    if os.path.exists(topo_path):
        os.remove(topo_path)
    _report_progress(0.6)

    # The sample and true-DAG files live in sub-directories of ``data``.
    # NOTE(review): assumes save_to_file does not create missing parent
    # directories itself; creating them here is harmless if it does.
    for target_path in (sample_path, true_dag_path):
        os.makedirs(os.path.dirname(target_path), exist_ok=True)

    save_to_file(sample, sample_path)
    save_to_file(true_dag, true_dag_path)
    if isinstance(topo, np.ndarray):
        save_to_file(topo, topo_path)

    # Hand the true DAG to save_gragh_edges together with the
    # node-relationship CSV path (no accuracy is calculated here).
    save_gragh_edges(true_dag, node_relationship_path)
    _report_progress(1.0)

    return True