Exemplo n.º 1
0
def edge_errors(pred, target):
    """
    Tally the edge mistakes of a predicted graph against the true graph.

    Three disjoint categories are reported: edges missing from the
    prediction (false negatives), edges present only in the prediction
    (false positives), and edges predicted in the opposite direction
    (reversed).

    Parameters:
    -----------
    pred: nx.DiGraph or ndarray
        The predicted adjacency matrix
    target: nx.DiGraph or ndarray
        The true adjacency matrix

    Returns:
    --------
    fn, fp, rev

    """
    truth = retrieve_adjacency_matrix(target)
    # When the target is a graph, its node order is passed along for the prediction.
    node_order = target.nodes() if isinstance(target, nx.DiGraph) else None
    guess = retrieve_adjacency_matrix(pred, node_order)

    # +1 where an edge exists only in the truth, -1 where only in the prediction.
    delta = truth - guess

    # A reversed edge appears as a non-zero entry whose mirrored entry has
    # the opposite sign; each such edge marks two cells, hence the halving.
    mirrored_cancel = (delta + delta.T) == 0
    flipped_cells = (delta != 0) & mirrored_cancel
    rev = flipped_cells.sum() / 2

    # Every reversed edge also shows up once as a +1 and once as a -1,
    # so it is removed from both raw counts.
    missing_cells = (delta == 1).sum()
    extra_cells = (delta == -1).sum()
    fn = missing_cells - rev
    fp = extra_cells - rev

    return fn, fp, rev
Exemplo n.º 2
0
def edge_accurate(pred, target):
    """
    Count the edges of the ground-truth graph together with the correctly
    predicted entries (true positives and true negatives).

    Parameters:
    -----------
    pred: nx.DiGraph or ndarray
        The predicted adjacency matrix
    target: nx.DiGraph or ndarray
        The true adjacency matrix

    Returns:
    --------
    total_edges, tp, tn

    """
    truth = retrieve_adjacency_matrix(target)
    # When the target is a graph, its node order is passed along for the prediction.
    node_order = target.nodes() if isinstance(target, nx.DiGraph) else None
    guess = retrieve_adjacency_matrix(pred, node_order)

    # Cells on which prediction and truth hold the same value.
    agree = guess == truth

    tp = (agree & (guess == 1)).sum()
    tn = (agree & (guess == 0)).sum()
    total_edges = truth.sum()

    return total_edges, tp, tn
Exemplo n.º 3
0
def generator(mechanism='linear'):
    """
    Generate a random acyclic causal graph with data and write both to CSV.

    The generated data is min-max scaled column by column, prefixed with a
    column of ones and written to ``combined.csv``; the true adjacency matrix
    is written to ``true_CM.csv``.  Both files are additionally archived under
    ``ArchivedData/`` as ``combined_<mechanism>_<run>.csv`` and
    ``True_CM_<mechanism>_<run>.csv``, where ``run`` is the first index for
    which no archived ``combined`` file exists yet.

    Parameters:
    -----------
    mechanism: str
        Causal mechanism name forwarded to ``AcyclicGraphGenerator``.

    Returns:
    --------
    None

    """
    number_parents = 20  # forwarded as ``nodes`` to the graph generator
    number_of_data_points = 5000

    # Named differently from the enclosing function to avoid shadowing it.
    graph_generator = AcyclicGraphGenerator(
        mechanism,
        nodes=number_parents,
        parents_max=3,
        noise_coeff=.4,
        npoints=number_of_data_points,
    )
    data, graph = graph_generator.generate()

    # Save true matrix
    true_matrix = retrieve_adjacency_matrix(graph)
    savetxt('true_CM.csv', true_matrix, delimiter=',')

    # Normalise the data: min-max scale every column in one vectorised step.
    n = len(data)
    data = np.array(data, dtype=float)
    col_min = data.min(axis=0)
    col_span = data.max(axis=0) - col_min
    # A constant column would give 0/0 = NaN; dividing by 1 maps it to 0 instead.
    col_span = np.where(col_span == 0, 1.0, col_span)
    data = (data - col_min) / col_span

    # Prepend a column of ones.
    data = np.hstack((np.ones((n, 1)), data))

    # Save current version in home folder
    savetxt('combined.csv', data, delimiter=',')

    def archive_path(prefix, run):
        # Build the archive file name for a given prefix and run index.
        return 'ArchivedData/' + prefix + '_' + str(mechanism) + '_' + str(run) + '.csv'

    # savetxt does not create missing directories.
    os.makedirs('ArchivedData', exist_ok=True)

    # Find the first run index whose data file does not exist yet.
    run = 0
    while os.path.exists(archive_path('combined', run)):
        run += 1

    # Both archive names are derived from the SAME run index so the stored
    # true matrix always belongs to the stored data set (previously the
    # matrix name stayed at run 0 and was overwritten on every call).
    savetxt(archive_path('True_CM', run), true_matrix, delimiter=',')
    savetxt(archive_path('combined', run), data, delimiter=',')
Exemplo n.º 4
0
	savetxt(name, data, delimiter=',')



# Generate Graph
#
# Interactive script section: asks for the causal mechanism and the number of
# data points, generates an acyclic graph with data, stores the true adjacency
# matrix in 'true_CM.csv', draws the graph and min-max scales the data.

# The mechanism name is forwarded unchanged to AcyclicGraphGenerator.
mechanism = str(input('Which mechanism do you select to generator the data? '))
# Fixed at 20 (the interactive prompt is disabled); passed below as `nodes`.
number_parents = 20   #int(input('Desired number of parents? '))
# int() raises ValueError if the entered text is not an integer.
number_of_data_points = int(input('Desired number of data points? '))

# NOTE: this rebinds the module-level name `generator` to the generator object.
generator = AcyclicGraphGenerator(mechanism, nodes=number_parents, parents_max=3,  noise_coeff=.4, npoints=number_of_data_points)
data, graph = generator.generate()
#generator.to_csv('generated_graph')

# Save true matrix
true_matrix = retrieve_adjacency_matrix(graph)
savetxt('true_CM.csv', true_matrix, delimiter=',')



# Draw the generated graph on a large canvas.
plt.figure(figsize=(40,20))
nx.draw_networkx(graph, font_size=10) # The plot function allows for quick visualization of the graph.
plt.show()

# Normalise the data
n = len(data)  # number of rows, kept for later use
data = np.array(data)

# Min-max scale each column in place: (x - min) / (max - min).
# NOTE(review): a constant column makes the denominator zero - confirm the
# generator never produces one.
for i in range(len(data.T)):
    data[:,i] = (data[:,i]-min(data[:,i]))/(max(data[:,i])-min(data[:,i]))
Exemplo n.º 5
0
def _run_baseline(label, algorithm, data, true_matrix):
    """
    Run one causal-discovery algorithm on ``data`` and score its output.

    Parameters:
    -----------
    label: str
        Name stored as the first element of the result list.
    algorithm: callable
        Zero-argument factory (e.g. a class) whose instance offers
        ``predict(data)``.
    data:
        Observations passed unchanged to ``predict``.
    true_matrix: ndarray
        Ground-truth adjacency matrix used for scoring.

    Returns:
    --------
    list
        ``[label, aupr, shd, elapsed_seconds]``

    """
    # The timed region covers model construction, prediction and scoring.
    start_time = time.time()

    output = algorithm().predict(data)

    adj_mat = nx.adjacency_matrix(output).todense()
    # `clr` is a helper defined outside this block.
    output = clr(adj_mat)

    # Replace NaN entries by 0 and +inf entries by 1 before scoring.
    output[np.isnan(output)] = 0
    output[np.isposinf(output)] = 1

    predicted = retrieve_adjacency_matrix(output)

    # Each metric receives its own copy of the ground truth.
    shd = SHD(np.array(true_matrix), predicted, double_for_anticausal=False)
    aupr, _ = precision_recall(np.array(true_matrix), output)

    end_time = time.time() - start_time
    print("--- Execution time : %4.4s seconds ---" % end_time)

    results = [label, aupr, shd, end_time]
    print(results)
    return results


def baselines(data):
    """
    Evaluate the PC, GES, LiNGAM and CCDr baselines on ``data``.

    The ground-truth adjacency matrix is read once from ``true_CM.csv`` in
    the current working directory (outside the timed region); every
    algorithm is then run, scored and reported in turn.

    Parameters:
    -----------
    data:
        Observations passed unchanged to each algorithm's ``predict``.

    Returns:
    --------
    results_pc, results_ges, results_lingam, results_ccdr
        One ``[name, aupr, shd, elapsed_seconds]`` list per algorithm.

    """
    true_matrix = np.array(pd.read_csv('true_CM.csv', header=None))

    # (label, factory) pairs in the order the results are returned.
    algorithms = (
        ('PC', PC),
        ('GES', GES),
        ('LiNGAM', LiNGAM),
        ('CCDR', CCDr),
    )

    results_pc, results_ges, results_lingam, results_ccdr = [
        _run_baseline(label, algorithm, data, true_matrix)
        for label, algorithm in algorithms
    ]

    return results_pc, results_ges, results_lingam, results_ccdr
Exemplo n.º 6
0
    results_ccdr = ['CCDR', aupr, shd, end_time]
    print(results_ccdr)

    return results_pc, results_ges, results_lingam, results_ccdr


# Tests
#
# Script section: runs CGNN on the module-level `data`, scores the prediction
# against the adjacency matrix stored in 'true_CM.csv' and prints the result.

# The timed region covers model construction, prediction and scoring.
start_time = time.time()

obj = CGNN(nruns=1, train_epochs=500, test_epochs=500)
output = obj.predict(data)

adj_mat = nx.adjacency_matrix(output).todense()
# `clr` is a helper defined outside this section.
output = clr(adj_mat)

# Replace NaN entries by 0 and +inf entries by 1 before scoring.
output[np.isnan(output)] = 0
output[np.isposinf(output)] = 1

predicted = retrieve_adjacency_matrix(output)
# The ground truth is read from the current working directory; the CSV has no header row.
true_matrix = pd.read_csv('true_CM.csv', header=None)
true_matrix = np.array(true_matrix)

# SHD compares `predicted`; precision_recall is given the `output` scores instead.
shd = SHD(np.array(true_matrix), predicted, double_for_anticausal=False)
aupr, curve = precision_recall(np.array(true_matrix), output)

end_time = (time.time() - start_time)
# '%4.4s' prints str(end_time) cut to at most 4 characters.
print("--- Execution time : %4.4s seconds ---" % end_time)

# Result layout: [name, aupr, shd, elapsed_seconds].
results_cgnn = ['CGNN', aupr, shd, end_time]
print(results_cgnn)