# Manufacturing Entropy - Code for the article
# Original Data: Kaggle competition - Bosch Manufacturing Data: train_date.csv file
# Data used in this file: pre-processed ("data/manufacturing_paths.txt" and "data/manufacturing_edges.txt")
# Yamila Mariel Omar
# Date of original code: 21st July 2020

from graphfile import GraphFile


# Load manufacturing paths
# ========================
# The values of ``paths`` are summed to obtain the total number of
# manufactured items, which is the reference for the edge filter below.
paths = GraphFile("data/manufacturing_paths.txt").read_paths_with_count()
number_of_manufactured_items = sum(paths.values())

# Load edges
# ==========
edges = GraphFile("data/manufacturing_edges.txt").read_edges_from_file()

# Filter out edges that only serve a handful of items
# ===================================================
# An edge is marked for removal when its value is strictly below 0.1% of
# the total number of manufactured items. A set is used so that the
# intersection test against each path's edges is cheap.
threshold = 0.001 * number_of_manufactured_items
edges_to_remove = {
    edge for edge, weight in edges.items() if weight < threshold
}

# Clean manufacturing paths
# =========================
clean_paths = {}
for k,v in paths.items():
    e = [(i,j) for (i,j) in zip(k, k[1:])]
    e = set(e)
    if len(edges_to_remove.intersection(e)) == 0:
Exemplo n.º 2
0
    paths = GraphFile(filename_paths).read_paths_with_count()

    # Generate graph from clean edges
    # ===============================
    edges = add_self_loops(paths, edges)
    G = Graph(edges)

    # ==========================================================================
    # Get frequency of n in paths
    # ==========================================================================
    freq_n_in_paths = {n: 0 for n in G.nodes}
    for p, v in paths.items():
        for n in p:
            freq_n_in_paths[n] += v

    number_of_paths = sum(list(paths.values()))
    freq_n_in_paths = {
        k: v / number_of_paths
        for k, v in freq_n_in_paths.items()
    }

    # Make plot
    # =========
    x = list(freq_n_in_paths.keys())
    x.sort()
    freq = [freq_n_in_paths[n] for n in x]

    x_pos = [i for i, _ in enumerate(x)]

    plt.barh(x_pos, freq, color='red')
    plt.ylabel("Nodes")