# Manufacturing Entropy - Code for the article # Original Data: Kaggle competition - Bosch Manufacturing Data: train_date.csv file # Data used in this file: pre-processed ("data/manufacturing_paths.txt" and "data/manufacturing_edges.txt") # Yamila Mariel Omar # Date of original code: 21st July 2020 from graphfile import GraphFile # Load manufacturing paths paths # ============================== paths = GraphFile("data/manufacturing_paths.txt").read_paths_with_count() number_of_manufactured_items = sum(paths.values()) # Load edges # ========== edges = GraphFile("data/manufacturing_edges.txt").read_edges_from_file() # Filter out edges that only serve a handfull of items # ==================================================== threshold = 0.001 * number_of_manufactured_items edges_to_remove = [k for k,v in edges.items() if v < threshold] edges_to_remove = set(edges_to_remove) # Clean manufacturing paths # ========================= clean_paths = {} for k,v in paths.items(): e = [(i,j) for (i,j) in zip(k, k[1:])] e = set(e) if len(edges_to_remove.intersection(e)) == 0:
# Data: F2.txt # Yamila Mariel Omar # Date of original code: 9th February 2021 # Date of code last modification: 9th February 2021 from graphviz import Digraph import sys sys.path.append('..') from graphfile import GraphFile from graph import Graph # Load edges # ========== filename = "data/F2.txt" edges = GraphFile(filename).read_edges_from_file() total_items = sum([v for k, v in edges.items() if "source" in k]) edges = {k: v / total_items for k, v in edges.items()} G = Graph(edges) # Get node list # ============= nodes = G.nodes # Nodes positions in plot # ======================= positions = { 'source': "0,9!", '24': "-1,8!", '25': "1,8!",
# Entropy-calculation driver: the method to run is selected by the first
# command-line argument.
from graphfile import GraphFile
from graph import Graph
import multiprocessing
import datetime
import sys

# Integer selector for the entropy-calculation method below.
method = int(sys.argv[1])

# Read data: clean paths and clean edges
# ======================================
# full_path = "/mnt/irisgpfs/users/yomar/manufacturing_entropy_article/"
filename_paths = "data/clean_manufacturing_paths.txt"
# filename_paths = full_path + filename_paths
filename_edges = "data/clean_manufacturing_edges.txt"
# filename_edges = full_path + filename_edges
edges = GraphFile(filename_edges).read_edges_from_file()
paths = GraphFile(filename_paths).read_paths_with_count()

# =========================================================================
# CALCULATE ENTROPY
# =========================================================================

if method == 1:
    # Method 1: weighted, directed graph with self-loops on all nodes
    # self-loops that do not represent ending nodes have a value of 1
    # Due to computation issues, this is deployed in an HPC!!!
    # =============================================================
    # add_self_loops is not defined in this excerpt — presumably defined
    # elsewhere in this file; verify before relying on its semantics.
    edges = add_self_loops(paths, edges)
    G = Graph(edges)
    for n in G.nodes:
        # NOTE(review): `== None` should be `is None` (PEP 8); left unchanged
        # because this chunk ends mid-statement — the `if` suite lies beyond
        # the visible excerpt.
        if G.edges.get((n,n), None) == None:
# ===== End Function definitions ========= if __name__ == "__main__": # Import needed modules # ===================== from graphfile import GraphFile from graph import Graph import multiprocessing import datetime # Read data: clean paths and clean edges # ====================================== filename_paths = "data/clean_manufacturing_paths.txt" filename_edges = "data/clean_manufacturing_edges.txt" edges = GraphFile(filename_edges).read_edges_from_file() paths = GraphFile(filename_paths).read_paths_with_count() # Generate graph from clean edges # =============================== edges = add_self_loops(paths, edges) G = Graph(edges) print("Number of nodes: ", len(G.nodes)) print("Number of edges: ", len(G.edges.keys())) # Color code nodes # ======================= node_colors = dict() for node in G.nodes: if node in {0, 1, 24, 12, 25, 13, 2, 3, 14, 15,
# Build the capacity map for one of the flow networks F1/F2/F3, chosen
# interactively by the user.
from graphfile import GraphFile
from graph import Graph
from capacity import Capacity
from fordfulkerson import FordFulkerson

# Input data
# ==========
# Ask which of the three networks to analyse; the answer selects the data file.
graph_to_study = input("Choose graph to study: F1, F2 or F3? ")

# Load graph
# ==========
filename = "data/" + graph_to_study + ".txt"
edges = GraphFile(filename).read_edges_from_file()
F = Graph(edges)

# Get edges capacity
# ==================
# Per-node capacities feed a Capacity object with artificial source 'i' and
# sink 'f'; edge capacities are derived from the graph's "weight" attribute.
nodes_capacity = GraphFile("data/nodes_capacity.txt").read_nodes_capacity_from_file()
C = Capacity(nodes_capacity, 'i', 'f')
C_edges = C.get_edges_capacity(F, "weight")

# Truncate every capacity to an integer, except on edges that touch the
# artificial source 'i' or sink 'f'.
for edge, capacity in C_edges.items():
    if "i" not in edge and "f" not in edge:
        C_edges[edge] = int(capacity)

# Discard edges left with no positive capacity.
C_edges = {edge: capacity for edge, capacity in C_edges.items() if capacity > 0}
# Build the F2 flow network: load and normalize edges, then attach capacities
# read from the capacity-estimation results.
from capacity import Capacity
from fordfulkerson import FordFulkerson
import string
import json
import sys
sys.path.append('..')  # project modules (graphfile, graph) live one level up
from graphfile import GraphFile
from graph import Graph

# Load edges
# ==========
filename = "data/F2.txt"
edges = GraphFile(filename).read_edges_from_file()
# Total items = sum of weights on edges that touch the 'source' node
# (presumably keys are node tuples, so `"source" in k` tests membership —
# confirm against GraphFile.read_edges_from_file).
total_items = sum(v for k, v in edges.items() if "source" in k)
# Normalize edge weights to fractions of the total item count.
edges = {k: v / total_items for k, v in edges.items()}
G = Graph(edges)

# Get edges capacity
# ==================
filename = "results/capacity_estimation.json"
# FIX: the original used open(filename, "r").read() + json.loads, leaking the
# file handle; a context manager with json.load closes it deterministically.
with open(filename, "r") as capacity_file:
    nodes_capacity = json.load(capacity_file)
# Keys are stored with a one-character prefix (e.g. "n24"); strip it and use
# the integer node id — TODO confirm prefix convention against the writer of
# results/capacity_estimation.json.
nodes_capacity = {int(k[1:]): v for k, v in nodes_capacity.items()}
C = Capacity(nodes_capacity, source_node='source', sink_node='sink')
C_edges = C.get_edges_capacity(G, "weight")

# Flow Network