def __init__(self, parent, controller):
    """Build the "(1) Graph Creation" page and lay out its widgets.

    Args:
        parent: Container handed on to ``tk.Frame.__init__``.
        controller: Stored as ``self.controller``. Its ``title_font`` is
            used for the page title and its ``show_previous_frame()`` is
            invoked by the "Back" button.
    """
    tk.Frame.__init__(self, parent)
    self.controller = controller
    self.info_text = ""

    # --- selectable classes, ordered by their NAME attribute -------------
    def by_name(cls):
        return cls.NAME

    db_classes = list(utils.get_leaf_subclasses(DbMetadata))
    edge_classes = [
        *utils.get_leaf_subclasses(EdgeRegularMetadata),
        *utils.get_leaf_subclasses(EdgeOntoMetadata),
    ]
    self.db_cls_list = sorted(db_classes, key=by_name)
    self.edge_metadata_cls_list = sorted(edge_classes, key=by_name)
    self.selected_dbs = None
    self.selected_meta_edges = None

    # --- button callbacks -------------------------------------------------
    # Attributes are looked up when the button is pressed, not when it is
    # created, so later changes to info_text / controller are honoured.
    def on_help():
        return gui.show_info_box(self.info_text)

    def on_next():
        return self.next_page()

    def on_back():
        return self.controller.show_previous_frame()

    # --- widget creation (order kept: title, actions, select, options,
    #     navigation buttons) ---------------------------------------------
    header = tk.Frame(self)
    self.info = tk.Button(header, text=" help ", command=on_help)
    self.title = tk.Label(header, text="(1) Graph Creation", font=controller.title_font)

    self.actions_el = self._create_action_el(self)

    selection = tk.Frame(self)
    self.select_el = self._create_select_db_meta_edges_el(selection)

    options = tk.Frame(self)
    self.graph_prop_el = self._create_graph_prop_el(options)
    self.output_format_el = self._create_output_format_el(options)

    navigation = tk.Frame(self)
    nav_button_size = {"height": 1, "width": 15}
    next_button = tk.Button(navigation, text="Next", command=on_next, **nav_button_size)
    prev_button = tk.Button(navigation, text="Back", command=on_back, **nav_button_size)

    # --- layout -----------------------------------------------------------
    stretch = {"fill": "both", "expand": True}

    header.pack(side="top", fill="x", pady=10)
    self.title.pack(side="left", pady=10, padx=15)
    self.info.pack(side="right", fill="x", pady=5, padx=15)

    self.actions_el.pack(side="top", padx=15, pady=5, **stretch)

    selection.pack(side="top", padx=10, pady=5, **stretch)
    self.select_el.pack(side="top", padx=5, pady=10, **stretch)

    options.pack(side="top", padx=5, pady=5, **stretch)
    self.graph_prop_el.pack(side="left", padx=10, **stretch)
    self.output_format_el.pack(side="left", padx=10, **stretch)

    # Horizontal rule between the page content and the navigation buttons.
    divider = ttk.Separator(self, orient="horizontal")
    divider.pack(side="top", fill="x", pady=(15, 0), padx=10, anchor="s")

    navigation.pack(side="bottom", padx=15, fill="x")
    prev_button.pack(side="left", anchor="w", pady=(5, 10))
    next_button.pack(side="right", anchor="e", pady=(5, 10))
def __init__(self, folder_path, use_db_metadata_classes=None, use_edge_metadata_classes=None):
    """Instantiate all metadata, reader and processor leaf classes.

    Side effects on shared modules: sets ``gcConst.O_FILE_PATH`` and
    ``gcConst.IN_FILE_PATH`` below ``folder_path`` and finally writes
    ``graphProp.EDGE_TYPES`` (class names of ``self.edge_metadata``).

    Args:
        folder_path: Base folder joined with the gcConst folder names.
        use_db_metadata_classes: If not None, passed to
            ``self.init_custom_sources_bottom_up``.
        use_edge_metadata_classes: If not None, passed to
            ``self.init_custom_sources_top_down`` (called after the
            bottom-up step).
    """
    gcConst.O_FILE_PATH = os.path.join(folder_path, gcConst.O_FILE_FOLDER_NAME)
    gcConst.IN_FILE_PATH = os.path.join(folder_path, gcConst.IN_FILE_FOLDER_NAME)

    def instantiate_leaves(base_cls, with_quality=False):
        # One instance per class returned by utils.get_leaf_subclasses;
        # edge metadata classes take graphProp.QUALITY as their argument.
        if with_quality:
            return [leaf_cls(graphProp.QUALITY) for leaf_cls in utils.get_leaf_subclasses(base_cls)]
        return [leaf_cls() for leaf_cls in utils.get_leaf_subclasses(base_cls)]

    self.db_file_metadata = instantiate_leaves(DbMetadata)
    self.file_readers = instantiate_leaves(FileReader)
    self.file_processors = instantiate_leaves(FileProcessor)
    self.infile_metadata = instantiate_leaves(InfileMetadata)

    regular_edges = instantiate_leaves(EdgeRegularMetadata, with_quality=True)
    onto_edges = instantiate_leaves(EdgeOntoMetadata, with_quality=True)
    self.edge_metadata = regular_edges + onto_edges
    self.tn_edge_metadata = instantiate_leaves(TnEdgeRegularMetadata, with_quality=True)

    # Lookup tables used to connect the pipeline stages.
    self.dbType_reader_map = utils.cls_list_to_dic(self.file_readers, "dbType")
    self.readerType_processor_map = utils.cls_list_to_dic(self.file_processors, "readerType")
    infile_lookup = {}
    for infile_meta in self.infile_metadata:
        infile_lookup[infile_meta.infileType] = infile_meta
    self.infileType_inMetadata_map = infile_lookup

    # NOTE: a disabled code path that dropped EdgeOntoMetadata classes for
    # undirected graphs used to sit here; it is intentionally not active.

    # Restrict to the requested sources (db selection first, then edges).
    if use_db_metadata_classes is not None:
        self.init_custom_sources_bottom_up(use_db_metadata_classes)
    if use_edge_metadata_classes is not None:
        self.init_custom_sources_top_down(use_edge_metadata_classes)

    edge_type_names = []
    for edge_meta in self.edge_metadata:
        edge_type_names.append(str(edge_meta.__class__.__name__))
    graphProp.EDGE_TYPES = edge_type_names
def __init__(
        self,
        global_config: dict,
        graph_path,
        tn_graph_path,
        all_nodes_path,
        sep: Optional[str] = None,
        t_minus_one_graph_path=None,
        t_minus_one_tn_graph_path=None,
        t_minus_one_nodes_path=None):
    """Load nodes, positive edges and negative edges for train/test splitting.

    Args:
        global_config: Mapping providing the keys ``IDENTIFIER_2_TYPE``,
            ``COL_NAMES_NODES``, ``COL_NAMES_EDGES``, ``EDGE_TYPE_COL_NAME``
            and ``VALUE_COL_NAME``.
        graph_path: File with the true (positive) edges.
        tn_graph_path: File with the false (negative) edges.
        all_nodes_path: File with all nodes.
        sep: Column separator of the input files; ``None`` means tab.
        t_minus_one_graph_path: Positive edges of the previous time slice.
        t_minus_one_tn_graph_path: Negative edges of the previous time slice.
        t_minus_one_nodes_path: Nodes of the previous time slice.

    The three ``t_minus_one_*`` paths must be given together or not at all.
    The ``tmo_*`` attributes are only set when they are given.

    Raises:
        SystemExit: With status 1 if a path is rejected by ``_not_csv`` or
            if only some of the ``t_minus_one_*`` paths are provided.
    """
    # TODO (nice to have 1): allow a ``meta_edge_triples`` parameter to split
    # only a sub-sample of edges / user-defined meta edges.
    if sep is None:
        sep = "\t"

    # (label used in the error message, path, is the path mandatory?)
    path_checks = (
        ("graph", graph_path, True),
        ("tn_graph", tn_graph_path, True),
        ("all_nodes", all_nodes_path, True),
        ("t_minus_one_graph", t_minus_one_graph_path, False),
        ("t_minus_one_tn_graph", t_minus_one_tn_graph_path, False),
        ("t_minus_one_nodes", t_minus_one_nodes_path, False),
    )
    for label, path, mandatory in path_checks:
        if (mandatory or path is not None) and _not_csv(path):
            logging.error(f"{label} path must be a csv file")
            # Non-zero status: a bare sys.exit() would report success.
            sys.exit(1)

    self.identifier2type = global_config["IDENTIFIER_2_TYPE"]
    self.col_names_nodes = global_config["COL_NAMES_NODES"]
    self.col_names_edges = global_config["COL_NAMES_EDGES"]
    self.edge_type_col_name = global_config["EDGE_TYPE_COL_NAME"]
    self.value_col_name = global_config["VALUE_COL_NAME"]

    def read_nodes(path):
        return pandas.read_csv(path, sep=sep, names=self.col_names_nodes)

    def read_edges(path, value):
        # Only the first four columns are read; ``value`` marks the rows
        # as positive (1) or negative (0) samples.
        edges = pandas.read_csv(path, sep=sep, names=self.col_names_edges, usecols=range(4))
        edges[self.value_col_name] = value
        return edges

    def edge_types_of(edges):
        return list(edges[self.edge_type_col_name].unique())

    logging.info(f"loading nodes from {all_nodes_path}")
    self.all_nodes = read_nodes(all_nodes_path)
    logging.info(f"loading true edges from {graph_path}")
    self.all_tp = read_edges(graph_path, 1)
    self.tp_edgeTypes = edge_types_of(self.all_tp)
    logging.info(f"loading false edges from {tn_graph_path}")
    self.all_tn = read_edges(tn_graph_path, 0)
    self.tn_edgeTypes = edge_types_of(self.all_tn)

    # Keep only the meta edges whose edge type occurs in the positive graph.
    self.meta_edges_dic = {}
    sources = {}
    for meta_edge in utils.get_leaf_subclasses(meta.EdgeMetadata):
        in_meta = meta_edge.EDGE_INMETA_CLASS
        edge_type = str(in_meta.EDGE_TYPE)
        node1_type = str(in_meta.NODE1_TYPE)
        node2_type = str(in_meta.NODE2_TYPE)
        source = str(in_meta.SOURCE)
        if edge_type in self.tp_edgeTypes:
            key = f"{node1_type}_{edge_type}_{node2_type}"
            self.meta_edges_dic[key] = (node1_type, edge_type, node2_type)
            sources[key] = source
    self.writer = TrainTestSetWriter(self.identifier2type, sources)

    # TODO (nice to have 2): warn about transitive IS_A / PART_OF edges.

    # Time slices: all three t-1 inputs or none of them.
    if not (bool(t_minus_one_graph_path)
            == bool(t_minus_one_tn_graph_path)
            == bool(t_minus_one_nodes_path)):
        logging.error(
            "either all three or none of these variables must be provided")
        sys.exit(1)

    if (t_minus_one_nodes_path is not None
            and t_minus_one_graph_path is not None
            and t_minus_one_tn_graph_path is not None):
        self.tmo_nodes = read_nodes(t_minus_one_nodes_path)
        self.tmo_all_tp = read_edges(t_minus_one_graph_path, 1)
        # Edge types are taken from the t-1 frames themselves (previously
        # both lists were computed from the current positive graph).
        self.tmo_tp_edgeTypes = edge_types_of(self.tmo_all_tp)
        self.tmo_all_tn = read_edges(t_minus_one_tn_graph_path, 0)
        self.tmo_tn_edgeTypes = edge_types_of(self.tmo_all_tn)
def __init__(
        self,
        graph_path,
        tn_graph_path,
        all_nodes_path,
        sep='\t',
        t_minus_one_graph_path=None,
        t_minus_one_tn_graph_path=None,
        t_minus_one_nodes_path=None):
    """Load and sort nodes, positive edges and negative edges.

    Column names come from ``globalConfig`` (``COL_NAMES_NODES``,
    ``COL_NAMES_EDGES``, ``EDGE_TYPE_COL_NAME``, ``VALUE_COL_NAME``).
    Every frame is sorted by its column names and re-indexed.

    Args:
        graph_path: File with the true (positive) edges.
        tn_graph_path: File with the false (negative) edges.
        all_nodes_path: File with all nodes.
        sep: Column separator of the input files.
        t_minus_one_graph_path: Positive edges of the previous time slice.
        t_minus_one_tn_graph_path: Negative edges of the previous time slice.
        t_minus_one_nodes_path: Nodes of the previous time slice.

    The three ``t_minus_one_*`` paths must be given together or not at all.
    The ``tmo_*`` attributes are only set when they are given.

    Raises:
        SystemExit: With status 1 if only some of the ``t_minus_one_*``
            paths are provided.
    """
    # TODO (nice to have 1): allow a ``meta_edge_triples`` parameter to split
    # only a sub-sample of edges / user-defined meta edges.
    self.writer = TrainTestSetWriter()

    def load_sorted(path, columns, value=None):
        # Read the file, optionally tag every row with ``value``
        # (1 = positive sample, 0 = negative sample), then sort by the
        # given columns so the row order is reproducible.
        with open(path) as handle:
            frame = pandas.read_csv(handle, sep=sep, names=columns)
        if value is not None:
            frame[globalConfig.VALUE_COL_NAME] = value
        return frame.sort_values(by=columns).reset_index(drop=True)

    def edge_types_of(edges):
        return list(edges[globalConfig.EDGE_TYPE_COL_NAME].unique())

    self.all_nodes = load_sorted(all_nodes_path, globalConfig.COL_NAMES_NODES)
    self.all_tp = load_sorted(graph_path, globalConfig.COL_NAMES_EDGES, value=1)
    self.tp_edgeTypes = edge_types_of(self.all_tp)
    self.all_tn = load_sorted(tn_graph_path, globalConfig.COL_NAMES_EDGES, value=0)
    self.tn_edgeTypes = edge_types_of(self.all_tn)

    # Keep only the meta edges whose edge type occurs in the positive graph.
    self.meta_edges_dic = {}
    for meta_edge in utils.get_leaf_subclasses(meta.EdgeMetadata):
        in_meta = meta_edge.EDGE_INMETA_CLASS
        edge_type = str(in_meta.EDGE_TYPE)
        node1_type = str(in_meta.NODE1_TYPE)
        node2_type = str(in_meta.NODE2_TYPE)
        if edge_type in self.tp_edgeTypes:
            triple = (node1_type, edge_type, node2_type)
            self.meta_edges_dic["_".join(triple)] = triple

    # TODO (nice to have 2): warn about transitive IS_A / PART_OF edges.

    # Time slices: all three t-1 inputs or none of them.
    if not (bool(t_minus_one_graph_path)
            == bool(t_minus_one_tn_graph_path)
            == bool(t_minus_one_nodes_path)):
        logging.error(
            'either all three or none of these variables must be provided')
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)

    if t_minus_one_nodes_path and t_minus_one_graph_path and t_minus_one_tn_graph_path:
        self.tmo_nodes = load_sorted(
            t_minus_one_nodes_path, globalConfig.COL_NAMES_NODES)
        self.tmo_all_tp = load_sorted(
            t_minus_one_graph_path, globalConfig.COL_NAMES_EDGES, value=1)
        # Edge types are taken from the t-1 frames themselves (previously
        # both lists were computed from the current positive graph).
        self.tmo_tp_edgeTypes = edge_types_of(self.tmo_all_tp)
        self.tmo_all_tn = load_sorted(
            t_minus_one_tn_graph_path, globalConfig.COL_NAMES_EDGES, value=0)
        self.tmo_tn_edgeTypes = edge_types_of(self.tmo_all_tn)