Пример #1
0
    def __init__(self, parent, controller):
        """Build the "(1) Graph Creation" page and lay out its widgets.

        Args:
            parent: Tk container handed on to ``tk.Frame.__init__``.
            controller: Object providing ``title_font`` and
                ``show_previous_frame()``; kept as ``self.controller``.
        """
        tk.Frame.__init__(self, parent)
        self.controller = controller
        self.info_text = ""

        # Gather the selectable classes first, then order both lists by NAME.
        db_classes = list(utils.get_leaf_subclasses(DbMetadata))
        edge_classes = list(utils.get_leaf_subclasses(EdgeRegularMetadata))
        edge_classes.extend(utils.get_leaf_subclasses(EdgeOntoMetadata))
        db_classes.sort(key=lambda cls: cls.NAME)
        edge_classes.sort(key=lambda cls: cls.NAME)
        self.db_cls_list = db_classes
        self.edge_metadata_cls_list = edge_classes
        self.selected_dbs = None
        self.selected_meta_edges = None

        # Button callbacks; attributes are looked up at click time, not now.
        def open_help():
            return gui.show_info_box(self.info_text)

        def go_next():
            return self.next_page()

        def go_back():
            return self.controller.show_previous_frame()

        # --- widget creation (order kept: header, actions, selection, options, navigation) ---
        header = tk.Frame(self)
        self.info = tk.Button(header, text=" help ", command=open_help)
        self.title = tk.Label(header, text="(1) Graph Creation", font=controller.title_font)

        self.actions_el = self._create_action_el(self)
        selection = tk.Frame(self)
        self.select_el = self._create_select_db_meta_edges_el(selection)

        options = tk.Frame(self)
        self.graph_prop_el = self._create_graph_prop_el(options)
        self.output_format_el = self._create_output_format_el(options)

        navigation = tk.Frame(self)
        nav_button_size = {"height": 1, "width": 15}
        next_button = tk.Button(navigation, text="Next", command=go_next, **nav_button_size)
        prev_button = tk.Button(navigation, text="Back", command=go_back, **nav_button_size)

        # --- layout ---
        # Pack options shared by the vertically stacked sections and by the
        # two option boxes that sit next to each other.
        stacked = {"side": "top", "fill": "both", "expand": True}
        side_by_side = {"side": "left", "fill": "both", "padx": 10, "expand": True}

        header.pack(side="top", fill="x", pady=10)
        self.title.pack(side="left", pady=10, padx=15)
        self.info.pack(side="right", fill="x", pady=5, padx=15)
        self.actions_el.pack(padx=15, pady=5, **stacked)
        selection.pack(padx=10, pady=5, **stacked)
        self.select_el.pack(padx=5, pady=10, **stacked)
        options.pack(padx=5, pady=5, **stacked)
        self.graph_prop_el.pack(**side_by_side)
        self.output_format_el.pack(**side_by_side)

        divider = ttk.Separator(self, orient="horizontal")
        divider.pack(side="top", fill="x", pady=(15, 0), padx=10, anchor="s")
        navigation.pack(side="bottom", padx=15, fill="x")
        prev_button.pack(side="left", anchor="w", pady=(5, 10))
        next_button.pack(side="right", anchor="e", pady=(5, 10))
Пример #2
0
    def __init__(self, folder_path, use_db_metadata_classes=None, use_edge_metadata_classes=None):
        """Configure the file locations and instantiate every leaf metadata/reader/processor class.

        Side effects on module-level state: sets ``gcConst.O_FILE_PATH``,
        ``gcConst.IN_FILE_PATH`` and ``graphProp.EDGE_TYPES``.

        Args:
            folder_path: Base folder that is joined with
                ``gcConst.O_FILE_FOLDER_NAME`` and ``gcConst.IN_FILE_FOLDER_NAME``.
            use_db_metadata_classes: When not ``None``, forwarded to
                ``init_custom_sources_bottom_up``.
            use_edge_metadata_classes: When not ``None``, forwarded to
                ``init_custom_sources_top_down``.
        """
        gcConst.O_FILE_PATH = os.path.join(folder_path, gcConst.O_FILE_FOLDER_NAME)
        gcConst.IN_FILE_PATH = os.path.join(folder_path, gcConst.IN_FILE_FOLDER_NAME)

        # One instance per leaf subclass of each base type.
        self.db_file_metadata = [leaf() for leaf in utils.get_leaf_subclasses(DbMetadata)]
        self.file_readers = [leaf() for leaf in utils.get_leaf_subclasses(FileReader)]
        self.file_processors = [leaf() for leaf in utils.get_leaf_subclasses(FileProcessor)]
        self.infile_metadata = [leaf() for leaf in utils.get_leaf_subclasses(InfileMetadata)]

        # Edge metadata is constructed with the configured quality;
        # regular edges come first, ontology edges are appended after them.
        regular_edges = [leaf(graphProp.QUALITY) for leaf in utils.get_leaf_subclasses(EdgeRegularMetadata)]
        onto_edges = [leaf(graphProp.QUALITY) for leaf in utils.get_leaf_subclasses(EdgeOntoMetadata)]
        self.edge_metadata = regular_edges + onto_edges
        self.tn_edge_metadata = [
            leaf(graphProp.QUALITY) for leaf in utils.get_leaf_subclasses(TnEdgeRegularMetadata)
        ]

        # Lookup tables between the pipeline stages.
        self.dbType_reader_map = utils.cls_list_to_dic(self.file_readers, "dbType")
        self.readerType_processor_map = utils.cls_list_to_dic(self.file_processors, "readerType")
        infile_lookup = {}
        for in_meta in self.infile_metadata:
            infile_lookup[in_meta.infileType] = in_meta
        self.infileType_inMetadata_map = infile_lookup

        # Disabled draft, kept for reference:
        # if not glob.DIRECTED:
        ## remove onto
        #    if use_edge_metadata_classes is None:
        #        use_edge_metadata_classes = [x(glob.QUALITY) for x in utils.get_leaf_subclasses(EdgeRegularMetadata)]
        #    else:
        #        temp_use_edge_metadata_classes =[]
        #        for edge_class in use_edge_metadata_classes:
        #            if inspect.isclass(edge_class):
        #                if not issubclass(edge_class, EdgeOntoMetadata):
        #                    temp_use_edge_metadata_classes.append(edge_class())
        #            else:
        #                if not issubclass(type(edge_class), EdgeOntoMetadata):
        #                    temp_use_edge_metadata_classes.append(edge_class)
        #        use_edge_metadata_classes = temp_use_edge_metadata_classes
        #        #use_edge_metadata_classes = [x for x in use_edge_metadata_classes if not issubclass(type(x), EdgeOntoMetadata)]

        # Restrict to the requested sources, if any were given.
        if use_db_metadata_classes is not None:
            self.init_custom_sources_bottom_up(use_db_metadata_classes)
        if use_edge_metadata_classes is not None:
            self.init_custom_sources_top_down(use_edge_metadata_classes)

        # Publish the class names of the (possibly restricted) edge metadata.
        edge_type_names = []
        for edge_meta in self.edge_metadata:
            edge_type_names.append(str(edge_meta.__class__.__name__))
        graphProp.EDGE_TYPES = edge_type_names
Пример #3
0
    def __init__(
            self,
            global_config: dict,
            graph_path,
            tn_graph_path,
            all_nodes_path,
            sep: Optional[str] = None,
            # meta_edge_triples=None, #nicetohave (1) split for subsample of edges, define own meta edges
            t_minus_one_graph_path=None,
            t_minus_one_tn_graph_path=None,
            t_minus_one_nodes_path=None):
        """Load nodes, true edges and false edges (plus optional t-1 data) from csv files.

        Args:
            global_config: Mapping read for the keys ``IDENTIFIER_2_TYPE``,
                ``COL_NAMES_NODES``, ``COL_NAMES_EDGES``,
                ``EDGE_TYPE_COL_NAME`` and ``VALUE_COL_NAME``.
            graph_path: csv file with the true edges (labelled ``1``).
            tn_graph_path: csv file with the false edges (labelled ``0``).
            all_nodes_path: csv file with all nodes.
            sep: Column separator; ``None`` means tab.
            t_minus_one_graph_path: Optional true-edge csv of the t-1 slice.
            t_minus_one_tn_graph_path: Optional false-edge csv of the t-1 slice.
            t_minus_one_nodes_path: Optional node csv of the t-1 slice.

        Raises:
            SystemExit: If a given path is rejected by ``_not_csv``, or if
                only some of the three t-1 paths are provided (an error is
                logged before exiting).
        """
        if sep is None:
            sep = "\t"

        # (label used in the error message, path, whether None is allowed)
        path_checks = (
            ("graph", graph_path, False),
            ("tn_graph", tn_graph_path, False),
            ("all_nodes", all_nodes_path, False),
            ("t_minus_one_graph", t_minus_one_graph_path, True),
            ("t_minus_one_tn_graph", t_minus_one_tn_graph_path, True),
            ("t_minus_one_nodes", t_minus_one_nodes_path, True),
        )
        for label, path, optional in path_checks:
            if optional and path is None:
                continue
            if _not_csv(path):
                logging.error(f"{label} path must be a csv file")
                sys.exit()

        self.identifier2type = global_config["IDENTIFIER_2_TYPE"]
        self.col_names_nodes = global_config["COL_NAMES_NODES"]
        self.col_names_edges = global_config["COL_NAMES_EDGES"]
        self.edge_type_col_name = global_config["EDGE_TYPE_COL_NAME"]
        self.value_col_name = global_config["VALUE_COL_NAME"]

        def read_nodes(path):
            # Node files are read with the configured node column names.
            return pandas.read_csv(path, sep=sep, names=self.col_names_nodes)

        def read_edges(path, value):
            # Only the first four columns of an edge file are read; the
            # label column (1 = true edge, 0 = false edge) is added afterwards.
            frame = pandas.read_csv(path,
                                    sep=sep,
                                    names=self.col_names_edges,
                                    usecols=range(4))
            frame[self.value_col_name] = value
            return frame

        def edge_types_of(frame):
            return list(frame[self.edge_type_col_name].unique())

        logging.info(f"loading nodes from {all_nodes_path}")
        self.all_nodes = read_nodes(all_nodes_path)

        logging.info(f"loading true edges from {graph_path}")
        self.all_tp = read_edges(graph_path, 1)
        self.tp_edgeTypes = edge_types_of(self.all_tp)

        logging.info(f"loading false edges from {tn_graph_path}")
        self.all_tn = read_edges(tn_graph_path, 0)
        self.tn_edgeTypes = edge_types_of(self.all_tn)

        # Keep only the meta edges whose edge type occurs among the true edges.
        self.meta_edges_dic = {}
        sources = {}

        for metaEdge in utils.get_leaf_subclasses(meta.EdgeMetadata):
            edge_type = str(metaEdge.EDGE_INMETA_CLASS.EDGE_TYPE)
            node1_type = str(metaEdge.EDGE_INMETA_CLASS.NODE1_TYPE)
            node2_type = str(metaEdge.EDGE_INMETA_CLASS.NODE2_TYPE)
            source = str(metaEdge.EDGE_INMETA_CLASS.SOURCE)
            if edge_type in self.tp_edgeTypes:
                key = f"{node1_type}_{edge_type}_{node2_type}"
                self.meta_edges_dic[key] = node1_type, edge_type, node2_type
                sources[key] = source

        self.writer = TrainTestSetWriter(self.identifier2type, sources)

        # nicetohave (2) check for transient onto edges
        # transitiv_IS_A_edges = utils.check_for_transitive_edges(self.all_tp[self.all_tp[ttsConst.EDGE_TYPE_COL_NAME] == 'IS_A'])
        # transitiv_PART_OF_edges = utils.check_for_transitive_edges(self.all_tp[self.all_tp[ttsConst.EDGE_TYPE_COL_NAME] == 'PART_OF'])
        # if transitiv_IS_A_edges:
        #    print('WARNING: transient edges in IS_A: ({a},b,c) for a IS_A b and a IS_A c', transitiv_IS_A_edges)
        # if transitiv_PART_OF_edges:
        #    print('WARNING: transient edges in PART_OF: ({a},b,c) for a PART_OF b and a PART_OF c',
        #          transitiv_PART_OF_edges)

        # for time slices: the three t-1 paths must be given together or not at all
        if not (bool(t_minus_one_graph_path) == bool(t_minus_one_tn_graph_path)
                == (bool(t_minus_one_nodes_path))):
            logging.error(
                "either all three or none of these variables must be provided")
            sys.exit()
        if (t_minus_one_nodes_path is not None
                and t_minus_one_graph_path is not None
                and t_minus_one_tn_graph_path is not None):
            self.tmo_nodes = read_nodes(t_minus_one_nodes_path)

            self.tmo_all_tp = read_edges(t_minus_one_graph_path, 1)
            # Fixed: edge types of the t-1 slice are taken from the t-1 true
            # edges (previously read from self.all_tp by mistake).
            self.tmo_tp_edgeTypes = edge_types_of(self.tmo_all_tp)

            self.tmo_all_tn = read_edges(t_minus_one_tn_graph_path, 0)
            # Fixed: taken from the t-1 false edges (previously self.all_tp).
            self.tmo_tn_edgeTypes = edge_types_of(self.tmo_all_tn)
Пример #4
0
    def __init__(
            self,
            graph_path,
            tn_graph_path,
            all_nodes_path,
            sep='\t',
            #meta_edge_triples=None, #nicetohave (1) split for subsample of edges, define own meta edges
            t_minus_one_graph_path=None,
            t_minus_one_tn_graph_path=None,
            t_minus_one_nodes_path=None):
        """Load nodes, true edges and false edges (plus optional t-1 data), each sorted by its columns.

        Column names come from ``globalConfig`` (``COL_NAMES_NODES``,
        ``COL_NAMES_EDGES``, ``EDGE_TYPE_COL_NAME``, ``VALUE_COL_NAME``).

        Args:
            graph_path: File with the true edges (value column set to ``1``).
            tn_graph_path: File with the false edges (value column set to ``0``).
            all_nodes_path: File with all nodes.
            sep: Column separator passed to ``pandas.read_csv``.
            t_minus_one_graph_path: Optional true-edge file of the t-1 slice.
            t_minus_one_tn_graph_path: Optional false-edge file of the t-1 slice.
            t_minus_one_nodes_path: Optional node file of the t-1 slice.

        Raises:
            SystemExit: If only some of the three t-1 paths are provided
                (an error is logged before exiting).
        """

        def load_sorted(path, columns, value=None):
            # Read one file, optionally add the label column, then sort by
            # the given columns and renumber the index from zero.
            with open(path) as file:
                frame = pandas.read_csv(file, sep=sep, names=columns)
            if value is not None:
                frame[globalConfig.VALUE_COL_NAME] = value
            return frame.sort_values(by=columns).reset_index(drop=True)

        def edge_types_of(frame):
            return list(frame[globalConfig.EDGE_TYPE_COL_NAME].unique())

        self.writer = TrainTestSetWriter()
        self.all_nodes = load_sorted(all_nodes_path,
                                     globalConfig.COL_NAMES_NODES)

        self.all_tp = load_sorted(graph_path, globalConfig.COL_NAMES_EDGES, 1)
        self.tp_edgeTypes = edge_types_of(self.all_tp)

        self.all_tn = load_sorted(tn_graph_path, globalConfig.COL_NAMES_EDGES,
                                  0)
        self.tn_edgeTypes = edge_types_of(self.all_tn)

        # Keep only the meta edges whose edge type occurs among the true edges.
        self.meta_edges_dic = {}

        for metaEdge in utils.get_leaf_subclasses(meta.EdgeMetadata):
            edgeType = str(metaEdge.EDGE_INMETA_CLASS.EDGE_TYPE)
            node1Type = str(metaEdge.EDGE_INMETA_CLASS.NODE1_TYPE)
            node2Type = str(metaEdge.EDGE_INMETA_CLASS.NODE2_TYPE)
            if edgeType in self.tp_edgeTypes:
                self.meta_edges_dic['%s_%s_%s' %
                                    (node1Type, edgeType, node2Type)] = (
                                        node1Type, edgeType, node2Type)

        #nicetohave (2) check for transient onto edges
        #transitiv_IS_A_edges = utils.check_for_transitive_edges(self.all_tp[self.all_tp[ttsConst.EDGE_TYPE_COL_NAME] == 'IS_A'])
        #transitiv_PART_OF_edges = utils.check_for_transitive_edges(self.all_tp[self.all_tp[ttsConst.EDGE_TYPE_COL_NAME] == 'PART_OF'])
        #if transitiv_IS_A_edges:
        #    print('WARNING: transient edges in IS_A: ({a},b,c) for a IS_A b and a IS_A c', transitiv_IS_A_edges)
        #if transitiv_PART_OF_edges:
        #    print('WARNING: transient edges in PART_OF: ({a},b,c) for a PART_OF b and a PART_OF c',
        #          transitiv_PART_OF_edges)

        #for time slices: the three t-1 paths must be given together or not at all
        if not (bool(t_minus_one_graph_path) == bool(t_minus_one_tn_graph_path)
                == (bool(t_minus_one_nodes_path))):
            logging.error(
                'either all three or none of these variables must be provided')
            sys.exit()
        if t_minus_one_nodes_path and t_minus_one_graph_path and t_minus_one_tn_graph_path:
            self.tmo_nodes = load_sorted(t_minus_one_nodes_path,
                                         globalConfig.COL_NAMES_NODES)

            self.tmo_all_tp = load_sorted(t_minus_one_graph_path,
                                          globalConfig.COL_NAMES_EDGES, 1)
            # Fixed: edge types of the t-1 slice are taken from the t-1 true
            # edges (previously read from self.all_tp by mistake).
            self.tmo_tp_edgeTypes = edge_types_of(self.tmo_all_tp)

            self.tmo_all_tn = load_sorted(t_minus_one_tn_graph_path,
                                          globalConfig.COL_NAMES_EDGES, 0)
            # Fixed: taken from the t-1 false edges (previously self.all_tp).
            self.tmo_tn_edgeTypes = edge_types_of(self.tmo_all_tn)