def load(self, nodes_file, node_types_file):
    """Load node indices, the node-types table and the group table from HDF5.

    Fills ``self._nodes_index`` with the 'node_gid', 'node_type_id',
    'node_group' and 'node_group_index' datasets of the file's 'nodes'
    group, then makes 'node_gid' the index.

    Args:
        nodes_file: Path of the HDF5 file; it must contain a top-level
            'nodes' group.
        node_types_file: Passed to ``tn.TypesTable`` together with the
            'node_type_id' key column.

    Raises:
        Exception: If ``nodes_file`` has no 'nodes' group.
    """
    nodes_hf = h5py.File(nodes_file, 'r')
    if 'nodes' not in nodes_hf:
        # Do not leave the handle open when the file turns out to be unusable.
        nodes_hf.close()
        raise Exception('Could not find nodes in {}'.format(nodes_file))

    # NOTE(review): the file is not closed on success because the h5py group
    # objects handed to Group(...) below belong to it -- presumably they are
    # read lazily later; confirm before adding a close() here.
    nodes_group = nodes_hf['nodes']

    self._network_name = nodes_group.attrs['network'] if 'network' in nodes_group.attrs else 'NA'
    self._version = 'v0.1'  # TODO: get the version number from the attributes

    # Create Indices: one in-memory column per index dataset, keeping the
    # dtype stored in the file.
    for column in ('node_gid', 'node_type_id', 'node_group', 'node_group_index'):
        dataset = nodes_group[column]
        self._nodes_index[column] = pd.Series(dataset, dtype=dataset.dtype)
    self._nodes_index.set_index(['node_gid'], inplace=True)
    self._nrows = len(self._nodes_index)

    # Save the node-types
    self._node_types_table = tn.TypesTable(node_types_file, 'node_type_id')

    # Save pointers to the groups table: every HDF5 sub-group of 'nodes'
    # (the plain index datasets are skipped) is wrapped in a Group.
    self._group_table = {
        grp_id: Group(grp_id, grp_ptr, self._node_types_table)
        for grp_id, grp_ptr in nodes_group.items()
        if isinstance(grp_ptr, h5py.Group)
    }
def load(self, nodes_file, node_types_file):
    """Read the nodes table from a space-separated CSV file.

    Args:
        nodes_file: CSV file with a 'node_id' column, which becomes the
            index of ``self._nodes_df``.
        node_types_file: Passed to ``tn.TypesTable`` together with the
            'node_type_id' key column.
    """
    nodes_df = pd.read_csv(nodes_file, sep=' ', index_col=['node_id'])
    self._nodes_df = nodes_df

    types_table = tn.TypesTable(node_types_file, 'node_type_id')
    self._node_types_table = types_table

    # One row per node in the CSV.
    self._nrows = len(nodes_df.index)

    # Column descriptions: those derived from the CSV come first, followed
    # by the columns exposed by the node-types table.
    csv_columns = tn.ColumnProperty.from_csv(nodes_df)
    self._nodes_columns = csv_columns
    self._columns = csv_columns + types_table.columns
def load(self, edges_file, edge_types_file):
    """Open the edges HDF5 file and keep handles to its top-level datasets.

    Only dataset handles are stored on the instance; the file itself is
    left open.

    Args:
        edges_file: HDF5 file containing the 'edge_ptr', 'num_syns',
            'src_gids' and 'edge_types' datasets.
        edge_types_file: Passed to ``tn.TypesTable`` together with the
            'edge_type_id' key column.
    """
    h5_file = h5py.File(edges_file, 'r')

    # (instance attribute, dataset key), bound in this order.
    # TODO: validate edge_types dataset keys
    dataset_bindings = (
        ('_edge_ptr_ds', 'edge_ptr'),
        ('_num_syns_ds', 'num_syns'),
        ('_src_gids_ds', 'src_gids'),
        ('_edge_types_ds', 'edge_types'),
    )
    for attr_name, dataset_key in dataset_bindings:
        setattr(self, attr_name, h5_file[dataset_key])

    self._edge_types_table = tn.TypesTable(edge_types_file, 'edge_type_id')

    # Sizes are taken from the dataset handles bound above.
    self._index_len = len(self._edge_ptr_ds)
    self._nrows = len(self._src_gids_ds)
def load(self, edges_file, edge_types_file):
    """Load edge indices, the edge-types table and the group table from HDF5.

    The 'index_pointer' dataset is read into memory. For the remaining
    index datasets only the dataset handles are stored.
    ``self._target_network`` and ``self._source_network`` are assigned only
    when the matching dataset carries a 'network' attribute.

    Args:
        edges_file: Path of the HDF5 file; it must contain a top-level
            'edges' group.
        edge_types_file: Passed to ``tn.TypesTable`` together with the
            'edge_type_id' key column.

    Raises:
        Exception: If ``edges_file`` has no 'edges' group.
    """
    edges_hf = h5py.File(edges_file, 'r')
    if 'edges' not in edges_hf:
        # Do not leave the handle open when the file turns out to be unusable.
        edges_hf.close()
        raise Exception('Could not find edges in {}'.format(edges_file))

    # The file stays open on success: dataset handles from it are stored on
    # the instance below.
    edges_group = edges_hf['edges']

    # Preload the target index pointers into memory
    index_pointer_ds = edges_group['index_pointer']
    self._target_index = pd.Series(index_pointer_ds, dtype=index_pointer_ds.dtype)
    self._target_index_len = len(self._target_index)

    # For the other index tables we only load in a file pointer
    self._target_gid_ds = edges_group['target_gid']
    if 'network' in self._target_gid_ds.attrs:
        self._target_network = self._target_gid_ds.attrs['network']

    self._source_gid_ds = edges_group['source_gid']
    if 'network' in self._source_gid_ds.attrs:
        self._source_network = self._source_gid_ds.attrs['network']

    self._edge_type_ds = edges_group['edge_type_id']
    self._edge_group_ds = edges_group['edge_group']
    self._edge_group_index_ds = edges_group['edge_group_index']
    self._nedges = len(self._edge_group_index_ds)

    # Load in edge-types table
    self._edge_types_table = tn.TypesTable(edge_types_file, 'edge_type_id')

    # Load in the group properties: every HDF5 sub-group of 'edges' (the
    # plain index datasets are skipped) is wrapped in a Group.
    # TODO: look in attributes for group synonyms
    # TODO: HDF5 group name will always be a string, but value in groups
    #       dataset will be an int.
    self._group_table = {
        grp_id: Group(grp_id, grp_ptr, self._edge_types_table)
        for grp_id, grp_ptr in edges_group.items()
        if isinstance(grp_ptr, h5py.Group)
    }