def __getitem__(self, index: int) -> tuple:
    """Generate one batch of model inputs (plus targets/weights when set).

    Args:
        index (int): Index of the batch to generate.

    Returns:
        (tuple): The combined graph inputs alone when ``self.targets`` is
            None, otherwise ``(inputs, targets)``, or
            ``(inputs, targets, sample_weights)`` when weights are set.
    """
    start = index * self.batch_size
    batch_ids = self.mol_index[start : start + self.batch_size]

    # Build per-graph inputs, then merge them into one batched graph.
    batch_inputs = self._combine_graph_data(*self._generate_inputs(batch_ids))

    # Inference mode: no labels available.
    if self.targets is None:
        return batch_inputs

    labels = np.atleast_2d(itemgetter_list(self.targets, batch_ids))
    if self.sample_weights is None:
        return batch_inputs, expand_1st(labels)

    weights = itemgetter_list(self.sample_weights, batch_ids)
    return batch_inputs, expand_1st(labels), expand_1st(weights)
def __getitem__(self, index):
    """Generate one batch of inputs and targets.

    Args:
        index (int): Index of the batch to generate.

    Returns:
        (tuple): ``(inputs, targets)`` where targets carry a leading
            batch axis added by ``expand_1st``.
    """
    # Get the indices for this batch
    batch_index = self.mol_index[index * self.batch_size:(index + 1) * self.batch_size]
    # Get the inputs for each batch
    inputs = self._generate_inputs(batch_index)
    # Make the graph data
    inputs = self._combine_graph_data(*inputs)
    # Gather targets by direct indexing rather than itemgetter:
    # itemgetter(*batch_index) raises TypeError for an empty batch and
    # returns a bare scalar (not a tuple) for a single-element batch.
    # np.atleast_2d produces the same shapes for the list form.
    target_temp = [self.targets[i] for i in batch_index]
    target_temp = np.atleast_2d(target_temp)
    return inputs, expand_1st(target_temp)
def graph_to_input(self, graph):
    """
    Turns a graph into model input

    Args:
        (dict): Dictionary description of the graph

    Return:
        ([np.ndarray]): Inputs in the form needed by MEGNet
    """
    # One graph per call, so every atom and bond maps to structure 0
    gnode = [0] * len(graph['atom'])
    gbond = [0] * len(graph['index1'])
    return [
        expand_1st(self.atom_converter.convert(graph['atom'])),
        expand_1st(self.bond_converter.convert(graph['bond'])),
        expand_1st(np.array(graph['state'])),
        # Pin index/membership arrays to int32 for consistency with the
        # other graph_to_input implementation in this file; the default
        # platform int (often int64) is otherwise used.
        expand_1st(np.array(graph['index1'], dtype=np.int32)),
        expand_1st(np.array(graph['index2'], dtype=np.int32)),
        expand_1st(np.array(gnode, dtype=np.int32)),
        expand_1st(np.array(gbond, dtype=np.int32)),
    ]
def graph_to_input(self, graph: Dict) -> List[np.ndarray]:
    """
    Turns a graph into model input

    Args:
        (dict): Dictionary description of the graph

    Return:
        ([np.ndarray]): Inputs in the form needed by MEGNet
    """
    # A single graph: every atom and every bond belongs to structure 0.
    atom_graph_ids = np.zeros(len(graph["atom"]), dtype=np.int32)
    bond_graph_ids = np.zeros(len(graph["index1"]), dtype=np.int32)
    return [
        expand_1st(self.atom_converter.convert(graph["atom"])),
        expand_1st(self.bond_converter.convert(graph["bond"])),
        expand_1st(np.array(graph["state"])),
        expand_1st(np.array(graph["index1"], dtype=np.int32)),
        expand_1st(np.array(graph["index2"], dtype=np.int32)),
        expand_1st(atom_graph_ids),
        expand_1st(bond_graph_ids),
    ]
def _combine_graph_data(
    self,
    feature_list_temp: List[np.ndarray],
    connection_list_temp: List[np.ndarray],
    global_list_temp: List[np.ndarray],
    index1_temp: List[np.ndarray],
    index2_temp: List[np.ndarray],
) -> tuple:
    """Compile the matrices describing each graph into single matrices
    for the entire batch

    Beyond concatenating the graph descriptions, this operation updates
    the indices of each node to be sequential across all graphs so they
    are not duplicated between graphs

    Args:
        feature_list_temp ([ndarray]): List of features for each node
        connection_list_temp ([ndarray]): List of features for each connection
        global_list_temp ([ndarray]): List of global state for each graph
        index1_temp ([ndarray]): List of indices for the start of each bond
        index2_temp ([ndarray]): List of indices for the end of each bond

    Returns:
        (tuple): Input arrays describing the entire batch of networks:
            - ndarray: Features for each node
            - ndarray: Features for each connection
            - ndarray: Global state for each graph
            - ndarray: Indices for the start of each bond
            - ndarray: Indices for the end of each bond
            - ndarray: Index of graph associated with each node
            - ndarray: Index of graph associated with each connection
    """
    # Graph-membership id for every atom and every bond in the batch
    gnode = [
        graph_id
        for graph_id, atoms in enumerate(feature_list_temp)
        for _ in range(len(atoms))
    ]
    gbond = [
        graph_id
        for graph_id, bonds in enumerate(connection_list_temp)
        for _ in range(len(bonds))
    ]

    # Atom counts per graph, needed below to offset the bond indices
    atoms_per_graph = [len(features) for features in feature_list_temp]

    # Stack the per-graph matrices and run each through its processing hook
    atom_features = self.process_atom_feature(np.concatenate(feature_list_temp, axis=0))
    bond_features = self.process_bond_feature(np.concatenate(connection_list_temp, axis=0))
    state_features = self.process_state_feature(np.concatenate(global_list_temp, axis=0))

    # Shift each graph's bond endpoints so atom indices are unique batch-wide
    index1: list = []
    index2: list = []
    offset = 0
    for ind1, ind2, n_atom in zip(index1_temp, index2_temp, atoms_per_graph):
        index1.extend(i + offset for i in ind1)
        index2.extend(i + offset for i in ind2)
        offset += n_atom

    # Compile the inputs in the order the model expects
    return (
        expand_1st(atom_features),
        expand_1st(bond_features),
        expand_1st(state_features),
        expand_1st(np.array(index1, dtype=np.int32)),
        expand_1st(np.array(index2, dtype=np.int32)),
        expand_1st(np.array(gnode, dtype=np.int32)),
        expand_1st(np.array(gbond, dtype=np.int32)),
    )
def test_expand_dim(self):
    """expand_1st should prepend a batch axis of size 1."""
    vector = np.array([1, 2, 3])
    expanded = expand_1st(vector)
    self.assertListEqual(list(expanded.shape), [1, 3])