def _create_atom_feature_vector(self, atom: dict) -> List[int]:
    """Flatten the feature dictionary of one atom into a single list

    Features are emitted in the order given by ``self.atom_features``.
    Categorical features (chirality, element, hybridization) are binarized
    with ``fast_label_binarize``, the aromatic/donor/acceptor flags are cast
    to ``int``, ``ring_sizes`` is expanded with ``ring_to_vector`` and every
    other feature is appended unchanged.

    Args:
        atom (dict): Feature values of a single atom, keyed by feature name.
            Every name listed in ``self.atom_features`` must be present.

    Returns:
        ([int]): Atomic feature vector
    """
    # Categorical features that are binarized against a fixed class list
    fixed_classes = {
        'chirality': [0, 1, 2],
        'hybridization': [1, 2, 3, 4, 5, 6],
    }
    # Features stored as flags that only need an integer cast
    flag_names = ('aromatic', 'donor', 'acceptor')

    vector = []
    for name in self.atom_features:
        value = atom[name]
        if name in fixed_classes:
            vector.extend(fast_label_binarize(value, fixed_classes[name]))
        elif name == 'element':
            # The element classes are configured on the instance
            vector.extend(fast_label_binarize(value, self.known_elements))
        elif name in flag_names:
            vector.append(int(value))
        elif name == 'ring_sizes':
            vector.extend(ring_to_vector(value, self.max_ring_size))
        else:
            # Anything else is treated as a scalar and kept as-is
            vector.append(value)
    return vector
def _create_pair_feature_vector(self, bond: Dict) -> List[int]:
    """Flatten the feature dictionary of one atom pair into a single list

    Features are emitted in the order given by ``self.bond_features``;
    names that are absent from ``bond`` are skipped. ``bond_type`` is
    binarized with ``fast_label_binarize``, ``same_ring`` is cast to ``int``
    and ``spatial_distance`` is passed through ``self.distance_converter``.
    Every other feature is appended unchanged.

    Args:
        bond (dict): Features for a certain pair of atoms

    Returns:
        ([float]) Values converted to a vector. Note that the signature
            annotation says ``List[int]``, but converted distances are
            appended as returned by the converter.
    """
    vector = []
    for name in self.bond_features:
        if name not in bond:
            # Features missing for this pair contribute nothing
            continue

        value = bond[name]
        if name == "bond_type":
            vector.extend(fast_label_binarize(value, [0, 1, 2, 3, 4]))
        elif name == "same_ring":
            vector.append(int(value))
        elif name == "spatial_distance":
            # The converter works on a batch, so wrap and unwrap the value
            converted = self.distance_converter.convert([value])[0]
            if isinstance(converted, np.ndarray):
                # A distance expansion yields several entries per distance
                vector.extend(converted.tolist())
            else:
                # Otherwise the converter produced a single value
                vector.append(converted)
        else:
            # No conversion needed
            vector.append(value)
    return vector
def test_fast_label_binarize(self):
    """Check ``fast_label_binarize`` for a two-label and a three-label case"""
    # With two labels a single-entry list is expected
    self.assertListEqual(fast_label_binarize(1, [0, 1]), [0])

    # With three labels one entry per label is expected
    self.assertListEqual(fast_label_binarize(1, [0, 1, 2]), [0, 1, 0])