def test_duplicate_different_topology(self):
    """Duplicate pointing at a different bond topology yields the matching fate."""
    conf = get_stage1_conformer()
    # Bond topology id is conformer_id // 1000, so an offset of 1000 lands in
    # a different topology.
    conf.duplicated_by = conf.conformer_id + 1000
    self.assertEqual(
        dataset_pb2.Conformer.FATE_DUPLICATE_DIFFERENT_TOPOLOGY,
        smu_utils_lib.determine_fate(conf))
def test_duplicate_same_topology(self):
    """Duplicate within the same bond topology yields FATE_DUPLICATE_SAME_TOPOLOGY."""
    conf = get_stage1_conformer()
    # Bond topology id is conformer_id // 1000, so an offset of 1 stays in the
    # same topology.
    conf.duplicated_by = conf.conformer_id + 1
    smu_utils_lib.clean_up_error_codes(conf)
    self.assertEqual(dataset_pb2.Conformer.FATE_DUPLICATE_SAME_TOPOLOGY,
                     smu_utils_lib.determine_fate(conf))
def process(self, conformer):
    """Annotates a conformer with its fate and runs the SMILES comparison.

    Args:
      conformer: dataset_pb2.Conformer

    Yields:
      outputs of the SMILES comparison, then the updated Conformer.
    """
    # Work on a copy so the incoming element is never mutated in place.
    updated = copy.deepcopy(conformer)
    updated.fate = smu_utils_lib.determine_fate(updated)
    yield from self._compare_smiles(updated)
    yield updated
def process(self, conformer, bond_length_records, smiles_id_dict):
    """Per conformer updates.

    Args:
      conformer: dataset_pb2.Conformer
      bond_length_records: tuples to go to
        bond_length_distribution.AllAtomPairLengthDistributions
      smiles_id_dict: dict from SMILES to bond topology id

    Yields:
      Conformer.

    Raises:
      ValueError: if bond_length_records cannot be turned into a valid
        sparse dataframe.
    """
    # There is probably a better way to do this.
    # We get the side input with each call to process. We'll assume that it's
    # always the same input, so we set our cache value and never update it.
    # We only do this with bond_length_records because there is a reasonable
    # amount of processing in creating AllAtomPairLengthDistributions.
    # The smiles_id_dict is used directly.
    if not self._cached_bond_lengths:
      self._cached_bond_lengths = (
          bond_length_distribution.AllAtomPairLengthDistributions())
      try:
        self._cached_bond_lengths.add_from_sparse_dataframe(
            bond_length_distribution.sparse_dataframe_from_records(
                bond_length_records), _BOND_LENGTHS_UNBONDED_RIGHT_TAIL_MASS,
            _BOND_LENGTHS_SIG_DIGITS)
      except ValueError as err:
        # Chain the original exception so the root cause stays visible in the
        # traceback (matches the molecule-variant of this DoFn).
        raise ValueError(
            'Invalid sparse dataframe for conformer {0} org. ValueError: {1}'
            .format(str(conformer.conformer_id), err)) from err

    conformer = copy.deepcopy(conformer)

    conformer.fate = smu_utils_lib.determine_fate(conformer)

    yield from self._compare_smiles(conformer)

    if (conformer.duplicated_by == 0 and
        conformer.properties.errors.status < 512):
      # Duplicate records do not need topology extraction, and anything with
      # an error status this high is too unreliable for us to bother trying
      # to match the topology.
      self._add_alternative_bond_topologies(conformer, smiles_id_dict)
    else:
      beam.metrics.Metrics.counter(_METRICS_NAMESPACE,
                                   'skipped_topology_matches').inc()

    yield conformer
def process(self, molecule, bond_length_records, smiles_id_dict):
    """Per molecule updates.

    Args:
      molecule: dataset_pb2.Molecule
      bond_length_records: tuples to go to
        bond_length_distribution.AllAtomPairLengthDistributions
      smiles_id_dict: dict from SMILES to bond topology id

    Yields:
      Molecule.

    Raises:
      ValueError: if bond_length_records cannot be turned into a valid
        sparse dataframe.
    """
    # The side inputs arrive with every call to process. We assume they never
    # change across calls, so the expensive AllAtomPairLengthDistributions is
    # built once and cached. Only bond_length_records gets this treatment;
    # smiles_id_dict is cheap enough to use directly.
    if not self._cached_bond_lengths:
      distributions = (
          bond_length_distribution.AllAtomPairLengthDistributions())
      self._cached_bond_lengths = distributions
      try:
        distributions.add_from_sparse_dataframe(
            bond_length_distribution.sparse_dataframe_from_records(
                bond_length_records),
            bond_length_distribution.STANDARD_UNBONDED_RIGHT_TAIL_MASS,
            bond_length_distribution.STANDARD_SIG_DIGITS)
      except ValueError as exc:
        raise ValueError(
            'Invalid sparse dataframe for molecule {0} org. ValueError: {1}'
            .format(str(molecule.molecule_id), exc)) from exc

    # Never mutate the incoming element; Beam elements must stay immutable.
    molecule = copy.deepcopy(molecule)
    molecule.properties.errors.fate = smu_utils_lib.determine_fate(molecule)

    yield from self._compare_smiles(molecule)

    if not smu_utils_lib.molecule_eligible_for_topology_detection(molecule):
      # Ineligible molecules keep their starting topology; count the skip.
      molecule.bond_topologies[
          0].source = dataset_pb2.BondTopology.SOURCE_STARTING
      beam.metrics.Metrics.counter(_METRICS_NAMESPACE,
                                   'skipped_topology_matches').inc()
    else:
      self._add_alternative_bond_topologies(molecule, smiles_id_dict)

    yield molecule
def test_success(self):
    """A clean stage2 conformer resolves to FATE_SUCCESS."""
    conf = get_stage2_conformer()
    self.assertEqual(dataset_pb2.Conformer.FATE_SUCCESS,
                     smu_utils_lib.determine_fate(conf))
def test_calculation_errors(self):
    """Any set error field leads to FATE_CALCULATION_WITH_ERROR."""
    conf = get_stage2_conformer()
    # Arbitrary error field; we just need some error present.
    conf.properties.errors.error_atomic_analysis = 999
    self.assertEqual(dataset_pb2.Conformer.FATE_CALCULATION_WITH_ERROR,
                     smu_utils_lib.determine_fate(conf))
def test_no_result(self):
    """A stage1-only conformer has no calculation results."""
    conf = get_stage1_conformer()
    self.assertEqual(dataset_pb2.Conformer.FATE_NO_CALCULATION_RESULTS,
                     smu_utils_lib.determine_fate(conf))
def test_geometry_failures(self, nstat1, expected_fate):
    """Each error_nstat1 value maps to the parameterized expected fate."""
    conf = get_stage1_conformer()
    conf.properties.errors.error_nstat1 = nstat1
    self.assertEqual(expected_fate, smu_utils_lib.determine_fate(conf))
def test_calculation_warnings_vibrational(self):
    """A vibrational warning produces the vibrational-warning fate."""
    conf = get_stage2_conformer()
    conf.properties.errors.warn_vib_linearity = 1234
    self.assertEqual(
        dataset_pb2.Conformer.FATE_CALCULATION_WITH_WARNING_VIBRATIONAL,
        smu_utils_lib.determine_fate(conf))
def test_calculation_warnings_serious(self):
    """A serious warning (T1 excess) produces the serious-warning fate."""
    conf = get_stage2_conformer()
    conf.properties.errors.warn_t1_excess = 1234
    self.assertEqual(
        dataset_pb2.Conformer.FATE_CALCULATION_WITH_WARNING_SERIOUS,
        smu_utils_lib.determine_fate(conf))
def test_calculation_errors(self, status, expected):
    """Each status value maps to the parameterized expected fate."""
    conf = get_stage2_conformer()
    conf.properties.errors.status = status
    self.assertEqual(expected, smu_utils_lib.determine_fate(conf))
def test_discarded_other(self, status):
    """After error-code cleanup, these statuses resolve to FATE_DISCARDED_OTHER."""
    conf = get_stage1_conformer()
    conf.properties.errors.status = status
    smu_utils_lib.clean_up_error_codes(conf)
    self.assertEqual(dataset_pb2.Conformer.FATE_DISCARDED_OTHER,
                     smu_utils_lib.determine_fate(conf))