Пример #1
0
    def test_standardizer_different_configs(self):
        """Standardizers built from two different configs must disagree.

        The reference run takes its parameters from ``self.config``; the
        second run takes them from ``input/example_parameters_2.json``.
        Both process the same input file, and the test passes only if the
        two resulting frames are NOT equal.
        """
        df_smiles = read_csv(curDir / "input/test_standardizer.csv")
        outcols = ["canonical_smiles", "success", "error_message"]
        out_types = ["object", "bool", "object"]

        def run_standardizer(params, nproc):
            """Standardize ``df_smiles`` with ``params`` using ``nproc`` workers."""
            standardizer = Standardizer(
                max_num_atoms=params["max_num_atoms"],
                max_num_tautomers=params["max_num_tautomers"],
                include_stereoinfo=params["include_stereoinfo"],
                verbosity=0,
            )
            transformer = DfTransformer(
                standardizer,
                input_columns={"smiles": "smiles"},
                output_columns=outcols,
                output_types=out_types,
                success_column="success",
                nproc=nproc,
                verbosity=0,
            )
            # Only the first element of the result is used for the comparison.
            return transformer.process_dataframe(df_smiles)[0]

        ## Reference run with the parameters of the test case's own config
        response_ref = run_standardizer(self.config["standardization"], nproc=4)

        ## Test run with the parameters of the second example config
        config_2 = ConfigDict(
            config_path=curDir / "input" / "example_parameters_2.json"
        ).get_parameters()
        response_tmp = run_standardizer(config_2["standardization"], nproc=2)

        # assert_frame_equal raises AssertionError when the frames differ,
        # which is the outcome this test expects.
        with self.assertRaises(
                AssertionError,
                msg="standardized output is identical for both configurations",
        ):
            assert_frame_equal(response_ref, response_tmp)
Пример #2
0
    def test_standardizer_parameter_atom_count(self):
        """Changing ``max_num_atoms`` must change the standardization result.

        The reference run uses ``max_num_atoms`` from ``self.config``; the
        second run overrides it with 5 and keeps the other standardization
        parameters. The test passes only if the two resulting frames are
        NOT equal.
        """
        df_smiles = read_csv(curDir / "input/test_standardizer.csv")
        outcols = ["canonical_smiles", "success", "error_message"]
        out_types = ["object", "bool", "object"]
        params = self.config["standardization"]

        def run_standardizer(max_num_atoms, nproc):
            """Standardize ``df_smiles`` with the given atom limit and worker count."""
            standardizer = Standardizer(
                max_num_atoms=max_num_atoms,
                max_num_tautomers=params["max_num_tautomers"],
                include_stereoinfo=params["include_stereoinfo"],
                verbosity=0,
            )
            transformer = DfTransformer(
                standardizer,
                input_columns={"smiles": "smiles"},
                output_columns=outcols,
                output_types=out_types,
                success_column="success",
                nproc=nproc,
                verbosity=0,
            )
            # Only the first element of the result is used for the comparison.
            return transformer.process_dataframe(df_smiles)[0]

        ## Reference run with the configured atom limit
        response_ref = run_standardizer(params["max_num_atoms"], nproc=4)

        ## Test run with the atom limit overridden to 5
        response_tmp = run_standardizer(5, nproc=2)

        # assert_frame_equal raises AssertionError when the frames differ,
        # which is the outcome this test expects.
        with self.assertRaises(
                AssertionError,
                msg="standardized output is identical despite max_num_atoms=5",
        ):
            assert_frame_equal(response_ref, response_tmp)
Пример #3
0
    def test_scramble_desc_multiple_key(self):
        """Check that the computed descriptors depend on the secret key.

        Descriptors are calculated with the key ``"melloddy_2"`` and written
        to a temporary CSV. The contents of that file must differ from the
        stored file ``unit_test/output/test_calculate_desc_y2.csv``.
        """
        alternative_key = "melloddy_2"
        scrambled_csv = curDir / "output/tmp/ecfp_feat_scrambled_new_key.csv"

        smiles_frame = read_csv(
            curDir / "input/chembl/chembl_23_example_T2.csv",
            nrows=10,
        )
        calculator = DescriptorCalculator.from_param_dict(
            secret=alternative_key,
            method_param_dict=self.config["fingerprint"],
            verbosity=0,
        )
        transformer = DfTransformer(
            calculator,
            input_columns={"smiles": "smiles"},
            output_columns=["fp_feat", "fp_val", "success", "error_message"],
            output_types=["object", "object", "bool", "object"],
            success_column="success",
            nproc=1,
            verbosity=0,
        )

        # Persist the freshly computed fingerprints so they can be compared
        # on disk against the stored file.
        outputs = transformer.process_dataframe(smiles_frame)
        outputs[0].to_csv(scrambled_csv, index=False)

        # shallow=False forces a comparison of the file contents.
        files_match = filecmp.cmp(
            "unit_test/output/test_calculate_desc_y2.csv",
            scrambled_csv,
            shallow=False,
        )
        self.assertEqual(files_match, False)
Пример #4
0
    def test_calculate_desc_multiple(self):
        """Check that computed descriptors reproduce the stored reference file.

        Descriptors are calculated with the key from ``self.keys`` and
        written to a temporary CSV. The contents of that file must be
        identical to ``unit_test/output/test_calculate_desc_y2.csv``.
        """
        test_csv = curDir / "output/tmp/ecfp_feat_multiple.csv"
        smiles_frame = read_csv(
            curDir / "input/chembl/chembl_23_example_T2.csv",
            nrows=10,
        )

        calculator = DescriptorCalculator.from_param_dict(
            secret=self.keys["key"],
            method_param_dict=self.config["fingerprint"],
            verbosity=0,
        )
        transformer = DfTransformer(
            calculator,
            input_columns={"smiles": "smiles"},
            output_columns=["fp_feat", "fp_val", "success", "error_message"],
            output_types=["object", "object", "bool", "object"],
            success_column="success",
            nproc=1,
            verbosity=0,
        )

        # NOTE: the reference fingerprints were produced by running this same
        # transformer on the same input and writing element 0 of the result
        # to "unit_test/output/test_calculate_desc_y2.csv" with index=False.

        # Write the test fingerprints so they can be compared on disk.
        outputs = transformer.process_dataframe(smiles_frame)
        outputs[0].to_csv(test_csv, index=False)

        # shallow=False forces a comparison of the file contents.
        files_match = filecmp.cmp(
            "unit_test/output/test_calculate_desc_y2.csv",
            test_csv,
            shallow=False,
        )
        self.assertEqual(files_match, True)
Пример #5
0
    def test_standardizer_multiprocessing(self):
        """Standardization must give equal frames for nproc=2 and nproc=4.

        One standardizer built from ``self.config`` is run over the first
        ten rows of the example file twice, with different ``nproc`` values.
        """
        smiles_frame = read_csv(
            curDir / "input/chembl/chembl_23_example_T2.csv",
            nrows=10,
        )
        params = self.config["standardization"]
        standardizer = Standardizer(
            max_num_atoms=params["max_num_atoms"],
            max_num_tautomers=params["max_num_tautomers"],
            include_stereoinfo=params["include_stereoinfo"],
            verbosity=0,
        )
        column_names = ["canonical_smiles", "success", "error_message"]
        column_types = ["object", "bool", "object"]

        def standardize_with(nproc):
            """Run the shared standardizer over the input with ``nproc`` workers."""
            transformer = DfTransformer(
                standardizer,
                input_columns={"smiles": "smiles"},
                output_columns=column_names,
                output_types=column_types,
                success_column="success",
                nproc=nproc,
                verbosity=0,
            )
            # Only the first element of the result is compared.
            return transformer.process_dataframe(smiles_frame)[0]

        result_two_procs = standardize_with(2)
        result_four_procs = standardize_with(4)
        assert_frame_equal(result_two_procs, result_four_procs)