Пример #1
0
    def test_calculate_sn_fold_multiple(self):
        """
        Run scaffold-based fold assignment on a CSV file and compare the
        result against the stored reference output.

        A ``ScaffoldFoldAssign`` instance (configured from ``self.config`` and
        ``self.keys``) is wrapped in a ``DfTransformer``, which processes the
        input file into a temporary output file. The test passes only when the
        temporary file is byte-identical to the reference file.
        """
        infile = os.path.join(curDir, "input", "test_sn_fold_input.csv")
        reference = os.path.join(curDir, "output", "test_sn_fold_output.csv")
        outfile = os.path.join(curDir, "output", "tmp", "sn_fold_output.csv")
        output_columns = [
            "murcko_smiles",
            "sn_smiles",
            "fold_id",
            "success",
            "error_message",
        ]
        output_types = ["object", "object", "int", "bool", "object"]

        sa = ScaffoldFoldAssign(
            nfolds=self.config["scaffold_folding"]["nfolds"],
            secret=self.keys["key"],
        )

        # NOTE(review): presumably maps the input file's "canonical_smiles"
        # column onto the transformer's "smiles" argument -- confirm against
        # DfTransformer.
        dt = DfTransformer(
            sa,
            input_columns={"canonical_smiles": "smiles"},
            output_columns=output_columns,
            output_types=output_types,
        )
        dt.process_file(infile, outfile)

        # shallow=False forces a comparison of the file contents rather than
        # of the os.stat() signatures only.
        result = filecmp.cmp(reference, outfile, shallow=False)

        # Remove the temporary output before asserting so that a failing
        # assertion does not leave stale files behind in output/tmp.
        os.remove(outfile)

        self.assertTrue(
            result, "scaffold fold output differs from the reference file")
Пример #2
0
    def test_standardizer_pipeline(self):
        """
        Test standardization of a larger set of SMILES from ChEMBL by
        comparing the resulting output files with stored reference files.

        Both files written by ``DfTransformer.process_file`` (the "OK" file
        and the "failed" file) must be byte-identical to their references.

        NOTE(review): this was originally described as "serial execution",
        but the transformer is created with ``nproc=2`` -- confirm which is
        intended.
        """
        infile = os.path.join(curDir, "input", "test_standardizer.csv")
        outfile = os.path.join(curDir, "output", "sn_fold_output.OK.csv")
        errfile = os.path.join(curDir, "output", "sn_fold_output.failed.csv")
        outfile_tmp = os.path.join(curDir, "output", "tmp",
                                   "sn_fold_output.OK.csv")
        errfile_tmp = os.path.join(curDir, "output", "tmp",
                                   "sn_fold_output.failed.csv")
        st = Standardizer.from_param_dict(
            method_param_dict=self.config["standardization"], verbosity=0)
        outcols = ["canonical_smiles", "success", "error_message"]
        out_types = ["object", "bool", "object"]
        dt = DfTransformer(
            st,
            input_columns={"smiles": "smiles"},
            output_columns=outcols,
            output_types=out_types,
            success_column="success",
            nproc=2,
            verbosity=0,
        )

        # To (re)build the reference files, run the following once:
        #     dt.process_file(infile, outfile, errfile)

        # Run the test against temporary files.
        dt.process_file(infile, outfile_tmp, errfile_tmp)

        # shallow=False forces a comparison of the file contents rather than
        # of the os.stat() signatures only.
        ok_matches = filecmp.cmp(outfile, outfile_tmp, shallow=False)
        failed_matches = filecmp.cmp(errfile, errfile_tmp, shallow=False)

        # Clean up before asserting so a failure does not leave stale files.
        os.remove(outfile_tmp)
        os.remove(errfile_tmp)

        # Each comparison is asserted on its own. Comparing the two results
        # with each other would also pass when BOTH files differ from their
        # references (False == False).
        self.assertTrue(
            ok_matches, "standardized output differs from the reference file")
        self.assertTrue(
            failed_matches, "failed-rows output differs from the reference file")