Пример #1
0
    def test_atomtyper_AlkEthOH(self):
        """
        Sample AlkEthOH at temperature 0, starting from the initial AlkEthOH
        SMARTS, and require that the sampler reports a found fraction of 1.0.
        """
        # SMARTS definition files, listed in the positional order used in the
        # AtomTypeSampler call below (replacements is passed by keyword).
        smarts_relpaths = (
            'atomtypes/basetypes.smarts',
            'atomtypes/initial_AlkEthOH.smarts',
            'atomtypes/new-decorators.smarts',
            'atomtypes/replacements.smarts',
        )
        base_path, initial_path, decorator_path, replacement_path = [
            get_data_filename(relpath) for relpath in smarts_relpaths
        ]

        # Molecules to be typed first, then the reference-typed set.
        molecule_relpaths = (
            'molecules/AlkEthOH_test_filt1_tripos.mol2',
            'molecules/AlkEthOH_test_filt1_ff.mol2',
        )
        mols, typed_mols = [
            smarty.utils.read_molecules(get_data_filename(relpath),
                                        verbose=False)
            for relpath in molecule_relpaths
        ]

        sampler = smarty.AtomTypeSampler(
            mols,
            base_path,
            initial_path,
            decorator_path,
            replacements_filename=replacement_path,
            reference_typed_molecules=typed_mols,
            verbose=False,
            temperature=0)

        # run() hands back the fraction found; anything below 1.0 is a failure.
        fraction_found = sampler.run(2)
        if fraction_found < 1.0:
            raise Exception(
                "Not finding 100% of AlkEthOH when starting from correct SMARTS."
            )
Пример #2
0
    def test_atomtyper(self):
        """
        Smoke-test the atom type sampler on the ZINC subset using the
        'simple-decorators' behavior.

        The base types double as the initial types. The test only checks that
        two sampling iterations complete without raising; the value returned
        by run() is not inspected.
        """
        basetypes_filename = get_data_filename('atomtypes/basetypes.smarts')
        initialtypes_filename = get_data_filename('atomtypes/basetypes.smarts')
        decorators_filename = get_data_filename('atomtypes/decorators.smarts')
        replacements_filename = get_data_filename(
            'atomtypes/replacements.smarts')
        molecules = smarty.utils.read_molecules(
            get_data_filename('molecules/zinc-subset-tripos.mol2.gz'),
            verbose=False)
        # Reference-typed counterpart of the tripos file above. The file name
        # contains an '@', so it must be spelled out in full.
        reference_typed_molecules = smarty.utils.read_molecules(
            get_data_filename('molecules/zinc-subset-parm@frosst.mol2.gz'),
            verbose=False)

        # Construct atom type sampler.
        atomtype_sampler = smarty.AtomTypeSampler(
            molecules,
            basetypes_filename,
            initialtypes_filename,
            decorators_filename,
            replacements_filename=replacements_filename,
            reference_typed_molecules=reference_typed_molecules,
            verbose=False,
            decorator_behavior='simple-decorators')

        # Start sampling atom types.
        atomtype_sampler.run(2)
Пример #3
0
    def test_atomtyper_elemental(self):
        """
        Run the sampler restricted to element=1 (hydrogen) with trajectory
        output, then exercise the score_utils trajectory helpers on that
        output.
        """
        # Output files produced by this test.
        trajectory_path = 'test_smarty.csv'
        plot_path = 'test_smarty.pdf'

        sampler_args = (
            self.mols_alkethoh,
            self.basetypes,
            self.basetypes,
            self.combine_decs,
        )
        sampler_kwargs = dict(
            replacements_filename=self.replacements,
            reference_typed_molecules=self.mols_alkethoh_ref,
            temperature=0.1,
            verbose=False,
            decorator_behavior='combinatorial-decorators',
            element=1,
        )
        sampler = smarty.AtomTypeSampler(*sampler_args, **sampler_kwargs)

        # Five iterations, recording the trajectory to disk.
        sampler.run(5, trajectory_path)

        # Feed the recorded trajectory through the analysis helpers.
        score_history = score_utils.scores_vs_time(
            score_utils.load_trajectory(trajectory_path))
        score_utils.create_plot_file(trajectory_path, plot_path, True, False)

        # A total score of exactly 1.0 at the very first iteration is treated
        # as a scoring problem.
        if score_history['all'][0] == 1.0:
            raise Exception(
                "Scoring problem, 100% at first iteration for total")
Пример #4
0
 def test_atomtyper(self):
     """
     Smoke-test the sampler on the ZINC molecules using simple-decorators;
     passes as long as two iterations run without raising.
     """
     sampler_args = (
         self.mols_zinc,
         self.basetypes,
         self.basetypes,
         self.simple_decs,
     )
     sampler_kwargs = dict(
         replacements_filename=self.replacements,
         reference_typed_molecules=self.mols_zinc_ref,
         temperature=0.1,
         verbose=False,
         decorator_behavior='simple-decorators',
         element=0,
     )
     smarty.AtomTypeSampler(*sampler_args, **sampler_kwargs).run(2)
Пример #5
0
 def test_atomtyper_AlkEthOH(self):
     """
     Start the sampler from the AlkEthOH "answer" types at temperature 0
     and require that the fraction found stays at 1.0.
     """
     sampler_args = (
         self.mols_alkethoh,
         self.basetypes,
         self.alkethoh_answers,
         self.combine_decs,
     )
     sampler_kwargs = dict(
         replacements_filename=self.replacements,
         reference_typed_molecules=self.mols_alkethoh_ref,
         temperature=0,
         verbose=False,
     )
     sampler = smarty.AtomTypeSampler(*sampler_args, **sampler_kwargs)

     # run() hands back the fraction found; anything below 1.0 is a failure.
     fraction_found = sampler.run(2)
     if fraction_found < 1.0:
         raise Exception(
             "Not finding 100% of AlkEthOH when starting from correct SMARTS."
         )
Пример #6
0
def main():
    """
    Command-line entry point for atom type sampling.

    Parses the options from ``sys.argv`` with optparse, loads the molecules
    (and, when ``--reference`` is given, the reference typed molecules),
    constructs a ``smarty.AtomTypeSampler`` and runs it for ``--iterations``
    iterations, passing ``--trajectory`` as the trajectory file name.

    ``--basetypes``, ``--decorators`` and ``--molecules`` are required; if any
    of them is missing the help text is printed and the parser exits with an
    error (``SystemExit``).
    """
    # Create command-line argument options.
    # The trailing backslash inside the literal joins the two lines of the
    # example command into one.
    usage_string = """\
    Sample over atom types, optionally attempting to match atom types in a reference typed set of molecules.

    usage: %prog --basetypes smartsfile --decorators smartsfile [--substitutions smartsfile] --molecules molfile [--reference molfile] --iterations niterations [--temperature temperature]

    example:

    python %prog --basetypes=atomtypes/basetypes.smarts --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \
        --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz --iterations 1000 --temperature=0.1

    """
    # NOTE(review): optparse only substitutes "%prog" in the version string;
    # "%__version__" is printed literally. Confirm whether the package
    # version was meant to be interpolated here.
    version_string = "%prog %__version__"
    parser = OptionParser(usage=usage_string, version=version_string)

    parser.add_option(
        "-b",
        "--basetypes",
        metavar='BASETYPES',
        action="store",
        type="string",
        dest='basetypes_filename',
        default=None,
        help="Filename defining base atom types as SMARTS atom matches.")

    parser.add_option(
        "-d",
        "--decorators",
        metavar='DECORATORS',
        action="store",
        type="string",
        dest='decorators_filename',
        default=None,
        help="Filename defining decorator atom types as SMARTS atom matches.")

    parser.add_option(
        "-s",
        "--substitutions",
        metavar="SUBSTITUTIONS",
        action="store",
        type="string",
        dest='substitutions_filename',
        default=None,
        help=
        "Filename defining substitution definitions for SMARTS atom matches (OPTIONAL)."
    )

    parser.add_option(
        "-r",
        "--reference",
        metavar="REFMOL",
        action="store",
        type="string",
        dest='reference_molecules_filename',
        default=None,
        help=
        "Reference typed molecules for computing likelihood (must match same molecule and atom ordering in molecules file) (OPTIONAL)."
    )

    parser.add_option(
        "-m",
        "--molecules",
        metavar='MOLECULES',
        action="store",
        type="string",
        dest='molecules_filename',
        default=None,
        help=
        "Small molecule set (in any OpenEye compatible file format) containing 'dG(exp)' fields with experimental hydration free energies."
    )

    parser.add_option("-i",
                      "--iterations",
                      metavar='ITERATIONS',
                      action="store",
                      type="int",
                      dest='iterations',
                      default=150,
                      help="MCMC iterations.")

    parser.add_option(
        "-t",
        "--temperature",
        metavar='TEMPERATURE',
        action="store",
        type="float",
        dest='temperature',
        default=0.1,
        help=
        "Effective temperature for Monte Carlo acceptance, indicating fractional tolerance of mismatched atoms (default: 0.1). If 0 is specified, will behave in a greedy manner."
    )

    parser.add_option(
        "-l",
        '--trajectory',
        metavar="TRAJECTORY_FILE",
        action="store",
        dest="traj_file",
        default="trajectory.csv",
        help=
        "Name for trajectory file output, trajectory saves only changes to the list of 'atomtypes' for each iteration. For now, if the file name already exists, it just won't create a trajectory file"
    )

    # Single switch for verbosity; passed to every helper below.
    verbose = True

    # Parse command-line arguments.
    (options, args) = parser.parse_args()

    # Ensure all required options have been specified.
    required_values = (options.basetypes_filename,
                       options.decorators_filename,
                       options.molecules_filename)
    if any(value is None for value in required_values):
        parser.print_help()
        parser.error("All input files must be specified.")

    # Load and type all molecules in the specified dataset.
    # Imported here so that argument errors are reported before the package
    # is loaded; this also binds ``smarty`` for the sampler construction.
    import smarty.utils
    molecules = smarty.utils.read_molecules(options.molecules_filename,
                                            verbose=verbose)

    # Read reference typed molecules, if specified.
    reference_typed_molecules = None
    if options.reference_molecules_filename is not None:
        reference_typed_molecules = smarty.utils.read_molecules(
            options.reference_molecules_filename, verbose=verbose)

    # Construct atom type sampler.
    atomtype_sampler = smarty.AtomTypeSampler(
        molecules,
        options.basetypes_filename,
        options.decorators_filename,
        replacements_filename=options.substitutions_filename,
        reference_typed_molecules=reference_typed_molecules,
        verbose=verbose,
        temperature=options.temperature)

    # Start sampling atom types.
    atomtype_sampler.run(options.iterations, options.traj_file)
Пример #7
0
def main():
    """
    Command-line entry point for atom type sampling.

    Parses the options from ``sys.argv`` with optparse, validates them, loads
    the molecules (and, when ``--reference`` is given, the reference typed
    molecules), constructs a ``smarty.AtomTypeSampler`` and runs it for
    ``--iterations`` iterations, passing ``--trajectory`` as the trajectory
    file name. When ``--plot`` is given, a plot of the score versus time is
    written from the trajectory file afterwards.

    ``--basetypes``, ``--decorators`` and ``--molecules`` are required.
    ``--decoratorbehavior`` must be 'simple-decorators' or
    'combinatorial-decorators', and ``--element`` must not be negative. Any
    violation prints the help text and exits through ``parser.error``
    (``SystemExit``) before any molecule file is read.
    """
    # Create command-line argument options.
    # The trailing backslash inside the literal joins the two lines of the
    # example command into one.
    usage_string = """\
    Sample over atom types, optionally attempting to match atom types in a reference typed set of molecules.

    usage: %prog --basetypes smartsfile --initialtypes smartsfile --decorators smartsfile [--substitutions smartsfile] --molecules molfile [--reference molfile] --iterations niterations [--temperature temperature]

    example:

    python %prog --basetypes=atomtypes/basetypes.smarts --initialtypes=atomtypes/initialtypes.smarts --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \
        --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz --iterations 1000 --temperature=0.1

    """
    # NOTE(review): optparse only substitutes "%prog" in the version string;
    # "%__version__" is printed literally. Confirm whether the package
    # version was meant to be interpolated here.
    version_string = "%prog %__version__"
    parser = OptionParser(usage=usage_string, version=version_string)

    parser.add_option(
        "-e",
        "--element",
        metavar='ELEMENT',
        action="store",
        type="int",
        dest='element',
        default=0,
        help=
        "By default the element value is 0 corresponding to sampling all atomtypes. If another atomic number is specified only atoms with that atomic number are sampled (i.e. --element=8 will only sample atomtypes for oxygen atoms)."
    )

    parser.add_option(
        "-b",
        "--basetypes",
        metavar='BASETYPES',
        action="store",
        type="string",
        dest='basetypes_filename',
        default=None,
        help=
        "Filename defining base or generic atom types as SMARTS atom matches; these are indestructible and normally are elemental atom types."
    )

    parser.add_option(
        "-f",
        "--initialtypes",
        # Distinct placeholder so the help does not show BASETYPES twice.
        metavar='INITIALTYPES',
        action="store",
        type="string",
        dest='initialtypes_filename',
        default=None,
        help=
        "Filename defining initial (first) atom types as SMARTS atom matches.")

    parser.add_option(
        "-d",
        "--decorators",
        metavar='DECORATORS',
        action="store",
        type="string",
        dest='decorators_filename',
        default=None,
        help="Filename defining decorator atom types as SMARTS atom matches.")

    parser.add_option(
        "-s",
        "--substitutions",
        metavar="SUBSTITUTIONS",
        action="store",
        type="string",
        dest='substitutions_filename',
        default=None,
        help=
        "Filename defining substitution definitions for SMARTS atom matches (OPTIONAL)."
    )

    parser.add_option(
        "-r",
        "--reference",
        metavar="REFMOL",
        action="store",
        type="string",
        dest='reference_molecules_filename',
        default=None,
        help=
        "Reference typed molecules for computing likelihood (must match same molecule and atom ordering in molecules file) (OPTIONAL)."
    )

    parser.add_option(
        "-m",
        "--molecules",
        metavar='MOLECULES',
        action="store",
        type="string",
        dest='molecules_filename',
        default=None,
        help=
        "Small molecule set (in any OpenEye compatible file format) containing 'dG(exp)' fields with experimental hydration free energies."
    )

    parser.add_option("-i",
                      "--iterations",
                      metavar='ITERATIONS',
                      action="store",
                      type="int",
                      dest='iterations',
                      default=150,
                      help="MCMC iterations.")

    parser.add_option(
        "-t",
        "--temperature",
        metavar='TEMPERATURE',
        action="store",
        type="float",
        dest='temperature',
        default=0.1,
        help=
        "Effective temperature for Monte Carlo acceptance, indicating fractional tolerance of mismatched atoms (default: 0.1). If 0 is specified, will behave in a greedy manner."
    )

    parser.add_option(
        "-l",
        '--trajectory',
        metavar="TRAJECTORY_FILE",
        action="store",
        dest="traj_file",
        default="trajectory.csv",
        help=
        "Name for trajectory file output, trajectory saves only changes to the list of 'atomtypes' for each iteration. If the file already exists, it is overwritten."
    )

    parser.add_option(
        "-p",
        '--plot',
        metavar="PLOT_FILE",
        action="store",
        dest="plot_file",
        default=None,
        help=
        "Name for output file of a plot of the score versus time. If not specified, none will be written. If provided, needs to use a file extension suitable for matplotlib/pylab. Currently requires a trajectory file to be written using -l or --trajectory."
    )

    parser.add_option(
        "-x",
        "--decoratorbehavior",
        metavar='DECORATOR_BEHAVIOR',
        action="store",
        type="string",
        dest='decorator_behavior',
        default='combinatorial-decorators',
        help=
        "Choose between simple-decorators or combinatorial-decorators (default = combinatorial-decorators)."
    )

    # Single switch for verbosity; passed to every helper below.
    verbose = True

    # Parse command-line arguments.
    (options, args) = parser.parse_args()

    # Ensure all required options have been specified.
    required_values = (options.basetypes_filename,
                       options.decorators_filename,
                       options.molecules_filename)
    if any(value is None for value in required_values):
        parser.print_help()
        parser.error("All input files must be specified.")

    # Ensure the Decorator Behavior option has been specified right
    if options.decorator_behavior not in ('simple-decorators',
                                          'combinatorial-decorators'):
        parser.print_help()
        parser.error("Option not valid for decorator behavior.")

    # Reject an invalid atomic number up front, before any molecule file is
    # read, so a bad command line fails immediately.
    if options.element < 0:
        parser.print_help()
        parser.error(
            "Element number must be 0 for all atoms or an integer greater than 0 for an atomic number"
        )

    # Load and type all molecules in the specified dataset.
    molecules = utils.read_molecules(options.molecules_filename,
                                     verbose=verbose)

    # Read reference typed molecules, if specified.
    reference_typed_molecules = None
    if options.reference_molecules_filename is not None:
        reference_typed_molecules = utils.read_molecules(
            options.reference_molecules_filename, verbose=verbose)

    # Construct atom type sampler.
    if verbose:
        if options.element == 0:
            print("Sampling all atomtypes")
        else:
            print("Sampling atoms with atomic number %i" % options.element)
    atomtype_sampler = smarty.AtomTypeSampler(
        molecules,
        options.basetypes_filename,
        options.initialtypes_filename,
        options.decorators_filename,
        replacements_filename=options.substitutions_filename,
        reference_typed_molecules=reference_typed_molecules,
        verbose=verbose,
        temperature=options.temperature,
        decorator_behavior=options.decorator_behavior,
        element=options.element)

    # Start sampling atom types.
    atomtype_sampler.run(options.iterations, options.traj_file)

    if options.plot_file is not None:
        # --trajectory has a non-None default, so this guard is defensive.
        if options.traj_file is None:
            print("Cannot create plot file without a trajectory file")
        else:
            smarty.score_utils.create_plot_file(options.traj_file,
                                                options.plot_file, False,
                                                verbose)