Exemplo n.º 1
0
    def process(self, mol, port):
        """Estimate the hydration free energy of ``mol`` with YANK.

        The molecule is written to ``input.mol2`` inside a temporary
        directory, the YAML script returned by ``self.construct_yaml`` is
        handed to ``YamlBuilder.build_experiments()``, and the two phase
        files ``experiments/solvent1.nc`` and ``experiments/solvent2.nc``
        are analyzed with ``yank.analyze.estimate_free_energies``.

        On success the estimate and its uncertainty are stored on the
        molecule as the SD data fields ``DeltaG_yank_hydration`` and
        ``dDeltaG_yank_hydration`` (multiplied by ``self.kT`` expressed in
        kcal/mol) and the molecule is emitted on ``self.success``.  On any
        exception the traceback is logged, the message is stored under the
        ``'error'`` data tag and the molecule is emitted on ``self.failure``.

        Parameters
        ----------
        mol : OpenEye molecule
            Molecule to process; it must pass ``molecule_is_charged``.
        port : object
            Not used by this method.
        """
        kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

        # Retrieve data about which molecule we are processing
        title = mol.GetTitle()

        with TemporaryDirectory() as output_directory:
            try:
                # Print out which molecule we are processing
                self.log.info('Processing {} in directory {}.'.format(title, output_directory))

                # Check that molecule is charged.
                if not molecule_is_charged(mol):
                    raise Exception('Molecule %s has no charges; input molecules must be charged.' % title)

                # Write the specified molecule out to a mol2 file without changing its name.
                # The stream is closed explicitly so the file is complete on
                # disk before it is rewritten below.
                mol2_filename = os.path.join(output_directory, 'input.mol2')
                ofs = oechem.oemolostream(mol2_filename)
                try:
                    oechem.OEWriteMol2File(ofs, mol)
                finally:
                    ofs.close()

                # Undo oechem's naming of mol2 substructures as `<0>`
                from YankCubes.utils import unfuck_oechem_mol2_file
                unfuck_oechem_mol2_file(mol2_filename)

                # Run YANK on the specified molecule.
                from yank.yamlbuild import YamlBuilder
                yaml = self.construct_yaml(output_directory=output_directory)
                yaml_builder = YamlBuilder(yaml)
                yaml_builder.build_experiments()
                self.log.info('Ran Yank experiments for molecule {}.'.format(title))

                # Analyze the hydration free energy.
                from yank.analyze import estimate_free_energies
                experiments_directory = os.path.join(output_directory, 'experiments')
                phase_estimates = []
                for phase_filename in ('solvent1.nc', 'solvent2.nc'):
                    ncfile = netcdf.Dataset(os.path.join(experiments_directory, phase_filename), 'r')
                    try:
                        phase_estimates.append(estimate_free_energies(ncfile))
                    finally:
                        # Release the file handle before the temporary
                        # directory is removed.
                        ncfile.close()
                (Deltaf_ij_solvent, dDeltaf_ij_solvent) = phase_estimates[0]
                (Deltaf_ij_vacuum, dDeltaf_ij_vacuum) = phase_estimates[1]

                DeltaG_hydration = Deltaf_ij_vacuum[0, -1] - Deltaf_ij_solvent[0, -1]
                # Uncertainties of the two phases add in quadrature; this
                # must use the dDeltaf_ij arrays, not the free energies.
                dDeltaG_hydration = np.sqrt(dDeltaf_ij_vacuum[0, -1]**2 + dDeltaf_ij_solvent[0, -1]**2)

                # Add result to original molecule
                oechem.OESetSDData(mol, 'DeltaG_yank_hydration', str(DeltaG_hydration * kT_in_kcal_per_mole))
                oechem.OESetSDData(mol, 'dDeltaG_yank_hydration', str(dDeltaG_hydration * kT_in_kcal_per_mole))
                self.log.info('Analyzed and stored hydration free energy for molecule {}.'.format(title))

                # Emit molecule to success port.
                self.success.emit(mol)

            except Exception as e:
                self.log.info('Exception encountered when processing molecule {}.'.format(title))
                # Attach error message to the molecule that failed
                # TODO: If there is an error in the leap setup log,
                # we should capture that and attach it to the failed molecule.
                self.log.error(traceback.format_exc())
                mol.SetData('error', str(e))
                # Return failed molecule
                self.failure.emit(mol)
Exemplo n.º 2
0
def run_validation():
    """Run every validation YAML script found one directory level up.

    Each file matching ``../*/*.yaml`` is announced on stdout and then
    executed through ``YamlBuilder.run_experiments()``.

    Running the validation sets one at a time is probably preferable,
    since the optimal number of GPUs depends on the protocol.

    """
    script_pattern = os.path.join('..', '*', '*.yaml')
    for script_path in glob.glob(script_pattern):
        script_name = os.path.basename(script_path)
        print('Running {}...'.format(script_name))
        YamlBuilder(script_path).run_experiments()
Exemplo n.º 3
0
def run_yank(job_id, n_jobs):
    """Run the YANK calculations assigned to one job of a parallel batch.

    Molecules are discovered from the ``*_vacuum.pdb`` files in
    ``../pdbfiles`` and sorted, so every job sees the same ordering.  The
    molecule at sorted position ``i`` is handled by the job for which
    ``i % n_jobs == job_id - 1``.  Molecules already listed by
    ``read_status()`` are skipped.

    Parameters
    ----------
    job_id : int
        Identifier of this job; the assignment test uses ``job_id - 1``.
    n_jobs : int
        Total number of jobs sharing the work.

    """
    vacuum_suffix = '_vacuum.pdb'
    systems_dir = os.path.join('..', 'openmmfiles')
    structures_dir = os.path.join('..', 'pdbfiles')

    # Load the YANK script template that gets filled in per molecule.
    with open('yank_template.yaml', 'r') as template_file:
        template = template_file.read()

    # Molecules completed by previous runs.
    finished = read_status()

    # Strip the suffix to recover the molecule identifiers; sorting keeps
    # the index-to-job assignment identical across parallel nodes.
    vacuum_pdb_paths = glob.glob(os.path.join(structures_dir, '*' + vacuum_suffix))
    molecule_ids = sorted(os.path.basename(path)[:-len(vacuum_suffix)]
                          for path in vacuum_pdb_paths)

    for index, molecule_id in enumerate(molecule_ids):
        # Only work on molecules that belong to this job and are not done.
        assigned_here = index % n_jobs == job_id - 1
        if not assigned_here or molecule_id in finished:
            print_and_flush('Node {}: Skipping {}'.format(job_id, molecule_id))
            continue

        # Input files of the two phases.
        solvated_stem = molecule_id + '_solvated'
        vacuum_stem = molecule_id + '_vacuum'
        solvated_files = [os.path.join(systems_dir, solvated_stem + '.xml'),
                          os.path.join(structures_dir, solvated_stem + '.pdb')]
        vacuum_files = [os.path.join(systems_dir, vacuum_stem + '.xml'),
                        os.path.join(structures_dir, vacuum_stem + '.pdb')]

        # Fill in the template; each phase path is the string form of an
        # [xml, pdb] list.
        script = template.format(experiment_dir=molecule_id,
                                 phase1_path=str(solvated_files),
                                 phase2_path=str(vacuum_files))

        # Run YANK.
        print_and_flush('Node {}: Running {}'.format(job_id, molecule_id))
        YamlBuilder(script).run_experiments()

        # Record the molecule as completed.
        update_status(molecule_id)
Exemplo n.º 4
0
def dispatch(args):
    """
    Set up and run a YANK calculation from a YAML script.

    Parameters
    ----------
    args : dict
       Command-line arguments from docopt.

    Returns
    -------
    bool
       True when ``args['--yaml']`` is truthy and the experiment was built,
       False otherwise.

    """
    yaml_source = args['--yaml']
    if not yaml_source:
        # No script given: nothing was handled.
        return False

    YamlBuilder(yaml_source=yaml_source).build_experiment()
    return True
Exemplo n.º 5
0
def dispatch(args):
    """
    Build a YANK experiment from the YAML script given on the command line.

    Parameters
    ----------
    args : dict
       Command-line arguments from docopt.

    Returns
    -------
    bool
       Whether a YAML script was supplied (and therefore processed).

    """
    script = args['--yaml']
    handled = bool(script)
    if handled:
        builder = YamlBuilder(yaml_source=script)
        builder.build_experiment()
    return handled
Exemplo n.º 6
0
def dispatch(args):
    """
    Set up and run a YANK calculation from a YAML script file.

    Parameters
    ----------
    args : dict
       Command-line arguments from docopt.

    Returns
    -------
    bool
       True when a YAML script was given and its experiments were built,
       False when ``args['--yaml']`` is empty.

    Raises
    ------
    ValueError
       If the given YAML path does not point to an existing file.

    """
    yaml_path = args['--yaml']
    if not yaml_path:
        return False

    # Reject missing files before handing the path to the builder.
    if os.path.isfile(yaml_path):
        YamlBuilder(yaml_source=yaml_path).build_experiments()
        return True

    raise ValueError('Cannot find YAML script "{}"'.format(yaml_path))
Exemplo n.º 7
0
    def process(self, mol, port):
        """Estimate the binding free energy of ``mol`` to ``self.receptor``.

        The receptor is written to ``receptor.pdb`` and the molecule to
        ``input.mol2`` inside a temporary directory, the YAML script from
        ``self.construct_yaml`` is handed to
        ``YamlBuilder.build_experiments()``, and the ``experiments``
        sub-directory is analyzed with ``YankCubes.analysis.analyze``.

        On success the estimate and its uncertainty are stored on the
        molecule as the SD data fields ``DeltaG_yank_binding`` and
        ``dDeltaG_yank_binding`` (multiplied by ``self.kT`` expressed in
        kcal/mol) and the molecule is emitted on ``self.success``.  On any
        exception the traceback is logged, the message is stored under the
        ``'error'`` data tag and the molecule is emitted on ``self.failure``.

        Parameters
        ----------
        mol : OpenEye molecule
            Ligand to process; it must pass ``molecule_is_charged``.
        port : object
            Not used by this method.
        """
        kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

        # Retrieve data about which molecule we are processing
        title = mol.GetTitle()

        with TemporaryDirectory() as output_directory:
            try:
                # Print out which molecule we are processing
                self.log.info('Processing {} in {}.'.format(
                    title, output_directory))

                # Check that molecule is charged.
                if not molecule_is_charged(mol):
                    raise Exception(
                        'Molecule %s has no charges; input molecules must be charged.'
                        % title)

                # Write the receptor.
                pdbfilename = os.path.join(output_directory, 'receptor.pdb')
                with oechem.oemolostream(pdbfilename) as ofs:
                    res = oechem.OEWriteConstMolecule(ofs, self.receptor)
                    if res != oechem.OEWriteMolReturnCode_Success:
                        raise RuntimeError(
                            "Error writing receptor: {}".format(res))

                # Write the specified molecule out to a mol2 file without changing its name.
                # As for the receptor, the stream is closed by the `with`
                # block so the file is complete before it is rewritten below.
                mol2_filename = os.path.join(output_directory, 'input.mol2')
                with oechem.oemolostream(mol2_filename) as ofs:
                    oechem.OEWriteMol2File(ofs, mol)

                # Undo oechem's naming of mol2 substructures as `<0>`
                from YankCubes.utils import unfuck_oechem_mol2_file
                unfuck_oechem_mol2_file(mol2_filename)

                # Run YANK on the specified molecule.
                from yank.yamlbuild import YamlBuilder
                yaml = self.construct_yaml(output_directory=output_directory)
                yaml_builder = YamlBuilder(yaml)
                yaml_builder.build_experiments()
                self.log.info(
                    'Ran Yank experiments for molecule {}.'.format(title))

                # Analyze the binding free energy
                # TODO: Use yank.analyze API for this
                from YankCubes.analysis import analyze
                store_directory = os.path.join(output_directory, 'experiments')
                [DeltaG_binding, dDeltaG_binding] = analyze(store_directory)

                # Attach binding free energy estimates to molecule
                oechem.OESetSDData(mol, 'DeltaG_yank_binding',
                                   str(DeltaG_binding * kT_in_kcal_per_mole))
                oechem.OESetSDData(mol, 'dDeltaG_yank_binding',
                                   str(dDeltaG_binding * kT_in_kcal_per_mole))
                self.log.info(
                    'Analyzed and stored binding free energy for molecule {}.'.
                    format(title))

                # Emit molecule to success port.
                self.success.emit(mol)

            except Exception as e:
                self.log.info(
                    'Exception encountered when processing molecule {}.'.
                    format(title))
                # Attach error message to the molecule that failed
                # TODO: If there is an error in the leap setup log,
                # we should capture that and attach it to the failed molecule.
                self.log.error(traceback.format_exc())
                mol.SetData('error', str(e))
                # Return failed molecule
                self.failure.emit(mol)
Exemplo n.º 8
0
def dispatch_binding(args):
    """
    Set up a binding free energy calculation.

    Parameters
    ----------
    args : dict
       Command-line arguments from docopt.

    Returns
    -------
    bool
       True once the simulation has been created; False when neither the
       'amber' nor the 'gromacs' setup command was selected (the caller
       uses this to show the help text).

    Raises
    ------
    Exception
       If ``--precision`` is given without a usable ``--platform``, if the
       selected platform does not support the requested precision model,
       or if the platform name is not one this function knows about.

    """

    verbose = args['--verbose']
    store_dir = args['--store']
    utils.config_root_logger(verbose, log_file_path=os.path.join(store_dir, 'prepare.log'))

    #
    # Determine simulation options.
    #

    # Specify thermodynamic parameters.
    temperature = process_unit_bearing_arg(args, '--temperature', unit.kelvin)
    pressure = process_unit_bearing_arg(args, '--pressure', unit.atmospheres)
    thermodynamic_state = ThermodynamicState(temperature=temperature, pressure=pressure)

    # Create systems according to specified setup/import method.
    if args['amber']:
        [phases, systems, positions, atom_indices] = setup_binding_amber(args)
    elif args['gromacs']:
        [phases, systems, positions, atom_indices] = setup_binding_gromacs(args)
    else:
        logger.error("No valid binding free energy calculation setup command specified: Must be one of ['amber', 'gromacs'].")
        # Trigger help argument to be returned.
        return False

    # Report some useful properties.
    if verbose:
        if 'complex-explicit' in atom_indices:
            phase = 'complex-explicit'
        else:
            phase = 'complex-implicit'
        logger.info("TOTAL ATOMS      : %9d" % len(atom_indices[phase]['complex']))
        logger.info("receptor         : %9d" % len(atom_indices[phase]['receptor']))
        logger.info("ligand           : %9d" % len(atom_indices[phase]['ligand']))
        if phase == 'complex-explicit':
            logger.info("solvent and ions : %9d" % len(atom_indices[phase]['solvent']))

    # Set options.
    options = dict()
    # Integer-valued options.
    for cli_flag, option_name in (('--nsteps', 'nsteps_per_iteration'),
                                  ('--iterations', 'number_of_iterations'),
                                  ('--equilibrate', 'number_of_equilibration_iterations')):
        if args[cli_flag]:
            options[option_name] = int(args[cli_flag])
    # Boolean switches.
    for cli_flag, option_name in (('--online-analysis', 'online_analysis'),
                                  ('--randomize-ligand', 'randomize_ligand'),
                                  ('--minimize', 'minimize')):
        if args[cli_flag]:
            options[option_name] = True
    if args['--restraints']:
        options['restraint_type'] = args['--restraints']

    # Allow platform to be optionally specified in order for alchemical tests to be carried out.
    if args['--platform'] not in [None, 'None']:
        options['platform'] = openmm.Platform.getPlatformByName(args['--platform'])
    if args['--precision']:
        # We need to modify the Platform object, so one must have been
        # created above.  This also covers --platform given as the string
        # 'None', which creates no Platform object.
        if 'platform' not in options:
            raise Exception("The --platform argument must be specified in order to specify platform precision.")

        # Set platform precision.
        precision = args['--precision']
        platform_name = args['--platform']
        # Both values must be passed as one tuple to the % operator.
        logger.info("Setting %s platform to use precision model '%s'." % (platform_name, precision))
        if platform_name == 'CUDA':
            options['platform'].setPropertyDefaultValue('CudaPrecision', precision)
        elif platform_name == 'OpenCL':
            options['platform'].setPropertyDefaultValue('OpenCLPrecision', precision)
        elif platform_name == 'CPU':
            if precision != 'mixed':
                raise Exception("CPU platform does not support precision model '%s'; only 'mixed' is supported." % precision)
        elif platform_name == 'Reference':
            if precision != 'double':
                raise Exception("Reference platform does not support precision model '%s'; only 'double' is supported." % precision)
        else:
            raise Exception("Platform selection logic is outdated and needs to be updated to add platform '%s'." % platform_name)

    # Parse YAML options, CLI options have priority: start from the YAML
    # values and let the command-line values collected above override them.
    if args['--yaml']:
        merged_options = dict(YamlBuilder(args['--yaml']).yank_options)
        merged_options.update(options)
        options = merged_options

    # Create new simulation.
    yank = Yank(store_dir, **options)
    yank.create(phases, systems, positions, atom_indices, thermodynamic_state)

    # Report success.
    return True
Exemplo n.º 9
0
def test_protein_ligand_restraints():
    """Test the restraints in a protein:ligand system.

    For every restraint type reported by
    ``yank.restraints.available_restraint_types()`` a short T4 lysozyme /
    p-xylene calculation in vacuum is built from a YAML script.  Each run
    writes to its own temporary directory, which is removed afterwards even
    when the build fails.
    """
    from yank.yamlbuild import YamlBuilder
    from yank.utils import get_data_filename

    yaml_script = """
---
options:
  minimize: no
  verbose: no
  output_dir: %(output_directory)s
  number_of_iterations: 2
  nsteps_per_iteration: 10
  temperature: 300*kelvin

molecules:
  T4lysozyme:
    filepath: %(receptor_filepath)s
  p-xylene:
    filepath: %(ligand_filepath)s
    antechamber:
      charge_method: bcc

solvents:
  vacuum:
    nonbonded_method: NoCutoff

systems:
  lys-pxyl:
    receptor: T4lysozyme
    ligand: p-xylene
    solvent: vacuum
    leap:
      parameters: [oldff/leaprc.ff14SB, leaprc.gaff]

protocols:
  absolute-binding:
    complex:
      alchemical_path:
        lambda_restraints:     [0.0, 0.5, 1.0]
        lambda_electrostatics: [1.0, 1.0, 1.0]
        lambda_sterics:        [1.0, 1.0, 1.0]
    solvent:
      alchemical_path:
        lambda_electrostatics: [1.0, 1.0, 1.0]
        lambda_sterics:        [1.0, 1.0, 1.0]

experiments:
  system: lys-pxyl
  protocol: absolute-binding
  restraint:
    type: %(restraint_type)s
"""
    # The input structures are the same for every restraint type.
    receptor_filepath = get_data_filename(
        'tests/data/p-xylene-implicit/181L-pdbfixer.pdb')
    ligand_filepath = get_data_filename(
        'tests/data/p-xylene-implicit/p-xylene.mol2')

    # Test all possible restraint types.
    available_restraint_types = yank.restraints.available_restraint_types()
    for restraint_type in available_restraint_types:
        print('***********************************')
        print('Testing %s restraints...' % restraint_type)
        print('***********************************')
        output_directory = tempfile.mkdtemp()
        try:
            data = {
                'output_directory': output_directory,
                'restraint_type': restraint_type,
                'receptor_filepath': receptor_filepath,
                'ligand_filepath': ligand_filepath,
            }
            # run both setup and experiment
            yaml_builder = YamlBuilder(yaml_script % data)
            yaml_builder.build_experiments()
        finally:
            # Clean up, also when the build raised.
            shutil.rmtree(output_directory)