Python MoleculePropertyDB примеры использования

Язык программирования: Python

Пространство имен/Пакет: moldesign.store.mongo

Класс/Тип: MoleculePropertyDB

Примеров на hotexamples.com: 7

Python MoleculePropertyDB — найдено 7 примеров. Это лучшие примеры кода на Python для moldesign.store.mongo.MoleculePropertyDB, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

from_connection_info(4)

get_training_set(2)

MoleculePropertyDB(1)

Пример #1

Показать файл

Файл: __init__.py Проект: exalearn/electrolyte-design

    def get_base_training_set(
            self, database: MoleculePropertyDB) -> Dict[str, float]:
        """Assemble the SMILES -> property mapping used to fit the base model

        Args:
            database: Connection to a collection of molecular properties
        Returns:
            Dictionary keyed on the ``identifier.smiles`` values returned by
            the database, holding the matching ``self.target_property`` values
        """

        smiles_field = 'identifier.smiles'
        target_field = self.target_property

        # Ask the database for both columns in a single query
        records = database.get_training_set([smiles_field], [target_field])

        # Pair the two columns entry-by-entry
        smiles = records[smiles_field]
        values = records[target_field]
        return dict(zip(smiles, values))

Пример #2

Показать файл

Файл: __init__.py Проект: exalearn/electrolyte-design

    def get_calibration_training_set(
            self, level: int,
            database: MoleculePropertyDB) -> Dict[str, float]:
        """Build the training set for the calibration model at one level of fidelity

        The quantity to be learned is the difference between ``self.target_property``
        and the redox potential stored under the recipe named by that level's
        ``base_fidelity``.

        Args:
            level: Index of the desired level of fidelity
            database: Connection to a collection of molecular properties
        Returns:
            Mapping of the molecule description (a geometry field for SchNet
            models, the SMILES string otherwise) to the target-minus-base delta
        """

        level_spec = self.model_levels[level]

        # Look up the recipe that defines the base level of fidelity
        recipe = get_recipe_by_name(level_spec.base_fidelity)

        # Pick the database field that describes the molecule to the model
        if level_spec.model_type == ModelType.SCHNET:
            # Geometry from the recipe's geometry level; the charged geometry
            #  is used only for adiabatic recipes, the neutral one otherwise
            input_field = f'data.{recipe.geometry_level}.{self.oxidation_state if recipe.adiabatic else "neutral"}.xyz'
        else:
            # Every other model type works from the SMILES string
            input_field = 'identifier.smiles'

        # Name of the field holding the property at the base level of fidelity
        if self.oxidation_state == OxidationState.OXIDIZED:
            potential = 'oxidation_potential'
        else:
            potential = 'reduction_potential'
        base_field = potential + '.' + recipe.name

        # Fetch the molecule description, the base-level value and the target
        records = database.get_training_set([input_field, base_field],
                                            [self.target_property])

        # The calibration model learns the correction from base to target
        correction = np.subtract(records[self.target_property],
                                 records[base_field])

        return dict(zip(records[input_field], correction))

Пример #3

Показать файл

def db() -> MoleculePropertyDB:
    """Yield a ``MoleculePropertyDB`` backed by a scratch MongoDB database

    Written as a generator so it can act as a setup/teardown pair: the code
    before the ``yield`` prepares the store and the code after it removes
    the scratch data again.
    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible in this excerpt - confirm.

    Yields:
        Wrapper around the ``molecules`` collection of the ``edw-pytest`` database
    """
    # No arguments: connect using the MongoClient defaults
    client = MongoClient()
    try:
        database = client['edw-pytest']
        yield MoleculePropertyDB(database['molecules'])

        # Remove everything that was written through the wrapper
        database.drop_collection('molecules')
        client.drop_database('edw-pytest')
    finally:
        # Release the connection even when the cleanup above fails or the
        #  generator is closed before it is resumed
        client.close()

Пример #4

Показать файл

Файл: load-qcfractal-into-mongo.py Проект: exalearn/electrolyte-design

"""Load summary of data from QCFractal into MongoDB"""
from tqdm import tqdm

from moldesign.simulate.qcfractal import GeometryDataset, SolvationEnergyDataset, HessianDataset, SinglePointDataset
from moldesign.store.models import UnmatchedGeometry
from moldesign.store.mongo import MoleculePropertyDB

# Open the MongoDB connection (no explicit connection arguments are passed)
mongo = MoleculePropertyDB.from_connection_info()

# Geometry datasets to read from QCFractal, one per pair of constructor arguments
relax_datasets = [
    GeometryDataset(dataset_name, spec_name)
    for dataset_name, spec_name in (
        ('Electrolyte Geometry XTB', 'xtb'),
        ('Electrolyte Geometry NWChem', 'small_basis'),
        ('Electrolyte Geometry NWChem, 6-31G(2df,p)', 'normal_basis'),
    )
]

single_point_energy_datasets = [
    # Verticals using XTB geometry
    SinglePointDataset(
        'Electrolyte XTB Neutral Geometry, Small-Basis Energy',
        'nwchem',
        'small_basis',
    ),
    SinglePointDataset('Electrolyte XTB Neutral Geometry, Normal-Basis Energy',
                       'nwchem', 'normal_basis'),
    SinglePointDataset(
        'Electrolyte XTB Neutral Geometry, Diffuse-Basis Energy', 'nwchem',
        'diffuse_basis'),

Пример #5

Показать файл

Файл: run.py Проект: exalearn/electrolyte-design

                        help="Number molecules per inference task")
    parser.add_argument("--beta", default=1, help="Degree of exploration for active learning. "
                                                  "This is the beta from the UCB acquistion function", type=float)

    # Execution system related
    parser.add_argument('--dilation-factor', default=1, type=float,
                        help='Factor by which to artificially increase simulation time')
    parser.add_argument('--num-workers', default=1, type=int, help='Number of workers')

    # Parse the arguments
    args = parser.parse_args()
    run_params = args.__dict__

    # Connect to MongoDB
    mongo_url = parse.urlparse(args.mongo_url)
    mongo = MoleculePropertyDB.from_connection_info(mongo_url.hostname, mongo_url.port)

    full_search = pd.read_csv(args.search_space, delim_whitespace=True)
    search_space = full_search['inchi'].values

    # Create an output directory with the time and run parameters
    start_time = datetime.utcnow()
    params_hash = hashlib.sha256(json.dumps(run_params).encode()).hexdigest()[:6]
    out_dir = Path('runs').joinpath(f'ensemble-{start_time.strftime("%d%b%y-%H%M%S")}-{params_hash}')
    out_dir.mkdir(exist_ok=False, parents=True)

    # Save the run parameters to disk
    with open(os.path.join(out_dir, 'run_params.json'), 'w') as fp:
        json.dump(run_params, fp, indent=2)
    with open(os.path.join(out_dir, 'environment.json'), 'w') as fp:
        json.dump(dict(os.environ), fp, indent=2)

Пример #6

Показать файл

                              learning_rate=args.learning_rate,
                              bootstrap=True)
    my_retrain_mpnn = update_wrapper(my_retrain_mpnn, retrain_mpnn)

    # Create the method server and task generator
    inf_cfg = {'executors': ['ml-inference']}
    tra_cfg = {'executors': ['ml-train']}
    dft_cfg = {'executors': ['qc']}
    doer = ParslMethodServer([(my_evaluate_mpnn, inf_cfg),
                              (run_simulation, dft_cfg),
                              (my_update_mpnn, tra_cfg),
                              (my_retrain_mpnn, tra_cfg)], server_queues,
                             config)

    # Connect to MongoDB
    database = MoleculePropertyDB.from_connection_info(args.mongohost,
                                                       args.mongoport)

    # Configure the "thinker" application
    thinker = Thinker(client_queues, database, args.search_space,
                      args.search_size, args.retrain_frequency,
                      args.retrain_from_scratch, models,
                      args.molecules_per_ml_task, nnodes, args.nodes_per_task,
                      out_dir, args.beta)
    logging.info('Created the method server and task generator')

    try:
        # Launch the servers
        #  The method server is a Thread, so that it can access the Parsl DFK
        #  The task generator is a Thread, so that all debugging methods get cast to screen
        doer.start()
        thinker.start()

Пример #7

Показать файл

Файл: run.py Проект: exalearn/electrolyte-design

        help=
        'Globus Endpoint config file to use with the ProxyStore Globus backend'
    )
    group.add_argument(
        '--ml-ps-globus-config',
        default=None,
        help=
        'Globus Endpoint config file to use with the ProxyStore Globus backend'
    )

    # Parse the arguments
    args = parser.parse_args()
    run_params = args.__dict__

    # Connect to MongoDB
    database = MoleculePropertyDB.from_connection_info(hostname=args.mongohost,
                                                       port=args.mongoport)

    # Get the target level of accuracy
    with open(args.simulation_spec) as fp:
        simulation_spec = MultiFidelitySearchSpecification.parse_obj(
            yaml.safe_load(fp))

    # Create an output directory with the time and run parameters
    start_time = datetime.utcnow()
    params_hash = hashlib.sha256(
        json.dumps(run_params).encode()).hexdigest()[:6]
    out_dir = os.path.join(
        'runs',
        f'{simulation_spec.target_property}-N{args.num_qc_workers}-n{args.nodes_per_task}-'
        f'{params_hash}-{start_time.strftime("%d%b%y-%H%M%S")}')
    os.makedirs(out_dir, exist_ok=False)