Example #1
    def run(self, json_path=None):
        ''' Run the benchmark.

        Parameters
        ----------
        json_path : str, optional
            If given, the results are saved to this path.
        '''
        results = _evaluate_goal_directed_benchmarks(self.optimizer,
                                                     self.benchmark_list)
        if json_path is not None:
            if os.path.splitext(json_path)[1] != '.json':
                raise ValueError('json_path must have extension .json')
            from collections import OrderedDict
            import json
            from guacamol.utils.data import get_time_string

            benchmark_results = OrderedDict()
            benchmark_results['guacamol_version'] = guacamol.__version__
            benchmark_results['benchmark_suite_version'] = self.benchmark_suite_name
            benchmark_results['timestamp'] = get_time_string()
            benchmark_results['results'] = [vars(result) for result in results]
            logger.info(f'Save results to file {json_path}')
            with open(json_path, 'wt') as f:
                f.write(json.dumps(benchmark_results, indent=4))
        return results
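
A minimal usage sketch for this method. Assumptions: `suite` is a hypothetical instance of the class defining run() above, already constructed with an optimizer and a benchmark list, and each result object exposes benchmark_name and score attributes, consistent with the vars(result) serialization above.

# Hypothetical usage of the run() method shown above.
results = suite.run(json_path='goal_directed_results.json')
for result in results:
    # Attribute names assumed from guacamol's benchmark result objects.
    print(result.benchmark_name, result.score)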
Example #2

def assess_goal_directed_generation(
        goal_directed_molecule_generator: GoalDirectedGenerator,
        json_output_file='output_goal_directed.json',
        benchmark_version='v3') -> None:
    """
    Assesses a distribution-matching model for de novo molecule design.

    Args:
        goal_directed_molecule_generator: Model to evaluate
        json_output_file: Name of the file where to save the results in JSON format
        benchmark_version: which benchmark suite to execute
    """
    logger.info(
        f'Benchmarking goal-directed molecule generation, version {benchmark_version}'
    )
    benchmarks = goal_directed_benchmark_suite(version_name=benchmark_version)

    results = _evaluate_goal_directed_benchmarks(
        goal_directed_molecule_generator=goal_directed_molecule_generator,
        benchmarks=benchmarks)

    benchmark_results: Dict[str, Any] = OrderedDict()
    benchmark_results['guacamol_version'] = guacamol.__version__
    benchmark_results['benchmark_suite_version'] = benchmark_version
    benchmark_results['timestamp'] = get_time_string()
    benchmark_results['results'] = [vars(result) for result in results]

    logger.info(f'Save results to file {json_output_file}')
    with open(json_output_file, 'wt') as f:
        f.write(json.dumps(benchmark_results, indent=4))
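
A minimal sketch of calling this entry point, assuming guacamol's GoalDirectedGenerator interface with its abstract generate_optimized_molecules method. DummyGenerator is a toy stand-in, not a real optimizer, and the output file name is a placeholder.

from typing import List, Optional

from guacamol.goal_directed_generator import GoalDirectedGenerator
from guacamol.scoring_function import ScoringFunction


class DummyGenerator(GoalDirectedGenerator):
    """Toy generator that ignores the scoring function (illustration only)."""

    def generate_optimized_molecules(self, scoring_function: ScoringFunction,
                                     number_molecules: int,
                                     starting_population: Optional[List[str]] = None
                                     ) -> List[str]:
        # Always propose ethanol; a real model would optimize the score.
        return ['CCO'] * number_molecules


assess_goal_directed_generation(DummyGenerator(),
                                json_output_file='goal_directed.json')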
Example #3
def _assess_distribution_learning(model: DistributionMatchingGenerator,
                                  chembl_training_file: str,
                                  json_output_file: str,
                                  benchmark_version: str,
                                  number_samples: int) -> None:
    """
    Internal equivalent of assess_distribution_learning, allowing a flexible number of samples.
    Call it directly only for testing.
    """
    logger.info(
        f'Benchmarking distribution learning, version {benchmark_version}')
    benchmarks = distribution_learning_benchmark_suite(
        chembl_file_path=chembl_training_file,
        version_name=benchmark_version,
        number_samples=number_samples)

    results = _evaluate_distribution_learning_benchmarks(model=model,
                                                         benchmarks=benchmarks)

    benchmark_results: Dict[str, Any] = OrderedDict()
    benchmark_results['guacamol_version'] = guacamol.__version__
    benchmark_results['benchmark_suite_version'] = benchmark_version
    benchmark_results['timestamp'] = get_time_string()
    benchmark_results['samples'] = model.generate(100)
    benchmark_results['results'] = [vars(result) for result in results]

    logger.info(f'Save results to file {json_output_file}')
    with open(json_output_file, 'wt') as f:
        f.write(json.dumps(benchmark_results, indent=4))
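
Since this is the internal testing hook, a direct call might look like the following sketch. It assumes guacamol's DistributionMatchingGenerator interface (a single generate method returning SMILES strings); DummyModel, both file paths, and the version string are placeholders.

from typing import List

from guacamol.distribution_matching_generator import DistributionMatchingGenerator


class DummyModel(DistributionMatchingGenerator):
    """Toy sampler returning a fixed molecule (illustration only)."""

    def generate(self, number_samples: int) -> List[str]:
        return ['CCO'] * number_samples


# A small sample count keeps a test run cheap.
_assess_distribution_learning(model=DummyModel(),
                              chembl_training_file='chembl_train.smiles',
                              json_output_file='test_results.json',
                              benchmark_version='v1',
                              number_samples=100)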
Example #4

def WriteGuacaMolBenchmarkResult(benchmark_result, output_json_path):
    """Serialize a single benchmark result, with version and timestamp metadata, to a JSON file."""
    results: Dict[str, Any] = OrderedDict()
    results["guacamol_version"] = guacamol.__version__
    results["timestamp"] = get_time_string()
    results["result"] = vars(benchmark_result)
    with open(output_json_path, "w") as file:
        file.write(json.dumps(results, indent=4))
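
A usage sketch, assuming `results` is a list of benchmark result objects such as those built in the examples above:

# Hypothetical: write the first result from a previously run suite.
single_result = results[0]
WriteGuacaMolBenchmarkResult(single_result, 'single_result.json')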
Example #5
def get_argparser():
    timestring = get_time_string()
    parser = argparse.ArgumentParser(
        description='Data Preparation for GuacaMol',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-o',
                        '--destination',
                        default='.',
                        help='Download and output location')
    parser.add_argument('-i',
                        '--input',
                        required=True,
                        help='Filename of input smiles file')
    parser.add_argument('--output_prefix',
                        default=timestring,
                        help='Prefix of the output file')
    parser.add_argument('--n_jobs',
                        default=4,
                        type=int,
                        help='Number of cores to use')
    parser.add_argument('--seed',
                        default=9325,
                        type=int,
                        help='Random number seed')
    parser.add_argument('--with_hydrogens',
                        action='store_true',
                        default=False,
                        help='Whether to add hydrogen nodes to the graph.')
    return parser
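
For illustration, the parser might be exercised like this (argument values are placeholders):

parser = get_argparser()
args = parser.parse_args(['-i', 'molecules.smiles', '-o', 'data/', '--n_jobs', '8'])
# argparse maps '--input' to args.input, '--destination' to args.destination, etc.
print(args.input, args.destination, args.n_jobs, args.seed, args.with_hydrogens)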
Example #6
def get_argparser():
    timestring = get_time_string()
    parser = argparse.ArgumentParser(description='Data Preparation for GuacaMol',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-o', '--destination', default='.', help='Download and output location')
    parser.add_argument('-i', '--input', default=None, help='Filename of input smiles file')
    parser.add_argument('--output_prefix', default=timestring, help='Prefix of the output file')
    parser.add_argument('--n_jobs', default=8, type=int, help='Number of cores to use')
    parser.add_argument('--tanimoto_cutoff', default=0.323, type=float,
                        help='Remove molecules too similar to the holdout set')
    parser.add_argument('--chembl', action='store_true',
                        help='Specify to download and process molecules from chembl')
    return parser
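
This variant makes --input optional because --chembl switches to downloading the data instead. A quick sanity check of the flags (values are placeholders):

parser = get_argparser()
args = parser.parse_args(['--chembl', '--tanimoto_cutoff', '0.4'])
print(args.chembl, args.tanimoto_cutoff, args.input)  # True 0.4 None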
Example #7
def config_logger(model_dir):
    from guacamol.utils.data import get_time_string

    timestring = get_time_string()
    fh = logging.FileHandler(model_dir / '{}-train.log'.format(timestring))
    fh.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
    fh.setLevel(logging.DEBUG)

    # configure root logger
    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
    logging.getLogger().addHandler(sh)

    for name in ('gan', 'guacamol', 'tensorflow', 'tensorpack'):
        logger = logging.getLogger(name)
        logger.addHandler(fh)
        logger.setLevel(logging.INFO)

        if name == 'tensorflow':
            # avoid double logging
            logger.propagate = False
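
A short usage sketch; model_dir must support the / operator, so a pathlib.Path is assumed, and the directory name is a placeholder:

import logging
from pathlib import Path

model_dir = Path('runs/experiment-1')
model_dir.mkdir(parents=True, exist_ok=True)
config_logger(model_dir)
logging.getLogger('guacamol').info('logging configured')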
Example #8
def assess_distribution_learning(model: DistributionMatchingGenerator,
                                 training_file_path: str,
                                 json_output_file: str,
                                 number_samples: int) -> None:
    LOG.info('Benchmarking distribution learning')
    benchmarks = [
        ValidityBenchmark(number_samples=number_samples),
        UniquenessBenchmark(number_samples=number_samples),
        novelty_benchmark(training_set_file=training_file_path, number_samples=number_samples),
        kldiv_benchmark(training_set_file=training_file_path, number_samples=number_samples),
    ]

    results = _evaluate_distribution_learning_benchmarks(model=model, benchmarks=benchmarks)

    benchmark_results = OrderedDict()
    benchmark_results['guacamol_version'] = guacamol.__version__
    benchmark_results['timestamp'] = get_time_string()
    benchmark_results['results'] = [vars(result) for result in results]

    LOG.info('Save results to file %s', json_output_file)
    with open(json_output_file, 'wt') as f:
        f.write(json.dumps(benchmark_results, indent=4))
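
A call sketch, reusing the toy DummyModel from the note under Example #3; the paths and sample count are placeholders:

assess_distribution_learning(model=DummyModel(),
                             training_file_path='train.smiles',
                             json_output_file='distribution_results.json',
                             number_samples=1000)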