Exemplo n.º 1
0
def distribution_learning_suite_v1(
        chembl_file_path: str,
        number_samples: int = 10000,
) -> List[DistributionLearningBenchmark]:
    """
    Build version 1 of the distribution learning benchmark suite.

    The suite consists, in this order, of the validity, uniqueness, novelty,
    KL-divergence and Frechet benchmarks.

    Args:
        chembl_file_path: path to the file with the reference ChEMBL molecules;
            handed to the novelty, KL-divergence and Frechet benchmarks as
            their ``training_set_file``
        number_samples: forwarded unchanged as ``number_samples`` to every
            benchmark in the suite (presumably the number of molecules each
            benchmark samples -- confirm against the benchmark classes)

    Returns:
        List of benchmarks, version 1
    """
    # Keyword arguments shared by the benchmarks that compare against the
    # reference set.
    reference_kwargs = {
        'training_set_file': chembl_file_path,
        'number_samples': number_samples,
    }

    suite = [
        ValidityBenchmark(number_samples=number_samples),
        UniquenessBenchmark(number_samples=number_samples),
    ]
    suite.append(novelty_benchmark(**reference_kwargs))
    suite.append(kldiv_benchmark(**reference_kwargs))
    suite.append(frechet_benchmark(**reference_kwargs))
    return suite
Exemplo n.º 2
0
def assess_distribution_learning(model: DistributionMatchingGenerator,
                                 training_file_path: str,
                                 json_output_file: str,
                                 number_samples: int) -> None:
    """
    Run the distribution learning benchmarks on a model and save the results.

    The validity, uniqueness, novelty and KL-divergence benchmarks are
    evaluated through ``_evaluate_distribution_learning_benchmarks``. The
    outcome is written to ``json_output_file`` as a JSON object holding the
    guacamol version, a timestamp and the attributes of every result.

    Args:
        model: generator passed to the benchmark evaluation
        training_file_path: file given to the novelty and KL-divergence
            benchmarks as their ``training_set_file``
        json_output_file: path of the JSON file to write; it is opened in
            write mode, so existing content is replaced
        number_samples: forwarded unchanged as ``number_samples`` to every
            benchmark
    """
    LOG.info('Benchmarking distribution learning')

    # Keyword arguments shared by the benchmarks that need the training set.
    training_kwargs = {
        'training_set_file': training_file_path,
        'number_samples': number_samples,
    }
    suite = [
        ValidityBenchmark(number_samples=number_samples),
        UniquenessBenchmark(number_samples=number_samples),
        novelty_benchmark(**training_kwargs),
        kldiv_benchmark(**training_kwargs),
    ]

    outcomes = _evaluate_distribution_learning_benchmarks(model=model, benchmarks=suite)

    # Key order is part of the output format: version, timestamp, results.
    # The timestamp is taken only after the evaluation has finished.
    report = OrderedDict([
        ('guacamol_version', guacamol.__version__),
        ('timestamp', get_time_string()),
        ('results', list(map(vars, outcomes))),
    ])

    LOG.info('Save results to file %s', json_output_file)
    with open(json_output_file, 'wt') as out_file:
        serialized = json.dumps(report, indent=4)
        out_file.write(serialized)