def fcd_statistics(SMILES, n_jobs, gpu, out, canonicalize=False):
    """
    Pre-compute FCD statistics for a list of SMILES and pickle them to disk.

    :param SMILES: List of SMILES
    :param n_jobs: Number of jobs for processing SMILES
    :param gpu: CUDA device index; the FCD device string is built as
        ``cuda:{gpu}``
    :param out: Path to output file. A missing parent directory is created;
        a bare filename (no directory part) is written to the current
        working directory.
    :param canonicalize: If True, every SMILES is parsed with
        ``Chem.MolFromSmiles`` and rewritten with ``Chem.MolToSmiles`` before
        the statistics are computed. Entries whose parse result is falsy are
        dropped.
    :return: None, the statistics are only written to ``out``
    """
    # canonize=False: any canonicalization is done explicitly below.
    fcd = FCD(device=f'cuda:{gpu}', n_jobs=n_jobs, canonize=False)

    if canonicalize:
        smiles_for_fcd = []
        for smi in SMILES:
            # Parse once and reuse the molecule for the validity check and
            # for writing the SMILES back out.
            mol = Chem.MolFromSmiles(smi)
            if mol:
                smiles_for_fcd.append(Chem.MolToSmiles(mol))
    else:
        smiles_for_fcd = SMILES
    results = fcd.precalc(smiles_for_fcd)

    # dirname is '' for a bare filename and os.makedirs('') raises, so only
    # create the directory when there is one. exist_ok avoids failing if the
    # directory appears between a check and the creation.
    out_dir = os.path.dirname(out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(out, 'wb') as f:
        pkl.dump(results, f)
def test_zero_molecule(self):
    """FCD evaluated on two empty lists must not compare equal to itself."""
    score = FCD()([], [])
    # A NaN compares unequal to itself, so asserting score != score
    # serves as the NaN check that the failure message refers to.
    self.assertNotEqual(
        score,
        score,
        msg=("FCD should return np.nan on invalid situations"),
    )
def test_multiprocess(self):
    """FCD built with n_jobs=2 must match self.output_keras to 4 places."""
    metric = FCD(n_jobs=2)
    output_pytorch = metric(self.set1, self.set2)

    # Build the failure message up front so that it reports both values
    # together with their absolute difference.
    diff = abs(self.output_keras - output_pytorch)
    failure_msg = (
        "Outputs differ. keras={},".format(self.output_keras)
        + "torch={}. diff is {}".format(output_pytorch, diff)
    )

    self.assertAlmostEqual(
        output_pytorch,
        self.output_keras,
        places=4,
        msg=failure_msg,
    )
# Hydrogen donor/acceptor outcomes: one row per (metric, descriptor) pair,
# with 'outcome' and 'value' aligned by position.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0.
res = pd.concat([
    res,
    pd.DataFrame({
        'input_file': sampled_file,
        'outcome': [
            'KL divergence, hydrogen donors',
            'KL divergence, hydrogen acceptors',
            'Jensen-Shannon distance, hydrogen donors',
            'Jensen-Shannon distance, hydrogen acceptors',
            'Wasserstein distance, hydrogen donors',
            'Wasserstein distance, hydrogen acceptors'
        ],
        'value': [
            kl_donors, kl_acceptors,
            jsd_donors, jsd_acceptors,
            emd_donors, emd_acceptors
        ]
    })
])

## outcome 20: Frechet ChemNet distance
# NOTE(review): canonize=False presumably relies on gen_canonical and
# org_canonical already holding canonical SMILES -- confirm upstream.
fcd = FCD(canonize=False)
fcd_calc = fcd(gen_canonical, org_canonical)
res = pd.concat([
    res,
    pd.DataFrame({
        'input_file': sampled_file,
        'outcome': 'Frechet ChemNet distance',
        'value': [fcd_calc]
    })
])

# write output
res.to_csv(output_file, index=False, compression='gzip')