def test_VEA():
    """Yield one almost-equal check per reference row and descriptor name.

    Every row of ``data`` is stripped and whitespace-split into a SMILES
    string followed by raw reference fields.  The raw fields are decoded with
    ``parse_reference`` into ``(decimal, desired)`` pairs keyed by the names
    in ``descs``, and compared against the values computed by a ``Calculator``
    built from ``AdjacencyMatrix`` and ``DistanceMatrix``.

    Yields:
        Tuples of ``(assert_almost_equal, actual, desired, decimal, message)``.
    """
    calc = Calculator([AdjacencyMatrix, DistanceMatrix])

    for row in data:
        fields = row.strip().split()
        smi = fields[0]
        mol = Chem.MolFromSmiles(smi)

        # Reference values, keyed by descriptor name.
        desireds = {
            name: parse_reference(raw) for name, raw in zip(descs, fields[1:])
        }

        # Computed values, keyed by the string form of each descriptor.
        actuals = {}
        for descriptor, value in zip(calc.descriptors, calc(mol)):
            actuals[str(descriptor)] = value

        for desc in descs:
            actual = actuals[desc]
            decimal, desired = desireds[desc]

            # Entries whose desired value is None are skipped entirely.
            if desired is not None:
                assert not is_missing(actual), actual

                message = "{} of {}".format(desc, smi)
                yield assert_almost_equal, actual, desired, decimal, message
def mordred_fingerprint2d(mols):
    """Compute the ``descriptors2d`` values for every molecule in ``mols``.

    Args:
        mols: Sized iterable of molecules accepted by the ``Calculator``.

    Returns:
        A ``(result, header)`` tuple.  ``result`` is a float32 array of shape
        ``(len(mols), len(descriptors2d))`` in which values reported as
        missing by ``is_missing`` are stored as NaN.  ``header`` is an array
        holding ``str(d)`` for each entry of ``descriptors2d``.
    """
    shape = (len(mols), len(descriptors2d))
    result = np.zeros(shape, dtype=np.float32)
    calc = Calculator(descriptors2d)

    for row, mol in enumerate(tqdm.tqdm(mols)):
        for col, value in enumerate(calc(mol)):
            if is_missing(value):
                result[row, col] = np.nan
            else:
                result[row, col] = value

    header = np.array(list(map(str, descriptors2d)))
    return result, header
def mordred_fingerprint3d(mols):
    """Compute the ``descriptors3d`` values for every molecule in ``mols``.

    Molecules with exactly one conformer are run through the ``Calculator``;
    molecules with no conformer get a row filled with NaN.

    Args:
        mols: Sized iterable of molecules exposing ``GetNumConformers()``.

    Returns:
        A ``(result, header)`` tuple.  ``result`` is a float32 array of shape
        ``(len(mols), len(descriptors3d))`` in which values reported as
        missing by ``is_missing`` are stored as NaN.  ``header`` is an array
        holding ``str(d)`` for each entry of ``descriptors3d``.

    Raises:
        ValueError: If a molecule has a conformer count other than 0 or 1.
    """
    shape = (len(mols), len(descriptors3d))
    result = np.zeros(shape, dtype=np.float32)
    calc = Calculator(descriptors3d)

    for row, mol in enumerate(tqdm.tqdm(mols)):
        n_conformers = mol.GetNumConformers()

        if n_conformers == 0:
            # No conformer: the whole row is marked as NaN.
            result[row] = np.nan
            continue

        if n_conformers != 1:
            raise ValueError("every molecule must have at most 1 conformer")

        for col, value in enumerate(calc(mol)):
            result[row, col] = np.nan if is_missing(value) else value

    header = np.array(list(map(str, descriptors3d)))
    return result, header
def test_getitem():
    """Yield checks for item access on ``result`` by position and by name.

    For each (present, missing) key pair, the first key is expected to map
    to ``1`` and the second to a value for which ``is_missing`` is true.
    """
    key_pairs = ((0, 1), ("Dummy1", "Dummy2"))

    for present_key, missing_key in key_pairs:
        yield eq_, 1, result[present_key]
        yield ok_, is_missing(result[missing_key])
def test_name():
    """Yield checks for name-based lookup through ``result.name``.

    ``"Dummy1"`` is expected to map to ``1``, and ``"Dummy2"`` to a value for
    which ``is_missing`` is true.
    """
    present_value = result.name["Dummy1"]
    yield eq_, 1, present_value

    missing_flag = is_missing(result.name["Dummy2"])
    yield ok_, missing_flag
def test_ix():
    """Yield checks for index-based lookup through ``result.ix``.

    Index ``0`` is expected to map to ``1``, and index ``1`` to a value for
    which ``is_missing`` is true.
    """
    present_value = result.ix[0]
    yield eq_, 1, present_value

    missing_flag = is_missing(result.ix[1])
    yield ok_, missing_flag
result = calc1(benzene)
print(result)
# >>> [0.0, 1, 0, 0, 0, 1, (snip)

# Descriptors can also be registered through the Calculator constructor.
calc2 = Calculator(Chi.Chi)

# The descriptors module contains all descriptors.
calc3 = Calculator(descriptors)

# Every registered descriptor instance is exposed by the `descriptors` property.
print(calc3.descriptors)
# >>> (mordred.EccentricConnectivityIndex.EccentricConnectivityIndex(), (snip)

# Calculate the descriptors.
result = calc3(benzene)

# Get the first missing value.
na1 = next(value for value in result if is_missing(value))

# Show the reason it is missing.
print(na1.error)
# >>> missing 3D coordinate

# Drop all missing values.
result = result.drop_missing()

# Convert the remaining values to a dict.
print(result.asdict())