Exemplo n.º 1
0
    def align(self, smiles):
        """Align the 2D depiction of every molecule to a common template.

        Arguments:
            smiles: the template structure. A string is parsed as Smiles and
                passed through ``add_coords``. Any other value is used as the
                template molecule as-is (assumed to be an RDKit Mol that
                already carries 2D coordinates -- TODO confirm with callers).

        Returns:
            None when ``self.inplace`` is set (the molecules in
            ``self.data["Mol"]`` are aligned), otherwise the object returned
            by ``self.add_mols()`` after its ``data["Mol"]`` entries have
            been aligned.

        Raises:
            ValueError: if `smiles` is a string that can not be parsed."""
        if isinstance(smiles, str):
            amol = Chem.MolFromSmiles(smiles)
            if amol is None:
                # MolFromSmiles signals a parse failure by returning None;
                # fail here with a clear message instead of deep inside RDKit.
                raise ValueError(
                    "could not parse Smiles: {!r}".format(smiles))
            add_coords(amol)
        else:
            # A non-string argument used to leave `amol` unbound, which only
            # surfaced as a NameError inside the lambda below.
            amol = smiles

        def _align_to_template(mol):
            return Chem.GenerateDepictionMatching2DStructure(mol, amol)

        # add_mols() is called in both modes; only the non-inplace mode uses
        # its return value.
        result = self.add_mols()
        target = self if self.inplace else result
        # The depiction is changed on the molecule objects themselves, so the
        # Series returned by apply() is not needed.
        target.data["Mol"].apply(_align_to_template)
        if self.inplace:
            return None
        return result
Exemplo n.º 2
0
 def _apply(x):
     """Turn `x` into a molecule via ``self.mol_method`` and add coordinates.

     When `show_prog` is set, ``pb.inc()`` is called first (presumably a
     progress indicator -- confirm in the enclosing function). Falsy
     results of ``self.mol_method`` are left untouched.
     """
     if show_prog:
         pb.inc()
     candidate = self.mol_method(x)
     if not candidate:
         return
     add_coords(candidate, force=force)
Exemplo n.º 3
0
def read_sdf(fn, store_mol_as="Mol_b64", gen2d=False):
    """Create a MolFrame instance from an SD file (can be gzipped (fn ends with ``.gz``)).

    The property columns are fixed by the first readable molecule: each of
    its properties (except `store_mol_as` and ``"order"``) becomes a column.
    Properties that only occur on later molecules are dropped, and molecules
    lacking one of the columns get ``np.nan`` for it.
    Records that can not be read are skipped; their running number is
    printed to stderr.

    Arguments:
        fn: a file name, an already opened binary file object, or a list of
            these. Files opened here are closed again (also on errors);
            file objects passed in by the caller are left open.
        store_mol_as: "Mol_b64" or "Smiles". Any other value falls back to
            "Mol_b64" after printing a notice.
        gen2d: when True, ``add_coords(mol, force=True)`` is called on every
            molecule before it is stored.

    Returns:
        A MolFrame whose ``data`` is a DataFrame with the property columns
        and the structure column named by `store_mol_as`."""
    if store_mol_as not in ["Mol_b64", "Smiles"]:
        print("* Mols are stored as Mol_b64")
        store_mol_as = "Mol_b64"
    d = {}
    d_keys = set()  # property columns, filled from the first readable mol
    ctr = 0
    first_mol = True
    if not isinstance(fn, list):
        fn = [fn]
    for f in fn:
        do_close = True
        if isinstance(f, str):
            if f.endswith(".gz"):
                file_obj = gzip.open(f, mode="rb")
            else:
                file_obj = open(f, "rb")
        else:
            # The caller owns file objects that were passed in.
            file_obj = f
            do_close = False
        try:
            reader = Chem.ForwardSDMolSupplier(file_obj)
            for mol in reader:
                ctr += 1
                if not mol:
                    # Unreadable record: report its running number and skip.
                    print(ctr, file=sys.stderr)
                    continue
                if first_mol:
                    first_mol = False
                    for prop in mol.GetPropNames():
                        if prop in [store_mol_as, "order"]:
                            continue
                        d[prop] = []
                    # Snapshot the property columns before the structure
                    # column is added, so that it is never treated as one.
                    d_keys = set(d.keys())
                    d[store_mol_as] = []
                mol_props = set()
                for prop in mol.GetPropNames():
                    if prop in d_keys:
                        mol_props.add(prop)
                        d[prop].append(get_value(mol.GetProp(prop)))
                    # Every property is removed from the molecule, so the
                    # stored structure does not carry the values again.
                    mol.ClearProp(prop)

                # append NAN to the missing props that were not in the mol:
                missing_props = d_keys - mol_props
                for prop in missing_props:
                    d[prop].append(np.nan)
                if gen2d:
                    add_coords(mol, force=True)
                if store_mol_as == "Smiles":
                    smi = Chem.MolToSmiles(mol)
                    d["Smiles"].append(smi)
                else:
                    mol_b64 = b64.b64encode(pickle.dumps(mol)).decode()
                    d["Mol_b64"].append(mol_b64)
        finally:
            # Release the handle even when reading or converting a record
            # raises; previously the file stayed open in that case.
            if do_close:
                file_obj.close()
    # Column lengths are printed one per line (output kept unchanged).
    for values in d.values():
        print(len(values))
    result = MolFrame()
    result.data = pd.DataFrame(d)
    print_log(result.data, "read SDF")
    return result