Exemplo n.º 1
0
def load_resources():
    """Bind the configured resource paths to module-level globals.

    Reads the ``Paths`` section of the "config" configuration and assigns,
    in this order:

        SMILES    <- SmilesPath
        STRUCT    <- StructPath
        DATA      <- ContainerDataPath
        CONTAINER <- ContainerPath
        BATCH     <- BatchPath

    A missing key raises ``KeyError``; globals assigned before the failing
    key keep their new values.
    """
    path_section = mft.load_config("config")["Paths"]
    module_namespace = globals()
    bindings = (
        ("SMILES", "SmilesPath"),
        ("STRUCT", "StructPath"),
        ("DATA", "ContainerDataPath"),
        ("CONTAINER", "ContainerPath"),
        ("BATCH", "BatchPath"),
    )
    for global_name, path_key in bindings:
        module_namespace[global_name] = path_section[path_key]
Exemplo n.º 2
0
# Dispatch table for load_resource(). Order matters: the first entry whose
# match substring occurs in the lower-cased resource name wins.
# Fields:
#   (match substring, name of the module global that caches the resource,
#    label printed while loading, candidate path keys in order of preference,
#    key of the configured column list, extra keyword arguments for
#    dd.read_csv, whether the result is wrapped in a MolFrame)
_RESOURCE_SPECS = (
    ("smi", "SMILES", "SMILES",
     ("SmilesPath",), "SmilesCols", {}, True),
    # NOTE(review): this resource used to be read from SmilesPath while its
    # columns came from StructCols, which looks like a copy-paste slip.
    # StructPath is preferred now; SmilesPath is kept as a fallback so that
    # configurations without a StructPath entry behave as before.
    ("struct", "STRUCT", "STRUCTURES",
     ("StructPath", "SmilesPath"), "StructCols", {}, False),
    ("data", "DATA", "DATA",
     ("ContainerDataPath",), "ContainerDataCols",
     {"compression": "gzip"}, True),
    ("cont", "CONTAINER", "CONTAINER",
     ("ContainerPath",), "ContainerCols", {}, True),
    ("batch", "BATCH", "BATCH",
     ("BatchPath",), "BatchCols", {}, True),
)


def load_resource(resource, limit_cols=True):
    """Load a known resource into a module-level global, once.

    Known resources (matched case-insensitively by substring) and the
    global each one is cached in:

        SMILES      -> SMILES     (MolFrame)
        STRUCTURES  -> STRUCT     (raw result of ``dd.read_csv``)
        DATA        -> DATA       (MolFrame, gzip-compressed file)
        CONTAINER   -> CONTAINER  (MolFrame)
        BATCH       -> BATCH      (MolFrame)

    If the corresponding global already exists the call is a no-op, and
    the configuration is not even loaded.

    Parameters:
        resource: name of the resource, e.g. "smiles" or "Structures".
        limit_cols: a list of column names to keep; or True to keep the
            columns configured for the resource (when that list is not
            empty); any other value keeps all columns.

    Returns:
        None. The loaded resource is published as a module global.

    Raises:
        FileNotFoundError: if `resource` matches no known resource.
    """
    res = resource.lower()
    for spec in _RESOURCE_SPECS:
        if spec[0] in res:
            break
    else:
        raise FileNotFoundError("# unknown resource: {}".format(resource))
    _, global_name, label, path_keys, cols_key, read_kwargs, wrap = spec

    glbls = globals()
    # The cache check uses the same name the result is stored under.
    # (STRUCTURES used to be checked as "STRUCTURES" but stored as "STRUCT",
    # so it was re-read on every call.)
    if global_name in glbls:
        return

    mf_config = mft.load_config("config")
    print("- loading resource:                        ({})".format(label))
    paths = mf_config["Paths"]
    # First configured candidate wins; if none is present, fall through to
    # the preferred key so the usual KeyError names it.
    path_key = next((key for key in path_keys if key in paths), path_keys[0])
    result = dd.read_csv(paths[path_key], sep="\t", **read_kwargs)

    if isinstance(limit_cols, list):
        result = result[limit_cols]
    elif limit_cols is True and len(paths[cols_key]) > 0:
        result = result[paths[cols_key]]

    if wrap:
        frame = MolFrame()
        frame.data = result
        glbls[global_name] = frame
    else:
        glbls[global_name] = result
Exemplo n.º 3
0
from PIL import Image, ImageChops

from rdkit.Chem import AllChem as Chem
from rdkit.Chem.rdCoordGen import AddCoords  # New coord. generation
from rdkit.Chem import Draw

from mol_frame import tools as mft

# Set the atom label font face and size on RDKit's Draw.DrawingOptions.
# A KeyError raised while doing so is deliberately ignored (best effort).
try:
    Draw.DrawingOptions.atomLabelFontFace = "DejaVu Sans"
    Draw.DrawingOptions.atomLabelFontSize = 18
except KeyError:  # Font "DejaVu Sans" is not available
    pass

# Module-level statement: the configuration is loaded when this module is
# imported.
config = mft.load_config()

# Read the "UseNewRdkitCoord" option; it defaults to True when the option is
# not set. Presumably this selects the rdCoordGen-based coordinate generation
# imported above -- TODO confirm against the code that uses this flag.
USE_RDKIT_NEW_COORD = config["Options"].get("UseNewRdkitCoord", True)

# try:
#     # Try to import Avalon so it can be used for generation of 2d coordinates.
#     from rdkit.Avalon import pyAvalonTools as pyAv
#     USE_AVALON_2D = True
# except ImportError:
#     print("* Avalon not available. Using RDKit for 2d coordinate generation.")
#     USE_AVALON_2D = False


def rescale(mol, f=1.4):
    tm = np.zeros((4, 4), np.double)
    for i in range(3):