Exemplo n.º 1
0
def readData(propCod):
    """Reads files with data extracted from DBS.

    For every property code and every LARS code listed under it, the
    corresponding file (as named by getFileName) is read with pandas.
    Files that do not exist are skipped, so a property may map to an
    empty dictionary.

    :param propCod: (dict-like) Property codes mapped to an iterable of LARS codes.
    :return:
        tables: (dict) Tables of property and source (LARS code).
                    {property code: {LARS code: table returned by pandas.read_csv}}
    :raises myExceptions.NoFile: if pandas reports the file as missing even
        though the existence check passed (e.g. it was removed in between).
    """

    tables = {}

    for prop in propCod:
        propTables = {}
        tables[prop] = propTables

        for larsCod in propCod[prop]:
            fileName = getFileName(prop, larsCod)

            # Missing files are not an error: there is simply no data for this source.
            if not os.path.exists(fileName):
                continue

            try:
                propTables[larsCod] = pd.read_csv(fileName)
            except FileNotFoundError as err:
                # Keep the original error attached as the explicit cause.
                raise myExceptions.NoFile(fileName) from err

    return tables
Exemplo n.º 2
0
def readFieFile(fileName):
    """Reads family isomer enumeration (fie) file.

    The file is parsed as whitespace-separated columns without a header row;
    the columns are named 'nam', 'frm' and 'smiles'.

    :param fileName: (str) File name.
    :return:
        df: (pandas DataFrame) Table with ENU code, molecular formula and SMILES string.
    :raises myExceptions.NoFile: if fileName does not exist.
    """

    if not os.path.exists(fileName):
        raise myExceptions.NoFile(fileName)

    # comment='#' is deliberately not passed: '#' can occur inside SMILES strings.
    # The separator is a raw string so that '\s' is handed to the regex engine
    # unchanged instead of being an invalid string escape.
    df = pd.read_csv(fileName, sep=r'\s+', names=['nam', 'frm', 'smiles'])
    return df
Exemplo n.º 3
0
def run():
    """Writes identifiers to txt file.

    Loads the molecules stored in 'out/<name>.json', where <name> is the last
    '/'-separated component of sys.argv[2]. When a fourth command-line
    argument is given it is read as a fie file and only the molecules
    selected by selectMolecules are kept. One line per molecule (formula,
    CAS, name, InChI, SMILES) is then written to 'out/00_<name>.lst'.

    :raises myExceptions.ArgError: if the number of command-line arguments is not 3 or 4.
    :raises myExceptions.NoFile: if the json file does not exist.
    """

    nArgs = len(sys.argv)
    if nArgs != 3 and nArgs != 4:
        # NOTE(review): confirm the (expected, actual) argument order against
        # the definition of myExceptions.ArgError.
        raise myExceptions.ArgError('3 or 4', nArgs)

    name = sys.argv[2]
    name = name.split('/')[-1]

    enuMolFile = 'out/{}.json'.format(name)
    if not os.path.exists(enuMolFile):
        raise myExceptions.NoFile(enuMolFile)

    with open(enuMolFile) as jsonFile:
        enuData = json.load(jsonFile, object_hook=moleculeDecoder)

    if nArgs == 4:
        fieFile = sys.argv[3]
        isomers = IO.readFieFile(fieFile)
        isomers = utils.canonicalizeSmiles(isomers)

        enuData = selectMolecules(isomers, enuData)

    outFileName = 'out/00_{}.lst'.format(name)

    # The context manager closes the file even if a molecule is missing an
    # attribute or a value cannot be formatted.
    with open(outFileName, 'w') as out:
        for key in enuData:
            mol = enuData[key]

            frm = mol.form_pcp
            # '%' is the placeholder for a missing CAS number or name.
            cas = mol.cas or '%'
            # Spaces would break the whitespace-separated column layout.
            molName = (mol.name_pcp or '%').replace(' ', '_')
            inchi = mol.inchi_pcp
            smiles = mol.smiles

            out.write('{:10} {:12} {:40} {:30} {}\n'.format(
                frm, cas, molName, inchi, smiles))
Exemplo n.º 4
0
def getDbsEntries(fileName):
    """Loads the DBS entries stored in a json file.

    Every json object is passed through dbsEntryDecoder while loading.

    :param fileName: (str) File name.
    :return:
        dbsEntries: (dict) Dictionary of DBS entries.
    :raises myExceptions.NoFile: if fileName does not exist.
    """

    if os.path.exists(fileName):
        with open(fileName) as handle:
            return json.load(handle, object_hook=dbsEntryDecoder)

    raise myExceptions.NoFile(fileName)
Exemplo n.º 5
0
    def __init__(self, fileName):
        """Builds the dbsConfiguration object from a configuration file.

        The file is organised in sections, each introduced by a "[section]"
        header and followed by "name: value" entries (continuation lines
        are allowed, RFC 822 style). Refer to the 'configparser'
        documentation for the full syntax.

        :param fileName: (str) File name from which configuration is read.
        :raises myExceptions.NoFile: if fileName does not exist.
        """

        fileIsMissing = not os.path.exists(fileName)
        if fileIsMissing:
            raise myExceptions.NoFile(fileName)

        # Parse first, then publish the parser on the instance.
        parser = configparser.ConfigParser()
        parser.read(fileName)

        self.config = parser
Exemplo n.º 6
0
def run(dbsConfig):
    """Get identifiers and all available data for a given molecule in DBS
    and save data to data/ directory.

    The molecule list file is taken from sys.argv[2]; it is parsed as
    whitespace-separated columns named 'frm', 'cas', 'nam', 'inchi' and
    'smiles', without a header row.

    :param dbsConfig: (dbsConfiguration object) DBS configuration object.
    :raises myExceptions.ArgError: if the number of command-line arguments is not 3.
    :raises myExceptions.NoFile: if the molecule list file does not exist.
    """

    nArgs = len(sys.argv)
    if nArgs != 3:
        # NOTE(review): confirm the (actual, expected) argument order against
        # the definition of myExceptions.ArgError.
        raise myExceptions.ArgError(nArgs, 3)

    # I decided to keep both files (molListFile and enuMolFile)
    # because I can easily comment out entries in molListFile,
    # because of "#" in smiles strings I cannot use this symbol to start a comment.

    # 00_file.lst
    molListFile = sys.argv[2]
    if not os.path.exists(molListFile):
        raise myExceptions.NoFile(molListFile)

    # Raw string: '\s' must reach the regex engine unchanged instead of being
    # treated as an (invalid) string escape.
    columns = ['frm', 'cas', 'nam', 'inchi', 'smiles']
    molList = pd.read_csv(molListFile, sep=r'\s+', header=None, names=columns)

    dbsFileName = dbsConfig.getDbsFileName()
    dbsEntries = dbs.getDbsEntries(dbsFileName)

    print('Getting Identifiers')
    path = dbsConfig.getPath()
    molList = getCids(dbsEntries, molList, path)

    print('Getting Data')
    propCod = dbsConfig.getPropCod()
    tables = getData(dbsEntries, molList, path, propCod)

    print('Writing Data')
    writeData(tables)