示例#1
0
def test_file_to_dict_list():
    """
    GIVEN compound files in .tsv, .csv, and .json format
    WHEN each file is parsed into a list of compound dicts
    THEN every format yields 15 entries and the same record at index 7
    """
    # Expected record at index 7, built key by key so the column order
    # is explicit.
    expected = OrderedDict()
    expected['id'] = 'cpd01211'
    expected['abbreviation'] = 'tcynt'
    expected['name'] = 'Thiocyanate'
    expected['formula'] = 'CNS'
    expected['mass'] = '58'
    expected['source'] = 'ModelSEED'
    expected['structure'] = 'InChI=1S/CHNS/c2-1-3/h3H'
    expected['charge'] = '-1'
    expected['is_core'] = '1'
    expected['is_obsolete'] = '0'
    expected['linked_compound'] = 'null'
    expected['is_cofactor'] = '0'
    expected['deltag'] = '22.2'
    expected['deltagerr'] = '5.68687'
    expected['pka'] = '3:0.5'
    expected['pkb'] = ''
    expected['abstract_compound'] = 'null'
    expected['comprised_of'] = 'null'
    expected['aliases'] = 'null'

    for filename in ('test_compounds.tsv', 'test_compounds.csv',
                     'test_compounds.json'):
        compounds = utils.file_to_dict_list(DATA_DIR + filename)
        assert len(compounds) == 15
        assert compounds[7] == expected
示例#2
0
 def load_compound_set(self,
                       compound_file=None,
                       structure_field=None,
                       id_field='id'):
     """Load starting compounds from a file or from the MINE database.

     If a compound file is provided, its compounds are added to this
     object's internal dictionary. If not, the compounds are read from
     the associated MINE database (``self.mine``).

     :param compound_file: Path to a file containing compounds; it is
         parsed with ``utils.file_to_dict_list``
     :type compound_file: str
     :param structure_field: the name of the column containing the
         structure incarnation as InChI or SMILES; passed through to
         ``self._mol_from_dict`` (Default: None)
     :type structure_field: str
     :param id_field: the name of the column containing the desired
         compound ID (Default: 'id')
     :type id_field: str
     :return: SMILES strings of the compounds that were loaded
     :rtype: list
     :raises ValueError: if no compound file is given and no MINE
         database is set
     """
     compound_smiles = []
     if compound_file:
         for line in utils.file_to_dict_list(compound_file):
             mol = self._mol_from_dict(line, structure_field)
             # Skip rows for which no molecule could be built
             if not mol:
                 continue
             # Add compound to internal dictionary as a starting
             # compound and store SMILES string to be returned
             smi = AllChem.MolToSmiles(mol, True)
             _id = line[id_field]
             # Do not operate on inorganic compounds.
             # NOTE: this is a plain substring test on the SMILES string,
             # so any SMILES containing "C" or "c" (e.g. "Cl") passes.
             if "C" in smi or "c" in smi:
                 AllChem.SanitizeMol(mol)
                 self._add_compound(_id,
                                    smi,
                                    mol=mol,
                                    type='Starting Compound')
                 compound_smiles.append(smi)
     # If a MINE database is being used instead, load every compound it
     # contains and return their SMILES strings as a list
     elif self.mine:
         db = MINE(self.mine)
         for compound in db.compounds.find():
             _id = compound['_id']
             smi = compound['SMILES']
             # Assume unannotated compounds are starting compounds. The
             # lookup has to use the same 'Type' key that is passed on
             # below; checking lowercase 'type' instead would overwrite
             # the type of every compound that is already annotated.
             cpd_type = compound.get('Type', 'Starting Compound')
             self._add_compound(_id, smi, type=cpd_type)
             compound_smiles.append(smi)
     else:
         raise ValueError('No input file or database specified for '
                          'starting compounds')
     print("%s compounds loaded" % len(compound_smiles))
     return compound_smiles
示例#3
0
def test_file_to_dict_list():
    """Check that .tsv, .csv and .json compound files parse identically.

    Each file must yield 15 entries, and the entry at index 7 must equal
    the expected record below. The name of each file is printed once its
    checks have passed.
    """
    expected_items = [
        ('id', 'cpd01211'),
        ('abbreviation', 'tcynt'),
        ('name', 'Thiocyanate'),
        ('formula', 'CNS'),
        ('mass', '58'),
        ('source', 'ModelSEED'),
        ('structure', 'InChI=1S/CHNS/c2-1-3/h3H'),
        ('charge', '-1'),
        ('is_core', '1'),
        ('is_obsolete', '0'),
        ('linked_compound', 'null'),
        ('is_cofactor', '0'),
        ('deltag', '22.2'),
        ('deltagerr', '5.68687'),
        ('pka', '3:0.5'),
        ('pkb', ''),
        ('abstract_compound', 'null'),
        ('comprised_of', 'null'),
        ('aliases', 'null'),
    ]
    expected = OrderedDict(expected_items)

    test_files = ('test_compounds.tsv', 'test_compounds.csv',
                  'test_compounds.json')
    for name in test_files:
        parsed = utils.file_to_dict_list(data_dir + name)
        assert len(parsed) == 15
        assert parsed[7] == expected
        print(name)
示例#4
0
def test_file_to_dict_list():
    """
    GIVEN compound files in three formats (.tsv, .csv, and .json)
    WHEN each file is read into a list of compound dicts
    THEN each list has 15 entries and the expected record at index 7
    """
    expected_pairs = [
        ("id", "cpd01211"), ("abbreviation", "tcynt"),
        ("name", "Thiocyanate"), ("formula", "CNS"),
        ("mass", "58"), ("source", "ModelSEED"),
        ("structure", "InChI=1S/CHNS/c2-1-3/h3H"),
        ("charge", "-1"), ("is_core", "1"),
        ("is_obsolete", "0"), ("linked_compound", "null"),
        ("is_cofactor", "0"), ("deltag", "22.2"),
        ("deltagerr", "5.68687"), ("pka", "3:0.5"),
        ("pkb", ""), ("abstract_compound", "null"),
        ("comprised_of", "null"), ("aliases", "null"),
    ]
    expected_compound = OrderedDict(expected_pairs)

    for filename in (
        "test_compounds.tsv",
        "test_compounds.csv",
        "test_compounds.json",
    ):
        compounds = utils.file_to_dict_list(DATA_DIR / filename)
        assert len(compounds) == 15
        assert compounds[7] == expected_compound
示例#5
0
                 neutralise=options.bnice,
                 image_dir=options.image_dir,
                 database=options.database)
    # Create a directory for image output file if it doesn't already exist
    if options.image_dir and not os.path.exists(options.image_dir):
        os.mkdir(options.image_dir)
    # If starting compound specified as SMILES string, then add it
    if options.smiles:
        pk._add_compound("Start", options.smiles, type='Starting Compound')
    else:
        pk.load_compound_set(compound_file=options.compound_file)
    # Generate reaction network
    pk.transform_all(max_generations=options.generations,
                     num_workers=options.max_workers)
    if options.pruning_whitelist:
        mols = [
            pk._mol_from_dict(line)
            for line in utils.file_to_dict_list(options.pruning_whitelist)
        ]
        pk.prune_network([utils.compound_hash(x) for x in mols if x])
    # Save to database (e.g. Mongo) if present, otherwise create output file
    if options.database:
        print("Saving results to %s" % options.database)
        pk.save_to_MINE(options.database)
    else:
        pk.assign_ids()
        pk.write_compound_output_file(options.output_dir + '/compounds.tsv')
        pk.write_reaction_output_file(options.output_dir + '/reactions.tsv')

    print("Execution took %s seconds." % (time.time() - t1))