Пример #1
0
def prepare_dataset(sdffile, dest=None, rename=True, conformations=False, overwrite=False):
    """Bootstrap the analysis of a dataset stored in an SDF file.

    Unless the prepared SDF already exists and ``overwrite`` is false, this:
       - Reads the molecules from ``sdffile``
       - Creates the destination directory when it does not exist
       - Optionally renames the compounds (prefix: base name of the input + '-')
       - Optionally generates 3D conformations
       - Saves the compounds, then the "master" and "saliviewer" tables

    Args:
        sdffile: Path to the input SDF file.
        dest: Directory that receives the outputs; when falsy, the directory
            part of ``sdffile`` is used.
        rename: If true, rename the molecules via ``rename_mols_by_index``.
        conformations: If true, call ``make3D()`` on each molecule (a few
            hard-coded titles are skipped); failures are reported and ignored.
        overwrite: If true, regenerate the outputs even when the prepared SDF
            is already present.

    Returns:
        A ``(prepared_sdf_path, master_table_path)`` tuple. The paths are
        returned even when the preparation was skipped, in which case the
        master table is not checked for existence.
    """
    root, name = op.split(sdffile)
    name = op.splitext(name)[0]

    if not dest:
        dest = root

    dest_sdf = op.join(dest, name + '-prepared.sdf')
    master_table = op.join(dest, name + '-prepared-master.csv')
    sali_table = op.join(dest, name + '-prepared-saliviewer.csv')

    # Guard clause: nothing to do when the result is already on disk.
    if op.exists(dest_sdf) and not overwrite:
        print('%s is already there and not overwriting requested' % dest_sdf)
        return dest_sdf, master_table

    print('Reading %s' % sdffile)
    mols = list(pybel.readfile('sdf', sdffile))

    print('\tCreating dataset root: %s' % dest)
    # ``dest`` is '' when ``sdffile`` has no directory part and no explicit
    # destination was given; os.makedirs('') would raise, and the current
    # directory obviously exists already.
    if dest and not op.exists(dest):
        os.makedirs(dest)

    if rename:
        print('\tRenaming the compounds to keep track of the provenance')
        rename_mols_by_index(mols, name + '-')

    if conformations:
        print('\tGenerating conformations')
        # NOTE(review): these titles are excluded from 3D generation by the
        # original code without explanation - presumably make3D() misbehaves
        # on them; confirm before changing the list.
        skipped_titles = ('train-3988', 'train-4205', 'dsstox-4205', 'dsstox-4206')
        for mol in mols:
            if any(skipped in mol.title for skipped in skipped_titles):
                continue
            try:
                print('Conformation for %s' % mol.title)
                mol.make3D()
            except Exception:
                # Best effort: a failed conformation must not abort the run.
                print('Error computing a 3D conformation for %s' % mol.title)

    print('\tSaving compounds')
    save_mols(mols, dest_sdf)

    print('\tCreating \"master\" table: %s' % master_table)
    create_master_table(dest_sdf, master_table, fields=['Activity'])

    print('\tCreating \"saliviewer\" table: %s' % sali_table)
    create_saliviewer_input(master_table, sali_table)

    return dest_sdf, master_table
Пример #2
0
def aid2sdf(sdf, csv, dest=None):
    """Merge the outcomes of a PubChem bioassay CSV into the molecules of an SDF file.

    The first CSV row is treated as a header and skipped. For every other row,
    column 2 is used as the molecule id and column 5 as the activity outcome.
    Each molecule read from ``sdf`` gets an 'Activity' data field: '1' for
    'Active', '0' for 'Inactive' and 'Missing' for any other outcome.

    Args:
        sdf: Path to the SDF file with the molecules; molecule titles must
            match the ids found in the CSV.
        csv: Path to the bioassay results CSV file.
        dest: Optional path; when given, the annotated molecules are saved
            there with ``save_mols``.

    Returns:
        The list of annotated molecules.

    Raises:
        KeyError: If a molecule title has no row in the CSV file.
    """
    # The ``csv`` parameter shadows the stdlib module, hence the local alias.
    import csv as csv_parser

    # Read the known activities to a dictionary
    activities = {}
    with open(csv) as handle:
        rows = csv_parser.reader(handle)
        next(rows, None)  # skip the header row
        for row in rows:
            if len(row) < 6:
                # Blank or truncated line: carries no id/outcome pair.
                continue
            activities[row[2]] = row[5]

    # Save the activity to each molecule
    labels = {'Active': '1', 'Inactive': '0'}
    mols = list(pybel.readfile('sdf', sdf))
    for mol in mols:
        outcome = activities[mol.title]
        mol.data['Activity'] = labels.get(outcome, 'Missing')

    if dest:
        save_mols(mols, dest)
    return mols
Пример #3
0
        cas = data[0].data['CAS_NO']
        if not cas in can_dupes2:
            can_dupes2[cas] = [data]
        else:
            can_dupes2[cas].append(data)

    for cas in can_dupes2.keys():
        groups = can_dupes2[cas]
        if len(groups) > 1:
           print 'compound with cas=%s is considered different by OB canonical smiles'%cas
           for group in groups:
                print group[0].write('can').strip()
           print '-'*80

    union = sorted([dupe[0] for dupe in cas_dupes.values()], key=lambda mol: mol.title)
    save_mols(union, op.join(root, 'mutagenicity-all-cas-union.sdf'))
    print '\t\tUnion size=%d' % len(union)

    dest_sdf = op.join(root, 'mutagenicity-all-cas-union-prepared.sdf')
    prepare_dataset(op.join(root, 'mutagenicity-all-cas-union.sdf'), rename=False, conformations=True)

    #Depict the molecules
    depict(dest_sdf)

    #Molecular descriptors
    print 'Computing fingerprints via JCompoundMapper' #TODO: Extract-method this
    jcm_fingerprint(dest_sdf, ('ECFP', 'ECFPVariant', 'PHAP3POINT2D', 'SHED', 'DFS', 'RAD2D'))
    jcm_fingerprint(dest_sdf, ('LSTAR', 'RAD3D', 'PHAP3POINT3D'))
    print 'Computing descriptors via CDKDescUI'
    cdkdescuiprops(dest_sdf, desc_types=('constitutional',))
    print 'Computing spectrophores'