else: can_dupes2[cas].append(data) for cas in can_dupes2.keys(): groups = can_dupes2[cas] if len(groups) > 1: print 'compound with cas=%s is considered different by OB canonical smiles'%cas for group in groups: print group[0].write('can').strip() print '-'*80 union = sorted([dupe[0] for dupe in cas_dupes.values()], key=lambda mol: mol.title) save_mols(union, op.join(root, 'mutagenicity-all-cas-union.sdf')) print '\t\tUnion size=%d' % len(union) dest_sdf = op.join(root, 'mutagenicity-all-cas-union-prepared.sdf') prepare_dataset(op.join(root, 'mutagenicity-all-cas-union.sdf'), rename=False, conformations=True) #Depict the molecules depict(dest_sdf) #Molecular descriptors print 'Computing fingerprints via JCompoundMapper' #TODO: Extract-method this jcm_fingerprint(dest_sdf, ('ECFP', 'ECFPVariant', 'PHAP3POINT2D', 'SHED', 'DFS', 'RAD2D')) jcm_fingerprint(dest_sdf, ('LSTAR', 'RAD3D', 'PHAP3POINT3D')) print 'Computing descriptors via CDKDescUI' cdkdescuiprops(dest_sdf, desc_types=('constitutional',)) print 'Computing spectrophores' spectrophores(dest_sdf) print 'Saving in several data analysis tools file formats' prop4da(dest_sdf)
# Tail of a function whose header lies above this view (presumably
# prepare_dataset, since the script below unpacks the same pair from it --
# confirm). It builds the "master" and "saliviewer" tables from the prepared
# SDF and returns the prepared-SDF path together with the master-table path.
    print '\tCreating \"master\" table: %s' % master_table
    # Only the 'Activity' field is requested for the master table.
    create_master_table(dest_sdf, master_table, fields=['Activity'])
    print '\tCreating \"saliviewer\" table: %s' % sali_table
    # The saliviewer input is derived from the master table written just above.
    create_saliviewer_input(master_table, sali_table)
    return dest_sdf, master_table


# Script entry point: prepares the benchmark SDF found under a fixed directory
# in the user's home, depicts it, then computes descriptors, spectrophores,
# analysis-tool exports and fingerprints.
if __name__ == '__main__':
    # Hard-coded data location below the current user's home directory.
    DEFAULT_AMESV2_DIR = op.join(op.expanduser('~'), 'Proyectos', 'bsc', 'data', 'filtering', 'mutagenicity')
    root = DEFAULT_AMESV2_DIR
    dataset = op.join(root, 'tox_benchmark_N6512.sdf') #TODO: check if it exists, otherwise download
    # master_table is returned here but not used again in this script.
    dest_sdf, master_table = prepare_dataset(dataset)
    #Depict the molecules
    depict(dest_sdf)
    #Molecular descriptors
    # NOTE(review): every step from here on is given `dataset` (the input path)
    # rather than `dest_sdf` (the path returned by prepare_dataset), unlike
    # depict() above -- confirm this is intended.
    print 'Computing descriptors via CDKDescUI'
    cdkdescuiprops(dataset)
    print 'Computing spectrophores'
    spectrophores(dataset)
    print 'Saving in several data analysis tools file formats'
    prop4da(dataset)
    print 'Computing fingerprints via JCompoundMapper' #TODO: Extract-method this
    FINGERPRINTS = ('ECFP', 'CATS2D')
    for fp in FINGERPRINTS:
        print fp
        # One ARFF output file per fingerprint name, written next to the dataset.
        output = op.join(root, 'tox_benchmark_N6512-jcm-' +fp +'.arff')
        # NOTE(review): `fp` only appears in the output file name and is not
        # passed to fingerprint(); unless the driver derives the fingerprint
        # type some other way, both files may hold the same fingerprint --
        # verify against JCompoundMapperCLIDriver.fingerprint's signature.
        JCompoundMapperCLIDriver().fingerprint(dataset, output, label='Activity')