def search_cocrystals(filter_solvents=True):
    """
    Search the whole CSD for structures that contain two different molecules
    and satisfy the search settings configured below.

    The surviving identifiers, together with their chemical names, are
    written to ``new_all_cocrystals.csv`` after dropping rows without a name
    and rows whose name contains "solvate" or "clathrate". The elapsed time
    is printed.

    Parameters
    ----------
    filter_solvents : bool, default True
        When true, discard every structure in which at least one component's
        SMILES is listed in ``clean_smiles.SOLVENT_SMILES``.

    Returns
    -------
    The value returned by ``remove_polymorphs`` for the selected identifiers.
    Note that this is the list *before* the name-based solvate/clathrate
    filter; that filter only affects the CSV file.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement recommended by the standard library documentation.
    start_time = time.perf_counter()
    csd = MoleculeReader('CSD')
    entry_reader = EntryReader('CSD')

    settings = search.Search.Settings()
    settings.only_organic = True
    settings.not_polymeric = True
    settings.has_3d_coordinates = True
    settings.no_disorder = True
    settings.no_errors = True
    settings.no_ions = True
    settings.no_metals = True

    pairs = []
    for entry in csd:
        if not settings.test(entry):
            continue
        mol = csd.molecule(entry.identifier)
        mol.normalise_labels()
        smi = mol.smiles
        if smi is None:
            continue
        # We make sure that the structure consists of two different molecules.
        # NOTE(review): Remove() is defined elsewhere; presumably it drops
        # duplicate SMILES fragments - confirm.
        if len(Remove(smi.split('.'))) == 2:
            pairs.append(mol.identifier)

    # Clean the list from solvents
    if filter_solvents:
        print('Solvates and hydrates will be removed')
        # A set gives de-duplication and O(1) membership tests in one go.
        solvates = set()
        for identifier in pairs:
            mol = csd.molecule(identifier)
            if any(component.smiles in clean_smiles.SOLVENT_SMILES
                   for component in mol.components):
                solvates.add(mol.identifier)
        final_cocrystals = [x for x in pairs if x not in solvates]
    else:
        final_cocrystals = pairs

    # Clean the list from polymorphs
    cocrystals = remove_polymorphs(final_cocrystals)

    # Look the names up here rather than inside the solvent branch, so that
    # they are available when filter_solvents is False as well (the name
    # table used to be built only in that branch, causing a NameError).
    names = [entry_reader.entry(identifier).chemical_name
             for identifier in cocrystals]
    end_time = time.perf_counter()

    cocrystals_data = pd.DataFrame(list(zip(cocrystals, names)),
                                   columns=['csd_id', 'name'])
    # Entries without a chemical name cannot be screened by name; drop them.
    cocrystals_data = cocrystals_data.dropna(axis=0)
    unwanted = cocrystals_data['name'].str.contains('solvate|clathrate')
    dataset_cocrystals = cocrystals_data[~unwanted]

    print(end_time - start_time)
    dataset_cocrystals.to_csv('new_all_cocrystals.csv', index=False)
    return cocrystals
# Example #2
# 0
def get_entry(identifier, database="CSD"):
    """
    Look up a single entry by its identifier string.

    An ``EntryReader`` is opened on ``database`` and the result of its
    ``entry(identifier)`` call is returned (a ccdc.entry.Entry object).
    """
    return EntryReader(database).entry(identifier)
# Example #3
# 0
from ccdc.diagram import DiagramGenerator
from ccdc.io import EntryReader
from ccdc.search import TextNumericSearch

# Render one 2D diagram per distinct refcode in ``df.refcode`` and save the
# images as hit0.png, hit1.png, ... in the current working directory.
entries = list(df.refcode)

data = []
# Disabled DOI lookup, kept for reference:
# for e in entries:
#     query = TextNumericSearch()
#     query.add_all_identifiers(e)
#     hits = query.search()
#     data.append(hits[0].entry.publication.doi)
# from pprint import pprint
#
# print(len(data))
# print(len(set(data)))

diagram_generator = DiagramGenerator()
diagram_generator.settings.font_size = 12
diagram_generator.settings.line_width = 1.6
diagram_generator.settings.image_width = 500
diagram_generator.settings.image_height = 500

csd_reader = EntryReader('CSD')
# De-duplicate the refcode strings rather than the Entry objects, keeping
# first-seen order (dict preserves insertion order). Iterating a set of
# objects gave an order that could differ between runs, so the number in
# "hitN.png" did not map to a fixed structure.
mols = [csd_reader.entry(refcode) for refcode in dict.fromkeys(entries)]

for i, e in enumerate(mols):
    img = diagram_generator.image(e)
    img.save("hit{}.png".format(i))