Пример #1
0
def snomaData(fname, popposi = False):
    """Load a CSV of (SMILES, label) rows into torch_geometric ``Data`` graphs.

    The first CSV row is treated as a header and skipped. For every other
    row, column 0 is parsed with pysmiles and column 1 is converted with
    ``int`` and stored as a ``(1, 1)`` float target.

    Node features are 40 wide: a 32-way one-hot row picked by
    ``elemap[element]`` followed by an 8-way one-hot row picked by the atom's
    ``hcount``. Each bond is stored twice (once per direction) in
    ``edge_index`` and ``edge_attr``.

    Args:
        fname: path of the CSV file.
        popposi: when true, a sample whose label equals 1 is added 24 times
            (the same ``Data`` object repeated) instead of once.

    Returns:
        list of ``torch_geometric.data.Data`` objects.
    """
    element_rows = torch.eye(32)
    hcount_rows = torch.eye(8)
    samples = []
    with open(fname) as handle:
        records = csv.reader(handle)
        next(records, None)  # first row is the header
        for record in records:
            mol = pysmiles.read_smiles(record[0])
            n_atoms = mol.number_of_nodes()
            n_bonds = mol.number_of_edges()

            x = torch.empty((n_atoms, 40))
            edge_index = torch.empty((2, 2 * n_bonds))
            edge_attr = torch.empty((2 * n_bonds, 1), dtype=torch.long)

            elements = mol.nodes(data='element')
            hcounts = mol.nodes(data='hcount')
            for atom in range(n_atoms):
                x[atom] = torch.cat((element_rows[elemap[elements[atom]]],
                                     hcount_rows[hcounts[atom]]))

            for bond, (src, dst, order) in enumerate(mol.edges(data='order')):
                forward = 2 * bond
                backward = forward + 1
                edge_index[:, forward] = torch.tensor((src, dst))
                edge_index[:, backward] = torch.tensor((dst, src))

                # Orders 1, 1.5, 2, 3 become 0, 1.5, 2.0, 3.0 here; the value
                # is then written into the integer (long) edge_attr tensor.
                bond_kind = order - 1
                if bond_kind > 0:
                    bond_kind += 0.5
                if bond_kind > 0.5:
                    bond_kind += 0.5
                edge_attr[forward, 0] = bond_kind
                edge_attr[backward, 0] = bond_kind

            target = torch.tensor([[int(record[1])]], dtype=torch.float)
            datum = torch_geometric.data.Data(
                x=x,
                edge_index=edge_index.to(torch.long),
                edge_attr=edge_attr,
                y=target)
            if datum.y == 1 and popposi:
                samples.extend([datum] * 24)
            else:
                samples.append(datum)
    return samples
Пример #2
0
def test_write_smiles(node_data, edge_data, expl_h):
    """Check that a molecule survives a write_smiles -> read_smiles round trip.

    The molecule is built with ``make_mol``, serialised, read back with the
    requested hydrogen mode (aromaticity is not reinterpreted), and compared
    with the original graph.
    """
    reference = make_mol(node_data, edge_data)
    serialised = write_smiles(reference)
    roundtrip = read_smiles(
        serialised,
        explicit_hydrogen=expl_h,
        reinterpret_aromatic=False,
    )
    assertEqualGraphs(reference, roundtrip)
Пример #3
0
    def __init__(self,
                 smiles_string: str,
                 y_list: list,
                 atom_info_path='../raw_data/atom_info.txt'):
        """Build the graph data object for one molecule.

        Args:
            smiles_string (string): SMILES for the molecule.
            y_list (list or int): list of multilabels or single label. A list
                is converted to a float32 tensor as-is; anything else is
                converted and reshaped to ``(1, -1)``.
            atom_info_path (string): stored on the instance while the
                features are extracted, then removed again.
        """
        import contextlib

        # create graph from smiles
        # (stdout is muted to block a pysmiles warning about isomeric stuff;
        # the context managers close the null-device handle and put back the
        # previous stdout even when read_smiles raises)
        with open(os.devnull, 'w') as devnull, \
                contextlib.redirect_stdout(devnull):
            self.graph = read_smiles(smiles_string)

        self.atom_info_path = atom_info_path

        if isinstance(y_list, list):
            y = torch.tensor(y_list, dtype=torch.float32)
        else:
            y = torch.tensor(y_list, dtype=torch.float32).view(1, -1)

        # inherit superclass from torch-geometric
        super().__init__(x=torch.tensor(self.extract_features(),
                                        dtype=torch.float),
                         edge_index=torch.tensor(self.graph_to_edge_index(),
                                                 dtype=torch.long),
                         y=y)

        # remove graph attribute, necessary to inherit from superclass
        del self.graph
        del self.atom_info_path
Пример #4
0
def get_reward_fitness(state, X, ts, char_idx, chars, net):
    """Score generated SMILES strings with the network ``net``.

    Args:
        state: parent genes, compared element-wise against ``X``; the
            comparison result must expose ``.values`` (presumably a pandas
            Series -- confirm against the caller).
        X: generated SMILES, either an object with ``.values`` or an array
            with ``.reshape``.
        ts, char_idx, chars: forwarded unchanged to ``dimY`` to encode ``X``.
        net: callable returning per-class scores; it is called as
            ``net(tensor, None)``.

    Returns:
        2-D array with one row per input: ``[smiles, reward, fitness]``.
        Reward is -10 when the predicted class is 0, when the child equals
        its parent, or when the SMILES cannot be parsed; otherwise 1.
    """
    X_seed = dimY(X, ts, char_idx, chars)
    out = net(T.from_numpy(X_seed).float(), None).detach().numpy()

    out_cat = np.argmax(out, axis=1)
    # Penalizing fitness for fake data (predicted class 0)
    fitness = np.where(out_cat == 0, -1 * out[:, 0], out[:, 1])
    # Reward +1 for class != 0 and -10 for class 0
    reward = np.where(out_cat == 0, -10, 1)

    arr = np.hstack((reward.reshape(len(reward), 1),
                     fitness.reshape(len(fitness), 1)))
    try:
        X = X.values.reshape(len(X.values), 1)
    except AttributeError:
        # X has no ``.values``: treat it as an array already
        X = X.reshape(len(X), 1)
    # [smiles,reward,fitness]
    arr = np.hstack((X, arr))

    # Penalizing reward for generating child gene same as parent
    same_genes = (state == X.reshape(len(X))).values
    for i, is_same in enumerate(same_genes):
        if is_same:
            arr[i][1] = -10

    # Penalizing for wrong smiles: any parser failure counts as invalid, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    for i, row in enumerate(arr):
        try:
            read_smiles(row[0])
        except Exception:
            arr[i][1] = -10

    return arr
Пример #5
0
    def write_read_cycle(self):
        """Write ``self.mol`` to SMILES and verify it reads back unchanged.

        The round trip is checked twice, once with implicit and once with
        explicit hydrogens. Intermediate values are reported through ``note``.
        """
        smiles = write_smiles(self.mol)
        for reported in (self.mol.nodes(data=True),
                         self.mol.edges(data=True),
                         smiles):
            note(reported)

        # self.mol may mix implicit and explicit hydrogens, but a SMILES
        # string can only be read in one mode. The reference is therefore
        # normalised to each mode in turn and both results are checked.
        attribute_defaults = (('charge', 0), ('hcount', 0))
        for expl_H in (False, True):
            ref_mol = self.mol.copy()
            for node in ref_mol:
                attributes = ref_mol.nodes[node]
                for key, val in attribute_defaults:
                    attributes.setdefault(key, val)

            normalise = (add_explicit_hydrogens if expl_H
                         else remove_explicit_hydrogens)
            normalise(ref_mol)

            found = read_smiles(smiles,
                                explicit_hydrogen=expl_H,
                                reinterpret_aromatic=False)
            note(found.nodes(data=True))
            note(found.edges(data=True))
            assertEqualGraphs(ref_mol, found)
Пример #6
0
def sm2graph(smiles,size, weight = None):
    """Convert a SMILES string into a padded random-walk matrix plus node data.

    The molecule is parsed with RDKit and converted with ``mol_to_nx``; if
    that fails for any reason, ``pysmiles.read_smiles`` is used instead.

    Args:
        smiles: SMILES string of the molecule.
        size: side length of the returned square matrix; rows/columns beyond
            the number of atoms are zero.
        weight: edge attribute used as adjacency weight (``None`` gives 1 for
            every bond).

    Returns:
        Tuple ``(rwL, mole, ar)``: ``rwL`` is ``D^-1 (A + I)`` zero-padded to
        ``size x size`` where ``D`` holds ``degree + 1`` on the diagonal;
        ``mole`` and ``ar`` are the node views of the ``'element'`` and
        ``'aromatic'`` attributes.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
        mol = mol_to_nx(mol)
    except Exception:
        mol = read_smiles(smiles)

    # adjacency matrix with self loops, padded with zeros up to `size`
    adj = nx.to_numpy_array(mol, weight=weight)
    adj = adj + np.eye(adj.shape[0])
    adj = np.pad(adj, (0, size - adj.shape[0]))

    # Inverse of the (degree + 1) matrix. The diagonal is indexed by each
    # node's position in the graph's node order, which is the row order used
    # by the adjacency matrix. Indexing by ``node_id - 1`` shifted every
    # entry (and wrapped node 0 to the last row) for 0-based node ids.
    inv_degree = np.zeros((size, size))
    degree = mol.degree
    for position, node in enumerate(mol.nodes):
        inv_degree[position, position] = 1 / (degree[node] + 1)

    # random walk normalized matrix
    rwL = inv_degree @ adj

    # per-node features
    mole = mol.nodes(data='element')
    ar = mol.nodes(data='aromatic')
    return rwL, mole, ar
Пример #7
0
    def get_value(self):
        """Build a molecule from the PDB path and/or SMILES typed into the widgets.

        Returns:
            The molecule graph, or ``False`` when reading fails, when the two
            inputs are not isomorphic, or when no (non-empty) molecule was
            produced. When both inputs are given, the SMILES molecule is
            returned with the PDB node and graph attributes merged into it.
        """

        def show_error(message):
            # modal error popup, used by every failure path below
            dialog = QErrorMessage()
            dialog.showMessage(message)
            dialog.exec_()

        pdb_mol = None
        filename = self._pth_widget.text()
        if filename:
            try:
                pdb_mol = read_pdb(filename)
            except Exception as err:
                self._pth_widget.setText('')
                show_error(str(err))
                return False
            # only the first molecule read from the file is used
            pdb_mol = pdb_mol[0]
            pdb_mol.graph['name'] = Path(filename).stem
            if not pdb_mol.edges:
                # no bonds came with the file: generate them
                system = System()
                system.add_molecule(pdb_mol)
                MakeBonds(allow_name=False).run_system(system)
            if not self.hydrogen_checkbox.checkState():
                remove_explicit_hydrogens(pdb_mol)

        smiles_mol = None
        smiles = self._smiles_widget.text()
        if smiles:
            try:
                smiles_mol = read_smiles(smiles)
            except Exception as err:
                show_error(str(err))
                self._smiles_widget.setText('')
                return False

            smiles_mol.graph['smiles'] = smiles
            smiles_mol.graph['name'] = smiles
            if self.hydrogen_checkbox.checkState():
                add_explicit_hydrogens(smiles_mol)

        if pdb_mol and smiles_mol:
            # match atoms of both graphs by element and copy the PDB data over
            matcher = nx.isomorphism.GraphMatcher(
                pdb_mol, smiles_mol,
                nx.isomorphism.categorical_node_match('element', None))
            match = next(matcher.isomorphisms_iter(), {})
            if not match:
                show_error('Smiles and PDB molecule are not isomorphic!')
                return False
            for pdb_idx, smi_idx in match.items():
                smiles_mol.nodes[smi_idx].update(pdb_mol.nodes[pdb_idx])
            smiles_mol.graph.update(pdb_mol.graph)

        molecule = smiles_mol or pdb_mol
        return molecule if molecule else False
Пример #8
0
 def res(self, smile, adj_list, feature_list):
     """Append the adjacency matrix and one-hot features of one molecule.

     Args:
         smile: SMILES string, parsed with explicit hydrogens.
         adj_list: list that receives the adjacency matrix (numpy array).
         feature_list: list that receives the ``oneHot(smile)`` features
             (numpy array).

     Raises:
         Whatever ``read_smiles`` raises for an unparsable string. The
         offending SMILES is printed first.
     """
     try:
         mol_with_H = read_smiles(smile, explicit_hydrogen=True)
     except Exception:
         # Report which input failed and surface the real parse error,
         # rather than failing later on an unassigned variable.
         print(smile)
         raise
     adjacency = nx.to_numpy_array(mol_with_H)
     features = oneHot(smile)
     adj_list.append(np.array(adjacency))
     feature_list.append(np.array(features))
Пример #9
0
 def predict(self, input: JsonSerializable):
     """
     Dummy test model: count the atoms of a SMILES string.

     The string is parsed with explicit hydrogens and the number of atoms
     per element is returned as a JSON object string.
     """
     molecule = read_smiles(input, explicit_hydrogen=True)
     per_element = collections.Counter(
         element for _, element in molecule.nodes(data="element"))
     return json.dumps(per_element)
Пример #10
0
def evaluate_smiles(smiles_string):
    """Classify one SMILES string with the global ``model`` and print the class.

    The molecule is parsed with explicit hydrogens, its elements are one-hot
    encoded with ``element_to_onehot``, and the resulting graph is sent to
    ``device`` before evaluation. The predicted label, one of 'insoluble',
    'slightly soluble' or 'soluble', is printed; nothing is returned.
    """
    classes = ['insoluble', 'slightly soluble', 'soluble']
    G = read_smiles(smiles_string, explicit_hydrogen=True)  # decode smiles string
    # convert element to one-hot vector
    feature = element_to_onehot(np.asarray(G.nodes(data='element'))[:, 1])
    # Edge list as an (E, 2) array; the reshape keeps a valid (0, 2) shape
    # for molecules without bonds, which used to fail on column indexing.
    edges = np.asarray(list(G.edges), dtype=np.int64).reshape(-1, 2)
    index = edges.T  # torch geometric expects shape (2, E)
    d = Data(x=torch.tensor(feature, dtype=torch.float),
             edge_index=torch.tensor(index, dtype=torch.long))
    data = d.to(device)  # send data to device memory
    model.eval()  # set model to evaluate mode
    with torch.no_grad():  # inference only, no autograd graph needed
        scores = model(data)
    # NOTE(review): softmax runs over dim 0; if the model returns a
    # (1, n_classes) tensor this should probably be dim=-1 -- confirm.
    print(classes[torch.argmax(torch.softmax(scores, dim=0)).item()])
Пример #11
0
def oneHot(smiles):
    """Return a 4-column one-hot row per atom: C, N, O, H.

    Atoms of the implicit-hydrogen graph get a 1 in the C, N or O column
    (all zeros for any other element). One ``[0, 0, 0, 1]`` row is then
    added for every extra node that appears when the same SMILES is read
    with explicit hydrogens.
    """
    heavy = read_smiles(smiles)
    hydrogenated = read_smiles(smiles, explicit_hydrogen=True)

    column_of = {"C": 0, "N": 1, "O": 2}
    rows = []
    for _, element in heavy.nodes(data='element'):
        encoding = [0, 0, 0, 0]
        if element in column_of:
            encoding[column_of[element]] = 1
        rows.append(encoding)

    # one hydrogen row per node added by the explicit-hydrogen parse
    rows.extend([0, 0, 0, 1] for _ in range(len(heavy), len(hydrogenated)))
    return rows
Пример #12
0
def build_graph(smiles):
    """
    Constructs a DGL graph out of a SMILES representation of a molecule from the train/test data.

    The SMILES string is parsed with pysmiles into a NetworkX graph, whose
    node attributes ('element', 'hcount', 'aromatic') and edge attribute
    ('order') are converted to TensorFlow tensors and attached to the DGL
    graph.

    :param smiles: a string object of SMILES format
    :return: the DGL graph built from the molecule, with
        ``ndata['node_feats']`` holding the per-atom features and
        ``edata['edge_feats']`` holding the bond orders as float32.
    """
    '''
    can access node data and edge data when the graph is in networkx format
    dgl.from_networkx(g) converts networkx to dgl graph but the node data and edge data doesnt seem to be transferred
    Goal: save the node feats and edge feats of networkx as tensor and set them to dgl graph ndata and edata
    Question: Do we save ndata as ('C', 'C', 'C', 'O', 'C') or do we create one hot vectors like in the hw
    '''
    # read the smile graphs in using pysmiles & build network
    g = pysmiles.read_smiles(smiles)

    # get the features from the graph and convert to tensor
    elems = g.nodes(data='element')
    h_count = g.nodes(data='hcount')
    aros = g.nodes(data='aromatic')
    raw_node_feats = []
    # each row becomes [node id, element, hcount, aromatic flag as 0/1]
    for elem, data, aro in zip(elems, h_count, aros):
        node = list(elem)
        node.append(data[1])
        node.append(aro[1] * 1)
        raw_node_feats.append(node)
    # NOTE(review): rows mix strings and numbers, so numpy presumably stores
    # everything as strings here -- confirm.
    na = np.array(list(raw_node_feats))
    # column 1 holds the element symbols
    byte_node_feats = tf.convert_to_tensor(na[:, 1])

    # turn the byte string node feats into one_hot node feats
    # (pt_lookup is defined elsewhere; assumed to leave two spare trailing
    # columns for the values written below -- TODO confirm)
    node_feats = pt_lookup(byte_node_feats).numpy()
    # the last two columns are overwritten with hcount and the aromatic flag
    node_feats[:, -2] = na[:, 2]
    node_feats[:, -1] = na[:, 3]
    node_feats = tf.convert_to_tensor(node_feats)

    # get edge data and extract bonds, double them, then convert to tensor
    edata = g.edges(data='order')
    bonds = list(edata)
    na = np.array(bonds)
    # every bond order is emitted twice in a row: o1, o1, o2, o2, ...
    # NOTE(review): this assumes dgl.from_networkx orders the two directed
    # copies of each bond the same way -- confirm. A molecule without bonds
    # yields a 1-D `na`, so the column indexing below would fail.
    tup = zip(na[:, 2], na[:, 2])
    bond_data = tf.convert_to_tensor(list(itertools.chain(*tup)))
    bond_data = tf.cast(bond_data, tf.float32)
    # build dgl graph
    dgl_graph = dgl.from_networkx(g)

    dgl_graph.ndata['node_feats'] = node_feats
    dgl_graph.edata['edge_feats'] = bond_data

    return dgl_graph
Пример #13
0
def read_from_pysmiles(num=10):
    """Print one SMILES from the training CSV and its pysmiles element view.

    The training path comes from ``train_test_path(num)``. The SMILES column
    is 1 when ``num == 10`` (layout ``id,smiles,activity``) and 0 otherwise.
    The entry at list index 1 is the one printed and parsed.
    """
    train_path, _test_path, _dev_path = train_test_path(num)
    rows = OpenCSV(train_path)
    # id,smiles,activity
    smiles_column = 1 if num == 10 else 0
    all_smiles = [row[smiles_column] for row in rows]

    chosen = all_smiles[1]
    print(chosen)
    molecule = pysmiles.read_smiles(chosen)
    print(molecule.nodes(data='element'))
Пример #14
0
    def __init__(self, file_name):
        """Load a CSV with 'smiles' and 'activity' columns and precompute graphs.

        For every SMILES the pysmiles molecule, its bond-order weighted
        adjacency matrix, a graph rebuilt from that matrix and the element
        node view are stored on the instance.
        """
        table = pd.read_csv(file_name)
        self.data = table

        self.smiles = table['smiles']
        self.labels = table['activity']
        self.mols = list(map(read_smiles, self.smiles))

        self.periodic_table = Chem.GetPeriodicTable()

        # NOTE(review): to_numpy_matrix / from_numpy_matrix were removed in
        # networkx 3.0; they are kept because the attribute type is visible
        # to the rest of the class.
        adjacency = []
        for mol in self.mols:
            adjacency.append(nx.to_numpy_matrix(mol, weight='order'))
        self.ams = adjacency
        self.graphs = list(map(nx.from_numpy_matrix, self.ams))

        element_views = []
        for mol in self.mols:
            element_views.append(mol.nodes(data='element'))
        self.element_lists = element_views
Пример #15
0
def build_graph(smiles):
    """
    Parse a SMILES string from the train/test data into a NetworkX graph.

    :param smiles: a string object of SMILES format
    :return: nx.Graph
        The molecule as returned by ``pysmiles.read_smiles``. Nodes carry
        'element', 'aromatic' and 'charge', plus 'hcount' when hydrogens are
        implicit; depending on the input they may also carry 'isotope' and
        'class'. Edges carry 'order'.
    """
    # TODO: Initialize a DGL Graph
    return pysmiles.read_smiles(smiles)
Пример #16
0
 def predict(self, input: List[JsonSerializable]):
     """
     Dummy test model: count the atoms of each SMILES string in a batch.

     Only the first element of ``input`` is used. It is iterated, and the
     ``"input"`` entry of every item is parsed with explicit hydrogens. The
     result is a one-element list holding one ``{"atoms": counts}`` dict per
     item.
     """
     batch = input[0]
     output = []
     for item in batch:
         molecule = read_smiles(item["input"], explicit_hydrogen=True)
         counts = collections.defaultdict(int)
         for _, element in molecule.nodes(data="element"):
             counts[element] = counts[element] + 1
         output.append({"atoms": counts})
     return [output]
Пример #17
0
 def process(self):
     """Convert every row of ``self.csv_file`` into a graph record and save it.

     The header row is skipped. Column 0 (SMILES) is parsed with implicit
     hydrogens and converted with ``convert_networkx``. The SMILES and the
     integer label from column 1 are added to the record. The full list is
     written to ``self.processed_file`` with ``torch.save`` and returned.
     """
     processed = []
     with open(self.csv_file, 'r') as handle:
         rows = csv.reader(handle, delimiter=',')
         next(rows, None)  # Ignore header
         for row in tqdm(rows):
             smiles = row[0]
             graph = pysmiles.read_smiles(smiles, explicit_hydrogen=False)
             record = convert_networkx(graph, self.ATOM_TYPES)
             record['simles'] = smiles
             record['label'] = torch.LongTensor([int(row[1])])
             processed.append(record)
     torch.save(processed, self.processed_file)
     return processed
Пример #18
0
def load_data_file(path, mode, pos_lim=-1, neg_lim=-1):
    """Read a comma-separated molecule file into ``[graph, label, smiles]`` rows.

    The first line of the file is skipped. stdout is muted while the file is
    parsed, and is restored afterwards even if parsing fails.

    Args:
        path: path of the data file.
        mode: ``"test"`` (first field is the SMILES, label forced to -1),
            ``"train"`` (three fields: ignored, SMILES, label) or ``"tdt"``
            (two fields: SMILES, label).
        pos_lim: maximum number of samples with a non-zero, non-negative
            label to keep; values <= 0 disable the limit.
        neg_lim: maximum number of samples with label 0 to keep; values <= 0
            disable the limit.

    Returns:
        list of ``[networkx graph, int label, SMILES string]``.

    Raises:
        ValueError: if ``mode`` is not one of the three supported values
            (only detected once a data line is read).
    """
    import contextlib

    num_pos = 0
    num_neg = 0

    data = []

    # The context managers close the null-device handle and restore the
    # previous stdout on every exit path.
    with open(os.devnull, "w") as devnull, \
            contextlib.redirect_stdout(devnull):
        # only visible if the redirection above did not take effect
        print("stdout not shut down correctly")

        with open(path, "r") as fil:
            fil.readline()
            for i, line in enumerate(fil):
                fields = line.strip().split(",")
                if mode == "test":
                    mol, label = fields[0], -1
                elif mode == "train":
                    _, mol, label = fields
                elif mode == "tdt":
                    mol, label = fields
                else:
                    raise ValueError("unknown mode: %r" % (mode,))

                label = int(label)

                # count the sample and skip it once its class is over quota
                if label >= 0:
                    if label == 0:
                        num_neg += 1
                        if neg_lim > 0 and num_neg > neg_lim:
                            continue
                    else:
                        num_pos += 1
                        if pos_lim > 0 and num_pos > pos_lim:
                            continue
                # Stop once both quotas are exceeded.
                # NOTE(review): this is only reachable for negative labels,
                # since over-quota samples were already skipped above.
                if (pos_lim > 0 and num_pos > pos_lim) and (
                        neg_lim > 0 and num_neg > neg_lim):
                    break

                # convert the SMILES string into a networkx graph
                mol_g = read_smiles(mol)

                data.append([mol_g, int(label), mol])

                # progress goes to stderr because stdout is muted
                if i % 1000 == 0:
                    sys.stderr.write("%d\n" % i)

    print("stdout recoverd.")

    return data
Пример #19
0
def readf(fname):
    """Assign an index to every element found in the SMILES of a CSV file.

    The first CSV row is skipped. Column 0 of each remaining row is parsed
    with pysmiles. Every element not yet present in the module-level ``cnt``
    mapping is added with the current size of ``cnt`` as its value.
    """
    with open(fname) as handle:
        rows = csv.reader(handle)
        next(rows, None)  # skip the header row
        for row in rows:
            molecule = pysmiles.read_smiles(row[0])
            for _, element in molecule.nodes(data='element'):
                # first occurrence gets the next free index
                cnt.setdefault(element, len(cnt))
Пример #20
0
def smiles_to_formula(smiles_string):
    """Return the chemical formula of a SMILES string, or None.

    Atoms are counted with hydrogens made explicit. The formula lists the
    elements in the iteration order of ``ATOM_MASSES``; a count of 1 is
    written without a number. ``None`` is returned as soon as an element
    that is not a key of ``ATOM_MASSES`` is met.
    """
    molecule = pysmiles.read_smiles(smiles_string, explicit_hydrogen=True)
    tally = dict.fromkeys(ATOM_MASSES, 0)
    for _, element in molecule.nodes(data="element"):
        if element not in tally:
            return None
        tally[element] += 1

    pieces = []
    for element, count in tally.items():
        if count == 1:
            pieces.append(element)
        elif count != 0:
            pieces.append(f"{element}{count}")
    return "".join(pieces)
Пример #21
0
def read_raw(filename, dataset, device, no_h):
    """Read a raw dataset file into pysmiles graphs, targets and atom features.

    Args:
        filename: path of the raw data file.
        dataset: ``'covid-19'`` selects a comma-separated file with a header
            line and 2 fields per row; any other value selects a
            tab-separated file with 13 fields per row and no header.
        device: torch device on which the target tensors are created.
        no_h: when false, hydrogens are added with RDKit before the SMILES
            is regenerated.

    Returns:
        Tuple ``(mols, targets, features)``: the pysmiles graphs, one target
        tensor per molecule (an int scalar for 2-field rows, otherwise the
        floats from field 2 onwards), and the result of
        ``extract_atom_feature`` over all regenerated SMILES.
    """
    if dataset == 'covid-19':
        assertion_len = 2
        smile_idx = 0
        separator = ','
    else:
        assertion_len = 13
        smile_idx = 0
        separator = '\t'

    all_smiles = []
    mols = []
    targets = []
    with open(filename) as f:
        if assertion_len == 2:
            f.readline()  # skip the header line
        progress = tqdm(f)
        progress.set_description('Reading raw data ... ')
        for line in progress:
            # Lines read from a file keep their newline, so comparing with ''
            # never skipped anything; blank lines are now really skipped.
            if not line.strip():
                continue
            l = line.strip().split(separator)

            assert len(l) == assertion_len, \
                'expected %d fields, got %d' % (assertion_len, len(l))

            # regenerate the SMILES with RDKit (with hydrogens unless no_h)
            m = Chem.MolFromSmiles(l[smile_idx])
            if not no_h:
                m = Chem.AddHs(m)
            smiles = Chem.MolToSmiles(m)
            all_smiles.append(smiles)

            if assertion_len == 2:
                target = torch.tensor(int(l[1]), device=device)
            else:
                target = torch.tensor([float(i) for i in l[2:]],
                                      device=device)
            targets.append(target)

            # '[H]' is renamed to '[G]' before parsing -- presumably so that
            # pysmiles keeps hydrogens as ordinary nodes; TODO confirm.
            mol = read_smiles(smiles.replace('[H]', '[G]'),
                              explicit_hydrogen=False,
                              reinterpret_aromatic=True)
            mols.append(mol)

    features = extract_atom_feature(all_smiles, device, no_h)

    return mols, targets, features
def find_elements():
    """List every element token appearing in the datasets' SMILES strings.

    For each directory in the module-level ``paths`` mapping, column 1 of
    ``names_smiles.txt`` is read. Each SMILES is parsed, stripped of
    'stereo' node data and written back with ``write_smiles``. The
    regenerated string is split into tokens. An uppercase letter followed by
    a lowercase letter outside b/c/o/p/s forms a two-character token; every
    other character is its own token.

    The distinct tokens are printed and written, space separated, to
    ``element_list.txt``, followed by the largest token count of any string.
    """
    # lowercase letters that stand for an (aromatic) atom on their own
    lowercase_atoms = ('b', 'c', 'o', 'p', 's')
    element_list = []
    longest_len = 0
    for path in paths.values():
        df_smiles = pd.read_csv(os.path.join(path, 'names_smiles.txt'))

        for smiles in np.array(df_smiles.iloc[:, 1]):
            mol = read_smiles(smiles)
            for node in mol.nodes:
                # discard stereo information by hand
                mol.nodes[node].pop('stereo', None)

            new_smiles = write_smiles(mol)
            last_index = len(new_smiles) - 1
            length = 0
            for i, char in enumerate(new_smiles):
                char = str(char)

                # second letter of a two-character element: already consumed
                if (i > 0 and char.islower()
                        and char not in lowercase_atoms
                        and str(new_smiles[i - 1]).isupper()):
                    continue

                token = char
                if char.isupper() and i < last_index:
                    following = str(new_smiles[i + 1])
                    if following.islower() and following not in lowercase_atoms:
                        token = char + following  # an element with 2 chars

                length += 1
                if token not in element_list:
                    element_list.append(token)

            longest_len = max(longest_len, length)

    print(element_list)
    with open('element_list.txt', 'w') as f:
        f.write("".join("%s " % item for item in element_list))
        f.write(f'{longest_len}')
Пример #23
0
def main():
    """Draw the molecule given on the command line in a turtle window.

    The SMILES string comes from ``args.smiles_string`` or, when that is
    empty, from ``dict_aa[args.mol]``. Ring marking is best effort: if it
    fails, a warning is written to stderr and the unmarked graph is drawn.
    """
    import sys

    # Parse command line arguments to get a smiles string
    args = parse_arguments()
    smiles_string = args.smiles_string or dict_aa[args.mol]

    print("Drawing molecule:\n{}".format(smiles_string))

    # Parse the string into a graph object
    g = read_smiles(smiles_string)
    # Mark the cyclic edges as rings
    try:
        g = structure.mark_rings(g)
    except Exception as err:
        # Keep drawing without ring marks, but do not hide the failure and
        # do not swallow KeyboardInterrupt/SystemExit.
        print("Warning: could not mark rings: {}".format(err),
              file=sys.stderr)

    # init turtle window and start drawing, wait for window events
    draw.init()
    draw.molecule(g)
    draw.done()
def save_mol_img(mols, f_name='tmp.png', is_test=False):
    """Draw the first usable molecule of ``mols`` to an image file.

    Molecules are tried in order. A molecule is skipped when RDKit gives no
    SMILES for it or when any step raises. The function stops after the
    first molecule that is drawn and whose SMILES also parses with pysmiles.

    Args:
        mols: iterable of RDKit molecules.
        f_name: output image path.
        is_test: when true, ``random_string()`` is inserted between the file
            name and its extension.
    """
    import os

    for a_mol in mols:
        try:
            a_smi = Chem.MolToSmiles(a_mol)
            if a_smi is None:
                continue
            print('Generating molecule')

            target = f_name
            if is_test:
                # splitext only splits off the final extension, so directory
                # parts and other dots in the path are preserved (splitting
                # on every '.' and re-joining without them mangled such paths)
                root, ext = os.path.splitext(f_name)
                target = root + random_string() + ext

            rdkit.Chem.Draw.MolToFile(a_mol, target)
            # parsed only as a validity check; a failure moves on to the
            # next molecule
            read_smiles(a_smi)

            break
        except Exception:
            continue
Пример #25
0
def augment_dataset(C , dataset , k = 5):
	"""Augment the non-zero-label samples by inserting a carbon into their SMILES.

	For each of ``C.pos_aug`` rounds, every sample whose label is not 0 is
	copied. Random positions are tried (at most ``len(smiles) - 1`` times)
	until one lies between two uppercase letters; a 'C' is inserted there.
	If no such position is found, the SMILES is copied unchanged. The copies
	are parsed with pysmiles and appended, so later rounds also augment
	earlier copies. The resulting list is shuffled and returned.
	``k`` is unused.
	"""
	for _round in range(C.pos_aug):

		additions = []

		for _graph , label , smiles in dataset:
			if int(label) == 0:
				continue

			for _attempt in range(1 , len(smiles)):
				cut = random.randint(1 , len(smiles) - 1)
				before , after = smiles[cut - 1] , smiles[cut]
				before_ok = before.isalpha() and before.isupper()
				if before_ok and after.isalpha() and after.isupper():
					smiles = smiles[:cut] + 'C' + smiles[cut:]
					break
			additions.append( [pysmiles.read_smiles(smiles) , label , smiles] )

		dataset = dataset + additions

	random.shuffle(dataset)

	return dataset
Пример #26
0
    def __init__(self, smiles):
        """
        Initialize the Molecule from a SMILES string.

        A pybel molecule (with generated 3D coordinates) supplies the formula
        and the atoms; a pysmiles graph read with explicit hydrogens supplies
        the bonds. Bend angles and torsions are then derived from the bonds
        returned by ``self.get_bonds``.

        :param smiles: smiles string
        """

        # Cheminformatics section
        self.smiles = smiles

        self.pybel_mol = readstring("smi", smiles)
        self.pybel_mol.make3D()

        self.mol_formula = self.pybel_mol.formula

        # self.rd_mol = Chem.MolFromSmiles(smiles)
        # self.rd_mol = Chem.AddHs(self.rd_mol)
        # AllChem.EmbedMolecule(self.rd_mol)
        # AllChem.MMFFOptimizeMolecule(self.rd_mol)

        # fall back to a PySmilesCopy placeholder when pysmiles rejects the
        # string (PySmilesCopy is defined elsewhere)
        try:
            self.pysmiles_mol = read_smiles(smiles, explicit_hydrogen=True)
        except ValueError:
            self.pysmiles_mol = PySmilesCopy([0, 1, 1])

        # compositional section
        self.natoms = len(self.pybel_mol.atoms)
        self.position = np.array([0.0, 0.0, 0.0], dtype=float)
        self.atoms = []

        # one Atom per pybel atom, built from atomic number and coordinates
        for i in range(self.natoms):
            atom = self.pybel_mol.atoms[i]
            self.atoms.append(Atom(atom.atomicnum, atom.coords))

        # geometrical section
        self.bonds = []
        self.bond_orders = []
        # each edge is (atom, atom, order): split into the pair and the order
        # NOTE(review): atom indices come from pysmiles while natoms comes
        # from pybel; this assumes both number the atoms alike -- confirm.
        for bond in self.pysmiles_mol.edges(data='order'):
            self.bonds.append(bond[:-1])
            self.bond_orders.append(bond[2])

        self.angles = []
        for i in range(self.natoms):
            bonds = self.get_bonds(i)

            # one angle per pair of consecutive bonds of atom i (not every
            # pair of bonds)
            for j in range(len(bonds) - 1):
                self.angles.append(sort_bend_angle(bonds[j] + bonds[j + 1]))

        self.torsions = []
        for index1 in range(self.natoms):
            bonds = self.get_bonds(index1)
            if len(bonds) >= 2:
                for i, middle_bond in enumerate(bonds):
                    # atom at the other end of the middle bond
                    index2 = [atom for atom in middle_bond
                              if atom != index1][0]
                    bonds2 = self.get_bonds(index2)
                    if len(bonds2) >= 2:

                        # first outer bond: the next bond of index1, wrapping
                        # around to the first one
                        if i == len(bonds) - 1:
                            bond1 = bonds[0]
                        else:
                            bond1 = bonds[i + 1]

                        # second outer bond: the last bond of index2 that is
                        # not the middle bond
                        # NOTE(review): if no such bond exists, bond2 keeps
                        # its value from an earlier iteration (or is unbound
                        # the first time) -- confirm this cannot happen.
                        for bond in bonds2:
                            if sorted(bond) != sorted(middle_bond):
                                bond2 = bond

                        self.torsions.append(
                            sort_torsion_bonds(bond1, middle_bond, bond2))
            else:
                continue
Пример #27
0
from property_prediction.data_utils import TaskDataLoader
import networkx as nx
from pysmiles import read_smiles

# Find the molecules of a property dataset whose pysmiles graph has one node.
task = 'FreeSolv'
path = '../datasets/{}.csv'.format(task)

data_loader = TaskDataLoader(task, path)
smiles_list, y = data_loader.load_property_data()

# positions in smiles_list of the single-node molecules
indices = []

for i in range(len(smiles_list)):
    graph = read_smiles(smiles_list[i])
    number_of_nodes = graph.number_of_nodes()
    print('number of nodes for index ', i, ' is: ', number_of_nodes)
    if number_of_nodes != 1:
        continue
    indices.append(i)
    print(smiles_list[i])

print(indices)
Пример #28
0
			wf.write(data)

# Export every molecule listed in REAL.csv: the SMILES string goes to one
# text file and the labelled bond-order matrix goes to another (via writeFile).
# The CSV is opened in a `with` block so the handle is closed, and the output
# file no longer reuses the reader's name `f`.
with open('REAL.csv', 'r', encoding='utf-8') as file:
	f = csv.reader(file)
	next(f, None)  # skip the header row

	for line in f:
		if not line:
			continue
		name, smiles, _, group = line[:4]

		# the name is expected to contain a '-'; the part after the first
		# one is used in the file name
		filename = str(group) + 'REAL' + name.split('-')[1]

		print(smiles, filename, group)
		mol = read_smiles(str(smiles))

		# element labels, preceded by a '0' placeholder for the extra
		# first row/column added below
		node_labels = ['0'] + [label[1] for label in mol.nodes(data='element')]

		# bond-order adjacency matrix with a zero first row and column
		matrix = nx.to_numpy_array(mol, weight='order').tolist()
		content = [[0] * (len(matrix) + 1)]
		for row in matrix:
			content.append([0] + row)
		print()

		# NOTE(review): the SMILES file goes to 'group/smiles<group>/' while
		# the matrix goes to 'group/<group>/' -- confirm both are intended.
		with open('group/smiles'+ group + '/' + filename +'.txt', 'w') as smiles_file:
			smiles_file.write(smiles)
		writeFile('group/' + group + '/' + filename + '.txt', content, node_labels)
	
Пример #29
0
import pysmiles
import dgl
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pdb

# Parse a sample molecule and dump its bonds and atoms for inspection.
g = pysmiles.read_smiles(
    "CN1CC[C@@]23C=C[C@@H](C[C@@H]2OC4=C(C=CCC(=C34)C1)OC)O.Br")
#g = pysmiles.read_smiles("O=[N+]([O-])C(Br)(CO)CO")

# print the data of every bond between node ids below 30 (each bond shows up
# once per direction)
for i in range(30):
    for j in range(30):
        if not g.has_edge(i, j):
            continue
        e = g[i][j]
        print("%d - %d" % (i, j), e)

# print the attributes of the first (up to) 30 nodes
for i in range(min(30, g.number_of_nodes())):
    print(g.nodes[i])

# drop into the debugger to explore the graph interactively
pdb.set_trace()
#g = dgl.DGLGraph(g)


def draw(g):
    #g = g.to_networkx().to_undirected()

    def make_color(x):
        if x == 'C':
Пример #30
0
    return edges_occ
           
chemicals = pd.read_pickle("smiles.pickle")
chemicals_data = []
print(chemicals)
    
headers = []
targets = []
i = 0
for index, row in chemicals.iterrows(): 
    if not is_tree(G):
        continue
    i+=1
    if i%1 == 0:
        print(i)
    G = read_smiles(row["smiles"], explicit_hydrogen=True)
    try:
        mol_weight = MW(CAS_from_any(row["chemicals"]))
        boiling_point =  Tb(CAS_from_any(row['chemicals']))
    except(ValueError):
        continue
    if boiling_point == None or mol_weight == None:
        continue
    occ = count_atom_occurencies(G)
    occ.update({'boiling_point': boiling_point})
    """
    try:
        with timeout(2, exception=RuntimeError):
            occ.update({'GP_index': calculate_indices.calcuate_pisanski(G)})
    except RuntimeError:
        continue