def test():
    """Print the hash of three graphs that differ only in their ``@`` labels.

    The three input hashes list the same node types and share the same bond
    string after ``~``; only the ``@1``/``@2`` suffixes on the ``CH`` nodes
    are permuted (presumably chirality markers -- confirm against
    ``chemconvert``).  Each string is parsed with ``chemconvert.hash2graph``
    and the result of the graph's ``hash()`` is printed, one per line, so
    the outputs can be compared by eye.
    """
    import chemconvert

    input_hashes = (
        "CH@1,CH@1,CH@2,CH2,CH@2,O,OH,OH,OH,O,PO3-2~0110101000000100010000100000100000000000200000000010000",
        "CH@2,CH@1,CH@2,CH2,CH@1,O,OH,OH,OH,O,PO3-2~0110101000000100010000100000100000000000200000000010000",
        "CH@1,CH@2,CH@1,CH2,CH@2,O,OH,OH,OH,O,PO3-2~0110101000000100010000100000100000000000200000000010000",
    )

    # Parse all three before printing anything, so a parse failure produces
    # no partial output.
    graphs = [chemconvert.hash2graph(h) for h in input_hashes]
    for graph in graphs:
        # Call form of print: valid on Python 2 (single argument) and 3.
        print(graph.hash())
def test():
    """Print the hash of three graphs that differ only in their ``@`` labels.

    The three input hashes list the same node types and share the same bond
    string after ``~``; only the ``@1``/``@2`` suffixes on the ``CH`` nodes
    are permuted (presumably chirality markers -- confirm against
    ``chemconvert``).  Each string is parsed with ``chemconvert.hash2graph``
    and the result of the graph's ``hash()`` is printed, one per line, so
    the outputs can be compared by eye.
    """
    import chemconvert

    input_hashes = (
        "CH@1,CH@1,CH@2,CH2,CH@2,O,OH,OH,OH,O,PO3-2~0110101000000100010000100000100000000000200000000010000",
        "CH@2,CH@1,CH@2,CH2,CH@1,O,OH,OH,OH,O,PO3-2~0110101000000100010000100000100000000000200000000010000",
        "CH@1,CH@2,CH@1,CH2,CH@2,O,OH,OH,OH,O,PO3-2~0110101000000100010000100000100000000000200000000010000",
    )

    # Parse all three before printing anything, so a parse failure produces
    # no partial output.
    graphs = [chemconvert.hash2graph(h) for h in input_hashes]
    for graph in graphs:
        # Call form of print: valid on Python 2 (single argument) and 3.
        print(graph.hash())
def generate_all_graphs(motif_hist, size):
    """Generate hash strings for all the likely connected graphs with 'size' nodes.

    The result starts from every hash already present in ``motif_hist[size]``.
    It is then extended by taking each graph of ``motif_hist[size - 1]``,
    appending one new node, and trying every atom in ``atom_types`` with
    every combination of ``bond_types`` between the new node and the
    existing ones.  A candidate is kept when
    ``get_likelihood(G, motif_hist, size - 1)`` is greater than ``log_zero``
    and the graph reports itself connected and of legal valence.

    Args:
        motif_hist: indexable by motif size; iterating ``motif_hist[k]``
            yields the hash strings of the known k-node motifs.
        size: number of nodes of the graphs to generate; must be > 1.

    Returns:
        A list of unique hash strings (order is that of a set, i.e.
        unspecified).

    Raises:
        ValueError: if ``size`` is not greater than 1.
    """
    import itertools

    if size <= 1:
        raise ValueError(
            "size must be greater than 1 for using generate_all_graphs")

    # start by adding all the existing graphs of 'size' atoms
    hash_set = set(motif_hist[size])

    # expand by seeding with all graphs of (size-1) atoms that appear in the
    # database and adding another atom to them
    graph_seeds = [chemconvert.hash2graph(h) for h in motif_hist[size - 1]]

    n = size - 1  # the index of the newly added atom

    # every possible new row of the bond matrix: one bond type per existing
    # node.  Materialised because it is re-used for every seed and atom.
    bond_rows = list(itertools.product(bond_types, repeat=n))

    for G in graph_seeds:  # for all motifs with size-1 atoms
        # the seed graph is extended in place and then re-labelled and
        # re-wired for each candidate
        G.add_node("")
        for atom in atom_types:  # try each of the atoms in atom_types
            G.set_node(n, atom)
            for bonds in bond_rows:  # connect with all bond combinations
                for m, bond in enumerate(bonds):
                    G.set_bond(n, m, bond)
                # check likelihood, connectivity and valence
                if (get_likelihood(G, motif_hist, size - 1) > log_zero
                        and G.is_connected()
                        and G.is_legal_valence()):
                    hash_set.add(G.hash())
    return list(hash_set)
def generate_all_graphs(motif_hist, size):
    """Generate hash strings for all the likely connected graphs with 'size' nodes.

    The result starts from every hash already present in ``motif_hist[size]``.
    It is then extended by taking each graph of ``motif_hist[size - 1]``,
    appending one new node, and trying every atom in ``atom_types`` with
    every combination of ``bond_types`` between the new node and the
    existing ones.  A candidate is kept when
    ``get_likelihood(G, motif_hist, size - 1)`` is greater than ``log_zero``
    and the graph reports itself connected and of legal valence.

    Args:
        motif_hist: indexable by motif size; iterating ``motif_hist[k]``
            yields the hash strings of the known k-node motifs.
        size: number of nodes of the graphs to generate; must be > 1.

    Returns:
        A list of unique hash strings (order is that of a set, i.e.
        unspecified).

    Raises:
        ValueError: if ``size`` is not greater than 1.
    """
    import itertools

    if size <= 1:
        raise ValueError(
            "size must be greater than 1 for using generate_all_graphs")

    # start by adding all the existing graphs of 'size' atoms
    hash_set = set(motif_hist[size])

    # expand by seeding with all graphs of (size-1) atoms that appear in the
    # database and adding another atom to them
    graph_seeds = [chemconvert.hash2graph(h) for h in motif_hist[size - 1]]

    n = size - 1  # the index of the newly added atom

    # every possible new row of the bond matrix: one bond type per existing
    # node.  Materialised because it is re-used for every seed and atom.
    bond_rows = list(itertools.product(bond_types, repeat=n))

    for G in graph_seeds:  # for all motifs with size-1 atoms
        # the seed graph is extended in place and then re-labelled and
        # re-wired for each candidate
        G.add_node("")
        for atom in atom_types:  # try each of the atoms in atom_types
            G.set_node(n, atom)
            for bonds in bond_rows:  # connect with all bond combinations
                for m, bond in enumerate(bonds):
                    G.set_bond(n, m, bond)
                # check likelihood, connectivity and valence
                if (get_likelihood(G, motif_hist, size - 1) > log_zero
                        and G.is_connected()
                        and G.is_legal_valence()):
                    hash_set.add(G.hash())
    return list(hash_set)
def read_hash():
    """Parse the hash typed by the user and append the graph to ``graph_list``.

    Reads the text of ``input_stringvar``, converts it with ``hash2graph``,
    initialises the node positions and appends the graph to ``graph_list``.
    The outcome is reported through ``error_stringvar``: a "Ready." message
    that echoes the hash on success, or the text of the ``ChemException``
    on failure.  ``root.update()`` is then called so the message becomes
    visible (presumably a Tk root window -- confirm against the caller).
    """
    try:
        hash_str = input_stringvar.get()
        graph = hash2graph(hash_str)
        graph.initialize_pos()
        error_stringvar.set("Ready.\nHASH = %s" % hash_str)
        graph_list.append(graph)
    except ChemException as strerror:
        error_stringvar.set("ERROR :" + str(strerror))
    # NOTE(review): the indentation of the two statements below could not be
    # recovered from the source; they are kept at function level so the
    # display is refreshed on both the success and the error path.
    root.update()
    graph = None
def convert_compounds_hash(compound_dat_filename):
    """Write a MOL file under ``../mol`` for every compound of a DAT file.

    The DAT file is parsed with ``util.parse_dat`` into a mapping from each
    "UNIQUE-ID" to its "HASH" values.  For every compound that does not yet
    have ``../mol/<id>.mol``, the first hash is converted to a graph and its
    ``to_mol()`` text is written to that file.  Compounds with no hash are
    announced but no file is created for them.  Existing files are left
    untouched.

    Args:
        compound_dat_filename: path of the compound DAT file to read.
    """
    compound2hash = util.parse_dat(compound_dat_filename, "UNIQUE-ID", "HASH")
    util._mkdir("../mol")
    for key, hashes in compound2hash.items():
        mol_filename = "../mol/" + key + ".mol"
        if os.path.exists(mol_filename):
            print("Found the MOL file: " + mol_filename)
            continue

        print("Writing a MOL file to: " + mol_filename)
        if hashes:
            # only the first hash of a compound is converted
            mol = chemconvert.hash2graph(hashes[0]).to_mol()
            # 'with' closes the file even when the write fails
            with open(mol_filename, "w") as mol_file:
                mol_file.write(mol)
def __init__(self, carbon_only=True, ignore_chirality=True, use_antimotifs=True,
             reaction_database_fname="../rec/reaction_templates.dat"):
    """Load the reaction templates from a DAT file and index them.

    Every section of the file that has both a SUBSTRATE and a PRODUCT field
    yields two templates, ``<UNIQUE_ID>_forward`` and
    ``<UNIQUE_ID>_backward``, stored in ``self.reaction_templates``.  The
    forward id is appended to ``self.forward_reaction_list`` and the
    backward id to ``self.backward_reaction_list``; when REVERSIBLE is
    'TRUE' each id is also appended to the other list.

    Args:
        carbon_only: when true, skip sections whose CARBON field is missing
            or 'FALSE'.
        ignore_chirality: when true, skip sections whose CHIRAL field is
            'TRUE'.
        use_antimotifs: stored on the instance; not otherwise used here.
        reaction_database_fname: path of the template DAT file.

    Raises:
        ReactionException: when a key needed to build a template (or the
            REVERSIBLE field) is missing; the message names the key and the
            template id.
        KeyError: when a section has no UNIQUE_ID (raised unwrapped).
    """

    def deduce_reaction_list(template):
        """List the Reaction steps turning template['G_SUBS'] into template['G_PROD']."""
        G_subs = template["G_SUBS"]
        G_prod = template["G_PROD"]
        import_indices = template["IMPORT"]
        export_indices = template["EXPORT"]
        reaction_list = []
        N = G_subs.get_num_nodes()

        # add the new imported atoms to the graph
        if import_indices != []:
            import_atoms = [G_subs.get_node(n) for n in import_indices]
            reaction_list.append(Reaction("import", import_indices, None, import_atoms, external=True))

            # add the bonds that the imported atoms come with 'built-in'
            for n in import_indices:
                for m in import_indices:
                    if n > m and G_subs.get_bond(n, m) != 0:
                        reaction_list.append(Reaction("bond", [n, m], 0, G_subs.get_bond(n, m), external=True))

            # NOTE(review): the original indentation was lost; this step is
            # assumed to belong to the import branch -- confirm.
            reaction_list.append(Reaction("update_position", import_indices, None, None, external=True))

        # change the attributes of all the atoms in the graph from 'subs' to 'prod'
        for n in range(N):
            if G_prod.chirality[n] != G_subs.chirality[n]:
                reaction_list.append(Reaction("chirality", [n], G_subs.chirality[n], G_prod.chirality[n]))
            if G_prod.charges[n] != G_subs.charges[n]:
                reaction_list.append(Reaction("charge", [n], G_subs.charges[n], G_prod.charges[n]))
            if G_prod.hydrogens[n] != G_subs.hydrogens[n]:
                reaction_list.append(Reaction("hydrogen", [n], G_subs.hydrogens[n], G_prod.hydrogens[n]))

        # change the values of all the bonds from 'subs' to 'prod'
        for n in range(N):
            for m in range(n):
                if G_prod.get_bond(n, m) != G_subs.get_bond(n, m):
                    reaction_list.append(Reaction("bond", [n, m], G_subs.get_bond(n, m), G_prod.get_bond(n, m)))

        # break all the bonds in the exported atoms
        for n in export_indices:
            for m in export_indices:
                if n > m and G_prod.get_bond(n, m) != 0:
                    reaction_list.append(Reaction("bond", [n, m], G_prod.get_bond(n, m), 0, external=True))

        # remove the exported atoms from the graph
        if export_indices != []:
            reaction_list.append(Reaction("export", export_indices, None, None, external=True))

        return reaction_list

    def build_template(section, direction, unique_id, reverse_id):
        """Build the template dictionary of one direction of a DAT section."""
        forward = (direction == "forward")
        # the backward template swaps substrate/product and import/export
        substrate_key, product_key = ("SUBSTRATE", "PRODUCT") if forward else ("PRODUCT", "SUBSTRATE")
        import_key, export_key = (("IMPORTED_ATOMS", "EXPORTED_ATOMS") if forward
                                  else ("EXPORTED_ATOMS", "IMPORTED_ATOMS"))

        template = {}
        template["EC"] = section["EC"]
        template["REMARK"] = section.get("REMARK", "")
        if forward:
            template["NAME"] = section["NAME"]
        else:
            template["NAME"] = section.get("REV_NAME", section["NAME"] + " [r]")
        template["DESCRIPTION"] = section.get("DESCRIPTION", "")
        template["DIRECTION"] = direction
        template["SUBSTRATE"] = section[substrate_key]
        template["PRODUCT"] = section[product_key]
        template["IMPORT"] = util.str2intvector(section.get(import_key, ""))
        template["EXPORT"] = util.str2intvector(section.get(export_key, ""))
        template["G_SUBS"] = hash2graph(section[substrate_key])
        template["G_PROD"] = hash2graph(section[product_key])
        template["REACTION"] = deduce_reaction_list(template)
        template["UNIQUE_ID"] = unique_id
        template["REVERSE_ID"] = reverse_id
        return template

    # read the template file and store the information in a dictionary
    self.forward_reaction_list = []
    self.backward_reaction_list = []
    self.reaction_templates = {}
    self.antimotif_counter = {}
    self.ignore_chirality = ignore_chirality
    self.use_antimotifs = use_antimotifs
    self.carbon_only = carbon_only

    sys.stderr.write("Parsing reaction database file: " + reaction_database_fname + "\n")
    # 'with' makes sure the database file is closed, also on errors
    with open(reaction_database_fname, "r") as dat_file:
        while True:
            section = util.read_next_dat_section(dat_file)
            if section is None:
                break
            if not ("SUBSTRATE" in section and "PRODUCT" in section):
                continue

            unique_id = section["UNIQUE_ID"]

            # if the reaction is not tagged as "CARBON", and we are working only on carbon, skip.
            if self.carbon_only and section.get("CARBON", "FALSE") == "FALSE":
                continue

            # if the reaction has to do with chirality, and we choose to ignore it, skip.
            if self.ignore_chirality and section.get("CHIRAL", "FALSE") == "TRUE":
                continue

            forward_id = unique_id + "_forward"
            backward_id = unique_id + "_backward"

            try:
                self.reaction_templates[forward_id] = build_template(
                    section, "forward", forward_id, backward_id)
            except KeyError as msg:
                raise ReactionException(str(msg) + " in " + forward_id)

            try:
                self.reaction_templates[backward_id] = build_template(
                    section, "backward", backward_id, forward_id)

                self.forward_reaction_list.append(forward_id)
                self.backward_reaction_list.append(backward_id)
                if section["REVERSIBLE"] == "TRUE":
                    self.forward_reaction_list.append(backward_id)
                    self.backward_reaction_list.append(forward_id)
            except KeyError as msg:
                # str() is required: concatenating the KeyError object
                # itself raises TypeError and hides the real problem
                raise ReactionException(str(msg) + " in " + backward_id)
def __init__(
    self,
    carbon_only=True,
    ignore_chirality=True,
    use_antimotifs=True,
    reaction_database_fname="../rec/reaction_templates.dat",
):
    """Load the reaction templates from a DAT file and index them.

    Every section of the file that has both a SUBSTRATE and a PRODUCT field
    yields two templates, ``<UNIQUE_ID>_forward`` and
    ``<UNIQUE_ID>_backward``, stored in ``self.reaction_templates``.  The
    forward id is appended to ``self.forward_reaction_list`` and the
    backward id to ``self.backward_reaction_list``; when REVERSIBLE is
    'TRUE' each id is also appended to the other list.

    Args:
        carbon_only: when true, skip sections whose CARBON field is missing
            or 'FALSE'.
        ignore_chirality: when true, skip sections whose CHIRAL field is
            'TRUE'.
        use_antimotifs: stored on the instance; not otherwise used here.
        reaction_database_fname: path of the template DAT file.

    Raises:
        ReactionException: when a key needed to build a template (or the
            REVERSIBLE field) is missing; the message names the key and the
            template id.
        KeyError: when a section has no UNIQUE_ID (raised unwrapped).
    """

    def deduce_reaction_list(template):
        """List the Reaction steps turning template['G_SUBS'] into template['G_PROD']."""
        G_subs = template["G_SUBS"]
        G_prod = template["G_PROD"]
        import_indices = template["IMPORT"]
        export_indices = template["EXPORT"]
        reaction_list = []
        N = G_subs.get_num_nodes()

        # add the new imported atoms to the graph
        if import_indices != []:
            import_atoms = [G_subs.get_node(n) for n in import_indices]
            reaction_list.append(Reaction("import", import_indices, None, import_atoms, external=True))

            # add the bonds that the imported atoms come with 'built-in'
            for n in import_indices:
                for m in import_indices:
                    if n > m and G_subs.get_bond(n, m) != 0:
                        reaction_list.append(Reaction("bond", [n, m], 0, G_subs.get_bond(n, m), external=True))

            # NOTE(review): the original indentation was lost; this step is
            # assumed to belong to the import branch -- confirm.
            reaction_list.append(Reaction("update_position", import_indices, None, None, external=True))

        # change the attributes of all the atoms in the graph from 'subs' to 'prod'
        for n in range(N):
            if G_prod.chirality[n] != G_subs.chirality[n]:
                reaction_list.append(Reaction("chirality", [n], G_subs.chirality[n], G_prod.chirality[n]))
            if G_prod.charges[n] != G_subs.charges[n]:
                reaction_list.append(Reaction("charge", [n], G_subs.charges[n], G_prod.charges[n]))
            if G_prod.hydrogens[n] != G_subs.hydrogens[n]:
                reaction_list.append(Reaction("hydrogen", [n], G_subs.hydrogens[n], G_prod.hydrogens[n]))

        # change the values of all the bonds from 'subs' to 'prod'
        for n in range(N):
            for m in range(n):
                if G_prod.get_bond(n, m) != G_subs.get_bond(n, m):
                    reaction_list.append(Reaction("bond", [n, m], G_subs.get_bond(n, m), G_prod.get_bond(n, m)))

        # break all the bonds in the exported atoms
        for n in export_indices:
            for m in export_indices:
                if n > m and G_prod.get_bond(n, m) != 0:
                    reaction_list.append(Reaction("bond", [n, m], G_prod.get_bond(n, m), 0, external=True))

        # remove the exported atoms from the graph
        if export_indices != []:
            reaction_list.append(Reaction("export", export_indices, None, None, external=True))

        return reaction_list

    def build_template(section, direction, unique_id, reverse_id):
        """Build the template dictionary of one direction of a DAT section."""
        forward = (direction == "forward")
        # the backward template swaps substrate/product and import/export
        substrate_key, product_key = ("SUBSTRATE", "PRODUCT") if forward else ("PRODUCT", "SUBSTRATE")
        import_key, export_key = (("IMPORTED_ATOMS", "EXPORTED_ATOMS") if forward
                                  else ("EXPORTED_ATOMS", "IMPORTED_ATOMS"))

        template = {}
        template["EC"] = section["EC"]
        template["REMARK"] = section.get("REMARK", "")
        if forward:
            template["NAME"] = section["NAME"]
        else:
            template["NAME"] = section.get("REV_NAME", section["NAME"] + " [r]")
        template["DESCRIPTION"] = section.get("DESCRIPTION", "")
        template["DIRECTION"] = direction
        template["SUBSTRATE"] = section[substrate_key]
        template["PRODUCT"] = section[product_key]
        template["IMPORT"] = util.str2intvector(section.get(import_key, ""))
        template["EXPORT"] = util.str2intvector(section.get(export_key, ""))
        template["G_SUBS"] = hash2graph(section[substrate_key])
        template["G_PROD"] = hash2graph(section[product_key])
        template["REACTION"] = deduce_reaction_list(template)
        template["UNIQUE_ID"] = unique_id
        template["REVERSE_ID"] = reverse_id
        return template

    # read the template file and store the information in a dictionary
    self.forward_reaction_list = []
    self.backward_reaction_list = []
    self.reaction_templates = {}
    self.antimotif_counter = {}
    self.ignore_chirality = ignore_chirality
    self.use_antimotifs = use_antimotifs
    self.carbon_only = carbon_only

    sys.stderr.write("Parsing reaction database file: " + reaction_database_fname + "\n")
    # 'with' makes sure the database file is closed, also on errors
    with open(reaction_database_fname, "r") as dat_file:
        while True:
            section = util.read_next_dat_section(dat_file)
            if section is None:
                break
            if not ("SUBSTRATE" in section and "PRODUCT" in section):
                continue

            unique_id = section["UNIQUE_ID"]

            # if the reaction is not tagged as "CARBON", and we are working only on carbon, skip.
            if self.carbon_only and section.get("CARBON", "FALSE") == "FALSE":
                continue

            # if the reaction has to do with chirality, and we choose to ignore it, skip.
            if self.ignore_chirality and section.get("CHIRAL", "FALSE") == "TRUE":
                continue

            forward_id = unique_id + "_forward"
            backward_id = unique_id + "_backward"

            try:
                self.reaction_templates[forward_id] = build_template(
                    section, "forward", forward_id, backward_id)
            except KeyError as msg:
                raise ReactionException(str(msg) + " in " + forward_id)

            try:
                self.reaction_templates[backward_id] = build_template(
                    section, "backward", backward_id, forward_id)

                self.forward_reaction_list.append(forward_id)
                self.backward_reaction_list.append(backward_id)
                if section["REVERSIBLE"] == "TRUE":
                    self.forward_reaction_list.append(backward_id)
                    self.backward_reaction_list.append(forward_id)
            except KeyError as msg:
                # str() is required: concatenating the KeyError object
                # itself raises TypeError and hides the real problem
                raise ReactionException(str(msg) + " in " + backward_id)
#!/usr/bin/python
# Render every hash listed in a text file as an SVG drawing and collect the
# drawings in ../results/hash_list.html.
#
# Usage: pass the path of the text file (one hash per line) as the first
# command-line argument.
import sys
import os
import util
from chemconvert import hash2graph
from html_writer import HtmlWriter
from svg import Scene

if len(sys.argv) < 2:
    # fail with a readable message instead of an IndexError traceback
    sys.exit("usage: %s <hash-list-file>" % sys.argv[0])

html = HtmlWriter("../results/hash_list.html")
util._mkdir("../results/hash_list")
for line in util.parse_text_file(sys.argv[1]):
    # echo the hash being processed (call form works on Python 2 and 3)
    print(line)
    graph = hash2graph(line)
    graph.initialize_pos()
    scene = graph.svg(Scene(200, 200, font_size=12))
    # the hash itself is used as the name of the drawing
    html.write_svg(scene, "../results/hash_list/" + line)
html.display()
def main():
    """Write the HTML/SVG motif pages and the list of anti-motifs.

    For every size in ``motif_sizes`` the known motifs are loaded, all
    candidate graphs of that size are generated, and each candidate is
    scored with ``get_likelihood`` for sub-sizes 2..size.  Candidates are
    grouped by ``G.template()``.  ``<html_path>/motifs.html`` links to one
    page per size, which shows each template and links to a page with its
    instances.  An instance is skipped when both of its last two
    likelihoods are <= ``log_zero``.  It is drawn in red and recorded as an
    anti-motif when only the last one is, and drawn in green otherwise.
    The anti-motif hashes are written to ``../results/stat/anti_motifs.txt``.
    """
    import sys

    init()
    subdir = "motifs"
    motif_fullpath = html_path + "/motifs"
    util._mkdir(motif_fullpath)
    anti_motif_list = []

    # 'with' closes the main page even if one of the sizes fails
    with open(html_path + "/motifs.html", "w") as main_html_file:
        for size in motif_sizes:
            motifs = verify_file(size)
            motifs_t = motifs_templates(motifs)
            motif_hist = normalize_motifs(motifs)
            # NOTE(review): the template histogram is not read below; the
            # call is kept in case the helper has side effects -- confirm.
            motif_hist_t = normalize_motifs(motifs_t)

            # progress message without a newline, completed by "[DONE]"
            sys.stdout.write("Generating all graphs of size %d ... " % size)
            sys.stdout.flush()
            all_graphs = generate_all_graphs(motif_hist, size)
            print("[DONE]")

            # template hash -> list of (delta_l, likelihoods, count, hash)
            results = {}
            for h in all_graphs:
                G = chemconvert.hash2graph(h)
                template = G.template()
                count = int(motifs.get(h, 0))
                # indices 0 and 1 are placeholders so that likelihoods[i]
                # is the likelihood computed for sub-size i
                likelihoods = [0, 0]
                for i in range(2, size + 1):
                    likelihoods.append(get_likelihood(G, motif_hist, i))
                delta_l = likelihoods[-2] - likelihoods[-1]
                results.setdefault(template, []).append(
                    (delta_l, likelihoods, count, h))

            main_html_file.write("<p>")
            size_html_file = util.embed_link(
                main_html_file, html_path, subdir + "/motifs%s" % size,
                "Motifs of size %s" % size)
            main_html_file.write("</p>")

            for template, graph_list in results.items():
                size_html_file.write("<p>")
                template_svg = chemconvert.hash2svg(
                    template, 200, 200, node_color=black, bond_color=grey)
                template_svg.embed_in_html(size_html_file, motif_fullpath, template)

                count_t = len(graph_list)
                if count_t > 0:
                    template_html_file = util.embed_link(
                        size_html_file, motif_fullpath, template,
                        "View all %d instances" % count_t)
                    for delta_l, likelihoods, count, h in sorted(graph_list):
                        if likelihoods[-1] <= log_zero and likelihoods[-2] <= log_zero:
                            continue
                        elif likelihoods[-1] <= log_zero:
                            bond_color = red
                            anti_motif_list.append(h)
                        else:
                            bond_color = green

                        motif_svg = chemconvert.hash2svg(h, 150, 150, black, bond_color)
                        motif_svg.set_attribute("height", 200)

                        # annotate the drawing with its scores
                        l_string = ",".join(["%.1f" % value for value in likelihoods[2:]])
                        motif_svg.add(svg.Text((10, 160), "ΔL = %.1f" % delta_l, font_size=12))
                        motif_svg.add(svg.Text((10, 175), l_string, font_size=12))
                        if count > 0:
                            motif_svg.add(svg.Text((10, 190), "count = %d" % count, font_size=12))
                        motif_svg.embed_in_html(template_html_file, motif_fullpath, h)
                    template_html_file.close()
                else:
                    size_html_file.write("No instances")
                size_html_file.write("</p>")
            size_html_file.close()

    util.write_text_file(anti_motif_list, "../results/stat/anti_motifs.txt")
def main():
    """Write the HTML/SVG motif pages and the list of anti-motifs.

    For every size in ``motif_sizes`` the known motifs are loaded, all
    candidate graphs of that size are generated, and each candidate is
    scored with ``get_likelihood`` for sub-sizes 2..size.  Candidates are
    grouped by ``G.template()``.  ``<html_path>/motifs.html`` links to one
    page per size, which shows each template and links to a page with its
    instances.  An instance is skipped when both of its last two
    likelihoods are <= ``log_zero``.  It is drawn in red and recorded as an
    anti-motif when only the last one is, and drawn in green otherwise.
    The anti-motif hashes are written to ``../results/stat/anti_motifs.txt``.
    """
    import sys

    init()
    subdir = "motifs"
    motif_fullpath = html_path + "/motifs"
    util._mkdir(motif_fullpath)
    anti_motif_list = []

    # 'with' closes the main page even if one of the sizes fails
    with open(html_path + "/motifs.html", "w") as main_html_file:
        for size in motif_sizes:
            motifs = verify_file(size)
            motifs_t = motifs_templates(motifs)
            motif_hist = normalize_motifs(motifs)
            # NOTE(review): the template histogram is not read below; the
            # call is kept in case the helper has side effects -- confirm.
            motif_hist_t = normalize_motifs(motifs_t)

            # progress message without a newline, completed by "[DONE]"
            sys.stdout.write("Generating all graphs of size %d ... " % size)
            sys.stdout.flush()
            all_graphs = generate_all_graphs(motif_hist, size)
            print("[DONE]")

            # template hash -> list of (delta_l, likelihoods, count, hash)
            results = {}
            for h in all_graphs:
                G = chemconvert.hash2graph(h)
                template = G.template()
                count = int(motifs.get(h, 0))
                # indices 0 and 1 are placeholders so that likelihoods[i]
                # is the likelihood computed for sub-size i
                likelihoods = [0, 0]
                for i in range(2, size + 1):
                    likelihoods.append(get_likelihood(G, motif_hist, i))
                delta_l = likelihoods[-2] - likelihoods[-1]
                results.setdefault(template, []).append(
                    (delta_l, likelihoods, count, h))

            main_html_file.write("<p>")
            size_html_file = util.embed_link(
                main_html_file, html_path, subdir + "/motifs%s" % size,
                "Motifs of size %s" % size)
            main_html_file.write("</p>")

            for template, graph_list in results.items():
                size_html_file.write("<p>")
                template_svg = chemconvert.hash2svg(
                    template, 200, 200, node_color=black, bond_color=grey)
                template_svg.embed_in_html(size_html_file, motif_fullpath, template)

                count_t = len(graph_list)
                if count_t > 0:
                    template_html_file = util.embed_link(
                        size_html_file, motif_fullpath, template,
                        "View all %d instances" % count_t)
                    for delta_l, likelihoods, count, h in sorted(graph_list):
                        if likelihoods[-1] <= log_zero and likelihoods[-2] <= log_zero:
                            continue
                        elif likelihoods[-1] <= log_zero:
                            bond_color = red
                            anti_motif_list.append(h)
                        else:
                            bond_color = green

                        motif_svg = chemconvert.hash2svg(h, 150, 150, black, bond_color)
                        motif_svg.set_attribute("height", 200)

                        # annotate the drawing with its scores
                        l_string = ",".join(["%.1f" % value for value in likelihoods[2:]])
                        motif_svg.add(svg.Text((10, 160), "ΔL = %.1f" % delta_l, font_size=12))
                        motif_svg.add(svg.Text((10, 175), l_string, font_size=12))
                        if count > 0:
                            motif_svg.add(svg.Text((10, 190), "count = %d" % count, font_size=12))
                        motif_svg.embed_in_html(template_html_file, motif_fullpath, h)
                    template_html_file.close()
                else:
                    size_html_file.write("No instances")
                size_html_file.write("</p>")
            size_html_file.close()

    util.write_text_file(anti_motif_list, "../results/stat/anti_motifs.txt")