def testGetRawAlignment(self):
    """Check cmd.get_raw_alignment with and without the active_only flag.

    Three peptides are built with ``fab``; m2 and m3 are each aligned onto
    m1 into the shared alignment object 'aln', then m2 is disabled.  The
    raw alignment must list every column when ``active_only=0`` and must
    omit m2 from every column when ``active_only=1``.
    """
    from collections import defaultdict

    cmd.fab('ACDEGGKLMN', 'm1')
    cmd.fab('CDEFFGGK', 'm2')
    cmd.fab('ASDEKLMNFY', 'm3')
    for mobile in ('m2', 'm3'):
        cmd.align(mobile + ' & guide', 'm1 & guide', cycles=0, object='aln')
    cmd.disable('m2')

    # expecting alignment:
    # m1 ACDE--GGKLMN--
    # m2 -CDEFFGGK-----
    # m3 ASDE----KLMNFY

    # atom indices of the guide atoms, grouped per model
    guideids = defaultdict(list)
    cmd.iterate('guide', 'guideids[model].append(index)',
                space={'guideids': guideids})

    # one entry per expected alignment column: model -> guide atom position
    columns = [
        {'m1': 0, 'm3': 0},
        {'m1': 1, 'm2': 0, 'm3': 1},
        {'m1': 2, 'm2': 1, 'm3': 2},
        {'m1': 3, 'm2': 2, 'm3': 3},
        {'m1': 4, 'm2': 5},
        {'m1': 5, 'm2': 6},
        {'m1': 6, 'm2': 7, 'm3': 4},
        {'m1': 7, 'm3': 5},
        {'m1': 8, 'm3': 6},
        {'m1': 9, 'm3': 7},
    ]
    aln_expect = [
        {model: guideids[model][pos] for model, pos in column.items()}
        for column in columns
    ]

    def as_dicts(raw):
        # each raw column is a sequence of (model, index) pairs
        return [dict(col) for col in raw]

    self.assertEqual(as_dicts(cmd.get_raw_alignment('aln', 0)), aln_expect)

    # remove m2 from alignment
    for column in aln_expect:
        column.pop('m2', None)

    self.assertEqual(as_dicts(cmd.get_raw_alignment('aln', 1)), aln_expect)
def get_alignment_coords(name, active_only=0, state=-1, quiet=0):
    '''
DESCRIPTION

    API only function. Collects the coordinates of all atoms that sit in
    gap-free columns of an alignment object.

    Returns a dictionary {object name: list of (x, y, z) tuples}. Every
    list has N entries, N being the number of alignment columns that
    contain as many atoms as the alignment has objects.

ARGUMENTS

    name = string: name of the alignment object

    active_only = 0 or 1: passed on to cmd.get_raw_alignment {default: 0}

    state = int: object state to read coordinates from {default: -1}

    quiet = 0 or 1: accepted and converted, otherwise unused

EXAMPLE

    import numpy
    from psico.multistuff import *
    from psico.querying import *

    extra_fit('name CA', cycles=0, object='aln')
    x = get_alignment_coords('aln')
    m = numpy.array(list(x.values()))
    '''
    active_only, state, quiet = int(active_only), int(state), int(quiet)

    raw_columns = cmd.get_raw_alignment(name, active_only)
    models = cmd.get_object_list(name)

    # (model, index) -> (x, y, z), filled in by PyMOL's iterate_state
    coords_by_atom = {}
    cmd.iterate_state(state, name, 'idx2coords[model,index] = (x,y,z)',
                      space={'idx2coords': coords_by_atom})

    n_models = len(models)
    result = {model: [] for model in models}

    for column in raw_columns:
        # keep only columns whose atom count equals the number of objects
        if len(column) == n_models:
            for model, index in column:
                result[model].append(coords_by_atom[model, index])

    return result
def from_alignment(self, mobile, target, aln_obj):
    '''
    Use alignment given by "aln_obj" (name of alignment object)

    Sets self.mobile and self.target to selections of the aligned atoms.
    If the simple "(selection) and aln_obj" expressions already pass
    self.check(), they are used as they are. Otherwise two temporary named
    selections are created (and registered in self.temporary) which hold
    only the atoms from columns with exactly one mobile and exactly one
    target atom.
    '''
    from .selecting import wait_for
    wait_for(aln_obj)

    self.mobile = '(%s) and %s' % (mobile, aln_obj)
    self.target = '(%s) and %s' % (target, aln_obj)
    if self.check():
        return

    # difficult: if selections spans only part of the alignment or
    # if alignment object covers more than the two objects, then we
    # need to pick those columns that have no gap in any of the two
    # given selections

    mobile_atoms = set(cmd.index(mobile))
    target_atoms = set(cmd.index(target))
    mobile_picked = []
    target_picked = []

    for column in cmd.get_raw_alignment(aln_obj):
        in_mobile = mobile_atoms.intersection(column)
        if len(in_mobile) != 1:
            continue
        in_target = target_atoms.intersection(column)
        if len(in_target) != 1:
            continue
        mobile_picked.extend(in_mobile)
        target_picked.extend(in_target)

    self.mobile = cmd.get_unused_name('_mobile')
    self.target = cmd.get_unused_name('_target')
    self.temporary.extend([self.mobile, self.target])

    mobile_models = set(atom[0] for atom in mobile_picked)
    target_models = set(atom[0] for atom in target_picked)

    if len(mobile_models) == 1 and len(target_models) == 1:
        # each side lives in a single object: select by index list
        mobile_indices = [atom[1] for atom in mobile_picked]
        target_indices = [atom[1] for atom in target_picked]
        cmd.select_list(self.mobile, mobile_models.pop(),
                        mobile_indices, mode='index')
        cmd.select_list(self.target, target_models.pop(),
                        target_indices, mode='index')
    else:
        # several objects involved: build "object`index" expressions
        mobile_expr = ' '.join('%s`%d' % atom for atom in mobile_picked)
        target_expr = ' '.join('%s`%d' % atom for atom in target_picked)
        cmd.select(self.mobile, mobile_expr)
        cmd.select(self.target, target_expr)
def return_aligned_res():
    """Align "thermo" onto "meso" and return the unique aligned residue pairs.

    Runs ``cmd.align("thermo", "meso", object='aln')``, reads the raw
    alignment of the resulting object 'aln' and translates every aligned
    atom pair into a pair of residue identifiers (``resi``).

    Returns:
        list of ``(resi_a, resi_b)`` tuples in order of first appearance,
        each distinct pair listed once.

    Raises:
        ValueError: if an alignment column does not hold exactly two atoms
            (raised by the tuple unpacking).
        KeyError: if an aligned atom is not part of the 'aln' selection.
    """
    cmd.align("thermo", "meso", object='aln')
    raw_aln = cmd.get_raw_alignment('aln')

    # (model, index) -> resi, filled in by PyMOL's iterate
    idx2resi = {}
    cmd.iterate('aln', 'idx2resi[model, index] = resi',
                space={'idx2resi': idx2resi})

    aligned_res = []
    seen = set()  # O(1) membership test instead of scanning aligned_res
    for idx1, idx2 in raw_aln:
        pair = (idx2resi[idx1], idx2resi[idx2])
        if pair not in seen:
            seen.add(pair)
            aligned_res.append(pair)
    return aligned_res
def bssa(
    sel1,
    sel2,
    polymer1="polymer",
    polymer2="polymer",
    radius=4,
    method="overlap",
    verbose=1,
):
    """
    Bind site similarity analysis. Align the sequence of both selections
    and compute similarity coefficients between two sites.

    The number of columns of the raw alignment is used as the size of the
    intersection; the atom counts of the two site selections are used as
    the set sizes.

    OPTIONS
        sel1        Selection or object 1.
        sel2        Selection or object 2.
        polymer1    protein of sel1.
        polymer2    protein of sel2.
        radius      Radius to look for nearby aminoacids.
        method      'overlap' or 'sorensen-dice'
        verbose     Print the coefficient when true.

    RETURNS
        The similarity coefficient (float).

    RAISES
        ValueError          if method is not one of the supported names.
        ZeroDivisionError   if the site selections contain no atoms.

    EXAMPLES
        bssa *CS.000_*, *CS.002_*, radius=4
        bssa *D.001*, *D.002*, polymer1='obj1', polymer2='obj2'
        bssa 6y84.Bs.001, 6y84.B.004, method=sorensen-dice
    """
    # Reject an unknown method before doing any work in the PyMOL session.
    # ValueError is a subclass of Exception, so existing handlers still match.
    if method not in ("overlap", "sorensen-dice"):
        raise ValueError("Not supported method.")

    sel1 = f"(polymer and ({polymer1})) within {radius} of ({sel1})"
    sel2 = f"(polymer and ({polymer2})) within {radius} of ({sel2})"

    pm.align(sel1, sel2, object='aln')
    try:
        n1 = pm.count_atoms(sel1)
        n2 = pm.count_atoms(sel2)
        inter = len(pm.get_raw_alignment('aln'))
    finally:
        # never leave the temporary alignment object behind
        pm.delete('aln')

    if method == "overlap":
        coef = inter / min(n1, n2)
    else:  # "sorensen-dice"
        coef = 2 * inter / (n1 + n2)

    if verbose:
        print("Similarity coefficient =", coef)
    return coef
def color_by_conservation(aln, names=(), color="rainbow", as_putty=0, _self=cmd):
    """Color objects by how many structures align at each position.

    For every column of the alignment object ``aln`` the B-factor of each
    atom in the column is set to ``(1 + column size) / M``; all other atoms
    of the involved objects get ``b = 0.0``. The objects are then colored
    with ``spectrum`` on the B-factor.

    Arguments:
        aln: name of the alignment object.
        names: objects to reset and color. With the default ``()`` the
            objects are taken from the alignment itself and ``M`` is the
            largest column size plus one; otherwise ``M`` is the number of
            distinct names plus one.
        color: palette passed to ``spectrum``.
        as_putty: when not 0, show the objects as putty cartoon.
        _self: the PyMOL ``cmd`` instance to operate on.

    Returns:
        None. Prints an error and returns early when ``aln`` is not an
        alignment object.
    """
    # PyMOL doesn't yet know about object:alignment
    # but we need to check that this exists or we might crash
    if _self.get_type(aln) not in ("object:", "object:alignment"):
        print("Error: Bad or incorrectly specified alignment object.")
        return None

    # use the instance we were given, like every other call in this function
    r = _self.get_raw_alignment(aln)

    if names == ():
        known_objs = {atom[0] for column in r for atom in column}
        # highest number of matches seen ([0] keeps max() from failing on
        # an alignment without columns)
        M = max([len(column) for column in r] or [0]) + 1
    else:
        known_objs = set(names)
        M = len(known_objs) + 1

    for obj in known_objs:
        _self.alter(obj, "b=0.0")

    for column in r:
        c = float(1.0 + len(column)) / float(M)
        for atom in column:
            _self.alter("%s and index %s" % (atom[0], atom[1]), "b=c",
                        space={'c': c})

    if as_putty != 0:
        for obj in known_objs:
            _self.show_as("cartoon", "%s" % obj)
            _self.cartoon("putty", "%s" % obj)
            _self.spectrum('b', color, obj)
            _self.sort()
            _self.rebuild()
    return None
def colorbyrmsd(mobile, target, doAlign=1, doPretty=1, guide=1, method='super', quiet=1):
    '''
DESCRIPTION

    Align two structures and show the structural deviations in color to
    more easily see variable regions.

    Colors each mobile/target atom-pair by distance (the name is a bit
    misleading).

    Modifies the B-factor columns in your original structures. Atoms that
    are not part of the alignment get b = -1.

ARGUMENTS

    mobile = string: atom selection for mobile atoms

    target = string: atom selection for target atoms

    doAlign = 0 or 1: Superpose selections before calculating distances
    {default: 1}

    doPretty = 0 or 1: Show nice representation and colors {default: 1}

    guide = 0 or 1: Restrict both selections to guide atoms {default: 1}

    method = string: name of the alignment command to use, looked up in
    cmd.keyword {default: super}

    quiet = 0 or 1: Suppress RMSD and distance statistics {default: 1}

EXAMPLE

    fetch 1ake 4ake, async=0
    remove chain B
    colorbyrmsd 1ake, 4ake
    '''
    from chempy import cpv

    doAlign, doPretty = int(doAlign), int(doPretty)
    guide, quiet = int(guide), int(quiet)
    aln, seleboth = '_aln', '_objSelBoth'

    # "except Exception" keeps KeyboardInterrupt/SystemExit propagating
    try:
        align = cmd.keyword[method][0]
    except Exception:
        print(' Error: no such method: %s' % (method,))
        raise CmdException

    if guide:
        mobile = '(%s) and guide' % mobile
        target = '(%s) and guide' % target

    try:
        if doAlign:
            # superpose
            zz = align(mobile, target, object=aln)
        else:
            # get alignment without superposing
            zz = align(mobile, target, cycles=0, transform=0, object=aln)
        if not quiet:
            print("RMSD =  %s" % (zz[0],))
    except Exception:
        print(' Error: Alignment with method %s failed' % (method))
        raise CmdException

    try:
        cmd.select(seleboth, '(%s) or (%s)' % (mobile, target))

        # (model, index) -> (x, y, z)
        idx2coords = dict()
        cmd.iterate_state(-1, seleboth, 'idx2coords[model,index] = (x,y,z)',
                          space={'idx2coords': idx2coords})

        if cmd.count_atoms('?' + aln, 1, 1) == 0:
            # this should ensure that "aln" will be available as selectable object
            cmd.refresh()

        # (model, index) -> distance to the aligned partner atom
        b_dict = dict()
        for col in cmd.get_raw_alignment(aln):
            assert len(col) == 2
            b = cpv.distance(idx2coords[col[0]], idx2coords[col[1]])
            for idx in col:
                b_dict[idx] = b

        cmd.alter(seleboth, 'b = b_dict.get((model, index), -1)',
                  space={'b_dict': b_dict})

        if doPretty:
            cmd.orient(seleboth)
            cmd.show_as('cartoon', 'byobj ' + seleboth)
            cmd.color('gray', seleboth)
            cmd.spectrum('b', 'blue_red', seleboth + ' and b > -0.5')

        # min()/max() and the average are undefined without aligned pairs
        if not quiet and b_dict:
            distances = list(b_dict.values())
            print(" ColorByRMSD: Minimum Distance: %.2f" % (min(distances)))
            print(" ColorByRMSD: Maximum Distance: %.2f" % (max(distances)))
            print(" ColorByRMSD: Average Distance: %.2f" % (sum(distances) / len(distances)))
    finally:
        # remove the temporary objects even when something above failed
        cmd.delete(aln)
        cmd.delete(seleboth)
def morpheasy(source, target, source_state=0, target_state=0, name=None,
        refinement=5, quiet=1):
    '''
DESCRIPTION

    Morph source to target, based on sequence alignment

    Copies of both selections are created, aligned by sequence (without
    moving them), reduced to the aligned atoms, and the target copy gets
    the atom identifiers of the source copy. The two copies are then
    merged into a 2-state object which is handed to rigimol.morph.

USAGE

    morpheasy source, target [, source_state [, target_state [, name ]]]

ARGUMENTS

    source = string: atom selection of the start structure

    target = string: atom selection of the end structure

    source_state, target_state = int: object states; values < 1 are
    replaced by get_selection_state() of the selection {default: 0}

    name = string: name of the morph object {default: unused name "morph"}

    refinement = int: passed on to rigimol.morph {default: 5}

RETURNS

    Name of the morph object, or None if epymol is not available.

EXAMPLE

    fetch 1akeA 4akeA, async=0
    extra_fit
    morpheasy 1akeA, 4akeA
    '''
    try:
        from epymol import rigimol
    except ImportError:
        print('No epymol available, please use a "Incentive PyMOL" build')
        print('You may use "morpheasy_linear" instead')
        return

    from .editing import mse2met
    from .querying import get_selection_state

    # arguments
    source_state = int(source_state)
    target_state = int(target_state)
    refinement = int(refinement)
    quiet = int(quiet)

    if source_state < 1:
        source_state = get_selection_state(source)
    if target_state < 1:
        target_state = get_selection_state(target)

    # temporary objects
    # IMPORTANT: cmd.get_raw_alignment does not work with underscore object names!
    alnobj = cmd.get_unused_name('_aln')
    so_obj = cmd.get_unused_name('source')  # see above
    ta_obj = cmd.get_unused_name('target')  # see above
    so_sel = cmd.get_unused_name('_source_sel')
    ta_sel = cmd.get_unused_name('_target_sel')
    cmd.create(so_obj, source, source_state, 1)
    cmd.create(ta_obj, target, target_state, 1)
    mse2met(so_obj)
    mse2met(ta_obj)

    # align sequence
    cmd.align(ta_obj, so_obj, object=alnobj, cycles=0, transform=0,
            mobile_state=1, target_state=1)
    cmd.refresh()
    cmd.select(so_sel, '%s and %s' % (so_obj, alnobj))
    cmd.select(ta_sel, '%s and %s' % (ta_obj, alnobj))

    # bidirectional atom mapping; dict() requires two atoms per column
    alnmap = dict(cmd.get_raw_alignment(alnobj))
    # .items() exists on Python 2 and 3; the reversed copy is built
    # completely before update() runs
    alnmap.update(dict((v, k) for (k, v) in alnmap.items()))

    # copy source atom identifiers to temporary target
    idmap = dict()
    cmd.iterate(so_sel, 'idmap[model,index] = (segi,chain,resi,resn,name)',
            space={'idmap': idmap})
    cmd.alter(ta_sel, '(segi,chain,resi,resn,name) = idmap[alnmap[model,index]]',
            space={'idmap': idmap, 'alnmap': alnmap})

    # remove unaligned
    cmd.remove('%s and not %s' % (so_obj, so_sel))
    cmd.remove('%s and not %s' % (ta_obj, ta_sel))

    assert cmd.count_atoms(so_obj) == cmd.count_atoms(ta_obj)
    cmd.sort(so_obj)
    cmd.sort(ta_obj)

    # append target to source as 2-state morph-in object
    cmd.create(so_obj, ta_obj, 1, 2)

    # morph
    if name is None:
        name = cmd.get_unused_name('morph')
    # "async" is a reserved word since Python 3.7, so the keyword argument
    # is passed through a dict; the call itself is unchanged
    rigimol.morph(so_obj, name, refinement=refinement, **{'async': 0})

    # clean up
    for obj in [alnobj, so_obj, so_sel, ta_obj, ta_sel]:
        cmd.delete(obj)

    return name
def colorbyrmsd(mobile, target, doAlign=1, doPretty=1, guide=1, method='super', quiet=1):
    '''
DESCRIPTION

    Align two structures and show the structural deviations in color to
    more easily see variable regions.

    Colors each mobile/target atom-pair by distance (the name is a bit
    misleading).

    Modifies the B-factor columns in your original structures. Atoms that
    are not part of the alignment get b = -1.

ARGUMENTS

    mobile = string: atom selection for mobile atoms

    target = string: atom selection for target atoms

    doAlign = 0 or 1: Superpose selections before calculating distances
    {default: 1}

    doPretty = 0 or 1: Show nice representation and colors {default: 1}

    guide = 0 or 1: Restrict both selections to guide atoms {default: 1}

    method = string: name of the alignment command to use, looked up in
    cmd.keyword {default: super}

    quiet = 0 or 1: Suppress the distance statistics {default: 1}

EXAMPLE

    fetch 1ake 4ake, async=0
    remove chain B
    colorbyrmsd 1ake, 4ake
    '''
    from chempy import cpv

    doAlign, doPretty = int(doAlign), int(doPretty)
    guide, quiet = int(guide), int(quiet)
    aln, seleboth = '_aln', '_objSelBoth'

    # "except Exception" keeps KeyboardInterrupt/SystemExit propagating
    try:
        align = cmd.keyword[method][0]
    except Exception:
        print(' Error: no such method: ' + str(method))
        raise CmdException

    if guide:
        mobile = '(%s) and guide' % mobile
        target = '(%s) and guide' % target

    try:
        if doAlign:
            # superpose
            align(mobile, target)

        # get alignment without superposing
        align(mobile, target, cycles=0, transform=0, object=aln)
    except Exception:
        print(' Error: Alignment with method %s failed' % (method))
        raise CmdException

    try:
        cmd.select(seleboth, '(%s) or (%s)' % (mobile, target))

        # (model, index) -> (x, y, z)
        idx2coords = dict()
        cmd.iterate_state(-1, seleboth, 'idx2coords[model,index] = (x,y,z)',
                          space={'idx2coords': idx2coords})

        if cmd.count_atoms('?' + aln, 1, 1) == 0:
            # this should ensure that "aln" will be available as selectable object
            cmd.refresh()

        # (model, index) -> distance to the aligned partner atom
        b_dict = dict()
        for col in cmd.get_raw_alignment(aln):
            assert len(col) == 2
            b = cpv.distance(idx2coords[col[0]], idx2coords[col[1]])
            for idx in col:
                b_dict[idx] = b

        cmd.alter(seleboth, 'b = b_dict.get((model, index), -1)',
                  space={'b_dict': b_dict})

        if doPretty:
            cmd.orient(seleboth)
            cmd.show_as('cartoon', 'byobj ' + seleboth)
            cmd.color('gray', seleboth)
            cmd.spectrum('b', 'blue_red', seleboth + ' and b > -0.5')

        # min()/max() and the average are undefined without aligned pairs
        if not quiet and b_dict:
            distances = list(b_dict.values())
            print(" ColorByRMSD: Minimum Distance: %.2f" % (min(distances)))
            print(" ColorByRMSD: Maximum Distance: %.2f" % (max(distances)))
            print(" ColorByRMSD: Average Distance: %.2f" % (sum(distances) / len(distances)))
    finally:
        # remove the temporary objects even when something above failed
        cmd.delete(aln)
        cmd.delete(seleboth)
def rmsd(selection="all", chains="", doAlign=1, doPretty=1,
         algorithm=1, guide=1, method="super", quiet=1,
         colorstyle="blue_red", colormode=""):
    """
DESCRIPTION

    Align every pair of objects in a selection and store, per atom, the
    average distance to its aligned partner atoms in the B-factor column.
    Atoms without any aligned partner get b = -1.

ARGUMENTS

    selection = string: atoms/objects to process {default: all}

    chains = string: chain identifiers, one character each (e.g. "ab");
    when given, objects are aligned chain by chain {default: ""}

    doAlign = 0 or 1: Superpose selections before calculating distances
    {default: 1}

    doPretty = 0 or 1: Show cartoon and color by B-factor {default: 1}

    algorithm = 0 or 1: 0 averages the direct partner distances only,
    1 additionally follows each partner atom to its own partners
    {default: 1}

    guide = 0 or 1: Restrict alignments to guide atoms {default: 1}

    method = string: alignment command, looked up in cmd.keyword
    {default: super}

    quiet = 0 or 1: Suppress the distance statistics {default: 1}

    colorstyle = string: palette passed to spectrum {default: blue_red}

    colormode = "", "lowshow" or "highshow": "lowshow" caps the color
    range at the average distance, "highshow" starts it there
    {default: ""}

RAISES

    CmdException: unknown method, unknown colormode, or coloring /
    statistics requested while no atom pairs were aligned.
    """
    from chempy import cpv

    # initial parameters
    doAlign, doPretty = int(doAlign), int(doPretty)
    guide, quiet = int(guide), int(quiet)
    algorithm = int(algorithm)

    # get suitable align method
    try:
        align = cmd.keyword[method][0]
    except Exception:
        print("Error: no such method: %s" % (method,))
        raise CmdException

    # selection suffix computed once instead of rebinding "guide" in the loop
    guide_sele = " and guide" if guide else ""

    # get objects and storage for each atom's coordinates
    objects = set()
    idx2coords = dict()
    cmd.iterate_state(-1, selection, "objects.add(model) ",
                      space={'objects': objects})

    # rmsd_stored[obj][obj1][(model, index)] = [(model1, index1), distance]
    rmsd_stored = dict()
    # (model, index) -> distance of the most recently processed pair. Kept
    # across all object pairs so the color range and statistics no longer
    # depend on which pair happened to be processed last.
    total_values = {}

    def record_pair(obj, obj1, fit_suffix, aln_suffix):
        # Align obj1 onto obj and store the per-atom partner distances.
        if doAlign:
            align(obj1 + fit_suffix, obj + fit_suffix)
        align(obj1 + aln_suffix, obj + aln_suffix,
              cycles=0, transform=0, object="aln")
        # coordinates are re-read because superposing moves atoms
        cmd.iterate_state(-1, selection, "idx2coords[model,index] = (x,y,z)",
                          space={'idx2coords': idx2coords})
        if cmd.count_atoms('?' + "aln", 1, 1) == 0:
            # this should ensure that "aln" will be available as selectable object
            cmd.refresh()
        pair_store = rmsd_stored[obj][obj1]
        for col in cmd.get_raw_alignment("aln"):
            assert len(col) == 2
            b = cpv.distance(idx2coords[col[0]], idx2coords[col[1]])
            for idx in col:
                total_values[idx] = b
            if col[0][0] == obj:
                pair_store[col[0]] = [col[1], b]
            else:
                pair_store[col[1]] = [col[0], b]
        cmd.delete("aln")

    for obj in objects:
        rmsd_stored[obj] = {}
        for obj1 in objects:
            if obj == obj1:
                continue
            rmsd_stored[obj][obj1] = {}
            if chains:
                for eachchain in chains:
                    chain_sele = " and chain " + eachchain
                    # NOTE(review): the alignment object is built without the
                    # guide restriction here, unlike the branch below --
                    # kept as in the original, confirm intent.
                    record_pair(obj, obj1, guide_sele + chain_sele, chain_sele)
            else:
                record_pair(obj, obj1, guide_sele, guide_sele)

    def b_replace(model, index):
        # average distance of atom (model, index) to its aligned partners
        n = 0
        bsum = 0
        for obj1 in objects:
            if model == obj1:
                continue
            partners = rmsd_stored[model][obj1]
            if (model, index) not in partners:
                continue
            (nextmodel, nextindex), dist = partners[model, index]
            bsum += dist
            n += 1
            if not algorithm:
                continue
            # NOTE(review): the partner stored under rmsd_stored[model][obj1]
            # looks like it always belongs to obj1, which would make the
            # condition below never true -- condition kept as in the
            # original, confirm intent.
            for nextobj1 in objects:
                if nextmodel != nextobj1 and nextmodel != obj1:
                    if (nextmodel, nextindex) in rmsd_stored[nextmodel][nextobj1]:
                        bsum += rmsd_stored[nextmodel][nextobj1][nextmodel, nextindex][1]
                        n += 1
        if n == 0:
            return -1
        return bsum / n

    cmd.alter(selection, 'b = b_replace(model, index)',
              space={'b_replace': b_replace})

    if not total_values and (doPretty or not quiet):
        # min()/max() below are undefined without aligned atom pairs
        print(" Error: no atom pairs were aligned")
        raise CmdException

    if doPretty:
        mini = min(total_values.values())
        maxi = max(total_values.values())
        if colormode:
            if colormode == "lowshow":
                maxi = sum(total_values.values()) / len(total_values)
                print("This is lowshow")
            elif colormode == "highshow":
                mini = sum(total_values.values()) / len(total_values)
                print("This is highshow")
            else:
                raise CmdException
        cmd.orient(selection)
        cmd.show_as('cartoon', 'byobj ' + selection)
        cmd.color('gray', selection)
        cmd.spectrum('b', colorstyle, selection + ' and b > -0.5',
                     minimum=mini, maximum=maxi)

    if not quiet:
        print(" ColorByRMSD: Minimum Distance: %.2f" % (min(total_values.values())))
        print(" ColorByRMSD: Maximum Distance: %.2f" % (max(total_values.values())))
        print(" ColorByRMSD: Average Distance: %.2f" % (sum(total_values.values()) / len(total_values)))
def drawNetwork(path1, path2, sele=None, sele1=None, sele2=None, top1=None, top2=None, r=1, edge_norm=None, alpha=0.5, mutations=False, align_with = None, node_color=(0.6, 0.6, 0.6), edge_color1 = (0, 0, 1), palette="colorblind", edge_color2 = (1, 0, 0), labeling='0', norm_expected=False, threshold=0, topk=None, max_compo=None, mean_vp=None, strong_compo=None, around=None, keep_previous=False, compo_size=None, save_cc=None, load_cc=None, compos_to_excel = None, force_binary_color=False, compo_radius=None, compo_diam=None, label_compo='', auto_patch=True, printall=False, sum=False, n_clusters=None, color_by_compo=False, color_by_group=False, show_top_group=None, name1 = None, name2 = None, name_nodes='nodes', userSelection='all', fromstruct=None, color_by_contact_type=False, standard_and_expected=None):
    '''
    Draws a NetworkX network on the PyMol structure

    Two contact matrices are read with load() from path1 and path2, reduced
    with the matrices returned by create_top(), and their difference
    (mat2 - mat1, called "pertmat") is turned into a NetworkX graph. Edges
    are drawn as CGO cylinders and nodes as CGO spheres via cmd.load_cgo.

    Outline of the options, as far as the body shows:

    path1, path2        inputs for load(); also used to derive default
                        topology file names ('<prefix>.topy') and default
                        edge object names
    sele, sele1, sele2  selections handed to create_top(); sele overrides
                        the other two; the default is
                        'protein && not hydrogen' for both
    top1, top2          topology inputs for load()
    r                   sphere radius; also scales the default edge_norm
    edge_norm           divisor for edge weights to get cylinder radii;
                        None -> max weight / r, True -> derived from the
                        topology matrices
    mutations           compare residue labels of top1 and top2 and show
                        differing residues as magenta licorice
    align_with          selection aligned to userSelection to remap mat2
    threshold, topk, max_compo, mean_vp, compo_size, compo_diam,
    compo_radius, strong_compo, standard_and_expected
                        different ways to choose which edges/components
                        are kept
    around              keep only the connected component of this node
    keep_previous       do not delete previously drawn objects
    color_by_compo, color_by_group, color_by_contact_type,
    force_binary_color, palette, node_color, edge_color1, edge_color2
                        coloring mode and colors
    label_compo         string of flags: 'h' (hubs), 'c' (component text),
                        's' (secondary structure ranges), 'IGPS'
    compos_to_excel     path for an Excel export of component statistics
    labeling            1 or 3 to label the drawn nodes
    sum, printall       print the sum of pertmat / all edges

    "alpha" is not referenced in the body. "sum" shadows the builtin
    within this function.

    Raises ValueError('Computations give empty network') when no edge is
    left after filtering.

    NOTE(review): load, create_top, divide_expected, relabel, selection,
    get_connected_components, get_expected_type, get_hubs, cyl_text,
    IGPS_mapping and three2one are defined outside this function; their
    exact contracts are not visible here.
    '''
    #Initialization of labeling variables and retrieving residue XYZ positions
    if not keep_previous:
        cmd.delete('*nodes *edges Component* Group*')
        cmd.label(selection=userSelection, expression="")
        cmd.hide("licorice", "?mutations")
    # Building position -- name correspondence
    stored.posCA = []
    stored.names = []
    stored.ss = []
    userSelection = userSelection + " and ((n. CA) or n. C)"
    cmd.iterate_state(1, selector.process(userSelection), "stored.posCA.append([x,y,z])")
    cmd.iterate(userSelection, "stored.ss.append(ss)")
    cmd.iterate(userSelection, 'stored.names.append(resn+resi+chain)')
    stored.labels = list(map(relabel, stored.names))
    stored.resid = list(map(selection, stored.names))
    # label -> value produced by selection() / label -> [x, y, z]
    node2id = dict(zip(stored.labels, stored.resid))
    node2CA = dict(zip(stored.labels, stored.posCA))

    #Secondary Structure labels
    prevSS, prevChain = None, None
    counters = {'': 0, 'H': 0, 'S': 0, 'L': 0}
    node2SS = dict(zip(stored.labels, stored.ss))
    SS2nodelist = {}
    # '' and 'L' are both flagged 'U'
    putflag = lambda X: 'U' if X in ['', 'L'] else X
    for label in node2SS:
        ss = node2SS[label]
        # last character of the label is used as chain identifier
        chain = label[-1]
        if prevChain != chain:
            for counter in counters:
                counters[counter] = 0
        if prevSS != ss:
            counters[ss] +=1
        labss = putflag(ss)+str(counters[ss])+':'+chain
        if labss in SS2nodelist:
            SS2nodelist[labss].append(label)
        else:
            SS2nodelist[labss] = [label]
        prevSS = ss
        prevChain = chain

    # Forward pass: rename 'U' elements after the element preceding them
    # ('Head:<chain>' when first in their chain)
    prevkey, prevChain = None, None
    order = []
    keys = list(SS2nodelist.keys())
    for key in keys:
        if prevChain != key.split(':')[-1]:
            prevkey = None
        if key[0] == 'U':
            if prevkey == None:
                newkey = 'Head:'+key.split(':')[-1]
            else:
                newkey = 'U'+prevkey
            SS2nodelist[newkey] = SS2nodelist.pop(key)
            order.append(newkey)
        else:
            order.append(key)
        prevkey = key
        prevChain = key.split(':')[-1]

    # Backward pass: complete 'U' names with the element following them
    # ('Tail:<chain>' when last in their chain)
    prevkey = None
    final = []
    for key in order[::-1]:
        if prevChain != key.split(':')[-1]:
            prevkey = None
        if key[0] == 'U':
            if prevkey == None:
                newkey = 'Tail:'+key.split(':')[-1]
            else:
                newkey = '{}-{}'.format(key[1:], prevkey)
            SS2nodelist[newkey] = SS2nodelist.pop(key)
            final.append(newkey)
        else:
            final.append(key)
        prevkey = key
        prevChain = key.split(':')[-1]
    # ss_dict = dict(zip(keys, final[::-1]))

    # Display names: 'S' -> 'β', 'H' -> 'α' (the third replace restores 'Head')
    mapss = {}
    for key in final:
        newkey = key.replace('S', 'β').replace('H', 'α').replace('αead', 'Head')
        if 'IGPS' in str(label_compo):
            _ = []
            for elt in newkey.split('-'):
                if elt.split(':')[1] in ['A', 'C', 'E']:
                    _.append('𝘧{}'.format(elt.split(':')[0]))
                elif elt.split(':')[1] in ['B', 'D', 'F']:
                    _.append('𝘩{}'.format(elt.split(':')[0]))
            newkey = '-'.join(_)
            mapss[key] = IGPS_mapping[newkey]
        else:
            mapss[key] = newkey
    for ss in SS2nodelist:
        for node in SS2nodelist[ss]:
            node2SS[node] = mapss[ss]

    #Loading external data
    atom_mat1, atom_mat2 = list(map(load, [path1, path2]))
    get_ext = lambda X: X.split('.')[-1]
    # ext1/ext2 are not used further down in this function
    ext1, ext2 = list(map(get_ext, [path1, path2]))
    top1 = load(path1.split('_')[0].split('.')[0]+'.topy') if top1 == None else load(top1)
    top2 = load(path2.split('_')[0].split('.')[0]+'.topy') if top2 == None else load(top2)

    #Handling selections
    if sele != None:
        sele1, sele2 = [sele]*2
    if sele == None and sele1 == None and sele2 == None:
        sele1, sele2 = ['protein && not hydrogen']*2
        print('Default selection protein without hydrogens')
    sels = [sele1, sele2]

    #Creating topology matrices for each selection
    topg1, topd1 = [create_top(sel, top1, fromstruct) for sel in sels]
    topg2, topd2 = [create_top(sel, top2, fromstruct) for sel in sels]

    #From atomic to residual contacts and perturbation network computation
    mat1 = (atom_mat1 @ topd1).transpose() @ topg1
    mat2 = (atom_mat2 @ topd2).transpose() @ topg2

    #Apply expected norm if necessary
    if norm_expected:
        exp1 = (topd1.sum(axis=1).transpose() @ topd1).transpose() @ (topg1.sum(axis=1).transpose() @ topg1)
        exp2 = (topd2.sum(axis=1).transpose() @ topd2).transpose() @ (topg2.sum(axis=1).transpose() @ topg2)
        mat1 = divide_expected(mat1, exp1)
        mat2 = divide_expected(mat2, exp2)
        mat1, mat2 = list(map(csr_matrix, [mat1, mat2]))

    # Remap mat2 through a PyMOL alignment of align_with onto userSelection
    if align_with != None:
        cmd.align(align_with, userSelection, object='aln')
        raw_aln = cmd.get_raw_alignment('aln')
        cmd.hide('cgo', 'aln')
        # object names of the last alignment column, reversed
        order_string = [idx[0] for idx in raw_aln[-1]][::-1]
        trans_mat = dok_matrix(tuple([cmd.count_atoms(X) for X in order_string]))
        for idx1, idx2 in raw_aln:
            # atom indices are shifted by one to get 0-based matrix positions
            trans_mat[idx2[1]-1, idx1[1]-1] = 1
        trans_mat = csr_matrix(trans_mat)
        top_t1, top_t2 = [create_top('name CA', top) for top in [top1, top2]]
        trans_res = (trans_mat @ top_t1).transpose() @ top_t2
        mat2 = trans_res @ (mat2 @ trans_res.transpose())

    # Perturbation matrix and graph
    pertmat = mat2 - mat1
    pertmat.setdiag(0)
    pertmat.eliminate_zeros()
    net = nx.from_scipy_sparse_matrix(pertmat)

    #Creating labeling dictionary
    # offset of 1 when the string form of the first residue ends in '0'
    if str(next(top1.residues))[-1] == '0':
        offset = 1
    else:
        offset = 0
    chain_names = [chr(ord('A') + i) for i in range(26)]
    t2o = lambda X: three2one[X] if X in three2one else X[0]
    get_chain = lambda X: chain_names[(X.chain.index % len(chain_names))]
    res2str = lambda X: t2o(X.name)+str(X.resSeq+offset)+':'+get_chain(X)
    id2label = {i: res2str(res) for i, res in enumerate(top1.residues)}
    # if 'IGPS' in label_compo:
    #     igps_label = {}
    #     for elt in id2label.items():
    #         if elt.split(':')[1] in ['A', 'C', 'E']:
    #             rerelabel[elt] = '𝘧{}'.format(elt.split(':')[0])
    #         elif elt.split(':')[1] in ['B', 'D', 'F']:
    #             rerelabel[elt] = '𝘩{}'.format(elt.split(':')[0])

    #Relabeling network
    net = nx.relabel_nodes(net, id2label)
    label2id = {res2str(res): i for i, res in enumerate(top1.residues)}

    #Auto_patching network labels
    if not all(elem in node2CA for elem in net.nodes()):
        print('PDB structure and topology labeling not matching.')
        if auto_patch:
            print('Attempting to auto-patch residue names. (this can be disabled with auto_patch=False)')
            # only possible when both sides have the same number of entries;
            # nodes are then matched purely by position
            if len(node2CA.keys()) == len(net.nodes()):
                remap = dict(zip(net.nodes(), node2CA.keys()))
                net = nx.relabel_nodes(net, remap)
                label2id = dict(zip(node2CA.keys(), range(top1.n_residues)))
            else:
                print("Auto-patching not working, please try on different PDB file")

    #Output topK if necessary
    if type(topk) == int:
        # absolute weights sorted in descending order, entry at position topk
        limit_weight = np.sort([abs(net.edges[(u, v)]['weight']) for u, v in net.edges])[::-1][topk]
        threshold = limit_weight
    if type(standard_and_expected) == int:
        limit_weight = np.sort([abs(net.edges[(u, v)]['weight']) for u, v in net.edges])[::-1][standard_and_expected]
        # position -> node label, reused further down to relabel net2
        relabel_net2 = dict(enumerate(net.nodes()))
        threshold = limit_weight

    # Any component-based filter switches on coloring by component
    if max_compo or mean_vp or any(np.array([compo_size, compo_diam, compo_radius, strong_compo])!= None):
        color_by_compo = True
        if load_cc != None:
            cc = np.load(load_cc)
        else:
            cc = get_connected_components(pertmat)
            if save_cc != None:
                np.save(save_cc, cc)
        if max_compo:
            threshold = np.sort(np.abs(pertmat.data))[::-1][np.argmax(cc[::-1])]
        else:
            lastmax = np.sort(np.abs(pertmat.data))[::-1][np.argmax(cc[::-1])]
            print('last maximum: {}'.format(np.round(lastmax, 2)))
            net.remove_edges_from([(u, v) for u, v in net.edges() if abs(net[u][v]['weight']) < lastmax])
            net.remove_nodes_from(list(nx.isolates(net)))
            components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
            if mean_vp:
                # "vanishing point" of a component: its largest absolute edge weight
                vanishing_points = [np.max([abs(net[u][v]['weight']) for u, v in c.edges()]) for c in components_list]
                threshold = np.median(vanishing_points)
            elif compo_size !=None:
                robust = [list(c.nodes()) for c in components_list if len(c.edges())>=float(compo_size)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif compo_diam !=None:
                robust = [list(c.nodes()) for c in components_list if nx.diameter(c)>=float(compo_diam)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif compo_radius !=None:
                robust = [list(c.nodes()) for c in components_list if nx.radius(c)>=float(compo_radius)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif strong_compo !=None:
                # keep components ranked below the percentile both by
                # vanishing point and by number of edges
                vanishing_points = [np.max([abs(net[u][v]['weight']) for u, v in c.edges()]) for c in components_list]
                edges_len = [len(c.edges()) for c in components_list]
                percentile = float(strong_compo)*len(components_list)/100
                vani_ranks = len(vanishing_points)+1-rankdata(vanishing_points, method='max')
                size_ranks = len(edges_len)+1-rankdata(edges_len, method='max')
                vani_nodes = [list(c.nodes()) for i, c in enumerate(components_list) if vani_ranks[i]<percentile]
                size_nodes = [list(c.nodes()) for i, c in enumerate(components_list) if size_ranks[i]<percentile]
                vani_nodes = [x for vani_nodes in list(vani_nodes) for x in vani_nodes]
                size_nodes = [x for size_nodes in list(size_nodes) for x in size_nodes]
                strong = list(set(vani_nodes) & set(size_nodes))
                net = net.subgraph(strong)

    #Detect mutations
    if mutations:
        # reset the display of a previous "mutations" selection, then drop it
        cmd.show_as(representation="cartoon", selection="?mutations")
        cmd.color(color="grey80", selection="?mutations")
        cmd.delete("?mutations")
        mutations_list = []
        y = {j: res2str(res) for j, res in enumerate(top2.residues)}
        for resid in id2label:
            if resid in y:
                if id2label[resid] != y[resid]:
                    mutations_list.append((resid, (y[resid][0]+':').join(id2label[resid].split(':'))))
                    cmd.select("mutations", 'resi '+str(id2label[resid].split(':')[0][1:])+ ' and chain '+id2label[resid][-1], merge=1)
            else:
                print('Deletion of ', id2label[resid])
        print('List of mutations: ', ', '.join([elt[1] for elt in mutations_list]))
        cmd.show_as(representation="licorice", selection="?mutations")
        cmd.color(color="magenta", selection="?mutations")

    #Apply threshold
    if threshold !=0:
        print('Applying threshold {}'.format(threshold))
        net.remove_edges_from([(u, v) for u, v in net.edges() if abs(net[u][v]['weight']) < threshold])
        net.remove_nodes_from(list(nx.isolates(net)))

    #Induced perturbation network if needed
    if around !=None:
        net = net.subgraph(nx.node_connected_component(net, around))

    #Setting Pymol parameters
    cmd.set('auto_zoom', 0)
    cmd.set("cgo_sphere_quality", 4)

    if len(net.edges()) == 0:
        raise ValueError('Computations give empty network')

    #Norm edges
    if edge_norm == None:
        edge_norm = max([net.edges()[(u, v)]['weight'] for u, v in net.edges()])/r
    elif edge_norm == True:
        tot_atoms_in_sel = np.sum([np.sum(elt) for elt in [topd1, topd2, topg1, topg2]])
        tot_atoms = np.sum([max(elt.shape) for elt in [topd1, topd2, topg1, topg2]])
        norm_fact = tot_atoms_in_sel**2/tot_atoms**2
        edge_norm = norm_fact*30
        print('Global normalization factor: {}'.format(1/norm_fact))

    #Function to name edges
    def name_edges(name, path):
        # default name: file name of path without its last extension
        if name == None:
            return '.'.join(basename(path).split('.')[:-1])
        return name

    # Standard vs. expected-normalized network: edges are split into
    # "in both", "standard only" and "expected only" objects
    if type(standard_and_expected) == int:
        exp1 = (topd1.sum(axis=1).transpose() @ topd1).transpose() @ (topg1.sum(axis=1).transpose() @ topg1)
        exp2 = (topd2.sum(axis=1).transpose() @ topd2).transpose() @ (topg2.sum(axis=1).transpose() @ topg2)
        mat1 = divide_expected(mat1, exp1)
        mat2 = divide_expected(mat2, exp2)
        mat1, mat2 = list(map(csr_matrix, [mat1, mat2]))
        net2 = nx.from_scipy_sparse_matrix(mat2-mat1)
        net2 = nx.relabel_nodes(net2, relabel_net2)
        limit_weight = np.sort([abs(net2.edges[(u, v)]['weight']) for u, v in net2.edges])[::-1][standard_and_expected]
        net2.remove_edges_from([(u, v) for u, v in net2.edges() if abs(net2[u][v]['weight']) < limit_weight])
        net2.remove_nodes_from(list(nx.isolates(net2)))
        colors = [(1, 1, 0), (0, 1, 1), (1, 0, 1)]
        objs_inboth = []
        objs_instd = []
        objs_inexp = []
        nodes = []
        for u, v in net.edges():
            radius = net[u][v]['weight']/edge_norm
            if (u, v) in list(net2.edges()):
                objs_inboth += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[0], *colors[0]]
            else:
                objs_instd += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[1], *colors[1]]
            nodes += [u, v]
        edge_norm2 = max([net2.edges()[(u, v)]['weight'] for u, v in net2.edges()])/r
        for u, v in net2.edges():
            radius = net2[u][v]['weight']/edge_norm2
            if (u, v) not in list(net.edges()):
                objs_inexp += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[2], *colors[2]]
                nodes += [u, v]
        nodelist = set(nodes)
        objs_nodes = [COLOR, *node_color]
        for u in nodelist:
            x, y, z = node2CA[u]
            objs_nodes += [SPHERE, x, y, z, r]
        # the first 4 characters of the joined string are dropped
        # (presumably a leading separator produced by selection() -- TODO confirm)
        selnodes = ''.join([node2id[u] for u in nodelist])[4:]
        cmd.load_cgo(objs_inboth, 'in_both_edges')
        cmd.load_cgo(objs_instd, 'in_std_edges')
        cmd.load_cgo(objs_inexp, 'in_exp_edges')
        cmd.load_cgo(objs_nodes, 'nodes')

    # Coloring by contact type: one object per (name, hydro/polar/mixed)
    elif color_by_contact_type:
        expected_matrices = get_expected_type(atom_mat1, atom_mat2, top1, top2, fromstruct)
        name1, name2 = list(map(name_edges, [name1, name2], [path1, path2]))
        names = ['{0}_{1}'.format(name1, sel) for sel in ['hydro', 'polar', 'mixed']] + ['{0}_{1}'.format(name2, sel) for sel in ['hydro', 'polar', 'mixed']]
        nodes_dict = {i: [] for i in range(len(names))}
        objs_dict = {i: [] for i in range(len(names))}
        colors = [(1, 0.86, 0.73), (0.68, 0.85, 0.90), (0.60, 0.98, 0.60), (1, 0.86, 0), (0.25, 0.41, 0.88), (0, 0.50, 0)]
        for u, v in net.edges():
            radius = net[u][v]['weight']/edge_norm
            id_u, id_v = label2id[u], label2id[v]
            values = list(map(lambda _mat: _mat[id_v, id_u], expected_matrices))
            # index of the matrix with the largest entry for this residue pair
            type_of_contact = np.argmax(values)
            objs_dict[type_of_contact] += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[type_of_contact], *colors[type_of_contact]]
            nodes_dict[type_of_contact] += [u, v]
        selnodes = ''
        for toc in nodes_dict:
            nodelist = set(nodes_dict[toc])
            objs_dict[toc]+=[COLOR, *node_color]
            for u in nodelist:
                x, y, z = node2CA[u]
                objs_dict[toc]+=[SPHERE, x, y, z, r]
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
        for i, name in zip(objs_dict.keys(), names):
            cmd.load_cgo(objs_dict[i], '{}_edges'.format(name))

    #Coloring by components
    elif color_by_compo:
        components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
        diameters = [nx.diameter(c) for c in components_list]
        # component indices ordered by decreasing diameter
        ranking = np.argsort(diameters)[::-1]
        # one spare color is requested so that the first grey entry
        # (equal r, g, b) can be removed
        colors = sns.color_palette(palette, n_colors=len(components_list)+1)
        for i, c in enumerate(colors):
            if c[0] == c[1] == c[2]:
                print(c)
                colors.pop(i)
                break
        selnodes = ''
        for i, rank in enumerate(ranking):
            color, compo = colors[rank], components_list[rank]
            _obj, nodelist = [], []
            for u, v in compo.edges():
                radius = net[u][v]['weight']/edge_norm
                if abs(net[u][v]['weight']) >= threshold:
                    if not force_binary_color:
                        _obj+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *color, *color]
                    else:
                        if net[u][v]['weight'] <= 0:
                            _obj+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                        else:
                            _obj+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                    nodelist += [u, v]
            # cmd.load_cgo(_obj, 'Component{}_edges'.format(i+1))
            _obj+=[COLOR, *node_color]
            nodelist = set(nodelist)
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
            for u in nodelist:
                x, y, z = node2CA[u]
                _obj+=[SPHERE, x, y, z, r]
            cmd.load_cgo(_obj, 'Component{}'.format(i+1))

    #Color by group of relevance
    elif color_by_group:
        # cluster the absolute edge weights with Birch
        weights = np.array([abs(net[u][v]['weight']) for u, v in net.edges()]).reshape(-1, 1)
        birch = Birch(n_clusters=n_clusters).fit(weights)
        labels = birch.predict(weights)
        # NOTE(review): labels follow net.edges() order but are indexed with
        # an argsort of pertmat.data -- assumes both have the same length
        # and order; confirm.
        ordered_labels = labels[np.argsort(pertmat.data)]
        _, idx = np.unique(ordered_labels, return_index=True)
        mapping = dict(zip(ordered_labels[np.sort(idx)], np.sort(np.unique(ordered_labels))))
        i2color = dict(zip(ordered_labels[np.sort(idx)], sns.color_palette(palette, len(np.unique(ordered_labels)))[::-1]))
        selnodes = ''
        if show_top_group == None:
            show_top_group = len(mapping.keys())
        for j, i in enumerate(list(mapping.keys())[:show_top_group]):
            _obj, nodelist = [], []
            _net = net.copy()
            # keep only the edges whose cluster label is i
            to_remove_edges = [(u, v) for j, (u, v) in enumerate(net.edges()) if labels[j] != i]
            _net.remove_edges_from(to_remove_edges)
            _net.remove_nodes_from(list(nx.isolates(_net)))
            for u, v in _net.edges():
                radius = net[u][v]['weight']/edge_norm
                if abs(net[u][v]['weight']) >= threshold:
                    _obj+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *i2color[j], *i2color[j]]
                    nodelist += [u, v]
            # cmd.load_cgo(_obj, 'Component{}_edges'.format(i+1))
            _obj+=[COLOR, *node_color]
            nodelist = set(nodelist)
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
            for u in nodelist:
                x, y, z = node2CA[u]
                _obj+=[SPHERE, x, y, z, r]
            cmd.load_cgo(_obj, 'Group{}'.format(j+1))

    #Default edge coloring
    else:
        obj1, obj2, nodelist = [], [], []
        for u, v in net.edges():
            radius = net[u][v]['weight']/edge_norm
            if abs(net[u][v]['weight']) >= threshold:
                # an explicit 'color' edge attribute wins over the weight sign
                if 'color' in net[u][v]:
                    if net[u][v]['color'] == 'r':
                        obj1+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                    else:
                        obj2+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                else:
                    if net[u][v]['weight'] <= 0:
                        obj1+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                    else:
                        obj2+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                nodelist+=[u, v]
        name1, name2 = map(name_edges, [name1, name2], [path1, path2])
        cmd.load_cgo(obj1, name1+'_edges')
        cmd.load_cgo(obj2, name2+'_edges')

        #Drawing nodes
        obj=[COLOR, *node_color]
        nodelist = set(nodelist)
        selnodes = ''.join([node2id[u] for u in nodelist])[4:]
        for u in nodelist:
            x, y, z = node2CA[u]
            obj+=[SPHERE, x, y, z, r]
        cmd.load_cgo(obj, name_nodes)

    #Creating text for labeling components
    if label_compo != '' or compos_to_excel !=None:
        if compos_to_excel != None:
            rows_list = []
        objtxt = []
        # negated first nine entries of the current view, as a 3x3 array
        axes = -np.array(cmd.get_view()[:9]).reshape(3,3)
        components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
        diameters = [nx.diameter(c) for c in components_list]
        for i, j in enumerate(np.argsort(diameters)[::-1]):
            row_dict = {}
            c = components_list[j]
            sses = sorted(list(set([node2SS[node] for node in c])))
            if compos_to_excel !=None:
                row_dict['Secondary structure elements'] = ','.join(sses)
                row_dict['Vanishing point'] = np.max([abs(net[u][v]['weight']) for u, v in c.edges()])
                row_dict['Diameter'] = nx.diameter(c)
                row_dict['Size'] = len(c.edges())
                row_dict['Size rank'] = i+1
            else:
                print('Component {}\n'.format(i+1), ', '.join(sses))
                print('Size (number of edges) {}'.format(len(c.edges())))
                print('Vanishing point: {}'.format(np.max([abs(net[u][v]['weight']) for u, v in c.edges()])))
            if 'h' in str(label_compo):
                methods = ['eigenvector', 'hits_hub', 'hits_authority', 'pagerank', 'betweenness', 'katz']
                hubs = [get_hubs(c, method) for method in methods]
                if compos_to_excel !=None:
                    row_dict.update(dict(zip(methods, hubs)))
                else:
                    print(dict(zip(methods, hubs)))
            if 'c' in str(label_compo):
                # text is anchored at the first node of the component,
                # shifted by the first row of axes
                pos = np.array(node2CA[next(c.__iter__())]) + (axes[0])
                cyl_text(objtxt, plain, pos, 'Component {}'.format(i+1), radius=0.1, color=[0, 0, 0], axes=axes)
            if compos_to_excel:
                rows_list.append(row_dict)
        if compos_to_excel:
            df = pd.DataFrame(rows_list)
            df.to_excel(compos_to_excel)
        if 's' in str(label_compo):
            for ss in SS2nodelist:
                nodelist = SS2nodelist[ss]
                print(mapss[ss], ': ', ('{}--{}'.format(nodelist[0], nodelist[-1]) if len(nodelist)>1 else nodelist[0]))
        # print(objtxt)
        cmd.set("cgo_line_radius", 0.03)
        cmd.load_cgo(objtxt, 'txt')

    #labeling
    # NOTE(review): the default labeling='0' is a string while both tests
    # below compare against ints, so only an int argument triggers labels.
    if labeling==1:
        cmd.label(selection=selnodes, expression="t2o(resn)+resi")
    if labeling==3:
        cmd.label(selection=selnodes, expression="resn+resi")

    #Summing
    if sum:
        print('Sum of contacts lost: ', np.sum(pertmat))
    if printall:
        print([(u,v, net[u][v]) for u, v in net.edges()])
def pca_plot(
    aln_object,
    ref="all",
    state=0,
    maxlabels=20,
    size=20,
    invert="",
    which=(0, 1),
    alpha=0.75,
    filename=None,
    quiet=1,
    load_b=0,
):
    """
DESCRIPTION

    Principal Component Analysis on a set of superposed conformations, given
    by an alignment object. By default all states in all objects are
    considered. Generates a 2d-plot of the first two principal components.

USAGE

    pca_plot aln_object [, ref [, state [, maxlabels ]]]

ARGUMENTS

    aln_object = string: name of alignment object, defines the selection
    and the atom mapping between objects. If the name is not an alignment
    object, it is treated as a selection and a temporary alignment is built
    with extra_fit (a warning is printed).

    ref = string: object names for which to calculate PCA for {default: all}

    state = integer: if state=0 use all states, if state<0 use the current
    state {default: 0}

    maxlabels = integer: label dots in plot if maxlabels<0 or number of
    plotted conformations (counted separately for reference and other
    objects) not more than maxlabels {default: 20}

    size = float: size of plot points in px^2 {default: 20}

    invert = string: invert plotting axes x, y or xy {default: ''}

    which = (int,int): indices of principal components to plot {default: (0,1)}

    alpha = float: opacity of plotting points

    filename = string: if given, plot to file {default: None}

    quiet = 0/1: if 0, print the names of reference and other objects
    {default: 1}

    load_b = 0/1: if 1, store the per-atom magnitude of component which[0]
    in the b-factor of the aligned atoms and color by it {default: 0}

EXAMPLE

    fetch 1ake 4ake 1dvr 1ak2, async=0
    split_chains
    extra_fit (*_*) and name CA, reference=1ake_A, cycles=0, object=aln
    pca_plot aln, 1ake_* 4ake_*

    fetch 1ubq 2k39, async=0
    align 2k39, 1ubq and guide, cycles=0, object=aln2
    color blue, 1ubq
    color orange, 2k39
    pca_plot aln2, filename=pca-ubq.pdf
    """
    from numpy import array, dot
    from numpy.linalg import svd, LinAlgError
    from . import matplotlib_fix
    from matplotlib.pyplot import figure

    state, quiet = int(state), int(quiet)
    maxlabels = int(maxlabels)
    if cmd.is_string(which):
        which = cmd.safe_list_eval(which)

    # The full type string "object:alignment" is needed here. A truncated
    # "object:" is not an object type, so a genuine alignment object would
    # never be found and would always be re-aligned by the fallback below.
    if aln_object not in cmd.get_names_of_type("object:alignment"):
        print(" Warning: first argument should be an alignment object")

        from .fitting import extra_fit

        selection = aln_object
        aln_object = cmd.get_unused_name("aln")
        extra_fit(selection, cycles=0, transform=0, object=aln_object)

    if state == 0:
        states = list(range(1, cmd.count_states() + 1))
    elif state < 0:
        states = [cmd.get_state()]
    else:
        states = [state]

    models = cmd.get_object_list(aln_object)
    references = set(cmd.get_object_list("(" + ref + ")")).intersection(models)
    others = set(models).difference(references)
    aln = cmd.get_raw_alignment(aln_object)

    if not quiet:
        print(" PCA References:", ", ".join(references))
        print(" PCA Others:", ", ".join(others))

    if len(references) == 0:
        print(" PCA Error: No reference objects")
        raise CmdException

    # Keep only alignment columns that contain an atom of every object, so
    # that all coordinate vectors are built from the same columns.
    model_count = len(models)
    coords = dict((model, []) for model in models)
    aln = [pos for pos in aln if len(pos) == model_count]

    for state in states:
        idx2xyz = dict()
        cmd.iterate_state(state, aln_object, "idx2xyz[model,index] = (x,y,z)", space={"idx2xyz": idx2xyz})

        for pos in aln:
            for idx in pos:
                if idx not in idx2xyz:
                    continue

                # One flat [x, y, z, x, y, z, ...] list per state and object;
                # a new list is started when this object has fewer lists
                # than the current state number.
                c = coords[idx[0]]
                if len(c) < state:
                    c.append([])
                c[-1].extend(idx2xyz[idx])

    # Yields (flat coordinates, object name, 1-based position in that
    # object's coordinate list).
    c_iter = lambda models: ((c, model, i + 1) for model in models for (i, c) in enumerate(coords[model]))
    X = array([i[0] for i in c_iter(references)])
    Y = array([i[0] for i in c_iter(others)])
    center = X.mean(0)
    X = X - center

    try:
        U, L, V = svd(X)
    except LinAlgError as e:
        print(" PCA Error: ", e)
        raise CmdException

    if int(load_b):
        cmd.alter("byobj " + aln_object, "b=-0.01")
        b_dict = {}
        i = which[0]
        # Euclidean norm of each atom's (x, y, z) triple in row i of V.
        b_array = (V[i].reshape((-1, 3)) ** 2).sum(1) ** 0.5
        for pos, b in zip(aln, b_array):
            for idx in pos:
                b_dict[idx] = b
        cmd.alter(aln_object, "b=b_dict.get((model,index), -0.01)", space=locals())
        cmd.color("yellow", "byobj " + aln_object)
        cmd.spectrum("b", "blue_red", aln_object + " and b > -0.01")

    X_labels = [i[1:3] for i in c_iter(references)]
    Y_labels = [i[1:3] for i in c_iter(others)]

    x_list = []
    y_list = []
    colors = []
    text_list = []

    def plot_pc_2d(X, labels):
        # Project onto the rows of V and keep the two requested components.
        pca_12 = dot(X, V.T)[:, which]
        for (x, y), (model, state) in zip(pca_12, labels):
            x_list.append(x)
            y_list.append(y)
            colors.append(get_model_color(model))
            if maxlabels < 0 or len(pca_12) <= maxlabels:
                text_list.append("%s(%d)" % (model, state))
            else:
                text_list.append(None)

    plot_pc_2d(X, X_labels)
    if len(Y) > 0:
        # Non-reference conformations are centered on the reference mean.
        Y = Y - center
        plot_pc_2d(Y, Y_labels)

    if "x" in invert:
        x_list = [-x for x in x_list]
    if "y" in invert:
        y_list = [-y for y in y_list]

    fig = figure()
    plt = fig.add_subplot(111, xlabel="PC %d" % (which[0] + 1), ylabel="PC %d" % (which[1] + 1))
    plt.scatter(x_list, y_list, float(size), colors, linewidths=0, alpha=float(alpha))

    for (x, y, text) in zip(x_list, y_list, text_list):
        if text is not None:
            plt.text(x, y, text, horizontalalignment="left")

    _showfigure(fig, filename, quiet)
def rmsd(selection="all", chains="", doAlign=1, doPretty=1,
         algorithm=1, guide=1, method="super", quiet=1,
         colorstyle="blue_red", colormode=""):
    """
DESCRIPTION

    Align every pair of objects found in the selection, measure the distance
    between aligned atoms and store the averaged distance of each atom in
    its b-factor (-1 for atoms without an aligned partner). Optionally
    color the structures by that value.

ARGUMENTS

    selection = string: atoms to consider; the objects to compare are
    collected from this selection {default: all}

    chains = string: chain identifiers, one character per chain (e.g.
    chains=ab); if given, each chain is aligned and measured separately
    {default: ''}

    doAlign = 0 or 1: superpose the objects before calculating distances
    {default: 1}

    doPretty = 0 or 1: orient, show as cartoon and color by b-factor
    {default: 1}

    algorithm = 0 or 1: 0 averages over the direct partners of an atom,
    1 additionally looks up the partners of those partners {default: 1}

    guide = 0 or 1: restrict alignment to guide atoms {default: 1}

    method = string: name of the alignment command to use {default: super}

    quiet = 0 or 1: if 0, print minimum, maximum and average distance
    {default: 1}

    colorstyle = string: palette passed to spectrum {default: blue_red}

    colormode = string: '' uses the full distance range; 'lowshow' caps the
    color range at the average distance; 'highshow' starts the color range
    at the average distance {default: ''}

EXAMPLE

    rmsd("all", chains="ab", quiet=0)
    """
    from chempy import cpv

    # initial parameters
    doAlign, doPretty = int(doAlign), int(doPretty)
    guide, quiet = int(guide), int(quiet)
    algorithm = int(algorithm)

    # get suitable align method
    try:
        align = cmd.keyword[method][0]
    except (KeyError, IndexError, TypeError):
        print("Error: no such method: %s" % (method,))
        raise CmdException

    # Selection suffix derived once from the flag; the flag itself is left
    # untouched.
    guide_sel = " and guide" if guide else ""

    # get objects; atom coordinates are (re)collected after every alignment
    objects = set()
    idx2coords = dict()
    cmd.iterate_state(-1, selection, "objects.add(model) ", space={"objects": objects})

    def _measure(obj, obj1, fit_suffix, aln_suffix, pair_map, distances):
        # Superpose obj1 onto obj (optional), build the temporary alignment
        # object "aln" and record the distance of every aligned atom pair.
        if doAlign:
            align(obj1 + fit_suffix, obj + fit_suffix)
        align(obj1 + aln_suffix, obj + aln_suffix, cycles=0, transform=0, object="aln")
        cmd.iterate_state(-1, selection, "idx2coords[model,index] = (x,y,z)",
                          space={"idx2coords": idx2coords})

        if cmd.count_atoms("?aln", 1, 1) == 0:
            # this should ensure that "aln" will be available as selectable object
            cmd.refresh()

        for col in cmd.get_raw_alignment("aln"):
            assert len(col) == 2
            b = cpv.distance(idx2coords[col[0]], idx2coords[col[1]])
            for idx in col:
                distances[idx] = b
            # Key is always the atom of `obj`, value its partner and distance.
            if col[0][0] == obj:
                pair_map[col[0]] = [col[1], b]
            else:
                pair_map[col[1]] = [col[0], b]
        cmd.delete("aln")

    # store the compared rmsd tree for each object, like
    # {obj: {obj1: {(model, index): [(model1, index1), distance]}}}
    rmsd_stored = dict()
    # Defined up front so that a selection with fewer than two objects does
    # not end in a NameError further down.
    total_values = {}
    for obj in objects:
        rmsd_stored[obj] = {}
        for obj1 in objects:
            if obj == obj1:
                continue
            pair_map = rmsd_stored[obj][obj1] = {}
            # NOTE(review): the distances are reset for every object pair, so
            # the color range and the printed statistics only reflect the
            # pair processed last. Kept as is -- confirm whether accumulating
            # over all pairs was intended.
            total_values = {}
            if chains:
                for eachchain in chains:
                    chain_sel = " and chain " + eachchain
                    # guide atoms are used for the superposition only, the
                    # measuring alignment covers the whole chain
                    _measure(obj, obj1, guide_sel + chain_sel, chain_sel,
                             pair_map, total_values)
            else:
                _measure(obj, obj1, guide_sel, guide_sel, pair_map, total_values)

    def b_replace(model, index):
        # Average distance of one atom over its aligned partners; -1 when
        # the atom was not aligned to anything.
        n = 0
        bsum = 0.0
        for obj1 in objects:
            if model == obj1:
                continue
            partners = rmsd_stored[model][obj1]
            if (model, index) not in partners:
                continue
            (nextmodel, nextindex), dist = partners[model, index]
            bsum += dist
            n += 1
            if not algorithm:
                continue
            # NOTE(review): partners stored under [model][obj1] appear to
            # always belong to obj1, which would make this condition never
            # true. Logic kept unchanged -- confirm the intent.
            for nextobj1 in objects:
                if nextmodel != nextobj1 and nextmodel != obj1:
                    next_partners = rmsd_stored[nextmodel][nextobj1]
                    if (nextmodel, nextindex) in next_partners:
                        bsum += next_partners[nextmodel, nextindex][1]
                        n += 1
        if n == 0:
            return -1
        return bsum / n

    cmd.alter(selection, 'b = b_replace(model, index)', space={'b_replace': b_replace})

    if not total_values:
        # Nothing was aligned: min()/max() below would fail on empty data.
        print(" ColorByRMSD: no aligned atom pairs found")
        return

    lowest = min(total_values.values())
    highest = max(total_values.values())
    average = sum(total_values.values()) / len(total_values)

    if doPretty:
        mini, maxi = lowest, highest
        if colormode:
            if colormode == "lowshow":
                maxi = average
                print("This is lowshow")
            elif colormode == "highshow":
                mini = average
                print("This is highshow")
            else:
                raise CmdException

        cmd.orient(selection)
        cmd.show_as('cartoon', 'byobj ' + selection)
        cmd.color('gray', selection)
        # The palette comes from the colorstyle argument (default "blue_red").
        cmd.spectrum('b', colorstyle, selection + ' and b > -0.5', minimum=mini, maximum=maxi)

    if not quiet:
        print(" ColorByRMSD: Minimum Distance: %.2f" % lowest)
        print(" ColorByRMSD: Maximum Distance: %.2f" % highest)
        print(" ColorByRMSD: Average Distance: %.2f" % average)
def pca_plot(aln_object, ref='all', state=0, maxlabels=20, size=20, invert='',
        which=(0,1), alpha=0.75, filename=None, quiet=1, load_b=0):
    '''
DESCRIPTION

    Principal Component Analysis of superposed conformations. The atoms and
    their mapping between objects are taken from an alignment object; unless
    restricted, every state of every object contributes one conformation.
    The result is shown as a 2d scatter plot of two principal components.

USAGE

    pca_plot aln_object [, ref [, state [, maxlabels ]]]

ARGUMENTS

    aln_object = string: name of alignment object, defines the selection
    and the atom mapping between objects

    ref = string: object names for which to calculate PCA for {default: all}

    state = integer: if state=0 use all states {default: 0}

    maxlabels = integer: label dots in plot if maxlabels<0 or number of models
    not more than maxlabels {default: 20}

    size = float: size of plot points in px^2 {default: 20}

    invert = string: invert plotting axes x, y or xy {default: ''}

    which = (int,int): indices of principal components to plot {default: (0,1)}

    alpha = float: opacity of plotting points

    filename = string: if given, plot to file {default: None}

EXAMPLE

    fetch 1ake 4ake 1dvr 1ak2, async=0
    split_chains
    extra_fit (*_*) and name CA, reference=1ake_A, cycles=0, object=aln
    pca_plot aln, 1ake_* 4ake_*

    fetch 1ubq 2k39, async=0
    align 2k39, 1ubq and guide, cycles=0, object=aln2
    color blue, 1ubq
    color orange, 2k39
    pca_plot aln2, filename=pca-ubq.pdf
    '''
    from numpy import array, dot
    from numpy.linalg import svd, LinAlgError
    from . import matplotlib_fix
    from matplotlib.pyplot import figure

    state = int(state)
    quiet = int(quiet)
    maxlabels = int(maxlabels)
    if cmd.is_string(which):
        which = cmd.safe_list_eval(which)

    # Anything that is not an alignment object is treated as a selection
    # and aligned on the fly (without moving the atoms).
    known_alignments = cmd.get_names_of_type('object:alignment')
    if aln_object not in known_alignments:
        print(' Warning: first argument should be an alignment object')

        from .fitting import extra_fit

        selection = aln_object
        aln_object = cmd.get_unused_name('aln')
        extra_fit(selection, cycles=0, transform=0, object=aln_object)

    if state < 0:
        states = [cmd.get_state()]
    elif state == 0:
        states = list(range(1, cmd.count_states() + 1))
    else:
        states = [state]

    models = cmd.get_object_list(aln_object)
    ref_objects = cmd.get_object_list('(' + ref + ')')
    references = set(ref_objects).intersection(models)
    others = set(models).difference(references)
    aln = cmd.get_raw_alignment(aln_object)

    if not quiet:
        print(' PCA References:', ', '.join(references))
        print(' PCA Others:', ', '.join(others))

    if not references:
        print(' PCA Error: No reference objects')
        raise CmdException

    # Only gap-free alignment columns (one atom per object) are used.
    n_models = len(models)
    coords = {name: [] for name in models}
    aln = [column for column in aln if len(column) == n_models]

    for frame in states:
        idx2xyz = {}
        cmd.iterate_state(frame, aln_object, 'idx2xyz[model,index] = (x,y,z)',
                space={'idx2xyz': idx2xyz})

        for column in aln:
            for idx in column:
                if idx in idx2xyz:
                    # flat x,y,z,x,y,z,... list per collected state
                    per_state = coords[idx[0]]
                    if len(per_state) < frame:
                        per_state.append([])
                    per_state[-1].extend(idx2xyz[idx])

    def conformations(names):
        # (flat coordinates, object name, 1-based position) per conformation
        for name in names:
            for position, flat in enumerate(coords[name]):
                yield flat, name, position + 1

    ref_rows = list(conformations(references))
    other_rows = list(conformations(others))

    X = array([row[0] for row in ref_rows])
    Y = array([row[0] for row in other_rows])
    center = X.mean(0)
    X = X - center

    try:
        U, L, V = svd(X)
    except LinAlgError as e:
        print(' PCA Error: ', e)
        raise CmdException

    if int(load_b):
        cmd.alter('byobj ' + aln_object, 'b=-0.01')
        component = V[which[0]]
        # per-atom length of the (x, y, z) part of the component
        b_array = (component.reshape((-1, 3))**2).sum(1)**0.5
        b_dict = {
            idx: magnitude
            for column, magnitude in zip(aln, b_array)
            for idx in column
        }
        cmd.alter(aln_object, 'b=b_dict.get((model,index), -0.01)', space=locals())
        cmd.color('yellow', 'byobj ' + aln_object)
        cmd.spectrum('b', 'blue_red', aln_object + ' and b > -0.01')

    X_labels = [row[1:3] for row in ref_rows]
    Y_labels = [row[1:3] for row in other_rows]

    x_list = []
    y_list = []
    colors = []
    text_list = []

    def collect_points(points, labels):
        # project onto the requested components and queue for plotting
        pca_12 = dot(points, V.T)[:,which]
        with_text = maxlabels < 0 or len(pca_12) <= maxlabels
        for (x, y), (model, state) in zip(pca_12, labels):
            x_list.append(x)
            y_list.append(y)
            colors.append(get_model_color(model))
            text_list.append('%s(%d)' % (model, state) if with_text else None)

    collect_points(X, X_labels)
    if len(Y) > 0:
        Y = Y - center
        collect_points(Y, Y_labels)

    if 'x' in invert:
        x_list = [-x for x in x_list]
    if 'y' in invert:
        y_list = [-y for y in y_list]

    fig = figure()
    axes = fig.add_subplot(111,
            xlabel='PC %d' % (which[0]+1),
            ylabel='PC %d' % (which[1]+1))
    axes.scatter(x_list, y_list, float(size), colors, linewidths=0, alpha=float(alpha))

    for x, y, text in zip(x_list, y_list, text_list):
        if text is None:
            continue
        axes.text(x, y, text, horizontalalignment='left')

    _showfigure(fig, filename, quiet)
def morpheasy(source, target, source_state=0, target_state=0, name=None,
        refinement=5, quiet=1):
    '''
DESCRIPTION

    Morph source to target, based on sequence alignment

USAGE

    morpheasy source, target [, source_state [, target_state [, name ]]]

ARGUMENTS

    source = string: selection of the starting conformation

    target = string: selection of the final conformation

    source_state = integer: state of source; values < 1 are replaced by the
    result of get_selection_state(source) {default: 0}

    target_state = integer: state of target; values < 1 are replaced by the
    result of get_selection_state(target) {default: 0}

    name = string: name of the morph object to create; an unused name
    starting with "morph" is chosen if not given {default: None}

    refinement = integer: passed on to rigimol.morph {default: 5}

RETURNS

    Name of the morph object, or None if epymol is not available.

EXAMPLE

    fetch 1akeA 4akeA, async=0
    extra_fit
    morpheasy 1akeA, 4akeA
    '''
    try:
        from epymol import rigimol
    except ImportError:
        print('No epymol available, please use a "Incentive PyMOL" build')
        print('You may use "morpheasy_linear" instead')
        return

    from .editing import mse2met
    from .querying import get_selection_state

    # arguments
    source_state = int(source_state)
    target_state = int(target_state)
    refinement = int(refinement)
    quiet = int(quiet)

    if source_state < 1:
        source_state = get_selection_state(source)
    if target_state < 1:
        target_state = get_selection_state(target)

    # temporary objects
    # IMPORTANT: cmd.get_raw_alignment does not work with underscore object names!
    alnobj = cmd.get_unused_name('_aln')
    so_obj = cmd.get_unused_name('source')  # see above
    ta_obj = cmd.get_unused_name('target')  # see above
    so_sel = cmd.get_unused_name('_source_sel')
    ta_sel = cmd.get_unused_name('_target_sel')
    cmd.create(so_obj, source, source_state, 1)
    cmd.create(ta_obj, target, target_state, 1)
    mse2met(so_obj)
    mse2met(ta_obj)

    # align sequence
    cmd.align(ta_obj, so_obj, object=alnobj, cycles=0, transform=0,
            mobile_state=1, target_state=1)
    cmd.refresh()
    cmd.select(so_sel, '%s and %s' % (so_obj, alnobj))
    cmd.select(ta_sel, '%s and %s' % (ta_obj, alnobj))

    # Every alignment column is a pair of (model, index) keys; make the
    # mapping work in both directions.
    alnmap = dict(cmd.get_raw_alignment(alnobj))
    alnmap.update(dict((v, k) for (k, v) in alnmap.items()))

    # copy source atom identifiers to temporary target
    idmap = dict()
    cmd.iterate(so_sel, 'idmap[model,index] = (segi,chain,resi,resn,name)',
            space={'idmap': idmap})
    cmd.alter(ta_sel, '(segi,chain,resi,resn,name) = idmap[alnmap[model,index]]',
            space={'idmap': idmap, 'alnmap': alnmap})

    # remove unaligned
    cmd.remove('%s and not %s' % (so_obj, so_sel))
    cmd.remove('%s and not %s' % (ta_obj, ta_sel))
    assert cmd.count_atoms(so_obj) == cmd.count_atoms(ta_obj)
    cmd.sort(so_obj)
    cmd.sort(ta_obj)

    # append target to source as 2-state morph-in object
    cmd.create(so_obj, ta_obj, 1, 2)

    # morph
    if name is None:
        name = cmd.get_unused_name('morph')
    # "async" is a reserved word since Python 3.7, so writing it as a literal
    # keyword argument no longer parses. Unpacking a dict passes exactly the
    # same keyword on every Python version.
    rigimol.morph(so_obj, name, refinement=refinement, **{'async': 0})

    # clean up
    for obj in [alnobj, so_obj, so_sel, ta_obj, ta_sel]:
        cmd.delete(obj)

    return name