def _seqalign_pickmatches(self, alignment, window=0): """Find the indexes of aligned residues. Returns a list of lists with the indexes: [match index, sequence a index, sequence b index] """ # Find sequence alignment matches -- "|" or "*" # Return the alignment index, the source index, and target index # of each match. matches = alignment.matches() aligned = [] for i, ia, ib, im, a, b in zip(itertools.count(0), alignment.i_seqs_a, alignment.i_seqs_b, matches, alignment.a, alignment.b): # print i, ia, ib, im, a, b left = i - window if left < 0: left = 0 right = i + window + 1 if right > len(matches): right = len(matches) w = matches[left:right] append = all((i == '*' or i == '|') for i in w) if append: # print "window: %s -- left, right: %s %s -- i: %s -- append? %s"%(w, left, right, i, append) aligned.append((i, ia, ib)) return aligned
def _seqalign_score(self, alignment): matches = alignment.matches() total = len(alignment.a) - alignment.a.count("-") equal = matches.count("|") similar = matches.count("*") score = 100. * (equal + similar) / max(1, total) return score
def _seqalign_score(self, alignment): matches = alignment.matches() total = len(alignment.a) - alignment.a.count("-") equal = matches.count("|") similar = matches.count("*") score = 100.*(equal+similar) / max(1,total) return score
def _seqalign_pickmatches(self, alignment, window=0): """Find the indexes of aligned residues. Returns a list of lists with the indexes: [match index, sequence a index, sequence b index] """ # Find sequence alignment matches -- "|" or "*" # Return the alignment index, the source index, and target index # of each match. matches = alignment.matches() aligned = [] for i, ia, ib, im, a, b in zip( itertools.count(0), alignment.i_seqs_a, alignment.i_seqs_b, matches, alignment.a, alignment.b): # print i, ia, ib, im, a, b left = i-window if left < 0: left = 0 right = i+window+1 if right > len(matches): right = len(matches) w = matches[left:right] append = all((i == '*' or i == '|') for i in w) if append: # print "window: %s -- left, right: %s %s -- i: %s -- append? %s"%(w, left, right, i, append) aligned.append((i, ia, ib)) return aligned
def _print_seqalign(self, alignment, quiet=False): """Print a sequence alignment details.""" matches = alignment.matches() self.log("Alignment details:") self.log("\tmatches after alignment: %s" % (matches.count("|") + matches.count("*"))) self.log("\tsequence alignment:") # Since this prints directly, check if quiet. if not (quiet or self._quiet): # Change the labels to target.desc alignment.pretty_print(matches=matches, block_size=50, n_block=1, top_name="moving", bottom_name="fixed")
def _print_seqalign(self, alignment, quiet=False): """Print a sequence alignment details.""" matches = alignment.matches() self.log("Alignment details:") self.log("\tmatches after alignment: %s"%(matches.count("|") + matches.count("*"))) self.log("\tsequence alignment:") # Since this prints directly, check if quiet. if not (quiet or self._quiet): # Change the labels to target.desc alignment.pretty_print( matches = matches, block_size = 50, n_block = 1, top_name = "moving", bottom_name = "fixed")
def align_chains_rigid(mov_chain, ref_chain):
    """Sequence-align two chains and least-squares fit the matched sites.

    The sequences, sites and per-site usage flags of both chains come from
    ``extract_sites_for_alignment``. The sequences are aligned locally
    (blosum50, gap opening 20, gap extension 2) with the reference chain as
    sequence a. Site pairs at columns marked "|" (exact match) or "*" (good
    match) are used for the fit when both sites are flagged as usable.

    The alignment is pretty-printed as a side effect.

    Returns a tuple ``(lsq_rt, mov_sites_sel, ref_sites_sel)``: the ``rt()``
    of the least-squares fit (reference sites = selected reference-chain
    sites, other sites = selected moving-chain sites) and the two selected
    site arrays.

    Raises Exception if no site pair is selected.
    """
    mov_seq, mov_sites, mov_flags = extract_sites_for_alignment(mov_chain)
    ref_seq, ref_sites, ref_flags = extract_sites_for_alignment(ref_chain)
    align_obj = mmtbx.alignment.align(
        seq_a=ref_seq,
        seq_b=mov_seq,
        gap_opening_penalty=20,
        gap_extension_penalty=2,
        similarity_function='blosum50',
        style='local')
    # Extract the alignment
    alignment = align_obj.extract_alignment()
    # List of matches - '|' for exact match, '*' for good match
    matches = alignment.matches()
    alignment.pretty_print(
        matches=matches,
        block_size=50,
        n_block=1,
        top_name="fixed",
        bottom_name="moving")
    # Create list of selected sites
    ref_sites_sel = flex.vec3_double()
    mov_sites_sel = flex.vec3_double()
    for ia, ib, m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches):
        if m not in ("|", "*"):
            continue
        # Check that the sites are flagged to be used
        if ref_flags[ia] and mov_flags[ib]:
            # Append sites to list to align
            ref_sites_sel.append(ref_sites[ia])
            mov_sites_sel.append(mov_sites[ib])
    if ref_sites_sel.size() == 0:
        raise Exception("No matching C-alpha atoms.")
    lsq_rt = superpose.least_squares_fit(
        reference_sites=ref_sites_sel,
        other_sites=mov_sites_sel).rt()
    return lsq_rt, mov_sites_sel, ref_sites_sel
# Constructor: find groups of chains related by translational NCS (tNCS).
#
# Parameters (as used by the code below):
#   pdb_hierarchy -- input hierarchy. Its total atoms_size() is taken before
#     any filtering and used as the denominator of the per-pair scattering
#     fraction. Only atoms selected by "altloc ' ' and (protein or
#     nucleotide)" from the first model are analyzed.
#   crystal_symmetry -- supplies the unit cell and the expansion to P1.
#   angular_difference_threshold_deg -- a chain pair is linked in the graph
#     only if its smallest superposition rotation angle is below this value.
#   sequence_identity_threshold -- a chain pair is linked only if its percent
#     identity is above this value. Superposition of non-identical chains is
#     attempted when identity exceeds 2*threshold - 100.
#   quiet -- when true, none of the print() calls are executed.
#
# Outline:
#   1. Each chain c1 is removed from the working copy and compared with
#      every chain of the P1 expansion of what remains. Identical chains
#      (same sequence string and same atom count) are paired atom-for-atom;
#      otherwise the sequences are aligned and same-named atoms of "|"
#      columns are paired.
#   2. For every chain pair, all symmetry copies are recorded in `result`
#      keyed by frozenset([c1, orig_c2]); the copy with the smallest rotation
#      angle is kept.
#   3. Pairs passing both thresholds become edges of a graph; connected
#      components are the tNCS groups.
#   4. For every pair of chains within a group an ext.pair is appended to
#      self.ncs_pairs, using the group-averaged radius and fracscat.
#
# Results stored on self:
#   ncs_pairs   -- list of ext.pair objects (rho_mn left empty, to be set
#                  later).
#   tncsresults -- [largest order, chain id, vectors, fracscat]; element 3 is
#                  overwritten for each group, so it holds the value of the
#                  last group processed.
#
# NOTE(review): this definition is stored on collapsed physical lines; the
# nesting of the trailing maxorder/tncsresults bookkeeping and of the final
# summary print cannot be determined from this layout -- confirm against the
# upstream file before reformatting.
# NOTE(review): within a connected group, result[frozenset(pair)][1] is
# indexed for every chain combination; presumably it is never None there --
# verify for groups whose members are linked only indirectly.
def __init__(self, pdb_hierarchy, crystal_symmetry, angular_difference_threshold_deg=5., sequence_identity_threshold=90., quiet=False): h = pdb_hierarchy superposition_threshold = 2 * sequence_identity_threshold - 100. n_atoms_all = h.atoms_size() s_str = "altloc ' ' and (protein or nucleotide)" h = h.select(h.atom_selection_cache().selection(s_str)) h1 = iotbx.pdb.hierarchy.root() h1.append_model(h.models()[0].detached_copy()) unit_cell = crystal_symmetry.unit_cell() result = {} if not quiet: print("Find groups of chains related by translational NCS") # double loop over chains to find matching pairs related by pure translation for c1 in h1.chains(): c1.parent().remove_chain(c1) nchains = len(h1.models()[0].chains()) if ([c1.is_protein(), c1.is_na()].count(True) == 0): continue r1 = list(c1.residues()) c1_seq = "".join(c1.as_sequence()) sc_1_tmp = c1.atoms().extract_xyz() h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry) for (ii, c2) in enumerate(h1_p1.chains()): orig_c2 = h1.models()[0].chains()[ii % nchains] r2 = list(c2.residues()) c2_seq = "".join(c2.as_sequence()) sites_cart_1, sites_cart_2 = None, None sc_2_tmp = c2.atoms().extract_xyz() # chains are identical if (c1_seq == c2_seq and sc_1_tmp.size() == sc_2_tmp.size()): sites_cart_1 = sc_1_tmp sites_cart_2 = sc_2_tmp p_identity = 100. # chains are not identical, do alignment else: align_obj = mmtbx.alignment.align(seq_a=c1_seq, seq_b=c2_seq) alignment = align_obj.extract_alignment() matches = alignment.matches() equal = matches.count("|") total = len(alignment.a) - alignment.a.count("-") p_identity = 100. 
* equal / max(1, total) if (p_identity > superposition_threshold): sites_cart_1 = flex.vec3_double() sites_cart_2 = flex.vec3_double() for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches): if (i1 is not None and i2 is not None and match == "|"): r1i, r2i = r1[i1], r2[i2] assert r1i.resname == r2i.resname, [ r1i.resname, r2i.resname, i1, i2 ] for a1 in r1i.atoms(): for a2 in r2i.atoms(): if (a1.name == a2.name): sites_cart_1.append(a1.xyz) sites_cart_2.append(a2.xyz) break # superpose two sequence-aligned chains if ([sites_cart_1, sites_cart_2].count(None) == 0): lsq_fit_obj = superpose.least_squares_fit( reference_sites=sites_cart_1, other_sites=sites_cart_2) angle = lsq_fit_obj.r.rotation_angle() t_frac = unit_cell.fractionalize( (sites_cart_1 - sites_cart_2).mean()) t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1] radius = flex.sum( flex.sqrt((sites_cart_1 - sites_cart_1.mean() ).dot())) / sites_cart_1.size() * 4. / 3. fracscat = min(c1.atoms_size(), c2.atoms_size()) / n_atoms_all result.setdefault(frozenset([c1, orig_c2]), []).append([ p_identity, [lsq_fit_obj.r, t_frac, angle, radius, fracscat] ]) else: result.setdefault(frozenset([c1, orig_c2]), []).append([p_identity, None]) # Build graph g = graph.adjacency_list() vertex_handle = {} for key in result: seqid = result[key][0][0] sup = min(result[key], key=lambda s: 0 if s[1] is None else s[1][2])[1] result[key] = [seqid, sup] if ((seqid > sequence_identity_threshold) and (sup[2] < angular_difference_threshold_deg)): (c1, c2) = key if (c1 not in vertex_handle): vertex_handle[c1] = g.add_vertex(label=c1) if (c2 not in vertex_handle): vertex_handle[c2] = g.add_vertex(label=c2) g.add_edge(vertex1=vertex_handle[c1], vertex2=vertex_handle[c2]) # Do connected component analysis and compose final tNCS pairs object components = connected_component_algorithm.connected_components(g) import itertools self.ncs_pairs = [] self.tncsresults = [0, "", [], 0.0] for (i, group) in 
enumerate(components): chains = [g.vertex_label(vertex=v) for v in group] fracscats = [] radii = [] for pair in itertools.combinations(chains, 2): sup = result[frozenset(pair)][1] fracscats.append(sup[-1]) radii.append(sup[-2]) fs = sum(fracscats) / len(fracscats) self.tncsresults[3] = fs # store fracscat in array rad = sum(radii) / len(radii) #import code, traceback; code.interact(local=locals(), banner="".join( traceback.format_stack(limit=10) ) ) maxorder = 1 vectors = [] previous_id = next(itertools.combinations(chains, 2))[0].id for pair in itertools.combinations(chains, 2): sup = result[frozenset(pair)][1] ncs_pair = ext.pair( r=sup[0], t=sup[1], radius=rad, radius_estimate=rad, fracscat=fs, rho_mn=flex.double( ), # rho_mn undefined, needs to be set later id=i) self.ncs_pairs.append(ncs_pair) # show tNCS pairs in group fmt = "group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f" t = ",".join([("%6.3f" % t_).strip() for t_ in sup[1]]).strip() if not quiet: print(fmt % (i, pair[0].id, pair[1].id, sup[2], t, fs)) if pair[0].id == previous_id: maxorder += 1 orthoxyz = unit_cell.orthogonalize(sup[1]) vectors.append((sup[1], orthoxyz, sup[2])) else: previous_id = pair[0].id maxorder = 1 vectors = [] if maxorder > self.tncsresults[0]: self.tncsresults[0] = maxorder self.tncsresults[1] = previous_id self.tncsresults[2] = vectors if not quiet: print("Largest TNCS order, peptide chain, fracvector, orthvector, angle, fracscat = ", \ str(self.tncsresults))
def __init__(self,
             pdb_hierarchy,
             crystal_symmetry,
             angular_difference_threshold_deg=10.,
             sequence_identity_threshold=90.):
    """Find chain pairs related by (nearly) pure translation.

    pdb_hierarchy -- input hierarchy. Its total atom count is taken before
        filtering and is the denominator of ``fracscat``. Only atoms
        selected by "altloc ' ' and (protein or nucleotide)" are analyzed.
    crystal_symmetry -- supplies the unit cell and the expansion to P1.
    angular_difference_threshold_deg -- a superposed pair is accepted only
        if the rotation angle of the fit is below this value.
    sequence_identity_threshold -- non-identical chains are superposed only
        if their percent identity exceeds this value.

    Each accepted pair is printed and stored as an ``ext.pair`` in
    ``self.ncs_pairs`` (``rho_mn`` is left empty, to be set later).
    """
    h = pdb_hierarchy
    n_atoms_all = h.atoms_size()
    s_str = "altloc ' ' and (protein or nucleotide)"
    h = h.select(h.atom_selection_cache().selection(s_str))
    h1 = h.deep_copy()
    unit_cell = crystal_symmetry.unit_cell()
    result = []
    # double loop over chains to find matching pairs related by pure translation
    for c1 in h1.chains():
        # Detach c1 before expanding, so it is only compared with symmetry
        # copies of the chains still left in h1.
        c1.parent().remove_chain(c1)
        if([c1.is_protein(), c1.is_na()].count(True)==0): continue
        r1 = list(c1.residues())
        c1_seq = "".join(c1.as_sequence())
        sc_1_tmp = c1.atoms().extract_xyz()
        h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
        for c2 in h1_p1.chains():
            r2 = list(c2.residues())
            c2_seq = "".join(c2.as_sequence())
            sites_cart_1, sites_cart_2 = None, None
            sc_2_tmp = c2.atoms().extract_xyz()
            # chains are identical
            if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
                sites_cart_1 = sc_1_tmp
                sites_cart_2 = sc_2_tmp
            # chains are not identical, do alignment
            else:
                align_obj = mmtbx.alignment.align(seq_a=c1_seq, seq_b=c2_seq)
                alignment = align_obj.extract_alignment()
                matches = alignment.matches()
                equal = matches.count("|")
                total = len(alignment.a) - alignment.a.count("-")
                p_identity = 100.*equal/max(1, total)
                if(p_identity>sequence_identity_threshold):
                    sites_cart_1 = flex.vec3_double()
                    sites_cart_2 = flex.vec3_double()
                    for i1, i2, match in zip(alignment.i_seqs_a,
                                             alignment.i_seqs_b,
                                             matches):
                        if(i1 is not None and i2 is not None and match=="|"):
                            r1i, r2i = r1[i1], r2[i2]
                            assert r1i.resname==r2i.resname, [
                                r1i.resname, r2i.resname, i1, i2]
                            # Pair each atom of r1i with the first
                            # same-named atom of r2i.
                            for a1 in r1i.atoms():
                                for a2 in r2i.atoms():
                                    if(a1.name == a2.name):
                                        sites_cart_1.append(a1.xyz)
                                        sites_cart_2.append(a2.xyz)
                                        break
            # superpose two sequence-aligned chains
            if([sites_cart_1, sites_cart_2].count(None)==0):
                lsq_fit_obj = superpose.least_squares_fit(
                    reference_sites=sites_cart_1,
                    other_sites=sites_cart_2)
                angle = lsq_fit_obj.r.rotation_angle()
                if(angle < angular_difference_threshold_deg):
                    t_frac = unit_cell.fractionalize(
                        (sites_cart_1-sites_cart_2).mean())
                    t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
                    radius = flex.sum(flex.sqrt((sites_cart_1-
                        sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
                    # float() keeps this a true fraction even where "/" on
                    # two integers truncates (Python 2 without
                    # "from __future__ import division").
                    fracscat = float(c1.atoms_size())/n_atoms_all
                    result.append([lsq_fit_obj.r, t_frac, angle, radius, fracscat])
                    # show tNCS group
                    fmt = "chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
                    t = ",".join([("%6.3f"%t_).strip() for t_ in t_frac]).strip()
                    # Single-argument print(...) behaves identically as a
                    # Python 2 statement and a Python 3 function call.
                    print(fmt%(c1.id, c2.id, angle, t, fracscat))
    # compose final tNCS pairs object
    self.ncs_pairs = []
    for r, t, _angle, rad, fs in result:
        ncs_pair = ext.pair(
            r=r,
            t=t,
            radius=rad,
            radius_estimate=rad,
            fracscat=fs,
            rho_mn=flex.double()) # rho_mn undefined, needs to be set later
        self.ncs_pairs.append(ncs_pair)
# Command-line driver for a sequence-based structure superposition
# (default command name "mmtbx.super").
#
# args -- list of command-line arguments. With no arguments a usage line is
#   printed and the function returns. Arguments that are existing files are
#   taken, in order, as the fixed and the moving structure (a third file
#   raises Sorry); all other arguments are handed to the PHIL argument
#   interpreter of master_params (home scope "super"), and any failure to
#   process one raises Sorry.
# command_name -- name shown in the usage line and the banner.
#
# Steps performed after argument handling:
#   1. File names given positionally override params.super.fixed / .moving;
#      if either is still missing, Sorry is raised.
#   2. The effective parameters are printed between "#phil __ON__" and
#      "#phil __OFF__" markers.
#   3. Both structures are read with iotbx.pdb.input, and sequence, sites and
#      per-site flags are obtained from extract_sequence_and_sites.
#   4. The sequences are aligned with mmtbx.alignment.align using the gap
#      penalties, similarity matrix and style from params.super; the
#      alignment is pretty-printed with identity and similarity percentages.
#   5. Site pairs at columns marked "|" or "*" with both flags set are
#      least-squares superposed (Sorry is raised if there are none) and the
#      RMSD is printed.
#   6. Every atom of the moving structure is transformed by
#      lsq_fit.r * xyz + lsq_fit.t and the result is written to
#      params.super.moved.
#
# NOTE(review): uses Python 2 print statements.
# NOTE(review): this definition is stored on collapsed physical lines, so the
# line breaks inside its triple-quoted messages are not visible here; restore
# the original layout from the upstream file rather than re-wrapping by hand.
def run(args, command_name="mmtbx.super"): if (len(args) == 0): print "usage: %s fixed.pdb moving.pdb [parameter=value ...]" % command_name return print "#" print "# ", command_name print "#" print "# A lightweight sequence-based structure superposition tool." print "#" print "#" phil_objects = [] argument_interpreter = master_params.command_line_argument_interpreter( home_scope="super") fixed_pdb_file_name = None moving_pdb_file_name = None for arg in args: if (os.path.isfile(arg)): if (fixed_pdb_file_name is None): fixed_pdb_file_name = arg elif (moving_pdb_file_name is None): moving_pdb_file_name = arg else: raise Sorry("Too many file names.") else: try: command_line_params = argument_interpreter.process(arg=arg) except KeyboardInterrupt: raise except Exception: raise Sorry("Unknown file or keyword: %s" % arg) else: phil_objects.append(command_line_params) working_params = master_params.fetch(sources=phil_objects) params = working_params.extract() def raise_missing(what): raise Sorry("""\ Missing file name for %(what)s structure: Please add %(what)s=file_name to the command line to specify the %(what)s structure.""" % vars()) if (fixed_pdb_file_name is None): if (params.super.fixed is None): raise_missing("fixed") else: params.super.fixed = fixed_pdb_file_name if (moving_pdb_file_name is None): if (params.super.moving is None): raise_missing("moving") else: params.super.moving = moving_pdb_file_name print "#Parameters used:" print "#phil __ON__" print working_params = master_params.format(python_object=params) working_params.show() print print "#phil __OFF__" print print "Reading fixed structure:", params.super.fixed fixed_pdb = iotbx.pdb.input(file_name=params.super.fixed) print print "Reading moving structure:", params.super.moving moving_pdb = iotbx.pdb.input(file_name=params.super.moving) print fixed_seq, fixed_sites, fixed_site_flags = extract_sequence_and_sites( pdb_input=fixed_pdb) moving_seq, moving_sites, moving_site_flags = extract_sequence_and_sites( 
pdb_input=moving_pdb) print "Computing sequence alignment..." align_obj = mmtbx.alignment.align( seq_a=fixed_seq, seq_b=moving_seq, gap_opening_penalty=params.super.gap_opening_penalty, gap_extension_penalty=params.super.gap_extension_penalty, similarity_function=params.super.similarity_matrix, style=params.super.alignment_style) print "done." print alignment = align_obj.extract_alignment() matches = alignment.matches() equal = matches.count("|") similar = matches.count("*") total = len(alignment.a) - alignment.a.count("-") alignment.pretty_print( matches=matches, block_size=50, n_block=1, top_name="fixed", bottom_name="moving", comment="""\ The alignment used in the superposition is shown below. The sequence identity (fraction of | symbols) is %4.1f%% of the aligned length of the fixed molecule sequence. The sequence similarity (fraction of | and * symbols) is %4.1f%% of the aligned length of the fixed molecule sequence. """ % (100.*equal/max(1,total), 100.*(equal+similar)/max(1,total))) fixed_sites_sel = flex.vec3_double() moving_sites_sel = flex.vec3_double() for ia,ib,m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches): if (m not in ["|", "*"]): continue if (fixed_site_flags[ia] and moving_site_flags[ib]): fixed_sites_sel.append(fixed_sites[ia]) moving_sites_sel.append(moving_sites[ib]) print "Performing least-squares superposition of C-alpha atom pairs:" print " Number of C-alpha atoms pairs in matching residues" print " indicated by | or * above:", fixed_sites_sel.size() if (fixed_sites_sel.size() == 0): raise Sorry("No matching C-alpha atoms.") lsq_fit = superpose.least_squares_fit( reference_sites=fixed_sites_sel, other_sites=moving_sites_sel) rmsd = fixed_sites_sel.rms_difference(lsq_fit.other_sites_best_fit()) print " RMSD between the aligned C-alpha atoms: %.3f" % rmsd print print "Writing moved pdb to file: %s" % params.super.moved pdb_hierarchy = moving_pdb.construct_hierarchy() for atom in pdb_hierarchy.atoms(): atom.xyz = lsq_fit.r * 
matrix.col(atom.xyz) + lsq_fit.t pdb_hierarchy.write_pdb_file(file_name=params.super.moved, append_end=True) print
# Entry point of a lightweight sequence-based structure superposition tool
# (default command name "mmtbx.super").
#
# args -- list of command-line arguments. An empty list prints a usage line
#   and returns. Each argument that names an existing file becomes the fixed
#   structure, then the moving structure (a third file raises Sorry). Every
#   other argument is processed by the PHIL argument interpreter obtained
#   from master_params (home scope "super"); a processing failure raises
#   Sorry, while KeyboardInterrupt is re-raised unchanged.
# command_name -- name shown in the usage line and the banner.
#
# After argument handling the function:
#   - lets positional file names override params.super.fixed / .moving and
#     raises Sorry if either structure is still unspecified;
#   - prints the effective parameters between "#phil __ON__" and
#     "#phil __OFF__";
#   - reads both files with iotbx.pdb.input and gets sequence, sites and
#     per-site flags from extract_sequence_and_sites;
#   - aligns the sequences with mmtbx.alignment.align using settings from
#     params.super and pretty-prints the alignment with identity and
#     similarity percentages relative to the gap-free fixed sequence;
#   - least-squares superposes the site pairs at "|" or "*" columns whose
#     flags are both set (Sorry if none) and prints the RMSD;
#   - applies lsq_fit.r * xyz + lsq_fit.t to every atom of the moving
#     structure and writes it to params.super.moved.
#
# NOTE(review): uses Python 2 print statements.
# NOTE(review): this definition is stored on collapsed physical lines, so the
# line breaks inside its triple-quoted messages are not visible here; restore
# the original layout from the upstream file rather than re-wrapping by hand.
def run(args, command_name="mmtbx.super"): if len(args) == 0: print "usage: %s fixed.pdb moving.pdb [parameter=value ...]" % command_name return print "#" print "# ", command_name print "#" print "# A lightweight sequence-based structure superposition tool." print "#" print "#" phil_objects = [] argument_interpreter = master_params.command_line_argument_interpreter(home_scope="super") fixed_pdb_file_name = None moving_pdb_file_name = None for arg in args: if os.path.isfile(arg): if fixed_pdb_file_name is None: fixed_pdb_file_name = arg elif moving_pdb_file_name is None: moving_pdb_file_name = arg else: raise Sorry("Too many file names.") else: try: command_line_params = argument_interpreter.process(arg=arg) except KeyboardInterrupt: raise except Exception: raise Sorry("Unknown file or keyword: %s" % arg) else: phil_objects.append(command_line_params) working_params = master_params.fetch(sources=phil_objects) params = working_params.extract() def raise_missing(what): raise Sorry( """\ Missing file name for %(what)s structure: Please add %(what)s=file_name to the command line to specify the %(what)s structure.""" % vars() ) if fixed_pdb_file_name is None: if params.super.fixed is None: raise_missing("fixed") else: params.super.fixed = fixed_pdb_file_name if moving_pdb_file_name is None: if params.super.moving is None: raise_missing("moving") else: params.super.moving = moving_pdb_file_name print "#Parameters used:" print "#phil __ON__" print working_params = master_params.format(python_object=params) working_params.show() print print "#phil __OFF__" print print "Reading fixed structure:", params.super.fixed fixed_pdb = iotbx.pdb.input(file_name=params.super.fixed) print print "Reading moving structure:", params.super.moving moving_pdb = iotbx.pdb.input(file_name=params.super.moving) print fixed_seq, fixed_sites, fixed_site_flags = extract_sequence_and_sites(pdb_input=fixed_pdb) moving_seq, moving_sites, moving_site_flags = 
extract_sequence_and_sites(pdb_input=moving_pdb) print "Computing sequence alignment..." align_obj = mmtbx.alignment.align( seq_a=fixed_seq, seq_b=moving_seq, gap_opening_penalty=params.super.gap_opening_penalty, gap_extension_penalty=params.super.gap_extension_penalty, similarity_function=params.super.similarity_matrix, style=params.super.alignment_style, ) print "done." print alignment = align_obj.extract_alignment() matches = alignment.matches() equal = matches.count("|") similar = matches.count("*") total = len(alignment.a) - alignment.a.count("-") alignment.pretty_print( matches=matches, block_size=50, n_block=1, top_name="fixed", bottom_name="moving", comment="""\ The alignment used in the superposition is shown below. The sequence identity (fraction of | symbols) is %4.1f%% of the aligned length of the fixed molecule sequence. The sequence similarity (fraction of | and * symbols) is %4.1f%% of the aligned length of the fixed molecule sequence. """ % (100.0 * equal / max(1, total), 100.0 * (equal + similar) / max(1, total)), ) fixed_sites_sel = flex.vec3_double() moving_sites_sel = flex.vec3_double() for ia, ib, m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches): if m not in ["|", "*"]: continue if fixed_site_flags[ia] and moving_site_flags[ib]: fixed_sites_sel.append(fixed_sites[ia]) moving_sites_sel.append(moving_sites[ib]) print "Performing least-squares superposition of C-alpha atom pairs:" print " Number of C-alpha atoms pairs in matching residues" print " indicated by | or * above:", fixed_sites_sel.size() if fixed_sites_sel.size() == 0: raise Sorry("No matching C-alpha atoms.") lsq_fit = superpose.least_squares_fit(reference_sites=fixed_sites_sel, other_sites=moving_sites_sel) rmsd = fixed_sites_sel.rms_difference(lsq_fit.other_sites_best_fit()) print " RMSD between the aligned C-alpha atoms: %.3f" % rmsd print print "Writing moved pdb to file: %s" % params.super.moved pdb_hierarchy = moving_pdb.construct_hierarchy() for atom in 
pdb_hierarchy.atoms(): atom.xyz = lsq_fit.r * matrix.col(atom.xyz) + lsq_fit.t pdb_hierarchy.write_pdb_file(file_name=params.super.moved, append_end=True) print
def __init__(self,
             pdb_hierarchy,
             crystal_symmetry,
             angular_difference_threshold_deg=5.,
             sequence_identity_threshold=90.):
    """Find groups of chains related by translational NCS (tNCS).

    pdb_hierarchy -- input hierarchy. Its total atom count is taken before
        filtering and is the denominator of ``fracscat``. Only atoms
        selected by "altloc ' ' and (protein or nucleotide)" from the first
        model are analyzed.
    crystal_symmetry -- supplies the unit cell and the expansion to P1.
    angular_difference_threshold_deg -- a chain pair is linked only if the
        smallest rotation angle among its superpositions is below this.
    sequence_identity_threshold -- a chain pair is linked only if its
        percent identity is above this value. Non-identical chains are
        superposed when identity exceeds ``2*threshold - 100``.

    Linked pairs form a graph whose connected components are the tNCS
    groups. For every pair of chains within a group, an ``ext.pair``
    carrying the group index and the group-averaged radius and fracscat is
    appended to ``self.ncs_pairs`` (``rho_mn`` is left empty, to be set
    later), and a summary line is printed.
    """
    h = pdb_hierarchy
    superposition_threshold = 2*sequence_identity_threshold - 100.
    n_atoms_all = h.atoms_size()
    s_str = "altloc ' ' and (protein or nucleotide)"
    h = h.select(h.atom_selection_cache().selection(s_str))
    h1 = iotbx.pdb.hierarchy.root()
    h1.append_model(h.models()[0].detached_copy())
    unit_cell = crystal_symmetry.unit_cell()
    result = {}
    # Single-argument print(...) behaves identically as a Python 2 statement
    # and a Python 3 function call.
    print("Find groups of chains related by translational NCS")
    # double loop over chains to find matching pairs related by pure translation
    for c1 in h1.chains():
        # Detach c1 before expanding, so it is only compared with symmetry
        # copies of the chains still left in h1.
        c1.parent().remove_chain(c1)
        nchains = len(h1.models()[0].chains())
        if([c1.is_protein(), c1.is_na()].count(True)==0): continue
        r1 = list(c1.residues())
        c1_seq = "".join(c1.as_sequence())
        sc_1_tmp = c1.atoms().extract_xyz()
        h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
        for (ii, c2) in enumerate(h1_p1.chains()):
            # Map the symmetry copy back to a chain of the unexpanded model.
            orig_c2 = h1.models()[0].chains()[ii%nchains]
            r2 = list(c2.residues())
            c2_seq = "".join(c2.as_sequence())
            sites_cart_1, sites_cart_2 = None, None
            sc_2_tmp = c2.atoms().extract_xyz()
            # chains are identical
            if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
                sites_cart_1 = sc_1_tmp
                sites_cart_2 = sc_2_tmp
                p_identity = 100.
            # chains are not identical, do alignment
            else:
                align_obj = mmtbx.alignment.align(seq_a=c1_seq, seq_b=c2_seq)
                alignment = align_obj.extract_alignment()
                matches = alignment.matches()
                equal = matches.count("|")
                total = len(alignment.a) - alignment.a.count("-")
                p_identity = 100.*equal/max(1, total)
                if(p_identity>superposition_threshold):
                    sites_cart_1 = flex.vec3_double()
                    sites_cart_2 = flex.vec3_double()
                    for i1, i2, match in zip(alignment.i_seqs_a,
                                             alignment.i_seqs_b,
                                             matches):
                        if(i1 is not None and i2 is not None and match=="|"):
                            r1i, r2i = r1[i1], r2[i2]
                            assert r1i.resname==r2i.resname, [
                                r1i.resname, r2i.resname, i1, i2]
                            # Pair each atom of r1i with the first
                            # same-named atom of r2i.
                            for a1 in r1i.atoms():
                                for a2 in r2i.atoms():
                                    if(a1.name == a2.name):
                                        sites_cart_1.append(a1.xyz)
                                        sites_cart_2.append(a2.xyz)
                                        break
            # superpose two sequence-aligned chains
            if([sites_cart_1, sites_cart_2].count(None)==0):
                lsq_fit_obj = superpose.least_squares_fit(
                    reference_sites=sites_cart_1,
                    other_sites=sites_cart_2)
                angle = lsq_fit_obj.r.rotation_angle()
                t_frac = unit_cell.fractionalize(
                    (sites_cart_1-sites_cart_2).mean())
                t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
                radius = flex.sum(flex.sqrt((sites_cart_1-
                    sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
                # float() keeps this a true fraction even where "/" on two
                # integers truncates (Python 2 without
                # "from __future__ import division").
                fracscat = float(
                    min(c1.atoms_size(), c2.atoms_size()))/n_atoms_all
                result.setdefault(frozenset([c1, orig_c2]), []).append(
                    [p_identity,
                     [lsq_fit_obj.r, t_frac, angle, radius, fracscat]])
            else:
                result.setdefault(frozenset([c1, orig_c2]), []).append(
                    [p_identity, None])
    # Build graph
    g = graph.adjacency_list()
    vertex_handle = {}
    for key in result:
        seqid = result[key][0][0]
        # Keep the symmetry copy with the smallest rotation angle; entries
        # without a superposition sort first (key 0).
        sup = min(result[key],
                  key=lambda s: 0 if s[1] is None else s[1][2])[1]
        result[key] = [seqid, sup]
        if ((seqid > sequence_identity_threshold) and
            (sup[2] < angular_difference_threshold_deg)):
            (c1, c2) = key
            if (c1 not in vertex_handle):
                vertex_handle[c1] = g.add_vertex(label=c1)
            if (c2 not in vertex_handle):
                vertex_handle[c2] = g.add_vertex(label=c2)
            g.add_edge(vertex1=vertex_handle[c1], vertex2=vertex_handle[c2])
    # Do connected component analysis and compose final tNCS pairs object
    components = connected_component_algorithm.connected_components(g)
    import itertools
    self.ncs_pairs = []
    for (i, group) in enumerate(components):
        chains = [g.vertex_label(vertex=v) for v in group]
        fracscats = []
        radii = []
        for pair in itertools.combinations(chains, 2):
            sup = result[frozenset(pair)][1]
            fracscats.append(sup[-1])
            radii.append(sup[-2])
        # Every pair of the group shares the group-averaged values.
        fs = sum(fracscats)/len(fracscats)
        rad = sum(radii)/len(radii)
        for pair in itertools.combinations(chains, 2):
            sup = result[frozenset(pair)][1]
            ncs_pair = ext.pair(
                r=sup[0],
                t=sup[1],
                radius=rad,
                radius_estimate=rad,
                fracscat=fs,
                rho_mn=flex.double(), # rho_mn undefined, needs to be set later
                id=i)
            self.ncs_pairs.append(ncs_pair)
            # show tNCS pairs in group
            fmt = "group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
            t = ",".join([("%6.3f"%t_).strip() for t_ in sup[1]]).strip()
            print(fmt%(i, pair[0].id, pair[1].id, sup[2], t, fs))