def test_13_3d_modelling_centroid(self): #model with no optimisation """ quick test to generate 3D coordinates from 3? simple models??? """ if ONLY and ONLY != '13': return if CHKTIME: t0 = time() try: __import__('IMP') except ImportError: warn('IMP not found, skipping test\n') return test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000) test_chr.add_experiment('exp1', 20000, tad_def=exp4, hic_data=PATH + '/20Kb/chrT/chrT_D.tsv', silent=True) exp = test_chr.experiments[0] exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True) exp.filter_columns(silent=True) exp.normalize_hic(silent=True, factor=None) models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4, config={ 'kforce': 5, 'maxdist': 500, 'scale': 0.01, 'upfreq': 1.0, 'lowfreq': -0.6 }) models.save_models('models.pick') avg = models.average_model() nmd = len(models) dev = rmsdRMSD_wrapper([models[m]['x'] for m in xrange(nmd)] + [avg['x']], [models[m]['y'] for m in xrange(nmd)] + [avg['y']], [models[m]['z'] for m in xrange(nmd)] + [avg['z']], models._zeros, models.nloci, 200, range(len(models) + 1), len(models) + 1, int(False), 'rmsd', 0) centroid = models[models.centroid_model()] # find closest model = min([(k, dev[(k, nmd)]) for k in range(nmd)], key=lambda x: x[1])[0] self.assertEqual(centroid['rand_init'], models[model]['rand_init']) if CHKTIME: print '13', time() - t0
def calc_consistency(models, nloci, dcutoff=200):
    """
    Per-particle consistency over every pair of models, as a percentage.

    :param models: container of models; iterating over it must yield the
       keys used to index it, and each model must expose 'x', 'y' and 'z'
    :param nloci: number of particles per model
    :param 200 dcutoff: distance cutoff handed over to the comparison routine

    :returns: a list of `nloci` values: for each particle, the per-pair
       values returned by the comparison routine summed over all pairs,
       divided by the number of pairs and multiplied by 100
    """
    pairs = list(combinations(models, 2))
    totals = [0] * nloci
    for key_a, key_b in pairs:
        first = models[key_a]
        second = models[key_b]
        per_particle = rmsdRMSD_wrapper(first['x'], first['y'], first['z'],
                                        second['x'], second['y'], second['z'],
                                        nloci, dcutoff, 1)
        # accumulate the per-particle result of this pair
        for pos, value in enumerate(per_particle):
            totals[pos] += value
    npairs = len(pairs)
    return [float(total) / npairs * 100 for total in totals]
def calc_eqv_rmsd(models, beg, end, zeros, dcutoff=200, one=False,
                  what='score', normed=True):
    """
    Calculates the RMSD, dRMSD, the number of equivalent positions and a score
    combining these three measures. The measures are computed between the
    models of a group by the external ``rmsdRMSD_wrapper`` routine.

    :param models: collection of models indexable by 0 .. len(models) - 1,
       each model exposing its coordinates under the 'x', 'y' and 'z' keys
    :param beg: start particle number of the region to compare
    :param end: end particle number of the region to compare (excluded)
    :param zeros: list of True/False, one per particle: particles whose value
       is False are left out of the comparison
    :param 200 dcutoff: distance in nanometer from which it is considered
       that two particles are separated.
    :param False one: if True assumes that only two models are passed, and
       returns the rmsd of their comparison
    :param 'score' what: values to return. Can be one of 'score', 'rmsd',
       'drmsd' or 'eqv' (case insensitive)
    :param True normed: normalize result by maximum value (only applies to
       rmsd and drmsd)

    :raises NotImplementedError: if `what` is not one of the accepted values

    :returns: whatever ``rmsdRMSD_wrapper`` returns for the requested
       measure. For 'score', each pairwise comparison is scored according to:

       .. math::

         score_i = eqvs_i \\times \\frac{dRMSD_i / max(dRMSD)}
                                         {RMSD_i / max(RMSD)}

       where :math:`eqvs_i` is the number of equivalent position for the ith
       pairwise model comparison.
    """
    what = what.lower()
    if what not in ('score', 'rmsd', 'drmsd', 'eqv'):
        raise NotImplementedError("Only 'score', 'rmsd', 'drmsd' or 'eqv' " +
                                  "features are available\n")
    # particles kept for the comparison: the selection is the same for every
    # model and every axis, so compute it once
    kept = [i for i in range(beg, end) if zeros[i]]
    model_ids = list(range(len(models)))
    x = []
    y = []
    z = []
    for m in model_ids:
        model = models[m]
        x.append([model['x'][i] for i in kept])
        y.append([model['y'][i] for i in kept])
        z.append([model['z'][i] for i in kept])
    # filtered-out particles are already removed: every remaining one counts.
    # Sized from `kept` (not from x[0]) so an empty model list does not fail
    # here with an IndexError.
    zeros = tuple([True] * len(kept))
    scores = rmsdRMSD_wrapper(x, y, z, zeros, len(zeros), dcutoff,
                              model_ids, len(models), int(one), what,
                              int(normed))
    return scores
def calc_consistency(models, nloci, dcutoff=200):
    """
    Compare all pairs of models and report, for each particle, how the
    per-pair result of the comparison routine averages out (in percent).

    :param models: container of models; iterating over it must yield the
       keys used to index it, and each model must expose 'x', 'y' and 'z'
    :param nloci: number of particles per model
    :param 200 dcutoff: distance cutoff handed over to the comparison routine

    :returns: a list with one value per particle: the sum over all pairs of
       the value returned for that particle, divided by the number of pairs
       and multiplied by 100
    """
    model_pairs = list(combinations(models, 2))
    sums = [0] * nloci
    for left_key, right_key in model_pairs:
        left, right = models[left_key], models[right_key]
        compared = rmsdRMSD_wrapper(left['x'], left['y'], left['z'],
                                    right['x'], right['y'], right['z'],
                                    nloci, dcutoff, 1)
        for idx, part in enumerate(compared):
            sums[idx] += part
    n_pairs = len(model_pairs)
    return [float(acc) / n_pairs * 100 for acc in sums]
def calc_eqv_rmsd(models, beg, end, zeros, dcutoff=200, one=False,
                  what='score', normed=True):
    """
    Calculates the RMSD, dRMSD, the number of equivalent positions and a score
    combining these three measures. The measures are computed between the
    models of a group by the external ``rmsdRMSD_wrapper`` routine.

    :param models: collection of models indexable by 0 .. len(models) - 1,
       each model exposing its coordinates under the 'x', 'y' and 'z' keys
    :param beg: start particle number of the region to compare
    :param end: end particle number of the region to compare (excluded)
    :param zeros: list of True/False, one per particle: particles whose value
       is False are left out of the comparison
    :param 200 dcutoff: distance in nanometer from which it is considered
       that two particles are separated.
    :param False one: if True assumes that only two models are passed, and
       returns the rmsd of their comparison
    :param 'score' what: values to return. Can be one of 'score', 'rmsd',
       'drmsd' or 'eqv' (case insensitive)
    :param True normed: normalize result by maximum value (only applies to
       rmsd and drmsd)

    :raises NotImplementedError: if `what` is not one of the accepted values

    :returns: whatever ``rmsdRMSD_wrapper`` returns for the requested
       measure. For 'score', each pairwise comparison is scored according to:

       .. math::

         score_i = eqvs_i \\times \\frac{dRMSD_i / max(dRMSD)}
                                         {RMSD_i / max(RMSD)}

       where :math:`eqvs_i` is the number of equivalent position for the ith
       pairwise model comparison.
    """
    what = what.lower()
    if what not in ('score', 'rmsd', 'drmsd', 'eqv'):
        raise NotImplementedError("Only 'score', 'rmsd', 'drmsd' or 'eqv' " +
                                  "features are available\n")
    # the particle selection does not depend on the model nor on the axis:
    # build it once instead of three times per model
    kept = [i for i in range(beg, end) if zeros[i]]
    # list(range()) yields a real list under both Python 2 and Python 3
    model_ids = list(range(len(models)))
    x = []
    y = []
    z = []
    for m in model_ids:
        model = models[m]
        x.append([model['x'][i] for i in kept])
        y.append([model['y'][i] for i in kept])
        z.append([model['z'][i] for i in kept])
    # after filtering every remaining particle is used. The mask is sized
    # from `kept` (not from x[0]) so that an empty model list does not raise
    # an IndexError at this point.
    zeros = tuple([True] * len(kept))
    scores = rmsdRMSD_wrapper(x, y, z, zeros, len(zeros), dcutoff,
                              model_ids, len(models), int(one), what,
                              int(normed))
    return scores
def calc_eqv_rmsd(models, nloci, dcutoff=200, var='score', one=False):
    """
    Compare every pair of models and return a value per pair.

    :param models: collection of models indexable by 0 .. len(models) - 1,
       each model exposing its coordinates under the 'x', 'y' and 'z' keys
    :param nloci: number of particles per model
    :param 200 dcutoff: distance in nanometer from which it is considered
       that two particles are separated.
    :param 'score' var: value to return, can be either (i) 'drmsd' (symmetry
       independent: mirrors will show no differences) (ii) 'score' that is:

       ::

                                  dRMSD[i] / max(dRMSD)
         score[i] = eqvs[i] * -----------------------
                                   RMSD[i] / max(RMSD)

       where eqvs[i] is the number of equivalent position for the ith
       pairwise model comparison. Any other value leaves the un-normalised
       ``eqv * dRMSD / RMSD`` of each pair, stored under (md1, md2) only.
    :param False one: if True, return the dRMSD of the last compared pair
       instead of the dictionary (meant for the comparison of two models)

    :raises ValueError: if `one` is True and fewer than two models are given

    :returns: a dictionary keyed by pairs of model indexes; for 'score' and
       'drmsd' both (md1, md2) and (md2, md1) hold the same value. An empty
       dictionary is returned when there are fewer than two models. With
       `one` set, a single dRMSD value is returned instead.
    """
    scores = {}
    pair_drmsd = {}
    nrmsds = []
    drmsds = []
    nmodels = len(models)
    for md1 in range(nmodels):
        md1s = models[md1]
        for md2 in range(md1 + 1, nmodels):
            md2s = models[md2]
            eqv, nrmsd, drmsd = rmsdRMSD_wrapper(md1s['x'], md1s['y'],
                                                 md1s['z'], md2s['x'],
                                                 md2s['y'], md2s['z'],
                                                 nloci, dcutoff, 0)
            nrmsds.append(nrmsd)
            drmsds.append(drmsd)
            # remember the dRMSD by pair: the position in `drmsds` cannot be
            # matched against the dictionary keys, whose order is arbitrary
            pair_drmsd[(md1, md2)] = drmsd
            # NOTE(review): a pair with a zero RMSD would make this division
            # fail -- confirm whether identical models can reach this point
            scores[(md1, md2)] = eqv * drmsd / nrmsd
    if not scores:
        # fewer than two models: nothing was compared
        if one:
            raise ValueError('at least two models are needed to compute '
                             'a dRMSD\n')
        return scores
    if one:
        return drmsd
    if var == 'score':
        max_rmsd_ov_max_drmsd = max(nrmsds) / max(drmsds)
        # iterate over a copy: the mirrored keys are added while looping
        for md1, md2 in list(scores):
            score = scores[(md1, md2)] * max_rmsd_ov_max_drmsd
            scores[(md1, md2)] = score
            scores[(md2, md1)] = score
    elif var == 'drmsd':
        for (md1, md2), value in pair_drmsd.items():
            scores[(md1, md2)] = value
            scores[(md2, md1)] = value
    return scores
def calc_eqv_rmsd(models, nloci, dcutoff=200, var='score', one=False):
    """
    Compare every pair of models and return a value per pair.

    :param models: collection of models indexable by 0 .. len(models) - 1,
       each model exposing its coordinates under the 'x', 'y' and 'z' keys
    :param nloci: number of particles per model
    :param 200 dcutoff: distance in nanometer from which it is considered
       that two particles are separated.
    :param 'score' var: value to return, can be either (i) 'drmsd' (symmetry
       independent: mirrors will show no differences) (ii) 'score' that is:

       ::

                                  dRMSD[i] / max(dRMSD)
         score[i] = eqvs[i] * -----------------------
                                   RMSD[i] / max(RMSD)

       where eqvs[i] is the number of equivalent position for the ith
       pairwise model comparison. Any other value leaves the un-normalised
       ``eqv * dRMSD / RMSD`` of each pair, stored under (md1, md2) only.
    :param False one: if True, return the dRMSD of the last compared pair
       instead of the dictionary (meant for the comparison of two models)

    :raises ValueError: if `one` is True and fewer than two models are given

    :returns: a dictionary keyed by pairs of model indexes; for 'score' and
       'drmsd' both (md1, md2) and (md2, md1) hold the same value. An empty
       dictionary is returned when there are fewer than two models. With
       `one` set, a single dRMSD value is returned instead.
    """
    nmodels = len(models)
    scores = {}
    # dRMSD of each pair, kept by key so that it can be written back to the
    # right pair whatever the iteration order of the dictionaries
    drmsd_by_pair = {}
    nrmsds = []
    drmsds = []
    for md1 in range(nmodels):
        md1s = models[md1]
        for md2 in range(md1 + 1, nmodels):
            md2s = models[md2]
            eqv, nrmsd, drmsd = rmsdRMSD_wrapper(
                md1s['x'], md1s['y'], md1s['z'],
                md2s['x'], md2s['y'], md2s['z'], nloci, dcutoff, 0)
            nrmsds.append(nrmsd)
            drmsds.append(drmsd)
            drmsd_by_pair[(md1, md2)] = drmsd
            # NOTE(review): a pair with a zero RMSD would make this division
            # fail -- confirm whether identical models can reach this point
            scores[(md1, md2)] = eqv * drmsd / nrmsd
    if not scores:
        # no pair to compare (fewer than two models)
        if one:
            raise ValueError('at least two models are needed to compute '
                             'a dRMSD\n')
        return scores
    if one:
        return drmsd
    if var == 'score':
        max_rmsd_ov_max_drmsd = max(nrmsds) / max(drmsds)
        # snapshot of the keys: mirrored pairs are inserted inside the loop
        for md1, md2 in list(scores):
            score = scores[(md1, md2)] * max_rmsd_ov_max_drmsd
            scores[(md1, md2)] = score
            scores[(md2, md1)] = score
    elif var == 'drmsd':
        for (md1, md2), value in drmsd_by_pair.items():
            scores[(md1, md2)] = value
            scores[(md2, md1)] = value
    return scores
def test_13_3d_modelling_centroid(self): """ quick test to generate 3D coordinates from 3? simple models??? """ if ONLY and ONLY != '13': return if CHKTIME: t0 = time() try: __import__('IMP') except ImportError: warn('IMP not found, skipping test\n') return test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000) test_chr.add_experiment('exp1', 20000, tad_def=exp4, hic_data=PATH + '/20Kb/chrT/chrT_D.tsv', silent=True) exp = test_chr.experiments[0] exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True) exp.filter_columns(silent=True) exp.normalize_hic(silent=True, factor=None) models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4, config={'kforce': 5, 'maxdist': 500, 'scale': 0.01, 'upfreq': 1.0, 'lowfreq': -0.6}) models.save_models('models.pick') avg = models.average_model() nmd = len(models) dev = rmsdRMSD_wrapper( [models[m]['x'] for m in xrange(nmd)] + [avg['x']], [models[m]['y'] for m in xrange(nmd)] + [avg['y']], [models[m]['z'] for m in xrange(nmd)] + [avg['z']], models._zeros, models.nloci, 200, range(len(models)+1), len(models)+1, int(False), 'rmsd', 0) centroid = models[models.centroid_model()] # find closest model = min([(k, dev[(k, nmd)] ) for k in range(nmd)], key=lambda x: x[1])[0] self.assertEqual(centroid['rand_init'], models[model]['rand_init']) if CHKTIME: print '13', time() - t0
def test_13_3d_modelling_centroid(self): """ quick test to generate 3D coordinates from 3? simple models??? """ if CHKTIME: t0 = time() try: __import__('IMP') except ImportError: warn('IMP not found, skipping test\n') return test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000) test_chr.add_experiment('exp1', 20000, tad_def=exp4, hic_data=PATH + '/20Kb/chrT/chrT_D.tsv', silent=True) exp = test_chr.experiments[0] exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True) exp.normalize_hic(silent=True) models = exp.model_region(51, 71, n_models=110, n_keep=25, n_cpus=4, config={'kforce': 5, 'maxdist': 500, 'scale': 0.01, 'upfreq': 1.0, 'lowfreq': -0.6}) models.save_models('models.pick') avg = models.average_model() a = rmsdRMSD_wrapper([models[m]['x'] for m in xrange(len(models))] + [avg['x']], [models[m]['y'] for m in xrange(len(models))] + [avg['y']], [models[m]['z'] for m in xrange(len(models))] + [avg['z']], models.nloci, 410, range(len(models)+1), len(models)+1, int(False), 'score', 1) self.assertEqual(21, sorted([(k, sum([a[(i, j)] for i, j in a if i==k or j==k])) for k in range(26)], key=lambda x: x[1])[-1][0]) centroid = models[models.centroid_model()] expsc = sum([sum([a[(i, j)] for i, j in a if i==k or j==k]) for k in range(26)]) / 26 # find closest model = min([(k, sum([a[(i, j)] for i, j in a if i==k or j==k])) for k in range(26)], key=lambda x:abs(x[1]-expsc))[0] self.assertEqual(centroid['rand_init'], models[model]['rand_init']) if CHKTIME: print '13', time() - t0
def calc_eqv_rmsd(models, nloci, dcutoff=200, one=False, what='score',
                  normed=True):
    """
    Calculates the RMSD, dRMSD, the number of equivalent positions and a score
    combining these three measures. The measures are computed between the
    models of a group by the external ``rmsdRMSD_wrapper`` routine.

    :param models: collection of models indexable by 0 .. len(models) - 1,
       each model exposing its coordinates under the 'x', 'y' and 'z' keys
    :param nloci: number of particles per model
    :param 200 dcutoff: distance in nanometer from which it is considered
       that two particles are separated.
    :param False one: if True assumes that only two models are passed, and
       returns the rmsd of their comparison
    :param 'score' what: values to return. Can be one of 'score', 'rmsd',
       'drmsd' or 'eqv' (case insensitive)
    :param True normed: normalize result by maximum value (only applies to
       rmsd and drmsd)

    :raises NotImplementedError: if `what` is not one of the accepted values

    :returns: whatever ``rmsdRMSD_wrapper`` returns for the requested
       measure. For 'score', each pairwise comparison is scored according to:

       .. math::

         score_i = eqvs_i \\times \\frac{dRMSD_i / max(dRMSD)}
                                         {RMSD_i / max(RMSD)}

       where :math:`eqvs_i` is the number of equivalent position for the ith
       pairwise model comparison.
    """
    what = what.lower()
    if what not in ('score', 'rmsd', 'drmsd', 'eqv'):
        raise NotImplementedError("Only 'score', 'rmsd', 'drmsd' or 'eqv' " +
                                  "features are available\n")
    # list(range()) yields a real list under both Python 2 and Python 3
    model_ids = list(range(len(models)))
    x = []
    y = []
    z = []
    # fetch each model once and split its coordinates per axis
    for m in model_ids:
        model = models[m]
        x.append(model['x'])
        y.append(model['y'])
        z.append(model['z'])
    scores = rmsdRMSD_wrapper(x, y, z, nloci, dcutoff, model_ids,
                              len(models), int(one), what, int(normed))
    return scores
def test_13_3d_modelling_centroid(self): #model with no optimisation """ quick test to generate 3D coordinates from 3? simple models??? """ if ONLY and not "13" in ONLY: return if CHKTIME: t0 = time() try: __import__("IMP") except ImportError: warn("IMP not found, skipping test\n") return test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000) test_chr.add_experiment("exp1", 20000, tad_def=exp4, hic_data=PATH + "/20Kb/chrT/chrT_D.tsv", silent=True) exp = test_chr.experiments[0] exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True) exp.filter_columns(silent=True) exp.normalize_hic(silent=True, factor=None) models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4, config={ 'kforce': 5, 'maxdist': 500, 'scale': 0.01, 'kbending': 0.0, 'upfreq': 1.0, 'lowfreq': -0.6 }) #models.save_models('models.pick') avg = models.average_model() nmd = len(models) dev = rmsdRMSD_wrapper([models[m]["x"] for m in xrange(nmd)] + [avg["x"]], [models[m]["y"] for m in xrange(nmd)] + [avg["y"]], [models[m]["z"] for m in xrange(nmd)] + [avg["z"]], models._zeros, models.nloci, 200, range(len(models) + 1), len(models) + 1, int(False), "rmsd", 0) centroid = models[models.centroid_model()] # find closest model = min([(k, dev[(k, nmd)]) for k in range(nmd)], key=lambda x: x[1])[0] self.assertEqual(centroid["rand_init"], models[model]["rand_init"]) refmodels = load_structuralmodels(PATH + "/models.pick") refrestraints = refmodels._restraints refrestraints = dict( (r, (refrestraints[r][0], round(refrestraints[r][1], 2), round(refrestraints[r][2], 2))) for r in refrestraints) restraints = models._restraints restraints = dict((r, (restraints[r][0], round(restraints[r][1], 2), round(restraints[r][2], 2))) for r in restraints) self.assertEqual(refrestraints, restraints) if CHKTIME: print "13", time() - t0