def run_batch_job(exp, opts, m, u, l, s, outdir):
    """
    Build the 3D models for one combination of modeling parameters and save
    them in a sub-directory of *outdir* named after that combination.

    :param exp: object exposing ``_sub_experiment_zscore(beg, end)``, which is
       expected to return ``(zscores, values, zeros)``
    :param opts: options object; the attributes read here are ``beg``,
       ``end``, ``crm``, ``reso``, ``nmodels``, ``nkeep``, ``cpus`` and
       ``rand``
    :param m: value stored under the 'maxdist' key of the modeling config
    :param u: value stored under the 'upfreq' key of the modeling config
    :param l: value stored under the 'lowfreq' key of the modeling config
    :param s: value stored under the 'scale' key of the modeling config
    :param outdir: first component of the path the models are saved to
    """
    zscores, values, zeros = exp._sub_experiment_zscore(opts.beg, opts.end)
    nloci = opts.end - opts.beg + 1
    # one boolean per bin: True when the bin index is NOT listed in ``zeros``
    zeros = tuple([idx not in zeros for idx in xrange(nloci)])

    models = generate_3d_models(
        zscores, opts.reso, nloci,
        values=values,
        n_models=opts.nmodels,
        n_keep=opts.nkeep,
        n_cpus=opts.cpus,
        keep_all=True,
        start=int(opts.rand),
        container=None,
        config={'maxdist': m, 'upfreq': u, 'lowfreq': l, 'scale': s,
                'kforce': 5},
        coords={"crm": opts.crm, "start": opts.beg, "end": opts.end},
        zeros=zeros)

    # Save models
    dirname = 'cfg_%s_%s_%s_%s' % tuple(map(my_round, (m, u, l, s)))
    if opts.nmodels > 1:
        fname = 'models_%s-%s.pick' % (opts.rand,
                                       int(opts.rand) + opts.nmodels)
    else:
        fname = 'model_%s.pick' % (opts.rand)
    models.save_models(path.join(outdir, dirname, fname))
def run_batch_job(exp, opts, m, u, l, s, outdir):
    """
    Generate and save the 3D models for a single parameter combination.

    The models are written under ``outdir/cfg_<m>_<u>_<l>_<s>/`` (each value
    passed through ``my_round``), in a file whose name is built from
    ``opts.rand`` and ``opts.nmodels``.

    :param exp: object exposing ``_sub_experiment_zscore(beg, end)``
    :param opts: options object (reads ``beg``, ``end``, ``crm``, ``reso``,
       ``nmodels``, ``nkeep``, ``cpus`` and ``rand``)
    :param m: 'maxdist' entry of the modeling configuration
    :param u: 'upfreq' entry of the modeling configuration
    :param l: 'lowfreq' entry of the modeling configuration
    :param s: 'scale' entry of the modeling configuration
    :param outdir: first component of the output path
    """
    first, last = opts.beg, opts.end
    zscores, values, zeros = exp._sub_experiment_zscore(first, last)
    # turn the ``zeros`` collection into a per-bin mask (True = not in zeros)
    mask = []
    for pos in xrange(opts.end - opts.beg + 1):
        mask.append(pos not in zeros)
    zeros = tuple(mask)
    nloci = opts.end - opts.beg + 1

    coords = dict([("crm", opts.crm), ("start", opts.beg), ("end", opts.end)])
    optpar = dict(maxdist=m, upfreq=u, lowfreq=l, scale=s, kforce=5)
    models = generate_3d_models(zscores, opts.reso, nloci,
                                values=values, n_models=opts.nmodels,
                                n_keep=opts.nkeep, n_cpus=opts.cpus,
                                keep_all=True, start=int(opts.rand),
                                container=None, config=optpar,
                                coords=coords, zeros=zeros)

    # Save models
    muls = tuple([my_round(val) for val in (m, u, l, s)])
    several = opts.nmodels > 1
    out_name = (('models_%s-%s.pick' % (opts.rand,
                                        int(opts.rand) + opts.nmodels))
                if several else ('model_%s.pick' % (opts.rand)))
    target = path.join(outdir, 'cfg_%s_%s_%s_%s' % muls, out_name)
    models.save_models(target)
def to_optimize(params, zscores, resolution, values, n_models, n_keep,
                n_cpus=1):
    """
    Objective function: build models for one (upfreq, lowfreq, maxdist)
    triplet and return ``1 - correlation`` with the real data.

    Increments the module-level ``COUNT`` and prints one progress line per
    call (counter, parameters, then the correlation or ``ERROR``).

    :param params: sequence unpacked as ``(upfreq, lowfreq, maxdist)``
    :param zscores: passed through to ``generate_3d_models``
    :param resolution: passed through to ``generate_3d_models``
    :param values: passed through to ``generate_3d_models``
    :param n_models: passed through to ``generate_3d_models``
    :param n_keep: passed through to ``generate_3d_models``
    :param 1 n_cpus: number of CPUs handed to ``generate_3d_models``

    :returns: ``1. - correlation``, or 1.0 when the correlation could not be
       computed
    """
    global COUNT
    upfreq, lowfreq, maxdist = params
    config = {'kforce': 5,
              'lowrdist': 100,
              'maxdist': maxdist,
              'upfreq': upfreq,
              'lowfreq': lowfreq}
    tdm = generate_3d_models(zscores, resolution, n_models, n_keep,
                             config=config, n_cpus=n_cpus, values=values)
    COUNT += 1
    # The whole progress line is emitted at once so that the statement is
    # valid under both Python 2 and Python 3; the text is unchanged.
    prefix = '%5s  %s' % (COUNT, params)
    try:
        result = tdm.correlate_with_real_data(cutoff=200)[0]
        score = 1. - result
    except Exception:
        # ``Exception`` (not a bare except) so that Ctrl-C / SystemExit still
        # stop the optimization instead of being scored as a failure.
        print(prefix + ' ERROR')
        return 1.0
    print('%s %s' % (prefix, result))
    return score
def grid_search(zscores=None, upfreq_range=(0, 1, 0.1),
                lowfreq_range=(-1, 0, 0.1),
                scale_range=(0.005, 0.005, 0.001),
                maxdist_range=(400, 1500, 100), resolution=None, values=None,
                n_models=500, cutoff=300, n_keep=100, n_cpus=1, close_bins=1,
                verbose=True):
    """
    Correlate generated models with the input data over a grid of the four
    modeling parameters (scale, maxdist, upfreq and lowfreq).

    Every ``*_range`` argument is either a ``(start, stop, step)`` tuple,
    expanded into the list of values from start to stop (stop included), or
    any other sized iterable of values, used as given.

    :param None zscores: passed through to ``generate_3d_models``
    :param (0,1,0.1) upfreq_range: upfreq values to test
    :param (-1,0,0.1) lowfreq_range: lowfreq values to test
    :param (0.005,0.005,0.001) scale_range: scale values to test
    :param (400,1500,100) maxdist_range: maxdist values to test (integers
       when given as a tuple)
    :param None resolution: passed through to ``generate_3d_models``
    :param None values: passed through to ``generate_3d_models``
    :param 500 n_models: passed through to ``generate_3d_models``
    :param 300 cutoff: passed to ``correlate_with_real_data``
    :param 100 n_keep: passed through to ``generate_3d_models``
    :param 1 n_cpus: number of CPUs to use
    :param 1 close_bins: passed through to ``generate_3d_models``
    :param True verbose: print one line per tested combination

    :returns: ``(results, scale_arange, maxdist_arange, upfreq_arange,
       lowfreq_arange)`` where ``results`` is an array indexed as
       ``[scale, maxdist, upfreq, lowfreq]``; cells whose correlation could
       not be computed hold NaN
    """
    def _expand(bounds, integral=False):
        """Expand a (start, stop, step) tuple; return anything else as is."""
        if not isinstance(bounds, tuple):
            return bounds
        start, stop, step = bounds[0], bounds[1], bounds[2]
        if integral:
            return list(range(start, stop + step, step))
        # half a step of padding keeps ``stop`` in the result; 2.0 avoids the
        # integer division that dropped it for integer steps under Python 2
        return np.arange(start, stop + step / 2.0, step)

    maxdist_arange = _expand(maxdist_range, integral=True)
    lowfreq_arange = _expand(lowfreq_range)
    upfreq_arange = _expand(upfreq_range)
    scale_arange = _expand(scale_range)

    results = np.empty((len(scale_arange), len(maxdist_arange),
                        len(upfreq_arange), len(lowfreq_arange)))
    # failed combinations must not expose uninitialised memory
    results.fill(np.nan)

    count = 0
    for w, scale in enumerate(scale_arange):
        for x, maxdist in enumerate(maxdist_arange):
            for y, upfreq in enumerate(upfreq_arange):
                for z, lowfreq in enumerate(lowfreq_arange):
                    config = {'kforce': 5,
                              'lowrdist': 100,
                              'maxdist': maxdist,
                              'upfreq': upfreq,
                              'lowfreq': lowfreq,
                              'scale': scale}
                    tdm = generate_3d_models(zscores, resolution,
                                             n_models, n_keep,
                                             config=config, n_cpus=n_cpus,
                                             values=values,
                                             close_bins=close_bins)
                    count += 1
                    label = ''
                    if verbose:
                        label = '%5s  %s %s %s %s' % (count, upfreq, lowfreq,
                                                      maxdist, scale)
                    try:
                        result = tdm.correlate_with_real_data(
                            cutoff=cutoff)[0]
                        results[w, x, y, z] = result
                    except Exception:
                        # errors are reported even when not verbose
                        print((label + ' ERROR') if verbose else 'ERROR')
                        continue
                    if verbose:
                        print('%s %s' % (label, result))
    return (results, scale_arange, maxdist_arange, upfreq_arange,
            lowfreq_arange)
def run_batch_job(exp, opts, m, u, l, s, outdir):
    """
    Generate the 3D models for one parameter combination and save them,
    naming the output file after the random seeds of the models produced.

    :param exp: object exposing ``_sub_experiment_zscore(beg, end)``
    :param opts: options object (reads ``beg``, ``end``, ``crm``, ``reso``,
       ``nmodels``, ``nkeep``, ``cpus`` and ``rand``)
    :param m: 'maxdist' entry of the modeling configuration (cast to float)
    :param u: 'upfreq' entry of the modeling configuration (cast to float)
    :param l: 'lowfreq' entry of the modeling configuration (cast to float)
    :param s: 'scale' entry of the modeling configuration (cast to float)
    :param outdir: first component of the output path

    :raises Exception: when the returned collection contains no model
    """
    zscores, values, zeros = exp._sub_experiment_zscore(opts.beg, opts.end)
    nloci = opts.end - opts.beg + 1
    # per-bin mask: True when the bin index is not listed in ``zeros``
    zeros = tuple([pos not in zeros for pos in xrange(nloci)])

    optpar = dict(maxdist=float(m), upfreq=float(u), lowfreq=float(l),
                  scale=float(s), kforce=5)
    coords = {"crm": opts.crm, "start": opts.beg, "end": opts.end}
    models = generate_3d_models(zscores, opts.reso, nloci,
                                values=values, n_models=opts.nmodels,
                                n_keep=opts.nkeep, n_cpus=opts.cpus,
                                keep_all=True, start=int(opts.rand),
                                container=None, config=optpar,
                                coords=coords, zeros=zeros)

    # Save models
    dirname = 'cfg_%s_%s_%s_%s' % tuple(map(my_round, (m, u, l, s)))
    runned = []
    for mod in models:
        runned.append(int(mod['rand_init']))
    if not runned:
        raise Exception(("\n\n\nNothing to be done.\n\n"
                         " All models asked for are already run.\n"
                         " - ask for more models\n"
                         " - use higher random initial number\n"
                         " - go ahead with the analysis!"))
    if len(runned) > 1:
        fname = 'models_%s-%s.pick' % (min(runned), max(runned))
    else:
        fname = 'model_%s.pick' % (runned[0])
    models.save_models(path.join(outdir, dirname, fname))
def to_optimize(params, zscores, resolution, values, n_models, n_keep,
                n_cpus=1):
    """
    Objective function for parameter optimization: generate models for the
    given (upfreq, lowfreq, maxdist) and return ``1 - correlation`` between
    the models and the real data.

    The module-level ``COUNT`` is incremented on every call and a progress
    line (counter, parameters, correlation or ``ERROR``) is printed.

    :param params: sequence unpacked as ``(upfreq, lowfreq, maxdist)``
    :param zscores: passed through to ``generate_3d_models``
    :param resolution: passed through to ``generate_3d_models``
    :param values: passed through to ``generate_3d_models``
    :param n_models: passed through to ``generate_3d_models``
    :param n_keep: passed through to ``generate_3d_models``
    :param 1 n_cpus: number of CPUs handed to ``generate_3d_models``

    :returns: ``1. - correlation``; 1.0 if computing the correlation failed
    """
    global COUNT
    upfreq, lowfreq, maxdist = params
    tmp = {'kforce': 5, 'lowrdist': 100, 'maxdist': maxdist,
           'upfreq': upfreq, 'lowfreq': lowfreq}
    tdm = generate_3d_models(zscores, resolution, n_models, n_keep,
                             config=tmp, n_cpus=n_cpus, values=values)
    COUNT += 1
    # single print call with one argument: valid in Python 2 and Python 3,
    # and produces the same line as the former multi-part print
    header = '%5s  %s' % (COUNT, params)
    try:
        result = tdm.correlate_with_real_data(cutoff=200)[0]
        distance = 1. - result
    except Exception:
        # not a bare ``except``: KeyboardInterrupt and SystemExit must be
        # able to stop the optimizer
        print(header + ' ERROR')
        return 1.0
    print('%s %s' % (header, result))
    return distance
def run_grid_search(self, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                    scale_range=[0.01], maxdist_range=(400, 1500, 100),
                    n_cpus=1, verbose=True):
    """
    This function calculates the correlation between the models generated
    by IMP and the input data for the four main IMP parameters (scale,
    maxdist, lowfreq and upfreq) in the given ranges of values.

    A range given as a ``(start, stop, step)`` tuple is expanded into the
    values from start to stop (stop included); any other iterable is used
    as the explicit list of values to test.

    :param n_cpus: number of CPUs to use
    :param (-1,0,0.1) lowfreq_range: range of lowfreq values to be
       optimized. The last value of the input tuple is the incremental step
       for the lowfreq values
    :param (0,1,0.1) upfreq_range: range of upfreq values to be optimized.
       The last value of the input tuple is the incremental step for the
       upfreq values
    :param (400,1500,100) maxdist_range: upper and lower bounds used to
       search for the optimal maximum experimental distance. The last value
       of the input tuple is the incremental step for maxdist values
    :param [0.01] scale_range: values of the scale parameter (nm per
       nucleotide) to test; a tuple is read as (lower bound, upper bound,
       step). The default list is only read, never modified
    :param True verbose: print the results to the standard output

    Results are stored in ``self.results`` under the key
    ``(my_round(scale), my_round(maxdist), my_round(upfreq),
    my_round(lowfreq))``.
    """
    def _expand(bounds, integral=False):
        """Expand a (start, stop, step) tuple; return anything else as is."""
        if not isinstance(bounds, tuple):
            return bounds
        start, stop, step = bounds[0], bounds[1], bounds[2]
        if integral:
            return list(range(start, stop + step, step))
        # 2.0: with an integer step, Python 2 integer division made the
        # padding 0 and the upper bound was silently dropped
        return np.arange(start, stop + step / 2.0, step)

    def _register(known, value):
        """Record ``value`` in the list ``known`` if it is not there yet."""
        if value not in known:
            known.append(value)

    maxdist_arange = _expand(maxdist_range, integral=True)
    lowfreq_arange = _expand(lowfreq_range)
    upfreq_arange = _expand(upfreq_range)
    scale_arange = _expand(scale_range)

    count = 0
    for scale in scale_arange:
        _register(self.scale_range, scale)
        for maxdist in maxdist_arange:
            _register(self.maxdist_range, maxdist)
            for upfreq in upfreq_arange:
                _register(self.upfreq_range, upfreq)
                for lowfreq in lowfreq_arange:
                    _register(self.lowfreq_range, lowfreq)
                    raw_key = (scale, maxdist, upfreq, lowfreq)
                    key = (my_round(scale), my_round(maxdist),
                           my_round(upfreq), my_round(lowfreq))
                    # Results are stored under the rounded key, so that is
                    # the one to test; the raw key is still honoured in case
                    # ``self.results`` was filled by other code.
                    if key in self.results or raw_key in self.results:
                        continue
                    tmp = {'kforce': 5,
                           'lowrdist': 100,
                           'maxdist': maxdist,
                           'upfreq': upfreq,
                           'lowfreq': lowfreq,
                           'scale': scale}
                    tdm = generate_3d_models(self.zscores, self.resolution,
                                             self.n_models, self.n_keep,
                                             config=tmp, n_cpus=n_cpus,
                                             values=self.values,
                                             close_bins=self.close_bins)
                    count += 1
                    label = ''
                    if verbose:
                        label = '%5s  %s %s %s %s' % (count, upfreq, lowfreq,
                                                      maxdist, scale)
                    try:
                        result = tdm.correlate_with_real_data(
                            cutoff=self.cutoff)[0]
                    except Exception as e:
                        # errors are reported even when not verbose
                        msg = 'ERROR %s' % e
                        print((label + ' ' + msg) if verbose else msg)
                        continue
                    if verbose:
                        print('%s %s' % (label, result))
                    self.results[key] = result
def run_grid_search(self, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                    scale_range=(0.005, 0.005, 0.001),
                    maxdist_range=(400, 1500, 100), n_cpus=1, verbose=True):
    """
    Correlate generated models with the input data for every combination of
    scale, maxdist, upfreq and lowfreq in the given ranges.

    A range given as a ``(start, stop, step)`` tuple is expanded into the
    values from start to stop (stop included); any other iterable is used
    as the explicit list of values to test.

    Tested values are appended to ``self.scale_range``,
    ``self.maxdist_range``, ``self.upfreq_range`` and ``self.lowfreq_range``
    (sorted before returning) and each correlation is stored in
    ``self.results[(scale, maxdist, upfreq, lowfreq)]``. Combinations already
    present in ``self.results`` are skipped.

    :param (0,1,0.1) upfreq_range: upfreq values to test
    :param (-1,0,0.1) lowfreq_range: lowfreq values to test
    :param (0.005,0.005,0.001) scale_range: scale values to test
    :param (400,1500,100) maxdist_range: maxdist values to test (integers
       when given as a tuple)
    :param 1 n_cpus: number of CPUs to use
    :param True verbose: print one line per tested combination
    """
    def _expand(bounds, integral=False):
        """Expand a (start, stop, step) tuple; return anything else as is."""
        if not isinstance(bounds, tuple):
            return bounds
        start, stop, step = bounds[0], bounds[1], bounds[2]
        if integral:
            return list(range(start, stop + step, step))
        # 2.0: with an integer step, Python 2 integer division made the
        # padding 0 and the upper bound was silently dropped
        return np.arange(start, stop + step / 2.0, step)

    def _register(known, value):
        """Record ``value`` in the list ``known`` if it is not there yet."""
        if value not in known:
            known.append(value)

    maxdist_arange = _expand(maxdist_range, integral=True)
    lowfreq_arange = _expand(lowfreq_range)
    upfreq_arange = _expand(upfreq_range)
    scale_arange = _expand(scale_range)

    count = 0
    for scale in scale_arange:
        _register(self.scale_range, scale)
        for maxdist in maxdist_arange:
            _register(self.maxdist_range, maxdist)
            for upfreq in upfreq_arange:
                _register(self.upfreq_range, upfreq)
                for lowfreq in lowfreq_arange:
                    _register(self.lowfreq_range, lowfreq)
                    key = (scale, maxdist, upfreq, lowfreq)
                    if key in self.results:
                        continue
                    tmp = {'kforce': 5,
                           'lowrdist': 100,
                           'maxdist': maxdist,
                           'upfreq': upfreq,
                           'lowfreq': lowfreq,
                           'scale': scale}
                    tdm = generate_3d_models(self.zscores, self.resolution,
                                             self.n_models, self.n_keep,
                                             config=tmp, n_cpus=n_cpus,
                                             values=self.values,
                                             close_bins=self.close_bins)
                    count += 1
                    label = ''
                    if verbose:
                        label = '%5s  %s %s %s %s' % (count, upfreq, lowfreq,
                                                      maxdist, scale)
                    try:
                        result = tdm.correlate_with_real_data(
                            cutoff=self.cutoff)[0]
                    except Exception:
                        # ``Exception`` rather than a bare except so that
                        # Ctrl-C still aborts a long grid search
                        print((label + ' ERROR') if verbose else 'ERROR')
                        continue
                    if verbose:
                        print('%s %s' % (label, result))
                    self.results[key] = result
    self.scale_range.sort()
    self.maxdist_range.sort()
    self.lowfreq_range.sort()
    self.upfreq_range.sort()
def run_grid_search(self, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                    scale_range=(0.005, 0.005, 0.001),
                    maxdist_range=(400, 1500, 100), n_cpus=1, verbose=True):
    """
    Run the grid search over scale, maxdist, upfreq and lowfreq, storing the
    correlation between the generated models and the input data for every
    combination not already present in ``self.results``.

    A ``(start, stop, step)`` tuple is expanded into the values from start
    to stop (stop included); any other iterable is taken as the explicit
    list of values.

    Side effects: new values are added to ``self.scale_range``,
    ``self.maxdist_range``, ``self.upfreq_range`` and ``self.lowfreq_range``
    (which are sorted at the end), and ``self.results`` gains one entry keyed
    ``(scale, maxdist, upfreq, lowfreq)`` per successful combination.

    :param (0,1,0.1) upfreq_range: upfreq values to test
    :param (-1,0,0.1) lowfreq_range: lowfreq values to test
    :param (0.005,0.005,0.001) scale_range: scale values to test
    :param (400,1500,100) maxdist_range: maxdist values to test (integers
       when given as a tuple)
    :param 1 n_cpus: number of CPUs to use
    :param True verbose: print one line per tested combination
    """
    def _expand(bounds, integral=False):
        """Expand a (start, stop, step) tuple; return anything else as is."""
        if not isinstance(bounds, tuple):
            return bounds
        start, stop, step = bounds[0], bounds[1], bounds[2]
        if integral:
            return list(range(start, stop + step, step))
        # dividing by 2.0 keeps the half-step padding non-zero for integer
        # steps under Python 2, so the upper bound stays included
        return np.arange(start, stop + step / 2.0, step)

    maxdist_arange = _expand(maxdist_range, integral=True)
    lowfreq_arange = _expand(lowfreq_range)
    upfreq_arange = _expand(upfreq_range)
    scale_arange = _expand(scale_range)

    count = 0
    for scale in scale_arange:
        if scale not in self.scale_range:
            self.scale_range.append(scale)
        for maxdist in maxdist_arange:
            if maxdist not in self.maxdist_range:
                self.maxdist_range.append(maxdist)
            for upfreq in upfreq_arange:
                if upfreq not in self.upfreq_range:
                    self.upfreq_range.append(upfreq)
                for lowfreq in lowfreq_arange:
                    if lowfreq not in self.lowfreq_range:
                        self.lowfreq_range.append(lowfreq)
                    combo = (scale, maxdist, upfreq, lowfreq)
                    if combo in self.results:
                        continue
                    config = {
                        'kforce': 5,
                        'lowrdist': 100,
                        'maxdist': maxdist,
                        'upfreq': upfreq,
                        'lowfreq': lowfreq,
                        'scale': scale,
                    }
                    tdm = generate_3d_models(self.zscores, self.resolution,
                                             self.n_models, self.n_keep,
                                             config=config, n_cpus=n_cpus,
                                             values=self.values,
                                             close_bins=self.close_bins)
                    count += 1
                    label = ''
                    if verbose:
                        label = '%5s  %s %s %s %s' % (count, upfreq, lowfreq,
                                                      maxdist, scale)
                    try:
                        result = tdm.correlate_with_real_data(
                            cutoff=self.cutoff)[0]
                    except Exception:
                        # a bare except would also swallow KeyboardInterrupt
                        print((label + ' ERROR') if verbose else 'ERROR')
                        continue
                    if verbose:
                        print('%s %s' % (label, result))
                    self.results[combo] = result
    self.scale_range.sort()
    self.maxdist_range.sort()
    self.lowfreq_range.sort()
    self.upfreq_range.sort()
def grid_search(zscores=None, upfreq_range=(0, 1, 0.1),
                lowfreq_range=(-1, 0, 0.1),
                scale_range=(0.005, 0.005, 0.001),
                maxdist_range=(400, 1500, 100), resolution=None, values=None,
                n_models=500, cutoff=300, n_keep=100, n_cpus=1, close_bins=1,
                verbose=True):
    """
    Grid search over scale, maxdist, upfreq and lowfreq: for each
    combination, models are generated and their correlation with the input
    data is recorded.

    A ``(start, stop, step)`` tuple is expanded into the values from start
    to stop (stop included); any other sized iterable is taken as the
    explicit list of values.

    :param None zscores: passed through to ``generate_3d_models``
    :param (0,1,0.1) upfreq_range: upfreq values to test
    :param (-1,0,0.1) lowfreq_range: lowfreq values to test
    :param (0.005,0.005,0.001) scale_range: scale values to test
    :param (400,1500,100) maxdist_range: maxdist values to test (integers
       when given as a tuple)
    :param None resolution: passed through to ``generate_3d_models``
    :param None values: passed through to ``generate_3d_models``
    :param 500 n_models: passed through to ``generate_3d_models``
    :param 300 cutoff: passed to ``correlate_with_real_data``
    :param 100 n_keep: passed through to ``generate_3d_models``
    :param 1 n_cpus: number of CPUs to use
    :param 1 close_bins: passed through to ``generate_3d_models``
    :param True verbose: print one line per tested combination

    :returns: ``(results, scale_arange, maxdist_arange, upfreq_arange,
       lowfreq_arange)``; ``results`` is indexed
       ``[scale, maxdist, upfreq, lowfreq]`` and holds NaN wherever the
       correlation could not be computed
    """
    def _expand(bounds, integral=False):
        """Expand a (start, stop, step) tuple; return anything else as is."""
        if not isinstance(bounds, tuple):
            return bounds
        start, stop, step = bounds[0], bounds[1], bounds[2]
        if integral:
            return list(range(start, stop + step, step))
        # dividing by 2.0 keeps the half-step padding non-zero for integer
        # steps under Python 2, so the upper bound stays included
        return np.arange(start, stop + step / 2.0, step)

    maxdist_arange = _expand(maxdist_range, integral=True)
    lowfreq_arange = _expand(lowfreq_range)
    upfreq_arange = _expand(upfreq_range)
    scale_arange = _expand(scale_range)

    shape = (len(scale_arange), len(maxdist_arange),
             len(upfreq_arange), len(lowfreq_arange))
    results = np.empty(shape)
    # np.empty does not initialise memory: mark every cell as "no result"
    # so that a failed combination is not reported as an arbitrary number
    results.fill(np.nan)

    count = 0
    for w, scale in enumerate(scale_arange):
        for x, maxdist in enumerate(maxdist_arange):
            for y, upfreq in enumerate(upfreq_arange):
                for z, lowfreq in enumerate(lowfreq_arange):
                    tmp = {
                        'kforce': 5,
                        'lowrdist': 100,
                        'maxdist': maxdist,
                        'upfreq': upfreq,
                        'lowfreq': lowfreq,
                        'scale': scale,
                    }
                    tdm = generate_3d_models(zscores, resolution,
                                             n_models, n_keep,
                                             config=tmp, n_cpus=n_cpus,
                                             values=values,
                                             close_bins=close_bins)
                    count += 1
                    label = ''
                    if verbose:
                        label = '%5s  %s %s %s %s' % (count, upfreq, lowfreq,
                                                      maxdist, scale)
                    try:
                        result = tdm.correlate_with_real_data(
                            cutoff=cutoff)[0]
                        results[w, x, y, z] = result
                    except Exception:
                        # errors are reported even when not verbose
                        print((label + ' ERROR') if verbose else 'ERROR')
                        continue
                    if verbose:
                        print('%s %s' % (label, result))
    return (results, scale_arange, maxdist_arange, upfreq_arange,
            lowfreq_arange)
def run_grid_search(self, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                    scale_range=[0.01], maxdist_range=(400, 1500, 100),
                    n_cpus=1, verbose=True):
    """
    This function calculates the correlation between the models generated
    by IMP and the input data for the four main IMP parameters (scale,
    maxdist, lowfreq and upfreq) in the given ranges of values.

    A range given as a ``(start, stop, step)`` tuple is expanded into the
    values from start to stop (stop included); any other iterable is used
    as the explicit list of values to test.

    :param n_cpus: number of CPUs to use
    :param (-1,0,0.1) lowfreq_range: range of lowfreq values to be
       optimized. The last value of the input tuple is the incremental step
       for the lowfreq values
    :param (0,1,0.1) upfreq_range: range of upfreq values to be optimized.
       The last value of the input tuple is the incremental step for the
       upfreq values
    :param (400,1500,100) maxdist_range: upper and lower bounds used to
       search for the optimal maximum experimental distance. The last value
       of the input tuple is the incremental step for maxdist values
    :param [0.01] scale_range: values of the scale parameter (nm per
       nucleotide) to test; a tuple is read as (lower bound, upper bound,
       step). The default list is only read, never modified
    :param True verbose: print the results to the standard output

    Each correlation is stored in ``self.results`` under the ``my_round``ed
    ``(scale, maxdist, upfreq, lowfreq)`` tuple.
    """
    def _expand(bounds, integral=False):
        """Expand a (start, stop, step) tuple; return anything else as is."""
        if not isinstance(bounds, tuple):
            return bounds
        start, stop, step = bounds[0], bounds[1], bounds[2]
        if integral:
            return list(range(start, stop + step, step))
        # dividing by 2.0 keeps the half-step padding non-zero for integer
        # steps under Python 2, so the upper bound stays included
        return np.arange(start, stop + step / 2.0, step)

    maxdist_arange = _expand(maxdist_range, integral=True)
    lowfreq_arange = _expand(lowfreq_range)
    upfreq_arange = _expand(upfreq_range)
    scale_arange = _expand(scale_range)

    count = 0
    for scale in scale_arange:
        if scale not in self.scale_range:
            self.scale_range.append(scale)
        for maxdist in maxdist_arange:
            if maxdist not in self.maxdist_range:
                self.maxdist_range.append(maxdist)
            for upfreq in upfreq_arange:
                if upfreq not in self.upfreq_range:
                    self.upfreq_range.append(upfreq)
                for lowfreq in lowfreq_arange:
                    if lowfreq not in self.lowfreq_range:
                        self.lowfreq_range.append(lowfreq)
                    raw_combo = (scale, maxdist, upfreq, lowfreq)
                    combo = tuple([my_round(val) for val in raw_combo])
                    # The skip test has to use the same (rounded) key the
                    # result is stored under, otherwise finished
                    # combinations can be modelled again. The raw tuple is
                    # still accepted for results stored by other code.
                    if combo in self.results or raw_combo in self.results:
                        continue
                    config = {
                        'kforce': 5,
                        'lowrdist': 100,
                        'maxdist': maxdist,
                        'upfreq': upfreq,
                        'lowfreq': lowfreq,
                        'scale': scale,
                    }
                    tdm = generate_3d_models(self.zscores, self.resolution,
                                             self.n_models, self.n_keep,
                                             config=config, n_cpus=n_cpus,
                                             values=self.values,
                                             close_bins=self.close_bins)
                    count += 1
                    label = ''
                    if verbose:
                        label = '%5s  %s %s %s %s' % (count, upfreq, lowfreq,
                                                      maxdist, scale)
                    try:
                        result = tdm.correlate_with_real_data(
                            cutoff=self.cutoff)[0]
                    except Exception as e:
                        # errors are reported even when not verbose
                        msg = 'ERROR %s' % e
                        print((label + ' ' + msg) if verbose else msg)
                        continue
                    if verbose:
                        print('%s %s' % (label, result))
                    self.results[combo] = result
def run_grid_search(self, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                    maxdist_range=(400, 1500, 100), scale_range=0.01,
                    dcutoff_range=2, corr='spearman', off_diag=1,
                    savedata=None, n_cpus=1, verbose=True):
    """
    This function calculates the correlation between the models generated
    by IMP and the input data for the four main IMP parameters (scale,
    maxdist, lowfreq and upfreq) in the given ranges of values. For each
    combination, every distance cutoff in dcutoff_range is tried and the
    one giving the highest correlation is kept.

    Each range may be a ``(start, stop, step)`` tuple (expanded into the
    values from start to stop, stop included), a single number, or an
    iterable of values.

    :param n_cpus: number of CPUs to use
    :param (-1,0,0.1) lowfreq_range: range of lowfreq values to be
       optimized. The last value of the input tuple is the incremental step
       for the lowfreq values
    :param (0,1,0.1) upfreq_range: range of upfreq values to be optimized.
       The last value of the input tuple is the incremental step for the
       upfreq values
    :param (400,1500,100) maxdist_range: upper and lower bounds used to
       search for the optimal maximum experimental distance. The last value
       of the input tuple is the incremental step for maxdist values
    :param 0.01 scale_range: upper and lower bounds used to search for the
       optimal scale parameter (nm per nucleotide). The last value of the
       input tuple is the incremental step for scale parameter values
    :param 2 dcutoff_range: upper and lower bounds used to search for the
       optimal distance cutoff parameter (distance, in number of beads,
       from which to consider 2 beads as being close). The last value of
       the input tuple is the incremental step for the cutoff values
    :param 'spearman' corr: passed to ``correlate_with_real_data``
    :param 1 off_diag: passed to ``correlate_with_real_data``
    :param None savedata: when set, the reduced models of every combination
       with a non-zero correlation are collected in a dictionary.
       NOTE(review): the parameter was documented as a file name the
       dictionary is saved to, but no write is visible in this block --
       confirm against the rest of the file
    :param True verbose: print the results to the standard output (to the
       standard error if set to 2)

    Results are stored in ``self.results`` under the key
    ``(scale, maxdist, upfreq, lowfreq, cutoff)``, all passed through
    ``my_round``; a combination that fails is stored with a correlation
    of 0.
    """
    def _expand(bounds, integral=False):
        """Return the values described by a tuple, a number or a list."""
        if isinstance(bounds, tuple):
            start, stop, step = bounds[0], bounds[1], bounds[2]
            if integral:
                return list(range(start, stop + step, step))
            # 2.0: with an integer step (the usual case for the cutoff),
            # Python 2 integer division made the padding 0 and the upper
            # bound was silently dropped
            return np.arange(start, stop + step / 2.0, step)
        if isinstance(bounds, (float, int)):
            return [bounds]
        return bounds

    def _merge(known, values):
        """Rounded ``values``, merged (sorted) with the ``known`` ones."""
        rounded = [my_round(i) for i in values]
        if not known:
            return rounded
        return sorted([r for r in rounded if r not in known] + known)

    def _report(line):
        """Write one result line to stderr (verbose == 2) or stdout."""
        if verbose == 2:
            stderr.write(line + '\n')
        else:
            print(line)

    if verbose:
        stderr.write('Optimizing %s particles\n' % self.nloci)

    maxdist_arange = _expand(maxdist_range, integral=True)
    lowfreq_arange = _expand(lowfreq_range)
    upfreq_arange = _expand(upfreq_range)
    scale_arange = _expand(scale_range)
    dcutoff_arange = _expand(dcutoff_range)

    # round everything
    self.maxdist_range = _merge(self.maxdist_range, maxdist_arange)
    self.upfreq_range = _merge(self.upfreq_range, upfreq_arange)
    self.lowfreq_range = _merge(self.lowfreq_range, lowfreq_arange)
    self.scale_range = _merge(self.scale_range, scale_arange)
    self.dcutoff_range = _merge(self.dcutoff_range, dcutoff_arange)

    # grid search
    # NOTE(review): ``models`` is filled below but never written out within
    # this block.
    models = {}
    count = 0
    if verbose:
        stderr.write('# %3s %6s %7s %7s %6s %7s %7s\n' % (
            "num", "upfrq", "lowfrq", "maxdist", "scale", "cutoff", "corr"))
    for scale in [my_round(i) for i in scale_arange]:
        for maxdist in [my_round(i) for i in maxdist_arange]:
            for upfreq in [my_round(i) for i in upfreq_arange]:
                for lowfreq in [my_round(i) for i in lowfreq_arange]:
                    combo = (scale, maxdist, upfreq, lowfreq)
                    # check if this optimization has been already done
                    # (single pass over the stored keys, first match wins)
                    done = next((k for k in self.results
                                 if tuple(k[:4]) == combo), None)
                    if done is not None:
                        result = self.results[combo + (done[-1],)]
                        if verbose:
                            verb = '%5s %6s %7s %7s %6s %7s ' % (
                                'xx', upfreq, lowfreq, maxdist, scale,
                                done[-1])
                            _report(verb + str(round(result, 4)))
                        continue
                    tmp = {'kforce': 5,
                           'lowrdist': 100,
                           'maxdist': int(maxdist),
                           'upfreq': float(upfreq),
                           'lowfreq': float(lowfreq),
                           'scale': float(scale)}
                    try:
                        count += 1
                        tdm = generate_3d_models(
                            self.zscores, self.resolution, self.nloci,
                            n_models=self.n_models, n_keep=self.n_keep,
                            config=tmp, n_cpus=n_cpus, first=0,
                            values=self.values, container=self.container,
                            close_bins=self.close_bins, zeros=self.zeros)
                        # keep the cutoff giving the best correlation; the
                        # first cutoff is kept if none scores above 0
                        result = 0
                        cutoff = my_round(dcutoff_arange[0])
                        for cut in dcutoff_arange:
                            sub_result = tdm.correlate_with_real_data(
                                cutoff=(int(cut * self.resolution *
                                            float(scale))),
                                corr=corr, off_diag=off_diag)[0]
                            if result < sub_result:
                                result = sub_result
                                cutoff = my_round(cut)
                    except Exception as e:
                        print(' SKIPPING: %s' % e)
                        result = 0
                        cutoff = my_round(dcutoff_arange[0])
                    if verbose:
                        verb = '%5s %6s %7s %7s %6s %7s ' % (
                            count, upfreq, lowfreq, maxdist, scale, cutoff)
                        _report(verb + str(round(result, 4)))
                    # store
                    self.results[combo + (cutoff,)] = result
                    # a zero result also marks a failed combination, for
                    # which ``tdm`` may not belong to this combination
                    if savedata and result:
                        models[combo + (cutoff,)] = tdm._reduce_models(
                            minimal=True)
def model_region(self, start, end, n_models=5000, n_keep=1000, n_cpus=1,
                 verbose=0, keep_all=False, close_bins=1, outfile=None,
                 config=CONFIG['dmel_01']):
    """
    Generate 3D models for the region going from bin *start* to bin *end*.

    If the experiment is not normalized with the 'visibility' method, a
    warning is issued and it is normalized that way first.

    :param start: first bin to model (bin number)
    :param end: last bin to model (bin number)
    :param 5000 n_models: number of models to generate
    :param 1000 n_keep: number of models used in the final analysis
       (usually the top 20% of the generated models). The models are ranked
       according to their objective function value (the lower the better)
    :param False keep_all: whether or not to keep the discarded models (if
       True, models will be stored under StructuralModels.bad_models)
    :param 1 close_bins: number of particles away (i.e. the bin number
       difference) a particle pair must be in order to be considered as
       neighbors (e.g. 1 means consecutive particles)
    :param n_cpus: number of CPUs to use
    :param 0 verbose: the information printed can be: nothing (0), the
       objective function value of the selected models (1), the objective
       function value of all the models (2), all the modeling
       information (3)
    :param None outfile: passed through to ``generate_3d_models``
    :param CONFIG['dmel_01'] config: a dictionary containing the standard
       parameters used to generate the models. The dictionary should
       contain the keys kforce, maxdist, upfreq and lowfreq. Examples can
       be seen by doing:

       ::

         from pytadbit.imp.CONFIG import CONFIG

       where CONFIG is a dictionary of dictionaries to be passed to this
       function:

       ::

         CONFIG = {
          'dmel_01': {
              # use these parameters with the Hi-C data from:
              'reference' : 'victor corces dataset 2013',

              # Force applied to the restraints inferred to neighbor
              # particles
              'kforce'    : 5,

              # Maximum experimental contact distance
              'maxdist'   : 600, # OPTIMIZATION: 500-1200

              # Minimum and maximum thresholds used to decide which
              # experimental values have to be included in the computation
              # of restraints. Z-score values bigger than upfreq and less
              # than lowfreq will be included, whereas all the others will
              # be rejected
              'upfreq'    : 0.3, # OPTIMIZATION: min/max Z-score
              'lowfreq'   : -0.7 # OPTIMIZATION: min/max Z-score
              }
          }

    :returns: whatever ``generate_3d_models`` returns for this region
    """
    already_normalized = self._normalization == 'visibility'
    if not already_normalized:
        warn('WARNING: normalizing according to visibility method')
        self.normalize_hic(method='visibility')

    zscores, values = self._sub_experiment_zscore(start, end)

    modeling_options = dict(values=values,
                            n_models=n_models,
                            outfile=outfile,
                            n_keep=n_keep,
                            n_cpus=n_cpus,
                            verbose=verbose,
                            keep_all=keep_all,
                            close_bins=close_bins,
                            config=config)
    return generate_3d_models(zscores, self.resolution, **modeling_options)
def run_grid_search(self, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                    maxdist_range=(400, 1500, 100), scale_range=0.01,
                    corr='spearman', off_diag=1, savedata=None, n_cpus=1,
                    verbose=True):
    """
    This function calculates the correlation between the models generated
    by IMP and the input data for the four main IMP parameters (scale,
    maxdist, lowfreq and upfreq) in the given ranges of values.

    Each range may be a ``(start, stop, step)`` tuple (expanded into the
    values from start to stop, stop included), a single number, or an
    iterable of values.

    :param n_cpus: number of CPUs to use
    :param (-1,0,0.1) lowfreq_range: range of lowfreq values to be
       optimized. The last value of the input tuple is the incremental step
       for the lowfreq values
    :param (0,1,0.1) upfreq_range: range of upfreq values to be optimized.
       The last value of the input tuple is the incremental step for the
       upfreq values
    :param (400,1500,100) maxdist_range: upper and lower bounds used to
       search for the optimal maximum experimental distance. The last value
       of the input tuple is the incremental step for maxdist values
    :param 0.01 scale_range: upper and lower bounds used to search for the
       optimal scale parameter (nm per nucleotide). The last value of the
       input tuple is the incremental step for scale parameter values
    :param 'spearman' corr: passed to ``correlate_with_real_data``
    :param 1 off_diag: passed to ``correlate_with_real_data``
    :param None savedata: when set, the reduced models of every successful
       combination are collected in a dictionary. NOTE(review): no code
       writing that dictionary anywhere is visible in this block -- confirm
       against the rest of the file
    :param True verbose: print the results to the standard output (to the
       standard error if set to 2)

    Results are stored in ``self.results`` under the ``my_round``ed
    ``(scale, maxdist, upfreq, lowfreq)`` tuple; combinations already
    present there are skipped.
    """
    def _expand(bounds, integral=False):
        """Return the values described by a tuple, a number or a list."""
        if isinstance(bounds, tuple):
            start, stop, step = bounds[0], bounds[1], bounds[2]
            if integral:
                return list(range(start, stop + step, step))
            # 2.0: with an integer step, Python 2 integer division made the
            # padding 0 and the upper bound was silently dropped
            return np.arange(start, stop + step / 2.0, step)
        if isinstance(bounds, (float, int)):
            return [bounds]
        return bounds

    def _merge(known, values):
        """Rounded ``values``, merged (sorted) with the ``known`` ones."""
        rounded = [my_round(i) for i in values]
        if not known:
            return rounded
        return sorted([r for r in rounded if r not in known] + known)

    maxdist_arange = _expand(maxdist_range, integral=True)
    lowfreq_arange = _expand(lowfreq_range)
    upfreq_arange = _expand(upfreq_range)
    scale_arange = _expand(scale_range)

    # round everything
    self.maxdist_range = _merge(self.maxdist_range, maxdist_arange)
    self.upfreq_range = _merge(self.upfreq_range, upfreq_arange)
    self.lowfreq_range = _merge(self.lowfreq_range, lowfreq_arange)
    self.scale_range = _merge(self.scale_range, scale_arange)

    # grid search
    # NOTE(review): ``models`` is filled below but never written out within
    # this block.
    models = {}
    count = 0
    for scale in [my_round(i) for i in scale_arange]:
        for maxdist in [my_round(i) for i in maxdist_arange]:
            for upfreq in [my_round(i) for i in upfreq_arange]:
                for lowfreq in [my_round(i) for i in lowfreq_arange]:
                    combo = (scale, maxdist, upfreq, lowfreq)
                    if combo in self.results:
                        continue
                    # without an explicit cutoff, derive one from the
                    # resolution and the scale being tested
                    if not self.cutoff:
                        cutoff = int(2 * self.resolution * float(scale))
                    else:
                        cutoff = self.cutoff
                    tmp = {'kforce': 5,
                           'lowrdist': 100,
                           'maxdist': int(maxdist),
                           'upfreq': float(upfreq),
                           'lowfreq': float(lowfreq),
                           'scale': float(scale)}
                    tdm = generate_3d_models(self.zscores, self.resolution,
                                             self.nloci, self.n_models,
                                             self.n_keep, config=tmp,
                                             n_cpus=n_cpus,
                                             values=self.values,
                                             close_bins=self.close_bins)
                    count += 1
                    try:
                        result = tdm.correlate_with_real_data(
                            cutoff=cutoff, corr=corr, off_diag=off_diag)[0]
                    except Exception as e:
                        # failed combinations are reported and not stored
                        print('ERROR %s' % e)
                        continue
                    if verbose:
                        verb = '%5s %s %s %s %s ' % (
                            count, upfreq, lowfreq, maxdist, scale)
                        if verbose == 2:
                            stderr.write(verb + str(result) + '\n')
                        else:
                            print(verb + str(result))
                    # store
                    self.results[combo] = result
                    if savedata:
                        models[combo] = tdm._reduce_models(minimal=True)