Exemplo n.º 1
0
def run_batch_job(exp, opts, m, u, l, s, outdir):
    """
    Generate the 3D models for one parameter combination and save them.

    :param exp: object providing ``_sub_experiment_zscore(beg, end)``, whose
       result is unpacked here as (zscores, values, zeros)
    :param opts: options object; the attributes read are ``beg``, ``end``,
       ``crm``, ``reso``, ``nmodels``, ``nkeep``, ``cpus`` and ``rand``
    :param m: value passed as 'maxdist' in the modelling configuration
    :param u: value passed as 'upfreq' in the modelling configuration
    :param l: value passed as 'lowfreq' in the modelling configuration
    :param s: value passed as 'scale' in the modelling configuration
    :param outdir: first component of the path given to ``save_models``
    """
    first, last = opts.beg, opts.end
    zscores, values, skipped = exp._sub_experiment_zscore(first, last)
    nloci = last - first + 1
    # one boolean per locus: True when its index is absent from `skipped`
    zeros = tuple(pos not in skipped for pos in xrange(nloci))

    models = generate_3d_models(
        zscores, opts.reso, nloci,
        values=values,
        n_models=opts.nmodels,
        n_keep=opts.nkeep,
        n_cpus=opts.cpus,
        keep_all=True,
        start=int(opts.rand),
        container=None,
        config={'maxdist': m, 'upfreq': u, 'lowfreq': l, 'scale': s,
                'kforce': 5},
        coords={"crm": opts.crm, "start": first, "end": last},
        zeros=zeros)

    # Save models: the directory is named after the my_round-ed parameters,
    # the file after opts.rand (plus the upper bound when several models)
    cfg_dir = 'cfg_%s_%s_%s_%s' % tuple(map(my_round, (m, u, l, s)))
    if opts.nmodels > 1:
        fname = 'models_%s-%s.pick' % (opts.rand,
                                       int(opts.rand) + opts.nmodels)
    else:
        fname = 'model_%s.pick' % (opts.rand)
    models.save_models(path.join(outdir, cfg_dir, fname))
Exemplo n.º 2
0
def run_batch_job(exp, opts, m, u, l, s, outdir):
    """
    Model one (maxdist, upfreq, lowfreq, scale) combination and pickle it.

    :param exp: object whose ``_sub_experiment_zscore(beg, end)`` result is
       unpacked as (zscores, values, zeros)
    :param opts: options object, read for ``beg``, ``end``, ``crm``,
       ``reso``, ``nmodels``, ``nkeep``, ``cpus`` and ``rand``
    :param m: 'maxdist' entry of the configuration given to the modeller
    :param u: 'upfreq' entry of the configuration given to the modeller
    :param l: 'lowfreq' entry of the configuration given to the modeller
    :param s: 'scale' entry of the configuration given to the modeller
    :param outdir: leading component of the output path
    """
    zscores, values, absent = exp._sub_experiment_zscore(opts.beg, opts.end)
    nloci = opts.end - opts.beg + 1
    # flag every index of the region that does not appear in `absent`
    zeros = tuple(locus not in absent for locus in xrange(nloci))

    region = {"crm": opts.crm, "start": opts.beg, "end": opts.end}
    settings = {'maxdist': m, 'upfreq': u, 'lowfreq': l, 'scale': s,
                'kforce': 5}
    keywords = dict(values=values, n_models=opts.nmodels, n_keep=opts.nkeep,
                    n_cpus=opts.cpus, keep_all=True, start=int(opts.rand),
                    container=None, config=settings, coords=region,
                    zeros=zeros)
    models = generate_3d_models(zscores, opts.reso, nloci, **keywords)

    # Save models
    rounded = tuple(map(my_round, (m, u, l, s)))
    subdir = 'cfg_%s_%s_%s_%s' % rounded
    if opts.nmodels > 1:
        pickle_name = 'models_%s-%s.pick' % (opts.rand,
                                             int(opts.rand) + opts.nmodels)
    else:
        pickle_name = 'model_%s.pick' % (opts.rand)
    models.save_models(path.join(outdir, subdir, pickle_name))
Exemplo n.º 3
0
def to_optimize(params,
                zscores,
                resolution,
                values,
                n_models,
                n_keep,
                n_cpus=1):
    upfreq, lowfreq, maxdist = params
    tmp = {
        'kforce': 5,
        'lowrdist': 100,
        'maxdist': maxdist,
        'upfreq': upfreq,
        'lowfreq': lowfreq
    }
    tdm = generate_3d_models(zscores,
                             resolution,
                             n_models,
                             n_keep,
                             config=tmp,
                             n_cpus=n_cpus,
                             values=values)
    global COUNT
    COUNT += 1
    print '%5s  ' % (COUNT), params,
    try:
        result = tdm.correlate_with_real_data(cutoff=200)[0]
        print result
        return 1. - result
    except:
        print 'ERROR'
        return 1.0
Exemplo n.º 4
0
def grid_search(zscores=None, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                scale_range=(0.005, 0.005, 0.001), maxdist_range=(400, 1500, 100),
                resolution=None, values=None, n_models=500,
                cutoff=300, n_keep=100, n_cpus=1, close_bins=1, verbose=True):
    count = 0
    if type(maxdist_range) == tuple:
        maxdist_step = maxdist_range[2]
        maxdist_arange = range(maxdist_range[0], maxdist_range[1] + maxdist_step,
                               maxdist_step)
    else:
        maxdist_arange = maxdist_range
    if type(lowfreq_range) == tuple:
        lowfreq_step = lowfreq_range[2]
        lowfreq_arange = np.arange(lowfreq_range[0],
                                   lowfreq_range[1] + lowfreq_step / 2,
                                   lowfreq_step)
    else:
        lowfreq_arange = lowfreq_range
    if type(upfreq_range) == tuple:
        upfreq_step = upfreq_range[2]
        upfreq_arange = np.arange(upfreq_range[0],
                                  upfreq_range[1] + upfreq_step / 2,
                                  upfreq_step)
    else:
        upfreq_arange = upfreq_range
    if type(scale_range) == tuple:
        scale_step = scale_range[2]
        scale_arange = np.arange(scale_range[0],
                                  scale_range[1] + scale_step / 2,
                                  scale_step)
    else:
        scale_arange = scale_range
    results = np.empty((len(scale_arange), len(maxdist_arange),
                        len(upfreq_arange), len(lowfreq_arange)))
    for w, scale in enumerate(scale_arange):
        for x, maxdist in enumerate(maxdist_arange):
            for y, upfreq in enumerate(upfreq_arange):
                for z, lowfreq in enumerate(lowfreq_arange):
                    tmp = {'kforce'   : 5,
                           'lowrdist' : 100,
                           'maxdist'  : maxdist,
                           'upfreq'   : upfreq,
                           'lowfreq'  : lowfreq,
                           'scale'    : scale}
                    tdm = generate_3d_models(zscores, resolution, n_models,
                                             n_keep, config=tmp, n_cpus=n_cpus,
                                             values=values,
                                             close_bins=close_bins)
                    count += 1
                    if verbose:
                        print '%5s  ' % (count), upfreq, lowfreq, maxdist, scale,
                    try:
                        result = tdm.correlate_with_real_data(cutoff=cutoff)[0]
                        if verbose:
                            print result
                        results[w, x, y, z] = result
                    except:
                        print 'ERROR'
    return results, scale_arange, maxdist_arange, upfreq_arange, lowfreq_arange
Exemplo n.º 5
0
def run_batch_job(exp, opts, m, u, l, s, outdir):
    """
    Generate the 3D models for one parameter combination and save them in a
    file named after the ``rand_init`` values of the models obtained.

    :param exp: object providing ``_sub_experiment_zscore(beg, end)``, whose
       result is unpacked here as (zscores, values, zeros)
    :param opts: options object; the attributes read are ``beg``, ``end``,
       ``crm``, ``reso``, ``nmodels``, ``nkeep``, ``cpus`` and ``rand``
    :param m: converted to float and passed as 'maxdist'
    :param u: converted to float and passed as 'upfreq'
    :param l: converted to float and passed as 'lowfreq'
    :param s: converted to float and passed as 'scale'
    :param outdir: first component of the path given to ``save_models``

    :raises Exception: when the returned models container is empty
    """
    first, last = opts.beg, opts.end
    zscores, values, skipped = exp._sub_experiment_zscore(first, last)
    nloci = last - first + 1
    # one boolean per locus: True when its index is absent from `skipped`
    zeros = tuple(pos not in skipped for pos in xrange(nloci))

    optpar = dict([('maxdist', float(m)),
                   ('upfreq', float(u)),
                   ('lowfreq', float(l)),
                   ('scale', float(s)),
                   ('kforce', 5)])
    models = generate_3d_models(
        zscores, opts.reso, nloci,
        values=values,
        n_models=opts.nmodels,
        n_keep=opts.nkeep,
        n_cpus=opts.cpus,
        keep_all=True,
        start=int(opts.rand),
        container=None,
        config=optpar,
        coords={"crm": opts.crm, "start": first, "end": last},
        zeros=zeros)

    # Save models
    dirname = 'cfg_%s_%s_%s_%s' % tuple(map(my_round, (m, u, l, s)))
    runned = []
    for mod in models:
        runned.append(int(mod['rand_init']))
    if not runned:
        raise Exception(("\n\n\nNothing to be done.\n\n"
                         "   All models asked for are already run.\n"
                         "    - ask for more models\n"
                         "    - use higher random initial number\n"
                         "    - go ahead with the analysis!"))
    if len(runned) > 1:
        fname = 'models_%s-%s.pick' % (min(runned), max(runned))
    else:
        fname = 'model_%s.pick' % (runned[0])
    models.save_models(path.join(outdir, dirname, fname))
Exemplo n.º 6
0
def to_optimize(params, zscores, resolution, values, n_models, n_keep,
                n_cpus=1):
    upfreq, lowfreq, maxdist = params
    tmp = {'kforce'   : 5,
           'lowrdist' : 100,
           'maxdist'  : maxdist,
           'upfreq'   : upfreq,
           'lowfreq'  : lowfreq}
    tdm = generate_3d_models(zscores, resolution, n_models, n_keep,
                             config=tmp, n_cpus=n_cpus, values=values)
    global COUNT
    COUNT += 1
    print '%5s  ' % (COUNT), params,
    try:
        result = tdm.correlate_with_real_data(cutoff=200)[0]
        print result
        return 1. - result
    except:
        print 'ERROR'
        return 1.0
Exemplo n.º 7
0
 def run_grid_search(self, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                 scale_range=[0.01],
                 maxdist_range=(400, 1500, 100), n_cpus=1, verbose=True):
     """
     This function calculates the correlation between the models generated 
     by IMP and the input data for the four main IMP parameters (scale, 
     maxdist, lowfreq and upfreq) in the given ranges of values.
     
     :param n_cpus: number of CPUs to use
     :param (-1,0,0.1) lowfreq_range: range of lowfreq values to be 
        optimized. The last value of the input tuple is the incremental 
        step for the lowfreq values
     :param (0,1,0.1) upfreq_range: range of upfreq values to be optimized.
        The last value of the input tuple is the incremental step for the
        upfreq values
     :param (400,1400,100) maxdist_range: upper and lower bounds
        used to search for the optimal maximum experimental distance. The 
        last value of the input tuple is the incremental step for maxdist 
        values
     :param [0.01] scale_range: upper and lower bounds used to search for 
        the optimal scale parameter (nm per nucleotide). The last value of
        the input tuple is the incremental step for scale parameter values
     :param True verbose: print the results to the standard output
     """
     if type(maxdist_range) == tuple:
         maxdist_step = maxdist_range[2]
         maxdist_arange = range(maxdist_range[0],
                                     maxdist_range[1] + maxdist_step,
                                     maxdist_step)
     else:
         maxdist_arange = maxdist_range
     if type(lowfreq_range) == tuple:
         lowfreq_step = lowfreq_range[2]
         lowfreq_arange = np.arange(lowfreq_range[0],
                                         lowfreq_range[1] + lowfreq_step / 2,
                                         lowfreq_step)
     else:
         lowfreq_arange = lowfreq_range
     if type(upfreq_range) == tuple:
         upfreq_step = upfreq_range[2]
         upfreq_arange = np.arange(upfreq_range[0],
                                        upfreq_range[1] + upfreq_step / 2,
                                        upfreq_step)
     else:
         upfreq_arange = upfreq_range
     if type(scale_range) == tuple:
         scale_step = scale_range[2]
         scale_arange = np.arange(scale_range[0],
                                       scale_range[1] + scale_step / 2,
                                       scale_step)
     else:
         scale_arange = scale_range
         
     count = 0
     for scale in scale_arange:
         if not scale in self.scale_range:
             self.scale_range.append(scale)
         for maxdist in maxdist_arange:
             if not maxdist in self.maxdist_range:
                 self.maxdist_range.append(maxdist)
             for upfreq in upfreq_arange:
                 if not upfreq in self.upfreq_range:
                     self.upfreq_range.append(upfreq)
                 for lowfreq in lowfreq_arange:
                     if not lowfreq in self.lowfreq_range:
                         self.lowfreq_range.append(lowfreq)
                     if (scale, maxdist, upfreq, lowfreq) in self.results:
                         continue
                     tmp = {'kforce'   : 5,
                            'lowrdist' : 100,
                            'maxdist'  : maxdist,
                            'upfreq'   : upfreq,
                            'lowfreq'  : lowfreq,
                            'scale'    : scale}
                     tdm = generate_3d_models(self.zscores, self.resolution,
                                              self.n_models,
                                              self.n_keep, config=tmp,
                                              n_cpus=n_cpus,
                                              values=self.values,
                                              close_bins=self.close_bins)
                     count += 1
                     if verbose:
                         print '%5s  ' % (count),
                         print upfreq, lowfreq, maxdist, scale,
                     try:
                         result = tdm.correlate_with_real_data(
                             cutoff=self.cutoff)[0]
                         if verbose:
                             print result
                         self.results[(my_round(scale),
                                       my_round(maxdist),
                                       my_round(upfreq),
                                       my_round(lowfreq))] = result
                     except Exception, e:
                         print 'ERROR %s' % e
Exemplo n.º 8
0
 def run_grid_search(self, upfreq_range=(0, 1, 0.1), lowfreq_range=(-1, 0, 0.1),
                 scale_range=(0.005, 0.005, 0.001),
                 maxdist_range=(400, 1500, 100), n_cpus=1, verbose=True):
     if type(maxdist_range) == tuple:
         maxdist_step = maxdist_range[2]
         maxdist_arange = range(maxdist_range[0],
                                     maxdist_range[1] + maxdist_step,
                                     maxdist_step)
     else:
         maxdist_arange = maxdist_range
     if type(lowfreq_range) == tuple:
         lowfreq_step = lowfreq_range[2]
         lowfreq_arange = np.arange(lowfreq_range[0],
                                         lowfreq_range[1] + lowfreq_step / 2,
                                         lowfreq_step)
     else:
         lowfreq_arange = lowfreq_range
     if type(upfreq_range) == tuple:
         upfreq_step = upfreq_range[2]
         upfreq_arange = np.arange(upfreq_range[0],
                                        upfreq_range[1] + upfreq_step / 2,
                                        upfreq_step)
     else:
         upfreq_arange = upfreq_range
     if type(scale_range) == tuple:
         scale_step = scale_range[2]
         scale_arange = np.arange(scale_range[0],
                                       scale_range[1] + scale_step / 2,
                                       scale_step)
     else:
         scale_arange = scale_range
         
     count = 0
     for scale in scale_arange:
         if not scale in self.scale_range:
             self.scale_range.append(scale)
         for maxdist in maxdist_arange:
             if not maxdist in self.maxdist_range:
                 self.maxdist_range.append(maxdist)
             for upfreq in upfreq_arange:
                 if not upfreq in self.upfreq_range:
                     self.upfreq_range.append(upfreq)
                 for lowfreq in lowfreq_arange:
                     if not lowfreq in self.lowfreq_range:
                         self.lowfreq_range.append(lowfreq)
                     if (scale, maxdist, upfreq, lowfreq) in self.results:
                         continue
                     tmp = {'kforce'   : 5,
                            'lowrdist' : 100,
                            'maxdist'  : maxdist,
                            'upfreq'   : upfreq,
                            'lowfreq'  : lowfreq,
                            'scale'    : scale}
                     tdm = generate_3d_models(self.zscores, self.resolution,
                                              self.n_models,
                                              self.n_keep, config=tmp,
                                              n_cpus=n_cpus,
                                              values=self.values,
                                              close_bins=self.close_bins)
                     count += 1
                     if verbose:
                         print '%5s  ' % (count),
                         print upfreq, lowfreq, maxdist, scale,
                     try:
                         result = tdm.correlate_with_real_data(
                             cutoff=self.cutoff)[0]
                         if verbose:
                             print result
                         self.results[(scale, maxdist, upfreq, lowfreq)] = result
                     except:
                         print 'ERROR'
                         
     self.scale_range.sort()
     self.maxdist_range.sort()
     self.lowfreq_range.sort()
     self.upfreq_range.sort()
Exemplo n.º 9
0
    def run_grid_search(self,
                        upfreq_range=(0, 1, 0.1),
                        lowfreq_range=(-1, 0, 0.1),
                        scale_range=(0.005, 0.005, 0.001),
                        maxdist_range=(400, 1500, 100),
                        n_cpus=1,
                        verbose=True):
        if type(maxdist_range) == tuple:
            maxdist_step = maxdist_range[2]
            maxdist_arange = range(maxdist_range[0],
                                   maxdist_range[1] + maxdist_step,
                                   maxdist_step)
        else:
            maxdist_arange = maxdist_range
        if type(lowfreq_range) == tuple:
            lowfreq_step = lowfreq_range[2]
            lowfreq_arange = np.arange(lowfreq_range[0],
                                       lowfreq_range[1] + lowfreq_step / 2,
                                       lowfreq_step)
        else:
            lowfreq_arange = lowfreq_range
        if type(upfreq_range) == tuple:
            upfreq_step = upfreq_range[2]
            upfreq_arange = np.arange(upfreq_range[0],
                                      upfreq_range[1] + upfreq_step / 2,
                                      upfreq_step)
        else:
            upfreq_arange = upfreq_range
        if type(scale_range) == tuple:
            scale_step = scale_range[2]
            scale_arange = np.arange(scale_range[0],
                                     scale_range[1] + scale_step / 2,
                                     scale_step)
        else:
            scale_arange = scale_range

        count = 0
        for scale in scale_arange:
            if not scale in self.scale_range:
                self.scale_range.append(scale)
            for maxdist in maxdist_arange:
                if not maxdist in self.maxdist_range:
                    self.maxdist_range.append(maxdist)
                for upfreq in upfreq_arange:
                    if not upfreq in self.upfreq_range:
                        self.upfreq_range.append(upfreq)
                    for lowfreq in lowfreq_arange:
                        if not lowfreq in self.lowfreq_range:
                            self.lowfreq_range.append(lowfreq)
                        if (scale, maxdist, upfreq, lowfreq) in self.results:
                            continue
                        tmp = {
                            'kforce': 5,
                            'lowrdist': 100,
                            'maxdist': maxdist,
                            'upfreq': upfreq,
                            'lowfreq': lowfreq,
                            'scale': scale
                        }
                        tdm = generate_3d_models(self.zscores,
                                                 self.resolution,
                                                 self.n_models,
                                                 self.n_keep,
                                                 config=tmp,
                                                 n_cpus=n_cpus,
                                                 values=self.values,
                                                 close_bins=self.close_bins)
                        count += 1
                        if verbose:
                            print '%5s  ' % (count),
                            print upfreq, lowfreq, maxdist, scale,
                        try:
                            result = tdm.correlate_with_real_data(
                                cutoff=self.cutoff)[0]
                            if verbose:
                                print result
                            self.results[(scale, maxdist, upfreq,
                                          lowfreq)] = result
                        except:
                            print 'ERROR'

        self.scale_range.sort()
        self.maxdist_range.sort()
        self.lowfreq_range.sort()
        self.upfreq_range.sort()
Exemplo n.º 10
0
def grid_search(zscores=None,
                upfreq_range=(0, 1, 0.1),
                lowfreq_range=(-1, 0, 0.1),
                scale_range=(0.005, 0.005, 0.001),
                maxdist_range=(400, 1500, 100),
                resolution=None,
                values=None,
                n_models=500,
                cutoff=300,
                n_keep=100,
                n_cpus=1,
                close_bins=1,
                verbose=True):
    count = 0
    if type(maxdist_range) == tuple:
        maxdist_step = maxdist_range[2]
        maxdist_arange = range(maxdist_range[0],
                               maxdist_range[1] + maxdist_step, maxdist_step)
    else:
        maxdist_arange = maxdist_range
    if type(lowfreq_range) == tuple:
        lowfreq_step = lowfreq_range[2]
        lowfreq_arange = np.arange(lowfreq_range[0],
                                   lowfreq_range[1] + lowfreq_step / 2,
                                   lowfreq_step)
    else:
        lowfreq_arange = lowfreq_range
    if type(upfreq_range) == tuple:
        upfreq_step = upfreq_range[2]
        upfreq_arange = np.arange(upfreq_range[0],
                                  upfreq_range[1] + upfreq_step / 2,
                                  upfreq_step)
    else:
        upfreq_arange = upfreq_range
    if type(scale_range) == tuple:
        scale_step = scale_range[2]
        scale_arange = np.arange(scale_range[0],
                                 scale_range[1] + scale_step / 2, scale_step)
    else:
        scale_arange = scale_range
    results = np.empty((len(scale_arange), len(maxdist_arange),
                        len(upfreq_arange), len(lowfreq_arange)))
    for w, scale in enumerate(scale_arange):
        for x, maxdist in enumerate(maxdist_arange):
            for y, upfreq in enumerate(upfreq_arange):
                for z, lowfreq in enumerate(lowfreq_arange):
                    tmp = {
                        'kforce': 5,
                        'lowrdist': 100,
                        'maxdist': maxdist,
                        'upfreq': upfreq,
                        'lowfreq': lowfreq,
                        'scale': scale
                    }
                    tdm = generate_3d_models(zscores,
                                             resolution,
                                             n_models,
                                             n_keep,
                                             config=tmp,
                                             n_cpus=n_cpus,
                                             values=values,
                                             close_bins=close_bins)
                    count += 1
                    if verbose:
                        print '%5s  ' % (
                            count), upfreq, lowfreq, maxdist, scale,
                    try:
                        result = tdm.correlate_with_real_data(cutoff=cutoff)[0]
                        if verbose:
                            print result
                        results[w, x, y, z] = result
                    except:
                        print 'ERROR'
    return results, scale_arange, maxdist_arange, upfreq_arange, lowfreq_arange
Exemplo n.º 11
0
    def run_grid_search(self,
                        upfreq_range=(0, 1, 0.1),
                        lowfreq_range=(-1, 0, 0.1),
                        scale_range=[0.01],
                        maxdist_range=(400, 1500, 100),
                        n_cpus=1,
                        verbose=True):
        """
        This function calculates the correlation between the models generated 
        by IMP and the input data for the four main IMP parameters (scale, 
        maxdist, lowfreq and upfreq) in the given ranges of values.
        
        :param n_cpus: number of CPUs to use
        :param (-1,0,0.1) lowfreq_range: range of lowfreq values to be 
           optimized. The last value of the input tuple is the incremental 
           step for the lowfreq values
        :param (0,1,0.1) upfreq_range: range of upfreq values to be optimized.
           The last value of the input tuple is the incremental step for the
           upfreq values
        :param (400,1400,100) maxdist_range: upper and lower bounds
           used to search for the optimal maximum experimental distance. The 
           last value of the input tuple is the incremental step for maxdist 
           values
        :param [0.01] scale_range: upper and lower bounds used to search for 
           the optimal scale parameter (nm per nucleotide). The last value of
           the input tuple is the incremental step for scale parameter values
        :param True verbose: print the results to the standard output
        """
        if type(maxdist_range) == tuple:
            maxdist_step = maxdist_range[2]
            maxdist_arange = range(maxdist_range[0],
                                   maxdist_range[1] + maxdist_step,
                                   maxdist_step)
        else:
            maxdist_arange = maxdist_range
        if type(lowfreq_range) == tuple:
            lowfreq_step = lowfreq_range[2]
            lowfreq_arange = np.arange(lowfreq_range[0],
                                       lowfreq_range[1] + lowfreq_step / 2,
                                       lowfreq_step)
        else:
            lowfreq_arange = lowfreq_range
        if type(upfreq_range) == tuple:
            upfreq_step = upfreq_range[2]
            upfreq_arange = np.arange(upfreq_range[0],
                                      upfreq_range[1] + upfreq_step / 2,
                                      upfreq_step)
        else:
            upfreq_arange = upfreq_range
        if type(scale_range) == tuple:
            scale_step = scale_range[2]
            scale_arange = np.arange(scale_range[0],
                                     scale_range[1] + scale_step / 2,
                                     scale_step)
        else:
            scale_arange = scale_range

        count = 0
        for scale in scale_arange:
            if not scale in self.scale_range:
                self.scale_range.append(scale)
            for maxdist in maxdist_arange:
                if not maxdist in self.maxdist_range:
                    self.maxdist_range.append(maxdist)
                for upfreq in upfreq_arange:
                    if not upfreq in self.upfreq_range:
                        self.upfreq_range.append(upfreq)
                    for lowfreq in lowfreq_arange:
                        if not lowfreq in self.lowfreq_range:
                            self.lowfreq_range.append(lowfreq)
                        if (scale, maxdist, upfreq, lowfreq) in self.results:
                            continue
                        tmp = {
                            'kforce': 5,
                            'lowrdist': 100,
                            'maxdist': maxdist,
                            'upfreq': upfreq,
                            'lowfreq': lowfreq,
                            'scale': scale
                        }
                        tdm = generate_3d_models(self.zscores,
                                                 self.resolution,
                                                 self.n_models,
                                                 self.n_keep,
                                                 config=tmp,
                                                 n_cpus=n_cpus,
                                                 values=self.values,
                                                 close_bins=self.close_bins)
                        count += 1
                        if verbose:
                            print '%5s  ' % (count),
                            print upfreq, lowfreq, maxdist, scale,
                        try:
                            result = tdm.correlate_with_real_data(
                                cutoff=self.cutoff)[0]
                            if verbose:
                                print result
                            self.results[(my_round(scale), my_round(maxdist),
                                          my_round(upfreq),
                                          my_round(lowfreq))] = result
                        except Exception, e:
                            print 'ERROR %s' % e
Exemplo n.º 12
0
    def run_grid_search(self, 
                        upfreq_range=(0, 1, 0.1),
                        lowfreq_range=(-1, 0, 0.1),
                        maxdist_range=(400, 1500, 100),
                        scale_range=0.01,
                        dcutoff_range=2,
                        corr='spearman', off_diag=1,
                        savedata=None, n_cpus=1, verbose=True):
        """
        This function calculates the correlation between the models generated 
        by IMP and the input data for the four main IMP parameters (scale, 
        maxdist, lowfreq and upfreq) in the given ranges of values.
        
        :param n_cpus: number of CPUs to use
        :param (-1,0,0.1) lowfreq_range: range of lowfreq values to be 
           optimized. The last value of the input tuple is the incremental 
           step for the lowfreq values
        :param (0,1,0.1) upfreq_range: range of upfreq values to be optimized.
           The last value of the input tuple is the incremental step for the
           upfreq values
        :param (400,1400,100) maxdist_range: upper and lower bounds
           used to search for the optimal maximum experimental distance. The 
           last value of the input tuple is the incremental step for maxdist 
           values
        :param 0.01 scale_range: upper and lower bounds used to search for
           the optimal scale parameter (nm per nucleotide). The last value of
           the input tuple is the incremental step for scale parameter values
        :param 2 dcutoff_range: upper and lower bounds used to search for
           the optimal distance cutoff parameter (distance, in number of beads,
           from which to consider 2 beads as being close). The last value of the
           input tuple is the incremental step for scale parameter values
        :param None savedata: concatenate all generated models into a dictionary
           and save it into a file named by this argument
        :param True verbose: print the results to the standard output
        """
        if verbose:
            stderr.write('Optimizing %s particles\n' % self.nloci)
        if isinstance(maxdist_range, tuple):
            maxdist_step = maxdist_range[2]
            maxdist_arange = range(maxdist_range[0],
                                        maxdist_range[1] + maxdist_step,
                                        maxdist_step)
        else:
            if isinstance(maxdist_range, (float, int)):
                maxdist_range = [maxdist_range]
            maxdist_arange = maxdist_range
        #
        if isinstance(lowfreq_range, tuple):
            lowfreq_step = lowfreq_range[2]
            lowfreq_arange = np.arange(lowfreq_range[0],
                                            lowfreq_range[1] + lowfreq_step / 2,
                                            lowfreq_step)
        else:
            if isinstance(lowfreq_range, (float, int)):
                lowfreq_range = [lowfreq_range]
            lowfreq_arange = lowfreq_range
        #
        if isinstance(upfreq_range, tuple):
            upfreq_step = upfreq_range[2]
            upfreq_arange = np.arange(upfreq_range[0],
                                           upfreq_range[1] + upfreq_step / 2,
                                           upfreq_step)
        else:
            if isinstance(upfreq_range, (float, int)):
                upfreq_range = [upfreq_range]
            upfreq_arange = upfreq_range
        #
        if isinstance(scale_range, tuple):
            scale_step = scale_range[2]
            scale_arange = np.arange(scale_range[0],
                                          scale_range[1] + scale_step / 2,
                                          scale_step)
        else:
            if isinstance(scale_range, (float, int)):
                scale_range = [scale_range]
            scale_arange = scale_range
        #
        if isinstance(dcutoff_range, tuple):
            dcutoff_step = dcutoff_range[2]
            dcutoff_arange = np.arange(dcutoff_range[0],
                                          dcutoff_range[1] + dcutoff_step / 2,
                                          dcutoff_step)
        else:
            if isinstance(dcutoff_range, (float, int)):
                dcutoff_range = [dcutoff_range]
            dcutoff_arange = dcutoff_range

        # round everything
        if not self.maxdist_range:
            self.maxdist_range = [my_round(i) for i in maxdist_arange]
        else:
            self.maxdist_range = sorted([my_round(i) for i in maxdist_arange
                                         if not my_round(i) in self.maxdist_range] +
                                        self.maxdist_range)
        if not self.upfreq_range:
            self.upfreq_range  = [my_round(i) for i in upfreq_arange ]
        else:
            self.upfreq_range = sorted([my_round(i) for i in upfreq_arange
                                        if not my_round(i) in self.upfreq_range] +
                                       self.upfreq_range)
        if not self.lowfreq_range:
            self.lowfreq_range = [my_round(i) for i in lowfreq_arange]
        else:
            self.lowfreq_range = sorted([my_round(i) for i in lowfreq_arange
                                         if not my_round(i) in self.lowfreq_range] +
                                        self.lowfreq_range)
        if not self.scale_range:
            self.scale_range   = [my_round(i) for i in scale_arange  ]
        else:
            self.scale_range = sorted([my_round(i) for i in scale_arange
                                       if not my_round(i) in self.scale_range] +
                                      self.scale_range)
        if not self.dcutoff_range:
            self.dcutoff_range = [my_round(i) for i in dcutoff_arange]
        else:
            self.dcutoff_range = sorted([my_round(i) for i in dcutoff_arange
                                         if not my_round(i) in self.dcutoff_range] +
                                        self.dcutoff_range)
        # grid search
        models = {}
        count = 0
        if verbose:
            stderr.write('# %3s %6s %7s %7s %6s %7s %7s\n' % (
                                    "num", "upfrq", "lowfrq", "maxdist",
                                    "scale", "cutoff", "corr"))
        for scale in [my_round(i) for i in scale_arange]:
            for maxdist in [my_round(i) for i in maxdist_arange]:
                for upfreq in [my_round(i) for i in upfreq_arange]:
                    for lowfreq in [my_round(i) for i in lowfreq_arange]:
                        # check if this optimization has been already done
                        if (scale, maxdist, upfreq, lowfreq) in [
                            tuple(k[:4]) for k in self.results]:
                            k = [k for k in self.results
                                 if (scale, maxdist, upfreq,
                                     lowfreq) == tuple(k[:4])][0]
                            result = self.results[(scale, maxdist, upfreq,
                                                   lowfreq, k[-1])]
                            if verbose:
                                verb = '%5s %6s %7s %7s %6s %7s  ' % (
                                    'xx', upfreq, lowfreq, maxdist,
                                    scale, k[-1])
                                if verbose == 2:
                                    stderr.write(verb + str(round(result, 4))
                                                 + '\n')
                                else:
                                    print verb + str(round(result, 4))
                            continue
                        tmp = {'kforce'   : 5,
                               'lowrdist' : 100,
                               'maxdist'  : int(maxdist),
                               'upfreq'   : float(upfreq),
                               'lowfreq'  : float(lowfreq),
                               'scale'    : float(scale)}
                        try:
                            count += 1
                            tdm = generate_3d_models(
                                self.zscores, self.resolution,
                                self.nloci, n_models=self.n_models,
                                n_keep=self.n_keep, config=tmp,
                                n_cpus=n_cpus, first=0,
                                values=self.values, container=self.container,
                                close_bins=self.close_bins, zeros=self.zeros)
                            result = 0
                            cutoff = my_round(dcutoff_arange[0])
                            for cut in [i for i in dcutoff_arange]:
                                sub_result = tdm.correlate_with_real_data(
                                    cutoff=(int(cut * self.resolution *
                                                float(scale))),
                                    corr=corr,
                                    off_diag=off_diag)[0]
                                if result < sub_result:
                                    result = sub_result
                                    cutoff = my_round(cut)
                        except Exception, e:
                            print '  SKIPPING: %s' % e
                            result = 0
                            cutoff = my_round(dcutoff_arange[0])
                        if verbose:
                            verb = '%5s %6s %7s %7s %6s %7s  ' % (
                                count, upfreq, lowfreq, maxdist,
                                scale, cutoff)
                            if verbose == 2:
                                stderr.write(verb + str(round(result, 4))
                                             + '\n')
                            else:
                                print verb + str(round(result, 4))
                        # store
                        self.results[(scale, maxdist,
                                      upfreq, lowfreq, cutoff)] = result
                        if savedata and result:
                            models[(scale, maxdist, upfreq, lowfreq, cutoff)
                                   ] = tdm._reduce_models(minimal=True)
Exemplo n.º 13
0
    def model_region(self, start, end,
                     n_models=5000, n_keep=1000, n_cpus=1, verbose=0,
                     keep_all=False, close_bins=1, outfile=None,
                     config=CONFIG['dmel_01']):
        """
        Generate 3D models of the region spanning bins *start* to *end*.

        If the experiment is not currently normalized with the 'visibility'
        method, a warning is issued and it is re-normalized that way before
        the Z-scores of the region are computed.

        :param start: first bin to model (bin number)
        :param end: last bin to model (bin number)
        :param 5000 n_models: number of models to generate
        :param 1000 n_keep: number of models used in the final analysis
           (usually the top 20% of the generated models). The models are
           ranked according to their objective function value (the lower the
           better)
        :param 1 n_cpus: number of CPUs to use
        :param 0 verbose: the information printed can be: nothing (0), the
           objective function value of the selected models (1), the objective
           function value of all the models (2), all the modeling
           information (3)
        :param False keep_all: whether or not to keep the discarded models (if
           True, models will be stored under StructuralModels.bad_models)
        :param 1 close_bins: number of particles away (i.e. the bin number
           difference) a particle pair must be in order to be considered as
           neighbors (e.g. 1 means consecutive particles)
        :param None outfile: handed over unchanged to generate_3d_models
        :param CONFIG['dmel_01'] config: a dictionary containing the standard
           parameters used to generate the models. The dictionary should
           contain the keys kforce, maxdist, upfreq and lowfreq.
           Examples can be seen by doing:

           ::

             from pytadbit.imp.CONFIG import CONFIG

           where CONFIG is a dictionary of dictionaries, one of which is
           passed to this function:

           ::

             CONFIG = {
              'dmel_01': {
                  # use these parameters with the Hi-C data from:
                  'reference' : 'victor corces dataset 2013',

                  # Force applied to the restraints inferred to neighbor
                  # particles
                  'kforce'    : 5,

                  # Maximum experimental contact distance
                  'maxdist'   : 600, # OPTIMIZATION: 500-1200

                  # Minimum and maximum thresholds used to decide which
                  # experimental values have to be included in the
                  # computation of restraints. Z-score values bigger than
                  # upfreq and less than lowfreq will be included, whereas
                  # all the others will be rejected
                  'upfreq'    : 0.3, # OPTIMIZATION: min/max Z-score

                  'lowfreq'   : -0.7 # OPTIMIZATION: min/max Z-score

                  }
              }

        :returns: whatever generate_3d_models returns for this region
        """
        needs_normalizing = self._normalization != 'visibility'
        if needs_normalizing:
            warn('WARNING: normalizing according to visibility method')
            self.normalize_hic(method='visibility')

        region_zscores, region_values = self._sub_experiment_zscore(start, end)

        # every modeling option is forwarded by keyword, untouched
        modeling_options = {
            'values'    : region_values,
            'n_models'  : n_models,
            'n_keep'    : n_keep,
            'n_cpus'    : n_cpus,
            'verbose'   : verbose,
            'keep_all'  : keep_all,
            'close_bins': close_bins,
            'outfile'   : outfile,
            'config'    : config,
        }
        return generate_3d_models(region_zscores, self.resolution,
                                  **modeling_options)
Exemplo n.º 14
0
    def model_region(self, start, end, n_models=5000, n_keep=1000,
                     n_cpus=1, verbose=0, keep_all=False, close_bins=1,
                     outfile=None, config=CONFIG['dmel_01']):
        """
        Model in 3D the stretch of the experiment between two bins.

        The experiment is first switched to the 'visibility' normalization
        (with a warning) when it is not already using it; the Z-scores of the
        requested region are then passed on to generate_3d_models.

        :param start: first bin to model (bin number)
        :param end: last bin to model (bin number)
        :param 5000 n_models: number of models to generate
        :param 1000 n_keep: number of models used in the final analysis
           (usually the top 20% of the generated models). The models are
           ranked according to their objective function value (the lower the
           better)
        :param False keep_all: whether or not to keep the discarded models (if
           True, models will be stored under StructuralModels.bad_models)
        :param 1 close_bins: number of particles away (i.e. the bin number
           difference) a particle pair must be in order to be considered as
           neighbors (e.g. 1 means consecutive particles)
        :param 1 n_cpus: number of CPUs to use
        :param 0 verbose: the information printed can be: nothing (0), the
           objective function value of the selected models (1), the objective
           function value of all the models (2), all the modeling
           information (3)
        :param None outfile: passed through as is to generate_3d_models
        :param CONFIG['dmel_01'] config: a dictionary containing the standard
           parameters used to generate the models. The dictionary should
           contain the keys kforce, maxdist, upfreq and lowfreq.
           Examples can be seen by doing:

           ::

             from pytadbit.imp.CONFIG import CONFIG

           where CONFIG is a dictionary of dictionaries; one of its entries
           is what this function expects:

           ::

             CONFIG = {
              'dmel_01': {
                  # use these parameters with the Hi-C data from:
                  'reference' : 'victor corces dataset 2013',

                  # Force applied to the restraints inferred to neighbor
                  # particles
                  'kforce'    : 5,

                  # Maximum experimental contact distance
                  'maxdist'   : 600, # OPTIMIZATION: 500-1200

                  # Minimum and maximum thresholds used to decide which
                  # experimental values have to be included in the
                  # computation of restraints. Z-score values bigger than
                  # upfreq and less than lowfreq will be included, whereas
                  # all the others will be rejected
                  'upfreq'    : 0.3, # OPTIMIZATION: min/max Z-score

                  'lowfreq'   : -0.7 # OPTIMIZATION: min/max Z-score

                  }
              }

        :returns: the value returned by generate_3d_models
        """
        # make sure the data is visibility-normalized before taking Z-scores
        required_method = 'visibility'
        if self._normalization != required_method:
            warn('WARNING: normalizing according to visibility method')
            self.normalize_hic(method=required_method)

        sub_zscores, sub_values = self._sub_experiment_zscore(start, end)
        generated = generate_3d_models(
            sub_zscores, self.resolution,
            values=sub_values, config=config,
            n_models=n_models, n_keep=n_keep, n_cpus=n_cpus,
            close_bins=close_bins, keep_all=keep_all,
            outfile=outfile, verbose=verbose)
        return generated
Exemplo n.º 15
0
    def run_grid_search(self,
                        upfreq_range=(0, 1, 0.1),
                        lowfreq_range=(-1, 0, 0.1),
                        maxdist_range=(400, 1500, 100),
                        scale_range=0.01,
                        corr='spearman', off_diag=1,
                        savedata=None, n_cpus=1, verbose=True):
        """
        This function calculates the correlation between the models generated
        by IMP and the input data for the four main IMP parameters (scale,
        maxdist, lowfreq and upfreq) in the given ranges of values.

        Every ``*_range`` argument accepts a ``(first, last, step)`` tuple
        (expanded with both bounds included), a single number, or any other
        iterable of values, which is then used as given. Combinations already
        present in ``self.results`` are skipped; each new correlation is
        stored there under the key ``(scale, maxdist, upfreq, lowfreq)`` made
        of the rounded values.

        :param (0,1,0.1) upfreq_range: range of upfreq values to be optimized.
           The last value of the input tuple is the incremental step for the
           upfreq values
        :param (-1,0,0.1) lowfreq_range: range of lowfreq values to be
           optimized. The last value of the input tuple is the incremental
           step for the lowfreq values
        :param (400,1500,100) maxdist_range: upper and lower bounds
           used to search for the optimal maximum experimental distance. The
           last value of the input tuple is the incremental step for maxdist
           values
        :param 0.01 scale_range: upper and lower bounds used to search for
           the optimal scale parameter (nm per nucleotide). The last value of
           the input tuple is the incremental step for scale parameter values
        :param 'spearman' corr: forwarded to ``correlate_with_real_data``
        :param 1 off_diag: forwarded to ``correlate_with_real_data``
        :param None savedata: when true, the reduced models of every evaluated
           combination are collected in a dictionary keyed like
           ``self.results``. NOTE(review): nothing in the visible part of this
           method writes that dictionary anywhere - confirm where it is saved
        :param n_cpus: number of CPUs to use
        :param True verbose: print the results to the standard output; if
           equal to 2 they are written to the standard error instead
        """
        from itertools import product

        def expand_float_range(spec):
            # (first, last, step) tuple -> array including both bounds.
            # Half a step is added to the upper bound so that it is kept
            # despite float rounding; dividing by 2.0 (not 2) prevents
            # Python 2 floor division from cancelling that margin, which
            # silently dropped the upper bound for integer steps.
            if isinstance(spec, tuple):
                step = spec[2]
                return np.arange(spec[0], spec[1] + step / 2.0, step)
            if isinstance(spec, (float, int)):
                return [spec]
            return spec

        def merge_with_known(known, fresh):
            # first search: keep the new values as they come; afterwards add
            # only the unseen ones to the stored list and keep it sorted
            if not known:
                return list(fresh)
            return sorted([val for val in fresh if val not in known] + known)

        # maxdist is handled apart: it is expanded with the builtin range
        if isinstance(maxdist_range, tuple):
            maxdist_step = maxdist_range[2]
            maxdist_arange = range(maxdist_range[0],
                                   maxdist_range[1] + maxdist_step,
                                   maxdist_step)
        elif isinstance(maxdist_range, (float, int)):
            maxdist_arange = [maxdist_range]
        else:
            maxdist_arange = maxdist_range
        lowfreq_arange = expand_float_range(lowfreq_range)
        upfreq_arange = expand_float_range(upfreq_range)
        scale_arange = expand_float_range(scale_range)

        # round everything, once: the same rounded values are used both to
        # update the stored ranges and to walk the grid
        maxdists = [my_round(i) for i in maxdist_arange]
        upfreqs = [my_round(i) for i in upfreq_arange]
        lowfreqs = [my_round(i) for i in lowfreq_arange]
        scales = [my_round(i) for i in scale_arange]

        self.maxdist_range = merge_with_known(self.maxdist_range, maxdists)
        self.upfreq_range = merge_with_known(self.upfreq_range, upfreqs)
        self.lowfreq_range = merge_with_known(self.lowfreq_range, lowfreqs)
        self.scale_range = merge_with_known(self.scale_range, scales)

        # grid search (scale varies slowest, lowfreq fastest)
        models = {}
        count = 0
        for params in product(scales, maxdists, upfreqs, lowfreqs):
            # combination already evaluated in a previous search
            if params in self.results:
                continue
            scale, maxdist, upfreq, lowfreq = params
            if self.cutoff:
                cutoff = self.cutoff
            else:
                cutoff = int(2 * self.resolution * float(scale))
            tmp = {'kforce'   : 5,
                   'lowrdist' : 100,
                   'maxdist'  : int(maxdist),
                   'upfreq'   : float(upfreq),
                   'lowfreq'  : float(lowfreq),
                   'scale'    : float(scale)}
            tdm = generate_3d_models(self.zscores, self.resolution,
                                     self.nloci, self.n_models,
                                     self.n_keep, config=tmp,
                                     n_cpus=n_cpus,
                                     values=self.values,
                                     close_bins=self.close_bins)
            count += 1
            if verbose:
                verb = '%5s  %s %s %s %s ' % (
                    count, upfreq, lowfreq, maxdist, scale)
            try:
                result = tdm.correlate_with_real_data(
                    cutoff=cutoff, corr=corr,
                    off_diag=off_diag)[0]
                if verbose:
                    if verbose == 2:
                        stderr.write(verb + str(result) + '\n')
                    else:
                        print(verb + str(result))
            except Exception as e:
                # best effort: a failing combination is reported and left
                # out of self.results, the search goes on
                print('ERROR %s' % e)
                continue
            # store
            self.results[params] = result
            if savedata:
                models[params] = tdm._reduce_models(minimal=True)