Пример #1
0
    def test_14_3d_clustering(self):
        """
        """
        if ONLY and "14" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels("models.pick")
        if find_executable("mcl"):
            models.cluster_models(method="mcl", fact=0.9, verbose=False, dcutoff=200)
            self.assertTrue(5 <= len(models.clusters.keys()) <= 7)
        models.cluster_models(method="ward", verbose=False, dcutoff=200)
        self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        d = models.cluster_analysis_dendrogram()
        self.assertEqual(d["icoord"], [[5.0, 5.0, 15.0, 15.0]])
        # align models
        m1, m2 = models.align_models(models=[1, 2])
        nrmsd = sum(
            [
                ((m1[0][i] - m2[0][i]) ** 2 + (m1[1][i] - m2[1][i]) ** 2 + (m1[2][i] - m2[2][i]) ** 2) ** 0.5
                for i in xrange(len(m1[0]))
            ]
        ) / (len(m1[0]))
        self.assertTrue(nrmsd < 160)
        # fetching models
        models.define_best_models(5)
        m = models.fetch_model_by_rand_init("1", all_models=True)
        self.assertEqual(m, 8)
        models.define_best_models(25)
        m = models.fetch_model_by_rand_init("1", all_models=False)
        self.assertEqual(m, 8)
        if CHKTIME:
            print "14", time() - t0
Пример #2
0
    def test_16_models_stats(self):
        if ONLY and "16" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels("models.pick")
        # write cmm
        models.write_cmm(".", model_num=2)
        model = load_impmodel_from_cmm("model.%s.cmm" % models[2]["rand_init"])
        # clean
        system("rm -f model.*")
        # stats
        self.assertEqual(200, round(model.distance(2, 3), 0))
        self.assertTrue(9 <= round(model.distance(8, 20) / 100, 0) <= 10)
        self.assertEqual(round(30, 0), round(model.radius_of_gyration() / 20, 0))
        self.assertEqual(400, round(model.contour() / 10, 0))
        self.assertTrue(21 <= round((model.shortest_axe() + model.longest_axe()) / 100, 0) <= 22)
        self.assertEqual([15, 16], model.inaccessible_particles(1000))

        acc, num, acc_area, tot_area, bypt = model.accessible_surface(150, superradius=200, nump=150)
        self.assertTrue(210 <= acc <= 240)
        self.assertTrue(500 <= num <= 600)
        self.assertEqual(0.4, round(acc_area, 1))
        self.assertEqual(4, round(tot_area, 0))
        self.assertEqual(101, len(bypt))
        self.assertTrue(19 <= bypt[100][0] <= 22 and 8 <= bypt[100][1] <= 38 and 8 <= bypt[100][2] <= 23)
        if CHKTIME:
            print "16", time() - t0
Пример #3
0
    def test_16_models_stats(self):
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick')
        # write cmm
        models.write_cmm('.', model_num=2)
        model = load_impmodel_from_cmm('model.%s.cmm' % models[2]['rand_init'])
        # clean
        system('rm -f model.*')
        # stats
        self.assertEqual(200, round(model.distance(2, 3), 0))
        self.assertTrue(9 <= round(model.distance(8, 20) / 100, 0) <= 10)
        self.assertEqual(round(30, 0), round(model.radius_of_gyration() / 20,
                                             0))
        self.assertEqual(400, round(model.contour() / 10, 0))
        self.assertTrue(
            21 <= round((model.shortest_axe() + model.longest_axe()) /
                        100, 0) <= 22)
        self.assertEqual([15, 16], model.inaccessible_particles(1000))

        acc, num, acc_area, tot_area, bypt = model.accessible_surface(
            150, superradius=200, nump=150)
        self.assertTrue(210 <= acc <= 240)
        self.assertTrue(500 <= num <= 600)
        self.assertEqual(0.4, round(acc_area, 1))
        self.assertEqual(4, round(tot_area, 0))
        self.assertEqual(101, len(bypt))
        self.assertTrue(19 <= bypt[100][0] <= 22 and 8 <= bypt[100][1] <= 38
                        and 8 <= bypt[100][2] <= 23)
        if CHKTIME:
            print '16', time() - t0
Пример #4
0
    def test_14_3d_clustering(self):
        """
        """
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick')
        if find_executable('mcl'):
            models.cluster_models(method='mcl',
                                  fact=0.9,
                                  verbose=False,
                                  dcutoff=200)
            self.assertTrue(5 <= len(models.clusters.keys()) <= 7)
        models.cluster_models(method='ward', verbose=False, dcutoff=200)
        self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        d = models.cluster_analysis_dendrogram()
        self.assertEqual(d['icoord'], [[5., 5., 15., 15.]])
        # align models
        m1, m2 = models.align_models(models=[1, 2])
        nrmsd = (sum([((m1[0][i] - m2[0][i])**2 + (m1[1][i] - m2[1][i])**2 +
                       (m1[2][i] - m2[2][i])**2)**.5
                      for i in xrange(len(m1[0]))]) / (len(m1[0])))
        self.assertTrue(nrmsd < 150)
        # fetching models
        models.define_best_models(5)
        m = models.fetch_model_by_rand_init('1', all_models=True)
        self.assertEqual(m, 2)
        models.define_best_models(25)
        m = models.fetch_model_by_rand_init('1', all_models=False)
        self.assertEqual(m, 2)
        if CHKTIME:
            print '14', time() - t0
Пример #5
0
    def test_14_3d_clustering(self):
        """
        """
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick')
        if find_executable('mcl'):
            models.cluster_models(method='mcl', fact=0.9, verbose=False,
                                  dcutoff=200)
            self.assertTrue(5 <= len(models.clusters.keys()) <= 7)
        models.cluster_models(method='ward', verbose=False, dcutoff=200)
        self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        d = models.cluster_analysis_dendrogram()
        self.assertEqual(d['icoord'], [[5., 5., 15., 15.]])
        # align models
        m1, m2 = models.align_models(models=[1,2])
        nrmsd = (sum([((m1[0][i] - m2[0][i])**2 + (m1[1][i] - m2[1][i])**2 + (m1[2][i] - m2[2][i])**2)**.5
                      for i in xrange(len(m1[0]))]) / (len(m1[0])))
        self.assertTrue(nrmsd < 160)
        # fetching models
        models.define_best_models(5)
        m = models.fetch_model_by_rand_init('1', all_models=True)
        self.assertEqual(m, 8)
        models.define_best_models(25)
        m = models.fetch_model_by_rand_init('1', all_models=False)
        self.assertEqual(m, 8)
        if CHKTIME:
            print '14', time() - t0
Пример #6
0
    def test_16_models_stats(self):
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick') 
        # write cmm
        models.write_cmm('.', model_num=2)
        model = load_impmodel_from_cmm('model.%s.cmm' % models[2]['rand_init'])
        # clean
        system('rm -f model.*')
        # stats
        self.assertEqual(200, round(model.distance(2, 3), 0))
        self.assertEqual(11, round(model.distance(8, 20)/100, 0))
        self.assertEqual(round(593, 0),
                         round(model.radius_of_gyration(), 0))
        self.assertEqual(400, round(model.contour()/10, 0))
        self.assertEqual(21,
                         round((model.shortest_axe()+model.longest_axe())/100, 0))
        self.assertEqual([11, 16], model.inaccessible_particles(1000))

        acc, num, acc_area, tot_area, bypt = model.accessible_surface(
            150, superradius=200, nump=150)
        self.assertEqual(214, acc)
        self.assertEqual(502, num)
        self.assertEqual(0.4, round(acc_area, 1))
        self.assertEqual(4, round(tot_area, 0))
        self.assertEqual(101, len(bypt))
        self.assertEqual(bypt[100], (21, 11, 8))
        if CHKTIME:
            print '16', time() - t0
Пример #7
0
    def test_14_3d_clustering(self):
        """
        """
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick')
        if find_executable('mcl'):
            models.cluster_models(method='mcl', fact=0.9, verbose=False,
                                  dcutoff=200)
            self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        models.cluster_models(method='ward', verbose=False, dcutoff=200)
        self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        if CHKTIME:
            print '14', time() - t0
Пример #8
0
    def test_14_3d_clustering(self):
        """
        """
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick')
        if find_executable('mcl'):
            models.cluster_models(method='mcl', fact=0.9, verbose=False,
                                  dcutoff=200)
            self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        models.cluster_models(method='ward', verbose=False, dcutoff=200)
        self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        if CHKTIME:
            print '14', time() - t0
Пример #9
0
def _write_model_list(path, model_ids):
    """
    Write one "model.<id>" line per model identifier into *path*.
    """
    with open(path, 'w') as out:
        for model_n in model_ids:
            out.write("model.{0}\n".format(model_n))


def _write_superimpose_cmd(path, subdir, model_ids):
    """
    Write a Chimera command file into *path* that opens every
    "<subdir>/model.<id>.cmm" file and matches each opened model onto #0.
    """
    with open(path, 'w') as out:
        out.write("open " + " ".join(
            ["{0}/model.{1}.cmm".format(subdir, model_n)
             for model_n in model_ids]))
        out.write("\nlabel; represent wire; ~bondcolor\n")
        for i in range(len(model_ids)):
            out.write("match #{0} #0\n".format(i))


def main():
    """
    main function

    Reads the command line options and, depending on them: loads the Hi-C
    data, searches for TADs, filters columns, saves the chromosome,
    optimizes the IMP parameters and models the region; or, with
    "analyze_only", loads previously saved models. The models are then
    clustered, saved again, written to disk and passed through each
    analysis listed in "opts.analyze".

    All output goes under "<opts.outdir>/<crm>_<beg>_<end>/".
    """
    opts = get_options()
    ncpus = int(opts.ncpus)
    if opts.xname:
        xnames = opts.xname
    elif opts.data[0]:
        xnames = [os.path.split(d)[-1] for d in opts.data]
    else:
        xnames = [os.path.split(d)[-1] for d in opts.norm]

    name = '{0}_{1}_{2}'.format(opts.crm, opts.beg, opts.end)
    workdir = os.path.join(opts.outdir, name)
    models_dir = os.path.join(workdir, 'models')

    ############################################################################
    ############################  LOAD HI-C DATA  ##############################
    ############################################################################

    if not opts.analyze_only:
        crm = load_hic_data(opts, xnames)

    ############################################################################
    ##########################  SEARCH TADs PARAMETERS #########################
    ############################################################################
    if opts.tad and not opts.analyze_only:
        search_tads(opts, crm, name)

    # Save the chromosome
    # Chromosomes can later on be loaded to avoid re-reading the original
    # matrices. See function "load_chromosome".
    if not opts.tad_only and not opts.analyze_only:
        # Sum all experiments into a new one
        if len(xnames) > 1:
            logging.info("\tSumming experiments %s..." % (' + '.join(xnames)))
            exp = crm.experiments[0] + crm.experiments[1]
            for i in range(2, len(xnames)):
                exp += crm.experiments[i]
            crm.add_experiment(exp)
        else:
            exp = crm.experiments[0]

        exp.filter_columns(
            draw_hist="column filtering" in opts.analyze,
            perc_zero=opts.filt,
            savefig=os.path.join(workdir, name + '_column_filtering.pdf'),
            diagonal=not opts.nodiag)
        if "column filtering" in opts.analyze:
            # context manager: the file is flushed and closed right away
            with open(os.path.join(workdir, name + '_column_filtering.dat'),
                      'w') as out:
                out.write('# particles not considered in the analysis\n' +
                          '\n'.join(map(str, sorted(exp._zeros.keys()))))

    if not opts.analyze_only:
        logging.info("\tSaving the chromosome...")
        crm.save_chromosome(os.path.join(workdir, '{0}.tdb'.format(name)),
                            force=True)
    if opts.tad_only:
        sys.exit()

    ############################################################################
    #######################  LOAD OPTIMAL IMP PARAMETERS #######################
    #########################  OPTIMIZE IMP PARAMETERS #########################
    ############################################################################

    if not opts.analyze_only:
        results = load_optimal_imp_parameters(opts, name, exp)
        optpar = optimize(results, opts, name)

    ############################################################################
    ##############################  MODEL REGION ###############################
    ############################################################################

    models_file = os.path.join(workdir, name + '.models')
    if opts.analyze_only:
        # models are already calculated and we just want to load them
        try:
            models = load_structuralmodels(models_file)
        except IOError:
            # nothing below can run without models: report which file is
            # missing instead of failing later on an undefined variable
            logging.error("\tCould not load models from %s" % models_file)
            raise
    else:
        # Build 3D models based on the HiC data.
        logging.info("\tModeling (this can take long)...")
        models = model_region(exp, optpar, opts, name)
        for line in repr(models).split('\n'):
            logging.info(line)

    # Distance cutoff used by the clustering and several analyses below; it
    # has to be defined whichever way the models were obtained.
    dcutoff = int(models._config['dcutoff'] *
                  models._config['scale']   *
                  models.resolution)

    ############################################################################
    ##############################  ANALYZE MODELS #############################
    ############################################################################

    if "correlation real/models" in opts.analyze:
        # Calculate the correlation coefficient between a set of kept models and
        # the original HiC matrix
        logging.info("\tCorrelation with data...")
        rho, pval = models.correlate_with_real_data(
            cutoff=dcutoff,
            savefig=os.path.join(workdir, name + '_corre_real.pdf'),
            plot=True)
        logging.info("\t Correlation coefficient: %s [p-value: %s]" % (
            rho, pval))

    if "z-score plot" in opts.analyze:
        # zscore plots
        logging.info("\tZ-score plot...")
        models.zscore_plot(
            savefig=os.path.join(workdir, name + '_zscores.pdf'))

    # Cluster models based on structural similarity
    logging.info("\tClustering all models into sets of structurally similar" +
                 " models...")
    # Try decreasing fractions of particles within the cutoff, and for each
    # of them three cutoffs, stopping at the first combination that works.
    clustered = False
    for ffact in [0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5]:
        logging.info('   fact = ' + str(ffact))
        for clcutoff in [dcutoff / 2 , dcutoff, dcutoff * 1.5]:
            logging.info('      cutoff = ' + str(clcutoff))
            try:
                models.cluster_models(fact=ffact, dcutoff=clcutoff,
                                      n_cpus=ncpus)
            except Exception:
                # this combination failed, try the next one
                continue
            clustered = True
            break
        if clustered:
            break
    if not clustered:
        logging.warning("\t\tWARNING: no clustering parameters succeeded...")
    logging.info("\tSaving again the models this time with clusters...")
    models.save_models(models_file)
    # Plot the clustering
    try:
        models.cluster_analysis_dendrogram(
            color=True, savefig=os.path.join(workdir, name + '_clusters.pdf'))
    except Exception:
        logging.info("\t\tWARNING: plot for clusters could not be made...")

    if not (opts.not_write_xyz and opts.not_write_cmm):
        # Save the clustered models into directories for easy visualization with
        # Chimera (http://www.cgl.ucsf.edu/chimera/)
        # Move into the cluster directory and run in the prompt
        # "chimera cl_1_superimpose.cmd"
        logging.info("\t\tWriting models, list and chimera files...")
        for cluster in models.clusters:
            members = models.clusters[cluster]
            logging.info("\t\tCluster #{0} has {1} models {2}".format(
                cluster, len(members), members))
            cl_name = 'cl_' + str(cluster)
            cl_dir = os.path.join(models_dir, cl_name)
            if not os.path.exists(cl_dir):
                os.makedirs(cl_dir)
            if not opts.not_write_xyz:
                models.write_xyz(directory=cl_dir, cluster=cluster)
            if not opts.not_write_cmm:
                models.write_cmm(directory=cl_dir, cluster=cluster)
            # Write list file
            _write_model_list(os.path.join(models_dir, cl_name + '.lst'),
                              members)
            if not opts.not_write_cmm:
                # Write chimera file
                _write_superimpose_cmd(
                    os.path.join(models_dir, cl_name + '_superimpose.cmd'),
                    cl_name, members)
        # same with singletons
        singletons = [m['rand_init'] for m in models
                      if m['cluster'] == 'Singleton']
        logging.info("\t\tSingletons has {0} models {1}".format(
            len(singletons), singletons))
        single_dir = os.path.join(models_dir, 'Singletons')
        if not os.path.exists(single_dir):
            os.makedirs(single_dir)
        if not opts.not_write_xyz:
            models.write_xyz(directory=single_dir, models=singletons)
        if not opts.not_write_cmm:
            models.write_cmm(directory=single_dir, models=singletons)
        # Write best model and centroid model, each in both formats
        centroid_mod = models[models.centroid_model()]
        centroid_mod.write_cmm(directory=models_dir, filename='centroid.cmm')
        # the .xyz file has to be produced by write_xyz, not write_cmm
        centroid_mod.write_xyz(directory=models_dir, filename='centroid.xyz')
        models[0].write_cmm(directory=models_dir, filename='best.cmm')
        models[0].write_xyz(directory=models_dir, filename='best.xyz')
        # Write list file
        _write_model_list(os.path.join(models_dir, 'Singletons.lst'),
                          singletons)
        if not opts.not_write_cmm:
            # Write chimera file
            _write_superimpose_cmd(
                os.path.join(models_dir, 'Singletons_superimpose.cmd'),
                'Singletons', singletons)

    if "objective function" in opts.analyze:
        logging.info("\tPlotting objective function decay for vbest model...")
        models.objective_function_model(
            0, log=True, smooth=False,
            savefig=os.path.join(workdir, name + '_obj-func.pdf'))

    if "centroid" in opts.analyze:
        # Get the centroid model of cluster #1
        logging.info("\tGetting centroid...")
        centroid = models.centroid_model(cluster=1)
        logging.info("\t\tThe model centroid (closest to the average) " +
                     "for cluster 1 is: {}".format(centroid))

    if "consistency" in opts.analyze:
        # Calculate a consistency plot for all models in cluster #1
        logging.info("\tGetting consistency data...")
        models.model_consistency(
            cluster=1, cutoffs=range(50, dcutoff + 50, 50),
            savefig =os.path.join(workdir, name + '_consistency.pdf'),
            savedata=os.path.join(workdir, name + '_consistency.dat'))

    if "density" in opts.analyze:
        # Calculate a DNA density plot
        logging.info("\tGetting density data...")
        models.density_plot(
            error=True, steps=(1,3,5,7),
            savefig =os.path.join(workdir, name + '_density.pdf'),
            savedata=os.path.join(workdir, name + '_density.dat'))

    if "contact map" in opts.analyze:
        # Get a contact map at the "dcutoff" cut-off for cluster #1
        logging.info("\tGetting a contact map...")
        models.contact_map(
            cluster=1, cutoff=dcutoff,
            savedata=os.path.join(workdir, name + '_contact.dat'))

    if "walking angle" in opts.analyze:
        # Get Dihedral angle plot for cluster #1
        logging.info("\tGetting angle data...")
        models.walking_angle(
            cluster=1, steps=(1,5),
            savefig = os.path.join(workdir, name + '_wang.pdf'),
            savedata= os.path.join(workdir, name + '_wang.dat'))

    if "persistence length" in opts.analyze:
        # Get persistence length of all models
        logging.info("\tGetting persistence length data...")
        pltfile = os.path.join(workdir, name + '_pL.dat')
        with open(pltfile, 'w') as f:
            f.write('#Model_Number\tpL\n')
            for model in models:
                try:
                    f.write('%s\t%.2f\n' % (model["rand_init"],
                                            model.persistence_length()))
                except Exception:
                    # best effort: skip this model and keep going
                    sys.stderr.write('WARNING: failed to compute persistence ' +
                         'length for model %s' % model["rand_init"])

    if "accessibility" in opts.analyze:
        # Calculate the accessibility of the models
        logging.info("\tGetting accessibility data...")
        radius = 75   # Radius of an object to calculate accessibility
        nump   = 30   # number of particles (resolution)
        logging.info("\tGetting accessibility data (this can take long)...")
        models.accessibility(radius, nump=nump,
            error=True,
            savefig =os.path.join(workdir, name + '_accessibility.pdf'),
            savedata=os.path.join(workdir, name + '_accessibility.dat'))

    if "interaction" in opts.analyze:
        # Get interaction data of all models at the "dcutoff" cut-off
        logging.info("\tGetting interaction data...")
        models.interactions(
            cutoff=dcutoff, steps=(1,3,5),
            savefig =os.path.join(workdir, name + '_interactions.pdf'),
            savedata=os.path.join(workdir, name + '_interactions.dat'),
            error=True)
Пример #10
0
def model_region(exp, optpar, opts, name):
    """
    generate structural models

    The z-scores of the region, the optimal parameters and the options are
    pickled into temporary files in the current directory, together with a
    temporary script that calls generate_3d_models and saves the result in
    "<opts.outdir>/<name>/<name>.models". The script is run with "python"
    in a subprocess, its standard output is captured, and the saved models
    are loaded back.

    :param exp: experiment providing the z-scores; it is attached to the
       returned models
    :param optpar: modelling parameters, passed to the script as "config"
    :param opts: options object (beg, end, crm, res, outdir, analyze, ...)
    :param name: name of the output sub-directory and prefix of the files

    :returns: the loaded models, with "experiment" and "description" set,
       and an "index" and "description" set on each model (the kept ones
       and the ones in "_bad_models")
    """
    zscores, values, zeros = exp._sub_experiment_zscore(opts.beg, opts.end)

    tmp_name = ''.join([letters[int(random()*52)]for _ in xrange(50)])
    zscore_file = '_tmp_zscore_' + tmp_name
    opts_file   = '_tmp_opts_' + tmp_name
    script_file = '_tmp_model_' + tmp_name + '.py'

    try:
        with open(zscore_file, 'w') as tmp:
            dump([zscores, values, zeros, optpar], tmp)

        with open(opts_file, 'w') as tmp:
            dump(opts, tmp)

        with open(script_file, 'w') as tmp:
            tmp.write('''
from cPickle import load, dump
from pytadbit.imp.imp_modelling import generate_3d_models
import os

tmp_name = "%s"

zscore_file = open("_tmp_zscore_" + tmp_name)
zscores, values, zeros, optpar = load(zscore_file)
zscore_file.close()

opts_file = open("_tmp_opts_" + tmp_name)
opts = load(opts_file)
opts_file.close()

nloci = opts.end - opts.beg + 1
coords = {"crm"  : opts.crm,
          "start": opts.beg,
          "end"  : opts.end}

zeros = tuple([i not in zeros for i in xrange(opts.end - opts.beg + 1)])

models=  generate_3d_models(zscores, opts.res, nloci,
                            values=values, n_models=opts.nmodels_mod,
                            n_keep=opts.nkeep_mod,
                            n_cpus=opts.ncpus,
                            keep_all=True,
                            first=0, container=opts.container,
                            config=optpar, verbose=0.5,
                            coords=coords, zeros=zeros)
# Save models
models.save_models(
    os.path.join(opts.outdir, "%s", "%s" + ".models"))

''' % (tmp_name, name, name))

        # whatever the script prints is kept as the "constraints" output
        constraints = Popen("python _tmp_model_%s.py" % tmp_name,
                            shell=True, stdout=PIPE).communicate()[0]
    finally:
        # remove the temporary files even if one of the steps above failed;
        # like "rm -f", a file that is already missing is not an error
        for tmp_file in (zscore_file, script_file, opts_file):
            try:
                os.remove(tmp_file)
            except OSError:
                pass

    if "constraints" in opts.analyze:
        with open(os.path.join(opts.outdir, name,
                               name + '_constraints.txt'), 'w') as out:
            out.write(constraints)

    models = load_structuralmodels(
        os.path.join(opts.outdir, name, name + '.models'))
    models.experiment = exp
    coords = {"crm"  : opts.crm,
              "start": opts.beg,
              "end"  : opts.end}
    crm = exp.crm
    description = {'identifier'     : exp.identifier,
                   'chromosome'     : coords['crm'],
                   'start'          : exp.resolution * coords['start'],
                   'end'            : exp.resolution * coords['end'],
                   'species'        : crm.species,
                   'cell type'      : exp.cell_type,
                   'experiment type': exp.exp_type,
                   'resolution'     : exp.resolution,
                   'assembly'       : crm.assembly}
    for desc in exp.description:
        description[desc] = exp.description[desc]
    for desc in crm.description:
        # values come from the chromosome description itself: looking these
        # keys up in exp.description is wrong
        description[desc] = crm.description[desc]
    # the same description dictionary is shared by all models
    all_models = [m for m in models] + list(models._bad_models.values())
    for i, m in enumerate(all_models):
        m['index'] = i
        m['description'] = description
    models.description = description
    return models
Пример #11
0
    def test_15_3d_modelling(self):
        """
        """
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick')
        models.cluster_models(method='ward', verbose=False)
        # density
        models.density_plot(savedata='lala', plot=False)
        lines = open('lala').readlines()
        self.assertEqual(len(lines), 22)
        self.assertEqual([
            round(float(i), 1) if i != 'nan' else i
            for i in lines[1].split('\t')[:3]
        ], [1.0, 'nan', 'nan'])
        self.assertEqual(
            [round(float(i), 1) for i in lines[15].split('\t')[:3]],
            [15, 100.0, 100.0])
        # contacts
        cmap = models.get_contact_matrix(cutoff=300)
        self.assertEqual(
            round(
                round(
                    sum([
                        i if i >= 0 else 0
                        for i in reduce(lambda x, y: x + y, cmap)
                    ]) / 10, 0), 3), 8)
        # define best models
        models.define_best_models(10)
        self.assertEqual(len(models), 10)
        m1 = models[9]
        models.define_best_models(25)
        self.assertEqual(len(models), 25)
        self.assertEqual(m1, models[9])
        # correlation
        corr, pval = models.correlate_with_real_data(cutoff=300)
        self.assertTrue(0.6 <= round(corr, 1) <= 0.7)
        self.assertEqual(round(pval, 4), round(0, 4))
        # consistency
        models.model_consistency(cutoffs=(50, 100, 150, 200),
                                 plot=False,
                                 savedata='lala')
        lines = open('lala').readlines()
        self.assertEqual(len(lines), 22)
        self.assertEqual(
            [round(float(i) / 15, 0) for i in lines[1].split('\t')],
            [0, 1, 2, 3, 3])
        self.assertEqual(
            [round(float(i) / 15, 0) for i in lines[15].split('\t')],
            [1, 6, 7, 7, 7])
        # measure angle
        self.assertTrue(
            13 <= round(models.angle_between_3_particles(2, 8, 15) /
                        10, 0) <= 14)
        self.assertEqual(
            round(models.angle_between_3_particles(19, 20, 21), 0), 60)
        self.assertEqual(
            round(models.angle_between_3_particles(15, 14, 11) / 5, 0), 13)
        # coordinates
        self.assertEqual(
            [round(x, 2) for x in models.particle_coordinates(15)],
            [1529.39, 4703.51, -1793.39])
        # dihedral_angle
        self.assertTrue(round(models.dihedral_angle(2, 8, 15, 16), 2), -13.44)
        self.assertEqual(round(models.dihedral_angle(15, 19, 20, 21), 2),
                         83.07)
        self.assertEqual(round(models.dihedral_angle(15, 14, 11, 12), 2), 7.31)
        # median distance
        self.assertEqual(
            round(models.median_3d_dist(3, 20, plot=False) / 100, 0), 15)
        self.assertEqual(
            round(
                models.median_3d_dist(3, 20, cluster=1, plot=False) / 200, 0),
            8)
        self.assertEqual(
            round(models.median_3d_dist(7, 10, models=range(5), plot=False),
                  0), 250)
        # accessibility
        models.accessibility(radius=75,
                             nump=10,
                             plot=False,
                             savedata='model.acc')
        vals = [l.split() for l in open('model.acc').readlines()[1:]]
        self.assertEqual(vals[0][1:3], ['0.520', '0.999'])
        self.assertEqual(vals[20][1:3], ['1.000', '0.000'])
        # contact map
        models.contact_map(savedata='model.contacts')
        vals = [l.split() for l in open('model.contacts').readlines()[1:]]
        self.assertEqual(vals[0], ['0', '1', '1.0'])
        self.assertEqual(vals[1], ['0', '2', '0.92'])
        self.assertEqual(vals[192], ['14', '18', '0.12'])
        # interactions
        models.interactions(plot=False, savedata='model.inter')
        vals = [[float(i) for i in l.split()]
                for l in open('model.inter').readlines()[1:]]
        self.assertEqual(
            vals[2],
            [3.0, 4.88, 1.03, 3.94, 0.52, 4.72, 0.64, 4.02, 0.51, 4.82, 0.41])
        # walking angle
        models.walking_angle(savedata='model.walkang')
        vals = [[round(float(i), 2) if i != 'None' else i for i in l.split()]
                for l in open('model.walkang').readlines()[1:]]
        self.assertEqual(
            vals[0],
            [1.0, 137.99, 'None'],
        )
        self.assertEqual(
            vals[14],
            [15.0, -50.1, 'None'],
        )
        self.assertEqual(vals[13], [14.0, -95.73, 'None'])
        self.assertEqual(vals[12], [13.0, 155.7, 3.29])
        # write cmm
        models.write_cmm('.', model_num=2)
        models.write_cmm('.', models=range(5))
        models.write_cmm('.', cluster=2)
        # write xyz
        models.write_xyz('.', model_num=2)
        models.write_xyz('.', models=range(5))
        models.write_xyz('.', cluster=2)
        # write json
        models.write_json('model.json', model_num=2)
        models.write_json('model.json', models=range(5))
        models.write_json('model.json', cluster=2)
        # clean
        system('rm -f model.*')
        system('rm -rf lala*')
        if CHKTIME:
            print '15', time() - t0
Пример #12
0
def _write_superimpose_files(models_dir, group, model_ids, with_chimera):
    """
    Write the list file of a group of models and, optionally, the Chimera
    command file that opens and superimposes them.

    :param models_dir: directory where '<group>.lst' and
       '<group>_superimpose.cmd' are written
    :param group: name of the group; also the sub-directory of models_dir
       referenced by the Chimera 'open' command
    :param model_ids: identifiers of the models belonging to the group
    :param with_chimera: if False only the list file is written
    """
    with open(os.path.join(models_dir, group + '.lst'), 'w') as out:
        for model_n in model_ids:
            out.write("model.{0}\n".format(model_n))
    if not with_chimera:
        return
    with open(os.path.join(models_dir,
                           group + '_superimpose.cmd'), 'w') as out:
        out.write("open " + " ".join(["{0}/model.{1}.cmm".format(
            group, model_n) for model_n in model_ids]))
        out.write("\nlabel; represent wire; ~bondcolor\n")
        # every opened model is matched against the first one (#0)
        for i in range(len(model_ids)):
            out.write("match #{0} #0\n".format(i))


def main():
    """
    main function

    Depending on the options returned by get_options this loads the Hi-C
    data, searches for TADs, filters columns, saves the chromosome, optimizes
    the IMP parameters, builds the 3D models (or loads previously saved ones
    when opts.analyze_only is set), clusters them and runs every analysis
    listed in opts.analyze. All output goes under '<opts.outdir>/<name>/'
    where name is '<crm>_<beg>_<end>'.

    :raises IOError: in analyze-only mode, when the saved models cannot be
       loaded
    """
    opts = get_options()
    # fail early if any of the numeric options is not a valid integer
    for numeric_opt in (opts.nmodels_opt, opts.nkeep_opt,
                        opts.nmodels_mod, opts.nkeep_mod):
        int(numeric_opt)
    ncpus = int(opts.ncpus)

    if opts.xname:
        xnames = opts.xname
    elif opts.data[0]:
        xnames = [os.path.split(d)[-1] for d in opts.data]
    else:
        xnames = [os.path.split(d)[-1] for d in opts.norm]

    name = '{0}_{1}_{2}'.format(opts.crm, opts.beg, opts.end)
    outdir = os.path.join(opts.outdir, name)
    prefix = os.path.join(outdir, name)  # common prefix of all output files
    models_dir = os.path.join(outdir, 'models')

    ############################################################################
    ##############  LOAD HI-C DATA, SEARCH TADs, FILTER COLUMNS  ###############
    ############################################################################

    if not opts.analyze_only:
        crm = load_hic_data(opts, xnames)

        if opts.tad:
            search_tads(opts, crm, name)

        if not opts.tad_only:
            # Sum all experiments into a new one
            if len(xnames) > 1:
                logging.info("\tSumming experiments %s..." % (
                    ' + '.join(xnames)))
                exp = crm.experiments[0] + crm.experiments[1]
                for i in range(2, len(xnames)):
                    exp += crm.experiments[i]
                crm.add_experiment(exp)
            else:
                exp = crm.experiments[0]

            exp.filter_columns(draw_hist="column filtering" in opts.analyze,
                               perc_zero=opts.filt,
                               savefig=prefix + '_column_filtering.pdf',
                               diagonal=not opts.nodiag)
            if "column filtering" in opts.analyze:
                with open(prefix + '_column_filtering.dat', 'w') as out:
                    out.write('# particles not considered in the analysis\n' +
                              '\n'.join(map(str, sorted(exp._zeros.keys()))))

        # Save the chromosome
        # Chromosomes can later on be loaded to avoid re-reading the original
        # matrices. See function "load_chromosome".
        logging.info("\tSaving the chromosome...")
        crm.save_chromosome(os.path.join(outdir, '{0}.tdb'.format(name)),
                            force=True)

    if opts.tad_only:
        sys.exit()

    ############################################################################
    ###########  OPTIMIZE IMP PARAMETERS AND MODEL REGION (OR LOAD)  ###########
    ############################################################################

    if opts.analyze_only:
        # models are already calculated and we just want to load them
        models_path = prefix + '.models'
        try:
            models = load_structuralmodels(models_path)
        except IOError:
            # nothing below can run without models: report and stop here
            # instead of failing later on an undefined variable
            logging.error("\tCould not load models from %s" % models_path)
            raise
    else:
        results = load_optimal_imp_parameters(opts, name, exp)
        optpar = optimize(results, opts, name)
        # Build 3D models based on the HiC data.
        logging.info("\tModeling (this can take long)...")
        models = model_region(exp, optpar, opts, name)
        for line in repr(models).split('\n'):
            logging.info(line)

    dcutoff = int(models._config['dcutoff'] *
                  models._config['scale']   *
                  models.resolution)

    ############################################################################
    ##############################  ANALYZE MODELS #############################
    ############################################################################

    if "correlation real/models" in opts.analyze:
        # Calculate the correlation coefficient between a set of kept models and
        # the original HiC matrix
        logging.info("\tCorrelation with data...")
        rho, pval = models.correlate_with_real_data(
            cutoff=dcutoff,
            savefig=prefix + '_corre_real.pdf',
            plot=True)
        logging.info("\t Correlation coefficient: %s [p-value: %s]" % (
            rho, pval))

    if "z-score plot" in opts.analyze:
        # zscore plots
        logging.info("\tZ-score plot...")
        models.zscore_plot(savefig=prefix + '_zscores.pdf')

    # Cluster models based on structural similarity. Parameters are relaxed
    # step by step and the first combination that does not fail is kept.
    logging.info("\tClustering all models into sets of structurally similar" +
                 " models...")
    clustered = False
    # ffact: fraction of particles that are within the cut-off value
    for ffact in [0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5]:
        logging.info('   fact = ' + str(ffact))
        # clcutoff: cut-off to consider two models equivalent
        for clcutoff in [dcutoff / 2 , dcutoff, dcutoff * 1.5]:
            logging.info('      cutoff = ' + str(clcutoff))
            try:
                models.cluster_models(fact=ffact, dcutoff=clcutoff,
                                      n_cpus=ncpus)
            except Exception:
                continue
            clustered = True
            break
        if clustered:
            break
    if not clustered:
        logging.warning("\t\tWARNING: clustering failed for every " +
                        "combination of parameters...")
    logging.info("\tSaving again the models this time with clusters...")
    models.save_models(prefix + '.models')
    # Plot the clustering
    try:
        models.cluster_analysis_dendrogram(
            color=True, savefig=prefix + '_clusters.pdf')
    except Exception:
        logging.info("\t\tWARNING: plot for clusters could not be made...")

    if not (opts.not_write_xyz and opts.not_write_cmm):
        # Save the clustered models into directories for easy visualization with
        # Chimera (http://www.cgl.ucsf.edu/chimera/)
        # Move into the cluster directory and run in the prompt
        # "chimera cl_1_superimpose.cmd"
        logging.info("\t\tWriting models, list and chimera files...")
        write_xyz = not opts.not_write_xyz
        write_cmm = not opts.not_write_cmm
        for cluster in models.clusters:
            members = models.clusters[cluster]
            logging.info("\t\tCluster #{0} has {1} models {2}".format(
                cluster, len(members), members))
            group = 'cl_' + str(cluster)
            cluster_dir = os.path.join(models_dir, group)
            if not os.path.exists(cluster_dir):
                os.makedirs(cluster_dir)
            if write_xyz:
                models.write_xyz(directory=cluster_dir, cluster=cluster)
            if write_cmm:
                models.write_cmm(directory=cluster_dir, cluster=cluster)
            _write_superimpose_files(models_dir, group, members, write_cmm)
        # same with singletons
        singletons = [m['rand_init'] for m in models
                      if m['cluster'] == 'Singleton']
        logging.info("\t\tSingletons has {0} models {1}".format(
            len(singletons), singletons))
        singletons_dir = os.path.join(models_dir, 'Singletons')
        if not os.path.exists(singletons_dir):
            os.makedirs(singletons_dir)
        if write_xyz:
            models.write_xyz(directory=singletons_dir, models=singletons)
        if write_cmm:
            models.write_cmm(directory=singletons_dir, models=singletons)
        # Write best model and centroid model, each one in both formats
        centroid_model = models[models.centroid_model()]
        centroid_model.write_cmm(directory=models_dir,
                                 filename='centroid.cmm')
        centroid_model.write_xyz(directory=models_dir,
                                 filename='centroid.xyz')
        models[0].write_cmm(directory=models_dir, filename='best.cmm')
        models[0].write_xyz(directory=models_dir, filename='best.xyz')
        _write_superimpose_files(models_dir, 'Singletons', singletons,
                                 write_cmm)

    if "objective function" in opts.analyze:
        logging.info("\tPlotting objective function decay for vbest model...")
        models.objective_function_model(
            0, log=True, smooth=False,
            savefig=prefix + '_obj-func.pdf')

    if "centroid" in opts.analyze:
        # Get the centroid model of cluster #1
        logging.info("\tGetting centroid...")
        centroid = models.centroid_model(cluster=1)
        logging.info("\t\tThe model centroid (closest to the average) " +
                     "for cluster 1 is: {}".format(centroid))

    if "consistency" in opts.analyze:
        # Calculate a consistency plot for all models in cluster #1
        logging.info("\tGetting consistency data...")
        models.model_consistency(
            cluster=1, cutoffs=range(50, dcutoff + 50, 50),
            savefig =prefix + '_consistency.pdf',
            savedata=prefix + '_consistency.dat')

    if "density" in opts.analyze:
        # Calculate a DNA density plot
        logging.info("\tGetting density data...")
        models.density_plot(
            error=True, steps=(1,3,5,7),
            savefig =prefix + '_density.pdf',
            savedata=prefix + '_density.dat')

    if "contact map" in opts.analyze:
        # Get a contact map of cluster #1 using dcutoff as distance cut-off
        logging.info("\tGetting a contact map...")
        models.contact_map(
            cluster=1, cutoff=dcutoff,
            savedata=prefix + '_contact.dat')

    if "walking angle" in opts.analyze:
        # Get Dihedral angle plot for cluster #1
        logging.info("\tGetting angle data...")
        models.walking_angle(
            cluster=1, steps=(1,5),
            savefig = prefix + '_wang.pdf',
            savedata= prefix + '_wang.dat')

    if "persistence length" in opts.analyze:
        # Get persistence length of all models
        logging.info("\tGetting persistence length data...")
        with open(prefix + '_pL.dat', 'w') as out:
            out.write('#Model_Number\tpL\n')
            for model in models:
                try:
                    out.write('%s\t%.2f\n' % (model["rand_init"],
                                              model.persistence_length()))
                except Exception:
                    # a failing model is reported and skipped
                    sys.stderr.write('WARNING: failed to compute persistence ' +
                                     'length for model %s\n' % model["rand_init"])

    if "accessibility" in opts.analyze:
        # Calculate the accessibility of the models
        radius = 75   # Radius of an object to calculate accessibility
        nump   = 30   # number of particles (resolution)
        logging.info("\tGetting accessibility data (this can take long)...")
        models.accessibility(radius, nump=nump,
            error=True,
            savefig =prefix + '_accessibility.pdf',
            savedata=prefix + '_accessibility.dat')

    if "interaction" in opts.analyze:
        # Get interaction data of all models using dcutoff as cut-off
        logging.info("\tGetting interaction data...")
        models.interactions(
            cutoff=dcutoff, steps=(1,3,5),
            savefig =prefix + '_interactions.pdf',
            savedata=prefix + '_interactions.dat',
            error=True)
Пример #13
0
def model_region(exp, optpar, opts, name):
    """
    generate structural models

    The modelling runs in a separate python process: the inputs are pickled
    into temporary files in the current directory, a temporary script calls
    generate_3d_models and saves its result, and the models are then loaded
    back from '<opts.outdir>/<name>/<name>.models'. Temporary files are
    removed whether or not the sub-process succeeds.

    :param exp: experiment to model; its z-scores between opts.beg and
       opts.end (defaulting to 1 and exp.size) are used
    :param optpar: passed as 'config' to generate_3d_models
    :param opts: options object, pickled as a whole for the sub-process; the
       attributes read here are beg, end, outdir, analyze, crm, description
    :param name: name of the region, used as output sub-directory and file
       prefix

    :returns: the loaded models, with the experiment and a description
       dictionary attached to them and to each individual model

    :raises subprocess.CalledProcessError: if the modelling script fails
    """
    beg, end = opts.beg or 1, opts.end or exp.size
    zscores, values, zeros = exp._sub_experiment_zscore(beg, end)

    # random suffix so that concurrent runs do not share temporary files
    tmp_name = "".join([letters[int(random() * 52)] for _ in xrange(50)])
    zscore_path = "_tmp_zscore_" + tmp_name
    opts_path = "_tmp_opts_" + tmp_name
    script_path = "_tmp_model_%s.py" % tmp_name

    script = (
        """
from cPickle import load, dump
from pytadbit.imp.imp_modelling import generate_3d_models
import os

tmp_name = "%s"

zscore_file = open("_tmp_zscore_" + tmp_name)
zscores, values, zeros, optpar, beg, end = load(zscore_file)
zscore_file.close()

opts_file = open("_tmp_opts_" + tmp_name)
opts = load(opts_file)
opts_file.close()

nloci = end - beg + 1
coords = {"crm"  : opts.crm,
          "start": opts.beg,
          "end"  : opts.end}

zeros = tuple([i not in zeros for i in xrange(end - beg + 1)])

models =  generate_3d_models(zscores, opts.res, nloci,
                             values=values, n_models=opts.nmodels_mod,
                             n_keep=opts.nkeep_mod,
                             n_cpus=opts.ncpus,
                             keep_all=True,
                             first=0, container=opts.container,
                             config=optpar, coords=coords, zeros=zeros)
# Save models
models.save_models(
    os.path.join(opts.outdir, "%s", "%s" + ".models"))

"""
        % (tmp_name, name, name)
    )

    try:
        with open(zscore_path, "w") as tmp:
            dump([zscores, values, zeros, optpar, beg, end], tmp)
        with open(opts_path, "w") as tmp:
            dump(opts, tmp)
        with open(script_path, "w") as tmp:
            tmp.write(script)
        check_call(["python", script_path])
    finally:
        for path in (zscore_path, script_path, opts_path):
            try:
                os.remove(path)
            except OSError:
                # best-effort cleanup, same tolerance as "rm -f"
                pass

    models = load_structuralmodels(os.path.join(opts.outdir, name, name + ".models"))
    if "constraints" in opts.analyze:
        with open(os.path.join(opts.outdir, name, name + "_constraints.txt"), "w") as out:
            out.write("# Harmonic\tpart1\tpart2\tdist\tkforce\n")
            out.write(
                "\n".join(
                    [
                        "%s\t%s\t%s\t%.1f\t%.3f" % (harm, p1, p2, dist, kforce)
                        for (p1, p2), (harm, dist, kforce) in models._restraints.iteritems()
                    ]
                )
                + "\n"
            )
    models.experiment = exp
    crm = exp.crm
    description = {
        "identifier": exp.identifier,
        "chromosome": opts.crm,
        "start": (exp.resolution * opts.beg) if opts.beg else None,
        "end": (exp.resolution * opts.end) if opts.end else None,
        "species": crm.species,
        "cell type": exp.cell_type,
        "experiment type": exp.exp_type,
        "resolution": exp.resolution,
        "assembly": crm.assembly,
    }
    # later sources override earlier ones: options, experiment, chromosome
    for key in opts.description:
        description[key] = opts.description[key]
    for desc in exp.description:
        description[desc] = exp.description[desc]
    for desc in crm.description:
        # values must come from the chromosome, not from the experiment
        description[desc] = crm.description[desc]
    # the same description dictionary is shared by kept and discarded models
    for i, m in enumerate([m for m in models] + models._bad_models.values()):
        m["index"] = i
        m["description"] = description
    models.description = description
    return models
Пример #14
0
    def test_15_3d_modelling(self):
        """
        Run the analysis and export methods of the models stored in
        'models.pick' and compare their rounded results with reference values.
        """
        if CHKTIME:
            t0 = time()

        def read_lines(path):
            # close the file right away instead of leaving the handle open
            with open(path) as handle:
                return handle.readlines()

        models = load_structuralmodels('models.pick')
        models.cluster_models(method='ward', verbose=False)
        # density
        models.density_plot(savedata='lala', plot=False)
        lines = read_lines('lala')
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i), 1) for i in lines[1].split('\t')[:3]],
                         [1, 100.0, 100.0])
        self.assertEqual([round(float(i), 1) for i in lines[15].split('\t')[:3]],
                         [15, 99.9, 100.0])
        # contacts
        cmap = models.get_contact_matrix(cutoff=300)
        self.assertEqual(round(
            round(sum([i if i >=0 else 0 for i in
                       reduce(lambda x, y: x+y, cmap)])/10, 0),
            3), 8)
        # define best models: enlarging the selection must keep its order
        models.define_best_models(10)
        self.assertEqual(len(models), 10)
        m1 = models[9]
        models.define_best_models(25)
        self.assertEqual(len(models), 25)
        self.assertEqual(m1, models[9])
        # correlation
        corr, pval = models.correlate_with_real_data(cutoff=300)
        self.assertEqual(round(corr, 1), 0.7)
        self.assertEqual(round(pval, 4), round(0, 4))
        # consistency
        models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False,
                                 savedata='lala')
        lines = read_lines('lala')
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i)/10, 0) for i in lines[1].split('\t')],
                         [0, 1, 3, 4, 5])
        self.assertEqual([round(float(i)/10, 0) for i in lines[15].split('\t')],
                         [2, 8, 10, 10, 10])
        # measure angle
        self.assertEqual(round(models.angle_between_3_particles(2,8,15)/10, 0),
                         13)
        self.assertEqual(round(models.angle_between_3_particles(19,20,21), 0),
                         60)
        self.assertEqual(round(models.angle_between_3_particles(15,14,11)/5, 0),
                         13)
        # coordinates (disabled)
        # self.assertEqual([round(x, 3) for x in models.particle_coordinates(15)],
        #                  [2372.253, -1193.602, -1145.397])
        # dihedral_angle (disabled)
        # self.assertTrue(round(models.dihedral_angle(2,8,15, 16), 3), -13.443)
        # self.assertEqual(round(models.dihedral_angle(15,19,20,21), 3), 79.439)
        # self.assertEqual(round(models.dihedral_angle(15,14,11, 12), 3), 8.136)
        # median distance
        self.assertEqual(round(models.median_3d_dist(3, 20, plot=False)/100, 0),
                         15)
        self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1,
                                                     plot=False)/200, 0), 8)
        self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5),
                                                     plot=False), 0), 250)
        # write cmm
        models.write_cmm('.', model_num=2)
        models.write_cmm('.', models=range(5))
        models.write_cmm('.', cluster=2)
        # write xyz
        models.write_xyz('.', model_num=2)
        models.write_xyz('.', models=range(5))
        models.write_xyz('.', cluster=2)
        # clean
        system('rm -f model.*')
        system('rm -f lala')
        if CHKTIME:
            # single pre-formatted argument: same output on python 2 and 3
            print('15 %s' % (time() - t0))
Пример #15
0
    def test_15_3d_modelling(self):
        """
        Run the analysis and export methods of the models stored in
        'models.pick' and compare their rounded results with reference values.
        """
        if CHKTIME:
            t0 = time()

        def read_lines(path):
            # close the file right away instead of leaving the handle open
            with open(path) as handle:
                return handle.readlines()

        models = load_structuralmodels('models.pick')
        models.cluster_models(method='ward', verbose=False)
        # density ('nan' fields are compared as text, the others as numbers)
        models.density_plot(savedata='lala', plot=False)
        lines = read_lines('lala')
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i), 1) if i != 'nan' else i
                          for i in lines[1].split('\t')[:3]],
                         [1.0, 'nan', 'nan'])
        self.assertEqual([round(float(i), 1) for i in lines[15].split('\t')[:3]],
                         [15, 100.0, 100.0])
        # contacts
        cmap = models.get_contact_matrix(cutoff=300)
        self.assertEqual(round(
            round(sum([i if i >=0 else 0 for i in
                       reduce(lambda x, y: x+y, cmap)])/10, 0),
            3), 8)
        # define best models: enlarging the selection must keep its order
        models.define_best_models(10)
        self.assertEqual(len(models), 10)
        m1 = models[9]
        models.define_best_models(25)
        self.assertEqual(len(models), 25)
        self.assertEqual(m1, models[9])
        # correlation
        corr, pval = models.correlate_with_real_data(cutoff=300)
        self.assertTrue(0.6 <= round(corr, 1) <= 0.7)
        self.assertEqual(round(pval, 4), round(0, 4))
        # consistency
        models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False,
                                 savedata='lala')
        lines = read_lines('lala')
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i)/15, 0) for i in lines[1].split('\t')],
                         [0, 2, 3, 4, 4])
        self.assertEqual([round(float(i)/15, 0) for i in lines[15].split('\t')],
                         [1, 6, 7, 7, 7])
        # measure angle
        self.assertTrue(13 <= round(models.angle_between_3_particles(2,8,15)/10,
                                    0) <= 14)
        self.assertEqual(round(models.angle_between_3_particles(19,20,21), 0),
                         60)
        self.assertEqual(round(models.angle_between_3_particles(15,14,11)/5, 0),
                         13)
        # coordinates (disabled)
        # self.assertEqual([round(x, 3) for x in models.particle_coordinates(15)],
        #                  [2372.253, -1193.602, -1145.397])
        # dihedral_angle (disabled)
        # self.assertTrue(round(models.dihedral_angle(2,8,15, 16), 3), -13.443)
        # self.assertEqual(round(models.dihedral_angle(15,19,20,21), 3), 79.439)
        # self.assertEqual(round(models.dihedral_angle(15,14,11, 12), 3), 8.136)
        # median distance
        self.assertEqual(round(models.median_3d_dist(3, 20, plot=False)/100, 0),
                         15)
        self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1,
                                                     plot=False)/200, 0), 8)
        self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5),
                                                     plot=False), 0), 250)
        # write cmm
        models.write_cmm('.', model_num=2)
        models.write_cmm('.', models=range(5))
        models.write_cmm('.', cluster=2)
        # write xyz
        models.write_xyz('.', model_num=2)
        models.write_xyz('.', models=range(5))
        models.write_xyz('.', cluster=2)
        # clean
        system('rm -f model.*')
        system('rm -f lala')
        if CHKTIME:
            # single pre-formatted argument: same output on python 2 and 3
            print('15 %s' % (time() - t0))
Пример #16
0
    def test_15_3d_modelling(self):
        """
        Run the analysis and export methods of the models stored in
        "models.pick" and compare their rounded results with reference values.

        Skipped when ONLY is set and does not contain "15".
        """
        if ONLY and "15" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        def read_lines(path):
            # close the file right away instead of leaving the handle open
            with open(path) as handle:
                return handle.readlines()

        def read_rows(path):
            # whitespace-split fields of every line but the header
            return [line.split() for line in read_lines(path)[1:]]

        models = load_structuralmodels("models.pick")
        models.cluster_models(method="ward", verbose=False)
        # density ("nan" fields are compared as text, the others as numbers)
        models.density_plot(savedata="lala", plot=False)
        lines = read_lines("lala")
        self.assertEqual(len(lines), 22)
        self.assertEqual(
            [round(float(i), 1) if i != "nan" else i for i in lines[1].split("\t")[:3]], [1.0, "nan", "nan"]
        )
        self.assertEqual([round(float(i), 1) for i in lines[15].split("\t")[:3]], [15, 99.9, 0.0])
        # contacts
        cmap = models.get_contact_matrix(cutoff=300)
        self.assertEqual(
            round(round(sum([i if i >= 0 else 0 for i in reduce(lambda x, y: x + y, cmap)]) / 10, 0), 3), 8
        )
        # define best models: enlarging the selection must keep its order
        models.define_best_models(10)
        self.assertEqual(len(models), 10)
        m1 = models[9]
        models.define_best_models(25)
        self.assertEqual(len(models), 25)
        self.assertEqual(m1, models[9])
        # correlation
        corr, pval = models.correlate_with_real_data(cutoff=300)
        self.assertTrue(0.6 <= round(corr, 1) <= 0.7)
        self.assertEqual(round(pval, 4), round(0, 4))
        # consistency
        models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False, savedata="lala")
        lines = read_lines("lala")
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i) / 15, 0) for i in lines[1].split("\t")], [0, 2, 2, 3, 4])
        self.assertEqual([round(float(i) / 15, 0) for i in lines[15].split("\t")], [1, 5, 6, 7, 7])
        # measure angle
        self.assertTrue(13 <= round(models.angle_between_3_particles(2, 8, 15) / 10, 0) <= 14)
        self.assertEqual(round(models.angle_between_3_particles(19, 20, 21), 0), 60)
        self.assertEqual(round(models.angle_between_3_particles(15, 14, 11) / 5, 0), 14)
        # coordinates
        self.assertEqual([round(x, 2) for x in models.particle_coordinates(15)], [3199.84, 4361.61, -4695.41])
        # dihedral_angle
        # NOTE(review): assertTrue takes (expr, msg), so -13.44 is only used as
        # the failure message and this line merely checks that the rounded
        # angle is non-zero. Confirm the expected value before switching to
        # assertEqual.
        self.assertTrue(round(models.dihedral_angle(2, 8, 15, 8, 16, [0])[0], 2), -13.44)
        self.assertEqual(round(models.dihedral_angle(15, 19, 20, 19, 21, [0])[0], 2), 75.95)
        self.assertEqual(round(models.dihedral_angle(15, 14, 11, 14, 12, [0])[0], 2), 2.07)
        # median distance
        self.assertEqual(round(models.median_3d_dist(3, 20, plot=False) / 100, 0), 15)
        self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1, plot=False) / 200, 0), 8)
        self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5), plot=False), 0), 250)
        # accessibility
        models.accessibility(radius=75, nump=10, plot=False, savedata="model.acc")
        vals = read_rows("model.acc")
        self.assertEqual(vals[0][1:3], ["0.56", "0.993"])
        self.assertEqual(vals[20][1:3], ["1.0", "0.0"])
        # contact map
        models.contact_map(savedata="model.contacts")
        vals = read_rows("model.contacts")
        self.assertEqual(vals[0], ["0", "1", "1.0"])
        self.assertEqual(vals[1], ["0", "2", "0.72"])
        self.assertEqual(vals[192], ["14", "18", "0.12"])
        # interactions
        models.interactions(plot=False, savedata="model.inter")
        vals = [[float(i) for i in row] for row in read_rows("model.inter")]
        self.assertEqual(vals[2], [3.0, 4.68, 1.23, 3.78, 0.7, 4.65, 0.87, 3.92, 0.72, 4.74, 0.57])
        # walking angle ("None" fields are compared as text)
        models.walking_angle(savedata="model.walkang")
        vals = [
            [round(float(i), 2) if i != "None" else i for i in row] for row in read_rows("model.walkang")
        ]
        self.assertEqual(vals[17], [18.0, -45.42, 100.0, -9.78, 135.0])
        self.assertEqual(vals[3], [4.0, 124.97, 274.0, 2.05, 254.0])
        self.assertEqual(vals[16], [17.0, -62.84, 201.0, -3.20, 77.0])
        self.assertEqual(vals[15], [16.0, -132.38, 286.0, -12.70, 124.0])
        # write cmm
        models.write_cmm(".", model_num=2)
        models.write_cmm(".", models=range(5))
        models.write_cmm(".", cluster=2)
        # write xyz
        models.write_xyz(".", model_num=2)
        models.write_xyz(".", models=range(5))
        models.write_xyz(".", cluster=2)
        # write json
        models.write_json("model.json", model_num=2)
        models.write_json("model.json", models=range(5))
        models.write_json("model.json", cluster=2)
        # clean
        system("rm -f model.*")
        system("rm -rf lala*")
        if CHKTIME:
            # single pre-formatted argument: same output on python 2 and 3
            print("15 %s" % (time() - t0))
Пример #17
0
    def test_15_3d_modelling(self):
        """
        Exercise the analysis and export methods of a saved set of models.

        Loads the models stored in 'models.pick', clusters them with the
        'ward' method, then checks in turn: density, contact matrix, best
        model selection, correlation with real data, model consistency,
        angles, particle coordinates, dihedral angles, median distances,
        accessibility, contact map, interactions and walking angle.
        Finally calls the cmm / xyz / json writers and removes every file
        generated along the way ('model.*' and 'lala*').
        """
        if CHKTIME:
            t0 = time()

        def read_lines(path):
            # read the whole file at once and close the handle right away
            # instead of leaving it to the garbage collector
            with open(path) as handle:
                return handle.readlines()

        models = load_structuralmodels('models.pick')
        models.cluster_models(method='ward', verbose=False)
        # density
        models.density_plot(savedata='lala', plot=False)
        lines = read_lines('lala')
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i), 1) if i != 'nan' else i for i in lines[1].split('\t')[:3]],
                         [1.0, 'nan', 'nan'])
        self.assertEqual([round(float(i), 1) for i in lines[15].split('\t')[:3]],
                         [15, 100.0, 100.0])
        # contacts
        cmap = models.get_contact_matrix(cutoff=300)
        # negative cells are counted as zero before summing the whole matrix
        self.assertEqual(round(
            round(sum([i if i >=0 else 0 for i in
                       reduce(lambda x, y: x+y, cmap)])/10, 0),
            3), 8)
        # define best models
        models.define_best_models(10)
        self.assertEqual(len(models), 10)
        m1 = models[9]
        models.define_best_models(25)
        self.assertEqual(len(models), 25)
        # enlarging the selection must not change the model found at index 9
        self.assertEqual(m1, models[9])
        # correlation
        corr, pval = models.correlate_with_real_data(cutoff=300)
        self.assertTrue(0.6 <= round(corr, 1) <= 0.7)
        self.assertEqual(round(pval, 4), round(0, 4))
        # consistency
        models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False,
                                 savedata='lala')
        lines = read_lines('lala')
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i)/15, 0) for i in lines[1].split('\t')],
                         [0, 2, 2, 3, 4])
        self.assertEqual([round(float(i)/15, 0) for i in lines[15].split('\t')],
                         [1, 5, 6, 7, 7])
        # measure angle
        self.assertTrue(13 <= round(models.angle_between_3_particles(2,8,15)/10,
                                    0) <= 14)
        self.assertEqual(round(models.angle_between_3_particles(19,20,21), 0),
                         60)
        self.assertEqual(round(models.angle_between_3_particles(15,14,11)/5, 0),
                         14)
        # coordinates
        self.assertEqual([round(x, 2) for x in models.particle_coordinates(15)],
                         [3199.84, 4361.61, -4695.41])
        # dihedral_angle
        # NOTE(review): assertTrue takes its second argument as the failure
        # message, so this line only checks that the rounded angle is
        # non-zero; -13.44 is never compared. Confirm the expected value
        # before turning it into assertEqual.
        self.assertTrue (round(models.dihedral_angle(2,8,15, 16)  , 2), -13.44)
        self.assertEqual(round(models.dihedral_angle(15,19,20,21) , 2), 64.26 )
        self.assertEqual(round(models.dihedral_angle(15,14,11, 12), 2), 8.95  )
        # median distance
        self.assertEqual(round(models.median_3d_dist(3, 20, plot=False)/100, 0),
                         15)
        self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1,
                                                     plot=False)/200, 0), 8)
        self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5),
                                                     plot=False), 0), 250)
        # accessibility (first line of each saved table is skipped as header)
        models.accessibility(radius=75, nump=10, plot=False, savedata='model.acc')
        vals = [l.split() for l in read_lines('model.acc')[1:]]
        self.assertEqual(vals[0][1:3], ['0.560', '0.993'])
        self.assertEqual(vals[20][1:3], ['1.000', '0.000'])
        # contact map
        models.contact_map(savedata='model.contacts')
        vals = [l.split() for l in read_lines('model.contacts')[1:]]
        self.assertEqual(vals[0], ['0', '1', '1.0'])
        self.assertEqual(vals[1], ['0', '2', '0.72'])
        self.assertEqual(vals[192], ['14', '18', '0.12'])
        # interactions
        models.interactions(plot=False, savedata='model.inter')
        vals = [[float(i) for i in l.split()] for l in read_lines('model.inter')[1:]]
        self.assertEqual(vals[2], [3.0, 4.68, 1.23, 3.78, 0.7, 4.65, 0.87, 3.92, 0.72, 4.74, 0.57])
        # walking angle ('None' cells are kept as strings, the rest rounded)
        models.walking_angle(savedata='model.walkang')
        vals = [[round(float(i), 2) if i != 'None' else i for i in l.split()]
                for l in read_lines('model.walkang')[1:]]
        self.assertEqual(vals[0],  [1.0, 137.06, 'None'],)
        self.assertEqual(vals[14], [15.0, -49.65, 'None'],)
        self.assertEqual(vals[13], [14.0, -101.41, 'None'])
        self.assertEqual(vals[12], [13.0, 150.45, -0.2])
        # write cmm
        models.write_cmm('.', model_num=2)
        models.write_cmm('.', models=range(5))
        models.write_cmm('.', cluster=2)
        # write xyz
        models.write_xyz('.', model_num=2)
        models.write_xyz('.', models=range(5))
        models.write_xyz('.', cluster=2)
        # write json
        models.write_json('model.json', model_num=2)
        models.write_json('model.json', models=range(5))
        models.write_json('model.json', cluster=2)
        # clean
        system('rm -f model.*')
        system('rm -rf lala*')
        if CHKTIME:
            # single pre-formatted argument: same output as "print '15', x"
            # while staying valid syntax on both Python 2 and Python 3
            print('15 %s' % (time() - t0))
Пример #18
0
def model_region(exp, optpar, opts, name):
    """
    Generate structural models for a region of an experiment.

    The modelling runs in a separate ``python`` process: the z-scores of the
    region, the modelling parameters and the options are pickled to
    temporary files in the current working directory, a small script that
    calls ``generate_3d_models`` and saves the result is written next to
    them, and that script is executed. The temporary files are always
    removed afterwards, also when the modelling step fails.

    :param exp: experiment to model; its ``_sub_experiment_zscore`` method,
       ``size``, ``crm``, ``identifier``, ``resolution``, ``cell_type``,
       ``exp_type`` and ``description`` attributes are used
    :param optpar: modelling parameters, handed to ``generate_3d_models`` as
       its ``config`` argument
    :param opts: options object; ``beg``, ``end``, ``crm``, ``res``,
       ``nmodels_mod``, ``nkeep_mod``, ``ncpus``, ``container``, ``outdir``
       and ``analyze`` are read from it
    :param name: name of the sub-directory of ``opts.outdir`` and prefix of
       the files written there ('<name>.models', '<name>_constraints.txt')

    :returns: the models loaded back from '<name>.models', with the
       experiment attached and a description dictionary set on the whole
       set and on each model (including the ones in ``_bad_models``)
    """
    beg, end = opts.beg or 1, opts.end or exp.size
    zscores, values, zeros = exp._sub_experiment_zscore(beg, end)

    # random suffix so that concurrent runs in the same directory do not
    # overwrite each other's temporary files
    tmp_name = ''.join([letters[int(random()*52)]for _ in xrange(50)])
    zscore_path = '_tmp_zscore_' + tmp_name
    opts_path = '_tmp_opts_' + tmp_name
    script_path = '_tmp_model_' + tmp_name + '.py'

    try:
        with open(zscore_path, 'w') as tmp:
            dump([zscores, values, zeros, optpar, beg, end], tmp)

        with open(opts_path, 'w') as tmp:
            dump(opts, tmp)

        with open(script_path, 'w') as tmp:
            tmp.write('''
from cPickle import load, dump
from pytadbit.imp.imp_modelling import generate_3d_models
import os

tmp_name = "%s"

zscore_file = open("_tmp_zscore_" + tmp_name)
zscores, values, zeros, optpar, beg, end = load(zscore_file)
zscore_file.close()

opts_file = open("_tmp_opts_" + tmp_name)
opts = load(opts_file)
opts_file.close()

nloci = end - beg + 1
coords = {"crm"  : opts.crm,
          "start": opts.beg,
          "end"  : opts.end}

zeros = tuple([i not in zeros for i in xrange(end - beg + 1)])

models=  generate_3d_models(zscores, opts.res, nloci,
                            values=values, n_models=opts.nmodels_mod,
                            n_keep=opts.nkeep_mod,
                            n_cpus=opts.ncpus,
                            keep_all=True,
                            first=0, container=opts.container,
                            config=optpar, verbose=0.5,
                            coords=coords, zeros=zeros)
# Save models
models.save_models(
    os.path.join(opts.outdir, "%s", "%s" + ".models"))

''' % (tmp_name, name, name))

        # whatever the child process prints on stdout is kept and, when
        # requested, stored below as the constraints report
        constraints = Popen(['python', script_path],
                            stdout=PIPE).communicate()[0]
    finally:
        # same effect as "rm -f": a file that was never created is fine
        for tmp_path in (zscore_path, script_path, opts_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    if "constraints" in opts.analyze:
        constraints_path = os.path.join(opts.outdir, name,
                                        name + '_constraints.txt')
        with open(constraints_path, 'w') as out:
            out.write(constraints)

    models = load_structuralmodels(
        os.path.join(opts.outdir, name, name + '.models'))
    models.experiment = exp
    coords = {"crm"  : opts.crm,
              "start": opts.beg,
              "end"  : opts.end}
    crm = exp.crm
    description = {'identifier'     : exp.identifier,
                   'chromosome'     : coords['crm'],
                   'start'          : (exp.resolution * coords['start']) if coords['start'] else None,
                   'end'            : (exp.resolution * coords['end'])   if coords['end'  ] else None,
                   'species'        : crm.species,
                   'cell type'      : exp.cell_type,
                   'experiment type': exp.exp_type,
                   'resolution'     : exp.resolution,
                   'assembly'       : crm.assembly}
    for desc in exp.description:
        description[desc] = exp.description[desc]
    # chromosome-level entries are applied last, so they win over
    # experiment entries with the same key; values must come from the
    # chromosome's own description (they were wrongly read from the
    # experiment's before)
    for desc in crm.description:
        description[desc] = crm.description[desc]
    # every model, kept or discarded, shares the same description object
    for i, m in enumerate(list(models) + list(models._bad_models.values())):
        m['index'] = i
        m['description'] = description
    models.description = description
    return models