示例#1
0
def main():
    """
    Build 3D models and post-process them according to the options.

    Models are generated from a z-score file when ``opts.inabc`` is set,
    otherwise from the matrix referenced by ``opts.incrm``.  Afterwards the
    models are optionally saved, exported as CMM files and analysed.
    """
    opts, params = get_options()

    if not opts.inabc:
        # model straight from the Hi-C matrix through a throw-away chromosome
        chromosome = Chromosome('crm')
        chromosome.add_experiment('X', resolution=opts.resolution,
                                  xp_handler=opts.incrm)
        experiment = chromosome.experiments['X']
        models = experiment.model_region(
            start=opts.start, end=opts.end, n_models=opts.nmodels,
            n_keep=opts.nkeep, n_cpus=opts.ncpus, keep_all=False,
            verbose=False, config=params)
    else:
        # model from pre-computed z-scores
        models = generate_3d_models(
            parse_zscores(opts.inabc), opts.resolution, start=1,
            n_models=opts.nmodels, n_keep=opts.nkeep, n_cpus=opts.ncpus,
            keep_all=False, verbose=False, outfile=None, config=params)

    if opts.save:
        last = opts.start + opts.nmodels
        models.save_models('%s/models_%s_%s.pik' % (opts.out, opts.start,
                                                    last))

    # one CMM file per requested model
    for model_number in xrange(int(opts.cmm)):
        models.write_cmm(model_number, opts.out)

    if not opts.full_report:
        return
    models.cluster_models(dcutoff=200)
    models.cluster_analysis_dendrogram(n_best_clusters=10)
    models.model_consistency()
示例#2
0
def load_hic_data(opts, xnames):
    """
    Load Hi-C data

    Creates a Chromosome from the options and adds one Hi-C experiment per
    (name, data path, normalized data path) triplet.  Experiments without
    normalized data are normalized here.  ``opts.end`` is clipped to the
    size of the last experiment.

    :param opts: parsed options (crm, species, centromere, assembly, data,
       norm, enzyme, cell, identifier, project, res, beg and end are read)
    :param xnames: names given to the experiments, in the same order as
       opts.data

    :returns: the Chromosome object holding the experiments
    """
    # 'genus_species' becomes 'Genusspecies', anything else is kept as is
    if '_' in opts.species:
        genus_and_rest = opts.species.split('_')
        species = genus_and_rest[0].capitalize() + genus_and_rest[1]
    else:
        species = opts.species
    crm = Chromosome(opts.crm, species=species,
                     centromere_search=opts.centromere,
                     assembly=opts.assembly)

    logging.info("\tReading input data...")
    for xnam, xpath, xnorm in zip(xnames, opts.data, opts.norm):
        crm.add_experiment(xnam,
                           exp_type='Hi-C',
                           enzyme=opts.enzyme,
                           cell_type=opts.cell,
                           identifier=opts.identifier,
                           project=opts.project,
                           resolution=opts.res,
                           hic_data=xpath,
                           norm_data=xnorm)
        if xnorm:
            continue
        # no normalized matrix was supplied for this experiment
        logging.info("\tNormalizing HiC data of %s..." % xnam)
        crm.experiments[xnam].normalize_hic(iterations=5)

    last_size = crm.experiments[-1].size
    if opts.beg > last_size:
        raise Exception('ERROR: beg parameter is larger than chromosome size.')
    if opts.end > last_size:
        logging.info('WARNING: end parameter is larger than chromosome ' +
                     'size. Setting end to %s.\n' % (last_size * opts.res))
        opts.end = last_size
    return crm
示例#3
0
 def test_08_changing_resolution(self):
     """
     The total of the Hi-C counts must not change when the resolution of
     an experiment is changed back and forth.
     """
     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_def=exp4,
                          hic_data='20Kb/chrT/chrT_D.tsv')
     experiment = chrom.experiments['exp1']
     # first entry is the total at the initial 20 kb resolution
     totals = [sum(experiment.hic_data[0])]
     for resolution in (80000, 160000, 360000, 2400000,
                        40000, 20000, 40000):
         experiment.set_resolution(resolution)
         totals.append(sum(experiment.hic_data[0]))
         check_hic(experiment.hic_data[0], experiment.size)
     self.assertTrue(all(total == totals[0] for total in totals))
示例#4
0
def load_genome_from_tad_def(genome_path, res, verbose=False):
    """
    Search, at a given path, for TAD definition files (one tsv file per
    chromosome) and load each of them into a TADbit chromosome.

    Only regular files directly inside *genome_path* are read; directories
    are skipped.

    :param genome_path: Path where to search for TAD definition files
    :param res: Resolution at which the chromosomes were saved
    :param False verbose: print the name of each chromosome being loaded

    :returns: a dictionary with all TADbit chromosomes found, keyed by the
       chromosome name (file name without '.tsv' and 'chr', upper-cased)

    :raises Exception: if two files resolve to the same chromosome name
    """
    ref_genome = {}
    for fname in listdir(genome_path):
        crm_path = os.path.join(genome_path, fname)
        if not isfile(crm_path):
            continue
        # Normalize the name *before* looking for duplicates: the dictionary
        # is keyed by the normalized name, so checking the raw file name
        # would miss clashes such as 'chr1.tsv' and '1.tsv'.
        crm = fname.replace('.tsv', '').replace('chr', '').upper()
        if crm in ref_genome:
            raise Exception('More than 1 TAD definition file found\n')
        if verbose:
            # single-argument form prints the same text under Python 2 and 3
            print('  Chromosome: ' + crm)
        crmO = Chromosome(crm)
        crmO.add_experiment('sample', res)
        crmO.experiments[0].load_tad_def(crm_path)
        ref_genome[crm] = crmO
    return ref_genome
示例#5
0
def main():
    """
    main function

    Loads every Hi-C matrix listed in the options as one experiment,
    normalizes each of them, sums all experiments and writes the result
    either as interaction pairs (``opts.abc``) or as a matrix.
    """

    opts = get_options()
    crm = Chromosome(':P')

    for i, data in enumerate(opts.data):
        name = 'exp' + str(i)
        crm.add_experiment(name, resolution=int(opts.resolution[i]),
                           hic_data=data)
        crm.experiments[name].normalize_hic()

    if len(opts.data) > 1:
        # the first addition creates the summed experiment, the remaining
        # ones are accumulated onto it
        exp = crm.experiments[0] + crm.experiments[1]
        for i in range(2, len(opts.data)):
            exp += crm.experiments[i]
    else:
        exp = crm.experiments[0]

    if opts.abc:
        exp.write_interaction_pairs(opts.output, normalized=opts.norm,
                                    zscored=False)
    elif isinstance(opts.output, file):
        # handle supplied by the caller: write to it but leave it open
        opts.output.write(exp.print_hic_matrix(print_it=False,
                                               normalized=opts.norm))
    else:
        # path given: open the file ourselves and make sure it is closed
        with open(opts.output, 'w') as out:
            out.write(exp.print_hic_matrix(print_it=False,
                                           normalized=opts.norm))
示例#6
0
    def test_08_changing_resolution(self):
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments['exp1']
        sum20 = sum(exp.hic_data[0].values())
        exp.set_resolution(80000)
        sum80 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(160000)
        sum160 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(360000)
        sum360 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(2400000)
        sum2400 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(40000)
        sum40 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(20000)
        sum21 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(40000)
        sum41 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        self.assertTrue(sum20 == sum80 == sum160 == sum360 == sum40 \
                        == sum21 == sum2400 == sum41)
        if CHKTIME:
            print '8', time() - t0
def load_genome_from_tad_def(genome_path, res, verbose=False):
    """
    Search, at a given path, for TAD definition files (one tsv file per
    chromosome) and load each of them into a TADbit chromosome.

    Only regular files directly inside *genome_path* are read; directories
    are skipped.

    :param genome_path: Path where to search for TAD definition files
    :param res: Resolution at which the chromosomes were saved
    :param False verbose: print the name of each chromosome being loaded

    :returns: a dictionary with all TADbit chromosomes found, keyed by the
       chromosome name (file name without '.tsv' and 'chr', upper-cased)

    :raises Exception: if two files resolve to the same chromosome name
    """
    ref_genome = {}
    for fname in listdir(genome_path):
        crm_path = os.path.join(genome_path, fname)
        if not isfile(crm_path):
            continue
        # Normalize the name *before* looking for duplicates: the dictionary
        # is keyed by the normalized name, so checking the raw file name
        # would miss clashes such as 'chr1.tsv' and '1.tsv'.
        crm = fname.replace('.tsv', '').replace('chr', '').upper()
        if crm in ref_genome:
            raise Exception('More than 1 TAD definition file found\n')
        if verbose:
            # single-argument form prints the same text under Python 2 and 3
            print('  Chromosome: ' + crm)
        crmO = Chromosome(crm)
        crmO.add_experiment('sample', res)
        crmO.experiments[0].load_tad_def(crm_path)
        ref_genome[crm] = crmO
    return ref_genome
示例#8
0
    def test_12_3d_modelling_optimization(self):
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return
        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv')
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        result = exp.optimal_imp_parameters(50, 70, n_cpus=4,
                                            n_models=8, n_keep=2,
                                            lowfreq_range=[-0.6],
                                            upfreq_range=(0, 1.1, 1.1),
                                            maxdist_range=[500, 600],
                                            verbose=False)

        # get best correlations
        config = result.get_best_parameters_dict()
        wanted = {'maxdist': 600.0, 'upfreq': 0.0, 'kforce': 5,
                  'dcutoff': 2,
                  'reference': '', 'lowfreq': -0.6, 'scale': 0.01}
        self.assertEqual([round(i, 4) for i in config.values()if not type(i) is str],
                         [round(i, 4) for i in wanted.values()if not type(i) is str])
        if CHKTIME:
            print '12', time() - t0
示例#9
0
    def test_08_changing_resolution(self):
        if ONLY and ONLY != "08":
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1", 20000, tad_def=exp4, hic_data=PATH + "/20Kb/chrT/chrT_D.tsv", silent=True)
        exp = test_chr.experiments["exp1"]
        sum20 = sum(exp.hic_data[0].values())
        exp.set_resolution(80000)
        sum80 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(160000)
        sum160 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(360000)
        sum360 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(2400000)
        sum2400 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(40000)
        sum40 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(20000)
        sum21 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(40000)
        sum41 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        self.assertTrue(sum20 == sum80 == sum160 == sum360 == sum40 == sum21 == sum2400 == sum41)
        if CHKTIME:
            print "8", time() - t0
示例#10
0
def main():
    """
    main function
    """
    # retieve HOX genes

    distmatrix, geneids = get_genes()
    # compute TADs for human chromosome 19
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment('exp1',
                            100000,
                            xp_handler=PATH +
                            'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    exp = test_chr.experiments['exp1']
    clust = linkage(distmatrix['19'])
    cl_idx = list(fcluster(clust, t=1, criterion='inconsistent'))
    print max(cl_idx), 'clusters'
    cluster = [[] for _ in xrange(1, max(cl_idx) + 1)]
    for i, j in enumerate(cl_idx):
        cluster[j - 1].append(geneids['19'][i][1])
    for i, _ in enumerate(cluster):
        cluster[i] = min(cluster[i]), max(cluster[i])
    tad_breaker(exp.tads,
                cluster,
                exp.resolution,
                show_plot=True,
                bins=5,
                title='Proportion of HOX genes according to position in a TAD')
示例#11
0
    def test_09_hic_normalization(self):
        """
        writes interaction pair file.
        """
        if ONLY and not "09" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.normalize_hic(silent=True)
        exp.get_hic_zscores()
        exp.get_hic_zscores(zscored=False)
        sumz = sum([
            exp._zscores[k1][k2] for k1 in exp._zscores.keys()
            for k2 in exp._zscores[k1]
        ])
        self.assertEqual(round(sumz, 4), round(4059.2877, 4))
        if CHKTIME:
            print "9", time() - t0
示例#12
0
def main():
    """
    Model a region from a normalized matrix with parameters taken from the
    command line and save the resulting models.

    Arguments read from sys.argv:
      1. path to the normalized matrix
      2. 'upfreq:lowfreq:maxdist'
      3. 'models_to_compute:models_to_keep'
    """
    argv = sys.argv
    matrix_path = argv[1]
    config_string = argv[2]
    compute_keep = argv[3]

    upfreq, lowfreq, maxdist = config_string.split(':')
    lowfreq = float(lowfreq)
    upfreq = float(upfreq)
    maxdist = int(maxdist)
    config = {
        'reference': '',
        'kforce': 5,
        'maxdist': maxdist,
        'upfreq': upfreq,
        'lowfreq': lowfreq,
        'scale': 0.01,
        'kbending': 0.0,
    }

    n_compute, n_keep = [int(number) for number in compute_keep.split(':')]

    chromosome = Chromosome('chr')
    chromosome.add_experiment('sample', norm_data=matrix_path,
                              resolution=15000)
    experiment = chromosome.experiments[0]

    models = experiment.model_region(n_models=n_compute, n_keep=n_keep,
                                     n_cpus=8, config=config)

    # the raw configuration string is part of the output file name
    models.save_models('models_%s.pickle' % (config_string))
示例#13
0
    def test_11_write_interaction_pairs(self):
        if ONLY and not "11" in ONLY:
            return
        """
        writes interaction pair file.
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=None, silent=True)
        exp.get_hic_zscores(zscored=False)
        exp.write_interaction_pairs("lala")
        lines = open("lala").readlines()
        self.assertEqual(len(lines), 4674)
        self.assertEqual(lines[25], "1\t28\t0.612332461036\n")
        self.assertEqual(lines[2000], "26\t70\t0.0738742984321\n")
        system("rm -f lala")
        if CHKTIME:
            print "11", time() - t0
示例#14
0
    def test_11_write_interaction_pairs(self):
        if ONLY and ONLY != '11':
            return
        """
        writes interaction pair file.
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=None, silent=True)
        exp.get_hic_zscores(zscored=False)
        exp.write_interaction_pairs('lala')
        lines = open('lala').readlines()
        self.assertEqual(len(lines), 4674)
        self.assertEqual(lines[25], '1\t28\t0.612332461036\n')
        self.assertEqual(lines[2000], '26\t70\t0.0738742984321\n')
        system('rm -f lala')
        if CHKTIME:
            print '11', time() - t0
示例#15
0
    def test_11_write_interaction_pairs(self):
        """
        writes interaction pair file.

        The pairs are written to a temporary file named 'lala' in the
        current directory, read back and compared with known values.
        """
        if ONLY and not "11" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=1, silent=True)
        exp.get_hic_zscores(zscored=False)
        exp.write_interaction_pairs("lala")
        try:
            with open("lala") as f_lala:
                lines = f_lala.readlines()
            self.assertEqual(len(lines), 4674)
            # third tab-separated column holds the value being checked
            self.assertAlmostEqual(float(lines[25].split('\t')[2]),
                                   0.5852295196345679)
            self.assertAlmostEqual(float(lines[2000].split('\t')[2]),
                                   0.07060448846960976)
        finally:
            # remove the temporary file even when an assertion fails
            system("rm -f lala")
        if CHKTIME:
            print("11", time() - t0)
示例#16
0
    def test_11_write_interaction_pairs(self):
        if ONLY and ONLY != '11':
            return
        """
        writes interaction pair file.
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=None, silent=True)
        exp.get_hic_zscores(zscored=False)
        exp.write_interaction_pairs('lala')
        lines = open('lala').readlines()
        self.assertEqual(len(lines), 4674)
        self.assertEqual(lines[25], '1\t28\t0.612332461036\n')
        self.assertEqual(lines[2000], '26\t70\t0.0738742984321\n')
        system('rm -f lala')
        if CHKTIME:
            print '11', time() - t0
示例#17
0
    def test_13_3d_modelling_centroid(self):
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if ONLY and "13" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__("IMP")
        except ImportError:
            warn("IMP not found, skipping test\n")
            return
        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1", 20000, tad_def=exp4, hic_data=PATH + "/20Kb/chrT/chrT_D.tsv", silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        models = exp.model_region(
            51,
            71,
            ncopies=4,
            n_models=10,
            n_keep=10,
            n_cpus=10,
            # verbose=3,
            config={"kforce": 5, "maxdist": 500, "scale": 0.01, "upfreq": 0.5, "lowfreq": -0.5},
        )
        models.save_models("models.pick")

        avg = models.average_model()
        nmd = len(models)
        print "I'm here test 13"
示例#18
0
    def test_07_forbidden_regions(self):
        if ONLY and ONLY != '07':
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000,
                              centromere_search=True,)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        # Values with square root normalization.
        #brks = [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
        #        61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0, 61.0,
                  66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
        tads = test_chr.experiments['exp1'].tads
        found = [tads[t]['end'] for t in tads if tads[t]['score'] > 0]
        self.assertEqual(brks, found)
        items1 = test_chr.forbidden.keys(), test_chr.forbidden.values()
        test_chr.add_experiment('exp2', 20000, tad_def=exp3,
                                hic_data=PATH + '/20Kb/chrT/chrT_C.tsv',
                                silent=True)
        items2 = test_chr.forbidden.keys(), test_chr.forbidden.values()
        know1 = ([38, 39], ['Centromere', 'Centromere'])
        #know1 = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
        #          23, 24, 25, 26, 27, 28, 29, 30, 31],
        #         [None, None, None, 'Centromere', 'Centromere',
        #          None, None, None, None, None, None, None,
        #          None, None, None, None, None, None])
        know2 = ([38], ['Centromere'])
        self.assertEqual(items1, know1)
        self.assertEqual(items2, know2)
        if CHKTIME:
            print '7', time() - t0
示例#19
0
    def test_07_forbidden_regions(self):
        if ONLY and ONLY != "07":
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000, centromere_search=True)
        test_chr.add_experiment("exp1", 20000, tad_def=exp4, hic_data=PATH + "/20Kb/chrT/chrT_D.tsv", silent=True)
        # Values with square root normalization.
        # brks = [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
        #        61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0, 61.0, 66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
        tads = test_chr.experiments["exp1"].tads
        found = [tads[t]["end"] for t in tads if tads[t]["score"] > 0]
        self.assertEqual(brks, found)
        items1 = test_chr.forbidden.keys(), test_chr.forbidden.values()
        test_chr.add_experiment("exp2", 20000, tad_def=exp3, hic_data=PATH + "/20Kb/chrT/chrT_C.tsv", silent=True)
        items2 = test_chr.forbidden.keys(), test_chr.forbidden.values()
        know1 = ([38, 39], ["Centromere", "Centromere"])
        # know1 = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
        #          23, 24, 25, 26, 27, 28, 29, 30, 31],
        #         [None, None, None, 'Centromere', 'Centromere',
        #          None, None, None, None, None, None, None,
        #          None, None, None, None, None, None])
        know2 = ([38], ["Centromere"])
        self.assertEqual(items1, know1)
        self.assertEqual(items2, know2)
        if CHKTIME:
            print "7", time() - t0
示例#20
0
 def tad_clustering(self):
     """
     Align the 5th and 9th TADs of 'exp1' with optimal_cmo and compare
     both alignments with the expected ones.
     """
     chrom = Chromosome(name="Test Chromosome", resolution=20000)
     chrom.add_experiment("chrT/chrT_A.tsv", name="exp1")
     chrom.find_TAD(["exp1"])
     tads = list(chrom.iter_tads("exp1"))
     first_alignment, second_alignment = optimal_cmo(tads[4], tads[8], 9)
     expected_first = [1, 2, "-", "-", "-", "-", 3, "-", 4, 5, 6, "-", 7, 8]
     expected_second = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
     self.assertEqual(first_alignment, expected_first)
     self.assertEqual(second_alignment, expected_second)
示例#21
0
    def test_12_3d_modelling_optimization(self):
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if ONLY and "12" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__("IMP")
        except ImportError:
            warn("IMP not found, skipping test\n")
            return
        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment(
            "exp1", 20000, tad_def=exp4, hic_data=PATH + "/20Kb/chrT/chrT_D.tsv"
        )  # norm_data para dar directamente la matrix normalizada
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        result = exp.optimal_imp_parameters(
            50,
            70,
            ncopies=4,
            n_cpus=1,  # It can be that this function requires also the raw hic_data matrix
            n_models=8,
            n_keep=2,
            lowfreq_range=[-0.6],
            upfreq_range=(0, 1.1, 1.1),
            maxdist_range=[500, 600],
            verbose=True,
        )

        # get best correlations
        config = result.get_best_parameters_dict()

        # Save the models and the contact map
        # result.save_model or result.save_data
        # result.write_cmm to visualize the best models
        # result.write_xyz to visualize the best models

        wanted = {
            "maxdist": 600.0,
            "upfreq": 0.0,
            "kforce": 5,
            "dcutoff": 2,
            "reference": "",
            "lowfreq": -0.6,
            "scale": 0.01,
        }
        self.assertEqual(
            [round(i, 4) for i in config.values() if not type(i) is str],
            [round(i, 4) for i in wanted.values() if not type(i) is str],
        )
        if CHKTIME:
            print "12", time() - t0
示例#22
0
    def tb_generate_tads(self, expt_name, adj_list, chrom, resolution,
                         normalized, tad_file):
        """
        Predict the TAD borders of one chromosome at a given resolution
        from a Hi-C adjacency list.

        Parameters
        ----------
        expt_name : str
            Name given to the TADbit experiment
        adj_list : str
            Location of the adjacency list; also used as prefix of the
            temporary matrix file written by this function
        chrom : str
            Chromosome whose intra-chromosomal matrix is analysed
        resolution : int
            Resolution to read the Hi-C adjacency list at (converted with
            int())
        normalized : bool
            When exactly False the data are normalized before use
        tad_file : str
            Location of the output TAD file

        Returns
        -------
        bool
            Always True

        """
        print("TB TAD GENERATOR:", expt_name, adj_list, chrom, resolution,
              normalized, tad_file)

        bin_size = int(resolution)
        hic_data = load_hic_data_from_reads(adj_list, resolution=bin_size)

        if normalized is False:
            hic_data.normalize_hic(iterations=9, max_dev=0.1)

        # the chromosome matrix is dumped to disk and re-read by TADbit
        region = (chrom, chrom)
        save_matrix_file = adj_list + "_" + str(chrom) + "_tmp.txt"
        hic_data.write_matrix(save_matrix_file, region, normalized=True)

        chr_hic_data = hic_data.get_matrix((chrom, chrom))
        print("TB - chr_hic_data:", chr_hic_data)

        my_chrom = Chromosome(name=chrom, centromere_search=True)
        my_chrom.add_experiment(expt_name, hic_data=save_matrix_file,
                                resolution=int(resolution))

        # Run core TADbit function to find TADs on each expt.
        my_chrom.find_tad(expt_name, n_cpus=15)

        tmp_tad_file = tad_file + ".tmp"
        my_chrom.experiments[expt_name].write_tad_borders(
            savedata=tmp_tad_file)

        # copy the temporary borders file to its final location
        with open(tad_file, "wb") as f_out, open(tmp_tad_file, "rb") as f_in:
            f_out.write(f_in.read())

        return True
示例#23
0
    def test_12_3d_modelling_optimization(self):
        """
        Run a small IMP parameter search on bins 50 to 70 and compare the
        best parameters found with the expected ones.  Skipped when IMP
        cannot be imported.
        """
        if ONLY and not "12" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__("IMP")
        except ImportError:
            warn("IMP not found, skipping test\n")
            return
        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        result = exp.optimal_imp_parameters(
            50,
            70,
            n_cpus=4,
            n_models=8,
            n_keep=2,
            lowfreq_range=[-0.6],
            upfreq_range=(0, 1.1, 1.1),  # tuple: from 0 to 1.1 in steps of 1.1
            maxdist_range=[500, 600],  # list: exactly these values
            verbose=False)

        # get best correlations
        config = result.get_best_parameters_dict()  # dict with parameters
        wanted = {
            "maxdist": 600.0,
            "upfreq": 0.0,
            "kforce": 5,
            "dcutoff": 2,
            "reference": "",
            "lowfreq": -0.6,
            "scale": 0.01
        }

        # Compare parameter by parameter. The previous version filtered on
        # the type of the dictionary *keys* (always str) and so compared two
        # empty lists; it also read both sides from ``config``.
        # NOTE(review): assumes every key of ``wanted`` is returned by
        # get_best_parameters_dict() - confirm against the library.
        numeric_keys = [key for key in wanted
                        if not isinstance(wanted[key], str)]
        self.assertEqual(
            dict((key, round(config[key], 4)) for key in numeric_keys),
            dict((key, round(wanted[key], 4)) for key in numeric_keys))
        if CHKTIME:
            print("12", time() - t0)
示例#24
0
    def test_13_3d_modelling_centroid(self):  #model with no optimisation
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if ONLY and ONLY != '13':
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return
        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1',
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        models = exp.model_region(51,
                                  71,
                                  n_models=40,
                                  n_keep=25,
                                  n_cpus=4,
                                  config={
                                      'kforce': 5,
                                      'maxdist': 500,
                                      'scale': 0.01,
                                      'upfreq': 1.0,
                                      'lowfreq': -0.6
                                  })
        models.save_models('models.pick')

        avg = models.average_model()
        nmd = len(models)
        dev = rmsdRMSD_wrapper([models[m]['x']
                                for m in xrange(nmd)] + [avg['x']],
                               [models[m]['y']
                                for m in xrange(nmd)] + [avg['y']],
                               [models[m]['z']
                                for m in xrange(nmd)] + [avg['z']],
                               models._zeros, models.nloci, 200,
                               range(len(models) + 1),
                               len(models) + 1, int(False), 'rmsd', 0)
        centroid = models[models.centroid_model()]
        # find closest
        model = min([(k, dev[(k, nmd)]) for k in range(nmd)],
                    key=lambda x: x[1])[0]
        self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
        if CHKTIME:
            print '13', time() - t0
示例#25
0
    def _sub_experiment_zscore(self, start, end):
        """
        Get the z-score of a sub-region of an experiment.

        The region is cut out of the full Hi-C matrix and loaded into a
        temporary experiment. Weights and zeros (``self.norm`` and
        ``self._zeros``) are taken from the full chromosome, while the
        z-scores are computed on the selected region only.

        TODO: find a nicer way to do this...

        :param start: first bin to model (bin number)
        :param end: last bin to model (bin number, included in the region)

        :returns: z-score and raw values of the experiment

        :raises Exception: if every bin of the region is listed in
           ``self._zeros``
        """
        if self._normalization != 'visibility':
            warn('WARNING: normalizing according to visibility method')
            self.normalize_hic(method='visibility')
        from pytadbit import Chromosome
        matrix = self.get_hic_matrix()
        # `end` is inclusive for the caller: turn it into an exclusive bound
        end += 1
        region = xrange(start, end)
        new_matrix = [[matrix[i][j] for j in region] for i in region]

        tmp = Chromosome('tmp')
        tmp.add_experiment('exp1',
                           hic_data=[new_matrix],
                           resolution=self.resolution,
                           filter_columns=False)
        exp = tmp.experiments[0]
        # We want the weights and zeros calculated in the full chromosome
        siz = self.size
        exp.norm = [[self.norm[0][i + siz * j] for i in region
                     for j in region]]
        # `end` is exclusive at this point, so only bins strictly below it
        # belong to the region (re-indexed to start at 0)
        exp._zeros = dict.fromkeys(z - start for z in self._zeros
                                   if start <= z < end)
        # the region holds exactly `end - start` bins
        if len(exp._zeros) == end - start:
            raise Exception('ERROR: no interaction found in selected regions')
        # ... but the z-scores in this particular region
        exp.get_hic_zscores(remove_zeros=True)
        values = [[float('nan') for _ in xrange(exp.size)]
                  for _ in xrange(exp.size)]
        for i in xrange(exp.size):
            # zeros are rows or columns having a zero in the diagonal
            if i in exp._zeros:
                continue
            for j in xrange(i + 1, exp.size):
                if j in exp._zeros:
                    continue
                pos = i * exp.size + j
                # cells without raw interaction data stay as NaN
                if not exp.hic_data[0][pos]:
                    continue
                # the output matrix is filled symmetrically
                values[i][j] = values[j][i] = exp.norm[0][pos]
        return exp._zscores, values
示例#26
0
 def test_09_hic_normalization(self):
     """
     Smoke test: load the chrT matrices and compute the Hi-C z-scores,
     first with default arguments and then with zscored=False. Nothing
     is asserted.

     TODO: check with Davide's script
     """
     matrix_d = '20Kb/chrT/chrT_D.tsv'
     matrix_a = '20Kb/chrT/chrT_A.tsv'
     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_def=exp4, hic_data=matrix_d)
     experiment = chrom.experiments[0]
     experiment.load_experiment(matrix_a)
     experiment.get_hic_zscores()
     experiment.get_hic_zscores(zscored=False)
示例#27
0
 def test_10_generate_weights(self):
     """
     Weights recomputed by normalize_hic must equal the ones the
     experiment had right after loading.

     method names are: 'sqrt' or 'over_tot'
     """
     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_def=exp4,
                          hic_data='20Kb/chrT/chrT_D.tsv')
     experiment = chrom.experiments[0]
     # keep a copy of the loaded weights before wiping them
     loaded_weights = experiment.norm[:]
     experiment.norm = None
     experiment.normalize_hic()
     recomputed = experiment.norm[0]
     self.assertEqual(loaded_weights[0], recomputed)
示例#28
0
 def test_10_generate_weights(self):
     """
     Weights (``wght``) recomputed by normalize_hic must equal the ones
     the experiment had right after loading.

     TODO: using Francois' formula
     method names are: 'sqrt' or 'over_tot'
     """
     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_handler=exp4,
                          xp_handler='20Kb/chrT/chrT_D.tsv')
     experiment = chrom.experiments[0]
     # keep a copy of the loaded weights before wiping them
     loaded_weights = experiment.wght[:]
     experiment.wght = None
     experiment.normalize_hic()
     recomputed = experiment.wght[0]
     self.assertEqual(loaded_weights[0], recomputed)
示例#29
0
文件: experiment.py 项目: dbau/tadbit
    def _sub_experiment_zscore(self, start, end):
        """
        Get the z-score of a sub-region of an experiment.

        The region is cut out of the full Hi-C matrix and loaded into a
        temporary experiment. Weights and zeros (``self.norm`` and
        ``self._zeros``) are taken from the full chromosome, while the
        z-scores are computed on the selected region only.

        TODO: find a nicer way to do this...

        :param start: first bin to model (bin number)
        :param end: last bin to model (bin number, included in the region)

        :returns: z-score and raw values of the experiment

        :raises Exception: if every bin of the region is listed in
           ``self._zeros``
        """
        if self._normalization != 'visibility':
            warn('WARNING: normalizing according to visibility method')
            self.normalize_hic(method='visibility')
        from pytadbit import Chromosome
        matrix = self.get_hic_matrix()
        # `end` is inclusive for the caller: turn it into an exclusive bound
        end += 1
        region = xrange(start, end)
        new_matrix = [[matrix[i][j] for j in region] for i in region]

        tmp = Chromosome('tmp')
        tmp.add_experiment('exp1', hic_data=[new_matrix],
                           resolution=self.resolution, filter_columns=False)
        exp = tmp.experiments[0]
        # We want the weights and zeros calculated in the full chromosome
        siz = self.size
        exp.norm = [[self.norm[0][i + siz * j] for i in region
                     for j in region]]
        # `end` is exclusive at this point, so only bins strictly below it
        # belong to the region (re-indexed to start at 0)
        exp._zeros = dict.fromkeys(z - start for z in self._zeros
                                   if start <= z < end)
        # the region holds exactly `end - start` bins
        if len(exp._zeros) == end - start:
            raise Exception('ERROR: no interaction found in selected regions')
        # ... but the z-scores in this particular region
        exp.get_hic_zscores(remove_zeros=True)
        values = [[float('nan') for _ in xrange(exp.size)]
                  for _ in xrange(exp.size)]
        for i in xrange(exp.size):
            # zeros are rows or columns having a zero in the diagonal
            if i in exp._zeros:
                continue
            for j in xrange(i + 1, exp.size):
                if j in exp._zeros:
                    continue
                pos = i * exp.size + j
                # cells without raw interaction data stay as NaN
                if not exp.hic_data[0][pos]:
                    continue
                # the output matrix is filled symmetrically
                values[i][j] = values[j][i] = exp.norm[0][pos]
        return exp._zscores, values
示例#30
0
def main():
    """
    main function
    """
    n_pick = 4
    n_tot  = 10
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment('exp1', 100000, xp_handler=PATH +
                            'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    real_tads = {}
    for i, t in enumerate(test_chr.iter_tads('exp1', normed=False)):
        real_tads[i] = test_chr.experiments['exp1'].tads[i]
        real_tads[i]['hic'] = t[1]
    global DISTRA
    global DISTRD
    DISTRA, DISTRD = get_hic_distr(real_tads)
    # pick some tads
    picked_tads = []
    picked_keys = []
    for i in xrange(n_pick):
        key, new_tad = get_random_tad(real_tads)
        while key in picked_keys or (new_tad['end'] - new_tad['start'] < 15):
            key, new_tad = get_random_tad(real_tads)
        picked_tads.append(new_tad)
        picked_keys.append(key)
    # mutate this tads
    tads = {}
    tad_matrices = []
    tad_names = []
    for i in xrange(n_pick):
        print i
        tads[uppercase[i] + '_' + str(0)] = picked_tads[i]
        tad_names.append(uppercase[i] + '_' + str(0))
        for j in xrange(1, n_tot):
            hic, indels = generate_random_contacts(
                tad1=picked_tads[i]['hic'], prob=0.05, ext=int(random()*4) + 1,
                indel=int(random() * 4) + 1)[1:]
            # indels = '|'.join([str(n-1) if n>0 else '-' + str((abs(n)-1)) for n in indels])
            tads[uppercase[i] + '_' + str(j)] = {
                'hic'  : hic,
                'start': picked_tads[i]['start'],
                'end'  : picked_tads[i]['end']}
            tad_matrices.append(hic)
            tad_names.append(uppercase[i] + '_' + str(j))
    distances, cci = get_distances(tad_matrices, max_num_v=4,
                                   n_cpus=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, len(tad_matrices))
    paint_clustering(results, clusters, len(tad_matrices), test_chr,
                     tad_names, tad_matrices)
示例#31
0
def load_experiments(opts):
    """
    Build a Chromosome holding one experiment per Hi-C data file.

    :param opts: options object; the attributes read here are ``crm``,
       ``hic_files``, ``exp_names``, ``verbose`` and ``resolution``

    :returns: the Chromosome with all the experiments added
    """
    crm = Chromosome(opts.crm)
    for i, xpr in enumerate(opts.hic_files):
        if opts.exp_names:
            name = opts.exp_names[i]
        else:
            # default name: file name without its last extension (inner
            # dots are removed too). A file name with no extension would
            # give an empty name, so fall back to the file name itself.
            base = xpr.split('/')[-1]
            name = ''.join(base.split('.')[:-1]) or base
        if opts.verbose:
            print(' Reading Hi-C datafile #%s (%s)' % (i+1, name))
        crm.add_experiment(name, hic_data=xpr,
                           resolution=int(opts.resolution))
        if opts.verbose:
            print('     loaded as: %s\n' % (crm.experiments[name]))
    return crm
示例#32
0
    def test_07_forbidden_regions(self):
        """
        Check the TAD ends with positive score found in exp1, and the
        chromosome's forbidden regions before and after adding exp2.
        """
        if ONLY and "07" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        expected_ends = [
            3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0, 61.0, 66.0, 71.0, 83.0,
            89.0, 94.0, 99.0
        ]
        expected_before = ([38, 39], ["Centromere", "Centromere"])
        expected_after = ([38], ["Centromere"])

        chrom = Chromosome(name="Test Chromosome", max_tad_size=260000,
                           centromere_search=True)
        chrom.add_experiment("exp1", 20000, tad_def=exp4,
                             hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                             silent=True)
        tads = chrom.experiments["exp1"].tads
        found_ends = [tads[key]["end"] for key in tads
                      if tads[key]["score"] > 0]
        self.assertEqual(expected_ends, found_ends)

        # snapshot of the forbidden regions with a single experiment
        forbidden_before = (list(chrom.forbidden.keys()),
                            list(chrom.forbidden.values()))
        chrom.add_experiment("exp2", 20000, tad_def=exp3,
                             hic_data=PATH + "/20Kb/chrT/chrT_C.tsv",
                             silent=True)
        # ... and once the second experiment is in
        forbidden_after = (list(chrom.forbidden.keys()),
                           list(chrom.forbidden.values()))
        self.assertEqual(forbidden_before, expected_before)
        self.assertEqual(forbidden_after, expected_after)
        if CHKTIME:
            print("7", time() - t0)
示例#33
0
    def test_13_3d_modelling_centroid(self):
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if ONLY and ONLY != '13':
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return
        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        models = exp.model_region(51, 71, n_models=40, n_keep=25,
                                  n_cpus=4,
                                  config={'kforce': 5, 'maxdist': 500,
                                          'scale': 0.01,
                                          'upfreq': 1.0, 'lowfreq': -0.6})
        models.save_models('models.pick')

        avg = models.average_model()
        nmd = len(models)
        dev = rmsdRMSD_wrapper(
            [models[m]['x'] for m in xrange(nmd)] + [avg['x']],
            [models[m]['y'] for m in xrange(nmd)] + [avg['y']],
            [models[m]['z'] for m in xrange(nmd)] + [avg['z']],
            models._zeros,
            models.nloci, 200, range(len(models)+1),
            len(models)+1, int(False), 'rmsd', 0)
        centroid = models[models.centroid_model()]
        # find closest
        model = min([(k, dev[(k, nmd)] )
                     for k in range(nmd)], key=lambda x: x[1])[0]
        self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
        if CHKTIME:
            print '13', time() - t0
示例#34
0
 def test_10_compartments(self):
     """
     """
     if ONLY and ONLY != "10":
         return
     if CHKTIME:
         t0 = time()
     test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
     test_chr.add_experiment("exp1", 20000, tad_def=exp4, hic_data=PATH + "/20Kb/chrT/chrT_D.tsv", silent=True)
     exp = test_chr.experiments[0]
     exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
     hic_data = exp.hic_data[0]
     hic_data.find_compartments(label_compartments="cluster")
     self.assertEqual(len(hic_data.compartments[None]), 39)
     # self.assertEqual(round(hic_data.compartments[None][24]['dens'], 5),
     #                  0.75434)
     if CHKTIME:
         print "10", time() - t0
示例#35
0
    def test_10_generate_weights(self):
        """
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        tadbit_weights = exp.norm[:]
        exp.norm = None
        exp.normalize_hic()
        self.assertEqual([round(i, 3) for i in tadbit_weights[0][:100]],
                         [round(i, 3) for i in exp.norm[0][:100]])
        if CHKTIME:
            print '10', time() - t0
示例#36
0
    def test_13_3d_modelling_centroid(self):
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return
        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.normalize_hic(silent=True)
        models = exp.model_region(51, 71, n_models=110, n_keep=25,
                                  n_cpus=4,
                                  config={'kforce': 5, 'maxdist': 500,
                                          'scale': 0.01,
                                          'upfreq': 1.0, 'lowfreq': -0.6})
        models.save_models('models.pick')
        
        avg = models.average_model()

        a = rmsdRMSD_wrapper([models[m]['x'] for m in xrange(len(models))] + [avg['x']],
                             [models[m]['y'] for m in xrange(len(models))] + [avg['y']],
                             [models[m]['z'] for m in xrange(len(models))] + [avg['z']],
                             models.nloci, 410, range(len(models)+1),
                             len(models)+1, int(False), 'score', 1)
        self.assertEqual(21, sorted([(k, sum([a[(i, j)] for i, j in a if i==k or j==k]))
                                     for k in range(26)], key=lambda x: x[1])[-1][0])
        centroid = models[models.centroid_model()]
        expsc = sum([sum([a[(i, j)] for i, j in a if i==k or j==k])
                     for k in range(26)]) / 26
        # find closest
        model = min([(k, sum([a[(i, j)] for i, j in a if i==k or j==k]))
                     for k in range(26)], key=lambda x:abs(x[1]-expsc))[0]
        self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
        if CHKTIME:
            print '13', time() - t0
示例#37
0
def main():
    """
    Build 3D models either from a z-score file (``opts.inabc``) or from
    a Hi-C matrix (``opts.incrm``); then, depending on the options, save
    the models, write CMM files and run the clustering report.
    """
    opts, params = get_options()
    # settings shared by both modelling entry points
    modelling = dict(n_models=opts.nmodels,
                     n_keep=opts.nkeep,
                     n_cpus=opts.ncpus,
                     keep_all=False,
                     verbose=False,
                     config=params)
    if opts.inabc:
        models = generate_3d_models(parse_zscores(opts.inabc),
                                    opts.resolution,
                                    start=1,
                                    outfile=None,
                                    **modelling)
    else:
        crmbit = Chromosome('crm')
        crmbit.add_experiment('X',
                              resolution=opts.resolution,
                              xp_handler=opts.incrm)
        models = crmbit.experiments['X'].model_region(start=opts.start,
                                                      end=opts.end,
                                                      **modelling)

    if opts.save:
        pickle_path = '%s/models_%s_%s.pik' % (opts.out, opts.start,
                                               opts.start + opts.nmodels)
        models.save_models(pickle_path)
    # one CMM file for each of the first int(opts.cmm) models
    for model_num in xrange(int(opts.cmm)):
        models.write_cmm(model_num, opts.out)

    if not opts.full_report:
        return
    models.cluster_models(dcutoff=200)
    models.cluster_analysis_dendrogram(n_best_clusters=10)
    models.model_consistency()
示例#38
0
def main():
    """
    Find the TADs of the k562 chr19 experiment, keep those with a
    non-negative score and an end - start of at least 10, then compute
    their distances, cluster them and paint the clustering.
    """
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment('exp1', 100000, xp_handler=PATH +
                            'HIC_k562_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    tad_names = []
    tad_matrices = []
    for name, matrix in test_chr.iter_tads('exp1'):
        tad = test_chr.experiments['exp1'].tads[name]
        if tad['score'] < 0 or tad['end'] - tad['start'] < 10:
            continue
        tad_names.append(name)
        tad_matrices.append(matrix)
    num = len(tad_names)
    # NOTE(review): the CPU count is passed as max_num_v here - confirm
    # this is intended
    distances, cci = get_distances(tad_matrices, max_num_v=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, num)
    paint_clustering(results, clusters, num, test_chr, tad_names)
示例#39
0
def load_hic_data(opts):
    """
    Load Hi-C data

    Creates the chromosome, adds ``opts.matrix`` as the normalized data
    of experiment 'test', filters its columns and checks ``opts.beg`` and
    ``opts.end`` against the experiment size.

    :param opts: options object; reads ``crm``, ``reso``, ``matrix``,
       ``perc_zero``, ``beg`` and ``end``. ``opts.end`` is overwritten
       with the experiment size when it exceeds it.

    :returns: the Chromosome object

    :raises Exception: if ``opts.beg`` is larger than the experiment size
    """
    # Start reading the data
    crm = Chromosome(opts.crm) # Create chromosome object
    crm.add_experiment('test', exp_type='Hi-C', resolution=opts.reso,
                       norm_data=opts.matrix)
    exp = crm.experiments[-1]
    # TODO: if not bad columns:...
    exp.filter_columns(perc_zero=opts.perc_zero)
    size = exp.size
    if opts.beg > size:
        raise Exception('ERROR: beg parameter is larger than chromosome size.')
    if opts.end > size:
        message = ('WARNING: end parameter is larger than chromosome ' +
                   'size. Setting end to %s.\n' % (size * opts.reso))
        print (message)
        opts.end = size
    return crm
示例#40
0
    def test_09_hic_normalization(self):
        """
        writes interaction pair file.
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.get_hic_zscores()
        exp.get_hic_zscores(zscored=False)
        sumz = sum([exp._zscores[k1][k2] for k1 in exp._zscores.keys()
                    for k2 in exp._zscores[k1]])
        self.assertEqual(round(sumz, 4), round(3993.7842, 4))
        if CHKTIME:
            print '9', time() - t0
示例#41
0
    def test_09_hic_normalization(self):
        """
        writes interaction pair file.
        """
        if ONLY and ONLY != "09":
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1", 20000, tad_def=exp4, hic_data=PATH + "/20Kb/chrT/chrT_D.tsv", silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.normalize_hic(silent=True)
        exp.get_hic_zscores()
        exp.get_hic_zscores(zscored=False)
        sumz = sum([exp._zscores[k1][k2] for k1 in exp._zscores.keys() for k2 in exp._zscores[k1]])
        self.assertEqual(round(sumz, 4), round(4059.2877, 4))
        if CHKTIME:
            print "9", time() - t0
示例#42
0
 def test_10_compartments(self):
     """
     """
     if ONLY and not "10" in ONLY:
         return
     if CHKTIME:
         t0 = time()
     test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
     test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                             hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                             silent=True)
     exp = test_chr.experiments[0]
     exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
     hic_data = exp.hic_data[0]
     hic_data.find_compartments(label_compartments="cluster")
     self.assertEqual(len(hic_data.compartments[None]), 39)
     # self.assertEqual(round(hic_data.compartments[None][24]["dens"], 5),
     #                  0.75434)
     if CHKTIME:
         print "10", time() - t0
示例#43
0
 def test_10_compartments(self):
     """
     """
     if ONLY and ONLY != '10':
         return
     if CHKTIME:
         t0 = time()
     test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
     test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                             hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                             silent=True)
     exp = test_chr.experiments[0]
     exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
     hic_data = exp.hic_data[0]
     hic_data.find_compartments(label_compartments='cluster')
     self.assertEqual(len(hic_data.compartments[None]), 39)
     # self.assertEqual(round(hic_data.compartments[None][24]['dens'], 5),
     #                  0.75434)
     if CHKTIME:
         print '10', time() - t0
示例#44
0
def main():
    """
    Find the TADs of the k562 chr19 experiment, keep those with a
    non-negative score and an end - start of at least 10, then compute
    their distances, cluster them and paint the clustering.
    """
    hic_path = PATH + 'HIC_k562_chr19_chr19_100000_obs.txt'
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment('exp1', 100000, xp_handler=hic_path)
    test_chr.find_tad(['exp1'])
    tad_names = []
    tad_matrices = []
    for name, matrix in test_chr.iter_tads('exp1'):
        tad = test_chr.experiments['exp1'].tads[name]
        if tad['score'] < 0 or tad['end'] - tad['start'] < 10:
            continue
        tad_names.append(name)
        tad_matrices.append(matrix)
    num = len(tad_names)
    # NOTE(review): the CPU count is passed as max_num_v here - confirm
    # this is intended
    distances, cci = get_distances(tad_matrices, max_num_v=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, num)
    paint_clustering(results, clusters, num, test_chr, tad_names)
示例#45
0
 def test_07_forbidden_regions(self):
     """
     Check the TAD ends with positive score found in exp1, and the
     chromosome's forbidden regions before and after adding exp2.
     """
     expected_ends = [2.0, 7.0, 12.0, 18.0, 49.0,
                      61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
     expected_before = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
                         23, 24, 25, 26, 27, 28, 29, 30, 31],
                        [None, None, None, 'Centromere', 'Centromere',
                         None, None, None, None, None, None, None,
                         None, None, None, None, None, None])
     expected_after = ([38], ['Centromere'])

     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_def=exp4,
                          hic_data='20Kb/chrT/chrT_D.tsv')
     tads = chrom.experiments['exp1'].tads
     found_ends = [tads[key]['end'] for key in tads
                   if tads[key]['score'] > 0]
     self.assertEqual(expected_ends, found_ends)

     # snapshot of the forbidden regions with a single experiment
     forbidden_before = chrom.forbidden.keys(), chrom.forbidden.values()
     chrom.add_experiment('exp2', 20000, tad_def=exp3,
                          hic_data='20Kb/chrT/chrT_C.tsv')
     # ... and once the second experiment is in
     forbidden_after = chrom.forbidden.keys(), chrom.forbidden.values()
     self.assertEqual(forbidden_before, expected_before)
     self.assertEqual(forbidden_after, expected_after)
示例#46
0
def load_hic_data(opts):
    """
    Load Hi-C data

    Creates the chromosome, adds ``opts.matrix`` as the normalized data
    of experiment 'test', filters its columns and checks ``opts.beg`` and
    ``opts.end`` against the experiment size.

    :param opts: options object; reads ``crm``, ``reso``, ``matrix``,
       ``perc_zero``, ``beg`` and ``end``. ``opts.end`` is overwritten
       with the experiment size when it exceeds it.

    :returns: the Chromosome object

    :raises Exception: if ``opts.beg`` is larger than the experiment size
    """
    # Start reading the data
    crm = Chromosome(opts.crm)  # Create chromosome object
    crm.add_experiment('test', exp_type='Hi-C', resolution=opts.reso,
                       norm_data=opts.matrix)
    experiment = crm.experiments[-1]
    # TODO: if not bad columns:...
    experiment.filter_columns(perc_zero=opts.perc_zero)
    n_bins = experiment.size
    if opts.beg > n_bins:
        raise Exception('ERROR: beg parameter is larger than chromosome size.')
    if opts.end > n_bins:
        warning = ('WARNING: end parameter is larger than chromosome ' +
                   'size. Setting end to %s.\n' % (n_bins * opts.reso))
        print(warning)
        opts.end = n_bins
    return crm
示例#47
0
def load_hic_data(opts):
    """
    Load Hi-C data
    """
    # Start reading the data
    crm = Chromosome(opts.crm)  # Create chromosome object
    print '     o Loading Hi-C matrix'
    try:
        hic = optimal_reader(open(opts.matrix),
                             normalized=True,
                             resolution=opts.reso)
        crm.add_experiment('test',
                           exp_type='Hi-C',
                           resolution=opts.reso,
                           norm_data=hic)
    except Exception, e:
        print str(e)
        warn('WARNING: failed to load data as TADbit standardized matrix\n')
        crm.add_experiment('test',
                           exp_type='Hi-C',
                           resolution=opts.reso,
                           norm_data=opts.matrix)
示例#48
0
    def test_08_changing_resolution(self):
        if ONLY and not "08" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                                silent=True)
        exp = test_chr.experiments["exp1"]
        sum20 = sum(exp.hic_data[0].values())
        exp.set_resolution(80000)
        sum80 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(160000)
        sum160 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(360000)
        sum360 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(2400000)
        sum2400 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(40000)
        sum40 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(20000)
        sum21 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(40000)
        sum41 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        self.assertTrue(sum20 == sum80 == sum160 == sum360 == sum40 \
                        == sum21 == sum2400 == sum41)
        if CHKTIME:
            print "8", time() - t0
示例#49
0
    def test_11_write_interaction_pairs(self):
        """
        writes interaction pair file.
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.get_hic_zscores(zscored=False)
        exp.write_interaction_pairs('lala')
        lines = open('lala').readlines()
        self.assertEqual(len(lines), 4851)
        self.assertEqual(lines[25], '1\t28\t0.933380667098\n')
        self.assertEqual(lines[2000], '24\t100\t0.233201219512\n')
        system('rm -f lala')
        if CHKTIME:
            print '11', time() - t0
示例#50
0
def process():
    """
    Find TADs in every matrix file listed in the module-level ``args``.

    For each file a chromosome is built (the chromosome name is the part
    of the file name after the last dot, once the extension and any
    ".matrix" suffix are stripped), the Hi-C data is normalized, TADs are
    searched, and three outputs are written under the common prefix: the
    TAD borders (".border"), a plot (".pdf") and the saved chromosome
    (".tdb").

    The output prefix is ``options.outputDir`` followed by
    ``options.outputFilename`` or, when that is empty, by the base name
    of the first input file.
    """
    if options.outputFilename != "":
        out_prefix = options.outputDir + options.outputFilename
    else:
        out_prefix = options.outputDir + os.path.basename(args[0])

    for matrix_path in args:
        file_name = os.path.basename(matrix_path)
        stem = os.path.splitext(file_name)[0].split(".matrix")[0]
        # "<sample>.<chromosome>": split on the last dot only
        pieces = stem.rsplit(".", 1)
        crm_name = pieces[-1]
        sample = pieces[0]

        chrom = Chromosome(name=crm_name,
                           centromere_search=True,
                           species=options.species,
                           assembly=options.assembly)
        chrom.set_max_tad_size(5000000)
        chrom.add_experiment(sample,
                             exp_type='Hi-C',
                             identifier=sample,
                             hic_data=matrix_path,
                             resolution=options.resolution)

        exp = chrom.experiments[sample]
        exp.normalize_hic(silent=True)
        chrom.find_tad(sample,
                       n_cpus=options.threads,
                       normalized=True,
                       verbose=False)
        exp.write_tad_borders(out_prefix + "." + crm_name + ".border")

        chrom_prefix = out_prefix + "chr." + crm_name
        chrom.visualize(exp.name,
                        paint_tads=True,
                        savefig=chrom_prefix + ".pdf")
        chrom.save_chromosome(chrom_prefix + ".tdb",
                              force=True)
示例#51
0
    def generate_tads(self, chrom):
        """
        Uses TADbit to generate the TAD borders based on the computed hic_data

        The intra-chromosomal adjacency matrix of `chrom` is read from
        `self.parsed_reads_dir`, loaded into a TADbit Chromosome as a single
        experiment, and the TAD borders found are written to a
        '<experiment name>_tads.tsv' file under `self.library_dir`.

        :param chrom: chromosome identifier; it is converted with str() and
           used twice ('<chrom>-<chrom>') in both the experiment name and
           the input file name
        """
        from pytadbit import Chromosome

        # experiment name: <library>_<resolution>_<chrom>-<chrom>
        exptName = self.library + "_" + str(
            self.resolution) + "_" + str(chrom) + "-" + str(chrom)
        # input matrix: <parsed_reads_dir>/adjlist_map_<chrom>-<chrom>_<resolution>.tsv
        fname = self.parsed_reads_dir + '/adjlist_map_' + str(
            chrom) + '-' + str(chrom) + '_' + str(self.resolution) + '.tsv'
        # NOTE(review): read_matrix is not imported in this method; it is
        # presumably available at module level - confirm
        chr_hic_data = read_matrix(fname, resolution=int(self.resolution))

        my_chrom = Chromosome(name=exptName, centromere_search=True)
        my_chrom.add_experiment(exptName,
                                hic_data=chr_hic_data,
                                resolution=int(self.resolution))

        # Run core TADbit function to find TADs on each expt.
        # For the current dataset required 61GB of RAM
        my_chrom.find_tad(exptName, n_cpus=15)

        exp = my_chrom.experiments[exptName]
        # no separator is inserted between library_dir and the file name:
        # assumes self.library_dir ends with a path separator - TODO confirm
        tad_file = self.library_dir + exptName + '_tads.tsv'
        exp.write_tad_borders(savedata=tad_file)