def test_merge_tiling(self):
    """Merge a START/END-bounded chr1 tiling query with an experiment query.

    The merged query is rendered as CHROMOSOME,START,END and compared with
    the stored "merge_tiling" result.
    """
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)

    status, tiling_query = client.tiling_regions(
        10000, "hg19", "chr1", self.admin_key)
    self.assertSuccess(status, tiling_query)

    status, experiment_query = client.select_regions(
        "hg19_chr1_1", "hg19", None, None, None, None, None, None, None,
        self.admin_key)
    self.assertSuccess(status, experiment_query)

    # Limit the tiling to the range of the experiment: each bound is applied
    # on top of the previous filter result.
    numeric_bounds = (
        ("START", ">=", "713240"),
        ("END", "<=", "876330"),
    )
    bounded_query = tiling_query
    for column, operator, bound in numeric_bounds:
        status, bounded_query = client.filter_regions(
            bounded_query, column, operator, bound, "number", self.admin_key)
        self.assertSuccess(status, bounded_query)

    status, merged_query = client.merge_queries(
        bounded_query, experiment_query, self.admin_key)
    self.assertSuccess(status, merged_query)

    status, request = client.get_regions(
        merged_query, "CHROMOSOME,START,END", self.admin_key)
    self.assertSuccess(status, request)
    actual_output = self.get_regions_request(request)

    self.assertEqual(actual_output, helpers.get_result("merge_tiling"))
  def test_filter_regions(self):
    """Filter one experiment selection by a numeric and by a string column."""
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)

    status, selection = client.select_regions(
        "hg19_chr1_1", "hg19", None, None, None, None, None, None, None,
        self.admin_key)
    self.assertSuccess(status, selection)

    # Numeric filter on START; note the value literal has a trailing space.
    status, numeric_query = client.filter_regions(
        selection, "START", ">=", "875400 ", "number", self.admin_key)
    self.assertSuccess(status, numeric_query)

    status, request = client.get_regions(
        numeric_query, "CHROMOSOME,START,END", self.admin_key)
    self.assertSuccess(status, request)
    numeric_output = self.get_regions_request(request)
    self.assertEqual(numeric_output, helpers.get_result("filter_ge_875400"))

    # Test filter with string values: both operators are applied to the
    # unfiltered selection and compared with the inline expectations.
    strand_cases = (
        ("==", 'chr1\t713240\t713390\t+\nchr1\t713900\t714050\t+\nchr1\t714160\t714310\t+\nchr1\t714540\t714690\t+\nchr1\t715060\t715210\t+\nchr1\t762060\t762210\t+\nchr1\t839540\t839690\t+\nchr1\t840080\t840230\t+\nchr1\t860240\t860390\t+\nchr1\t875400\t875550\t+\nchr1\t876180\t876330\t+'),
        ("!=", 'chr1\t713520\t713670\t-\nchr1\t761180\t761330\t-\nchr1\t762420\t762570\t.\nchr1\t762820\t762970\t-\nchr1\t763020\t763170\t-\nchr1\t840600\t840750\t-\nchr1\t858880\t859030\t.\nchr1\t859600\t859750\t.\nchr1\t861040\t861190\t-\nchr1\t875900\t876050\t-'),
    )
    for operator, expected_output in strand_cases:
        status, strand_query = client.filter_regions(
            selection, "STRAND", operator, "+", "string", self.admin_key)
        self.assertSuccess(status, strand_query)

        status, request = client.get_regions(
            strand_query, "CHROMOSOME,START,END,STRAND", self.admin_key)
        self.assertSuccess(status, request)
        strand_output = self.get_regions_request(request)
        self.assertEqual(strand_output, expected_output)
    def test_big_file(self):
        """Bin and filter the VALUE column of a large bedgraph annotation.

        The gzip-compressed bedgraph file is uploaded as the annotation
        "exp_wig", VALUE is binned into 5 bins, a VALUE filter derived from
        the bin edges is applied, and the filtered data is binned again into
        10 bins. Both binning results are compared with literal expectations.
        """
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        # Use a context manager so the gzip handle is closed even when the
        # read fails, instead of being left open for the rest of the run.
        with gzip.open("data/bedgraph/bigwig.bg.gz") as f:
            data = f.read()

        (status, a1) = epidb.add_annotation("exp_wig", "hg19", "bla", data,
                                            "bedgraph", None, self.admin_key)
        self.assertSuccess(status, a1)

        (status, q1) = epidb.select_annotations("exp_wig", "hg19", None, None,
                                                None, self.admin_key)
        self.assertSuccess(status, q1)

        status, r1 = epidb.binning(q1, "VALUE", 5, self.admin_key)
        self.assertSuccess(status, r1)
        binning = self.get_regions_request(r1)
        self.assertEqual(
            binning, {
                'binning': {
                    'ranges': [
                        -1126.72, -726.6238, -326.5276, 73.5686, 473.6648,
                        873.761
                    ],
                    'counts': [8, 5, 3992582, 3489, 13]
                }
            })

        # NOTE(review): the "<" filter below is applied to q1, not to the
        # result of this ">" filter, so the lower bound never reaches the
        # final binning (the expected ranges still start at -1126.72).
        # Chaining the filters would require new expected values, so the
        # original behaviour is kept; confirm whether chaining was intended.
        to_filter_low = binning["binning"]["ranges"][2]
        status, filtered_low = epidb.filter_regions(q1, "VALUE", ">",
                                                    str(to_filter_low),
                                                    "number", self.admin_key)
        self.assertSuccess(status, filtered_low)

        to_filter_high = binning["binning"]["ranges"][4]
        status, filtered = epidb.filter_regions(q1, "VALUE", "<",
                                                str(to_filter_high), "number",
                                                self.admin_key)
        self.assertSuccess(status, filtered)

        status, r_filtered = epidb.binning(filtered, "VALUE", 10,
                                           self.admin_key)
        self.assertSuccess(status, r_filtered)
        binning = self.get_regions_request(r_filtered)
        self.assertEqual(
            binning, {
                'binning': {
                    'counts': [4, 4, 1, 2, 2, 17, 1, 3932813, 772, 119],
                    'ranges': [
                        -1126.72, -967.0013, -807.2826, -647.5638, -487.8452,
                        -328.1265, -168.4077, -8.689, 151.0297, 310.7484,
                        470.4671
                    ]
                }
            })
示例#4
0
    def test_annotation_signal_bedgraph(self):
        """Upload a bedgraph annotation, count it, and filter it four times.

        The annotation must contain 1000 regions; after two VALUE filters and
        two CHROMOSOME filters the remaining regions are compared with an
        inline expectation.
        """
        client = DeepBlueClient(address="localhost", port=31415)
        self.init_base(client)

        # Read as in the original test; the value is not used afterwards.
        sample_id = self.sample_ids[0]

        annotation_names = ["test1"]
        for annotation_name in annotation_names:
            bedgraph_content = helpers.load_bedgraph(annotation_name)
            upload_result = client.add_annotation(
                annotation_name, "hg19", "Test data", bedgraph_content,
                "bedgraph", None, self.admin_key)
            self.assertSuccess(upload_result)

        (status, annotations_query) = client.select_annotations(
            annotation_names, "hg19", None, None, None, self.admin_key)

        (status, request) = client.count_regions(annotations_query,
                                                 self.admin_key)
        self.assertSuccess(status, request)
        total = self.count_request(request)
        self.assertEqual(1000, total)

        # Each filter is applied on top of the previous filter's query.
        filter_steps = (
            ("VALUE", ">", "0.75", "number"),
            ("VALUE", "<", "0.8", "number"),
            ("CHROMOSOME", "!=", "chrX", "string"),
            ("CHROMOSOME", "!=", "chr7", "string"),
        )
        narrowed_query = annotations_query
        for column, operator, value, value_type in filter_steps:
            (status, narrowed_query) = client.filter_regions(
                narrowed_query, column, operator, value, value_type,
                self.admin_key)

        (status, request) = client.get_regions(
            narrowed_query,
            "CHROMOSOME,START,END,VALUE,@NAME,@EPIGENETIC_MARK",
            self.admin_key)
        actual_output = self.get_regions_request(request)

        expected_output = 'chr1\t104372258\t104372293\t0.7767\ttest1\t\nchr10\t126498141\t126498176\t0.7695\ttest1\t\nchr11\t66110277\t66110312\t0.7613\ttest1\t\nchr15\t38653026\t38653061\t0.7720\ttest1\t\nchr15\t87725326\t87725361\t0.7727\ttest1\t\nchr16\t2119419\t2119454\t0.7696\ttest1\t\nchr16\t63360719\t63360754\t0.7740\ttest1\t\nchr19\t46369215\t46369250\t0.7727\ttest1\t\nchr8\t21923667\t21923702\t0.7930\ttest1\t'
        self.assertEqual(actual_output, expected_output)
  def test_filter_tiling(self):
    """Filter a chr1 tiling query by END and compare with "filter_tiling"."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    # Pass the client to init_base, as every other test in this file does;
    # the original call omitted it.
    self.init_base(epidb)

    res, qid = epidb.tiling_regions(10000, "hg19", "chr1", self.admin_key)
    self.assertSuccess(res, qid)

    res, qid2 = epidb.filter_regions(qid, "END",  "<=", "100000", "number", self.admin_key)
    self.assertSuccess(res, qid2)

    res, req = epidb.get_regions(qid2, "CHROMOSOME,START,END", self.admin_key)
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    expected_regions = helpers.get_result("filter_tiling")
    self.assertEqual(regions, expected_regions)
  def test_filter_two_genomes(self):
    """Apply a numeric START filter to a selection spanning two genomes.

    The output is compared with the stored
    "filter_multiple_genomes_ge_875400" result.
    """
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)

    experiment_names = ["hg19_chr1_1", "hg18_chr1_1"]
    genome_names = ["hg19", "hg18"]
    status, selection = client.select_regions(
        experiment_names, genome_names, None, None, None, None, None, None,
        None, self.admin_key)
    self.assertSuccess(status, selection)

    # The value literal has a trailing space, exactly as in the original.
    status, filtered_query = client.filter_regions(
        selection, "START", ">=", "875400 ", "number", self.admin_key)
    self.assertSuccess(status, filtered_query)

    status, request = client.get_regions(
        filtered_query, "CHROMOSOME,START,END", self.admin_key)
    self.assertSuccess(status, request)
    actual_output = self.get_regions_request(request)

    self.assertEqual(
        actual_output,
        helpers.get_result("filter_multiple_genomes_ge_875400"))
  def test_remove_full_chromosome_data(self):
    """Select hg19_big_2 with bounds 0 and 9841558, then keep only chr21.

    The chr21 regions are rendered as CHROMOSOME,START,END,STRAND and
    compared with an inline expectation.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_full(epidb)

    self.insert_experiment(epidb, "hg19_big_2")
    res, qid_1_1 = epidb.select_regions("hg19_big_2", "hg19", None, None, None,
                                      None, None, 0, 9841558, self.admin_key)
    # Fail here if the selection itself was rejected, rather than later.
    self.assertSuccess(res, qid_1_1)

    res, req = epidb.count_regions(qid_1_1, self.admin_key)
    self.assertSuccess(res, req)
    # The count value is not asserted; the request is still fetched as before.
    self.count_request(req)

    (status, filtered_chr) = epidb.filter_regions(qid_1_1,"CHROMOSOME", "==", "chr21", "string", self.admin_key)
    self.assertSuccess(status, filtered_chr)
    res, req = epidb.get_regions(filtered_chr, "CHROMOSOME,START,END,STRAND", self.admin_key)
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(regions, "chr21\t9656828\t9656920\t.\nchr21\t9700370\t9700415\t.\nchr21\t9825445\t9826573\t.\nchr21\t9826759\t9827609\t.\nchr21\t9829381\t9829420\t.\nchr21\t9831594\t9831981\t.\nchr21\t9833197\t9833459\t.\nchr21\t9833733\t9833902\t.\nchr21\t9841288\t9841558\t.")
示例#8
0
    def test_correction_score(self):
        """Aggregate SIGNAL_VALUE of an experiment over a chr1 tiling query.

        Only rows with @AGG.COUNT > 0 are kept; CHROMOSOME, START, END,
        @AGG.MEAN and @AGG.COUNT are compared with an inline expectation.
        """
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        sample_id = self.sample_ids[0]
        self.insert_experiment(epidb, "hg19_chr1_1", sample_id)
        (s, data) = epidb.select_experiments("hg19_chr1_1", "chr1", None, None,
                                             self.admin_key)
        self.assertSuccess(s, data)
        res, qid_2 = epidb.tiling_regions(1000, "hg19", "chr1", self.admin_key)
        self.assertSuccess(res, qid_2)

        (s, q_agg) = epidb.aggregate(data, qid_2, "SIGNAL_VALUE",
                                     self.admin_key)
        self.assertSuccess(s, q_agg)
        res, qid_4 = epidb.filter_regions(q_agg, "@AGG.COUNT", ">", "0",
                                          "number", self.admin_key)
        self.assertSuccess(res, qid_4)
        status, req = epidb.get_regions(
            qid_4, "CHROMOSOME,START,END,@AGG.MEAN,@AGG.COUNT", self.admin_key)
        self.assertSuccess(status, req)

        rs = self.get_regions_request(req)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            rs,
            "chr1\t713000\t714000\t27.1111\t3\nchr1\t714000\t715000\t39.5556\t3\nchr1\t715000\t716000\t24.0000\t1\nchr1\t761000\t762000\t6.0000\t1\nchr1\t762000\t763000\t54.6667\t3\nchr1\t763000\t764000\t12.0000\t1\nchr1\t839000\t840000\t15.0000\t1\nchr1\t840000\t841000\t14.0000\t2\nchr1\t858000\t859000\t16.8000\t1\nchr1\t859000\t860000\t10.6000\t2\nchr1\t860000\t861000\t41.0000\t1\nchr1\t861000\t862000\t22.0000\t1\nchr1\t875000\t876000\t13.0000\t2\nchr1\t876000\t877000\t13.5000\t2"
        )
    def test_gene_expression(self):
        """Exercise gene-expression upload, listing, selection and filtering.

        Steps, in order:
          * upload three cufflinks files for sample "s2" under project "DEEP"
            and check list_expressions for several sample/replica/project
            combinations;
          * upload a cufflinks file for sample "s1" under "ENCODE" and check
            the listings again, both with the admin key and with the key of a
            newly created user;
          * check the info() document of the uploaded expression;
          * check that a duplicate (type, sample, replica) upload is rejected
            and that a grape2 upload with another replica succeeds;
          * upload a gene model, then select expressions by gene and compare
            the region output with inline expectations;
          * filter by FPKM_STATUS and CHROMOSOME with the admin key and with
            the user key and compare both outputs.
        """

        def read_gz(path):
            # Read a gzip fixture completely and close the handle right away
            # (the original left every handle open).
            with gzip.open(path) as handle:
                return handle.read()

        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        (s, project) = epidb.add_project("DEEP", "Deutsche Epigenom",
                                         self.admin_key)
        self.assertSuccess(s, project)

        data = read_gz("data/fpkm/small_1.fpkm_tracking.gz")
        (s, gene_expression) = epidb.add_expression("gene", "s2", 1, data,
                                                    "cufflinks", "DEEP", None,
                                                    self.admin_key)
        self.assertSuccess(s, gene_expression)

        data = read_gz("data/fpkm/small_2.fpkm_tracking.gz")
        (s, gene_expression) = epidb.add_expression("gene", "s2", 2, data,
                                                    "cufflinks", "DEEP", None,
                                                    self.admin_key)
        self.assertSuccess(s, gene_expression)

        data = read_gz("data/fpkm/small_3.fpkm_tracking.gz")
        (s, gene_expression) = epidb.add_expression("gene", "s2", 44, data,
                                                    "cufflinks", "DEEP", None,
                                                    self.admin_key)
        self.assertSuccess(s, gene_expression)

        (s, gex) = epidb.list_expressions("gene", "s1", None, None,
                                          self.admin_key)
        self.assertEqual(gex, [])

        (s, gex) = epidb.list_expressions("gene", "s2", None, None,
                                          self.admin_key)
        self.assertEqual(gex, [['gx1', ''], ['gx2', ''], ['gx3', '']])

        (s, gex) = epidb.list_expressions("gene", "s2", [1, 2], None,
                                          self.admin_key)
        self.assertEqual(gex, [['gx1', ''], ['gx2', '']])

        (s, gex) = epidb.list_expressions("gene", "s2", 44, None,
                                          self.admin_key)
        self.assertEqual(gex, [['gx3', '']])

        (s, gex) = epidb.list_expressions("gene", None, 1, "DEEP",
                                          self.admin_key)
        self.assertEqual(gex, [['gx1', '']])

        data = read_gz(
            "data/fpkm/51_Hf03_BlTN_Ct_mRNA_M_1.LXPv1.20150708_genes.fpkm_tracking.gz"
        )
        (s, gene_expression) = epidb.add_expression("gene", "s1", 1, data,
                                                    "cufflinks", "ENCODE",
                                                    None, self.admin_key)
        self.assertSuccess(s, gene_expression)

        (s, gex) = epidb.list_expressions("gene", None, None, None,
                                          self.admin_key)
        self.assertEqual(gex,
                         [['gx1', ''], ['gx2', ''], ['gx3', ''], ['gx4', '']])
        (s, gex) = epidb.list_expressions("gene", "s1", 1, None,
                                          self.admin_key)
        self.assertEqual(gex, [['gx4', '']])
        (s, gex) = epidb.list_expressions("gene", None, None, "ENCODE",
                                          self.admin_key)
        self.assertEqual(gex, [['gx4', '']])

        (s, gex) = epidb.list_expressions("gene", ["s1", "s2"], 1, None,
                                          self.admin_key)
        self.assertEqual(gex, [['gx1', ''], ['gx4', '']])

        (s, gex) = epidb.list_expressions("gene", ["s1", "s2"], 2, None,
                                          self.admin_key)
        self.assertEqual(gex, [['gx2', '']])

        (s, gex) = epidb.list_expressions("gene", None, 1, "ENCODE",
                                          self.admin_key)
        self.assertEqual(gex, [['gx4', '']])

        # Create a second user and set its permission level to GET_DATA.
        s, user = epidb.add_user("user", "email", "institution",
                                 self.admin_key)
        # Check the status before unpacking so a failed add_user is reported
        # as such instead of as an unpacking error.
        self.assertSuccess(s)
        (user_id, user_key) = user
        (s, ss) = epidb.modify_user_admin(user_id, "permission_level",
                                          "GET_DATA", self.admin_key)
        self.assertSuccess(s, ss)

        # With the new user's key the listings are empty and the ENCODE
        # project lookup yields an error message.
        (s, gex) = epidb.list_expressions("gene", None, None, None, user_key)
        self.assertEqual(gex, [])
        (s, gex) = epidb.list_expressions("gene", "s1", 1, None, user_key)
        self.assertEqual(gex, [])
        (s, gex) = epidb.list_expressions("gene", None, None, "ENCODE",
                                          user_key)
        self.assertEqual(gex, "107000:Project 'ENCODE' does not exist.")

        (s, info) = epidb.info(gene_expression, self.admin_key)

        self.assertEqual(
            info[0], {
                'format':
                'TRACKING_ID,GENE_ID,GENE_SHORT_NAME,FPKM,FPKM_CONF_LO,FPKM_CONF_HI,FPKM_STATUS',
                'sample_info': {
                    'biosource_name': 'K562',
                    'karyotype': 'cancer',
                    'sex': 'F'
                },
                'content_format':
                'cufflinks',
                'total_genes':
                57910,
                'replica':
                1,
                'sample_id':
                's1',
                '_id':
                'gx4',
                'extra_metadata': {},
                'columns': [{
                    'name': 'TRACKING_ID',
                    'column_type': 'string'
                }, {
                    'name': 'GENE_ID',
                    'column_type': 'string'
                }, {
                    'name': 'GENE_SHORT_NAME',
                    'column_type': 'string'
                }, {
                    'name': 'FPKM',
                    'column_type': 'double'
                }, {
                    'name': 'FPKM_CONF_LO',
                    'column_type': 'double'
                }, {
                    'name': 'FPKM_CONF_HI',
                    'column_type': 'double'
                }, {
                    'name': 'FPKM_STATUS',
                    'column_type': 'string'
                }]
            })

        # Same type/sample/replica as gx4: the upload must be rejected.
        data = read_gz(
            "data/grape2/SP8-TH91.gene_quantification.rsem_grape2_crg.GRCh38.20150622.results.txt.gz"
        )
        (s, gene_expression) = epidb.add_expression("gene", "s1", 1, data,
                                                    "grape2", "ENCODE", None,
                                                    self.admin_key)
        self.assertEqual(
            gene_expression,
            "131001:A Expression of the type 'gene' with sample_id 's1' and replica '1' already exists."
        )

        # The same data with replica 2 is accepted.
        (s, gene_expression) = epidb.add_expression("gene", "s1", 2, data,
                                                    "grape2", "ENCODE", None,
                                                    self.admin_key)
        self.assertSuccess(s, gene_expression)

        data = read_gz("data/gtf/gencode.v19.annotation.ONLY_GENES.gtf.gz")
        (s, ss) = epidb.add_gene_model("gencode v19", "hg19",
                                       "Test One Description", data, "GTF", {},
                                       self.admin_key)
        self.assertSuccess(s, ss)

        (status, gx_query) = epidb.select_expressions("gene", "s1", 2,
                                                      "ENSG00000000003.13",
                                                      "ENCODE", "gencode v19",
                                                      self.admin_key)
        self.assertSuccess(status, gx_query)
        status, info = epidb.info(gx_query, user_key)
        (status, r_id) = epidb.get_regions(
            gx_query,
            "CHROMOSOME,START,END,@STRAND,GENE_ID,TRANSCRIPT_IDS,LENGTH,EFFECTIVE_LENGTH,EXPECTED_COUNT,TPM,FPKM,POSTERIOR_MEAN_COUNT,POSTERIOR_STANDARD_DEVIATION_OF_COUNT,PME_TPM,PME_FPKM,TPM_CI_LOWER_BOUND,TPM_CI_UPPER_BOUND,FPKM_CI_LOWER_BOUND,FPKM_CI_UPPER_BOUND",
            self.admin_key)
        self.assertSuccess(status, r_id)
        regions = self.get_regions_request(r_id)
        self.assertEqual(
            regions,
            "chrX\t99883667\t99894988\t-\tENSG00000000003.13\tENSG00000000003.13\t2025\t1855.4301\t161.0000\t1.0000\t2.1300\t161.0000\t0.0000\t1.0500\t2.2700\t0.8742\t1.2451\t1.8882\t2.6879"
        )

        (status, gx_query) = epidb.select_expressions("gene", "s1", 2,
                                                      "ENSG00000000003.13",
                                                      "ENCODE", "gencode v19",
                                                      self.admin_key)
        self.assertSuccess(status, gx_query)
        status, info = epidb.info(gx_query, user_key)
        (status, r_id) = epidb.get_regions(
            gx_query,
            "GENE_ID,TRANSCRIPT_IDS,LENGTH,EFFECTIVE_LENGTH,EXPECTED_COUNT,TPM,FPKM,POSTERIOR_MEAN_COUNT,POSTERIOR_STANDARD_DEVIATION_OF_COUNT,PME_TPM,PME_FPKM,TPM_CI_LOWER_BOUND,TPM_CI_UPPER_BOUND,FPKM_CI_LOWER_BOUND,FPKM_CI_UPPER_BOUND",
            self.admin_key)
        self.assertSuccess(status, r_id)
        regions = self.get_regions_request(r_id)
        self.assertEqual(
            regions,
            "ENSG00000000003.13\tENSG00000000003.13\t2025\t1855.4301\t161.0000\t1.0000\t2.1300\t161.0000\t0.0000\t1.0500\t2.2700\t0.8742\t1.2451\t1.8882\t2.6879"
        )

        # From here on the output columns are taken from the "format" field
        # of the info() document of "gx1".
        (status, gx_query) = epidb.select_expressions("gene", "s1", 1,
                                                      "OR4G11P", "ENCODE",
                                                      "gencode v19",
                                                      self.admin_key)
        self.assertSuccess(status, gx_query)
        status, info = epidb.info("gx1", user_key)
        (status, r_id) = epidb.get_regions(gx_query, info[0]["format"],
                                           self.admin_key)
        self.assertSuccess(status, r_id)
        regions = self.get_regions_request(r_id)
        self.assertEqual(
            regions,
            "ENSG00000240361.1\tENSG00000240361.1\tOR4G11P\t0.0000\t0.0000\t0.0000\tOK"
        )

        (status, gx_query) = epidb.select_expressions(
            "gene", "s1", 1,
            ['CCR1', 'CD164', 'CD1D', 'CD2', 'CD34', 'CD3G', 'CD44'], "ENCODE",
            "gencode v19", self.admin_key)
        self.assertSuccess(status, gx_query)
        status, info = epidb.info("gx1", user_key)
        (status, r_id) = epidb.get_regions(gx_query, info[0]["format"],
                                           self.admin_key)
        self.assertSuccess(status, r_id)
        regions_a = self.get_regions_request(r_id)

        expected = "ENSG00000135535.10\tENSG00000135535.10\tCD164\t101.3820\t98.8947\t103.8680\tOK\nENSG00000026508.12\tENSG00000026508.12\tCD44\t193.4920\t189.4020\t197.5830\tOK\nENSG00000160654.5\tENSG00000160654.5\tCD3G\t53.0051\t51.4405\t54.5696\tOK\nENSG00000163823.3\tENSG00000163823.3\tCCR1\t0.0201\t0.0000\t0.0433\tOK\nENSG00000116824.4\tENSG00000116824.4\tCD2\t90.0146\t87.9630\t92.0661\tOK\nENSG00000158473.6\tENSG00000158473.6\tCD1D\t0.0241\t0.0000\t0.0519\tOK\nENSG00000174059.12\tENSG00000174059.12\tCD34\t0.0000\t0.0000\t0.0000\tOK"

        # Compare line by line without relying on the row order.
        expected_lines = expected.split("\n")
        result_lines = regions_a.split("\n")

        self.assertEqual(len(result_lines), len(expected_lines))
        for line in result_lines:
            self.assertIn(line, expected_lines)

        (status, gx_query) = epidb.select_expressions("gene", "s1", 1, 'CCR1',
                                                      "ENCODE", "gencode v19",
                                                      self.admin_key)
        self.assertSuccess(status, gx_query)
        status, info = epidb.info("gx1", user_key)
        (status, r_id) = epidb.get_regions(gx_query, info[0]["format"],
                                           self.admin_key)
        self.assertSuccess(status, r_id)
        regions = self.get_regions_request(r_id)
        self.assertEqual(
            regions,
            "ENSG00000163823.3\tENSG00000163823.3\tCCR1\t0.0201\t0.0000\t0.0433\tOK"
        )

        q1 = gx_query

        (status, gx_query) = epidb.select_expressions("gene", "s1", 1, 'CD164',
                                                      "ENCODE", "gencode v19",
                                                      self.admin_key)
        self.assertSuccess(status, gx_query)
        status, info = epidb.info("gx1", user_key)
        (status, r_id) = epidb.get_regions(gx_query, info[0]["format"],
                                           self.admin_key)
        self.assertSuccess(status, r_id)
        regions = self.get_regions_request(r_id)
        self.assertEqual(
            regions,
            "ENSG00000135535.10\tENSG00000135535.10\tCD164\t101.3820\t98.8947\t103.8680\tOK"
        )

        # Selecting a different gene must yield a different query value.
        self.assertNotEqual(q1, gx_query)

        # ss still holds the value returned by add_gene_model above.
        (s, info) = epidb.info(ss, self.admin_key)

        self.assertEqual(
            info[0], {
                'total_genes': 57820,
                '_id': 'gs1',
                'genome': 'hg19',
                'description': 'Test One Description',
                'format': 'GTF',
                'name': 'gencode v19'
            })

        (status, gene_info) = epidb.info("gn1", self.admin_key)
        self.assertEqual(
            gene_info[0], {
                'transcript_status': 'KNOWN',
                'gene_name': 'DDX11L1',
                'gene_type': 'pseudogene',
                'end': 14412,
                'source': 'HAVANA',
                'frame': '.',
                'level': '2',
                'gene_id': 'ENSG00000223972.4',
                'start': 11869,
                'transcript_id': 'ENSG00000223972.4',
                'score': 0.0,
                'strand': '+',
                'havana_gene': 'OTTHUMG00000000961.2',
                'transcript_name': 'DDX11L1',
                '_id': 'gn1',
                'gene_status': 'KNOWN',
                'transcript_type': 'pseudogene',
                'chromosome': 'chr1'
            })

        # Expected output of both filter runs below (admin key and user key).
        expected_lowdata = "ENSG00000240755.1\tLOWDATA\ts1\tK562\nENSG00000256386.1\tLOWDATA\ts1\tK562\nENSG00000198743.5\tLOWDATA\ts1\tK562\nENSG00000267937.1\tLOWDATA\ts1\tK562\nENSG00000238556.1\tLOWDATA\ts1\tK562\nENSG00000255902.1\tLOWDATA\ts1\tK562\nENSG00000266692.1\tLOWDATA\ts1\tK562"

        (status, query) = epidb.select_expressions("gene", "s1",
                                                   [1, 5, 10, 122], None,
                                                   "ENCODE", "gencode v19",
                                                   self.admin_key)

        self.assertSuccess(status, query)
        (status, filtered) = epidb.filter_regions(query, "FPKM_STATUS", "!=",
                                                  "OK", "string",
                                                  self.admin_key)
        self.assertSuccess(status, filtered)
        (status, filtered_chr) = epidb.filter_regions(filtered, "CHROMOSOME",
                                                      "==", "chr21", "string",
                                                      self.admin_key)
        self.assertSuccess(status, filtered_chr)
        (status, r_id) = epidb.get_regions(
            filtered_chr, "GENE_ID,FPKM_STATUS,@SAMPLE_ID,@BIOSOURCE",
            self.admin_key)
        self.assertSuccess(status, r_id)

        regions = self.get_regions_request(r_id)

        self.assertEqual(regions, expected_lowdata)

        # Same pipeline with the user key and an empty project argument.
        (status, query) = epidb.select_expressions("gene", "s1",
                                                   [1, 5, 10, 122], None, "",
                                                   "gencode v19", user_key)

        self.assertSuccess(status, query)
        (status, filtered) = epidb.filter_regions(query, "FPKM_STATUS", "!=",
                                                  "OK", "string", user_key)
        self.assertSuccess(status, filtered)
        (status, filtered_chr) = epidb.filter_regions(filtered, "CHROMOSOME",
                                                      "==", "chr21", "string",
                                                      user_key)
        self.assertSuccess(status, filtered_chr)
        (status,
         req) = epidb.get_regions(filtered_chr,
                                  "GENE_ID,FPKM_STATUS,@SAMPLE_ID,@BIOSOURCE",
                                  user_key)
        # Check the request just created; the original passed the stale
        # r_id from the admin-key run here.
        self.assertSuccess(status, req)

        # Poll the request once per second with the user key until its state
        # is "done". NOTE(review): there is no timeout, so a request that
        # never reaches "done" blocks the test.
        (s, ss) = epidb.info(req, user_key)
        while ss[0]["state"] != "done":
            time.sleep(1)
            (s, ss) = epidb.info(req, user_key)

        s, regions = epidb.get_request_data(req, user_key)

        self.assertEqual(regions, expected_lowdata)
示例#10
0
    def test_complex2(self):
        """Chain selections, intersections, a merge and filters on big data.

        Intermediate region counts are checked at several steps and the final
        region output is compared with the stored "complex2" result.
        """
        client = DeepBlueClient(address="localhost", port=31415)
        self.init_full(client)

        for experiment_name in ("hg19_big_1", "hg19_big_2"):
            self.insert_experiment(client, experiment_name)

        def count_of(query_id):
            # Issue count_regions for the query, check that it succeeded and
            # return what count_request yields for the resulting request.
            status, request = client.count_regions(query_id, self.admin_key)
            self.assertSuccess(status, request)
            return self.count_request(request)

        status, by_range = client.select_regions(
            "hg19_big_1", "hg19", None, None, None, None, None, 1000000,
            3000000, self.admin_key)
        self.assertSuccess(status, by_range)
        count_of(by_range)  # value not asserted

        status, by_chromosome = client.select_regions(
            "hg19_big_1", "hg19", None, None, None, None,
            ["chr1", "chr3", "chr11", "chrX", "chr9"], None, None,
            self.admin_key)
        self.assertSuccess(status, by_chromosome)
        count_of(by_chromosome)  # value not asserted

        # This gives us regions from 1,000,000 to 3,000,000 on chromosomes
        # chr1, chr3, chr9, chr11 and chrX.
        status, ranged_on_chromosomes = client.intersection(
            by_range, by_chromosome, self.admin_key)
        self.assertSuccess(status, ranged_on_chromosomes)
        self.assertEqual(count_of(ranged_on_chromosomes), 247)

        status, tiling_query = client.tiling_regions(
            1000, "hg19", ["chr1", "chr2", "chr15", "chrX"], self.admin_key)
        self.assertSuccess(status, tiling_query)
        count_of(tiling_query)  # value not asserted

        status, merged_query = client.merge_queries(
            ranged_on_chromosomes, tiling_query, self.admin_key)
        self.assertSuccess(status, merged_query)
        count_of(merged_query)  # value not asserted

        status, from_two_million = client.filter_regions(
            merged_query, "START", ">=", "2000000", "number", self.admin_key)
        self.assertSuccess(status, from_two_million)
        count_of(from_two_million)  # value not asserted

        status, second_experiment = client.select_regions(
            "hg19_big_2", "hg19", None, None, None, None, ["chr1", "chrX"],
            None, None, self.admin_key)
        self.assertSuccess(status, second_experiment)
        (status, second_experiment_cached) = client.query_cache(
            second_experiment, True, self.admin_key)
        self.assertEqual(count_of(second_experiment_cached), 8961)

        status, overlap_query = client.intersection(
            from_two_million, second_experiment_cached, self.admin_key)
        self.assertSuccess(status, overlap_query)
        self.assertEqual(count_of(overlap_query), 14356)

        status, below_end_bound = client.filter_regions(
            overlap_query, "END", "<", "2200000", "number", self.admin_key)
        self.assertSuccess(status, below_end_bound)

        # NOTE(review): the value returned by query_cache is not used; the
        # count and the region output below run on the uncached query, as in
        # the original test. Confirm whether the cached one was intended.
        (status, below_end_bound_cached) = client.query_cache(
            below_end_bound, True, self.admin_key)
        self.assertEqual(count_of(below_end_bound), 52)

        status, request = client.get_regions(
            below_end_bound,
            "CHROMOSOME,START,END,NAME,SCORE,STRAND,SIGNAL_VALUE,P_VALUE,Q_VALUE,PEAK,@NAME",
            self.admin_key)
        self.assertSuccess(status, request)
        actual_output = self.get_regions_request(request)

        self.assertEqual(actual_output, helpers.get_result("complex2"))
# Example #11
# 0
    def test_aggregation(self):
        """Aggregate CpG island lengths over a genome tiling and verify results.

        Uploads the "Cpg Islands" annotation, builds a tiling of hg19 with
        tile size 1000000, aggregates the annotation's ``@LENGTH`` column
        over the tiles, and then checks:

        * the number of tiles (3118),
        * the number of tiles with ``@AGG.COUNT > 0`` (2574),
        * that no region has ``@AGG.COUNT < 0``,
        * the exact aggregation statistics of the 12 tiles with
          ``@AGG.COUNT >= 100``.
        """
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        cpg_island = ",".join([
            "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG",
            "NUM_GC", "PER_CPG", "PER_CG", "OBS_EXP"
        ])

        # Only the read needs the file handle; close it before the
        # (potentially slow) server round trips.
        with open("data/cpgIslandExtFull.txt", 'r') as f:
            file_data = f.read()

        res, a_1 = epidb.add_annotation("Cpg Islands", "hg19",
                                        "Complete CpG islands",
                                        file_data, cpg_island, None,
                                        self.admin_key)
        self.assertSuccess(res, a_1)
        res, q_cgi = epidb.select_annotations("Cpg Islands", "hg19", None,
                                              None, None, self.admin_key)
        self.assertSuccess(res, q_cgi)

        res, qid_2 = epidb.tiling_regions(1000000, "hg19", None,
                                          self.admin_key)
        self.assertSuccess(res, qid_2)
        res, req = epidb.count_regions(qid_2, self.admin_key)
        self.assertSuccess(res, req)
        count = self.count_request(req)
        self.assertEqual(count, 3118)

        res, _qid_3 = epidb.aggregate(q_cgi, qid_2, "@LENGTH", self.admin_key)
        self.assertSuccess(res, _qid_3)

        # All further filters on the aggregation go through the cached query.
        res, qid_3 = epidb.query_cache(_qid_3, True, self.admin_key)
        self.assertSuccess(res, qid_3)

        # Tiles that contain at least one CpG island.
        res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">", "0",
                                          "number", self.admin_key)
        self.assertSuccess(res, qid_4)
        res, req = epidb.count_regions(qid_4, self.admin_key)
        self.assertSuccess(res, req)
        count = self.count_request(req)
        self.assertEqual(count, 2574)

        # NOTE(review): this filter is applied to the plain tiling (qid_2),
        # not to the aggregated query (qid_3) like the others -- confirm
        # whether that is intentional. Kept exactly as in the original.
        res, qid_4 = epidb.filter_regions(qid_2, "@AGG.COUNT", "<", "0",
                                          "number", self.admin_key)
        res, req = epidb.count_regions(qid_4, self.admin_key)
        count = self.count_request(req)
        self.assertEqual(count, 0)

        # Tiles with at least 100 aggregated regions, with full statistics.
        res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">=", "100",
                                          "number", self.admin_key)
        self.assertSuccess(res, qid_4)
        res, req = epidb.get_regions(
            qid_4,
            "CHROMOSOME,START,END,@AGG.MIN,@AGG.MAX,@AGG.MEDIAN,@AGG.MEAN,@AGG.VAR,@AGG.SD,@AGG.COUNT,@AGG.SUM",
            self.admin_key)
        self.assertSuccess(res, req)
        regions = self.get_regions_request(req)

        # One row per tile, columns in the order requested above.
        expected = "\n".join([
            'chr1\t1000000\t2000000\t201.0000\t5585.0000\t469.0000\t766.0082\t589695.4375\t767.9163\t122\t93453.0000',
            'chr16\t0\t1000000\t201.0000\t6377.0000\t484.0000\t746.6083\t674998.0625\t821.5826\t120\t89593.0000',
            'chr16\t1000000\t2000000\t201.0000\t5449.0000\t398.0000\t666.6393\t630197.3125\t793.8497\t122\t81330.0000',
            'chr16\t2000000\t3000000\t201.0000\t4843.0000\t533.0000\t780.4951\t559994.2500\t748.3276\t101\t78830.0000',
            'chr16\t88000000\t89000000\t202.0000\t3785.0000\t347.0000\t553.3461\t295400.6875\t543.5078\t104\t57548.0000',
            'chr19\t0\t1000000\t201.0000\t7814.0000\t424.0000\t776.2705\t944608.4375\t971.9097\t122\t94705.0000',
            'chr19\t1000000\t2000000\t201.0000\t6035.0000\t430.0000\t738.8853\t625527.1250\t790.9027\t183\t135216.0000',
            'chr19\t2000000\t3000000\t201.0000\t3978.0000\t395.0000\t673.9907\t444749.5000\t666.8954\t107\t72117.0000',
            'chr19\t3000000\t4000000\t201.0000\t2753.0000\t387.0000\t531.0648\t172512.1094\t415.3458\t108\t57355.0000',
            'chr20\t62000000\t63000000\t202.0000\t5019.0000\t501.0000\t716.2427\t427763.9375\t654.0366\t103\t73773.0000',
            'chr7\t0\t1000000\t201.0000\t6234.0000\t348.0000\t556.3500\t475220.5625\t689.3624\t100\t55635.0000',
            'chr9\t139000000\t140000000\t202.0000\t6342.0000\t406.0000\t777.3303\t817548.5625\t904.1839\t109\t84729.0000',
        ])

        self.assertEqual(regions, expected)
        res, req = epidb.count_regions(qid_4, self.admin_key)
        self.assertSuccess(res, req)
        count = self.count_request(req)
        self.assertEqual(count, 12)
# Example #12
# 0
    def test_cancel_aggregation(self):
        """Check who may cancel a request and the state it ends up in.

        Creates a count request and an aggregation ``get_regions`` request
        as the admin, then verifies that:

        * another user's ``cancel_request`` on them is answered with an
          "invalid request ID" message,
        * the admin can cancel its own requests,
        * the admin can cancel a request created by another user, while a
          third user cannot,
        * ``info`` reports the expected states afterwards and
          ``get_request_data`` returns the "not finished" message for the
          cancelled requests.
        """
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        sample_id = self.sample_ids[0]

        self.insert_experiment(epidb, "hg19_big_2", sample_id)

        cpg_island = ",".join([
            "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG",
            "NUM_GC", "PER_CPG", "PER_CG", "OBS_EXP"
        ])

        # Only the read needs the file handle; close it before talking to
        # the server.
        with open("data/cpgIslandExtFull.txt", 'r') as f:
            file_data = f.read()

        res, a_1 = epidb.add_annotation("Cpg Islands", "hg19",
                                        "Complete CpG islands",
                                        file_data, cpg_island, None,
                                        self.admin_key)
        self.assertSuccess(res, a_1)
        res, q_cgi = epidb.select_annotations("Cpg Islands", "hg19", None,
                                              None, None, self.admin_key)
        self.assertSuccess(res, q_cgi)

        res, qid_2 = epidb.tiling_regions(1000000, "hg19", None,
                                          self.admin_key)
        self.assertSuccess(res, qid_2)
        res, req_count = epidb.count_regions(qid_2, self.admin_key)
        self.assertSuccess(res, req_count)
        count = self.count_request(req_count)
        self.assertEqual(count, 3118)

        res, qid_3 = epidb.aggregate(q_cgi, qid_2, "@LENGTH", self.admin_key)
        self.assertSuccess(res, qid_3)

        res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">=", "100",
                                          "number", self.admin_key)
        self.assertSuccess(res, qid_4)
        # The result of this request is never fetched: it only exists so
        # that it can be cancelled below.
        res, req_regions = epidb.get_regions(
            qid_4,
            "CHROMOSOME,START,END,@AGG.MIN,@AGG.MAX,@AGG.MEDIAN,@AGG.MEAN,@AGG.VAR,@AGG.SD,@AGG.COUNT",
            self.admin_key)
        self.assertSuccess(res, req_regions)

        # A second user must not be able to cancel the admin's requests.
        # user_two[0] is passed as the user to modify and user_two[1] is
        # used as that user's key in the calls below.
        s, user_two = epidb.add_user("ANOTHER NAME", "ANOTHER EMAIL",
                                     "INSTITUTE", self.admin_key)
        self.assertSuccess(s, user_two)
        s, tmp_user = epidb.modify_user_admin(user_two[0], "permission_level",
                                              "GET_DATA", self.admin_key)
        self.assertSuccess(s, tmp_user)
        s, msg = epidb.cancel_request(req_regions, user_two[1])
        self.assertEqual(msg, "130003:The request ID 'r2' is invalid.")
        s, msg = epidb.cancel_request(req_count, user_two[1])
        self.assertEqual(msg, "130003:The request ID 'r1' is invalid.")

        # The owner (admin) can cancel both of its requests.
        s, m = epidb.cancel_request(req_regions, self.admin_key)
        self.assertSuccess(s, m)
        s, m = epidb.cancel_request(req_count, self.admin_key)
        self.assertSuccess(s, m)

        s, user_ass = epidb.add_user("ASS NAME", "ASS EMAIL", "INSTITUTE",
                                     self.admin_key)
        self.assertSuccess(s, user_ass)
        s, tmp_user = epidb.modify_user_admin(user_ass[0], "permission_level",
                                              "GET_DATA", self.admin_key)
        self.assertSuccess(s, tmp_user)

        # A request owned by user_two: a third user cannot cancel it, but
        # the admin can.
        res, q_cgi_other = epidb.select_annotations("Cpg Islands", "hg19",
                                                    None, None, None,
                                                    user_two[1])
        self.assertSuccess(res, q_cgi_other)
        res, req_other = epidb.get_regions(q_cgi_other,
                                           "CHROMOSOME,START,END",
                                           user_two[1])
        self.assertSuccess(res, req_other)

        s, msg = epidb.cancel_request(req_other, user_ass[1])
        self.assertEqual(msg, "130003:The request ID 'r3' is invalid.")
        s, m = epidb.cancel_request(req_other, self.admin_key)
        self.assertSuccess(s, m)
        s, ss = epidb.info(req_other, self.admin_key)
        self.assertEqual(ss[0]['state'], 'canceled')

        # The count request was already fetched via count_request() before
        # being cancelled and is expected to end up "removed"; the regions
        # request, never fetched, is expected to end up "canceled".
        s, ss_count = epidb.info(req_count, self.admin_key)
        self.assertEqual(ss_count[0]["state"], "removed")
        s, ss_regions = epidb.info(req_regions, self.admin_key)
        self.assertEqual(ss_regions[0]["state"], "canceled")

        s, e1 = epidb.get_request_data(req_count, self.admin_key)
        self.assertEqual(
            e1, "Request ID r1 was not finished. Please, check its status.")
        s, e2 = epidb.get_request_data(req_regions, self.admin_key)
        self.assertEqual(
            e2, "Request ID r2 was not finished. Please, check its status.")