def test_double_experiment_same_user_fail(self):
    """A second user cannot reuse an experiment name that is already taken.

    The admin inserts ``test_exp1`` first.  A freshly created user whose
    ``permission_level`` is set to ``INCLUDE_EXPERIMENTS`` then tries to
    insert an experiment with exactly the same arguments and must be
    rejected with error ``102001``.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    uid, user_key = self.insert_user(epidb, "test_user")
    # Only the status of the reply is needed; the payload is discarded.
    s, _ = epidb.modify_user_admin(uid, "permission_level",
                                   "INCLUDE_EXPERIMENTS", self.admin_key)
    self.assertSuccess(s)

    sample_id = self.sample_ids[0]
    regions_data = helpers.load_bed("hg19_chr1_1")
    # Named ``fmt`` so that the ``format`` builtin is not shadowed.
    fmt = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    # Adding the same experiment a second time must fail, even when the
    # second insertion is made with a different user's key.
    exp = ("test_exp1", "hg19", "Methylation", sample_id, "tech1", "ENCODE",
           "desc1", regions_data, fmt, None)

    res = epidb.add_experiment(*(exp + (self.admin_key, )))
    self.assertSuccess(res)

    res = epidb.add_experiment(*(exp + (user_key, )))
    self.assertFailure(res)
    self.assertEqual(
        res[1],
        "102001:The experiment name 'test_exp1' is already being used.")
def test_add_with_out_of_bound_region(self):
    """Insert the ``hg18_out_of_bounds`` fixture with and without trimming.

    A plain insertion of the fixture is expected to fail.  With the
    ``__trim_to_chromosome_size__`` extra-metadata flag the insertion must
    succeed and the stored regions must equal the
    ``hg18_out_of_bounds_okay`` fixture.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    regions_data = helpers.load_bed("hg18_out_of_bounds")
    _format = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    # Without the trimming flag the server has to reject the data.
    res = epidb.add_experiment("test_exp1", "hg19", "Methylation", sample_id,
                               "tech1", "ENCODE", "desc1", regions_data,
                               _format, None, self.admin_key)
    self.assertFailure(res)

    res = epidb.add_experiment("test_exp1", "hg19", "Methylation", sample_id,
                               "tech1", "ENCODE", "desc1", regions_data,
                               _format, {"__trim_to_chromosome_size__": True},
                               self.admin_key)
    self.assertSuccess(res)

    # Check every status: a failed query would otherwise only surface as a
    # confusing error while fetching the request result.
    res, q_exp = epidb.select_regions("test_exp1", "hg19", None, None, None,
                                      None, None, None, None, self.admin_key)
    self.assertSuccess(res, q_exp)
    res, req = epidb.get_regions(q_exp, _format, self.admin_key)
    self.assertSuccess(res, req)

    data = self.get_regions_request(req)
    regions_data_okay = helpers.load_bed("hg18_out_of_bounds_okay")
    self.assertEqual(data, regions_data_okay)
def test_experiments_pass(self):
    """Insert two experiments, then check listings and name-to-id lookups."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample = self.sample_ids[0]
    bed_content = helpers.load_bed("hg19_chr1_1")
    columns = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    # Two experiments sharing the same data but not the name are both accepted.
    for exp_name in ("test_exp1", "test_exp2"):
        reply = epidb.add_experiment(exp_name, "hg19", "Methylation", sample,
                                     "tech1", "ENCODE", "desc1", bed_content,
                                     columns, None, self.admin_key)
        self.assertSuccess(reply)

    # Each entry: (list_experiments filters, expected number of hits).
    listings = (
        (("hg19", "peaks", None, "NO_BIOSOURCE", None, None, None), 0),
        ((None, None, None, None, None, None, None), 2),
        (("hg19", "peaks", None, "K562", None, None, None), 2),
    )
    for filters, expected_count in listings:
        res, experiments = epidb.list_experiments(
            *(filters + (self.admin_key, )))
        self.assertSuccess(res, experiments)
        self.assertEqual(len(experiments), expected_count)

    # ``experiments`` still holds the last (K562) listing here.
    experiments_names = [entry[1] for entry in experiments]
    self.assertTrue("test_exp1" in experiments_names)
    self.assertTrue("test_exp2" in experiments_names)

    # name_to_id accepts a list of names as well as a single name.
    _, ids = epidb.name_to_id(['test_exp1'], 'experiments', self.admin_key)
    self.assertEqual(ids, [['e1', 'test_exp1']])

    _, ids = epidb.name_to_id(['test_exp1', 'test_exp2'], 'experiments',
                              self.admin_key)
    self.assertEqual([['e1', 'test_exp1'], ['e2', 'test_exp2']], ids)

    _, ids = epidb.name_to_id('test_exp1', 'experiments', self.admin_key)
    self.assertEqual([['e1', 'test_exp1']], ids)
def test_wig_files(self):
    """Load several WIG fixtures and count the regions they hold together."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    wig_names = [
        "scores1", "scores2", "scores3", "scores4", "scores5", "scores6",
        "scores7", "yeast_pol2", "yeast_rap1"
    ]

    # Every fixture becomes an experiment named after its file.
    for wig_name in wig_names:
        content = helpers.load_wig(wig_name)
        outcome = epidb.add_experiment(wig_name, "hg19", "Methylation",
                                       sample_id, "tech1", "ENCODE", "desc1",
                                       content, "wig", None, self.admin_key)
        self.assertSuccess(outcome)

    _, query_id = epidb.select_regions(wig_names, "hg19", None, None, None,
                                       None, None, None, None, self.admin_key)

    # A regions request is issued as well, but its reply is not inspected.
    _, _ = epidb.get_regions(query_id, "CHROMOSOME, START, END, VALUE",
                             self.admin_key)

    _, count_request_id = epidb.count_regions(query_id, self.admin_key)
    total = self.count_request(count_request_id)

    self.assertEqual(5667, total)
def test_search_experiment_related(self):
    """Search for an experiment through related biosource names.

    Three biosources are created and linked with ``set_biosource_parent``;
    the experiment's sample is attached to "Carolina".  Searching for
    "Zebra" (a synonym given to "Ana") must return the experiment, and
    searching for "Carolina" must return the biosource, the sample and the
    experiment.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    for biosource in ("Ana", "Beatriz", "Carolina"):
        status = epidb.add_biosource(biosource, biosource, {}, self.admin_key)
        self.assertSuccess(status)

    status = epidb.set_biosource_parent("Beatriz", "Carolina", self.admin_key)
    self.assertSuccess(status)

    status, sample = epidb.add_sample("Carolina", {}, self.admin_key)
    self.assertSuccess(status, sample)

    region_line = "chr1\t1\t100"
    status, experiment = epidb.add_experiment(
        "las chicas", "hg19", "Methylation", sample, "tech1", "ENCODE",
        "interesting experiment", region_line, "CHROMOSOME,START,END", {},
        self.admin_key)
    self.assertSuccess(status, experiment)

    # These links are added only after the experiment already exists.
    status = epidb.set_biosource_parent("Ana", "Beatriz", self.admin_key)
    self.assertSuccess(status)

    status = epidb.set_biosource_synonym("Ana", "Zebra", self.admin_key)
    self.assertSuccess(status)

    _, by_synonym = epidb.search("Zebra", "experiments", self.admin_key)
    self.assertEqual([['e1', 'las chicas', 'experiments']], by_synonym)

    _, by_name = epidb.search("Carolina", [], self.admin_key)
    self.assertEqual([['bs5', 'Carolina', 'biosources'],
                      ['s3', '', 'samples'],
                      ['e1', 'las chicas', 'experiments']], by_name)
def test_error_maximum_number_of_instructions(self):
    """A never-ending calculated column ends with an instruction-limit error."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    # One experiment, loaded from a file path handed over as extra metadata.
    local_wig = {"__local_file__": "../tests/data/wig/scores1.wig"}
    insertion = epidb.add_experiment("test_exp1", "hg19", "Methylation",
                                     sample_id, "tech1", "ENCODE", "desc1",
                                     "", "wig", local_wig, self.admin_key)
    self.assertSuccess(insertion)

    status, query = epidb.select_regions("test_exp1", "hg19", None, None,
                                         None, None, None, None, None,
                                         self.admin_key)
    self.assertSuccess(status, query)

    # The calculated column loops forever ("while 1 do ... end"), so the
    # request itself is accepted but has to finish with an error.
    endless_format = "CHROMOSOME,START,END,VALUE,@CALCULATED(while 1 do math.log(value_of('VALUE')) end return 'never')"
    status, request = epidb.get_regions(query, endless_format, self.admin_key)
    self.assertSuccess(status, request)

    error_message = self.get_regions_request_error(request)
    self.assertEqual(error_message,
                     'The maximum number of instructions has been reached')
def test_wrong_column_creation(self):
    """Creating a calculated column from malformed code reports the parse error."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    # One experiment, loaded from a file path handed over as extra metadata.
    local_wig = {"__local_file__": "../tests/data/wig/scores1.wig"}
    insertion = epidb.add_experiment("test_exp1", "hg19", "Methylation",
                                     sample_id, "tech1", "ENCODE", "desc1",
                                     "", "wig", local_wig, self.admin_key)
    self.assertSuccess(insertion)

    status, query = epidb.select_regions("test_exp1", "hg19", None, None,
                                         None, None, None, None, None,
                                         self.admin_key)
    self.assertSuccess(status, query)

    # The opening quote before "EM" is missing on purpose.
    broken_code = "return EM and Name: - '.. value_of('@EPIGENETIC_MARK') .. ' - ' .. value_of('@NAME')"
    creation = epidb.create_column_type_calculated(
        "calculated", "description", broken_code, self.admin_key)
    self.assertFailure(creation)
    self.assertEqual(
        creation[1],
        '[string "function row_value()..."]:2: \'<name>\' expected near \'-\''
    )
def test_category_fail(self):
    """Insertion fails when the data do not fit a category column.

    ``STRAND_X`` is created as a category column with the items ``X`` and
    ``-`` and used in place of the strand column; the insertion must fail
    and the error message must mention ``STRAND_X``.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    column_names = [
        "CHROMOSOME", "START", "END", "NAME", "SCORE", "STRAND_X",
        "SIGNAL_VALUE", "P_VALUE", "Q_VALUE", "PEAK"
    ]
    columns = ",".join(column_names)

    creation = epidb.create_column_type_category(
        "STRAND_X", "strand of the region", ["X", "-"], self.admin_key)
    self.assertSuccess(creation)

    bed_content = helpers.load_bed("hg19_chr1_1")
    status, message = epidb.add_experiment(
        "test_exp_fail2", "hg19", "Methylation", sample_id, "tech1", "ENCODE",
        "desc1", bed_content, columns, None, self.admin_key)
    self.assertFailure(status, message)
    self.assertTrue("STRAND_X" in message)
def test_select_only_peaks_cached(self):
    """Counting a cached query twice yields the same count and request id.

    Two bedgraph fixtures are inserted, selected with
    ``select_experiments`` and wrapped by ``query_cache``.  Both
    ``count_regions`` calls must report 1009 regions and must return the
    same request identifier.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    files = ["reference_example", "test1"]

    for filename in files:
        bedgraph_data = helpers.load_bedgraph(filename)
        res = epidb.add_experiment(filename, "hg19", "Methylation", sample_id,
                                   "tech1", "ENCODE", "desc1", bedgraph_data,
                                   "bedgraph", None, self.admin_key)
        # A failed upload must stop the test here instead of showing up
        # later as a wrong region count.
        self.assertSuccess(res)

    (s, q) = epidb.select_experiments(files, "", None, None, self.admin_key)
    self.assertSuccess(s, q)

    (s, q_cache) = epidb.query_cache(q, True, self.admin_key)
    self.assertSuccess(s, q_cache)

    (s, req) = epidb.count_regions(q_cache, self.admin_key)
    self.assertSuccess(s, req)
    count = self.count_request(req)
    self.assertEqual(1009, count)

    (s, req2) = epidb.count_regions(q_cache, self.admin_key)
    self.assertSuccess(s, req2)
    count = self.count_request(req2)
    self.assertEqual(1009, count)

    # Issuing the same request again must hand back the same request id.
    self.assertEqual(req, req2)
def test_remove_technique(self):
    """A technique can be removed only after the experiment using it is gone."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    bed_content = helpers.load_bed("hg19_chr1_1")
    columns = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    status, technique_id = epidb.add_technique(
        "Hypster Technique", "I knew this technique before everybody else",
        {}, self.admin_key)
    self.assertSuccess(status, technique_id)

    # One experiment that uses the new technique.
    status, experiment_id = epidb.add_experiment(
        "test_exp1", "hg19", "Methylation", sample_id, "Hypster Technique",
        "ENCODE", "desc1", bed_content, columns, None, self.admin_key)
    self.assertSuccess(status)

    # While the experiment exists the technique cannot be removed.
    removal = epidb.remove(technique_id, self.admin_key)
    self.assertFailure(removal)

    removal = epidb.remove(experiment_id, self.admin_key)
    self.assertSuccess(removal)

    removal = epidb.remove(technique_id, self.admin_key)
    self.assertSuccess(removal)
def test_biosource_true_hierarchy(self):
    """Aggregate the ``SCORE`` column of four regions over one chr1 tile.

    The regions are aggregated against a single tiling region of
    150000000 bases on ``chr1``; the expected output row carries the
    minimum, maximum, mean and count of the scores.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    data = "chr1\t100\t110\t1\nchr1\t200\t400\t0\nchr1\t400\t500\t1\nchr1\t200\t400\t0\n"
    # Named ``fmt`` so that the ``format`` builtin is not shadowed.
    fmt = "CHROMOSOME,START,END,SCORE"

    (res, a_1) = epidb.add_experiment(
        "test", "hg19", "H3K4me3", sample_id, "tech1", "ENCODE",
        "wgEncodeBroadHistoneH1hescH3k27me3StdPk.bed from ENCODE", data, fmt,
        None, self.admin_key)
    self.assertSuccess(res, a_1)

    (s, q) = epidb.select_regions("test", "hg19", None, None, None, None,
                                  "chr1", None, None, self.admin_key)
    (s, tl) = epidb.tiling_regions(150000000, "hg19", "chr1", self.admin_key)
    res, qid_4 = epidb.aggregate(q, tl, "SCORE", self.admin_key)

    s, req = epidb.get_regions(
        qid_4,
        "CHROMOSOME,START,END,@AGG.MIN,@AGG.MAX,@AGG.MEAN,@AGG.COUNT",
        self.admin_key)
    self.assertSuccess(s, req)
    rs = self.get_regions_request(req)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(rs, "chr1\t0\t150000000\t0.0000\t1.0000\t0.5000\t4")
def test_remove_project(self):
    """A project can be removed only after the experiment in it is gone."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    bed_content = helpers.load_bed("hg19_chr1_1")
    columns = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    _, projects = epidb.list_projects(self.admin_key)
    # The first listed project is used: index 0 is passed to remove(),
    # index 1 is passed as the project argument of add_experiment().
    project_id = projects[0][0]
    project_name = projects[0][1]

    # One experiment that belongs to that project.
    status, experiment_id = epidb.add_experiment(
        "test_exp1", "hg19", "Methylation", sample_id, "tech1", project_name,
        "desc1", bed_content, columns, None, self.admin_key)
    self.assertSuccess(status, experiment_id)

    # While the experiment exists the project cannot be removed.
    removal = epidb.remove(project_id, self.admin_key)
    self.assertFailure(removal)

    removal = epidb.remove(experiment_id, self.admin_key)
    self.assertSuccess(removal)

    removal = epidb.remove(project_id, self.admin_key)
    self.assertSuccess(removal)
def test_remove_epigenetic_mark(self):
    """An epigenetic mark can be removed only once no experiment uses it."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    regions_data = helpers.load_bed("hg19_chr1_1")
    # Named ``fmt`` so that the ``format`` builtin is not shadowed.
    fmt = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    # The backslash is escaped explicitly: "\," is an invalid escape
    # sequence that newer Python versions warn about.  The resulting string
    # is unchanged (a backslash followed by ",,,/").
    (res, epigenetic_mark_id) = epidb.add_epigenetic_mark(
        "H3K666ac3", "acetil metal \\,,,/",
        {"category": "heavy histone metal"}, self.admin_key)
    self.assertSuccess(res, epigenetic_mark_id)

    # One experiment that uses the new epigenetic mark.
    res, eid = epidb.add_experiment("test_exp1", "hg19", "H3K666ac3",
                                    sample_id, "tech1", "ENCODE", "desc1",
                                    regions_data, fmt, None, self.admin_key)
    self.assertSuccess(res)

    # While the experiment exists the mark cannot be removed.
    res = epidb.remove(epigenetic_mark_id, self.admin_key)
    self.assertFailure(res)

    res = epidb.remove(eid, self.admin_key)
    self.assertSuccess(res)

    res = epidb.remove(epigenetic_mark_id, self.admin_key)
    self.assertSuccess(res)
def test_bug_do_not_reuse_existing_query(self):
    """Issuing identical queries twice must return identical query ids."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    regions = "chr1\t100\t110\t1\nchr1\t200\t400\t0\nchr1\t400\t500\t1\nchr1\t200\t400\t0\n"
    columns = "CHROMOSOME,START,END,SCORE"

    status, experiment_id = epidb.add_experiment(
        "test", "hg19", "H3K4me3", sample_id, "tech1", "ENCODE",
        "wgEncodeBroadHistoneH1hescH3k27me3StdPk.bed from ENCODE", regions,
        columns, None, self.admin_key)
    self.assertSuccess(status, experiment_id)

    # First round: a selection plus an experiment-type filter on top of it.
    _, q0 = epidb.select_regions("test", "hg19", None, None, None, None,
                                 "chr1", None, None, self.admin_key)
    _, q1 = epidb.query_experiment_type(q0, "peaks", self.admin_key)

    # Second round with the same arguments.
    # NOTE(review): the filter is applied to q0 again rather than to q00 --
    # confirm that this is intended.
    _, q00 = epidb.select_regions("test", "hg19", None, None, None, None,
                                  "chr1", None, None, self.admin_key)
    _, q11 = epidb.query_experiment_type(q0, "peaks", self.admin_key)

    self.assertEqual(q0, q00)
    self.assertEqual(q1, q11)
def test_bed_graph_files(self):
    """Load three bedgraph fixtures and count all of their regions."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    bedgraph_names = ["reference_example", "test1", "bigwig"]

    # Every fixture becomes an experiment named after its file.
    for bedgraph_name in bedgraph_names:
        content = helpers.load_bedgraph(bedgraph_name)
        outcome = epidb.add_experiment(bedgraph_name, "hg19", "Methylation",
                                       sample_id, "tech1", "ENCODE", "desc1",
                                       content, "bedgraph", None,
                                       self.admin_key)
        self.assertSuccess(outcome)

    _, query_id = epidb.select_regions(bedgraph_names, "hg19", None, None,
                                       None, None, None, None, None,
                                       self.admin_key)

    status, request_id = epidb.count_regions(query_id, self.admin_key)
    self.assertSuccess(status, request_id)
    total = self.count_request(request_id)

    # 3997106 // grep -v # *.bg | grep -v browser | grep -v track | wc -l
    self.assertEqual(3997106, total)
def test_calculated_metafield(self):
    """A calculated column can read the ``@EPIGENETIC_MARK`` and ``@NAME`` fields."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    # One experiment, loaded from a file path handed over as extra metadata.
    local_wig = {"__local_file__": "../tests/data/wig/scores1.wig"}
    insertion = epidb.add_experiment("test_exp1", "hg19", "Methylation",
                                     sample_id, "tech1", "ENCODE", "desc1",
                                     "", "wig", local_wig, self.admin_key)
    self.assertSuccess(insertion)

    status, query = epidb.select_regions("test_exp1", "hg19", None, None,
                                         None, None, None, None, None,
                                         self.admin_key)
    self.assertSuccess(status, query)

    column_code = "return 'EM and Name: - '.. value_of('@EPIGENETIC_MARK') .. ' - ' .. value_of('@NAME')"
    creation = epidb.create_column_type_calculated(
        "calculated", "description", column_code, self.admin_key)
    self.assertSuccess(creation)

    status, request = epidb.get_regions(
        query, "CHROMOSOME,START,END,VALUE, calculated", self.admin_key)
    self.assertSuccess(status, request)

    output = self.get_regions_request(request)
    first_row = output.split("\n")[0]
    # The calculated column is the fifth field of every row.
    calculated_value = first_row.split("\t")[4]
    self.assertEqual(calculated_value,
                     'EM and Name: - Methylation - test_exp1')
def test_calculated_get_region(self):
    """Inline ``@CALCULATED(...)`` columns are evaluated by ``get_regions``."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    # One experiment, loaded from a file path handed over as extra metadata.
    local_wig = {"__local_file__": "../tests/data/wig/scores1.wig"}
    insertion = epidb.add_experiment("test_exp1", "hg19", "Methylation",
                                     sample_id, "tech1", "ENCODE", "desc1",
                                     "", "wig", local_wig, self.admin_key)
    self.assertSuccess(insertion)

    status, query = epidb.select_regions("test_exp1", "hg19", None, None,
                                         None, None, None, None, None,
                                         self.admin_key)
    self.assertSuccess(status, query)

    # Two inline calculated columns: the log of VALUE, and a text that
    # depends on the experiment's epigenetic mark.
    output_format = "CHROMOSOME,START,END,VALUE,@CALCULATED(return math.log(value_of('VALUE'))),@CALCULATED(em = value_of('@EPIGENETIC_MARK') if em == 'Methylation' then return 'it is methylation!' else return 'it is not methylation' end)"
    status, request = epidb.get_regions(query, output_format, self.admin_key)
    self.assertSuccess(status, request)

    output = self.get_regions_request(request)
    first_row = output.split('\n')[0].split('\t')
    value = first_row[3]
    log_value = first_row[4]
    mark_text = first_row[5]

    self.assertEqual(value, '8.1235')
    self.assertEqual(log_value, '2.094756')
    self.assertEqual(mark_text, 'it is methylation!')
def test_double_experiment_fail(self):
    """Inserting the very same experiment twice with one user must fail."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    bed_content = helpers.load_bed("hg19_chr1_1")
    columns = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    # Full argument list, user key included, reused for both attempts.
    arguments = ("test_exp1", "hg19", "Methylation", sample_id, "tech1",
                 "ENCODE", "desc1", bed_content, columns, None,
                 self.admin_key)

    first_attempt = epidb.add_experiment(*arguments)
    self.assertSuccess(first_attempt)

    second_attempt = epidb.add_experiment(*arguments)
    self.assertFailure(second_attempt)
def test_insert_local_file(self):
    """Insert an experiment from a ``__local_file__`` path and read it back.

    A second insertion that points at a non-existent file must fail.  The
    stored regions must be the same whether the experiment is selected by
    name or by its id.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    status, experiment_id = epidb.add_experiment(
        "test_exp1", "hg19", "Methylation", sample_id, "tech1", "ENCODE",
        "desc1", "", "wig",
        {"__local_file__": "../tests/data/wig/scores1.wig"}, self.admin_key)
    self.assertSuccess(status, experiment_id)

    rejected = epidb.add_experiment(
        "test_exp1", "hg19", "Methylation", sample_id, "tech1", "ENCODE",
        "desc1", "", "wig", {"__local_file__": "inexistent_file.wig"},
        self.admin_key)
    self.assertFailure(rejected)

    status, experiments = epidb.list_experiments(
        "hg19", "signal", None, "K562", None, None, None, self.admin_key)
    self.assertSuccess(status, experiments)
    self.assertEqual(len(experiments), 1)

    # Selection by experiment name and genome.
    status, query = epidb.select_regions("test_exp1", "hg19", None, None,
                                         None, None, None, None, None,
                                         self.admin_key)
    self.assertSuccess(status, query)
    status, request = epidb.get_regions(query, "CHROMOSOME,START,END",
                                        self.admin_key)
    self.assertSuccess(status, request)
    regions_by_name = self.get_regions_request(request)

    # Selection by experiment id only.
    status, query = epidb.select_regions(experiment_id, None, None, None,
                                         None, None, None, None, None,
                                         self.admin_key)
    self.assertSuccess(status, query)
    status, request = epidb.get_regions(query, "CHROMOSOME,START,END",
                                        self.admin_key)
    self.assertSuccess(status, request)
    regions_by_id = self.get_regions_request(request)

    self.assertEqual(regions_by_name, regions_by_id)
def test_list_recent(self):
    """``list_recent_experiments`` only returns experiments inside the window.

    Two experiments are inserted five seconds apart.  After three more
    seconds a five-second window must contain only the second one, while a
    window of ``1.0`` must contain both.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    bed_content = helpers.load_bed("hg19_chr1_1")
    columns = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    # Everything except the experiment name is shared by both insertions.
    shared = ("hg19", "Methylation", sample_id, "tech1", "ENCODE", "desc1",
              bed_content, columns, None, self.admin_key)

    status, first_id = epidb.add_experiment("test_exp1", *shared)
    self.assertSuccess(status, first_id)

    # Put some distance between the two insertion times.
    time.sleep(5)

    status, second_id = epidb.add_experiment("test_exp2", *shared)
    self.assertEqual(status, "okay")

    time.sleep(3)

    # The window is expressed as a fraction of a day: this is five seconds.
    five_seconds = 1.0 / 24 / 60 / 60 * 5
    status, recent = epidb.list_recent_experiments(
        five_seconds, "hg19", None, None, None, None, self.admin_key)
    self.assertSuccess(status, recent)
    recent_names = [entry[1] for entry in recent]
    self.assertTrue("test_exp1" not in recent_names)
    self.assertTrue("test_exp2" in recent_names)

    status, recent = epidb.list_recent_experiments(
        1.0, "hg19", None, None, None, None, self.admin_key)
    self.assertSuccess(status, recent)
    recent_names = [entry[1] for entry in recent]
    self.assertTrue("test_exp1" in recent_names)
    self.assertTrue("test_exp2" in recent_names)
def test_calculated_math(self):
    """Calculated columns can do arithmetic and report unknown column names."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    # One experiment, loaded from a file path handed over as extra metadata.
    local_wig = {"__local_file__": "../tests/data/wig/scores1.wig"}
    insertion = epidb.add_experiment("test_exp1", "hg19", "Methylation",
                                     sample_id, "tech1", "ENCODE", "desc1",
                                     "", "wig", local_wig, self.admin_key)
    self.assertSuccess(insertion)

    status, query = epidb.select_regions("test_exp1", "hg19", None, None,
                                         None, None, None, None, None,
                                         self.admin_key)
    self.assertSuccess(status, query)

    def first_output_line(output_format):
        # Request the regions in the given format and return the first row.
        _, request = epidb.get_regions(query, output_format, self.admin_key)
        return self.get_regions_request(request).split("\n")[0]

    # A column that reads SCORE, which this experiment does not have.
    creation = epidb.create_column_type_calculated(
        "calculated_error", "description", "return value_of('SCORE')",
        self.admin_key)
    self.assertSuccess(creation)
    row = first_output_line("CHROMOSOME,START,END,VALUE, calculated_error")
    self.assertEqual(row,
                     "chr1\t0\t10\t8.1235\tInvalid column name SCORE")

    # Plain arithmetic on START, END and VALUE.
    creation = epidb.create_column_type_calculated(
        "calculated_dummy", "description",
        "return value_of('START') - value_of('END') * value_of('VALUE')",
        self.admin_key)
    self.assertSuccess(creation)
    row = first_output_line("CHROMOSOME,START,END,VALUE, calculated_dummy")
    self.assertEqual(row.split("\t")[4], '-81.234570')

    # Use of the math library inside the column code.
    creation = epidb.create_column_type_calculated(
        "calculated_norm_by_length", "description",
        "return math.sqrt(value_of('VALUE') / (value_of('END') - value_of('START')))",
        self.admin_key)
    self.assertSuccess(creation)
    row = first_output_line(
        "CHROMOSOME,START,END,VALUE, calculated_norm_by_length")
    self.assertEqual(row.split("\t")[4], '0.901302')
def test_add_with_invalid_epigenetic_mark(self):
    """An experiment with the epigenetic mark "No Epigenetic " is rejected."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    bed_content = helpers.load_bed("hg19_chr1_1")
    columns = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    arguments = ("test_exp1", "hg19", "No Epigenetic ", sample_id, "tech1",
                 "ENCODE", "desc1", bed_content, columns, None,
                 self.admin_key)
    outcome = epidb.add_experiment(*arguments)
    self.assertFailure(outcome)
def test_wig_clone_calculated(self):
    """Clone a WIG experiment with a renamed value column.

    Cloning with a calculated column in place of ``VALUE`` must fail with
    an incompatibility message.  Cloning with the simple double column
    ``METHYLATION_LEVEL`` must succeed, and the calculated
    ``METHYLATION_LEVEL_SQRT`` column must then be selectable on the
    clone.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    res, _id = epidb.add_experiment(
        "test_exp1", "hg19", "Methylation", sample_id, "tech1", "ENCODE",
        "desc1", "", "wig",
        {"__local_file__": "../tests/data/wig/scores1.wig"}, self.admin_key)
    # Pass the returned experiment id, not the ``id`` builtin.
    self.assertSuccess(res, _id)

    (s, m) = epidb.create_column_type_simple("METHYLATION_LEVEL", "",
                                             "double", self.admin_key)
    self.assertSuccess(s, m)

    res = epidb.create_column_type_calculated(
        "METHYLATION_LEVEL_SQRT", "Square root of the methylation level",
        "return math.sqrt(value_of('VALUE'))", self.admin_key)
    self.assertSuccess(res)

    # A calculated column cannot stand in for the original double column.
    (s, clone_id) = epidb.clone_dataset(
        _id, "New Wig File", "", "", "", "", "",
        "CHROMOSOME,START,END,METHYLATION_LEVEL_SQRT", None, self.admin_key)
    self.assertFailure(s, clone_id)
    self.assertEqual(
        clone_id,
        "The column 'METHYLATION_LEVEL_SQRT' (type: calculated) is incompatible with the original column 'VALUE' (type: double)"
    )

    (s, clone_id) = epidb.clone_dataset(
        _id, "New Wig File", "", "", "", "", "",
        "CHROMOSOME,START,END,METHYLATION_LEVEL", None, self.admin_key)
    self.assertSuccess(s, clone_id)

    (status, wig_data) = epidb.select_regions('New Wig File', "hg19", None,
                                              None, None, None, None, None,
                                              None, self.admin_key)
    (s, req) = epidb.get_regions(
        wig_data,
        "CHROMOSOME,START,END,METHYLATION_LEVEL,METHYLATION_LEVEL_SQRT",
        self.admin_key)
    rs = self.get_regions_request(req)

    # Split the output once and inspect the first and the seventh row.
    rows = rs.split("\n")
    first_row = rows[0].split("\t")
    seventh_row = rows[6].split("\t")

    self.assertEqual(first_row[3], "8.1235")
    self.assertEqual(first_row[4], "2.850168")

    self.assertEqual(seventh_row[3], "30.0000")
    self.assertEqual(seventh_row[4], "5.477226")
def test_load_bedgraph(self):
    """Load a gzipped bedgraph and compare a range select with an intersection.

    The experiment is addressed through its ``md5sum`` extra metadata
    (``#<md5sum>``).  The regions selected on ``chr19`` between 49388217
    and 49417994 must be non-empty and equal to the regions obtained by
    intersecting that selection with an input region covering the same
    range.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    # The context manager closes the archive even if reading fails.
    with gzip.open("data/bedgraph/chr19.txt.gz") as bedgraph_file:
        regions_data = bedgraph_file.read()

    res = epidb.add_experiment(
        "S0022IH2.ERX300681.H3K36me3.bwa.GRCh38.20150528.bedgraph", "hg19",
        "Methylation", sample_id, "tech1", "ENCODE", "desc1", regions_data,
        "bedgraph", {"md5sum": "afd4af5afd5afd4af5afd5afd4af5afd5"},
        self.admin_key)
    self.assertSuccess(res)

    (status, query_id) = epidb.select_regions(
        "#afd4af5afd5afd4af5afd5afd4af5afd5", None, None, None, None, None,
        "chr19", 49388217, 49417994, self.admin_key)
    self.assertSuccess(status, query_id)

    # Named ``input_query`` so that the ``input`` builtin is not shadowed.
    (status, input_query) = epidb.input_regions(
        "hg19", "chr19\t49388217\t49417994", self.admin_key)
    self.assertSuccess(status, input_query)

    (status, query_overlap) = epidb.intersection(query_id, input_query,
                                                 self.admin_key)
    self.assertSuccess(status, query_overlap)

    (status, request_id) = epidb.get_regions(
        query_id, "CHROMOSOME,START,END,VALUE", self.admin_key)
    self.assertSuccess(status, request_id)

    # The overlap request has to be built from the intersection query;
    # requesting ``query_id`` again would compare the selection with itself.
    (status, overlap_request_id) = epidb.get_regions(
        query_overlap, "CHROMOSOME,START,END,VALUE", self.admin_key)
    self.assertSuccess(status, overlap_request_id)

    by_select = self.get_regions_request(request_id)
    by_overlap = self.get_regions_request(overlap_request_id)

    self.assertEqual(by_overlap, by_select)
    self.assertTrue(len(by_select) > 0)

    (status, info) = epidb.info("#afd4af5afd5afd4af5afd5afd4af5afd5",
                                self.admin_key)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(info[0]["_id"], "e1")
def test_experiment_metadata(self):
    """Experiments can be found through the values of their extra metadata."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    bed_content = helpers.load_bed("hg19_chr1_1")
    columns = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    status, ncbi_id = epidb.add_experiment(
        "test_exp1", "hg19", "Methylation", sample_id, "tech1", "ENCODE",
        "desc1", bed_content, columns, {"source": "ncbi"}, self.admin_key)
    self.assertSuccess(status, ncbi_id)

    status, encode_id = epidb.add_experiment(
        "test_exp2", "hg19", "Methylation", sample_id, "tech1", "ENCODE",
        "desc1", bed_content, columns, {"source": "encode"}, self.admin_key)
    self.assertSuccess(status, encode_id)

    status, hits = epidb.search("ncbi", "Experiments", self.admin_key)
    self.assertSuccess(status, hits)
    self.assertEqual(hits[0][0], ncbi_id)

    # The second experiment should rank first: "encode" appears twice in it
    # (project name and extra metadata).
    status, hits = epidb.search("encode", "experiments", self.admin_key)
    self.assertSuccess(status, hits)
    self.assertEqual(hits[0][0], encode_id)
def test_experiment_info(self):
    """``info`` returns the complete description of an inserted experiment.

    The volatile ``upload_info`` fields (start, end, client address and
    total size) are overwritten with ``'0'`` before the whole record is
    compared; a set of individual fields is then checked again.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    # Named ``fmt`` so that the ``format`` builtin is not shadowed.
    fmt = ",".join([
        "CHROMOSOME", "START", "END", "NAME", "SCORE", "STRAND",
        "SIGNAL_VALUE", "P_VALUE", "Q_VALUE", "PEAK"
    ])

    with open("data/bed/hg19_chr1_1.bed") as f:
        res, eid = epidb.add_experiment(
            "exp1", "hg19", "Methylation", sample_id, "tech1", "ENCODE",
            "desc1", f.read(), fmt, {"foo": "bar", "extra": "123"},
            self.admin_key)
    self.assertSuccess(res, eid)

    res, data = epidb.info(eid, self.admin_key)
    # Check the status before indexing into the reply: on failure ``data``
    # would not be the expected list of records.
    self.assertEqual(res, 'okay')

    # Neutralise the fields that differ from run to run.
    data[0]["upload_info"]["upload_start"] = '0'
    data[0]["upload_info"]["upload_end"] = '0'
    data[0]["upload_info"]["client_address"] = '0'
    data[0]["upload_info"]["total_size"] = '0'

    # NOTE(review): the expected user is "test_admin", matching the
    # explicit per-field assertion further down; the previous literal
    # ('******') contradicted that assertion.  The duplicated 'karyotype'
    # key of 'sample_info' was dropped; the dictionary value is unchanged.
    expected = {
        'format': 'CHROMOSOME,START,END,NAME,SCORE,STRAND,SIGNAL_VALUE,P_VALUE,Q_VALUE,PEAK',
        'extra_metadata': {'foo': 'bar', 'extra': '123'},
        'sample_info': {
            'karyotype': 'cancer',
            'biosource_name': 'K562',
            'sex': 'F'
        },
        'technique': 'tech1',
        'upload_info': {
            'total_size': '0',
            'done': 'true',
            'user': 'test_admin',
            'upload_end': '0',
            'upload_start': '0',
            'client_address': '0'
        },
        'name': 'exp1',
        'project': 'ENCODE',
        'genome': 'hg19',
        'sample_id': 's1',
        'epigenetic_mark': 'Methylation',
        '_id': 'e1',
        'type': 'experiment',
        'columns': [
            {'name': 'CHROMOSOME', 'column_type': 'string'},
            {'name': 'START', 'column_type': 'integer'},
            {'name': 'END', 'column_type': 'integer'},
            {'name': 'NAME', 'column_type': 'string'},
            {'name': 'SCORE', 'column_type': 'double'},
            {'name': 'STRAND', 'column_type': 'category', 'items': '+,-,.'},
            {'name': 'SIGNAL_VALUE', 'column_type': 'double'},
            {'name': 'P_VALUE', 'column_type': 'double'},
            {'name': 'Q_VALUE', 'column_type': 'double'},
            {'name': 'PEAK', 'column_type': 'integer'}
        ],
        'description': 'desc1',
        'data_type': 'peaks'
    }
    self.assertEqual(data[0], expected)

    # Individual fields, checked against the values used for the insertion.
    self.assertEqual(data[0]['sample_id'], sample_id)
    self.assertEqual(data[0]['description'], "desc1")
    self.assertEqual(data[0]['extra_metadata'],
                     {"foo": "bar", "extra": "123"})
    self.assertEqual(data[0]['epigenetic_mark'], "Methylation")
    self.assertEqual(data[0]['genome'], "hg19")
    self.assertEqual(data[0]['name'], "exp1")
    self.assertEqual(data[0]['project'], "ENCODE")
    self.assertEqual(data[0]['technique'], "tech1")
    self.assertEqual(data[0]['upload_info']['user'], "test_admin")
    self.assertEqual(
        data[0]['format'],
        "CHROMOSOME,START,END,NAME,SCORE,STRAND,SIGNAL_VALUE,P_VALUE,Q_VALUE,PEAK")
    self.assertEqual(data[0]['_id'], eid)
def test_basic_distinct(self):
    """``distinct_column_values`` counts the occurrences of each ``NAME``."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    with open("data/wgEncodeBroadHmmGm12878HMM.bed", 'r') as f:
        data = f.read()

    sample_id = self.sample_ids[0]
    fmt = "CHROMOSOME,START,END,NAME,SCORE,STRAND,THICK_START,THICK_END,ITEM_RGB"

    # It is Chromatin State Segmentation data; the metadata below are only
    # filled in so that the experiment can be inserted.
    (res, a_1) = epidb.add_experiment(
        "test", "hg19", "H3K4me3", sample_id, "tech1", "ENCODE",
        "wgEncodeBroadHistoneH1hescH3k27me3StdPk.bed from ENCODE", data, fmt,
        None, self.admin_key)
    # A failed upload must stop the test here instead of showing up later
    # as a wrong set of distinct values.
    self.assertSuccess(res, a_1)

    res, qid = epidb.select_regions("test", "hg19", None, None, None, None,
                                    None, None, None, self.admin_key)
    self.assertSuccess(res, qid)

    status, req = epidb.distinct_column_values(qid, "NAME", self.admin_key)
    self.assertSuccess(status, req)
    distinct = self.get_regions_request(req)

    self.assertEqual(
        distinct, {
            'distinct': {
                '13_Heterochrom/lo': 75112,
                '2_Weak_Promoter': 35065,
                '7_Weak_Enhancer': 109468,
                '15_Repetitive/CNV': 6128,
                '12_Repressed': 25483,
                '4_Strong_Enhancer': 25486,
                '5_Strong_Enhancer': 38604,
                '11_Weak_Txn': 82312,
                '3_Poised_Promoter': 5263,
                '8_Insulator': 33265,
                '14_Repetitive/CNV': 8028,
                '1_Active_Promoter': 15278,
                '6_Weak_Enhancer': 69111,
                '10_Txn_Elongation': 26509,
                '9_Txn_Transition': 16227
            }
        })
def test_experiments_preview(self):
    """``preview_experiment`` returns a header line plus the first five rows."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    bed_content = helpers.load_bed("hg19_chr1_1")
    columns = data_info.EXPERIMENTS["hg19_chr1_1"]["format"]

    insertion = epidb.add_experiment("test_exp1", "hg19", "Methylation",
                                     sample_id, "tech1", "ENCODE", "desc1",
                                     bed_content, columns, None,
                                     self.admin_key)
    self.assertSuccess(insertion)

    _, preview = epidb.preview_experiment('test_exp1', self.admin_key)

    # Adjacent literals are joined at compile time: one piece per output
    # line, and no newline after the last one.
    expected_preview = (
        'CHROMOSOME\tSTART\tEND\tNAME\tSCORE\tSTRAND\tSIGNAL_VALUE\tP_VALUE\tQ_VALUE\tPEAK\n'
        'chr1\t713240\t713390\t.\t0.0000\t+\t21.0000\t69.6000\t-1.0000\t-1\n'
        'chr1\t713520\t713670\t.\t0.0000\t-\t21.0000\t22.4866\t-1.0000\t-1\n'
        'chr1\t713900\t714050\t.\t0.0000\t+\t59.0000\t71.2352\t-1.0000\t-1\n'
        'chr1\t714160\t714310\t.\t0.0000\t+\t22.0000\t101.8740\t-1.0000\t-1\n'
        'chr1\t714540\t714690\t.\t0.0000\t+\t77.0000\t105.3120\t-1.0000\t-1'
    )
    self.assertEqual(preview, expected_preview)
def test_wig_files_pass(self):
    """Every WIG fixture in the ``should_pass`` directory is accepted."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    accepted_names = [
        "chr_unsorted", "empty_lines", "fix_span", "line_breaks",
        "null_scores", "tab_seperator", "var_simple", "comments",
        "fix_simple", "inverted_header", "long_header", "negative_scores",
        "strange_chromosome", "var_span"
    ]

    # Every fixture becomes an experiment named after its file.
    for wig_name in accepted_names:
        fixture_path = "should_pass/%s" % wig_name
        content = helpers.load_wig(fixture_path)
        outcome = epidb.add_experiment(wig_name, "hg19", "Methylation",
                                       sample_id, "tech1", "ENCODE", "desc1",
                                       content, "wig", None, self.admin_key)
        self.assertSuccess(outcome)
def test_change_extra_metadata(self):
    """Extra metadata of experiments and samples can be changed afterwards.

    After the change the new values must be reported by ``info``, the new
    value must be searchable and the old value must no longer be found.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    region_line = "chr1\t1\t100"
    columns = ""

    original_metadata = {"NAME": "FELIPE", "LAST_NAME": "ALBRECHT"}
    insertion = epidb.add_experiment("test_exp1", "hg19", "Methylation",
                                     sample_id, "tech1", "ENCODE", "desc1",
                                     region_line, columns, original_metadata,
                                     self.admin_key)
    self.assertSuccess(insertion)
    experiment_id = insertion[1]

    change = epidb.change_extra_metadata(experiment_id, "NAME", "JOSE",
                                         self.admin_key)
    self.assertSuccess(change)

    change = epidb.change_extra_metadata(experiment_id, "LAST_NAME",
                                         "FERNANDES", self.admin_key)
    self.assertSuccess(change)

    status, info = epidb.info(experiment_id, self.admin_key)
    self.assertSuccess(status, info)
    self.assertEqual({"NAME": "JOSE", "LAST_NAME": "FERNANDES"},
                     info[0]["extra_metadata"])

    # The new value is searchable, the replaced one is not.
    _, hits = epidb.search("JOSE", "", self.admin_key)
    self.assertEqual(1, len(hits))

    _, hits = epidb.search("FELIPE", "", self.admin_key)
    self.assertEqual(0, len(hits))

    # The same call also works on a sample id.
    change = epidb.change_extra_metadata(sample_id, "source", "ENCODE",
                                         self.admin_key)
    self.assertSuccess(change)

    _, sample_info = epidb.info(sample_id, self.admin_key)
    self.assertEqual(sample_info[0]["source"], "ENCODE")