def test_gene_annotations():
    """Aggregate the mm10 exon-union BED file and spot-check the tiles.

    Runs ``cca.bedfile`` on ``sample_data/exon_unions_mm10.bed`` with at most
    20 entries per tile, then checks that the tile at zoom 0, position 0
    holds two entries and that the first entry of tiles 113 and 112 at zoom
    level 11 has ``'Lrp1b'`` as its fourth field.
    """
    input_file = op.join(testdir, 'sample_data', 'exon_unions_mm10.bed')

    # Write into a private temporary directory so the generated tile file is
    # removed when the test finishes, whether it passes or fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = op.join(tmp_dir, 'exon_unions_mm10.beddb')

        result = clt.CliRunner().invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '20',
             '--importance-column', '5',
             '--delimiter', '\t',
             '--assembly', 'mm10',
             '--output-file', output_file])
        # Surface a failed aggregation directly (with the command output)
        # instead of as an obscure error while reading the tile file.
        assert result.exit_code == 0, result.output

        rows = cdt.get_tiles(output_file, 0, 0)
        assert len(rows[0]) == 2

        for position in (113, 112):
            rows = cdt.get_tiles(output_file, 11, position)
            assert rows[position][0]['fields'][3] == 'Lrp1b'
def test_clodius_aggregate_bedfile():
    """Aggregate the short hg19 annotation file with and without a delimiter.

    The same input is aggregated twice into the same output file. With
    ``--delimiter '\\t'`` the first entry of tile 3 at zoom level 6 must have
    14 fields; when ``--delimiter`` is omitted it must have 17 fields.
    """
    # Local import: this block did not previously rely on ``tempfile``.
    import tempfile

    input_file = op.join(testdir, 'sample_data',
                         'geneAnnotationsExonsUnions.hg19.short.bed')

    # A private temporary directory replaces the fixed '/tmp/...' path, which
    # was never cleaned up and could collide between concurrent runs.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = op.join(
            tmp_dir, 'geneAnnotationsExonsUnions.hg19.short.bed')

        # Command-line values are passed as strings, as they would arrive
        # from a real command line.
        common_args = [
            input_file,
            '--max-per-tile', '20',
            '--importance-column', '5',
            '--assembly', 'hg19',
            '--output-file', output_file,
        ]

        result = clt.CliRunner().invoke(
            cca.bedfile, common_args + ['--delimiter', '\t'])
        assert result.exit_code == 0, result.output

        results = cdt.get_tiles(output_file, 6, 3, num_tiles=1)
        assert len(results[3][0]['fields']) == 14

        # Second run: same output file, no explicit delimiter.
        result = clt.CliRunner().invoke(cca.bedfile, common_args)
        assert result.exit_code == 0, result.output

        results = cdt.get_tiles(output_file, 6, 3, num_tiles=3)
        assert len(results[3][0]['fields']) == 17
def test_clodius_aggregate_bedfile():
    """Check the field count of aggregated hg19 annotations per delimiter.

    Runs ``cca.bedfile`` twice on the short hg19 gene annotation sample,
    writing to the same output file each time:

    * with ``--delimiter "\\t"`` the first entry of tile 3 (zoom level 6)
      must expose 14 fields;
    * without ``--delimiter`` the same entry must expose 17 fields.
    """
    # Local import: this block did not previously rely on ``tempfile``.
    import tempfile

    input_file = op.join(
        testdir, "sample_data", "geneAnnotationsExonsUnions.hg19.short.bed"
    )

    def aggregate(output_file, extra_args=()):
        """Invoke the bedfile command and require a clean exit."""
        # Option values are given as strings, matching real CLI input.
        result = clt.CliRunner().invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "20",
                "--importance-column",
                "5",
                "--assembly",
                "hg19",
                "--output-file",
                output_file,
            ]
            + list(extra_args),
        )
        assert result.exit_code == 0, result.output

    # The output lives in a throw-away directory instead of a fixed path
    # under /tmp, so nothing is left behind and parallel runs cannot clash.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = op.join(tmp_dir, "geneAnnotationsExonsUnions.hg19.short.bed")

        aggregate(output_file, ["--delimiter", "\t"])
        results = cdt.get_tiles(output_file, 6, 3, num_tiles=1)
        assert len(results[3][0]["fields"]) == 14

        aggregate(output_file)
        results = cdt.get_tiles(output_file, 6, 3, num_tiles=3)
        assert len(results[3][0]["fields"]) == 17
def test_random_importance():
    """Aggregate a .seg file with random importance and locate a known entry.

    Runs ``cca.bedfile`` with ``--importance-column random`` and at most two
    entries per tile, fetches the tileset info and a few tiles as smoke
    checks, and finally asserts that tile 32 at zoom level 6 contains an
    entry whose ``xEnd`` is 2195875458.
    """
    input_file = op.join(testdir, 'sample_data',
                         '25435_PM15-000877_SM-7QK6O.seg')
    expected_x_end = 2195875458

    # The output goes into a temporary directory so it is always removed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = op.join(tmp_dir, 'random_importance.beddb')

        result = clt.CliRunner().invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '2',
             '--importance-column', 'random',
             '--assembly', 'b37',
             '--has-header',
             '--output-file', output_file])
        assert result.exit_code == 0, result.output

        # Smoke checks only: these calls must succeed, but nothing is
        # asserted about what they return.
        cdt.get_tileset_info(output_file)
        cdt.get_tiles(output_file, 0, 0)
        cdt.get_tiles(output_file, 1, 0)
        cdt.get_tiles(output_file, 1, 1)

        # NOTE(review): the previous version also scanned tile 31 for the
        # entry but reset its flag before scanning tile 32, so only tile 32
        # ever influenced the outcome. That effective behaviour is kept;
        # confirm whether "tile 31 or tile 32" was the real intent.
        cdt.get_tiles(output_file, 6, 31)

        found = any(
            rect['xEnd'] == expected_x_end
            for row in cdt.get_tiles(output_file, 6, 32).values()
            for rect in row)
        assert found
def test_get_tiles():
    """Smoke-test reading one tile from the short gene annotation database.

    Fetches position 169283 at zoom level 18 and touches the ``xStart`` and
    ``fields`` keys of its first entry. Nothing is asserted about the values;
    the test fails only if a lookup or the integer conversion raises. The
    database path is relative to the current working directory.
    """
    db_path = 'test/sample_data/gene_annotations.short.db'
    tile_position = 169283

    tiles_by_position = cdt.get_tiles(db_path, 18, tile_position)
    first_entry = tiles_by_position[tile_position][0]

    # Evaluated purely for their ability to raise on malformed data.
    int(first_entry['xStart'])
    first_entry['fields']
def test_random_importance():
    """Check that entries kept at zoom 5 also appear in the zoom-6 tiles.

    Aggregates a .seg file with ``--importance-column random`` and at most
    two entries per tile. Every ``xStart`` found in tile 15 at zoom level 5
    must also be found in tile 30 or tile 31 at zoom level 6.
    """
    input_file = op.join(testdir, 'sample_data',
                         '25435_PM15-000877_SM-7QK6O.seg')

    # The output goes into a temporary directory so it is always removed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = op.join(tmp_dir, 'random_importance.beddb')

        result = clt.CliRunner().invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '2',
             '--importance-column', 'random',
             '--assembly', 'b37',
             '--has-header',
             '--output-file', output_file])
        assert result.exit_code == 0, result.output

        # Smoke checks only: these calls must succeed.
        # TODO: Make assertions about the results.
        cdt.get_tileset_info(output_file)
        cdt.get_tiles(output_file, 0, 0)
        cdt.get_tiles(output_file, 1, 0)
        cdt.get_tiles(output_file, 1, 1)

        # Entries present in the zoom-5 tile ...
        parent_starts = {
            rect['xStart']
            for row in cdt.get_tiles(output_file, 5, 15).values()
            for rect in row}

        # ... must all show up in zoom-6 tiles 30 and 31.
        child_starts = set()
        for position in (30, 31):
            for row in cdt.get_tiles(output_file, 6, position).values():
                child_starts.update(rect['xStart'] for rect in row)

        missing = parent_starts - child_starts
        assert not missing, (
            'xStart values in tile 5/15 but absent from tiles 6/30 and '
            '6/31: {}'.format(sorted(missing)))
def generate_beddb_tiles(tileset, tile_ids):
    '''
    Generate tiles from a beddb file.

    The tile ids are binned by zoom level and partitioned into groups of
    adjacent tiles; each group is then fetched with a single
    ``cdt.get_tiles`` call covering the group's position range.

    Parameters
    ----------
    tileset: tilesets.models.Tileset object
        The tileset that the tile ids should be retrieved from
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyz.0.1) identifying the tiles
        to be retrieved

    Returns
    -------
    generated_tiles: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples
    '''
    tile_ids_by_zoom = bin_tiles_by_zoom(tile_ids).values()
    partitioned_tile_ids = it.chain.from_iterable(
        partition_by_adjacent_tiles(t, dimension=1)
        for t in tile_ids_by_zoom)

    generated_tiles = []

    for tile_group in partitioned_tile_ids:
        # Skip empty groups before touching tile_group[0]; the previous
        # check came after that access and so could never take effect.
        if len(tile_group) == 0:
            continue

        # Tile ids have the form "<tileset_id>.<zoom_level>.<position>";
        # the tileset id and zoom level are taken from the group's first id.
        first_parts = tile_group[0].split('.')
        tileset_id = first_parts[0]
        zoom_level = int(first_parts[1])

        positions = [int(t.split('.')[2]) for t in tile_group]
        minx = min(positions)
        maxx = max(positions)

        # One request spans the whole [minx, maxx] range of the group.
        tile_data_by_position = cdt.get_tiles(
            get_cached_datapath(tileset.datafile.url),
            zoom_level,
            minx,
            maxx - minx + 1
        )

        generated_tiles.extend(
            (".".join(map(str, [tileset_id, zoom_level, position])),
             tile_data)
            for (position, tile_data) in tile_data_by_position.items())

    return generated_tiles
def test_chromosome_limit():
    """Check that ``--chromosome chr14`` restricts the output to chr14.

    Aggregates the short gene annotation sample with ``--chromosome chr14``
    and asserts that every entry of the tile at zoom 0, position 0 has
    ``'chr14'`` as its first field.
    """
    input_file = op.join(testdir, 'sample_data',
                         'geneAnnotationsExonsUnions.short.bed')

    # A temporary directory guarantees cleanup even when an assertion
    # fails; the earlier trailing remove call was skipped in that case.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = op.join(tmp_dir, 'chromosome_limit.beddb')

        result = clt.CliRunner().invoke(cca.bedfile, [
            input_file,
            '--max-per-tile', '60',
            '--importance-column', '5',
            '--assembly', 'hg19',
            '--chromosome', 'chr14',
            '--output-file', output_file
        ])
        assert result.exit_code == 0, result.output

        rows = cdt.get_tiles(output_file, 0, 0)[0]

        for row in rows:
            assert row['fields'][0] == 'chr14'
def test_no_chromosome_limit():
    """Check that output is not limited to chr14 without ``--chromosome``.

    Aggregates the short gene annotation sample without a chromosome filter
    and walks the entries of the tile at zoom 0, position 0 until the first
    entry that is not on ``chr14``. Such an entry must exist. Every entry
    visited that is not on ``chr1`` must have ``xStart`` above 200000000.
    """
    input_file = op.join(testdir, 'sample_data',
                         'geneAnnotationsExonsUnions.short.bed')

    # A temporary directory guarantees cleanup even when an assertion
    # fails; the earlier trailing remove call was skipped in that case.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_file = op.join(tmp_dir, 'no_chromosome_limit.beddb')

        result = clt.CliRunner().invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '60',
             '--importance-column', '5',
             '--assembly', 'hg19',
             '--output-file', output_file])
        assert result.exit_code == 0, result.output

        rows = cdt.get_tiles(output_file, 0, 0)[0]

        found_other = False
        for row in rows:
            chromosome = row['fields'][0]

            if chromosome != 'chr1':
                # NOTE(review): presumably xStart is a genome-wide offset
                # and chr1 comes first, so other chromosomes start beyond
                # this threshold; confirm against the assembly sizes.
                assert row['xStart'] > 200000000

            if chromosome != 'chr14':
                found_other = True
                break

        # make sure there's a chromosome other than chr14 in the output
        assert found_other
def test_get_tiles():
    """Smoke-test fetching a single tile from the short annotation database.

    Requests position 169283 at zoom level 18 and looks that position up in
    the result. No values are asserted; the test fails only if the fetch or
    the lookup raises. The database path is relative to the current working
    directory.
    """
    db_path = 'test/sample_data/gene_annotations.short.db'
    zoom_level, tile_position = 18, 169283

    tiles_by_position = cdt.get_tiles(db_path, zoom_level, tile_position)

    # The lookup itself is the check: it fails if the position is missing.
    tiles_by_position[tile_position]