def test_clodius_aggregate_bigwig():
    """End-to-end test: aggregate a bigWig file and check the multires output.

    Runs the ``bigwig`` CLI command against a small test file with a single
    100kb chromosome named 'test', then verifies that the values at several
    zoom levels are all 1 (the input is constant, so averages stay 1).
    """
    runner = clt.CliRunner()
    input_file = op.join(testdir, 'sample_data', 'test.tile_generation.bw')

    # Minimal chromsizes file: one chromosome named 'test', 100kb long.
    with open('/tmp/test_chrs.tsv', 'w') as f:
        f.write('{}\t{}'.format('test', 100000))

    result = runner.invoke(cca.bigwig, [
        input_file,
        '--chromsizes-filename', '/tmp/test_chrs.tsv',
        '--output-file', '/tmp/test.mr.bw'
    ])

    # Fail fast with diagnostics if the CLI invocation itself crashed;
    # the original deferred this assert until after the h5py reads, which
    # produced a confusing "file not found" instead of the real traceback.
    if result.exit_code != 0:
        import traceback
        exc_type, exc_value, tb = result.exc_info
        print("result.output:", result.output)
        traceback.print_tb(tb)
        print("Exception:", exc_type, exc_value)
    assert result.exit_code == 0

    import clodius.hdf_tiles as ch

    # Open read-only and close on exit; the original used the deprecated
    # default mode and leaked the handle.
    with h5py.File('/tmp/test.mr.bw', 'r') as f:
        max_zoom = f['meta'].attrs['max-zoom']
        tile_size = int(f['meta'].attrs['tile-size'])

        d = ch.get_data(f, max_zoom, 0)
        for i in range(tile_size):
            assert d[i] == 1

        # One zoom level up: averages of 1s are still 1.
        d = ch.get_data(f, max_zoom - 1, 0)
        for i in range(tile_size):
            assert d[i] == 1

        d = ch.get_data(f, max_zoom - 2, 0)
        for i in range(tile_size // 2):
            assert d[i] == 1
        assert d[513] == 1
def main():
    """CLI entry point: print summary info for a hitile file and fetch one tile."""
    parser = argparse.ArgumentParser(description="""
    python get_hitile.py filename z x
    """)
    parser.add_argument('filename')
    parser.add_argument('z', type=int)
    parser.add_argument('x', type=int)

    args = parser.parse_args()

    with h5py.File(args.filename, 'r') as hitile:
        info = hdft.get_tileset_info(hitile)

        max_width = info['max_width']
        max_pos = info['max_pos']
        tile_size = info['tile_size']

        print("max_width", max_width)
        print("max_pos", max_pos)

        # Last populated position, rescaled into tile-index coordinates.
        last_index = int(tile_size * (max_pos / max_width))
        print("last_index:", last_index)

        tile_data = hdft.get_data(hitile, args.z, args.x)
def main(): parser = argparse.ArgumentParser(description=""" python read.py hdf_file """) parser.add_argument("filepath") parser.add_argument("-z", default=None, type=int) parser.add_argument("-x", default=None, type=int) parser.add_argument("-n", "--num-trials", default=1, type=int) # parser.add_argument('argument', nargs=1) # parser.add_argument('-o', '--options', default='yo', # help="Some option", type='str') # parser.add_argument('-u', '--useless', action='store_true', # help='Another useless option') args = parser.parse_args() f = h5py.File(args.filepath, "r") t1 = time.time() if args.num_trials < 1: print("The number of trials needs to be greater than 0", file=sys.stderr) if args.x is not None and args.z is not None: d = ch.get_data(f, args.z, args.x) print("z:", args.z, "x:", args.x, "len:", len(d), d) return for i in range(args.num_trials): z = random.randint(0, int(f["meta"].attrs["max-zoom"])) x = random.randint(0, 2**z) d = ch.get_data(f, z, x) print("z:", z, "x:", x, "len:", len(d), d) # d = ch.get_data(f, 1, 1) # print "z:", z, "x:", x t2 = time.time() print("avg time:", (t2 - t1) / args.num_trials) """
def generate_hitile_tiles(tileset, tile_ids):
    '''
    Generate tiles from a hitile file.

    Parameters
    ----------
    tileset: tilesets.models.Tileset object
        The tileset that the tile ids should be retrieved from
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0) identifying the tiles
        to be retrieved

    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples
    '''
    generated_tiles = []

    for tile_id in tile_ids:
        tile_id_parts = tile_id.split('.')
        tile_position = list(map(int, tile_id_parts[1:3]))

        # Open read-only and close promptly; the original opened the file
        # with the deprecated default mode and leaked the handle per tile.
        with h5py.File(tileset.datafile.path, 'r') as hitile_file:
            dense = hdft.get_data(
                hitile_file,
                tile_position[0],
                tile_position[1]
            )

        if len(dense):
            max_dense = max(dense)
            min_dense = min(dense)
        else:
            max_dense = 0
            min_dense = 0

        min_f16 = np.finfo('float16').min
        max_f16 = np.finfo('float16').max

        has_nan = len([d for d in dense if np.isnan(d)]) > 0

        # Encode as float16 only when every value fits strictly inside its
        # range; NaNs or out-of-range values force the wider float32.
        if (
            not has_nan and
            max_dense > min_f16 and max_dense < max_f16 and
            min_dense > min_f16 and min_dense < max_f16
        ):
            tile_value = {
                'dense': base64.b64encode(dense.astype('float16')).decode('utf-8'),
                'dtype': 'float16'
            }
        else:
            tile_value = {
                'dense': base64.b64encode(dense.astype('float32')).decode('utf-8'),
                'dtype': 'float32'
            }

        generated_tiles += [(tile_id, tile_value)]

    return generated_tiles
def check_1d_file(filename):
    """Sanity-check a 1-D multires file built from all-1 input data.

    At the deepest zoom every bin is 1; the asserted values double at each
    coarser zoom (2, then 4), consistent with pairwise aggregation.
    """
    # Open read-only and close on exit; the original used the deprecated
    # default mode and never closed the handle.
    with h5py.File(filename, 'r') as f:
        max_zoom = f['meta'].attrs['max-zoom']
        tile_size = int(f['meta'].attrs['tile-size'])

        # Deepest zoom level: raw values, all 1.
        d = ch.get_data(f, max_zoom, 0)
        for i in range(tile_size):
            assert d[i] == 1

        # One level coarser: pairs combined, values become 2.
        d = ch.get_data(f, max_zoom - 1, 0)
        for i in range(tile_size):
            assert d[i] == 2

        # Two levels coarser: values become 4, data fills half the tile
        # (plus a little spillover checked at index 513).
        d = ch.get_data(f, max_zoom - 2, 0)
        for i in range(tile_size // 2):
            assert d[i] == 4
        assert d[513] == 4
def test_clodius_aggregate_bedgraph():
    """Aggregate cnvs_hw.tsv twice (custom chromsizes, then grch37 assembly)
    and sanity-check the resulting hitile output."""
    runner = clt.CliRunner()

    input_file = op.join(testdir, 'sample_data', 'cnvs_hw.tsv')
    assembly_file = op.join(testdir, 'sample_data', 'test_cnvs_assembly')
    output_file = '/tmp/cnvs_hw.hitile'

    # run once to make sure it doesn't crash on a smaller genome
    result = runner.invoke(
        cca.bedgraph, [
            input_file,
            '--output-file', output_file,
            '--chromsizes-filename', assembly_file,
            '--chromosome-col', '2',
            '--from-pos-col', '3',
            '--to-pos-col', '4',
            '--value-col', '5',
            '--has-header',
            '--nan-value', 'NA'
        ])
    # The first run's result was previously discarded without being checked,
    # defeating the stated purpose of the first invocation.
    assert result.exit_code == 0

    # run again with the proper assembly
    result = runner.invoke(
        cca.bedgraph, [
            input_file,
            '--output-file', output_file,
            '--assembly', 'grch37',
            '--chromosome-col', '2',
            '--from-pos-col', '3',
            '--to-pos-col', '4',
            '--value-col', '5',
            '--has-header',
            '--nan-value', 'NA'
        ])
    assert result.exit_code == 0

    # Open read-only and close on exit; the original used the deprecated
    # default mode and leaked the handle.
    with h5py.File(output_file, 'r') as f:
        d = cht.get_data(f, 0, 0)

        # The first bin holds real data; the last bin is NaN padding.
        assert not np.isnan(d[0])
        assert np.isnan(d[-1])

        cht.get_data(f, 3, 0)
        # TODO: Make assertions about result
def test_clodius_aggregate_bedgraph1():
    # Aggregate a bedgraph over the dm3 assembly and spot-check the values
    # stored in the resulting hitile file.
    input_file = op.join(testdir, 'sample_data', 'dm3_values.tsv')
    output_file = '/tmp/dm3_values.hitile'

    runner = clt.CliRunner()
    result = runner.invoke(
        cca.bedgraph,
        [input_file, '--output-file', output_file, '--assembly', 'dm3'])
    # exc_info is unpacked for the debugging block below; a/b/tb are unused
    # unless that string is turned back into code.
    a, b, tb = result.exc_info
    """
    print("exc_info:", result.exc_info)
    print("result:", result)
    print("result.output", result.output)
    print("result.error", traceback.print_tb(tb))
    print("Exception:", a,b)
    """
    # print("result.output", result.output)

    # NOTE(review): opened without an explicit mode and never closed —
    # recent h5py versions require mode='r'; confirm the pinned h5py version.
    f = h5py.File('/tmp/dm3_values.hitile')
    # max_zoom = f['meta'].attrs['max-zoom']
    # TODO: Make assertions about result

    values = f['values_0']

    import numpy as np
    # print("values:", values[8])

    # genome positions are 0 based as stored in hitile files
    assert (np.isnan(values[8]))
    assert (values[9] == 1)
    assert (values[10] == 1)
    assert (values[13] == 1)
    assert (np.isnan(values[14]))
    assert (np.isnan(values[15]))

    # Genome-wide offset of chr2R, used to index into the flat values array.
    chrom_info = nc.get_chrominfo('dm3')
    chr_2r_pos = nc.chr_pos_to_genome_pos('chr2R', 0, chrom_info)
    # print('chr_2r_pos:', chr_2r_pos)

    assert (np.isnan(values[chr_2r_pos + 28]))
    assert (values[chr_2r_pos + 29] == 77)
    assert (values[chr_2r_pos + 38] == 77)
    assert (values[chr_2r_pos + 39] == 0)

    assert (result.exit_code == 0)

    d = cht.get_data(f, 0, 0)
    # print("d[:10]", d[:10])
    # print("sum(d):", sum([x for x in d if not np.isnan(x)]))
    assert (np.nansum(d) > 1.0 and np.nansum(d) < 10.0)

    # NOTE(review): everything below this return is unreachable dead code —
    # presumably a disabled second scenario (test3chroms). Either delete it
    # or move it into its own test before relying on it.
    return

    input_file = op.join(testdir, 'sample_data', 'test3chroms_values.tsv')
    output_file = '/tmp/test3chroms_values.hitile'

    runner = clt.CliRunner()
    result = runner.invoke(cca.bedgraph, [
        input_file, '--output-file', output_file, '--assembly', 'test3chroms'
    ])
    # print('output:', result.output, result)

    f = h5py.File('/tmp/test3chroms_values.hitile')
    # f['meta'].attrs['max-zoom']
    # TODO: Make assertions about result
    # print('max_zoom:', max_zoom)
    # print("len", len(f['values_0']))

    values = f['values_0']
    # print('values', values[:100])

    # genome positions are 0 based as stored in hitile files
    assert (values[8] == 0)
    assert (values[9] == 1)
    assert (values[10] == 1)
    assert (values[13] == 1)
    assert (values[14] == 0)
    assert (values[15] == 0)

    # NOTE(review): passes the assembly *name* where the live code above
    # passes a chrominfo object — never executed (dead code), so verify the
    # argument type against nc.chr_pos_to_genome_pos before reviving this.
    chr2_pos = nc.chr_pos_to_genome_pos('chr2', 0, 'test3chroms')

    assert (values[chr2_pos + 28] == 0)
    assert (values[chr2_pos + 29] == 77)
    assert (values[chr2_pos + 38] == 77)
    assert (values[chr2_pos + 39] == 0)

    assert (result.exit_code == 0)

    d = cht.get_data(f, 0, 0)
    assert (sum(d) == 770 + 880 + 5)
def test_clodius_aggregate_bedgraph():
    """Aggregate cnvs_hw.tsv twice (custom chromsizes, then the grch37
    assembly) and sanity-check the resulting hitile."""
    input_file = op.join(testdir, "sample_data", "cnvs_hw.tsv")
    assembly_file = op.join(testdir, "sample_data", "test_cnvs_assembly")
    output_file = "/tmp/cnvs_hw.hitile"

    # Column-layout flags shared by both invocations.
    format_flags = [
        "--chromosome-col", "2",
        "--from-pos-col", "3",
        "--to-pos-col", "4",
        "--value-col", "5",
        "--has-header",
        "--nan-value", "NA",
    ]

    # run once to make sure it doesn't crash on a smaller genome
    runner = clt.CliRunner()
    result = runner.invoke(
        cca.bedgraph,
        [input_file,
         "--output-file", output_file,
         "--chromsizes-filename", assembly_file] + format_flags,
    )

    # run again with the proper assembly
    runner = clt.CliRunner()
    result = runner.invoke(
        cca.bedgraph,
        [input_file,
         "--output-file", output_file,
         "--assembly", "grch37"] + format_flags,
    )

    assert result.exit_code == 0

    f = h5py.File(output_file, "r")

    tile = cht.get_data(f, 0, 0)
    # First bin holds real data; the trailing bin is NaN padding.
    assert not np.isnan(tile[0])
    assert np.isnan(tile[-1])

    cht.get_data(f, 3, 0)
    # TODO: Make assertions about result

    assert result.exit_code == 0
# Debug script: fetch a single tile from test_chr14.hitile, then (dead code
# below) scan the tile containing a fixed genome position at every zoom.
import clodius.hdf_tiles as cht
import h5py
import sys

# NOTE(review): opened without an explicit mode and never closed — recent
# h5py versions require mode='r'; confirm against the h5py version in use.
f = h5py.File('test_chr14.hitile')

tile = cht.get_data(f, 12, 3316)

# NOTE(review): this early exit makes everything below unreachable —
# presumably leftover debugging; remove it to run the zoom scan.
sys.exit(1)

max_zoom = 17
pos = 117440512 - 30

for i in range(max_zoom):
    # NOTE(review): '/' is float division under Python 3, so tile_pos is a
    # float; tile indices are presumably integral — '//' looks intended.
    tile_pos = pos / (1024 * 2**(max_zoom - i))
    tile = cht.get_data(f, i, tile_pos)
    print("z", i, "tile_pos:", tile_pos, "data", tile)
def generate_tile(tile_id, request):
    '''
    Create a tile. The tile_id specifies the dataset as well
    as the position.

    This function will look at the filetype and determine what type
    of tile to retrieve (e.g. cooler -> 2D dense, hitile -> 1D dense,
    elasticsearch -> anything)

    Args:
        tile_id (str): The id of a tile, consisting of the tileset id,
            followed by the tile position (e.g. PIYqJpdyTCmAZGmA6jNHJw.4.0.0)
        request (django.http.HTTPRequest): The request that included this
            tile.

    Returns:
        (string, dict): A tuple containing the tile ID tile data
    '''
    tile_id_parts = tile_id.split('.')
    # Materialize the positions: a bare map() object is not subscriptable
    # under Python 3, so the tile_position[0] reads below would raise
    # TypeError. (The sibling generate_hitile_tiles already does this.)
    tile_position = list(map(int, tile_id_parts[1:]))
    tileset_uuid = tile_id_parts[0]

    tileset = tm.Tileset.objects.get(uuid=tileset_uuid)

    if tileset.private and request.user != tileset.owner:
        # dataset is not public, return an empty set
        return (tileset_uuid, {'error': "Forbidden"})

    # Serve from the redis cache when the tile was generated before.
    tile_value = rdb.get(tile_id)
    if tile_value is not None:
        tile_value = pickle.loads(tile_value)
        return (tile_id, tile_value)

    if tileset.filetype == "hitile":
        # Open read-only and close promptly; the original leaked the handle.
        with h5py.File(get_datapath(tileset.datafile), 'r') as hitile_file:
            dense = hdft.get_data(
                hitile_file,
                tile_position[0],
                tile_position[1]
            )
        tile_value = {'dense': base64.b64encode(dense)}
    elif tileset.filetype == 'beddb':
        tile_value = cdt.get_tile(
            get_datapath(tileset.datafile),
            tile_position[0],
            tile_position[1]
        )
    elif tileset.filetype == 'bed2ddb':
        tile_value = cdt.get_2d_tile(
            get_datapath(tileset.datafile),
            tile_position[0],
            tile_position[1],
            tile_position[2]
        )
    elif tileset.filetype == 'hibed':
        with h5py.File(get_datapath(tileset.datafile), 'r') as hibed_file:
            dense = hdft.get_discrete_data(
                hibed_file,
                tile_position[0],
                tile_position[1]
            )
        tile_value = {'discrete': list([list(d) for d in dense])}
    elif tileset.filetype == "elasticsearch":
        # NOTE(review): urllib.urlopen is Python 2 only — under Python 3
        # this needs urllib.request.urlopen; confirm the target runtime.
        response = urllib.urlopen(
            tileset.datafile + '/' + '.'.join(map(str, tile_position))
        )
        tile_value = json.loads(response.read())["_source"]["tile_value"]
    else:
        tile_value = make_cooler_tile(
            get_datapath(tileset.datafile),
            tile_position
        )
        if tile_value is None:
            return None

    # Cache the freshly generated tile for subsequent requests.
    rdb.set(tile_id, pickle.dumps(tile_value))
    return (tile_id, tile_value)