def test_random_importance():
    """Check tile nesting when aggregating with random importance.

    Aggregates the sample ``.seg`` file with ``--importance-column random``
    and at most two entries per tile, then verifies that every ``xStart``
    found in tile ``(5, 15)`` is also present in tile ``(6, 30)`` or
    ``(6, 31)``.
    """
    import os

    # Only the file name is needed. Close the handle right away and remove the
    # file ourselves, otherwise every run leaks a file in the temp directory.
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()

    try:
        runner = clt.CliRunner()
        input_file = op.join(testdir, 'sample_data',
                             '25435_PM15-000877_SM-7QK6O.seg')

        result = runner.invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '2',
             '--importance-column', 'random',
             '--assembly', 'b37',
             '--has-header',
             '--output-file', f.name])

        # Fail early, with the CLI output, instead of on a later read of a
        # file that was never written.
        assert result.exit_code == 0, result.output

        # Smoke checks: these calls only have to succeed.
        cdt.get_tileset_info(f.name)
        # TODO: Make assertions about result

        cdt.get_tiles(f.name, 0, 0)
        # TODO: Make assertions about result

        list(cdt.get_tiles(f.name, 1, 0).values())
        list(cdt.get_tiles(f.name, 1, 1).values())
        # TODO: Make assertions about result

        # Collect every xStart in tile (5, 15) ...
        found = {}
        for row in cdt.get_tiles(f.name, 5, 15).values():
            for rect in row:
                found[rect['xStart']] = False

        # ... and mark the ones that show up again in tiles (6, 30) / (6, 31).
        for position in (30, 31):
            for row in cdt.get_tiles(f.name, 6, position).values():
                for rect in row:
                    if rect['xStart'] in found:
                        found[rect['xStart']] = True

        missing = sorted(start for start, seen in found.items() if not seen)
        assert not missing, 'xStart values missing at zoom 6: {}'.format(missing)
    finally:
        if op.exists(f.name):
            os.remove(f.name)
def test_clodius_aggregate_bedpe2():
    """Aggregate the galGal6 sample with an explicit chromsizes file.

    The output must report the expected ``max_length`` and have the same size
    on disk as the reference ``galGal6.bed.multires.db``.
    """
    sample_dir = op.join(testdir, 'sample_data')
    input_file = op.join(sample_dir, 'galGal6.bed')
    chromsizes_file = op.join(sample_dir, 'galGal6.chrom.sizes')
    expected_file = op.join(sample_dir, 'galGal6.bed.multires.db')

    # Both "ends" of each 2D entry are read from the same three columns.
    aggregate_options = dict(
        chr1_col=1,
        chr2_col=1,
        from1_col=2,
        from2_col=2,
        to1_col=3,
        to2_col=3,
        importance_column=None,
        chromosome=None,
        chromsizes_filename=chromsizes_file,
        max_per_tile=100,
        tile_size=1024,
        has_header=True,
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        output_file = op.join(tmpdirname, 'blah.bed2ddb')

        # the test is here to ensure that this doesn't raise an error
        cca._bedpe(input_file, output_file, None, **aggregate_options)

        tsinfo = cdt.get_tileset_info(output_file)
        output_size = os.stat(output_file).st_size
        expected_size = os.stat(expected_file).st_size

        assert tsinfo['max_length'] == 1065365426
        assert output_size == expected_size
def test_get_tileset_info():
    """Check the tileset info reported for the gene annotations sample db."""
    info = cdt.get_tileset_info('test/sample_data/gene_annotations.short.db')

    assert info['zoom_step'] == 1
    assert info['max_length'] == 3137161264
    # max_width has to fall strictly between four and five billion
    assert 4000000000 < info['max_width'] < 5000000000
def test_get_tileset_info():
    """Check the tileset info reported for the arrowhead domains sample db."""
    info = cdt.get_tileset_info(
        "test/sample_data/arrowhead_domains_short.txt.multires.db"
    )

    assert info["zoom_step"] == 1
    assert info["max_length"] == 3095693981
    # max_width has to fall strictly between four and five billion
    assert 4000000000 < info["max_width"] < 5000000000
def test_get_tileset_info():
    """Check the tileset info reported for the arrowhead domains sample db."""
    info = cdt.get_tileset_info(
        'test/sample_data/arrowhead_domains_short.txt.multires.db')

    assert info['zoom_step'] == 1
    assert info['max_length'] == 3095693981
    # max_width has to fall strictly between four and five billion
    assert 4000000000 < info['max_width'] < 5000000000
def test_clodius_aggregate_bedpe():
    """Smoke-test aggregating a BEDPE file and reading the result back.

    Runs ``cca._bedpe`` on ``isidro.bedpe`` into a temporary directory and
    then calls the 2D tile and tileset-info readers on the output. Nothing is
    asserted about the returned values yet; the calls only have to succeed.
    """
    input_file = op.join(testdir, "sample_data", "isidro.bedpe")

    with tempfile.TemporaryDirectory() as tmpdirname:
        output_file = op.join(tmpdirname, "isidro.bed2ddb")

        cca._bedpe(
            input_file,
            output_file,
            "b37",
            importance_column=None,
            chromosome=None,
            max_per_tile=100,
            tile_size=1024,
            has_header=True,
        )

        cdt.get_2d_tiles(output_file, 0, 0, 0)
        cdt.get_tileset_info(output_file)
        # TODO: Make assertions about result

        cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2)
        # TODO: Make assertions about result

        cdt.get_tileset_info(output_file)
def test_clodius_aggregate_bedpe():
    """Smoke-test aggregating a BEDPE file and reading the result back.

    Runs ``cca._bedpe`` on ``isidro.bedpe`` and then calls the 2D tile and
    tileset-info readers on the output. Nothing is asserted about the returned
    values yet; the calls only have to succeed.
    """
    import tempfile

    input_file = op.join(testdir, 'sample_data', 'isidro.bedpe')

    # Write into a private temporary directory instead of a fixed /tmp path:
    # that path does not exist on every platform, can collide between
    # concurrent test runs, and was never cleaned up.
    with tempfile.TemporaryDirectory() as tmpdirname:
        output_file = op.join(tmpdirname, 'isidro.bed2ddb')

        cca._bedpe(input_file, output_file, 'b37',
                   importance_column=None,
                   chromosome=None,
                   max_per_tile=100,
                   tile_size=1024,
                   has_header=True)

        cdt.get_2d_tiles(output_file, 0, 0, 0)
        cdt.get_tileset_info(output_file)

        cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2)
        cdt.get_tileset_info(output_file)
def test_random_importance():
    """Check that a known interval survives random-importance aggregation.

    Aggregates the sample ``.seg`` file with ``--importance-column random``
    and at most two entries per tile, then requires that an entry whose
    ``xEnd`` is 2195875458 is present in tile ``(6, 31)`` or ``(6, 32)``.
    """
    import os

    # Only the file name is needed. Close the handle right away and remove the
    # file ourselves, otherwise every run leaks a file in the temp directory.
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()

    try:
        runner = clt.CliRunner()
        input_file = op.join(testdir, 'sample_data',
                             '25435_PM15-000877_SM-7QK6O.seg')

        result = runner.invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '2',
             '--importance-column', 'random',
             '--assembly', 'b37',
             '--has-header',
             '--output-file', f.name])

        # Replaces the unconditional debug printing: report the CLI output
        # only when the command actually failed.
        assert result.exit_code == 0, result.output

        # Smoke checks: these calls only have to succeed.
        cdt.get_tileset_info(f.name)
        cdt.get_tiles(f.name, 0, 0)
        list(cdt.get_tiles(f.name, 1, 0).values())
        list(cdt.get_tiles(f.name, 1, 1).values())

        # The flag used to be reset between the two scans, which silently
        # discarded whatever the first tile contained. A hit in either tile
        # counts.
        found = any(
            rect['xEnd'] == 2195875458
            for position in (31, 32)
            for row in cdt.get_tiles(f.name, 6, position).values()
            for rect in row
        )
        assert found
    finally:
        if op.exists(f.name):
            os.remove(f.name)
def tileset_info(request):
    """Get information about one or more tilesets.

    Tilesets have information critical to their display, such as the maximum
    zoom level as well as their width. This needs to be relayed to the client
    in order for it to know which tiles to request.

    Failures for an individual tileset (unknown uuid, private, authentication
    required, unknown filetype) are reported as an ``{"error": ...}`` entry
    under that uuid; the other requested tilesets are still answered.

    Args:
        request (django.http.HttpRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object mapping each requested
            uuid to its tileset meta-information.
    """
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    # NOTE(review): neither chromsizes_error nor the chromsizes series loaded
    # below is used further down; this looks like an unfinished feature.
    chromsizes_error = None

    if "cs" in request.GET:
        # we need to call a different server to get the tiles
        # (the old test ran `in` against the bound `getlist` method, which
        # raises TypeError for every request carrying a `cs` parameter)
        if "ci" not in request.GET:
            chromsizes_error = "cs param present without ci"
        # call the request server and get the chromsizes
    elif "ci" in request.GET:
        try:
            chromsizes = tm.Tileset.objects.get(uuid=request.GET["ci"])
            data = tcs.chromsizes_array_to_series(
                tcs.get_tsv_chromsizes(chromsizes.datafile.path)
            )
        except Exception:
            # Best effort: a bad `ci` must not break the response, but the
            # failure should not disappear silently either.
            import logging

            logging.getLogger(__name__).warning(
                "Could not load chromsizes for ci=%s",
                request.GET["ci"],
                exc_info=True,
            )

    for tileset_uuid in tileset_uuids:
        if tileset_uuid == "osm-image":
            # Static description; no database lookup is needed for it.
            tileset_infos[tileset_uuid] = {
                "min_x": 0,
                "max_height": 134217728,
                "min_y": 0,
                "max_y": 134217728,
                "max_zoom": 19,
                "tile_size": 256,
            }
            continue

        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                "error": "No such tileset with uid: {}".format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {"error": "Forbidden"}
            continue

        if (
            tileset_object.requiresAuthentication
            and not request.user.is_authenticated
        ):
            # dataset is not public
            tileset_infos[tileset_uuid] = {
                "error": "This request required authentication"
            }
            continue

        if tileset_object.requiresAuthentication and (
            ("accessibleTilesets" not in request.session)
            or (tileset_uuid not in request.session["accessibleTilesets"])
        ):
            # dataset is not accessible for this user
            tileset_infos[tileset_uuid] = {
                "error": "You don't have access to this tileset"
            }
            continue

        filetype = tileset_object.filetype

        if filetype in ("hitile", "hibed"):
            # Close the HDF5 file once the values are extracted; every value
            # is turned into a plain int/float before the file goes away.
            with h5py.File(tileset_object.datafile.path, "r") as h5_file:
                h5_info = hdft.get_tileset_info(h5_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [int(h5_info["min_pos"])],
                    "max_pos": [int(h5_info["max_pos"])],
                    # smallest power of two covering the data range
                    "max_width": 2
                    ** math.ceil(
                        math.log(h5_info["max_pos"] - h5_info["min_pos"])
                        / math.log(2)
                    ),
                    "tile_size": int(h5_info["tile_size"]),
                    "max_zoom": int(h5_info["max_zoom"]),
                }
        elif filetype in ("bigwig", "bigbed"):
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(tileset_object.datafile.path, chromsizes)

            if "chromsizes" in tsinfo:
                # sizes are converted to plain ints before serialisation
                tsinfo["chromsizes"] = [
                    (c, int(s)) for c, s in tsinfo["chromsizes"]
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif filetype == "multivec":
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "elastic_search":
            # `urllib.urlopen` does not exist on Python 3.
            from urllib.request import urlopen

            # NOTE(review): datafile is concatenated with a str here; confirm
            # that it holds a URL string for this filetype.
            with urlopen(tileset_object.datafile + "/tileset_info") as response:
                tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == "beddb":
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "bed2ddb":
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "cooler":
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "time-interval-json":
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype in ("2dannodb", "imtiles"):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "geodb":
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "bam":
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]["max_tile_width"] = hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                "error": "Unknown filetype " + tileset_object.filetype
            }

        # Descriptive fields are attached to every entry, error or not.
        tileset_infos[tileset_uuid]["name"] = tileset_object.name
        tileset_infos[tileset_uuid]["datatype"] = tileset_object.datatype
        tileset_infos[tileset_uuid]["coordSystem"] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]["coordSystem2"] = tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
def tileset_info(request):
    ''' Get information about one or more tilesets.

    Tilesets have information critical to their display, such as the maximum
    zoom level as well as their width. This needs to be relayed to the client
    in order for it to know which tiles to request.

    Failures for an individual tileset (unknown uuid, private, unknown
    filetype) are reported as an ``{'error': ...}`` entry under that uuid; the
    other requested tilesets are still answered.

    Args:
        request (django.http.HttpRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object mapping each requested
            uuid to its tileset meta-information.
    '''
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    # NOTE(review): neither chromsizes_error nor the chromsizes series loaded
    # below is used further down; this looks like an unfinished feature.
    chromsizes_error = None

    if 'cs' in request.GET:
        # we need to call a different server to get the tiles
        # (the old test ran `in` against the bound `getlist` method, which
        # raises TypeError for every request carrying a `cs` parameter)
        if 'ci' not in request.GET:
            chromsizes_error = 'cs param present without ci'
        # call the request server and get the chromsizes
    elif 'ci' in request.GET:
        try:
            chromsizes = tm.Tileset.objects.get(uuid=request.GET['ci'])
            data = tcs.chromsizes_array_to_series(
                tcs.get_tsv_chromsizes(chromsizes.datafile.path))
        except Exception:
            # Best effort: a bad `ci` must not break the response, but the
            # failure should not disappear silently either.
            import logging

            logging.getLogger(__name__).warning(
                'Could not load chromsizes for ci=%s',
                request.GET['ci'],
                exc_info=True,
            )

    for tileset_uuid in tileset_uuids:
        if tileset_uuid == 'osm-image':
            # Static description; no database lookup is needed for it.
            tileset_infos[tileset_uuid] = {
                'min_x': 0,
                'max_height': 134217728,
                'min_y': 0,
                'max_y': 134217728,
                'max_zoom': 19,
                'tile_size': 256
            }
            continue

        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                'error': 'No such tileset with uid: {}'.format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {'error': "Forbidden"}
            continue

        filetype = tileset_object.filetype

        if filetype in ('hitile', 'hibed'):
            # Close the HDF5 file once the values are extracted; every value
            # is turned into a plain int/float before the file goes away.
            with h5py.File(tileset_object.datafile.path, 'r') as h5_file:
                h5_info = hdft.get_tileset_info(h5_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [int(h5_info['min_pos'])],
                    "max_pos": [int(h5_info['max_pos'])],
                    # smallest power of two covering the data range
                    "max_width": 2 ** math.ceil(
                        math.log(
                            h5_info['max_pos'] - h5_info['min_pos']
                        ) / math.log(2)
                    ),
                    "tile_size": int(h5_info['tile_size']),
                    "max_zoom": int(h5_info['max_zoom'])
                }
        elif filetype in ('bigwig', 'bigbed'):
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(
                tileset_object.datafile.path,
                chromsizes
            )

            if 'chromsizes' in tsinfo:
                # sizes are converted to plain ints before serialisation
                tsinfo['chromsizes'] = [
                    (c, int(s)) for c, s in tsinfo['chromsizes']
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif filetype == 'multivec':
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path)
        elif filetype == "elastic_search":
            # `urllib.urlopen` does not exist on Python 3.
            from urllib.request import urlopen

            # NOTE(review): datafile is concatenated with a str here; confirm
            # that it holds a URL string for this filetype.
            with urlopen(
                    tileset_object.datafile + "/tileset_info") as response:
                tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == 'beddb':
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'bed2ddb':
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'cooler':
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'time-interval-json':
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype in ('2dannodb', 'imtiles'):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'geodb':
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'bam':
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]['max_tile_width'] = \
                hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                'error': 'Unknown filetype ' + tileset_object.filetype
            }

        # Descriptive fields are attached to every entry, error or not.
        tileset_infos[tileset_uuid]['name'] = tileset_object.name
        tileset_infos[tileset_uuid]['datatype'] = tileset_object.datatype
        tileset_infos[tileset_uuid]['coordSystem'] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]['coordSystem2'] = \
            tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
def tileset_info(request):
    ''' Get information about one or more tilesets.

    Tilesets have information critical to their display, such as the maximum
    zoom level as well as their width. This needs to be relayed to the client
    in order for it to know which tiles to request.

    The first tileset that cannot be served aborts the whole request with an
    error response (status 500 for an unknown uuid or an unreadable file,
    403 for a private tileset requested by someone other than its owner).

    Args:
        request (django.http.HttpRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object mapping each requested
            uuid to its tileset meta-information, or an error object.
    '''
    global mats

    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    for tileset_uuid in tileset_uuids:
        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            return JsonResponse({
                'error': 'No such tileset with uuid: {}'.format(tileset_uuid)
            }, status=500)

        if tileset_object.private and request.user != tileset_object.owner:
            return JsonResponse({
                'error': 'Data set (uuid: {}) is not public'.format(
                    tileset_uuid
                )
            }, status=403)

        filetype = tileset_object.filetype

        if filetype in ("hitile", 'hibed'):
            # Open read-only (older h5py versions default to a writable mode)
            # and close the file once the values have been copied out as
            # plain ints.
            datapath = get_datapath(tileset_object.datafile)
            with h5py.File(datapath, 'r') as h5_file:
                h5_info = hdft.get_tileset_info(h5_file)
                max_pos = int(h5_info['max_pos'])
                tileset_infos[tileset_uuid] = {
                    "min_pos": [0],
                    "max_pos": [max_pos],
                    # smallest power of two covering [0, max_pos]
                    "max_width": 2 ** math.ceil(
                        math.log(max_pos - 0) / math.log(2)
                    ),
                    "tile_size": int(h5_info['tile_size']),
                    "max_zoom": int(h5_info['max_zoom'])
                }
        elif filetype == "elastic_search":
            # NOTE(review): `urllib.urlopen` only exists on Python 2; on
            # Python 3 this needs `urllib.request.urlopen` - confirm the
            # target interpreter.
            response = urllib.urlopen(
                tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == 'beddb':
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                get_datapath(tileset_object.datafile)
            )
        elif filetype == 'bed2ddb':
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                get_datapath(tileset_object.datafile)
            )
        else:
            # Every other filetype is served from the module-level `mats`
            # cache, which is filled on first use.
            dsetname = get_datapath(tileset_object.datafile)

            if dsetname not in mats:
                try:
                    make_mats(dsetname)
                except Exception:
                    return JsonResponse({
                        'error': 'File not found (uuid: {})'.format(
                            tileset_uuid
                        )
                    }, status=500)

            tileset_infos[tileset_uuid] = mats[dsetname][1]

        tileset_infos[tileset_uuid]['name'] = tileset_object.name

    return JsonResponse(tileset_infos)