Пример #1
0
def test_random_importance():
    """Check tile consistency when aggregating with random importance.

    A .seg file is aggregated through the ``bedfile`` CLI command with at
    most two entries per tile and a random importance column. Every entry
    found in tile 15 of zoom level 5 must then also be found in tile 30 or
    tile 31 of zoom level 6.
    """
    runner = clt.CliRunner()
    input_file = op.join(testdir, 'sample_data',
                         '25435_PM15-000877_SM-7QK6O.seg')

    # Write into a temporary directory so that the output is always removed
    # and no open file handle is left behind.
    with tempfile.TemporaryDirectory() as tmpdirname:
        output_file = op.join(tmpdirname, 'random_importance.beddb')

        result = runner.invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '2', '--importance-column', 'random',
             '--assembly', 'b37', '--has-header',
             '--output-file', output_file])

        # Fail right here, with the command output, if aggregation failed.
        assert result.exit_code == 0, result.output

        # Smoke checks: these calls only need to complete without raising.
        # TODO: Make assertions about the results
        cdt.get_tileset_info(output_file)
        cdt.get_tiles(output_file, 0, 0)
        cdt.get_tiles(output_file, 1, 0)
        cdt.get_tiles(output_file, 1, 1)

        def tile_starts(zoom, position):
            """Collect the xStart values of all entries in one tile."""
            starts = set()
            for row in cdt.get_tiles(output_file, zoom, position).values():
                for rect in row:
                    starts.add(rect['xStart'])
            return starts

        expected = tile_starts(5, 15)
        present = tile_starts(6, 30) | tile_starts(6, 31)

        missing = expected - present
        assert not missing, 'entries missing at zoom 6: {}'.format(
            sorted(missing))
Пример #2
0
def test_clodius_aggregate_bedpe2():
    """Aggregate a BED file as 2D data using the galGal6 chromsizes file.

    The same columns are used for both ends of each entry. The test mainly
    ensures that aggregation does not raise; it then checks the reported
    ``max_length`` and that the output has the same size as the reference
    database.
    """
    sample_dir = op.join(testdir, 'sample_data')
    bed_path = op.join(sample_dir, 'galGal6.bed')
    sizes_path = op.join(sample_dir, 'galGal6.chrom.sizes')
    reference_db = op.join(sample_dir, 'galGal6.bed.multires.db')

    aggregate_options = dict(
        chr1_col=1, chr2_col=1,
        from1_col=2, from2_col=2,
        to1_col=3, to2_col=3,
        importance_column=None,
        chromosome=None,
        chromsizes_filename=sizes_path,
        max_per_tile=100,
        tile_size=1024,
        has_header=True,
    )

    with tempfile.TemporaryDirectory() as scratch_dir:
        produced_db = op.join(scratch_dir, 'blah.bed2ddb')

        # No assembly is passed; the chromsizes file is supplied instead.
        cca._bedpe(bed_path, produced_db, None, **aggregate_options)

        info = cdt.get_tileset_info(produced_db)

        produced_size = os.stat(produced_db).st_size
        reference_size = os.stat(reference_db).st_size

        assert info['max_length'] == 1065365426
        assert produced_size == reference_size
Пример #3
0
def test_get_tileset_info():
    """Check the tileset info reported for the short gene annotations db."""
    info = cdt.get_tileset_info('test/sample_data/gene_annotations.short.db')

    assert info['zoom_step'] == 1
    assert info['max_length'] == 3137161264
    # max_width must lie strictly between four and five billion.
    assert 4000000000 < info['max_width'] < 5000000000
Пример #4
0
def test_get_tileset_info():
    """Check the tileset info reported for the arrowhead domains db."""
    info = cdt.get_tileset_info(
        "test/sample_data/arrowhead_domains_short.txt.multires.db"
    )

    assert info["zoom_step"] == 1
    assert info["max_length"] == 3095693981
    # max_width must lie strictly between four and five billion.
    assert 4000000000 < info["max_width"] < 5000000000
Пример #5
0
def test_get_tileset_info():
    """Check the tileset info reported for the arrowhead domains db."""
    db_path = 'test/sample_data/arrowhead_domains_short.txt.multires.db'
    info = cdt.get_tileset_info(db_path)

    zoom_step = info['zoom_step']
    max_length = info['max_length']

    assert zoom_step == 1
    assert max_length == 3095693981
    # max_width must lie strictly between four and five billion.
    assert 4000000000 < info['max_width'] < 5000000000
Пример #6
0
def test_clodius_aggregate_bedpe():
    """Smoke-test bedpe aggregation and the 2D tile accessors.

    The sample bedpe file is aggregated into a temporary database by
    calling ``cca._bedpe`` directly (the CLI entry point is not exercised
    here). The tile and tileset-info getters are then called; they only
    need to complete without raising.
    """
    bedpe_path = op.join(testdir, "sample_data", "isidro.bedpe")
    aggregate_options = {
        "importance_column": None,
        "chromosome": None,
        "max_per_tile": 100,
        "tile_size": 1024,
        "has_header": True,
    }

    with tempfile.TemporaryDirectory() as scratch_dir:
        db_path = op.join(scratch_dir, "isidro.bed2ddb")

        cca._bedpe(bedpe_path, db_path, "b37", **aggregate_options)

        # TODO: Make assertions about the results of the calls below
        cdt.get_2d_tiles(db_path, 0, 0, 0)
        cdt.get_tileset_info(db_path)

        cdt.get_2d_tiles(db_path, 1, 0, 0, numx=2, numy=2)
        cdt.get_tileset_info(db_path)
Пример #7
0
def test_clodius_aggregate_bedpe():
    """Smoke-test bedpe aggregation and the 2D tile accessors.

    The sample bedpe file is aggregated by calling ``cca._bedpe`` directly
    (the CLI entry point is not exercised here). The tile and tileset-info
    getters are then called; they only need to complete without raising.
    """
    # Imported locally so the test does not rely on a module-level import.
    import tempfile

    input_file = op.join(testdir, 'sample_data', 'isidro.bedpe')

    # Use a private temporary directory instead of a fixed /tmp path: it is
    # portable, cannot collide with concurrent test runs, and is removed
    # when the block exits.
    with tempfile.TemporaryDirectory() as tmpdirname:
        output_file = op.join(tmpdirname, 'isidro.bed2ddb')

        cca._bedpe(input_file,
                   output_file,
                   'b37',
                   importance_column=None,
                   chromosome=None,
                   max_per_tile=100,
                   tile_size=1024,
                   has_header=True)

        # TODO: Make assertions about the results of the calls below
        cdt.get_2d_tiles(output_file, 0, 0, 0)
        cdt.get_tileset_info(output_file)

        cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2)
        cdt.get_tileset_info(output_file)
Пример #8
0
def test_random_importance():
    """Check that a known entry survives random-importance aggregation.

    A .seg file is aggregated through the ``bedfile`` CLI command with at
    most two entries per tile and a random importance column. The entry
    whose ``xEnd`` is 2195875458 must then be present in tile 32 of zoom
    level 6.
    """
    runner = clt.CliRunner()
    input_file = op.join(
        testdir, 'sample_data', '25435_PM15-000877_SM-7QK6O.seg')

    # Write into a temporary directory so that the output is always removed
    # and no open file handle is left behind.
    with tempfile.TemporaryDirectory() as tmpdirname:
        output_file = op.join(tmpdirname, 'random_importance.beddb')

        result = runner.invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '2', '--importance-column', 'random',
             '--assembly', 'b37', '--has-header',
             '--output-file', output_file])

        # Fail right here, with the command output, if aggregation failed
        # (replaces the unconditional debug printing of result details).
        assert result.exit_code == 0, result.output

        # Smoke checks: these calls only need to complete without raising.
        cdt.get_tileset_info(output_file)
        cdt.get_tiles(output_file, 0, 0)
        cdt.get_tiles(output_file, 1, 0)
        cdt.get_tiles(output_file, 1, 1)

        # Tile 31 used to be searched as well, but its result was reset
        # before the assertion, so it never influenced the outcome. It is
        # kept as a smoke check only.
        cdt.get_tiles(output_file, 6, 31)

        found = any(
            rect['xEnd'] == 2195875458
            for row in cdt.get_tiles(output_file, 6, 32).values()
            for rect in row
        )
        assert found
Пример #9
0
def tileset_info(request):
    """Get information about one or more tilesets.

    Tilesets have information critical to their display, such as the
    maximum zoom level and their width. This needs to be relayed to the
    client in order for it to know which tiles to request.

    Each requested uuid is handled independently. A problem with one
    tileset (unknown uuid, access denied, unknown filetype) is reported as
    an ``{"error": ...}`` entry under that uuid rather than failing the
    whole request.

    Args:
        request (django.http.HTTPRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object mapping every requested
            uuid to its meta-information (or to an error entry).
    """
    import logging

    logger = logging.getLogger(__name__)

    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    if "cs" in request.GET:
        # TODO: call the remote server and get the chromsizes.
        # Membership has to be tested on the QueryDict itself;
        # `request.GET.getlist` is a bound method and using `in` on it
        # raises TypeError.
        if "ci" not in request.GET:
            logger.warning("cs param present without ci")
    elif "ci" in request.GET:
        # Best effort: the loaded chromsizes are not used further yet, so a
        # failure here must not break the request.
        try:
            chromsizes = tm.Tileset.objects.get(uuid=request.GET["ci"])
            tcs.chromsizes_array_to_series(
                tcs.get_tsv_chromsizes(chromsizes.datafile.path)
            )
        except Exception as ex:
            logger.warning(
                "Unable to load chromsizes for ci=%s: %s", request.GET["ci"], ex
            )

    for tileset_uuid in tileset_uuids:
        # Synthetic tileset that has no database entry; answer it before
        # issuing any query.
        if tileset_uuid == "osm-image":
            tileset_infos[tileset_uuid] = {
                "min_x": 0,
                "max_height": 134217728,
                "min_y": 0,
                "max_y": 134217728,
                "max_zoom": 19,
                "tile_size": 256,
            }
            continue

        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                "error": "No such tileset with uid: {}".format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {"error": "Forbidden"}
            continue

        if tileset_object.requiresAuthentication and not request.user.is_authenticated:
            # dataset requires a logged-in user
            tileset_infos[tileset_uuid] = {
                "error": "This request required authentication"
            }
            continue

        if tileset_object.requiresAuthentication and (
            ("accessibleTilesets" not in request.session)
            or (tileset_uuid not in request.session["accessibleTilesets"])
        ):
            # dataset is not accessible for this user
            tileset_infos[tileset_uuid] = {
                "error": "You don't have access to this tileset"
            }
            continue

        filetype = tileset_object.filetype

        if filetype in ("hitile", "hibed"):
            # Close the HDF5 file as soon as its metadata has been copied
            # into plain Python values.
            with h5py.File(tileset_object.datafile.path, "r") as hdf_file:
                hdf_info = hdft.get_tileset_info(hdf_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [int(hdf_info["min_pos"])],
                    "max_pos": [int(hdf_info["max_pos"])],
                    # smallest power of two covering the position range
                    "max_width": 2
                    ** math.ceil(
                        math.log(hdf_info["max_pos"] - hdf_info["min_pos"])
                        / math.log(2)
                    ),
                    "tile_size": int(hdf_info["tile_size"]),
                    "max_zoom": int(hdf_info["max_zoom"]),
                }
        elif filetype in ("bigwig", "bigbed"):
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(tileset_object.datafile.path, chromsizes)
            if "chromsizes" in tsinfo:
                # cast sizes to plain ints for the JSON response
                tsinfo["chromsizes"] = [(c, int(s)) for c, s in tsinfo["chromsizes"]]
            tileset_infos[tileset_uuid] = tsinfo
        elif filetype == "multivec":
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "elastic_search":
            # NOTE(review): `urllib.urlopen` only exists on Python 2
            # (Python 3 has `urllib.request.urlopen`) - confirm the target
            # interpreter before relying on this branch.
            response = urllib.urlopen(tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == "beddb":
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "bed2ddb":
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "cooler":
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "time-interval-json":
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype in ("2dannodb", "imtiles"):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "geodb":
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "bam":
            tileset_infos[tileset_uuid] = ctb.tileset_info(tileset_object.datafile.path)
            tileset_infos[tileset_uuid]["max_tile_width"] = hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                "error": "Unknown filetype " + filetype
            }

        # Descriptive fields are attached to every entry that reaches this
        # point, including the unknown-filetype error entry.
        tileset_infos[tileset_uuid]["name"] = tileset_object.name
        tileset_infos[tileset_uuid]["datatype"] = tileset_object.datatype
        tileset_infos[tileset_uuid]["coordSystem"] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]["coordSystem2"] = tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
Пример #10
0
def tileset_info(request):
    ''' Get information about one or more tilesets.

    Tilesets have information critical to their display, such as the
    maximum zoom level and their width. This needs to be relayed to the
    client in order for it to know which tiles to request.

    Each requested uuid is handled independently. A problem with one
    tileset (unknown uuid, access denied, unknown filetype) is reported as
    an ``{'error': ...}`` entry under that uuid rather than failing the
    whole request.

    Args:
        request (django.http.HTTPRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object mapping every requested
            uuid to its meta-information (or to an error entry).
    '''
    import logging

    logger = logging.getLogger(__name__)

    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    if 'cs' in request.GET:
        # TODO: call the remote server and get the chromsizes.
        # Membership has to be tested on the QueryDict itself;
        # `request.GET.getlist` is a bound method and using `in` on it
        # raises TypeError.
        if 'ci' not in request.GET:
            logger.warning('cs param present without ci')
    elif 'ci' in request.GET:
        # Best effort: the loaded chromsizes are not used further yet, so a
        # failure here must not break the request.
        try:
            chromsizes = tm.Tileset.objects.get(uuid=request.GET['ci'])
            tcs.chromsizes_array_to_series(
                tcs.get_tsv_chromsizes(chromsizes.datafile.path))
        except Exception as ex:
            logger.warning(
                'Unable to load chromsizes for ci=%s: %s',
                request.GET['ci'], ex)

    for tileset_uuid in tileset_uuids:
        # Synthetic tileset that has no database entry; answer it before
        # issuing any query.
        if tileset_uuid == 'osm-image':
            tileset_infos[tileset_uuid] = {
                'min_x': 0,
                'max_height': 134217728,
                'min_y': 0,
                'max_y': 134217728,
                'max_zoom': 19,
                'tile_size': 256
            }
            continue

        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                'error': 'No such tileset with uid: {}'.format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {'error': "Forbidden"}
            continue

        filetype = tileset_object.filetype

        if filetype in ('hitile', 'hibed'):
            # Close the HDF5 file as soon as its metadata has been copied
            # into plain Python values.
            with h5py.File(tileset_object.datafile.path, 'r') as hdf_file:
                hdf_info = hdft.get_tileset_info(hdf_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [int(hdf_info['min_pos'])],
                    "max_pos": [int(hdf_info['max_pos'])],
                    # smallest power of two covering the position range
                    "max_width": 2 ** math.ceil(
                        math.log(
                            hdf_info['max_pos'] - hdf_info['min_pos']
                        ) / math.log(2)
                    ),
                    "tile_size": int(hdf_info['tile_size']),
                    "max_zoom": int(hdf_info['max_zoom'])
                }
        elif filetype in ('bigwig', 'bigbed'):
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(
                tileset_object.datafile.path,
                chromsizes
            )
            if 'chromsizes' in tsinfo:
                # cast sizes to plain ints for the JSON response
                tsinfo['chromsizes'] = [
                    (c, int(s)) for c, s in tsinfo['chromsizes']
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif filetype == 'multivec':
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path)
        elif filetype == "elastic_search":
            # NOTE(review): `urllib.urlopen` only exists on Python 2
            # (Python 3 has `urllib.request.urlopen`) - confirm the target
            # interpreter before relying on this branch.
            response = urllib.urlopen(
                tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == 'beddb':
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'bed2ddb':
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'cooler':
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'time-interval-json':
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype in ('2dannodb', 'imtiles'):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'geodb':
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'bam':
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]['max_tile_width'] = hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                'error': 'Unknown filetype ' + filetype
            }

        # Descriptive fields are attached to every entry that reaches this
        # point, including the unknown-filetype error entry.
        tileset_infos[tileset_uuid]['name'] = tileset_object.name
        tileset_infos[tileset_uuid]['datatype'] = tileset_object.datatype
        tileset_infos[tileset_uuid]['coordSystem'] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]['coordSystem2'] =\
            tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
Пример #11
0
def tileset_info(request):
    ''' Get information about one or more tilesets.

    Tilesets have information critical to their display, such as the
    maximum zoom level and their width. This needs to be relayed to the
    client in order for it to know which tiles to request.

    The first tileset that cannot be served aborts the whole request with
    an error response: status 500 for an unknown uuid or a file that
    cannot be loaded, status 403 for a private tileset requested by
    someone other than its owner.

    Args:
        request (django.http.HTTPRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing
            the tileset meta-information, keyed by uuid, or a single
            ``{'error': ...}`` object on failure.
    '''
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    for tileset_uuid in tileset_uuids:
        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            return JsonResponse({
                'error': 'No such tileset with uuid: {}'.format(tileset_uuid)
            }, status=500)

        if tileset_object.private and request.user != tileset_object.owner:
            return JsonResponse({
                'error': 'Data set (uuid: {}) is not public'.format(
                    tileset_uuid
                )
            }, status=403)

        filetype = tileset_object.filetype

        if filetype in ('hitile', 'hibed'):
            # Open read-only (the metadata is only read) and close the file
            # as soon as the values have been copied out.
            with h5py.File(
                get_datapath(tileset_object.datafile), 'r'
            ) as hdf_file:
                hdf_info = hdft.get_tileset_info(hdf_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [0],
                    # cast to plain ints so the values are JSON serializable
                    "max_pos": [int(hdf_info['max_pos'])],
                    # smallest power of two covering [0, max_pos]
                    "max_width": 2 ** math.ceil(
                        math.log(hdf_info['max_pos'] - 0) / math.log(2)
                    ),
                    "tile_size": int(hdf_info['tile_size']),
                    "max_zoom": int(hdf_info['max_zoom'])
                }
        elif filetype == "elastic_search":
            # NOTE(review): `urllib.urlopen` only exists on Python 2
            # (Python 3 has `urllib.request.urlopen`) - confirm the target
            # interpreter before relying on this branch.
            response = urllib.urlopen(
                tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == 'beddb':
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                get_datapath(tileset_object.datafile)
            )
        elif filetype == 'bed2ddb':
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                get_datapath(tileset_object.datafile)
            )
        else:
            # Every other filetype is served from the module-level `mats`
            # cache, which is filled on demand by make_mats().
            dsetname = get_datapath(tileset_object.datafile)

            if dsetname not in mats:
                try:
                    make_mats(dsetname)
                except Exception:
                    return JsonResponse({
                        'error': 'File not found (uuid: {})'.format(
                            tileset_uuid
                        )
                    }, status=500)

            tileset_infos[tileset_uuid] = mats[dsetname][1]

        tileset_infos[tileset_uuid]['name'] = tileset_object.name

    return JsonResponse(tileset_infos)