Пример #1
0
def test_get_containingds_from_agginfo():
    """Check which record is reported as containing a given query path."""
    find = _get_containingds_from_agginfo

    # nothing can be found in an empty set of records
    eq_(None, find({}, 'any'))

    # a query that is itself a record key is returned unchanged
    eq_('match', find({'match': {}, 'other': {}}, 'match'))

    # a query underneath a record resolves to that record
    down = op.join('match', 'down')
    eq_('match', find({'match': {}}, down))

    # with nested records, the deepest one containing the query is reported
    down_under = op.join(down, 'under')
    nested = {'match': {}, down: {}}
    eq_(down, find(nested, down_under))

    # the same holds when all paths involved are absolute
    abs_match = op.abspath('match')
    abs_down = op.abspath(down)
    eq_(abs_down,
        find({abs_match: {}, abs_down: {}}, op.abspath(down_under)))

    # mixing relative and absolute paths is answered with a ValueError
    assert_raises(ValueError, find, {'match': {}}, abs_down)
    assert_raises(ValueError, find, {abs_match: {}}, down)
Пример #2
0
def test_get_containingds_from_agginfo():
    """Verify the lookup of the record that contains a query path."""
    down = op.join('match', 'down')
    down_under = op.join(down, 'under')

    # each entry: (expected result, aggregate info records, query path)
    expectations = (
        # empty info yields no match
        (None, {}, 'any'),
        # direct hit returns itself
        ('match', {'match': {}, 'other': {}}, 'match'),
        # a query below a record matches that record
        ('match', {'match': {}}, down),
        # the closest containing record is preferred
        (down, {'match': {}, down: {}}, down_under),
        # absolute paths work too
        (op.abspath(down),
         {op.abspath('match'): {}, op.abspath(down): {}},
         op.abspath(down_under)),
    )
    for expected, agginfo, query in expectations:
        eq_(expected, _get_containingds_from_agginfo(agginfo, query))

    # relative and absolute paths must not be mixed: ValueError expected
    mixed = (
        ({'match': {}}, op.abspath(down)),
        ({op.abspath('match'): {}}, down),
    )
    for agginfo, query in mixed:
        assert_raises(
            ValueError, _get_containingds_from_agginfo, agginfo, query)
Пример #3
0
def _get_dsinfo_from_aggmetadata(ds_path, path, recursive, db):
    """Collect aggregated metadata records relevant for a query path.

    The aggregate info of the dataset at `ds_path` is loaded (with
    `abspath=True`), and the record of the dataset that contains `path`
    is copied into `db` under the same key it has in the loaded info.
    With `recursive`, every record that `path_is_subpath()` reports as
    being underneath that dataset is copied too (each as its own entry).

    Parameters
    ----------
    ds_path : str
      path of the source dataset whose aggregate info is queried
    path : str
      absolute path for which to obtain metadata
    recursive : bool
      whether to also report records underneath the containing dataset
    db : dict
      modified in place; receives one entry per reported dataset

    Returns
    -------
    tuple or list
      A tuple is an error message (a format string followed by its
      arguments), returned when no containing dataset was found. A list
      contains the `db` keys of all datasets on which info was put into
      the DB, starting with the containing dataset.
    """
    # TODO cache these
    records = load_ds_aggregate_db(Dataset(ds_path), abspath=True)

    container = _get_containingds_from_agginfo(records, path)
    if container is None:
        # nothing found: hand back message template plus arguments, which
        # becomes the message in the result for the query path
        message = \
            "No matching aggregated metadata for path '%s' in Dataset at %s"
        return (message, op.relpath(path, start=ds_path), ds_path)

    # the containing dataset itself is always reported
    db[container] = records[container]
    found = [container]

    if recursive:
        # lazily walk all records and keep those underneath the container
        below = (p for p in records if path_is_subpath(p, container))
        for subpath in below:
            db[subpath] = records[subpath]
            found.append(subpath)
        # TODO we must keep the info on these recursively discovered
        # datasets somewhere, because we cannot rediscover them on the
        # filesystem when updating the datasets later on
    return found
Пример #4
0
def _get_dsinfo_from_aggmetadata(ds_path, path, recursive, db):
    """Look up aggregated metadata info for a path and store it in `db`.

    Aggregate info is loaded from the dataset at `ds_path` with
    `abspath=True`. The entry of the dataset containing `path` is placed
    into `db`, keyed exactly as in the loaded info. If `recursive` is
    set, entries for which `path_is_subpath()` holds relative to that
    dataset get their own `db` entries as well.

    Parameters
    ----------
    ds_path : str
      path of the dataset to load aggregate info from
    path : str
      absolute path for which to obtain metadata
    recursive : bool
      also include entries located underneath the containing dataset
    db : dict
      updated in place with one entry per matching dataset

    Returns
    -------
    tuple or list
      A tuple (format string plus its arguments) is an error message for
      the case that no dataset containing `path` is known. A list holds
      the keys of all entries that were put into `db`; the containing
      dataset comes first.
    """
    # TODO cache these
    aggdb = load_ds_aggregate_db(Dataset(ds_path), abspath=True)

    top = _get_containingds_from_agginfo(aggdb, path)
    if top is None:
        # no match; the tuple serves as the message in the result for
        # the query path
        return (
            "No matching aggregated metadata for path '%s' in Dataset at %s",
            op.relpath(path, start=ds_path),
            ds_path,
        )

    # simple case: only the containing dataset is of interest
    db[top] = aggdb[top]
    if not recursive:
        return [top]

    # recursive case: additionally pick up every entry underneath `top`
    matched = [top]
    for candidate in aggdb:
        if not path_is_subpath(candidate, top):
            continue
        db[candidate] = aggdb[candidate]
        matched.append(candidate)
    # TODO we must keep the info on these recursively discovered datasets
    # somewhere, because we cannot rediscover them on the filesystem
    # when updating the datasets later on
    return matched
Пример #5
0
def _get_dsinfo_from_aggmetadata(ds_path, path, recursive, db):
    """Gather aggregated metadata info for a path from a dataset's records.

    The aggregate info file located at `agginfo_relpath` inside `ds_path`
    is loaded. Its records are looked up with `path` expressed relative
    to `ds_path`. The record of the dataset containing the query path is
    stored in `db` under `ds_path` joined with the record key; with
    `recursive`, records whose key starts with the containing dataset's
    key plus separator are stored likewise.

    Before a record is stored, any of its `location_keys` values that are
    not absolute are joined onto the directory of the aggregate info
    file. This modifies the loaded record itself.

    Parameters
    ----------
    ds_path : str
      path of the source dataset
    path : str
      absolute path for which to obtain metadata
    recursive : bool
      also include records underneath the containing dataset
    db : dict
      updated in place with one entry per matching dataset

    Returns
    -------
    tuple or list
      A tuple (format string plus argument) is an error message, given
      when no containing dataset is found. A list contains the `db` keys
      of all datasets on which info was put into the DB.
    """
    agginfo_file = opj(ds_path, agginfo_relpath)
    agginfo_dir = dirname(agginfo_file)
    # TODO cache these
    records = _load_json_object(agginfo_file)

    def _absolutize_locations(record):
        # object locations in the DB must be absolute, so that they can be
        # copied easily to all relevant datasets
        for key in location_keys:
            if key not in record or isabs(record[key]):
                continue
            record[key] = opj(agginfo_dir, record[key])
        return record

    query = relpath(path, start=ds_path)
    container = _get_containingds_from_agginfo(records, query)
    if container is None:
        # nothing found; the tuple becomes the message in the result for
        # the query path
        return ("No matching aggregated metadata in Dataset at %s", ds_path)

    # the containing dataset is always part of the answer
    container_abs = opj(ds_path, container)
    db[container_abs] = _absolutize_locations(records[container])
    found = [container_abs]

    if recursive:
        # also pick every record located underneath the containing dataset
        for key in records:
            if not key.startswith(_with_sep(container)):
                continue
            key_abs = opj(ds_path, key)
            db[key_abs] = _absolutize_locations(records[key])
            found.append(key_abs)
        # TODO we must keep the info on these recursively discovered
        # datasets somewhere, because we cannot rediscover them on the
        # filesystem when updating the datasets later on
    return found