def size(self, iterator, recurse=False, verbose=False):
        """ Yields (path, size-in-bytes) tuples for the selected data
        objects and collections.

        Examples:

        >>> session.bulk.size('~/data/out*.txt')
        >>> session.bulk.size('./data', recurse=True)

        Arguments:

        iterator: iterator or str
            Defines which items are subject to the bulk operation.
            Can be an iterator (e.g. using search_manager.find())
            or a string (which will be used to construct a
            search_manager.iglob() iterator). Data sizes will be returned
            for matching data objects and, if used recursively, collections.

        recurse: bool (default: False)
            Whether to use recursion, meaning that the data size of
            matching collections will be calculated as the sum of
            their data objects and subcollection sizes.

        verbose: bool (default: False)
            Whether to print more output.
        """
        if isinstance(iterator, str):
            iterator = self.session.search.iglob(iterator)

        for item in iterator:
            path = self.session.path.get_absolute_irods_path(item)

            if self.session.collections.exists(path):
                if recurse:
                    new_iterator = self.size(item + '/*',
                                             recurse=True,
                                             verbose=verbose)
                    size = sum([result[1] for result in new_iterator])
                else:
                    self.log('Skipping collection %s (no recursion)' % item,
                             verbose)
                    continue
            else:
                dirname = os.path.dirname(path)
                basename = os.path.basename(path)
                criteria = [
                    Criterion('=', Collection.name, dirname),
                    Criterion('=', DataObject.name, basename)
                ]
                fields = [DataObject.size]
                q = self.session.query(*fields).filter(*criteria)

                results = list(q.get_results())
                if len(results) > 1:
                    raise MultipleResultsFound('Different replicas of data '
                                               'object %s have different sizes'
                                               % path)

                size = results[0][DataObject.size]

            yield (item, size)
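
All of these examples lean on the python-irodsclient query API. The classes
they reference are typically imported along these lines (a sketch of the
usual imports, not copied from any one of the original modules):

from irods.session import iRODSSession
from irods.column import Criterion, Like
from irods.exception import MultipleResultsFound
from irods.models import (Collection, CollectionMeta, DataObject,
                          DataObjectMeta, Resource, User)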
Example #2
def get_logical_location_get(filename,
                             match_exact=None,
                             include_trash=None) -> str:
    max_rows = 1000
    offset = 0
    sess = create_session()
    conditions = []
    if match_exact:
        conditions += [DataObject.name == str(filename)]
    else:
        conditions += [
            Criterion('like', DataObject.name, '%' + str(filename) + '%')
        ]
    if not include_trash:
        conditions += [Criterion('not like', Collection.name, '%/trash/%')]
    results = sess.query(DataObject.name, Collection.name).\
        filter(*conditions).\
        offset(offset).\
        limit(max_rows).all()
    sess.cleanup()
    data = {'irods_filenames': []}
    for r in results.rows:
        data['irods_filenames'].append(
            str(r[Collection.name]) + '/' + str(r[DataObject.name]))

    return jsonify(data)
Example #3
def get_metadata_value_for_collection(session, coll_name, key):

    results = session.query(Collection, CollectionMeta).filter(
            Criterion('=', Collection.name, coll_name)).filter(
            Criterion('=', CollectionMeta.name, key))
    for r in results:
        return r[CollectionMeta.value]
    return ''
Example #4
def get_metadata_value(session, coll_name, data_name, key):

    results = session.query(DataObject, DataObjectMeta).filter(
            Criterion('=', Collection.name, coll_name)).filter(
            Criterion('=', DataObject.name, data_name)).filter(
            Criterion('=', DataObjectMeta.name, key))
    for r in results:
        return r[DataObjectMeta.value]
    return ''
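
A minimal usage sketch for the two getters above, assuming an established
iRODSSession named `session`; the paths and keys are hypothetical:

owner = get_metadata_value_for_collection(
    session, '/tempZone/home/rods/run1', 'filesystem::owner')
fs_path = get_metadata_value(
    session, '/tempZone/home/rods/run1', 'sample.fastq', 'filesystem::path')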
Example #5
    def get_flag_dataobj_names_by_user(self, user_id):
        """
        Returns the names of all flag data objects associated with the given user.
        :param user_id: wdk id of user to whom the flags pertain
        :return: list of all flag data object names satisfying the criteria.
        """
        criteria = [
            Criterion("=", Collection.name, paths.FLAGS_PATH),
            Criterion('like', DataObject.name, '%_u' + user_id + '%')
        ]
        return self.get_dataobj_names_by_query(criteria)
Example #6
    def get_event_dataobj_names_created_since(self, start_time):
        """
        Returns the names of all event data objects created since the given start time.
        :param start_time: start of the time window, in seconds since the epoch
        :return: list of all event data object names satisfying the criteria
        """
        criteria = [
            Criterion("=", Collection.name, paths.EVENTS_PATH),
            Criterion(">=", DataObject.create_time, start_time)
        ]
        return self.get_dataobj_names_by_query(criteria)
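
The helper get_dataobj_names_by_query is not shown in these snippets; a
minimal sketch of what it might look like, assuming the class keeps an
iRODSSession in self.session:

    def get_dataobj_names_by_query(self, criteria):
        # Hypothetical sketch: select only the data object name column
        # and collect the matching names.
        query = self.session.query(DataObject.name).filter(*criteria)
        return [row[DataObject.name] for row in query]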
Example #7
def get_syncable_irods_groups(sess):
    irods_group_names_set = set()
    # filter only rodsgroups
    query = sess.query(User.name, User.id,
                       User.type).filter(Criterion('=', User.type,
                                                   'rodsgroup'))
    n = 0
    for result in query:
        n = n + 1
        irodsGroup = sess.users.get(result[User.name])
        syncAVUs = irodsGroup.metadata.get_all(LDAP_SYNC_AVU)
        if not syncAVUs:
            irods_group_names_set.add(irodsGroup.name)
        elif (len(syncAVUs) == 1) and (syncAVUs[0].value == "true"):
            irods_group_names_set.add(irodsGroup.name)
        elif (len(syncAVUs) == 1) and (syncAVUs[0].value == "false"):
            logger.debug("AVU ldapSync=false found for group: {}".format(
                irodsGroup.name))
            continue
        else:
            logger.error(
                "found unexpected number of AVUs for key ldapSync and group: {} {}"
                .format(irodsGroup.name, len(syncAVUs)))

    logger.debug(
        "iRods groups found: {} (allowed for synchronization: {})".format(
            n, len(irods_group_names_set)))
    return irods_group_names_set
Example #8
def syncable_irods_users(sess):
    irods_user_names_set = set()
    # filter only rodsusers, filter out the special users, check which ones are not in the LDAP list
    query = sess.query(User.name, User.id,
                       User.type).filter(Criterion('=', User.type, 'rodsuser'))
    n = 0
    for result in query:
        n = n + 1
        irodsUser = sess.users.get(result[User.name])
        syncAVUs = irodsUser.metadata.get_all(LDAP_SYNC_AVU)
        if not syncAVUs:
            irods_user_names_set.add(result[User.name])
        elif (len(syncAVUs) == 1) and (syncAVUs[0].value == "true"):
            irods_user_names_set.add(result[User.name])
        elif (len(syncAVUs) == 1) and (syncAVUs[0].value == "false"):
            logger.debug("AVU ldapSync=false found for user: {}".format(
                irodsUser.name))
            continue
        else:
            logger.error(
                "found unexpected number of AVUs for key ldapSync and user: {} {}"
                .format(irodsUser.name, len(syncAVUs)))

    logger.debug(
        "iRods users found: {} (allowed for synchronization: {})".format(
            n, len(irods_user_names_set)))
    return irods_user_names_set
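
Judging by the log messages, LDAP_SYNC_AVU is a constant such as 'ldapSync'.
A sketch of opting a user out of synchronization by setting that AVU
(assumes a connected session `sess`):

from irods.meta import iRODSMeta

sess.users.get('someuser').metadata.add(iRODSMeta('ldapSync', 'false'))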
Example #9
    def test_query_like(self):
        '''Equivalent to:
        iquest "select RESC_NAME where RESC_NAME like 'dem%'"
        '''

        rows = self.sess.query(Resource).filter(
            Criterion('like', Resource.name, 'dem%')).get_results()
        self.assertIn('demoResc', [row[Resource.name] for row in rows])
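
A standalone equivalent of the test above, assuming a reachable iRODS server
and a client environment file in the default location:

import os
from irods.session import iRODSSession
from irods.column import Criterion
from irods.models import Resource

env_file = os.path.expanduser('~/.irods/irods_environment.json')
with iRODSSession(irods_env_file=env_file) as sess:
    for row in sess.query(Resource.name).filter(
            Criterion('like', Resource.name, 'dem%')):
        print(row[Resource.name])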
Example #10
def get_data_object_physical_path(coll_match, data_match, resc_match):

    session = session_object()
    NotInTrash = Criterion('not like', Collection.name, '%/trash/%')
    q = session.query(DataObject).filter(NotInTrash).filter(
        Collection.name == coll_match).filter(
            DataObject.name == data_match).filter(Resource.name == resc_match)
    return q.one()[DataObject.path]
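
Note that query.one() raises NoResultFound when nothing matches, so callers
may want to guard the lookup; a usage sketch with hypothetical arguments:

from irods.exception import NoResultFound

try:
    phys_path = get_data_object_physical_path(
        '/tempZone/home/rods', 'foo.txt', 'demoResc')  # hypothetical names
except NoResultFound:
    phys_path = None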
Example #11
def search_collection_metadata(q, config=None):
    print("Search::CollectionMeta", q)
    with new_session(config or {}) as session:
        query = session.query(Collection, CollectionMeta) \
            .add_keyword('zone', 'seq')

        for k in q:
            query = query.filter(Criterion('=', CollectionMeta.name, k)) \
                .filter(Criterion('=', CollectionMeta.value, q[k]))
 
        return [{
            "type": iRODSCollection,
            "id": result[Collection.id],
            "name": result[Collection.name]
        } for result in query]
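
A usage sketch with a hypothetical metadata key/value pair:

for hit in search_collection_metadata({'study_id': '1234'}):
    print(hit['name'])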

Example #12
def search_data_object_metadata(q, config=None):
    print("Search::DataObjectMeta", q)
    with new_session(config or {}) as session:
        query = session.query(DataObject, Collection.name) \
            .filter(Like(DataObject.path, "/irods-seq-sr%")) \
            .add_keyword('zone', 'seq')

        print("Searching for", q)

        for k in q:
            query = query.filter(Criterion('=', DataObjectMeta.name, k)) \
                .filter(Criterion('=', DataObjectMeta.value, q[k]))

        return [{
            "type": iRODSDataObject,
            "id": result[DataObject.id],
            "name": result[DataObject.name],
            "size": sizeof_fmt(result[DataObject.size]),
            "modified": result[DataObject.modify_time].isoformat(),
            "path": "{}/{}".format(result[Collection.name], result[DataObject.name])
        } for result in query]
Example #13
def do_audit(run_handle):

    run_data_dir_filesystem = "%s/hpc_storage/run_data/%s" % (
        phengs_path_prefix, run_handle)
    machine_fastqs_dir_filesystem = "%s/hpc_storage/machine_fastqs/%s" % (
        phengs_path_prefix, run_handle)

    env_file = os.path.expanduser('~/.irods/irods_environment.json')
    with iRODSSession(irods_env_file=env_file) as session:

        # select COLL_NAME, DATA_NAME where META_DATA_ATTR_NAME = 'filesystem::run_handle' and META_DATA_ATTR_VALUE = <run_handle>
        results = session.query(Collection.name, DataObject).filter(
                Criterion('=', DataObjectMeta.name, 'filesystem::run_handle')).filter(
                Criterion('=', DataObjectMeta.value, run_handle))

        for r in results:

            filesystem_path = get_metadata_value(session, r[Collection.name],
                                                 r[DataObject.name],
                                                 'filesystem::path')
            if not os.path.exists(filesystem_path):
                print("%s does not exist" % filesystem_path)
            else:
                print("%s exists" % filesystem_path)
Example #14
    def test_force_unlink(self):
        collection = self.coll_path
        filename = 'test_force_unlink.txt'
        file_path = '{collection}/{filename}'.format(**locals())
        
        # make object
        obj = helpers.make_object(self.sess, file_path)
        
        # force remove object
        obj.unlink(force=True)

        # should be gone
        with self.assertRaises(DataObjectDoesNotExist):
            obj = self.sess.data_objects.get(file_path)

        # make sure it's not in the trash either
        conditions = [DataObject.name == filename, 
                      Criterion('like', Collection.name, "/dev/trash/%%")]
        query = self.sess.query(DataObject.id, DataObject.name, Collection.name).filter(*conditions)
        results = query.all()
        self.assertEqual(len(results), 0)
Example #15
def search_data_object(q, config=None):
    print("Search::DataObject", q)
    with new_session(config or {}) as session:
        query = session.query(DataObject, Collection.name) \
            .filter(Like(DataObject.path, "/irods-seq-sr%")) \
            .filter(Criterion('like', DataObject.name, '%{}%'.format(q['value']))) \
            .add_keyword('zone', 'seq')

        results = [{
            "type": iRODSDataObject,
            "id": result[DataObject.id],
            "name": result[DataObject.name],
            "size": sizeof_fmt(result[DataObject.size]),
            "modified": result[DataObject.modify_time].isoformat(),
            "path": "{}/{}".format(result[Collection.name], result[DataObject.name])
        } for result in query]
        
        return {
            "id": "search-result",
            "name": q['value'],
            "count": len(results),
            "children": results
        }
Example #16
def do_restore(run_handle):

    run_data_dir_filesystem = "%s/hpc_storage/run_data/%s" % (
        phengs_path_prefix, run_handle)
    machine_fastqs_dir_filesystem = "%s/hpc_storage/machine_fastqs/%s" % (
        phengs_path_prefix, run_handle)

    env_file = os.path.expanduser('~/.irods/irods_environment.json')
    with iRODSSession(irods_env_file=env_file) as session:

        # Restore files

        # select COLL_NAME, DATA_NAME where META_DATA_ATTR_NAME = 'filesystem::run_handle' and META_DATA_ATTR_VALUE = <run_handle>
        results = session.query(Collection.name, DataObject).filter(
                Criterion('=', DataObjectMeta.name, 'filesystem::run_handle')).filter(
                Criterion('=', DataObjectMeta.value, run_handle))

        for r in results:

            # get filesystem attributes for the data object
            attrs = {
                key: get_metadata_value_for_data_object(
                    session, r[Collection.name], r[DataObject.name],
                    'filesystem::' + key)
                for key in ('path', 'atime', 'mtime', 'owner', 'perms',
                            'group')
            }

            print(r[Collection.name], r[DataObject.name], attrs['path'],
                  attrs['atime'], attrs['mtime'], attrs['owner'],
                  attrs['perms'], attrs['group'])
            restore_to_lustre(session, r[Collection.name], r[DataObject.name],
                              attrs['path'], attrs['atime'], attrs['mtime'],
                              attrs['owner'], attrs['perms'], attrs['group'])

        # Restore directory metadata

        # select COLL_NAME where META_COLL_ATTR_NAME = 'filesystem::run_handle' and META_COLL_ATTR_VALUE = <run_handle>
        results = session.query(Collection, CollectionMeta).filter(
                Criterion('=', CollectionMeta.name, 'filesystem::run_handle')).filter(
                Criterion('=', CollectionMeta.value, run_handle))

        for r in results:

            # get filesystem attributes for the collection
            attrs = {
                key: get_metadata_value_for_collection(
                    session, r[Collection.name], 'filesystem::' + key)
                for key in ('path', 'atime', 'mtime', 'owner', 'perms',
                            'group')
            }

            print(r[Collection.name], attrs['path'], attrs['atime'],
                  attrs['mtime'], attrs['owner'], attrs['perms'],
                  attrs['group'])
            restore_to_lustre(session, r[Collection.name], None,
                              attrs['path'], attrs['atime'], attrs['mtime'],
                              attrs['owner'], attrs['perms'], attrs['group'])

    # create the restore_from_archive file
    os.system("touch %s/restore_from_archive" % run_data_dir_filesystem)

    # remove the written_to_archive file, ignore error if it does not exist
    os.system("rm %s/written_to_archive 2>/dev/null" % run_data_dir_filesystem)
Example #17
    def iglob(self, pattern, debug=False):
        """ Returns an iterator of iRODS collection and data object paths
        which match the given pattern, similar to the glob.iglob builtin.

        .. note::
 
            Currently only '*' is expanded. The other special characters
            '?' and '[]' are not (yet) taken into account.

        Examples:

        >>> list(session.search.iglob('m*/ch4.xyz'))
            ['molecules_database/ch4.xyz']
        >>> list(session.search.iglob('./*/*'))
            ['./molecule_database/a.out', './foo/bar.so']
        >>> list(session.search.iglob('~/foo/c*.xyz'))
            ['~/foo/ch4.xyz', '~/foo/co2.xyz']

        Arguments:

        pattern: str
            The search pattern

        debug: bool (default: False)
            Set to True for debugging info
        """
        self.log('DBG| search.iglob pattern: %s' % pattern, debug)

        if '*' in pattern:
            index = pattern.index('*')
            path_root = os.path.dirname(pattern[:index])
        else:
            path_root = pattern

        path_root = path_root.rstrip('/') if path_root else '.'
        path_root_abs = self.session.path.get_absolute_irods_path(path_root)

        # First, the collections
        pattern_collection = self.session.path.get_absolute_irods_path(pattern)
        pattern_collection = pattern_collection.replace('*', '%')
        self.log('DBG| search.iglob pattern_collection: %s' %
                 pattern_collection, debug)

        fields = [Collection.name]
        criteria = [
            Criterion('like', Collection.name, pattern_collection),
            Criterion('not like', Collection.name, pattern_collection + '/%')
        ]
        q = self.session.query(*fields).filter(*criteria)

        for result in q.get_results():
            path = result[Collection.name].replace(path_root_abs, path_root, 1)
            yield path

        # Next, the data objects
        pattern_collection = os.path.dirname(pattern_collection)
        pattern_object = os.path.basename(pattern)
        pattern_object = pattern_object.replace('*', '%')
        self.log('DBG| search.iglob pattern_object: %s' % pattern_object,
                 debug)

        fields = [Collection.name, DataObject.name]
        criteria = [
            Criterion('like', Collection.name, pattern_collection),
            Criterion('not like', Collection.name, pattern_collection + '/%'),
            Criterion('like', DataObject.name, pattern_object)
        ]

        q = self.session.query(*fields).filter(*criteria)

        for result in q.get_results():
            path = os.path.join(result[Collection.name],
                                result[DataObject.name])
            path = path.replace(path_root_abs, path_root, 1)
            yield path
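
A usage sketch, assuming the surrounding search manager is reachable as
session.search:

for path in session.search.iglob('~/data/out*.txt'):
    print(path)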
Example #18
def do_register(run_handle):

    error_file_name = ('%s_errors.log' % run_handle).replace('/', '_')
    with open(error_file_name, 'w') as error_file:

        checksum_map = {}

        run_data_dir_filesystem = "%s/hpc_storage/run_data/%s" % (
            phengs_path_prefix, run_handle)
        machine_fastqs_dir_filesystem = "%s/hpc_storage/machine_fastqs/%s" % (
            phengs_path_prefix, run_handle)

        # remove the restore_from_archive and written_to_archive files, ignore error if they do not exist
        os.system("rm %s/restore_from_archive 2>/dev/null" %
                  run_data_dir_filesystem)
        os.system("rm %s/written_to_archive 2>/dev/null" %
                  run_data_dir_filesystem)

        recursively_register_and_checksum(run_data_dir_filesystem,
                                          checksum_map, run_handle, error_file)
        recursively_register_and_checksum(machine_fastqs_dir_filesystem,
                                          checksum_map, run_handle, error_file)

        # open results_ngssample_dirs and register directories in it
        results_file = "%s/results_ngssample_dirs" % run_data_dir_filesystem

        try:
            with open(results_file) as f:
                for line in f:
                    os_path = line.strip()

                    # register
                    recursively_register_and_checksum(os_path, checksum_map,
                                                      run_handle, error_file)

                    # replicate and trim
                    recursively_replicate_and_trim(os_path, run_handle)
        except IOError:
            print('WARNING:  No results_ngssample_dirs file found.')
            error_file.write(
                'WARNING:  No results_ngssample_dirs file found.\n')

        # replicate and trim run_data
        recursively_replicate_and_trim(run_data_dir_filesystem, run_handle)

        # replicate and trim fastqs
        recursively_replicate_and_trim(machine_fastqs_dir_filesystem,
                                       run_handle)

        # do post verification

        # wait for all rules to complete
        def count_active_rules():
            query = ("select COUNT(RULE_EXEC_ID) "
                     "where RULE_EXEC_USER_NAME = 'ngsservicearchive' "
                     "and RULE_EXEC_NAME like '%{}%'".format(run_handle))
            return int(subprocess.check_output(['iquest', '%s', query]).strip())

        active_rules = count_active_rules()
        while active_rules > 0:
            print("Waiting for replication jobs to complete.  Job count = %d" %
                  active_rules)
            time.sleep(5)
            active_rules = count_active_rules()

        print("Replication jobs completed...")

        # now compare checksum with those in checksum_map
        print("-------------------")
        print("Validating files...")
        print("-------------------")
        env_file = os.path.expanduser('~/.irods/irods_environment.json')

        validation_status = True

        with iRODSSession(irods_env_file=env_file) as session:

            for file_path in checksum_map:

                # find object in iRODS and get checksum
                # select COLL_NAME, DATA_NAME where META_DATA_ATTR_NAME = 'filesystem::path' and META_DATA_ATTR_VALUE = <file_path>

                found_file = False
                results = session.query(Collection.name, DataObject,
                                        Resource.name).filter(
                        Criterion('=', DataObjectMeta.name, 'filesystem::path')).filter(
                        Criterion('=', DataObjectMeta.value, file_path)).filter(
                        Criterion('=', Resource.name, 's3_resc'))

                for result in results:

                    found_file = True

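                    # iRODS stores SHA-256 checksums as 'sha2:<base64 digest>';
                    # strip the 5-character prefix and hex-encode the digest
                    # for comparison with the locally computed checksum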
                    stored_checksum = "".join("%02x" % b for b in bytearray(
                        base64.b64decode(result[DataObject.checksum][5:])))

                    if checksum_map[file_path] == stored_checksum:
                        pass
                        #print("Checksum validated for %s" % file_path)
                    else:
                        validation_status = False
                        print(
                            "ERROR:  Checksum validation failed for %s: %s vs %s"
                            % (file_path, checksum_map[file_path],
                               stored_checksum))
                        error_file.write(
                            "ERROR:  Checksum validation failed for %s: %s vs %s\n"
                            % (file_path, checksum_map[file_path],
                               stored_checksum))

                if found_file is False:
                    validation_status = False
                    print("ERROR: File %s was not found in archive..." %
                          file_path)
                    error_file.write(
                        "ERROR: File %s was not found in archive...\n" %
                        file_path)

        # create the written_to_archive file
        os.system("touch %s/written_to_archive" % run_data_dir_filesystem)

        if validation_status is False:
            print(
                "ERROR: Post replication validation failed for at least one file."
            )
            error_file.write(
                "ERROR: Post replication validation failed for at least one file.\n"
            )
            sys.exit(1)
        else:
            print("Post replication validation succeeded.")
Example #19
    def find(self,
             irods_path='.',
             pattern='*',
             use_wholename=False,
             types='d,f',
             mindepth=0,
             maxdepth=-1,
             collection_avu=[],
             object_avu=[],
             debug=False):
        """ Returns a list of iRODS collection and data object paths
        which match the given pattern, similar to the UNIX `find` command.

        Examples:

        >>> session.find('.', pattern='*mol*/*.xyz', types='f',
        ...              object_avu=('=,kind', 'like,%organic'))
            ['data/molecules/c6h6.xyz', './data/molecules/ch3cooh.xyz']
        >>> session.find('~/data*', pattern='molecules', types='d')
            ['~/data/molecules']

        Arguments:

        irods_path: str (default: '.')
            Glob pattern of the roots of the iRODS collection trees
            in which to search

        pattern: str (default: '*')
            The search pattern

        use_wholename: bool (default: False)
            Whether it is the whole (absolute) path name that has to
            match the pattern, or only the basename of the collection
            or data object.

        types: str (default: 'd,f')
            Comma-separated list of one or more of the following characters
            to select the type of results to include:

            * 'd' for directories (i.e. collections)
            * 'f' for files (i.e. data objects)

        mindepth: int (default: 0)
            Minimal depth with respect to the root collections

        maxdepth: int (default: -1)
            Maximal depth with respect to the root collections

        collection_avu: tuple or list of tuples (default: [])
            One or several attribute[-value[-unit]] patterns to be used
            in filtering collections.

        object_avu: tuple or list of tuples (default: [])
            One or several attribute[-value[-unit]] patterns to be used
            in filtering data objects.

        debug: bool (default: False)
            Set to True for debugging info
        """
        # Process arguments:
        assert mindepth >= 0, 'mindepth argument must be >= 0'
        if isinstance(object_avu, tuple):
            object_avu = [object_avu]
        if isinstance(collection_avu, tuple):
            collection_avu = [collection_avu]

        if not use_wholename and '/' in pattern:
            msg = "Pattern %s contains a slash. UNIX file names usually don't, "
            msg += "so this search will probably yield no results. Setting "
            msg += "'wholename=True' may help you find what you're looking for."
            warnings.warn(msg % pattern)

        # Set up the metadata fields and criteria for the queries:
        def parse_avu_component(component):
            if component.count(',') == 0:
                operation, meta_pattern = '=', component
            elif component.count(',') == 1:
                operation, meta_pattern = component.split(',')
            else:
                raise ValueError('Cannot parse AVU component: %s' % component)
            return operation, meta_pattern

        meta_fields = {Collection: [], DataObject: []}
        meta_criteria = {Collection: [], DataObject: []}

        for model, avu_list in zip([Collection, DataObject],
                                   [collection_avu, object_avu]):
            for avu in avu_list:
                if model == Collection:
                    fields = [
                        CollectionMeta.name, CollectionMeta.value,
                        CollectionMeta.units
                    ]
                elif model == DataObject:
                    fields = [
                        DataObjectMeta.name, DataObjectMeta.value,
                        DataObjectMeta.units
                    ]

                for item, field in zip(avu, fields):
                    operation, meta_pattern = parse_avu_component(item)
                    self.log('DBG| AVU criterion: %s %s %s' %
                             (operation, field, meta_pattern), debug)
                    criterion = Criterion(operation, field, meta_pattern)
                    meta_criteria[model].append(criterion)
                    meta_fields[model].append(field)

        # Loop over the glob-pattern-matching collections and data objects
        for path_root in self.iglob(irods_path, debug=debug):
            self.log('DBG| search.find path_root: %s' % path_root, debug)
            path_root_abs = self.session.path.get_absolute_irods_path(
                path_root)

            if not self.session.collections.exists(path_root_abs):
                if 'f' in types.split(','):
                    yield path_root
                continue

            # Walk the collection trees
            iterators = [
                self.walk(path_root,
                          mindepth=mindepth,
                          maxdepth=maxdepth,
                          return_objects=True,
                          debug=debug)
            ]
            if mindepth == 0:
                # Also include the root collection,
                # which is not covered by self.walk
                collection = self.session.collections.get(path_root_abs)
                iterators.insert(0, [(collection, [collection], [])])

            iterator = itertools.chain(*iterators)

            for (collection, subcollections, data_objects) in iterator:
                self.log('DBG| search.find collection: %s' % collection.path,
                         debug)
                # Now we are left with collections and data objects
                # which match the depths and the given 'irods_path'
                # glob pattern, and we just need to further filter
                # on the (whole)name pattern and the AVUs.

                # Things to keep in mind:
                # * Collection: 'name' attribute refers to full path
                #               'path' attribute non-existent
                # * DataObject: 'name' attribute refers to basename
                #               'path' attribute non-existent
                # * iRODSCollection and iRODSDataObject:
                #               'name' refers to basename,
                #               'path' refers to full path

                for t, items in zip(['d', 'f'],
                                    [subcollections, data_objects]):
                    if t not in types.split(','):
                        continue

                    for item in items:
                        name = item.path if use_wholename else item.name
                        if not fnmatch.fnmatch(name, pattern):
                            continue

                        if t == 'd':
                            q = self.session.query(Collection.name,
                                                   *meta_fields[Collection])
                            criterion = Criterion('=', Collection.name,
                                                  item.path)
                            q = q.filter(criterion, *meta_criteria[Collection])

                        elif t == 'f':
                            q = self.session.query(Collection.name,
                                                   DataObject.name,
                                                   *meta_fields[DataObject])
                            criteria = [
                                Criterion('=', Collection.name,
                                          collection.path),
                                Criterion('=', DataObject.name, item.name)
                            ]
                            q = q.filter(*criteria, *meta_criteria[DataObject])

                        results = list(q.get_results())
                        assert len(results) in [0, 1], results

                        if len(results) == 1:
                            path = item.path.replace(path_root_abs,
                                                     path_root.rstrip('/'), 1)
                            yield path
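
A usage sketch based on the docstring above (the AVU values are
illustrative):

for path in session.find('.', pattern='*.xyz', types='f',
                         object_avu=('=,kind', 'like,%organic')):
    print(path)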