def _dirtime_cache_key(dirpath):
    """Return the base64-encoded Redis key used to cache a directory's times."""
    # base64.encodestring was removed in Python 3.9. encodebytes is its
    # drop-in replacement and produces the same bytes, so keys written by
    # earlier runs still match. Fall back to encodestring on interpreters
    # that predate encodebytes.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    return encode(dirpath.encode('utf-8', errors='ignore'))


def get_dir_meta(worker_name, path, cliargs, reindex_dict, statsembeded=False):
    """Build the metadata dict for a single directory.

    Args:
        worker_name: Stored verbatim in the ``worker_name`` field.
        path: The directory path, or, when ``statsembeded`` is true, a
            sequence whose first item is the directory path and whose second
            item unpacks into the ten stat fields
            (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime).
        cliargs: Mapping of command line options; ``index2`` and ``autotag``
            are read here.
        reindex_dict: Mapping whose ``directory`` entry is an iterable of
            ``(path, tag, tag_custom)`` sequences carrying over existing tags.
        statsembeded: When true, use the stat values embedded in ``path``
            instead of calling ``os.lstat``.

    Returns:
        ``"sametimes"`` when ``cliargs['index2']`` is set and the value cached
        in Redis equals ``mtime + ctime`` from disk; ``False`` when any
        exception is raised while gathering the metadata (a warning is
        issued); otherwise the directory metadata dict.
    """
    try:
        if statsembeded:
            # directory stat values were shipped along with the path
            dirpath, metadata = path[0], path[1]
            _mode, ino, _dev, nlink, uid, gid, _size, atime, mtime, ctime = \
                metadata
        else:
            dirpath = path
            # lstat so that a symlink is described rather than its target
            _mode, ino, _dev, nlink, uid, gid, _size, atime, mtime, ctime = \
                os.lstat(dirpath)

        # convert times to utc for es
        mtime_utc = datetime.utcfromtimestamp(mtime).isoformat()
        atime_utc = datetime.utcfromtimestamp(atime).isoformat()
        ctime_utc = datetime.utcfromtimestamp(ctime).isoformat()

        if cliargs['index2']:
            # skip directories whose cached mtime + ctime still matches disk
            redis_dirtime = redis_conn.get(_dirtime_cache_key(dirpath))
            if redis_dirtime:
                cached_times = float(redis_dirtime.decode('utf-8'))
                current_times = float(mtime + ctime)
                if cached_times == current_times:
                    return "sametimes"

        # get time now in utc
        indextime_utc = datetime.utcnow().isoformat()

        # get owner and group names
        owner, group = get_owner_group_names(uid, gid)

        filename = os.path.basename(dirpath)
        parentdir = os.path.abspath(os.path.join(dirpath, os.pardir))

        dirmeta_dict = {
            "filename": filename,
            "path_parent": parentdir,
            "filesize": 0,
            "items": 1,  # 1 for itself
            "items_files": 0,
            "items_subdirs": 0,
            "last_modified": mtime_utc,
            "last_access": atime_utc,
            "last_change": ctime_utc,
            "hardlinks": nlink,
            "inode": str(ino),
            "owner": owner,
            "group": group,
            "tag": "",
            "tag_custom": "",
            "crawl_time": 0,
            "change_percent_filesize": "",
            "change_percent_items": "",
            "change_percent_items_files": "",
            "change_percent_items_subdirs": "",
            "costpergb": "",
            "worker_name": worker_name,
            "indexing_date": indextime_utc,
            "_type": "directory",
        }

        # check plugins for adding extra meta data to dirmeta_dict
        for plugin in plugins:
            try:
                # a plugin that is not for directory docs is expected to
                # raise KeyError here, in which case it is skipped
                mappings = {'mappings': {'directory': {'properties': {}}}}
                plugin.add_mappings(mappings)
                dirmeta_dict.update(plugin.add_meta(dirpath))
            except KeyError:
                pass

        # add any autotags to dirmeta_dict
        if cliargs['autotag'] and len(config['autotag_dirs']) > 0:
            dirmeta_dict = auto_tag(dirmeta_dict, 'directory',
                                    mtime, atime, ctime)

        # search for and copy over any existing tags from reindex_dict
        for sublist in reindex_dict['directory']:
            if sublist[0] == dirpath:
                dirmeta_dict['tag'] = sublist[1]
                dirmeta_dict['tag_custom'] = sublist[2]
                break

    except (OSError, IOError) as e:
        warnings.warn("OS/IO Exception caused by: %s" % e)
        return False
    except Exception as e:
        warnings.warn("Exception caused by: %s" % e)
        return False

    # cache directory times in Redis, encode path (key) using base64;
    # accept both spellings of the flag, like the other get_dir_meta variant
    if config['redis_cachedirtimes'] in ('True', 'true'):
        redis_conn.set(_dirtime_cache_key(dirpath), mtime + ctime,
                       ex=config['redis_dirtimesttl'])

    return dirmeta_dict
def _dirtime_cache_key(dirpath):
    """Return the base64-encoded Redis key used to cache a directory's times."""
    # base64.encodestring was removed in Python 3.9. encodebytes is its
    # drop-in replacement and produces the same bytes, so keys written by
    # earlier runs still match. Fall back to encodestring on interpreters
    # that predate encodebytes.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    return encode(dirpath.encode('utf-8', errors='ignore'))


def _strip_domain(name):
    """Return the part of *name* after a single backslash-separated prefix."""
    parts = name.split('\\')
    # exactly two parts means "prefix\name"; anything else keeps the first
    return parts[1] if len(parts) == 2 else parts[0]


def _cached_owner_name(uid):
    """Return the user name for *uid*, consulting the uids/owners cache first."""
    if uid in uids:
        return owners[uid]
    try:
        owner = _strip_domain(pwd.getpwuid(uid).pw_name)
    except KeyError:
        # if we can't find the owner's user name, use the uid number
        owner = uid
    # store it in cache
    uids.append(uid)
    owners[uid] = owner
    return owner


def _cached_group_name(gid):
    """Return the group name for *gid*, consulting the gids/groups cache first."""
    if gid in gids:
        return groups[gid]
    try:
        group = _strip_domain(grp.getgrgid(gid).gr_name)
    except KeyError:
        # if we can't find the group name, use the gid number
        group = gid
    # store it in cache
    gids.append(gid)
    groups[gid] = group
    return group


def get_dir_meta(worker_name, path, cliargs, reindex_dict, statsembeded=False):
    """Build the metadata dict for a single directory.

    Args:
        worker_name: Stored verbatim in the ``worker_name`` field.
        path: The directory path, or, when ``statsembeded`` is true, a
            sequence whose first item is the directory path and whose second
            item unpacks into the ten stat fields
            (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime).
        cliargs: Mapping of command line options; ``index2`` and ``autotag``
            are read here.
        reindex_dict: Mapping whose ``directory`` entry is an iterable of
            ``(path, tag, tag_custom)`` sequences carrying over existing tags.
        statsembeded: When true, use the stat values embedded in ``path``
            instead of calling ``os.lstat``.

    Returns:
        ``"sametimes"`` when ``cliargs['index2']`` is set and the value cached
        in Redis equals ``mtime + ctime`` from disk; ``False`` when an
        ``OSError``/``IOError`` is raised while gathering the metadata;
        otherwise the directory metadata dict.

    Raises:
        Any exception other than ``OSError``/``IOError`` raised while
        gathering the metadata propagates to the caller.
    """
    try:
        if statsembeded:
            # directory stat values were shipped along with the path
            dirpath, metadata = path[0], path[1]
            _mode, ino, _dev, nlink, uid, gid, _size, atime, mtime, ctime = \
                metadata
        else:
            dirpath = path
            # lstat so that a symlink is described rather than its target
            _mode, ino, _dev, nlink, uid, gid, _size, atime, mtime, ctime = \
                os.lstat(dirpath)

        # convert times to utc for es
        mtime_utc = datetime.utcfromtimestamp(mtime).isoformat()
        atime_utc = datetime.utcfromtimestamp(atime).isoformat()
        ctime_utc = datetime.utcfromtimestamp(ctime).isoformat()

        if cliargs['index2']:
            # skip directories whose cached mtime + ctime still matches disk
            redis_dirtime = redis_conn.get(_dirtime_cache_key(dirpath))
            if redis_dirtime:
                cached_times = float(redis_dirtime.decode('utf-8'))
                current_times = float(mtime + ctime)
                if cached_times == current_times:
                    return "sametimes"

        # get time now in utc
        indextime_utc = datetime.utcnow().isoformat()

        # resolve owner and group names (cached per uid / gid)
        owner = _cached_owner_name(uid)
        group = _cached_group_name(gid)

        filename = os.path.basename(dirpath)
        parentdir = os.path.abspath(os.path.join(dirpath, os.pardir))

        dirmeta_dict = {
            "filename": filename,
            "path_parent": parentdir,
            "filesize": 0,
            "items": 1,  # 1 for itself
            "items_files": 0,
            "items_subdirs": 0,
            "last_modified": mtime_utc,
            "last_access": atime_utc,
            "last_change": ctime_utc,
            "hardlinks": nlink,
            "inode": ino,
            "owner": owner,
            "group": group,
            "tag": "",
            "tag_custom": "",
            "crawl_time": 0,
            "change_percent_filesize": "",
            "change_percent_items": "",
            "change_percent_items_files": "",
            "change_percent_items_subdirs": "",
            "worker_name": worker_name,
            "indexing_date": indextime_utc,
            "_type": "directory",
        }

        # check plugins for adding extra meta data to dirmeta_dict
        for plugin in plugins:
            try:
                # a plugin that is not for directory docs is expected to
                # raise KeyError here, in which case it is skipped
                mappings = {'mappings': {'directory': {'properties': {}}}}
                plugin.add_mappings(mappings)
                dirmeta_dict.update(plugin.add_meta(dirpath))
            except KeyError:
                pass

        # add any autotags to dirmeta_dict
        # NOTE(review): the return value is discarded, so this relies on
        # auto_tag updating dirmeta_dict in place - confirm against auto_tag.
        if cliargs['autotag'] and len(config['autotag_dirs']) > 0:
            auto_tag(dirmeta_dict, 'directory', mtime, atime, ctime)

        # search for and copy over any existing tags from reindex_dict
        for sublist in reindex_dict['directory']:
            if sublist[0] == dirpath:
                dirmeta_dict['tag'] = sublist[1]
                dirmeta_dict['tag_custom'] = sublist[2]
                break

    except (IOError, OSError):
        # FileNotFoundError is a subclass of OSError, so it is handled here
        # too; a separate clause after this one could never be reached.
        return False

    # cache directory times in Redis, encode path (key) using base64
    if config['redis_cachedirtimes'] in ('True', 'true'):
        redis_conn.set(_dirtime_cache_key(dirpath), mtime + ctime,
                       ex=config['redis_dirtimesttl'])

    return dirmeta_dict