def qumulo_get_file_meta(worker_name, path, cliargs, reindex_dict):
    """Build the index document for a single Qumulo file entry.

    Args:
        worker_name: Stored verbatim in the document's ``worker_name`` field.
        path: Mapping describing the file. Keys read here: ``name``,
            ``size``, ``modification_time``, ``change_time``,
            ``creation_time``, ``path``, ``owner``, ``group``,
            ``num_links`` and ``id``.
        cliargs: Mapping of command line options; ``minsize``, ``mtime``
            and ``autotag`` are read.
        reindex_dict: Mapping whose ``'file'`` entry is a sequence of
            ``[path, tag, tag_custom]`` items from a previous index.

    Returns:
        The file metadata dict, or ``None`` when the file is skipped
        (excluded by name/extension, smaller than ``cliargs['minsize']``
        bytes, or modified less than ``cliargs['mtime']`` days ago).
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S'

    filename = path['name']
    # extension without the leading dot, normalised to lower case
    extension = os.path.splitext(filename)[1][1:].strip().lower()
    if file_excluded(filename, extension):
        return None

    # file size in bytes; anything below the minsize cli flag is skipped
    size = int(path['size'])
    if size < cliargs['minsize']:
        return None

    # NOTE(review): the *_utc names suggest UTC timestamps, but time.mktime
    # interprets the parsed struct as local time - confirm this is intended.
    mtime_utc = path['modification_time']
    mtime_unix = time.mktime(time.strptime(mtime_utc, timestamp_format))
    # mtime cli arg is given in days; compare in seconds
    min_age_sec = cliargs['mtime'] * 86400
    age_sec = time.time() - mtime_unix
    if age_sec < min_age_sec:
        return None

    ctime_utc = path['change_time']
    ctime_unix = time.mktime(time.strptime(ctime_utc, timestamp_format))
    creation_time_utc = path['creation_time']

    # the "file hash" is derived from metadata only (size + mtime),
    # not from the file contents
    digest = hashlib.md5()
    digest.update("".join((str(size), str(mtime_unix))).encode('utf-8'))
    filehash = digest.hexdigest()

    indextime_utc = datetime.utcnow().isoformat()
    parentdir = os.path.abspath(os.path.join(path['path'], os.pardir))

    # owner: served from the module-level cache when already seen,
    # otherwise the raw id is used as the name and remembered
    uid = path['owner']
    if uid in uids:
        owner = owners[uid]
    else:
        owner = uid
        uids.append(uid)
        owners[uid] = owner

    # group: same caching scheme as for the owner
    gid = path['group']
    if gid in gids:
        group = groups[gid]
    else:
        group = gid
        gids.append(gid)
        groups[gid] = group

    filemeta_dict = {
        "filename": filename,
        "extension": extension,
        "path_parent": parentdir,
        "filesize": size,
        "owner": owner,
        "group": group,
        "last_modified": mtime_utc,
        "creation_time": creation_time_utc,
        "last_change": ctime_utc,
        "hardlinks": path['num_links'],
        "inode": str(path['id']),
        "filehash": filehash,
        "tag": "",
        "tag_custom": "",
        "dupe_md5": "",
        "indexing_date": indextime_utc,
        "worker_name": worker_name,
        "_type": "file"
    }

    # let every plugin contribute extra fields; a KeyError raised while
    # probing the plugin with a file mapping (or while collecting its
    # metadata) means the plugin is skipped for this document
    for plugin in plugins:
        try:
            plugin.add_mappings({'mappings': {'file': {'properties': {}}}})
            extra_meta = plugin.add_meta(path['path'])
            filemeta_dict.update(extra_meta)
        except KeyError:
            continue

    # apply autotags when enabled and at least one file rule is configured
    if cliargs['autotag'] and len(config['autotag_files']) > 0:
        auto_tag(filemeta_dict, 'file', mtime_unix, None, ctime_unix)

    # carry over tags from the previous index (first matching entry wins)
    previous = next(
        (entry for entry in reindex_dict['file'] if entry[0] == path['path']),
        None)
    if previous is not None:
        filemeta_dict['tag'] = previous[1]
        filemeta_dict['tag_custom'] = previous[2]

    return filemeta_dict
def _qumulo_dirtime_key(fullpath):
    """Return the Redis key (base64 of the UTF-8 path) for a directory's cached times."""
    # base64.encodestring was removed in Python 3.9; encodebytes yields the
    # same bytes. Fall back to encodestring where encodebytes is unavailable.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    return encode(fullpath.encode('utf-8', errors='ignore'))


def qumulo_get_dir_meta(worker_name, path, cliargs, reindex_dict, redis_conn):
    """Build the index document for a single Qumulo directory entry.

    Args:
        worker_name: Stored verbatim in the document's ``worker_name`` field.
        path: Mapping describing the directory. Keys read here: ``path``,
            ``name``, ``modification_time``, ``change_time``,
            ``creation_time``, ``owner``, ``group``, ``num_links`` and ``id``.
        cliargs: Mapping of command line options; ``index2`` and ``autotag``
            are read.
        reindex_dict: Mapping whose ``'directory'`` entry is a sequence of
            ``[path, tag, tag_custom]`` items from a previous index.
        redis_conn: Object providing ``get(key)`` and
            ``set(key, value, ex=...)``, used to cache directory times.

    Returns:
        The string ``"sametimes"`` when ``cliargs['index2']`` is set and the
        cached mtime+ctime sum in Redis equals the current one; otherwise
        the directory metadata dict.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S'

    # strip the trailing separator from everything but the root path
    if path['path'] != '/':
        fullpath = path['path'].rstrip(os.path.sep)
    else:
        fullpath = path['path']

    # NOTE(review): the *_utc names suggest UTC timestamps, but time.mktime
    # interprets the parsed struct as local time - confirm this is intended.
    mtime_utc = path['modification_time']
    mtime_unix = time.mktime(time.strptime(mtime_utc, timestamp_format))
    ctime_utc = path['change_time']
    ctime_unix = time.mktime(time.strptime(ctime_utc, timestamp_format))
    creation_time_utc = path['creation_time']

    if cliargs['index2']:
        # compare the cached mtime+ctime sum with the current one
        redis_dirtime = redis_conn.get(_qumulo_dirtime_key(fullpath))
        if redis_dirtime:
            cached_times = float(redis_dirtime.decode('utf-8'))
            current_times = float(mtime_unix + ctime_unix)
            if cached_times == current_times:
                return "sametimes"

    indextime_utc = datetime.utcnow().isoformat()

    # owner: served from the module-level cache when already seen,
    # otherwise the raw id is used as the name and remembered
    uid = path['owner']
    if uid in uids:
        owner = owners[uid]
    else:
        owner = uid
        uids.append(uid)
        owners[uid] = owner

    # group: same caching scheme as for the owner
    gid = path['group']
    if gid in gids:
        group = groups[gid]
    else:
        group = gid
        gids.append(gid)
        groups[gid] = group

    filename = path['name']
    parentdir = os.path.abspath(os.path.join(fullpath, os.pardir))

    dirmeta_dict = {
        "filename": filename,
        "path_parent": parentdir,
        "filesize": 0,
        "items": 1,  # 1 for itself
        "items_files": 0,
        "items_subdirs": 0,
        "last_modified": mtime_utc,
        "creation_time": creation_time_utc,
        "last_change": ctime_utc,
        "hardlinks": path['num_links'],
        "inode": str(path['id']),
        "owner": owner,
        "group": group,
        "tag": "",
        "tag_custom": "",
        "indexing_date": indextime_utc,
        "worker_name": worker_name,
        "change_percent_filesize": "",
        "change_percent_items": "",
        "change_percent_items_files": "",
        "change_percent_items_subdirs": "",
        "_type": "directory"
    }

    # let every plugin contribute extra fields; a KeyError raised while
    # probing the plugin with a directory mapping (or while collecting its
    # metadata) means the plugin is skipped for this document
    for plugin in plugins:
        try:
            mappings = {'mappings': {'directory': {'properties': {}}}}
            plugin.add_mappings(mappings)
            dirmeta_dict.update(plugin.add_meta(fullpath))
        except KeyError:
            pass

    # apply autotags when enabled and at least one directory rule is configured
    if cliargs['autotag'] and len(config['autotag_dirs']) > 0:
        auto_tag(dirmeta_dict, 'directory', mtime_unix, None, ctime_unix)

    # carry over tags from the previous index (first matching entry wins)
    for sublist in reindex_dict['directory']:
        if sublist[0] == fullpath:
            dirmeta_dict['tag'] = sublist[1]
            dirmeta_dict['tag_custom'] = sublist[2]
            break

    # cache the directory's mtime+ctime sum in Redis with the configured ttl
    if config['redis_cachedirtimes'] in ('True', 'true'):
        redis_conn.set(_qumulo_dirtime_key(fullpath),
                       mtime_unix + ctime_unix,
                       ex=config['redis_dirtimesttl'])

    return dirmeta_dict