def pgfid_to_path(brick, changelog_data):
    """
    For all the pgfids in table, converts into path using recursive
    readlink.

    Two passes are made over the gfidpath table, one after the other:

    * ``pgfid1`` -> ``path1`` for rows whose ``path1`` is still empty
      (CREATE/MKNOD/MKDIR/LINK/SYMLINK).
    * ``pgfid2`` -> ``path2`` for RENAME rows whose ``path2`` is still
      empty.

    Every parent GFID is resolved with ``symlink_gfid_to_path``, passed
    through ``output_path_prepare`` (which is given the module-level
    ``args``) and written back to the table.  A conversion that fails with
    IOError/OSError is logged and skipped.

    :param brick: brick path handed to ``symlink_gfid_to_path``.
    :param changelog_data: store providing ``gfidpath_get_distinct``,
        ``gfidpath_set_path1`` and ``gfidpath_set_path2``.
    """
    passes = (
        # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
        ("pgfid1", {"path1": ""}, changelog_data.gfidpath_set_path1),
        # pgfid2 to path2 in case of RENAME
        ("pgfid2", {"type": "RENAME", "path2": ""},
         changelog_data.gfidpath_set_path2),
    )

    for column, filters, store_path in passes:
        for row in changelog_data.gfidpath_get_distinct(column, filters):
            pgfid = row[0]
            # Data/Metadata-only rows have no pgfid1 and only RENAME rows
            # carry a pgfid2, so an empty value means nothing to resolve.
            if pgfid == "":
                continue

            try:
                path = symlink_gfid_to_path(brick, pgfid)
                path = output_path_prepare(path, args)
                store_path(path, pgfid)
            except (IOError, OSError) as e:
                # Best effort: report the failure and move on to the next
                # parent GFID.
                logger.warning("Error converting to path: %s", e)
def pgfid_to_path(brick, changelog_data):
    """
    For all the pgfids in table, converts into path using recursive
    readlink.

    Two passes are made over the gfidpath table, one after the other:

    * ``pgfid1`` -> ``path1`` for rows whose ``path1`` is still empty
      (CREATE/MKNOD/MKDIR/LINK/SYMLINK).
    * ``pgfid2`` -> ``path2`` for RENAME rows whose ``path2`` is still
      empty.

    Every parent GFID is resolved with ``symlink_gfid_to_path``, passed
    through ``output_path_prepare`` (which is given the module-level
    ``args``) and written back to the table.  A conversion that fails with
    IOError/OSError is logged and skipped.

    :param brick: brick path handed to ``symlink_gfid_to_path``.
    :param changelog_data: store providing ``gfidpath_get_distinct``,
        ``gfidpath_set_path1`` and ``gfidpath_set_path2``.
    """
    passes = (
        # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
        ("pgfid1", {"path1": ""}, changelog_data.gfidpath_set_path1),
        # pgfid2 to path2 in case of RENAME
        ("pgfid2", {"type": "RENAME", "path2": ""},
         changelog_data.gfidpath_set_path2),
    )

    for column, filters, store_path in passes:
        for row in changelog_data.gfidpath_get_distinct(column, filters):
            pgfid = row[0]
            # Data/Metadata-only rows have no pgfid1 and only RENAME rows
            # carry a pgfid2, so an empty value means nothing to resolve.
            if pgfid == "":
                continue

            try:
                path = symlink_gfid_to_path(brick, pgfid)
                path = output_path_prepare(path, args)
                store_path(path, pgfid)
            except (IOError, OSError) as e:
                # Best effort: report the failure and move on to the next
                # parent GFID.
                logger.warning("Error converting to path: %s", e)
def populate_pgfid_and_inodegfid(brick, changelog_data):
    """
    For all the DATA/METADATA modifications GFID,
    If symlink, directly convert to Path using Readlink.
    If not symlink, try to get PGFIDs via xattr query and populate it
    to pgfid table, collect inodes in inodegfid table

    Only gfidpath rows of type MODIFY with an empty ``path1`` are
    processed.  The GFID is read from column 3 of each row and looked up
    under ``<brick>/.glusterfs/<gfid[0:2]>/<gfid[2:4]>/<gfid>``.

    :param brick: brick directory containing the ``.glusterfs`` tree.
    :param changelog_data: store providing ``gfidpath_get``,
        ``gfidpath_update``, ``inodegfid_add`` and ``pgfid_add``.
    """
    for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
        gfid = row[3].strip()
        p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)

        if os.path.islink(p):
            # It is a Directory if GFID backend path is symlink
            try:
                path = symlink_gfid_to_path(brick, gfid)
                path = output_path_prepare(path, args)
                changelog_data.gfidpath_update({"path1": path},
                                               {"gfid": gfid})
            except (IOError, OSError) as e:
                logger.warning("Error converting to path: %s", e)
            continue

        try:
            # INODE and GFID to inodegfid table
            changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
            for x in xattr.list(p):
                # Attribute names may come back as bytes (Python 3);
                # normalise to str so the prefix test and split below
                # cannot raise TypeError.
                name = x
                if isinstance(name, bytes):
                    name = name.decode("utf-8", "replace")
                if name.startswith("trusted.pgfid."):
                    # PGFID in pgfid table
                    changelog_data.pgfid_add(name.split(".")[-1])
        except (IOError, OSError):
            # All OS Errors ignored, since failures will be logged
            # in End. All GFIDs present in gfidpath table
            continue
def enum_hard_links_using_gfid2path(brick, gfid, args):
    """
    Collect the paths of all hard links of ``gfid`` from its
    ``trusted.gfid2path.*`` xattrs.

    The GFID backend entry
    ``<brick>/.glusterfs/<gfid[0:2]>/<gfid[2:4]>/<gfid>`` is inspected; if
    it is a directory nothing is collected.  Otherwise every
    ``trusted.gfid2path.*`` xattr value (``<PGFID>/<BN>``) is turned into
    a path by resolving the parent GFID with ``symlink_gfid_to_path``,
    joining the basename and passing the result through
    ``output_path_prepare``.

    :param brick: brick directory containing the ``.glusterfs`` tree.
    :param gfid: GFID string of the file to look up.
    :param args: passed through to ``output_path_prepare``.
    :returns: list of prepared paths; empty when the entry is a directory
        or when reading the xattrs fails with IOError/OSError.
    """
    hardlinks = []
    p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
    if os.path.isdir(p):
        return hardlinks

    # we have a symlink or a normal file
    try:
        for x in xattr.list(p):
            x_str = bytearray_to_str(x)
            if not x_str.startswith("trusted.gfid2path."):
                continue

            # get the value for the xattr i.e. <PGFID>/<BN>
            v_str = bytearray_to_str(xattr.getxattr(p, x_str))
            parts = v_str.split(os.sep)
            if len(parts) != 2:
                # A value that is not exactly <PGFID>/<BN> cannot be
                # unpacked; skip it instead of raising ValueError.
                logger.warning("Ignoring malformed gfid2path value: %s",
                               v_str)
                continue
            pgfid, bn = parts

            try:
                path = symlink_gfid_to_path(brick, pgfid)
                fullpath = os.path.join(path, bn)
                fullpath = output_path_prepare(fullpath, args)
                hardlinks.append(fullpath)
            except (IOError, OSError) as e:
                logger.warning("Error converting to path: %s", e)
    except (IOError, OSError):
        # Best effort: an unreadable entry yields whatever was collected
        # so far.
        pass

    return hardlinks
def gfid_to_path_using_pgfid(brick, changelog_data, args):
    """
    For all the pgfids collected, Converts to Path and
    does readdir on those directories and looks up inodegfid
    table for matching inode number.

    Steps:

    1. ``populate_pgfid_and_inodegfid`` fills the pgfid and inodegfid
       tables.
    2. If no inodegfid entry has ``converted == 0`` there is nothing to
       do and the function returns.
    3. Each parent GFID from ``changelog_data.pgfid_get()`` is resolved to
       a directory, which is scanned with ``find`` (``subdirs_crawl=False``).
       Entries whose inode is present in the inodegfid table get their
       path appended to ``path1`` and are marked as converted.

    :param brick: brick directory; its length is used to strip the brick
        prefix from the paths reported by ``find``.
    :param changelog_data: store providing the inodegfid/pgfid tables.
    :param args: passed through to ``output_path_prepare``.
    """
    populate_pgfid_and_inodegfid(brick, changelog_data)

    # If no GFIDs needs conversion to Path
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Looks in inodegfid table, if exists returns
        # inode number else None
        try:
            st = os.lstat(path)
        except (OSError, IOError):
            st = None

        if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
            return st.st_ino

        return None

    # Length of brick path, to remove from output path
    # NOTE(review): the "+ 1" below assumes brick has no trailing
    # separator - confirm against callers.
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        path = path.strip()
        path = path[brick_path_len + 1:]
        path = output_path_prepare(path, args)

        changelog_data.append_path1(path, inode)
        changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})

    ignore_dirs = [
        os.path.join(brick, dirname)
        for dirname in conf.get_opt("brick_ignore_dirs").split(",")
    ]

    for row in changelog_data.pgfid_get():
        try:
            path = symlink_gfid_to_path(brick, row[0])
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)
        except (IOError, OSError) as e:
            # Best effort: report the failure and continue with the next
            # parent GFID.
            logger.warning("Error converting to path: %s", e)
def gfid_to_path_using_pgfid(brick, changelog_data, args):
    """
    For all the pgfids collected, Converts to Path and
    does readdir on those directories and looks up inodegfid
    table for matching inode number.

    Steps:

    1. ``populate_pgfid_and_inodegfid`` fills the pgfid and inodegfid
       tables.
    2. If no inodegfid entry has ``converted == 0`` there is nothing to
       do and the function returns.
    3. Each parent GFID from ``changelog_data.pgfid_get()`` is resolved to
       a directory, which is scanned with ``find`` (``subdirs_crawl=False``).
       Entries whose inode is present in the inodegfid table get their
       path appended to ``path1`` and are marked as converted.

    :param brick: brick directory; its length is used to strip the brick
        prefix from the paths reported by ``find``.
    :param changelog_data: store providing the inodegfid/pgfid tables.
    :param args: passed through to ``output_path_prepare``.
    """
    populate_pgfid_and_inodegfid(brick, changelog_data)

    # If no GFIDs needs conversion to Path
    if not changelog_data.inodegfid_exists({"converted": 0}):
        return

    def inode_filter(path):
        # Looks in inodegfid table, if exists returns
        # inode number else None
        try:
            st = os.lstat(path)
        except (OSError, IOError):
            st = None

        if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
            return st.st_ino

        return None

    # Length of brick path, to remove from output path
    # NOTE(review): the "+ 1" below assumes brick has no trailing
    # separator - confirm against callers.
    brick_path_len = len(brick)

    def output_callback(path, inode):
        # For each path found, encodes it and updates path1
        # Also updates converted flag in inodegfid table as 1
        path = path.strip()
        path = path[brick_path_len + 1:]
        path = output_path_prepare(path, args)

        changelog_data.append_path1(path, inode)
        changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})

    ignore_dirs = [
        os.path.join(brick, dirname)
        for dirname in conf.get_opt("brick_ignore_dirs").split(",")
    ]

    for row in changelog_data.pgfid_get():
        try:
            path = symlink_gfid_to_path(brick, row[0])
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)
        except (IOError, OSError) as e:
            # Best effort: report the failure and continue with the next
            # parent GFID.
            logger.warning("Error converting to path: %s", e)
def gfid_to_path_using_pgfid(brick, gfids_file, output_file, outfile_failures):
    """
    Parent GFID is saved as xattr, collect Parent GFIDs from all
    the files from gfids_file. Convert parent GFID to path and Crawl
    each directories to get the list of files/dirs having same inode number.

    For every GFID read from ``gfids_file``:

    * if its backend entry under ``<brick>/.glusterfs`` is a symlink, the
      path is resolved directly and written to ``output_file``;
    * otherwise its inode number is remembered and the parent GFIDs found
      in its ``trusted.pgfid.*`` xattrs are collected.  A GFID without any
      such xattr is appended to ``outfile_failures``.

    Afterwards each collected parent directory is scanned with ``find``
    (``subdirs_crawl=False``) and every entry whose inode was remembered
    is written to ``output_file`` with the brick prefix removed.

    :param brick: brick directory containing the ``.glusterfs`` tree.
    :param gfids_file: text file with one GFID per line.
    :param output_file: file the resolved paths are appended to.
    :param outfile_failures: file that receives GFIDs having no parent
        GFID xattr.
    """
    with open(output_file, "a+") as fout:
        pgfids = set()
        inode_dict = {}
        with open(gfids_file) as gfids_in:
            for gfid in gfids_in:
                gfid = gfid.strip()
                if not gfid:
                    # A blank line is not a GFID; without this guard it
                    # would resolve to the .glusterfs directory itself.
                    continue

                p = os.path.join(brick,
                                 ".glusterfs",
                                 gfid[0:2],
                                 gfid[2:4],
                                 gfid)
                if os.path.islink(p):
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                    continue

                try:
                    inode_dict[str(os.stat(p).st_ino)] = 1
                    num_parent_gfid = 0
                    for x in xattr.list(p):
                        # Attribute names may come back as bytes
                        # (Python 3); normalise to str before matching.
                        name = x
                        if isinstance(name, bytes):
                            name = name.decode("utf-8", "replace")
                        if name.startswith("trusted.pgfid."):
                            num_parent_gfid += 1
                            pgfids.add(name.split(".")[-1])

                    if num_parent_gfid == 0:
                        # Distinct name so the handle of the enclosing
                        # read loop is not rebound.
                        with open(outfile_failures, "a") as ffail:
                            ffail.write("%s\n" % gfid)
                            ffail.flush()
                            os.fsync(ffail.fileno())
                except (IOError, OSError) as e:
                    if e.errno == ENOENT:
                        # Backend entry does not exist; skip this GFID.
                        continue
                    fail("%s Failed to convert to path from "
                         "GFID %s: %s" % (brick, gfid, e),
                         logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            # True when the inode of path was collected from the GFIDs
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len + 1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]

        for pgfid in pgfids:
            path = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())
def gfid_to_path_using_pgfid(brick, gfids_file, output_file, outfile_failures):
    """
    Parent GFID is saved as xattr, collect Parent GFIDs from all
    the files from gfids_file. Convert parent GFID to path and Crawl
    each directories to get the list of files/dirs having same inode number.

    For every GFID read from ``gfids_file``:

    * if its backend entry under ``<brick>/.glusterfs`` is a symlink, the
      path is resolved directly and written to ``output_file``;
    * otherwise its inode number is remembered and the parent GFIDs found
      in its ``trusted.pgfid.*`` xattrs are collected.  A GFID without any
      such xattr is appended to ``outfile_failures``.

    Afterwards each collected parent directory is scanned with ``find``
    (``subdirs_crawl=False``) and every entry whose inode was remembered
    is written to ``output_file`` with the brick prefix removed.

    :param brick: brick directory containing the ``.glusterfs`` tree.
    :param gfids_file: text file with one GFID per line.
    :param output_file: file the resolved paths are appended to.
    :param outfile_failures: file that receives GFIDs having no parent
        GFID xattr.
    """
    with open(output_file, "a+") as fout:
        pgfids = set()
        inode_dict = {}
        with open(gfids_file) as gfids_in:
            for gfid in gfids_in:
                gfid = gfid.strip()
                if not gfid:
                    # A blank line is not a GFID; without this guard it
                    # would resolve to the .glusterfs directory itself.
                    continue

                p = os.path.join(brick,
                                 ".glusterfs",
                                 gfid[0:2],
                                 gfid[2:4],
                                 gfid)
                if os.path.islink(p):
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                    continue

                try:
                    inode_dict[str(os.stat(p).st_ino)] = 1
                    num_parent_gfid = 0
                    for x in xattr.list(p):
                        # Attribute names may come back as bytes
                        # (Python 3); normalise to str before matching.
                        name = x
                        if isinstance(name, bytes):
                            name = name.decode("utf-8", "replace")
                        if name.startswith("trusted.pgfid."):
                            num_parent_gfid += 1
                            pgfids.add(name.split(".")[-1])

                    if num_parent_gfid == 0:
                        # Distinct name so the handle of the enclosing
                        # read loop is not rebound.
                        with open(outfile_failures, "a") as ffail:
                            ffail.write("%s\n" % gfid)
                            ffail.flush()
                            os.fsync(ffail.fileno())
                except (IOError, OSError) as e:
                    if e.errno == ENOENT:
                        # Backend entry does not exist; skip this GFID.
                        continue
                    fail("%s Failed to convert to path from "
                         "GFID %s: %s" % (brick, gfid, e),
                         logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            # True when the inode of path was collected from the GFIDs
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len + 1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]

        for pgfid in pgfids:
            path = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())