def process_change(self, change, done):
    """Parse one changelog file and replay it on the slave.

    Reads the changelog records from *change*, buckets them into entry
    operations (create/unlink/rename/...) and data (content) updates,
    sends the entry batch to the slave via ``entry_ops`` and the data
    set to ``syncdata``. When *done* is true and data sync succeeds,
    acks the changelog back to the master.

    Returns True when the data sync (and optional ack) completed.
    """
    clist = []
    entries = []
    purges = set()   # gfid paths whose entries were unlinked/rmdir'd
    links = set()    # gfid paths that gained a new hard link
    datas = set()    # gfid paths needing data (content) sync
    pfx = gauxpfx()
    try:
        f = open(change, "r")
        clist = f.readlines()
        f.close()
    except IOError:
        raise

    def edct(op, **ed):
        # Build a plain-dict representation of one entry op; a 'stat'
        # kwarg (an os.stat_result) is flattened to uid/gid/mode only.
        dct = {}
        dct["op"] = op
        for k in ed:
            if k == "stat":
                st = ed[k]
                dst = dct["stat"] = {}
                dst["uid"] = st.st_uid
                dst["gid"] = st.st_gid
                dst["mode"] = st.st_mode
            else:
                dct[k] = ed[k]
        return dct
    for e in clist:
        e = e.strip()
        # Record layout: a type tag in [IDX_START, IDX_END), then
        # space-separated fields (positions given by self.POS_*).
        et = e[self.IDX_START:self.IDX_END]
        ec = e[self.IDX_END:].split(" ")
        if et in self.TYPE_ENTRY:
            ty = ec[self.POS_TYPE]
            en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
            gfid = ec[self.POS_GFID]
            # definitely need a better way bucketize entry ops
            if ty in ["UNLINK", "RMDIR"]:
                entries.append(edct(ty, gfid=gfid, entry=en))
                purges.update([os.path.join(pfx, gfid)])
                continue
            if not ty == "RENAME":
                # RENAME carries no stat; every other op needs the
                # current stat of the gfid path (skipped if purged).
                go = os.path.join(pfx, gfid)
                st = self.lstat(go)
                if isinstance(st, int):
                    logging.debug("file %s got purged in the interim" % go)
                    continue
            if ty in ["CREATE", "MKDIR", "MKNOD"]:
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == "LINK":
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
                links.update([os.path.join(pfx, gfid)])
            elif ty == "SYMLINK":
                # FIX: the symlink can disappear between the lstat
                # above and reading its target; skip the entry instead
                # of crashing the whole changelog replay.
                try:
                    rl = os.readlink(en)
                except OSError:
                    logging.debug("symlink %s got purged in the interim" % en)
                    continue
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
                                    link=rl))
            elif ty == "RENAME":
                e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
                entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2))
            else:
                pass
        elif et in self.TYPE_GFID:
            # Data record: only the gfid, no entry name.
            da = os.path.join(pfx, ec[0])
            st = self.lstat(da)
            if isinstance(st, int):
                logging.debug("file %s got purged in the interim" % da)
                continue
            datas.update([da])
    logging.debug("entries: %s" % repr(entries))
    # sync namespace
    if entries:
        self.slave.server.entry_ops(entries)
    # sync data; purged gfids need no data sync unless they gained a
    # hard link in the same changelog window
    if self.syncdata(datas - (purges - links)):
        if done:
            self.master.server.changelog_done(change)
        return True
def process_change(self, change, done, retry):
    """Parse one changelog file and replay it on the slave.

    Buckets changelog records into entry ops (sent to the slave's
    ``entry_ops``) and data paths (handed to ``syncdata``), while
    accumulating per-crawl stats (file count, purge count, bytes).
    When *done* is true and data sync succeeds, acks the changelog to
    the master. Stats are only accumulated on the first attempt
    (*retry* false). Returns True on successful data sync.
    """
    pfx = gauxpfx()
    clist = []
    entries = []
    datas = set()
    # basic crawl stats: files and bytes
    files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
    try:
        f = open(change, "r")
        clist = f.readlines()
        f.close()
    except IOError:
        raise

    def edct(op, **ed):
        # Build a plain-dict entry op; a 'stat' kwarg (os.stat_result)
        # is flattened to uid/gid/mode.
        dct = {}
        dct['op'] = op
        for k in ed:
            if k == 'stat':
                st = ed[k]
                dst = dct['stat'] = {}
                dst['uid'] = st.st_uid
                dst['gid'] = st.st_gid
                dst['mode'] = st.st_mode
            else:
                dct[k] = ed[k]
        return dct

    # regular file update: bytes & count (each path counted once)
    def _update_reg(entry, size):
        if not entry in files_pending['files']:
            files_pending['count'] += 1
            files_pending['bytes'] += size
            files_pending['files'].append(entry)

    # updates for directories, symlinks etc..
    def _update_rest():
        files_pending['count'] += 1

    # entry count
    def entry_update(entry, size, mode):
        if stat.S_ISREG(mode):
            _update_reg(entry, size)
        else:
            _update_rest()

    # purge count
    def purge_update():
        files_pending['purge'] += 1

    for e in clist:
        e = e.strip()
        # Record layout: a type tag in [IDX_START, IDX_END), then
        # space-separated fields at the self.POS_* positions.
        et = e[self.IDX_START:self.IDX_END]
        ec = e[self.IDX_END:].split(' ')
        if et in self.TYPE_ENTRY:
            ty = ec[self.POS_TYPE]
            en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
            gfid = ec[self.POS_GFID]
            # definitely need a better way bucketize entry ops
            if ty in ['UNLINK', 'RMDIR']:
                purge_update()
                entries.append(edct(ty, gfid=gfid, entry=en))
                continue
            go = os.path.join(pfx, gfid)
            st = lstat(go)
            if isinstance(st, int):
                # Source is gone already. A RENAME whose source was
                # purged still needs the old name removed downstream.
                if ty == 'RENAME':
                    entries.append(edct('UNLINK', gfid=gfid, entry=en))
                else:
                    logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            if ty in ['CREATE', 'MKDIR', 'MKNOD']:
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'LINK':
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'SYMLINK':
                # Link target may vanish between lstat and readlink;
                # errno_wrap turns ENOENT into an int we skip on.
                rl = errno_wrap(os.readlink, [en], [ENOENT])
                if isinstance(rl, int):
                    continue
                entries.append(
                    edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
            elif ty == 'RENAME':
                e2 = unescape(os.path.join(pfx,
                                           ec[self.POS_ENTRY2]))
                entries.append(
                    edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st))
            else:
                logging.warn('ignoring %s [op %s]' % (gfid, ty))
        elif et in self.TYPE_GFID:
            # Data record: gfid only — queue for content sync.
            go = os.path.join(pfx, ec[0])
            st = lstat(go)
            if isinstance(st, int):
                logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            datas.update([go])
    logging.debug('entries: %s' % repr(entries))
    # don't double-count stats on a retried changelog
    if not retry:
        self.update_cumulative_stats(files_pending)
    # sync namespace
    if (entries):
        self.slave.server.entry_ops(entries)
    # sync data
    if self.syncdata(datas):
        if done:
            self.master.server.changelog_done(change)
        return True
def entry_ops(cls, entries):
    """Apply a batch of namespace operations on the slave mount.

    Each element of *entries* is a dict with at least 'op', 'gfid' and
    'entry' keys (see the master's ``edct``). Creates are performed by
    packing a blob and setting the 'glusterfs.gfid.newfile' xattr on
    the parent; purges/links/renames go through the normal syscalls
    wrapped with errno_wrap.
    """
    pfx = gauxpfx()
    logging.debug('entries: %s' % repr(entries))

    # regular file: blob for the gfid-preserving create xattr
    def entry_pack_reg(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mknod(blen),
                           uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), 0, umask())

    # regular file, attributes taken from a flattened stat dict
    def entry_pack_reg_stat(gf, bn, st):
        blen = len(bn)
        mo = st['mode']
        return struct.pack(cls._fmt_mknod(blen),
                           st['uid'], st['gid'],
                           gf, mo, bn,
                           stat.S_IMODE(mo), 0, umask())

    # mkdir
    def entry_pack_mkdir(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mkdir(blen),
                           uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), umask())

    # symlink
    def entry_pack_symlink(gf, bn, lnk, st):
        blen = len(bn)
        llen = len(lnk)
        return struct.pack(cls._fmt_symlink(blen, llen),
                           st['uid'], st['gid'],
                           gf, st['mode'], bn, lnk)

    def entry_purge(entry, gfid):
        # This is an extremely racy code and needs to be fixed ASAP.
        # The GFID check here is to be sure that the pargfid/bname
        # to be purged is the GFID gotten from the changelog.
        # (a stat(changelog_gfid) would also be valid here)
        # The race here is between the GFID check and the purge.
        disk_gfid = cls.gfid_mnt(entry)
        if isinstance(disk_gfid, int):
            return
        if not gfid == disk_gfid:
            return
        er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
        if isinstance(er, int):
            if er == EISDIR:
                er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
                if er == ENOTEMPTY:
                    return er

    for e in entries:
        blob = None
        op = e['op']
        gfid = e['gfid']
        entry = e['entry']
        (pg, bname) = entry2pb(entry)
        if op in ['RMDIR', 'UNLINK']:
            # retry until the purge no longer reports an errno
            while True:
                er = entry_purge(entry, gfid)
                if isinstance(er, int):
                    time.sleep(1)
                else:
                    break
        elif op in ['CREATE', 'MKNOD']:
            # FIX: gid argument was mistakenly passed e['uid'] — files
            # were created with group ownership equal to the uid.
            blob = entry_pack_reg(
                gfid, bname, e['mode'], e['uid'], e['gid'])
        elif op == 'MKDIR':
            # FIX: same uid-passed-as-gid bug as CREATE/MKNOD above.
            blob = entry_pack_mkdir(
                gfid, bname, e['mode'], e['uid'], e['gid'])
        elif op == 'LINK':
            slink = os.path.join(pfx, gfid)
            st = lstat(slink)
            if isinstance(st, int):
                # link source is gone: recreate the file from stat
                (pg, bname) = entry2pb(entry)
                blob = entry_pack_reg_stat(gfid, bname, e['stat'])
            else:
                errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
        elif op == 'SYMLINK':
            blob = entry_pack_symlink(gfid, bname, e['link'], e['stat'])
        elif op == 'RENAME':
            en = e['entry1']
            st = lstat(entry)
            if isinstance(st, int):
                # source already gone: create the destination afresh
                (pg, bname) = entry2pb(en)
                blob = entry_pack_reg_stat(gfid, bname, e['stat'])
            else:
                errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
        if blob:
            errno_wrap(Xattr.lsetxattr_l,
                       [pg, 'glusterfs.gfid.newfile', blob],
                       [EEXIST],
                       [ENOENT, ESTALE, EINVAL])
def entry_ops(cls, entries):
    """Apply a batch of namespace operations on the slave mount.

    Each element of *entries* is a dict with 'op', 'gfid' and 'entry'
    keys (plus op-specific extras such as 'stat', 'link', 'entry1').
    Creates are done by packing a blob and setting the
    'glusterfs.gfid.newfile' xattr on the parent directory; purges,
    links and renames use the ordinary syscalls via errno_wrap.
    """
    pfx = gauxpfx()
    logging.debug('entries: %s' % repr(entries))

    # regular file: blob for the gfid-preserving create xattr
    def entry_pack_reg(gf, bn, st):
        blen = len(bn)
        mo = st['mode']
        return struct.pack(cls._fmt_mknod(blen),
                           st['uid'], st['gid'],
                           gf, mo, bn,
                           stat.S_IMODE(mo), 0, umask())

    # mkdir
    def entry_pack_mkdir(gf, bn, st):
        blen = len(bn)
        mo = st['mode']
        return struct.pack(cls._fmt_mkdir(blen),
                           st['uid'], st['gid'],
                           gf, mo, bn,
                           stat.S_IMODE(mo), umask())

    # symlink
    def entry_pack_symlink(gf, bn, lnk, st):
        blen = len(bn)
        llen = len(lnk)
        return struct.pack(cls._fmt_symlink(blen, llen),
                           st['uid'], st['gid'],
                           gf, st['mode'], bn, lnk)

    def entry_purge(entry, gfid):
        # This is an extremely racy code and needs to be fixed ASAP.
        # The GFID check here is to be sure that the pargfid/bname
        # to be purged is the GFID gotten from the changelog.
        # (a stat(changelog_gfid) would also be valid here)
        # The race here is between the GFID check and the purge.
        disk_gfid = cls.gfid(entry)
        if isinstance(disk_gfid, int):
            return
        if not gfid == disk_gfid:
            return
        er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
        if isinstance(er, int):
            if er == EISDIR:
                er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
                if er == ENOTEMPTY:
                    return er

    for e in entries:
        blob = None
        op = e['op']
        gfid = e['gfid']
        entry = e['entry']
        (pg, bname) = entry2pb(entry)
        if op in ['RMDIR', 'UNLINK']:
            # retry until the purge stops reporting an errno
            while True:
                er = entry_purge(entry, gfid)
                if isinstance(er, int):
                    time.sleep(1)
                else:
                    break
        elif op == 'CREATE':
            blob = entry_pack_reg(gfid, bname, e['stat'])
        elif op == 'MKDIR':
            blob = entry_pack_mkdir(gfid, bname, e['stat'])
        elif op == 'LINK':
            errno_wrap(os.link,
                       [os.path.join(pfx, gfid), entry],
                       [ENOENT, EEXIST])
        elif op == 'SYMLINK':
            blob = entry_pack_symlink(gfid, bname, e['link'], e['stat'])
        elif op == 'RENAME':
            en = e['entry1']
            errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
        if blob:
            errno_wrap(Xattr.lsetxattr_l,
                       [pg, 'glusterfs.gfid.newfile', blob],
                       [ENOENT, EEXIST])
def entry_ops(cls, entries):
    """Apply a batch of namespace operations on the slave mount.

    Each element of *entries* is a dict with 'op', 'gfid' and 'entry'
    keys plus op-specific extras ('mode'/'uid'/'gid', 'stat', 'link',
    'entry1'). Creates pack a blob and set the
    'glusterfs.gfid.newfile' xattr on the parent; purges retry until
    they succeed, falling back to a recursive rmtree for a
    non-empty directory RMDIR.
    """
    pfx = gauxpfx()
    logging.debug("entries: %s" % repr(entries))

    # regular file: blob for the gfid-preserving create xattr
    def entry_pack_reg(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mknod(blen),
                           uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), 0, umask())

    # regular file, attributes taken from a flattened stat dict
    def entry_pack_reg_stat(gf, bn, st):
        blen = len(bn)
        mo = st["mode"]
        return struct.pack(cls._fmt_mknod(blen),
                           st["uid"], st["gid"],
                           gf, mo, bn,
                           stat.S_IMODE(mo), 0, umask())

    # mkdir
    def entry_pack_mkdir(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mkdir(blen),
                           uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), umask())

    # symlink
    def entry_pack_symlink(gf, bn, lnk, st):
        blen = len(bn)
        llen = len(lnk)
        return struct.pack(cls._fmt_symlink(blen, llen),
                           st["uid"], st["gid"],
                           gf, st["mode"], bn, lnk)

    def entry_purge(entry, gfid):
        # This is an extremely racy code and needs to be fixed ASAP.
        # The GFID check here is to be sure that the pargfid/bname
        # to be purged is the GFID gotten from the changelog.
        # (a stat(changelog_gfid) would also be valid here)
        # The race here is between the GFID check and the purge.
        disk_gfid = cls.gfid_mnt(entry)
        if isinstance(disk_gfid, int):
            return
        if not gfid == disk_gfid:
            return
        er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
        if isinstance(er, int):
            if er == EISDIR:
                er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
                if er == ENOTEMPTY:
                    return er

    for e in entries:
        blob = None
        op = e["op"]
        gfid = e["gfid"]
        entry = e["entry"]
        (pg, bname) = entry2pb(entry)
        if op in ["RMDIR", "UNLINK"]:
            # Retry the purge until it no longer reports an errno;
            # a non-empty directory RMDIR is escalated to a
            # recursive removal.
            while True:
                er = entry_purge(entry, gfid)
                if isinstance(er, int):
                    if er == ENOTEMPTY and op == "RMDIR":
                        er1 = errno_wrap(shutil.rmtree,
                                         [os.path.join(pg, bname)],
                                         [ENOENT])
                        if not isinstance(er1, int):
                            logging.info("Removed %s/%s recursively" %
                                         (pg, bname))
                            break
                    logging.warn("Failed to remove %s => %s/%s. %s" %
                                 (gfid, pg, bname, os.strerror(er)))
                    time.sleep(1)
                else:
                    break
        elif op in ["CREATE", "MKNOD"]:
            blob = entry_pack_reg(gfid, bname,
                                  e["mode"], e["uid"], e["gid"])
        elif op == "MKDIR":
            blob = entry_pack_mkdir(gfid, bname,
                                    e["mode"], e["uid"], e["gid"])
        elif op == "LINK":
            slink = os.path.join(pfx, gfid)
            st = lstat(slink)
            if isinstance(st, int):
                # link source is gone: recreate the file from stat
                (pg, bname) = entry2pb(entry)
                blob = entry_pack_reg_stat(gfid, bname, e["stat"])
            else:
                errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
        elif op == "SYMLINK":
            blob = entry_pack_symlink(gfid, bname, e["link"], e["stat"])
        elif op == "RENAME":
            en = e["entry1"]
            st = lstat(entry)
            if isinstance(st, int):
                # source is gone; only recreate regular files (dirs
                # cannot be recreated from a mknod blob)
                if e["stat"] and not stat.S_ISDIR(e["stat"]["mode"]):
                    (pg, bname) = entry2pb(en)
                    blob = entry_pack_reg_stat(gfid, bname, e["stat"])
            else:
                errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
        if blob:
            errno_wrap(Xattr.lsetxattr,
                       [pg, "glusterfs.gfid.newfile", blob],
                       [EEXIST],
                       [ENOENT, ESTALE, EINVAL])
def process_change(self, change, done, retry):
    """Parse one changelog file and replay it on the slave.

    Splits changelog records into entry operations (sent to the
    slave's ``entry_ops``) and data paths (handed to ``syncdata``),
    while collecting crawl statistics. Stats are accumulated only on
    the first attempt (*retry* false). When *done* is true and the
    data sync succeeds, the changelog is acked back to the master.
    Returns True on successful data sync.
    """
    pfx = gauxpfx()
    clist = []
    entries = []
    datas = set()
    # basic crawl stats: files and bytes
    files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
    try:
        f = open(change, "r")
        clist = f.readlines()
        f.close()
    except IOError:
        raise

    def edct(op, **ed):
        # Build a plain-dict entry op; a 'stat' kwarg (os.stat_result)
        # is flattened to uid/gid/mode.
        dct = {}
        dct['op'] = op
        for k in ed:
            if k == 'stat':
                st = ed[k]
                dst = dct['stat'] = {}
                dst['uid'] = st.st_uid
                dst['gid'] = st.st_gid
                dst['mode'] = st.st_mode
            else:
                dct[k] = ed[k]
        return dct

    # regular file update: bytes & count (count each path only once)
    def _update_reg(entry, size):
        if not entry in files_pending['files']:
            files_pending['count'] += 1
            files_pending['bytes'] += size
            files_pending['files'].append(entry)

    # updates for directories, symlinks etc..
    def _update_rest():
        files_pending['count'] += 1

    # entry count
    def entry_update(entry, size, mode):
        if stat.S_ISREG(mode):
            _update_reg(entry, size)
        else:
            _update_rest()

    # purge count
    def purge_update():
        files_pending['purge'] += 1

    for e in clist:
        e = e.strip()
        # Record layout: type tag in [IDX_START, IDX_END), then
        # space-separated fields at the self.POS_* positions.
        et = e[self.IDX_START:self.IDX_END]
        ec = e[self.IDX_END:].split(' ')
        if et in self.TYPE_ENTRY:
            ty = ec[self.POS_TYPE]
            en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
            gfid = ec[self.POS_GFID]
            # definitely need a better way bucketize entry ops
            if ty in ['UNLINK', 'RMDIR']:
                purge_update()
                entries.append(edct(ty, gfid=gfid, entry=en))
                continue
            go = os.path.join(pfx, gfid)
            st = lstat(go)
            if isinstance(st, int):
                # Source gone already; a RENAME whose source was
                # purged still needs its old name unlinked downstream.
                if ty == 'RENAME':
                    entries.append(edct('UNLINK', gfid=gfid, entry=en))
                else:
                    logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            if ty in ['CREATE', 'MKDIR', 'MKNOD']:
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'LINK':
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'SYMLINK':
                # target may vanish between lstat and readlink;
                # errno_wrap maps ENOENT to an int which we skip
                rl = errno_wrap(os.readlink, [en], [ENOENT])
                if isinstance(rl, int):
                    continue
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
                                    link=rl))
            elif ty == 'RENAME':
                e2 = unescape(os.path.join(pfx,
                                           ec[self.POS_ENTRY2]))
                entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2,
                                    stat=st))
            else:
                logging.warn('ignoring %s [op %s]' % (gfid, ty))
        elif et in self.TYPE_GFID:
            # Data record: gfid only — queue for content sync.
            go = os.path.join(pfx, ec[0])
            st = lstat(go)
            if isinstance(st, int):
                logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            datas.update([go])
    logging.debug('entries: %s' % repr(entries))
    # avoid double-counting stats when a changelog is retried
    if not retry:
        self.update_cumulative_stats(files_pending)
    # sync namespace
    if (entries):
        self.slave.server.entry_ops(entries)
    # sync data
    if self.syncdata(datas):
        if done:
            self.master.server.changelog_done(change)
        return True
def entry_ops(cls, entries):
    """Replay a batch of namespace operations on the slave mount.

    Each element of *entries* is a dict with "op", "gfid" and "entry"
    keys plus op-specific extras ("stat", "link", "entry1"). Creates
    are performed by packing a blob and setting the
    "glusterfs.gfid.newfile" xattr on the parent directory; unlinks,
    hard links and renames go through the plain syscalls wrapped with
    errno_wrap.
    """
    pfx = gauxpfx()
    logging.debug("entries: %s" % repr(entries))

    # blob for a gfid-preserving regular-file create
    def pack_reg(gf, bn, st):
        mode = st["mode"]
        return struct.pack(cls._fmt_mknod(len(bn)),
                           st["uid"], st["gid"], gf, mode, bn,
                           stat.S_IMODE(mode), 0, umask())

    # blob for a gfid-preserving mkdir
    def pack_mkdir(gf, bn, st):
        mode = st["mode"]
        return struct.pack(cls._fmt_mkdir(len(bn)),
                           st["uid"], st["gid"], gf, mode, bn,
                           stat.S_IMODE(mode), umask())

    # blob for a gfid-preserving symlink create
    def pack_symlink(gf, bn, lnk, st):
        return struct.pack(cls._fmt_symlink(len(bn), len(lnk)),
                           st["uid"], st["gid"],
                           gf, st["mode"], bn, lnk)

    def purge(entry, gfid):
        # This is an extremely racy code and needs to be fixed ASAP.
        # The GFID comparison guards against purging a pargfid/bname
        # that no longer corresponds to the changelog's GFID
        # (a stat(changelog_gfid) would be equally valid here); the
        # race sits between that check and the actual removal.
        disk_gfid = cls.gfid(entry)
        if isinstance(disk_gfid, int):
            return
        if disk_gfid != gfid:
            return
        ret = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
        if isinstance(ret, int) and ret == EISDIR:
            ret = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
            if ret == ENOTEMPTY:
                return ret

    for ent in entries:
        blob = None
        op = ent["op"]
        gfid = ent["gfid"]
        entry = ent["entry"]
        pg, bname = entry2pb(entry)
        if op in ["RMDIR", "UNLINK"]:
            # keep retrying while the purge reports an errno
            while isinstance(purge(entry, gfid), int):
                time.sleep(1)
        elif op == "CREATE":
            blob = pack_reg(gfid, bname, ent["stat"])
        elif op == "MKDIR":
            blob = pack_mkdir(gfid, bname, ent["stat"])
        elif op == "LINK":
            errno_wrap(os.link,
                       [os.path.join(pfx, gfid), entry],
                       [ENOENT, EEXIST])
        elif op == "SYMLINK":
            blob = pack_symlink(gfid, bname, ent["link"], ent["stat"])
        elif op == "RENAME":
            errno_wrap(os.rename,
                       [entry, ent["entry1"]],
                       [ENOENT, EEXIST])
        if blob:
            errno_wrap(Xattr.lsetxattr_l,
                       [pg, "glusterfs.gfid.newfile", blob],
                       [ENOENT, EEXIST])