def regjob(se, xte, pb):
    """Wait for the sync job `pb` and report whether `se` counts as synced.

    se  -- path of the entry being synced (used for logging and lstat).
    xte -- accepted for interface compatibility; not used in this body.
    pb  -- job handle; ``pb.wait()`` must return an indexable whose item 0
           is the success flag and item 1 the error code.

    Returns True when the job succeeded, or when it failed with error code
    23 or 24 and the file no longer exists.  Otherwise logs a warning and
    returns None.
    """
    rv = pb.wait()
    if rv[0]:
        logging.debug('synced ' + se)
        return True
    # NOTE(review): 23/24 look like rsync's "partial transfer" exit codes
    # (the log message below mentions Rsync) -- confirm against the caller.
    if rv[1] in [23, 24]:
        # stat to check if the file exists; lstat() here is the project
        # helper, which hands back an int instead of a stat result when the
        # lookup fails
        st = lstat(se)
        if isinstance(st, int):
            # file got unlinked in the interim
            return True
    # logging.warn is a deprecated alias (removed in Python 3.13)
    logging.warning('Rsync: %s [errcode: %d]' % (se, rv[1]))
    return None
def crawl(self, path='.', xtr=None, done=0):
    """Generate a CHANGELOG file consumable by process_change.

    Walks the entries of `path` and, for every entry that
    ``self.need_sync`` reports against the reference time `xtr`, records an
    "E" change (MKDIR / SYMLINK / MKNOD / LINK) through
    ``self.write_entry_change``; regular files additionally get a "D"
    change.  Directories are crawled recursively.

    path -- directory to crawl; '.' marks the root invocation, which opens
            the changelog up front and, after the walk, closes it, hands it
            to ``self.process`` and calls ``self.upd_stime``.
    xtr  -- reference time used for all comparisons; when falsy it is read
            from ``self.xtime('.', self.slave)`` (ENOENT maps to
            ``self.minus_infinity``).
    done -- forwarded unchanged to ``self.process``.

    Raises GsyncdError when an xtime lookup returns an error int (other
    than ENOENT for the slave root).
    """
    at_root = path == '.'
    if at_root:
        self.open()
        self.crawls += 1
    if not xtr:
        # get the root stime and use it for all comparisons
        xtr = self.xtime('.', self.slave)
        if isinstance(xtr, int):
            if xtr != ENOENT:
                raise GsyncdError('slave is corrupt')
            xtr = self.minus_infinity
    xtl = self.xtime(path)
    if isinstance(xtl, int):
        raise GsyncdError('master is corrupt')
    if xtr == xtl:
        # nothing changed below this directory
        if at_root:
            self.close()
        return
    self.xtime_reversion_hook(path, xtl, xtr)
    logging.debug("entering " + path)
    dem = self.master.server.entries(path)
    pargfid = self.master.server.gfid(path)
    if isinstance(pargfid, int):
        # NOTE(review): the message says "skipping" but the walk continues;
        # os.path.join(pargfid, ...) below will fail for an int pargfid.
        # Left unchanged pending confirmation of the intended behaviour.
        logging.warning('skipping directory %s' % (path))
    for bname in dem:
        e = os.path.join(path, bname)
        st = lstat(e)
        if isinstance(st, int):
            logging.warning('%s got purged in the interim..' % e)
            continue
        gfid = self.master.server.gfid(e)
        if isinstance(gfid, int):
            logging.warning('skipping entry %s..' % (e))
            continue
        xte = self.xtime(e)
        if isinstance(xte, int):
            raise GsyncdError('master is corrupt')
        if not self.need_sync(e, xte, xtr):
            continue
        mo = st.st_mode
        if stat.S_ISDIR(mo):
            self.write_entry_change(
                "E", [gfid, 'MKDIR', escape(os.path.join(pargfid, bname))])
            self.crawl(e, xtr)
        elif stat.S_ISLNK(mo):
            # Read the target of the entry being visited.  The previous
            # code passed the undefined name `en` here (NameError).
            rl = errno_wrap(os.readlink, [e], [ENOENT])
            if isinstance(rl, int):
                continue
            self.write_entry_change(
                "E",
                [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname)), rl])
        else:
            # if a file has a hardlink, create a Changelog entry as 'LINK'
            # so the slave side will decide if to create the new entry, or
            # to create link.
            if st.st_nlink == 1:
                self.write_entry_change(
                    "E",
                    [gfid, 'MKNOD', escape(os.path.join(pargfid, bname))])
            else:
                self.write_entry_change(
                    "E",
                    [gfid, 'LINK', escape(os.path.join(pargfid, bname))])
            if stat.S_ISREG(mo):
                self.write_entry_change("D", [gfid])
    if at_root:
        logging.info('processing xsync changelog %s' % self.fname())
        self.close()
        self.process([self.fname()], done)
        self.upd_stime(xtl)
def process_change(self, change, done, retry):
    """Parse one changelog file and replay it on the slave.

    Each line of `change` is split into a record type
    (``e[self.IDX_START:self.IDX_END]``) and space-separated fields.
    Records whose type is in ``self.TYPE_ENTRY`` become entry-op dicts;
    records whose type is in ``self.TYPE_GFID`` contribute a path to the
    set of files whose data must be synced.

    change -- path of the changelog file to read.
    done   -- when truthy and the data sync succeeds, the changelog is
              reported via ``self.master.server.changelog_done``.
    retry  -- when truthy, cumulative stats are not updated again.

    Returns True when ``self.syncdata`` reports success, otherwise None.
    An IOError from opening/reading `change` propagates to the caller.
    """
    pfx = gauxpfx()
    entries = []
    datas = set()

    # basic crawl stats: files and bytes
    files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
    # mirrors files_pending['files'] so the duplicate check is O(1) instead
    # of a list scan per record
    seen_files = set()

    # the context manager closes the handle even if readlines() fails
    with open(change, "r") as f:
        clist = f.readlines()

    def edct(op, **ed):
        # build the entry-op dict; a stat result is reduced to the three
        # fields that are shipped, nested under 'stat'
        dct = {'op': op}
        for k in ed:
            if k == 'stat':
                st = ed[k]
                dct['stat'] = {
                    'uid': st.st_uid,
                    'gid': st.st_gid,
                    'mode': st.st_mode,
                }
            else:
                dct[k] = ed[k]
        return dct

    # regular file update: bytes & count (each file counted once)
    def _update_reg(entry, size):
        if entry not in seen_files:
            seen_files.add(entry)
            files_pending['count'] += 1
            files_pending['bytes'] += size
            files_pending['files'].append(entry)

    # updates for directories, symlinks etc..
    def _update_rest():
        files_pending['count'] += 1

    # entry count
    def entry_update(entry, size, mode):
        if stat.S_ISREG(mode):
            _update_reg(entry, size)
        else:
            _update_rest()

    # purge count
    def purge_update():
        files_pending['purge'] += 1

    for e in clist:
        e = e.strip()
        et = e[self.IDX_START:self.IDX_END]
        ec = e[self.IDX_END:].split(' ')
        if et in self.TYPE_ENTRY:
            ty = ec[self.POS_TYPE]
            en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
            gfid = ec[self.POS_GFID]
            # definitely need a better way bucketize entry ops
            if ty in ['UNLINK', 'RMDIR']:
                purge_update()
                entries.append(edct(ty, gfid=gfid, entry=en))
                continue
            go = os.path.join(pfx, gfid)
            st = lstat(go)
            if isinstance(st, int):
                # the gfid path is gone: a RENAME degrades to an UNLINK of
                # the old name, anything else is dropped
                if ty == 'RENAME':
                    entries.append(edct('UNLINK', gfid=gfid, entry=en))
                else:
                    logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            if ty in ['CREATE', 'MKDIR', 'MKNOD']:
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'LINK':
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'SYMLINK':
                rl = errno_wrap(os.readlink, [en], [ENOENT])
                if isinstance(rl, int):
                    continue
                entries.append(
                    edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
            elif ty == 'RENAME':
                e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
                entries.append(
                    edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st))
            else:
                # logging.warn is a deprecated alias (removed in 3.13)
                logging.warning('ignoring %s [op %s]' % (gfid, ty))
        elif et in self.TYPE_GFID:
            go = os.path.join(pfx, ec[0])
            st = lstat(go)
            if isinstance(st, int):
                logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            datas.add(go)
    logging.debug('entries: %s' % repr(entries))
    if not retry:
        self.update_cumulative_stats(files_pending)
    # sync namespace
    if entries:
        self.slave.server.entry_ops(entries)
    # sync data
    if self.syncdata(datas):
        if done:
            self.master.server.changelog_done(change)
        return True
def entry_ops(cls, entries):
    """Apply a batch of namespace (entry) operations.

    entries -- iterable of dicts, each carrying at least 'op', 'gfid' and
               'entry', plus op-specific keys ('stat', 'link', 'entry1').

    Per op:
      RMDIR / UNLINK -- purge the entry, retrying once a second for as long
                        as the purge reports an errno.
      CREATE / MKNOD / MKDIR / SYMLINK -- pack a creation blob.
      LINK   -- hard-link from the gfid path when it exists, else pack a
                regular-file blob.
      RENAME -- rename when the source exists, else pack a regular-file
                blob for the new name.
    A packed blob is written to the parent `pg` through the
    'glusterfs.gfid.newfile' xattr.
    """
    pfx = gauxpfx()
    logging.debug('entries: %s' % repr(entries))

    def entry_attr(e, key):
        # Ownership/mode may sit at the top level of the entry dict or be
        # nested under 'stat' (the shape process_change builds); accept
        # both so neither producer hits a KeyError.
        if key in e:
            return e[key]
        return e['stat'][key]

    # regular file
    def entry_pack_reg(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mknod(blen), uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), 0, umask())

    def entry_pack_reg_stat(gf, bn, st):
        blen = len(bn)
        mo = st['mode']
        return struct.pack(cls._fmt_mknod(blen), st['uid'], st['gid'],
                           gf, mo, bn, stat.S_IMODE(mo), 0, umask())

    # mkdir
    def entry_pack_mkdir(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mkdir(blen), uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), umask())

    # symlink
    def entry_pack_symlink(gf, bn, lnk, st):
        blen = len(bn)
        llen = len(lnk)
        return struct.pack(cls._fmt_symlink(blen, llen), st['uid'],
                           st['gid'], gf, st['mode'], bn, lnk)

    def entry_purge(entry, gfid):
        # This is an extremely racy code and needs to be fixed ASAP.
        # The GFID check here is to be sure that the pargfid/bname
        # to be purged is the GFID gotten from the changelog.
        # (a stat(changelog_gfid) would also be valid here)
        # The race here is between the GFID check and the purge.
        disk_gfid = cls.gfid_mnt(entry)
        if isinstance(disk_gfid, int):
            return
        if not gfid == disk_gfid:
            return
        er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
        if isinstance(er, int):
            if er == EISDIR:
                er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
                if er == ENOTEMPTY:
                    # only a non-empty directory is reported to the caller
                    return er

    for e in entries:
        blob = None
        op = e['op']
        gfid = e['gfid']
        entry = e['entry']
        (pg, bname) = entry2pb(entry)
        if op in ['RMDIR', 'UNLINK']:
            # NOTE(review): retries without bound while the purge keeps
            # returning an errno (i.e. the directory stays non-empty)
            while True:
                er = entry_purge(entry, gfid)
                if isinstance(er, int):
                    time.sleep(1)
                else:
                    break
        elif op in ['CREATE', 'MKNOD']:
            # gid was previously filled from the uid field
            blob = entry_pack_reg(gfid, bname, entry_attr(e, 'mode'),
                                  entry_attr(e, 'uid'),
                                  entry_attr(e, 'gid'))
        elif op == 'MKDIR':
            blob = entry_pack_mkdir(gfid, bname, entry_attr(e, 'mode'),
                                    entry_attr(e, 'uid'),
                                    entry_attr(e, 'gid'))
        elif op == 'LINK':
            slink = os.path.join(pfx, gfid)
            st = lstat(slink)
            if isinstance(st, int):
                (pg, bname) = entry2pb(entry)
                blob = entry_pack_reg_stat(gfid, bname, e['stat'])
            else:
                errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
        elif op == 'SYMLINK':
            blob = entry_pack_symlink(gfid, bname, e['link'], e['stat'])
        elif op == 'RENAME':
            en = e['entry1']
            st = lstat(entry)
            if isinstance(st, int):
                # source is missing: create the target name directly
                (pg, bname) = entry2pb(en)
                blob = entry_pack_reg_stat(gfid, bname, e['stat'])
            else:
                errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
        if blob:
            errno_wrap(Xattr.lsetxattr_l,
                       [pg, 'glusterfs.gfid.newfile', blob],
                       [EEXIST], [ENOENT, ESTALE, EINVAL])
def entry_ops(cls, entries):
    """Apply a batch of namespace (entry) operations.

    entries -- iterable of dicts, each carrying at least 'op', 'gfid' and
               'entry', plus op-specific keys ('stat', 'link', 'entry1').

    Per op:
      RMDIR / UNLINK -- purge the entry, retrying once a second for as long
                        as the purge reports an errno.
      CREATE / MKNOD / MKDIR / SYMLINK -- pack a creation blob.
      LINK   -- hard-link from the gfid path when it exists, else pack a
                regular-file blob.
      RENAME -- rename when the source exists, else pack a regular-file
                blob for the new name.
    A packed blob is written to the parent `pg` through the
    'glusterfs.gfid.newfile' xattr.
    """
    pfx = gauxpfx()
    logging.debug('entries: %s' % repr(entries))

    def entry_attr(e, key):
        # Ownership/mode may sit at the top level of the entry dict or be
        # nested under 'stat' (the shape process_change builds); accept
        # both so neither producer hits a KeyError.
        if key in e:
            return e[key]
        return e['stat'][key]

    # regular file
    def entry_pack_reg(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mknod(blen), uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), 0, umask())

    def entry_pack_reg_stat(gf, bn, st):
        blen = len(bn)
        mo = st['mode']
        return struct.pack(cls._fmt_mknod(blen), st['uid'], st['gid'],
                           gf, mo, bn, stat.S_IMODE(mo), 0, umask())

    # mkdir
    def entry_pack_mkdir(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mkdir(blen), uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), umask())

    # symlink
    def entry_pack_symlink(gf, bn, lnk, st):
        blen = len(bn)
        llen = len(lnk)
        return struct.pack(cls._fmt_symlink(blen, llen), st['uid'],
                           st['gid'], gf, st['mode'], bn, lnk)

    def entry_purge(entry, gfid):
        # This is an extremely racy code and needs to be fixed ASAP.
        # The GFID check here is to be sure that the pargfid/bname
        # to be purged is the GFID gotten from the changelog.
        # (a stat(changelog_gfid) would also be valid here)
        # The race here is between the GFID check and the purge.
        disk_gfid = cls.gfid_mnt(entry)
        if isinstance(disk_gfid, int):
            return
        if not gfid == disk_gfid:
            return
        er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
        if isinstance(er, int):
            if er == EISDIR:
                er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
                if er == ENOTEMPTY:
                    # only a non-empty directory is reported to the caller
                    return er

    for e in entries:
        blob = None
        op = e['op']
        gfid = e['gfid']
        entry = e['entry']
        (pg, bname) = entry2pb(entry)
        if op in ['RMDIR', 'UNLINK']:
            # NOTE(review): retries without bound while the purge keeps
            # returning an errno (i.e. the directory stays non-empty)
            while True:
                er = entry_purge(entry, gfid)
                if isinstance(er, int):
                    time.sleep(1)
                else:
                    break
        elif op in ['CREATE', 'MKNOD']:
            # gid was previously filled from the uid field
            blob = entry_pack_reg(gfid, bname, entry_attr(e, 'mode'),
                                  entry_attr(e, 'uid'),
                                  entry_attr(e, 'gid'))
        elif op == 'MKDIR':
            blob = entry_pack_mkdir(gfid, bname, entry_attr(e, 'mode'),
                                    entry_attr(e, 'uid'),
                                    entry_attr(e, 'gid'))
        elif op == 'LINK':
            slink = os.path.join(pfx, gfid)
            st = lstat(slink)
            if isinstance(st, int):
                (pg, bname) = entry2pb(entry)
                blob = entry_pack_reg_stat(gfid, bname, e['stat'])
            else:
                errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
        elif op == 'SYMLINK':
            blob = entry_pack_symlink(gfid, bname, e['link'], e['stat'])
        elif op == 'RENAME':
            en = e['entry1']
            st = lstat(entry)
            if isinstance(st, int):
                # source is missing: create the target name directly
                (pg, bname) = entry2pb(en)
                blob = entry_pack_reg_stat(gfid, bname, e['stat'])
            else:
                errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
        if blob:
            errno_wrap(Xattr.lsetxattr_l,
                       [pg, 'glusterfs.gfid.newfile', blob],
                       [EEXIST], [ENOENT, ESTALE, EINVAL])
def entry_ops(cls, entries):
    """Apply a batch of namespace (entry) operations.

    entries -- iterable of dicts, each carrying at least "op", "gfid" and
               "entry", plus op-specific keys ("stat", "link", "entry1").

    Per op:
      RMDIR / UNLINK -- purge the entry; when an RMDIR hits a non-empty
                        directory the tree is removed recursively, and
                        otherwise the purge is retried once a second.
      CREATE / MKNOD / MKDIR / SYMLINK -- pack a creation blob.
      LINK   -- hard-link from the gfid path when it exists, else pack a
                regular-file blob.
      RENAME -- rename when the source exists; when it does not and the
                recorded stat is not a directory, pack a regular-file blob
                for the new name.
    A packed blob is written to the parent `pg` through the
    "glusterfs.gfid.newfile" xattr.
    """
    pfx = gauxpfx()
    logging.debug("entries: %s" % repr(entries))

    def entry_attr(e, key):
        # Ownership/mode may sit at the top level of the entry dict or be
        # nested under "stat" (the shape process_change builds); accept
        # both so neither producer hits a KeyError.
        if key in e:
            return e[key]
        return e["stat"][key]

    # regular file
    def entry_pack_reg(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mknod(blen), uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), 0, umask())

    def entry_pack_reg_stat(gf, bn, st):
        blen = len(bn)
        mo = st["mode"]
        return struct.pack(cls._fmt_mknod(blen), st["uid"], st["gid"],
                           gf, mo, bn, stat.S_IMODE(mo), 0, umask())

    # mkdir
    def entry_pack_mkdir(gf, bn, mo, uid, gid):
        blen = len(bn)
        return struct.pack(cls._fmt_mkdir(blen), uid, gid, gf, mo, bn,
                           stat.S_IMODE(mo), umask())

    # symlink
    def entry_pack_symlink(gf, bn, lnk, st):
        blen = len(bn)
        llen = len(lnk)
        return struct.pack(cls._fmt_symlink(blen, llen), st["uid"],
                           st["gid"], gf, st["mode"], bn, lnk)

    def entry_purge(entry, gfid):
        # This is an extremely racy code and needs to be fixed ASAP.
        # The GFID check here is to be sure that the pargfid/bname
        # to be purged is the GFID gotten from the changelog.
        # (a stat(changelog_gfid) would also be valid here)
        # The race here is between the GFID check and the purge.
        disk_gfid = cls.gfid_mnt(entry)
        if isinstance(disk_gfid, int):
            return
        if not gfid == disk_gfid:
            return
        er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
        if isinstance(er, int):
            if er == EISDIR:
                er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
                if er == ENOTEMPTY:
                    # only a non-empty directory is reported to the caller
                    return er

    for e in entries:
        blob = None
        op = e["op"]
        gfid = e["gfid"]
        entry = e["entry"]
        (pg, bname) = entry2pb(entry)
        if op in ["RMDIR", "UNLINK"]:
            while True:
                er = entry_purge(entry, gfid)
                if not isinstance(er, int):
                    break
                if er == ENOTEMPTY and op == "RMDIR":
                    # directory still has children: remove the whole tree
                    er1 = errno_wrap(shutil.rmtree,
                                     [os.path.join(pg, bname)], [ENOENT])
                    if not isinstance(er1, int):
                        logging.info("Removed %s/%s recursively" %
                                     (pg, bname))
                        break
                # logging.warn is a deprecated alias (removed in 3.13)
                logging.warning("Failed to remove %s => %s/%s. %s" %
                                (gfid, pg, bname, os.strerror(er)))
                time.sleep(1)
        elif op in ["CREATE", "MKNOD"]:
            blob = entry_pack_reg(gfid, bname, entry_attr(e, "mode"),
                                  entry_attr(e, "uid"),
                                  entry_attr(e, "gid"))
        elif op == "MKDIR":
            blob = entry_pack_mkdir(gfid, bname, entry_attr(e, "mode"),
                                    entry_attr(e, "uid"),
                                    entry_attr(e, "gid"))
        elif op == "LINK":
            slink = os.path.join(pfx, gfid)
            st = lstat(slink)
            if isinstance(st, int):
                (pg, bname) = entry2pb(entry)
                blob = entry_pack_reg_stat(gfid, bname, e["stat"])
            else:
                errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
        elif op == "SYMLINK":
            blob = entry_pack_symlink(gfid, bname, e["link"], e["stat"])
        elif op == "RENAME":
            en = e["entry1"]
            st = lstat(entry)
            if isinstance(st, int):
                # source is missing: create the target name directly, but
                # never fabricate a directory this way
                if e["stat"] and not stat.S_ISDIR(e["stat"]["mode"]):
                    (pg, bname) = entry2pb(en)
                    blob = entry_pack_reg_stat(gfid, bname, e["stat"])
            else:
                errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
        if blob:
            errno_wrap(Xattr.lsetxattr,
                       [pg, "glusterfs.gfid.newfile", blob],
                       [EEXIST], [ENOENT, ESTALE, EINVAL])
def crawl(self, path='.', xtr=None, done=0):
    """Generate a CHANGELOG file consumable by process_change.

    Walks the entries of `path` and, for every entry that
    ``self.need_sync`` reports against the reference time `xtr`, records an
    "E" change (MKDIR / SYMLINK / MKNOD / LINK) through
    ``self.write_entry_change``; regular files additionally get a "D"
    change.  Directories are crawled recursively.

    path -- directory to crawl; '.' marks the root invocation, which opens
            the changelog up front and, after the walk, closes it, hands it
            to ``self.process`` and calls ``self.upd_stime``.
    xtr  -- reference time used for all comparisons; when falsy it is read
            from ``self.xtime('.', self.slave)`` (ENOENT maps to
            ``self.minus_infinity``).
    done -- forwarded unchanged to ``self.process``.

    Raises GsyncdError when an xtime lookup returns an error int (other
    than ENOENT for the slave root).
    """
    at_root = path == '.'
    if at_root:
        self.open()
        self.crawls += 1
    if not xtr:
        # get the root stime and use it for all comparisons
        xtr = self.xtime('.', self.slave)
        if isinstance(xtr, int):
            if xtr != ENOENT:
                raise GsyncdError('slave is corrupt')
            xtr = self.minus_infinity
    xtl = self.xtime(path)
    if isinstance(xtl, int):
        raise GsyncdError('master is corrupt')
    if xtr == xtl:
        # nothing changed below this directory
        if at_root:
            self.close()
        return
    self.xtime_reversion_hook(path, xtl, xtr)
    logging.debug("entering " + path)
    dem = self.master.server.entries(path)
    pargfid = self.master.server.gfid(path)
    if isinstance(pargfid, int):
        # NOTE(review): the message says "skipping" but the walk continues;
        # os.path.join(pargfid, ...) below will fail for an int pargfid.
        # Left unchanged pending confirmation of the intended behaviour.
        logging.warning('skipping directory %s' % (path))
    for bname in dem:
        e = os.path.join(path, bname)
        st = lstat(e)
        if isinstance(st, int):
            logging.warning('%s got purged in the interim..' % e)
            continue
        gfid = self.master.server.gfid(e)
        if isinstance(gfid, int):
            logging.warning('skipping entry %s..' % (e))
            continue
        xte = self.xtime(e)
        if isinstance(xte, int):
            raise GsyncdError('master is corrupt')
        if not self.need_sync(e, xte, xtr):
            continue
        mo = st.st_mode
        if stat.S_ISDIR(mo):
            self.write_entry_change(
                "E", [gfid, 'MKDIR', escape(os.path.join(pargfid, bname))])
            self.crawl(e, xtr)
        elif stat.S_ISLNK(mo):
            # Read the target of the entry being visited.  The previous
            # code passed the undefined name `en` here (NameError).
            rl = errno_wrap(os.readlink, [e], [ENOENT])
            if isinstance(rl, int):
                continue
            self.write_entry_change(
                "E",
                [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname)), rl])
        else:
            # if a file has a hardlink, create a Changelog entry as 'LINK'
            # so the slave side will decide if to create the new entry, or
            # to create link.
            if st.st_nlink == 1:
                self.write_entry_change(
                    "E",
                    [gfid, 'MKNOD', escape(os.path.join(pargfid, bname))])
            else:
                self.write_entry_change(
                    "E",
                    [gfid, 'LINK', escape(os.path.join(pargfid, bname))])
            if stat.S_ISREG(mo):
                self.write_entry_change("D", [gfid])
    if at_root:
        logging.info('processing xsync changelog %s' % self.fname())
        self.close()
        self.process([self.fname()], done)
        self.upd_stime(xtl)
def process_change(self, change, done, retry):
    """Parse one changelog file and replay it on the slave.

    Each line of `change` is split into a record type
    (``e[self.IDX_START:self.IDX_END]``) and space-separated fields.
    Records whose type is in ``self.TYPE_ENTRY`` become entry-op dicts;
    records whose type is in ``self.TYPE_GFID`` contribute a path to the
    set of files whose data must be synced.

    change -- path of the changelog file to read.
    done   -- when truthy and the data sync succeeds, the changelog is
              reported via ``self.master.server.changelog_done``.
    retry  -- when truthy, cumulative stats are not updated again.

    Returns True when ``self.syncdata`` reports success, otherwise None.
    An IOError from opening/reading `change` propagates to the caller.
    """
    pfx = gauxpfx()
    entries = []
    datas = set()

    # basic crawl stats: files and bytes
    files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
    # mirrors files_pending['files'] so the duplicate check is O(1) instead
    # of a list scan per record
    seen_files = set()

    # the context manager closes the handle even if readlines() fails
    with open(change, "r") as f:
        clist = f.readlines()

    def edct(op, **ed):
        # build the entry-op dict; a stat result is reduced to the three
        # fields that are shipped, nested under 'stat'
        dct = {'op': op}
        for k in ed:
            if k == 'stat':
                st = ed[k]
                dct['stat'] = {
                    'uid': st.st_uid,
                    'gid': st.st_gid,
                    'mode': st.st_mode,
                }
            else:
                dct[k] = ed[k]
        return dct

    # regular file update: bytes & count (each file counted once)
    def _update_reg(entry, size):
        if entry not in seen_files:
            seen_files.add(entry)
            files_pending['count'] += 1
            files_pending['bytes'] += size
            files_pending['files'].append(entry)

    # updates for directories, symlinks etc..
    def _update_rest():
        files_pending['count'] += 1

    # entry count
    def entry_update(entry, size, mode):
        if stat.S_ISREG(mode):
            _update_reg(entry, size)
        else:
            _update_rest()

    # purge count
    def purge_update():
        files_pending['purge'] += 1

    for e in clist:
        e = e.strip()
        et = e[self.IDX_START:self.IDX_END]
        ec = e[self.IDX_END:].split(' ')
        if et in self.TYPE_ENTRY:
            ty = ec[self.POS_TYPE]
            en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
            gfid = ec[self.POS_GFID]
            # definitely need a better way bucketize entry ops
            if ty in ['UNLINK', 'RMDIR']:
                purge_update()
                entries.append(edct(ty, gfid=gfid, entry=en))
                continue
            go = os.path.join(pfx, gfid)
            st = lstat(go)
            if isinstance(st, int):
                # the gfid path is gone: a RENAME degrades to an UNLINK of
                # the old name, anything else is dropped
                if ty == 'RENAME':
                    entries.append(edct('UNLINK', gfid=gfid, entry=en))
                else:
                    logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            if ty in ['CREATE', 'MKDIR', 'MKNOD']:
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'LINK':
                entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
            elif ty == 'SYMLINK':
                rl = errno_wrap(os.readlink, [en], [ENOENT])
                if isinstance(rl, int):
                    continue
                entries.append(
                    edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
            elif ty == 'RENAME':
                e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
                entries.append(
                    edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st))
            else:
                # logging.warn is a deprecated alias (removed in 3.13)
                logging.warning('ignoring %s [op %s]' % (gfid, ty))
        elif et in self.TYPE_GFID:
            go = os.path.join(pfx, ec[0])
            st = lstat(go)
            if isinstance(st, int):
                logging.debug('file %s got purged in the interim' % go)
                continue
            entry_update(go, st.st_size, st.st_mode)
            datas.add(go)
    logging.debug('entries: %s' % repr(entries))
    if not retry:
        self.update_cumulative_stats(files_pending)
    # sync namespace
    if entries:
        self.slave.server.entry_ops(entries)
    # sync data
    if self.syncdata(datas):
        if done:
            self.master.server.changelog_done(change)
        return True