def init_transfer(self, bcast_msg, info, tmo):
    """Broadcast a transfer request and set up the control and data connections.

    The message ``"<bcast_msg> <MyHost> <control port>[ <serialized info>]"``
    is sent to ``self.BroadcastAddress`` roughly once per second until a peer
    connects to a temporary control listener.  The peer is then told, over
    the control connection, where to open the data connection
    (``"DATA <MyHost> <data port>"``).

    Args:
        bcast_msg: leading text of the broadcast message.
        info: optional object; when not None, ``info.serialize()`` is
            appended to the broadcast message.
        tmo: time limit in seconds for the broadcast phase, also used as the
            accept timeout for the data connection.  None means no limit.

    Returns:
        Tuple ``(peer_ctl_sock, peer_ctl_addr, peer_data_sock, peer_data_addr)``.

    Raises:
        DCTimeOut: no peer opened a control connection within ``tmo``.
        RuntimeError: the peer did not open the data connection within ``tmo``.
    """
    peer_ctl_sock = None
    ctl_listen_sock = None
    bsock = socket(AF_INET, SOCK_DGRAM)
    try:
        bsock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1)
        t0 = time.time()
        ctl_listen_sock = socket(AF_INET, SOCK_STREAM)
        ctl_listen_sock.bind(('', 0))
        ctl_listen_sock.listen(0)
        ctl_listen_port = ctl_listen_sock.getsockname()[1]
        bcast = '%s %s %d' % (bcast_msg, self.MyHost, ctl_listen_port)
        if info is not None:
            bcast += " " + info.serialize()
        # The 1 second accept timeout paces the re-broadcasts.
        ctl_listen_sock.settimeout(1.0)
        while peer_ctl_sock is None and (tmo is None or time.time() < t0 + tmo):
            bsock.sendto(to_bytes(bcast), self.BroadcastAddress)
            try:
                peer_ctl_sock, peer_ctl_addr = ctl_listen_sock.accept()
            except timeout:
                pass
    finally:
        # Release the temporary sockets even if broadcasting/listening failed.
        if ctl_listen_sock is not None:
            ctl_listen_sock.close()
        bsock.close()

    if peer_ctl_sock is None:
        raise DCTimeOut()

    data_listen_sock = None
    established = False
    try:
        data_listen_sock = socket(AF_INET, SOCK_STREAM)
        data_listen_sock.bind((self.MyHost, 0))
        data_listen_port = data_listen_sock.getsockname()[1]
        data_listen_sock.listen(0)
        ctl_str = SockStream(peer_ctl_sock)
        ctl_str.send('DATA %s %s' % (self.MyHost, data_listen_port))
        data_listen_sock.settimeout(tmo)
        try:
            peer_data_sock, peer_data_addr = data_listen_sock.accept()
        except timeout:
            raise RuntimeError('Data connection accept() time out')
        established = True
    finally:
        if data_listen_sock is not None:
            data_listen_sock.close()
        if not established:
            # Do not leave the control connection open on any failure.
            peer_ctl_sock.close()
    return peer_ctl_sock, peer_ctl_addr, peer_data_sock, peer_data_addr
def _checkEOF(self):
    """Receive and verify the peer's ``EOF <count>`` message, once.

    Does nothing if ``self.EOFReceived`` is already set.  Otherwise reads one
    message from the control socket, expects ``EOF <count>``, replies ``OK``
    and closes the control socket.  The announced count must equal
    ``self.NBytes``.

    Raises:
        IOError: the EOF message could not be received or parsed, or the
            announced byte count differs from ``self.NBytes``.
    """
    if self.EOFReceived:
        return
    try:
        stream = SockStream(self.CtlSock)
        msg = stream.recv()
        words = msg.split()
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if words[0] != "EOF":
            raise ValueError("unexpected message: %r" % (msg,))
        count = int(words[1])
        stream.send("OK")
    except Exception:
        raise IOError("Error processing EOF message")
    finally:
        self.CtlSock.close()
    if self.NBytes != count:
        raise IOError("Incomplete file transfer")
    self.EOFReceived = True
def _remote_get(self, peer_ctl_sock, peer_ctl_addr, peer_data_sock, peer_data_addr, ppath, tmo):
    """Receive a file over the data connection and confirm it via the control connection.

    Data is read from ``peer_data_sock`` until the peer closes it and written
    to ``ppath``.  The peer's ``EOF <count>`` control message is then read,
    acknowledged with ``OK`` and compared with the number of bytes received.

    Args:
        peer_ctl_sock: connected control socket.
        peer_ctl_addr: address of the control peer (not used here).
        peer_data_sock: connected data socket; always closed by this method.
        peer_data_addr: address of the data peer; element 0 is used to name
            the sender in the status message.
        ppath: path of the local file to write.
        tmo: timeout in seconds applied to the data socket, or None.

    Returns:
        ``(True, message)`` on success, ``(False, reason)`` if the EOF
        message cannot be parsed or the byte counts disagree.
    """
    ctl_str = SockStream(peer_ctl_sock)
    t0 = time.time()
    nbytes = 0
    try:
        if tmo is not None:
            peer_data_sock.settimeout(tmo)
        with open(ppath, 'wb') as fd:
            while True:
                data = peer_data_sock.recv(1024 * 1024)
                if not data:
                    break
                fd.write(data)
                nbytes += len(data)
    finally:
        # Close the data socket even when recv()/write() fails or times out.
        peer_data_sock.close()
    t1 = time.time()

    msg = ""
    try:
        msg = ctl_str.recv()
        words = msg.split()
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if words[0] != "EOF":
            raise ValueError("unexpected message")
        count = int(words[1])
        ctl_str.send("OK")
    except Exception:
        return False, "Can not parse EOF message: [%s]" % (msg, )

    if nbytes != count:
        return False, "Incorrect byte count: EOF message: %d, actual count: %d" % (
            count, nbytes)

    try:
        sndr = gethostbyaddr(peer_data_addr[0])[0]
    except Exception:
        # Reverse lookup is cosmetic; fall back to the raw address.
        sndr = peer_data_addr[0]

    size = float(nbytes) / 1024.0 / 1024.0
    rate = ''
    if t1 > t0:
        rate = ' at %f MB/sec' % (size / (t1 - t0))
    return True, 'Received %f MB from %s%s' % (size, sndr, rate)
def connect(self):
    """Open the TCP connection to the VFS server and perform the HELLO handshake.

    Sends ``HELLO <ID>`` and interprets the reply:

    * ``OK``   - connection accepted.
    * ``HOLD`` - connection accepted and ``self.CellStorage.hold()`` is called.
    * ``EXIT`` - logs the shutdown and calls ``sys.exit(3)``.
    * anything else - connection rejected.

    Returns:
        True if connected (``self.Connected`` is set), False otherwise.  On
        failure the socket is closed and ``self.Sock`` is reset to None so
        that repeated attempts do not leak sockets.
    """
    self.Sock = socket(AF_INET, SOCK_STREAM)
    try:
        self.Sock.connect(self.DSAddr)
    except Exception:
        self.log('can not connect to VFS Server')
        self.Sock.close()
        self.Sock = None
        return False
    self.debug("connected as %s" % (self.Sock.getsockname(),))
    self.Str = SockStream(self.Sock)
    ans = self.Str.sendAndRecv('HELLO %s' % self.ID)
    self.log('connect: HELLO -> %s' % ans)
    if ans == 'HOLD':
        self.CellStorage.hold()
    elif ans != 'OK':
        if ans == 'EXIT':
            self.log('Shot down by VFS Server')
            sys.exit(3)
        # Rejected: drop the half-open connection instead of leaking it.
        self.Sock.close()
        self.Sock = None
        self.Str = None
        return False
    self.Connected = True
    return True
def close(self):
    """Finish the transfer: close the data socket and do the EOF handshake.

    If not already closed, the data socket is closed, ``EOF <NBytes>`` is
    sent over the control socket and the reply must be ``OK``.  Both sockets
    are closed in every case, and the object is then marked closed so that a
    repeated ``close()`` does not retry the handshake on closed sockets.

    Raises:
        IOError: the peer did not answer ``OK`` to the EOF message.
    """
    try:
        if not self.Closed:
            self.DataSock.close()
            stream = SockStream(self.CtlSock)
            answer = stream.sendAndRecv('EOF %d' % (self.NBytes, ))
            self.CtlSock.close()
            if answer != "OK":
                raise IOError("Protocol error during closing handshake")
    finally:
        # Best-effort cleanup: errors while closing are deliberately ignored.
        for attr in ("DataSock", "CtlSock"):
            try:
                getattr(self, attr).close()
            except Exception:
                pass
        self.Closed = True
def _remote_put(self, peer_ctl_sock, peer_ctl_addr, peer_data_sock, peer_data_addr, ppath, tmo):
    """Send a file over the data connection and confirm it via the control connection.

    The source is read in 60000-byte chunks and written to
    ``peer_data_sock``.  The data socket is then shut down and closed, and
    ``EOF <nbytes>`` is sent over the control connection; the peer must
    answer ``OK``.

    Args:
        peer_ctl_sock: connected control socket.
        peer_ctl_addr: address of the control peer (not used here).
        peer_data_sock: connected data socket; always closed by this method.
        peer_data_addr: address of the data peer; element 0 is used to name
            the receiver in the status message.
        ppath: path of the file to send (``str``), or an already open
            binary file object, which is closed after sending.
        tmo: timeout in seconds applied to the data socket, or None.

    Returns:
        ``(True, message)`` if the peer acknowledged the transfer,
        ``(False, 'Transfer aborted')`` otherwise.
    """
    ctl_str = SockStream(peer_ctl_sock)
    t0 = time.time()
    nbytes = 0
    try:
        if tmo is not None:
            peer_data_sock.settimeout(tmo)
        fd = open(ppath, 'rb') if isinstance(ppath, str) else ppath
        with fd:
            while True:
                data = fd.read(60000)
                if not data:
                    break
                peer_data_sock.sendall(data)
                nbytes += len(data)
        t1 = time.time()
        peer_data_sock.shutdown(SHUT_RDWR)
    finally:
        # Close the data socket even when reading or sending fails.
        peer_data_sock.close()

    answer = ctl_str.sendAndRecv('EOF %d' % (nbytes, ))
    if answer != "OK":
        return False, 'Transfer aborted'

    size = float(nbytes) / 1024.0 / 1024.0
    try:
        rcvr = gethostbyaddr(peer_data_addr[0])[0]
    except Exception:
        # Reverse lookup is cosmetic; fall back to the raw address.
        rcvr = peer_data_addr[0]
    rate = ''
    # Strictly greater: t1 == t0 would divide by zero.
    if t1 > t0 and size > 0:
        rate = ' at %f MB/sec' % (size / (t1 - t0))
    return True, 'Stored %f MB on %s%s' % (size, rcvr, rate)
def connect(self):
    """Connect to the VFS server and introduce this client, unless already connected.

    Returns immediately when ``self.DStr`` is already set.  Otherwise makes
    up to 10 TCP connection attempts to ``self.DAddr``, sleeping 10 seconds
    between failed attempts, then sends ``HELLO <Username>`` and expects
    ``OK``.

    Raises:
        DiskFarmError: the server could not be reached, or it did not answer
            ``OK`` to the HELLO message.
    """
    if self.DStr is not None:
        return
    retry_delay = 10        # seconds to wait between attempts
    attempts_left = 10
    connected = False
    while attempts_left > 0 and not connected:
        self.DSock = socket(AF_INET, SOCK_STREAM)
        try:
            self.DSock.connect(self.DAddr)
        except Exception:
            # ``Exception`` rather than a bare except, so that Ctrl-C is not
            # swallowed by the retry loop.
            self.DSock.close()
            self.DSock = None
            attempts_left -= 1
            # server is out, let's wait and retry
            if attempts_left:
                time.sleep(retry_delay)
        else:
            connected = True
    if not connected:
        raise DiskFarmError("Can not connect to VFS Server")
    self.DStr = SockStream(self.DSock)
    ans = self.DStr.sendAndRecv('HELLO %s' % self.Username)
    if ans != 'OK':
        self.disconnect()
        raise DiskFarmError("Error connecting to VFS Server: <%s>" % (ans, ))
class DiskFarmClient:
    """Client for the VFS server and the cell nodes of a disk farm.

    Commands to the VFS server are exchanged as text messages over a TCP
    connection wrapped in ``SockStream``; cell nodes are queried with UDP
    datagrams.  Data transfers are delegated to ``DataClient``.
    """

    def __init__(self, cfg=None):
        """Read addresses and the node list from the configuration.

        Args:
            cfg: path of a configuration file (``str``, loaded through
                ``DFConfig``) or an already loaded configuration ``dict``.

        Raises:
            ValueError: the name of the current user cannot be determined.
        """
        if isinstance(cfg, str):
            from dfconfig import DFConfig
            cfg = DFConfig(cfg, 'DFARM_CONFIG')
        else:
            assert isinstance(cfg, dict)
        self.Cfg = cfg
        self.CAddr = (cfg["cell"]['broadcast'], cfg["cell"]['listen_port'])
        self.FarmName = cfg['cell']['farm_name']
        self.DAddr = (cfg['vfssrv']['host'], cfg['vfssrv']['api_port'])
        self.NodeList = list(cfg['cell_class'].keys())
        # Node address = node name + domain (with a leading dot).
        domain = cfg['cell']['domain'] or ''
        if domain and domain[0] != '.':
            domain = '.' + domain
        self.NodeAddrMap = {n: n + domain for n in self.NodeList}
        self.DSock = None
        self.DStr = None
        self.Username = None
        # The user lookup is retried for up to 10 seconds before giving up.
        for _ in range(10):
            try:
                self.Username = pwd.getpwuid(os.getuid())[0]
            except Exception:
                time.sleep(1)
            else:
                break
        if not self.Username:
            raise ValueError(
                'Can not determine clients username. Possible NIS problem')

    # ------------------------------------------------------------------
    # VFS server connection
    # ------------------------------------------------------------------

    def connect(self):
        """Connect to the VFS server and send HELLO, unless already connected.

        Makes up to 10 connection attempts, sleeping 10 seconds between
        failed attempts.

        Raises:
            DiskFarmError: the server cannot be reached or rejects HELLO.
        """
        if self.DStr is not None:
            return
        retry_delay = 10        # seconds to wait between attempts
        attempts_left = 10
        connected = False
        while attempts_left > 0 and not connected:
            self.DSock = socket(AF_INET, SOCK_STREAM)
            try:
                self.DSock.connect(self.DAddr)
            except Exception:
                # ``Exception`` rather than a bare except, so that Ctrl-C is
                # not swallowed by the retry loop.
                self.DSock.close()
                self.DSock = None
                attempts_left -= 1
                # server is out, let's wait and retry
                if attempts_left:
                    time.sleep(retry_delay)
            else:
                connected = True
        if not connected:
            raise DiskFarmError("Can not connect to VFS Server")
        self.DStr = SockStream(self.DSock)
        ans = self.DStr.sendAndRecv('HELLO %s' % self.Username)
        if ans != 'OK':
            self.disconnect()
            raise DiskFarmError("Error connecting to VFS Server: <%s>" % (ans, ))

    def disconnect(self):
        """Close the VFS server connection, if any."""
        if self.DSock is not None:
            self.DSock.close()
            self.DSock = None
        self.DStr = None

    def _one_shot(self, command):
        """Connect, send ``command``, return the reply, and always disconnect."""
        self.connect()
        try:
            return self.DStr.sendAndRecv(command)
        finally:
            self.disconnect()

    def _node_command(self, command):
        """Send a node command on the (kept open) server connection.

        Returns ``(1, 'OK')``, ``(0, <error text>)`` or
        ``(None, 'Connection closed')``.
        """
        self.connect()
        ans = self.DStr.sendAndRecv(command)
        if not ans:
            return None, 'Connection closed'
        words = ans.split(None, 1)
        if words and words[0] == 'OK':
            return 1, 'OK'
        # Fall back to the whole reply when no error text follows the status.
        return 0, words[1] if len(words) > 1 else ans

    @staticmethod
    def _status(ans):
        """Map a server reply to ``(1, 'OK')`` or ``(0, <reply>)``."""
        if not ans:
            return 0, 'Connection closed'
        words = ans.split(None, 1)
        if not words or words[0] != 'OK':
            return 0, ans
        return 1, 'OK'

    # NOTE: an earlier ``open(self, lpath, mode)`` definition used to sit
    # here.  It was shadowed by the ``open(self, info, mode, ...)`` defined
    # at the end of this class and therefore never callable; it was removed.

    # ------------------------------------------------------------------
    # Node administration
    # ------------------------------------------------------------------

    def holdNodes(self, nlst):
        """Send ``HOLD`` for one node name or a list of node names."""
        if not isinstance(nlst, list):
            nlst = [nlst]
        return self._node_command('HOLD %s' % " ".join(nlst))

    def releaseNodes(self, nlst):
        """Send ``RELEASE`` for one node name or a list of node names."""
        if not isinstance(nlst, list):
            nlst = [nlst]
        return self._node_command('RELEASE %s' % " ".join(nlst))

    def replicateNodes(self, mult, nlst):
        """Send ``REPNODE <mult>`` for one node name or a list of node names."""
        if not isinstance(nlst, list):
            nlst = [nlst]
        return self._node_command('REPNODE %d %s' % (mult, " ".join(nlst)))

    # ------------------------------------------------------------------
    # Name space operations
    # ------------------------------------------------------------------

    def replicateFile(self, lpath, mult):
        """Send ``RR <lpath> <mult>``; returns ``(1, 'OK')`` or ``(0, reply)``."""
        return self._status(self._one_shot('RR %s %s' % (lpath, mult)))

    def getUsage(self, user):
        """Send ``USAGE <user>``.

        Returns:
            Three values evaluated from the reply on success, or
            ``(None, <error text>)`` on failure.
        """
        ans = self._one_shot('USAGE %s' % user)
        if not ans:
            return None, 'Connection closed'
        words = ans.split()
        if not words or words[0] != 'OK':
            return None, words[1] if len(words) > 1 else ans
        if len(words) < 4:
            return None, 'Protocol error: <%s>' % ans
        # SECURITY NOTE(review): eval() of text received from the network.
        # Kept because the exact value format is not visible here; consider
        # int()/float()/ast.literal_eval once the format is confirmed.
        return eval(words[1]), eval(words[2]), eval(words[3])

    def listFiles(self, dir='/'):
        """Send ``LIST <dir>`` and collect the listing.

        Returns:
            ``('OK', [(name, type, info), ...])`` on success, where ``info``
            is a ``VFSFileInfo`` (type ``'f'``), a ``VFSDirInfo`` or None;
            ``(<reply or error text>, [])`` otherwise.
        """
        self.connect()
        ans = self.DStr.sendAndRecv('LIST %s' % dir)
        if not ans:
            self.disconnect()
            return 'Connection closed', []
        words = ans.split()
        if not words or words[0] != 'OK':
            self.disconnect()
            return ans, []
        lst = []
        # Lines are "<fn> <typ> [<info>]"; the listing ends with "." (or
        # with a closed connection).  As in the original code the first
        # reply line goes through the same parsing; a bare "OK" has fewer
        # than two words and is skipped.
        while ans and ans != '.':
            words = ans.split(None, 2)
            if len(words) >= 2:
                fn, typ = words[:2]
                info = None
                if words[2:]:
                    if typ == 'f':
                        info = VFSFileInfo(fn, words[2])
                    else:
                        info = VFSDirInfo(fn, words[2])
                lst.append((fn, typ, info))
            ans = self.DStr.recv()
        self.disconnect()
        return 'OK', lst

    def getInfo(self, lpath):
        """Send ``GET <lpath>``.

        Returns:
            ``(info, '')`` where ``info`` is a ``VFSFileInfo``, a
            ``VFSDirInfo`` or None (unknown type), or ``(None, <error text>)``.
        """
        ans = self._one_shot('GET %s' % lpath)
        if not ans:
            return None, 'Connection closed'
        words = ans.split(None, 1)
        if not words or words[0] != 'OK':
            return None, words[1] if len(words) > 1 else ans
        words = words[1].split(None, 1) if len(words) > 1 else []
        if len(words) < 2:
            return None, 'Protocol error: <%s>' % ans
        typ = words[0]
        info = None
        if typ == 'f':
            info = VFSFileInfo(lpath, words[1])
        elif typ == 'd':
            info = VFSDirInfo(lpath, words[1])
        return info, ''

    def getType(self, lpath):
        """Send ``GETT <lpath>``; returns the type text or ``''`` on error.

        Raises:
            DiskFarmError: the connection was closed.
        """
        ans = self._one_shot('GETT %s' % lpath)
        if not ans:
            raise DiskFarmError('Connection closed')
        words = ans.split(None, 1)
        if len(words) < 2 or words[0] != 'OK':
            return ''
        return words[1]

    def exists(self, lpath):
        """True if ``getType(lpath)`` reports any type."""
        return self.getType(lpath) != ''

    def isDir(self, lpath):
        """True if ``getType(lpath)`` is ``'d'``."""
        return self.getType(lpath) == 'd'

    def isFile(self, lpath):
        """True if ``getType(lpath)`` is ``'f'``."""
        return self.getType(lpath) == 'f'

    def chmod(self, lpath, prot):
        """Send ``CHMOD <lpath> <prot>``; returns ``(1, 'OK')`` or ``(0, reply)``.

        Raises:
            DiskFarmError: the connection was closed.
        """
        ans = self._one_shot('CHMOD %s %s' % (lpath, prot))
        if not ans:
            raise DiskFarmError('Connection closed')
        return self._status(ans)

    def setAttr(self, lpath, attr, value):
        """Send ``SATTR <lpath> <attr> <value>``; returns ``(1, 'OK')`` or ``(0, reply)``.

        Raises:
            DiskFarmError: the connection was closed.
        """
        ans = self._one_shot('SATTR %s %s %s' % (lpath, attr, value))
        if not ans:
            raise DiskFarmError('Connection closed')
        return self._status(ans)

    def fileInfo(self, lpath, path, size=None):
        """Build a ``VFSFileInfo`` for ``lpath``.

        The actual size is taken from the local file ``path`` when given,
        otherwise from ``size`` when given.

        Raises:
            ValueError: the resulting size is 2 GB or more.
        """
        info = VFSFileInfo(lpath)
        if path:
            st = os.stat(path)
            info.setActualSize(st[stat.ST_SIZE])
        elif size is not None:
            info.setActualSize(size)
        if info.Size is not None and info.Size >= 2 * 1024 * 1024 * 1024:
            raise ValueError('Source file is too large, >= 2GB')
        return info

    def dirInfo(self, lpath):
        """Build a ``VFSDirInfo`` for ``lpath``."""
        return VFSDirInfo(lpath)

    def createFile(self, info, ncopies):
        """Send ``MF <path> <ncopies> <info>`` to create a file entry.

        ``info.CTime`` is reset to 0 before the request is sent.

        Returns:
            ``(VFSFileInfo, '')`` built from the reply, or
            ``(None, <error text>)``.
        """
        lpath = info.Path
        self.connect()
        info.CTime = 0
        ans = self._one_shot('MF %s %s %s' % (lpath, ncopies, info.serialize()))
        if not ans:
            return None, 'Connection closed'
        words = ans.split(None, 1)
        if len(words) < 2:
            return None, ans
        if words[0] != 'OK':
            return None, words[1]
        return VFSFileInfo(lpath, words[1]), ''

    def delFile(self, lpath):
        """Send ``DF <lpath>``; returns ``(1, 'OK')`` or ``(0, reply)``."""
        return self._status(self._one_shot('DF %s' % lpath))

    def delDir(self, lpath):
        """Send ``DD <lpath>``; returns ``(1, 'OK')`` or ``(0, reply)``."""
        return self._status(self._one_shot('DD %s' % lpath))

    def recursiveRemoveDir(self, path):
        """Delete all files under ``path``, then its subdirectories, then ``path``.

        Returns:
            ``(status, error text)``; stops at the first failure.
        """
        sts, lst = self.listFiles(path)
        if sts != 'OK':
            return 0, sts
        subdirs = []
        for lp, t, info in lst:
            fpath = path + '/' + lp
            if t == 'd':
                subdirs.append(fpath)
            else:
                sts, err = self.delFile(fpath)
                if not sts:
                    return sts, 'Error deleting %s: %s' % (fpath, err)
        for subdir in subdirs:
            sts, err = self.recursiveRemoveDir(subdir)
            if not sts:
                return sts, err
        return self.delDir(path)

    def makeDir(self, lpath, info):
        """Send ``MD <lpath> <info>``; returns ``(1, 'OK')`` or ``(0, reply)``."""
        return self._status(
            self._one_shot('MD %s %s' % (lpath, info.serialize())))

    # ------------------------------------------------------------------
    # Cell node queries (UDP)
    # ------------------------------------------------------------------

    def localDataPath(self, lpath, info):
        """Ask the cell on 127.0.0.1 for the data path of a file.

        Sends ``DPATH <farm> <lpath> <ctime>`` up to 5 times, waiting up to
        3 seconds for a reply each time.

        Returns:
            ``to_str()`` of the reply, or of None when nothing was received.
        """
        sock = socket(AF_INET, SOCK_DGRAM)
        try:
            msg = to_bytes('DPATH %s %s %s' % (self.FarmName, lpath, info.CTime))
            r = []
            retry = 5
            while retry > 0 and not r:
                try:
                    sock.sendto(msg, ('127.0.0.1', self.CAddr[1]))
                except Exception:
                    break
                r, w, e = select.select([sock], [], [], 3)
                retry -= 1
            ans = None
            if r:
                ans, addr = sock.recvfrom(10000)
                if not ans:
                    ans = None
        finally:
            sock.close()
        # NOTE(review): to_str() is also called with None when no reply was
        # received - confirm that to_str handles None.
        return to_str(ans)

    def cellInfo(self, node):
        """Query a cell node with ``STATPSA <farm>`` and parse the reply.

        The reply is read as two sections of lines, each ended by a line
        containing only ``.``: first ``<name> <size> <used> <reserved>
        <free>`` records, then records of three words.  Shorter lines are
        ignored.

        Returns:
            A ``CellInfo`` with ``PSAs`` and ``Txns`` filled in, or None if
            the node does not answer within 30 seconds.
        """
        sock = socket(AF_INET, SOCK_DGRAM)
        try:
            sock.sendto(to_bytes('STATPSA %s' % self.FarmName),
                        (node, self.CAddr[1]))
            r, w, e = select.select([sock], [], [], 30)
            if not r:
                return None
            ans, addr = sock.recvfrom(100000)
        finally:
            # The socket is closed on every path, including success.
            sock.close()
        lines = iter(to_str(ans).split('\n'))
        st = CellInfo(node)
        # parse PSAs
        psalst = []
        for l in lines:
            if l == '.':
                break
            words = l.split()
            if len(words) < 5:
                continue
            psn, size, used, rsrvd, free = words[:5]
            psalst.append((psn, int(size), int(used), int(rsrvd), int(free)))
        st.PSAs = psalst
        # parse transactions (the shared iterator continues after the first '.')
        txlst = []
        for l in lines:
            if l == '.':
                break
            words = l.split()
            if len(words) < 3:
                continue
            txlst.append(tuple(words[:3]))
        st.Txns = txlst
        return st

    def ping(self, pongcbk=None, donecbk=None):
        """Ping the nodes through ``Pinger`` and collect the responses.

        Args:
            pongcbk: optional callable ``(addr, cid, delay, status)`` invoked
                for each response.
            donecbk: optional callable invoked with the complete list.

        Returns:
            List of ``(cid, addr, delay, status)`` tuples.
        """
        pinger = Pinger(self.FarmName, self.NodeAddrMap, self.CAddr[1])
        lst = []
        for cid, addr, delay, status in pinger:
            if pongcbk is not None:
                pongcbk(addr, cid, delay, status)
            lst.append((cid, addr, delay, status))
        if donecbk is not None:
            donecbk(lst)
        return lst

    # ------------------------------------------------------------------
    # Data transfer (delegated to DataClient)
    # ------------------------------------------------------------------

    def get(self, info, fn, nolocal=True, tmo=None):
        """Download through ``DataClient.get`` and return its result."""
        data_client = DataClient(self.CAddr, self.FarmName)
        return data_client.get(info, fn, nolocal, tmo)

    def put(self, info, fn, ncopies=1, nolocal=True, tmo=None):
        """Upload through ``DataClient.put`` and return its result."""
        data_client = DataClient(self.CAddr, self.FarmName)
        return data_client.put(info, fn, ncopies, nolocal, tmo)

    def open(self, info, mode, ncopies=1, nolocal=True, tmo=None):
        """Open a file for reading (``"r"``) or writing (``"w"``).

        Returns the result of ``DataClient.openRead`` or
        ``DataClient.openWrite``.
        """
        assert mode in ("r", "w")
        data_client = DataClient(self.CAddr, self.FarmName)
        if mode == "r":
            return data_client.openRead(info, nolocal=nolocal, tmo=tmo)
        elif mode == "w":
            return data_client.openWrite(info, ncopies=ncopies,
                                         nolocal=nolocal, tmo=tmo)
class VFSSrvIF(PyThread, Logged):
    """Thread maintaining a cell's connection to the VFS server.

    The thread connects, announces the files held by ``CellStorage``
    (``IHAVE`` ... ``SYNC``), then executes commands received from the
    server until the connection ends, and reconnects.
    """

    def __init__(self, myid, cfg, storage):
        """
        Args:
            myid: identifier sent to the server in the HELLO message.
            cfg: mapping with the server ``host`` and ``cellif_port``.
            storage: cell storage object the server commands are applied to.
        """
        PyThread.__init__(self)
        self.ID = myid
        self.DSAddr = (cfg['host'], cfg['cellif_port'])
        self.Connected = 0
        self.Reconciled = 0
        self.LastIdle = 0
        self.NextReconnect = 0
        self.NextProbeTime = 0
        # Initialised here so that disconnect() is safe before connect().
        self.Sock = None
        self.Str = None
        self.CellStorage = storage

    def connect(self):
        """Open the TCP connection to the VFS server and perform the HELLO handshake.

        Replies: ``OK`` accepts the connection; ``HOLD`` accepts it and calls
        ``CellStorage.hold()``; ``EXIT`` logs and calls ``sys.exit(3)``;
        anything else rejects the connection.

        Returns:
            True if connected, False otherwise.  On failure the socket is
            closed so that the retry loop in ``run()`` does not leak sockets.
        """
        self.Sock = socket(AF_INET, SOCK_STREAM)
        try:
            self.Sock.connect(self.DSAddr)
        except Exception:
            self.log('can not connect to VFS Server')
            self.Sock.close()
            self.Sock = None
            return False
        self.debug("connected as %s" % (self.Sock.getsockname(),))
        self.Str = SockStream(self.Sock)
        ans = self.Str.sendAndRecv('HELLO %s' % self.ID)
        self.log('connect: HELLO -> %s' % ans)
        if ans == 'HOLD':
            self.CellStorage.hold()
        elif ans != 'OK':
            if ans == 'EXIT':
                self.log('Shot down by VFS Server')
                sys.exit(3)
            # Rejected: drop the half-open connection instead of leaking it.
            self.Sock.close()
            self.Sock = None
            self.Str = None
            return False
        self.Connected = True
        return True

    def batchedFileList(self, nfull=100):
        """Yield the stored file infos in lists of at most ``nfull`` items."""
        batch = []
        for lp, info in self.CellStorage.listFiles():
            if info:
                batch.append(info)
            if len(batch) >= nfull:
                yield batch
                batch = []
        if batch:
            yield batch

    def reconcile(self):
        """Announce all stored files to the server, then send ``SYNC``.

        Each batch is sent as one ``IHAVE`` message with comma-separated
        ``<path> <ctime> <size>`` entries.  Always returns True.
        """
        for batch in self.batchedFileList():
            if batch:
                lines = ("%s %s %d" % (info.Path, info.CTime, info.Size)
                         for info in batch)
                self.Str.send("IHAVE %s" % (','.join(lines),))
        self.Str.send('SYNC')
        return True

    def run(self):
        """Thread body: connect, reconcile and serve server commands forever."""
        while True:
            if not self.connect():
                time.sleep(0.5 + random.random())
                continue
            self.log("connected")
            if not self.reconcile():
                self.disconnect()
                time.sleep(0.5 + random.random())
                continue
            self.log("reconciled")
            eof = False
            while not eof:
                msg = self.Str.recv()
                self.log('doRead: msg: [%s]' % msg)
                words = msg.split() if msg else []
                if not words:
                    # Connection closed, or an empty message.
                    eof = True
                elif words[0] == 'SYNC':
                    self.Reconciled = 1
                    self.log('reconcile confirmed')
                elif words[0] == 'DEL':
                    if not words[1:]:
                        # Malformed DEL: end the session; the disconnect
                        # happens after the loop.
                        eof = True
                    else:
                        self.CellStorage.delFile(words[1])
                elif words[0] == 'HOLD':
                    self.CellStorage.hold()
                elif words[0] == 'RELEASE':
                    self.CellStorage.release()
                elif words[0] == 'REPLICATE':
                    self.doReplicate(words[1:])
                else:
                    # Unknown command: end the session.
                    eof = True
            self.disconnect()

    def doReplicate(self, args):
        """Handle ``REPLICATE (<path>|*) <nfrep>``; shorter argument lists are ignored."""
        if len(args) < 2:
            return
        path = args[0]
        mult = int(args[1])
        if path == '*':
            self.CellStorage.replicateAll(mult)
        else:
            self.CellStorage.replicateFile(path, mult)

    def disconnect(self):
        """Close the server connection, if any, and reset the connection state."""
        self.Reconciled = False
        if self.Sock is not None:
            self.Sock.close()
        self.Sock = None
        self.Str = None
        self.Connected = False

    def probe(self):
        """Probe the connection, at most once every 300 seconds, when connected."""
        if not self.Connected or time.time() < self.NextProbeTime:
            return
        self.Str.probe()
        self.NextProbeTime = time.time() + 300

    def sendIHave(self, lpath, info):
        """Announce a single file with ``IHAVE <lpath> <ctime> <size>``, if connected."""
        self.log("sendIHave...")
        if self.Connected:
            sizestr = '%s' % info.Size
            # Drop a trailing 'L' from the size text, if present.
            if sizestr[-1] == 'L':
                sizestr = sizestr[:-1]
            self.Str.send('IHAVE %s %s %s' % (lpath, info.CTime, sizestr))
            self.log("sent IHAVE %s %s %s" % (lpath, info.CTime, sizestr))