def main(argv):
    """Start a chunkserver and serve it over zerorpc until interrupted.

    argv: command-line arguments. argv[0], when present, is used as the
        ZooKeeper address; otherwise the module-level ZOO_IP is used.

    Exits with status 2 when the server cannot be started (ZMQError).
    A KeyboardInterrupt ends the server quietly.
    """
    zoo_ip = str(argv[0]) if argv else ZOO_IP

    chunkserver = zchunkserver.ZChunkserver(zoo_ip=zoo_ip)
    reg_num = int(chunkserver.chunkloc)
    s = zerorpc.Server(chunkserver)
    # The listening port is derived from the chunkserver's chunkloc number.
    port = 4400 + reg_num
    address = 'tcp://%s:%d' % (zutils.get_myip(), port)

    try:
        print('Registering chunkserver %d on at %s' % (reg_num, address))
        s.bind(address)
        s.run()
    except ZMQError as e:
        print("Unable to start server: %s" % (e.strerror,))
        # sys.exit raises SystemExit, so the finally clause below still runs
        # and closes the server exactly once (it used to be closed twice).
        sys.exit(2)
    except KeyboardInterrupt:
        pass
    finally:
        print('Closing server on %s' % address)
        s.close()
def main(argv):
    """Start the watcher and serve it over zerorpc until interrupted.

    argv: command-line arguments. argv[0], when present, is used as the
        ZooKeeper address; otherwise the module-level ZOO_IP is used.

    Exits with status 2 when the server cannot be started (ZMQError).
    A KeyboardInterrupt ends the watcher quietly.
    """
    zoo_ip = str(argv[0]) if argv else ZOO_IP

    chunkserver = watcher.Watcher(zoo_ip=zoo_ip, port=PORT)
    s = zerorpc.Server(chunkserver)
    address = 'tcp://%s:%s' % (zutils.get_myip(), PORT)

    try:
        print('Registering watcher at %s' % address)
        s.bind(address)
        s.run()
    except ZMQError as e:
        print("Unable to start watcher: %s" % (e.strerror,))
        # sys.exit raises SystemExit, so the finally clause below still runs
        # and closes the server exactly once (it used to be closed twice).
        sys.exit(2)
    except KeyboardInterrupt:
        pass
    finally:
        print('Closing watcher on %s' % address)
        s.close()
def __init__(self, zoo_ip='localhost:2181', master_port=1400):
    """Initialise the master's metadata, register with ZooKeeper and start
    the periodic background jobs.

    zoo_ip: address handed to KazooClient as ``hosts``.
    master_port: port used to build ``self.ip`` and passed on to
        ``_register_with_zookeeper``.
    """
    # lock for modifying metadata
    self.lock = threading.RLock()

    # Identity and counters.
    self.ip = ':'.join((zutils.get_myip(), str(master_port)))
    self.num_chunkservers = 0
    self.last_updated = 0  # time since last stats poll
    self.chunkrobin = 0
    self.no_replica = 3

    # Metadata tables. All of them must exist before the ZooKeeper
    # registration below, which installs watches that use this state.
    self.versntable = {}  # file version counter
    # file to chunk mapping; a populated entry looked like this in an
    # earlier commented-out literal:
    #   {'#garbage_collection#': {'0000000024': [
    #       'abc$%#0$%#27a04c46-9c4f-11e5-92b7-000c29c12a87']}}
    self.filetable = {'#garbage_collection#': {}}
    self.chunktable = {}  # chunkuuid to chunkloc mapping
    self.chunkservers = {}  # loc id to chunkserver mapping
    self.chunkclients = {}  # zerorpc clients connected to chunkservers
    self.chunkstats = {}  # stats for capacity and network load
    self.chunksize = {}  # filename -> chunksize mapping

    self.zookeeper = KazooClient(hosts=zoo_ip)
    self._register_with_zookeeper(master_port)

    # this schedules background tasks in separate thread
    background = BackgroundScheduler()
    periodic_jobs = (
        (self.collect_garbage, 10),
        (self.replicate, 5),
    )
    for job, every_minutes in periodic_jobs:
        background.add_job(job, 'interval', minutes=every_minutes)
    background.start()
def __init__(self, zoo_ip='localhost:2181', port=PORT):
    """Initialise the watcher state and register it with ZooKeeper.

    zoo_ip: address handed to KazooClient as ``hosts``. It is also
        substituted into the module-level SERVER and MASTER templates;
        the exact string 'localhost' is replaced by this machine's IP
        (``zutils.get_myip()``) for that substitution.
    port: port passed on to ``_register_with_zookeeper``.

    Side effect: rebinds the module globals SERVER and MASTER to their
    formatted values.
    """
    global SERVER, MASTER

    self.lock = threading.RLock()
    self.chunkservers = {}
    self.master_address = None
    self.garbage_table = {'#garbage_collection#': {}}

    # Format the templates BEFORE registering with ZooKeeper: the watches
    # installed by _register_with_zookeeper pass SERVER/MASTER to ssh(), and
    # kazoo's ChildrenWatch invokes its callback as soon as it is registered,
    # so formatting afterwards could hand ssh() an unformatted template.
    # NOTE(review): the default 'localhost:2181' does not match the
    # 'localhost' check below, so it is substituted verbatim - confirm that
    # this is intended.
    template_ip = zutils.get_myip() if zoo_ip == 'localhost' else zoo_ip
    SERVER = SERVER.format(template_ip)
    MASTER = MASTER.format(template_ip)

    # The client still connects to the address exactly as it was given.
    self.zookeeper = KazooClient(hosts=zoo_ip)
    self._register_with_zookeeper(port)
def _register_with_zookeeper(self, port):
    """Publish this watcher on ZooKeeper and install the recovery watches.

    Starts the client, stores this watcher's ``tcp://ip:port`` address in
    the 'watcher' node, reads the master's address from the 'master' node,
    and then registers two children watches:

    * on MASTER_PATH: refreshes ``self.master_address`` while the master
      has children, otherwise tries to start a master through ``ssh``;
    * on CHUNKSERVER_PATH: registers newly listed chunkservers and, for
      chunkservers that disappeared, tries to start a replacement through
      ``ssh`` before unregistering them.

    port: port this watcher is reachable on.

    Exits the process with status 2 when the initial ZooKeeper setup fails.
    """
    try:
        self.zookeeper.start()
        address = "tcp://%s:%s" % (zutils.get_myip(), port)
        self.zookeeper.ensure_path('watcher')
        self.zookeeper.set('watcher', address)
        master_ip = self.zookeeper.get('master')[0]
        self.master_address = self.convert_zookeeper_ip(master_ip)
        print('master address %s %s' % (self.master_address, master_ip))
    except Exception as ex:
        # Report the underlying error instead of hiding it; a bare except
        # here also intercepted SystemExit and KeyboardInterrupt.
        self.print_exception('connecting to zookeeper', ex)
        print('Unable to connect to zookeeper, shutting down')
        sys.exit(2)

    def watch_it(event):
        """One-shot watch: register a chunkserver once its node changes."""
        path = event.path
        # get chunkserver_ip = uname@tcp://ip:port, convert to uname@ip for ssh
        try:
            chunkserver_ip = self.convert_zookeeper_ip(
                self.zookeeper.get(path)[0])
            chunkserver_num = path[path.rfind('/') + 1:]
        except Exception as ex:
            self.print_exception('registering chunkserver', ex)
            print('Error registering chunkserver')
            return False
        print('Registering chunkserver num %s as %s'
              % (chunkserver_num, chunkserver_ip))
        self._register_chunkserver(chunkserver_num, chunkserver_ip)

    @self.zookeeper.ChildrenWatch(MASTER_PATH)
    def watch_master(children):
        # The children handed in by the watch are ignored; the current list
        # is re-read from the 'master' node.
        children = self.zookeeper.get_children('master')
        if children:
            # Master is registered: remember its (possibly new) address.
            self.master_address = self.convert_zookeeper_ip(
                self.zookeeper.get('master')[0])
        else:
            # No master child left: announce the recovery attempt here (the
            # message used to be printed in the branch where the master was
            # alive) and try to start one at the last known address.
            print('\nMaster down - attempting to recover %s'
                  % (self.master_address,))
            if ssh(self.master_address, MASTER):
                print('Another master successfully started')
            else:
                print('Could not recover master')

    @self.zookeeper.ChildrenWatch(CHUNKSERVER_PATH)
    def watch_chunkservers(children):
        # NOTE(review): additions and removals are detected by comparing
        # counts only, so an event in which both lists have the same length
        # but different members is ignored - confirm whether that can occur.
        if len(children) > len(self.chunkservers):
            print("New chunkserver(s) detected")
            # This creates a watch function for each new chunk server, where
            # the registration waits until the data (ip address) is updated
            new_chunkservers = [
                chunkserver_num for chunkserver_num in children
                if chunkserver_num not in self.chunkservers
            ]
            for chunkserver_num in new_chunkservers:
                try:
                    node_path = CHUNKSERVER_PATH + chunkserver_num
                    zoo_ip = self.zookeeper.get(node_path)[0]
                    print('zoo ip  %s' % (zoo_ip,))
                    if not zoo_ip:
                        print('no ip yet, watching for it')
                        self.zookeeper.exists(node_path, watch=watch_it)
                    else:
                        chunkserver_ip = self.convert_zookeeper_ip(zoo_ip)
                        self._register_chunkserver(chunkserver_num,
                                                   chunkserver_ip)
                except Exception as ex:
                    self.print_exception(
                        'watch children, adding chunkserver', ex)
        elif len(children) < len(self.chunkservers):
            try:
                removed_servers = [
                    chunkserver_num for chunkserver_num in self.chunkservers
                    if chunkserver_num not in children
                ]
                for chunkserver_num in removed_servers:
                    # Try to start a replacement on the host of the lost
                    # chunkserver, then drop the stale entry either way.
                    if ssh(self.chunkservers[chunkserver_num], SERVER):
                        print("Another chunkserver to replace %s "
                              % chunkserver_num)
                    else:
                        print('Failed to recover from cs num %s failure'
                              % chunkserver_num)
                    self._unregister_chunkserver(chunkserver_num)
            except Exception as ex:
                self.print_exception('Removing chunkserver', ex)
def _register_with_zookeeper(self, master_port):
    """Announce this master on ZooKeeper and watch the chunkserver list.

    Starts the client, makes sure the 'master' and 'chunkserver' paths
    exist, creates the ephemeral node 'master/0', and stores
    ``username@tcp://ip:port`` as the data of 'master'. It then installs a
    children watch on CHUNKSERVER_PATH that registers chunkservers as they
    appear and, when chunkservers disappear, unregisters them and calls
    ``self.replicate()``.

    master_port: port this master is reachable on.

    Any exception raised during this setup is reported through
    ``self.print_exception`` and the process exits with status 2.
    """
    try:
        self.zookeeper.start()
        address = "tcp://%s:%s" % (zutils.get_myip(), master_port)

        # use ephemeral node for shadow master to subscribe to later
        self.zookeeper.ensure_path('master')
        self.zookeeper.ensure_path('chunkserver')
        self.zookeeper.create('master/0', ephemeral=True)
        data = '{username}@{tcpip}'.format(username=getpass.getuser(),
                                           tcpip=address)
        self.zookeeper.set('master', data)

        # registers chunkserver with master when ip set on zookeeper
        def watch_ip(event):
            node_path = event.path
            # node data is username@[tcp:ip]; keep the part after the '@'
            node_data = self.zookeeper.get(node_path)[0]
            chunkserver_ip = node_data.split('@')[-1]
            chunkserver_num = node_path.rsplit('/', 1)[-1]
            print("New IP %s detected in chunknum %s"
                  % (chunkserver_ip, chunkserver_num))
            self._register_chunkserver(chunkserver_num, chunkserver_ip)

        @self.zookeeper.ChildrenWatch(CHUNKSERVER_PATH)
        def watch_children(children):
            known_count = len(self.chunkservers)
            listed_count = len(children)

            if listed_count > known_count:
                print("New chunkserver(s) detected")
                # This creates a watch function for each new chunk server,
                # where the master waits to register until the data
                # (ip address) is updated
                arrivals = [num for num in children
                            if num not in self.chunkservers]
                for num in arrivals:
                    try:
                        node_path = CHUNKSERVER_PATH + num
                        # node data is username@[tcp:ip]
                        node_data = self.zookeeper.get(node_path)[0]
                        chunkserver_ip = node_data.split('@')[-1]
                        if not chunkserver_ip:
                            # if IP is not set yet, assign watcher to wait
                            self.zookeeper.exists(node_path, watch=watch_ip)
                        else:
                            self._register_chunkserver(num, chunkserver_ip)
                    except Exception as ex:
                        self.print_exception(
                            'watch children, adding chunkserver', ex)
            elif listed_count < known_count:
                # Metadata is modified below, so hold the lock throughout.
                with self.lock:
                    try:
                        departed = [num for num in self.chunkservers
                                    if num not in children]
                        for num in departed:
                            self._unregister_chunkserver(num)
                            print("Chunkserver %s was removed" % num)
                        self.num_chunkservers = len(self.chunkservers)
                        # Re-replicate right away instead of waiting for
                        # the next scheduled run.
                        self.replicate()
                    except Exception as ex:
                        self.print_exception('Removing chunkserver', ex)
    except Exception as e:
        self.print_exception('connecting to zookeeper', e)
        print("Unable to connect to zookeeper - master shutting down")
        sys.exit(2)