def allOff():
    """
    Switch off the HTB of every slice that gethtbs() reports as running.
    """
    # gethtbs() is given the xids of the two special slices, root first.
    running = gethtbs(bwlimit.get_xid("root"), bwlimit.get_xid("default"))
    if not len(running):
        return

    logger.log("bwmon: Disabling all running HTBs.")
    # Work on a snapshot of the keys, as the original did with list().
    for xid in tuple(running.keys()):
        bwlimit.off(xid, dev=dev_default)
def allOff():
    """
    Turn off the HTB of each slice returned by gethtbs().
    """
    # Resolve the special slice IDs in the same order as before: root, default.
    special_xids = [bwlimit.get_xid(label) for label in ("root", "default")]
    active_htbs = gethtbs(*special_xids)

    if len(active_htbs) > 0:
        logger.log("bwmon: Disabling all running HTBs.")
        for slice_xid in active_htbs.keys():
            bwlimit.off(slice_xid, dev=dev_default)
def __init__(self, rec):
    """
    Attach this sliver object to its libvirt domain.

    rec is the sliver record; the keys read here are 'name', 'slice_id'
    and 'type'.  When the domain cannot be looked up by name, the class's
    create() is called with the whole record and the lookup is retried
    once; a failure of that second lookup propagates to the caller.
    """
    self.name = rec['name']
    logger.verbose('sliver_libvirt: {} init'.format(self.name))

    # Assume the directory with the image and config files
    # are in place

    self.keys = ''
    self.rspec = {}
    self.slice_id = rec['slice_id']
    self.enabled = True
    self.conn = Sliver_Libvirt.getConnection(rec['type'])
    self.xid = bwlimit.get_xid(self.name)

    try:
        dom = self.conn.lookupByName(self.name)
    except Exception:
        # Deliberately not a bare "except:": KeyboardInterrupt / SystemExit
        # must not be mistaken for "domain is missing".
        logger.log('sliver_libvirt: Domain {} does not exist. '
                   'Will try to create it again.'.format(self.name))
        self.__class__.create(rec['name'], rec)
        dom = self.conn.lookupByName(self.name)
    self.dom = dom
def __init__(self, rec):
    """
    Bind the sliver to its libvirt domain, creating the domain if needed.

    rec is the sliver record; 'name', 'slice_id' and 'type' are read from
    it.  If looking the domain up by name fails, create() of the concrete
    class is invoked with the record and the lookup is attempted a second
    time (errors from that second attempt are not caught here).
    """
    self.name = rec['name']
    logger.verbose('sliver_libvirt: %s init' % (self.name))

    # Assume the directory with the image and config files
    # are in place

    self.keys = ''
    self.rspec = {}
    self.slice_id = rec['slice_id']
    self.enabled = True
    self.conn = Sliver_Libvirt.getConnection(rec['type'])
    self.xid = bwlimit.get_xid(self.name)

    try:
        dom = self.conn.lookupByName(self.name)
    except Exception:
        # Narrower than a bare "except:" so that KeyboardInterrupt and
        # SystemExit are not swallowed and answered with a re-creation.
        logger.log('sliver_libvirt: Domain %s does not exist. '
                   'Will try to create it again.' % (self.name))
        self.__class__.create(rec['name'], rec)
        dom = self.conn.lookupByName(self.name)
    self.dom = dom
def create_lxc(name, rec=None):
    '''
    Create dirs, copy fs image, creat_lxc

    Builds and starts the LXC sliver called name:
      * returns immediately if libvirt already knows a domain of that name;
      * snapshots the hard-wired reference image into /vservers/<name>;
      * creates the 'slices' group, the unix account and an ssh key pair;
      * writes etc/slicename, etc/slicefamily, passwd/group/sudoers entries
        and etc/planetlab.profile inside the container;
      * defines the domain from /vservers/.lvref/lxc_template.xml, calls
        configure(name, rec), starts the domain and adds the ebtables mark.

    rec is only handed on to get_interfaces_xml() and configure().
    Nothing is returned; failures are logged and the function returns early.
    '''
    logger.verbose('sliver_lxc: %s create' % (name))
    conn = libvirt.open("lxc://")
    try:
        conn.lookupByName(name)
    except Exception:
        logger.log("create a new sliver %s" % (name))
    else:
        logger.log("create_lxc: there is already a running vm %s!" % (name))
        return

    # Get the type of image from vref myplc tags specified as:
    # pldistro = lxc
    # fcdistro = squeeze
    # arch x86_64
    # NOTE: both values are hard-wired in this function; nothing is read
    # from rec to choose the architecture or the reference image.
    arch = 'x86_64'
    vref = "lxc-f18-x86_64"

    refImgDir = os.path.join('/vservers/.lvref', vref)
    containerDir = os.path.join('/vservers', name)

    # check the template exists -- there's probably a better way..
    if not os.path.isdir(refImgDir):
        logger.log('creat_lxc: %s: ERROR Could not create sliver - reference image %s not found' % (name, vref))
        logger.log('creat_lxc: %s: ERROR Expected reference image in %s' % (name, refImgDir))
        return

    # Snapshot the reference image fs (assume the reference image is in its own
    # subvolume)
    command = ['btrfs', 'subvolume', 'snapshot', refImgDir, containerDir]
    if not logger.log_call(command, timeout=15*60):
        # The path is formatted into the message, like every other log call
        # here, rather than being passed as a second positional argument.
        logger.log('creat_lxc: ERROR Could not create BTRFS snapshot at %s' % containerDir)
        return
    command = ['chmod', '755', containerDir]
    logger.log_call(command, timeout=15*60)

    # TODO: set quotas...

    # Set hostname. A valid hostname cannot have '_'
    # (not implemented: etc/hostname is left untouched)

    # Add slices group if not already present
    try:
        grp.getgrnam('slices')
    except KeyError:
        # getgrnam raises KeyError when the group entry does not exist
        command = ['/usr/sbin/groupadd', 'slices']
        logger.log_call(command, timeout=15*60)

    # Add unix account (TYPE is specified in the subclass)
    command = ['/usr/sbin/useradd', '-g', 'slices', '-s', '/usr/sbin/vsh', name, '-p', '*']
    logger.log_call(command, timeout=15*60)
    command = ['mkdir', '/home/%s/.ssh' % name]
    logger.log_call(command, timeout=15*60)

    # Create PK pair keys to connect from the host to the guest without
    # password... maybe remove the need for authentication inside the
    # guest?
    command = ['su', '-s', '/bin/bash', '-c',
               'ssh-keygen -t rsa -N "" -f /home/%s/.ssh/id_rsa' % (name)]
    logger.log_call(command, timeout=60)

    command = ['chown', '-R', '%s.slices' % name, '/home/%s/.ssh' % name]
    logger.log_call(command, timeout=30)

    command = ['mkdir', '%s/root/.ssh' % containerDir]
    logger.log_call(command, timeout=10)

    command = ['cp', '/home/%s/.ssh/id_rsa.pub' % name,
               '%s/root/.ssh/authorized_keys' % containerDir]
    logger.log_call(command, timeout=30)

    # The small config files below are written best-effort: a failure is
    # logged and creation carries on.  open() inside "with" replaces the
    # Python-2-only file() builtin and closes each handle deterministically.
    slicename_path = os.path.join(containerDir, 'etc/slicename')
    logger.log("creating /etc/slicename file in %s" % slicename_path)
    try:
        with open(slicename_path, 'w') as out:
            out.write(name)
    except Exception:
        logger.log_exc("exception while creating /etc/slicename")
    try:
        with open(os.path.join(containerDir, 'etc/slicefamily'), 'w') as out:
            out.write(vref)
    except Exception:
        logger.log_exc("exception while creating /etc/slicefamily")

    uid = None
    try:
        uid = getpwnam(name).pw_uid
    except KeyError:
        # keyerror will happen if user id was not created successfully
        logger.log_exc("exception while getting user id")

    if uid is not None:
        logger.log("uid is %d" % uid)
        guest_home = '%s/home/%s' % (containerDir, name)
        logger.log_call(['mkdir', guest_home], timeout=10)
        logger.log_call(['chown', name, guest_home], timeout=10)
        etcpasswd = os.path.join(containerDir, 'etc/passwd')
        etcgroup = os.path.join(containerDir, 'etc/group')
        if os.path.exists(etcpasswd):
            # create all accounts with gid=1001 - i.e. 'slices' like it is in the root context
            slices_gid = 1001
            logger.log("adding user %s id %d gid %d to %s"
                       % (name, uid, slices_gid, etcpasswd))
            try:
                with open(etcpasswd, 'a') as out:
                    out.write("%s:x:%d:%d::/home/%s:/bin/bash\n"
                              % (name, uid, slices_gid, name))
            except Exception:
                logger.log_exc("exception while updating %s" % etcpasswd)
            logger.log("adding group slices with gid %d to %s" % (slices_gid, etcgroup))
            try:
                with open(etcgroup, 'a') as out:
                    out.write("slices:x:%d\n" % slices_gid)
            except Exception:
                logger.log_exc("exception while updating %s" % etcgroup)
        # NOTE(review): the sudoers step is kept under "uid is not None";
        # confirm that this nesting matches the intended behaviour.
        sudoers = os.path.join(containerDir, 'etc/sudoers')
        if os.path.exists(sudoers):
            try:
                with open(sudoers, 'a') as out:
                    out.write("%s ALL=(ALL) NOPASSWD: ALL\n" % name)
            except Exception:
                logger.log_exc("exception while updating /etc/sudoers")

    # customizations for the user environment - root or slice uid
    # we save the whole business in /etc/planetlab.profile
    # and source this file for both root and the slice uid's .profile
    # prompt for slice owner, + LD_PRELOAD for transparently wrap bind
    pl_profile = os.path.join(containerDir, "etc/planetlab.profile")
    ld_preload_text = "\n".join([
        "# by default, we define this setting so that calls to bind(2),",
        "# when invoked on 0.0.0.0, get transparently redirected to the public interface of this node",
        "# see https://svn.planet-lab.org/wiki/LxcPortForwarding",
    ])
    usrmove_path_text = "# VM's before Features/UsrMove need /bin and /sbin in their PATH"
    # Each shell statement must sit on its own line: the function body is
    # not valid sh when the lines are joined with spaces.
    usrmove_path_code = "\n".join([
        '',
        'pathmunge () {',
        '        if ! echo $PATH | /bin/egrep -q "(^|:)$1($|:)" ; then',
        '           if [ "$2" = "after" ] ; then',
        '              PATH=$PATH:$1',
        '           else',
        '              PATH=$1:$PATH',
        '           fi',
        '        fi',
        '}',
        'pathmunge /bin after',
        'pathmunge /sbin after',
        'unset pathmunge',
        '',
    ])
    with open(pl_profile, 'w') as f:
        # backslashes are doubled so that \H and \$ reach the file verbatim
        f.write("export PS1='%s@\\H \\$ '\n" % (name))
        f.write("%s\n" % ld_preload_text)
        f.write("export LD_PRELOAD=/etc/planetlab/lib/bind_public.so\n")
        f.write("%s\n" % usrmove_path_text)
        f.write("%s\n" % usrmove_path_code)

    # make sure this file is sourced from both root's and slice's .profile
    enforced_line = "[ -f /etc/planetlab.profile ] && source /etc/planetlab.profile\n"
    for path in ['root/.profile', 'home/%s/.profile' % name]:
        from_root = os.path.join(containerDir, path)
        # if dir is not yet existing let's forget it for now
        if not os.path.isdir(os.path.dirname(from_root)):
            continue
        try:
            with open(from_root) as existing:
                found = enforced_line in existing.readlines()
        except IOError:
            # an unreadable or missing .profile counts as "line not there"
            found = False
        if not found:
            with open(from_root, "a") as user_profile:
                user_profile.write(enforced_line)
            # in case we create the slice's .profile when writing
            if "/home" in from_root:
                command = ['chown', '%s:slices' % name, from_root]
                logger.log_call(command, timeout=5)

    # Lookup for xid and create template after the user is created so we
    # can get the correct xid based on the name of the slice
    xid = bwlimit.get_xid(name)

    # Template for libvirt sliver configuration
    template_filename = os.path.join('/vservers/.lvref', 'lxc_template.xml')
    if not os.path.isfile(template_filename):
        logger.log("Cannot find XML template %s" % template_filename)
        return
    logger.log("WARNING: using compat template %s" % template_filename)

    interfaces = get_interfaces_xml(rec)

    try:
        with open(template_filename) as f:
            template = Template(f.read())
            xml = template.substitute(name=name, xid=xid, interfaces=interfaces, arch=arch)
    except IOError:
        logger.log('Failed to parse or use XML template file %s' % template_filename)
        return

    # Lookup for the sliver before actually
    # defining it, just in case it was already defined.
    try:
        dom = conn.lookupByName(name)
    except Exception:
        dom = conn.defineXML(xml)

    def debuginfo(dom):
        ''' Helper method to get a "nice" output of the info struct for debug'''
        [state, maxmem, mem, ncpu, cputime] = dom.info()
        return '%s is %s, maxmem = %s, mem = %s, ncpu = %s, cputime = %s' % (
            dom.name(), STATES.get(state, state), maxmem, mem, ncpu, cputime)

    logger.verbose('create_lxc: %s -> %s' % (name, debuginfo(dom)))

    # the sliver has been created
    # then configure the sliver, write keys to authorized_keys file
    configure(name, rec)

    # finally, start the sliver
    dom.create()
    logger.verbose('create_lxc:%s start' % (name))

    # After the VM is started... we can play with the virtual interface
    # Create the ebtables rule to mark the packets going out from the virtual
    # interface to the actual device so the filter canmatch against the mark
    bwlimit.ebtables("-A INPUT -i veth%d -j mark --set-mark %d" % (xid, xid))
def create(name, rec=None):
    '''
    Create dirs, copy fs image, lxc_create

    Prepares the LXC sliver called name and defines its libvirt domain
    (the domain is not started here):
      * picks the guest arch from rec['rspec']['tags'] and the reference
        image from rec['vref'];
      * snapshots that image into Sliver_LXC.CON_BASE_DIR/<name>;
      * creates the 'slices' group, the unix account and an ssh key pair;
      * writes etc/slicename, etc/slicefamily, passwd/group/sudoers entries
        and etc/planetlab.profile inside the container;
      * defines the domain from lxc_template.xml unless it already exists.

    rec is indexed unconditionally, so despite its None default it has to
    be a dict providing 'rspec' and 'vref'.
    Nothing is returned; failures are logged and the function returns early.
    '''
    logger.verbose('sliver_lxc: %s create' % (name))
    conn = Sliver_Libvirt.getConnection(Sliver_LXC.TYPE)

    # Get the type of image from vref myplc tags specified as:
    # pldistro = lxc
    # fcdistro = squeeze
    # arch x86_64
    arch = 'x86_64'
    tags = rec['rspec']['tags']
    if 'arch' in tags:
        arch = tags['arch']
        if arch == 'i386':
            # 'i386' is rewritten to 'i686' before it goes into the template
            arch = 'i686'

    vref = rec['vref']
    if vref is None:
        vref = "lxc-f14-x86_64"
        logger.log("sliver_libvirt: %s: WARNING - no vref attached, using hard-wired default %s"
                   % (name, vref))

    refImgDir = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR, vref)
    containerDir = os.path.join(Sliver_LXC.CON_BASE_DIR, name)

    # check the template exists -- there's probably a better way..
    if not os.path.isdir(refImgDir):
        logger.log('sliver_lxc: %s: ERROR Could not create sliver - reference image %s not found'
                   % (name, vref))
        logger.log('sliver_lxc: %s: ERROR Expected reference image in %s' % (name, refImgDir))
        return

    # Snapshot the reference image fs (assume the reference image is in its own
    # subvolume)
    command = ['btrfs', 'subvolume', 'snapshot', refImgDir, containerDir]
    if not logger.log_call(command, timeout=15 * 60):
        # Format the path into the message instead of passing it as a
        # second positional argument to logger.log().
        logger.log('sliver_lxc: ERROR Could not create BTRFS snapshot at %s' % containerDir)
        return
    command = ['chmod', '755', containerDir]
    logger.log_call(command, timeout=15 * 60)

    # TODO: set quotas...

    # Set hostname. A valid hostname cannot have '_'
    # (not implemented: etc/hostname is left untouched)

    # Add slices group if not already present
    try:
        grp.getgrnam('slices')
    except KeyError:
        # getgrnam raises KeyError when the group entry does not exist
        command = ['/usr/sbin/groupadd', 'slices']
        logger.log_call(command, timeout=15 * 60)

    # Add unix account (TYPE is specified in the subclass)
    command = ['/usr/sbin/useradd', '-g', 'slices', '-s', Sliver_LXC.SHELL, name, '-p', '*']
    logger.log_call(command, timeout=15 * 60)
    command = ['mkdir', '/home/%s/.ssh' % name]
    logger.log_call(command, timeout=15 * 60)

    # Create PK pair keys to connect from the host to the guest without
    # password... maybe remove the need for authentication inside the
    # guest?
    command = ['su', '-s', '/bin/bash', '-c',
               'ssh-keygen -t rsa -N "" -f /home/%s/.ssh/id_rsa' % (name)]
    logger.log_call(command, timeout=60)

    command = ['chown', '-R', '%s.slices' % name, '/home/%s/.ssh' % name]
    logger.log_call(command, timeout=30)

    command = ['mkdir', '%s/root/.ssh' % containerDir]
    logger.log_call(command, timeout=10)

    command = ['cp', '/home/%s/.ssh/id_rsa.pub' % name,
               '%s/root/.ssh/authorized_keys' % containerDir]
    logger.log_call(command, timeout=30)

    # Best-effort writes: on failure the exception is logged and creation
    # continues.  open() under "with" replaces the Python-2-only file()
    # builtin and guarantees each handle is closed.
    slicename_path = os.path.join(containerDir, 'etc/slicename')
    logger.log("creating /etc/slicename file in %s" % slicename_path)
    try:
        with open(slicename_path, 'w') as out:
            out.write(name)
    except Exception:
        logger.log_exc("exception while creating /etc/slicename")
    try:
        with open(os.path.join(containerDir, 'etc/slicefamily'), 'w') as out:
            out.write(vref)
    except Exception:
        logger.log_exc("exception while creating /etc/slicefamily")

    uid = None
    try:
        uid = getpwnam(name).pw_uid
    except KeyError:
        # keyerror will happen if user id was not created successfully
        logger.log_exc("exception while getting user id")

    if uid is not None:
        logger.log("uid is %d" % uid)
        guest_home = '%s/home/%s' % (containerDir, name)
        logger.log_call(['mkdir', guest_home], timeout=10)
        logger.log_call(['chown', name, guest_home], timeout=10)
        etcpasswd = os.path.join(containerDir, 'etc/passwd')
        etcgroup = os.path.join(containerDir, 'etc/group')
        if os.path.exists(etcpasswd):
            # create all accounts with gid=1001 - i.e. 'slices' like it is in the root context
            slices_gid = 1001
            logger.log("adding user %s id %d gid %d to %s"
                       % (name, uid, slices_gid, etcpasswd))
            try:
                with open(etcpasswd, 'a') as out:
                    out.write("%s:x:%d:%d::/home/%s:/bin/bash\n"
                              % (name, uid, slices_gid, name))
            except Exception:
                logger.log_exc("exception while updating %s" % etcpasswd)
            logger.log("adding group slices with gid %d to %s" % (slices_gid, etcgroup))
            try:
                with open(etcgroup, 'a') as out:
                    out.write("slices:x:%d\n" % slices_gid)
            except Exception:
                logger.log_exc("exception while updating %s" % etcgroup)
        # NOTE(review): the sudoers step is kept under "uid is not None";
        # confirm that this nesting matches the intended behaviour.
        sudoers = os.path.join(containerDir, 'etc/sudoers')
        if os.path.exists(sudoers):
            try:
                with open(sudoers, 'a') as out:
                    out.write("%s ALL=(ALL) NOPASSWD: ALL\n" % name)
            except Exception:
                logger.log_exc("exception while updating /etc/sudoers")

    # customizations for the user environment - root or slice uid
    # we save the whole business in /etc/planetlab.profile
    # and source this file for both root and the slice uid's .profile
    # prompt for slice owner, + LD_PRELOAD for transparently wrap bind
    pl_profile = os.path.join(containerDir, "etc/planetlab.profile")
    ld_preload_text = "\n".join([
        "# by default, we define this setting so that calls to bind(2),",
        "# when invoked on 0.0.0.0, get transparently redirected to the public interface of this node",
        "# see https://svn.planet-lab.org/wiki/LxcPortForwarding",
    ])
    usrmove_path_text = "# VM's before Features/UsrMove need /bin and /sbin in their PATH"
    # One shell statement per line: joined with spaces the function body
    # would not be valid sh.
    usrmove_path_code = "\n".join([
        '',
        'pathmunge () {',
        '        if ! echo $PATH | /bin/egrep -q "(^|:)$1($|:)" ; then',
        '           if [ "$2" = "after" ] ; then',
        '              PATH=$PATH:$1',
        '           else',
        '              PATH=$1:$PATH',
        '           fi',
        '        fi',
        '}',
        'pathmunge /bin after',
        'pathmunge /sbin after',
        'unset pathmunge',
        '',
    ])
    with open(pl_profile, 'w') as f:
        # backslashes are doubled so that \H and \$ reach the file verbatim
        f.write("export PS1='%s@\\H \\$ '\n" % (name))
        f.write("%s\n" % ld_preload_text)
        f.write("export LD_PRELOAD=/etc/planetlab/lib/bind_public.so\n")
        f.write("%s\n" % usrmove_path_text)
        f.write("%s\n" % usrmove_path_code)

    # make sure this file is sourced from both root's and slice's .profile
    enforced_line = "[ -f /etc/planetlab.profile ] && source /etc/planetlab.profile\n"
    for path in ['root/.profile', 'home/%s/.profile' % name]:
        from_root = os.path.join(containerDir, path)
        # if dir is not yet existing let's forget it for now
        if not os.path.isdir(os.path.dirname(from_root)):
            continue
        try:
            with open(from_root) as existing:
                found = enforced_line in existing.readlines()
        except IOError:
            # an unreadable or missing .profile counts as "line not there"
            found = False
        if not found:
            with open(from_root, "a") as user_profile:
                user_profile.write(enforced_line)
            # in case we create the slice's .profile when writing
            if "/home" in from_root:
                command = ['chown', '%s:slices' % name, from_root]
                logger.log_call(command, timeout=5)

    # Lookup for xid and create template after the user is created so we
    # can get the correct xid based on the name of the slice
    xid = bwlimit.get_xid(name)

    # Template for libvirt sliver configuration
    template_filename = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR, 'lxc_template.xml')
    if not os.path.isfile(template_filename):
        logger.log("Cannot find XML template %s" % template_filename)
        return
    logger.log("WARNING: using compat template %s" % template_filename)

    interfaces = Sliver_Libvirt.get_interfaces_xml(rec)

    try:
        with open(template_filename) as f:
            template = Template(f.read())
            xml = template.substitute(name=name, xid=xid, interfaces=interfaces, arch=arch)
    except IOError:
        logger.log('Failed to parse or use XML template file %s' % template_filename)
        return

    # Lookup for the sliver before actually
    # defining it, just in case it was already defined.
    try:
        dom = conn.lookupByName(name)
    except Exception:
        dom = conn.defineXML(xml)
    logger.verbose('lxc_create: %s -> %s' % (name, Sliver_Libvirt.debuginfo(dom)))
def sync(nmdbcopy):
    """
    Syncs tc, db, and bwmon.pickle.
    Then, starts new slices, kills old ones, and updates byte accounts for each running slice.
    Sends emails and caps those that went over their limit (that part is
    not visible in this function; presumably it happens in Slice.update).

    nmdbcopy maps slice name -> sliver record; the records are read for
    their 'name' and '_rspec' entries.  Nothing is returned: the outcome is
    the updated Slice objects, the HTBs switched off for dead slices, and
    the tuple (version, slices, deaddb) pickled back to DB_FILE.
    """
    # Defaults
    global DB_FILE, \
        period, \
        default_MaxRate, \
        default_Maxi2Rate, \
        default_MaxKByte, \
        default_Maxi2KByte, \
        default_Share, \
        dev_default

    # In case the limits have changed.
    # Floor division keeps a -1 from get_bwcap() at -1 whatever the
    # division semantics in force, so the check below can match.
    # NOTE(review): assumes -1 is get_bwcap()'s "not set" value - confirm.
    default_MaxRate = int(bwlimit.get_bwcap(dev_default) // 1000)
    default_Maxi2Rate = int(bwlimit.bwmax / 1000)

    # Incase default isn't set yet.
    if default_MaxRate == -1:
        default_MaxRate = 1000000

    # xxx $Id$
    # with svn we used to have a trick to detect upgrades of this file
    # this has gone with the move to git, without any noticeable effect on operations though
    try:
        # "with" closes the handle even when unpickling fails; pickle data
        # is binary, hence "rb".
        with open(DB_FILE, "rb") as f:
            logger.verbose("bwmon: Loading %s" % DB_FILE)
            (version, slices, deaddb) = pickle.load(f)
        # Check version of data file
        if version != "$Id$":
            logger.log("bwmon: Not using old version '%s' data file %s" % (version, DB_FILE))
            raise Exception
    except Exception:
        # Missing, unreadable or outdated state: start from scratch.
        version = "$Id$"
        slices = {}
        deaddb = {}

    # Get/set special slice IDs
    root_xid = bwlimit.get_xid("root")
    default_xid = bwlimit.get_xid("default")

    # Since root is required for sanity, its not in the API/plc database, so pass {}
    # to use defaults.
    if root_xid not in slices:
        slices[root_xid] = Slice(root_xid, "root", {})
        slices[root_xid].reset({}, {})

    # Used by bwlimit. pass {} since there is no rspec (like above).
    if default_xid not in slices:
        slices[default_xid] = Slice(default_xid, "default", {})
        slices[default_xid].reset({}, {})

    live = {}
    # Get running slivers that should be on this node (from plc). {xid: name}
    # db keys on name, bwmon keys on xid. db doesnt have xid either.
    for plcSliver in nmdbcopy:
        live[bwlimit.get_xid(plcSliver)] = nmdbcopy[plcSliver]

    logger.verbose("bwmon: Found %s instantiated slices" % len(live))
    logger.verbose("bwmon: Found %s slices in dat file" % len(slices))

    # Get actual running values from tc.
    # Update slice totals and bandwidth. {xid: {values}}
    kernelhtbs = gethtbs(root_xid, default_xid)
    logger.verbose("bwmon: Found %s running HTBs" % len(kernelhtbs))

    # The dat file has HTBs for slices, but the HTBs aren't running
    nohtbslices = set(slices) - set(kernelhtbs)
    logger.verbose("bwmon: Found %s slices in dat but not running." % len(nohtbslices))
    # Reset tc counts.
    for nohtbslice in nohtbslices:
        if nohtbslice in live:
            slices[nohtbslice].reset({}, live[nohtbslice]['_rspec'])
        else:
            logger.log("bwmon: Removing abondoned slice %s from dat." % nohtbslice)
            del slices[nohtbslice]

    # The dat file doesnt have HTB for the slice but kern has HTB
    slicesnodat = set(kernelhtbs) - set(slices)
    logger.verbose("bwmon: Found %s slices with HTBs but not in dat" % len(slicesnodat))
    for slicenodat in slicesnodat:
        # But slice is running
        if slicenodat in live:
            # init the slice. which means start accounting over since kernel
            # htb was already there.
            slices[slicenodat] = Slice(slicenodat,
                                       live[slicenodat]['name'],
                                       live[slicenodat]['_rspec'])

    # Get new slices.
    # Slices in GetSlivers but not running HTBs
    newslicesxids = set(live) - set(kernelhtbs)
    logger.verbose("bwmon: Found %s new slices" % len(newslicesxids))

    # Setup new slices
    for newslice in newslicesxids:
        sliver = live[newslice]
        # Delegated slices dont have xids (which are uids) since they haven't been
        # instantiated yet.
        if newslice is not None and '_rspec' in sliver:
            # Check to see if we recently deleted this slice.
            if sliver['name'] not in deaddb:
                # NOTE(review): the "||wangyang add1041" suffix looks like
                # leftover debug text - confirm before removing it.
                logger.log("bwmon: new slice %s||wangyang add1041" % sliver['name'])
                # _rspec is the computed rspec: NM retrieved data from PLC, computed loans
                # and made a dict of computed values.
                slices[newslice] = Slice(newslice, sliver['name'], sliver['_rspec'])
                slices[newslice].reset({}, sliver['_rspec'])
            # Double check time for dead slice in deaddb is within 24hr recording period.
            elif time.time() <= (deaddb[sliver['name']]['slice'].time + period):
                deadslice = deaddb[sliver['name']]
                logger.log("bwmon: Reinstantiating deleted slice %s" % sliver['name'])
                slices[newslice] = deadslice['slice']
                slices[newslice].xid = newslice
                # Start the HTB
                newvals = {
                    "maxrate": deadslice['slice'].MaxRate * 1000,
                    "minrate": deadslice['slice'].MinRate * 1000,
                    "maxexemptrate": deadslice['slice'].Maxi2Rate * 1000,
                    "usedbytes": deadslice['htb']['usedbytes'] * 1000,
                    "usedi2bytes": deadslice['htb']['usedi2bytes'],
                    "share": deadslice['htb']['share'],
                }
                slices[newslice].reset(newvals, sliver['_rspec'])
                # Bring up to date
                slices[newslice].update(newvals, sliver['_rspec'])
                # Since the slice has been reinitialed, remove from dead database.
                del deaddb[deadslice['slice'].name]
        else:
            logger.log("bwmon: Slice %s doesn't have xid. Skipping." % sliver['name'])

    # Move dead slices that exist in the pickle file, but
    # aren't instantiated by PLC into the dead dict until
    # recording period is over. This is to avoid the case where a slice is dynamically created
    # and destroyed then recreated to get around byte limits.
    deadxids = set(slices) - set(live)
    # the "- 2" discounts the root and default slices, which are skipped below
    logger.verbose("bwmon: Found %s dead slices" % (len(deadxids) - 2))
    for deadxid in deadxids:
        if deadxid == root_xid or deadxid == default_xid:
            continue
        logger.log("bwmon: removing dead slice %s " % deadxid)
        if deadxid in slices and deadxid in kernelhtbs:
            # add slice (by name) to deaddb
            logger.log("bwmon: Saving bandwidth totals for %s." % slices[deadxid].name)
            deaddb[slices[deadxid].name] = {'slice': slices[deadxid],
                                            'htb': kernelhtbs[deadxid]}
            del slices[deadxid]
        if deadxid in kernelhtbs:
            logger.verbose("bwmon: Removing HTB for %s." % deadxid)
            bwlimit.off(deadxid, dev=dev_default)

    # Clean up deaddb
    # (iterate over a copy of the keys: entries are deleted inside the loop)
    for deadslice in list(deaddb):
        if time.time() >= (deaddb[deadslice]['slice'].time + period):
            logger.log("bwmon: Removing dead slice %s from dat."
                       % deaddb[deadslice]['slice'].name)
            del deaddb[deadslice]

    # Get actual running values from tc since we've added and removed buckets.
    # Update slice totals and bandwidth. {xid: {values}}
    kernelhtbs = gethtbs(root_xid, default_xid)
    logger.verbose("bwmon: now %s running HTBs" % len(kernelhtbs))

    # Update all byte limites on all slices
    for (xid, slc) in slices.items():
        # The special slices are not monitored
        if xid == root_xid or xid == default_xid:
            continue

        if (time.time() >= (slc.time + period)) or \
           (kernelhtbs[xid]['usedbytes'] < slc.bytes) or \
           (kernelhtbs[xid]['usedi2bytes'] < slc.i2bytes):
            # Reset to defaults every 24 hours or if it appears
            # that the byte counters have overflowed (or, more
            # likely, the node was restarted or the HTB buckets
            # were re-initialized).
            slc.reset(kernelhtbs[xid], live[xid]['_rspec'])
        elif ENABLE:
            logger.verbose("bwmon: Updating slice %s" % slc.name)
            # Update byte counts
            slc.update(kernelhtbs[xid], live[xid]['_rspec'])

    logger.verbose("bwmon: Saving %s slices in %s" % (len(slices), DB_FILE))
    with open(DB_FILE, "wb") as f:
        pickle.dump((version, slices, deaddb), f)
def sync(nmdbcopy):
    """
    Syncs tc, db, and bwmon.pickle.
    Then, starts new slices, kills old ones, and updates byte accounts for each running slice.
    Sends emails and caps those that went over their limit (not visible in
    this function; presumably carried out by Slice.update).

    nmdbcopy is a dict keyed by slice name whose values are sliver records;
    their 'name' and '_rspec' entries are used.  The function returns
    nothing.  Its effects are: Slice objects created/reset/updated, HTBs of
    dead slices switched off through bwlimit.off(), and the state tuple
    (version, slices, deaddb) written back to DB_FILE.
    """
    # Defaults
    global DB_FILE, \
        period, \
        default_MaxRate, \
        default_Maxi2Rate, \
        default_MaxKByte, \
        default_Maxi2KByte, \
        default_Share, \
        dev_default

    # In case the limits have changed.
    # "//" rather than "/": with true division int(-1 / 1000) is 0, so the
    # "== -1" test below could never succeed.
    # NOTE(review): assumes get_bwcap() reports "no cap set" as -1 - confirm.
    default_MaxRate = int(bwlimit.get_bwcap(dev_default) // 1000)
    default_Maxi2Rate = int(bwlimit.bwmax / 1000)

    # Incase default isn't set yet.
    if default_MaxRate == -1:
        default_MaxRate = 1000000

    # xxx $Id$
    # with svn we used to have a trick to detect upgrades of this file
    # this has gone with the move to git, without any noticeable effect on operations though
    try:
        # pickle needs a binary file object: in text mode the load always
        # failed and the saved accounting was silently thrown away.
        with open(DB_FILE, "rb") as dbfile:
            logger.verbose("bwmon: Loading %s" % DB_FILE)
            (version, slices, deaddb) = pickle.load(dbfile)
        # Check version of data file
        if version != "$Id$":
            logger.log("bwmon: Not using old version '%s' data file %s" % (version, DB_FILE))
            raise Exception
    except Exception:
        # No usable state on disk: begin with empty databases.
        version = "$Id$"
        slices = {}
        deaddb = {}

    # Get/set special slice IDs
    root_xid = bwlimit.get_xid("root")
    default_xid = bwlimit.get_xid("default")
    special_xids = (root_xid, default_xid)

    # Since root is required for sanity, its not in the API/plc database, so pass {}
    # to use defaults.
    if root_xid not in slices:
        slices[root_xid] = Slice(root_xid, "root", {})
        slices[root_xid].reset({}, {})

    # Used by bwlimit. pass {} since there is no rspec (like above).
    if default_xid not in slices:
        slices[default_xid] = Slice(default_xid, "default", {})
        slices[default_xid].reset({}, {})

    # Get running slivers that should be on this node (from plc). {xid: name}
    # db keys on name, bwmon keys on xid. db doesnt have xid either.
    live = {}
    for plcSliver, record in nmdbcopy.items():
        live[bwlimit.get_xid(plcSliver)] = record

    logger.verbose("bwmon: Found %s instantiated slices" % len(live))
    logger.verbose("bwmon: Found %s slices in dat file" % len(slices))

    # Get actual running values from tc.
    # Update slice totals and bandwidth. {xid: {values}}
    kernelhtbs = gethtbs(root_xid, default_xid)
    logger.verbose("bwmon: Found %s running HTBs" % len(kernelhtbs))

    # The dat file has HTBs for slices, but the HTBs aren't running
    nohtbslices = set(slices) - set(kernelhtbs)
    logger.verbose("bwmon: Found %s slices in dat but not running." % len(nohtbslices))
    # Reset tc counts.
    for nohtbslice in nohtbslices:
        if nohtbslice in live:
            slices[nohtbslice].reset({}, live[nohtbslice]['_rspec'])
        else:
            logger.log("bwmon: Removing abondoned slice %s from dat." % nohtbslice)
            del slices[nohtbslice]

    # The dat file doesnt have HTB for the slice but kern has HTB
    slicesnodat = set(kernelhtbs) - set(slices)
    logger.verbose("bwmon: Found %s slices with HTBs but not in dat" % len(slicesnodat))
    for slicenodat in slicesnodat:
        # But slice is running
        if slicenodat in live:
            # init the slice. which means start accounting over since kernel
            # htb was already there.
            slices[slicenodat] = Slice(slicenodat,
                                       live[slicenodat]['name'],
                                       live[slicenodat]['_rspec'])

    # Get new slices.
    # Slices in GetSlivers but not running HTBs
    newslicesxids = set(live) - set(kernelhtbs)
    logger.verbose("bwmon: Found %s new slices" % len(newslicesxids))

    # Setup new slices
    for newslice in newslicesxids:
        record = live[newslice]
        # Delegated slices dont have xids (which are uids) since they haven't been
        # instantiated yet.
        if newslice is None or '_rspec' not in record:
            logger.log("bwmon: Slice %s doesn't have xid. Skipping." % record['name'])
            continue

        slicename = record['name']
        # Check to see if we recently deleted this slice.
        if slicename not in deaddb:
            logger.log("bwmon: new slice %s" % slicename)
            # _rspec is the computed rspec: NM retrieved data from PLC, computed loans
            # and made a dict of computed values.
            slices[newslice] = Slice(newslice, slicename, record['_rspec'])
            slices[newslice].reset({}, record['_rspec'])
        # Double check time for dead slice in deaddb is within 24hr recording period.
        elif time.time() <= (deaddb[slicename]['slice'].time + period):
            deadslice = deaddb[slicename]
            logger.log("bwmon: Reinstantiating deleted slice %s" % slicename)
            slices[newslice] = deadslice['slice']
            slices[newslice].xid = newslice
            # Start the HTB
            newvals = {
                "maxrate": deadslice['slice'].MaxRate * 1000,
                "minrate": deadslice['slice'].MinRate * 1000,
                "maxexemptrate": deadslice['slice'].Maxi2Rate * 1000,
                "usedbytes": deadslice['htb']['usedbytes'] * 1000,
                "usedi2bytes": deadslice['htb']['usedi2bytes'],
                "share": deadslice['htb']['share'],
            }
            slices[newslice].reset(newvals, record['_rspec'])
            # Bring up to date
            slices[newslice].update(newvals, record['_rspec'])
            # Since the slice has been reinitialed, remove from dead database.
            del deaddb[deadslice['slice'].name]

    # Move dead slices that exist in the pickle file, but
    # aren't instantiated by PLC into the dead dict until
    # recording period is over. This is to avoid the case where a slice is dynamically created
    # and destroyed then recreated to get around byte limits.
    deadxids = set(slices) - set(live)
    # "- 2" leaves the root and default slices out of the reported count
    logger.verbose("bwmon: Found %s dead slices" % (len(deadxids) - 2))
    for deadxid in deadxids:
        if deadxid in special_xids:
            continue
        logger.log("bwmon: removing dead slice %s " % deadxid)
        if deadxid in slices and deadxid in kernelhtbs:
            # add slice (by name) to deaddb
            logger.log("bwmon: Saving bandwidth totals for %s." % slices[deadxid].name)
            deaddb[slices[deadxid].name] = {'slice': slices[deadxid],
                                            'htb': kernelhtbs[deadxid]}
            del slices[deadxid]
        if deadxid in kernelhtbs:
            logger.verbose("bwmon: Removing HTB for %s." % deadxid)
            bwlimit.off(deadxid, dev=dev_default)

    # Clean up deaddb
    # (a list copy is required because entries are removed while looping)
    for deadslice in list(deaddb):
        if time.time() >= (deaddb[deadslice]['slice'].time + period):
            logger.log("bwmon: Removing dead slice %s from dat."
                       % deaddb[deadslice]['slice'].name)
            del deaddb[deadslice]

    # Get actual running values from tc since we've added and removed buckets.
    # Update slice totals and bandwidth. {xid: {values}}
    kernelhtbs = gethtbs(root_xid, default_xid)
    logger.verbose("bwmon: now %s running HTBs" % len(kernelhtbs))

    # Update all byte limites on all slices
    for (xid, sliceobj) in slices.items():
        # The two special slices are never monitored
        if xid in special_xids:
            continue

        htb = kernelhtbs[xid]
        if (time.time() >= (sliceobj.time + period)) or \
           (htb['usedbytes'] < sliceobj.bytes) or \
           (htb['usedi2bytes'] < sliceobj.i2bytes):
            # Reset to defaults every 24 hours or if it appears
            # that the byte counters have overflowed (or, more
            # likely, the node was restarted or the HTB buckets
            # were re-initialized).
            sliceobj.reset(htb, live[xid]['_rspec'])
        elif ENABLE:
            logger.verbose("bwmon: Updating slice %s" % sliceobj.name)
            # Update byte counts
            sliceobj.update(htb, live[xid]['_rspec'])

    logger.verbose("bwmon: Saving %s slices in %s" % (len(slices), DB_FILE))
    # Binary mode again: pickle.dump() writes bytes.
    with open(DB_FILE, "wb") as dbfile:
        pickle.dump((version, slices, deaddb), dbfile)
def create(name, rec=None):
    """
    Create the LXC sliver `name` and define its libvirt domain.

    Steps, in order: snapshot the reference image into the container
    directory (btrfs), create the host-side unix account and an ssh key
    pair, customize a few files inside the container (slicename,
    slicefamily, passwd, group, sudoers, profile), then render the libvirt
    XML template and define the domain unless it already exists.

    Parameters:
        name: slice name; used as the unix login, as the container
            directory name under Sliver_LXC.CON_BASE_DIR, and as the
            libvirt domain name.
        rec: sliver record. Despite the None default it is required in
            practice: rec['vref'] and rec['rspec']['tags'] are read here
            and rec is handed to Sliver_Libvirt.get_interfaces_xml().

    Returns:
        None. Fatal problems (missing reference image, container dir that
        cannot be cleaned up, failed snapshot, missing or unusable XML
        template) are logged and end the function early; the other steps
        are best-effort and only log on failure.
    """
    logger.verbose('sliver_lxc: {} create'.format(name))
    conn = Sliver_Libvirt.getConnection(Sliver_LXC.TYPE)

    vref = rec['vref']
    if vref is None:
        vref = "lxc-f24-x86_64"
        logger.log("sliver_libvirt: {}: WARNING - no vref attached, using hard-wired default {}"
                   .format(name, vref))

    # compute guest arch from vref
    # essentially we want x86_64 (default) or i686 here for libvirt
    try:
        (_, _, vref_arch) = vref.split('-')
        arch = "x86_64" if "64" in vref_arch else "i686"
    except (AttributeError, ValueError):
        # vref does not look like <pldistro>-<fcdistro>-<arch>
        arch = 'x86_64'

    # An explicit 'arch' tag on the slice takes precedence over the value
    # derived from vref. The vref-derived value used to be unconditionally
    # reset to 'x86_64' at this point, which made the computation above
    # dead code; it now serves as the fallback when no tag is set.
    tags = rec['rspec']['tags']
    if 'arch' in tags:
        arch = tags['arch']
        if arch == 'i386':
            arch = 'i686'

    refImgDir = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR, vref)
    containerDir = os.path.join(Sliver_LXC.CON_BASE_DIR, name)

    # check the template exists -- there's probably a better way..
    if not os.path.isdir(refImgDir):
        logger.log('sliver_lxc: {}: ERROR Could not create sliver - reference image {} not found'
                   .format(name, vref))
        logger.log('sliver_lxc: {}: ERROR Expected reference image in {}'.format(name, refImgDir))
        return

    # in fedora20 we have some difficulty in properly cleaning up /vservers/<slicename>
    # also note that running e.g. btrfs subvolume create /vservers/.lvref/image /vservers/foo
    # behaves differently, whether /vservers/foo exists or not:
    # if /vservers/foo does not exist, it creates /vservers/foo
    # but if it does exist, then it creates /vservers/foo/image !!
    # so we need to check the expected container rootfs does not exist yet
    # this hopefully could be removed in a future release
    if os.path.exists(containerDir):
        logger.log("sliver_lxc: {}: WARNING cleaning up pre-existing {}".format(name, containerDir))
        command = ['btrfs', 'subvolume', 'delete', containerDir]
        logger.log_call(command, timeout=BTRFS_TIMEOUT)
        # re-check
        if os.path.exists(containerDir):
            logger.log('sliver_lxc: {}: ERROR Could not create sliver - could not clean up empty {}'
                       .format(name, containerDir))
            return

    # Snapshot the reference image fs
    # this assumes the reference image is in its own subvolume
    command = ['btrfs', 'subvolume', 'snapshot', refImgDir, containerDir]
    if not logger.log_call(command, timeout=BTRFS_TIMEOUT):
        logger.log('sliver_lxc: ERROR Could not create BTRFS snapshot at {}'
                   .format(containerDir))
        return
    command = ['chmod', '755', containerDir]
    logger.log_call(command)

    # TODO: set quotas...
    # TODO: set the container hostname (etc/hostname); a valid hostname
    # cannot contain '_', so name.replace('_', '-') would be needed

    # Add slices group if not already present
    try:
        grp.getgrnam('slices')
    except KeyError:
        command = ['/usr/sbin/groupadd', 'slices']
        logger.log_call(command)

    # Add unix account (TYPE is specified in the subclass)
    command = ['/usr/sbin/useradd', '-g', 'slices', '-s', Sliver_LXC.SHELL, name, '-p', '*']
    logger.log_call(command)
    command = ['mkdir', '/home/{}/.ssh'.format(name)]
    logger.log_call(command)

    # Create PK pair keys to connect from the host to the guest without
    # password... maybe remove the need for authentication inside the
    # guest?
    command = ['su', '-s', '/bin/bash', '-c',
               'ssh-keygen -t rsa -N "" -f /home/{}/.ssh/id_rsa'.format(name)]
    logger.log_call(command)

    # the key pair is generated without switching user, hence the chown
    command = ['chown', '-R', '{}:slices'.format(name), '/home/{}/.ssh'.format(name)]
    logger.log_call(command)

    command = ['mkdir', '{}/root/.ssh'.format(containerDir)]
    logger.log_call(command)

    command = ['cp', '/home/{}/.ssh/id_rsa.pub'.format(name),
               '{}/root/.ssh/authorized_keys'.format(containerDir)]
    logger.log_call(command)

    logger.log("creating /etc/slicename file in {}".format(os.path.join(containerDir, 'etc/slicename')))
    try:
        with open(os.path.join(containerDir, 'etc/slicename'), 'w') as f:
            f.write(name)
    except Exception:
        logger.log_exc("exception while creating /etc/slicename")

    try:
        with open(os.path.join(containerDir, 'etc/slicefamily'), 'w') as f:
            f.write(vref)
    except Exception:
        logger.log_exc("exception while creating /etc/slicefamily")

    uid = None
    try:
        uid = getpwnam(name).pw_uid
    except KeyError:
        # keyerror will happen if user id was not created successfully
        logger.log_exc("exception while getting user id")

    if uid is not None:
        logger.log("uid is {}".format(uid))
        command = ['mkdir', '{}/home/{}'.format(containerDir, name)]
        logger.log_call(command)
        command = ['chown', name, '{}/home/{}'.format(containerDir, name)]
        logger.log_call(command)
        etcpasswd = os.path.join(containerDir, 'etc/passwd')
        etcgroup = os.path.join(containerDir, 'etc/group')
        if os.path.exists(etcpasswd):
            # create all accounts with gid=1001 - i.e. 'slices' like it is in the root context
            slices_gid = 1001
            logger.log("adding user {name} id {uid} gid {slices_gid} to {etcpasswd}"
                       .format(name=name, uid=uid, slices_gid=slices_gid, etcpasswd=etcpasswd))
            try:
                with open(etcpasswd, 'a') as passwdfile:
                    passwdfile.write("{name}:x:{uid}:{slices_gid}::/home/{name}:/bin/bash\n"
                                     .format(name=name, uid=uid, slices_gid=slices_gid))
            except Exception:
                logger.log_exc("exception while updating {}".format(etcpasswd))
            logger.log("adding group slices with gid {slices_gid} to {etcgroup}"
                       .format(slices_gid=slices_gid, etcgroup=etcgroup))
            try:
                with open(etcgroup, 'a') as groupfile:
                    groupfile.write("slices:x:{slices_gid}\n"
                                    .format(slices_gid=slices_gid))
            except Exception:
                logger.log_exc("exception while updating {}".format(etcgroup))
        sudoers = os.path.join(containerDir, 'etc/sudoers')
        if os.path.exists(sudoers):
            try:
                with open(sudoers, 'a') as f:
                    f.write("{} ALL=(ALL) NOPASSWD: ALL\n".format(name))
            except Exception:
                logger.log_exc("exception while updating /etc/sudoers")

    # customizations for the user environment - root or slice uid
    # we save the whole business in /etc/planetlab.profile
    # and source this file for both root and the slice uid's .profile
    # prompt for slice owner, + LD_PRELOAD for transparently wrap bind
    pl_profile = os.path.join(containerDir, "etc/planetlab.profile")
    ld_preload_text = """# by default, we define this setting so that calls to bind(2),
# when invoked on 0.0.0.0, get transparently redirected to the public interface of this node
# see https://svn.planet-lab.org/wiki/LxcPortForwarding"""
    usrmove_path_text = """# VM's before Features/UsrMove need /bin and /sbin in their PATH"""
    usrmove_path_code = """
pathmunge () {
        if ! echo $PATH | /bin/egrep -q "(^|:)$1($|:)" ; then
           if [ "$2" = "after" ] ; then
              PATH=$PATH:$1
           else
              PATH=$1:$PATH
           fi
        fi
}
pathmunge /bin after
pathmunge /sbin after
unset pathmunge
"""
    with open(pl_profile, 'w') as f:
        # backslashes are escaped explicitly so the shell still receives
        # a literal backslash before H and before $ in the prompt
        f.write("export PS1='{}@\\H \\$ '\n".format(name))
        f.write("{}\n".format(ld_preload_text))
        f.write("if [ -e /etc/planetlab/lib/bind_public.so ] ; then # Only preload bind_public if it exists.\n")
        f.write("    export LD_PRELOAD=/etc/planetlab/lib/bind_public.so\n")
        f.write("fi\n")
        f.write("{}\n".format(usrmove_path_text))
        f.write("{}\n".format(usrmove_path_code))

    # make sure this file is sourced from both root's and slice's .profile
    enforced_line = "[ -f /etc/planetlab.profile ] && source /etc/planetlab.profile\n"
    for path in ['root/.profile', 'home/{}/.profile'.format(name)]:
        from_root = os.path.join(containerDir, path)
        # if dir is not yet existing let's forget it for now
        if not os.path.isdir(os.path.dirname(from_root)):
            continue
        found = False
        try:
            with open(from_root) as f:
                found = enforced_line in f.readlines()
        except IOError:
            # no profile yet: it gets created by the append below
            pass
        if not found:
            with open(from_root, "a") as user_profile:
                user_profile.write(enforced_line)
            # in case we create the slice's .profile when writing
            # Test the container-relative path: the absolute one would
            # also match for root's profile whenever the container path
            # itself contains "/home" (e.g. a slice name starting with
            # "home"), handing root's .profile over to the slice user.
            if path.startswith('home/'):
                command = ['chown', '{}:slices'.format(name), from_root]
                logger.log_call(command)

    # Lookup for xid and create template after the user is created so we
    # can get the correct xid based on the name of the slice
    xid = bwlimit.get_xid(name)

    # Template for libvirt sliver configuration
    template_filename_sliceimage = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR, 'lxc_template.xml')
    if os.path.isfile(template_filename_sliceimage):
        logger.verbose("Using XML template {}".format(template_filename_sliceimage))
        template_filename = template_filename_sliceimage
    else:
        logger.log("Cannot find XML template {}".format(template_filename_sliceimage))
        return

    interfaces = Sliver_Libvirt.get_interfaces_xml(rec)

    try:
        with open(template_filename) as f:
            template = Template(f.read())
            xml = template.substitute(name=name, xid=xid, interfaces=interfaces, arch=arch)
    except (IOError, KeyError, ValueError):
        # IOError: template unreadable; KeyError / ValueError: the
        # template has a placeholder we do not supply, or a malformed one
        logger.log('Failed to parse or use XML template file {}'.format(template_filename))
        return

    # Lookup for the sliver before actually
    # defining it, just in case it was already defined.
    try:
        dom = conn.lookupByName(name)
    except Exception:
        dom = conn.defineXML(xml)
    logger.verbose('lxc_create: {} -> {}'.format(name, Sliver_Libvirt.dom_details(dom)))