def add_ebs_volumes(iids, mounts, access_key, nthreads=10):
    """
    Add many EBS volumes in parallel with threading.

    iids: a list of instance IDs
    mounts: a list (one entry per instance) of lists of mount point
    configurations to add, e.g.:
        iids   = ["i-01dweww"]
        mounts = [[{"Mount": "/opt2", "Size": 1,
                    "Attach": "/dev/sdb", "Fdisk": "/dev/xvdb"}]]
    """
    if len(iids) != len(mounts):
        raise ValueError("length of iids must be the same as length of mounts")
    item_pool = Queue.Queue()
    for iid, mountlist in zip(iids, mounts):
        if type(mountlist) is not list:
            raise TypeError("mounts must be a _list_ of lists of mounts...")
        if type(iid) is list:
            raise TypeError("at this point, iid should be a string, not a list!...")
        item_pool.put((iid, mountlist))
    lock = thread.allocate_lock()
    thethreads = []
    for _i in range(nthreads):
        t = _add_ebs_volumesThread(item_pool, lock, access_key)
        thethreads.append(t)
        t.start()
    # set up a timeout to prevent a genuinely infinite loop
    import datetime
    import time
    begin = datetime.datetime.utcnow()
    timeout = 60 * 60 * 3  # three hours
    for t in thethreads:
        while not t.done:
            # use total_seconds() rather than .seconds, which wraps at one day
            if (datetime.datetime.utcnow() - begin).total_seconds() > timeout:
                break
            time.sleep(0.1)
    nd = [t for t in thethreads if not t.done]
    errs = []
    for t in thethreads:
        errs = errs + t.errors
    if len(errs):
        raise lD.ShellExecuteError("Problems creating volumes, as: \n"
                                   + '\n'.join(errs))
    if len(nd):
        raise lD.ShellExecuteError(
            "Timeout in running create volumes as threads")
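# For reference, a minimal sketch of what the _add_ebs_volumesThread worker
# driven above might look like. This is an assumption inferred from how it is
# used here (start(), a done flag, an errors list, a shared Queue and lock),
# not the project's actual implementation:
#
#   import threading
#
#   class _add_ebs_volumesThread(threading.Thread):
#       def __init__(self, item_pool, lock, access_key):
#           threading.Thread.__init__(self)
#           self.item_pool = item_pool  # Queue of (iid, mountlist) work items
#           self.lock = lock            # guards any shared, non-thread-safe state
#           self.access_key = access_key
#           self.done = False           # polled by add_ebs_volumes
#           self.errors = []            # collected by add_ebs_volumes
#
#       def run(self):
#           while True:
#               try:
#                   iid, mountlist = self.item_pool.get_nowait()
#               except Queue.Empty:
#                   break
#               for conf in mountlist:
#                   try:
#                       add_new_ebs_vol(iid, conf, self.access_key)
#                   except Exception as e:
#                       self.errors.append(str(e))
#           self.done = True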
def add_new_ebs_vol(iid, conf, access_key):
    """
    Create and name a new EBS volume, attach it to a pre-existing instance
    and mount it on that instance.

    conf is a dictionary which must contain:
        "Mount": "where-to-mount-it", "Size": SizeInGB,
        "Attach": "aws_expected_device_name",
        "Fdisk": "device_name_seen_by_fdisk"
    e.g.:
        "Mount": "/opt2", "Size": 1,
        "Attach": "/dev/sdb", "Fdisk": "/dev/xvdb"

    Fdisk is optional; if not given it will be guessed from "Attach" and
    the region, i.e.:
        region  Attach  Fdisk
        eu-*    sd<X>   xvd<X>    (e.g. sdb -> xvdb)
        ap-*    sd<Y>   xvd<Y+4>  (e.g. sdb -> xvdf)
    """
    try:
        i = desc_instance(iid)
    except lD.ShellExecuteError:
        raise ValueError(iid + " is not one of your instance IDs")
    # get a remote shell on this instance
    ip = pub_ip(iid)
    remote = lD.remoteHost("root", ip, access_key)
    # choose the fdisk device, special-casing broken Tokyo centos6 instances
    if "Fdisk" not in conf:
        import string
        alpha = string.ascii_lowercase
        skip = 0
        if detect_region().startswith('eu'):
            # eu-*  sd<X>  xvd<X>  (e.g. sdb -> xvdb)
            skip = 0
        elif detect_region().startswith(
                'ap') and remote.detect_linux_version() in ["Centos6"]:
            # ap-*  sd<Y>  xvd<Y+4>  (e.g. sdb -> xvdf)
            skip = 4
        conf["Fdisk"] = '/dev/xvd' + alpha[alpha.index(conf["Attach"][-1]) + skip]
    # create the volume in the same availability zone as the instance
    av_zone = i["Reservations"][0]["Instances"][0]["Placement"][
        "AvailabilityZone"]
    voljson = runawstojson("ec2 create-volume --size " + str(conf["Size"])
                           + " --availability-zone " + av_zone)
    instnam = ""
    for tag in i["Reservations"][0]["Instances"][0]["Tags"]:
        if tag["Key"] == "Name":
            instnam = tag["Value"]
    vol_id = voljson["VolumeId"]
    name_resource(vol_id, instnam + conf["Mount"].replace("/", "_"))
    # poll until the new volume becomes available, up to ~50 seconds
    time.sleep(5)
    count = 0
    while count < 10:
        descvol = runawstojson("ec2 describe-volumes --volume " + vol_id)
        if descvol['Volumes'][0]["State"] == "available":
            break
        time.sleep(5)
        count = count + 1
    # attach the volume and poll until the attachment is complete
    resjson = runawstojson("ec2 attach-volume --volume-id " + vol_id
                           + " --instance-id " + iid
                           + " --device " + conf["Attach"])
    time.sleep(5)
    count = 0
    while count < 10:
        descvol = runawstojson("ec2 describe-volumes --volume " + vol_id)
        if descvol['Volumes'][0]['Attachments'][0]["State"] == "attached":
            break
        time.sleep(5)
        count = count + 1
    # partition the device, retrying once in case the attachment is slow
    remote.cp(
        os.path.dirname(__file__) + "/../remotescripts/fdiskwrap.sh",
        "~/fdiskwrap.sh")
    remote.run("chmod a+x fdiskwrap.sh")
    try:
        remote.run("./fdiskwrap.sh " + conf["Fdisk"])
    except lD.ShellExecuteError:
        time.sleep(30)
        remote.run("./fdiskwrap.sh " + conf["Fdisk"])
    # format the first partition and mount it persistently via fstab
    remote.run("mkfs.ext4 -b 4096 " + conf["Fdisk"] + "1 ")
    remote.run("bash -c 'echo \"" + conf["Fdisk"] + "1 " + conf["Mount"]
               + " ext4 defaults 1 1\" >> /etc/fstab'")
    # move any existing content aside, mount, then move the content back
    mvto = "/" + conf["Mount"].replace("/", "_")
    remote.run("bash -c \"mv " + conf["Mount"] + " " + mvto + "; mkdir "
               + conf["Mount"] + "; mount " + conf["Mount"] + ";\"")
    remote.run("bash -c \"if [ -d " + mvto + " ] ; then chmod --reference "
               + mvto + " " + conf["Mount"] + " ; fi\"")
    remote.run("bash -c 'shopt -s dotglob; if [ \"$(ls -A " + mvto
               + ")\" ] ; then mv " + mvto + "/* " + conf["Mount"] + "/ ; fi'")
    res = remote.run("df -hP")
    if conf["Mount"] not in res:
        raise lD.ShellExecuteError(
            "Could not mount the requested disk, resulted in " + res)
    return vol_id
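# Minimal usage sketch, with hypothetical values: attach a 1 GB volume to an
# existing instance and mount it at /opt2, letting "Fdisk" be guessed from
# "Attach" and the region. The key path is an assumption; access_key is
# whatever lD.remoteHost expects as its credential argument:
#
#   vol_id = add_new_ebs_vol("i-01dweww",
#                            {"Mount": "/opt2", "Size": 1,
#                             "Attach": "/dev/sdb"},
#                            "~/.ssh/my_aws_key.pem")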
# check that the system clocks across the cluster agree with the ambari node
ambtime = int(ambari.run("date -u '+%s'"))
cltime = whole_cluster.run("date -u '+%s'")
# pdsh prefixes each output line with "hostname: ", so keep only the timestamp
cltime = [int(p.split(':')[-1]) for p in cltime.split('\n')]
if (max(cltime) - min(cltime)) > (10 * 60):
    raise RuntimeError(
        'The system clocks are not synchronized, a difference of '
        + str(max(cltime) - min(cltime)) + ' seconds was found')
if min(cltime) < (ambtime - 5):
    raise RuntimeError(
        'At least one machine has a system clock that is too early, '
        'a difference of ' + str(ambtime - min(cltime))
        + ' seconds was found')
# register any hosts that do not yet have an ambari agent installed
try:
    if "no ambari-agent" in whole_cluster.run("which ambari-agent"):
        raise lD.ShellExecuteError()
except lD.ShellExecuteError:
    whole_cluster.register()
lD.install_pdsh(whole_cluster)
# TODO: instead copy this file _from_ the ambari node *to* the others directly
# For the time being, copy to tmp, then redistribute if necessary
copy_from = None
# First handle the localhost case: the repo file already exists locally
atmp = None
if thehost == "localhost":
    for _repoption in [
            "/etc/yum.repos.d/ambari.repo",
            installfrom + "/repo/ambari.repo",
            installfrom + "/../dev/repo/ambari.repo"
    ]:
        if os.path.exists(_repoption) and os.access(_repoption, os.R_OK):