Пример #1
0
# Read this instance's role and the CloudMan password from the user data file.
# The path is bound before the ``try`` so the error handler can always name it.
ud_file = '/tmp/cm/userData.yaml'
ud = {}
try:
    with open(ud_file, 'r') as f:
        # safe_load only builds plain Python types. A bare yaml.load(f) can
        # instantiate arbitrary objects and is rejected by newer PyYAML
        # releases that require an explicit Loader argument.
        ud = yaml.safe_load(f)
    # Missing keys fall back to an empty string.
    role = ud.get('role', '')
    cm_pwd = ud.get('password', '')
except Exception as e:
    # Any failure while reading or parsing the file is logged (tagged with
    # this machine's ``ip``) and the script exits with status 1.
    log.error("[{2}] Error reading user data file {0}: {1}".format(ud_file, e, ip))
    sys.exit(1)
# Initialize CM only on the master node. This assumes that the master:
#  1. Runs jobs
#  2. Runs the very first job
if role == 'master':
    # Initialize CloudMan if not already initialized
    cm = CloudMan('http://127.0.0.1:42284/', cm_pwd)
    cm_type = cm.get_cluster_type()
    log.debug("[{0}] Current CloudMan type: '{1}'".format(ip, cm_type))
    if cm_type == '' or cm_type is None:
        log.debug("[{0}] Initializing CloudMan to type 'SGE'".format(ip))
        cm.initialize(type='SGE')
        # Enable autoscaling with sufficient cluster size limits to enable the
        # 22 models to run in parallel
        # Get the number of cores on this machine to set autoscaling appropriately
        process = subprocess.Popen(['grep', '-c', 'processor', '/proc/cpuinfo'],
                stdout=subprocess.PIPE)
        out, err = process.communicate()
        try:
            # We need an integer big enough to accommodate running all 22 models
            # in parallel; however, CloudMan allows max 19 workers so limit there.
            as_max = min(19, int(math.ceil(22.0/int(out.strip()))))
Пример #2
0
also includes terminating the instance itself.
"""
import os
import sys
import yaml
from blend.cloudman import CloudMan

# Setup logging: build the file handler and its record format first, then
# attach it to the 'terminate_cm' logger with the DEBUG threshold.
import logging
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
hdlr = logging.FileHandler('/mnt/transient_nfs/ghem/manipulate_cm.log')
hdlr.setFormatter(formatter)

log = logging.getLogger('terminate_cm')
log.setLevel(logging.DEBUG)
log.addHandler(hdlr)

# Get CloudMan password from the user data file.
# The path is bound before the ``try`` so the error handler can always name it.
ud_file = '/tmp/cm/userData.yaml'
ud = {}
try:
    with open(ud_file, 'r') as f:
        # safe_load only builds plain Python types. A bare yaml.load(f) can
        # instantiate arbitrary objects and is rejected by newer PyYAML
        # releases that require an explicit Loader argument.
        ud = yaml.safe_load(f)
    # A missing 'password' key falls back to an empty string.
    cm_pwd = ud.get('password', '')
except Exception as e:
    # Any failure while reading or parsing the file is logged and the script
    # exits with status 1.
    log.error("Error reading user data file {0}: {1}".format(ud_file, e))
    sys.exit(1)
# Obtain a CloudMan handle for the instance listening on the loopback address,
# then request termination with both the master-instance and delete-cluster
# options enabled.
cm_url = 'http://127.0.0.1:42284/'
cm = CloudMan(cm_url, cm_pwd)
log.debug("Initiating termination and deletion of this cluster")
cm.terminate(delete_cluster=True, terminate_master_instance=True)