def start_celery_on_vm(instance_type, ip, key_file, agent_type,
                       worker_name='%h', username="******",
                       prepend_commands=None, log_level='info'):
    copy_celery_config_to_vm(instance_type=instance_type, ip=ip, key_file=key_file,
                             agent_type=agent_type, username=username)

    commands = prepend_commands if prepend_commands is not None else []

    python_path_list = [TaskConfig.STOCHSS_HOME,
                        TaskConfig.PYURDME_DIR,
                        os.path.join(TaskConfig.STOCHSS_HOME, 'app'),
                        os.path.join(TaskConfig.STOCHSS_HOME, 'app', 'backend'),
                        os.path.join(TaskConfig.STOCHSS_HOME, 'app', 'lib', 'cloudtracker')]
    python_path = 'export PYTHONPATH={0}'.format(':'.join(python_path_list))
    commands.append(python_path)

    # On EC2, start the shutdown monitor in the background before launching the
    # celery worker. The worker consumes both the agent-wide queue and the
    # instance-type-specific queue.
    if agent_type == AgentTypes.EC2:
        commands.append('python /home/ubuntu/stochss/app/backend/tasks.py shutdown-monitor &' +
                        "celery -A tasks worker -Q {q1},{q2} -n {worker_name} --autoreload --loglevel={log_level} --workdir /home/{username} > /home/{username}/celery.log 2>&1".format(
                            q1=CeleryConfig.get_queue_name(agent_type=agent_type),
                            q2=CeleryConfig.get_queue_name(agent_type=agent_type, instance_type=instance_type),
                            log_level=log_level,
                            worker_name=worker_name,
                            username=username))
    else:
        commands.append(
            "celery -A tasks worker -Q {q1},{q2} -n {worker_name} --autoreload --loglevel={log_level} --workdir /home/{username} > /home/{username}/celery.log 2>&1".format(
                q1=CeleryConfig.get_queue_name(agent_type=agent_type),
                q2=CeleryConfig.get_queue_name(agent_type=agent_type, instance_type=instance_type),
                log_level=log_level,
                worker_name=worker_name,
                username=username))

    command = ';'.join(commands)

    # PyURDME must be run inside a 'screen' terminal because part of the FEniCS
    # code depends on being able to write to the process's terminal; screen
    # provides that terminal.
    celery_cmd = "sudo screen -d -m bash -c '{0}'".format(command)
    cmd = get_remote_command(username, ip, key_file, celery_cmd)
    logging.info(cmd)

    success = os.system(cmd)
    logging.debug("success = {0}".format(success))
    return success
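# Illustrative usage sketch with hypothetical values; real callers pass the
# instance type, IP, and key file reported by the cloud agent for the new VM:
#
#   status = start_celery_on_vm(instance_type='t1.micro',
#                               ip='10.0.0.5',
#                               key_file='/path/to/stochss-key.pem',
#                               agent_type=AgentTypes.EC2,
#                               username='ubuntu')
#   if status != 0:
#       logging.error('Failed to start the celery worker on 10.0.0.5')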
def update_celery_config_with_queue_head_ip(queue_head_ip, agent_type):
    '''
    Update the celery config file so that it points to the correct IP of the
    queue head node, which should already be running.

    Args:
        queue_head_ip  The IP that is going to be written into the celery
                       configuration file.
    '''
    logging.debug("queue_head_ip = {0}".format(queue_head_ip))

    with open(CeleryConfig.CONFIG_TEMPLATE_FILENAME, 'r') as celery_config_file:
        celery_config_lines = celery_config_file.readlines()

    with open(CeleryConfig.get_config_filename(agent_type=agent_type), 'w') as celery_config_file:
        for line in celery_config_lines:
            if line.strip().startswith('BROKER_URL'):
                celery_config_file.write(
                    'BROKER_URL = "amqp://*****:*****@{0}:5672/"\n'.format(queue_head_ip))
            else:
                celery_config_file.write(line)

    # Now update the actual Celery app...
    # TODO: Doesn't seem to work in GAE until the next request comes in to the server.
    tasks.CelerySingleton().configure()
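# For example, with queue_head_ip = '10.0.0.5' the rewritten BROKER_URL line
# looks like the following (the broker credentials are taken verbatim from the
# template and remain elided here):
#
#   BROKER_URL = "amqp://*****:*****@10.0.0.5:5672/"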
def copy_celery_config_to_vm(instance_type, ip, key_file, agent_type, username):
    celery_config_filename = CeleryConfig.get_config_filename(agent_type=agent_type)
    if not os.path.exists(celery_config_filename):
        raise Exception("celery config file not found: {0}".format(celery_config_filename))

    # Make sure the config declares the queues for this agent and instance type
    # before shipping it to the VM.
    config_celery_queues(agent_type=agent_type, instance_types=[instance_type])

    cmd = get_scp_command(keyfile=key_file,
                          source=celery_config_filename,
                          target="{user}@{ip}:~/celeryconfig.py".format(user=username, ip=ip))
    logging.info(cmd)

    success = os.system(cmd)
    if success == 0:
        logging.info("scp success!")
        logging.info(" {0} transferred to {1}".format(celery_config_filename, ip))
    else:
        raise Exception("scp failure: {0} not transferred to {1}".format(celery_config_filename, ip))
def config_celery_queues(agent_type, instance_types):
    exchange = "exchange = Exchange('{0}', type='direct')".format(
        CeleryConfig.get_exchange_name(agent_type=agent_type))
    logging.debug(exchange)

    # One queue per instance type, plus an agent-wide queue at the front of the list.
    queue_list = ["Queue('{0}', exchange, routing_key='{1}')".format(
                      CeleryConfig.get_queue_name(agent_type=agent_type, instance_type=instance_type),
                      CeleryConfig.get_routing_key_name(agent_type=agent_type, instance_type=instance_type))
                  for instance_type in instance_types]

    agent_queue_name = CeleryConfig.get_queue_name(agent_type=agent_type)
    agent_routing_key = CeleryConfig.get_routing_key_name(agent_type=agent_type)
    queue_list.insert(0, "Queue('{0}', exchange, routing_key='{1}')".format(agent_queue_name, agent_routing_key))
    logging.debug(pprint.pformat(queue_list))

    queues_string = 'CELERY_QUEUES = ({0})'.format(', '.join(queue_list))
    logging.debug(queues_string)

    config_filename = CeleryConfig.get_config_filename(agent_type=agent_type)
    with open(config_filename, 'r') as f:
        lines = f.readlines()

    # Rewrite the exchange and CELERY_QUEUES declarations in place; everything
    # after the (possibly multi-line) CELERY_QUEUES block is blanked out.
    with open(config_filename, 'w') as f:
        clear_following = False
        for line in lines:
            if clear_following:
                f.write("")
            elif line.strip().startswith('exchange'):
                f.write(exchange + "\n")
            elif line.strip().startswith('CELERY_QUEUES'):
                f.write(queues_string + "\n")
                clear_following = True
            else:
                f.write(line)

    # Reload the celery configuration.
    tasks.CelerySingleton().configure()
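# For example, config_celery_queues(agent_type=AgentTypes.EC2,
# instance_types=['t1.micro']) rewrites the agent's config file with lines
# shaped roughly like the following (the exact queue, exchange, and routing-key
# names come from CeleryConfig's naming scheme):
#
#   exchange = Exchange('<ec2 exchange>', type='direct')
#   CELERY_QUEUES = (Queue('<ec2 queue>', exchange, routing_key='<ec2 key>'),
#                    Queue('<ec2 t1.micro queue>', exchange, routing_key='<ec2 t1.micro key>'))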
def execute_cloud_task(params, agent_type, ec2_access_key, ec2_secret_key,
                       task_id, instance_type, cost_replay, database, storage_agent):
    '''
    Instantiate a celery task in the cloud.
    Returns the return value from the celery async call and the task ID.
    '''
    if 'bucketname' not in params:
        params['bucketname'] = ''

    logging.debug('execute_cloud_task: params =\n\n{0}'.format(pprint.pformat(params)))
    logging.debug('agent_type = {}'.format(agent_type))

    celery_config = tasks.CelerySingleton()
    celery_config.configure()
    celery_config.print_celery_queue_config()

    celery_queue_name = CeleryConfig.get_queue_name(agent_type=agent_type, instance_type=instance_type)
    celery_exchange = CeleryConfig.get_exchange_name(agent_type=agent_type, instance_type=instance_type)
    celery_routing_key = CeleryConfig.get_routing_key_name(agent_type=agent_type, instance_type=instance_type)
    logging.info('Deliver the task to the queue: {0}, routing key: {1}'.format(celery_queue_name,
                                                                               celery_routing_key))

    try:
        # Make sure the broker is actually reachable; we don't want the user to
        # submit a task and have it time out because the broker server isn't up yet.
        is_broker_up, exc, trace = check_broker_status()
        if not is_broker_up:
            return {
                "success": False,
                "reason": "Cloud instances unavailable. Please wait a minute for their initialization to complete.",
                "exception": str(exc),
                "traceback": trace
            }

        # Create a celery task.
        logging.debug("execute_cloud_task : executing task with uuid : %s ", task_id)

        start_time = datetime.now()
        data = {
            'status': "pending",
            "start_time": start_time.strftime('%Y-%m-%d %H:%M:%S'),
            'message': "Task sent to Cloud",
            'uuid': task_id,
            'infrastructure': agent_type
        }

        if params["job_type"] == JobTypes.STOCHOPTIM:
            result = __execute_cloud_stochoptim_task(params=params, data=data, database=database,
                                                     task_id=task_id,
                                                     celery_queue_name=celery_queue_name,
                                                     celery_routing_key=celery_routing_key,
                                                     storage_agent=storage_agent)
        else:
            # If this is a cost-analysis replay and the agent supports it,
            # update the stochss-cost-analysis table via the helper.
            if cost_replay:
                if agent_type in JobConfig.SUPPORTED_AGENT_TYPES_FOR_COST_ANALYSIS:
                    result = __execute_cloud_cost_analysis_task(params=params, agent_type=agent_type,
                                                                instance_type=instance_type,
                                                                task_id=task_id, database=database,
                                                                ec2_access_key=ec2_access_key,
                                                                ec2_secret_key=ec2_secret_key,
                                                                start_time=start_time,
                                                                celery_queue_name=celery_queue_name,
                                                                celery_routing_key=celery_routing_key,
                                                                storage_agent=storage_agent)
                else:
                    raise Exception("cost replay not supported for agent type = {0}".format(agent_type))
            else:
                result = {}
                result["db_id"] = task_id

                params["db_table"] = JobDatabaseConfig.TABLE_NAME
                params["cost_analysis_table"] = JobDatabaseConfig.COST_ANALYSIS_TABLE_NAME

                database.updateEntry(taskid=task_id, data=data, tablename=params["db_table"])

                if ec2_access_key is None:
                    ec2_access_key = ''
                if ec2_secret_key is None:
                    ec2_secret_key = ''

                celery_task = tasks.task.apply_async(
                    args=[task_id, params, agent_type, database, storage_agent,
                          ec2_access_key, ec2_secret_key],
                    queue=celery_queue_name,
                    routing_key=celery_routing_key)

                logging.info('celery_task.ready() = {}'.format(celery_task.ready()))
                logging.info('celery_task.id = {}'.format(celery_task.id))

                result["celery_pid"] = celery_task.id

        logging.info("execute_cloud_task: result of task with task_id {0} : \n{1}".format(
            task_id, pprint.pformat(result)))
        result["success"] = True
        result['resource'] = params['resource']
        return result

    except Exception as e:
        logging.exception(e)
        return {
            "success": False,
            "reason": str(e),
            "exception": str(e),
        }
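# Illustrative call sketch with hypothetical values; 'job_params', 'access_key',
# 'secret_key', 'task_uuid', 'database', and 'storage_agent' stand in for the
# objects the caller already has:
#
#   result = execute_cloud_task(params=job_params,
#                               agent_type=AgentTypes.EC2,
#                               ec2_access_key=access_key,
#                               ec2_secret_key=secret_key,
#                               task_id=task_uuid,
#                               instance_type='t1.micro',
#                               cost_replay=False,
#                               database=database,
#                               storage_agent=storage_agent)
#   if result['success']:
#       logging.info('celery_pid = {0}'.format(result.get('celery_pid')))
#   else:
#       logging.error(result['reason'])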