Exemplo n.º 1
0
def update_jobs_created():
	"""
	Move every job currently in the "CREATED" status to "IN_PROGRESS".

	For each such job the "IN_PROGRESS" status id is assigned, the change is
	committed through ``db``, and only then is the job-created e-mail sent.
	"""
	for new_job in job_manager.get_jobs_by_status("CREATED"):
		tasks_log.info("Found new job")
		in_progress_id = job_manager.get_job_status_id("IN_PROGRESS")
		new_job.status_id = in_progress_id
		# Commit is issued before the notification goes out.
		db.commit()
		email_manager.send_job_created_email(new_job)
Exemplo n.º 2
0
def update_fleet():
	count = 0
	while count < FLEET_UPDATE_MAX_RUNS:
		try:
			tasks_log.info("Update Jobs and JobItems Fleets. Attempt " + str(count))
			fleet_manager.update_fleet_capacity()
		except Exception, err:
			tasks_log.error(traceback.format_exc())
		finally:
Exemplo n.º 3
0
def poll_job_items_queue():
	count = 0
	while count < POLL_JOB_ITEMS_MAX_RUNS:
		try:
			tasks_log.info("Polling job_items queue " + str(count))
			process_next_job_item()
		except Exception, err:
			tasks_log.error(traceback.format_exc())
		finally:
Exemplo n.º 4
0
def poll_jobs_queue():
	count = 0
	while count < POLL_JOBS_MAX_RUNS:
		try:
			tasks_log.info("Polling jobs created + in-progress queues " + str(count))
			update_jobs_created()
			update_jobs_in_progress()
		except Exception, err:
			tasks_log.error(traceback.format_exc())
		finally:
Exemplo n.º 5
0
def add_job_item_containers(count):
	"""
	Make ``count`` attempts to start one JobItem task each.

	Every attempt asks get_available_instance_w_most_containers() for an
	instance in ECS_JOB_ITEMS_CLUSTER (passing
	JOB_ITEM_CONTAINERS_PER_INSTANCE). When an instance id comes back,
	ECS_JOB_ITEMS_TASK is started on it; otherwise the failure is logged.
	Each attempt, successful or not, is followed by a 2 second sleep.
	"""
	for _attempt in range(count):
		target_instance = get_available_instance_w_most_containers(
			ECS_JOB_ITEMS_CLUSTER, JOB_ITEM_CONTAINERS_PER_INSTANCE)
		if not target_instance:
			tasks_log.info("Attempt To Add Container Failed. No available instances found.")
		else:
			tasks_log.info("JobItem - Adding Container To Instance " + target_instance)
			ecs.start_task_on_instance(ECS_JOB_ITEMS_CLUSTER, ECS_JOB_ITEMS_TASK, target_instance)
		# Pause between attempts, including after the last one.
		time.sleep(2)
Exemplo n.º 6
0
def calculate_optimal_job_item_container_capacity(queue, itms_per_cntr):
	"""
	Work out how many JobItem containers the given queue currently calls for.

	Looks up the SQS queue named by ``queue``, reads its size and hands that
	size together with ``itms_per_cntr`` to calculate_job_item_containers().
	The result is logged along with timing details and returned.
	"""
	now = datetime.datetime.utcnow()
	# NOTE(review): "last updated" is just a second utcnow() reading; the
	# job_manager.get_last_updated_job().last_updated lookup is disabled, so
	# the logged seconds-since-last-update is effectively always 0.
	last_update = datetime.datetime.utcnow()
	elapsed_seconds = int((now - last_update).total_seconds())

	queue_url = sqs.get_queue(queue).url
	backlog = sqs.get_queue_size(queue_url)
	container_target = calculate_job_item_containers(backlog, itms_per_cntr)

	template = "\n".join([
		"SITUATION:",
		"CurrentTime: %s",
		"Last_Updated: %s",
		"SecondsSinceLastUpdate: %s",
		"JobItemQueueSize: %s",
		"OptimalContainers: %s",
	])
	report = template % (now, last_update, elapsed_seconds, backlog, container_target)
	tasks_log.info(report)
	return container_target
Exemplo n.º 7
0
def update_jobs_fleet_instances(optimal_cnts, autoscaling_group, max_inst, min_inst, cnts_per_inst):
	"""
	Adjust the jobs autoscaling group one step towards its optimal size.

	The optimal instance count comes from
	calculate_optimal_job_instance_capacity(optimal_cnts, cnts_per_inst).
	Capacity is increased only while the current count is below ``max_inst``
	and decreased only while it is above ``min_inst``.

	Returns the current count plus one after an increase, minus one after a
	decrease, or the unchanged current count otherwise.
	"""
	running = autoscaling.get_capacity(autoscaling_group)
	wanted = calculate_optimal_job_instance_capacity(optimal_cnts, cnts_per_inst)
	tasks_log.info("JobsInstances - Current:%s Optimal:%s" % (str(running), str(wanted)))

	if wanted > running and running < max_inst:
		tasks_log.info("Jobs - Increasing Instance Capacity")
		autoscaling.increase_capacity(autoscaling_group)
		return running + 1

	if wanted < running and running > min_inst:
		tasks_log.info("Jobs - Reducing Instance Capacity")
		autoscaling.decrease_capacity(autoscaling_group)
		return running - 1

	tasks_log.info("Jobs - Leaving Instance Capacity Unchanged")
	return running
Exemplo n.º 8
0
def update_jobs_fleet_containers(cluster, service, max_cnts, min_cnts, cooldown):
	"""
	Move the jobs ECS service capacity one container towards the optimum.

	The optimum comes from calculate_optimal_job_container_capacity(cooldown).
	The service capacity is set to current + 1 while below ``max_cnts`` and to
	current - 1 while above ``min_cnts``; otherwise it is left alone.

	Returns the capacity that was set, or the current capacity if unchanged.
	"""
	running = ecs.get_service_capacity(cluster, service)
	wanted = calculate_optimal_job_container_capacity(cooldown)
	tasks_log.info("JobsContainers - Current:%s Optimal:%s" % (str(running), str(wanted)))

	if wanted > running and running < max_cnts:
		tasks_log.info("Jobs - Increasing Container Capacity")
		grown = running + 1
		ecs.set_service_capacity(cluster, service, grown)
		return grown

	if wanted < running and running > min_cnts:
		tasks_log.info("Jobs - Reducing Container Capacity")
		shrunk = running - 1
		ecs.set_service_capacity(cluster, service, shrunk)
		return shrunk

	tasks_log.info("Jobs - Leaving Container Capacity Unchanged")
	return running
Exemplo n.º 9
0
def update_job_items_fleet_instances(autoscaling_grp, max_insts, min_insts, cntrs_per_inst, cntrs):
	"""
	Adjust the JobItems autoscaling group one step towards its optimal size.

	The optimal instance count comes from
	calculate_optimal_job_item_instance_capacity(cntrs_per_inst, cntrs).
	Capacity is increased only while the current count is below ``max_insts``
	and decreased only while it is above ``min_insts``.

	Returns the current count plus one after an increase, minus one after a
	decrease, or the unchanged current count otherwise.
	"""
	running = autoscaling.get_capacity(autoscaling_grp)
	wanted = calculate_optimal_job_item_instance_capacity(cntrs_per_inst, cntrs)
	summary = "JobItemsInstances: Current:%s Optimal:%s" % (str(running), str(wanted))
	tasks_log.info(summary)

	if wanted > running and running < max_insts:
		tasks_log.info("JobItems - Increasing Instance Capacity")
		autoscaling.increase_capacity(autoscaling_grp)
		return running + 1

	if wanted < running and running > min_insts:
		tasks_log.info("JobItems - Reducing Instance Capacity")
		autoscaling.decrease_capacity(autoscaling_grp)
		return running - 1

	tasks_log.info("JobItems - Leaving Instance Capacity Unchanged")
	return running
Exemplo n.º 10
0
def update_job_items_fleet_containers(cluster, queue, max_cntrs, min_cntrs, itms_per_cntr):
	"""
	Step the JobItems container count towards the optimum for the queue.

	The current count is the total number of tasks in ``cluster``; the
	optimum comes from
	calculate_optimal_job_item_container_capacity(queue, itms_per_cntr).

	- Scale up: when the optimum is above the current count and the current
	  count is below ``max_cntrs``, one container is added.
	- Scale down: when the optimum is below the current count and the current
	  count is above ``min_cntrs``, remove_job_item_containers() is called.
	- Otherwise nothing is changed.

	Returns the optimal container count after a scale up or when unchanged,
	and the current count minus one after a scale down.
	"""
	current_containers = ecs.get_total_tasks_in_cluster(cluster)
	optimal_containers = calculate_optimal_job_item_container_capacity(queue, itms_per_cntr)
	tasks_log.info("JobItemsContainers - Current:%s Optimal:%s" % (str(current_containers), 
		str(optimal_containers)))
	if optimal_containers > current_containers and current_containers < max_cntrs:
		tasks_log.info("JobItems - Increasing Container Capacity")
		# add_job_item_containers() takes the number of containers to start,
		# not a target total. Passing current_containers+1 would start that
		# many extra tasks and could overshoot max_cntrs, so add exactly one.
		add_job_item_containers(1)
		return optimal_containers
	elif optimal_containers < current_containers and current_containers > min_cntrs:
		tasks_log.info("JobItems - Reducing Container Capacity")
		# NOTE(review): remove_job_item_containers() is not visible here. If
		# it also takes a count rather than a target total, this argument has
		# the same problem as the scale-up call had - confirm.
		remove_job_item_containers(current_containers-1)
		return current_containers-1
	else:
		tasks_log.info("JobItems - Leaving Container Capacity Unchanged")
		return optimal_containers
Exemplo n.º 11
0
def calculate_optimal_job_container_capacity(cooldown):
	"""
	Decide whether the jobs service should run one container or none.

	Returns 0 only when no job is "IN_PROGRESS" or "CREATED" and the value
	from get_seconds_since_last_job_run(cooldown) is greater than
	``cooldown``. In every other case returns 1.
	"""
	idle_seconds = get_seconds_since_last_job_run(cooldown)
	in_progress_jobs = job_manager.get_jobs_by_status("IN_PROGRESS")
	created_jobs = job_manager.get_jobs_by_status("CREATED")
	active_jobs = len(in_progress_jobs) + len(created_jobs)
	tasks_log.info("SITUATION:\nSecondsSinceLastJobRun: %s\nJobCount: %s" % (idle_seconds, active_jobs))

	if active_jobs != 0:
		tasks_log.info("found jobs in flight")
		return 1

	tasks_log.info("no jobs in flight")
	# Keep one container until the cooldown has been exceeded.
	return 0 if idle_seconds > cooldown else 1
Exemplo n.º 12
0
def process_next_job_item():
	"""
	Take the next message from the job_items SQS queue and process its item.

	Returns immediately when the queue yields no message. Otherwise the job
	item document named by the message's ``job_item_key`` attribute is
	fetched, its ``attempts`` counter is incremented, and it is passed to
	job_item_manager.process_job_item(). The message is deleted from the
	queue only when that call reports "COMPLETE".
	"""
	tasks_log.info("Getting next job_item from queue")
	items_queue = sqs.get_queue(client_constants.SQS_JOB_ITEMS_QUEUE)
	message = sqs.get_next_message(items_queue)
	if message is None:
		tasks_log.info("No job items found in Queue")
		return

	item_key = message['MessageAttributes']['job_item_key']['StringValue']
	tasks_log.info("Found new job_item " + item_key)

	item_doc = job_item_manager.get_job_item_doc(item_key)
	item_doc['attempts'] += 1
	outcome = job_item_manager.process_job_item(item_doc)

	if outcome != "COMPLETE":
		# The message stays on the queue so that SQS handles the retry.
		items_log.info("Leaving job_item in queue")
		return

	items_log.info("Deleting completed job_item from queue")
	sqs.delete_message(items_queue, message)
		
Exemplo n.º 13
0
def calculate_optimal_job_item_instance_capacity(cntrs_per_inst, opt_cntrs):
	"""
	Return how many instances are needed to hold ``opt_cntrs`` containers.

	The result is ``opt_cntrs / cntrs_per_inst`` rounded up to a whole
	number. The inputs and the result are written to the tasks log.
	"""
	exact_ratio = float(opt_cntrs) / cntrs_per_inst
	instances_needed = int(math.ceil(exact_ratio))
	template = "\n".join([
		"SITUATION:",
		"OptimalJobItemContainers: %s",
		"JobItemContainerPerInstance: %s",
		"OptimalJobItemInstances: %s",
	])
	report = template % (opt_cntrs, cntrs_per_inst, instances_needed)
	tasks_log.info(report)
	return instances_needed