def save(self, *args, **kwargs):
    # If we have too many, do this asynchronously.
    if len(self.datas) > 200:
        task_args = [self.initial_group.id]
        if self.cleaned_data['company']:
            task_args.append(self.cleaned_data['company'].id)
        else:
            task_args.append(None)
        chord(
            process_patient_form_data_row.subtask([data] + task_args)
            for data in self.datas)(process_patient_form_data.subtask(
                [self.user.id]))
    else:
        patient_ids = []
        for form in self.forms:
            patient = form.save(commit=False)
            patient.patient_profile.company = self.cleaned_data['company']
            patient.patient_profile.save()
            patient_ids.append(patient.pk)
        patients = User.objects.filter(id__in=patient_ids)
        content = PatientProfile.generate_csv2(patients)
        self.download = TemporaryDownload.objects.create(
            for_user=self.user, content=content, content_type='text/csv',
            filename='imported_patients.csv')
def _delete_organization_buildings(org_pk, chunk_size=100, *args, **kwargs):
    """Deletes all BuildingSnapshot instances within an organization

    :param org_pk: int, str, the organization pk
    """
    qs = BuildingSnapshot.objects.filter(super_organization=org_pk)
    ids = qs.values_list('id', flat=True)
    deleting_cache_key = get_prog_key(
        'delete_organization_buildings',
        org_pk
    )
    if not ids:
        cache.set(deleting_cache_key, 100)
        return

    # delete the canonical buildings
    can_ids = CanonicalBuilding.objects.filter(
        canonical_snapshot__super_organization=org_pk
    ).values_list('id', flat=True)
    _delete_canonical_buildings.delay(can_ids)

    step = float(chunk_size) / len(ids)
    cache.set(deleting_cache_key, 0)
    tasks = []
    for del_ids in batch(ids, chunk_size):
        # we could also use .s instead of .subtask and not wrap the *args
        tasks.append(
            _delete_organization_buildings_chunk.subtask(
                (del_ids, deleting_cache_key, step, org_pk)
            )
        )
    chord(tasks, interval=15)(finish_delete.subtask([org_pk]))
def uptest_all_procs():
    print('Uptest all procs')
    # Fan out a task for each active host
    # callback post_uptest_all_procs at the end
    # Note: uptests can take a long time, so use .all() to fetch data
    # from DB immediately
    hosts = Host.objects.filter(active=True).all()
    # Only bother doing anything if there are active hosts
    if hosts:
        # Create a test run record.
        run = TestRun(start=timezone.now())
        run.save()
        print('Running test run_id={}'.format(run.id))

        def make_test_task(host):
            print('Creating test task for host={} run_id={}'.format(
                host.name, run.id))
            return uptest_host.subtask((host.name, run.id, True),
                                       expires=1800)

        chord(
            (make_test_task(h) for h in hosts))(post_uptest_all_procs.subtask(
                (run.id, )))
    else:
        print('No hosts to test')
def swarm_cleanup(swarm_id, swarm_trace_id):
    """
    Delete any procs in the swarm that aren't from the current release.
    """
    logger.info("[%s] Swarm %s cleanup", swarm_trace_id, swarm_id)
    swarm = Swarm.objects.get(id=swarm_id)
    all_procs = swarm.get_procs()
    current_procs = [p for p in all_procs if p.hash == swarm.release.hash]
    stale_procs = [p for p in all_procs if p.hash != swarm.release.hash]

    delete_subtasks = []

    # Only delete old procs if the deploy of the new ones was successful.
    if stale_procs and len(current_procs) >= swarm.size:
        for p in stale_procs:
            # We don't need to worry about removing these nodes from a pool at
            # this point, so just call delete_proc instead of swarm_delete_proc
            logger.info("[%s] Swarm %s stale proc %s on host %s",
                        swarm_trace_id, swarm_id, p.name, p.host.name)
            delete_subtasks.append(
                delete_proc.subtask((p.host.name, p.name),
                                    {'swarm_trace_id': swarm_trace_id}))

    if delete_subtasks:
        chord(delete_subtasks)(swarm_finished.subtask((
            swarm_id,
            swarm_trace_id,
        )))
    else:
        swarm_finished.delay([], swarm_id, swarm_trace_id)
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw."""
    import_file = ImportFile.objects.get(pk=file_pk)
    if import_file.raw_save_done:
        return {'status': 'warning', 'message': 'raw data already saved'}

    if import_file.source_type == "Green Button Raw":
        return _save_raw_green_button_data(file_pk, *args, **kwargs)

    parser = reader.MCMParser(import_file.local_file)
    cache_first_rows(import_file, parser)
    rows = parser.next()
    import_file.num_rows = 0

    prog_key = get_prog_key('save_raw_data', file_pk)

    tasks = []
    for chunk in batch(rows, 100):
        import_file.num_rows += len(chunk)
        tasks.append(_save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))

    tasks = add_cache_increment_parameter(tasks)
    import_file.num_columns = parser.num_columns()
    import_file.save()

    if tasks:
        chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
    else:
        finish_raw_save.task(file_pk)

    return {'status': 'success'}
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw."""
    result = {'status': 'success', 'progress': 100}
    prog_key = get_prog_key('save_raw_data', file_pk)
    try:
        import_file = ImportFile.objects.get(pk=file_pk)
        if import_file.raw_save_done:
            result['status'] = 'warning'
            result['message'] = 'Raw data already saved'
            cache.set(prog_key, result)
            return result

        if import_file.source_type == "Green Button Raw":
            return _save_raw_green_button_data(file_pk, *args, **kwargs)

        parser = reader.MCMParser(import_file.local_file)
        cache_first_rows(import_file, parser)
        rows = parser.next()
        import_file.num_rows = 0

        tasks = []
        for chunk in batch(rows, 100):
            import_file.num_rows += len(chunk)
            tasks.append(
                _save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))

        tasks = add_cache_increment_parameter(tasks)
        import_file.num_columns = parser.num_columns()
        import_file.save()

        if tasks:
            chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
        else:
            finish_raw_save.task(file_pk)
    except StopIteration:
        result['status'] = 'error'
        result['message'] = 'StopIteration Exception'
        result['stacktrace'] = traceback.format_exc()
    except Error as e:
        result['status'] = 'error'
        result['message'] = 'File Content Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "' + e.message + '"'
        result['stacktrace'] = traceback.format_exc()
    except Exception as e:
        result['status'] = 'error'
        result['message'] = 'Unhandled Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    cache.set(prog_key, result)
    return result
def _map_data(file_pk, *args, **kwargs):
    """Get all of the raw data and process it using appropriate mapping.

    @lock_and_track returns a progress_key

    :param file_pk: int, the id of the import_file we're working with.
    """
    import_file = ImportFile.objects.get(pk=file_pk)
    # Don't perform this task if it's already been completed.
    if import_file.mapping_done:
        prog_key = get_prog_key('map_data', file_pk)
        cache.set(prog_key, 100)
        return {'status': 'warning', 'message': 'mapping already complete'}

    # If we haven't finished saving, we shouldn't proceed with mapping
    # Re-queue this task.
    if not import_file.raw_save_done:
        map_data.apply_async(args=[file_pk], countdown=60, expires=120)
        return {'status': 'error', 'message': 'waiting for raw data save.'}

    source_type_dict = {
        'Portfolio Raw': PORTFOLIO_RAW,
        'Assessed Raw': ASSESSED_RAW,
        'Green Button Raw': GREEN_BUTTON_RAW,
    }
    source_type = source_type_dict.get(import_file.source_type, ASSESSED_RAW)

    qs = BuildingSnapshot.objects.filter(
        import_file=import_file,
        source_type=source_type,
    ).iterator()

    prog_key = get_prog_key('map_data', file_pk)
    tasks = []
    for chunk in batch(qs, 100):
        serialized_data = [obj.extra_data for obj in chunk]
        tasks.append(map_row_chunk.subtask(
            (serialized_data, file_pk, source_type, prog_key)
        ))

    tasks = add_cache_increment_parameter(tasks)
    if tasks:
        chord(tasks, interval=15)(finish_mapping.subtask([file_pk]))
    else:
        finish_mapping.task(file_pk)

    return {'status': 'success'}
def builder(name, mimetype, chunks=None, *args, **kwargs):
    if not chunks:
        chunks = settings.DATA_EXPORTER_CHUNKS_LENGTH

    return chord(
        generate_subtasks_builder(name, mimetype, chunks, *args, **kwargs))(
            compute.subtask(kwargs=dict({
                'name': name,
                'mimetype': mimetype
            }, **kwargs)))
def build_chord_from_tasklist(tasklist, pageinfo):
    if len(tasklist) == 0:
        return
    callback_inner = write_string_to_file.subtask(
        args=(pageinfo['title'],))
    callback_outer = create_table_for_page.subtask(
        args=(pageinfo,), kwargs={'callback': callback_inner})
    result = chord(tasklist, interval=120)(callback_outer)
    return result
def form_valid(self, form):
    super(ScaleImageView, self).form_valid(form)
    subtasks = [image_tasks.scale_image.si(self.object.image.name, 50),
                image_tasks.scale_image.si(self.object.image.name, 100),
                image_tasks.scale_image.si(self.object.image.name, 150),
                image_tasks.scale_image.si(self.object.image.name, 200),
                image_tasks.scale_image.si(self.object.image.name, 250),
                image_tasks.scale_image.si(self.object.image.name, 300),
                image_tasks.scale_image.si(self.object.image.name, 400)]
    subtasks_async = group(subtasks).apply_async()
    upon_completion = email_tasks.send_email.si(
        "*****@*****.**", [self.object.notify], "Yo",
        "All your images are scaled")
    chord(subtasks)(upon_completion)
    task_ids = [t.task_id for t in subtasks_async.subtasks]
    return self.render_to_response(self.get_context_data(
        form=form, task_ids=json.dumps(task_ids), success=True))
def queue_computers_requests(request_num, per_page, group_id, api_key=str()):
    """
    Creates a Celery chord of requests and concatenates the results into one
    JSON formatted dictionary.

    :param request_num: The number of requests to make to get all results
        from the Watchman '/computers' endpoint.
    :param per_page: The number of computers to include per page. Watchman
        limits this to 100.
    :param group_id: The group ID to use in requesting a list of computers.
    :param api_key: An optional API key to use if no WATCHMAN_API_KEY
        environment variable is set.
    :return: Returns a JSON formatted dictionary containing the concatenated
        results from the multiple requests.
    """
    # put the multiple computer requests in a group
    computers_chord = chord(
        get_computers.s(
            page=page,
            per_page=per_page,
            group_id=group_id,
            api_key=api_key) for page in range(1, request_num + 1))
    return computers_chord(combine_computer_results.subtask())
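# A hedged sketch of what the chord callback above could do: Celery hands the
# callback a list containing the result of every get_computers page task, so
# the merge step only needs to concatenate them. This body is an assumption
# for illustration only, not the project's actual combine_computer_results
# implementation, and the 'computers' key is likewise assumed.
from celery import shared_task


@shared_task
def combine_computer_results(pages):
    # 'pages' is the list of per-page dicts returned by the header tasks.
    combined = {'computers': []}
    for page in pages:
        # Assumes each page dict keeps its records under a 'computers' key.
        combined['computers'].extend(page.get('computers', []))
    return combined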
def test_solution(solution):
    try:
        compile_solution(solution)
        solution.grader_message = 'Testing'
        taskList = []
        tests = solution.problem.test_set.all()
        for t in tests:
            curSubTask = run_test.si(solution, t)
            taskList.append(curSubTask)
        res = chord(group(taskList), save_result.s(solution))
        res.apply_async()
    except subprocess.CalledProcessError:
        solution.grader_message = 'Compilation error (syntax)'
    except subprocess.TimeoutExpired:
        solution.grader_message = 'Compilation error (timeout)'
    finally:
        solution.save()
def test_solution(solution):
    try:
        compile_solution(solution)
        solution.grader_message = 'Testing'
        taskList = []
        print('adding tests')
        print(sys.getsizeof(solution))
        tests = Test.objects.filter(problem_id=solution.problem_id)
        for t in tests:
            curSubTask = run_test.si(solution.id, t.id)
            taskList.append(curSubTask)
        res = chord(group(taskList), save_result.s(solution))
        res.apply_async()
        print('tests added')
    except subprocess.CalledProcessError:
        solution.grader_message = 'Compilation error (syntax)'
    except subprocess.TimeoutExpired:
        solution.grader_message = 'Compilation error (timeout)'
    finally:
        solution.save()
def swarm_assign_uptests(swarm_id, swarm_trace_id=None):
    logger.info("[%s] Swarm %s uptests", swarm_trace_id, swarm_id)
    swarm = Swarm.objects.get(id=swarm_id)
    all_procs = swarm.get_procs()
    current_procs = [p for p in all_procs if p.hash == swarm.release.hash]

    # Organize procs by host
    host_procs = defaultdict(list)
    for proc in current_procs:
        host_procs[proc.host.name].append(proc.name)

    header = [
        uptest_host_procs.subtask((h, ps)) for h, ps in host_procs.items()
    ]

    if len(header):
        this_chord = chord(header)
        callback = swarm_post_uptest.s(swarm_id, swarm_trace_id)
        this_chord(callback)
    else:
        # There are no procs, so there are no uptests to run. Call
        # swarm_post_uptest with an empty list of uptest results.
        swarm_post_uptest([], swarm_id, swarm_trace_id)
def job_dispatch(results, job_id, batches):
    """
    Process the job batches one at a time.

    When there is more than one batch to process, a chord is used to delay
    the execution of remaining batches.
    """
    batch = batches.pop(0)
    info('dispatching job_id: {0}, batch: {1}, results: {2}'.format(
        job_id, batch, results))

    tasks = [job_worker.subtask((job_id, task_num)) for task_num in batch]

    # when there are other batches to process, use a chord to delay the
    # execution of remaining tasks; otherwise, finish off with a TaskSet
    if batches:
        info('still have batches, chording {0}'.format(batches))
        callback = job_dispatch.subtask((job_id, batches))
        return chord(tasks)(callback)
    else:
        info('only batch, calling TaskSet')
        return TaskSet(tasks=tasks).apply_async()
def B(id):
    return chord(make_request.subtask((id, "%s %r" % (id, i, )))
                 for i in xrange(10))(B_callback.subtask((id, )))
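# A minimal, self-contained sketch of the header/callback pattern the examples
# in this collection rely on. The Celery app, the Redis broker/backend URLs,
# and the add/tsum task names are assumptions for illustration only; they are
# not taken from any of the projects shown above. A chord needs a result
# backend so the callback can collect the header results.
from celery import Celery, chord, group

app = Celery('chord_sketch',
             broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/0')


@app.task
def add(x, y):
    # Header task: runs once per signature in the chord header.
    return x + y


@app.task
def tsum(results):
    # Callback: receives the list of all header results once they finish.
    return sum(results)


def run_sketch():
    # Form 1: build the chord from the header, then call it with the callback,
    # as in most examples above.
    res_1 = chord(add.s(i, i) for i in range(10))(tsum.s())
    # Form 2: pass header and callback together, then apply_async(), as in
    # test_solution above. Both forms dispatch the same workflow.
    res_2 = chord(group(add.s(i, i) for i in range(10)), tsum.s()).apply_async()
    return res_1, res_2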
def swarm_release(swarm_id, swarm_trace_id=None):
    """
    Assuming the swarm's build is complete, this task will ensure there's a
    release with that build + current config, and call subtasks to make sure
    there are enough deployments.
    """
    logger.info("[%s] Swarm %s release", swarm_trace_id, swarm_id)
    swarm = Swarm.objects.get(id=swarm_id)
    send_debug_event('Swarm %s release' % swarm)
    build = swarm.release.build

    # Bail out if the build doesn't have a file
    assert build.file, "Build %s has no file" % build

    # swarm.get_current_release(tag) will check whether there's a release with
    # the right build and config, and create one if not.
    swarm.release = swarm.get_current_release(build.tag)
    swarm.save()

    # Ensure that the release is hashed.
    release = swarm.release
    release.save()

    # OK we have a release. Next: see if we need to do a deployment.

    # Query squad for list of procs.
    all_procs = swarm.get_procs()
    current_procs = [p for p in all_procs if p.hash == swarm.release.hash]

    procs_needed = swarm.size - len(current_procs)

    if procs_needed > 0:
        hosts = swarm.get_prioritized_hosts()
        hostcount = len(hosts)

        # Build up a dictionary where the keys are hostnames, and the
        # values are lists of ports.
        new_procs_by_host = defaultdict(list)
        for x in range(procs_needed):
            host = hosts[x % hostcount]
            port = host.get_free_port()
            new_procs_by_host[host.name].append(port)

            # Ports need to be locked here in the synchronous loop, before
            # fanning out the async subtasks, in order to prevent collisions.
            lock_port(host, port)

        # Now loop over the hosts and fan out a task to each that needs it.
        subtasks = []
        for host in hosts:
            if host.name in new_procs_by_host:
                subtasks.append(
                    swarm_deploy_to_host.subtask(
                        (swarm.id, host.id, new_procs_by_host[host.name],
                         swarm_trace_id)))
        callback = swarm_post_deploy.subtask((swarm.id, swarm_trace_id))
        chord(subtasks)(callback)
    elif procs_needed < 0:
        # We need to delete some procs.

        # Get the list of hosts valid for this swarm that have some procs
        # running on them. get_prioritized_hosts returns all hosts in the
        # squad that can run the proc, but not necessarily that are running
        # it now.
        hosts = [
            host for host in swarm.get_prioritized_hosts()
            if len(host.swarm_procs) > 0
        ]
        assert len(hosts) > 0, 'No hosts running proc'

        # Remove from the busiest hosts first.
        hosts.reverse()

        hostcount = len(hosts)
        subtasks = []
        for x in range(procs_needed * -1):
            # Round-robin across hosts
            host = hosts[x % hostcount]
            proc = host.swarm_procs.pop()
            subtasks.append(
                swarm_delete_proc.subtask(
                    (swarm.id, host.name, proc.name, proc.port),
                    {'swarm_trace_id': swarm_trace_id}))
        callback = swarm_post_deploy.subtask((swarm.id, swarm_trace_id))
        chord(subtasks)(callback)
    else:
        # We have just the right number of procs. Uptest and route them.
        swarm_assign_uptests(swarm.id, swarm_trace_id)