def handle(self, *args, **options):
    filter_qs = {}

    # Don't include disabled projects by default
    if not options['disabled_projects']:
        filter_qs['entity__resource__project__disabled'] = False

    # Don't include obsolete by default
    if not options['obsolete_entities']:
        filter_qs['entity__obsolete'] = False

    translations_pks = (
        Translation.objects
        .filter(
            entity__resource__format__in=DB_FORMATS,
            **filter_qs
        )
        .values_list('pk', flat=True)
    )

    # Split translations into even batches and send them to Celery workers
    batch_size = int(options['batch_size'])
    group(
        signature(
            check_translations,
            args=(translations_pks[i:i + batch_size],)
        )
        for i in range(0, len(translations_pks), batch_size)
    ).apply_async()
def lp_clinical_team():
    event.Event('event', {
        'task': 'overwatch_tasks',
        'info': {
            'message': 'started lp_clinical_team'
        }
    })

    update_data = report_tasks.get_profile_changes()

    count = 0
    header = []
    batch = {}
    for client_id in update_data:
        if count >= UPDATE_CLIENT_BATCH_SIZE:
            # Flush the full batch first, then fall through so the current
            # client is still added to the next batch instead of being dropped.
            header.append(update_client_profile_batch.s(batch))
            batch = {}
            count = 0
        update_package = update_data[client_id]
        batch[client_id] = update_package
        count += 1

    if len(batch) > 0:
        header.append(update_client_profile_batch.s(batch))

    group(header)()

    event.Event('event', {
        'task': 'overwatch_tasks',
        'info': {
            'message': 'finished lp_clinical_team'
        }
    })
def test_group_chord_group_chain(self, manager):
    from celery.five import bytes_if_py2
    if not manager.app.conf.result_backend.startswith('redis'):
        raise pytest.skip('Requires redis result backend.')
    redis_connection = StrictRedis()
    redis_connection.delete('redis-echo')
    before = group(redis_echo.si('before {}'.format(i)) for i in range(3))
    connect = redis_echo.si('connect')
    after = group(redis_echo.si('after {}'.format(i)) for i in range(2))

    result = (before | connect | after).delay()
    result.get(timeout=TIMEOUT)
    redis_messages = list(map(
        bytes_if_py2,
        redis_connection.lrange('redis-echo', 0, -1)
    ))

    before_items = \
        set(map(bytes_if_py2, (b'before 0', b'before 1', b'before 2')))
    after_items = set(map(bytes_if_py2, (b'after 0', b'after 1')))

    assert set(redis_messages[:3]) == before_items
    assert redis_messages[3] == b'connect'
    assert set(redis_messages[4:]) == after_items

    redis_connection.delete('redis-echo')
def alert_change_domain(domain_id, **kwargs):
    from app.models.models import Domain, Host, hosts, User
    utc_now = datetime.datetime.utcnow()
    experation_date = kwargs.pop('experation_date')
    domain_name = kwargs.pop('domain_name')
    query = db_session.session.query(User.username)
    query = query.join(hosts, hosts.c.user_id == User.id)
    query = query.join(Host, hosts.c.host_id == Host.id)
    query = query.join(Domain, Host.domain_id == Domain.id)
    query = query.filter(Domain.id == domain_id)
    query = query.filter(or_(User.date_out >= utc_now, User.date_out.is_(None)))
    query_set = query.all()
    # Message body (Russian): "The domain lease <b>%s</b> has been extended until %s"
    message = render_jinja2(
        'email.html',
        text=u'Продлена аренда домена <br> <b>%s</b> <br> до <br> %s' % (
            domain_name, experation_date.strftime('"%d" %B %Y')))
    tasks_list = list()
    for user_inst in query_set:
        user_email = user_inst[0]
        logger.info('send mail %s. domain %s', user_email, domain_id)
        tasks_list.append(send_email.s(subject='ChicagoOnline. %s' % domain_name,
                                       message=message.encode('utf-8'),
                                       recipients=[user_email],
                                       html=True))
    if tasks_list:
        group(*tasks_list).delay()
def post(self, request, *args, **kwargs):
    """Import a manifest at a remote_url."""
    remote_url = request.data.get("remote_url")
    if not remote_url:
        return Response(
            {'error': 'Did not provide remote_url.'},
            status=status.HTTP_400_BAD_REQUEST)

    shared_id = str(uuid.uuid4())
    imp = ManifestPreImporter(remote_url)
    lst = imp.get_all_urls()

    # If there are manifests to import, create a celery group for the task.
    if lst:
        if len(lst) == 1:
            g = group([import_single_manifest.s(imp.text, lst[0])])
        else:
            g = group([import_single_manifest.s(None, url)
                       for url in lst]).skew(start=0, step=0.3)
        task = g.apply_async(task_id=shared_id)
        task.save()
    else:
        if imp.errors:
            return Response({'errors': imp.errors},
                            status=status.HTTP_400_BAD_REQUEST)
        return Response({'errors': ['Failed to find recognisable IIIF manifest data.']},
                        status=status.HTTP_400_BAD_REQUEST)

    # Return a URL where the status of the import can be polled.
    status_url = reverse('status', request=request, args=[shared_id])
    return Response({'status': status_url}, status.HTTP_202_ACCEPTED)
def import_replications(start, end):
    """Receives a start and an end number and imports each replication file
    in this interval.
    """
    Import(start=start, end=end).save()
    urls = [format_url(n) for n in range(start, end + 1)]
    group(get_filter_changeset_file.s(url) for url in urls)()
def update_all_apidata(*args, **kwargs):
    character_keys = APIKey.objects.filter(expired=False).exclude(type='Corporation')
    corpkeys = APIKey.objects.filter(expired=False, type='Corporation')
    tasks = queue_character_tasks(character_keys) + queue_corporation_tasks(corpkeys)
    if len(tasks) > 0:
        group(tasks).apply_async(queue='transient')
def task_group():
    # 100 random nums (group) -> filter -> sum (chain)
    # execute several tasks in parallel.
    g = group(rand.s(x) for x in range(100))
    # chaining a group together with another task will auto-upgrade it to a chord
    gchain = chain(g, filter.s(), xsum.s())
    gchain.apply_async(countdown=1)
    # logger.warning(datetime.now() + timedelta(seconds=5))
    # gchain.apply_async(eta=datetime.now() + timedelta(seconds=5))
    # gchain.apply_async(countdown=5)

    job = group([
        add.s(1, 2),
        add.s(2, 3),
        add.s(3, 4),
        add.s(4, 5),
    ])
    chain(job, xsum.s()).apply_async()
    # chord(job, xsum.s()).apply_async()

    # sync call
    if settings.CELERY_ALWAYS_EAGER:
        res = job.delay()
        cnt = 0
        while not res.ready():
            print('wait job ready%s' % ('.' * cnt))  # an async call would get stuck here
            cnt += 1
        print('job result: %s' % sum(res.get(timeout=2)))
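# The comments in task_group() above rely on Celery's canvas upgrade rule:
# chaining a group into another signature produces a chord. A minimal sketch
# of that rule, assuming a hypothetical app and trivial add/tsum tasks (these
# names are illustrative and not taken from the snippet above).
from celery import Celery, chord, group

app = Celery('example', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/0')


@app.task
def add(x, y):
    return x + y


@app.task
def tsum(numbers):
    return sum(numbers)


# The group becomes the chord header (run in parallel) and the signature
# becomes the body, which receives the list of header results.
sig = group(add.s(i, i) for i in range(10)) | tsum.s()
assert isinstance(sig, chord)

# result = sig.apply_async()
# result.get()  # -> 90 once a worker and result backend are available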
def fetch_transactions_status(transaction_ids=None):
    eligible_transactions = Transaction.objects.filter(
        state=Transaction.States.Pending
    )
    if transaction_ids:
        eligible_transactions = eligible_transactions.filter(pk__in=transaction_ids)

    group(fetch_transaction_status.s(transaction.id)
          for transaction in eligible_transactions)()
def process_queue(*args, **kwargs):
    keys = kwargs.get('keys')
    queue = kwargs.get('queue')
    if queue == Person.not_added_key:
        # process not added persons queue
        result = group(add_instance.subtask(
            (key, Person), options={'queue': 'persons'}) for key in keys)
        result.apply_async(queue='persons')
    elif queue == Movie.not_added_key:
        # process not added movies queue
        result = group(add_instance.subtask(
            (key, Movie), options={'queue': 'movies'}) for key in keys)
        result.apply_async(queue='movies')
    elif queue == Person.not_saved_relations_key:
        # process not saved persons relations queue,
        # save person -> movie relations
        result = group(save_relations.subtask(
            (key, Person), options={'queue': 'relations'}) for key in keys)
        result.apply_async(queue='relations')
    elif queue == Movie.not_saved_relations_key:
        # process not saved movies relations queue,
        # save movie -> movie relations
        result = group(save_relations.subtask(
            (key, Movie), options={'queue': 'relations'}) for key in keys)
        result.apply_async(queue='relations')
def _call_task_errbacks(self, request, exc, traceback):
    old_signature = []
    for errback in request.errbacks:
        errback = self.app.signature(errback)
        if (
            # Celery tasks created with the @task decorator have the
            # __header__ property, but Celery tasks created from the
            # Task class do not have this property.
            # That's why we have to check whether this property exists
            # before checking if it is a partial function.
            hasattr(errback.type, '__header__') and

            # workaround to support tasks with bind=True executed as
            # link errors. Otherwise, retries can't be used.
            not isinstance(errback.type.__header__, partial) and
            arity_greater(errback.type.__header__, 1)
        ):
            errback(request, exc, traceback)
        else:
            old_signature.append(errback)
    if old_signature:
        # Previously, the errback was called as a task, so we still
        # need to do so if the errback only takes a single task_id arg.
        task_id = request.id
        root_id = request.root_id or task_id
        group(old_signature, app=self.app).apply_async(
            (task_id,), parent_id=task_id, root_id=root_id
        )
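# _call_task_errbacks() above distinguishes errbacks that accept
# (request, exc, traceback) from legacy single-argument errbacks that only
# receive the failed task's id. A minimal, hypothetical sketch of how such an
# errback gets attached in the first place (the app and task names here are
# assumptions, not taken from the snippet above):
from celery import Celery

app = Celery('example', broker='redis://localhost:6379/0')


@app.task
def fail():
    raise RuntimeError('boom')


@app.task
def on_failure(task_id):
    # Legacy-style errback: called with the id of the failed task, which is
    # the `old_signature` code path dispatched as a group above.
    print('task %s failed' % task_id)


# If fail() raises, the linked errback runs.
fail.apply_async(link_error=on_failure.s())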
def generate_pdfs():
    dirty_documents = chain(Invoice.objects.filter(pdf__dirty__gt=0),
                            Proforma.objects.filter(pdf__dirty__gt=0))

    # Generate PDFs in parallel
    group(generate_pdf.s(document.id, document.kind)
          for document in dirty_documents)()
def scan_launch(scan_id):
    with session_transaction() as session:
        scan = None
        try:
            log.debug("scan: %s launching", scan_id)
            # Part for common action for whole scan
            scan = Scan.load_from_ext_id(scan_id, session)
            scan_request = scan_ctrl._create_scan_request(
                scan.files_ext,
                scan.get_probelist(),
                scan.mimetype_filtering)
            scan_request = scan_ctrl._add_empty_results(
                scan.files_ext, scan_request, scan, session)
            # Nothing to do
            if scan_request.nb_files == 0:
                scan.set_status(IrmaScanStatus.finished)
                log.warning("scan %s: finished nothing to do", scan_id)
                return

            # Part for action file_ext by file_ext
            file_ext_id_list = [file.external_id for file in scan.files_ext]
            celery.group(scan_launch_file_ext.si(file_ext_id)
                         for file_ext_id in file_ext_id_list)()
            scan.set_status(IrmaScanStatus.launched)
            session.commit()
            log.info("scan %s: launched", scan_id)
            return
        except Exception as e:
            log.exception(e)
            if scan is not None:
                scan.set_status(IrmaScanStatus.error)
def test_on_run(self):
    args = [
        ('twseid', 'INFO', './log/twseid.log', True, False),
        ('otcid', 'INFO', './log/otcid.log', True, False)
    ]
    tasks = group([
        run_scrapy_service.subtask(args[0]),
        run_scrapy_service.subtask(args[1])
    ])
    results = tasks.apply_async()
    results.join()
    self.assertTrue(results.ready())
    self.assertTrue(results.successful())

    args = [
        ('twsehistrader', 'INFO', './log/twsehistrader.log', True, True),
        ('twsehisstock', 'INFO', './log/twsehisstock.log', True, True),
        ('otchistrader', 'INFO', './log/otchistrader.log', True, True),
        ('otchisstock', 'INFO', './log/otchisstock.log', True, True)
    ]
    tasks = group([
        run_scrapy_service.subtask(args[0]),
        run_scrapy_service.subtask(args[1]),
        run_scrapy_service.subtask(args[2]),
        run_scrapy_service.subtask(args[3])
    ])
    t = timeit.Timer()
    results = tasks.apply_async()
    results.join()
    print "scrapy all bin.tasks used %.4f(s)" % (t.timeit())
    self.assertTrue(results.ready())
    self.assertTrue(results.successful())
def handle(self, *args, **options):
    pks = (ActivityLog.objects.review_queue().values_list('pk', flat=True)
           .order_by('id'))

    ts = [add_versionlog.subtask(args=[chunk])
          for chunk in chunked(pks, 100)]

    group(ts).apply_async()
def generate_eod_tasks():
    '''
    Task responsible for generating work items used to obtain
    end of day data for stocks using the get_eod_data() task
    '''
    db = MongoDBUtil()
    symbol_sets = set()

    # Gets all symbols
    sp500 = finsymbols.get_sp500_symbols()
    amex = finsymbols.get_amex_symbols()
    nyse = finsymbols.get_nyse_symbols()
    nasdaq = finsymbols.get_nasdaq_symbols()

    # Adds all symbols to set which removes duplicates
    symbol_sets.update(_get_symbol_set(sp500))
    symbol_sets.update(_get_symbol_set(amex))
    symbol_sets.update(_get_symbol_set(nyse))
    symbol_sets.update(_get_symbol_set(nasdaq))

    now = datetime.datetime.now()
    end_date = '-'.join([str(now.year), str(now.month), str(now.day)])

    his_symbols = db.has_historical_data(symbol_sets)
    if len(his_symbols) >= 1:
        start_date = '1980-01-01'
        hist_job = group(get_eod_data.s(symbol, start_date, end_date)
                         for symbol in symbol_sets)
        hist_job.apply_async()

    # Obtain data for current date
    job = group(get_eod_data.s(symbol, end_date, end_date)
                for symbol in symbol_sets)
    job.apply_async()
def broadcast(type, task, args, kwargs=None, callback=None):  # pylint: disable=redefined-builtin
    """
    Run a broadcast across our servers.

    Returns a task group that can be checked for results.

    `callback` should be a task signature that will be run once,
    after all of the broadcast tasks have finished running.
    """
    if type not in ['web', 'app', 'build']:
        raise ValueError('allowed values of `type` are web, app and build.')
    if kwargs is None:
        kwargs = {}

    if type in ['web', 'app']:
        servers = settings.MULTIPLE_APP_SERVERS
    elif type in ['build']:
        servers = settings.MULTIPLE_BUILD_SERVERS

    tasks = []
    for server in servers:
        task_sig = task.s(*args, **kwargs).set(queue=server)
        tasks.append(task_sig)
    if callback:
        task_promise = chord(tasks, callback).apply_async()
    else:
        # Celery's Group class does some special handling when an iterable
        # with len() == 1 is passed in. This will be hit if there is only
        # one server defined in the above queue lists.
        if len(tasks) > 1:
            task_promise = group(*tasks).apply_async()
        else:
            task_promise = group(tasks).apply_async()
    return task_promise
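# The else-branch in broadcast() above switches between group(*tasks) and
# group(tasks). A small illustration of the two construction forms, assuming
# a trivial hypothetical ping task; group() accepts either unpacked signatures
# or a single iterable of signatures.
from celery import Celery, group

app = Celery('example', broker='redis://localhost:6379/0')


@app.task
def ping(host):
    return 'pong from %s' % host


sigs = [ping.s(h) for h in ['web01', 'web02']]

g_unpacked = group(*sigs)   # signatures passed as positional arguments
g_iterable = group(sigs)    # a single list (or generator) of signatures

# Both build an equivalent two-task group; which form the code above picks
# depends only on how many servers are configured.
assert len(g_unpacked.tasks) == len(g_iterable.tasks) == 2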
def update_user_ratings():
    """Update add-on authors' ratings."""

    cursor = connections[multidb.get_replica()].cursor()
    # We build this query ahead of time because the cursor complains about
    # data truncation if we let it interpolate the parameters. Also, this
    # query is surprisingly quick: <1 sec for 6100 rows returned.
    q = """ SELECT
                addons_users.user_id as user_id,
                AVG(rating) as avg_rating
            FROM reviews
                INNER JOIN versions
                INNER JOIN addons_users
                INNER JOIN addons
            ON reviews.version_id = versions.id
                AND addons.id = versions.addon_id
                AND addons_users.addon_id = addons.id
            WHERE reviews.reply_to IS NULL
                AND reviews.rating > 0
                AND addons.status IN (%s)
            GROUP BY addons_users.user_id
            """ % (",".join(map(str, VALID_ADDON_STATUSES)))

    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [update_user_ratings_task.subtask(args=[chunk])
          for chunk in chunked(d, 1000)]
    group(ts).apply_async()
def inform(self):
    while True:
        self.container.put(obj.async_jump.get())
        obj.async_jump = AsyncResult()
        if self.container.qsize() > 10:
            group(parse.s(item) for item in self.task_args())()
            obj.async_in.set()
def test_chord_in_chords_with_chains(self, manager):
    try:
        manager.app.backend.ensure_chords_allowed()
    except NotImplementedError as e:
        raise pytest.skip(e.args[0])

    c = chord(
        group([
            chain(
                add.si(1, 2),
                chord(
                    group([add.si(1, 2), add.si(1, 2)]),
                    add.si(1, 2),
                ),
            ),
            chain(
                add.si(1, 2),
                chord(
                    group([add.si(1, 2), add.si(1, 2)]),
                    add.si(1, 2),
                ),
            ),
        ]),
        add.si(2, 2)
    )

    r = c.delay()

    assert r.get(timeout=TIMEOUT) == 4
def populate_attribute(self, summary_attribute, update=True):
    """Populate all instances of summary_attribute in table.

    Arguments:
        summary_attribute - either an instance of h1ds.models.SummaryAttribute
            or its slug

    """
    try:
        attr_slug = summary_attribute.slug
    except AttributeError:
        attr_slug = summary_attribute

    shot_queryset = Shot.objects.filter(device=self.device,
                                        number__lte=self.device.latest_shot.number)

    if update:
        task_name = update_single_table_attribute
    else:
        task_name = insert_single_table_attribute

    shot_manager = get_backend_shot_manager_for_device(self.device)
    shot_timestamp = shot_manager().get_timestamp_for_shot
    group(
        (task_name.s(self.device.slug, shot.number,
                     shot_timestamp(shot.number), attr_slug)
         for shot in shot_queryset),
    ).apply_async()
def test_group_to_chord(self):
    c = (
        self.add.s(5) |
        group([self.add.s(i, i) for i in range(5)], app=self.app) |
        self.add.s(10) |
        self.add.s(20) |
        self.add.s(30)
    )
    c._use_link = True
    tasks, results = c.prepare_steps((), c.tasks)

    self.assertEqual(tasks[-1].args[0], 5)

    self.assertIsInstance(tasks[-2], chord)
    self.assertEqual(len(tasks[-2].tasks), 5)
    self.assertEqual(tasks[-2].parent_id, tasks[-1].id)
    self.assertEqual(tasks[-2].root_id, tasks[-1].id)
    self.assertEqual(tasks[-2].body.args[0], 10)
    self.assertEqual(tasks[-2].body.parent_id, tasks[-2].id)

    self.assertEqual(tasks[-3].args[0], 20)
    self.assertEqual(tasks[-3].root_id, tasks[-1].id)
    self.assertEqual(tasks[-3].parent_id, tasks[-2].body.id)

    self.assertEqual(tasks[-4].args[0], 30)
    self.assertEqual(tasks[-4].parent_id, tasks[-3].id)
    self.assertEqual(tasks[-4].root_id, tasks[-1].id)

    self.assertTrue(tasks[-2].body.options['link'])
    self.assertTrue(tasks[-2].body.options['link'][0].options['link'])

    c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10))
    c2._use_link = True
    tasks2, _ = c2.prepare_steps((), c2.tasks)
    self.assertIsInstance(tasks2[0], group)
def execute_transactions(transaction_ids=None):
    executable_transactions = Transaction.objects.filter(
        state=Transaction.States.Initial
    )
    if transaction_ids:
        executable_transactions = executable_transactions.filter(pk__in=transaction_ids)

    group(execute_transaction.s(transaction.id)
          for transaction in executable_transactions)()
def _evil_groupmember(self, evil_t, *eargs, **opts):
    g1 = group(add.s(2, 2).set(**opts), evil_t.s(*eargs).set(**opts),
               add.s(4, 4).set(**opts), add.s(8, 8).set(**opts))
    g2 = group(add.s(3, 3).set(**opts), add.s(5, 5).set(**opts),
               evil_t.s(*eargs).set(**opts), add.s(7, 7).set(**opts))
    self.join(g1(), timeout=10)
    self.join(g2(), timeout=10)
def on_error(request, exc, uuid, state=FAILURE, call_errbacks=True):
    if propagate:
        raise
    I = Info(state, exc)
    R = I.handle_error_state(task, eager=eager)
    if call_errbacks:
        group(
            [signature(errback, app=app)
             for errback in request.errbacks or []],
            app=app,
        ).apply_async((uuid,))
    return I, R, I.state, I.retval
def fire():
    myName = getHostname()
    log("{0} fires.".format(myName))
    setOutputActive()

    # kick downstream neurons
    connections = getConnections()
    group([kick.subtask((myName,), routing_key=connection)
           for connection in connections]).apply_async()
def reindex_collections(index=None):
    from . import tasks
    ids = (Collection.objects.exclude(type=amo.COLLECTION_SYNCHRONIZED)
           .values_list('id', flat=True))
    taskset = [tasks.index_collections.subtask(args=[chunk],
                                               kwargs=dict(index=index))
               for chunk in chunked(sorted(list(ids)), 150)]
    group(taskset).apply_async()
def check_virtual_connection_consistency():
    with db_session() as db:
        group(convert_virtual_connections.s(_) for _, in (
            db.query(User.id).filter(
                func.lower(User.username)
                .in_(db.query(func.lower(VirtualUserConnection.to_username).distinct()))
            )
        )).delay()
def reindex_addons(index=None, addon_type=None):
    from . import tasks
    ids = Addon.unfiltered.values_list('id', flat=True)
    if addon_type:
        ids = ids.filter(type=addon_type)
    ts = [tasks.index_addons.subtask(args=[chunk], kwargs=dict(index=index))
          for chunk in chunked(sorted(list(ids)), 150)]
    group(ts).apply_async()
def update_collections_total():
    """Update collections downloads totals."""
    d = (CollectionCount.objects.values('collection_id')
         .annotate(sum=Sum('count')))
    ts = [tasks.update_collections_total.subtask(args=[chunk])
          for chunk in chunked(d, 50)]
    group(ts).apply_async()
def build_sweep(sweep_id: str): from sweep_builder.init_tokens import init_tokens from sweep_builder.pipeline import iter_pipeline from sweep_builder.reality_inferrer.reality import iter_reality_base try: _measurement_name_base = __name__ + '.' + build_sweep.__name__ + '.' _measurement_tags = {'sweep_id': sweep_id} # In the jobs persister we purposefully avoid persisting # anything besides the Job ID. This means that things like tokens # and other data on *Claim is lost. # As long as we are doing that, we need to leave tokens somewhere # for workers to pick up. logger.info(f"#{sweep_id} Prepositioning platform tokens") init_tokens(sweep_id) logger.info(f"#{sweep_id} Starting sweep building") # task_group = TaskGroup() delayed_tasks = [] cnt = 0 with Measure.counter(_measurement_name_base + 'outer_loop', tags=_measurement_tags) as cntr: for reality_claim in iter_reality_base(): # what we get here are Scope and AdAccount objects. # Children of AdAccount reality claims are to be processed # in separate Celery tasks. But we still have jobs # associated with Scopes objects, so # need to rate and store the jobs before chipping off # a separate task for each of AdAccounts. if reality_claim.entity_type == Entity.AdAccount: # child_task_id = task_group.generate_task_id() # task_group.report_task_active(child_task_id) delayed_tasks.append( # we are using Celery chord to process AdAccounts in parallel # for very very large (hundreds of thousands) numbers of AdAccounts, # chord management will be super memory expensive, # as chord timer/controller will be looking at entire list on # each tick. # In that case, probably better to switch to # a callback per handler + mutex/counter somewhere build_sweep_slice_per_ad_account_task.si( sweep_id, reality_claim, # task_id=child_task_id )) elif reality_claim.entity_type == Entity.Page: delayed_tasks.append( build_sweep_slice_per_page.si(sweep_id, reality_claim)) else: cnt = 1 _step = 1000 for _ in iter_pipeline(sweep_id, [reality_claim]): cnt += 1 if cnt % _step == 0: cntr += _step logger.info( f'#{sweep_id}-root: Queueing up #{cnt}') # because above counter communicates only increments of _step, # we need to report remainder --- amount under _step cntr += cnt % _step logger.info(f"#{sweep_id}-root: Queued up a total of {cnt} tasks") # # here we fan out actual work to celery workers # # and wait for all tasks to finish before returning group_result = group(delayed_tasks).delay() # In case the workers crash, go-away (scaling) or are otherwise # non-responsive, the following would wait indefinitely. # Since that's not desirable and the total sweep build time is minutes at # maximum, we add a reasonable timeout # Because we are not joining on the results, but actually periodically # looking for "you done yet?", we can exit if this threshold is busted, and # let the next run recover from the situation # You will nee should_be_done_by = time.time() + (60 * 20) Measure.gauge(f'{_measurement_name_base}per_account_sweep.total', tags=_measurement_tags)(len(group_result.results)) # Monitor the progress. 
Although this obviously can be achieved with # group_result.join(), we need to "see" into the task group progress with Measure.gauge(f'{_measurement_name_base}per_account_sweep.done', tags=_measurement_tags) as measure_done: while True: done_counter = 0 for result in group_result.results: logger.debug(f'{result}: {result.state}') if result.ready(): done_counter += 1 logger.debug( f"TOTAL: {done_counter}/{len(group_result.results)}") logger.debug("=" * 20) logger.debug("Checking group result") measure_done(done_counter) if group_result.ready(): logger.debug(f"#{sweep_id}-root: Sweep build complete") break # Important. If we don't sleep, the native join in celery context # switches all the time and we end up with 100% cpu, eventually somehow # deadlocking the process. 5 seconds is kind of an arbitrary number, but # does what we need and the impact of a (potential) delay is absolutely # minimal time.sleep(5) # The last line of defense. Workers did not finish in time we # expected, no point waiting, kill it. if time.time() > should_be_done_by: Measure.gauge( f'{_measurement_name_base}per_account_sweep.early_exits', tags=_measurement_tags)(1) logger.warning( "Exiting incomplete sweep build, it's taking too long") return logger.info("Waiting on results join") if group_result.supports_native_join: group_result.join_native() else: # Eager mode does not support native join. group_result.join() # # alternative to Celery's native group_result.join() # # our manual task tracking code + join() # task_group.join() logger.info("Join complete, sweep build ended") except Exception as ex: ErrorInspector.inspect(ex, None, {'sweep_id': sweep_id})
def run(self):
    db_session.add(self)
    log = self.get_log(job_run=self)
    db_session.add(log)
    try:
        self.status = JobRunStatus.running
        self.run_at = now()
        print self.run_at
        log.add_log("started", "Job Started at %s" % (self.run_at))
        db_session.commit()

        checks_to_run = []
        # Let the rules populate the checks they wish to run. Don't forget to
        # first open connections on all sources so they can be queried.
        [source.open_connection() for source in self.job_template.data_sources]
        map(lambda r: r.run(self, checks_to_run), self.job_template.rules)
        [source.close_connection() for source in self.job_template.data_sources]

        # Dedupe checks_to_run, even against checks. Expects tuples of the
        # format (DataSource, table_name_string, Check).
        seen = set()
        seen_add = seen.add
        checks_to_run = [
            c for c in checks_to_run
            if not ((c[0].id, c[1], c[2].id) in seen or
                    seen_add((c[0].id, c[1], c[2].id)))
        ]

        if len(checks_to_run) > 0:
            # Bucketize checks based on the parallelization chosen. Each bucket runs sequentially.
            checks_by_parallelization = self.get_checks_by_parallelization(checks_to_run)

            # Run each bucket of checks in a separate celery worker: turn each
            # subarray into an array of run-check job signatures, splat each
            # array into a chain (so checks run one at a time within a chain),
            # then group all chains together so they run in parallel. Each
            # chain is a worker. Finally, call register_finished when all are done.
            separate_queues = [
                map(lambda c: celery_jobs.job_runs.run_check.si(c[0].id, c[1], c[2].id, self.id), chks)
                for chks in checks_by_parallelization
            ]
            sep_chains = [chain(*queue) for queue in separate_queues]
            print sep_chains
            group_of_chains = (group(*sep_chains) |
                               celery_jobs.job_runs.register_finished.s(self.id)).apply_async()
        else:
            self.set_finished()
    except Exception:
        self.set_failed()

    db_session.add(log)
    db_session.commit()
def trace_task(uuid, args, kwargs, request=None): # R - is the possibly prepared return value. # I - is the Info object. # T - runtime # Rstr - textual representation of return value # retval - is the always unmodified return value. # state - is the resulting task state. # This function is very long because we've unrolled all the calls # for performance reasons, and because the function is so long # we want the main variables (I, and R) to stand out visually from the # the rest of the variables, so breaking PEP8 is worth it ;) R = I = T = Rstr = retval = state = None task_request = None time_start = monotonic() try: try: kwargs.items except AttributeError: raise InvalidTaskError( "Task keyword arguments is not a mapping") push_task(task) task_request = Context(request or {}, args=args, called_directly=False, kwargs=kwargs) root_id = task_request.root_id or uuid task_priority = (task_request.delivery_info.get("priority") if inherit_parent_priority else None) push_request(task_request) try: # -*- PRE -*- if prerun_receivers: send_prerun(sender=task, task_id=uuid, task=task, args=args, kwargs=kwargs) loader_task_init(uuid, task) if track_started: store_result( uuid, { "pid": pid, "hostname": hostname }, STARTED, request=task_request, ) # -*- TRACE -*- try: R = retval = fun(*args, **kwargs) state = SUCCESS except Reject as exc: I, R = Info(REJECTED, exc), ExceptionInfo(internal=True) state, retval = I.state, I.retval I.handle_reject(task, task_request) traceback_clear(exc) except Ignore as exc: I, R = Info(IGNORED, exc), ExceptionInfo(internal=True) state, retval = I.state, I.retval I.handle_ignore(task, task_request) traceback_clear(exc) except Retry as exc: I, R, state, retval = on_error(task_request, exc, uuid, RETRY, call_errbacks=False) traceback_clear(exc) except Exception as exc: I, R, state, retval = on_error(task_request, exc, uuid) traceback_clear(exc) except BaseException: raise else: try: # callback tasks must be applied before the result is # stored, so that result.children is populated. 
# groups are called inline and will store trail # separately, so need to call them separately # so that the trail's not added multiple times :( # (Issue #1936) callbacks = task.request.callbacks if callbacks: if len(task.request.callbacks) > 1: sigs, groups = [], [] for sig in callbacks: sig = signature(sig, app=app) if isinstance(sig, group): groups.append(sig) else: sigs.append(sig) for group_ in groups: group_.apply_async( (retval, ), parent_id=uuid, root_id=root_id, priority=task_priority, ) if sigs: group(sigs, app=app).apply_async( (retval, ), parent_id=uuid, root_id=root_id, priority=task_priority, ) else: signature(callbacks[0], app=app).apply_async( (retval, ), parent_id=uuid, root_id=root_id, priority=task_priority, ) # execute first task in chain chain = task_request.chain if chain: _chsig = signature(chain.pop(), app=app) _chsig.apply_async( (retval, ), chain=chain, parent_id=uuid, root_id=root_id, priority=task_priority, ) mark_as_done( uuid, retval, task_request, publish_result, ) except EncodeError as exc: I, R, state, retval = on_error(task_request, exc, uuid) else: Rstr = saferepr(R, resultrepr_maxsize) T = monotonic() - time_start if task_on_success: task_on_success(retval, uuid, args, kwargs) if success_receivers: send_success(sender=task, result=retval) if _does_info: info( LOG_SUCCESS, { "id": uuid, "name": get_task_name(task_request, name), "return_value": Rstr, "runtime": T, }, ) # -* POST *- if state not in IGNORE_STATES: if task_after_return: task_after_return( state, retval, uuid, args, kwargs, None, ) finally: try: if postrun_receivers: send_postrun( sender=task, task_id=uuid, task=task, args=args, kwargs=kwargs, retval=retval, state=state, ) finally: pop_task() pop_request() if not eager: try: backend_cleanup() loader_cleanup() except (KeyboardInterrupt, SystemExit, MemoryError): raise except Exception as exc: logger.error("Process cleanup failed: %r", exc, exc_info=True) except MemoryError: raise except Exception as exc: _signal_internal_error(task, uuid, args, kwargs, request, exc) if eager: raise R = report_internal_error(task, exc) if task_request is not None: I, _, _, _ = on_error(task_request, exc, uuid) return trace_ok_t(R, I, T, Rstr)
def handle_grb_lvalert(alert): """Parse an LVAlert message related to superevents/GRB external triggers and dispatch it to other tasks. Notes ----- This LVAlert message handler is triggered by creating a new superevent or GRB external trigger event, or a label associated with completeness of sky maps: * Any new event triggers a coincidence search with :meth:`gwcelery.tasks.raven.coincidence_search`. * When both a GW and GRB sky map are available during a coincidence, indicated by the labels ``SKYMAP_READY`` and ``EXT_SKYMAP_READY`` respectfully, this trigger the spacetime coinc FAR to be calculated. If an alert is triggered with these same conditions, indicated by the ``RAVEN_ALERT`` label, a combined GW-GRB sky map is created using :meth:`gwcelery.tasks.external_skymaps.create_combined_skymap`. """ # Determine GraceDB ID graceid = alert['uid'] # launch searches if alert['alert_type'] == 'new': if alert['object'].get('group') == 'External': # Create and upload Swift sky map for the joint targeted # sub-threshold search as agreed on in the MOU if alert['object']['search'] == 'SubGRBTargeted' and \ alert['object']['pipeline'] == 'Swift': external_skymaps.create_upload_external_skymap( alert['object'], None, alert['object']['created']) # launch standard Burst-GRB search raven.coincidence_search(graceid, alert['object'], group='Burst') if alert['object']['search'] in ['SubGRB', 'SubGRBTargeted']: # if sub-threshold GRB, launch search with that pipeline raven.coincidence_search( graceid, alert['object'], group='CBC', searches=['SubGRB', 'SubGRBTargeted'], pipelines=[alert['object']['pipeline']]) else: # if threshold GRB, launch standard CBC-GRB search raven.coincidence_search(graceid, alert['object'], group='CBC', searches=['GRB']) elif 'S' in graceid: # launch standard GRB search based on group preferred_event_id = alert['object']['preferred_event'] gw_group = gracedb.get_group(preferred_event_id) raven.coincidence_search(graceid, alert['object'], group=gw_group, searches=['GRB']) if gw_group == 'CBC': # launch subthreshold searches if CBC # for Fermi and Swift separately to use different time windows for pipeline in ['Fermi', 'Swift']: raven.coincidence_search( graceid, alert['object'], group='CBC', searches=['SubGRB', 'SubGRBTargeted'], pipelines=[pipeline]) # rerun raven pipeline or created combined sky map when sky maps are # available elif alert['alert_type'] == 'label_added' and \ alert['object'].get('group') == 'External': if _skymaps_are_ready(alert['object'], alert['data']['name'], 'compare'): # if both sky maps present and a coincidence, compare sky maps se_id, ext_ids = _get_superevent_ext_ids(graceid, alert['object'], 'compare') superevent = gracedb.get_superevent(se_id) preferred_event_id = superevent['preferred_event'] gw_group = gracedb.get_group(preferred_event_id) tl, th = raven._time_window(graceid, gw_group, [alert['object']['pipeline']], [alert['object']['search']]) raven.raven_pipeline([alert['object']], se_id, superevent, tl, th, gw_group) if _skymaps_are_ready(alert['object'], alert['data']['name'], 'combine'): # if both sky maps present and a raven alert, create combined # skymap se_id, ext_id = _get_superevent_ext_ids(graceid, alert['object'], 'combine') external_skymaps.create_combined_skymap(se_id, ext_id) elif 'EM_COINC' in alert['object']['labels']: # if not complete, check if GW sky map; apply label to external # event if GW sky map se_labels = gracedb.get_labels(alert['object']['superevent']) if 'SKYMAP_READY' in se_labels: gracedb.create_label.si('SKYMAP_READY', 
graceid).delay() elif alert['alert_type'] == 'label_added' and 'S' in graceid and \ 'SKYMAP_READY' in alert['object']['labels']: # if sky map in superevent, apply label to all external events # at the time group( gracedb.create_label.si('SKYMAP_READY', ext_id) for ext_id in alert['object']['em_events'] ).delay()
def test_chain_with_chord_raises_error(self):
    with pytest.raises(NotImplementedError):
        (self.add.s(2, 2) | group(self.add.s(2, 2), self.add.s(5, 6)) |
         self.add.s()).delay()
def sync_all():
    job = group([sync.s(account.id) for account in StripeAccount.query.all()])
    job.apply_async()
def run_collectors_by_type(*args):
    for slug in args:
        collector_type = CollectorType.objects\
            .prefetch_related('collectors').get(slug=slug)
        slugs = [c.slug for c in collector_type.collectors.all()]
        group(run_collector.s(slug) for slug in slugs)()
#!/usr/bin/env python
import bipolar
from celery import group
from time import sleep
from elasticsearch import helpers, Elasticsearch
import sys
import json

ip_net = sys.argv[1]
ips = bipolar.net_explode(ip_net)

my_group = group([bipolar.scan_heartbleed.s(ip) for ip in ips])
group_results = my_group.apply_async(queue='scan')
for child in group_results.children:
    print(child.as_tuple()[0][0])

#group_results = my_group.apply_async()
#while not group_results.ready():
#    print('waiting for jobs to complete')
#    sleep(10)
#group_results = group_results.get()
#
#scan_data = {}
#for results in group_results:
#    ip = results['scan'].keys()[0]
#    scan_data[ip] = results['scan'][ip]
#
#output = []
#for ip in scan_data.keys():
#    open_ports = []
#    if 'tcp' in scan_data[ip].keys():
import bipolar
from celery import group
from time import sleep
from elasticsearch import helpers, Elasticsearch
import sys
import json

sqli_file = sys.argv[1]

urls = []
with open(sqli_file) as f:
    url_data = f.readlines()
for data in url_data:
    urls.append(data.strip())

my_group = group([bipolar.sqli_check.s(url) for url in urls])
group_results = my_group.apply_async()
print(group_results)
while not group_results.ready():
    print('waiting for jobs to complete')
    sleep(10)
group_results = group_results.get()

output = []
for results in group_results:
    if results is not None:
        for i in results:
            output.append(json.dumps(i))

print(output)
def train_networks(self, networks, initial=False): """ train each networks on cluster server :param networks: network lists :return: networks """ try: tasks = [] # if first version run it alone first for first_network = {} if (initial == True and len(networks) > 0 and len(list(first_network.keys())) == 0): network = networks[0] network['flag'] = True key = '_'.join( [network['nn_id'], str(network['nn_wf_ver_id'])]) if (self.celery_flag): result = train.delay( network.get('nn_id'), str(network.get('nn_wf_ver_id'))).get() network['acc'] = result[key].get('accuracy') else: result = train(network.get('nn_id'), str(network.get('nn_wf_ver_id'))) network['acc'] = result[key].get('accuracy') first_network = networks[0].copy() del networks[0] if (self.celery_flag): # You can use cluster servers for faster hyper parameter searching # using cluster server with celery for genetic algorithm for network in networks: if (network['flag'] == True): continue tasks.append( train.subtask((network.get('nn_id'), str(network.get('nn_wf_ver_id'))))) results = group(tasks).apply_async() results = results.join() for result in results: for network in networks: key = '_'.join( [network['nn_id'], str(network['nn_wf_ver_id'])]) if (key in list(result.keys()) and result[key] is not None and result[key].get('accuracy') is not None): network['acc'] = result[key].get('accuracy') network['flag'] = True else: # for debug you can run all tasks on django process for network in networks: if (network['flag'] == True): continue result = train(network.get('nn_id'), str(network.get('nn_wf_ver_id'))) key = '_'.join( [network['nn_id'], str(network['nn_wf_ver_id'])]) network['acc'] = result[key].get('accuracy') network['flag'] = True if len(list(first_network.keys())) > 0: networks.append(first_network) except Exception as e: logging.error("Error on training : {0} ".format(e)) finally: return networks
def add_chord_to_chord(self, nums, val):
    subtasks = [add.s(num, val) for num in nums]
    self.add_to_chord(group(subtasks) | tsum.s())
    return 0
def alwaysexits(self):
    g = group(exiting.s() for _ in range(10))
    self.join(g(), timeout=10)
def alwayskilled(self):
    g = group(kill.s() for _ in range(10))
    self.join(g(), timeout=10)
def adds(self):
    group(add.s(i, i) for i in xrange(10)).delay()
def orthorectify(initWorkingSetName, stepName, requestInfo, jobId, outputFolder, imageFiles, dsmFile, dtmFile, rpcFiles, occlusionThreshold=None, denoiseRadius=None): """ Run Girder Worker jobs to orthorectify source images. Requirements: - Danesfield Docker image is available on host :param initWorkingSetName: The name of the top-level working set. :type initWorkingSetName: str :param stepName: The name of the step. :type stepName: str (DanesfieldStep) :param requestInfo: HTTP request and authorization info. :type requestInfo: RequestInfo :param jobId: Job ID. :type jobId: str :param outputFolder: Output folder document. :type outputFolder: dict :param imageFiles: List of image files. :type imageFiles: list[dict] :param dsmFile: DSM file document. :type dsmFile: dict :param dtmFile: DTM file document. :type dtmFile: dict :param rpcFiles: List of RPC files. :type rpcFiles: list[dict] :param occlusionThreshold: :type occlusionThreshold: float :param denoiseRadius: :type denoiseRadius: float :returns: None """ gc = createGirderClient(requestInfo) def createOrthorectifyTask(imageFile, rpcFile): # Set output file name based on input file name orthoName = os.path.splitext(imageFile['name'])[0] + '_ortho.tif' outputVolumePath = VolumePath(orthoName) # Docker container arguments containerArgs = [ 'danesfield/tools/orthorectify.py', # Source image GirderFileIdToVolume(imageFile['_id'], gc=gc), # DSM GirderFileIdToVolume(dsmFile['_id'], gc=gc), # Destination image outputVolumePath, '--dtm', GirderFileIdToVolume(dtmFile['_id'], gc=gc), '--raytheon-rpc', GirderFileIdToVolume(rpcFile['_id'], gc=gc), ] if occlusionThreshold is not None: containerArgs.extend( ['--occlusion-thresh', str(occlusionThreshold)]) if denoiseRadius is not None: containerArgs.extend(['--denoise-radius', str(denoiseRadius)]) # Result hooks # - Upload output files to output folder # - Provide upload metadata upload_kwargs = createUploadMetadata(jobId, stepName) resultHooks = [ GirderUploadVolumePathToFolder(outputVolumePath, outputFolder['_id'], upload_kwargs=upload_kwargs, gc=gc) ] return docker_run.s( **createDockerRunArguments(image=DockerImage.DANESFIELD, containerArgs=containerArgs, jobTitle=('[%s] Orthorectify: %s' % (initWorkingSetName, imageFile['name'])), jobType=stepName, user=requestInfo.user, resultHooks=resultHooks)) # Find RPC file corresponding to each image, or None correspondingRpcFiles = [ next((rpcFile for rpcFile in rpcFiles if rpcFileMatchesImageFile(rpcFile, imageFile)), None) for imageFile in imageFiles ] # For some images, it seems that we're not getting RPC files from # the P3D step. Deciding to simply skip those images and log a # warning instead of raising an exception for now. 
imagesMissingRpcFiles = [ imageFile['name'] for imageFile, rpcFile in zip(imageFiles, correspondingRpcFiles) if not rpcFile ] if imagesMissingRpcFiles: logprint.info( 'Step: {} -- Warning: Missing RPC files for images: {}'.format( stepName, imagesMissingRpcFiles)) # raise DanesfieldWorkflowException( # 'Missing RPC files for images: {}'.format(imagesMissingRpcFiles), # step=stepName) # Run tasks in parallel using a group; skip if we have no rpcFile # for the given image tasks = [ createOrthorectifyTask(imageFile, rpcFile) for imageFile, rpcFile in zip(imageFiles, correspondingRpcFiles) if rpcFile is not None ] groupResult = group(tasks).delay() DanesfieldWorkflowManager.instance().setGroupResult( jobId, stepName, groupResult) # Add info for job event listeners for result in groupResult.results: addJobInfo(result.job, jobId=jobId, stepName=stepName)
def chaincomplex(self):
    c = (
        add.s(2, 2) | (add.s(4) | add.s(8) | add.s(16)) |
        group(add.s(i) for i in range(4))
    )
    res = c()
    assert_equal(res.get(), [32, 33, 34, 35])
def add_to_all(self, nums, val):
    """Add the given value to all supplied numbers."""
    subtasks = [add.s(num, val) for num in nums]
    raise self.replace(group(*subtasks))
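# add_to_all() above calls self.replace(), which is only available on bound
# tasks. A minimal sketch, assuming a hypothetical app and a trivial add task,
# of how such a bound task is declared so that `self` is available:
from celery import Celery, group

app = Celery('example', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/0')


@app.task
def add(x, y):
    return x + y


@app.task(bind=True)  # bind=True injects the task instance as `self`
def add_to_all(self, nums, val):
    """Replace this task with a group; the group's result becomes
    this task's result."""
    raise self.replace(group(add.s(n, val) for n in nums))

# e.g. add_to_all.delay([1, 2, 3], 10) eventually yields [11, 12, 13]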
def get_content(page_num):
    """Call tasks in parallel: group creates multiple tasks at once."""
    # start = time.time()
    for i in range(1, page_num + 1):
        group(C.s(base_url.format(i)))()
def test_unicode_task(self, manager):
    manager.join(
        group(print_unicode.s() for _ in range(5))(),
        timeout=TIMEOUT, propagate=True,
    )
def _parallel_get_market_trade_metrics(self, tca_request_list, dummy_market): logger = LoggerManager.getLogger(__name__) market_holder_list = DataFrameHolder() trade_order_holder_list = DataFrameHolder() # For each currency pair select collect the trades and market data, then calculate benchmarks and slippage result = [] keep_looping = True # If we have also asked for trades/order if tca_request_list[0].trade_order_mapping is not None: point_in_time_executions_only = \ self._util_func.dict_key_list(tca_request_list[0].trade_order_mapping) == ['trade_df'] else: point_in_time_executions_only = True parallel_library = tca_request_list[0].multithreading_params['parallel_library'] if parallel_library == 'single': # from tcapy.analysis.tcatickerloaderimpl import TCATickerLoaderImpl tca_ticker_loader = Mediator.get_tca_ticker_loader(version=self._version) start_date = tca_request_list[0].start_date finish_date = tca_request_list[0].finish_date # Parameters for the loop i = 0; no_of_tries = 5 # Error trapping for Celery, if have failed event retry it while i < no_of_tries and keep_looping: try: # For each TCA request kick off a thread for tca_request_single_ticker in tca_request_list: # Split up the request by date (monthly/weekly chunks) tca_request_date_split = self._split_tca_request_by_date( tca_request_single_ticker, tca_request_single_ticker.ticker, period=tca_request_single_ticker.multithreading_params['cache_period']) if not(constants.multithreading_params['splice_request_by_dates']) \ or tca_request_list[0].tca_type == 'detailed' \ or tca_request_list[0].tca_type == 'compliance' \ or tca_request_list[0].summary_display == 'candlestick'\ or not(point_in_time_executions_only): if 'celery' in parallel_library: # Load all the data for this ticker and THEN calculate the metrics on it result.append(chord((get_market_trade_holder_via_celery.s(tca_request_data) for tca_request_data in tca_request_date_split), calculate_metrics_single_ticker_via_celery.s(tca_request_single_ticker, dummy_market)).apply_async()) elif parallel_library == 'single': # This is not actually parallel, but is mainly for debugging purposes for tca_request_s in tca_request_date_split: # print(tca_request_s.start_date) market_df, trade_order_df_dict = tca_ticker_loader.get_market_trade_order_holder( tca_request_s, return_cache_handles=False) market_df, trade_order_df_list, ticker, trade_order_keys = \ tca_ticker_loader.calculate_metrics_single_ticker((market_df, trade_order_df_dict), tca_request_s, dummy_market) market_holder_list.add_dataframe(market_df, ticker) trade_order_holder_list.add_dataframe_dict( dict(zip(trade_order_keys, trade_order_df_list))) else: # Otherwise work on parallel chunks by date # doesn't currently work with orders which straddle day/week/month boundaries # but should work with points in time # # In practice, it's not really much faster than the above code if 'celery' == parallel_library: # For each ticker/date combination load data and process chunk (so can do fully in parallel) result.append(group(get_market_trade_holder_and_calculate_metrics_single_ticker_via_celery.s( tca_request_data, dummy_market) for tca_request_data in tca_request_date_split).apply_async()) # Now combine the results from the parallel operations, if using celery if 'celery' in parallel_library: # Careful, when the output is empty! 
output = [p.get(timeout=constants.celery_timeout_seconds) for p in result if p is not None] # If pipelined/splice_request_by_dates will have two lists so flatten it into one output = self._util_func.flatten_list_of_lists(output) for market_df, trade_order_df_list, ticker, trade_order_keys in output: market_holder_list.add_dataframe(market_df, ticker) # market_df_dict[ticker] = market_df trade_order_holder_list.add_dataframe_dict(dict(zip(trade_order_keys, trade_order_df_list))) del result del output keep_looping = False except DateException as e: raise e keep_looping = False except TradeMarketNonOverlapException as e: raise e keep_looping = False except DataMissingException as e: raise e keep_looping = False except ErrorWritingOverlapDataException as e: raise e keep_looping = False # Exception likely related to Celery and possibly lack of communication with Redis message broker # or Memcached results backend # except Exception as e: except Exception as e: if i == no_of_tries - 1: err_msg = "Failed with " + parallel_library + " after multiple attempts: " + str(e) + ", " + str(traceback.format_exc()) raise Exception(err_msg) i = i + 1 logger.warn("Failed with " + parallel_library + ", trying again for " + str(i) + " time: " + str(e) + ", " + str(traceback.format_exc())) logger.debug("Finished parallel computation") # Expand out the DataFrame holders into dictionaries of DataFrames market_df_dict = market_holder_list.get_combined_dataframe_dict() trade_order_results_df_dict = trade_order_holder_list.get_combined_dataframe_dict(start_date=start_date, finish_date=finish_date) # TODO add candlestick drawing here for cases when using split threading by date trade_order_results_df_dict = self._util_func.remove_keymatch_dict(trade_order_results_df_dict, 'market_df_downsampled') return market_df_dict, trade_order_results_df_dict
def run_task(self, job_uid=None, user=None): run_uid = '' logger.debug('Running Job with id: {0}'.format(job_uid)) job = Job.objects.get(uid=job_uid) job_name = self.normalize_job_name(job.name) formats = [format.slug for format in job.formats.all()] export_tasks = [] # build a list of celery tasks based on the export formats.. for format in formats: try: # see settings.EXPORT_TASKS for configuration task_fq_name = self.export_task_registry[format] # instantiate the required class. parts = task_fq_name.split('.') module_path, class_name = '.'.join(parts[:-1]), parts[-1] module = importlib.import_module(module_path) CeleryExportTask = getattr(module, class_name) export_task = CeleryExportTask() export_tasks.append(export_task) except KeyError as e: logger.debug(e) except ImportError as e: msg = 'Error importing export task: {0}'.format(e) logger.debug(msg) # run the tasks if len(export_tasks) > 0: # start the run run = None try: # enforce max runs max_runs = settings.EXPORT_MAX_RUNS run_count = job.runs.count() if run_count > 0: while run_count > max_runs - 1: job.runs.earliest(field_name='started_at').delete( ) # delete earliest run_count -= 1 # add the new run if not user: user = job.user run = ExportRun.objects.create( job=job, user=user, status='SUBMITTED') # persist the run run.save() run_uid = str(run.uid) logger.debug('Saved run with id: {0}'.format(run_uid)) except DatabaseError as e: logger.error('Error saving export run: {0}'.format(e)) raise e # setup the staging directory stage_dir = settings.EXPORT_STAGING_ROOT + str(run_uid) + '/' os.makedirs(stage_dir, 6600) # pull out the tags to create the conf file categories = job.categorised_tags # dict of points/lines/polygons bbox = job.overpass_extents # extents of job in order required by overpass # setup the initial tasks conf = OSMConfTask() query = OverpassQueryTask() pbfconvert = OSMToPBFConvertTask() prep_schema = OSMPrepSchemaTask() # check for transform and/or translate configurations """ Not implemented for now. transform = job.configs.filter(config_type='TRANSFORM') translate = job.configs.filter(config_type='TRANSLATION') """ # save initial tasks to the db with 'PENDING' state.. for initial_task in [conf, query, pbfconvert, prep_schema]: try: ExportTask.objects.create(run=run, status='PENDING', name=initial_task.name) logger.debug('Saved task: {0}'.format(initial_task.name)) except DatabaseError as e: logger.error('Saving task {0} threw: {1}'.format( initial_task.name, e)) raise e # save the rest of the ExportFormat tasks. for export_task in export_tasks: """ Set the region name on the Garmin Export task. The region gets written to the exported '.img' file. Could set additional params here in future if required. 
""" if export_task.name == 'Garmin Export': export_task.region = job.region.name try: ExportTask.objects.create(run=run, status='PENDING', name=export_task.name) logger.debug('Saved task: {0}'.format(export_task.name)) except DatabaseError as e: logger.error('Saving task {0} threw: {1}'.format( export_task.name, e)) raise e # check if we need to generate a preset file from Job feature selections if job.feature_save or job.feature_pub: # run GeneratePresetTask preset_task = GeneratePresetTask() ExportTask.objects.create(run=run, status='PENDING', name=preset_task.name) logger.debug('Saved task: {0}'.format(preset_task.name)) # add to export tasks export_tasks.append(preset_task) """ Create a celery chain which runs the initial conf and query tasks (initial_tasks), followed by a chain of pbfconvert and prep_schema (schema_tasks). The export format tasks (format_tasks) are then run in parallel, followed by the finalize_task at the end to clean up staging dirs, update run status, email user etc.. """ initial_tasks = chain( conf.si(categories=categories, stage_dir=stage_dir, run_uid=run_uid, job_name=job_name) | query.si(stage_dir=stage_dir, job_name=job_name, bbox=bbox, run_uid=run_uid, filters=job.filters)) schema_tasks = chain( pbfconvert.si( stage_dir=stage_dir, job_name=job_name, run_uid=run_uid) | prep_schema.si( stage_dir=stage_dir, job_name=job_name, run_uid=run_uid)) format_tasks = group( task.si( run_uid=run_uid, stage_dir=stage_dir, job_name=job_name) for task in export_tasks) finalize_task = FinalizeRunTask() """ If header tasks fail, errors will not propagate to the finalize_task. This means that the finalize_task will always be called, and will update the overall run status. """ chain( chain(initial_tasks, schema_tasks), chord(header=format_tasks, body=finalize_task.si( stage_dir=stage_dir, run_uid=run_uid)).set( link_error=finalize_task.si())).apply_async( expires=datetime.now() + timedelta( days=1)) # tasks expire after one day. return run else: return False
import cv2
import base64
import json
import time
import os
from celery import group
from celery_proj.celery_app import predict_task

img_ls = []
img_loc = "Data"
for filename in os.listdir(img_loc):
    if filename.endswith(".jpg"):
        img_ls.append(filename)


def json_encode(img):
    full_p = os.path.join(img_loc, img)
    img = cv2.imread(full_p)
    _, im_arr = cv2.imencode('.jpg', img)
    im_bytes = im_arr.tobytes()
    base_img = base64.b64encode(im_bytes).decode('utf-8')
    return base_img


predict_result = group(
    predict_task.s(json_encode(i), (180, 180)) for i in img_ls)()
res = predict_result.get()
for i in res:
    print(i)
def grid_search_controller(config_path): # start = time.time() # Dynamic importing config file from config_path config = load(config_path) # Dynamic loading lambda name LAMBDA_NAME = getattr(config.Cross_Validation, "LAMBDA_NAME") # Clean the log of specified lambda function clean_logs('/aws/lambda/' + LAMBDA_NAME) # Dynamic load parameters PARAMETERS = [] CV_SETTINGS = [] for key in dir(config.Hyperparameter): if key.isupper(): PARAMETERS.append(key) for key in dir(config.Cross_Validation): if key.isupper(): CV_SETTINGS.append(key) # Tune forecast horizon of the chosen model payload_list = create_event(config, PARAMETERS, CV_SETTINGS) min_metric = float('inf') chosen_model_event = None metrics = [] # from src.lambda_func.prophet.prophet import grid_search_worker # for payload in payload_list: # map_item = grid_search_worker(payload) # metrics.append(map_item['average_metric']) # if map_item['average_metric'] < min_metric: # print ("======Update chosen model event==========") # chosen_model_event = map_item['event'] # min_metric = map_item['average_metric'] # print ("=======Metric=======") # print (min_metric) # print ("======Event=======") # print (chosen_model_event) # print ("======Metrics=======") # print (metrics) # print ("====Execution time====") # print (time.time() - start) start = time.time() print ("=====Time Stamp======") print (start) job = group(invoke_lambda.s( function_name = LAMBDA_NAME, sync = True, payload = payload ) for payload in payload_list) print("===Async Tasks start===") result = job.apply_async() result.save() from celery.result import GroupResult saved_result = GroupResult.restore(result.id) while not saved_result.ready(): time.sleep(0.1) model_list = saved_result.get(timeout=None) print("===Async Tasks end===") print (time.time() - start) for item in model_list: payload = item['Payload'] if payload['average_metric'] < min_metric: chosen_model_event = payload['event'] min_metric = payload['average_metric'] from src.celery_lambda import measurement measurement.parse_log("/aws/lambda/prophet_worker") # Non-zero forecast period makes lambda upload graphs to s3 chosen_model_event['forecast'] = getattr(config.Cross_Validation, "FORECAST") # Invoke Lambda with forecast response = invoke_lambda(function_name = LAMBDA_NAME, sync=True, payload=chosen_model_event) print ("=======The Execution Time===========") print (time.time() - start) print (response)
def move_ucr_data_into_aggregation_tables(date=None, intervals=2): date = date or datetime.utcnow().date() monthly_dates = [] # probably this should be run one time, for now I leave this in aggregations script (not a big cost) # but remove issues when someone add new table to mapping, also we don't need to add new rows manually # on production servers _update_ucr_table_mapping() first_day_of_month = date.replace(day=1) for interval in range(intervals - 1, 0, -1): # calculate the last day of the previous months to send to the aggregation script first_day_next_month = first_day_of_month - relativedelta( months=interval - 1) monthly_dates.append(first_day_next_month - relativedelta(days=1)) monthly_dates.append(date) db_alias = get_icds_ucr_db_alias() if db_alias: with connections[db_alias].cursor() as cursor: _create_aggregate_functions(cursor) _update_aggregate_locations_tables(cursor) state_ids = (SQLLocation.objects.filter( domain=DASHBOARD_DOMAIN, location_type__name='state').values_list('location_id', flat=True)) for monthly_date in monthly_dates: calculation_date = monthly_date.strftime('%Y-%m-%d') stage_1_tasks = [ icds_state_aggregation_task.si(state_id=state_id, date=monthly_date, func=_aggregate_gm_forms) for state_id in state_ids ] stage_1_tasks.extend([ icds_state_aggregation_task.si(state_id=state_id, date=monthly_date, func=_aggregate_df_forms) for state_id in state_ids ]) stage_1_tasks.extend([ icds_state_aggregation_task.si(state_id=state_id, date=monthly_date, func=_aggregate_cf_forms) for state_id in state_ids ]) stage_1_tasks.extend([ icds_state_aggregation_task.si( state_id=state_id, date=monthly_date, func=_aggregate_child_health_thr_forms) for state_id in state_ids ]) stage_1_tasks.extend([ icds_state_aggregation_task.si( state_id=state_id, date=monthly_date, func=_aggregate_ccs_record_thr_forms) for state_id in state_ids ]) stage_1_tasks.extend([ icds_state_aggregation_task.si( state_id=state_id, date=monthly_date, func=_aggregate_child_health_pnc_forms) for state_id in state_ids ]) stage_1_tasks.extend([ icds_state_aggregation_task.si( state_id=state_id, date=monthly_date, func=_aggregate_ccs_record_pnc_forms) for state_id in state_ids ]) # stage_1_tasks.extend([ # icds_state_aggregation_task.si( # state_id=state_id, date=monthly_date, func=_aggregate_delivery_forms # ) for state_id in state_ids # ]) stage_1_tasks.extend([ icds_state_aggregation_task.si(state_id=state_id, date=monthly_date, func=_aggregate_bp_forms) for state_id in state_ids ]) stage_1_tasks.extend([ icds_state_aggregation_task.si(state_id=state_id, date=monthly_date, func=_aggregate_awc_infra_forms) for state_id in state_ids ]) stage_1_tasks.append( icds_aggregation_task.si(date=calculation_date, func=_update_months_table)) res = group(*stage_1_tasks).apply_async() res_daily = icds_aggregation_task.delay( date=calculation_date, func=_daily_attendance_table) res.get() res_child = chain( icds_aggregation_task.si(date=calculation_date, func=_child_health_monthly_table), icds_aggregation_task.si(date=calculation_date, func=_agg_child_health_table), ).apply_async() res_ccs = chain( icds_aggregation_task.si(date=calculation_date, func=_ccs_record_monthly_table), icds_aggregation_task.si(date=calculation_date, func=_agg_ccs_record_table), ).apply_async() res_daily.get() res_ccs.get() res_child.get() res_awc = icds_aggregation_task.delay(date=calculation_date, func=_agg_awc_table) res_awc.get() chain( icds_aggregation_task.si(date=date.strftime('%Y-%m-%d'), func=aggregate_awc_daily), email_dashboad_team.si( 
aggregation_date=date.strftime('%Y-%m-%d'))).delay()
def GetDevicesInfo(addrlist):
    job = group([GetASICInfo.s(ip) for ip in addrlist if ip != ''])
    run = job.apply_async()
    result = run.get()
    return result
def process_one(filename=None):
    """Enqueues a mail file for processing"""
    res = chain(parse.s(filename), group(deploy_db.s(), deploy_es.s()))()
    print "Enqueued mail file for processing: {} ({})".format(filename, res)
def unicodetask(self):
    self.join(group(print_unicode.s() for _ in range(5))(),
              timeout=1, propagate=True)
def dispatch_tasks(task_id, end_time_hour, end_time_minute): username, camera_id = task_id.split('##') camera = Camera.objects.filter( user__username=username, camera_id=camera_id).first() # get the camera model to extract frame ai_skill_settings = camera.ai_skill_settings.all( ) # get all ai_skills setted for ai_skill_setting in ai_skill_settings: ai_skill = ai_skill_setting.ai_skill coordinates = ai_skill_setting.coordinates face_relevence = ai_skill_setting.face_relevance skill_url = ai_skill.ai_skill_url camera_url = camera.camera_url skill_test = None try: skill_test = requests.get(skill_url).status_code if skill_test != 200: ai_skill.state = 0 ai_skill.save() else: ai_skill.state = 1 ai_skill.save() except requests.exceptions.ConnectionError: ai_skill.state = 0 ai_skill.save() camera_test = is_opened(camera_url=camera_url) if not camera_test: camera.state = 10 # connection failure camera.save() if skill_test == 200 and camera_test: info = { 'user': camera.user.id, 'camera': camera.id, 'ai_skill': ai_skill.id } all_faces = None if face_relevence: similarity = face_relevence.similarity quality = face_relevence.quality face_groups = face_relevence.face_group.all() face_images = [] for face_group in face_groups: faces = face_group.face_set.all() if faces: for face in faces: face_image = face.face_image face_images.append( base64.b64encode(face_image.read())) all_faces = { 'similarity': similarity, 'quality': quality, 'faces': face_images } with RedisTaskState(task_id=task_id) as task_state: task_state.set_state('running') group( put_image.s(camera_url=camera_url, coordinates=coordinates, task_id=task_id, end_time_hour=end_time_hour, end_time_minute=end_time_minute), detect_image.s(skill_id=ai_skill.id, task_id=task_id, end_time_hour=end_time_hour, end_time_minute=end_time_minute, interval=camera.extraction_settings.frequency, faces=all_faces, **info)).apply_async() else: with RedisTaskState(task_id=task_id) as task_state: task_state.set_state('error') clear_queue(task_id)
def manyshort(self):
    self.join(group(add.s(i, i) for i in range(1000))(),
              timeout=10, propagate=True)
def always_timeout(self):
    self.join(
        group(sleeping.s(1).set(time_limit=0.1) for _ in range(100))(),
        timeout=10, propagate=True,
    )