def scrape_news():
    """Delete all previously scraped site news and launch a new scrape.

    Every ``SiteNewsScrapedData`` row is deleted individually, then a chord
    is started with one ``scrape_site`` signature per entry of ``spiders``
    as header and ``compute_nlp`` as body.

    :return: always ``True``
    """
    for stale_row in SiteNewsScrapedData.objects.all():
        stale_row.delete()
    # TODO: Find some way to pass the transaction_id to the spider
    header = (scrape_site.s(spider) for spider in spiders)
    chord(header, compute_nlp.s())()
    return True
def test_app_when_app_in_task(self):
    # chord.app is the first header task's app; once the mock header task
    # has no app, chord.app is the body's ``_app``.
    header_task = Mock(name='t1')
    body_task = Mock(name='t2')
    sig = chord([header_task, self.add.s(4, 4)])
    self.assertIs(sig.app, sig.tasks[0].app)
    header_task.app = None
    sig = chord([header_task], body=body_task)
    self.assertIs(sig.app, body_task._app)
def test_app_when_app_in_task(self):
    """``chord.app`` is the first header task's app, else the body's ``_app``."""
    header_task = Mock(name='t1')
    body_task = Mock(name='t2')
    sig = chord([header_task, self.add.s(4, 4)])
    assert sig.app is sig.tasks[0].app
    header_task.app = None
    sig = chord([header_task], body=body_task)
    assert sig.app is body_task._app
def get_and_merge_items(items, log):
    """Get the items returned from the RECAP server and merge them into CL.

    Items is a list of dicts like so, sorted by court, case number, document
    number and attachment number:

    [{'attachment_number': '0',
      'document_number': '1',
      'case_number': '186759',
      'court_id': 'almb',
      'is_available': '0'},
      ...
    ]

    Note that all values are strings. The idea is to iterate over all of these
    dicts, grabbing the docket, and adding any items that have
    is_available = 1.

    For every case, the download signatures are collected into one chord
    header; the chord body parses the docket, extracts the PDFs and then adds
    or updates the RECAP documents.

    :param items: the list of dicts described above
    :param log: the log object whose status is updated before queueing starts
    """
    update_log_status(log, RECAPLog.GETTING_AND_MERGING_ITEMS)
    tasks = []
    for prev, item, nxt in previous_and_next(items):
        if prev is None or item['case_number'] != prev['case_number']:
            # New case. Get the next docket before getting any PDFs.
            url = get_docketxml_url(item['court_id'], item['case_number'])
            # Pass the value as a logger argument so formatting is lazy.
            logger.info("New docket found at: %s", url)
            filename = get_docket_filename(item['court_id'],
                                           item['case_number'])
            tasks.append(download_recap_item.si(url, filename, clobber=True))

        # Get the document
        filename = get_document_filename(item['court_id'],
                                         item['case_number'],
                                         item['document_number'],
                                         item['attachment_number'])
        location = os.path.join(settings.MEDIA_ROOT, 'recap', filename)
        if not os.path.isfile(location) and int(item['is_available']):
            # We don't have it yet, and it's available to get. Get it!
            url = get_pdf_url(item['court_id'], item['case_number'], filename)
            tasks.append(download_recap_item.si(url, filename))

        if nxt is None or item['case_number'] != nxt['case_number']:
            # Last item in the case. Send for processing.
            if tasks:
                logger.info("Sending %s tasks for processing.", len(tasks))
                filename = get_docket_filename(item['court_id'],
                                               item['case_number'])
                chord(tasks)(chain(
                    parse_recap_docket.si(filename, debug=False),
                    extract_recap_pdf.s().set(priority=5),
                    add_or_update_recap_document.s(coalesce_docket=True),
                ))
            # Start collecting afresh for the next case.
            tasks = []

    logger.info("Finished queueing new cases.")
def _fleet_start_and_wait(deployment, search_params, next_task=None):
    """
    Starts the units for the deployment and queues an asynchronous check of
    the deployment once every start task has finished.

    When the deployment has a schedule only the ``timer`` service type is
    started; otherwise every enabled template's service type is started.

    :param deployment: Deployment parameters.
    :type deployment: dict
    :param search_params: Search parameters
    :type search_params: dict
    :param next_task: Forwarded as ``next_task`` to ``_fleet_check_deploy``
    :return: Result of invoking the chord
    """
    details = deployment['deployment']
    name = details['name']
    version = details['version']
    nodes = details['nodes']

    if deployment['schedule']:
        service_types = {'timer'}
    else:
        service_types = {
            service_type
            for service_type, template in deployment['templates'].items()
            if template['enabled']
        }

    # Fall back to the full node count when no explicit minimum is configured.
    min_nodes = deployment['deployment'].get('check', {}).get(
        'min-nodes', nodes)
    templates = deployment['templates']

    header = group(
        _fleet_start.si(search_params, name, version, nodes, service_type,
                        templates[service_type])
        for service_type in service_types
    )
    body = _fleet_check_deploy.si(name, version, len(service_types),
                                  min_nodes, search_params,
                                  next_task=next_task)
    return chord(header, body, options=DEFAULT_CHORD_OPTIONS)()
def prepare_steps(self, args, tasks):
    """Clone, identify and link the steps of a chain.

    Each step is cloned (the first clone receives ``args``), given a
    ``task_id`` if it has none, and linked to the step before it. A group
    that is followed by another step is upgraded to a chord whose body is
    that following step.

    :param args: partial arguments applied to the first step only
    :param tasks: iterable of steps to prepare
    :return: tuple ``(tasks, results)`` of prepared steps and their results
    """
    steps = deque(tasks)
    prev_task = prev_res = None
    tasks, results = [], []
    i = 0
    while steps:
        # First task get partial args from chain.
        task = maybe_subtask(steps.popleft())
        task = task.clone() if i else task.clone(args)
        i += 1
        tid = task.options.get('task_id')
        if tid is None:
            tid = task.options['task_id'] = uuid()
        res = task.type.AsyncResult(tid)

        # automatically upgrade group(..) | s to chord(group, s)
        if isinstance(task, group):
            # ``next_step`` is looked up afresh for every group so a body
            # consumed by an earlier group can never be reused for a later
            # step.
            next_step = steps.popleft() if steps else None
            if next_step is not None:
                task = chord(task, body=next_step, task_id=tid)

        if prev_task:
            # link previous task to this task.
            prev_task.link(task)
            # set the results parent attribute.
            res.parent = prev_res

        results.append(res)
        tasks.append(task)
        prev_task, prev_res = task, res

    return tasks, results
def test_clone_clones_body(self):
    # A clone gets its own body object, and cloning a chord whose body was
    # removed yields a chord without a body.
    header = [self.add.s(2, 2), self.add.s(4, 4)]
    original = chord(header, body=self.mul.s(4))
    first_clone = original.clone()
    self.assertIsNot(original.kwargs['body'], first_clone.kwargs['body'])
    first_clone.kwargs.pop('body')
    second_clone = first_clone.clone()
    self.assertIsNone(second_clone.kwargs.get('body'))
def test_clone_clones_body(self):
    """A clone gets its own body; a body-less chord clones without one."""
    header = [self.add.s(2, 2), self.add.s(4, 4)]
    original = chord(header, body=self.mul.s(4))
    first_clone = original.clone()
    assert original.kwargs['body'] is not first_clone.kwargs['body']
    first_clone.kwargs.pop('body')
    second_clone = first_clone.clone()
    assert second_clone.kwargs.get('body') is None
def prepare_steps(self, args, tasks):
    """Clone, freeze and link the steps of a chain.

    Each step is cloned (the first clone receives ``args``), frozen, and
    linked to the step before it. A group followed by another step becomes
    a chord with that step as body. A step that directly follows a chord is
    linked but not added to the returned lists.

    :param args: partial arguments applied to the first step only
    :param tasks: iterable of steps to prepare
    :return: tuple ``(tasks, results)``
    """
    pending = deque(tasks)
    prev_task = None
    prev_result = None
    prepared = []
    results = []
    position = 0
    while pending:
        # Only the first step receives the chain's partial args.
        step = maybe_subtask(pending.popleft())
        if position:
            step = step.clone()
        else:
            step = step.clone(args)
        result = step.freeze()
        position += 1

        if isinstance(step, group):
            # automatically upgrade group(..) | s to chord(group, s)
            try:
                body = pending.popleft()
                # for chords we freeze by pretending it's a normal
                # task instead of a group.
                result = Signature.freeze(step)
                step = chord(step, body=body, task_id=result.task_id)
            except IndexError:
                pass  # no callback, so keep as group

        if prev_task:
            # link previous task to this task.
            prev_task.link(step)
            # set the results parent attribute.
            result.parent = prev_result

        if not isinstance(prev_task, chord):
            results.append(result)
            prepared.append(step)
        prev_task, prev_result = step, result

    return prepared, results
def recover_cluster(self, recovery_params):
    """
    Recovers the cluster by re-scheduling deployments.

    Deployments are selected from the store by state (defaulting to
    ``DEPLOYMENT_STATE_PROMOTED``), name, version and excluded names. A
    ``create`` signature is queued for a clone of each one, followed by
    ``async_wait`` as the chord body.

    :param recovery_params: Parameters for recovering cluster
    :type recovery_params: dict
    :return: Result of dispatching the chord with ``delay()``
    """
    logger.info('Begin Cluster recovery for: {}'.format(recovery_params))
    state = recovery_params.get('state', DEPLOYMENT_STATE_PROMOTED)
    store = get_store()
    deployments = store.filter_deployments(
        state=state,
        name=recovery_params.get('name'),
        version=recovery_params.get('version'),
        exclude_names=recovery_params.get('exclude-names')
    )

    header = group(
        create.si(clone_deployment(deployment))
        for deployment in deployments
    )
    body = async_wait.s(
        default_retry_delay=TASK_SETTINGS['DEPLOYMENT_WAIT_RETRY_DELAY'],
        max_retries=TASK_SETTINGS['DEPLOYMENT_WAIT_RETRIES'])
    return chord(header, body, options=DEFAULT_CHORD_OPTIONS).delay()
def test_clone_clones_body(self):
    # A clone gets its own body object, and cloning a chord whose body was
    # removed yields a chord without a body.
    header = [add.s(2, 2), add.s(4, 4)]
    original = chord(header, body=mul.s(4))
    first_clone = original.clone()
    self.assertIsNot(original.kwargs["body"], first_clone.kwargs["body"])
    first_clone.kwargs.pop("body")
    second_clone = first_clone.clone()
    self.assertIsNone(second_clone.kwargs.get("body"))
def _deploy_all(deployment, search_params, next_task=None):
    """
    Deploys all enabled services for a given deployment.

    Nothing is deployed when the ``app`` template is disabled. Otherwise a
    ``_fleet_deploy`` signature is queued per enabled template, followed by
    ``_fleet_start_and_wait`` as the chord body.

    :param deployment: Deployment parameters
    :type deployment: dict
    :param search_params: Search parameters passed on to the queued tasks
    :param next_task: Forwarded as ``next_task`` to ``_fleet_start_and_wait``
    :return: Empty list when the app template is disabled, else the result
        of invoking the chord
    """
    security = deployment.get('security', {})
    security_profile = security.get('profile', 'default')

    app_template = deployment['templates']['app']
    if not app_template['enabled']:
        return []

    details = deployment['deployment']
    name = details['name']
    version = details['version']
    nodes = details['nodes']

    header = group(
        _fleet_deploy.si(search_params, name, version, nodes, service_type,
                         template, security_profile)
        for service_type, template in deployment['templates'].items()
        if template['enabled']
    )
    body = _fleet_start_and_wait.si(deployment, search_params,
                                    next_task=next_task)
    return chord(header, body, options=DEFAULT_CHORD_OPTIONS)()
def cc_works():
    """Run a chord of ``cc_produce_n`` for 1..10 with ``cc_collect_total`` as body."""
    header = [cc_produce_n.s(value) for value in range(1, 11)]
    pending = chord(header)
    return pending(cc_collect_total.s())
def cc_fails():
    """Run a chord of ``cc_produce_n`` for 1..10 whose body is a chain.

    The body chains ``cc_collect_total`` into ``cc_print_result``.
    NOTE(review): the name suggests this variant is known to fail; the
    reason is not visible from this function.
    """
    header = [cc_produce_n.s(value) for value in range(1, 11)]
    pending = chord(header)
    body = cc_collect_total.s() | cc_print_result.s()
    return pending(body)
def test_links_to_body(self):
    # link() and link_error() on a chord end up in the body's options and
    # leave the chord's own options without those entries.
    sig = chord([add.s(2, 2), add.s(4, 4)], body=mul.s(4))

    sig.link(div.s(2))
    self.assertFalse(sig.options.get("link"))
    body_options = sig.kwargs["body"].options
    self.assertTrue(body_options["link"])

    sig.link_error(div.s(2))
    self.assertFalse(sig.options.get("link_error"))
    body_options = sig.kwargs["body"].options
    self.assertTrue(body_options["link_error"])

    self.assertTrue(sig.tasks)
    self.assertTrue(sig.body)
def test_links_to_body(self):
    # link() and link_error() on a chord end up in the body's options and
    # leave the chord's own options without those entries.
    sig = chord([add.s(2, 2), add.s(4, 4)], body=mul.s(4))

    sig.link(div.s(2))
    self.assertFalse(sig.options.get('link'))
    body_options = sig.kwargs['body'].options
    self.assertTrue(body_options['link'])

    sig.link_error(div.s(2))
    self.assertFalse(sig.options.get('link_error'))
    body_options = sig.kwargs['body'].options
    self.assertTrue(body_options['link_error'])

    self.assertTrue(sig.tasks)
    self.assertTrue(sig.body)
def test_links_to_body(self):
    """``link``/``link_error`` are stored on the body, not on the chord."""
    header = [self.add.s(2, 2), self.add.s(4, 4)]
    sig = chord(header, body=self.mul.s(4))

    sig.link(self.div.s(2))
    assert not sig.options.get('link')
    body_options = sig.kwargs['body'].options
    assert body_options['link']

    sig.link_error(self.div.s(2))
    assert not sig.options.get('link_error')
    body_options = sig.kwargs['body'].options
    assert body_options['link_error']

    assert sig.tasks
    assert sig.body
def get_load_from_csv_tasks(msg):
    '''
    Returns a chord of tasks to migrate from csv to staging.

    One ``W_load_csv_to_staging`` subtask is created per entry of the split
    file list; each receives the per-file loader message merged with ``msg``.
    ``handle_group_results`` is the chord body.

    :param msg: message dict holding the batch guid, landing zone work dir,
        header file path, load type and split file list
    :return: the (not yet applied) chord signature
    '''
    guid_batch = msg[mk.GUID_BATCH]
    lzw = msg[mk.LANDING_ZONE_WORK_DIR]
    header_file_path = msg[mk.HEADER_FILE_PATH]
    load_type = msg[mk.LOAD_TYPE]
    split_files = msg[mk.SPLIT_FILE_LIST]

    def _loader_signature(split_file_tuple):
        # Build the loader message for one split file and wrap it in a subtask.
        loader_msg = generate_msg_for_file_loader(
            split_file_tuple, header_file_path, lzw, guid_batch, load_type)
        return W_load_csv_to_staging.task.subtask(
            args=[merge_dict(loader_msg, msg)])

    loader_tasks = [_loader_signature(entry) for entry in split_files]
    return chord(group(loader_tasks), handle_group_results.s())
def get_task(self, run_instance):
    """
    Recursively build the celery signature for this task.

    A task without parents becomes an immutable ``generic_run`` signature
    fed with the values of its ``Data`` rows for ``run_instance``. A task
    with parents becomes a chord whose header holds the parents' own
    signatures and whose body is ``generic_run`` for this task.

    :param run_instance: A run instance object
    :return: a celery task object
    """
    parents = self.parents.all()
    if not parents:
        inputs = Data.objects.filter(task=self, run_instance=run_instance).all()
        values = [datum.value for datum in inputs]
        return generic_run.si(values, task_name=self.name)

    header = [parent.get_task(run_instance) for parent in parents]
    return chord(header, generic_run.s(task_name=self.name))
def prepare_steps(self, args, tasks):
    """Clone, freeze and link the steps of a chain.

    Each step is cloned (the first clone receives ``args``) and frozen.
    Nested chains are spliced into the remaining steps. A group followed by
    a non-group step becomes a chord with that step as body. A step that
    directly follows a chord is linked but not added to the returned lists.

    :param args: partial arguments applied to the first step only
    :param tasks: iterable of steps to prepare
    :return: tuple ``(tasks, results)``
    """
    app = self.app
    steps = deque(tasks)
    prev_task = prev_res = None
    tasks, results = [], []
    i = 0
    while steps:
        # First task get partial args from chain.
        task = maybe_signature(steps.popleft(), app=app)
        task = task.clone() if i else task.clone(args)
        res = task.freeze()
        i += 1

        if isinstance(task, group):
            task = maybe_unroll_group(task)
        if isinstance(task, chain):
            # splice the chain
            steps.extendleft(reversed(task.tasks))
            continue
        elif isinstance(task, group) and steps and \
                not isinstance(steps[0], group):
            # automatically upgrade group(..) | s to chord(group, s)
            try:
                next_step = steps.popleft()
                # for chords we freeze by pretending it's a normal
                # task instead of a group.
                res = Signature.freeze(next_step)
                task = chord(task, body=next_step, task_id=res.task_id)
            except IndexError:
                pass  # no callback, so keep as group

        if prev_task:
            # link previous task to this task.
            prev_task.link(task)
            # set the results parent attribute.
            if not res.parent:
                res.parent = prev_res

        if not isinstance(prev_task, chord):
            results.append(res)
            tasks.append(task)
        prev_task, prev_res = task, res

    # The leftover debugging ``print(tasks)`` was removed here.
    return tasks, results
def _check_deployment(nodes, path, attempts, timeout, search_params=None,
                      next_task=None):
    """
    Performs a deployment check on discovered nodes.

    When both ``path`` and ``nodes`` are set, one ``_check_node`` signature
    is queued per node and ``_deployment_check_passed`` runs as the chord
    body. Otherwise ``next_task`` is dispatched directly, if given.

    :param nodes: Discovered nodes; only the mapping's values are used
    :type nodes: dict
    :param path: Path handed to each node check; no check runs when falsy
    :param attempts: Max no. of attempts for deployment check for a given node.
    :type attempts: int
    :param timeout: Deployment check timeout
    :type timeout: str
    :param search_params: Forwarded to ``_deployment_check_passed``
    :param next_task: Forwarded to ``_deployment_check_passed``, or
        dispatched directly when no check is performed
    :return: Result of the dispatched chord or of ``next_task``; ``None``
        when there is nothing to dispatch
    """
    if path and nodes:
        # ``values()`` exists on both Python 2 and 3 mappings, whereas
        # ``iteritems()`` is Python 2 only; the keys were never used.
        return chord(
            group(_check_node.si(node, path, attempts, timeout)
                  for node in nodes.values()),
            _deployment_check_passed.si(search_params=search_params,
                                        next_task=next_task),
            options=DEFAULT_CHORD_OPTIONS
        ).delay()
    if next_task:
        return next_task.delay()
    return None
def test_chord_size_nested_implicit_chain_chain_single(self):
    """A list header holding one single-task chain has a length hint of 1."""
    header = [chain(self.add.s())]
    size = chord(header).__length_hint__()
    assert size == 1
def test_chord_size_implicit_group_single(self):
    """A one-element list header has a length hint of 1."""
    header = [self.add.s()]
    size = chord(header).__length_hint__()
    assert size == 1
def test_app_when_app(self):
    """An explicitly passed ``app`` is what ``chord.app`` returns."""
    explicit_app = Mock(name='app')
    sig = chord([self.add.s(4, 4)], app=explicit_app)
    assert sig.app is explicit_app
def test_chord_size_implicit_chain_many(self):
    # The inner list isn't a chain object, so its `tasks` can't be lifted
    # into the chord. This isn't actually valid and would blow up if we
    # tried to run it, but it sanity checks our recursion.
    nested_header = [[self.add.s()] * 42]
    size = chord(nested_header).__length_hint__()
    assert size == 1
def test_chord_size_simple(self):
    """A chord built from a single signature has a length hint of 1."""
    header = self.add.s()
    size = chord(header).__length_hint__()
    assert size == 1
def test_chord_size_chain_many(self):
    # Chains get flattened into the encapsulating chord, so even though the
    # chain itself would only count for 1, its tasks are pulled into the
    # chord's header and counted as a bunch of simple signature objects.
    header = chain([self.add.s()] * 42)
    size = chord(header).__length_hint__()
    assert size == 42
def test_freeze_tasks_is_not_group(self):
    """``freeze`` runs both before and after ``tasks`` is set to a plain list."""
    sig = chord([self.add.s(2, 2)], body=self.add.s(), app=self.app)
    sig.freeze()
    plain_tasks = [self.add.s(2, 2)]
    sig.tasks = plain_tasks
    sig.freeze()
def test_reverse(self):
    # subtask() gives back a chord for both a chord and its dict form.
    original = chord([add.s(2, 2), add.s(4, 4)], body=mul.s(4))
    from_signature = subtask(original)
    self.assertIsInstance(from_signature, chord)
    from_mapping = subtask(dict(original))
    self.assertIsInstance(from_mapping, chord)
def test_chord_size_nested_implicit_group_chain_group_tail_many(self):
    """Two chains that each end in a group of four give a length hint of 8."""
    chain_with_group_tail = self.add.s() | group([self.add.s()] * 4)
    header = [chain_with_group_tail] * 2
    sig = chord(header, body=self.add.s())
    assert sig.__length_hint__() == 4 * 2
def test_chord_size_chain_single(self):
    """A single-task chain as header has a length hint of 1."""
    header = chain(self.add.s())
    size = chord(header).__length_hint__()
    assert size == 1
def test_chord_size_nested_group_chain_group_tail_single(self):
    """A group of 42 chains, each ending in a one-task group, hints 42."""
    chain_with_group_tail = self.add.s() | group(self.add.s())
    header = group([chain_with_group_tail] * 42)
    sig = chord(header, body=self.add.s())
    assert sig.__length_hint__() == 42
def test_chord_size_nested_group_chain_group_mid_many(self):
    """Two chains with a group in the middle give a length hint of 2."""
    chain_with_group_mid = (
        self.add.s() | group([self.add.s()] * 4) | self.add.s()
    )
    header = group([chain_with_group_mid] * 2)
    sig = chord(header, body=self.add.s())
    assert sig.__length_hint__() == 2
def test_chord_size_nested_chord_body_implicit_group_many(self):
    """A header chord with an empty header and a 42-item list body hints 42."""
    inner = chord(tuple(), [self.add.s()] * 42)
    size = chord(inner).__length_hint__()
    assert size == 42
def test_chord_size_nested_chord_body_simple(self):
    """A header chord with an empty header and a single-task body hints 1."""
    inner = chord(tuple(), self.add.s())
    size = chord(inner).__length_hint__()
    assert size == 1
def test_chord_size_nested_implicit_chain_chain_many(self):
    """A list header holding one 42-task chain has a length hint of 1."""
    header = [chain([self.add.s()] * 42)]
    size = chord(header).__length_hint__()
    assert size == 1
def test_set_immutable(self):
    """``set_immutable(True)`` can be called on a chord with mock header tasks."""
    header = [Mock(name='t1'), Mock(name='t2')]
    sig = chord(header, app=self.app)
    sig.set_immutable(True)
def test_chord_size_explicit_group_single(self):
    """An explicit one-task group as header has a length hint of 1."""
    header = group(self.add.s())
    size = chord(header).__length_hint__()
    assert size == 1
def archive_item(self, guid, provider_id, user, task_id=None):
    """Archive the ingest item ``guid`` together with its references and renditions.

    The item is fetched through the provider's service (falling back to the
    stored ingest document on ``LookupError``), patched into the archive,
    and follow-up tasks are queued for every ``residRef`` reference and
    every rendition. When follow-up tasks exist they run as a chord with
    ``update_item`` as body. Any exception raised inside is logged with its
    traceback and not propagated.

    :param guid: guid of the ingest item to archive
    :param provider_id: ``_id`` of the ingest provider
    :param user: value assigned to ``flask.g.user`` and passed to sub-tasks
    :param task_id: id of the root task; defaults to this task's request id
    """
    try:
        # For CELERY_ALWAYS_EAGER=True the current request context is
        # empty but already initialized one is on request_stack
        if app.config['CELERY_ALWAYS_EAGER']:
            self.request_stack.pop()

        crt_task_id = self.request.id
        task_id = task_id or crt_task_id

        if not self.request.retries:
            update_status(*add_subtask_to_progress(task_id))

        provider = superdesk.get_resource_service('ingest_providers').find_one(req=None, _id=provider_id)
        if provider is None:
            # NOTE(review): raise_fail presumably raises - confirm.
            message = 'For ingest with guid= %s, failed to retrieve provider with _id=%s' % (guid, provider_id)
            raise_fail(task_id, message)
        service_provider = superdesk.io.providers[provider.get('type')]
        service_provider.provider = provider

        old_item = False
        try:
            items = service_provider.get_items(guid)
        except LookupError:
            ingest_doc = superdesk.get_resource_service('ingest').find_one(req=None, _id=guid)
            if not ingest_doc:
                message = 'Not found the ingest with guid: %s for provider %s' % (guid, provider.get('type'))
                raise_fail(task_id, message)
            else:
                old_item = True
                ingest_doc.pop('_id')
                items = [ingest_doc]
        except Exception:
            raise self.retry(countdown=2)

        # Pick the first returned item carrying the requested guid.
        item = next(
            (candidate for candidate in items
             if 'guid' in candidate and candidate['guid'] == guid),
            None)

        if item is None:
            message = 'Returned ingest but not found the ingest with guid: %s for provider %s' \
                      % (guid, provider.get('type'))
            raise_fail(task_id, message)

        if not old_item:
            item['created'] = item['firstcreated'] = utc.localize(item['firstcreated'])
            item['updated'] = item['versioncreated'] = utc.localize(item['versioncreated'])

        # Necessary because flask.g.user is None while fetching packages for
        # the grouped items or while patching in the archive collection.
        # Without this, version_creator is set to None, which doesn't make
        # sense.
        flask.g.user = user
        remove_unwanted(item)
        superdesk.get_resource_service(ARCHIVE).patch(guid, item)

        tasks = []
        for item_group in item.get('groups', []):
            for ref in item_group.get('refs', []):
                if 'residRef' not in ref:
                    continue
                resid_ref = ref.get('residRef')
                doc = {'guid': resid_ref, 'ingest_provider': provider_id, 'task_id': crt_task_id}

                archived_doc = superdesk.get_resource_service(ARCHIVE).find_one(req=None, guid=doc.get('guid'))
                # check if task already started
                if not archived_doc:
                    doc.setdefault('_id', doc.get('guid'))
                    superdesk.get_resource_service(ARCHIVE).post([doc])
                elif archived_doc.get('task_id') == crt_task_id:
                    # it is a retry so continue
                    archived_doc.update(doc)
                    remove_unwanted(archived_doc)
                    superdesk.get_resource_service(ARCHIVE).patch(archived_doc.get('_id'), archived_doc)
                else:
                    # there is a cyclic dependency, skip it
                    continue

                mark_ingest_as_archived(doc.get('guid'))
                tasks.append(archive_item.s(resid_ref, provider.get('_id'), user, task_id))

        for rendition in item.get('renditions', {}).values():
            href = service_provider.prepare_href(rendition['href'])
            if rendition['rendition'] == 'baseImage':
                tasks.append(archive_media.s(task_id, guid, href))
            else:
                tasks.append(archive_rendition.s(task_id, guid, rendition['rendition'], href))

        update_status(*finish_subtask_from_progress(task_id))
        if tasks:
            chord((task for task in tasks), update_item.s(crt_task_id == task_id, task_id, guid)).delay()
        elif task_id == crt_task_id:
            update_status(*finish_task_for_progress(task_id))
    except Exception:
        logger.error(traceback.format_exc())
def test_repr(self):
    # repr() is non-empty, and mentions the missing body once it is None.
    sig = chord([add.s(2, 2), add.s(4, 4)], body=mul.s(4))
    self.assertTrue(repr(sig))
    sig.kwargs["body"] = None
    text = repr(sig)
    self.assertIn("without body", text)
def test_repr(self):
    # repr() is non-empty, and mentions the missing body once it is None.
    sig = chord([add.s(2, 2), add.s(4, 4)], body=mul.s(4))
    self.assertTrue(repr(sig))
    sig.kwargs['body'] = None
    text = repr(sig)
    self.assertIn('without body', text)
def test_chord_size_with_body(self):
    """A single-signature header with a body has a length hint of 1."""
    header = self.add.s()
    body = self.add.s()
    size = chord(header, body).__length_hint__()
    assert size == 1
def test_from_dict_with_body(self):
    """``chord.from_dict`` rebuilds a chord from a chord's dict form."""
    # The same signature object serves as both header and body.
    shared = self.add.s(1, 2)
    original_chord = chord(header=shared, body=shared)
    as_mapping = dict(original_chord)
    rebuilt_chord = chord.from_dict(as_mapping)
    assert isinstance(rebuilt_chord, chord)
def test_app_when_header_is_empty(self):
    """With an empty header, ``chord.app`` is the ``add`` task's app."""
    body = self.add.s(4, 4)
    sig = chord([], body)
    assert sig.app is self.add.app
def test_repr(self):
    """``repr`` is non-empty and says 'without body' once the body is None."""
    header = [self.add.s(2, 2), self.add.s(4, 4)]
    sig = chord(header, body=self.mul.s(4))
    assert repr(sig)
    sig.kwargs['body'] = None
    text = repr(sig)
    assert 'without body' in text
def chord_add():
    """Apply a one-task ``add`` chord asynchronously with ``finalize`` as body."""
    header = [self.add.s(4, 4)]
    sig = chord(header, finalize.s())
    return sig.apply_async()
def test_chord_size_nested_chain_chain_many(self):
    # The outer chain will be pulled up into the chord, but the inner one
    # remains and only counts as a single final element.
    inner = chain([self.add.s()] * 42)
    header = chain(inner)
    size = chord(header).__length_hint__()
    assert size == 1
def test_app_fallback_to_current(self):
    """With no app on header or body, ``chord.app`` is ``current_app``."""
    from celery._state import current_app

    appless = Mock(name='t1')
    appless.app = appless._app = None
    sig = chord([appless], body=appless)
    assert sig.app is current_app
def test_reverse(self):
    # signature() gives back a chord for both a chord and its dict form.
    header = [self.add.s(2, 2), self.add.s(4, 4)]
    original = chord(header, body=self.mul.s(4))
    from_signature = signature(original)
    self.assertIsInstance(from_signature, chord)
    from_mapping = signature(dict(original))
    self.assertIsInstance(from_mapping, chord)
def test_argument_is_group(self):
    # A chord built from a group exposes a non-empty ``tasks``.
    header = group(self.add.s(2, 2), self.add.s(4, 4), app=self.app)
    sig = chord(header)
    self.assertTrue(sig.tasks)
def test_set_parent_id(self):
    """``set_parent_id`` can be called after ``tasks`` is set to a plain list."""
    sig = chord(group(self.add.s(2, 2)))
    plain_tasks = [self.add.s(2, 2)]
    sig.tasks = plain_tasks
    sig.set_parent_id('pid')
def test_app_when_app(self):
    # An explicitly passed ``app`` is what chord.app returns.
    explicit_app = Mock(name='app')
    sig = chord([self.add.s(4, 4)], app=explicit_app)
    self.assertIs(sig.app, explicit_app)
def test_app_fallback_to_current(self):
    # With no app on header or body, chord.app is ``current_app``.
    from celery._state import current_app

    appless = Mock(name='t1')
    appless.app = appless._app = None
    sig = chord([appless], body=appless)
    self.assertIs(sig.app, current_app)
def test_chord_size_implicit_group_many(self):
    """A 42-element list header has a length hint of 42."""
    header = [self.add.s()] * 42
    size = chord(header).__length_hint__()
    assert size == 42
def archive_item(self, guid, provider_id, user, trigger_events, task_id=None):
    """Archive the ingest item ``guid`` together with its references and renditions.

    The item is fetched through the provider's service (falling back to the
    stored ingest document on ``LookupError``), written to the archive, and
    follow-up tasks are queued for every ``residRef`` reference and every
    rendition. When follow-up tasks exist they run as a chord with
    ``update_item`` as body. Any exception raised inside is logged with its
    traceback and not propagated.

    :param guid: guid of the ingest item to archive
    :param provider_id: ``_id`` of the ingest provider
    :param user: stored on referenced docs and passed to sub-tasks
    :param trigger_events: forwarded to archive writes and sub-tasks
    :param task_id: id of the root task; defaults to this task's request id
    """
    try:
        # For CELERY_ALWAYS_EAGER=True the current request context is
        # empty but already initialized one is on request_stack
        if app.config["CELERY_ALWAYS_EAGER"]:
            self.request_stack.pop()

        crt_task_id = self.request.id
        task_id = task_id or crt_task_id

        if not self.request.retries:
            update_status(*add_subtask_to_progress(task_id))

        provider = superdesk.apps["ingest_providers"].find_one(req=None, _id=provider_id)
        if provider is None:
            # NOTE(review): raise_fail presumably raises - confirm.
            message = "For ingest with guid= %s, failed to retrieve provider with _id=%s" % (guid, provider_id)
            raise_fail(task_id, message)
        service_provider = superdesk.io.providers[provider.get("type")]
        service_provider.provider = provider

        old_item = False
        try:
            items = service_provider.get_items(guid)
        except LookupError:
            ingest_doc = superdesk.apps["ingest"].find_one(req=None, _id=guid)
            if not ingest_doc:
                message = "Not found the ingest with guid: %s for provider %s" % (guid, provider.get("type"))
                raise_fail(task_id, message)
            else:
                old_item = True
                ingest_doc.pop("_id")
                items = [ingest_doc]
        except Exception:
            raise self.retry(countdown=2)

        # Pick the first returned item carrying the requested guid.
        item = next(
            (candidate for candidate in items
             if "guid" in candidate and candidate["guid"] == guid),
            None)

        if item is None:
            message = "Returned ingest but not found the ingest with guid: %s for provider %s" % (
                guid,
                provider.get("type"),
            )
            raise_fail(task_id, message)

        if not old_item:
            item["created"] = item["firstcreated"] = utc.localize(item["firstcreated"])
            item["updated"] = item["versioncreated"] = utc.localize(item["versioncreated"])
        superdesk.apps["archive"].update(guid, item, trigger_events=trigger_events)

        tasks = []
        for item_group in item.get("groups", []):
            for ref in item_group.get("refs", []):
                if "residRef" not in ref:
                    continue
                doc = {
                    "guid": ref.get("residRef"),
                    "ingest_provider": provider_id,
                    "user": user,
                    "task_id": crt_task_id,
                }

                archived_doc = superdesk.apps["archive"].find_one(req=None, guid=doc.get("guid"))
                # check if task already started
                if not archived_doc:
                    doc.setdefault("_id", doc.get("guid"))
                    superdesk.apps["archive"].create([doc], trigger_events=trigger_events)
                elif archived_doc.get("task_id") == crt_task_id:
                    # it is a retry so continue
                    archived_doc.update(doc)
                    superdesk.apps["archive"].update(
                        archived_doc.get("_id"), archived_doc, trigger_events=trigger_events
                    )
                else:
                    # there is a cyclic dependency, skip it
                    continue

                ingest_set_archived(doc.get("guid"))
                tasks.append(archive_item.s(ref["residRef"], provider.get("_id"), user, trigger_events, task_id))

        for rendition in item.get("renditions", {}).values():
            href = service_provider.prepare_href(rendition["href"])
            if rendition["rendition"] == "baseImage":
                tasks.append(archive_media.s(task_id, guid, href, trigger_events))
            else:
                tasks.append(archive_rendition.s(task_id, guid, rendition["rendition"], href, trigger_events))

        update_status(*finish_subtask_from_progress(task_id))
        if tasks:
            chord((task for task in tasks), update_item.s(crt_task_id == task_id, task_id, guid)).delay()
        elif task_id == crt_task_id:
            update_status(*finish_task_for_progress(task_id))
    except Exception:
        logger.error(traceback.format_exc())