def _get_location(loader, record):
    """Resolve the mart ``Location`` matching *record*'s source location.

    Looks up the first ``Location`` whose ``source_id`` and ``schema_name``
    match the record's location and the loader's current country.
    Returns ``None`` when the record has no location or the lookup fails
    (failures are reported via ``process_exception``).
    """
    try:
        if not record.location:
            return None
        country_schema = loader.context['country'].schema_name
        return Location.objects.filter(
            source_id=record.location.id,
            schema_name=country_schema,
        ).first()
    except Exception as e:  # best effort: log and fall through to None
        process_exception(e)
    return None
def notify(self, model):
    """Email every active subscriber of *model*'s dataset.

    For each ``Service`` exposing *model* and each non-NONE ``Subscription``
    on the model's content type, send a "dataset changed" mail.  EXCEL/PDF
    subscriptions additionally render the service endpoint through the DRF
    viewset and attach the exported file.

    Returns the list of values produced by ``mail.send`` (one per mail
    attempted); per-subscription failures are logged and reported via
    ``process_exception`` without aborting the loop.
    """
    ct = ContentType.objects.get_for_model(model)
    # latest ETL task for this dataset, included in the mail context
    etl = EtlTask.objects.filter(content_type=ct).first()
    ret = []
    for service in Service.objects.filter(source_model=ct):
        for subscription in self.filter(content_type=ct).exclude(
                type=Subscription.NONE):
            logger.info(f"Process subscription {subscription}")
            try:
                if subscription.type in (Subscription.EXCEL, Subscription.PDF):
                    # map subscription type to the renderer format suffix
                    format = {
                        Subscription.EXCEL: 'xlsx',
                        Subscription.PDF: 'pdf',
                    }[subscription.type]
                    # render the list endpoint internally, as the subscriber
                    rf = APIRequestFactory()
                    request = rf.get(f"{service.endpoint}?format={format}")
                    request.user = subscription.user
                    # this is set by the middleware, so we must set it manually here
                    request.api_info = {}
                    response = service.viewset.as_view({'get': 'list'})(request)
                    response.render()
                    # check headers set in ApiMiddleware in request.api_info
                    request.api_info.update(dict(response.items()))
                    attachments = {
                        f'{model._meta.verbose_name}.{format}': BytesIO(response.content),
                    }
                    template = 'dataset_changed_attachment'
                else:
                    attachments = None
                    template = 'dataset_changed'
                ret.append(
                    mail.send(
                        subscription.user.email,  # List of email addresses also accepted
                        '*****@*****.**',
                        template=template,
                        context={
                            'subscription': subscription,
                            'user': subscription.user,
                            'base_url': settings.ABSOLUTE_BASE_URL,
                            'verbose_name': model._meta.verbose_name,
                            'etl': etl,
                            'model': ct.model,
                            'service': service
                        },
                        attachments=attachments))
            except Exception as e:  # pragma: no cover
                # one bad subscription must not block the others
                logger.exception(e)
                process_exception(e)
    return ret
def queue(self, request, pk):
    """Admin object action: mark the task QUEUED and schedule it on Celery.

    On failure the error is reported via ``process_exception`` and shown to
    the user; either way the user is redirected back to the changelist.
    """
    etl_task = self.get_object(request, pk)
    try:
        etl_task.status = 'QUEUED'
        etl_task.elapsed = None
        etl_task.save()
        celery_task = app.tasks.get(etl_task.task)
        celery_task.delay(run_type=RUN_QUEUED)
        self.message_user(request, f"Task '{etl_task.task}' queued", messages.SUCCESS)
    except Exception as e:  # pragma: no cover
        process_exception(e)
        self.message_user(request, f"Cannot queue '{etl_task.task}': {e}", messages.ERROR)
    return HttpResponseRedirect(reverse("admin:etl_etltask_changelist"))
def refresh(self, request):
    """Admin action: run this model's ETL task synchronously.

    Applies the loader task inline, then reports the elapsed time and task
    result; failures are recorded via ``process_exception`` and shown as an
    error message.  Always redirects back to the model changelist.
    """
    try:
        start = time()
        res = self.model.loader.task.apply()
        stop = time()
        duration = stop - start
        self.message_user(
            request,
            "Data loaded in %s. %s" % (naturaldelta(duration), res.result),
            messages.SUCCESS)
    except Exception as e:  # pragma: no cover
        process_exception(e)
        self.message_user(request, str(e), messages.ERROR)
    # Redirect moved out of ``finally``: a ``return`` inside ``finally``
    # silently swallows any propagating exception, including SystemExit /
    # KeyboardInterrupt (flake8-bugbear B012).
    return HttpResponseRedirect(
        reverse(admin_urlname(self.model._meta, 'changelist')))
def unlock(modeladmin, request, queryset):
    """Admin action: force-release the loader lock on each selected task.

    The related service cache is invalidated (best effort), the mart table
    is truncated, and the loader lock is released.
    """
    total = len(queryset)
    for task in queryset:
        try:
            service = Service.objects.get_for_model(task.loader.model)
            service.invalidate_cache()
        except Service.DoesNotExist:
            pass
        except Exception as e:
            process_exception(e)
        # NOTE(review): truncating the mart table during "unlock" mirrors the
        # `truncate` action — confirm data loss here is intentional.
        task.loader.model.objects.truncate()
        task.loader.unlock()
    modeladmin.message_user(request,
                            "{0} loader{1} unlocked".format(total, pluralize(total)),
                            messages.SUCCESS)
def queue(self, request):
    """Admin action: schedule this model's ETL task on Celery.

    When ``CELERY_TASK_ALWAYS_EAGER`` is set the task runs inline and the
    elapsed time plus the task result are reported; otherwise a "scheduled"
    notice is shown.  Always redirects back to the model changelist.
    """
    try:
        start = time()
        res = self.model.loader.task.delay()
        if settings.CELERY_TASK_ALWAYS_EAGER:  # pragma: no cover
            # eager mode: delay() already ran the task synchronously
            stop = time()
            duration = stop - start
            self.message_user(
                request,
                "Data loaded in %s. %s" % (naturaldelta(duration), res.result),
                messages.SUCCESS)
        else:
            self.message_user(request, "ETL task scheduled", messages.SUCCESS)
    except Exception as e:  # pragma: no cover
        process_exception(e)
        self.message_user(request, str(e), messages.ERROR)
    # Redirect moved out of ``finally``: a ``return`` inside ``finally``
    # silently swallows any propagating exception, including SystemExit /
    # KeyboardInterrupt (flake8-bugbear B012).
    return HttpResponseRedirect(
        reverse(admin_urlname(self.model._meta, 'changelist')))
def truncate(modeladmin, request, queryset):
    """Admin action: wipe the mart data behind each selected ETL task.

    Invalidates the related service cache (best effort), truncates the mart
    table, releases the loader lock and resets all run bookkeeping fields on
    the task record.
    """
    total = len(queryset)
    for task in queryset:
        try:
            Service.objects.get_for_model(task.loader.model).invalidate_cache()
        except Service.DoesNotExist:
            pass
        except Exception as e:
            process_exception(e)
        task.loader.model.objects.truncate()
        task.loader.unlock()
        # reset run bookkeeping to a pristine "never run" state
        task.status = 'NO DATA'
        task.last_run = None
        task.run_type = RUN_UNKNOWN
        task.last_success = None
        task.last_failure = None
        task.time = None
        task.save()
    modeladmin.message_user(request,
                            "{0} table{1} truncated".format(total, pluralize(total)),
                            messages.SUCCESS)
def process_country(self):
    """Load TPM visits for the current tenant into the mart.

    For each visit in the filtered queryset, denormalizes UNICEF and TPM
    partner focal-point emails and attachment file paths onto the visit,
    then hands the record to ``process_record``.  Attachment lookups are
    best effort: failures go to ``process_exception`` and the field is left
    untouched.

    Fix: the queryset being iterated was bound to ``qs`` and then the same
    name was rebound inside the loop for the focal-point lookups — the inner
    querysets are now named distinctly to remove the shadowing.
    """
    visits = self.filter_queryset(self.get_queryset())
    content_type = DjangoContentType.objects.get(app_label='tpm', model='tpmvisit')
    for visit in visits.all():
        tpm_activities = visit.activities
        # UNICEF focal points: one lookup per activity of the visit
        unicef_focal_points = []
        for a in tpm_activities.only('activity_ptr_id'):
            unicef_fp_qs = TpmTpmactivityUnicefFocalPoints.objects.filter(
                tpmactivity_id=a.activity_ptr_id)
            unicef_focal_points.extend(unicef_fp_qs.values_list('user__email', flat=True))
        visit.unicef_focal_points = ",".join(unicef_focal_points)
        # TPM partner focal points: single lookup per visit
        tpm_focal_points = []
        partner_fp_qs = TpmTpmvisitTpmPartnerFocalPoints.objects.filter(tpmvisit=visit)
        tpm_focal_points.extend(
            partner_fp_qs.values_list('tpmpartnerstaffmember__user__email', flat=True))
        visit.tpm_focal_points = ",".join(tpm_focal_points)
        try:
            visit.report_attachments = ",".join(UnicefAttachmentsAttachment.objects.filter(
                object_id=visit.id,
                code='activity_report',
                content_type=content_type
            ).values_list('file', flat=True)).strip()
        except Exception as e:
            process_exception(e)
        try:
            visit.attachments = ",".join(UnicefAttachmentsAttachment.objects.filter(
                object_id=visit.id,
                code='activity_attachments',
                content_type=content_type
            ).values_list('file', flat=True)).strip()
        except Exception as e:
            process_exception(e)
        filters = self.config.key(self, visit)
        values = self.get_values(visit)
        op = self.process_record(filters, values)
        self.increment_counter(op)
def get_locations(self, record, values: dict, **kwargs):
    """Collect location metadata for *record* into ``values['locations_data']``.

    Builds one dict per related location (source id, name, pcode, admin
    level/levelname, coordinates resolved from the mart ``Location`` table)
    and returns the location names joined with ", ".  Coordinate lookup is
    best effort: failures are reported via ``process_exception`` and the
    coordinates stay ``None``.
    """
    collected = []
    related = getattr(record, self.location_m2m_field)
    for location in related.order_by('id'):
        entry = dict(source_id=location.id,
                     name=location.name,
                     pcode=location.p_code,
                     level=location.admin_level,
                     levelname=location.admin_level_name,
                     latitude=None,
                     longitude=None)
        try:
            match = Location.objects.get(
                source_id=location.id,
                schema_name=self.context['country'].schema_name)
            entry["latitude"] = match.latitude
            entry["longitude"] = match.longitude
        except Exception as e:
            process_exception(e)
        collected.append(entry)
    values['locations_data'] = collected
    return ", ".join(entry['name'] for entry in collected)
def load(self, *, verbosity=0, stdout=None, ignore_dependencies=False,
         max_records=None, only_delta=True, run_type=RUN_UNKNOWN, **kwargs):
    """Run the full ETL cycle for this loader and return its ``EtlResult``.

    Workflow: acquire the loader lock, verify dependencies (unless
    *ignore_dependencies*), build the field mapping from the mart model,
    then process every record of the source queryset inside a DB savepoint.
    ``on_end`` is always invoked — with the error (and retry=True for
    dependency errors), or with ``None`` on success — and the lock is
    released in ``finally``.

    Raises RequiredIsRunning / RequiredIsMissing when a dependency blocks
    the run; any other exception is reported via ``process_exception`` and
    re-raised.
    """
    self.on_start(run_type)
    self.results = EtlResult()
    logger.debug(f"Running loader {self}")
    lock = self.lock()
    truncate = self.config.truncate
    try:
        if lock:  # pragma: no branch
            # --- dependency checks -------------------------------------
            if not ignore_dependencies:
                for requirement in self.config.depends:
                    if requirement.loader.is_running():
                        raise RequiredIsRunning(requirement)
                    if requirement.loader.need_refresh(self):
                        raise RequiredIsMissing(requirement)
                    else:
                        logger.info(f"Loader {requirement} is uptodate")
            # --- build source->mart field mapping ----------------------
            # every concrete mart field maps to itself, except bookkeeping
            # columns; config.mapping overrides win.
            self.mapping = {}
            mart_fields = self.model._meta.concrete_fields
            for field in mart_fields:
                if field.name not in ['source_id', 'id', 'last_modify_date']:
                    self.mapping[field.name] = field.name
            if self.config.mapping:  # pragma: no branch
                self.mapping.update(self.config.mapping)
            self.update_context(today=timezone.now(),
                                max_records=max_records,
                                verbosity=verbosity,
                                records=0,
                                only_delta=only_delta,
                                is_empty=not self.model.objects.exists(),
                                stdout=stdout)
            # --- process records inside a savepoint --------------------
            sid = transaction.savepoint()
            try:
                self.results.context = self.context
                self.fields_to_compare = [f for f in self.mapping.keys()
                                          if f not in ["seen"]]
                if truncate:
                    self.model.objects.truncate()
                qs = self.filter_queryset(self.get_queryset())
                for record in qs.all():
                    filters = self.config.key(self, record)
                    values = self.get_values(record)
                    op = self.process_record(filters, values)
                    self.increment_counter(op)
                if stdout and verbosity > 0:
                    stdout.write("\n")
                # deleted = self.model.objects.exclude(seen=today).delete()[0]
                # self.results.deleted = deleted
            except MaxRecordsException:
                # max_records limit reached: stop cleanly, keep partial work
                pass
            except BaseException:
                transaction.savepoint_rollback(sid)
                raise
        else:
            logger.info(f"Unable to get lock for {self}")
    except (RequiredIsMissing, RequiredIsRunning) as e:
        # dependency problems are retryable
        self.on_end(error=e, retry=True)
        raise
    except BaseException as e:
        self.on_end(e)
        process_exception(e)
        raise
    else:
        self.on_end(None)
    finally:
        if lock:  # pragma: no branch
            try:
                lock.release()
            except LockError as e:  # pragma: no cover
                logger.warning(e)
    return self.results
def load(self, *, verbosity=0, stdout=None, ignore_dependencies=False,
         max_records=None, only_delta=True, run_type=RUN_UNKNOWN, **kwargs):
    """Run the ETL cycle for a per-country loader and return its ``EtlResult``.

    Like the base ``load``: acquire the lock, check dependencies (unless
    *ignore_dependencies*), build the field mapping, then delegate record
    processing to ``process_country`` inside a DB savepoint, optionally
    removing deleted source records afterwards.  ``on_end`` is always
    invoked and the lock released in ``finally``.

    Raises RequiredIsRunning / RequiredIsMissing when a dependency blocks
    the run; other exceptions are reported via ``process_exception`` and
    re-raised.
    """
    self.on_start(run_type)
    self.results = EtlResult()
    logger.debug(f"Running loader {self}")
    lock = self.lock()
    truncate = self.config.truncate
    try:
        if lock:  # pragma: no branch
            # --- dependency checks -------------------------------------
            if not ignore_dependencies:
                for requirement in self.config.depends:
                    if requirement.loader.is_running():
                        raise RequiredIsRunning(requirement)
                    if requirement.loader.need_refresh(self):
                        # if not force_requirements:
                        raise RequiredIsMissing(requirement)
                        # else:
                        #     logger.info(f"Load required dataset {requirement}")
                        #     requirement.loader.task.apply_async(
                        #         kwargs={"force_requirements": force_requirements,
                        #                 "run_type": RUN_AS_REQUIREMENT}
                        #     )
                        #     raise RequiredIsQueued(requirement)
                        # logger.info(f"Load required dataset {requirement}")
                        # requirement.loader.load(stdout=stdout,
                        #                         force_requirements=force_requirements,
                        #                         run_type=RUN_AS_REQUIREMENT)
                    else:
                        logger.info(f"Loader {requirement} is uptodate")
            # --- build source->mart field mapping ----------------------
            # every concrete mart field maps to itself, except the tenant
            # and bookkeeping columns; config.mapping overrides win.
            self.mapping = {}
            mart_fields = self.model._meta.concrete_fields
            for field in mart_fields:
                if field.name not in [
                    'country_name', 'schema_name', 'area_code',
                    'source_id', 'id', 'last_modify_date'
                ]:
                    self.mapping[field.name] = field.name
            if self.config.mapping:  # pragma: no branch
                self.mapping.update(self.config.mapping)
            self.update_context(today=timezone.now(),
                                max_records=max_records,
                                verbosity=verbosity,
                                records=0,
                                only_delta=only_delta,
                                is_empty=not self.model.objects.exists(),
                                stdout=stdout)
            # --- process records inside a savepoint --------------------
            sid = transaction.savepoint()
            try:
                self.results.context = self.context
                self.fields_to_compare = [
                    f for f in self.mapping.keys() if f not in ["seen"]
                ]
                if truncate:
                    self.model.objects.truncate()
                self.process_country()
                if self.config.sync_deleted_records(self):
                    self.remove_deleted()
                if stdout and verbosity > 0:
                    stdout.write("\n")
                # deleted = self.model.objects.exclude(seen=today).delete()[0]
                # self.results.deleted = deleted
            except MaxRecordsException:
                # max_records limit reached: stop cleanly, keep partial work
                pass
            except Exception:
                transaction.savepoint_rollback(sid)
                raise
        else:
            logger.info(f"Unable to get lock for {self}")
    except (RequiredIsMissing, RequiredIsRunning) as e:
        # dependency problems are retryable
        self.on_end(error=e, retry=True)
        raise
    except BaseException as e:
        self.on_end(e)
        process_exception(e)
        raise
    else:
        self.on_end(None)
    finally:
        if lock:  # pragma: no branch
            try:
                lock.release()
            except LockError as e:  # pragma: no cover
                logger.warning(e)
    return self.results
def load(self, *, verbosity=0, stdout=None, ignore_dependencies=False,
         max_records=None, only_delta=True, run_type=RUN_UNKNOWN, **kwargs):
    """Run the ETL cycle across all (or selected) eTools tenant schemas.

    Acquires the lock, checks dependencies, then iterates the tenant
    countries: for each one the DB connection is switched to that schema,
    ``process_country`` runs, and progress is published to the cache under
    ``STATUS:<task>``.  An explicit ``kwargs['countries']`` list restricts
    the run (and disables truncation).  All per-country work happens inside
    a single DB savepoint; ``on_end`` is always invoked and the lock is
    released in ``finally``.

    NOTE(review): unlike the sibling ``load`` variants, this one does not
    assign ``self.results`` here but reads ``self.results.context`` —
    presumably ``on_start`` initializes it; confirm.
    """
    logger.debug(f"Running loader {self}")
    lock = self.lock()
    truncate = self.config.truncate
    try:
        self.on_start(run_type)
        if lock:  # pragma: no branch
            # --- dependency checks -------------------------------------
            if not ignore_dependencies:
                for requirement in self.config.depends:
                    if requirement.loader.is_running():
                        raise RequiredIsRunning(requirement)
                    if requirement.loader.need_refresh(self):
                        raise RequiredIsMissing(requirement)
            connection = connections['etools']
            # explicit country list limits the run and disables truncation
            if kwargs.get('countries'):
                countries = kwargs['countries']
                truncate = False
            else:
                countries = connection.get_tenants()
            # self.get_final_mapping()
            # if self.config.fields_to_compare is None:
            #     self.fields_to_compare = [f for f in self.mapping.keys() if f not in ["seen"]]
            # self.mapping = {}
            # mart_fields = self.model._meta.concrete_fields
            # for field in mart_fields:
            #     if field.name not in ['country_name', 'schema_name', 'area_code', 'source_id',
            #                           'id', 'last_modify_date']:
            #         self.mapping[field.name] = field.name
            # if self.config.mapping:  # pragma: no branch
            #     self.mapping.update(self.config.mapping)
            self.update_context(today=timezone.now(),
                                countries=countries,
                                max_records=max_records,
                                verbosity=verbosity,
                                records=0,
                                only_delta=only_delta,
                                is_empty=not self.model.objects.exists(),
                                stdout=stdout)
            # --- per-country processing inside one savepoint -----------
            sid = transaction.savepoint()
            total_countries = len(countries)
            try:
                self.results.context = self.context
                # self.fields_to_compare = se
                # self.fields_to_compare = [f for f in self.mapping.keys() if f not in ["seen"]]
                if truncate:
                    self.model.objects.truncate()
                for i, country in enumerate(countries, 1):
                    # publish progress so the admin can show a live status
                    cache.set(
                        "STATUS:%s" % self.etl_task.task,
                        "%s - %s" % (country, self.results.processed))
                    self.context['country'] = country
                    if stdout and verbosity > 0:
                        stdout.write(f"{i:>3}/{total_countries} "
                                     f"{country.name:<25} "
                                     f"{country.schema_name:<25}")
                        stdout.flush()
                    # switch the tenant schema before touching the source DB
                    connection.set_schemas([country.schema_name])
                    start_time = time.time()
                    self.process_country()
                    elapsed_time = time.time() - start_time
                    elapsed = strfelapsed(elapsed_time)
                    if stdout and verbosity > 0:
                        stdout.write(f" in {elapsed}\n")
                        stdout.flush()
                    if stdout and verbosity > 2:
                        stdout.write("\n")
                        stdout.flush()
                    # per-country post-processing hook
                    # NOTE(review): assumed to run once per tenant (singular
                    # name, per-country context) — confirm against original
                    # formatting.
                    self.post_process_country()
                if self.config.sync_deleted_records(self):
                    self.remove_deleted()
                if stdout and verbosity > 0:
                    stdout.write("\n")
            except MaxRecordsException:
                # max_records limit reached: stop cleanly, keep partial work
                pass
            except Exception:
                transaction.savepoint_rollback(sid)
                raise
        else:
            logger.info(f"Unable to get lock for {self}")
    except (RequiredIsMissing, RequiredIsRunning) as e:
        # dependency problems are retryable
        self.on_end(error=e, retry=True)
        raise
    except BaseException as e:
        self.on_end(e)
        process_exception(e)
        raise
    else:
        self.on_end(None)
        cache.set("STATUS:%s" % self.etl_task.task,
                  "completed - %s" % self.results.processed)
    finally:
        if lock:  # pragma: no branch
            try:
                lock.release()
            except LockError as e:  # pragma: no cover
                logger.warning(e)
    return self.results