def run(self, distribution):
    """Process every series of ``distribution`` and submit the bulk actions.

    Steps, in order:

    1. Build a ``title -> identifier`` mapping from the distribution's fields
       and use it to build the dataframe through ``self.init_df``.
    2. Run ``process_column`` on every dataframe column to obtain the bulk
       actions, tag them with ``self.add_catalog_keyword`` and send them
       through ``parallel_bulk`` using ``self.elastic``. Failed actions are
       logged as warnings; they do not abort the run.
    3. Call ``remove_duplicated_fields`` and flag every field except the one
       titled ``'indice_tiempo'`` as available.
    4. Apply ``update_enhanced_meta`` to every dataframe column.

    Args:
        distribution: Distribution model instance whose series are processed.

    Returns:
        None. Returns early, before any side effect, when the dataframe has
        no columns.
    """
    # Function-scope import: the module's import block is not visible here.
    from itertools import chain

    fields = {
        field.title: field.identifier
        for field in distribution.field_set.all()
    }
    df = self.init_df(distribution, fields)

    # Apply the processing and indexing operation to each column.
    result = [
        process_column(df[col], self.index_name) for col in df.columns
    ]

    if not result:  # Distribution without loaded series.
        return

    # Flatten: when each column produced a list of actions, join them all
    # into a single list (linear, unlike repeated list concatenation).
    if isinstance(result[0], list):
        actions = list(chain.from_iterable(result))
    else:
        actions = result

    self.add_catalog_keyword(actions, distribution)
    for success, info in parallel_bulk(self.elastic, actions):
        if not success:
            logger.warning(strings.BULK_REQUEST_ERROR, info)

    remove_duplicated_fields(distribution)
    for field in distribution.field_set.exclude(title='indice_tiempo'):
        # Look the entry up by key only and pass the value through
        # ``defaults``: an existing entry holding a different value is then
        # updated in place instead of a second row with the same key being
        # created next to it.
        field.enhanced_meta.update_or_create(
            key=meta_keys.AVAILABLE, defaults={'value': 'true'})

    # Compute the additional metadata of each series.
    df.apply(update_enhanced_meta,
             args=(distribution.dataset.catalog.identifier,
                   distribution.identifier))
def update_distribution_metadata(changed, distribution_model):
    """Recompute and store the enhanced metadata of a distribution.

    For every column of the distribution's dataframe, the values returned by
    ``calculate_enhanced_meta`` replace the ``Metadata`` rows of the matching
    field that share the same keys. The distribution's own ``LAST_HASH`` and
    ``CHANGED`` entries are then refreshed, and finally
    ``update_popularity_metadata`` and ``remove_duplicated_fields`` are run.

    Args:
        changed: Stored as ``str(changed)`` under ``meta_keys.CHANGED``.
        distribution_model: Distribution whose fields and metadata are
            updated.

    Raises:
        Propagates whatever ``field_set.get(identifier=..., present=True)``
        raises when a dataframe column does not match exactly one field
        (presumably the Django ``DoesNotExist`` /
        ``MultipleObjectsReturned`` errors -- confirm against the model).
    """
    time_index = DistributionRepository(
        distribution_model).get_time_index_series()
    df = init_df(distribution_model, time_index)
    periodicity = get_distribution_time_index_periodicity(time_index)
    field_content_type = ContentType.objects.get_for_model(Field)

    new_metadata = []
    metas_to_delete = []
    for serie in df.columns:
        meta = calculate_enhanced_meta(df[serie], periodicity)
        field = distribution_model.field_set.get(identifier=serie,
                                                 present=True)

        new_metadata.extend(
            Metadata(content_type=field_content_type,
                     object_id=field.id,
                     key=meta_key,
                     value=value)
            for meta_key, value in meta.items())

        # Restrict the stale rows to the Field content type: ``object_id``
        # alone is not unique across models, so filtering only by it could
        # also select metadata attached to a different model whose primary
        # key happens to equal ``field.id``.
        stale_ids = Metadata.objects.filter(
            content_type=field_content_type,
            object_id=field.id,
            key__in=list(meta.keys()),
        ).values_list('id', flat=True)
        metas_to_delete.extend(stale_ids)

    # Delete and re-create inside one transaction so both steps are
    # committed, or rolled back, together.
    with transaction.atomic():
        Metadata.objects.filter(id__in=metas_to_delete).delete()
        Metadata.objects.bulk_create(new_metadata)

    distribution_model.enhanced_meta.update_or_create(
        key=meta_keys.LAST_HASH,
        defaults={'value': distribution_model.data_hash})
    distribution_model.enhanced_meta.update_or_create(
        key=meta_keys.CHANGED,
        defaults={'value': str(changed)})
    update_popularity_metadata(distribution_model)
    remove_duplicated_fields(distribution_model)