Example #1
    def run(self, distribution):
        fields = distribution.field_set.all()
        fields = {field.title: field.identifier for field in fields}
        df = self.init_df(distribution, fields)

        # Apply the processing and indexing operation to each column
        result = [
            process_column(df[col], self.index_name) for col in df.columns
        ]

        if not result:  # Distribution with no loaded series
            return

        # List flatten: if the result is a list of lists, merge them into one
        actions = (reduce(lambda x, y: x + y, result)
                   if isinstance(result[0], list) else result)

        self.add_catalog_keyword(actions, distribution)
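        # Index the actions in parallel, logging any bulk request that fails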
        for success, info in parallel_bulk(self.elastic, actions):
            if not success:
                logger.warning(strings.BULK_REQUEST_ERROR, info)

        remove_duplicated_fields(distribution)
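        # Flag every non-time-index field of the distribution as available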
        for field in distribution.field_set.exclude(title='indice_tiempo'):
            field.enhanced_meta.update_or_create(key=meta_keys.AVAILABLE,
                                                 value='true')

        # Compute additional metadata for each series
        df.apply(update_enhanced_meta,
                 args=(distribution.dataset.catalog.identifier,
                       distribution.identifier))
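
The actions fed to parallel_bulk are plain dicts in the Elasticsearch bulk format. A minimal, hypothetical sketch of a process_column helper with that shape is shown below; the field names and _id scheme are assumptions for illustration, not the project's actual implementation.

# Hypothetical sketch only: builds one Elasticsearch bulk action per
# observation of a pandas Series. Field names and the _id scheme are
# assumptions, not the project's real mapping.
def process_column(col, index_name):
    actions = []
    for timestamp, value in col.dropna().items():
        actions.append({
            '_index': index_name,
            '_id': '{}-{}'.format(col.name, timestamp),
            '_source': {
                'series_id': col.name,
                'timestamp': str(timestamp),
                'value': float(value),
            },
        })
    return actions
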
Example #2
def update_distribution_metadata(changed, distribution_model):
    time_index = DistributionRepository(
        distribution_model).get_time_index_series()
    df = init_df(distribution_model, time_index)

    periodicity = get_distribution_time_index_periodicity(time_index)
    new_metadata = []
    metas_to_delete = []
    field_content_type = ContentType.objects.get_for_model(Field)
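    # For each present series, compute its enhanced metadata: queue the new
    # Metadata rows and collect the ids of the stale rows they will replace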
    for serie in list(df.columns):
        meta = calculate_enhanced_meta(df[serie], periodicity)

        field = distribution_model.field_set.get(identifier=serie,
                                                 present=True)
        for meta_key, value in meta.items():
            new_metadata.append(
                Metadata(content_type=field_content_type,
                         object_id=field.id,
                         key=meta_key,
                         value=value))

        metas_to_delete.extend(
            Metadata.objects.filter(object_id=field.id,
                                    key__in=list(meta.keys())).values_list(
                                        'id', flat=True))
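    # Swap the old metadata rows for the new ones in a single transaction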
    with transaction.atomic():
        Metadata.objects.filter(id__in=metas_to_delete).delete()
        Metadata.objects.bulk_create(new_metadata)

    distribution_model.enhanced_meta.update_or_create(
        key=meta_keys.LAST_HASH,
        defaults={'value': distribution_model.data_hash})
    distribution_model.enhanced_meta.update_or_create(
        key=meta_keys.CHANGED, defaults={'value': str(changed)})
    update_popularity_metadata(distribution_model)
    remove_duplicated_fields(distribution_model)
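
calculate_enhanced_meta is not shown above. The sketch below assumes it derives a dict of per-series metadata values from a single pandas series and its periodicity; the keys and computations are illustrative assumptions, not the project's actual metadata.

# Hypothetical sketch only: derives per-series metadata as a dict of
# string key/value pairs, matching how the loop above turns each entry
# into a Metadata row. Keys and computations are illustrative assumptions.
def calculate_enhanced_meta(serie, periodicity):
    serie = serie.dropna()
    return {
        'index_start': str(serie.index[0]),
        'index_end': str(serie.index[-1]),
        'periodicity': periodicity,
        'last_value': str(serie.iloc[-1]),
        'index_size': str(len(serie)),
    }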