示例#1
0
 def test_custom_filter(self):
     # Two fields are flagged as available; only one of them carries the
     # title used as an extra filter below.
     for title in ('test_title', 'other_title'):
         created = self.distribution.field_set.create(title=title)
         created.enhanced_meta.create(key=meta_keys.AVAILABLE, value='true')

     matching = SeriesRepository.get_available_series(title='test_title')
     self.assertEqual(matching.count(), 1)
def get_distribution_series_identifers(distribution: Distribution,
                                       series_titles: list) -> list:
    """Translate series titles into series identifiers.

    The title -> identifier mapping is built from the series returned by
    ``SeriesRepository.get_present_series`` for ``distribution``. The result
    follows the order of ``series_titles``; a title that is absent from the
    mapping raises ``KeyError``.
    """
    present_series = SeriesRepository.get_present_series(distribution=distribution)
    title_to_identifier = dict(
        (serie.title, serie.identifier) for serie in present_series
    )

    identifiers = []
    for title in series_titles:
        identifiers.append(title_to_identifier[title])
    return identifiers
示例#3
0
 def test_available_and_unavailable_series(self):
     # First field gets no AVAILABLE metadata; only the second one is flagged.
     self.distribution.field_set.create()
     flagged = self.distribution.field_set.create()
     flagged.enhanced_meta.create(key=meta_keys.AVAILABLE, value='true')

     result = SeriesRepository.get_available_series()
     self.assertEqual(result.count(), 1)
示例#4
0
    def calculate_distribution_indicators(self, node, data_json, catalog):
        """Store every distribution-level indicator of ``catalog`` for ``node``.

        Each indicator is saved through ``self.create`` with its type, the
        computed value and ``node``. Values come from counts over
        ``Distribution`` querysets filtered by catalog, from
        ``data_json.get_distributions(only_time_series=True)`` and from the
        series returned by ``SeriesRepository.get_available_series``.
        """
        def store(indicator_type, value):
            # Single place for the repeated self.create(...) call.
            self.create(type=indicator_type, value=value, node=node)

        # --- Distributions whose dataset is indexable ---
        indexable = Distribution.objects.filter(dataset__catalog=catalog,
                                                dataset__indexable=True)
        store(Indicator.DISTRIBUTION_UPDATED,
              indexable.filter(updated=True).count())
        store(Indicator.DISTRIBUTION_NOT_UPDATED,
              indexable.filter(updated=False).count())
        store(Indicator.DISTRIBUTION_INDEXABLE_DISCONTINUED,
              indexable.filter(present=False).count())
        store(Indicator.DISTRIBUTION_INDEXABLE, indexable.count())

        # --- Distributions whose dataset is not indexable ---
        not_indexable = Distribution.objects.filter(dataset__catalog=catalog,
                                                    dataset__indexable=False)
        store(Indicator.DISTRIBUTION_NEW,
              not_indexable.filter(new=True).count())
        store(Indicator.DISTRIBUTION_NOT_INDEXABLE_PREVIOUS,
              not_indexable.filter(new=False, present=True).count())

        # Errors are counted over every distribution of the catalog,
        # regardless of the dataset's indexable flag.
        with_error = Distribution.objects.filter(dataset__catalog=catalog,
                                                 error=True)
        store(Indicator.DISTRIBUTION_ERROR, with_error.count())

        store(Indicator.DISTRIBUTION_NOT_INDEXABLE_DISCONTINUED,
              not_indexable.filter(present=False).count())
        store(Indicator.DISTRIBUTION_NOT_INDEXABLE, not_indexable.count())

        # --- Totals taken from the data.json and from the available series ---
        time_series_distributions = data_json.get_distributions(
            only_time_series=True)
        store(Indicator.DISTRIBUTION_AVAILABLE, len(time_series_distributions))

        catalog_series = SeriesRepository.get_available_series().filter(
            distribution__dataset__catalog=catalog)
        # Number of distinct distributions referenced by those series.
        distinct_distributions = catalog_series.values_list(
            'distribution').distinct()
        store(Indicator.DISTRIBUTION_TOTAL, distinct_distributions.count())
示例#5
0
 def update_distribution_indexation_metadata(self, distribution):
     # Flags every series of the distribution except its time index as
     # available, then stores the periodicity of the time index on the
     # distribution itself.
     time_index = DistributionRepository(distribution).get_time_index_series()
     present_series = SeriesRepository.get_present_series(distribution=distribution)
     for serie in present_series.exclude(id=time_index.id):
         # NOTE(review): 'value' is passed as a lookup here, not through
         # ``defaults`` as in the PERIODICITY call below, so an existing
         # AVAILABLE entry holding a different value would not be updated.
         # Confirm this is intended.
         serie.enhanced_meta.update_or_create(key=meta_keys.AVAILABLE, value='true')

     # Computation of additional metadata on each series
     periodicity = get_distribution_time_index_periodicity(time_index)
     distribution.enhanced_meta.update_or_create(key=meta_keys.PERIODICITY,
                                                 defaults={'value': periodicity})
示例#6
0
 def _delete_distribution_data(self, distribution):
     # Gather the non-null identifiers of the distribution's series and
     # delete every document in the index whose series_id is one of them.
     present_series = SeriesRepository.get_present_series(distribution=distribution)
     with_identifier = present_series.exclude(identifier=None)
     series_ids = list(with_identifier.values_list('identifier', flat=True))

     search = Search(using=self.elastic, index=self.index._name)
     search.filter('terms', series_id=series_ids).delete()
示例#7
0
def get_all_available_series_units() -> set:
    """Return the set of distinct ``units`` values of the available series.

    Each series' ``metadata`` column is parsed as JSON; entries whose
    ``units`` key is missing or falsy are skipped.
    """
    raw_metadata = SeriesRepository.get_available_series().values_list(
        'metadata', flat=True)
    every_units = (json.loads(raw).get('units') for raw in raw_metadata)
    return {units for units in every_units if units}
    def init_data(self):
        """Initialise ``self.fields``, a dictionary keyed by series ID that
        holds the values to be written to each of the CSV files.

        When ``self.catalog`` is set, only the series of that catalog are
        loaded; if no catalog with that identifier exists the method returns
        without loading anything.
        """
        series = SeriesRepository.get_available_series().exclude(identifier=None)

        if self.catalog:
            try:
                catalog = Catalog.objects.get(identifier=self.catalog)
            except Catalog.DoesNotExist:
                return
            series = series.filter(distribution__dataset__catalog=catalog)

        series = series.prefetch_related(
            'distribution',
            'distribution__dataset',
            'distribution__dataset__catalog',
            'enhanced_meta',
        )
        metadata_objects = Metadata.objects.all()
        field_content_type = ContentType.objects.get_for_model(Field)

        for serie in series:
            distribution = serie.distribution
            dataset = distribution.dataset

            serie_meta = json.loads(serie.metadata)
            distribution_meta = json.loads(distribution.metadata)
            dataset_meta = json.loads(dataset.metadata)

            raw_themes = dataset.themes
            if raw_themes:
                theme_labels = get_theme_labels(json.loads(raw_themes))
            else:
                theme_labels = ''

            # Lazy queryset with the Metadata objects attached to this series;
            # it is evaluated when the 'metadata' entry below is built.
            serie_metadata_objects = metadata_objects.filter(
                content_type=field_content_type, object_id=serie.id)

            self.fields[serie.identifier] = {
                'dataset': dataset,
                'distribution': distribution,
                'serie': serie,
                'serie_titulo': serie.title,
                'serie_unidades': serie_meta.get('units'),
                'serie_descripcion': serie_meta.get('description'),
                'distribucion_titulo': distribution_meta.get('title'),
                'distribucion_descripcion': distribution_meta.get('description'),
                'distribucion_url_descarga': distribution.download_url,
                'dataset_responsable': dataset_meta.get('publisher', {}).get('name'),
                'dataset_fuente': dataset_meta.get('source'),
                'dataset_titulo': dataset.title,
                'dataset_descripcion': dataset_meta.get('description'),
                'dataset_tema': theme_labels,
                'metadata': {item.key: item.value for item in serie_metadata_objects},
                'frequency': self.serie_periodicity(serie),
            }
def init_df(distribution: Distribution, time_index: Field):
    """Initialise the DataFrame for the CSV of the given distribution,
    setting the correct time index and validating the columns found in
    the data.

    The returned DataFrame uses the series identifiers as column names.
    """
    # Reload the distribution in case it had already been read
    distribution.refresh_from_db()

    frame = read_distribution_csv_as_df(distribution, time_index)
    present_series = SeriesRepository.get_present_series(distribution=distribution)
    known_titles = [serie.title for serie in present_series]
    drop_null_or_missing_fields_from_df(frame, known_titles)

    # The values are taken before the new time index is generated, in the
    # same order as the steps have always run.
    values = frame.values
    time_axis = generate_df_time_index(frame, time_index)
    column_ids = get_distribution_series_identifers(distribution,
                                                    series_titles=frame.columns)
    return pd.DataFrame(index=time_axis, data=values, columns=column_ids)
示例#10
0
    def _get_model(self, series_id):
        """Check that 'series_id' is valid, i.e. that the requested series
        is an ID contained in the database. When it is not found, the
        error list is filled accordingly.

        Returns the series model, or None when there is no available series
        with that identifier or it has no INDEX_START metadata.
        """
        candidate = SeriesRepository.get_available_series(identifier=series_id).first()

        # Short-circuit: the metadata lookup only runs when a model was found.
        is_usable = (
            candidate is not None
            and meta_keys.get(candidate, meta_keys.INDEX_START) is not None
        )
        if is_usable:
            return candidate

        self._append_error(SERIES_DOES_NOT_EXIST.format(series_id), series_id=series_id)
        return None
示例#11
0
def update_popularity_metadata(distribution: Distribution):
    """Refresh the popularity metadata of the available series of
    ``distribution``.

    Nothing is done when the ``SeriesQuery`` index does not exist or the
    distribution has no available series. Otherwise, one aggregation is run
    for each ``(meta_key, days)`` pair in ``KEY_DAYS_PAIRS`` and its result
    is handed to ``update_series_popularity_metadata``.
    """
    index_exists = Index(SeriesQuery._doc_type.index).exists()
    if not index_exists:
        return

    available = SeriesRepository.get_available_series(distribution=distribution)
    identifiers = available.values_list('identifier', flat=True)
    if not identifiers:
        return

    for meta_key, days in KEY_DAYS_PAIRS:
        search = SeriesQuery.search()
        if days:
            # A falsy 'days' leaves the search without a time restriction.
            search = search.filter('range', timestamp={'gte': f'now-{days}d/d'})

        filters_by_id = {}
        for identifier in identifiers:
            filters_by_id[identifier] = get_serie_filter(identifier)

        aggregated = popularity_aggregation(search, filters_by_id)
        update_series_popularity_metadata(aggregated, meta_key, available)
示例#12
0
 def test_present_and_available_series_non_present(self):
     # The field is flagged as available but created with present=False,
     # so the query is expected to return nothing.
     hidden = self.distribution.field_set.create(present=False)
     hidden.enhanced_meta.create(key=meta_keys.AVAILABLE, value='true')

     result = SeriesRepository.get_present_and_available_series()
     self.assertFalse(result)
示例#13
0
 def get_available_fields(self):
     # Available series restricted to the catalog identified by
     # self.node.catalog_id.
     catalog_filter = {
         'distribution__dataset__catalog__identifier': self.node.catalog_id,
     }
     return SeriesRepository.get_available_series(**catalog_filter)
def all_time_series():
    """Return the identifiers of every available series as a flat
    ``values_list`` over the ``identifier`` column.
    """
    available = SeriesRepository.get_available_series()
    return available.values_list('identifier', flat=True)
示例#15
0
 def test_no_series_no_results(self):
     # This test creates no fields, so the result is expected to be empty.
     result = SeriesRepository.get_available_series()
     self.assertFalse(result)
示例#16
0
 def test_present_and_available_series_non_available(self):
     # A present field with no AVAILABLE metadata must not be returned.
     self.distribution.field_set.create(present=True)

     result = SeriesRepository.get_present_and_available_series()
     self.assertFalse(result)
示例#17
0
 def test_no_available_series_no_results(self):
     # The only field created here carries no AVAILABLE metadata.
     self.distribution.field_set.create()

     result = SeriesRepository.get_available_series()
     self.assertFalse(result)
示例#18
0
 def test_one_available_series_shows_up_in_result(self):
     # A single field flagged as available should make the result non-empty.
     flagged = self.distribution.field_set.create()
     flagged.enhanced_meta.create(key=meta_keys.AVAILABLE, value='true')

     result = SeriesRepository.get_available_series()
     self.assertTrue(result)