Пример #1
0
 def after_show(self, context, pkg_dict):
     """Reformat date fields of ``pkg_dict`` after ``package_show``.

     For every field of type 'date' in the custom package schema,
     re-render the stored value using the field's configured ``format``
     (default ``'%d-%m-%Y'``) whenever ``validators.parse_date`` yields
     a ``datetime.date``.  On a formatting error the original value is
     kept and a warning is logged.
     """
     schema = dcatapit_schema.get_custom_package_schema()
     # quick hack on date fields that are in wrong format
     for fdef in schema:
         if fdef.get('type') != 'date':
             continue
         fname = fdef['name']
         df_value = pkg_dict.get(fname)
         if not df_value:
             # missing/empty values are left untouched
             continue
         # parse_date falls back to its 2nd argument (the raw value)
         # when parsing fails, so tmp_value may not be a date at all
         tmp_value = validators.parse_date(df_value, df_value)
         if isinstance(tmp_value, datetime.date):
             try:
                 tmp_value = tmp_value.strftime(
                     fdef.get('format') or '%d-%m-%Y')
             # FIX: was Python 2 `except ValueError, err:` —
             # a SyntaxError under Python 3
             except ValueError as err:
                 log.warning(
                     "dataset %s, field %s: cannot reformat date for %s (from input %s): %s",
                     pkg_dict['name'],
                     fname,
                     tmp_value,
                     df_value,
                     err,
                     exc_info=err)
                 tmp_value = df_value
         pkg_dict[fname] = tmp_value
Пример #2
0
def update_temporal_coverage(pdata):
    """Collect temporal_start/temporal_end for ``pdata``.

    Values are taken from top-level keys or, failing that, from the
    'extras' list (consumed extras entries are removed).  Unparseable
    start dates are reported and dropped.

    NOTE(review): this example is truncated here; the original function
    continues past this excerpt.
    """
    # do not process if tempcov is already present
    if pdata.get('temporal_coverage'):
        return

    tstart = pdata.pop('temporal_start', None)
    tend = pdata.pop('temporal_end', None)

    if not (tstart and tend):
        # fall back to extras; remember the indexes so the consumed
        # entries can be removed afterwards
        to_delete = []
        for idx, ex in enumerate(pdata.get('extras') or []):
            if ex['key'] == 'temporal_start':
                to_delete.append(idx)
                tstart = ex['value']
            if ex['key'] == 'temporal_end':
                to_delete.append(idx)
                tend = ex['value']
        if to_delete:
            # pop from the end so earlier indexes stay valid
            for idx in reversed(to_delete):
                pdata['extras'].pop(idx)

    try:
        tstart = validators.parse_date(tstart).strftime(DATE_FORMAT)
    # FIX: was Python 2 `except (...), err:` — SyntaxError under Py3
    except (Invalid, ValueError, TypeError) as err:
        if tstart is not None:
            # FIX: was `print(u"...".format(...)).encode('utf-8')`,
            # which calls .encode() on print()'s None return in Py3
            print(u"dataset {}: can't use {} as temporal coverage start: {}"
                  .format(pdata['name'], tstart, err))
        tstart = None
Пример #3
0
def update_modified(pdata):
    """Validate ``pdata['modified']`` and reset it to today's date.

    A missing or invalid value is reported before the reset.

    NOTE(review): the parsed value ``data`` is never used — 'modified'
    is always overwritten with the current date.  The Py3 variant of
    this function behaves the same way; confirm this is intended.
    """
    try:
        data = validators.parse_date(pdata['modified'])
    except (KeyError, Invalid,):
        val = pdata.get('modified') or None
        # FIX: was `print(...).encode('utf-8')`, which calls .encode()
        # on print()'s None return under Python 3
        print(u"dataset {}: invalid modified date {}. Using now timestamp"
              .format(pdata['name'], val))
        data = datetime.now()
    pdata['modified'] = datetime.now().strftime("%Y-%m-%d")
Пример #4
0
def update_modified(pdata):
    """Validate the dataset's 'modified' date, then refresh it to today.

    An absent or unparseable value is logged before the refresh.
    """
    try:
        parsed = validators.parse_date(pdata['modified'])
    except (KeyError, Invalid):
        bad_value = pdata.get('modified') or None
        log.info(
            f"dataset {pdata['name']}: invalid modified date {bad_value}. "
            f'Using now timestamp'
        )
        parsed = datetime.now()
    # 'modified' is unconditionally rewritten with the current date
    pdata['modified'] = datetime.now().strftime('%Y-%m-%d')
Пример #5
0
            # Fragment: continuation of update_temporal_coverage — the
            # enclosing `def` lies outside this excerpt.
            # Pop consumed extras from the end so earlier indexes in
            # to_delete remain valid while removing.
            for idx in reversed(to_delete):
                pdata['extras'].pop(idx)

    # NOTE(review): Python 2 `except (...), err:` syntax below — this
    # fragment predates the Py3 rewrite seen elsewhere in this file and
    # is a SyntaxError under Python 3.
    try:
        tstart = validators.parse_date(tstart).strftime(DATE_FORMAT)
    except (
            Invalid,
            ValueError,
            TypeError,
    ), err:
        if tstart is not None:
            # NOTE(review): `.encode('utf-8')` applies to print()'s
            # return value — only meaningful as a Py2 print statement.
            print(u"dataset {}: can't use {} as temporal coverage start: {}".
                  format(pdata['name'], tstart, err)).encode('utf-8')
        tstart = None
    try:
        tend = validators.parse_date(tend).strftime(DATE_FORMAT)
    except (
            Invalid,
            ValueError,
            TypeError,
    ), err:
        if tend is not None:
            print(u"dataset {}: can't use {} as temporal coverage end: {}".
                  format(pdata['name'], tend, err)).encode('utf-8')
        tend = None
    ## handle 2010-01-01 to 2010-01-01 case, use whole year
    # if tstart == tend and tstart.day == 1 and tstart.month == 1:
    #     tend = tend.replace(day=31, month=12)

    # NOTE(review): excerpt is truncated — the `if` body is outside view.
    if (tstart):
Пример #6
0
    def after_show(self, context, pkg_dict):
        """Normalize a package dict after CKAN's ``package_show``.

        Steps performed, in order:
        1. re-render schema 'date' fields into their configured format;
        2. when not rendering for view, ensure a 'theme' extra (a JSON
           list of theme URIs) exists, deriving it from the themes
           aggregate (falling back to 'OP_DATPRO' when absent);
        3. promote ``holder_*`` extras into the top-level dict;
        4. drop holder info for local datasets and delegate the final
           rights-holder resolution to ``_update_pkg_rights_holder``.
        """
        schema = dcatapit_schema.get_custom_package_schema()
        # quick hack on date fields that are in wrong format
        for fdef in schema:
            if fdef.get('type') != 'date':
                continue
            fname = fdef['name']
            df_value = pkg_dict.get(fname)
            if df_value:
                # parse_date falls back to its 2nd argument (the raw
                # value) on failure, so tmp_value may not be a date
                tmp_value = validators.parse_date(df_value, df_value)
                if isinstance(tmp_value, datetime.date):
                    try:
                        tmp_value = tmp_value.strftime(fdef.get('format') or '%d-%m-%Y')
                    except ValueError as err:
                        log.warning('dataset %s, field %s: cannot reformat date for %s (from input %s): %s',
                                    pkg_dict['name'], fname, tmp_value, df_value, err, exc_info=err)
                        tmp_value = df_value
                pkg_dict[fname] = tmp_value

        # themes are parsed by dcat, which requires a list of URI
        # we have the format like this:
        # [{"theme": "AGRI", "subthemes": ["http://eurovoc.europa.eu/100253", "http://eurovoc.europa.eu/100258"]},
        # {"theme": "ENVI", "subthemes": []}]
        # We need to fix this.

        if not context.get('for_view'):
            if not any(x['key'] == 'theme' for x in pkg_dict.get('extras', [])):
                # there's no theme, add the list from the aggreagate
                aggr_raw = pkg_dict.get(FIELD_THEMES_AGGREGATE)
                if aggr_raw is None:
                    # let's try and find it in extras:
                    aggr_raw = next((x['value'] for x in pkg_dict.get('extras', [])
                                     if x['key'] == FIELD_THEMES_AGGREGATE), None)
                if aggr_raw is None:
                    # last resort: synthesize a default aggregate
                    log.error(f'No Aggregates in dataset {pkg_dict.get("id", "_")}')
                    aggr_raw = json.dumps([{'theme': 'OP_DATPRO', 'subthemes':[]}])
                    pkg_dict[FIELD_THEMES_AGGREGATE] = aggr_raw

                themes = []
                for aggr in json.loads(aggr_raw):
                    themes.append(theme_name_to_uri(aggr['theme']))

                extras = pkg_dict.get('extras', [])
                extras.append({'key': 'theme', 'value': json.dumps(themes)})
                pkg_dict['extras'] = extras

        # in some cases (automatic solr indexing after update)
        # pkg_dict may come without validation and thus
        # without extras converted to main dict.
        # this will ensure that holder keys are extracted to main dict
        pkg_update = {}
        to_remove = []
        for eidx, ex in enumerate(pkg_dict.get('extras') or []):
            if ex['key'].startswith('holder_'):
                to_remove.append(eidx)
                pkg_update[ex['key']] = ex['value']

        # guard against a holder_* key living both in extras and in the
        # main dict with conflicting values
        for k in pkg_update.keys():
            if k in pkg_dict:
                if pkg_update[k] == pkg_dict[k]:
                    log.warning(f'Ignoring duplicated key {k} with same value {pkg_update[k]}')
                else:
                    raise KeyError(f'Duplicated key in pkg_dict: {k}: {pkg_update[k]} in extras'
                                   f' vs {pkg_dict[k]} in pkg')

        # pop from the end so earlier indexes in to_remove stay valid
        for tr in reversed(to_remove):
            val = pkg_dict['extras'].pop(tr)
            assert val['key'].startswith('holder_'), val
        pkg_dict.update(pkg_update)

        # remove holder info if pkg is local, use org as a source
        # see https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
        pkg_dict['dataset_is_local'] = helpers.dataset_is_local(pkg_dict['id'])
        if pkg_dict['dataset_is_local']:
            pkg_dict.pop('holder_identifier', None)
            pkg_dict.pop('holder_name', None)
        return self._update_pkg_rights_holder(pkg_dict)
Пример #7
0
def update_temporal_coverage(pdata):
    """Build ``pdata['temporal_coverage']`` from temporal_start/end.

    Values come from the top-level keys or, failing that, from the
    'extras' list (consumed extras entries are removed).  Unparseable
    dates are logged and dropped; when start equals end only the start
    is kept.  Does nothing when 'temporal_coverage' is already set.
    """
    # do not process if tempcov is already present
    if pdata.get('temporal_coverage'):
        return

    tstart = pdata.pop('temporal_start', None)
    tend = pdata.pop('temporal_end', None)

    if not (tstart and tend):
        # fall back to extras; remember indexes of consumed entries
        to_delete = []
        for idx, ex in enumerate(pdata.get('extras') or []):
            if ex['key'] == 'temporal_start':
                to_delete.append(idx)
                tstart = ex['value']
            if ex['key'] == 'temporal_end':
                to_delete.append(idx)
                tend = ex['value']
        # pop from the end so earlier indexes remain valid
        for idx in reversed(to_delete):
            pdata['extras'].pop(idx)

    try:
        tstart = validators.parse_date(tstart).strftime(DATE_FORMAT)
    except (Invalid, ValueError, TypeError):
        if tstart is not None:
            log.error(
                f"dataset {pdata['name']}: can't use {tstart} as temporal coverage start:",
                exc_info=True
            )
        tstart = None
    try:
        tend = validators.parse_date(tend).strftime(DATE_FORMAT)
    except (Invalid, ValueError, TypeError):
        if tend is not None:
            log.error(
                f"dataset {pdata['name']}: can't use {tend} as temporal coverage end:",
                exc_info=True
            )
        tend = None
    # handle 2010-01-01 to 2010-01-01 case, use whole year
    # if tstart == tend and tstart.day == 1 and tstart.month == 1:
    #     tend = tend.replace(day=31, month=12)

    if tstart:
        validator = toolkit.get_validator('dcatapit_temporal_coverage')
        if tstart == tend:
            log.info(
                f"dataset {pdata['name']}: "
                f'the same temporal coverage start/end: {tstart}/{tend}, '
                f'using start only',
            )
            tend = None
        temp_cov = json.dumps([{'temporal_start': tstart,
                                'temporal_end': tend}])
        try:
            pdata['temporal_coverage'] = validator(temp_cov, {})
        except Invalid:
            # BUG FIX: was `exec_info=True` — logging only accepts
            # `exc_info`, so the handler itself raised a TypeError
            log.error(
                f"dataset {pdata['name']}: cannot use temporal coverage {(tstart, tend)}:",
                exc_info=True
            )