示例#1
0
    def dataset_last_change_date(dataset_dict):
        '''
        Return the later of the dataset's "last_modified" value and its
        "review_date" extra, as a timezone-naive datetime.

        If only one of the two values is set, that one is returned. If both
        are set but only one of them carries timezone information, the
        timezone is dropped from both before comparing, because comparing an
        offset-naive with an offset-aware datetime raises TypeError.

        :param dataset_dict: dataset to inspect; "last_modified" is read
            directly from it, "review_date" via get_extra_from_dataset()
        :type dataset_dict: dict
        :return: the most recent change date with tzinfo removed, or None
            when neither value is set
        :rtype: datetime.datetime
        '''
        # last_modified is not an extra; only stored in solr
        last_modified = dataset_dict.get('last_modified')
        reviewed = get_extra_from_dataset('review_date', dataset_dict)

        # The review date goes first: on a tie max() returns the first item,
        # so last_modified only wins when it is strictly newer.
        parsed = [dateutil.parser.parse(raw)
                  for raw in (reviewed, last_modified) if raw]
        if not parsed:
            return None

        # Mixed naive/aware values cannot be compared; fall back to comparing
        # the wall-clock values in that case only.
        if not all(d.utcoffset() is not None for d in parsed):
            parsed = [d.replace(tzinfo=None) for d in parsed]

        return max(parsed).replace(tzinfo=None)
示例#2
0
    def is_fresh(self, now=None):
        '''
        Tell whether the dataset counts as fresh.

        A dataset whose "data_update_frequency" extra parses to an integer
        less than or equal to zero is always reported as fresh. In every
        other case (including a non-numeric frequency, which is logged) the
        decision is delegated to the parent class.

        :param now: reference time for the check; defaults to the current
            UTC time, evaluated on each call
        :type now: datetime.datetime
        :return: True if the dataset is considered fresh
        '''
        # Resolve the default per call: a default of utcnow() in the signature
        # would be evaluated only once, when the function is defined.
        if now is None:
            now = datetime.datetime.utcnow()

        update_freq = get_extra_from_dataset('data_update_frequency',
                                             self.dataset_dict)
        try:
            if update_freq is not None and int(update_freq) <= 0:
                return True
        except ValueError:
            log.info(
                'Update frequency for dataset "{}" is not a number'.format(
                    self.dataset_dict.get('name')))

        return super(DataCompletenessFreshnessCalculator, self).is_fresh(now)
示例#3
0
 def __init__(self, dataset_dict):
     '''
     Prepare the freshness computation for a dataset.

     ``surely_not_fresh`` starts as True and is only cleared when a last
     change date could be determined and the dataset's
     "data_update_frequency" extra has an entry in UPDATE_FREQ_INFO. In
     that case ``extra_days`` (the UPDATE_FREQ_INFO entry) and
     ``update_freq_in_days`` (the frequency as an int) are set as well.

     Any exception raised while doing so is logged and swallowed, leaving
     ``surely_not_fresh`` at True.

     :param dataset_dict: the dataset to compute freshness for
     :type dataset_dict: dict
     '''
     self.surely_not_fresh = True
     self.dataset_dict = dataset_dict
     update_freq = get_extra_from_dataset('data_update_frequency',
                                          dataset_dict)
     try:
         self.modified = FreshnessCalculator.dataset_last_change_date(
             dataset_dict)
         if self.modified and update_freq and UPDATE_FREQ_INFO.get(
                 update_freq):
             self.extra_days = UPDATE_FREQ_INFO.get(update_freq)
             self.update_freq_in_days = int(update_freq)
             self.surely_not_fresh = False
     except Exception as e:
         log.error(unicode(e))
示例#4
0
文件: get.py 项目: skvisha/hdx-ckan
def _additional_hdx_package_show_processing(context, package_dict, just_for_reindexing=False):
    # added because showcase schema validation is generating "ckan.lib.navl.dictization_functions.Missing"
    if 'tracking_summary' in package_dict and not package_dict.get('tracking_summary'):
        del package_dict['tracking_summary']
    # this shouldn't be executed from showcases
    if package_dict.get('type') == 'dataset' and not context.get('no_compute_extra_hdx_show_properties'):

        for resource_dict in package_dict.get('resources', []):
            _additional_hdx_resource_show_processing(context, resource_dict)

        # downloads_list = (res['tracking_summary']['total'] for res in package_dict.get('resources', []) if
        #                   res.get('tracking_summary', {}).get('total'))
        # package_dict['total_res_downloads'] = sum(downloads_list)

        if _should_manually_load_property_value(context, package_dict, 'total_res_downloads'):
            total_res_downloads = jql.downloads_per_dataset_all_cached().get(package_dict['id'], 0)
            log.debug('Dataset {} has {} downloads'.format(package_dict['id'], total_res_downloads))
            package_dict['total_res_downloads'] = total_res_downloads

        if _should_manually_load_property_value(context, package_dict, 'pageviews_last_14_days'):
            pageviews_last_14_days = jql.pageviews_per_dataset_last_14_days_cached().get(package_dict['id'], 0)
            log.debug(
                'Dataset {} has {} page views in the last 14 days'.format(package_dict['id'], pageviews_last_14_days))
            package_dict['pageviews_last_14_days'] = pageviews_last_14_days

        if _should_manually_load_property_value(context, package_dict, 'has_quickcharts'):
            package_dict['has_quickcharts'] = False
            for resource_dict in package_dict.get('resources', []):
                resource_views = get_action('resource_view_list')(context, {'id': resource_dict['id']}) or []
                for view in resource_views:
                    if view.get("view_type") == 'hdx_hxl_preview':
                        package_dict['has_quickcharts'] = True
                        break

        if _should_manually_load_property_value(context, package_dict, 'has_geodata'):
            package_dict['has_geodata'] = False
            for resource_dict in package_dict.get('resources', []):
                if resource_dict.get('format') in GEODATA_FORMATS:
                    package_dict['has_geodata'] = True
                    break

        if _should_manually_load_property_value(context, package_dict, 'has_showcases'):
            package_dict['has_showcases'] = False
            package_dict['num_of_showcases'] = 0
            num_of_showcases = len(hdx_get_package_showcase_id_list(context, {'package_id': package_dict['id']}))
            if num_of_showcases > 0:
                package_dict['has_showcases'] = True
                package_dict['num_of_showcases'] = num_of_showcases

        if _should_manually_load_property_value(context, package_dict, 'last_modified'):
            if get_extra_from_dataset('is_requestdata_type', package_dict):
                package_dict['last_modified'] = package_dict.get('metadata_modified')
            else:
                package_dict['last_modified'] = None
                all_dates = [dateutil.parser.parse(r.get('last_modified'))
                             for r in package_dict.get('resources', [])
                             if r.get('last_modified')]
                if all_dates:
                    package_dict['last_modified'] = max(all_dates).isoformat()

        freshness_calculator = freshness.get_calculator_instance(package_dict, None)
        if _should_manually_load_property_value(context, package_dict, 'due_date'):
            package_dict.pop('due_date', None)
            package_dict.pop('overdue_date', None)
            # package_dict.pop('due_daterange', None)
            # package_dict.pop('overdue_daterange', None)
            freshness_calculator.populate_with_date_ranges()

        if not just_for_reindexing:
            member_list = get_action('hdx_member_list')(context, {'org_id': package_dict.get('owner_org')})
            if member_list and not member_list.get('is_member'):
                del package_dict['maintainer_email']

            # Freshness should be computed after the last_modified field
            freshness_calculator.populate_with_freshness()