def dataset_last_change_date(dataset_dict):
    '''
    Return the most recent of the dataset's 'last_modified' value and its
    'review_date' extra, as a timezone-naive datetime.

    :param dataset_dict: the dataset; 'last_modified' is read directly from it
        and 'review_date' is looked up through get_extra_from_dataset()
    :type dataset_dict: dict
    :return: the later of the two dates with tzinfo stripped, the only
        available one if the other is missing, or None if neither is set
    :rtype: datetime.datetime
    '''
    # last_modified is not an extra; only stored in solr
    last_modified = dataset_dict.get('last_modified')
    reviewed = get_extra_from_dataset('review_date', dataset_dict)

    if not last_modified or not reviewed:
        # At most one of the two values is available: use whichever it is.
        last = last_modified or reviewed
        if not last:
            return None
        last_change_date = dateutil.parser.parse(last)
    else:
        last_modified_date = dateutil.parser.parse(last_modified)
        review_date = dateutil.parser.parse(reviewed)
        try:
            modified_is_newer = last_modified_date > review_date
        except TypeError:
            # One value is timezone-aware and the other is naive, which
            # datetime refuses to compare. The result is made naive below
            # anyway, so fall back to comparing the wall-clock values.
            modified_is_newer = (last_modified_date.replace(tzinfo=None) >
                                 review_date.replace(tzinfo=None))
        # On a tie the review date wins, as before.
        last_change_date = last_modified_date if modified_is_newer else review_date

    return last_change_date.replace(tzinfo=None)
def is_fresh(self, now=None):
    '''
    Tell whether the dataset is fresh, treating a non-positive
    'data_update_frequency' extra as always fresh.

    :param now: the moment to evaluate freshness against; defaults to the
        current UTC time, computed on every call
    :type now: datetime.datetime
    :return: True when the update frequency is a number <= 0, otherwise
        whatever the parent class' is_fresh() returns
    :rtype: bool
    '''
    # The default must be resolved here: a `utcnow()` call placed in the
    # signature is evaluated only once, when the function is defined.
    if now is None:
        now = datetime.datetime.utcnow()

    update_freq = get_extra_from_dataset('data_update_frequency', self.dataset_dict)
    try:
        if update_freq is not None and int(update_freq) <= 0:
            return True
    except ValueError:
        # Not fatal: the parent implementation decides below.
        log.info(
            'Update frequency for dataset "{}" is not a number'.format(
                self.dataset_dict.get('name')))
    return super(DataCompletenessFreshnessCalculator, self).is_fresh(now)
def __init__(self, dataset_dict):
    '''
    Prepare the freshness computation for a dataset.

    The instance starts out flagged as surely_not_fresh. The flag is cleared
    only if the dataset has a last change date and a 'data_update_frequency'
    extra that maps to a truthy entry in UPDATE_FREQ_INFO. Any exception
    raised along the way is logged and leaves the flag set.

    :param dataset_dict: the dataset to compute freshness for
    :type dataset_dict: dict
    '''
    self.surely_not_fresh = True
    self.dataset_dict = dataset_dict
    update_freq = get_extra_from_dataset('data_update_frequency', dataset_dict)
    try:
        self.modified = FreshnessCalculator.dataset_last_change_date(dataset_dict)
        if self.modified and update_freq:
            # Look the frequency up once; a missing or falsy entry means
            # the instance stays flagged as surely_not_fresh.
            extra_days = UPDATE_FREQ_INFO.get(update_freq)
            if extra_days:
                self.extra_days = extra_days
                self.update_freq_in_days = int(update_freq)
                self.surely_not_fresh = False
    except Exception as e:
        # Best effort: a dataset with unusable dates or frequency must not
        # break the caller, it is simply treated as not fresh.
        log.error(unicode(e))
def _dataset_has_quickcharts(context, resources):
    '''Return True as soon as any resource has a 'hdx_hxl_preview' view.'''
    for resource_dict in resources:
        resource_views = get_action('resource_view_list')(context, {'id': resource_dict['id']}) or []
        for view in resource_views:
            if view.get("view_type") == 'hdx_hxl_preview':
                return True
    return False


def _additional_hdx_package_show_processing(context, package_dict, just_for_reindexing=False):
    '''
    Enrich a package dict, in place, with the HDX computed properties.

    Nothing beyond the 'tracking_summary' cleanup is done unless the package
    type is 'dataset' and the context does not carry
    'no_compute_extra_hdx_show_properties'. Each computed property is only
    filled in when _should_manually_load_property_value() says so.

    :param context: the action context, passed through to the called actions
    :type context: dict
    :param package_dict: the package to enrich; modified in place
    :type package_dict: dict
    :param just_for_reindexing: when True, the membership check on
        'maintainer_email' and the freshness population are skipped
    :type just_for_reindexing: bool
    '''
    # added because showcase schema validation is generating "ckan.lib.navl.dictization_functions.Missing"
    if 'tracking_summary' in package_dict and not package_dict.get('tracking_summary'):
        del package_dict['tracking_summary']

    # this shouldn't be executed from showcases
    if package_dict.get('type') == 'dataset' and not context.get('no_compute_extra_hdx_show_properties'):
        resources = package_dict.get('resources', [])
        for resource_dict in resources:
            _additional_hdx_resource_show_processing(context, resource_dict)

        if _should_manually_load_property_value(context, package_dict, 'total_res_downloads'):
            total_res_downloads = jql.downloads_per_dataset_all_cached().get(package_dict['id'], 0)
            log.debug('Dataset {} has {} downloads'.format(package_dict['id'], total_res_downloads))
            package_dict['total_res_downloads'] = total_res_downloads

        if _should_manually_load_property_value(context, package_dict, 'pageviews_last_14_days'):
            pageviews_last_14_days = jql.pageviews_per_dataset_last_14_days_cached().get(package_dict['id'], 0)
            log.debug(
                'Dataset {} has {} page views in the last 14 days'.format(package_dict['id'], pageviews_last_14_days))
            package_dict['pageviews_last_14_days'] = pageviews_last_14_days

        if _should_manually_load_property_value(context, package_dict, 'has_quickcharts'):
            package_dict['has_quickcharts'] = False
            # Stops querying resource views at the first match.
            if _dataset_has_quickcharts(context, resources):
                package_dict['has_quickcharts'] = True

        if _should_manually_load_property_value(context, package_dict, 'has_geodata'):
            package_dict['has_geodata'] = any(
                resource_dict.get('format') in GEODATA_FORMATS for resource_dict in resources)

        if _should_manually_load_property_value(context, package_dict, 'has_showcases'):
            num_of_showcases = len(hdx_get_package_showcase_id_list(context, {'package_id': package_dict['id']}))
            package_dict['has_showcases'] = num_of_showcases > 0
            package_dict['num_of_showcases'] = num_of_showcases

        if _should_manually_load_property_value(context, package_dict, 'last_modified'):
            if get_extra_from_dataset('is_requestdata_type', package_dict):
                package_dict['last_modified'] = package_dict.get('metadata_modified')
            else:
                # Latest 'last_modified' among the resources, or None if no
                # resource has one.
                package_dict['last_modified'] = None
                all_dates = [dateutil.parser.parse(r.get('last_modified'))
                             for r in resources if r.get('last_modified')]
                if all_dates:
                    package_dict['last_modified'] = max(all_dates).isoformat()

        freshness_calculator = freshness.get_calculator_instance(package_dict, None)
        if _should_manually_load_property_value(context, package_dict, 'due_date'):
            package_dict.pop('due_date', None)
            package_dict.pop('overdue_date', None)
            freshness_calculator.populate_with_date_ranges()

        # NOTE(review): the nesting of this final section was reconstructed
        # from whitespace-collapsed source - confirm that the freshness call
        # belongs under `if not just_for_reindexing`.
        if not just_for_reindexing:
            member_list = get_action('hdx_member_list')(context, {'org_id': package_dict.get('owner_org')})
            if member_list and not member_list.get('is_member'):
                # pop() instead of del: the key may be absent from the dict.
                package_dict.pop('maintainer_email', None)

            # Freshness should be computed after the last_modified field
            freshness_calculator.populate_with_freshness()