def command(self):
    """Run the schema check selected by the command-line arguments.

    The mode is chosen from ``self.args``:

    * no arguments -- validate every dataset yielded by
      ``iterate_local_datasets``, pass the ids of the processed datasets
      to ``delete_deprecated_violations`` and store the dataset count
      under the ``general`` key of the validator's redis client;
    * ``specific <name>`` -- validate the single dataset that
      ``package_show`` returns for ``<name>``;
    * ``remote <endpoint>`` -- page through the datasets of the CKAN
      instance at ``<endpoint>``, validate each page and write the
      rendered result.

    Any other argument combination falls through without validating
    anything.
    """
    super(SchemaChecker, self)._load_config()
    context = self.create_context()

    # Handed to validate_datasets() and afterwards to render_template().
    data = {
        'field_paths': defaultdict(int),
        'broken_rules': defaultdict(dict),
        'datasets_per_portal': defaultdict(set),
        'invalid_datasets': 0,
        'valid_datasets': 0,
    }

    arguments = self.args
    argument_count = len(arguments)

    if argument_count == 0:
        active_datasets = []
        context = dict(model=model, session=model.Session, ignore_auth=True)
        validator = schema_checker.SchemaChecker()

        num_datasets = 0
        for position, record in enumerate(iterate_local_datasets(context)):
            # Parenthesised single argument: prints the same text whether
            # ``print`` is the Python 2 statement or a function.
            print('Processing dataset %s' % position)
            normalize_action_dataset(record)
            validator.process_record(record)
            num_datasets = position + 1
            active_datasets.append(record['id'])

        delete_deprecated_violations(active_datasets)
        validator.redis_client.set('general', {'num_datasets': num_datasets})

    elif argument_count == 2 and arguments[0] == 'specific':
        context = dict(model=model, session=model.Session, ignore_auth=True)
        package_show = get_action('package_show')
        dataset = package_show(context, {'id': arguments[1]})
        print('Processing dataset %s' % dataset)
        normalize_action_dataset(dataset)
        validator = schema_checker.SchemaChecker()
        validator.process_record(dataset)

    elif argument_count == 2 and arguments[0] == 'remote':
        ckan = ckanclient.CkanClient(base_location=arguments[1])

        rows = 1000
        total = self.get_dataset_count(ckan)
        steps = int(ceil(total / float(rows)))
        final_step = steps - 1

        for step in range(steps):
            if step == final_step:
                # The last page only holds the remaining datasets.
                rows = total - (step * rows)
            page = self.get_datasets(ckan, rows, step)
            self.validate_datasets(page, data)

        # NOTE(review): the result is written for the remote run only,
        # the one mode that passes ``data`` to validate_datasets() --
        # confirm this matches the intended layout.
        self.write_validation_result(self.render_template(data))
def delete_deprecated_datasets(self, context, remote_dataset_names):
    """Flag local datasets that are absent from the remote name list.

    The datasets yielded by ``iterate_local_datasets(context)`` are first
    narrowed down with the predicate returned by
    ``self.portal_relevant(self.PORTAL)``. Every remaining dataset whose
    name does not occur in ``remote_dataset_names`` gets its ``state`` set
    to ``deleted``, receives an extra ``deprecated`` tag and is saved
    through the ``package_update`` action.

    :param context: context passed to ``iterate_local_datasets`` and to
        ``package_update``.
    :param remote_dataset_names: iterable of dataset names to compare
        against.
    """
    package_update = get_action('package_update')

    local_datasets = iterate_local_datasets(context)
    is_relevant = self.portal_relevant(self.PORTAL)
    # Materialised as a list because it is walked twice below.
    candidates = [entry for entry in local_datasets if is_relevant(entry)]

    local_names = set(entry['name'] for entry in candidates)
    deprecated = local_names - set(remote_dataset_names)
    log.info('Found %s deprecated datasets.' % len(deprecated))

    for entry in candidates:
        if entry['name'] not in deprecated:
            continue
        entry['state'] = 'deleted'
        entry['tags'].append({'name': 'deprecated'})
        package_update(context, entry)
def command(self):
    """Run the schema check selected by the command-line arguments.

    With no arguments every dataset yielded by ``iterate_local_datasets``
    is validated and the dataset count is stored under the ``general``
    key of the validator's redis client. With ``remote <endpoint>`` the
    datasets of the CKAN instance at ``<endpoint>`` are fetched page by
    page, validated, and the rendered result is written. Any other
    argument combination falls through without validating anything.
    """
    super(SchemaChecker, self)._load_config()
    context = self.create_context()

    # Handed to validate_datasets() and afterwards to render_template().
    data = dict(
        field_paths=defaultdict(int),
        broken_rules=defaultdict(dict),
        datasets_per_portal=defaultdict(set),
        invalid_datasets=0,
        valid_datasets=0,
    )

    cli_args = self.args
    local_run = len(cli_args) == 0
    remote_run = len(cli_args) == 2 and cli_args[0] == 'remote'

    if local_run:
        context = {
            'model': model,
            'session': model.Session,
            'ignore_auth': True,
        }
        validator = schema_checker.SchemaChecker()

        num_datasets = 0
        for index, entry in enumerate(iterate_local_datasets(context)):
            # Parenthesised single argument: prints the same text whether
            # ``print`` is the Python 2 statement or a function.
            print('Processing dataset %s' % index)
            normalize_action_dataset(entry)
            validator.process_record(entry)
            num_datasets = index + 1

        validator.redis_client.set('general', {'num_datasets': num_datasets})

    elif remote_run:
        endpoint = cli_args[1]
        ckan = ckanclient.CkanClient(base_location=endpoint)

        rows = 1000
        total = self.get_dataset_count(ckan)
        steps = int(ceil(total / float(rows)))

        step = 0
        while step < steps:
            if step == steps - 1:
                # The last page only holds the remaining datasets.
                rows = total - (step * rows)
            self.validate_datasets(self.get_datasets(ckan, rows, step), data)
            step += 1

        # NOTE(review): the result is written for the remote run only,
        # the one mode that passes ``data`` to validate_datasets() --
        # confirm this matches the intended layout.
        self.write_validation_result(self.render_template(data))
def command(self):
    """Run the link check selected by the command-line arguments.

    The mode is chosen from ``self.args``:

    * no arguments -- check every dataset yielded by
      ``iterate_local_datasets``, pass the ids of the processed datasets
      to ``self.delete_deprecated_datasets`` and store the dataset count
      under the ``general`` key of the validator's redis client;
    * ``remote <host>`` -- delegate to ``self.check_remote_host``;
    * ``report`` -- delegate to ``self.generate_report``;
    * ``specific <name>`` -- check the single dataset that
      ``package_show`` returns for ``<name>``.
    """
    super(LinkChecker, self)._load_config()
    args = self.args

    if len(args) == 0:
        context = {'model': model,
                   'session': model.Session,
                   'ignore_auth': True}
        validator = link_checker.LinkChecker()

        active_datasets = set()
        num_datasets = 0
        for i, dataset in enumerate(iterate_local_datasets(context)):
            # Parenthesised single argument: prints the same text whether
            # ``print`` is the Python 2 statement or a function.
            print('Processing dataset %s with name: %s'
                  % (i, dataset['name']))
            normalize_action_dataset(dataset)
            validator.process_record(dataset)
            num_datasets += 1
            active_datasets.add(dataset['id'])

        # Fix: the original passed the undefined name
        # ``active_dataset_ids`` here, so every local run ended in a
        # NameError after all datasets had been processed.
        self.delete_deprecated_datasets(active_datasets)
        general = {'num_datasets': num_datasets}
        validator.redis_client.set('general', general)
        return

    subcommand = args[0]
    if subcommand == 'remote':
        self.check_remote_host(args[1])
    elif subcommand == 'report':
        self.generate_report()
    elif subcommand == 'specific' and len(args) == 2:
        # Kept in the subcommand chain: as an ``elif`` of the outer
        # "any arguments given" test this branch could never run.
        dataset_name = args[1]
        context = {'model': model,
                   'session': model.Session,
                   'ignore_auth': True}
        package_show = get_action('package_show')
        validator = link_checker.LinkChecker()
        dataset = package_show(context, {'id': dataset_name})
        print('Processing dataset %s' % dataset)
        normalize_action_dataset(dataset)
        validator.process_record(dataset)