def test_first_time_distribution_indexable(self, *_):
    """A freshly read distribution must be flagged as changed (indexable)."""
    read_datajson(self.task, whitelist=True, read_local=True)
    index_catalog(self.node, self.mgmt_task, read_local=True)
    changed_meta = (Distribution.objects
                    .get(identifier='212.1')
                    .enhanced_meta.get(key=meta_keys.CHANGED))
    self.assertEqual(changed_meta.value, 'True')
def test_format_is_passed_to_data_json(self, data_json, *_):
    """The node's catalog_format must reach the DataJson constructor call."""
    read_datajson(self.task, whitelist=True)
    self.node.catalog_format = 'xlsx'
    index_catalog(self.node, self.mgmt_task)
    passed_format = data_json.call_args[1]['catalog_format']
    self.assertEqual(passed_format, self.node.catalog_format)
def _index(self, catalog_id, catalog_url, periodicity='R/P1D',
           set_availables=True, set_error=False, set_present=True):
    """Read the sample catalog and index its metadata.

    Returns True when the metadata indexer ran successfully (in which case
    the index segments are also force-merged so results are searchable).
    """
    sample_node = Node.objects.create(
        catalog_id=catalog_id,
        catalog_url=os.path.join(SAMPLES_DIR, catalog_url),
        indexable=True,
    )
    read_datajson(self.task, whitelist=True, read_local=True)
    if set_availables:
        # Stamp every field with the metadata the indexer expects to find
        meta_values = {
            meta_keys.AVAILABLE: 'true',
            meta_keys.HITS_90_DAYS: '0',
            meta_keys.PERIODICITY: periodicity,
        }
        for a_field in datajsonar_Field.objects.all():
            for meta_key, meta_value in meta_values.items():
                a_field.enhanced_meta.create(key=meta_key, value=meta_value)
        datajsonar_Field.objects.update(error=set_error, present=set_present)
    index_ok = CatalogMetadataIndexer(sample_node, self.meta_task,
                                      self.fake_index._name).index()
    if index_ok:
        connections.get_connection().indices.forcemerge()
    return index_ok
def test_error_distribution_logs(self, *_):
    """Reading a catalog with a broken distribution leaves logs on the task."""
    self.node.catalog_url = os.path.join(
        SAMPLES_DIR, 'distribution_missing_downloadurl.json')
    self.node.save()
    read_datajson(self.task, whitelist=True, read_local=True)
    index_catalog(self.node, self.mgmt_task, read_local=True)
    task_logs = ReadDataJsonTask.objects.get(id=self.task.id).logs
    self.assertGreater(len(task_logs), 10)
def test_index_same_series_different_catalogs(self, *_):
    """Reading the same series twice must not create a duplicate Field row."""
    for _round in range(2):
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)
    matches = Field.objects.filter(identifier='212.1_PSCIOS_ERN_0_0_25')
    self.assertEqual(matches.count(), 1)
def handle(self, *args, **options):
    """Start a new datajson read, unless one is already running.

    Creates a ReadDataJsonTask and hands it to read_datajson with the
    whitelist flag taken from the command options.
    """
    # .exists() asks the DB for presence only instead of fetching rows
    # just to evaluate the queryset in boolean context.
    if ReadDataJsonTask.objects.filter(status=ReadDataJsonTask.RUNNING).exists():
        logger.info(u'Ya está corriendo una indexación')
        return
    task = ReadDataJsonTask()
    task.save()
    read_datajson(task, whitelist=options['whitelist'])
def test_dont_index_same_distribution_twice(self, *_):
    """A re-read with unchanged data marks the distribution as not indexable."""
    for _round in range(2):
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)
    distribution = Distribution.objects.get(identifier='212.1')
    # The distribution stays flagged as unchanged until its data changes
    changed_meta = distribution.enhanced_meta.get(key=meta_keys.CHANGED)
    self.assertEqual(changed_meta.value, 'False')
def test_read(self):
    """Reading a catalog creates Field rows tied to that catalog identifier."""
    identifier = 'test_id'
    Node(catalog_id=identifier,
         catalog_url=os.path.join(dir_path, 'sample_data.json'),
         indexable=True).save()
    task = ReadDataJsonTask()
    task.save()
    read_datajson(task, whitelist=True)
    # .exists() is the idiomatic (and cheaper) presence check: it avoids
    # materializing the queryset just to test its truthiness.
    self.assertTrue(
        Field.objects.filter(
            distribution__dataset__catalog__identifier=identifier).exists())
def test_custom_validation_options(self, *_):
    """A zero max_field_title_len makes every validation fail -> error flag set."""
    # With a zero-length limit, every title check will fail
    validator_config = DistributionValidatorConfig.get_solo()
    validator_config.max_field_title_len = 0
    validator_config.save()
    read_datajson(self.task, whitelist=True)
    index_catalog(self.node, self.mgmt_task)
    self.assertTrue(Distribution.objects.get(identifier='212.1').error)
def test_read_datajson_one_node_only_calls_task_for_that_node(
        self, index_catalog):
    """A task bound to a single node triggers exactly one index_catalog call."""
    Node(catalog_id='one_catalog', catalog_url='http://one_url.com',
         indexable=True).save()
    bound_node = Node.objects.create(catalog_id='other_catalog',
                                     catalog_url='http://other_url.com',
                                     indexable=True)
    task = ReadDataJsonTask.objects.create(node=bound_node)
    read_datajson(task)
    self.assertEqual(index_catalog.delay.call_count, 1)
def test_read_datajson_several_nodes_call_index_catalog_once_per_node(
        self, index_catalog):
    """A task with no bound node fans out one index_catalog call per node."""
    node_specs = (('one_catalog', 'http://one_url.com'),
                  ('other_catalog', 'http://other_url.com'))
    for catalog_id, url in node_specs:
        Node(catalog_id=catalog_id, catalog_url=url, indexable=True).save()
    task = ReadDataJsonTask.objects.create()
    read_datajson(task)
    self.assertEqual(index_catalog.delay.call_count, 2)
def parse_catalog(catalog_id, catalog_path, node=None):
    """Create (or reuse) a node for the catalog, read it, and return the node."""
    if node is None:
        node = Node.objects.create(catalog_id=catalog_id,
                                   catalog_url=catalog_path,
                                   indexable=True)
    node.catalog = json.dumps(DataJson(node.catalog_url))
    node.save()
    read_task = ReadDataJsonTask()
    read_task.save()
    read_datajson(read_task, whitelist=True)
    return node
def test_index_YYYY_distribution(self, *_):
    """A distribution with a yearly (YYYY) time index gets flagged as changed."""
    self.node.catalog_url = os.path.join(SAMPLES_DIR, 'single_data_yyyy.json')
    self.node.save()
    read_datajson(self.task, whitelist=True)
    index_catalog(self.node, self.mgmt_task)
    changed_meta = (Distribution.objects
                    .get(identifier='102.1')
                    .enhanced_meta.get(key=meta_keys.CHANGED))
    self.assertEqual(changed_meta.value, 'True')
def test_significant_figures(self, *_):
    """significant_figures declared in the data.json must land in enhanced_meta."""
    Catalog.objects.all().delete()
    self.node.catalog_url = os.path.join(SAMPLES_DIR, 'ipc_data.json')
    self.node.save()
    read_datajson(self.task, whitelist=True)
    index_catalog(self.node, self.mgmt_task)
    # 'serie_inflacion' is the identifier used in the data.json sample
    indexed_field = Field.objects.get(identifier='serie_inflacion')
    figures = indexed_field.enhanced_meta.get(key='significant_figures').value
    self.assertEqual(figures, '4')
def test_index_same_distribution_if_data_changed(self, *_):
    """A distribution whose data changed between reads is indexable again."""
    read_datajson(self.task, whitelist=True, read_local=True)
    index_catalog(self.node, self.mgmt_task, read_local=True)
    # Point the node at a catalog with modified data and run a second pass
    self.node.catalog_url = os.path.join(SAMPLES_DIR,
                                         'full_ts_data_changed.json')
    self.node.save()
    read_datajson(self.task, whitelist=True, read_local=True)
    index_catalog(self.node, self.mgmt_task, read_local=True)
    # The distribution was indexed again, so it is flagged as changed
    changed_meta = (Distribution.objects
                    .get(identifier='212.1')
                    .enhanced_meta.get(key=meta_keys.CHANGED))
    self.assertEqual(changed_meta.value, 'True')
def _index_catalog(self, catalog_path):
    """Read the catalog and reindex every distribution with the ES bulk mocked."""
    Node.objects.create(catalog_id='test_catalog',
                        catalog_url=catalog_path,
                        indexable=True)
    read_task = ReadDataJsonTask.objects.create()
    read_datajson(read_task, whitelist=True)
    with mock.patch(
            'series_tiempo_ar_api.libs.indexing.indexer.distribution_indexer.parallel_bulk'
    ):
        for distribution in Distribution.objects.all():
            DistributionIndexer('some_index').reindex(distribution)
def _index(self, catalog_id, catalog_url, set_availables=True):
    """Create a node for the sample catalog, read it and index its metadata."""
    sample_url = os.path.join(SAMPLES_DIR, catalog_url)
    created_node = Node.objects.create(catalog_id=catalog_id,
                                       catalog_url=sample_url,
                                       indexable=True)
    read_datajson(self.task, whitelist=True, read_local=True)
    if set_availables:
        # Mark every field as available so the metadata indexer picks it up
        for a_field in datajsonar_Field.objects.all():
            a_field.enhanced_meta.create(key=meta_keys.AVAILABLE, value='true')
    CatalogMetadataIndexer(created_node, self.meta_task, self.FakeField).index()
    self.elastic.indices.forcemerge()
def _index(self, catalog_id, catalog_url, set_availables=True):
    """Read the sample catalog and index its metadata.

    Returns True when the indexer succeeded (segments are then force-merged).
    """
    sample_url = os.path.join(SAMPLES_DIR, catalog_url)
    created_node = Node.objects.create(catalog_id=catalog_id,
                                       catalog_url=sample_url,
                                       indexable=True)
    read_datajson(self.task, whitelist=True, read_local=True)
    if set_availables:
        # Mark every field as available so the metadata indexer picks it up
        for a_field in datajsonar_Field.objects.all():
            a_field.enhanced_meta.create(key=meta_keys.AVAILABLE, value='true')
    index_ok = CatalogMetadataIndexer(created_node, self.meta_task,
                                      fake_index._name).index()
    if index_ok:
        connections.get_connection().indices.forcemerge()
    return index_ok
def index_catalog(catalog_id, catalog_path, index, node=None):
    """Index a catalog into Elasticsearch. Handy for tests."""
    if node is None:
        node = Node(catalog_id=catalog_id, catalog_url=catalog_path,
                    indexable=True)
    node.catalog = json.dumps(DataJson(node.catalog_url))
    node.save()
    read_task = ReadDataJsonTask()
    read_task.save()
    read_datajson(read_task, read_local=True, whitelist=True)
    catalog_distributions = Distribution.objects.filter(
        dataset__catalog__identifier=catalog_id)
    for distribution in catalog_distributions:
        DistributionIndexer(index=index).run(distribution)
    ElasticInstance.get().indices.forcemerge(index=index)
def read_data(self, catalog_path):
    """Create a test node for the given catalog and run a whitelist read."""
    Node.objects.create(catalog_id='test_catalog',
                        catalog_url=catalog_path,
                        indexable=True)
    read_datajson(ReadDataJsonTask.objects.create(), whitelist=True)