def test_xlsx_ingestion(self):
    """Ingesting an XLSX dataset yields one archive item with 5 rows."""
    dataset = Dataset.objects.get(name="boardgametournament-games-xlsx")
    process_dataset.delay(dataset)
    items = dataset.archive_items.all()
    self.assertEqual(len(items), 1)
    self.assertEqual(len(list(items[0].data())), 5)
def test_shp_ingestion(self):
    """Ingesting a shapefile dataset yields one archive item with 5 rows."""
    dataset = Dataset.objects.get(name="Aeroporti")
    process_dataset.delay(dataset)
    items = dataset.archive_items.all()
    self.assertEqual(len(items), 1)
    self.assertEqual(len(list(items[0].data())), 5)
def test_it_logs_using_redis_logger(self):
    """process_dataset reports progress through the redis-backed logger.

    The logger factory is patched so we can observe that the task
    called ``info`` on the logger it obtained.
    """
    obj = DatasetFactory()
    loggy = MagicMock()
    with patch('webui.scheduler.tasks.get_redis_logger', return_value=loggy):
        process_dataset.delay(obj)
    # self.assert_ is a deprecated alias of assertTrue (gone in py3.12);
    # use the canonical name.
    self.assertTrue(loggy.info.called)
def test_mapped_stats(self):
    # NOTE(review): another method with this exact name is defined later
    # in this class, so Python keeps only the later definition and this
    # copy never runs -- confirm which duplicate should be removed.
    from webui.scheduler.tasks import process_dataset
    dataset = Dataset.objects.get(name='osm-dataset')
    process_dataset.delay(dataset)
    item = dataset.archive_items.all()[0]
    stats_url = item.get_absolute_url() + 'mapped/stats/'
    response = self.client.get(stats_url)
    self.assertContains(response, "10")
    self.assertContains(response, "5")
def test_mapped_stats(self):
    # NOTE(review): this method name is defined twice in this class; this
    # later definition shadows the earlier identical one -- confirm which
    # duplicate should be removed.
    from webui.scheduler.tasks import process_dataset
    dataset = Dataset.objects.get(name='osm-dataset')
    process_dataset.delay(dataset)
    item = dataset.archive_items.all()[0]
    stats_url = item.get_absolute_url() + 'mapped/stats/'
    response = self.client.get(stats_url)
    self.assertContains(response, "10")
    self.assertContains(response, "5")
def test_shp_ingestion_encoding(self):
    """A latin1-encoded shapefile zip is ingested without crashing."""
    dataset = Dataset.objects.get(name="Aeroporti")
    dataset.download = 'http://testserver/poiGenerici.zip'
    dataset.encoding = 'latin1'
    dataset.save()
    process_dataset.delay(dataset)  # must not raise
    items = dataset.archive_items.all()
    self.assertEqual(len(items), 1)
    self.assertEqual(len(list(items[0].data())), 2322)
def test_csvkit_no_inference(self):
    """Values are kept verbatim (no type inference) during CSV ingestion."""
    dataset = Dataset.objects.get(name="strange symbols")
    process_dataset.delay(dataset)
    items = dataset.archive_items.all()
    self.assertEqual(len(items), 1)
    rows = items[0].data()
    next(rows)  # skip the header row
    for raw_row in rows:
        values = list(raw_row)
        # literal "None", leading zeros and "+" signs survive untouched
        self.assertIn(u"None", values)
        self.assertIn(u"0", values[-1])
        self.assertIn(u"+", values[-2])
def test_dataset_encoding(self):
    """A wrong declared encoding aborts ingestion; fixing it succeeds."""
    from sqlalchemy.exc import NoSuchTableError
    dataset = Dataset.objects.get(name="looks like ascii")
    dataset.encoding = "us-ascii"
    dataset.save()
    process_dataset.delay(dataset)
    # The bad charset made ingestion fail, so no backing table exists.
    with self.assertRaises(NoSuchTableError):
        next(dataset.archive_items.all()[0].data())
    dataset.encoding = "UTF-8"
    dataset.save()
    process_dataset.delay(dataset)
    self.assertEqual(len(dataset.archive_items.all()), 1)
def test_shp_ingestion_without_projection(self):
    """A zip missing its .prj file produces an item with no backing table."""
    from sqlalchemy.exc import NoSuchTableError
    dataset = Dataset.objects.get(name="osm-dataset")
    dataset.download = 'http://testserver/aeroporti_tn_no_prj.zip'
    dataset.save()
    dataset.archive_items.all().delete()
    process_dataset.delay(dataset)
    items = dataset.archive_items.all()
    self.assertEqual(len(items), 1)
    # Without a projection the data was never loaded into a table.
    with self.assertRaises(NoSuchTableError):
        list(items[0].data())
def test_it_works_for_datasets(self):
    """The task status page describes the dataset being processed."""
    dataset = DatasetFactory()
    task = process_dataset.delay(dataset)
    status_url = '/s/task/{}/'.format(task.id)
    response = self.client.get(status_url)
    self.assertContains(response, 'Processing dataset {}'.format(dataset))
def test_shp_ingestion_with_given_projection(self):
    """A zip without a .prj file is ingested using the dataset's
    explicit ``projection`` setting, and the resulting geometry is
    reprojected to lon/lat coordinates.
    """
    import re
    obj = Dataset.objects.get(name="Aeroporti")
    obj.download = 'http://testserver/aeroporti_tn_no_prj.zip'
    obj.projection = 'epsg:3064'
    obj.save()
    process_dataset.delay(obj)
    self.assertEqual(len(obj.archive_items.all()), 1)
    archiveitem = obj.archive_items.all()[0]
    data = list(archiveitem.data())
    self.assertEqual(len(data), 5)
    match = re.search(r'POLYGON \(\(([0-9\.]+) ([0-9\.]+)', data[1][5])
    # Fail with a clear assertion message instead of an AttributeError
    # when the geometry column does not hold the expected WKT polygon.
    self.assertIsNotNone(match)
    lon, lat = match.groups()
    self.assertEqual(int(float(lon)), 11)
    self.assertEqual(int(float(lat)), 46)
def test_zip_extraction_and_deletion(self):
    """Re-running the workflow wipes the zip extraction directory."""
    import os.path
    import shutil
    from webui.scheduler.test_helpers import _get_downloaded_file
    dataset = Dataset.objects.get(name="Aeroporti")
    process_dataset.delay(dataset)
    self.assertEqual(len(dataset.archive_items.all()), 1)
    # Plant a stray file inside the zip extraction directory.
    zip_info = _get_downloaded_file("aeroporti_tn.zip")
    extraction_dir = zip_info['out_file'] + "__exp"
    csv_info = _get_downloaded_file("boardgamegeek.csv")
    stray_file = os.path.join(extraction_dir, "im_a_new_file.csv")
    shutil.copy(csv_info['out_file'], stray_file)
    # Running the workflow again must clean the extraction dir: the
    # stray file is gone and no archive item was created for it.
    process_dataset.delay(dataset)
    self.assertFalse(os.path.exists(stray_file))
    self.assertEqual(len(dataset.archive_items.all()), 1)