def test_create_large_file(self):
    # Max number capable of storage in a Postgres integer field, plus 1
    # (errors out with IntegerField; passes with BigIntegerField)
    upload = utils.get_test_data_upload(self.user, self.dataset, size=2147483648)

    # Test bigint outer boundaries
    # Max bigint
    upload2 = utils.get_test_data_upload(self.user, self.dataset, size=9223372036854775807)
    self.assertEqual(upload2.size, 9223372036854775807)

    # Max bigint + 1
    self.assertRaises(DatabaseError, utils.get_test_data_upload, self.user, self.dataset, size=9223372036854775808)
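# The magic numbers above are easier to audit when derived; a minimal
# sketch of the same boundaries computed rather than hard-coded (this
# test name is hypothetical and not part of the original suite):
def test_size_boundary_arithmetic(self):
    int4_max = 2 ** 31 - 1  # Upper bound of a Postgres integer column
    int8_max = 2 ** 63 - 1  # Upper bound of a Postgres bigint column

    self.assertEqual(int4_max + 1, 2147483648)
    self.assertEqual(int8_max, 9223372036854775807)
    self.assertEqual(int8_max + 1, 9223372036854775808)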
def test_import_additional_data_same_columns(self):
    self.dataset.import_data(self.user, self.upload)

    utils.wait()

    xls_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_XLS_FILENAME)

    # Refresh from database
    self.dataset = Dataset.objects.get(id=self.dataset.id)
    self.dataset.import_data(self.user, xls_upload)

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)
    upload = DataUpload.objects.get(id=self.upload.id)
    xls_upload = DataUpload.objects.get(id=xls_upload.id)

    self.assertEqual([c['name'] for c in dataset.column_schema], ['id', 'first_name', 'last_name', 'employer'])
    self.assertEqual([c['type'] for c in dataset.column_schema], ['int', 'unicode', 'unicode', 'unicode'])
    self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [None, None, None, None])
    self.assertEqual(dataset.row_count, 8)
    self.assertEqual(upload.imported, True)
    self.assertEqual(xls_upload.imported, True)
    self.assertEqual(dataset.locked, False)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 2)
def test_delete(self):
    upload = utils.get_test_data_upload(self.user, self.dataset)
    upload_id = upload.id
    path = upload.get_path()
    self.assertEqual(os.path.isfile(path), True)

    solr.delete(settings.SOLR_DATA_CORE, '*:*')

    self.dataset.import_data(self.user, upload)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)

    upload = DataUpload.objects.get(id=upload_id)
    dataset = Dataset.objects.get(id=self.dataset.id)
    self.assertEqual(dataset.initial_upload, upload)
    self.assertEqual(dataset.row_count, 4)

    upload.delete()

    # Ensure dataset still exists
    dataset = Dataset.objects.get(id=self.dataset.id)
    self.assertEqual(dataset.initial_upload, None)
    self.assertEqual(dataset.row_count, 0)

    self.assertEqual(os.path.exists(path), False)

    with self.assertRaises(DataUpload.DoesNotExist):
        DataUpload.objects.get(id=upload_id)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 0)
def test_import_xls(self):
    xls_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_XLS_FILENAME)

    self.dataset.import_data(self.user, xls_upload)

    task = self.dataset.current_task

    self.assertNotEqual(task, None)
    self.assertNotEqual(task.id, None)
    self.assertEqual(task.task_name, 'panda.tasks.import.xls')

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)
    xls_upload = DataUpload.objects.get(id=xls_upload.id)
    task = TaskStatus.objects.get(id=task.id)

    self.assertEqual([c['name'] for c in dataset.column_schema], ['id', 'first_name', 'last_name', 'employer'])
    self.assertEqual([c['type'] for c in dataset.column_schema], ['int', 'unicode', 'unicode', 'unicode'])
    self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [None, None, None, None])
    self.assertEqual(dataset.row_count, 4)
    self.assertEqual(xls_upload.imported, True)
    self.assertEqual(task.status, 'SUCCESS')
    self.assertNotEqual(task.start, None)
    self.assertNotEqual(task.end, None)
    self.assertEqual(task.traceback, None)
    self.assertEqual(dataset.locked, False)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)
def test_import_additional_data_different_columns(self):
    self.dataset.import_data(self.user, self.upload)

    utils.wait()

    xls_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_XLS_FILENAME)
    xls_upload.columns = ['id', 'first_name', 'last_name', 'employer', 'MORE COLUMNS!']
    xls_upload.save()

    # Refresh from database
    self.dataset = Dataset.objects.get(id=self.dataset.id)

    self.assertRaises(DataImportError, self.dataset.import_data, self.user, xls_upload)

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)
    upload = DataUpload.objects.get(id=self.upload.id)
    xls_upload = DataUpload.objects.get(id=xls_upload.id)

    self.assertEqual(dataset.columns, ['id', 'first_name', 'last_name', 'employer'])
    self.assertEqual(dataset.row_count, 4)
    self.assertEqual(upload.imported, True)
    self.assertEqual(xls_upload.imported, False)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)
def test_import_excel_xlsx(self):
    xlsx_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_EXCEL_XLSX_FILENAME)

    self.dataset.import_data(self.user, xlsx_upload)

    task = self.dataset.current_task

    self.assertNotEqual(task, None)
    self.assertNotEqual(task.id, None)
    self.assertEqual(task.task_name, 'panda.tasks.import.xlsx')

    utils.wait()

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)
    xlsx_upload = DataUpload.objects.get(id=xlsx_upload.id)
    task = TaskStatus.objects.get(id=task.id)

    self.assertEqual(dataset.columns, ['id', 'first_name', 'last_name', 'employer'])
    self.assertEqual(dataset.row_count, 4)
    self.assertEqual(xlsx_upload.imported, True)
    self.assertEqual(task.status, 'SUCCESS')
    self.assertNotEqual(task.start, None)
    self.assertNotEqual(task.end, None)
    self.assertEqual(task.traceback, None)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)
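# The task_name assertions in the import tests above follow a simple
# per-format naming scheme; a sketch of how such a name might be derived
# (get_import_task_name is hypothetical, not the actual panda.tasks API):
def get_import_task_name(data_type):
    # e.g. 'xls' -> 'panda.tasks.import.xls', 'xlsx' -> 'panda.tasks.import.xlsx'
    return 'panda.tasks.import.%s' % data_type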
def test_import_additional_xlsx_typed_columns(self):
    self.dataset.import_data(self.user, self.upload)

    # Refresh from database
    self.dataset = Dataset.objects.get(id=self.dataset.id)

    self.dataset.reindex_data(self.user, typed_columns=[True, False, True, True])

    second_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_EXCEL_XLSX_FILENAME)

    # Refresh from database
    self.dataset = Dataset.objects.get(id=self.dataset.id)

    self.dataset.import_data(self.user, second_upload)

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)

    self.assertEqual([c['name'] for c in dataset.column_schema], ['id', 'first_name', 'last_name', 'employer'])
    self.assertEqual([c['type'] for c in dataset.column_schema], ['int', 'unicode', 'unicode', 'unicode'])
    self.assertEqual([c['indexed'] for c in dataset.column_schema], [True, False, True, True])
    self.assertEqual([c['indexed_name'] for c in dataset.column_schema], ['column_int_id', None, 'column_unicode_last_name', 'column_unicode_employer'])
    self.assertEqual(dataset.row_count, 8)
    self.assertEqual(dataset.locked, False)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_int_id:2')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_unicode_last_name:Germuska')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_unicode_first_name:Joseph')['response']['numFound'], 0)
def test_reindex_complex(self):
    upload = utils.get_test_data_upload(self.user, self.dataset, filename=utils.TEST_CSV_TYPES_FILENAME)

    self.dataset.import_data(self.user, upload)

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)

    dataset.reindex_data(self.user, typed_columns=[True for c in upload.columns])

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)
    task = dataset.current_task

    self.assertEqual(task.status, 'SUCCESS')
    self.assertNotEqual(task.start, None)
    self.assertNotEqual(task.end, None)
    self.assertEqual(task.traceback, None)

    self.assertEqual([c['name'] for c in dataset.column_schema], ['text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime', 'empty_column', ''])
    self.assertEqual([c['type'] for c in dataset.column_schema], ['unicode', 'date', 'int', 'bool', 'float', 'time', 'datetime', None, 'unicode'])
    self.assertEqual([c['indexed'] for c in dataset.column_schema], [True for c in upload.columns])
    self.assertEqual([c['indexed_name'] for c in dataset.column_schema], ['column_unicode_text', 'column_date_date', 'column_int_integer', 'column_bool_boolean', 'column_float_float', 'column_time_time', 'column_datetime_datetime', None, 'column_unicode_'])
    self.assertEqual([c['min'] for c in dataset.column_schema], [None, u'1920-01-01T00:00:00', 40, None, 1.0, u'9999-12-31T00:00:00', u'1971-01-01T04:14:00', None, None])
    self.assertEqual([c['max'] for c in dataset.column_schema], [None, u'1971-01-01T00:00:00', 164, None, 41800000.01, u'9999-12-31T14:57:13', u'2048-01-01T14:57:00', None, None])
    self.assertEqual(dataset.row_count, 5)
    self.assertEqual(dataset.locked, False)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_bool_boolean:true')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_unicode_text:"Chicago Tribune"')['response']['numFound'], 1)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_datetime_datetime:[1971-01-01T01:01:01Z TO NOW]')['response']['numFound'], 1)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_time_time:[9999-12-31T04:13:01Z TO *]')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_date_date:[1971-01-01T00:00:00Z TO NOW]')['response']['numFound'], 1)
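# The repeated solr.query(...)['response']['numFound'] comparisons in
# these tests could be collapsed into one assertion helper; a sketch
# (assertSolrCount is hypothetical, not an existing test utility):
def assertSolrCount(self, query, expected, core=None):
    core = core or settings.SOLR_DATA_CORE
    found = solr.query(core, query)['response']['numFound']
    self.assertEqual(found, expected)

# Usage: self.assertSolrCount('column_bool_boolean:true', 2)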
def test_import_additional_data_different_columns(self):
    self.dataset.import_data(self.user, self.upload)

    xls_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_XLS_FILENAME)
    xls_upload.columns = ['id', 'first_name', 'last_name', 'employer', 'MORE COLUMNS!']
    xls_upload.save()

    # Refresh from database
    self.dataset = Dataset.objects.get(id=self.dataset.id)

    self.assertRaises(DataImportError, self.dataset.import_data, self.user, xls_upload)

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)
    upload = DataUpload.objects.get(id=self.upload.id)
    xls_upload = DataUpload.objects.get(id=xls_upload.id)

    self.assertEqual([c['name'] for c in dataset.column_schema], ['id', 'first_name', 'last_name', 'employer'])
    self.assertEqual(dataset.row_count, 4)
    self.assertEqual(upload.imported, True)
    self.assertEqual(xls_upload.imported, False)
    self.assertEqual(dataset.locked, False)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)
def test_reindex_complex(self):
    upload = utils.get_test_data_upload(self.user, self.dataset, filename=utils.TEST_CSV_TYPES_FILENAME)

    self.dataset.import_data(self.user, upload)

    utils.wait()

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)

    dataset.reindex_data(self.user, typed_columns=[True for c in upload.columns])

    utils.wait()

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)

    self.assertEqual([c['name'] for c in dataset.column_schema], ['text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime', 'empty_column', ''])
    self.assertEqual([c['type'] for c in dataset.column_schema], ['unicode', 'datetime', 'int', 'bool', 'float', 'datetime', 'datetime', 'NoneType', 'unicode'])
    self.assertEqual([c['indexed'] for c in dataset.column_schema], [True for c in upload.columns])
    self.assertEqual([c['indexed_name'] for c in dataset.column_schema], ['column_unicode_text', 'column_datetime_date', 'column_int_integer', 'column_bool_boolean', 'column_float_float', 'column_datetime_time', 'column_datetime_datetime', 'column_NoneType_empty_column', 'column_unicode_'])
    self.assertEqual(dataset.row_count, 5)
    self.assertEqual(dataset.locked, False)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_bool_boolean:true')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_unicode_text:"Chicago Tribune"')['response']['numFound'], 1)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_datetime_datetime:[1971-01-01T01:01:01Z TO NOW]')['response']['numFound'], 1)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_datetime_time:[9999-12-31T04:13:01Z TO *]')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_datetime_date:[1971-01-01T00:00:00Z TO NOW]')['response']['numFound'], 1)
def test_undeletable(self):
    upload = utils.get_test_data_upload(self.user, self.dataset)
    upload.deletable = False
    upload.save()

    with self.assertRaises(DataUploadNotDeletable):
        upload.delete()
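# For context, the guard this test exercises presumably lives in
# DataUpload.delete(); a minimal sketch of that pattern, assuming
# deletable is a plain boolean field (this is not the actual model code):
def delete(self, *args, **kwargs):
    if not self.deletable:
        raise DataUploadNotDeletable('This data upload cannot be deleted.')

    super(DataUpload, self).delete(*args, **kwargs)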
def setUp(self):
    self.user = utils.get_panda_user()
    self.dataset = utils.get_test_dataset(self.user)
    self.upload = utils.get_test_data_upload(self.user, self.dataset)

    self.auth_headers = utils.get_auth_headers()

    self.client = Client()
def setUp(self):
    settings.CELERY_ALWAYS_EAGER = True

    utils.setup_test_solr()

    self.user = utils.get_panda_user()
    self.dataset = utils.get_test_dataset(self.user)
    self.upload = utils.get_test_data_upload(self.user, self.dataset)
def test_delete(self):
    upload = utils.get_test_data_upload(self.user, self.dataset)
    path = upload.get_path()
    self.assertEqual(os.path.isfile(path), True)

    upload.delete()

    self.assertEqual(os.path.exists(path), False)
def setUp(self):
    settings.CELERY_ALWAYS_EAGER = True

    self.user = utils.get_panda_user()
    self.dataset = utils.get_test_dataset(self.user)
    self.upload = utils.get_test_data_upload(self.user, self.dataset)

    self.auth_headers = utils.get_auth_headers()

    self.client = Client()
def test_delete(self):
    upload = utils.get_test_data_upload(self.user, self.dataset)
    upload_id = upload.id
    path = upload.get_path()
    self.assertEqual(os.path.isfile(path), True)

    upload.delete()

    self.assertEqual(os.path.exists(path), False)

    with self.assertRaises(DataUpload.DoesNotExist):
        DataUpload.objects.get(id=upload_id)
def test_created(self):
    upload = utils.get_test_data_upload(self.user, self.dataset)

    self.assertEqual(upload.original_filename, utils.TEST_DATA_FILENAME)
    self.assertEqual(upload.creator, self.user)
    self.assertNotEqual(upload.creation_date, None)
    self.assertEqual(upload.dataset, self.dataset)

    self.assertEqual(upload.data_type, 'csv')
    self.assertNotEqual(self.upload.dialect, None)
    self.assertEqual(self.upload.columns, ['id', 'first_name', 'last_name', 'employer'])

    self.assertEqual(len(self.upload.sample_data), 4)
    self.assertEqual(self.upload.sample_data[0], ['1', 'Brian', 'Boyer', 'Chicago Tribune'])
def test_created(self):
    upload = utils.get_test_data_upload(self.user, self.dataset)

    self.assertEqual(upload.original_filename, utils.TEST_DATA_FILENAME)
    self.assertEqual(upload.creator, self.user)
    self.assertNotEqual(upload.creation_date, None)
    self.assertEqual(upload.dataset, self.dataset)

    self.assertEqual(upload.data_type, 'csv')
    self.assertNotEqual(self.upload.dialect, None)
    self.assertEqual(self.upload.columns, ['id', 'first_name', 'last_name', 'employer'])

    self.assertEqual(len(self.upload.sample_data), 4)
    self.assertEqual(self.upload.sample_data[0], ['1', 'Brian', 'Boyer', 'Chicago Tribune'])

    self.assertEqual(len(self.upload.guessed_types), 4)
    self.assertEqual(self.upload.guessed_types, ['int', 'unicode', 'unicode', 'unicode'])
def setUp(self):
    settings.CELERY_ALWAYS_EAGER = True

    utils.setup_test_solr()

    self.user = utils.get_panda_user()
    self.dataset = utils.get_test_dataset(self.user)
    self.upload = utils.get_test_data_upload(self.user, self.dataset)
    self.dataset.import_data(self.user, self.upload, 0)

    utils.wait()

    self.auth_headers = utils.get_auth_headers()

    self.client = Client()
def test_change_user_reindex(self):
    solr.delete(settings.SOLR_DATASETS_CORE, '*:*')

    self.user.first_name = 'bazbarfoo'
    self.user.save()

    dataset = utils.get_test_dataset(self.user)
    upload = utils.get_test_data_upload(self.user, dataset)

    self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, dataset.creator.first_name)['response']['numFound'], 1)

    old_name = dataset.creator.first_name
    dataset.creator.first_name = 'foobarbaz'
    dataset.creator.save()

    self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, old_name)['response']['numFound'], 0)
    self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, dataset.creator.first_name)['response']['numFound'], 1)
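# The behaviour exercised here is consistent with a post_save hook on the
# user model that reindexes datasets the user created; a sketch of that
# pattern (the receiver name and the reindexing call are assumptions, not
# the actual implementation):
from django.db.models.signals import post_save

def on_user_save(sender, instance, created, **kwargs):
    if created:
        return

    # Push the updated creator metadata back into the Solr datasets core.
    for dataset in Dataset.objects.filter(creator=instance):
        dataset.update_full_text()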
def test_created(self):
    upload = utils.get_test_data_upload(self.user, self.dataset)

    self.assertEqual(upload.original_filename, utils.TEST_DATA_FILENAME)
    self.assertEqual(upload.creator, self.user)
    self.assertNotEqual(upload.creation_date, None)
    self.assertEqual(upload.dataset, self.dataset)

    self.assertEqual(upload.data_type, 'csv')
    self.assertNotEqual(self.upload.dialect, None)
    self.assertEqual(self.upload.columns, ['id', 'first_name', 'last_name', 'employer'])

    self.assertEqual(len(self.upload.sample_data), 4)
    self.assertEqual(self.upload.sample_data[0], ['1', 'Brian', 'Boyer', 'Chicago Tribune'])

    self.assertEqual(len(self.upload.guessed_types), 4)
    self.assertEqual(self.upload.guessed_types, ['int', 'unicode', 'unicode', 'unicode'])

    self.assertEqual(upload.deletable, True)
def test_reindex_with_currency(self):
    upload = utils.get_test_data_upload(self.user, self.dataset, filename=utils.TEST_MONEY)

    self.dataset.import_data(self.user, upload)

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)

    dataset.reindex_data(self.user, typed_columns=[False, True], column_types=['unicode', 'float'])

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)

    self.assertEqual([c['name'] for c in dataset.column_schema], ['product', 'price'])
    self.assertEqual([c['type'] for c in dataset.column_schema], ['unicode', 'float'])
    self.assertEqual([c['indexed'] for c in dataset.column_schema], [False, True])
    self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [None, 'column_float_price'])
    self.assertEqual([c['min'] for c in dataset.column_schema], [None, 39.99])
    self.assertEqual([c['max'] for c in dataset.column_schema], [None, 2599.00])

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:39.99')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:[1500 TO *]')['response']['numFound'], 2)
    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:*')['response']['numFound'], 8)
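# Reindexing the price column as 'float' implies currency strings such as
# '$2,599.00' are normalised before coercion; a standalone sketch of that
# normalisation (parse_currency is hypothetical, not the actual typing code):
def parse_currency(value):
    # Strip the currency symbol and thousands separators, then coerce.
    return float(value.replace('$', '').replace(',', ''))

# parse_currency('$2,599.00') == 2599.0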
def test_import_encoded_data(self):
    """
    This tests for a complicated case where a UnicodeDecodeError
    during import could be masked by an AttributeError in the
    return handler.
    """
    old_sniffer_size = settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE
    settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE = 50

    data_upload = utils.get_test_data_upload(self.user, self.dataset, utils.TEST_LATIN1_DATA_FILENAME)

    self.dataset.import_data(self.user, data_upload)

    task = self.dataset.current_task

    self.assertNotEqual(task, None)
    self.assertNotEqual(task.id, None)
    self.assertEqual(task.task_name, 'panda.tasks.import.csv')

    # Refresh from database
    dataset = Dataset.objects.get(id=self.dataset.id)
    data_upload = DataUpload.objects.get(id=data_upload.id)
    task = TaskStatus.objects.get(id=task.id)

    self.assertEqual(len(dataset.column_schema), 8)
    self.assertEqual(dataset.row_count, None)
    self.assertEqual(data_upload.imported, False)
    self.assertEqual(task.status, 'FAILURE')
    self.assertNotEqual(task.start, None)
    self.assertNotEqual(task.end, None)
    self.assertEqual('encoded' in task.traceback, True)
    self.assertEqual(dataset.locked, False)

    self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'walking')['response']['numFound'], 0)

    settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE = old_sniffer_size
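# Note that the manual save/restore of PANDA_SNIFFER_MAX_SAMPLE_SIZE above
# leaks the temporary value if an assertion fails mid-test; wrapping the
# body in try/finally keeps the restore unconditional. A sketch of the
# pattern (the test name is hypothetical and the body is elided):
def test_import_encoded_data_restores_setting(self):
    old_sniffer_size = settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE
    settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE = 50

    try:
        pass  # ... import and assertions as in test_import_encoded_data ...
    finally:
        settings.PANDA_SNIFFER_MAX_SAMPLE_SIZE = old_sniffer_size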
def test_sample_encoding_fails(self):
    with self.assertRaises(DataSamplingError):
        utils.get_test_data_upload(self.user, self.dataset, utils.TEST_LATIN1_FILENAME)
def test_sample_encoding_success(self):
    utils.get_test_data_upload(self.user, self.dataset, utils.TEST_LATIN1_FILENAME, encoding='latin1')
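# Why the explicit encoding matters: bytes that are valid latin1 are not
# necessarily valid UTF-8, which the sampler presumably attempts by default.
# A minimal stdlib-only illustration (this test name is hypothetical):
def test_latin1_bytes_are_not_utf8(self):
    raw = u'caf\xe9'.encode('latin1')  # b'caf\xe9'

    self.assertRaises(UnicodeDecodeError, raw.decode, 'utf-8')
    self.assertEqual(raw.decode('latin1'), u'caf\xe9')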
def test_reindex_complex(self): upload = utils.get_test_data_upload( self.user, self.dataset, filename=utils.TEST_CSV_TYPES_FILENAME) self.dataset.import_data(self.user, upload) # Refresh from database dataset = Dataset.objects.get(id=self.dataset.id) dataset.reindex_data(self.user, typed_columns=[True for c in upload.columns]) # Refresh from database dataset = Dataset.objects.get(id=self.dataset.id) task = dataset.current_task self.assertEqual(task.status, 'SUCCESS') self.assertNotEqual(task.start, None) self.assertNotEqual(task.end, None) self.assertEqual(task.traceback, None) self.assertEqual([c['name'] for c in dataset.column_schema], [ 'text', 'date', 'integer', 'boolean', 'float', 'time', 'datetime', 'empty_column', '' ]) self.assertEqual([c['type'] for c in dataset.column_schema], [ 'unicode', 'date', 'int', 'bool', 'float', 'time', 'datetime', None, 'unicode' ]) self.assertEqual([c['indexed'] for c in dataset.column_schema], [True for c in upload.columns]) self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [ 'column_unicode_text', 'column_date_date', 'column_int_integer', 'column_bool_boolean', 'column_float_float', 'column_time_time', 'column_datetime_datetime', None, 'column_unicode_' ]) self.assertEqual([c['min'] for c in dataset.column_schema], [ None, u'1920-01-01T00:00:00', 40, None, 1.0, u'9999-12-31T00:00:00', u'1971-01-01T04:14:00', None, None ]) self.assertEqual([c['max'] for c in dataset.column_schema], [ None, u'1971-01-01T00:00:00', 164, None, 41800000.01, u'9999-12-31T14:57:13', u'2048-01-01T14:57:00', None, None ]) self.assertEqual(dataset.row_count, 5) self.assertEqual(dataset.locked, False) self.assertEqual( solr.query(settings.SOLR_DATA_CORE, 'column_bool_boolean:true')['response']['numFound'], 2) self.assertEqual( solr.query(settings.SOLR_DATA_CORE, 'column_unicode_text:"Chicago Tribune"')['response'] ['numFound'], 1) self.assertEqual( solr.query( settings.SOLR_DATA_CORE, 'column_datetime_datetime:[1971-01-01T01:01:01Z TO NOW]') ['response']['numFound'], 1) self.assertEqual( solr.query(settings.SOLR_DATA_CORE, 'column_time_time:[9999-12-31T04:13:01Z TO *]') ['response']['numFound'], 2) self.assertEqual( solr.query(settings.SOLR_DATA_CORE, 'column_date_date:[1971-01-01T00:00:00Z TO NOW]') ['response']['numFound'], 1)
def setUp(self):
    self.validator = DataValidation()

    self.user = utils.get_panda_user()
    self.dataset = utils.get_test_dataset(self.user)
    self.upload = utils.get_test_data_upload(self.user, self.dataset)