def test_observation_with_easting_northing_zone_xls(self):
    """
    Scenario: an uploaded spreadsheet has Easting, Northing and Zone columns.

    Given a file with columns named Easting, Northing and Zone
    Then the inferred dataset type should be Observation
    And Easting and Northing should be required fields of type 'number'
    And Zone should be a required field of type 'integer'
    And each of the three should carry its matching biosys type tag
    And the returned data should be accepted by verify_inferred_data.
    """
    header = ['What', 'Easting', 'Northing', 'Zone', 'Comments']
    rows = [
        header,
        ['Something', 12563.233, 568932.345, 50, 'A dog'],
        # This row supplies the coordinates as strings instead of floats.
        [
            'Observation with easting/northing as string',
            '12563.233',
            '568932.345',
            50,
            'A dog',
        ],
    ]
    client = self.custodian_1_client
    xlsx_path = helpers.rows_to_xlsx_file(rows)
    with open(xlsx_path, 'rb') as fp:
        resp = client.post(self.url, data={'file': fp}, format='multipart')
        self.assertEqual(status.HTTP_200_OK, resp.status_code)
        received = resp.json()

        # The dataset must be inferred as an observation ...
        self.assertEqual(Dataset.TYPE_OBSERVATION, received.get('type'))
        # ... and must come back with a data package.
        self.assertIn('data_package', received)

        package = Package(received.get('data_package'))
        schema = utils_data_package.GenericSchema(
            package.resources[0].descriptor['schema']
        )

        # (column name, expected field type, expected biosys tag).
        # Every one of these columns is also expected to be required.
        expectations = [
            ('Easting', 'number', BiosysSchema.EASTING_TYPE_NAME),
            ('Northing', 'number', BiosysSchema.NORTHING_TYPE_NAME),
            ('Zone', 'integer', BiosysSchema.ZONE_TYPE_NAME),
        ]
        for column, field_type, biosys_tag in expectations:
            field = schema.get_field_by_name(column)
            self.assertIsNotNone(field)
            self.assertEqual(field.type, field_type)
            self.assertTrue(field.required)
            biosys = field.get('biosys')
            self.assertIsNotNone(biosys)
            self.assertEqual(biosys.get('type'), biosys_tag)

        # The dataset must be savable exactly as returned.
        self.verify_inferred_data(received)
def test_observation_with_genus_species_infra_rank_and_infra_name_only_xls(
        self):
    """
    Scenario: a spreadsheet with Latitude, Longitude, Genus, Species,
    Infraspecific Rank and Infraspecific Name columns is inferred as a
    species observation.

    Given a file containing those columns
    Then the dataset type should be speciesObservation
    And 'Genus' should be a required string tagged with the biosys genus type
    And 'Species' should be a required string tagged with the biosys
        species type
    And 'Infraspecific Rank' should be a non-required string tagged with the
        biosys InfraSpecificRank type
    And 'Infraspecific Name' should be a non-required string tagged with the
        biosys InfraSpecificName type
    And the returned data should be accepted by verify_inferred_data.
    """
    header = [
        'What',
        'When',
        'Latitude',
        'Longitude',
        'Genus',
        'Species',
        'Infraspecific Rank',
        'Infraspecific Name',
        'Comments',
    ]
    rows = [
        header,
        ['I saw a dog', '2018-02-02', -32, 117.75, 'Canis', 'lupus', 'subsp. familiaris', '', None],
        ['I saw a Chubby bat', '2017-01-02', -32, 116.7, 'Chubby', 'bat', '', '', 'Amazing!'],
        ['I saw nothing', '2018-01-02', -32.34, 116.7, None, None, None, None, None],
    ]
    client = self.custodian_1_client
    xlsx_path = helpers.rows_to_xlsx_file(rows)
    with open(xlsx_path, 'rb') as fp:
        resp = client.post(self.url, data={'file': fp}, format='multipart')
        self.assertEqual(status.HTTP_200_OK, resp.status_code)
        received = resp.json()

        # The dataset must be inferred as a species observation.
        self.assertEqual(Dataset.TYPE_SPECIES_OBSERVATION, received.get('type'))
        self.assertIn('data_package', received)

        package = Package(received.get('data_package'))
        schema = utils_data_package.GenericSchema(
            package.resources[0].descriptor['schema']
        )

        # (column name, expected 'required' flag, expected biosys tag).
        # Every one of these columns is expected to be of type 'string'.
        expectations = [
            ('Genus', True, BiosysSchema.GENUS_TYPE_NAME),
            ('Species', True, BiosysSchema.SPECIES_TYPE_NAME),
            ('Infraspecific Rank', False, BiosysSchema.INFRA_SPECIFIC_RANK_TYPE_NAME),
            ('Infraspecific Name', False, BiosysSchema.INFRA_SPECIFIC_NAME_TYPE_NAME),
        ]
        for column, must_be_required, biosys_tag in expectations:
            field = schema.get_field_by_name(column)
            self.assertIsNotNone(field)
            self.assertEqual(field.type, 'string')
            # Pick the truthiness assertion matching the expectation.
            check_required = self.assertTrue if must_be_required else self.assertFalse
            check_required(field.required)
            biosys = field.get('biosys')
            self.assertIsNotNone(biosys)
            self.assertEqual(biosys.get('type'), biosys_tag)

        # A dataset must be creatable from the returned data.
        self.verify_inferred_data(received)
def test_csv_with_excel_content_type(self):
    """
    A csv file uploaded from Windows often arrives labelled with an excel
    content-type (e.g: 'application/vnd.ms-excel').

    Check that such an upload is still handled: the response is a 200 JSON
    document whose name is the file name without extension, whose type is
    'generic', and whose schema has one non-required, default-format field
    per column with the expected inferred type.
    """
    view = InferDatasetView.as_view()
    columns = ['Name', 'Age', 'Weight', 'Comments']
    rows = [
        columns,
        ['Frederic', '56', '80.5', 'a comment'],
        ['Hilda', '24', '56', ''],
    ]
    csv_path = helpers.rows_to_csv_file(rows)
    factory = APIRequestFactory()
    with open(csv_path, 'rb') as fp:
        # The classic API client gives no control over the Content-Type of
        # an individual multipart part, so the body is encoded through the
        # APIRequestFactory, patched by hand and passed straight to the view.
        data, content_type = factory._encode_data({'file': fp}, format='multipart')
        csv_header = 'Content-Type: text/csv'
        excel_header = 'Content-Type: application/vnd.ms-excel'
        if six.PY3:
            # On Python 3 the encoded body is bytes: go through text to patch it.
            data = data.decode('utf-8').replace(csv_header, excel_header).encode('utf-8')
        else:
            data = data.replace(csv_header, excel_header)
        request = factory.generic('POST', self.url, data, content_type=content_type)

        user = self.data_engineer_1_user
        token, _ = Token.objects.get_or_create(user=user)
        force_authenticate(request, user=user, token=token)

        resp = view(request).render()
        self.assertEqual(status.HTTP_200_OK, resp.status_code)
        # The response must be JSON.
        self.assertEqual(resp.get('content-type'), 'application/json')
        content = resp.content.decode('utf-8') if six.PY3 else resp.content
        received = json.loads(content)

        # The dataset name must be the file name stripped of its extension.
        self.assertIn('name', received)
        expected_name, _extension = path.splitext(path.basename(fp.name))
        self.assertEqual(expected_name, received.get('name'))

        # The dataset type must be 'generic'.
        self.assertIn('type', received)
        self.assertEqual('generic', received.get('type'))

        # A data package must be present and usable.
        self.assertIn('data_package', received)
        self.verify_inferred_data(received)

        # Schema checks: one field per column, in column order.
        package = Package(received.get('data_package'))
        schema = utils_data_package.GenericSchema(
            package.resources[0].descriptor['schema']
        )
        self.assertEqual(len(schema.fields), len(columns))
        self.assertEqual(schema.field_names, columns)

        # (column name, expected inferred type). No column is required and
        # every column uses the 'default' format.
        expected_types = [
            ('Name', 'string'),
            ('Age', 'integer'),
            ('Weight', 'number'),
            ('Comments', 'string'),
        ]
        for column, field_type in expected_types:
            field = schema.get_field_by_name(column)
            self.assertEqual(field.type, field_type)
            self.assertFalse(field.required)
            self.assertEqual(field.format, 'default')