def test_observation_with_easting_northing_zone_xls(self):
        """
        Scenario: xlsx upload containing Easting, Northing and Zone columns
         Given a spreadsheet with columns named Easting, Northing and Zone
         When it is posted to the inference endpoint
         Then the inferred dataset type is Observation
         And Easting and Northing are 'number' fields flagged as required
         And Zone is an 'integer' field flagged as required
         And each of the three carries its matching biosys type tag
         And the returned payload passes verify_inferred_data.
        """

        def assert_required_tagged_field(schema, name, expected_type,
                                         expected_tag):
            # Shared checks for one column: present, typed, required and
            # carrying the expected biosys tag.
            field = schema.get_field_by_name(name)
            self.assertIsNotNone(field)
            self.assertEqual(field.type, expected_type)
            self.assertTrue(field.required)
            tag = field.get('biosys')
            self.assertIsNotNone(tag)
            self.assertEqual(tag.get('type'), expected_tag)

        header = ['What', 'Easting', 'Northing', 'Zone', 'Comments']
        # The second data row supplies easting/northing as strings rather
        # than floats.
        numeric_row = ['Something', 12563.233, 568932.345, 50, 'A dog']
        string_row = [
            'Observation with easting/northing as string', '12563.233',
            '568932.345', 50, 'A dog'
        ]
        xlsx_path = helpers.rows_to_xlsx_file(
            [header, numeric_row, string_row])
        api_client = self.custodian_1_client
        with open(xlsx_path, 'rb') as fp:
            resp = api_client.post(self.url,
                                   data={'file': fp},
                                   format='multipart')
            self.assertEqual(status.HTTP_200_OK, resp.status_code)
            received = resp.json()
            # the dataset must be inferred as an observation
            self.assertEqual(Dataset.TYPE_OBSERVATION, received.get('type'))
            # a data package must be part of the answer
            self.assertIn('data_package', received)

            # inspect the schema of the first resource
            package = Package(received.get('data_package'))
            descriptor = package.resources[0].descriptor['schema']
            schema = utils_data_package.GenericSchema(descriptor)

            assert_required_tagged_field(schema, 'Easting', 'number',
                                         BiosysSchema.EASTING_TYPE_NAME)
            assert_required_tagged_field(schema, 'Northing', 'number',
                                         BiosysSchema.NORTHING_TYPE_NAME)
            assert_required_tagged_field(schema, 'Zone', 'integer',
                                         BiosysSchema.ZONE_TYPE_NAME)

            # the returned data must be usable to save a dataset
            self.verify_inferred_data(received)
    def test_observation_with_genus_species_infra_rank_and_infra_name_only_xls(
            self):
        """
        Scenario: xlsx upload with Latitude, Longitude, Genus, Species,
                  Infraspecific Rank and Infraspecific Name columns
         Given a spreadsheet containing those columns
         When it is posted to the inference endpoint
         Then the inferred dataset type is speciesObservation
         And 'Genus' is a required string tagged with the biosys genus type
         And 'Species' is a required string tagged with the biosys species type
         And 'Infraspecific Rank' is a non-required string tagged with the
             biosys infra-specific rank type
         And 'Infraspecific Name' is a non-required string tagged with the
             biosys infra-specific name type
         And the returned payload passes verify_inferred_data.
        """

        def assert_string_field(schema, name, required, expected_tag):
            # Shared checks for one taxonomy column: present, string typed,
            # expected required flag and expected biosys tag.
            field = schema.get_field_by_name(name)
            self.assertIsNotNone(field)
            self.assertEqual(field.type, 'string')
            if required:
                self.assertTrue(field.required)
            else:
                self.assertFalse(field.required)
            tag = field.get('biosys')
            self.assertIsNotNone(tag)
            self.assertEqual(tag.get('type'), expected_tag)

        header = [
            'What', 'When', 'Latitude', 'Longitude', 'Genus', 'Species',
            'Infraspecific Rank', 'Infraspecific Name', 'Comments'
        ]
        dog_row = [
            'I saw a dog', '2018-02-02', -32, 117.75, 'Canis', 'lupus',
            'subsp. familiaris', '', None
        ]
        bat_row = [
            'I saw a Chubby bat', '2017-01-02', -32, 116.7, 'Chubby', 'bat',
            '', '', 'Amazing!'
        ]
        # Last row leaves every taxonomy cell empty.
        empty_row = [
            'I saw nothing', '2018-01-02', -32.34, 116.7, None, None, None,
            None, None
        ]
        xlsx_path = helpers.rows_to_xlsx_file(
            [header, dog_row, bat_row, empty_row])
        api_client = self.custodian_1_client
        with open(xlsx_path, 'rb') as fp:
            resp = api_client.post(self.url,
                                   data={'file': fp},
                                   format='multipart')
            self.assertEqual(status.HTTP_200_OK, resp.status_code)
            received = resp.json()
            # the dataset must be inferred as a species observation
            self.assertEqual(Dataset.TYPE_SPECIES_OBSERVATION,
                             received.get('type'))
            self.assertIn('data_package', received)
            package = Package(received.get('data_package'))
            descriptor = package.resources[0].descriptor['schema']
            schema = utils_data_package.GenericSchema(descriptor)

            # per-field attributes
            assert_string_field(schema, 'Genus', True,
                                BiosysSchema.GENUS_TYPE_NAME)
            assert_string_field(schema, 'Species', True,
                                BiosysSchema.SPECIES_TYPE_NAME)
            assert_string_field(schema, 'Infraspecific Rank', False,
                                BiosysSchema.INFRA_SPECIFIC_RANK_TYPE_NAME)
            assert_string_field(schema, 'Infraspecific Name', False,
                                BiosysSchema.INFRA_SPECIFIC_NAME_TYPE_NAME)

            # the returned data must be usable to create a dataset
            self.verify_inferred_data(received)
    def test_csv_with_excel_content_type(self):
        """
        A csv file uploaded from Windows is often labelled with an excel
        content-type (e.g: 'application/vnd.ms-excel').

        This test builds a multipart request around a csv file, rewrites the
        file part's Content-Type header to the excel one, calls the view
        directly and checks that a generic dataset with the expected schema
        is still inferred.
        """
        header = ['Name', 'Age', 'Weight', 'Comments']
        csv_path = helpers.rows_to_csv_file([
            header,
            ['Frederic', '56', '80.5', 'a comment'],
            ['Hilda', '24', '56', ''],
        ])
        view = InferDatasetView.as_view()
        factory = APIRequestFactory()
        with open(csv_path, 'rb') as fp:
            # The regular API client gives no way to alter the Content-Type of
            # a single multipart part, so the body is encoded through the
            # request factory, patched by hand and sent straight to the view.
            body, content_type = factory._encode_data({'file': fp},
                                                      format='multipart')
            csv_header = 'Content-Type: text/csv'
            excel_header = 'Content-Type: application/vnd.ms-excel'
            if six.PY3:
                # the encoded body is bytes on python 3: patch it as text
                body = body.decode('utf-8').replace(
                    csv_header, excel_header).encode('utf-8')
            else:
                body = body.replace(csv_header, excel_header)
            request = factory.generic('POST',
                                      self.url,
                                      body,
                                      content_type=content_type)
            user = self.data_engineer_1_user
            token = Token.objects.get_or_create(user=user)[0]
            force_authenticate(request, user=user, token=token)
            resp = view(request).render()
            self.assertEqual(status.HTTP_200_OK, resp.status_code)
            # the answer must be json
            self.assertEqual(resp.get('content-type'), 'application/json')
            content = resp.content.decode('utf-8') if six.PY3 else resp.content
            received = json.loads(content)

            # the dataset name is the file name without its extension
            self.assertIn('name', received)
            expected_name, _ext = path.splitext(path.basename(fp.name))
            self.assertEqual(expected_name, received.get('name'))
            # the dataset type is 'generic'
            self.assertIn('type', received)
            self.assertEqual('generic', received.get('type'))

            # data package checks
            self.assertIn('data_package', received)
            self.verify_inferred_data(received)

            # schema checks
            package = Package(received.get('data_package'))
            descriptor = package.resources[0].descriptor['schema']
            schema = utils_data_package.GenericSchema(descriptor)
            self.assertEqual(len(schema.fields), len(header))
            self.assertEqual(schema.field_names, header)

            # every column is optional with the default format; only the
            # inferred type differs.
            expected_types = [
                ('Name', 'string'),
                ('Age', 'integer'),
                ('Weight', 'number'),
                ('Comments', 'string'),
            ]
            for column_name, expected_type in expected_types:
                field = schema.get_field_by_name(column_name)
                self.assertEqual(field.type, expected_type)
                self.assertFalse(field.required)
                self.assertEqual(field.format, 'default')