示例#1
0
    def test_headers_are_trimmed_csv(self):
        """
        Test that if the user uploads a csv whose column headers contain leading or trailing spaces, the parser
        trims them before comparing them to the schema.

        A dataset is created with the headers 'What', 'When' and 'Who'. A csv with space-padded versions of these
        headers is then uploaded in strict mode, and the stored record must be keyed by the trimmed names only.
        """
        fields = ['What', 'When', 'Who']
        dataset = self._create_dataset_from_rows([fields])
        schema = dataset.schema
        self.assertEqual(schema.headers, fields)

        # upload a record whose headers are padded with spaces
        padded_headers = ['What ', ' When', ' Who  ']
        csv_data = [
            padded_headers,
            ['Something', '2018-02-01', 'me'],
        ]
        file_ = helpers.rows_to_csv_file(csv_data)
        client = self.custodian_1_client
        url = reverse('api:dataset-upload', kwargs={'pk': dataset.pk})
        with open(file_) as fp:
            data = {
                'file': fp,
                'strict': True  # upload in strict mode
            }
            resp = client.post(url, data=data, format='multipart')
        self.assertEqual(status.HTTP_200_OK, resp.status_code)

        # verify stored data
        record = dataset.record_queryset.first()
        # Fail with an explicit message if nothing was stored, instead of erroring on `record.data` below.
        self.assertIsNotNone(record, 'No record was stored by the upload')
        self.assertEqual(record.data.get('What'), 'Something')
        self.assertEqual(record.data.get('When'), '2018-02-01')
        self.assertEqual(record.data.get('Who'), 'me')
        # verify that the space-padded headers were not stored as keys
        for padded in padded_headers:
            self.assertIsNone(record.data.get(padded))
示例#2
0
 def test_site_code_column_name(self):
     """
     Check that the sites upload accepts a column named 'site_code' as the
     source of the site code.

     Two sites are uploaded to project_1 and their codes are verified.
     """
     header = [
         'site_code', 'Site Name', 'Description', 'Latitude', 'Longitude',
         'Datum', 'Attribute1', 'Attribute2'
     ]
     site_rows = [
         ['C1', 'Site 1', 'Description1', -32, 116, '', 'attr11', 'attr12'],
         ['C2', 'Site 2', 'Description2', -31, 117, '', 'attr21', 'attr22'],
     ]
     csv_data = [header] + site_rows
     csv_file = helpers.rows_to_csv_file(csv_data)
     project = self.project_1
     client = self.custodian_1_client
     url = reverse('api:upload-sites', kwargs={'pk': project.pk})
     # the project must start without any site
     self.assertEqual(0, Site.objects.filter(project=project).count())
     with open(csv_file) as fp:
         resp = client.post(url, data={'file': fp}, format='multipart')
     self.assertEqual(status.HTTP_200_OK, resp.status_code)

     # one site per data row, with the code taken from the 'site_code' column
     sites = Site.objects.filter(project=project)
     self.assertEqual(len(site_rows), sites.count())
     stored_codes = [site.code for site in sites.order_by('code')]
     self.assertEqual(['C1', 'C2'], stored_codes)
示例#3
0
    def test_permissions(self):
        """
        Only custodian or admin can upload sites.

        The anonymous, readonly and custodian_2 clients must get a 401 or 403,
        while the admin and custodian_1 clients must get a 200 when posting a
        valid csv.
        :return:
        """
        project = self.project_1
        upload_urls = [reverse('api:upload-sites', kwargs={'pk': project.pk})]
        forbidden_clients = [
            self.anonymous_client,
            self.readonly_client,
            self.custodian_2_client,
        ]
        allowed_clients = [self.admin_client, self.custodian_1_client]
        rejected_codes = [
            status.HTTP_401_UNAUTHORIZED,
            status.HTTP_403_FORBIDDEN,
        ]

        # forbidden clients are posted a dummy (non-file) payload
        dummy_payload = {'file': 'dsddsds'}
        for client in forbidden_clients:
            for url in upload_urls:
                resp = client.post(url, data=dummy_payload, format='multipart')
                self.assertIn(resp.status_code, rejected_codes)

        # allowed clients upload a minimal csv: one header and one site row
        csv_file = helpers.rows_to_csv_file([['Site Code'], ['C1']])
        for client in allowed_clients:
            for url in upload_urls:
                with open(csv_file) as fp:
                    resp = client.post(url, data={'file': fp}, format='multipart')
                self.assertIn(resp.status_code, [status.HTTP_200_OK])
    def test_generic_string_and_number_simple_csv(self):
        """
        Test that the inference detects the string, integer and number types
        from a simple csv file.
        """
        columns = ['Name', 'Age', 'Weight', 'Comments']
        rows = [
            columns,
            ['Frederic', '56', '80.5', 'a comment'],
            ['Hilda', '24', '56', ''],
        ]
        file_ = helpers.rows_to_csv_file(rows)
        client = self.data_engineer_1_client
        with open(file_, 'rb') as fp:
            resp = client.post(self.url, data={'file': fp}, format='multipart')
            uploaded_path = fp.name
        self.assertEqual(status.HTTP_200_OK, resp.status_code)
        # should be json
        self.assertEqual(resp.get('content-type'), 'application/json')
        received = resp.json()

        # name should be set with the file name (without its extension)
        self.assertIn('name', received)
        expected_name, _ = path.splitext(path.basename(uploaded_path))
        self.assertEqual(expected_name, received.get('name'))
        # type should be 'generic'
        self.assertIn('type', received)
        self.assertEqual('generic', received.get('type'))

        # data_package verification
        self.assertIn('data_package', received)
        self.verify_inferred_data(received)

        # verify schema
        package = Package(received.get('data_package'))
        schema_descriptor = package.resources[0].descriptor['schema']
        schema = utils_data_package.GenericSchema(schema_descriptor)
        self.assertEqual(len(schema.fields), len(columns))
        self.assertEqual(schema.field_names, columns)

        # every field is expected to be optional with the 'default' format;
        # only the inferred type differs from one column to the other
        expected_types = [
            ('Name', 'string'),
            ('Age', 'integer'),
            ('Weight', 'number'),
            ('Comments', 'string'),
        ]
        for column_name, expected_type in expected_types:
            field = schema.get_field_by_name(column_name)
            self.assertEqual(field.type, expected_type)
            self.assertFalse(field.required)
            self.assertEqual(field.format, 'default')
    def test_upload_csv_happy_path(self):
        """
        Upload a csv with two data rows in strict mode and check that one
        record per row is stored, in row order, with the row data and a
        source_info holding the file name and the row number.
        """
        header = ['Column A', 'Column B']
        data_rows = [
            ['A1', 'B1'],
            ['A2', 'B2'],
        ]
        csv_data = [header] + data_rows
        file_ = helpers.rows_to_csv_file(csv_data)
        client = self.custodian_1_client
        self.assertEqual(0, self.ds.record_queryset.count())
        file_name = path.basename(file_)
        with open(file_) as fp:
            payload = {
                'file': fp,
                'strict': True  # upload in strict mode
            }
            resp = client.post(self.url, data=payload, format='multipart')
        self.assertEqual(status.HTTP_200_OK, resp.status_code)
        # The records should be saved in order of the row
        records = self.ds.record_queryset.order_by('pk')
        self.assertEqual(len(data_rows), records.count())

        for index, row in enumerate(data_rows):
            record = records[index]
            self.assertEqual(dict(zip(header, row)), record.data)
            # test that source_info contains the file_name and row_counter
            source_info = record.source_info
            self.assertIsNotNone(source_info)
            # the expected row is index + 2
            # (presumably 1-based with the header as row 1 -- TODO confirm)
            expected_info = {'file_name': file_name, 'row': index + 2}
            self.assertEqual(source_info, expected_info)

        self.assertEqual(self.project_1.record_count, len(data_rows))
        self.assertEqual(self.ds.record_count, len(data_rows))
示例#6
0
    def test_upload_csv_happy_path(self):
        """
        Upload a csv describing two sites to project_1 and check that the
        sites are created with the expected code, name, description, geometry
        and attributes, and that the project site count is updated.
        """
        csv_data = [
            [
                'Site Code', 'Site Name', 'Description', 'Latitude', 'Longitude',
                'Datum', 'Attribute1', 'Attribute2'
            ],
            ['C1', 'Site 1', 'Description1', -32, 116, '', 'attr11', 'attr12'],
            ['C2', 'Site 2', 'Description2', -31, 117, '', 'attr21', 'attr22'],
        ]
        csv_file = helpers.rows_to_csv_file(csv_data)
        project = self.project_1
        client = self.custodian_1_client
        url = reverse('api:upload-sites', kwargs={'pk': project.pk})
        self.assertEqual(0, Site.objects.filter(project=project).count())
        with open(csv_file) as fp:
            data = {'file': fp}
            resp = client.post(url, data=data, format='multipart')
        self.assertEqual(status.HTTP_200_OK, resp.status_code)

        # one site per data row (the first row is the header)
        qs = Site.objects.filter(project=project)
        self.assertEqual(len(csv_data) - 1, qs.count())
        self.assertEqual(['C1', 'C2'],
                         [site.code for site in qs.order_by('code')])
        self.assertEqual(['Site 1', 'Site 2'],
                         [site.name for site in qs.order_by('name')])
        self.assertEqual(
            ['Description1', 'Description2'],
            [site.description for site in qs.order_by('description')])

        # test geom and attr
        site = qs.filter(code='C1').first()
        # Fail with an explicit message if the site is missing, instead of
        # erroring on `site.geometry` below.
        self.assertIsNotNone(site, "No site with code 'C1' was created")
        # the geometry is expected as (x, y) = (Longitude, Latitude)
        self.assertEqual((116, -32), (site.geometry.x, site.geometry.y))
        # the numeric csv values are expected back as strings; the code, name
        # and description columns are not expected among the attributes
        expected_attributes = {
            'Latitude': '-32',
            'Longitude': '116',
            'Datum': '',
            'Attribute1': 'attr11',
            'Attribute2': 'attr12'
        }
        self.assertEqual(expected_attributes, site.attributes)

        self.assertEqual(project.site_count, len(csv_data) - 1)
    def test_csv_with_excel_content_type(self):
        """
        Often on Windows a csv file comes with an excel content-type (e.g: 'application/vnd.ms-excel').
        Test that we handle the case: the inference must behave as for a regular csv upload.
        """
        columns = ['Name', 'Age', 'Weight', 'Comments']
        rows = [
            columns,
            ['Frederic', '56', '80.5', 'a comment'],
            ['Hilda', '24', '56', ''],
        ]
        file_ = helpers.rows_to_csv_file(rows)
        view = InferDatasetView.as_view()
        factory = APIRequestFactory()
        # In order to hack the Content-Type of the multipart form data we need to use the APIRequestFactory and work
        # with the view directly. Can't use the classic API client.
        with open(file_, 'rb') as fp:
            data, content_type = factory._encode_data({'file': fp},
                                                      format='multipart')
            uploaded_path = fp.name

        # hack the content-type of the file part in the encoded body.
        csv_part_header = 'Content-Type: text/csv'
        excel_part_header = 'Content-Type: application/vnd.ms-excel'
        if six.PY3:
            # the encoded body is bytes on python 3: replace on the decoded text, then encode back
            text = data.decode('utf-8')
            data = text.replace(csv_part_header, excel_part_header).encode('utf-8')
        else:
            data = data.replace(csv_part_header, excel_part_header)

        request = factory.generic('POST',
                                  self.url,
                                  data,
                                  content_type=content_type)
        user = self.data_engineer_1_user
        token, _ = Token.objects.get_or_create(user=user)
        force_authenticate(request, user=user, token=token)
        resp = view(request).render()
        self.assertEqual(status.HTTP_200_OK, resp.status_code)
        # should be json
        self.assertEqual(resp.get('content-type'), 'application/json')
        content = resp.content.decode('utf-8') if six.PY3 else resp.content
        received = json.loads(content)

        # name should be set with the file name (without its extension)
        self.assertIn('name', received)
        expected_name, _ = path.splitext(path.basename(uploaded_path))
        self.assertEqual(expected_name, received.get('name'))
        # type should be 'generic'
        self.assertIn('type', received)
        self.assertEqual('generic', received.get('type'))

        # data_package verification
        self.assertIn('data_package', received)
        self.verify_inferred_data(received)

        # verify schema
        package = Package(received.get('data_package'))
        schema_descriptor = package.resources[0].descriptor['schema']
        schema = utils_data_package.GenericSchema(schema_descriptor)
        self.assertEqual(len(schema.fields), len(columns))
        self.assertEqual(schema.field_names, columns)

        # every field is expected to be optional with the 'default' format;
        # only the inferred type differs from one column to the other
        expected_types = [
            ('Name', 'string'),
            ('Age', 'integer'),
            ('Weight', 'number'),
            ('Comments', 'string'),
        ]
        for column_name, expected_type in expected_types:
            field = schema.get_field_by_name(column_name)
            self.assertEqual(field.type, expected_type)
            self.assertFalse(field.required)
            self.assertEqual(field.format, 'default')