def test_determine_x_and_y_fields(self):
    """Coordinate columns named 'lon'/'lat' are detected; unrelated names yield (None, None)."""
    csv_with_coords = io.StringIO('lon,lat\n' '123.4,50.2\n')
    result = prepare_csv_rows(csv_with_coords)
    self.assertEqual(result['coord_fields'], ('lon', 'lat'))

    # Only one plausible coordinate column present, so no pair is identified
    csv_without_coords = io.StringIO('lon,something_else\n' '123.4,50.2\n')
    result = prepare_csv_rows(csv_without_coords)
    self.assertEqual(result['coord_fields'], (None, None))
def append(self, request, **kwargs):
    """
    Append the rows of the uploaded CSV file (identified by the URL kwargs)
    to the existing database table for ``dataset_id``.

    POST data must include ``csv_info`` (JSON describing the CSV) and
    ``fields`` (JSON list of additional field definitions).

    :return: An HTTP response containing the table name if successful;
        a 400 error response otherwise.
    """
    self.is_authenticated(request)
    try:
        # pop() reads and removes dataset_id in one step so the remaining
        # kwargs can be forwarded to obj_get()
        dataset_id = kwargs.pop('dataset_id', None)
        bundle = self.build_bundle(request=request)
        obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))
        csv_info = json.loads(request.POST.get('csv_info'))
        additional_fields = json.loads(request.POST.get('fields'))
        row_set = prepare_csv_rows(obj.file)
        sample_row = next(row_set.sample)
        table_name = create_database_table(sample_row, dataset_id, append=True)
        add_or_update_database_fields(table_name, additional_fields)
        populate_data(table_name, row_set)
        bundle.data['table_name'] = table_name
        populate_point_data(dataset_id, csv_info)
        obj.delete()  # Temporary file has been moved to database, safe to delete
    except InternalError:
        # BUG FIX: logger.exception() requires a message argument; calling it
        # with no arguments raised a TypeError that masked the original error.
        logger.exception('Error deploying file to database.')
        raise ImmediateHttpResponse(HttpBadRequest('Error deploying file to database.'))
    return self.create_response(request, bundle)
def test_data_type_inference(self):
    """Column types are inferred from sample values: string, integer, empty, decimal."""
    sample = io.StringIO(
        'header_one,header_two,header_three,header_four\n'
        'one,1,,1\n'
        'two,2,,2.1\n')
    inferred = prepare_csv_rows(sample)['data_types']
    # Compare case-insensitively, column by column
    expected = ['string', 'integer', 'empty', 'decimal']
    for position, wanted in enumerate(expected):
        self.assertEqual(inferred[position].lower(), wanted)
def append(self, request, **kwargs):
    """
    Append rows from the uploaded CSV file to the existing database table
    for ``dataset_id``, then refresh the point geometry data.

    POST data must include ``csv_info`` (JSON describing the CSV) and
    ``fields`` (JSON list of additional field definitions).

    :return: An HTTP response containing the table name if successful;
        a 400 JSON error response otherwise.
    """
    self.is_authenticated(request)
    try:
        dataset_id = kwargs.pop('dataset_id', None)
        bundle = self.build_bundle(request=request)
        obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))

        csv_info = json.loads(request.POST.get('csv_info'))
        additional_fields = json.loads(request.POST.get('fields'))

        prepared_csv = prepare_csv_rows(obj.file, csv_info)
        table_name = create_database_table(
            prepared_csv['row_set'],
            csv_info,
            dataset_id,
            append=True,
            additional_fields=additional_fields)
        self.populate_point_data(dataset_id, csv_info)

        bundle.data['table_name'] = table_name
        obj.delete()  # Temporary file has been moved to database, safe to delete
    except Exception as e:
        logger.exception(e)
        error_body = json.dumps(derive_error_response_data(e))
        raise ImmediateHttpResponse(
            HttpBadRequest(content=error_body, content_type='application/json'))
    return self.create_response(request, bundle)
def deploy(self, request, **kwargs):
    """
    The deploy endpoint, at
    ``{tablo_server}/api/v1/temporary-files/{uuid}/{dataset_id}/deploy/``
    deploys the file specified by {uuid} into a database table named after
    the {dataset_id}. The {dataset_id} must be unique for the instance of
    Tablo.

    With the deploy endpoint, this is the start of what Tablo considers an
    import. The data will be temporarily stored in an import table until the
    finalize endpoint for the dataset_id is called.

    POST messages to the deploy endpoint should include the following data:

    **csv_info**
        Information about the CSV file. This is generally that information
        obtained through the describe endpoint, but can be modified to send
        additional information or modify it.

    **fields**
        A list of field JSON objects in the following format:

        .. code-block:: json

            {
                "name": "field_name",
                "type": "text",
                "value": "optional value",
                "required": true
            }

        The value can be specified if the field is a constant value
        throughout the table. This can be use for adding audit information.

    :return: An empty HTTP 200 response if the deploy was successful. An
        error response if otherwise.
    """
    self.is_authenticated(request)
    try:
        # pop() reads and removes dataset_id in one step so the remaining
        # kwargs can be forwarded to obj_get()
        dataset_id = kwargs.pop('dataset_id', None)
        bundle = self.build_bundle(request=request)
        obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))
        csv_info = json.loads(request.POST.get('csv_info'))
        additional_fields = json.loads(request.POST.get('fields'))
        # Use separate iterator of table rows to not exaust the main one
        optional_fields = determine_optional_fields(prepare_csv_rows(obj.file))
        row_set = prepare_csv_rows(obj.file)
        sample_row = next(row_set.sample)
        table_name = create_database_table(sample_row, dataset_id, optional_fields=optional_fields)
        populate_data(table_name, row_set)
        add_or_update_database_fields(table_name, additional_fields)
        bundle.data['table_name'] = table_name
        add_point_column(dataset_id)
        populate_point_data(dataset_id, csv_info)
        obj.delete()  # Temporary file has been moved to database, safe to delete
    except InternalError:
        # BUG FIX: logger.exception() requires a message argument; calling it
        # with no arguments raised a TypeError that masked the original error.
        logger.exception('Error deploying file to database.')
        raise ImmediateHttpResponse(HttpBadRequest('Error deploying file to database.'))
    return self.create_response(request, bundle)
def describe(self, request, **kwargs):
    """
    Describe, located at the
    ``{tablo-server}/api/v1/temporary-files/{uuid}/describe`` endpoint,
    will describe the uploaded CSV file. This allows you to know the column
    names and data types that were found within the file.

    :return: A JSON object in the following format:

        .. code-block:: json

            {
                "fieldNames": ["field one", "field two", "latitude", "longitude"],
                "dataTypes": ["STRING", "INTEGER", "DOUBLE", "DOUBLE"],
                "optionalFields": ["field one"],
                "xColumn": "longitude",
                "yColumn": "latitude",
                "filename": "uploaded.csv"
            }

        **fieldNames** A list of field (column) names within the CSV

        **dataTypes** A list of data types for each of the columns. The
        index of this list will match the index of the fieldNames list.

        **optionalFields** A list of fields that had empty values, and are
        taken to be optional.

        **xColumn** The best guess at which column contains X spatial
        coordinates.

        **yColumn** The best guess at which column contains Y spatial
        coordinates.

        **filename** The name of the file being described
    """
    self.is_authenticated(request)
    bundle = self.build_bundle(request=request)
    obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))

    # Only CSV uploads are supported by this endpoint
    if obj.extension != 'csv':
        raise ImmediateHttpResponse(HttpBadRequest('Unsupported file format.'))
    csv_file_name = obj.file.name

    row_set = prepare_csv_rows(obj.file)
    sample_row = next(row_set.sample)

    bundle.data['fieldNames'] = [cell.column for cell in sample_row]
    bundle.data['dataTypes'] = [TYPE_REGEX.sub('', str(cell.type)) for cell in sample_row]
    bundle.data['optionalFields'] = determine_optional_fields(row_set)

    x_field, y_field = determine_x_and_y_fields(sample_row)
    if x_field and y_field:
        bundle.data['xColumn'] = x_field
        bundle.data['yColumn'] = y_field
    bundle.data['file_name'] = csv_file_name

    return self.create_response(request, bundle)
def deploy(self, request, **kwargs):
    """
    The deploy endpoint, at
    ``{tablo_server}/api/v1/temporary-files/{uuid}/{dataset_id}/deploy/``
    deploys the file specified by {uuid} into a database table named after
    the {dataset_id}. The {dataset_id} must be unique for the instance of
    Tablo.

    With the deploy endpoint, this is the start of what Tablo considers an
    import. The data will be temporarily stored in an import table until the
    finalize endpoint for the dataset_id is called.

    POST messages to the deploy endpoint should include the following data:

    **csv_info**
        Information about the CSV file. This is generally that information
        obtained through the describe endpoint, but can be modified to send
        additional information or modify it.

    **fields**
        A list of field JSON objects in the following format:

        .. code-block:: json

            {
                "name": "field_name",
                "type": "text",
                "value": "optional value",
                "required": true
            }

        The value can be specified if the field is a constant value
        throughout the table. This can be use for adding audit information.

    :return: An empty HTTP 200 response if the deploy was successful. An
        error response if otherwise.
    """
    self.is_authenticated(request)
    try:
        dataset_id = kwargs.pop('dataset_id', None)
        bundle = self.build_bundle(request=request)
        obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))

        csv_info = json.loads(request.POST.get('csv_info'))
        additional_fields = json.loads(request.POST.get('fields'))

        prepared_csv = prepare_csv_rows(obj.file, csv_info)
        table_name = create_database_table(
            prepared_csv['row_set'],
            csv_info,
            dataset_id,
            additional_fields=additional_fields)
        add_geometry_column(dataset_id)
        self.populate_point_data(dataset_id, csv_info)

        bundle.data['table_name'] = table_name
        obj.delete()  # Temporary file has been moved to database, safe to delete
    except Exception as e:
        logger.exception(e)
        error_body = json.dumps(derive_error_response_data(e))
        raise ImmediateHttpResponse(
            HttpBadRequest(content=error_body, content_type='application/json'))
    return self.create_response(request, bundle)
def describe(self, request, **kwargs):
    """
    Describe, located at the
    ``{tablo-server}/api/v1/temporary-files/{uuid}/describe`` endpoint,
    will describe the uploaded CSV file. This allows you to know the column
    names and data types that were found within the file.

    :return: A JSON object in the following format:

        .. code-block:: json

            {
                "fieldNames": ["field one", "field two", "latitude", "longitude"],
                "dataTypes": ["String", "Integer", "Decimal", "Decimal"],
                "optionalFields": ["field one"],
                "xColumn": "longitude",
                "yColumn": "latitude",
                "filename": "uploaded.csv"
            }

        **fieldNames** A list of field (column) names within the CSV

        **dataTypes** A list of data types for each of the columns. The
        index of this list will match the index of the fieldNames list.

        **optionalFields** A list of fields that had empty values, and are
        taken to be optional.

        **xColumn** The best guess at which column contains X spatial
        coordinates.

        **yColumn** The best guess at which column contains Y spatial
        coordinates.

        **filename** The name of the file being described
    """
    self.is_authenticated(request)
    bundle = self.build_bundle(request=request)
    obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))

    # Reject anything that is not a CSV upload, reporting the offending extension
    try:
        if obj.extension != 'csv':
            raise InvalidFileError('Unsupported file format', extension=obj.extension)
        csv_file_name = obj.file.name
    except InvalidFileError as e:
        raise ImmediateHttpResponse(
            HttpBadRequest(
                content=json.dumps(derive_error_response_data(e, code=BAD_DATA)),
                content_type='application/json'))

    # csv_info is optional; normalize a missing/empty value to None
    csv_info = json.loads(request.POST.get('csv_info') or '{}') or None
    prepared_csv = prepare_csv_rows(obj.file, csv_info)
    row_set = prepared_csv['row_set']

    if not len(row_set):
        empty_error = InvalidFileError('File is empty', lines=0)
        raise ImmediateHttpResponse(
            HttpBadRequest(
                content=json.dumps(derive_error_response_data(empty_error, code=BAD_DATA)),
                content_type='application/json'))

    bundle.data['fieldNames'] = row_set.columns.to_list()
    bundle.data['dataTypes'] = prepared_csv['data_types']
    bundle.data['optionalFields'] = prepared_csv['optional_fields']

    x_field, y_field = prepared_csv['coord_fields']
    if x_field and y_field:
        bundle.data['xColumn'] = x_field
        bundle.data['yColumn'] = y_field
    bundle.data.update({'file_name': csv_file_name})

    return self.create_response(request, bundle)