def test_infer_schema_from_another_csv_file():
    '''Check the schema inferred from the ``test.csv`` sample file.

    The schema must contain seven fields. The first field is compared in
    full (including its temporal extent); for the next two fields only the
    index, name and type are checked.
    '''
    # Resolve the sample file relative to this test module.
    here = os.path.dirname(__file__)
    csv_path = os.path.abspath(os.path.join(here, '../test-data/test.csv'))

    inferred = csv_utils.infer_schema_from_csv_file(csv_path)
    fields = inferred['fields']

    assert len(fields) == 7

    expected_first = {
        'index': 0,
        'name': 'datetime',
        'temporal_extent': '2011-12-30T00:00:00/2011-12-30T00:00:00',
        'type': 'datetime',
    }
    nose.tools.assert_equals(fields[0], expected_first)

    # (name, type) expected at positions 1 and 2, in column order.
    expected_rest = [('timedelta', 'string'), ('integer', 'integer')]
    for position, (name, kind) in enumerate(expected_rest, 1):
        field = fields[position]
        assert field['index'] == position
        assert field['name'] == name
        assert field['type'] == kind
def test_infer_dates_in_schema_from_csv_file():
    '''The first field inferred from ``data.csv`` must be a datetime.'''
    # Resolve the test data file relative to this module, as an absolute path.
    tests_dir = os.path.dirname(__file__)
    data_file = os.path.abspath(
        os.path.join(tests_dir, '../test-data/data.csv'))

    inferred = csv_utils.infer_schema_from_csv_file(data_file)
    first_field = inferred['fields'][0]

    nose.tools.assert_equals(first_field['type'], 'datetime')
def test_infer_schema_temporal_extent_raises_error(m):
    '''Schema inference still yields a datetime field when ``m`` raises.

    ``m`` is configured to raise a different exception on each successive
    call. NOTE(review): ``m`` is presumably the mock patched in place of the
    temporal-extent computation by a decorator outside this block -- confirm.
    '''
    # Both exception instances and exception classes are used as side effects.
    failures = [ValueError(), TypeError, IOError, IndexError()]
    m.side_effect = failures

    # Resolve the test data file relative to this module.
    module_dir = os.path.dirname(__file__)
    data_file = os.path.abspath(
        os.path.join(module_dir, '../test-data/data.csv'))

    # Run the inference once per exception that temporal_extent may raise.
    for _ in failures:
        inferred = csv_utils.infer_schema_from_csv_file(data_file)
        nose.tools.assert_equals(inferred['fields'][0]['type'], 'datetime')
def _infer_schema_for_resource(resource):
    '''Infer a JSON Table Schema from the given resource's uploaded file.

    A notice is flashed when the file does not look like a CSV or text
    file, but inference is attempted regardless. If the file cannot be read
    as CSV, a schema with an empty ``fields`` list is returned.
    '''
    # This function is only called right after an upload, so the resource is
    # assumed to have an uploaded file and the path lookup should not raise.
    resource_path = util.get_path_to_resource_file(resource)

    looks_like_csv = csv_utils.resource_is_csv_or_text_file(resource_path)
    if not looks_like_csv:
        helpers.flash_notice(
            'This file does not seem to be a csv or text file. '
            'You could try validating this file at http://csvlint.io'
        )

    try:
        return csv_utils.infer_schema_from_csv_file(resource_path)
    except exceptions.CouldNotReadCSVException:
        # Unreadable file: fall back to a schema with no fields.
        return {'fields': []}
def test_infer_schema_from_csv_file():
    '''Compare the full schema inferred from ``AllstarFull.csv``.

    The whole schema dict is checked for exact equality, including the
    summary statistics attached to the numeric columns. Ideally this would
    be split into separate tests for different kinds of CSV file.
    '''
    # Resolve the sample file relative to this test module.
    base_dir = os.path.dirname(__file__)
    sample = os.path.abspath(os.path.join(
        base_dir, '../test-data/lahmans-baseball-database/AllstarFull.csv'))

    inferred = csv_utils.infer_schema_from_csv_file(sample)

    # Expected fields in column order; string columns carry no statistics.
    expected_fields = [
        {'index': 0, 'name': 'playerID', 'type': 'string'},
        {'25%': 1957.0, '50%': 1975.0, '75%': 1996.0, 'count': 4912.0,
         'index': 1, 'max': 2013.0, 'mean': 1975.2168159609121,
         'min': 1933.0, 'name': 'yearID', 'std': 23.055456639147902,
         'type': 'integer'},
        {'25%': 0.0, '50%': 0.0, '75%': 0.0, 'count': 4912.0,
         'index': 2, 'max': 2.0, 'mean': 0.14128664495114007,
         'min': 0.0, 'name': 'gameNum', 'std': 0.46806965450335747,
         'type': 'integer'},
        {'index': 3, 'name': 'gameID', 'type': 'string'},
        {'index': 4, 'name': 'teamID', 'type': 'string'},
        {'index': 5, 'name': 'lgID', 'type': 'string'},
        {'25%': 1.0, '50%': 1.0, '75%': 1.0, 'count': 4875.0,
         'index': 6, 'max': 1.0, 'mean': 0.78174358974358971,
         'min': 0.0, 'name': 'GP', 'std': 0.41310477594222272,
         'type': 'number'},
        {'25%': 3.0, '50%': 5.0, '75%': 7.0, 'count': 1540.0,
         'index': 7, 'max': 10.0, 'mean': 5.0519480519480515,
         'min': 0.0, 'name': 'startingPos', 'std': 2.646100537485232,
         'type': 'number'},
    ]

    assert inferred == {'fields': expected_fields}