def test_padded_ints_coerce_fail(self):
    """
    Coercing zero-padded values to int is expected to raise.

    The exception should report the first value (index 0, '0001').
    """
    # assertRaises fails the test when nothing is raised; the previous
    # try/except form passed silently in that case.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(
            [u'0001', u'0997', u'8.7', u''], normal_type=int)

    self.assertEqual(ctx.exception.index, 0)
    self.assertEqual(ctx.exception.value, '0001')
    self.assertEqual(ctx.exception.normal_type, int)
def test_ints_coerce_fail(self):
    """
    Coercing a column containing 'TRUE' to int is expected to raise.

    The exception should report index 4 and the value 'TRUE'.
    """
    # assertRaises fails the test when nothing is raised; the previous
    # try/except form passed silently in that case.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(
            [u'1', u'-87', u'418000000', u'', u'TRUE'], normal_type=int)

    self.assertEqual(ctx.exception.index, 4)
    self.assertEqual(ctx.exception.value, 'TRUE')
    self.assertEqual(ctx.exception.normal_type, int)
def test_floats_coerce_fail(self):
    """
    Coercing a column containing free text to float is expected to raise.

    The exception should report index 3 and the value 'Hello, world!'.
    """
    # assertRaises fails the test when nothing is raised; the previous
    # try/except form passed silently in that case.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(
            [u'1', u'-87.413', u'418000000.0', u'Hello, world!'],
            normal_type=float)

    self.assertEqual(ctx.exception.index, 3)
    self.assertEqual(ctx.exception.value, 'Hello, world!')
    self.assertEqual(ctx.exception.normal_type, float)
def test_nulls_coerce_fail(self):
    """
    Coercing a column containing '1.7' to NoneType is expected to raise.

    The exception should report index 3 and the value '1.7'.
    """
    # assertRaises fails the test when nothing is raised; the previous
    # try/except form passed silently in that case.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(
            [u'n/a', u'NA', u'.', u'1.7', u'none', u''],
            normal_type=NoneType)

    self.assertEqual(ctx.exception.index, 3)
    self.assertEqual(ctx.exception.value, '1.7')
    self.assertEqual(ctx.exception.normal_type, NoneType)
def test_padded_ints_coerce_fail(self):
    """
    Expect int coercion of zero-padded values to fail at index 0.
    """
    padded = [u'0001', u'0997', u'8.7', u'']

    with self.assertRaises(InvalidValueForTypeException) as raised:
        typeinference.normalize_column_type(padded, normal_type=int)

    failure = raised.exception
    self.assertEqual(failure.index, 0)
    self.assertEqual(failure.value, '0001')
    self.assertEqual(failure.normal_type, int)
def test_dates_coerce_fail(self):
    """
    Expect datetime coercion to fail on the time-only value at index 3.
    """
    stamps = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'3/1/08 16:14:45',
        u'4:45 AM',
    ]

    with self.assertRaises(InvalidValueForTypeException) as raised:
        typeinference.normalize_column_type(
            stamps, normal_type=datetime.datetime)

    failure = raised.exception
    self.assertEqual(failure.index, 3)
    self.assertEqual(failure.value, '4:45 AM')
    self.assertEqual(failure.normal_type, datetime.datetime)
def test_nulls_coerce_fail(self):
    """
    Expect NoneType coercion to fail on '1.7' at index 3.
    """
    candidates = [u'n/a', u'NA', u'.', u'1.7', u'none', u'']

    with self.assertRaises(InvalidValueForTypeException) as raised:
        typeinference.normalize_column_type(
            candidates, normal_type=NoneType)

    failure = raised.exception
    self.assertEqual(failure.index, 3)
    self.assertEqual(failure.value, '1.7')
    self.assertEqual(failure.normal_type, NoneType)
def test_booleans_coerce_fail(self):
    """
    Coercing a column containing '17' to bool is expected to raise.

    The exception should report index 3 and the value '17'.
    """
    # assertRaises fails the test when nothing is raised; the previous
    # try/except form passed silently in that case.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(
            [u'False', u'TRUE', u'FALSE', u'17', u''], normal_type=bool)

    self.assertEqual(ctx.exception.index, 3)
    self.assertEqual(ctx.exception.value, '17')
    self.assertEqual(ctx.exception.normal_type, bool)
def test_times_coerce_fail(self):
    """
    Coercing a column containing '1,000,000' to time is expected to raise.

    The exception should report index 3 and the value '1,000,000'.
    """
    # assertRaises fails the test when nothing is raised; the previous
    # try/except form passed silently in that case.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(
            [u'4:40 AM', u'03:45:00', u'16:14:45', u'1,000,000'],
            normal_type=datetime.time)

    self.assertEqual(ctx.exception.index, 3)
    self.assertEqual(ctx.exception.value, '1,000,000')
    self.assertEqual(ctx.exception.normal_type, datetime.time)
def test_booleans_coerce_fail(self):
    """
    Expect bool coercion to fail on '17' at index 3.
    """
    flags = [u'False', u'TRUE', u'FALSE', u'17', u'']

    with self.assertRaises(InvalidValueForTypeException) as raised:
        typeinference.normalize_column_type(flags, normal_type=bool)

    failure = raised.exception
    self.assertEqual(failure.index, 3)
    self.assertEqual(failure.value, '17')
    self.assertEqual(failure.normal_type, bool)
def test_ints_coerce_fail(self):
    """
    Expect int coercion to fail on 'TRUE' at index 4.
    """
    numbers = [u'1', u'-87', u'418000000', u'', u'TRUE']

    with self.assertRaises(InvalidValueForTypeException) as raised:
        typeinference.normalize_column_type(numbers, normal_type=int)

    failure = raised.exception
    self.assertEqual(failure.index, 4)
    self.assertEqual(failure.value, 'TRUE')
    self.assertEqual(failure.normal_type, int)
def test_floats_coerce_fail(self):
    """
    Expect float coercion to fail on 'Hello, world!' at index 3.
    """
    numbers = [u'1', u'-87.413', u'418000000.0', u'Hello, world!']

    with self.assertRaises(InvalidValueForTypeException) as raised:
        typeinference.normalize_column_type(numbers, normal_type=float)

    failure = raised.exception
    self.assertEqual(failure.index, 3)
    self.assertEqual(failure.value, 'Hello, world!')
    self.assertEqual(failure.normal_type, float)
def test_times_coerce_fail(self):
    """
    Expect time coercion to fail on '1,000,000' at index 3.
    """
    clock_values = [u'4:40 AM', u'03:45:00', u'16:14:45', u'1,000,000']

    with self.assertRaises(InvalidValueForTypeException) as raised:
        typeinference.normalize_column_type(
            clock_values, normal_type=datetime.time)

    failure = raised.exception
    self.assertEqual(failure.index, 3)
    self.assertEqual(failure.value, '1,000,000')
    self.assertEqual(failure.normal_type, datetime.time)
def test_padded_ints_coerce_fail(self):
    """
    Coercing zero-padded values to int is expected to raise.

    The exception should report the first value (index 0, '0001').
    """
    values = [u'0001', u'0997', u'8.7', u'']

    # Using assertRaises makes the test fail if no exception is raised,
    # which the earlier try/except form did not guarantee.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(values, normal_type=int)

    self.assertEqual(ctx.exception.index, 0)
    self.assertEqual(ctx.exception.value, '0001')
    self.assertEqual(ctx.exception.normal_type, int)
def test_ints_coerce_fail(self):
    """
    Coercing a column containing 'TRUE' to int is expected to raise.

    The exception should report index 4 and the value 'TRUE'.
    """
    values = [u'1', u'-87', u'418000000', u'', u'TRUE']

    # Using assertRaises makes the test fail if no exception is raised,
    # which the earlier try/except form did not guarantee.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(values, normal_type=int)

    self.assertEqual(ctx.exception.index, 4)
    self.assertEqual(ctx.exception.value, 'TRUE')
    self.assertEqual(ctx.exception.normal_type, int)
def test_booleans_coerce_fail(self):
    """
    Coercing a column containing '17' to bool is expected to raise.

    The exception should report index 3 and the value '17'.
    """
    values = [u'False', u'TRUE', u'FALSE', u'17', u'']

    # Using assertRaises makes the test fail if no exception is raised,
    # which the earlier try/except form did not guarantee.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(values, normal_type=bool)

    self.assertEqual(ctx.exception.index, 3)
    self.assertEqual(ctx.exception.value, '17')
    self.assertEqual(ctx.exception.normal_type, bool)
def test_ints_coerce_fail(self):
    """
    Expect int coercion to fail on 'TRUE' at index 4.
    """
    source = [u'1', u'-87', u'418000000', u'', u'TRUE']

    with self.assertRaises(InvalidValueForTypeException) as caught:
        typeinference.normalize_column_type(source, normal_type=int)

    error = caught.exception
    self.assertEqual(error.index, 4)
    self.assertEqual(error.value, 'TRUE')
    self.assertEqual(error.normal_type, int)
def test_booleans_coerce_fail(self):
    """
    Expect bool coercion to fail on '17' at index 3.
    """
    source = [u'False', u'TRUE', u'FALSE', u'17', u'']

    with self.assertRaises(InvalidValueForTypeException) as caught:
        typeinference.normalize_column_type(source, normal_type=bool)

    error = caught.exception
    self.assertEqual(error.index, 3)
    self.assertEqual(error.value, '17')
    self.assertEqual(error.normal_type, bool)
def test_padded_ints_coerce_fail(self):
    """
    Expect int coercion of zero-padded values to fail at index 0.
    """
    source = [u'0001', u'0997', u'8.7', u'']

    with self.assertRaises(InvalidValueForTypeException) as caught:
        typeinference.normalize_column_type(source, normal_type=int)

    error = caught.exception
    self.assertEqual(error.index, 0)
    self.assertEqual(error.value, '0001')
    self.assertEqual(error.normal_type, int)
def test_nulls_coerce_fail(self):
    """
    Coercing a column containing '1.7' to NoneType is expected to raise.

    The exception should report index 3 and the value '1.7'.
    """
    values = [u'n/a', u'NA', u'.', u'1.7', u'none', u'']

    # Using assertRaises makes the test fail if no exception is raised,
    # which the earlier try/except form did not guarantee.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(values, normal_type=NoneType)

    self.assertEqual(ctx.exception.index, 3)
    self.assertEqual(ctx.exception.value, '1.7')
    self.assertEqual(ctx.exception.normal_type, NoneType)
def test_times_coerce_fail(self):
    """
    Expect time coercion to fail on '1,000,000' at index 3.
    """
    source = [u'4:40 AM', u'03:45:00', u'16:14:45', u'1,000,000']

    with self.assertRaises(InvalidValueForTypeException) as caught:
        typeinference.normalize_column_type(
            source, normal_type=datetime.time)

    error = caught.exception
    self.assertEqual(error.index, 3)
    self.assertEqual(error.value, '1,000,000')
    self.assertEqual(error.normal_type, datetime.time)
def test_nulls_coerce_fail(self):
    """
    Expect NoneType coercion to fail on '1.7' at index 3.
    """
    source = [u'n/a', u'NA', u'.', u'1.7', u'none', u'']

    with self.assertRaises(InvalidValueForTypeException) as caught:
        typeinference.normalize_column_type(source, normal_type=NoneType)

    error = caught.exception
    self.assertEqual(error.index, 3)
    self.assertEqual(error.value, '1.7')
    self.assertEqual(error.normal_type, NoneType)
def test_floats_coerce_fail(self):
    """
    Coercing a column containing free text to float is expected to raise.

    The exception should report index 3 and the value 'Hello, world!'.
    """
    values = [u'1', u'-87.413', u'418000000.0', u'Hello, world!']

    # Using assertRaises makes the test fail if no exception is raised,
    # which the earlier try/except form did not guarantee.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(values, normal_type=float)

    self.assertEqual(ctx.exception.index, 3)
    self.assertEqual(ctx.exception.value, 'Hello, world!')
    self.assertEqual(ctx.exception.normal_type, float)
def test_floats_coerce_fail(self):
    """
    Expect float coercion to fail on 'Hello, world!' at index 3.
    """
    source = [u'1', u'-87.413', u'418000000.0', u'Hello, world!']

    with self.assertRaises(InvalidValueForTypeException) as caught:
        typeinference.normalize_column_type(source, normal_type=float)

    error = caught.exception
    self.assertEqual(error.index, 3)
    self.assertEqual(error.value, 'Hello, world!')
    self.assertEqual(error.normal_type, float)
def test_dates_coerce_fail(self):
    """
    Expect datetime coercion to fail on the time-only value at index 3.

    The test fails explicitly if no exception is raised.
    """
    stamps = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'3/1/08 16:14:45',
        u'4:45 AM',
    ]
    target = datetime.datetime

    try:
        typeinference.normalize_column_type(stamps, normal_type=target)
    except InvalidValueForTypeException as error:
        self.assertEqual(error.index, 3)
        self.assertEqual(error.value, '4:45 AM')
        self.assertEqual(error.normal_type, datetime.datetime)
    else:
        # Reaching this branch means the coercion unexpectedly succeeded.
        raise AssertionError('Expected InvalidValueForTypeException')
def test_times_coerce_fail(self):
    """
    Coercing a column containing '1,000,000' to time is expected to raise.

    The exception should report index 3 and the value '1,000,000'.
    """
    values = [u'4:40 AM', u'03:45:00', u'16:14:45', u'1,000,000']

    # Using assertRaises makes the test fail if no exception is raised,
    # which the earlier try/except form did not guarantee.
    with self.assertRaises(InvalidValueForTypeException) as ctx:
        typeinference.normalize_column_type(
            values, normal_type=datetime.time)

    self.assertEqual(ctx.exception.index, 3)
    self.assertEqual(ctx.exception.value, '1,000,000')
    self.assertEqual(ctx.exception.normal_type, datetime.time)
def test_dates_coerce_fail(self):
    """
    Expect datetime coercion to fail on the time-only value at index 3.
    """
    source = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'3/1/08 16:14:45',
        u'4:45 AM',
    ]

    with self.assertRaises(InvalidValueForTypeException) as caught:
        typeinference.normalize_column_type(
            source, normal_type=datetime.datetime)

    error = caught.exception
    self.assertEqual(error.index, 3)
    self.assertEqual(error.value, '4:45 AM')
    self.assertEqual(error.normal_type, datetime.datetime)
def test_jeremy_singer_vine_datetimes(self):
    """
    Regression test for an obscure case, named after Jeremy Singer-Vine,
    who discovered it: the single letters 'P' and 'H' are expected to be
    inferred as text.
    """
    letters = [u'P', u'H', u'H']
    expected = (six.text_type, [u'P', u'H', u'H'])

    self.assertEqual(
        expected, typeinference.normalize_column_type(letters))
def __init__(self, order, name, l, normal_type=InvalidType,
             blanks_as_nulls=True, infer_types=True):
    """
    Build a column from a sequence of values.

    When normal_type is anything other than InvalidType, the values are
    used as given and normal_type becomes the column type. Otherwise, if
    infer_types is False, the values are used as given and the type is
    six.text_type. Otherwise the type and values come from
    typeinference.normalize_column_type, which also receives
    blanks_as_nulls.
    """
    if normal_type != InvalidType:
        column_type, values = normal_type, l
    elif not infer_types:
        column_type, values = six.text_type, l
    else:
        column_type, values = typeinference.normalize_column_type(
            l, blanks_as_nulls=blanks_as_nulls)

    list.__init__(self, values)

    self.order = order
    # Empty column names don't make sense, so fall back to a placeholder.
    self.name = name if name else '_unnamed'
    self.type = column_type
def test_datetimes_and_times(self):
    """
    A mix of datetime-like and time-only strings is expected to stay
    text, with the blank value becoming None.
    """
    raw = [
        'Jan 1, 2008 at 4:40 AM',
        '2010-01-27T03:45:00',
        '16:14:45',
        '',
    ]
    expected = (unicode, [
        'Jan 1, 2008 at 4:40 AM',
        '2010-01-27T03:45:00',
        '16:14:45',
        None,
    ])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_times(self):
    """
    Time-like strings are expected to be inferred as datetime.time, with
    the blank value becoming None.
    """
    raw = [u'4:40 AM', u'03:45:00', u'16:14:45', u'']
    expected = (datetime.time, [
        datetime.time(4, 40, 0),
        datetime.time(3, 45, 0),
        datetime.time(16, 14, 45),
        None,
    ])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_strings(self):
    """
    Plain text values are expected to be inferred as text, with the
    blank value becoming None.
    """
    raw = [
        u'Chicago Tribune',
        u'435 N Michigan ave',
        u'Chicago, IL',
        u'',
    ]
    expected = (six.text_type, [
        u'Chicago Tribune',
        u'435 N Michigan ave',
        u'Chicago, IL',
        None,
    ])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def __init__(self, row_number, column_name, value, normal_type):
    """
    Record the location and value that could not be converted to
    normal_type, and build the exception message.

    new_type is the first element returned by normalize_column_type
    when it is given just the offending value.
    """
    self.row_number = row_number
    self.column_name = column_name
    self.value = value
    self.normal_type = normal_type

    inferred = normalize_column_type([value])
    self.new_type = inferred[0]

    template = 'Row %i, column "%s": Unable to convert "%s" to %s. New type is %s.'
    details = (
        row_number,
        column_name,
        value,
        normal_type.__name__,
        self.new_type.__name__,
    )
    super(InferredNormalFalsifiedException, self).__init__(template % details)
def test_dates(self):
    """
    Date-like strings are expected to be inferred as datetime.date, with
    the blank value becoming None.
    """
    raw = [u'Jan 1, 2008', u'2010-01-27', u'3/1/08', u'']
    expected = (datetime.date, [
        datetime.date(2008, 1, 1),
        datetime.date(2010, 1, 27),
        datetime.date(2008, 3, 1),
        None,
    ])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_strings_coerce(self):
    """
    Coercing text values to unicode is expected to keep them unchanged,
    with the blank value becoming None.
    """
    raw = [
        u'Chicago Tribune',
        u'435 N Michigan ave',
        u'Chicago, IL',
        u'',
    ]
    expected = (unicode, [
        u'Chicago Tribune',
        u'435 N Michigan ave',
        u'Chicago, IL',
        None,
    ])

    self.assertEqual(
        expected,
        typeinference.normalize_column_type(raw, normal_type=unicode))
def test_datetimes_and_times(self):
    """
    A mix of datetime-like and time-only strings is expected to stay
    text, with the blank value becoming None.
    """
    raw = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'16:14:45',
        u'',
    ]
    expected = (six.text_type, [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'16:14:45',
        None,
    ])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_datetimes_and_dates(self):
    """
    A mix of datetime-like and date-only strings is expected to be
    inferred as datetime.datetime, the date-only value getting a
    midnight time and the blank value becoming None.
    """
    raw = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'3/1/08',
        u'',
    ]
    expected = (datetime.datetime, [
        datetime.datetime(2008, 1, 1, 4, 40, 0),
        datetime.datetime(2010, 1, 27, 3, 45, 0),
        datetime.datetime(2008, 3, 1, 0, 0, 0),
        None,
    ])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_datetimes_coerce(self):
    """
    Coercing datetime-like strings to datetime.datetime is expected to
    parse each one, with the blank value becoming None.
    """
    raw = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'3/1/08 16:14:45',
        u'',
    ]
    expected = (datetime.datetime, [
        datetime.datetime(2008, 1, 1, 4, 40, 0),
        datetime.datetime(2010, 1, 27, 3, 45, 0),
        datetime.datetime(2008, 3, 1, 16, 14, 45),
        None,
    ])

    self.assertEqual(
        expected,
        typeinference.normalize_column_type(
            raw, normal_type=datetime.datetime))
def __init__(self, order, name, l, normal_type=InvalidType,
             blanks_as_nulls=True, type_inference=True):
    """
    Build a column from a sequence of values.

    Inference is skipped when type_inference is False or when
    normal_type is anything other than InvalidType; the values are then
    used as given and the column type is normal_type (which is
    InvalidType if none was supplied). Otherwise the type and values
    come from typeinference.normalize_column_type, which also receives
    blanks_as_nulls.
    """
    skip_inference = (not type_inference) or normal_type != InvalidType

    if skip_inference:
        column_type, values = normal_type, l
    else:
        column_type, values = typeinference.normalize_column_type(
            l, blanks_as_nulls=blanks_as_nulls)

    list.__init__(self, values)

    self.order = order
    # Empty column names don't make sense, so fall back to a placeholder.
    self.name = name if name else '_unnamed'
    self.type = column_type
def __init__(self, order, name, l, normal_type=InvalidType):
    """
    Build a column from a sequence of values.

    When normal_type is anything other than InvalidType, the values are
    used as given and normal_type becomes the column type. Otherwise the
    type and values come from typeinference.normalize_column_type.
    """
    if normal_type == InvalidType:
        column_type, values = typeinference.normalize_column_type(l)
    else:
        column_type, values = normal_type, l

    list.__init__(self, values)

    self.order = order
    # Empty column names don't make sense, so fall back to a placeholder.
    self.name = name if name else '_unnamed'
    self.type = column_type
def __init__(self, order, name, l, normal_type=InvalidType):
    """
    Build a column from a sequence of values.

    When normal_type is anything other than InvalidType, the values are
    used as given and normal_type becomes the column type. Otherwise the
    type and values come from typeinference.normalize_column_type.

    After the attributes are set, _compute_nullable and
    _compute_max_length are called.
    """
    if normal_type == InvalidType:
        column_type, values = typeinference.normalize_column_type(l)
    else:
        column_type, values = normal_type, l

    list.__init__(self, values)

    self.order = order
    # Empty column names don't make sense, so fall back to a placeholder.
    self.name = name if name else '_unnamed'
    self.type = column_type

    self._compute_nullable()
    self._compute_max_length()
def test_ints_floats(self):
    """
    A mix of integer and decimal strings is expected to be inferred as
    float, with the blank value becoming None.
    """
    raw = [u'1.01', u'-87', u'418000000', u'']
    expected = (float, [1.01, -87, 418000000, None])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_strings_coerce(self):
    """
    Coercing text values to unicode is expected to keep them unchanged,
    with the blank value becoming None.
    """
    source = [
        u'Chicago Tribune',
        u'435 N Michigan ave',
        u'Chicago, IL',
        u'',
    ]
    wanted = (unicode, [
        u'Chicago Tribune',
        u'435 N Michigan ave',
        u'Chicago, IL',
        None,
    ])

    self.assertEqual(
        wanted,
        typeinference.normalize_column_type(source, normal_type=unicode))
def test_comma_floats(self):
    """
    Decimal strings with thousands separators are expected to be
    inferred as float, with the blank value becoming None.
    """
    raw = [u'1.01', u'-87.413', u'418,000,000.0', u'']
    expected = (float, [1.01, -87.413, 418000000.0, None])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_floats_coerce(self):
    """
    Coercing decimal strings to float is expected to parse each one,
    with the blank value becoming None.
    """
    raw = [u'1.01', u'-87.413', u'418000000.0', u'']
    expected = (float, [1.01, -87.413, 418000000.0, None])

    self.assertEqual(
        expected,
        typeinference.normalize_column_type(raw, normal_type=float))
def test_nulls_coerce(self):
    """
    Coercing null-like strings to NoneType is expected to turn every
    value into None.
    """
    raw = [u'n/a', u'NA', u'.', u'null', u'none', u'']
    expected = (NoneType, [None] * 6)

    self.assertEqual(
        expected,
        typeinference.normalize_column_type(raw, normal_type=NoneType))
def test_ints_coerce(self):
    """
    Coercing integer strings to int is expected to parse each one, with
    the blank value becoming None.
    """
    raw = [u'1', u'-87', u'418000000', u'']
    expected = (int, [1, -87, 418000000, None])

    self.assertEqual(
        expected,
        typeinference.normalize_column_type(raw, normal_type=int))
def test_padded_ints_coerce(self):
    """
    Coercing zero-padded values to unicode is expected to keep them as
    text, with the blank value becoming None.
    """
    raw = [u'0001', u'0997', u'8.7', u'']
    expected = (unicode, [u'0001', u'0997', u'8.7', None])

    # Pass the type object itself; the string 'unicode' is not a type and
    # would not request the coercion this test is meant to exercise.
    self.assertEqual(
        expected,
        typeinference.normalize_column_type(raw, normal_type=unicode))
def test_comma_ints(self):
    """
    Integer strings with thousands separators are expected to be
    inferred as int, with the blank value becoming None.
    """
    raw = [u'1', u'-87', u'418,000,000', u'']
    expected = (int, [1, -87, 418000000, None])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_ints_coerce(self):
    """
    Coercing integer strings to int is expected to parse each one, with
    the blank value becoming None.
    """
    source = [u'1', u'-87', u'418000000', u'']
    wanted = (int, [1, -87, 418000000, None])

    result = typeinference.normalize_column_type(source, normal_type=int)
    self.assertEqual(wanted, result)
def test_datetimes_and_times(self):
    """
    A mix of datetime-like and time-only strings is expected to stay
    text, with the blank value becoming None.
    """
    source = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'16:14:45',
        u'',
    ]
    wanted = (unicode, [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'16:14:45',
        None,
    ])

    self.assertEqual(wanted, typeinference.normalize_column_type(source))
def test_datetimes_and_dates_coerce(self):
    """
    Coercing a mix of datetime-like and date-only strings to
    datetime.datetime is expected to parse each one, the date-only value
    getting a midnight time and the blank value becoming None.
    """
    raw = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'3/1/08',
        u'',
    ]
    expected = (datetime.datetime, [
        datetime.datetime(2008, 1, 1, 4, 40, 0),
        datetime.datetime(2010, 1, 27, 3, 45, 0),
        datetime.datetime(2008, 3, 1, 0, 0, 0),
        None,
    ])

    self.assertEqual(
        expected,
        typeinference.normalize_column_type(
            raw, normal_type=datetime.datetime))
def test_nulls_coerce(self):
    """
    Coercing null-like strings to NoneType is expected to turn every
    value into None.
    """
    source = [u'n/a', u'NA', u'.', u'null', u'none', u'']
    wanted = (NoneType, [None, None, None, None, None, None])

    result = typeinference.normalize_column_type(
        source, normal_type=NoneType)
    self.assertEqual(wanted, result)
def test_mixed(self):
    """
    A mix of text and numeric strings is expected to be inferred as
    text, with the blank value becoming None.
    """
    raw = [u'Chicago Tribune', u'-87.413', u'418000000', u'']
    expected = (unicode, [
        u'Chicago Tribune',
        u'-87.413',
        u'418000000',
        None,
    ])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_padded_ints_coerce(self):
    """
    Coercing zero-padded values to text is expected to keep them as
    text, with the blank value becoming None.
    """
    raw = [u'0001', u'0997', u'8.7', u'']
    expected = (six.text_type, [u'0001', u'0997', u'8.7', None])

    # Pass the type object itself; the string 'six.text_type' is not a
    # type and would not request the coercion this test is meant to
    # exercise.
    self.assertEqual(
        expected,
        typeinference.normalize_column_type(
            raw, normal_type=six.text_type))
def test_booleans_coerce(self):
    """
    Coercing boolean-like strings (including 'yes') to bool is expected
    to parse each one, with the blank value becoming None.
    """
    raw = [u'False', u'TRUE', u'FALSE', u'yes', u'']
    expected = (bool, [False, True, False, True, None])

    self.assertEqual(
        expected,
        typeinference.normalize_column_type(raw, normal_type=bool))
def test_comma_ints(self):
    """
    Integer strings with thousands separators are expected to be
    inferred as int, with the blank value becoming None.
    """
    source = [u'1', u'-87', u'418,000,000', u'']
    wanted = (int, [1, -87, 418000000, None])

    result = typeinference.normalize_column_type(source)
    self.assertEqual(wanted, result)
def test_datetimes(self):
    """
    Datetime-like strings are expected to be inferred as
    datetime.datetime, with the blank value becoming None.
    """
    raw = [
        u'Jan 1, 2008 at 4:40 AM',
        u'2010-01-27T03:45:00',
        u'3/1/08 16:14:45',
        u'',
    ]
    expected = (datetime.datetime, [
        datetime.datetime(2008, 1, 1, 4, 40, 0),
        datetime.datetime(2010, 1, 27, 3, 45, 0),
        datetime.datetime(2008, 3, 1, 16, 14, 45),
        None,
    ])

    self.assertEqual(expected, typeinference.normalize_column_type(raw))
def test_floats_coerce(self):
    """
    Coercing decimal strings to float is expected to parse each one,
    with the blank value becoming None.
    """
    source = [u'1.01', u'-87.413', u'418000000.0', u'']
    wanted = (float, [1.01, -87.413, 418000000.0, None])

    result = typeinference.normalize_column_type(source, normal_type=float)
    self.assertEqual(wanted, result)