def test_row_as_table_row(self):
    """Encode a five-column row to JSON and decode it back again.

    Checks the exact JSON text produced by a schema-aware coder, that the
    same coder decodes its own output to an equal row, and that a coder
    built without a schema decodes that output as well.
    """
    column_types = [
        ('s', 'STRING'),
        ('i', 'INTEGER'),
        ('f', 'FLOAT'),
        ('b', 'BOOLEAN'),
        ('r', 'RECORD')]
    column_values = ['abc', 123, 123.456, True, {'a': 'b'}]
    expected_json = (
        '{"s": "abc", "i": 123, "f": 123.456, "b": true, "r": {"a": "b"}}')

    fields = []
    for name, type_name in column_types:
        fields.append(bigquery.TableFieldSchema(name=name, type=type_name))
    coder = TableRowJsonCoder(table_schema=bigquery.TableSchema(fields=fields))

    cells = []
    for value in column_values:
        cells.append(bigquery.TableCell(v=to_json_value(value)))
    test_row = bigquery.TableRow(f=cells)

    self.assertEqual(expected_json, coder.encode(test_row))
    self.assertEqual(test_row, coder.decode(coder.encode(test_row)))
    # Decoding does not need the schema, so a bare coder must work too.
    schemaless_coder = TableRowJsonCoder()
    self.assertEqual(
        test_row, schemaless_coder.decode(coder.encode(test_row)))
def test_row_and_no_schema(self):
    """Encoding with a schema-less coder must raise AttributeError.

    The raised error's text is expected to start with
    'The TableRowJsonCoder requires'.
    """
    coder = TableRowJsonCoder()
    test_row = bigquery.TableRow(
        f=[bigquery.TableCell(v=to_json_value(e))
           for e in ['abc', 123, 123.456, True]])
    with self.assertRaises(AttributeError) as ctx:
        coder.encode(test_row)
    # Use str() instead of the ``message`` attribute: ``message`` was
    # deprecated in Python 2.6 and no longer exists in Python 3, whereas
    # str() of a single-argument exception gives that text on both.
    self.assertTrue(
        str(ctx.exception).startswith('The TableRowJsonCoder requires'))
def json_compliance_exception(self, value):
    """Assert that encoding ``value`` in a FLOAT column raises ValueError.

    Args:
      value: the value wrapped with ``to_json_value`` and stored in the
        single FLOAT cell of the row handed to ``TableRowJsonCoder.encode``.

    The raised error's text must contain ``bigquery.JSON_COMPLIANCE_ERROR``.
    """
    schema_definition = [('f', 'FLOAT')]
    schema = bigquery.TableSchema(
        fields=[bigquery.TableFieldSchema(name=k, type=v)
                for k, v in schema_definition])
    coder = TableRowJsonCoder(table_schema=schema)
    # Only the statements that touch ``value`` live inside the context
    # manager, so an unrelated ValueError from the setup above cannot
    # satisfy the assertion by accident.
    with self.assertRaises(ValueError) as exn:
        test_row = bigquery.TableRow(
            f=[bigquery.TableCell(v=to_json_value(value))])
        coder.encode(test_row)
    # The check runs after the ``with`` block: code placed after the raising
    # call inside the block would never execute.  str() is used because the
    # ``message`` attribute does not exist on Python 3 exceptions.
    self.assertIn(bigquery.JSON_COMPLIANCE_ERROR, str(exn.exception))
def decode(self, encoded_table_row):
    """Rebuild a ``bigquery.TableRow`` from its JSON text.

    Args:
      encoded_table_row: JSON text whose top-level value is an object.

    Returns:
      A ``bigquery.TableRow`` with one ``bigquery.TableCell`` per member of
      the object, in the order the members appear in the text.  Only the
      member values are used; the member names are dropped.
    """
    # OrderedDict keeps the members in document order, which is the only
    # thing tying each value back to its column.
    od = json.loads(
        encoded_table_row, object_pairs_hook=collections.OrderedDict)
    # values() exists on both Python 2 and 3; itervalues() is Python 2 only.
    return bigquery.TableRow(
        f=[bigquery.TableCell(v=to_json_value(e)) for e in od.values()])
def get_test_rows(self):
    """Build paired BigQuery fixtures: raw table rows, schema, plain dicts.

    Returns:
      A ``(table_rows, schema, expected_rows)`` tuple.  ``schema`` declares
      the columns b, f, i, s, t, dt, ts, dt_ts, r and rpr; every
      ``bigquery.TableRow`` in ``table_rows`` lists its cells in that same
      order; ``expected_rows`` holds the plain-dict form of the same two
      rows.  The second row has ``None`` in every NULLABLE column and an
      empty list for the repeated record.
    """

    def field(name, kind, mode, **extra):
        # One schema column; ``extra`` carries nested ``fields`` if any.
        return bigquery.TableFieldSchema(
            name=name, type=kind, mode=mode, **extra)

    def cell(value):
        # One populated cell holding the JSON-value form of ``value``.
        return bigquery.TableCell(v=to_json_value(value))

    now = time.time()
    ts = datetime.datetime.utcfromtimestamp(float(now)).strftime(
        '%Y-%m-%d %H:%M:%S.%f UTC')

    record_fields = [field('s2', 'STRING', 'NULLABLE')]
    inner_repeated_fields = [
        field('rs', 'STRING', 'REPEATED'),
        field('s4', 'STRING', 'NULLABLE')]
    repeated_record_fields = [
        field('s3', 'STRING', 'NULLABLE'),
        field('rpr2', 'RECORD', 'REPEATED', fields=inner_repeated_fields)]
    schema = bigquery.TableSchema(
        fields=[
            field('b', 'BOOLEAN', 'REQUIRED'),
            field('f', 'FLOAT', 'REQUIRED'),
            field('i', 'INTEGER', 'REQUIRED'),
            field('s', 'STRING', 'REQUIRED'),
            field('t', 'TIMESTAMP', 'NULLABLE'),
            field('dt', 'DATE', 'NULLABLE'),
            field('ts', 'TIME', 'NULLABLE'),
            field('dt_ts', 'DATETIME', 'NULLABLE'),
            field('r', 'RECORD', 'NULLABLE', fields=record_fields),
            field('rpr', 'RECORD', 'REPEATED',
                  fields=repeated_record_fields)])

    populated_row = {
        'i': 1,
        's': 'abc',
        'f': 2.3,
        'b': True,
        't': ts,
        'dt': '2016-10-31',
        'ts': '22:39:12.627498',
        'dt_ts': '2008-12-25T07:30:00',
        'r': {'s2': 'b'},
        'rpr': [{'s3': 'c', 'rpr2': [{'rs': ['d', 'e'], 's4': None}]}],
    }
    sparse_row = {
        'i': 10,
        's': 'xyz',
        'f': -3.14,
        'b': False,
        'rpr': [],
        't': None,
        'dt': None,
        'ts': None,
        'dt_ts': None,
        'r': None,
    }
    expected_rows = [populated_row, sparse_row]

    # Records are spelled in the f/v wire format because a plain dict does
    # not create the nested schemas correctly.
    record_value = {'f': [{'v': 'b'}]}
    inner_repeated_value = {
        'v': {'f': [{'v': [{'v': 'd'}, {'v': 'e'}]}, {'v': None}]}}
    repeated_record_value = [
        {'v': {'f': [{'v': 'c'}, {'v': [inner_repeated_value]}]}}]

    first_cells = [
        cell('true'),
        cell(str(2.3)),
        cell(str(1)),
        cell('abc'),
        # '%f' formatting, not str(): str() would truncate the number
        # representing the timestamp.
        cell('%f' % now),
        cell('2016-10-31'),
        cell('22:39:12.627498'),
        cell('2008-12-25T07:30:00'),
        cell(record_value),
        cell(repeated_record_value)]

    second_cells = [
        cell('false'),
        cell(str(-3.14)),
        cell(str(10)),
        cell('xyz')]
    # t, dt, ts, dt_ts and r are all absent in the second row.
    second_cells.extend(bigquery.TableCell(v=None) for _ in range(5))
    second_cells.append(cell([]))

    table_rows = [
        bigquery.TableRow(f=first_cells),
        bigquery.TableRow(f=second_cells)]
    return table_rows, schema, expected_rows