def test_prune_max_properties(self):
        """Sanitizing never returns more than 10,000 properties.

        A document exactly at the limit is expected to come back unchanged
        in size; documents over the limit are expected to be cut back to it.
        """
        max_properties = 10000
        doc = dict.fromkeys(
            ('prop-' + str(i) for i in range(max_properties)), 'value')
        at_limit = bigquery_schema.sanitize_property_value(doc)
        self.assertEqual(len(at_limit), max_properties)

        # One property over the limit (the 10,001st): the result must be
        # trimmed back down to the limit.
        doc['prop-10001'] = 'value'
        one_over = bigquery_schema.sanitize_property_value(doc)
        self.assertEqual(len(one_over), max_properties)

        # Two over the limit: 'z' is expected to be among the dropped keys.
        # NOTE(review): presumably because it sorts after every 'prop-*'
        # key -- confirm against the pruning order in bigquery_schema.
        doc['z'] = 'value'
        two_over = bigquery_schema.sanitize_property_value(doc)
        self.assertEqual(len(two_over), max_properties)
        self.assertNotIn('z', two_over)
 def test_sanitize_property_value(self):
     """Check the clean-up rules applied by sanitize_property_value.

     Expectations exercised here:
       * an empty dict and a list of empty dicts are dropped;
       * a 200-character key is shortened to 128 characters;
       * the '@!@' key is dropped and '@2_3' becomes '_2_3';
       * the float 9.300000191734863 comes back as 9.300000192;
       * the 'labels' dict becomes a sequence of records with 'name'
         and 'value' entries.
     """
     doc = {
         # These keys must match the assertNotIn checks below; a spelling
         # mismatch would make those checks pass regardless of behavior.
         'empty_dict': {},
         'empty_dict_list': [{}, {}],
         'a' * 200: 'value0',
         '@!@': 'deleteme',
         '@2_3': 'value1',
         'invalid_numeric': 9.300000191734863,
         'labels': {
             'label1': 'value1',
             'label2': 'value2',
         }
     }
     sanitized = bigquery_schema.sanitize_property_value(doc)

     # Four survivors: the truncated 'a...' key, '_2_3', 'invalid_numeric'
     # and 'labels'.
     self.assertEqual(len(sanitized), 4)
     self.assertNotIn('empty_dict', sanitized)
     self.assertNotIn('empty_dict_list', sanitized)
     self.assertEqual(sanitized['a' * 128], 'value0')
     self.assertEqual(sanitized['invalid_numeric'], 9.300000192)
     self.assertEqual(sanitized['_2_3'], 'value1')

     labels = sanitized['labels']
     self.assertEqual(len(labels), 2)
     # Compare as a name -> value mapping so the order of the records does
     # not matter and a failure shows the full difference.
     self.assertEqual(
         {label['name']: label['value'] for label in labels},
         {'label1': 'value1', 'label2': 'value2'})
 def test_remove_duplicate_property(self):
     """Keys that differ only by case are collapsed to a single property.

     The same collapse is expected for dicts nested inside a list, and in
     both places the 'IPAddress' entry is the one expected to survive.
     """
     surviving_entry = {'IPAddress': 'other_value'}
     doc = {
         'ipAddress': 'value',
         'IPAddress': 'other_value',
         'array': [
             {'ipAddress': 'value', 'IPAddress': 'other_value'},
         ],
     }

     result = bigquery_schema.sanitize_property_value(doc)

     # Top level keeps exactly 'IPAddress' and 'array'.
     self.assertEqual(len(result), 2)
     self.assertIn('IPAddress', result)
     self.assertEqual(result['IPAddress'], 'other_value')
     self.assertEqual(result['array'], [surviving_entry])
Exemplo n.º 4
0
 def process(self, element):
     """Yield the sanitized form of `element` as the single output."""
     sanitized = bigquery_schema.sanitize_property_value(element)
     yield sanitized
 def process(self, element):
     """Yield the sanitized `element` stamped with the load time.

     The 'timestamp' entry is set on the sanitized result to whatever
     `self.load_time.get()` returns.
     """
     row = bigquery_schema.sanitize_property_value(element)
     # Record when this load happened alongside the data.
     row['timestamp'] = self.load_time.get()
     yield row