def test_formatter_keep_unicode(self): config = {'cef.version': '0', 'cef.vendor': 'mozilla', 'cef.device_version': '3', 'cef.product': 'weave', 'cef': True, 'cef.file': mkstemp()[1]} environ = {'PATH_INFO': u'/reviewers/receipt/issue/\u043f\u0442\u0442-news'} kw = {'cs2': 1L, 'cs2Label': u'\xd0'} from cef import _get_fields, _format_msg, _filter_params config = _filter_params('cef', config) name = 'name' severity = 0 username = u'tarek' signature = 'xx' fields = _get_fields(name, severity, environ, config, username=username, signature=signature, as_unicode=True, **kw) msg = _format_msg(fields, kw, 1024, as_unicode=True) self.assertEquals(type(msg), unicode) self.assertTrue(u'cs2Label=\xd0' in msg, msg) self.assertTrue(u'keep_unicode' not in msg, msg)
def log_cef(self, name, severity, environ, config, username='******',
            signature=None, **kw):
    """Format a CEF record and send it to heka as a 'cef' message.

    Args:
        name: name to log
        severity: integer from 0 to 10
        environ: the WSGI environ object
        config: configuration dict; it is narrowed with
            ``_filter_params('cef', config)`` before use
        username: user name, '******' when not supplied
        signature: CEF signature code; None is forwarded to the cef
            library as-is (originally documented as falling back to
            ``name``)
        **kw: extra keys used in the CEF extension

    Returns:
        The formatted message that was handed to heka.  This is the
        value produced by ``_format_msg`` unless the unicode fallback
        below had to run, in which case it is the decoded unicode text.
    """
    from cef import _filter_params, _format_msg, _get_fields

    def emit(payload):
        # A fresh fields dict is built for every send attempt.
        self.heka(type='cef', payload=payload,
                  fields={'cef_meta': cef_meta})

    cef_config = _filter_params('cef', config)
    cef_fields = _get_fields(name, severity, environ, cef_config,
                             username=username, signature=signature,
                             **kw)
    msg = _format_msg(cef_fields, kw)

    try:
        emit(msg)
    except ValueError:
        # The cef library encodes unicode to UTF-8 bytes, while the
        # protobuf layer rejects UTF-8 byte strings and insists on
        # unicode (which it then re-encodes itself).  Force the message
        # back into unicode, replacing undecodable bytes, and retry.
        # A "give me unicode" switch in the cef library would make this
        # unnecessary.
        msg = unicode(msg, "UTF-8", "replace")
        emit(msg)

    # Hand the formatted message back to the caller.
    return msg
def test_formatter_not_unicode_bytes(self): config = {'cef.version': '0', 'cef.vendor': 'mozilla', 'cef.device_version': '3', 'cef.product': 'weave', 'cef': True, 'cef.file': mkstemp()[1]} environ = {} # Note that csLabel can't be coerced into unicode or decoded using UTF8 kw = {'cs2': 1L, 'cs2Label': '\xd0'} from cef import _get_fields, _format_msg, _filter_params config = _filter_params('cef', config) name = 'name' severity = 0 username = u'tarek' signature = 'xx' fields = _get_fields(name, severity, environ, config, username=username, signature=signature, as_unicode=True, **kw) msg = _format_msg(fields, kw, 1024, as_unicode=True) self.assertEquals(type(msg), unicode) self.assertTrue(u'cs2Label=\ufffd' in msg, msg) self.assertTrue(u'keep_unicode' not in msg, msg)
def log_cef(self, name, severity, environ, config, username='******',
            signature=None, **kw):
    """Creates a CEF record, and emit it to heka in the fields blob.

    Args:
        - name: name to log
        - severity: integer from 0 to 10
        - environ: the WSGI environ object
        - config: configuration dict
        - username: user name - defaults to '******'
        - signature: CEF signature code; None is passed through to the
          cef library unchanged
        - extra keywords: extra keys used in the CEF extension

    Returns the formatted message that was sent to heka.  Normally this
    is whatever ``_format_msg`` produced; if heka rejected a byte-string
    payload with ValueError, it is the unicode text that was sent on the
    retry instead.
    """
    from cef import _get_fields, _format_msg, _filter_params

    config = _filter_params('cef', config)
    fields = _get_fields(name, severity, environ, config,
                         username=username, signature=signature, **kw)
    msg = _format_msg(fields, kw)

    try:
        self.heka(type='cef', payload=msg, fields={'cef_meta': cef_meta})
    except ValueError:
        # Only a byte string can be repaired by decoding; if the payload
        # is already text the ValueError has some other cause, so let it
        # propagate untouched.
        if not isinstance(msg, bytes):
            raise
        # The message encoder refuses non-ASCII byte strings and wants
        # text instead.  Decode as UTF-8, substituting U+FFFD for bytes
        # that cannot be decoded, and send again.
        msg = msg.decode('utf-8', 'replace')
        self.heka(type='cef', payload=msg, fields={'cef_meta': cef_meta})

    # Return the formatted message
    return msg