def test_invalid(self):
    """Field maps holding only invalid entries must flatten to nothing.

    Invalid keys and invalid values are expected to be ignored silently
    rather than raising an error.
    """
    # Every entry here is invalid, either through its key (int, tuple)
    # or through its value (False, None, float, empty containers).
    mixed_invalid = {
        0: 'int-invalid',
        ('a', 'b'): 'tuple-invalid',
        'false-invalid': False,
        'none-invalid': None,
        'float-invalid': 1.0,
        'dict-invalid': {},
        'tuple-invalid': tuple(),
        'set-invalid': set(),
        'list-invalid': [],
    }
    # A float key gets its own map: 0.0 == 0, so inside the dict above it
    # would collide with the integer key instead of adding a new entry.
    float_key_invalid = {
        0.0: 'float-invalid',
    }
    for fieldmap in (mixed_invalid, float_key_invalid):
        out = flattener(fieldmap)({})
        eq_(len(out.keys()), 0)
def test_list(self):
    """Integer-like field specs must be treated as indices into a list."""
    src = [0, 1, [2]]
    # The spec may be an int (0), a digit string ('1') or a dotted path
    # of indices ('2.0' -> src[2][0]).
    fieldmap = {
        '0': 0,
        '1': '1',
        '2.0': '2.0',
    }
    out = flattener(fieldmap)(src)
    eq_(out.keys(), fieldmap.keys())

    # Output field name -> value expected at that position in the source
    expected = (
        ('0', src[0]),
        ('1', src[1]),
        ('2.0', src[2][0]),
    )
    for field, value in expected:
        eq_(out[field], value)
def test_dict(self):
    """Flatten a nested dict using whole-source, key and dotted-path specs."""
    src = {'x': 'X', 'y': {'z': 'Y.Z'}, 'z': ['Z.0', 'Z.1']}
    fieldmap = {
        # An empty string or True selects the entire source object
        'all1': '',
        'all2': True,
        # Plain key, nested key, and key followed by a list index
        'x': 'x',
        'y.z': 'y.z',
        'z.1': 'z.1',
    }
    out = flattener(fieldmap)(src)
    eq_(out.keys(), fieldmap.keys())

    # Output field name -> value expected from the source
    expected = (
        ('all1', src),
        ('all2', src),
        ('x', src['x']),
        ('y.z', src['y']['z']),
        ('z.1', src['z'][1]),
    )
    for field, value in expected:
        eq_(out[field], value)
def __init__(self, **kwargs):
    """Configure the streamer, pick a tweet writer, then poll forever.

    Keyword arguments are stored as-is in ``self.params``. The writer is
    chosen by the first matching option:

    - ``path``: raw bytes are written via a ``StreamWriter`` (``flush`` is
      forwarded, default False).
    - ``function``: each message is passed to a transform built from kwargs.
    - ``driver == 'sqlalchemy'``: each tweet is flattened using ``fields``
      and inserted into ``table`` on the engine created from ``url`` and
      ``parameters``.

    NOTE: the loop at the end has no exit condition, so this constructor
    does not return on its own.
    """
    self.params = kwargs
    self.url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    self.valid_params = {
        'follow', 'track', 'locations', 'delimited', 'stall_warnings',
        'filter_level', 'language'}
    self.enabled = True
    self.delay = 0

    # Set up writers
    if 'path' in kwargs:
        flush = kwargs.get('flush', False)
        self.stream = StreamWriter(kwargs['path'], flush=flush)
        self.process_bytes = self.stream.write
    elif 'function' in kwargs:
        self.process_json = build_transform(
            kwargs, vars={'message': {}}, filename='TwitterStream:function')
    elif kwargs.get('driver') == 'sqlalchemy':
        db_engine = gramex.data.create_engine(
            kwargs['url'], **kwargs.get('parameters', {}))
        db_table = gramex.data.get_table(kwargs['table'])
        fields = kwargs['fields']
        # Iterate over a snapshot of the keys: entries are removed from
        # the (caller-supplied) mapping while looping.
        for name in list(fields.keys()):
            if name in db_table.columns:
                continue
            app_log.error('TwitterStream field %s not in table' % name)
            fields.pop(name)
        flatten = flattener(fields=fields)

        def insert_tweet(tweet):
            # Flatten the tweet into a row and insert it into the table
            return db_engine.execute(db_table.insert(flatten(tweet)))

        self.process_json = insert_tweet

    self.buf = bytearray()
    self.client = tornado.httpclient.HTTPClient()
    required = ('follow', 'track', 'locations')
    while True:
        # Set .enabled to False to temporarily disable streamer
        if self.enabled:
            # Keep only the recognised API parameters, UTF-8 encoded
            params = {}
            for key, val in self.params.items():
                if key in self.valid_params:
                    params[key] = val.encode('utf-8')
            if not any(key in params for key in required):
                # Nothing to filter on: switch off and idle at 5s intervals
                self.enabled = False
                self.delay = 5
                app_log.error('TwitterStream needs follow, track or locations. Disabling')
            else:
                self.fetch_tweets(params)
        # Wait self.delay seconds before the next iteration
        time.sleep(self.delay)
def test_default(self):
    """Every field that cannot be resolved must take the ``default`` value."""
    default = 1
    fieldmap = {'x': 'x', 'y.a': 'y.a', 'y.1': 'y.1', 'z.a': 'z.a', '1': 1}
    # None of the specs resolve against this source: 'x' is absent, 'y' is
    # an empty list, 'z' is an empty dict, and there is no index 1.
    src = {'z': {}, 'y': []}
    out = flattener(fieldmap, default=default)(src)
    eq_(out, dict.fromkeys(fieldmap, default))