def show(
    producer,
    template=('t', u'{t}\n', 'Message template.'),
):
    """Print tweets in human readable form."""
    # Assemble the pipeline from the inside out: the innermost consumer is
    # created with the requested template, wrapped by ``to_tweet``, and the
    # result is handed to the producer.
    renderer = consumers.show(template=template)
    pipeline = consumers.to_tweet(renderer)
    producer(pipeline)
def uniq(producer):
    """Omit repeated tweets."""
    # The ``select`` consumer is the innermost stage; ``uniq`` wraps it and
    # ``to_tweet`` wraps both before the chain is given to the producer.
    selector = consumers.select()
    deduplicated = consumers.uniq(selector)
    producer(consumers.to_tweet(deduplicated))
def text(producer):
    """Print only tweet's text.

    It replaces the new line symbol (`\\n`) with a space.

    """
    # The backslash in the docstring is doubled on purpose: in a non-raw
    # string a single ``\n`` is an actual line break, which would split the
    # sentence in two instead of showing the two characters ``\n``.
    producer(consumers.to_tweet(consumers.print_text()))
def filter(
    producer,
    config,
    output,
    mode=('', u'a', 'The mode to open the files, `a` to append and `w` to rewrite.'),
    filters=('', [], 'The filters to use.'),
):
    """Filter the tweets to files by filtering predicates defined in the configuration file."""
    # Optional catch-all consumer, only created when a template is configured.
    dustbin_template = config.dustbin_template
    dustbin = None
    if dustbin_template is not None:
        dustbin = consumers.group(dustbin_template)

    # Restrict the configured filters to the requested names, if any were given.
    if filters:
        selected = (spec for spec in config.filters if spec.name in filters)
    else:
        selected = config.filters

    streams = []
    for spec in selected:
        # A split template of '--' sends raw tweets to ``output`` instead of
        # grouping them into files.
        if spec.split_template == '--':
            sink = consumers.show(output=output, template='{t.raw}')
        else:
            sink = consumers.group(spec.split_template, mode=mode)
        # Bind the current filter as a default so each lambda keeps its own.
        streams.append((sink, lambda c, _, f=spec: c.filter(**f.predicates)))

    target = consumers.filter(tuple(streams), dustbin)
    producer(consumers.to_tweet(target))
def test_filter(tweets):
    """Route the fixture tweets through two filter streams and check where they land."""
    pinkpop, dimazest, dustbin = [], [], []

    # Each collecting list is paired with the keyword arguments that are
    # later passed to ``c.filter``.
    routes = [
        (pinkpop, {'follow': [], 'track': ['pinkpop'], 'locations': []}),
        (dimazest, {'follow': [10868922], 'track': [], 'locations': []}),
    ]

    streams = []
    for collected, predicates in routes:
        # Bind the predicates as a default so each lambda keeps its own dict.
        streams.append(
            (to_list(collected), lambda c, f, p=predicates: c.filter(**p))
        )

    target = consumers.filter(tuple(streams), to_list(dustbin))
    from_iterable(consumers.to_tweet(target), tweets)

    assert len(pinkpop) == 1
    assert pinkpop[0].id == 190800262909276162
    assert len(dimazest) == 3
    assert not dustbin
def test_count_tokens(tweets):
    """Run the fixture tweets through ``count_tokens`` and compare the counts."""
    counter = Counter()
    token_counter = consumers.count_tokens(counter)
    from_iterable(consumers.to_tweet(token_counter), tweets)

    # Every expected token is counted exactly once.
    expected_tokens = [
        u'paaspop',
        u'all',
        u'pinkpop',
        u'thats',
        u'pedropicopop',
        u'use',
        u'here',
        u'pukkelpop',
        u'prilpop',
        u'fun',
        u'come',
        u'#pygrunn',
        u'#pp12',
    ]
    assert counter == Counter(dict((token, 1) for token in expected_tokens))
def S(source):
    """Return a callable that feeds ``source`` into a target wrapped by ``to_tweet``."""
    def t(target):
        # ``source`` is captured from the enclosing call.
        return consume_iterable(consumers.to_tweet(target), source)

    return t
def media(producer, output):
    """Retrieve media urls."""
    # ``print_media`` receives the output target; ``to_tweet`` wraps it
    # before the chain is handed to the producer.
    media_printer = consumers.print_media(output=output)
    producer(consumers.to_tweet(media_printer))
def test_filter(tweets):
    """Check that two filter streams receive the expected tweets and the dustbin none."""
    pinkpop = []
    dimazest = []
    dustbin = []

    # Keyword arguments for ``c.filter``, one set per stream.
    by_keyword = {'follow': [], 'track': ['pinkpop'], 'locations': []}
    by_user = {'follow': [10868922], 'track': [], 'locations': []}

    streams = tuple(
        (to_list(bucket), lambda c, f, p=criteria: c.filter(**p))
        for bucket, criteria in ((pinkpop, by_keyword), (dimazest, by_user))
    )

    dustbin_sink = to_list(dustbin)
    target = consumers.filter(streams, dustbin_sink)
    from_iterable(consumers.to_tweet(target), tweets)

    assert len(pinkpop) == 1
    assert pinkpop[0].id == 190800262909276162
    assert len(dimazest) == 3
    assert not dustbin
def filter(
    producer,
    config,
    output,
    mode=('', u'a', 'The mode to open the files, `a` to append and `w` to rewrite.'),
    filters=('', [], 'The filters to use.'),
):
    """Filter the tweets to files by filtering predicates defined in the configuration file."""
    def build_sink(spec):
        # '--' as the split template means: show raw tweets on ``output``
        # rather than grouping them with the template.
        if spec.split_template != '--':
            return consumers.group(spec.split_template, mode=mode)
        return consumers.show(output=output, template='{t.raw}')

    # The dustbin consumer exists only when a template for it is configured.
    dustbin_template = config.dustbin_template
    if dustbin_template is None:
        dustbin = None
    else:
        dustbin = consumers.group(dustbin_template)

    # An empty ``filters`` selection keeps every configured filter.
    wanted = config.filters
    if filters:
        wanted = (flt for flt in config.filters if flt.name in filters)

    # The default argument pins each lambda to its own filter object.
    streams = tuple(
        (build_sink(flt), lambda c, _, f=flt: c.filter(**f.predicates))
        for flt in wanted
    )

    producer(consumers.to_tweet(consumers.filter(streams, dustbin)))
def test_closing():
    """Closing the ``to_tweet`` wrapper must leave the wrapped sink closed as well."""
    collector = to_list([])
    wrapper = consumers.to_tweet(collector)

    wrapper.close()

    # Sending to the inner sink after the close is expected to raise.
    with raises(StopIteration):
        collector.send('Sink is expected to be closed too.')
def timeline(producer, window=('w', '%Y-%m-%d-%H', '')):
    """Count the number of tweets per window."""
    # The counts are passed to a ``counter_printer`` bound to standard output.
    printer = consumers.counter_printer(sys.stdout)
    per_window = consumers.timeline(window=window, target=printer)
    producer(consumers.to_tweet(per_window))
def text(
    producer,
    output,
):
    """Print only tweet's text.

    It replaces the new line symbol (\\n) with a space.

    """
    # ``print_text`` is created with the given output target and wrapped by
    # ``to_tweet`` before the producer receives it.
    text_printer = consumers.print_text(output=output)
    pipeline = consumers.to_tweet(text_printer)
    producer(pipeline)
def test_filter_dustbin(tweets):
    """Tweets matched by no stream must all end up in the dustbin."""
    matched = []
    dustbin = []

    # A single stream whose predicate follows the id -1000.
    only_stream = (to_list(matched), lambda c, f: c.filter(follow=[-1000]))
    target = consumers.filter((only_stream,), to_list(dustbin))

    from_iterable(consumers.to_tweet(target), tweets)

    assert not matched
    assert len(dustbin) == 3
def filter(producer, config):
    """Filter the tweets to files by filtering predicates defined in the configuration file."""
    # The dustbin consumer is only created when a template for it is configured.
    dustbin_template = config.dustbin_template
    dustbin = None
    if dustbin_template is not None:
        dustbin = consumers.group(dustbin_template)

    streams = []
    for spec in config.filters:
        sink = consumers.group(spec.split_template)
        # Bind the current filter as a default so each lambda keeps its own.
        streams.append((sink, lambda c, _, f=spec: c.filter(**f.predicates)))

    target = consumers.filter(tuple(streams), dustbin)
    producer(consumers.to_tweet(target))
def test_count_timeline(tweets):
    """Run the fixture tweets through ``timeline`` and compare the per-window counts."""
    counter = Counter()
    pipeline = consumers.to_tweet(consumers.timeline(counter))
    from_iterable(pipeline, tweets)

    # One tweet is expected in each of three windows.
    expected = Counter()
    for window in ('2012-05-12-09', '2012-04-26-07', '2012-04-13-13'):
        expected[window] = 1

    assert counter == expected
def test_batch(tweets):
    """Count the ``BatchEndException`` signals that reach the innermost consumer."""
    @consumers.consumer
    def count_batch_ends():
        # The tally lives on the decorated object so the test body can read
        # it after the run.
        count_batch_ends.batches = 0
        while True:
            try:
                yield
            except consumers.BatchEndException:
                count_batch_ends.batches += 1

    innermost = count_batch_ends()
    pipeline = consumers.to_tweet(consumers.batch(innermost))
    from_iterable(pipeline, tweets)

    assert count_batch_ends.batches == 2
def filter(producer, config):
    """Filter the tweets to files by filtering predicates defined in the configuration file."""
    def stream_for(flt):
        # Pair the grouping consumer with a predicate pinned to this filter
        # through the lambda's default argument.
        return (
            consumers.group(flt.split_template),
            lambda c, _, f=flt: c.filter(**f.predicates),
        )

    # Without a configured dustbin template no dustbin consumer is created.
    dustbin_template = config.dustbin_template
    if dustbin_template is None:
        dustbin = None
    else:
        dustbin = consumers.group(dustbin_template)

    streams = tuple(stream_for(flt) for flt in config.filters)
    producer(consumers.to_tweet(consumers.filter(streams, dustbin)))
def test_count_timeline(tweets):
    """Check the per-window counts that ``timeline`` writes into the counter."""
    counter = Counter()
    window_counter = consumers.timeline(counter)
    from_iterable(consumers.to_tweet(window_counter), tweets)

    # Each of the three windows is expected to hold one tweet.
    expected_windows = ['2012-05-12-09', '2012-04-26-07', '2012-04-13-13']
    assert counter == Counter(dict.fromkeys(expected_windows, 1))
def test_count_tokens(tweets):
    """Check the token counts that ``count_tokens`` writes into the counter."""
    counter = Counter()
    pipeline = consumers.to_tweet(consumers.count_tokens(counter))
    from_iterable(pipeline, tweets)

    # All expected tokens occur once.
    expected = Counter()
    for token in (
        u'paaspop',
        u'all',
        u'pinkpop',
        u'thats',
        u'pedropicopop',
        u'use',
        u'here',
        u'pukkelpop',
        u'prilpop',
        u'fun',
        u'come',
        u'#pygrunn',
        u'#pp12',
    ):
        expected[token] = 1

    assert counter == expected
def test_bad_json():
    """A string that is not JSON must not produce anything in the result list."""
    result = []
    broken_input = ['not valid JSON']

    pipeline = consumers.to_tweet(to_list(result))
    from_iterable(pipeline, broken_input)

    assert result == []
def test_basic(tweets):
    """The fixture must yield three items, each of them a ``Tweet`` instance."""
    result = []
    pipeline = consumers.to_tweet(to_list(result))
    from_iterable(pipeline, tweets)

    assert len(result) == 3
    for item in result:
        assert isinstance(item, Tweet)
def test_uniq(tweets):
    """Feed the fixture repeated twenty times through ``uniq``."""
    result = []
    repeated = tweets * 20

    deduplicating = consumers.uniq(to_list(result))
    from_iterable(consumers.to_tweet(deduplicating), repeated)

    # NOTE(review): this only checks that something was collected; no exact
    # count is asserted -- confirm whether a stricter check is wanted.
    assert result
def group(
    producer,
    file_name_template=('t', '%Y-%m-%d-%H.gz', ''),
):
    """Group tweets to files by date according to the template."""
    # The grouping consumer is built from the template and wrapped by
    # ``to_tweet`` before the producer receives it.
    grouper = consumers.group(file_name_template)
    pipeline = consumers.to_tweet(grouper)
    producer(pipeline)
def flow(out=None):
    """Hand ``producer`` a ``to_tweet``-wrapped ``print_text`` consumer created with ``out``."""
    # ``producer`` is not a parameter; it is resolved from the surrounding scope.
    text_printer = consumers.print_text(output=out)
    producer(consumers.to_tweet(text_printer))
def pprint(producer):
    """Pretty print tweet's json representation."""
    # ``consumers.pprint`` is the innermost stage, wrapped by ``to_tweet``.
    pretty_printer = consumers.pprint()
    pipeline = consumers.to_tweet(pretty_printer)
    producer(pipeline)
def show(producer):
    """Print tweets in human readable form."""
    # ``consumers.show`` is called without arguments here, so it runs with
    # its own defaults.
    renderer = consumers.show()
    producer(consumers.to_tweet(renderer))
def group(producer, file_name_template=('t', '%Y-%m-%d-%H.gz', '')):
    """Group tweets to files by date according to the template."""
    # Build the chain from the inside out, then pass it to the producer.
    by_template = consumers.group(file_name_template)
    producer(consumers.to_tweet(by_template))
def uniq(producer):
    """Omit repeated tweets."""
    # Chain: to_tweet -> uniq -> select, constructed innermost first.
    inner = consumers.select()
    pipeline = consumers.to_tweet(consumers.uniq(inner))
    producer(pipeline)
def S(source):
    """Build a one-argument callable that runs ``source`` through ``to_tweet(target)``."""
    # ``source`` is closed over; only the target varies per call.
    return lambda target: consume_iterable(consumers.to_tweet(target), source)