Exemplo n.º 1
0
def accumulate_data(time_steps=definitions.TIMEFRAME):
    """Read new 'gdax' messages and hand the selected fields to rows_to_df.

    Args:
        time_steps: Forwarded to ``AvroAsync.read_new`` as ``n_messages``.

    Returns:
        Whatever ``rows_to_df`` returns for the filtered rows, with
        ``'side'`` declared as categorical.
    """
    wanted_fields = ('ts', 'price', 'volume_24h', 'spread', 'side')

    reader = AvroAsync(topic='gdax')
    batch = reader.read_new(accumulate=True,
                            n_messages=time_steps,
                            unique=True)

    # Keep only the wanted fields of every message, one dict per message.
    records = []
    for entry in batch:
        record = {}
        for field, value in entry.items():
            if field in wanted_fields:
                record[field] = value
        records.append(record)

    return rows_to_df(records, categorical=['side'])
Exemplo n.º 2
0
    def __init__(self, socket_client, topic='gdax'):
        """Store the socket client and create the Avro listener for ``topic``.

        Args:
            socket_client: Kept on the instance as ``self.ws``.
            topic: Topic name handed to ``AvroAsync``; defaults to ``'gdax'``.
        """
        self.ws, self.delay = socket_client, 1
        self.listener = AvroAsync(topic=topic)

        # Parent initialisation runs last, once the attributes above exist.
        super(AvroListener, self).__init__()
Exemplo n.º 3
0
        except Exception as e:
            print('ERROR', e)
            self.start_stream()

    def format_message(self, msg):
        """Build, print and return the output record for one message.

        ``msg.body`` is split into sentences by ``self.analyzer.sentences``;
        the ``'compound'`` values returned by ``self.analyzer.sentiment`` for
        those sentences are summed into the polarity. ``msg.created_utc`` is
        passed through ``datetime.fromtimestamp`` and truncated to whole
        seconds before being rendered as a string.

        Args:
            msg: Object exposing ``body`` and ``created_utc`` attributes.

        Returns:
            dict with the keys ``'sentences'``, ``'polarity'``,
            ``'sentence_count'`` and ``'ts'``.
        """
        split_body = list(self.analyzer.sentences(msg.body))
        how_many = len(split_body)
        scores = self.analyzer.sentiment(split_body)
        total_polarity = sum(score['compound'] for score in scores)

        created = datetime.fromtimestamp(msg.created_utc)
        record = {
            'sentences': msg.body,
            'polarity': total_polarity,
            'sentence_count': how_many,
            'ts': str(created.replace(microsecond=0)),
        }
        print(record)
        return record


if __name__ == '__main__':
    # Script entry point: give a RedditStreamer the producer obtained from
    # the 'reddit' AvroAsync sink, then start its stream.
    sink = AvroAsync(topic='reddit')
    producer = sink.producer()
    r = RedditStreamer(producer=producer)
    r.start_stream()
Exemplo n.º 4
0
        msg = {
            'price':
            float(message['price']),
            'ts':
            str(
                utc_to_local(
                    datetime.strptime(
                        message['time'],
                        "%Y-%m-%dT%H:%M:%S.%fZ").replace(microsecond=0))),
            'volume_24h':
            float(message['volume_24h']),
            'spread':
            self.calculate_spread(message),
            'side':
            message['side']
        }
        print(msg)

        return msg

    def on_close(self):
        """Print the farewell banner."""
        farewell = "-- Goodbye! --"
        print(farewell)


if __name__ == '__main__':
    # Script entry point: build a GDAXClient for the BTC-USD ticker channel,
    # give it the producer obtained from the 'gdax' AvroAsync sink, and
    # start streaming.
    sink = AvroAsync(topic='gdax')
    stream_options = {
        'products': ['BTC-USD'],
        'channels': ['ticker'],
        'producer': sink.producer(),
    }
    gd = GDAXClient(**stream_options)
    gd.start_stream()
Exemplo n.º 5
0
def get_historic_data(offset, max_points=50000):
    """Read 'gdax' messages starting at ``offset``, capped at ``max_points``.

    Args:
        offset: Start offset forwarded to ``AvroAsync.read_from_offset``.
        max_points: Upper bound on the number of messages returned.

    Returns:
        The messages that were read. When more than ``max_points`` were
        read, only the trailing ``max_points`` of them are returned.
    """
    reader = AvroAsync(topic='gdax')
    msgs = reader.read_from_offset(offset=offset)

    surplus = len(msgs) - max_points
    if surplus > 0:
        # Drop the leading surplus so exactly ``max_points`` items remain.
        # Slicing with ``msgs[max_points:]`` would instead discard the first
        # ``max_points`` items and could return more than the cap (or almost
        # nothing). Using ``surplus`` also avoids the ``msgs[-0:]`` pitfall.
        return msgs[surplus:]
    return msgs
Exemplo n.º 6
0
        all_data['text'] = clean_text(text)
        message = self.format_message(all_data)
        print(message)
        self.producer.produce(topic=self.topic, value=message)

    def on_error(self, status):
        """Print the error status and call ``self.start()`` again.

        Args:
            status: Value interpolated into the printed ``'Error %s'`` line.
        """
        report = 'Error %s' % status
        print(report)
        # NOTE(review): the restart is unconditional; no back-off or retry
        # limit is visible in this method -- confirm that is intended.
        self.start()

    def format_message(self, msg):
        """Build the output record for one message dict.

        ``msg['text']`` is split into sentences by
        ``self.analyzer.sentences``; the ``'compound'`` values returned by
        ``self.analyzer.sentiment`` for those sentences are summed into the
        polarity. ``msg['timestamp_ms']`` is converted to an int, divided by
        1000, passed through ``datetime.fromtimestamp`` and truncated to
        whole seconds before being rendered as a string.

        Args:
            msg: Mapping providing the ``'text'`` and ``'timestamp_ms'`` keys.

        Returns:
            dict with the keys ``'sentences'``, ``'polarity'``,
            ``'sentence_count'`` and ``'ts'``.
        """
        split_text = list(self.analyzer.sentences(msg['text']))
        how_many = len(split_text)
        scores = self.analyzer.sentiment(split_text)
        total_polarity = sum(score['compound'] for score in scores)

        record = {}
        record['sentences'] = str(msg['text'])
        record['polarity'] = total_polarity
        record['sentence_count'] = how_many

        seconds = int(msg['timestamp_ms']) / 1000
        stamp = datetime.fromtimestamp(seconds).replace(microsecond=0)
        record['ts'] = str(stamp)
        return record


if __name__ == '__main__':
    # Script entry point: give a TwitterStream the producer obtained from
    # the AvroAsync sink, then start it.
    # NOTE(review): the sink topic is 'reddit' although this starts a
    # TwitterStream -- confirm this is intentional and not a copy-paste slip.
    sink = AvroAsync(topic='reddit')
    producer = sink.producer()
    tweet_stream = TwitterStream(producer=producer)
    tweet_stream.start()

Exemplo n.º 7
0
def get_data(topic, keep_keys=None, categorical=None):
    """Read every message of ``topic`` and pass the kept fields to rows_to_df.

    Args:
        topic: Topic name handed to ``AvroAsync``.
        keep_keys: Message keys to keep in each row. ``None`` (the default)
            means ``['ts']``.
        categorical: Forwarded to ``rows_to_df`` as ``categorical``. ``None``
            (the default) means ``['side']``.

    Returns:
        Whatever ``rows_to_df`` returns for the filtered rows.
    """
    # Fresh lists are built on every call: list objects used directly as
    # default values are shared between calls, so a mutation by a callee
    # (``categorical`` is handed to rows_to_df) would leak into later calls.
    if keep_keys is None:
        keep_keys = ['ts']
    if categorical is None:
        categorical = ['side']
    # NOTE(review): with both defaults, 'side' is filtered out of the rows
    # yet still passed as categorical -- confirm rows_to_df tolerates that.

    consumer = AvroAsync(topic=topic)
    data = consumer.read_from_start(return_msgs=True)
    rows = [{k: v for k, v in msg.items() if k in keep_keys} for msg in data]
    return rows_to_df(rows, categorical=categorical)