import pytest

from kafka_tfrx.stream import KafkaStream


def test_get_schema_files(schema_files_dir):
    fs = KafkaStream.schemas(schema_files_dir)
    assert isinstance(fs, dict)
    for key, val in fs.items():
        assert val.endswith('.avsc')
        assert key in ['twitter', 'keyschema', 'reddit', 'gdax']

    with pytest.raises(ValueError):
        KafkaStream.schemas(None)
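The test assumes a schema_files_dir pytest fixture; a minimal sketch of what it might look like, pointing at the tests/sample_schemas directory used by the producer tests further down (the exact path is an assumption):

@pytest.fixture
def schema_files_dir():
    # Hypothetical fixture: a directory containing the .avsc schema files
    return 'tests/sample_schemas'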
Example #2
from datetime import datetime, timedelta
from typing import AnyStr, Iterable, Union

from rx import Observable

from kafka_tfrx.stream import KafkaStream


class StreamFactory(object):

    STREAMS = KafkaStream.avro_consumer(topic='gdax', offset='start')

    @classmethod
    def stream_from_datetime(cls, start_time: datetime,
                             stream_from: Union[Iterable, AnyStr]):
        stream = cls.STREAMS if stream_from == 'kafka' else stream_from
        return Observable \
                    .from_(stream) \
                    .filter(lambda value: datetime.strptime(value['ts'], '%Y-%m-%d %H:%M:%S') > start_time)

    @classmethod
    def stream_from_start(cls, stream_from: Union[Iterable, AnyStr]):
        stream = cls.STREAMS if stream_from == 'kafka' else stream_from
        return Observable \
                    .from_(stream)

    @classmethod
    def stream_from_offset(cls, offset, stream_from: Union[Iterable, AnyStr]):
        stream = cls.STREAMS if stream_from == 'kafka' else stream_from
        return Observable \
                    .from_(stream) \
                    .take_while(lambda value: datetime.now() -
                                              datetime.strptime(value['ts'], '%Y-%m-%d %H:%M:%S') > timedelta(seconds=5))
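Because stream_from accepts any iterable, the factory can be exercised without a broker; a minimal sketch with in-memory records (the sample data is made up, and note that STREAMS still connects to Kafka when the class is defined):

fake_stream = [{'ts': '2019-01-01 00:00:00', 'price': 100.0},
               {'ts': '2019-01-02 00:00:00', 'price': 101.0}]
StreamFactory \
    .stream_from_datetime(datetime(2019, 1, 1, 12), stream_from=fake_stream) \
    .subscribe(print)  # emits only the second record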
Example #3
    def __init__(self, socket_client, topic='gdax'):

        self.ws = socket_client
        self.delay = 1
        self.stream = KafkaStream.consumer(topic=topic, offset='end')
        self.source = Observable.from_(self.stream)

        super(AvroListener, self).__init__()
Example #4
from datetime import datetime, timedelta

from rx import Observable

from kafka_tfrx.stream import KafkaStream


def get_historic_data(offset, max_points=50000):
    stream = KafkaStream.avro_consumer(topic='gdax', offset=offset)
    source = Observable \
        .from_(stream) \
        .take_while(lambda value: datetime.now() -
                                  datetime.strptime(value['ts'], '%Y-%m-%d %H:%M:%S') > timedelta(seconds=5))

    a = source.to_blocking()
    return [msg for msg in a][-max_points:]
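A hypothetical call, replaying the topic from the beginning and keeping only the newest points (the take_while above stops consuming once messages are less than five seconds old):

history = get_historic_data(offset='start', max_points=1000)
print(len(history), history[-1])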
Example #5
from kafka_tfrx.stream import KafkaStream

from kryptoflow.scrapers.reddit import RedditStreamer
from kryptoflow.managers.project import ProjectManager

if __name__ == '__main__':
    schemas = ProjectManager.get_value('kafka')['schemas']
    secrets = ProjectManager.get_secrets('reddit')
    sink = KafkaStream.avro_producer(schemas=schemas, topic='reddit')
    r = RedditStreamer(producer=sink, reddit_config=secrets)
    r.start_stream()
Example #6
def get_live_data(max_points=50000):
    stream = KafkaStream(topic='gdax')
    msgs = stream.read_new()
    if len(msgs) > max_points:
        return msgs[-max_points:]
    return msgs
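A rough sketch of how this helper and get_historic_data from Example #4 complement each other, seeding a dataset with history and topping it up with fresh messages (purely illustrative):

points = get_historic_data(offset='start')
points += get_live_data()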
Example #7
def gen():
    stream = KafkaStream.avro_consumer(topic='gdax', offset='start')
    for i in stream:
        print(i)
        yield (i['price'], i['volume_24h'], i['spread'],
               1 if i['side'] == 'buy' else 0)
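Since gen() yields fixed-size numeric tuples, it can back a TensorFlow input pipeline; a minimal sketch assuming TensorFlow 2.x is available (the dtypes are assumptions):

import tensorflow as tf

dataset = tf.data.Dataset.from_generator(
    gen,
    output_types=(tf.float32, tf.float32, tf.float32, tf.int32))
for example in dataset.take(5):
    print(example)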
Example #8
from kafka_tfrx.stream import KafkaStream
"""
Run python examples/simple_produce.py first to get an output here
Assumes kafka and schema registry are running according to the following
configuration: https://github.com/carlomazzaferro/kryptoflow/blob/master/docker-compose.yml
"""

if __name__ == '__main__':
    cons = KafkaStream.avro_consumer(topic='gdax', ip='127.0.0.1')
    for c in cons:
        print(c)
Example #9
from rx import Observable

from kafka_tfrx.stream import KafkaStream


def stream_from_start(observer):
    stream = KafkaStream.avro_consumer(topic='gdax', offset='start')
    Observable \
        .from_(stream) \
        .subscribe(observer())
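A minimal observer the function could be called with, in RxPY 1.x style (the class name is hypothetical; note that stream_from_start instantiates it via observer()):

from rx import Observer

class PrintObserver(Observer):
    def on_next(self, value):
        print(value)

    def on_error(self, error):
        print('error:', error)

    def on_completed(self):
        print('done')

stream_from_start(PrintObserver)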
Example #10
from kafka_tfrx.stream import KafkaStream
from kryptoflow.scrapers.gdax_ws import GDAXClient
from kryptoflow.managers.project import ProjectManager

if __name__ == '__main__':

    schemas = ProjectManager.get_value('kafka')['schemas']
    sink = KafkaStream.avro_producer(topic='gdax', ip='localhost', schemas=schemas)
    gd = GDAXClient(products=['BTC-USD'], channels=['ticker'], producer=sink)
    gd.start_stream()
Example #11
def test_setup_avro_consumer_int():
    e = KafkaStream.avro_consumer('gdax', offset=10000, group_id=None)
    assert isinstance(e, KafkaStream)
    assert isinstance(e, collections.abc.Iterable)
Example #12
def test_setup_avro_consumer():
    c = KafkaStream.avro_consumer('gdax', offset='start', group_id=None)
    assert isinstance(c, KafkaStream)
    assert isinstance(c, collections.abc.Iterable)
Example #13
def test_setup_avro_producer():
    p = KafkaStream.avro_producer('gdax', schemas='tests/sample_schemas')
    assert isinstance(p, AvroProducer)
    with pytest.raises(ValueError):
        KafkaStream.avro_producer('gdax', schemas=None)
Example #14
from kafka_tfrx.stream import KafkaStream

"""
Assumes kafka and schema registry are running according to the following
configuration: https://github.com/carlomazzaferro/kryptoflow/blob/master/docker-compose.yml
"""

if __name__ == '__main__':
    msg = {"price":  100.0,
           "volume_24h": 10000.1,
           "spread": 0,
           "ts": "1235467",
           "side": "buy"}

    p = KafkaStream.avro_producer('gdax', schemas='examples/schemas', ip='127.0.0.1')

    for i in range(100):
        p.produce(topic='gdax', value=msg)
    p.flush()