def test_get_schema_files(schema_files_dir):
    fs = KafkaStream.schemas(schema_files_dir)
    assert isinstance(fs, dict)
    for key, val in fs.items():
        assert val.endswith('.avsc')
        assert key in ['twitter', 'keyschema', 'reddit', 'gdax']
    with pytest.raises(ValueError):
        KafkaStream.schemas(None)
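# A minimal sketch of the schema_files_dir fixture the test above expects.
# It is not part of the original suite; the path is an assumption borrowed
# from test_setup_avro_producer further below.
import pytest


@pytest.fixture
def schema_files_dir():
    return 'tests/sample_schemas'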
class StreamFactory(object):

    STREAMS = KafkaStream.avro_consumer(topic='gdax', offset='start')

    @classmethod
    def stream_from_datetime(cls, start_time: datetime, stream_from: Union[Iterable, AnyStr]):
        # Read either from Kafka or from any in-memory iterable of records
        stream = cls.STREAMS if stream_from == 'kafka' else stream_from
        return Observable \
            .from_(stream) \
            .filter(lambda value: datetime.strptime(value['ts'], '%Y-%m-%d %H:%M:%S') > start_time)

    @classmethod
    def stream_from_start(cls, stream_from: Union[Iterable, AnyStr]):
        stream = cls.STREAMS if stream_from == 'kafka' else stream_from
        return Observable \
            .from_(stream)

    @classmethod
    def stream_from_offset(cls, offset, stream_from: Union[Iterable, AnyStr]):
        stream = cls.STREAMS if stream_from == 'kafka' else stream_from
        # Emit messages until the stream is within ~5 seconds of real time
        return Observable \
            .from_(stream) \
            .take_while(lambda value: datetime.now() - datetime.strptime(value['ts'], '%Y-%m-%d %H:%M:%S') > timedelta(seconds=5))
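# A minimal usage sketch for StreamFactory, assuming the RxPY 1.x Observable
# API used above and records carrying a 'ts' field. The in-memory list stands
# in for a Kafka topic; the values are made up for illustration.
from datetime import datetime

records = [
    {'ts': '2018-07-01 12:00:00', 'price': 6400.0},
    {'ts': '2018-07-01 12:00:05', 'price': 6401.5},
]

# Only the second record passes the filter (its ts is after 12:00:02)
StreamFactory \
    .stream_from_datetime(datetime(2018, 7, 1, 12, 0, 2), stream_from=records) \
    .subscribe(print)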
def __init__(self, socket_client, topic='gdax'):
    self.ws = socket_client
    self.delay = 1
    # offset='end' means only messages arriving after startup are consumed;
    # use the topic argument rather than a hardcoded 'gdax'
    self.stream = KafkaStream.consumer(topic=topic, offset='end')
    self.source = Observable.from_(self.stream)
    super(AvroListener, self).__init__()
def get_historic_data(offset, max_points=50000):
    stream = KafkaStream.avro_consumer(topic='gdax', offset=offset)
    source = Observable \
        .from_(stream) \
        .take_while(lambda value: datetime.now() - datetime.strptime(value['ts'], '%Y-%m-%d %H:%M:%S') > timedelta(seconds=5))
    # Block until the stream catches up to ~5 seconds behind real time,
    # then keep only the most recent max_points messages
    blocking = source.to_blocking()
    return [msg for msg in blocking][-max_points:]
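# Hypothetical call: replay the topic from the beginning and keep the most
# recent 1,000 ticks. The 'start' offset mirrors the value used elsewhere in
# these snippets; the field access assumes the gdax record schema.
history = get_historic_data(offset='start', max_points=1000)
prices = [msg['price'] for msg in history]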
from kafka_tfrx.stream import KafkaStream

from kryptoflow.scrapers.reddit import RedditStreamer
from kryptoflow.managers.project import ProjectManager


if __name__ == '__main__':
    schemas = ProjectManager.get_value('kafka')['schemas']
    secrets = ProjectManager.get_secrets('reddit')

    sink = KafkaStream.avro_producer(schemas=schemas, topic='reddit')
    r = RedditStreamer(producer=sink, reddit_config=secrets)
    r.start_stream()
def get_live_data(max_points=50000):
    # max_points was previously an undefined global; take it as a parameter,
    # mirroring get_historic_data
    stream = KafkaStream(topic='gdax')
    msgs = stream.read_new()
    if len(msgs) > max_points:
        # Cap the result at the most recent max_points messages
        return msgs[-max_points:]
    return msgs
def gen():
    stream = KafkaStream.avro_consumer(topic='gdax', offset='start')
    for i in stream:
        print(i)
        # Encode side as a binary label: 1 for 'buy', 0 otherwise
        yield (i['price'], i['volume_24h'], i['spread'], 1 if i['side'] == 'buy' else 0)
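# One way gen() might feed a TensorFlow input pipeline; a sketch, not part of
# the original code. The output dtypes are assumptions matching the yielded
# (price, volume_24h, spread, side) tuple, and the batch size is arbitrary.
import tensorflow as tf

dataset = tf.data.Dataset.from_generator(
    gen,
    output_types=(tf.float32, tf.float32, tf.float32, tf.int32)
).batch(32)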
"""
Run python examples/simple_produce.py first so there is output to consume.

Assumes Kafka and the schema registry are running according to the following
configuration:
https://github.com/carlomazzaferro/kryptoflow/blob/master/docker-compose.yml
"""
from kafka_tfrx.stream import KafkaStream


if __name__ == '__main__':
    cons = KafkaStream.avro_consumer(topic='gdax', ip='127.0.0.1')
    for c in cons:
        print(c)
def stream_from_start(observer):
    stream = KafkaStream.avro_consumer(topic='gdax', offset='start')
    # subscribe() returns a Disposable, not an Observable; observer is called,
    # so it is expected to be a factory (e.g. an Observer class)
    subscription = Observable \
        .from_(stream) \
        .subscribe(observer())
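# Hypothetical observer to pass to stream_from_start; PrintObserver is not
# part of kryptoflow. It assumes the RxPY 1.x Observer interface, and the
# class itself (not an instance) is passed, since stream_from_start calls
# observer().
from rx import Observer


class PrintObserver(Observer):
    def on_next(self, value):
        print(value)

    def on_error(self, error):
        print('error:', error)

    def on_completed(self):
        print('stream completed')


stream_from_start(PrintObserver)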
from kafka_tfrx.stream import KafkaStream

from kryptoflow.scrapers.gdax_ws import GDAXClient
from kryptoflow.managers.project import ProjectManager


if __name__ == '__main__':
    schemas = ProjectManager.get_value('kafka')['schemas']
    sink = KafkaStream.avro_producer(topic='gdax', ip='localhost', schemas=schemas)
    gd = GDAXClient(products=['BTC-USD'], channels=['ticker'], producer=sink)
    gd.start_stream()
def test_setup_avro_consumer_int():
    e = KafkaStream.avro_consumer('gdax', offset=10000, group_id=None)
    # isinstance(e, KafkaStream.__class__) checked against type itself;
    # the intended check is against KafkaStream
    assert isinstance(e, KafkaStream)
    assert isinstance(e, collections.abc.Iterable)
def test_setup_avro_consumer():
    c = KafkaStream.avro_consumer('gdax', offset='start', group_id=None)
    assert isinstance(c, KafkaStream)
    assert isinstance(c, collections.abc.Iterable)
def test_setup_avro_producer():
    p = KafkaStream.avro_producer('gdax', schemas='tests/sample_schemas')
    assert isinstance(p, AvroProducer)
    with pytest.raises(ValueError):
        KafkaStream.avro_producer('gdax', schemas=None)
"""
Assumes Kafka and the schema registry are running according to the following
configuration:
https://github.com/carlomazzaferro/kryptoflow/blob/master/docker-compose.yml
"""
from kafka_tfrx.stream import KafkaStream


if __name__ == '__main__':
    msg = {"price": 100.0, "volume_24h": 10000.1, "spread": 0,
           "ts": "1235467", "side": "buy"}

    p = KafkaStream.avro_producer('gdax', schemas='examples/schemas', ip='127.0.0.1')
    for i in range(100):
        p.produce(topic='gdax', value=msg)
    # Flush once after the loop so all queued messages are delivered
    p.flush()