async def loop(self):
    if self.config.cache == 'redis':
        cache = Redis(ip=self.config.redis['ip'],
                      port=self.config.redis['port'],
                      socket=self.config.redis.socket,
                      del_after_read=self.config.redis['del_after_read'],
                      flush=self.config.redis['start_flush'],
                      retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
    elif self.config.cache == 'kafka':
        cache = Kafka(self.config.kafka['ip'],
                      self.config.kafka['port'],
                      flush=self.config.kafka['start_flush'])

    while True:
        start = time.time()
        if 'exchanges' in self.config and self.config.exchanges:
            for exchange in self.config.exchanges:
                for dtype in self.config.exchanges[exchange]:
                    if dtype in {'retries'}:
                        continue
                    for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
                            self.config.exchanges[exchange][dtype] else \
                            self.config.exchanges[exchange][dtype]['symbols']:
                        store = Storage(self.config)
                        LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                        data = cache.read(exchange, dtype, pair)
                        if len(data) == 0:
                            LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                            continue
                        store.aggregate(data)
                        store.write(exchange, dtype, pair, time.time())
                        cache.delete(exchange, dtype, pair)
                        LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
            total = time.time() - start
            interval = self.config.storage_interval - total
            if interval <= 0:
                LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d",
                            total, self.config.storage_interval)
                interval = 0.5
            await asyncio.sleep(interval)
        else:
            await asyncio.sleep(30)
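# Illustrative note (assumption, not part of the original module): the nested loop above
# expects self.config.exchanges to map exchange -> data type -> pairs, where a data type
# entry is either a plain list of pairs or a dict holding a 'symbols' list, and where
# non-channel keys such as 'retries' are skipped. Example shape, with placeholder values:
#
#     exchanges = {
#         'COINBASE': {
#             'retries': 5,                         # skipped by the dtype filter above
#             'trades': ['BTC-USD', 'ETH-USD'],     # plain list of pairs
#             'l2_book': {'symbols': ['BTC-USD']},  # dict form; the 'symbols' list is used
#         },
#     }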
class Aggregator(Process):
    def __init__(self, config_file=None):
        self.config_file = config_file
        super().__init__()
        self.daemon = True
        self.terminating = False

    def run(self):
        LOG.info("Aggregator running on PID %d", os.getpid())
        loop = asyncio.get_event_loop()
        self.config = DynamicConfig(loop=loop, file_name=self.config_file)
        loop.create_task(self.loop(loop=loop))
        self.event = threading.Event()  # sleep control for write threads
        setup_event_loop_signal_handlers(loop, self._stop_on_signal)
        try:
            loop.run_forever()
        except KeyboardInterrupt:
            pass
        except Exception:
            LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)

    def _stop_on_signal(self, sig, loop):
        if self.terminating:
            LOG.info("Aggregator on PID %d is already being stopped...", os.getpid())
            return
        LOG.info("Stopping Aggregator on %d due to signal %d", os.getpid(), sig)
        self.terminating = True
        self.config.set_terminating()
        self.event.set()
        if 'write_on_stop' in self.config and self.config.write_on_stop \
                and 'exchanges' in self.config and self.config.exchanges:
            stop_event_loop(loop, self._write_storage(loop=loop, write_on_stop=True))
        else:
            stop_event_loop(loop)

    async def loop(self, loop):
        if self.config.cache == 'redis':
            self.cache = Redis(ip=self.config.redis['ip'],
                               port=self.config.redis['port'],
                               password=os.environ.get('REDIS_PASSWORD', None) or self.config.redis.get('password', None),
                               socket=self.config.redis.socket,
                               del_after_read=self.config.redis['del_after_read'],
                               flush=self.config.redis['start_flush'],
                               retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            self.cache = Kafka(self.config.kafka['ip'],
                               self.config.kafka['port'],
                               flush=self.config.kafka['start_flush'])

        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        self.parquet_buffer = dict()
        while not self.terminating:
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    await self._write_storage(loop=loop, start=start, end=end)
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                        wait = 0.5
                    try:
                        await asyncio.sleep(delay=wait, loop=loop)
                    except asyncio.CancelledError:
                        pass
                else:
                    try:
                        await asyncio.sleep(delay=30, loop=loop)
                    except asyncio.CancelledError:
                        pass
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
                raise
        LOG.info("Aggregator running on PID %d stopped", os.getpid())

    async def _write_storage(self, loop, start=None, end=None, write_on_stop=False):
        if write_on_stop:
            LOG.info("Writing cached data before stopping...")
        else:
            LOG.info("Writing cached data...")
        max_workers = self.config.num_write_threads if 'num_write_threads' in self.config else 1
        # One blocking write task per exchange/dtype/pair, run in a thread pool.
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = []
            for exchange in self.config.exchanges:
                for dtype in self.config.exchanges[exchange]:
                    # Skip over the retries arg in the config if present.
                    if dtype in {'retries', 'channel_timeouts', 'http_proxy'}:
                        continue
                    for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
                            self.config.exchanges[exchange][dtype] else \
                            self.config.exchanges[exchange][dtype]['symbols']:
                        futures.append(loop.run_in_executor(pool, self._write_pair_blocking,
                                                            exchange, dtype, pair, start, end, write_on_stop))
            await asyncio.gather(*futures, loop=loop, return_exceptions=False)
        LOG.info("Write finished")

    def _write_pair_blocking(self, exchange, dtype, pair, start, end, write_on_stop):
        LOG.info('Reading cache for %s-%s-%s', exchange, dtype, pair)
        store = Storage(self.config, parquet_buffer=self.parquet_buffer)
        data = self.cache.read(exchange, dtype, pair, start=start, end=end)
        if len(data) == 0:
            LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
            return

        store.aggregate(data)

        # Keep retrying the write until it succeeds, the retry budget is exhausted,
        # or shutdown begins (unless this is the final write-on-stop pass).
        retries = 0
        while (not self.terminating) or write_on_stop:
            if retries > self.config.storage_retries:
                LOG.error("Failed to write after %d retries", self.config.storage_retries)
                raise EngineWriteError
            try:
                # retrying this is ok, provided every
                # engine clears its internal buffer after writing successfully.
                LOG.info('Writing cached data to store for %s-%s-%s', exchange, dtype, pair)
                store.write(exchange, dtype, pair, time.time())
            except OSError as e:
                LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                if write_on_stop:
                    break
                if e.errno == 112:  # Host is down
                    retries += 1
                    self.event.wait(self.config.storage_retry_wait)
                else:
                    raise
            except EngineWriteError as e:
                LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                if write_on_stop:
                    break
                retries += 1
                self.event.wait(self.config.storage_retry_wait)
            else:
                break

        LOG.info('Deleting cached data for %s-%s-%s', exchange, dtype, pair)
        self.cache.delete(exchange, dtype, pair)
        LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
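# Minimal usage sketch (assumption, not part of the original file): Aggregator subclasses
# multiprocessing.Process, so a supervisor would typically construct it with a config file
# path and start it as a child process. The file name below is a placeholder.
if __name__ == '__main__':
    aggregator = Aggregator(config_file='config.yaml')
    aggregator.start()   # forks the child; run() sets up the event loop and the storage loop
    aggregator.join()    # block until the child exits (e.g. after SIGINT/SIGTERM handling)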
async def loop(self):
    if self.config.cache == 'redis':
        cache = Redis(ip=self.config.redis['ip'],
                      port=self.config.redis['port'],
                      socket=self.config.redis.socket,
                      del_after_read=self.config.redis['del_after_read'],
                      flush=self.config.redis['start_flush'],
                      retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
    elif self.config.cache == 'kafka':
        cache = Kafka(self.config.kafka['ip'],
                      self.config.kafka['port'],
                      flush=self.config.kafka['start_flush'])

    interval = self.config.storage_interval
    time_partition = False
    multiplier = 1
    if not isinstance(interval, int):
        if len(interval) > 1:
            multiplier = int(interval[:-1])
            interval = interval[-1]
        base_interval = interval
        if interval in {'M', 'H', 'D'}:
            time_partition = True
            if interval == 'M':
                interval = 60 * multiplier
            elif interval == 'H':
                interval = 3600 * multiplier
            else:
                interval = 86400 * multiplier

    parquet_buffer = dict()
    while True:
        start, end = None, None
        try:
            aggregation_start = time.time()
            if time_partition:
                interval_start = aggregation_start
                if end:
                    interval_start = end + timedelta(seconds=interval + 1)
                start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
            if 'exchanges' in self.config and self.config.exchanges:
                store = Storage(self.config, parquet_buffer=parquet_buffer)
                for exchange in self.config.exchanges:
                    for dtype in self.config.exchanges[exchange]:
                        # Skip over the retries arg in the config if present.
                        if dtype in {'retries', 'channel_timeouts'}:
                            continue
                        for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
                                self.config.exchanges[exchange][dtype] else \
                                self.config.exchanges[exchange][dtype]['symbols']:
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange, dtype, pair, start=start, end=end)
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                continue
                            store.aggregate(data)

                            retries = 0
                            while True:
                                if retries > self.config.storage_retries:
                                    LOG.error("Failed to write after %d retries", self.config.storage_retries)
                                    raise EngineWriteError
                                try:
                                    # retrying this is ok, provided every
                                    # engine clears its internal buffer after writing successfully.
                                    store.write(exchange, dtype, pair, time.time())
                                except OSError as e:
                                    if e.errno == 112:  # Host is down
                                        LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                                        retries += 1
                                        await asyncio.sleep(self.config.storage_retry_wait)
                                        continue
                                    else:
                                        raise
                                except EngineWriteError:
                                    retries += 1
                                    await asyncio.sleep(self.config.storage_retry_wait)
                                    continue
                                else:
                                    break

                            cache.delete(exchange, dtype, pair)
                            LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
                total = time.time() - aggregation_start
                wait = interval - total
                if wait <= 0:
                    LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                    wait = 0.5
                await asyncio.sleep(wait)
            else:
                await asyncio.sleep(30)
        except Exception:
            LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
            raise
async def loop(self):
    if self.config.cache == 'redis':
        cache = Redis(ip=self.config.redis['ip'],
                      port=self.config.redis['port'],
                      socket=self.config.redis.socket,
                      del_after_read=self.config.redis['del_after_read'],
                      flush=self.config.redis['start_flush'],
                      retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
    elif self.config.cache == 'kafka':
        cache = Kafka(self.config.kafka['ip'],
                      self.config.kafka['port'],
                      flush=self.config.kafka['start_flush'])

    interval = self.config.storage_interval
    time_partition = False
    multiplier = 1
    if not isinstance(interval, int):
        if len(interval) > 1:
            multiplier = int(interval[:-1])
            interval = interval[-1]
        base_interval = interval
        if interval in {'M', 'H', 'D'}:
            time_partition = True
            if interval == 'M':
                interval = 60 * multiplier
            elif interval == 'H':
                interval = 3600 * multiplier
            else:
                interval = 86400 * multiplier

    while True:
        start, end = None, None
        try:
            aggregation_start = time.time()
            if time_partition:
                interval_start = aggregation_start
                if end:
                    interval_start = end + timedelta(seconds=interval + 1)
                start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
            if 'exchanges' in self.config and self.config.exchanges:
                for exchange in self.config.exchanges:
                    for dtype in self.config.exchanges[exchange]:
                        # Skip over the retries arg in the config if present.
                        if dtype in {'retries', 'channel_timeouts'}:
                            continue
                        for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
                                self.config.exchanges[exchange][dtype] else \
                                self.config.exchanges[exchange][dtype]['symbols']:
                            store = Storage(self.config)
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange, dtype, pair, start=start, end=end)
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                continue
                            store.aggregate(data)
                            store.write(exchange, dtype, pair, time.time())
                            cache.delete(exchange, dtype, pair)
                            LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
                total = time.time() - aggregation_start
                wait = interval - total
                if wait <= 0:
                    LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                    wait = 0.5
                await asyncio.sleep(wait)
            else:
                await asyncio.sleep(30)
        except Exception:
            LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
            raise
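# Standalone sketch (assumption, for illustration only) of the storage_interval handling
# that the loop variants above repeat inline: an integer is taken as seconds, while strings
# such as '30M', '4H' or 'D' select minute/hour/day partitioning with an optional
# multiplier. The helper name parse_storage_interval is hypothetical and does not exist in
# the original code.
def parse_storage_interval(interval):
    """Return (seconds, time_partition, base_interval, multiplier) for a storage_interval value."""
    time_partition = False
    multiplier = 1
    base_interval = interval
    if not isinstance(interval, int):
        if len(interval) > 1:
            multiplier = int(interval[:-1])
            interval = interval[-1]
        base_interval = interval
        if interval in {'M', 'H', 'D'}:
            time_partition = True
            interval = {'M': 60, 'H': 3600, 'D': 86400}[interval] * multiplier
    return interval, time_partition, base_interval, multiplier

# Examples:
#   parse_storage_interval(60)    -> (60, False, 60, 1)
#   parse_storage_interval('30M') -> (1800, True, 'M', 30)
#   parse_storage_interval('D')   -> (86400, True, 'D', 1)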
async def loop(self):
    if self.config.cache == 'redis':
        cache = Redis(ip=self.config.redis['ip'],
                      port=self.config.redis['port'],
                      socket=self.config.redis.socket,
                      del_after_read=self.config.redis['del_after_read'],
                      flush=self.config.redis['start_flush'],
                      retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
    elif self.config.cache == 'kafka':
        cache = Kafka(self.config.kafka['ip'],
                      self.config.kafka['port'],
                      flush=self.config.kafka['start_flush'])

    self.store = Storage(self.config)  ### tmp

    interval = self.config.storage_interval
    time_partition = False
    multiplier = 1
    if not isinstance(interval, int):
        if len(interval) > 1:
            multiplier = int(interval[:-1])
            interval = interval[-1]
        base_interval = interval
        if interval in {'M', 'H', 'D'}:
            time_partition = True
            if interval == 'M':
                interval = 60 * multiplier
            elif interval == 'H':
                interval = 3600 * multiplier
            else:
                interval = 86400 * multiplier

    stats_cache = {}
    for exchange in self.config.exchanges:
        stats_cache[exchange] = {}
        for pair in self.config.exchanges[exchange][TRADES]:
            stats_cache[exchange][pair] = init_cache(
                InfluxConfig(db='crypto',
                             host='http://localhost:8086',
                             exchange=exchange,
                             pair=pair))

    while True:
        start, end = None, None
        try:
            aggregation_start = time.time()
            if time_partition:
                interval_start = aggregation_start
                if end:
                    interval_start = end + timedelta(seconds=interval + 1)
                start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
            if 'exchanges' in self.config and self.config.exchanges:
                data_arb = {}
                for exchange in self.config.exchanges:
                    stats_all = []  ### Stats from each loop iter stored here
                    data_all = []   ### Data... ""
                    data_arb[exchange] = {}
                    for dtype in self.config.exchanges[exchange]:
                        # Skip over the retries arg in the config if present.
                        if dtype in {'retries', 'channel_timeouts'}:
                            continue
                        # for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
                        #         self.config.exchanges[exchange][dtype] else \
                        #         self.config.exchanges[exchange][dtype]['symbols']:
                        for pair in self.config.exchanges[exchange][dtype]:  ### tmp
                            # store = Storage(self.config)
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange, dtype, pair, start=start, end=end)
                            data_all.append(data)
                            data_arb[exchange][pair] = data
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                continue
                            # if dtype == TRADES:
                            stats_all.append(self.collect_stats(data, exchange, pair, stats_cache))
                            # LOG.info('HAVING trades')
                            # stats_to_write = []
                            # for trade in data:
                            #     if 'id' not in trade:
                            #         trade['id'] = None
                            #     typed_trade = Trade(**trade)
                            #     update_stats(stats_cache[exchange][pair], typed_trade, stats_to_write)
                            # LOG.info('DONE computing stats for %s-%s', exchange, pair)
                            # store.aggregate(stats_to_write)
                            # store.write(exchange, STATS, pair, time.time())
                            #
                            # self.store.aggregate(data)
                            # self.store.write(exchange, dtype, pair, time.time())
                            cache.delete(exchange, dtype, pair)
                            # LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)

                    # self.store.aggregate(stats_all)
                    self.store.write(exchange, STATS, pair, time.time())
                    if any(data_all):
                        self.store.aggregate(data_all)
                        self.store.write(exchange, dtype, pair, time.time())
                if data_arb:
                    await self.write_arbs(data_arb)

                total = time.time() - aggregation_start
                wait = interval - total
                if wait <= 0:
                    LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                    wait = 0.5
                else:
                    LOG.warning(f"Storage operations took {total}s, interval {interval}s")
                await asyncio.sleep(wait)
            else:
                await asyncio.sleep(30)
        except Exception:
            LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
            raise
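# Illustrative note (assumption, not from the original file): in this experimental variant
# the per-interval read builds two nested mappings before anything is written:
#
#     stats_cache[exchange][pair]  -> rolling stats state created by init_cache(InfluxConfig(...))
#     data_arb[exchange][pair]     -> the raw entries read from the cache for that pair, later
#                                     handed to self.write_arbs() for cross-exchange comparison
#
# e.g. data_arb might look like {'COINBASE': {'BTC-USD': [...]}, 'KRAKEN': {'BTC-USD': [...]}},
# where each list holds the cached updates for one pair on one exchange (placeholder values).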