def __init__(self, f_price):
    '''
    Build a PriceLevel holding no quantity and an empty order tree

    :param f_price: value stored unchanged as the level's price
    '''
    self.order_tree = FastRBTree()
    self.i_qty = 0
    self.f_price = f_price
def __init__(self):
    """Create the empty container state: one tree, two dicts, a zero counter."""
    # NOTE(review): mip/mxp look like cached min/max prices and prmp like a
    # price map -- confirm against the methods that maintain them.
    self.mip = self.mxp = None
    self.prmp, self.order_map = {}, {}
    self.vol = 0
    self.ptree = FastRBTree()
def __init__(self, cache_size_limit, trace, csv_suffix=".csv"):
    """Set up cache state, the time-series CSV log and the table of future
    request positions built from ``trace``.

    :param cache_size_limit: stored unchanged on the instance
    :param trace: iterable of 2-item entries; the first item is used as the
        page key, the second item is ignored here
    :param csv_suffix: appended to "csv/min" to form the log file name
    """
    self.cache_size_limit = cache_size_limit
    self.cache = {}
    self.hits = 0.0
    self.requests = 0.0

    # Column order of the time-series CSV; the header row is written now.
    self.ts_order = ['row', 'hit']
    self.ts_datapoint = {key: None for key in self.ts_order}
    self.ts_datapoint['row'] = 0
    # The file is opened here and not closed in this method.
    self.ts_file = open("csv/min" + csv_suffix, "w")
    self.ts_writer = csv.writer(self.ts_file)
    self.ts_writer.writerow(self.ts_order)

    self.clairvoyance = FastRBTree()
    # precog: page -> deque of the trace indices at which the page occurs.
    self.precog = FastRBTree()

    last_time = time.time()
    for i, page_opcode in enumerate(trace):
        # Progress output, at most once every 0.1 seconds.
        if time.time() > last_time + 0.1:
            last_time = time.time()
            print '1', i, '\r',
            sys.stdout.flush()

        page, _ = page_opcode
        try:
            self.precog[page].append(i)
        except KeyError:
            # First time this page is seen: start its deque.
            self.precog[page] = collections.deque()
            self.precog[page].append(i)

        known_max = i

    # NOTE(review): with an empty trace known_max is never bound and the
    # next line raises -- confirm callers always pass a non-empty trace.
    known_max += 2
    # Terminate every deque with a distinct index larger than any real one.
    for times in self.precog.values():
        times.append(known_max)
        known_max += 1
    print
    print 'Done loading.'
def __init__(self, max_depth, data):
    """Populate the rate containers from ``data``.

    Each element of ``data`` is indexed as ``level[0]`` (rate) and
    ``level[1]`` (amount); both are converted with ``float``.
    """
    # int: maximum number of rate levels in tree
    self.max_depth = max_depth
    # int: total number of rate levels in tree
    self.depth = len(data)

    # RBTree: keeps the rates in sorted order; the stored value is unused,
    # so a constant 0 is inserted for every key
    self.rate_tree = FastRBTree()
    # dict: rate -> amount
    self.rate_dict = {}
    # float: running sum of the amounts of all levels
    self.volume = 0

    for entry in data:
        rate, amount = float(entry[0]), float(entry[1])
        self.rate_tree.insert(rate, 0)
        self.rate_dict[rate] = amount
        self.volume += amount
def __init__(self, env, i_id):
    '''
    Build an Agent bound to an environment and create one set of empty
    containers per instrument listed in env.l_instrument

    :param env: Environment Object. The Environment where the agent acts
    :param i_id: integer. Agent id
    '''
    self.env = env
    self.i_id = i_id

    # run status
    self.state = None
    self.done = False
    self.learning = False  # Whether the agent is expected to learn
    self.b_print_always = False
    self.log_info = {'duration': 0., 'total_reward': 0.}

    # per-instrument containers (d_initial_pos is left empty here)
    self.position = {}
    self.ofi_acc = {}
    self.d_order_tree = {}
    self.d_order_map = {}
    self.d_trades = {}
    self.d_initial_pos = {}

    l_fields = ('qAsk', 'Ask', 'qBid', 'Bid')
    for s_instr in self.env.l_instrument:
        self.position[s_instr] = dict.fromkeys(l_fields, 0.)
        self.ofi_acc[s_instr] = dict.fromkeys(l_fields, 0.)
        self.d_order_tree[s_instr] = {'BID': FastRBTree(),
                                      'ASK': FastRBTree()}
        self.d_order_map[s_instr] = {}
        self.d_trades[s_instr] = {'BID': [], 'ASK': []}
def reset(self, testing=False):
    '''
    Clear the state and rebuild the per-instrument containers

    :param testing: boolean. If should freeze policy (not read in this method)
    '''
    self.state = None
    self.done = False
    self.log_info = {'duration': 0., 'total_reward': 0.}

    # ofi_acc is not replaced by a new dict; its entries are overwritten
    # per instrument in the loop below
    self.position = {}
    self.d_order_tree = {}
    self.d_order_map = {}
    self.d_trades = {}

    l_fields = ('qAsk', 'Ask', 'qBid', 'Bid')
    for s_instr in self.env.l_instrument:
        self.position[s_instr] = dict.fromkeys(l_fields, 0.)
        self.ofi_acc[s_instr] = dict.fromkeys(l_fields, 0.)
        self.d_order_tree[s_instr] = {'BID': FastRBTree(),
                                      'ASK': FastRBTree()}
        self.d_order_map[s_instr] = {}
        self.d_trades[s_instr] = {'BID': [], 'ASK': []}
def test_add_symbol():
    """Insert the same three symbols into a SymbolTable and a reference tree,
    then check every key of the table's top scope against the tree."""
    ST = SymbolTable("")
    T = FastRBTree()

    symbols = [
        ("age",
         {'Type': "int", 'Attribute': None, 'TokenLocation': (10, 2)}),
        ("temperature",
         {'Type': "float", 'Attribute': 'static', 'TokenLocation': (11, 2)}),
        ("letter",
         {'Type': "char", 'Attribute': 'const', 'TokenLocation': (12, 2)}),
    ]
    for name, Content in symbols:
        ST.InsertSymbol(name, Content)
        T.insert(name, Content)

    for key in ST.TopScope.keys():
        stored = ST.TopScope.get(key)
        assert key in T
        assert T.get(key) == stored
        assert T.get(key) is not stored

    # TODO: write test to prove that the returned item is a pointer
    return
def __init__(self):
    """Start empty: no prices, no orders, zero volume, unknown price bounds."""
    self.min_price = self.max_price = None
    self.volume = 0
    self.order_map = {}  # Order ID to Order object
    self.price_map = {}  # Map from price -> order_list object
    self.price_tree = FastRBTree()
def __init__(self, name):
    """Create an empty, named container.

    :param name: stored unchanged as ``self.name``
    """
    self.name = name
    self.min_price = self.max_price = None
    self.order_map = {}  # Map order_id -> Order
    self.price_map = {}  # Map price -> OrderList
    self.tree = FastRBTree()
class Node:
    """Graph node that records, per child and per ancestor name, the set of
    sequence names in which that relation was observed."""

    def __init__(self, name):
        self.name = name
        self.children = {}            # child -> set of sequence names
        self.visited = False
        self.ancestors = FastRBTree()  # ancestor name -> set of sequence names

    def updateChildren(self, child, nameOfSequence):
        """Record that ``child`` follows this node in ``nameOfSequence``."""
        self.children.setdefault(child, set()).add(nameOfSequence)

    def getChild(self, nameOfChild):
        """Return the keys of ``children`` equal to ``nameOfChild``.

        The result is whatever ``filter`` returns, exactly as before.
        """
        return filter(lambda x: x == nameOfChild, self.children.keys())

    def updateAncestors(self, ancestors, nameOfSequence):
        """Add ``nameOfSequence`` to the entry of every given ancestor.

        :param ancestors: iterable of objects with a ``name`` attribute;
            entries whose name equals this node's own name are skipped
        :param nameOfSequence: sequence name to record
        """
        for ancestor in ancestors:
            if self.name == ancestor.name:
                continue
            sequences = self.ancestors.get(ancestor.name, None)
            if sequences is None:
                # Not recorded yet: start a new set for this ancestor.
                self.ancestors.insert(ancestor.name, {nameOfSequence})
            else:
                # The tree stores a reference to the set, so mutating it in
                # place is enough; no need to store it again.
                sequences.add(nameOfSequence)
def find_top_k_with_FastRBTree(filename = TDATA, k = 10):
    """
    Count the stripped lines of ``filename`` in a FastRBTree, then keep the
    ``k`` largest (count, line) pairs in a heap and return that heap list.

    Profile result:
       5 million strings:
       memory consuming: 259 MB
       time consuming: 39.9689998627
       [(753, 'bf'), (753, 'qj'), (753, 'zb'), (753, 'vz'), (763, 'ma'),
        (755, 'lx'), (779, 'qp'), (768, 'bg'), (758, 'eq'), (767, 'tf')]
    """
    counts = FastRBTree()
    with open(filename) as f:
        for line in f:
            word = line.strip()
            counts[word] = counts.setdefault(word, 0) + 1

    # Bounded heap: once it holds k entries, every push also pops the minimum.
    heap = []
    for word, freq in counts.iter_items():
        entry = (freq, word)
        if len(heap) >= k:
            heapq.heappushpop(heap, entry)
        else:
            heapq.heappush(heap, entry)
    return heap
def __init__(self, product_ids='ETH-USD', api_key=None, api_secret=None,
             passphrase=None, use_heartbeat=False,
             trade_log_file_path=None):
    """Store the connection settings and create per-product state.

    Credentials are stored only when ``api_key`` is given. A ``product_ids``
    value that is not a list is wrapped in a one-element list.
    """
    self._authenticated = api_key is not None
    if self._authenticated:
        self.api_key = api_key
        self.api_secret = api_secret
        self.passphrase = passphrase

    self.product_ids = (product_ids if isinstance(product_ids, list)
                        else [product_ids])
    product_ids = self.product_ids

    self.use_heartbeat = use_heartbeat
    self.trade_log_file_path = trade_log_file_path
    self._trade_file = None

    # One trader, one ask tree, one bid tree and one sequence slot per product.
    self.traders = {pid: gdax.trader.Trader(product_id=pid)
                    for pid in product_ids}
    self._asks = {pid: FastRBTree() for pid in product_ids}
    self._bids = {pid: FastRBTree() for pid in product_ids}
    self._sequences = dict.fromkeys(product_ids)

    self._ws_session = self._ws_connect = self._ws = None
def process_snapshot(self, message: Dict):
    """
    Rebuild both trees from a snapshot message

    :param message: mapping with 'asks' and 'bids', each an iterable of
        (price, size) pairs; both values are converted to Decimal
    """
    # A snapshot replaces whatever was stored before.
    self._asks = RBTree()
    self._bids = RBTree()

    for side, tree in (('asks', self._asks), ('bids', self._bids)):
        for price, size in message[side]:
            tree.insert(Decimal(price), Decimal(size))
def __init__(self, SourceFile):
    """Create an empty scope stack and an empty top scope.

    :param SourceFile: stored unchanged as ``self.SourceFile``
    """
    self.SourceFile = SourceFile
    self.DebugMode = False
    self.ReadMode = False          # Read or lookup mode
    self.TopScope = FastRBTree()   # the current top scope is held here
    self.Table = []                # the table is a stack (list), empty at start
def reset(self):
    '''
    Forget the current state and start again with zeroed positions, empty
    BID/ASK order trees and an empty order map
    '''
    self.d_order_map = {}
    self.d_order_tree = {'BID': FastRBTree(), 'ASK': FastRBTree()}
    self.position = dict.fromkeys(('qAsk', 'Ask', 'qBid', 'Bid'), 0.)
    self.state = None
def reset(self):
    '''
    Forget the current state and start again with zeroed positions, empty
    BID/ASK order trees, an empty order map and next_time at zero
    '''
    self.state = None
    # quantities start as int 0, prices as float 0.
    self.position = {'qAsk': 0, 'Ask': 0., 'qBid': 0, 'Bid': 0.}
    self.d_order_map = {}
    self.d_order_tree = {'BID': FastRBTree(), 'ASK': FastRBTree()}
    # Reset any variables here, if required
    self.next_time = 0.
class Tree(object):
    """Price-indexed collection of orders.

    ``priceTree`` and ``priceMap`` both map a price to its order list,
    ``orderMap`` maps an order id to its Order, and ``volume`` is the sum of
    the quantities of all stored orders.
    """

    def __init__(self):
        self.priceTree = FastRBTree()
        self.volume = 0
        self.priceMap = {}  # Map from price -> orderList object
        self.orderMap = {}  # Order ID to Order object

    def __len__(self):
        """Return the number of stored orders."""
        return len(self.orderMap)

    def getPrice(self, price):
        """Return the order list stored at ``price`` (KeyError if absent)."""
        return self.priceMap[price]

    def getOrder(self, idNum):
        """Return the order with id ``idNum`` (KeyError if absent)."""
        return self.orderMap[idNum]

    def createPrice(self, price):
        """Register an empty order list for ``price`` in tree and map."""
        newList = OrderList()
        self.priceTree.insert(price, newList)
        self.priceMap[price] = newList

    def removePrice(self, price):
        """Drop ``price`` from both the tree and the map."""
        self.priceTree.remove(price)
        del self.priceMap[price]

    def priceExists(self, price):
        """Return True when an order list exists for ``price``."""
        return price in self.priceMap

    def orderExists(self, idNum):
        """Return True when an order with id ``idNum`` is stored."""
        return idNum in self.orderMap

    def insertTick(self, tick):
        """Create an Order from ``tick`` and append it at the tick's price."""
        if tick.price not in self.priceMap:
            self.createPrice(tick.price)
        order = Order(tick, self.priceMap[tick.price])
        self.priceMap[order.price].appendOrder(order)
        self.orderMap[order.idNum] = order
        self.volume += order.qty

    def updateOrder(self, tick):
        """Apply a price or quantity change to the order ``tick.idNum``."""
        order = self.orderMap[tick.idNum]
        originalVolume = order.qty
        if tick.price != order.price:
            # Price changed: move the order to the new price level.
            orderList = self.priceMap[order.price]
            orderList.removeOrder(order)
            if len(orderList) == 0:
                # Fixed: this was a bare removePrice(...) call (NameError).
                self.removePrice(order.price)
            self.insertTick(tick)
            # insertTick added the new quantity; the quantity of the order
            # that was just removed must leave the total as well.
            self.volume -= originalVolume
        else:
            # Quantity changed
            order.updateQty(tick.qty, tick.price)
            self.volume += order.qty - originalVolume

    def removeOrderById(self, idNum):
        """Remove the order ``idNum``; drop its price level if it empties."""
        order = self.orderMap[idNum]
        self.volume -= order.qty
        order.orderList.removeOrder(order)
        if len(order.orderList) == 0:
            self.removePrice(order.price)
        del self.orderMap[idNum]

    def max(self):
        """Return the largest stored price (the two bodies were swapped)."""
        return max(self.priceTree)

    def min(self):
        """Return the smallest stored price (the two bodies were swapped)."""
        return min(self.priceTree)
def __init__(self, cache_entries_limit, ghost_entries_limit,
             trace_size_limit, csv_suffix="_mmc.csv"):
    """Initialise counters, model parameters and the three CSV logs.

    Opens "csv/mmc_rw", "csv/mmc_rw_evict" and "csv/mmc_rw_purge" (each
    followed by ``csv_suffix``) for writing and writes a header row to each.
    The files are not closed in this method.
    """
    # Limits and suffix, stored as given.
    self.cache_entries_limit = cache_entries_limit
    self.ghost_entries_limit = ghost_entries_limit
    self.trace_size_limit = trace_size_limit
    self.csv_suffix = csv_suffix

    # Containers.
    self.full_cache = FastRBTree()
    self.trace = collections.deque()
    self.stack = RBTree()
    self.ranker = RBTree()

    # Per-request flags and running counters.
    self.was_hit = self.was_ghost_hit = None
    self.num_hits = self.num_requests = 0
    self.num_in_cache = self.num_in_full_cache = self.num_reads = 0
    self.generation = 0

    # During startup, this will act like an LRU.
    self.startup = True
    self.EM_period = 50 * int(np.ceil(np.log(trace_size_limit)))
    self.countdown_to_EM = trace_size_limit // 2

    # Model parameters and their accumulators, one slot per component.
    self.tau = [0.25] * 4
    self.theta = [0.5] * 4
    self.acc_tau = [0.0] * 4
    self.acc_theta = [0.0] * 4

    # Time-series log.
    self.ts_order = [
        'row', 'hit', 'ghost_hit',
        'tau_R_SDD', 'tau_R_IRM', 'tau_W_SDD', 'tau_W_IRM',
        'theta_R_SDD', 'theta_R_IRM', 'theta_W_SDD', 'theta_W_IRM',
        'depth', 'rank',
        'Z_R_SDD', 'Z_R_IRM', 'Z_W_SDD', 'Z_W_IRM', 'Z_sum',
    ]
    self.ts_datapoint = dict.fromkeys(self.ts_order)
    self.ts_datapoint['row'] = 0
    self.ts_file = open("csv/mmc_rw" + self.csv_suffix, "w")
    self.ts_writer = csv.writer(self.ts_file)
    self.ts_writer.writerow(self.ts_order)

    # Eviction log.
    self.evict_order = ['row', 'depth', 'rank', 'value', 'opcode']
    self.evict_datapoint = dict.fromkeys(self.evict_order)
    self.evict_datapoint['row'] = 0
    self.evict_file = open("csv/mmc_rw_evict" + self.csv_suffix, "w")
    self.evict_writer = csv.writer(self.evict_file)
    self.evict_writer.writerow(self.evict_order)

    # Purge log.
    self.purge_order = ['row', 'depth', 'rank', 'value', 'opcode']
    self.purge_datapoint = dict.fromkeys(self.purge_order)
    self.purge_datapoint['row'] = 0
    self.purge_file = open("csv/mmc_rw_purge" + self.csv_suffix, "w")
    self.purge_writer = csv.writer(self.purge_file)
    self.purge_writer.writerow(self.purge_order)
def solve(n, k):
    """Split the largest key of the tree ``k`` times and report the last split.

    The tree starts with key ``n`` and value 1. Each round removes one
    instance of the largest key, halves it (odd keys give two equal halves,
    even keys give ``key // 2`` and ``key // 2 - 1``) and passes both halves
    to ``update_tree``. Returns the two halves of the last round joined by a
    space; with ``k == 0`` both values are ``n``.
    """
    tree = FastRBTree()
    tree.insert(n, 1)
    left = right = n
    for _ in range(k):
        largest, count = tree.max_item()
        tree.remove(largest)
        if count > 1:
            # Other instances of this key remain: put them back.
            tree.insert(largest, count - 1)
        half = largest // 2
        if largest % 2 == 1:
            left, right = half, half
        else:
            left, right = half, half - 1
        update_tree(tree, left)
        update_tree(tree, right)
    return "{} {}".format(left, right)
class Tree(object):
    """Orders grouped by price.

    ``price_tree`` and ``price_map`` both map a price to the same list of
    order dicts, ``order_map`` maps an order id to its dict and
    ``received_orders`` holds the sizes of orders announced via ``receive``.
    """

    def __init__(self):
        self.price_tree = FastRBTree()
        self.price_map = {}
        self.order_map = {}
        self.received_orders = {}

    def receive(self, order_id, size):
        """Remember the size of a received order."""
        self.received_orders[order_id] = size

    def create_price(self, price):
        """Register one new empty list for ``price`` in tree and map."""
        new_list = []
        self.price_tree.insert(price, new_list)
        self.price_map[price] = new_list

    def remove_price(self, price):
        """Drop ``price`` from both the tree and the map."""
        self.price_tree.remove(price)
        del self.price_map[price]

    def insert_order(self, order_id, size, price, initial=False):
        """Store an order at ``price``.

        Unless ``initial`` is true the order must have been announced with
        ``receive`` first; its ``received_orders`` entry is deleted
        (KeyError otherwise).
        """
        if not initial:
            del self.received_orders[order_id]
        if price not in self.price_map:
            self.create_price(price)
        order = {
            'order_id': order_id,
            'size': size,
            'price': price,
            'price_map': self.price_map[price]
        }
        self.price_map[price].append(order)
        self.order_map[order_id] = order

    def match(self, maker_order_id, match_size):
        """Reduce the size of ``maker_order_id`` by ``match_size``."""
        order = self.order_map[maker_order_id]
        order['size'] = order['size'] - match_size

    def change(self, order_id, new_size):
        """Overwrite the size of ``order_id``."""
        self.order_map[order_id]['size'] = new_size

    def remove_order(self, order_id):
        """Remove a stored order, or forget a merely received one."""
        if order_id in self.order_map:
            order = self.order_map[order_id]
            level = self.price_map[order['price']]
            # Filter in place. Rebinding price_map[price] to a new list left
            # price_tree, and the 'price_map' reference inside every order at
            # this price, pointing at the old list with the removed order
            # still in it.
            level[:] = [o for o in level if o['order_id'] != order_id]
            if not level:
                self.remove_price(order['price'])
            del self.order_map[order_id]
        else:
            del self.received_orders[order_id]
def __init__(self, env, i_id):
    '''
    Build an Agent with zeroed positions, empty BID/ASK order trees and an
    empty order map

    :param env: Environment Object. The Environment where the agent acts
    :param i_id: integer. Agent id
    '''
    self.env, self.i_id = env, i_id
    self.state = None
    self.position = dict.fromkeys(('qAsk', 'Ask', 'qBid', 'Bid'), 0.)
    self.d_order_map = {}
    self.d_order_tree = {'BID': FastRBTree(), 'ASK': FastRBTree()}
def __init__(self, s_side):
    '''
    Build an empty BookSide for the given side

    :param s_side: string. BID or ASK; anything else raises
        InvalidTypeException
    '''
    if s_side not in ('BID', 'ASK'):
        raise InvalidTypeException('side should be BID or ASK')
    self.s_side = s_side
    self.last_price = 0.
    self._i_idx = 0
    self.d_order_map = {}
    self.price_tree = FastRBTree()
def test_push_scope():
    """Each PushScope call must grow the SymbolTable's stack by one."""
    ST = SymbolTable("")
    first_scope, second_scope = FastRBTree(), FastRBTree()

    ST.PushScope(first_scope)
    assert len(ST.Table) == 1

    ST.PushScope(second_scope)
    assert len(ST.Table) == 2
    return
def __init__(self):
    """Start empty: no prices, no orders, zero volume, unknown price bounds."""
    self.minPrice = self.maxPrice = None
    self.volume = 0
    self.orderMap = {}  # Order ID to Order object
    self.priceMap = {}  # Map from price -> orderList object
    self.priceTree = FastRBTree()
def __init__(self, delta=0.01, K=25, CX=1.1):
    """Store the three tuning parameters, create the centroid tree and call
    ``reset()``, which runs last so it sees every attribute set here."""
    self.delta, self.K, self.CX = delta, K, CX
    self.nreset = 0
    self.centroids = RBTree()
    self.reset()
def __init__(self):
    """Start empty: an empty tree, empty maps, zero volume, no price bounds."""
    self.min_price = self.max_price = None
    self.price_map, self.order_map = {}, {}
    self.volume = 0
    self.tree = FastRBTree()
class Tree(object):
    """Orders grouped by price.

    ``price_tree`` and ``price_map`` both map a price to the same list of
    order dicts, ``order_map`` maps an order id to its dict and
    ``received_orders`` holds the sizes of orders announced via ``receive``.
    """

    def __init__(self):
        self.price_tree = FastRBTree()
        self.price_map = {}
        self.order_map = {}
        self.received_orders = {}

    def receive(self, order_id, size):
        """Remember the size of a received order."""
        self.received_orders[order_id] = size

    def create_price(self, price):
        """Register one new empty list for ``price`` in tree and map."""
        new_list = []
        self.price_tree.insert(price, new_list)
        self.price_map[price] = new_list

    def remove_price(self, price):
        """Drop ``price`` from both the tree and the map."""
        self.price_tree.remove(price)
        del self.price_map[price]

    def insert_order(self, order_id, size, price, initial=False):
        """Store an order at ``price``.

        Unless ``initial`` is true the order must have been announced with
        ``receive`` first; its ``received_orders`` entry is deleted
        (KeyError otherwise).
        """
        if not initial:
            del self.received_orders[order_id]
        if price not in self.price_map:
            self.create_price(price)
        order = {'order_id': order_id, 'size': size, 'price': price,
                 'price_map': self.price_map[price]}
        self.price_map[price].append(order)
        self.order_map[order_id] = order

    def match(self, maker_order_id, match_size):
        """Reduce the size of ``maker_order_id`` by ``match_size``."""
        order = self.order_map[maker_order_id]
        order['size'] = order['size'] - match_size

    def change(self, order_id, new_size):
        """Overwrite the size of ``order_id``."""
        self.order_map[order_id]['size'] = new_size

    def remove_order(self, order_id):
        """Remove a stored order, or forget a merely received one."""
        if order_id in self.order_map:
            order = self.order_map[order_id]
            level = self.price_map[order['price']]
            # Filter in place. Rebinding price_map[price] to a new list left
            # price_tree, and the 'price_map' reference inside every order at
            # this price, pointing at the old list with the removed order
            # still in it.
            level[:] = [o for o in level if o['order_id'] != order_id]
            if not level:
                self.remove_price(order['price'])
            del self.order_map[order_id]
        else:
            del self.received_orders[order_id]
class Tree(object):
    """Orders grouped by price, each price level held in an OrderList."""

    def __init__(self):
        self.received_orders = {}
        self.order_map = {}  # Order ID to Order object
        self.price_map = {}  # Map from price -> order_list object
        self.price_tree = FastRBTree()

    def receive(self, order_id, size):
        """Remember the size of a received order."""
        self.received_orders[order_id] = size

    def create_price(self, price):
        """Register one new OrderList for ``price`` in tree and map."""
        level = OrderList()
        self.price_tree.insert(price, level)
        self.price_map[price] = level

    def remove_price(self, price):
        """Drop ``price`` from both the tree and the map."""
        self.price_tree.remove(price)
        del self.price_map[price]

    def insert_order(self, order_id, size, price, initial=False):
        """Store a new Order at ``price``.

        Unless ``initial`` is true, the ``received_orders`` entry of the
        order is deleted first (KeyError if it is missing).
        """
        if not initial:
            del self.received_orders[order_id]
        if price not in self.price_map:
            self.create_price(price)
        order = Order(order_id, size, price, self.price_map[price])
        self.price_map[order.price].append_order(order)
        self.order_map[order.order_id] = order

    def match(self, maker_order_id, size):
        """Reduce the size of ``maker_order_id`` by ``size``."""
        order = self.order_map[maker_order_id]
        order.update_size(order.size - size)

    def change(self, order_id, new_size):
        """Set the size of ``order_id`` to ``new_size``."""
        self.order_map[order_id].update_size(new_size)

    def remove_order(self, order_id):
        """Remove a stored order, or forget a merely received one."""
        if order_id not in self.order_map:
            del self.received_orders[order_id]
            return
        order = self.order_map[order_id]
        order.order_list.remove_order(order)
        if len(order.order_list) == 0:
            self.remove_price(order.price)
        del self.order_map[order_id]
def __init__(self):
    '''
    Limit order book tree built on a self-balancing Red-Black tree.

    Attributes created here, all empty:
        self.price_tree: FastRBTree
            keys are prices, values the number of orders on that level
        self.limit_levels: dict
            key: price level; value: OrderLinkedlist object
        self.order_ids: dict
            key: order id; value: Order object, used to locate orders by id
        self.max_price, self.min_price: None until a price is known
    '''
    self.order_ids = {}
    self.limit_levels = {}
    self.max_price = self.min_price = None
    self.price_tree = FastRBTree()
def __init__(self, s_side, fr_data, i_member=None):
    '''
    Build an empty BookSide that reads its orders from fr_data

    :param s_side: string. BID or ASK; anything else raises
        InvalidTypeException
    :param fr_data: ZipExtFile object. data to read
    :param i_member*: integer. Member number to be used as a filter
    '''
    if s_side not in ('BID', 'ASK'):
        raise InvalidTypeException('side should be BID or ASK')
    self.s_side = s_side
    self.i_member = i_member
    self.fr_data = fr_data
    self.price_tree = FastRBTree()
    self._i_idx = 0
    self.parser = parser_data.LineParser(s_side)
    self.d_order_map = {}
    self.last_price = 0.
def run(self):
    """Visit every point once, appending each to ``self.order``, then extract
    the clusters and print how long the extraction took."""
    for p in range(self.N):
        if not self.processed[p]:
            self.processed[p] = True
            self.order.append(p)
            seeds = FastRBTree()
            self._update(p, seeds)
            # Drain the seed tree; _update may add further seeds meanwhile.
            while seeds:
                # pop_min returns ( (reachability, q), q ).
                _, q = seeds.pop_min()
                self.processed[q] = True
                self.order.append(q)
                self._update(q, seeds)

    began = timer()
    self._extract_cluster_xi()
    print("extract cluster took", timer() - began)
def __init__(self, node_timeout):
    """Create empty registries and both queues, then spawn the culling task.

    :param node_timeout: stored unchanged as ``self._node_timeout``
    """
    self._node_timeout = node_timeout
    self._logger = logging.getLogger(type(self).__name__)

    self._nodes, self._sessions, self._sessions_by_owner = {}, {}, {}
    self._keepalive_queue = FastRBTree()
    self._priority_queue = FastRBTree()

    # Spawned last so every attribute above already exists.
    self._culling_timer = runtime.greenpool.spawn(self._cull_dead_nodes)
def __init__(self, product_ids='ETH-USD', api_key=None, api_secret=None,
             passphrase=None, use_heartbeat=False,
             trade_log_file_path=None):
    """Initialise the base class with the same arguments, then create one
    trader, ask tree, bid tree and sequence slot per product id."""
    super().__init__(
        product_ids=product_ids,
        api_key=api_key,
        api_secret=api_secret,
        passphrase=passphrase,
        use_heartbeat=use_heartbeat,
        trade_log_file_path=trade_log_file_path,
    )

    # A single product id is wrapped in a one-element list.
    ids = product_ids if isinstance(product_ids, list) else [product_ids]

    self.traders = {pid: gdax.trader.Trader(product_id=pid) for pid in ids}
    self._asks = {pid: FastRBTree() for pid in ids}
    self._bids = {pid: FastRBTree() for pid in ids}
    self._sequences = dict.fromkeys(ids)
def __init__(self, cache_entries_limit, ghost_entries_limit,
             trace_size_limit, csv_suffix="_mmc.csv"):
    """Initialise counters, model parameters and the three CSV logs.

    Opens "csv/mmc_rw", "csv/mmc_rw_evict" and "csv/mmc_rw_purge" (each
    followed by ``csv_suffix``) for writing and writes a header row to each.
    The files are not closed in this method.
    """
    # Limits and suffix, stored as given.
    self.cache_entries_limit = cache_entries_limit
    self.ghost_entries_limit = ghost_entries_limit
    self.trace_size_limit = trace_size_limit
    self.csv_suffix = csv_suffix

    # Containers.
    self.full_cache = FastRBTree()
    self.trace = collections.deque()
    self.stack = RBTree()
    self.ranker = RBTree()

    # Per-request flags and running counters.
    self.was_hit = self.was_ghost_hit = None
    self.num_hits = self.num_requests = 0
    self.num_in_cache = self.num_in_full_cache = self.num_reads = 0
    self.generation = 0

    # During startup, this will act like an LRU.
    self.startup = True
    self.EM_period = 50 * int(np.ceil(np.log(trace_size_limit)))
    self.countdown_to_EM = trace_size_limit // 2

    # Model parameters and their accumulators, one slot per component.
    self.tau = [0.25] * 4
    self.theta = [0.5] * 4
    self.acc_tau = [0.0] * 4
    self.acc_theta = [0.0] * 4

    # Time-series log.
    self.ts_order = [
        'row', 'hit', 'ghost_hit',
        'tau_R_SDD', 'tau_R_IRM', 'tau_W_SDD', 'tau_W_IRM',
        'theta_R_SDD', 'theta_R_IRM', 'theta_W_SDD', 'theta_W_IRM',
        'depth', 'rank',
        'Z_R_SDD', 'Z_R_IRM', 'Z_W_SDD', 'Z_W_IRM', 'Z_sum',
    ]
    self.ts_datapoint = dict.fromkeys(self.ts_order)
    self.ts_datapoint['row'] = 0
    self.ts_file = open("csv/mmc_rw" + self.csv_suffix, "w")
    self.ts_writer = csv.writer(self.ts_file)
    self.ts_writer.writerow(self.ts_order)

    # Eviction log.
    self.evict_order = ['row', 'depth', 'rank', 'value', 'opcode']
    self.evict_datapoint = dict.fromkeys(self.evict_order)
    self.evict_datapoint['row'] = 0
    self.evict_file = open("csv/mmc_rw_evict" + self.csv_suffix, "w")
    self.evict_writer = csv.writer(self.evict_file)
    self.evict_writer.writerow(self.evict_order)

    # Purge log.
    self.purge_order = ['row', 'depth', 'rank', 'value', 'opcode']
    self.purge_datapoint = dict.fromkeys(self.purge_order)
    self.purge_datapoint['row'] = 0
    self.purge_file = open("csv/mmc_rw_purge" + self.csv_suffix, "w")
    self.purge_writer = csv.writer(self.purge_file)
    self.purge_writer.writerow(self.purge_order)
def __init__(self, protocol, debug=False):
    '''
    Build an order book that acts as a client of the given blob protocol.

    :param protocol: blob protocol to which this will be a client
    :param debug: flag to print debug message
    '''
    BlobClient.__init__(self, protocol)
    self.debug = debug

    # Sequence number of the blob protocol; unknown until set elsewhere.
    self.sequence = None
    # BookClients that messages can be forwarded to.
    self.clients = []

    # Order storage: lookup by id plus one tree per side.
    self.orders_by_id = {}
    self.bids, self.asks = FastRBTree(), FastRBTree()
def GetLeastNumbers2(l, k):
    """Return the ``k`` smallest values of ``l`` in ascending order.

    A FastRBTree maps each kept value to how many times it is kept, so
    duplicate values are counted individually instead of collapsing into a
    single key (which previously made the result shorter than ``k``).

    :param l: iterable of mutually comparable values
    :param k: number of values to keep; ``k <= 0`` yields an empty list
    :return: list of at most ``k`` values, smallest first
    """
    if k <= 0:
        # Previously this reached max() on an empty tree and raised.
        return []

    tree = FastRBTree()  # value -> multiplicity among the kept values
    count = 0
    for i in l:
        if count < k:
            tree[i] = tree.get(i, 0) + 1
            count += 1
            continue
        # The tree holds exactly k values here, so it is never empty.
        maxVal = tree.max_key()
        if i < maxVal:
            # Replace one instance of the current maximum with i.
            remaining = tree[maxVal] - 1
            if remaining:
                tree[maxVal] = remaining
            else:
                tree.remove(maxVal)
            tree[i] = tree.get(i, 0) + 1

    # Tree iteration is in key order; expand each value by its multiplicity.
    return [value for value, times in tree.items() for _ in range(times)]
def createSequenceDB():
    """Build the per-IP alert database from the file named in ``sys.argv[1]``.

    Every line is handed to ``parseLineOfJSON``; lines for which it returns
    None are skipped. The result maps each alert's 'IP' to a FastRBTree whose
    keys are 'Time' values and whose values are lists of the distinct
    'alert' values seen at that time.

    :return: dict of IP -> FastRBTree(time -> list of alerts)
    """
    db = {}
    # The context manager closes the file even if parsing raises.
    with open(sys.argv[1], "r") as source:
        for line in source:
            alert = parseLineOfJSON(line)
            if alert is None:
                continue

            # No sequence (IP address) in the DB yet: create it.
            if alert['IP'] not in db:
                db[alert['IP']] = FastRBTree()

            # Is there already an event at this time in the sequence?
            isThereNode = db[alert['IP']].get(alert['Time'], None)
            if isThereNode is not None:
                if alert['alert'] not in isThereNode:
                    isThereNode.append(alert['alert'])
            else:
                db[alert['IP']].insert(alert['Time'], [alert['alert']])
    return db
class OrderTree(object):
    """Price -> amount store that caches its smallest and largest price."""

    def __init__(self):
        self.price_tree = FastRBTree()
        self.price_map = {}
        self.min_price = None
        self.max_price = None

    def insert_price(self, price, amount):
        """Store ``amount`` at ``price`` and widen the cached bounds."""
        self.price_tree.insert(price, amount)
        self.price_map[price] = amount
        if self.max_price is None or price > self.max_price:
            self.max_price = price
        if self.min_price is None or price < self.min_price:
            self.min_price = price

    def update_price(self, price, amount):
        """Overwrite the amount stored at ``price``.

        Delegates to ``insert_price`` so that a price which is not stored yet
        also refreshes ``min_price``/``max_price``; previously such a call
        inserted the price but left the cached bounds stale. For an existing
        price the bound checks are no-ops.
        """
        self.insert_price(price, amount)

    def remove_price(self, price):
        """Delete ``price`` and recompute a cached bound if it was the one."""
        self.price_tree.remove(price)
        del self.price_map[price]
        if self.max_price == price:
            try:
                self.max_price = max(self.price_tree)
            except ValueError:
                # The tree is empty now.
                self.max_price = None
        if self.min_price == price:
            try:
                self.min_price = min(self.price_tree)
            except ValueError:
                self.min_price = None

    def price_exists(self, price):
        """Return True when ``price`` is stored."""
        return price in self.price_map

    def max(self):
        """Return the cached largest price, or None when empty."""
        return self.max_price

    def min(self):
        """Return the cached smallest price, or None when empty."""
        return self.min_price
class Hyperedge:
    """A hyperedge identified by ``hyperkey`` that keeps a count per alert key
    in a ``Tree``."""

    def __init__(self, hyperkey, col, hlabel):
        """Create the edge with one alert, ``hlabel``, stored with count 1."""
        self.hyperkey, self.col = hyperkey, col
        self._alerts = Tree()
        self.insert_alert(hlabel, 1)
        self.nalerts = 1

    def get_alert(self, key):
        """Return whatever the alert tree's ``get`` yields for ``key``."""
        alerts = self._alerts
        return alerts.get(key)

    def insert_alert(self, alert_key, count):
        """Store ``count`` under ``alert_key`` in the alert tree."""
        alerts = self._alerts
        alerts.insert(alert_key, count)

    def foreach_alert(self, func):
        """Hand ``func`` to the alert tree's ``foreach``."""
        alerts = self._alerts
        alerts.foreach(func)

    def pop_alert(self, key):
        """Remove ``key`` from the alert tree and return its value."""
        alerts = self._alerts
        return alerts.pop(key)
class MMCRWPolicy(object):
    """Cache replacement policy with a four-component model over reads/writes.

    The components are indexed by the constants R_SDD, R_IRM, W_SDD and W_IRM
    (with D being their count); these, together with Node and Record, are
    defined outside this class. ``tau`` and ``theta`` hold one parameter per
    component and are re-estimated from the request trace by ``EM_algorithm``.
    Three CSV logs are written under "csv/": one row per request, one per
    eviction and one per purge.
    """

    def __init__(self, cache_entries_limit, ghost_entries_limit,
                 trace_size_limit, csv_suffix="_mmc.csv"):
        """Initialise counters and parameters and open the three CSV logs.

        :param cache_entries_limit: most entries that may be un-evicted
        :param ghost_entries_limit: extra entries tolerated in full_cache
            beyond cache_entries_limit before a purge
        :param trace_size_limit: most records kept in the trace
        :param csv_suffix: appended to each log file name
        """
        self.full_cache = FastRBTree()
        self.was_hit = None
        self.was_ghost_hit = None
        self.num_hits = 0
        self.num_requests = 0
        self.cache_entries_limit = cache_entries_limit
        self.ghost_entries_limit = ghost_entries_limit
        self.trace_size_limit = trace_size_limit
        self.trace = collections.deque()
        self.stack = RBTree()
        self.ranker = RBTree()
        self.generation = 0
        # During startup, this will act like an LRU.
        self.startup = True
        self.EM_period = 50 * int(np.ceil(np.log(trace_size_limit)))
        self.countdown_to_EM = trace_size_limit // 2
        self.tau = [0.25, 0.25, 0.25, 0.25]
        self.theta = [0.5, 0.5, 0.5, 0.5]
        self.acc_tau = [0.0, 0.0, 0.0, 0.0]
        self.acc_theta = [0.0, 0.0, 0.0, 0.0]
        self.num_in_cache = 0
        self.num_in_full_cache = 0
        self.num_reads = 0
        self.csv_suffix = csv_suffix

        self.ts_order = [
            'row', 'hit', 'ghost_hit',
            'tau_R_SDD', 'tau_R_IRM', 'tau_W_SDD', 'tau_W_IRM',
            'theta_R_SDD', 'theta_R_IRM', 'theta_W_SDD', 'theta_W_IRM',
            'depth', 'rank',
            'Z_R_SDD', 'Z_R_IRM', 'Z_W_SDD', 'Z_W_IRM', 'Z_sum'
        ]
        self.ts_datapoint = {key: None for key in self.ts_order}
        self.ts_datapoint['row'] = 0
        self.ts_file = open("csv/mmc_rw" + self.csv_suffix, "w")
        self.ts_writer = csv.writer(self.ts_file)
        self.ts_writer.writerow(self.ts_order)

        self.evict_order = ['row', 'depth', 'rank', 'value', 'opcode']
        self.evict_datapoint = {key: None for key in self.evict_order}
        self.evict_datapoint['row'] = 0
        self.evict_file = open("csv/mmc_rw_evict" + self.csv_suffix, "w")
        self.evict_writer = csv.writer(self.evict_file)
        self.evict_writer.writerow(self.evict_order)

        self.purge_order = ['row', 'depth', 'rank', 'value', 'opcode']
        self.purge_datapoint = {key: None for key in self.purge_order}
        self.purge_datapoint['row'] = 0
        self.purge_file = open("csv/mmc_rw_purge" + self.csv_suffix, "w")
        self.purge_writer = csv.writer(self.purge_file)
        self.purge_writer.writerow(self.purge_order)

    def request(self, page, opcode):
        """Process one request for ``page`` with opcode 'r' or 'w'.

        Updates the hit / ghost-hit flags and counters, appends a trace
        record, keeps the trace within its size limit, periodically runs EM
        and pages out when a limit is exceeded.
        """
        self.num_requests += 1
        self.was_hit = False
        self.was_ghost_hit = False
        node = self.get_node(page)
        if node:
            # Known page: a ghost hit at least, a real hit if not evicted.
            self.was_ghost_hit = True
            if not node.is_evicted:
                self.num_hits += 1
                self.was_hit = True
            Z = self.calculate_Z(node.depth, node.rank, node.opcode)
            node.hit_count += Z[R_IRM] + Z[W_IRM]
        else:
            node = Node(self)
            node.hit_count = self.tau[R_IRM] + self.tau[W_IRM]
            node.page_key = page
            self.full_cache[page] = node

        if not self.was_hit:
            self.num_in_cache += 1
        if not self.was_ghost_hit:
            self.num_in_full_cache += 1
        else:
            # The node's previous opcode stops counting as a read.
            if node.opcode == 'r':
                self.num_reads -= 1
        if opcode == 'r':
            self.num_reads += 1

        node.is_evicted = node.is_purged = False
        record = Record(self, node)
        self.add_trace_record(record)
        node.opcode = opcode

        if len(self.trace) > self.trace_size_limit:
            # Slide the window: account for the new record, drop the oldest.
            popped_record = self.trace.popleft()
            self.update_tau_and_theta_accs(record, increment=True)
            self.update_tau_and_theta_accs(popped_record, increment=False)
            self.refresh_params()
            popped_record.node.hit_count -= popped_record.Z[R_IRM]
            popped_record.node.hit_count -= popped_record.Z[W_IRM]

        node.restack()
        node.rerank()

        self.countdown_to_EM -= 1
        if self.countdown_to_EM == 0:
            self.EM_algorithm(delta=0.00001)
            self.countdown_to_EM = self.EM_period
            self.startup = False

        if (
            self.num_in_cache > self.cache_entries_limit or
            self.num_in_full_cache >
            self.cache_entries_limit + self.ghost_entries_limit
        ):
            self.pageout()
        #dump_cache(self, "exp")

    def add_trace_record(self, record):
        """Write one time-series CSV row for ``record`` and append it to the
        trace."""
        self.ts_datapoint['row'] = self.num_requests
        if self.was_hit:
            self.ts_datapoint['hit'] = 1
        else:
            self.ts_datapoint['hit'] = 0
        if self.was_ghost_hit:
            self.ts_datapoint['ghost_hit'] = 1
        else:
            self.ts_datapoint['ghost_hit'] = 0
        self.ts_datapoint['tau_R_SDD'] = self.tau[R_SDD]
        self.ts_datapoint['tau_R_IRM'] = self.tau[R_IRM]
        self.ts_datapoint['tau_W_SDD'] = self.tau[W_SDD]
        self.ts_datapoint['tau_W_IRM'] = self.tau[W_IRM]
        self.ts_datapoint['theta_R_SDD'] = self.theta[R_SDD]
        self.ts_datapoint['theta_R_IRM'] = self.theta[R_IRM]
        self.ts_datapoint['theta_W_SDD'] = self.theta[W_SDD]
        self.ts_datapoint['theta_W_IRM'] = self.theta[W_IRM]
        self.ts_datapoint['Z_R_SDD'] = record.Z[R_SDD]
        self.ts_datapoint['Z_R_IRM'] = record.Z[R_IRM]
        self.ts_datapoint['Z_W_SDD'] = record.Z[W_SDD]
        self.ts_datapoint['Z_W_IRM'] = record.Z[W_IRM]
        self.ts_datapoint['Z_sum'] = sum(record.Z)
        self.ts_datapoint['depth'] = record.depth
        self.ts_datapoint['rank'] = record.node.rank
        self.ts_writer.writerow(
            [self.ts_datapoint[key] for key in self.ts_order])
        self.ts_file.flush()
        self.trace.append(record)

    def pageout(self):
        """Evict and/or purge the node with the lowest expected value.

        The eviction candidate is the minimum over non-evicted nodes; the
        purge candidate is the minimum over all nodes in the ranker.
        """
        min_node = None
        min_node_value = None
        min_ghost = None
        min_ghost_value = None
        # Memoise each node's depth so the ranker pass below can use it.
        for depth, node in enumerate(self.stack.values()):
            node.depth_memo = depth
        for rank, node in enumerate(self.ranker.values()):
            node.recompute_expected_value(depth=node.depth_memo, rank=rank)
            value = node.expected_value
            if not node.is_evicted:
                if min_node is None or value < min_node_value:
                    min_node = node
                    min_node_value = value
            if min_ghost is None or value < min_ghost_value:
                min_ghost = node
                min_ghost_value = value
        if self.num_in_cache > self.cache_entries_limit:
            self.evict(min_node)
        if (
            self.num_in_full_cache >
            self.cache_entries_limit + self.ghost_entries_limit
        ):
            self.purge(min_ghost)

    def EM_algorithm(self, delta):
        """Alternate E and M steps until tau/theta move by at most ``delta``
        (in summed value) or 51 iterations have run."""
        def abs_sum():
            return sum(self.tau) + sum(self.theta)
        before = delta + 4.0
        i = 0
        # We need to detect if we're in a "nonsense" local optimum. The
        # algorithm will optimize to the global maximum if we aren't in one of
        # these cases.
        if (self.startup or
                min(self.tau) < 0.00001 or
                min(self.theta) < 0.00001):
            use_hard_Z = True
        else:
            use_hard_Z = False
        while abs(before - abs_sum()) > delta:
            before = abs_sum()
            # Only the very first iteration uses the fixed uniform Z.
            hard_Z = [0.25, 0.25, 0.25, 0.25] if use_hard_Z and i == 0 else None
            self.E_step(hard_Z=hard_Z)
            i += 1
            self.M_step()
            # Since we are rearranging the ranks, it's possible that we can
            # get into a situation where the ranks shift in a cycle such
            # that the tau delta is always exceeded. I've only seen this limit
            # hit when the trace size is very small (e.g. 10).
            if i > 50:
                break

    def E_step(self, hard_Z=None):
        """Treat self.tau and self.theta as constants.

        Recomputes ``_Z`` for every trace record (or assigns ``hard_Z``),
        re-accumulates each node's ``_hit_count`` and rebuilds the ranker.
        """
        for node in self.full_cache.values():
            node._hit_count = 0.0
        for record in self.trace:
            if hard_Z is None:
                if record.node.is_purged:
                    rank = record.node.rank_purge_memo
                else:
                    rank = record.node.rank
                record._Z = self.calculate_Z(record.depth, rank, record.opcode)
            else:
                record._Z = hard_Z
            record.node._hit_count += record._Z[R_IRM] + record._Z[W_IRM]
        new_ranker = RBTree()
        for node in self.full_cache.values():
            node.ranker_key = node.new_ranker_key()
            new_ranker[node.ranker_key] = node
        self.ranker = new_ranker

    def M_step(self):
        """Treat Record.Z as constant.

        Rebuilds both accumulators from the whole trace and refreshes
        tau/theta from them.
        """
        self.acc_tau = [0.0 for d in range(D)]
        self.acc_theta = [0.0 for d in range(D)]
        for record in self.trace:
            self.update_tau_and_theta_accs(record, increment=True)
        self.refresh_params()

    def calculate_Z(self, depth, rank, opcode):
        """Return the per-component weights Z for one observation.

        :param depth: exponent used for the two *_SDD components
        :param rank: exponent used for the two *_IRM components
        :param opcode: None normalises over all components; 'r' or 'w'
            normalises over that opcode's two components and zeroes the
            other two
        :return: list of D floats. For 'r'/'w' a zero denominator yields 0.5
            for each of the opcode's two components. Any other opcode falls
            through and returns None.
        """
        H = [depth, rank, depth, rank]

        def num_on_hit(i):
            return (self.tau[i] * self.theta[i] *
                    (1 - self.theta[i])**H[i])

        def den_on_hit(i, j):
            acc = 0.0
            for x in [i, j]:
                acc += num_on_hit(x)
            return acc

        if opcode is None:
            num = [0.0 for d in range(D)]
            for i in range(D):
                num[i] = num_on_hit(i)
            den = sum(num)
            return [n / den for n in num]
        # Compare by value: identity of string literals is an interpreter
        # detail and must not decide which branch runs.
        elif opcode == 'r':
            num = [num_on_hit(R_SDD), num_on_hit(R_IRM)]
            den = den_on_hit(R_SDD, R_IRM)
            try:
                return [num[0] / den, num[1] / den, 0.0, 0.0]
            except ZeroDivisionError:
                return [0.5, 0.5, 0.0, 0.0]
        elif opcode == 'w':
            num = [num_on_hit(W_SDD), num_on_hit(W_IRM)]
            den = den_on_hit(W_SDD, W_IRM)
            try:
                return [0.0, 0.0, num[0] / den, num[1] / den]
            except ZeroDivisionError:
                return [0.0, 0.0, 0.5, 0.5]

    def refresh_params(self):
        """Recompute tau and theta from the accumulators and the trace
        length."""
        R = len(self.trace)
        self.tau = [self.acc_tau[d] / R for d in range(D)]
        self.theta = [0.0, 0.0, 0.0, 0.0]
        for d in range(D):
            try:
                self.theta[d] = (R * self.tau[d] /
                                 (R * self.tau[d] + self.acc_theta[d]))
            except ZeroDivisionError:
                # Deliberate: a zero denominator leaves theta[d] at 0.0.
                pass

    def _update_tau_and_theta_accs(self, Z, depth, rank, increment=True):
        """Add (or subtract) one observation's contribution to both
        accumulators; acc_theta is clamped at 0.0 when subtracting."""
        H = [depth, rank, depth, rank]
        if increment:
            self.acc_tau = [self.acc_tau[d] + Z[d] for d in range(D)]
            self.acc_theta = [self.acc_theta[d] + Z[d] * H[d]
                              for d in range(D)]
        else:
            self.acc_tau = [self.acc_tau[d] - Z[d] for d in range(D)]
            self.acc_theta = [max(0.0, self.acc_theta[d] - Z[d] * H[d])
                              for d in range(D)]

    def update_tau_and_theta_accs(self, record, increment=True):
        """Apply ``record`` to the accumulators, using the memoised rank when
        its node has been purged."""
        if record.node.is_purged:
            rank = record.node.rank_purge_memo
        else:
            rank = record.node.rank
        self._update_tau_and_theta_accs(record.Z, record.depth, rank,
                                        increment)

    def evict(self, node):
        """Log ``node`` to the eviction CSV and mark it evicted."""
        self.evict_datapoint['row'] += 1
        self.evict_datapoint['depth'] = node.depth
        self.evict_datapoint['rank'] = node.rank
        self.evict_datapoint['value'] = node.expected_value
        self.evict_datapoint['opcode'] = node.opcode
        self.evict_writer.writerow(
            [self.evict_datapoint[key] for key in self.evict_order])
        self.evict_file.flush()
        self.num_in_cache -= 1
        node.is_evicted = True

    def purge(self, node):
        """Log ``node`` to the purge CSV, update the counters and call
        ``node.purge()``."""
        self.purge_datapoint['row'] += 1
        self.purge_datapoint['depth'] = node.depth
        self.purge_datapoint['rank'] = node.rank
        self.purge_datapoint['value'] = node.expected_value
        self.purge_datapoint['opcode'] = node.opcode
        self.purge_writer.writerow(
            [self.purge_datapoint[key] for key in self.purge_order])
        self.purge_file.flush()
        self.num_in_full_cache -= 1
        if node.opcode == 'r':
            self.num_reads -= 1
        node.purge()

    @property
    def cache_list(self):
        """Nodes of full_cache that are not evicted (as returned by
        ``filter``)."""
        return filter(lambda node: not node.is_evicted, self.full_cache_list)

    @property
    def full_cache_list(self):
        """All nodes of full_cache as a list."""
        return list(self.full_cache.values())

    def hit_rate(self):
        """Return hits divided by requests (ZeroDivisionError before the
        first request)."""
        return float(self.num_hits) / self.num_requests

    def get_node(self, page):
        """Return the node stored for ``page``, or None when there is none."""
        try:
            node = self.full_cache[page]
            return node
        except KeyError:
            return None
class MMCPolicy(object):
    """Cache replacement policy driven by a two-component mixture model.

    Every request is explained either by the node's stack depth (component 0)
    or by its rank in ``self.ranker`` (component 1).  ``tau`` holds the two
    mixing weights (``tau[1] == 1 - tau[0]`` after ``refresh_params``) and
    ``theta`` the per-component parameter used in terms of the form
    ``theta * (1 - theta) ** k``.  The parameters are re-estimated online
    from a sliding trace of records and periodically by a full EM pass.

    Nodes that were evicted stay in ``full_cache`` as "ghost" entries until
    they are purged.  Three CSV logs (requests, evictions, purges) are opened
    under ``csv/`` on construction and flushed after every row; they are
    never closed by this class.
    """

    def __init__(self, cache_entries_limit, ghost_entries_limit,
                 trace_size_limit, csv_suffix="_mmc.csv", draw_dump=False):
        """Set up empty cache state, initial parameters and the CSV logs.

        :param cache_entries_limit: max number of non-evicted nodes.
        :param ghost_entries_limit: extra evicted nodes allowed to remain in
            ``full_cache`` on top of ``cache_entries_limit``.
        :param trace_size_limit: max number of records kept in ``trace``.
        :param csv_suffix: suffix appended to the three log file names.
        :param draw_dump: when true, ``dump_cache`` is called per request.
        """
        self.full_cache = FastRBTree()
        self.was_hit = None
        self.was_ghost_hit = None
        self.num_hits = 0
        self.num_requests = 0
        self.cache_entries_limit = cache_entries_limit
        self.ghost_entries_limit = ghost_entries_limit
        self.trace_size_limit = trace_size_limit
        self.trace = collections.deque()
        self.stack = RBTree()
        self.ranker = RBTree()
        self.generation = 0
        # During startup, this will act like an LRU.
        self.startup = True
        self.EM_period = 50 * int(np.ceil(np.log(trace_size_limit)))
        # The first EM pass runs once half a trace worth of requests arrived.
        self.countdown_to_EM = trace_size_limit // 2
        self.tau = [0.5, 0.5]
        self.theta = [0.5, 0.5]
        self.acc_tau = [0.0]
        self.acc_theta = [0.0, 0.0]
        self.num_in_cache = 0
        self.num_in_full_cache = 0
        self.csv_suffix = csv_suffix
        self.draw_dump = draw_dump

        # Per-request time series log.
        self.ts_order = [
            'row', 'hit', 'ghost_hit', 'tau', 'theta0', 'theta1',
            'Z', 'depth', 'rank']
        self.ts_datapoint = {key: None for key in self.ts_order}
        self.ts_datapoint['row'] = 0
        self.ts_file = open("csv/mmc" + self.csv_suffix, "w")
        self.ts_writer = csv.writer(self.ts_file)
        self.ts_writer.writerow(self.ts_order)

        # Eviction log.
        self.evict_order = [
            'row', 'depth', 'rank', 'value', 'Z', 'tau']
        self.evict_datapoint = {key: None for key in self.evict_order}
        self.evict_datapoint['row'] = 0
        self.evict_file = open("csv/mmc_evict" + self.csv_suffix, "w")
        self.evict_writer = csv.writer(self.evict_file)
        self.evict_writer.writerow(self.evict_order)

        # Purge log.
        self.purge_order = ['row', 'depth', 'rank', 'value', 'Z']
        self.purge_datapoint = {key: None for key in self.purge_order}
        self.purge_datapoint['row'] = 0
        self.purge_file = open("csv/mmc_purge" + self.csv_suffix, "w")
        self.purge_writer = csv.writer(self.purge_file)
        self.purge_writer.writerow(self.purge_order)

    def request(self, page):
        """Process one request for ``page`` and update all model state.

        Sets ``was_hit`` / ``was_ghost_hit``, appends a record to the trace,
        slides the trace window, restacks/reranks the node, runs EM when the
        countdown expires and pages out when a limit is exceeded.
        """
        self.num_requests += 1
        self.was_hit = False
        self.was_ghost_hit = False
        node = self.get_node(page)
        if node:
            # Any node still in full_cache counts as a ghost hit; it is a
            # real hit only when it has not been evicted.
            self.was_ghost_hit = True
            if not node.is_evicted:
                self.num_hits += 1
                self.was_hit = True
            node.hit_count += 1.0 - self.calculate_Z(node.depth, node.rank)
        else:
            node = Node(self)
            node.hit_count = self.tau[1]

        node.page_key = page
        self.full_cache[page] = node

        if not self.was_hit:
            self.num_in_cache += 1
        if not self.was_ghost_hit:
            self.num_in_full_cache += 1

        node.is_evicted = node.is_purged = False
        record = Record(self, node)
        self.add_trace_record(record)

        if len(self.trace) > self.trace_size_limit:
            # Slide the window: account for the new record, retract the
            # oldest one, then re-derive tau/theta from the accumulators.
            popped_record = self.trace.popleft()
            self.update_tau_and_theta_accs(record, increment=True)
            self.update_tau_and_theta_accs(popped_record, increment=False)
            self.refresh_params()
            popped_record.node.hit_count -= 1.0 - popped_record.Z

        node.restack()
        node.rerank()

        self.countdown_to_EM -= 1
        if self.countdown_to_EM == 0:
            self.EM_algorithm(delta=0.00001)
            self.countdown_to_EM = self.EM_period
            self.startup = False

        if (
            self.num_in_cache > self.cache_entries_limit or
            self.num_in_full_cache >
            self.cache_entries_limit + self.ghost_entries_limit
        ):
            self.pageout()

        if self.draw_dump:
            dump_cache(self, self.csv_suffix)

    def add_trace_record(self, record):
        """Write one row to the request log and append ``record`` to the trace."""
        self.ts_datapoint['row'] = self.num_requests
        self.ts_datapoint['hit'] = 1 if self.was_hit else 0
        self.ts_datapoint['ghost_hit'] = 1 if self.was_ghost_hit else 0
        self.ts_datapoint['tau'] = self.tau[0]
        self.ts_datapoint['theta0'] = self.theta[0]
        self.ts_datapoint['theta1'] = self.theta[1]
        self.ts_datapoint['depth'] = record.depth
        self.ts_datapoint['rank'] = record.node.rank
        self.ts_datapoint['Z'] = record.Z
        self.ts_writer.writerow(
            [self.ts_datapoint[key] for key in self.ts_order])
        self.ts_file.flush()
        self.trace.append(record)

    def pageout(self):
        """Evict and/or purge the node with the lowest expected value.

        The eviction candidate is the minimum over non-evicted nodes; the
        purge candidate is the minimum over every node in ``ranker``.
        """
        min_node = None
        min_node_value = None
        min_ghost = None
        min_ghost_value = None

        # Memoise each node's stack depth so the ranker pass can use it.
        for depth, node in enumerate(self.stack.values()):
            node.depth_memo = depth

        for rank, node in enumerate(self.ranker.values()):
            node.recompute_expected_value(depth=node.depth_memo, rank=rank)
            value = node.expected_value
            if not node.is_evicted:
                if min_node is None or value < min_node_value:
                    min_node = node
                    min_node_value = value
            if min_ghost is None or value < min_ghost_value:
                min_ghost = node
                min_ghost_value = value

        if self.num_in_cache > self.cache_entries_limit:
            self.evict(min_node)

        if (
            self.num_in_full_cache >
            self.cache_entries_limit + self.ghost_entries_limit
        ):
            self.purge(min_ghost)

    def _in_degenerate_optimum(self):
        """Return True when EM must be restarted from a hard assignment.

        That is the case during startup or when a parameter sits on a
        boundary value (``tau[0]`` at 0 or 1, either ``theta`` at 0).
        """
        return bool(
            self.startup or
            self.tau[0] == 0.0 or
            self.tau[0] == 1.0 or
            self.theta[0] == 0.0 or
            # The original tested theta[0] twice and never theta[1].
            self.theta[1] == 0.0
        )

    def EM_algorithm(self, delta):
        """Alternate E and M steps until the parameters move less than ``delta``.

        :param delta: convergence threshold on the change of
            ``|tau[0]| + |theta[0]| + |theta[1]|`` between iterations.
        """
        def abs_sum():
            return abs(self.tau[0]) + abs(self.theta[0]) + abs(self.theta[1])

        # Seed "before" far enough away that the loop body runs at least
        # once (assuming the three parameters are each at most 1).
        before = delta + 4.0
        i = 0
        # We need to detect if we're in a "nonsense" local optimum. The
        # algorithm will optimize to the global maximum if we aren't in one
        # of these cases.
        use_hard_Z = self._in_degenerate_optimum()

        while abs(before - abs_sum()) > delta:
            before = abs_sum()
            # Only the very first E step may use the hard assignment.
            hard_Z = 0.5 if use_hard_Z and i == 0 else None
            self.E_step(hard_Z=hard_Z)
            i += 1
            self.M_step()
            # Since we are rearranging the ranks, it's possible that we can
            # get into a situation where the ranks shift in a cycle such
            # that the tau delta is always exceeded. I've only seen this
            # limit hit when the trace size is very small (e.g. 10).
            if i > 50:
                break

    def E_step(self, hard_Z=None):
        """Treat self.tau and self.theta as constants.

        Recomputes ``_Z`` for every record (or sets it to ``hard_Z``),
        re-accumulates each node's ``_hit_count`` and rebuilds the ranker
        from the nodes' new ranker keys.
        """
        for node in self.full_cache.values():
            node._hit_count = 0.0

        for record in self.trace:
            if hard_Z is None:
                if record.node.is_purged:
                    rank = record.node.rank_purge_memo
                else:
                    rank = record.node.rank
                record._Z = self.calculate_Z(record.depth, rank)
            else:
                record._Z = hard_Z
            record.node._hit_count += (1.0 - record._Z)

        new_ranker = RBTree()
        for node in self.full_cache.values():
            node.ranker_key = node.new_ranker_key()
            new_ranker[node.ranker_key] = node
        self.ranker = new_ranker

    def M_step(self):
        """Treat Record.Z as constant.

        Rebuilds both accumulators from the whole trace and refreshes
        ``tau`` and ``theta``.
        """
        self.acc_tau = [0.0]
        self.acc_theta = [0.0, 0.0]
        for record in self.trace:
            self.update_tau_and_theta_accs(record, increment=True)
        self.refresh_params()

    def calculate_Z(self, depth, rank):
        """Return the weight of component 0 for a request at (depth, rank).

        Computes ``a / (a + b)`` with
        ``a = tau[0] * theta[0] * (1 - theta[0]) ** depth`` and
        ``b = tau[1] * theta[1] * (1 - theta[1]) ** rank``; falls back to
        ``tau[0]`` when the denominator is zero.
        """
        numerator = (
            self.tau[0] * self.theta[0] * (1 - self.theta[0])**depth)
        denominator = (
            numerator +
            self.tau[1] * self.theta[1] * (1 - self.theta[1])**rank)
        try:
            return float(numerator) / denominator
        except ZeroDivisionError:
            # This can happen when a node falls off the trace and rank and
            # depth become greater than the limits.
            return self.tau[0]

    def refresh_params(self):
        """Re-derive ``tau`` and ``theta`` from the accumulators.

        Raises ZeroDivisionError when the trace is empty; a zero theta
        denominator falls back to ``1 / len(full_cache)``.
        """
        R = len(self.trace)
        self.tau[0] = self.acc_tau[0] / R
        self.tau[1] = 1.0 - self.tau[0]

        try:
            self.theta[0] = ((R * self.tau[0]) /
                             (R * self.tau[0] + self.acc_theta[0]))
        except ZeroDivisionError:
            self.theta[0] = 1.0 / len(self.full_cache)

        try:
            self.theta[1] = ((R * self.tau[1]) /
                             (R * self.tau[1] + self.acc_theta[1]))
        except ZeroDivisionError:
            self.theta[1] = 1.0 / len(self.full_cache)

    def _update_tau_and_theta_accs(self, Z, depth, rank, increment=True):
        """Add (or retract) one observation to/from the accumulators.

        After a retraction the theta accumulators are clamped at 0.0.
        """
        if increment:
            self.acc_tau[0] += Z
            self.acc_theta[0] += Z * depth
            self.acc_theta[1] += (1.0 - Z) * rank
        else:
            self.acc_tau[0] -= Z
            self.acc_theta[0] -= Z * depth
            self.acc_theta[1] -= (1.0 - Z) * rank
            self.acc_theta = [max(0.0, acc) for acc in self.acc_theta]

    def update_tau_and_theta_accs(self, record, increment=True):
        """Apply ``record`` to the accumulators using its node's rank.

        Purged nodes contribute the rank memoised at purge time.
        """
        depth = record.depth
        if record.node.is_purged:
            rank = record.node.rank_purge_memo
        else:
            rank = record.node.rank
        self._update_tau_and_theta_accs(record.Z, depth, rank, increment)

    def evict(self, node):
        """Log ``node`` to the eviction CSV and mark it evicted."""
        self.evict_datapoint['row'] += 1
        self.evict_datapoint['depth'] = node.depth
        self.evict_datapoint['rank'] = node.rank
        self.evict_datapoint['value'] = node.expected_value
        self.evict_datapoint['Z'] = self.calculate_Z(
            node.depth, node.rank)
        self.evict_datapoint['tau'] = self.tau[0]
        self.evict_writer.writerow(
            [self.evict_datapoint[key] for key in self.evict_order])
        self.evict_file.flush()
        self.num_in_cache -= 1
        node.is_evicted = True

    def purge(self, node):
        """Log ``node`` to the purge CSV and call ``node.purge()``."""
        self.purge_datapoint['row'] += 1
        self.purge_datapoint['depth'] = node.depth
        self.purge_datapoint['rank'] = node.rank
        self.purge_datapoint['value'] = node.expected_value
        self.purge_datapoint['Z'] = self.calculate_Z(
            node.depth, node.rank)
        self.purge_writer.writerow(
            [self.purge_datapoint[key] for key in self.purge_order])
        self.purge_file.flush()
        self.num_in_full_cache -= 1
        node.purge()

    @property
    def cache_list(self):
        """List of the nodes in ``full_cache`` that are not evicted."""
        return [node for node in self.full_cache_list if not node.is_evicted]

    @property
    def full_cache_list(self):
        """List of every node in ``full_cache``, ghosts included."""
        return list(self.full_cache.values())

    def hit_rate(self):
        """Return hits / requests; raises ZeroDivisionError before any request."""
        return float(self.num_hits) / self.num_requests

    def get_node(self, page):
        """Return the node stored for ``page`` or None when there is none."""
        try:
            return self.full_cache[page]
        except KeyError:
            return None
""" setup_FastRBTree_ps = """ from __main__ import keys, crb_prev, crb_succ """ try: fp = open('testkeys.txt') keys = eval(fp.read()) fp.close() bskeys = zip(keys, keys) except IOError: print("create 'testkeys.txt' with profile_bintree.py\n") sys.exit() ptree = PTree.from_keys(keys) ftree = FTree.from_keys(keys) def rb_prev(): for key in keys: try: item = ptree.prev_item(key) except KeyError: pass def rb_succ(): for key in keys: try: item = ptree.succ_item(key) except KeyError:
alert[c[x]] = "*" tmpkey = tuple(alert) for x in range(2): alert[c[x]] = label[x] hlabel = tuple(label) if tmpkey in hyper_dict: tmpedge = hyper_dict[tmpkey] hypersize_list.discard((tmpedge.nalerts, tmpedge.hyperkey)) tmpedge.nalerts -= tmpedge.pop_alert(hlabel) if tmpedge.nalerts > 0: hypersize_list.insert((tmpedge.nalerts, tmpedge.hyperkey), tmpedge) for z in range(10): hyper_dict = {} hypersize_list = Tree() hcombinations = [ (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 2), (1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5), (3, 4), (3, 5),
def __init__(self, hyperkey, col, hlabel):
    """Create an edge holding a single alert.

    Stores ``hyperkey`` and ``col``, creates an empty alert tree, registers
    ``hlabel`` through ``insert_alert`` with a count of 1 and then sets the
    alert counter to 1.
    """
    self.hyperkey, self.col = hyperkey, col
    self._alerts = Tree()
    first_alert_count = 1
    self.insert_alert(hlabel, first_alert_count)
    # Assigned after insert_alert, exactly as before, so this value wins.
    self.nalerts = 1
class TDigest(object):
    """Streaming quantile sketch that keeps weighted centroids in a sorted tree.

    Centroids are stored in ``self.C`` keyed by their mean and ``self.n`` is
    the total weight added so far.  ``delta`` scales the per-centroid weight
    limit (see ``_theshold``) and ``K / delta`` is the centroid count above
    which ``update`` triggers ``compress``.
    """

    def __init__(self, delta=0.01, K=25):
        self.C = RBTree()
        self.n = 0
        self.delta = delta
        self.K = K

    def __add__(self, other_digest):
        """Return a new digest fed with the centroids of both operands.

        The centroids of each operand are shuffled before being re-inserted;
        the new digest inherits ``delta`` and ``K`` from ``self``.
        """
        C1 = list(self.C.values())
        C2 = list(other_digest.C.values())
        shuffle(C1)
        shuffle(C2)
        data = C1 + C2
        new_digest = TDigest(self.delta, self.K)
        for c in data:
            new_digest.update(c.mean, c.count)
        return new_digest

    def __len__(self):
        """Number of centroids currently stored."""
        return len(self.C)

    def __repr__(self):
        return """<T-Digest: n=%d, centroids=%d>""" % (self.n, len(self))

    def _add_centroid(self, centroid):
        """Insert ``centroid``, or merge it into the one with the same mean."""
        if centroid.mean not in self.C:
            self.C.insert(centroid.mean, centroid)
        else:
            self.C[centroid.mean].update(centroid.mean, centroid.count)

    def _compute_centroid_quantile(self, centroid):
        """Return the quantile estimate at the middle of ``centroid``.

        Half of the centroid's own count plus the counts returned by
        ``value_slice(-inf, centroid.mean)``, divided by ``self.n``.
        """
        denom = self.n
        cumulative_sum = sum(
            c_i.count
            for c_i in self.C.value_slice(-float('Inf'), centroid.mean))
        return (centroid.count / 2. + cumulative_sum) / denom

    def _update_centroid(self, centroid, x, w):
        """Fold ``(x, w)`` into ``centroid`` and re-key it in the tree.

        The centroid is removed under its old mean and re-added afterwards
        because the tree is keyed by the mean (presumably changed by
        ``centroid.update`` -- confirm against Centroid).
        """
        self.C.pop(centroid.mean)
        centroid.update(x, w)
        self._add_centroid(centroid)

    def _find_closest_centroids(self, x):
        """Return the centroid(s) whose mean is nearest to ``x``.

        Returns one centroid, or both neighbours when they are equally far.
        """
        try:
            ceil_key = self.C.ceiling_key(x)
        except KeyError:
            # Nothing at or above x: the floor is the only candidate.
            floor_key = self.C.floor_key(x)
            return [self.C[floor_key]]

        try:
            floor_key = self.C.floor_key(x)
        except KeyError:
            # Nothing at or below x: the ceiling is the only candidate.
            ceil_key = self.C.ceiling_key(x)
            return [self.C[ceil_key]]

        if abs(floor_key - x) < abs(ceil_key - x):
            return [self.C[floor_key]]
        elif (abs(floor_key - x) == abs(ceil_key - x) and
                (ceil_key != floor_key)):
            return [self.C[ceil_key], self.C[floor_key]]
        else:
            return [self.C[ceil_key]]

    def _theshold(self, q):
        """Return the weight limit ``4 * n * delta * q * (1 - q)``.

        The misspelt name is kept so existing callers keep working.
        """
        return 4 * self.n * self.delta * q * (1 - q)

    def update(self, x, w=1):
        """
        Update the t-digest with value x and weight w.
        """
        self.n += w

        if len(self) == 0:
            self._add_centroid(Centroid(x, w))
            return

        S = self._find_closest_centroids(x)

        while len(S) != 0 and w > 0:
            j = choice(list(range(len(S))))
            c_j = S[j]

            q = self._compute_centroid_quantile(c_j)
            limit = self._theshold(q)

            # This filters the out centroids that do not satisfy the second
            # part of the definition of S. See original paper by Dunning.
            if c_j.count + w > limit:
                S.pop(j)
                continue

            delta_w = min(limit - c_j.count, w)
            self._update_centroid(c_j, x, delta_w)
            w -= delta_w
            S.pop(j)

        if w > 0:
            # Whatever weight no neighbour could absorb becomes a centroid.
            self._add_centroid(Centroid(x, w))

        if len(self) > self.K / self.delta:
            self.compress()

        return

    def batch_update(self, values, w=1):
        """
        Update the t-digest with an iterable of values. This assumes all
        points have the same weight.
        """
        for x in values:
            self.update(x, w)
        self.compress()
        return

    def compress(self):
        """Rebuild the centroid tree by re-inserting centroids in random order."""
        T = TDigest(self.delta, self.K)
        C = list(self.C.values())
        shuffle(C)
        for c_i in C:
            T.update(c_i.mean, c_i.count)
        self.C = T.C

    def percentile(self, q):
        """
        Computes the percentile of a specific value in [0,1], ie. computes
        F^{-1}(q) where F^{-1} denotes the inverse CDF of the distribution.

        Raises ValueError when q lies outside [0, 1].
        """
        if not (0 <= q <= 1):
            raise ValueError("q must be between 0 and 1, inclusive.")

        t = 0
        q *= self.n

        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            k = c_i.count
            if q < t + k:
                # The first and last centroids are not interpolated.
                if i == 0 or i == len(self) - 1:
                    return c_i.mean
                delta = (self.C.succ_item(key)[1].mean -
                         self.C.prev_item(key)[1].mean) / 2.
                return c_i.mean + ((q - t) / k - 0.5) * delta
            t += k
        return self.C.max_item()[1].mean

    def quantile(self, q):
        """
        Computes the quantile of a specific value, ie. computes F(q) where F
        denotes the CDF of the distribution.
        """
        if len(self) == 1:
            # A lone centroid has no neighbour to interpolate against (the
            # prev_item lookup below would raise KeyError), so treat it as a
            # point mass located at its mean.
            return 1.0 if q >= self.C.min_key() else 0.0

        t = 0
        N = float(self.n)

        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            if i == len(self) - 1:
                delta = (c_i.mean - self.C.prev_item(key)[1].mean) / 2.
            else:
                delta = (self.C.succ_item(key)[1].mean - c_i.mean) / 2.
            z = max(-1, (q - c_i.mean) / delta)

            if z < 1:
                return t / N + c_i.count / N * (z + 1) / 2

            t += c_i.count
        return 1

    def trimmed_mean(self, q1, q2):
        """
        Computes the mean of the distribution between the two percentiles q1
        and q2. This is a modified algorithm than the one presented in the
        original t-Digest paper.

        Raises ValueError unless q1 < q2.
        """
        if not (q1 < q2):
            # The old message talked about the [0, 1] range, which is not
            # what this check tests.
            raise ValueError("q1 must be less than q2.")

        s = k = t = 0
        q1 *= self.n
        q2 *= self.n
        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            k_i = c_i.count
            if q1 < t + k_i:
                if i == 0:
                    delta = self.C.succ_item(key)[1].mean - c_i.mean
                elif i == len(self) - 1:
                    delta = c_i.mean - self.C.prev_item(key)[1].mean
                else:
                    delta = (self.C.succ_item(key)[1].mean -
                             self.C.prev_item(key)[1].mean) / 2.
                nu = ((q1 - t) / k_i - 0.5) * delta
                s += nu * k_i * c_i.mean
                k += nu * k_i

            if q2 < t + k_i:
                return s/k
            t += k_i

        return s/k
def __init__(self, delta=0.01, K=25):
    """Start with an empty RBTree in ``C``, a zero count ``n`` and the
    given ``delta`` and ``K`` settings stored unchanged."""
    self.C = RBTree()
    self.n = 0
    self.delta, self.K = delta, K
def __init__(self, attr=RangeAttribute):
    """Remember the attribute factory ``attr`` and start with an empty tree."""
    # The factory is stored as given; it is not called here.
    self._attr = attr
    self._tree = FastRBTree()
def crb_pop_max():
    """Build a FastRBTree from the module-level ``keys`` and empty it by
    calling ``pop_max`` until its ``count`` is falsy."""
    remaining = FastRBTree.fromkeys(keys)
    while True:
        if not remaining.count:
            break
        remaining.pop_max()
def __init__(self):
    """Create an empty price tree plus three empty lookup dictionaries."""
    self.price_tree = FastRBTree()
    # Three independent dicts; they must not share one object.
    self.price_map, self.order_map, self.received_orders = {}, {}, {}
class Tree(object):
    """Price-ordered container of orders for one side of an order book.

    ``price_tree`` keeps one OrderList per price in sorted order,
    ``price_map`` gives direct access to the same OrderList by price and
    ``order_map`` maps an order id to its Order.  ``volume`` is the sum of
    the quantities of all stored orders and ``min_price`` / ``max_price``
    cache the current extremes (None while the tree is empty).
    """

    def __init__(self):
        self.price_tree = FastRBTree()
        self.volume = 0
        self.price_map = {}  # Map from price -> order_list object
        self.order_map = {}  # Order ID to Order object
        self.min_price = None
        self.max_price = None

    def __len__(self):
        """Number of orders (not price levels) stored."""
        return len(self.order_map)

    def get_price(self, price):
        """Return the OrderList at ``price``; raises KeyError if absent."""
        return self.price_map[price]

    def get_order(self, id_num):
        """Return the Order with id ``id_num``; raises KeyError if absent."""
        return self.order_map[id_num]

    def create_price(self, price):
        """Add an empty OrderList at ``price`` and widen the cached extremes."""
        new_list = OrderList()
        self.price_tree.insert(price, new_list)
        self.price_map[price] = new_list
        if self.max_price is None or price > self.max_price:
            self.max_price = price
        if self.min_price is None or price < self.min_price:
            self.min_price = price

    def remove_price(self, price):
        """Drop the level at ``price`` and refresh the cached extremes."""
        self.price_tree.remove(price)
        del self.price_map[price]

        # Ask the tree for its extreme key instead of iterating over every
        # key with max()/min(); ValueError signals an empty tree.
        if self.max_price == price:
            try:
                self.max_price = self.price_tree.max_key()
            except ValueError:
                self.max_price = None
        if self.min_price == price:
            try:
                self.min_price = self.price_tree.min_key()
            except ValueError:
                self.min_price = None

    def price_exists(self, price):
        """Return True when a level exists at ``price``."""
        return price in self.price_map

    def order_exists(self, id_num):
        """Return True when an order with id ``id_num`` is stored."""
        return id_num in self.order_map

    def insert_tick(self, tick):
        """Create an Order from ``tick`` and append it to its price level.

        The level is created first when it does not exist yet.
        """
        if tick.price not in self.price_map:
            self.create_price(tick.price)
        order = Order(tick, self.price_map[tick.price])
        self.price_map[order.price].append_order(order)
        self.order_map[order.id_num] = order
        self.volume += order.qty

    def update_order(self, tick):
        """Apply a price or quantity change described by ``tick``.

        Raises KeyError when ``tick.id_num`` is unknown.
        """
        order = self.order_map[tick.id_num]
        original_volume = order.qty
        if tick.price != order.price:
            # Price changed: move the order to the level of the new price.
            order_list = self.price_map[order.price]
            order_list.remove_order(order)
            if len(order_list) == 0:
                self.remove_price(order.price)
            # insert_tick adds the new quantity to the volume, so only the
            # old quantity still has to be taken off.
            self.insert_tick(tick)
            self.volume -= original_volume
        else:
            # Quantity changed
            order.update_qty(tick.qty, tick.price)
            self.volume += order.qty - original_volume

    def remove_order_by_id(self, id_num):
        """Remove the order ``id_num`` and its level if that becomes empty.

        Raises KeyError when ``id_num`` is unknown.
        """
        order = self.order_map[id_num]
        self.volume -= order.qty
        order.order_list.remove_order(order)
        if len(order.order_list) == 0:
            self.remove_price(order.price)
        del self.order_map[id_num]

    def max(self):
        """Return the cached highest price, or None when empty."""
        return self.max_price

    def min(self):
        """Return the cached lowest price, or None when empty."""
        return self.min_price
class PriceLevel(object):
    '''
    A representation of a Price level in the book: the price, the total
    quantity resting at it and a tree of the orders keyed by their main id
    '''

    # two prices closer than this are considered the same price
    PRICE_TOLERANCE = 1e-4

    def __init__(self, f_price):
        '''
        Initialize an empty PriceLevel object

        :param f_price: float. The price of this level
        '''
        self.f_price = f_price
        self.i_qty = 0
        self.order_tree = FastRBTree()

    def add(self, order_aux):
        '''
        Insert the order in the tree when its status is New, Replaced or
        Partially Filled and add its quantity to the level. Return True if
        the tree is empty afterwards (the level should be deleted). Raise
        DifferentPriceException if the order price is not the level price

        :param order_aux: Order Object. The Order message to be updated
        '''
        # check if the order_aux price is the same of the self
        s_status = order_aux['order_status']
        if order_aux['order_price'] != self.f_price:
            raise DifferentPriceException
        elif s_status in ['New', 'Replaced', 'Partially Filled']:
            self.order_tree.insert(order_aux.main_id, order_aux)
            self.i_qty += int(order_aux['total_qty_order'])
        # check if there is no object in the updated tree (should be deleted)
        return self.order_tree.count == 0

    def delete(self, i_last_id, i_old_qty):
        '''
        Remove the order from the tree and subtract its quantity from the
        level. Return True if the tree is empty afterwards (the level should
        be deleted). Raise DifferentPriceException if the id is not in this
        level

        :param i_last_id: Integer. The id the order was stored under
        :param i_old_qty: Integer. The previous order qty
        '''
        try:
            self.order_tree.remove(i_last_id)
            self.i_qty -= i_old_qty
        except KeyError:
            raise DifferentPriceException
        # check if there is no object in the updated tree (should be deleted)
        return self.order_tree.count == 0

    def __str__(self):
        '''
        Return the total quantity of the level with thousands separators
        '''
        return '{:,.0f}'.format(self.i_qty)

    # same text as __str__
    __repr__ = __str__

    @staticmethod
    def _price_of(other):
        '''
        Return the price to compare against: the value itself when other is
        a plain number, its f_price attribute otherwise

        :param other: PriceLevel object or number.
        '''
        if isinstance(other, (int, float)):
            return other
        return other.f_price

    def __eq__(self, other):
        '''
        Return if the level price equals the other price within the
        tolerance, so floating point noise does not matter

        :param other: PriceLevel object or number. Price to be compared
        '''
        f_aux = self._price_of(other)
        return abs(self.f_price - f_aux) < self.PRICE_TOLERANCE

    def __gt__(self, other):
        '''
        Return if the level price is greater than the other price by more
        than the tolerance

        :param other: PriceLevel object or number. Price to be compared
        '''
        # the operands used to be swapped here, which made a > b answer
        # whether b was the greater one
        f_aux = self._price_of(other)
        return (self.f_price - f_aux) > self.PRICE_TOLERANCE

    def __lt__(self, other):
        '''
        Return if the level price is smaller than the other price by more
        than the tolerance

        :param other: PriceLevel object or number. Price to be compared
        '''
        f_aux = self._price_of(other)
        return (f_aux - self.f_price) > self.PRICE_TOLERANCE

    def __ne__(self, other):
        '''
        Return if the level price differs from the other price by at least
        the tolerance

        :param other: PriceLevel object or number. Price to be compared
        '''
        return not self.__eq__(other)
#! /usr/bin/env python # coding:utf-8 from __future__ import division import heapq import bintrees import random if __name__ == '__main__': from benchmarker import Benchmarker from itertools import repeat, izip from bintrees import FastRBTree # initialize heapq h = range(10000) heapq.heapify(h) # initialize AVLTree m = izip(xrange(10000), repeat(True)) t = FastRBTree(m) for bm in Benchmarker(width=20, loop=100000, cycle=3, extra=1): for _ in bm.empty(): pass for _ in bm('heapq'): heapq.heappop(h) heapq.heappush(h, random.randint(-100000, 100000)) for _ in bm('FastRBTree'): t.pop_min() t[random.randint(-100000, 100000)] = True
class ExclusiveRangeDict(object):
    """A class like dict whose key is a range [begin, end) of integers.

    It has an attribute for each range of integers, for example:
    [10, 20) => Attribute(0),
    [20, 40) => Attribute(1),
    [40, 50) => Attribute(2),
    ...

    An instance of this class is accessed only via iter_range(begin, end).
    The instance is accessed as follows:

    1) If the given range [begin, end) is not covered by the instance,
    the range is newly created and iterated.

    2) If the given range [begin, end) exactly covers ranges in the instance,
    the ranges are iterated.
    (See test_set() in tests/range_dict_tests.py.)

    3) If the given range [begin, end) starts at and/or ends at a mid-point of
    an existing range, the existing range is split by the given range, and
    ranges in the given range are iterated.  For example, consider a case that
    [25, 45) is given to an instance of [20, 30), [30, 40), [40, 50).  In this
    case, [20, 30) is split into [20, 25) and [25, 30), and [40, 50) into
    [40, 45) and [45, 50).  Then, [25, 30), [30, 40), [40, 45) are iterated.
    (See test_split() in tests/range_dict_tests.py.)

    4) If the given range [begin, end) includes non-existing ranges in an
    instance, the gaps are filled with new ranges, and all ranges are iterated.
    For example, consider a case that [25, 50) is given to an instance of
    [30, 35) and [40, 45).  In this case, [25, 30), [35, 40) and [45, 50) are
    created in the instance, and then [25, 30), [30, 35), [35, 40), [40, 45)
    and [45, 50) are iterated.
    (See test_fill() in tests/range_dict_tests.py.)
    """

    class RangeAttribute(object):
        """Default per-range attribute; carries no data."""

        def __init__(self):
            pass

        def __str__(self):
            return '<RangeAttribute>'

        def __repr__(self):
            return '<RangeAttribute>'

        def copy(self):  # pylint: disable=R0201
            """Return a fresh attribute; used when a range is split."""
            return ExclusiveRangeDict.RangeAttribute()

    def __init__(self, attr=RangeAttribute):
        """Create an empty dict whose new ranges get ``attr()`` as attribute.

        Internally maps ``range_begin -> (range_end, attribute)``.
        """
        self._tree = FastRBTree()
        self._attr = attr

    def iter_range(self, begin=None, end=None):
        """Yield ``(range_begin, range_end, attribute)`` covering [begin, end).

        Existing ranges are split at ``begin`` and ``end`` and gaps are
        filled with new ranges, as described in the class docstring.  This is
        a generator: nothing is modified until it is iterated.

        Args:
            begin: start of the interval; None means the smallest stored
                begin.
            end: end of the interval; None means the end of the last stored
                range.

        When ``begin`` or ``end`` is None the tree must not be empty; the
        lookup of the default raises otherwise.
        """
        # Compare against None explicitly: 0 is a valid bound and must not
        # be replaced by the stored minimum/maximum.
        if begin is None:
            begin = self._tree.min_key()
        if end is None:
            end = self._tree.max_item()[1][0]

        # The code below assumes that self._tree has at least one element.
        if self._tree.is_empty():
            self._tree[begin] = (end, self._attr())

        # Create a beginning range (border)
        try:
            bound_begin, bound_value = self._tree.floor_item(begin)
            bound_end = bound_value[0]
            if begin >= bound_end:
                # Create a blank range.
                try:
                    new_end, _ = self._tree.succ_item(bound_begin)
                except KeyError:
                    new_end = end
                self._tree[begin] = (min(end, new_end), self._attr())
            elif bound_begin < begin and begin < bound_end:
                # Split the existing range.
                new_end = bound_value[0]
                new_value = bound_value[1]
                self._tree[bound_begin] = (begin, new_value.copy())
                self._tree[begin] = (new_end, new_value.copy())
            else:  # bound_begin == begin
                # Do nothing (just saying it clearly since this part is
                # confusing)
                pass
        except KeyError:  # begin is less than the smallest element.
            # Create a blank range.
            # Note that we can assume self._tree has at least one element.
            self._tree[begin] = (min(end, self._tree.min_key()), self._attr())

        # Create an ending range (border)
        try:
            bound_begin, bound_value = self._tree.floor_item(end)
            bound_end = bound_value[0]
            if end > bound_end:
                # Create a blank range.
                new_begin = bound_end
                self._tree[new_begin] = (end, self._attr())
            elif bound_begin < end and end < bound_end:
                # Split the existing range.
                new_end = bound_value[0]
                new_value = bound_value[1]
                self._tree[bound_begin] = (end, new_value.copy())
                self._tree[end] = (new_end, new_value.copy())
            else:  # end == bound_begin or end == bound_end
                # Do nothing (just saying it clearly since this part is
                # confusing)
                pass
        except KeyError:  # end is less than the smallest element.
            # It must not happen.  A blank range [begin,end) has already been
            # created even if [begin,end) is less than the smallest range.
            raise

        # Collect the gaps first and fill them afterwards, so the tree is
        # not modified while the slice is being iterated.
        missing_ranges = []

        prev_end = None
        for range_begin, range_value in self._tree.itemslice(begin, end):
            range_end = range_value[0]
            # Note that we can assume that we have a range beginning with
            # |begin| and a range ending with |end| (they may be the same
            # range).
            if prev_end is not None and prev_end != range_begin:
                missing_ranges.append((prev_end, range_begin))
            prev_end = range_end

        for missing_begin, missing_end in missing_ranges:
            self._tree[missing_begin] = (missing_end, self._attr())

        for range_begin, range_value in self._tree.itemslice(begin, end):
            yield range_begin, range_value[0], range_value[1]

    def __str__(self):
        return str(self._tree)
class BookSide(object):
    '''
    A side of the limit order book representation. Price levels are kept in
    price_tree (price -> PriceLevel) and the last known state of each order
    in d_order_map (Order -> dict with the keys price, order_id, qty and
    main_id)
    '''

    def __init__(self, s_side):
        '''
        Initialize a BookSide object. Save all parameters as attributes.
        Raise InvalidTypeException if s_side is not BID or ASK

        :param s_side: string. BID or ASK
        '''
        if s_side not in ['BID', 'ASK']:
            raise InvalidTypeException('side should be BID or ASK')
        self.s_side = s_side
        self.price_tree = FastRBTree()
        self._i_idx = 0
        self.d_order_map = {}
        self.last_price = 0.

    def update(self, d_data):
        '''
        Update the state of the order book given the data passed. Always
        return True, including when the message is ignored

        :param d_data: dict. data related to a single order
        '''
        # dont process aggresive trades
        if d_data['agressor_indicator'] == 'Agressive':
            return True
        # update the book information
        order_aux = Order(d_data)
        s_status = order_aux['order_status']
        b_sould_update = True
        # check the order status: a message about an order that is not in
        # the order map is ignored (Canceled, Filled) or handled as a new
        # order (Replaced). Other statuses are left untouched and fail with
        # KeyError in the lookups below
        if s_status != 'New' and order_aux not in self.d_order_map:
            if s_status == 'Canceled' or s_status == 'Filled':
                b_sould_update = False
                s_status = 'Invalid'
            elif s_status == 'Replaced':
                s_status = 'New'
        # process the message
        if s_status == 'New':
            b_sould_update = self._new_order(order_aux)
        elif s_status != 'Invalid':
            d_old = self.d_order_map[order_aux]
            i_old_id = d_old['main_id']
            f_old_pr = d_old['price']
            i_old_q = d_old['qty']
            # hold the last traded price
            if s_status in ['Partially Filled', 'Filled']:
                self.last_price = order_aux['order_price']
            # process message
            if s_status in ['Canceled', 'Expired', 'Filled']:
                b_sould_update = self._canc_expr_filled_order(order_aux,
                                                              i_old_id,
                                                              f_old_pr,
                                                              i_old_q)
            elif s_status == 'Replaced':
                b_sould_update = self._replaced_order(order_aux,
                                                      i_old_id,
                                                      f_old_pr,
                                                      i_old_q)
            elif s_status == 'Partially Filled':
                b_sould_update = self._partially_filled(order_aux,
                                                        i_old_id,
                                                        f_old_pr,
                                                        i_old_q)
        # remove from order map
        if s_status not in ['New', 'Invalid']:
            self.d_order_map.pop(order_aux)
        # update the order map
        if b_sould_update:
            f_qty = int(order_aux['total_qty_order'])
            self.d_order_map[order_aux] = {}
            self.d_order_map[order_aux]['price'] = d_data['order_price']
            self.d_order_map[order_aux]['order_id'] = order_aux.order_id
            self.d_order_map[order_aux]['qty'] = f_qty
            self.d_order_map[order_aux]['main_id'] = order_aux.main_id

        # return that the update was done
        return True

    def _remove_from_level(self, f_price, i_id, i_qty):
        '''
        Delete an order from its price level and drop the level from
        price_tree when it reports being empty

        :param f_price: float. Price of the level holding the order
        :param i_id: integer. Id the order is stored under in the level
        :param i_qty: integer. Quantity to subtract from the level
        '''
        this_price = self.price_tree.get(f_price)
        if this_price.delete(i_id, i_qty):
            self.price_tree.remove(f_price)

    def _add_to_level(self, order_obj):
        '''
        Add the order to the level of its price, creating the level first
        when price_tree has none for that price

        :param order_obj: Order Object. The order to be added
        '''
        f_price = order_obj['order_price']
        if not self.price_tree.get(f_price):
            self.price_tree.insert(f_price, PriceLevel(f_price))
        this_price = self.price_tree.get(f_price)
        this_price.add(order_obj)

    def _canc_expr_filled_order(self, order_obj, i_old_id, f_old_pr, i_old_q):
        '''
        Update price_tree when passed canceled, expired or filled orders.
        Return False, so the order is not written back to the order map

        :param order_obj: Order Object. The last order in the file
        :param i_old_id: integer. Old id of the order_obj
        :param f_old_pr: float. Old price of the order_obj
        :param i_old_q: integer. Old qty of the order_obj
        '''
        self._remove_from_level(f_old_pr, i_old_id, i_old_q)
        return False

    def _replaced_order(self, order_obj, i_old_id, f_old_pr, i_old_q):
        '''
        Update price_tree when passed replaced orders. Return True

        :param order_obj: Order Object. The last order in the file
        :param i_old_id: integer. Old id of the order_obj
        :param f_old_pr: float. Old price of the order_obj
        :param i_old_q: integer. Old qty of the order_obj
        '''
        # remove from the old price
        self._remove_from_level(f_old_pr, i_old_id, i_old_q)
        # insert in the new price
        self._add_to_level(order_obj)
        return True

    def _partially_filled(self, order_obj, i_old_id, f_old_pr, i_old_q):
        '''
        Update price_tree when passed partially filled orders. Return True

        :param order_obj: Order Object. The last order in the file
        :param i_old_id: integer. Old id of the order_obj
        :param f_old_pr: float. Old price of the order_obj
        :param i_old_q: integer. Old qty of the order_obj
        '''
        # delete old price, if it is needed
        self._remove_from_level(f_old_pr, i_old_id, i_old_q)
        # add/modify order
        self._add_to_level(order_obj)
        return True

    def _new_order(self, order_obj):
        '''
        Update price_tree when passed new orders. Return True

        :param order_obj: Order Object. The last order in the file
        '''
        # if it was already in the order map
        if order_obj in self.d_order_map:
            d_old = self.d_order_map[order_obj]
            # price levels store orders under their main id, and main_id is
            # the key update() writes; the last_order_id key that used to
            # be read here is never set and raised KeyError
            i_old_id = d_old['main_id']
            f_old_price = d_old['price']
            i_old_qty = d_old['qty']
            # remove from order map
            self.d_order_map.pop(order_obj)
            self._remove_from_level(f_old_price, i_old_id, i_old_qty)
        # insert a empty price level if it is needed and add the order
        self._add_to_level(order_obj)
        return True

    def get_n_top_prices(self, n):
        '''
        Return a dataframe with the N top price levels. Not implemented
        here: raise NotImplementedError

        :param n: integer. Number of price levels desired
        '''
        raise NotImplementedError

    def get_n_botton_prices(self, n=5):
        '''
        Return a dataframe with the N bottom price levels. Not implemented
        here: raise NotImplementedError

        :param n: integer. Number of price levels desired
        '''
        raise NotImplementedError
def __init__(self):
    """Create an empty price tree and the empty lookup dictionaries."""
    self.price_tree = FastRBTree()
    # price -> order_list object, and order ID -> Order object
    self.price_map, self.order_map = {}, {}
    self.received_orders = {}