def count_moves_in_game_range(self, game_begin, game_end):
    """Count the total moves in a game range.

    Args:
      game_begin: integer, starting game
      game_end: integer, ending game

    Uses the `ct_` keyspace for rapid move summary.

    Returns:
      Integer sum of the move counts recorded for games in the
      row range [game_begin, game_end).
    """
    rows = self.bt_table.read_rows(
        ROWCOUNT_PREFIX.format(game_begin),
        ROWCOUNT_PREFIX.format(game_end),
        filter_=row_filters.ColumnRangeFilter(
            METADATA, MOVE_COUNT, MOVE_COUNT))
    # Generator expression: no need to materialize a list just to sum it.
    return sum(int(r.cell_value(METADATA, MOVE_COUNT)) for r in rows)
def bleakest_moves(self, start_game, end_game):
    """Given a range of games, return the bleakest moves.

    Returns a list of (game, move, q) sorted by q.
    """
    bleak = b'bleakest_q'
    rows = self.bt_table.read_rows(
        ROW_PREFIX.format(start_game),
        ROW_PREFIX.format(end_game),
        filter_=row_filters.ColumnRangeFilter(METADATA, bleak, bleak))
    results = []
    for row in rows:
        row_name = str(row.row_key, 'utf-8')
        game, move = _game_row_key.match(row_name).groups()
        q_value = row.cell_value(METADATA, bleak)
        results.append((int(game), int(move), float(q_value)))
    # Ascending by q: the bleakest (lowest-q) moves come first.
    results.sort(key=operator.itemgetter(2))
    return results
def get_bleakest_moves(start_game, end_game):
    """Given a range of games, return the bleakest moves.

    Args:
      start_game: integer, first game of the row range.
      end_game: integer, end of the row range (prefix-based, exclusive).

    Returns:
      A list of (game, move, q) tuples sorted ascending by q, so the
      bleakest (lowest-q) moves come first.
    """
    row_fmt = 'g_{:0>10}_'
    bleak = b'bleakest_q'
    rows = _bt_table.read_rows(row_fmt.format(start_game),
                               row_fmt.format(end_game),
                               filter_=row_filters.ColumnRangeFilter(
                                   'metadata', bleak, bleak))
    gm_pat = re.compile(r'g_(\d+)_m_(\d+)')
    # Parse row keys and convert to numbers in a single pass, rather
    # than building an intermediate list of string tuples.
    bleakest = []
    for r in rows:
        g, m = gm_pat.match(str(r.row_key, 'utf-8')).groups()
        q = r.cell_value('metadata', bleak)
        bleakest.append((int(g), int(m), float(q)))
    bleakest.sort(key=operator.itemgetter(2))
    return bleakest
def read_wait_cell(self):
    """Read the value of the cell holding the 'wait' value,

    Returns the int value of whatever it has, or None if the cell
    doesn't exist.
    """
    wait_filter = bigtable_row_filters.ColumnRangeFilter(
        METADATA, WAIT_CELL, WAIT_CELL)
    state_row = self.bt_table.read_row(TABLE_STATE, filter_=wait_filter)
    if state_row is None:
        utils.dbg('No waiting for new games needed; '
                  'wait_for_game_number column not in table_state')
        return None
    raw_value = state_row.cell_value(METADATA, WAIT_CELL)
    if raw_value:
        return cbt_intvalue(raw_value)
    utils.dbg('No waiting for new games needed; '
              'no value in wait_for_game_number cell '
              'in table_state')
    return None
def _Acquire(self, lease_time):
    """Attempt to take the row lock via a Bigtable check-and-mutate.

    Args:
      lease_time: seconds the lease should be held for.

    Raises:
      ExistingLock: if another unexpired lease already holds the row.
    """
    # Timestamps are microseconds-since-epoch, stored as strings.
    now = int(time.time() * 1e6)
    expires = int((time.time() + lease_time) * 1e6)
    # Only latest value
    latest_value = row_filters.CellsColumnLimitFilter(1)
    # Match any lease time value > now which means someone else holds a lock
    # We can't store these as ints, encode to str.
    current_lease = row_filters.ValueRangeFilter(
        start_value=utils.SmartStr(now), inclusive_start=False)
    # aff4:lease
    family, column = self.store.GetFamilyColumn(self.store.LEASE_ATTRIBUTE)
    col_filter = row_filters.ColumnRangeFilter(family,
                                               start_column=column,
                                               end_column=column)
    # Note filter chains are evaluated in order so there are performance
    # considerations with which filter to apply first
    filter_chain = row_filters.RowFilterChain(
        [col_filter, current_lease, latest_value])
    mutate_row = self.store.table.row(self.subject, filter_=filter_chain)
    # state=False means no lease or it's expired, in this case take the lock.
    mutate_row.set_cell(family, column, utils.SmartStr(expires), state=False)
    # TODO: decide whether a failed check-and-mutate should be retried at the
    # RPC level here, or always surface immediately as ExistingLock.
    existing_lock = self.store.CallWithRetry(mutate_row.commit, "write")
    if existing_lock:
        raise ExistingLock("Row %s locked." % self.subject)
    # We have the lock
    self.expires = expires
    self.locked = True
def games_by_time(self, start_game, end_game):
    """Given a range of games, return the games sorted by time.

    Returns [(time, game_number), ...]

    The time will be a `datetime.datetime` and the game number is the
    integer used as the basis of the row ID.

    Note that when a cluster of self-play nodes are writing
    concurrently, the game numbers may be out of order.
    """
    move_count = b'move_count'
    count_filter = bigtable_row_filters.ColumnRangeFilter(
        METADATA, move_count, move_count)
    rows = self.bt_table.read_rows(
        ROWCOUNT_PREFIX.format(start_game),
        ROWCOUNT_PREFIX.format(end_game),
        filter_=count_filter)
    timed_games = []
    for row in rows:
        row_name = str(row.row_key, 'utf-8')
        game = _game_from_counter.match(row_name).groups()[0]
        # Timestamp of the (single, filtered) move_count cell.
        when = row.cells[METADATA][move_count][0].timestamp
        timed_games.append((when, game))
    timed_games.sort(key=operator.itemgetter(0))
    return timed_games
def Resolve(self, subject, attribute, token=None): """Retrieve the latest value set for a subject's attribute. Args: subject: The subject URN. attribute: The attribute. token: The security token used in this call. Returns: A (string, timestamp in microseconds) stored in the bigtable cell, or (None, 0). Raises: AccessError: if anything goes wrong. """ subject = utils.SmartStr(subject) attribute = utils.SmartStr(attribute) family, column = self.GetFamilyColumn(attribute) col_filter = row_filters.ColumnRangeFilter(family, start_column=column, end_column=column) # Most recent latest_filter = row_filters.CellsColumnLimitFilter(1) row_filter = row_filters.RowFilterChain( filters=[col_filter, latest_filter]) row_data = self.table.read_row(subject, filter_=row_filter) if row_data: for cell in row_data.cells[family][column]: return self.Decode(attribute, cell.value), self.DatetimeToMicroseconds( cell.timestamp) return None, 0
def ResolveMulti(self, subject, attributes, timestamp=None, limit=None,
                 token=None):
    """Resolve multiple attributes for a subject.

    Results will be returned in arbitrary order (i.e. not ordered by
    attribute or timestamp).

    Args:
      subject: The subject to resolve.
      attributes: The attribute string or list of strings to match.
        Note this is an exact match, not a regex. Must be non-empty.
      timestamp: A range of times for consideration (In
        microseconds). Can be a constant such as ALL_TIMESTAMPS or
        NEWEST_TIMESTAMP or a tuple of ints (start, end).
      limit: The maximum total number of results we return.
      token: The security token used in this call.

    Yields:
      A unordered list of (attribute, value string, timestamp).

    Raises:
      AccessError: if anything goes wrong.
    """
    subject = utils.SmartStr(subject)
    self.security_manager.CheckDataStoreAccess(
        token, [subject], self.GetRequiredResolveAccess(attributes))

    if isinstance(attributes, basestring):
        attributes = [utils.SmartStr(attributes)]
    else:
        attributes = [utils.SmartStr(x) for x in attributes]

    filter_union = []
    for attribute in attributes:
        family, column = self.GetFamilyColumn(attribute)
        col_filter = row_filters.ColumnRangeFilter(
            family, start_column=column, end_column=column)
        filter_union.append(col_filter)

    # More than one attribute, use a union, otherwise just use the
    # existing filter.
    if len(filter_union) > 1:
        filter_union = row_filters.RowFilterUnion(filters=filter_union)
    else:
        filter_union = filter_union[0]

    # Essentially timestamp AND (attr1 OR attr2)
    timestamp_filter = self._TimestampToFilter(timestamp)
    if timestamp_filter:
        row_filter = row_filters.RowFilterChain(
            filters=[filter_union, timestamp_filter])
    else:
        row_filter = filter_union

    row_data = self.CallWithRetry(
        self.table.read_row, "read", subject, filter_=row_filter)

    if row_data:
        max_results = limit or 2**64
        # Iterate every family present in the (already filtered) row rather
        # than only the `family` left over from the loop above: when the
        # requested attributes span multiple column families, the old code
        # silently dropped all but the last family's results.
        for family, columns in row_data.cells.iteritems():
            for column, cells in columns.iteritems():
                attribute = ":".join((family, column))
                for cell in cells:
                    if max_results <= 0:
                        # `return` (not `raise StopIteration`): PEP 479 makes
                        # StopIteration inside a generator a RuntimeError.
                        return
                    max_results -= 1
                    yield attribute, self.Decode(
                        attribute,
                        cell.value), self.DatetimeToMicroseconds(cell.timestamp)
'Glob for SGFs to backfill into eval_games bigtable.')
flags.mark_flags_as_required(
    ['sgf_glob', 'cbt_project', 'cbt_instance', 'cbt_table'])

FLAGS = flags.FLAGS

# Constants
# Row-key prefix for evaluation games, zero-padded to 10 digits so that
# lexicographic row ordering matches numeric game ordering.
EVAL_PREFIX = 'e_{:0>10}'
EVAL_GAME_COUNTER = b'eval_game_counter'
SGF_FILENAME = b'sgf'

#### Common Filters

# Restricts reads to the single eval_game_counter column.
EVAL_COUNT_FILTER = row_filters.ColumnRangeFilter(
    METADATA, EVAL_GAME_COUNTER, EVAL_GAME_COUNTER)

#### START ####


def grouper(iterable, n):
    """Yield successive n-sized tuples from iterable.

    The final group may be shorter than n; iteration stops when the
    underlying iterable is exhausted.
    """
    iterator = iter(iterable)
    group = tuple(itertools.islice(iterator, n))
    while group:
        yield group
        group = tuple(itertools.islice(iterator, n))


def latest_game_number(bt_table):
    """Return the number of the last game to be written."""
    # TODO(amj): Update documentation on latest_game_number (last game or next game)?
def read_existing_models(bt_table):
    """Return model names from each existing record.

    Args:
      bt_table: the Bigtable table to scan.

    Returns:
      A list of row-key strings (utf-8 decoded), one per row carrying a
      model-name cell.
    """
    # Only the row keys are needed, so strip cell values server-side with
    # StripValueTransformerFilter — this avoids shipping the model-name
    # bytes over the wire (resolves the old TODO about reading just keys).
    keys_only = row_filters.RowFilterChain(filters=[
        row_filters.ColumnRangeFilter(METADATA, MODEL_NAME, MODEL_NAME),
        row_filters.StripValueTransformerFilter(True),
    ])
    rows = bt_table.read_rows(filter_=keys_only)
    return [row.row_key.decode() for row in rows]