def sync_readings(session, rows: iter, sensors: list, awesome_sensors: dict,
                  reading_types: dict):
    """Bulk store readings."""
    # Convert list to dictionary keyed by sensor name
    sensors = {sensor['name']: sensor for sensor in sensors}

    def get_readings(_rows) -> iter:
        for row in _rows:
            # Convert rows of portal data into portal readings
            yield from maps.row_to_readings(row, sensors=sensors,
                                            reading_types=reading_types,
                                            awesome_sensors=awesome_sensors)

    # Iterate over data chunks
    for chunk in utils.iter_chunks(get_readings(rows),
                                   chunk_size=settings.BULK_READINGS_CHUNK_SIZE):
        if chunk:
            try:
                objects.Reading.store_bulk(session, readings=chunk)
            # No more readings, so stop
            except exceptions.EmptyValueError:
                break
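# Illustrative sketch only: `utils.iter_chunks` is not part of this excerpt.
# The version assumed below matches the keyword-style call in sync_readings
# (`iter_chunks(iterable, chunk_size=...)`); the project's real helper may
# differ.
import itertools


def iter_chunks(iterable, chunk_size):
    """Yield lists of at most `chunk_size` items drawn from `iterable`."""
    iterator = iter(iterable)
    while True:
        chunk = list(itertools.islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk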
def split_test_training(positions_w_context, est_num_positions):
    print("Estimated number of chunks: %s" % (est_num_positions // CHUNK_SIZE),
          file=sys.stderr)
    desired_test_size = 10**5
    if est_num_positions < 2 * desired_test_size:
        # Too few positions for a fixed-size test set: materialize the stream
        # and hold out a third of it for testing, returning the remainder as a
        # single training chunk.
        positions_w_context = list(positions_w_context)
        test_size = len(positions_w_context) // 3
        return positions_w_context[:test_size], [positions_w_context[test_size:]]
    else:
        # Shuffle, take a fixed-size test set, then chunk the rest for training.
        shuffled_positions = utils.shuffler(positions_w_context)
        test_chunk = utils.take_n(desired_test_size, shuffled_positions)
        training_chunks = utils.iter_chunks(CHUNK_SIZE, shuffled_positions)
        return test_chunk, training_chunks
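# Note that split_test_training and the snippets that follow call the helper
# with the size first (`utils.iter_chunks(CHUNK_SIZE, iterable)`), the reverse
# of the keyword-style call in sync_readings. A count-first variant is assumed
# to behave roughly as sketched here; again, this is not the project's actual
# implementation.
import itertools


def iter_chunks(chunk_size, iterator):
    """Yield lists of at most `chunk_size` items until `iterator` is exhausted."""
    iterator = iter(iterator)
    while True:
        chunk = list(itertools.islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk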
def main(argv):
    """Main program."""
    del argv  # Unused
    total_games = FLAGS.training_games
    total_moves = FLAGS.training_moves
    fresh = FLAGS.training_fresh
    batch_size = FLAGS.batch_size
    output_prefix = FLAGS.output_prefix

    spec = bigtable_input.BigtableSpec(FLAGS.cbt_project,
                                       FLAGS.cbt_instance,
                                       FLAGS.cbt_table)
    gq_r = bigtable_input.GameQueue(spec.project, spec.instance, spec.table)
    gq_c = bigtable_input.GameQueue(spec.project, spec.instance,
                                    spec.table + '-nr')

    mix = bigtable_input.mix_by_decile(total_games, total_moves, 9)
    trainings = [
        (spec, start_r, start_c, mix, batch_size,
         '{}{:0>10}_{:0>10}.tfrecord.zz'.format(output_prefix, start_r, start_c))
        for start_r, finish_r, start_c, finish_c
        in reversed(list(training_series(gq_r.latest_game_number,
                                         gq_c.latest_game_number,
                                         mix,
                                         fresh)))
    ]

    if FLAGS.starting_game:
        game = FLAGS.starting_game
        starts = [t[1] for t in trainings]
        where = bisect.bisect_left(starts, game)
        trainings = trainings[where:]

    if FLAGS.max_trainings:
        trainings = trainings[:FLAGS.max_trainings]

    if FLAGS.dry_run:
        for t in trainings:
            print(t)
        raise SystemExit

    concurrency = min(FLAGS.concurrency, multiprocessing.cpu_count() * 2)
    with tqdm(desc='Training Sets', unit_scale=2, total=len(trainings)) as pbar:
        for b in utils.iter_chunks(concurrency, trainings):
            with multiprocessing.Pool(processes=concurrency) as pool:
                pool.map(_export_training_set, b)
                pbar.update(len(b))
def delete_row_range(self, format_str, start_game, end_game):
    """Delete rows related to the given game range.

    Args:
      format_str: a string to `.format()` by the game numbers
        in order to create the row prefixes.
      start_game: the starting game number of the deletion.
      end_game: the ending game number of the deletion.
    """
    row_keys = make_single_array(
        self.tf_table.keys_by_range_dataset(
            format_str.format(start_game),
            format_str.format(end_game)))
    row_keys = list(row_keys)

    if not row_keys:
        utils.dbg('No rows left for games %d..%d' % (
            start_game, end_game))
        return
    utils.dbg('Deleting %d rows: %s..%s' % (
        len(row_keys), row_keys[0], row_keys[-1]))

    # Reverse the keys so that the queue is left in a more
    # sensible end state if you change your mind (say, due to a
    # mistake in the timestamp) and abort the process: there will
    # be a bit trimmed from the end, rather than a bit
    # trimmed out of the middle.
    row_keys.reverse()
    total_keys = len(row_keys)
    utils.dbg('Deleting total of %d keys' % total_keys)

    concurrency = min(MAX_BT_CONCURRENCY,
                      multiprocessing.cpu_count() * 2)
    with multiprocessing.Pool(processes=concurrency) as pool:
        batches = []
        with tqdm(desc='Keys', unit_scale=2, total=total_keys) as pbar:
            for b in utils.iter_chunks(bigtable.row.MAX_MUTATIONS, row_keys):
                pbar.update(len(b))
                batches.append((self.btspec, b))
                if len(batches) >= concurrency:
                    pool.map(_delete_rows, batches)
                    batches = []
            pool.map(_delete_rows, batches)
            batches = []
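# Minimal, self-contained sketch of the chunk-then-pool pattern used in
# main() and delete_row_range() above: split the work into fixed-size
# batches and map a worker over one batch at a time so memory stays bounded.
# The worker `square`, the batch size, and `process_in_chunks` are purely
# illustrative names, not part of the codebase above.
import multiprocessing


def square(x):
    return x * x


def process_in_chunks(items, chunk_size=100, concurrency=4):
    """Map `square` over `items` one fixed-size batch at a time."""
    results = []
    with multiprocessing.Pool(processes=concurrency) as pool:
        for start in range(0, len(items), chunk_size):
            batch = items[start:start + chunk_size]
            results.extend(pool.map(square, batch))
    return results


if __name__ == '__main__':
    print(process_in_chunks(list(range(10)), chunk_size=3, concurrency=2))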