def main(cmd):
    """Train a network from the YAML config in ``cmd.cfg`` and export weights.

    The config is loaded and echoed, the chunks returned by
    ``get_latest_chunks`` are split into a train and a test slice according
    to ``dataset.train_ratio``, and a ``tf.data`` pipeline is built over each
    slice. If a ``checkpoint`` file exists in the run directory the
    checkpoint returned by ``get_checkpoint`` is restored. Training then runs
    ``training.total_steps`` calls to ``tfprocess.process`` and the weights
    are written to ``cmd.output``.

    Args:
        cmd: Parsed command-line arguments. ``cmd.cfg`` must expose
            ``read()`` returning the YAML text; ``cmd.output`` is handed to
            ``save_leelaz_weights``.

    Raises:
        KeyError: If a required config entry is missing.
    """
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)

    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(root_dir, exist_ok=True)

    # Every parser that was successfully created is shut down in the
    # ``finally`` below, even when a later step raises.
    parsers = []
    try:
        train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
                                   shuffle_size=shuffle_size,
                                   sample=SKIP,
                                   batch_size=ChunkParser.BATCH_SIZE)
        parsers.append(train_parser)
        dataset = tf.data.Dataset.from_generator(
            train_parser.parse,
            output_types=(tf.string, tf.string, tf.string))
        dataset = dataset.map(ChunkParser.parse_function)
        dataset = dataset.prefetch(4)
        train_iterator = dataset.make_one_shot_iterator()

        # The test parser gets a shuffle buffer scaled by the test share.
        shuffle_size = int(shuffle_size * (1.0 - train_ratio))
        test_parser = ChunkParser(FileDataSrc(chunks[num_train:]),
                                  shuffle_size=shuffle_size,
                                  sample=SKIP,
                                  batch_size=ChunkParser.BATCH_SIZE)
        parsers.append(test_parser)
        dataset = tf.data.Dataset.from_generator(
            test_parser.parse,
            output_types=(tf.string, tf.string, tf.string))
        dataset = dataset.map(ChunkParser.parse_function)
        dataset = dataset.prefetch(4)
        test_iterator = dataset.make_one_shot_iterator()

        tfprocess = TFProcess(cfg)
        # NOTE: ``dataset`` refers to the test pipeline at this point.
        tfprocess.init(dataset, train_iterator, test_iterator)

        if os.path.exists(os.path.join(root_dir, 'checkpoint')):
            cp = get_checkpoint(root_dir)
            tfprocess.restore(cp)

        # Sweeps through all test chunks statistically
        num_evals = (num_chunks - num_train) * 10 // ChunkParser.BATCH_SIZE
        print("Using {} evaluation batches".format(num_evals))

        for _ in range(cfg['training']['total_steps']):
            tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)

        tfprocess.save_leelaz_weights(cmd.output)
        tfprocess.session.close()
    finally:
        for parser in parsers:
            parser.shutdown()
def main():
    """Build a TFProcess over three float32 placeholders and dump its weights.

    The weights are written to ``weights.txt`` via ``save_leelaz_weights``.
    """
    # One placeholder per entry, created in this order.
    placeholder_shapes = (
        [None, 120, 8 * 8],
        [None, 1924],
        [None, 1],
    )
    batch = [tf.placeholder(tf.float32, shape) for shape in placeholder_shapes]

    process = TFProcess(batch)
    process.save_leelaz_weights('weights.txt')
def main(cmd):
    """Run training as described by the YAML config in ``cmd.cfg``.

    Loads and echoes the config, splits the chunks from
    ``get_latest_chunks`` into train/test slices by ``dataset.train_ratio``,
    builds one ``tf.data`` pipeline per slice, optionally restores the
    checkpoint returned by ``get_checkpoint`` (when a ``checkpoint`` file is
    present in the run directory), runs ``training.total_steps`` calls to
    ``tfprocess.process`` and saves the weights to ``cmd.output``.

    Args:
        cmd: Parsed command-line arguments; ``cmd.cfg.read()`` must return
            the YAML text and ``cmd.output`` is passed to
            ``save_leelaz_weights``.

    Raises:
        KeyError: If a required config entry is missing.
    """
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)

    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(root_dir, exist_ok=True)

    def build_pipeline(parser):
        """Return (dataset, one-shot iterator) reading from ``parser.parse``."""
        pipeline = tf.data.Dataset.from_generator(
            parser.parse, output_types=(tf.string, tf.string, tf.string))
        pipeline = pipeline.map(ChunkParser.parse_function)
        pipeline = pipeline.prefetch(4)
        return pipeline, pipeline.make_one_shot_iterator()

    # Parsers are registered as soon as they exist so the ``finally`` below
    # shuts them down even if a later step raises.
    parsers = []
    try:
        train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
                                   shuffle_size=shuffle_size,
                                   sample=SKIP,
                                   batch_size=ChunkParser.BATCH_SIZE)
        parsers.append(train_parser)
        _, train_iterator = build_pipeline(train_parser)

        # The test parser gets a shuffle buffer scaled by the test share.
        test_shuffle_size = int(shuffle_size * (1.0 - train_ratio))
        test_parser = ChunkParser(FileDataSrc(chunks[num_train:]),
                                  shuffle_size=test_shuffle_size,
                                  sample=SKIP,
                                  batch_size=ChunkParser.BATCH_SIZE)
        parsers.append(test_parser)
        dataset, test_iterator = build_pipeline(test_parser)

        tfprocess = TFProcess(cfg)
        # The test pipeline is the dataset handed to init(), as before.
        tfprocess.init(dataset, train_iterator, test_iterator)

        if os.path.exists(os.path.join(root_dir, 'checkpoint')):
            cp = get_checkpoint(root_dir)
            tfprocess.restore(cp)

        # Sweeps through all test chunks statistically
        num_evals = (num_chunks - num_train) * 10 // ChunkParser.BATCH_SIZE
        print("Using {} evaluation batches".format(num_evals))

        for _ in range(cfg['training']['total_steps']):
            tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)

        tfprocess.save_leelaz_weights(cmd.output)
        tfprocess.session.close()
    finally:
        for parser in parsers:
            parser.shutdown()
def main():
    """Build a TFProcess from a YAML config file and dump its weights.

    Expects exactly one command-line argument: the path of the YAML config.
    The config is loaded and echoed, a TFProcess is created over three
    float32 placeholders, and its weights are written to ``weights.txt``.

    Returns:
        1 when the argument count is wrong (after printing a usage line);
        otherwise None.
    """
    if len(sys.argv) != 2:
        print("Usage: {} config.yaml".format(sys.argv[0]))
        return 1

    # The context manager closes the config file deterministically; the
    # original left the handle open.
    with open(sys.argv[1], 'r') as cfg_file:
        cfg = yaml.safe_load(cfg_file)
    print(yaml.dump(cfg, default_flow_style=False))

    batch = [
        tf.placeholder(tf.float32, [None, 120, 8 * 8]),
        tf.placeholder(tf.float32, [None, 1924]),
        tf.placeholder(tf.float32, [None, 1]),
    ]
    tfprocess = TFProcess(cfg, batch)
    tfprocess.save_leelaz_weights('weights.txt')
def main(cmd):
    """Train from the YAML config in ``cmd.cfg``, then gzip and publish weights.

    After training, the weights are written to ``/tmp/weights.txt``, hashed
    with SHA-256 and stored gzip-compressed as ``/tmp/<digest>.gz``. When
    ``cmd.upload`` is truthy the archive is passed to ``upload`` together
    with a small metadata dict; otherwise the stored path is printed.

    Args:
        cmd: Parsed command-line arguments; ``cmd.cfg.read()`` must return
            the YAML text and ``cmd.upload`` is the first argument given to
            ``upload`` (or falsy to skip uploading).

    Raises:
        KeyError: If a required config entry is missing.
    """
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)

    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(root_dir, exist_ok=True)

    # NOTE(review): the original never shut the parsers down. shutdown() is
    # used here for consistency with the other training entry points that
    # build ChunkParser the same way -- confirm this ChunkParser provides it.
    parsers = []
    try:
        train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
                                   shuffle_size=shuffle_size,
                                   sample=SKIP,
                                   batch_size=ChunkParser.BATCH_SIZE)
        parsers.append(train_parser)
        dataset = tf.data.Dataset.from_generator(
            train_parser.parse,
            output_types=(tf.string, tf.string, tf.string))
        dataset = dataset.map(ChunkParser.parse_function)
        dataset = dataset.prefetch(4)
        train_iterator = dataset.make_one_shot_iterator()

        # The test parser gets a shuffle buffer scaled by the test share.
        shuffle_size = int(shuffle_size * (1.0 - train_ratio))
        test_parser = ChunkParser(FileDataSrc(chunks[num_train:]),
                                  shuffle_size=shuffle_size,
                                  sample=SKIP,
                                  batch_size=ChunkParser.BATCH_SIZE)
        parsers.append(test_parser)
        dataset = tf.data.Dataset.from_generator(
            test_parser.parse,
            output_types=(tf.string, tf.string, tf.string))
        dataset = dataset.map(ChunkParser.parse_function)
        dataset = dataset.prefetch(4)
        test_iterator = dataset.make_one_shot_iterator()

        tfprocess = TFProcess(cfg)
        # NOTE: ``dataset`` refers to the test pipeline at this point.
        tfprocess.init(dataset, train_iterator, test_iterator)

        if os.path.exists(os.path.join(root_dir, 'checkpoint')):
            cp = get_checkpoint(root_dir)
            tfprocess.restore(cp)

        # Sweeps through all test chunks statistically
        num_evals = (num_chunks - num_train) * 10 // ChunkParser.BATCH_SIZE
        print("Using {} evaluation batches".format(num_evals))

        for _ in range(cfg['training']['total_steps']):
            tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)

        tfprocess.save_leelaz_weights('/tmp/weights.txt')
    finally:
        for parser in parsers:
            parser.shutdown()

    # The archive is named after the SHA-256 of the uncompressed weights.
    with open('/tmp/weights.txt', 'rb') as f:
        w = f.read()
    digest = hashlib.sha256(w).hexdigest()

    filename = '/tmp/{}.gz'.format(digest)
    with gzip.open(filename, 'wb') as f:
        f.write(w)

    if cmd.upload:
        metadata = {'training_id': '1',
                    'layers': cfg['model']['residual_blocks'],
                    'filters': cfg['model']['filters']}
        print("\nUploading `{}'...".format(digest[:8]), end='')
        upload(cmd.upload, metadata, filename)
        print("[done]\n")
    else:
        print("\nStored `{}'\n".format(filename))
def main(cmd):
    """Train with split batches from the YAML config in ``cmd.cfg``.

    Chunks come either from separate ``dataset.input_train`` /
    ``dataset.input_test`` sources (when ``input_test`` is configured) or
    from a single ``dataset.input`` source split by ``dataset.train_ratio``.
    Data is loaded at ``batch_size // num_batch_splits`` per parser batch and
    ``process_loop`` is called with the total batch size and the split count.
    Weights are saved to ``cmd.output`` when it is not None.

    Args:
        cmd: Parsed command-line arguments; ``cmd.cfg.read()`` must return
            the YAML text and ``cmd.output`` is the optional weights path.

    Raises:
        ValueError: If ``num_batch_splits`` does not divide ``batch_size``.
        KeyError: If a required config entry is missing.
    """
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'],
                                         num_train)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'],
                                        num_test)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # Load data with split batch size, which will be combined to the total
    # batch size in tfprocess.
    ChunkParser.BATCH_SIZE = split_batch_size

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(root_dir, exist_ok=True)

    # Every parser that was successfully created is shut down in the
    # ``finally`` below, even when a later step raises.
    parsers = []
    try:
        train_parser = ChunkParser(FileDataSrc(train_chunks),
                                   shuffle_size=shuffle_size,
                                   sample=SKIP,
                                   batch_size=ChunkParser.BATCH_SIZE)
        parsers.append(train_parser)
        dataset = tf.data.Dataset.from_generator(
            train_parser.parse,
            output_types=(tf.string, tf.string, tf.string, tf.string))
        dataset = dataset.map(ChunkParser.parse_function)
        dataset = dataset.prefetch(4)
        train_iterator = dataset.make_one_shot_iterator()

        # The test parser gets a shuffle buffer scaled by the test share.
        shuffle_size = int(shuffle_size * (1.0 - train_ratio))
        test_parser = ChunkParser(FileDataSrc(test_chunks),
                                  shuffle_size=shuffle_size,
                                  sample=SKIP,
                                  batch_size=ChunkParser.BATCH_SIZE)
        parsers.append(test_parser)
        dataset = tf.data.Dataset.from_generator(
            test_parser.parse,
            output_types=(tf.string, tf.string, tf.string, tf.string))
        dataset = dataset.map(ChunkParser.parse_function)
        dataset = dataset.prefetch(4)
        test_iterator = dataset.make_one_shot_iterator()

        tfprocess = TFProcess(cfg)
        # NOTE: ``dataset`` refers to the test pipeline at this point.
        tfprocess.init(dataset, train_iterator, test_iterator)

        if os.path.exists(os.path.join(root_dir, 'checkpoint')):
            cp = tf.train.latest_checkpoint(root_dir)
            tfprocess.restore(cp)

        # If number of test positions is not given
        # sweeps through all test chunks statistically
        # Assumes average of 10 samples per test game.
        # For simplicity, testing can use the split batch size instead of
        # total batch size. This does not affect results, because test
        # results are simple averages that are independent of batch size.
        num_evals = cfg['training'].get('num_test_positions', num_test * 10)
        num_evals = max(1, num_evals // ChunkParser.BATCH_SIZE)
        print("Using {} evaluation batches".format(num_evals))

        tfprocess.process_loop(total_batch_size,
                               num_evals,
                               batch_splits=batch_splits)

        if cmd.output is not None:
            tfprocess.save_leelaz_weights(cmd.output)

        tfprocess.session.close()
    finally:
        for parser in parsers:
            parser.shutdown()
def main(cmd):
    """Train from the YAML config in ``cmd.cfg`` with optional validation data.

    Chunk files are selected with ``get_latest_chunks`` using the sort key
    chosen by ``dataset.sort_type`` (``mtime``, ``number`` or ``name``).
    Train and test parsers are created first; TensorFlow and the
    TensorFlow-dependent project modules are imported only afterwards.
    Datasets are prefetched when ``tfprocess.strategy`` is None and otherwise
    get auto-sharding switched off. ``process_loop`` is then run and, when
    ``cmd.output`` is not None, either the SWA weights (``training.swa_output``
    truthy) or the regular weights are saved.

    Args:
        cmd: Parsed command-line arguments; ``cmd.cfg.read()`` must return
            the YAML text and ``cmd.output`` is the optional weights path.

    Raises:
        ValueError: If ``dataset.sort_type`` is unknown or
            ``num_batch_splits`` does not divide ``batch_size``.
        KeyError: If a required config entry is missing.
    """
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    allow_less = cfg['dataset'].get('allow_less_chunks', False)
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train

    sort_type = cfg['dataset'].get('sort_type', 'mtime')
    if sort_type == 'mtime':
        sort_key_fn = os.path.getmtime
    elif sort_type == 'number':
        sort_key_fn = game_number_for_name
    elif sort_type == 'name':
        sort_key_fn = identity_function
    else:
        raise ValueError('Unknown dataset sort_type: {}'.format(sort_type))

    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'],
                                         num_train, allow_less, sort_key_fn)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'],
                                        num_test, allow_less, sort_key_fn)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks,
                                   allow_less, sort_key_fn)
        if allow_less:
            # Fewer chunks than requested may have been returned, so the
            # split is recomputed from the actual count.
            num_train = int(len(chunks) * train_ratio)
            num_test = len(chunks) - num_train
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    train_workers = cfg['dataset'].get('train_workers', None)
    test_workers = cfg['dataset'].get('test_workers', None)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits

    diff_focus_min = cfg['training'].get('diff_focus_min', 1)
    diff_focus_slope = cfg['training'].get('diff_focus_slope', 0)
    diff_focus_q_weight = cfg['training'].get('diff_focus_q_weight', 6.0)
    diff_focus_pol_scale = cfg['training'].get('diff_focus_pol_scale', 3.5)

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(root_dir, exist_ok=True)

    # Train/test parsers are registered as soon as they exist so the
    # ``finally`` below shuts them down even if a later step raises.
    parsers = []
    try:
        train_parser = ChunkParser(train_chunks,
                                   get_input_mode(cfg),
                                   shuffle_size=shuffle_size,
                                   sample=SKIP,
                                   batch_size=split_batch_size,
                                   diff_focus_min=diff_focus_min,
                                   diff_focus_slope=diff_focus_slope,
                                   diff_focus_q_weight=diff_focus_q_weight,
                                   diff_focus_pol_scale=diff_focus_pol_scale,
                                   workers=train_workers)
        parsers.append(train_parser)

        test_shuffle_size = int(shuffle_size * (1.0 - train_ratio))
        # no diff focus for test_parser
        test_parser = ChunkParser(test_chunks,
                                  get_input_mode(cfg),
                                  shuffle_size=test_shuffle_size,
                                  sample=SKIP,
                                  batch_size=split_batch_size,
                                  workers=test_workers)
        parsers.append(test_parser)

        if 'input_validation' in cfg['dataset']:
            valid_chunks = get_all_chunks(cfg['dataset']['input_validation'])
            # Created with workers=0; as in the original flow it is not
            # part of the shutdown list.
            validation_parser = ChunkParser(valid_chunks,
                                            get_input_mode(cfg),
                                            sample=1,
                                            batch_size=split_batch_size,
                                            workers=0)

        # These imports intentionally stay after parser construction, as in
        # the original. NOTE(review): presumably so parser workers start
        # before TensorFlow is loaded -- confirm before moving them.
        import tensorflow as tf
        from chunkparsefunc import parse_function
        from tfprocess import TFProcess
        tfprocess = TFProcess(cfg)

        record_types = (tf.string, tf.string, tf.string, tf.string, tf.string)
        train_dataset = tf.data.Dataset.from_generator(
            train_parser.parse, output_types=record_types)
        train_dataset = train_dataset.map(parse_function)
        test_dataset = tf.data.Dataset.from_generator(
            test_parser.parse, output_types=record_types)
        test_dataset = test_dataset.map(parse_function)

        validation_dataset = None
        if 'input_validation' in cfg['dataset']:
            validation_dataset = tf.data.Dataset.from_generator(
                validation_parser.sequential, output_types=record_types)
            validation_dataset = validation_dataset.map(parse_function)

        if tfprocess.strategy is None:
            # Mirrored strategy appends prefetch itself with a value
            # depending on number of replicas
            train_dataset = train_dataset.prefetch(4)
            test_dataset = test_dataset.prefetch(4)
            if validation_dataset is not None:
                validation_dataset = validation_dataset.prefetch(4)
        else:
            options = tf.data.Options()
            options.experimental_distribute.auto_shard_policy = (
                tf.data.experimental.AutoShardPolicy.OFF)
            train_dataset = train_dataset.with_options(options)
            test_dataset = test_dataset.with_options(options)
            if validation_dataset is not None:
                validation_dataset = validation_dataset.with_options(options)

        tfprocess.init(train_dataset, test_dataset, validation_dataset)

        tfprocess.restore()

        # If number of test positions is not given
        # sweeps through all test chunks statistically
        # Assumes average of 10 samples per test game.
        # For simplicity, testing can use the split batch size instead of
        # total batch size. This does not affect results, because test
        # results are simple averages that are independent of batch size.
        num_evals = cfg['training'].get('num_test_positions',
                                        len(test_chunks) * 10)
        num_evals = max(1, num_evals // split_batch_size)
        print("Using {} evaluation batches".format(num_evals))
        tfprocess.total_batch_size = total_batch_size
        tfprocess.process_loop(total_batch_size,
                               num_evals,
                               batch_splits=batch_splits)

        if cmd.output is not None:
            if cfg['training'].get('swa_output', False):
                tfprocess.save_swa_weights(cmd.output)
            else:
                tfprocess.save_leelaz_weights(cmd.output)
    finally:
        for parser in parsers:
            parser.shutdown()
def main(cmd):
    """Train from fixed TFRecord files using the YAML config in ``cmd.cfg``.

    The chunk selection and batch-split settings are still read from the
    config, but the train and test pipelines read hard-coded ``gs://``
    TFRecord files, and the checkpoint is restored unconditionally from a
    hard-coded ``gs://`` model directory. Weights are saved to ``cmd.output``
    when it is not None.

    Raises:
        ValueError: If ``num_batch_splits`` does not divide ``batch_size``.
    """
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    dataset_cfg = cfg['dataset']
    num_chunks = dataset_cfg['num_chunks']
    train_ratio = dataset_cfg['train_ratio']
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train

    # The chunk lists are not consumed by the TFRecord pipelines below; the
    # lookups are kept because the original performs them.
    if 'input_test' not in dataset_cfg:
        chunks = get_latest_chunks(dataset_cfg['input'], num_chunks)
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]
    else:
        train_chunks = get_latest_chunks(dataset_cfg['input_train'], num_train)
        test_chunks = get_latest_chunks(dataset_cfg['input_test'], num_test)

    training_cfg = cfg['training']
    shuffle_size = training_cfg['shuffle_size']
    total_batch_size = training_cfg['batch_size']
    batch_splits = training_cfg.get('num_batch_splits', 1)
    if total_batch_size % batch_splits:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # The parser batch size is the per-split size; tfprocess combines the
    # splits back into the total batch size.
    ChunkParser.BATCH_SIZE = split_batch_size

    root_dir = os.path.join(training_cfg['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    def parse_example(example_proto):
        """Decode one serialized example into (planes, probs, winner).

        Feature shapes: planes (112, 64), probs 1858, winner scalar, all
        float32.
        """
        feature_spec = {
            'planes': tf.FixedLenFeature((112, 64), tf.float32),
            'probs': tf.FixedLenFeature(1858, tf.float32),
            'winner': tf.FixedLenFeature([], tf.float32),
        }
        parsed = tf.parse_single_example(example_proto, feature_spec)
        return parsed["planes"], parsed["probs"], parsed["winner"]

    def open_records(paths):
        """Return (dataset, one-shot iterator) over the given TFRecord files."""
        records = tf.data.TFRecordDataset(paths)
        records = records.map(parse_example, num_parallel_calls=10)
        records = records.prefetch(4)
        return records, records.make_one_shot_iterator()

    train_path = (
        "gs://jeremylorino-staging-bq-data/chess/lczero_data/train.tfrecords")
    test_path = (
        "gs://jeremylorino-staging-bq-data/chess/lczero_data/test.tfrecords")

    # Each file is listed twice, exactly as in the original file lists.
    _, train_iterator = open_records([train_path, train_path])
    dataset, test_iterator = open_records([test_path, test_path])

    tfprocess = TFProcess(cfg)
    # The test pipeline is the dataset handed to init().
    tfprocess.init(dataset, train_iterator, test_iterator)

    # No local checkpoint-file check here: restore always runs.
    cp = tf.train.latest_checkpoint(
        'gs://jeremylorino-staging-bq-data/chess/lczero_model/64x6-test-6/')
    tfprocess.restore(cp)

    # Evaluation batch count: 10 samples assumed per test game, divided by
    # the split batch size (test averages do not depend on batch size).
    num_evals = num_test * 10 // ChunkParser.BATCH_SIZE
    print("Using {} evaluation batches".format(num_evals))

    tfprocess.process_loop(total_batch_size,
                           num_evals,
                           batch_splits=batch_splits)

    if cmd.output is not None:
        tfprocess.save_leelaz_weights(cmd.output)

    tfprocess.session.close()
#!/usr/bin/env python3
# Builds a TFProcess sized by the ``setting`` module, initialises it for a
# single GPU with batch size 1, and writes its weights to 'restored.txt'.
import os
import sys

from tfprocess import TFProcess
import setting

# Module-level sizes; the TFProcess below is sized from ``setting`` instead.
blocks, channels = 6, 64

tfprocess = TFProcess(
    setting.RESIDUAL_BLOCKS,
    setting.RESIDUAL_FILTERS,
)
tfprocess.init(batch_size=1, gpus_num=1)
# Disabled step: tfprocess.replace_weights(weights)

# Checkpoint prefix under the current working directory; only the disabled
# saver call below would use it.
path = os.path.join(os.getcwd(), "leelaz-model")

tfprocess.save_leelaz_weights('restored.txt')
# Disabled step:
# save_path = tfprocess.saver.save(tfprocess.session, path, global_step=0)
print("Version", line.strip()) if line != '1\n': raise ValueError("Unknown version {}".format(line.strip())) else: weights.append(list(map(float, line.split(' ')))) if e == 2: channels = len(line.split(' ')) print("Channels", channels) blocks = e - (4 + 14) if blocks % 8 != 0: raise ValueError("Inconsistent number of weights in the file") blocks /= 8 print("Blocks", blocks) return weights if __name__ == '__main__': gpu_num = 2 x = [[ tf.placeholder(tf.float32, [None, 18, 19 * 19]), tf.placeholder(tf.float32, [None, 362]), tf.placeholder(tf.float32, [None, 1]) ] for _ in range(gpu_num)] tfprocess = TFProcess(x) tfprocess.save_leelaz_weights("./save/random.txt") path = os.path.join(leela_conf.SAVE_DIR, "leelaz-model") tfprocess.replace_weights(get_weights(sys.argv[1])) print("saved to: ", path) save_path = tfprocess.save(0, path)