Exemplo n.º 1
0
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)

    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
                               shuffle_size=shuffle_size,
                               sample=SKIP,
                               batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(train_parser.parse,
                                             output_types=(tf.string,
                                                           tf.string,
                                                           tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    shuffle_size = int(shuffle_size * (1.0 - train_ratio))
    test_parser = ChunkParser(FileDataSrc(chunks[num_train:]),
                              shuffle_size=shuffle_size,
                              sample=SKIP,
                              batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(test_parser.parse,
                                             output_types=(tf.string,
                                                           tf.string,
                                                           tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = get_checkpoint(root_dir)
        tfprocess.restore(cp)

    # Sweeps through all test chunks statistically
    num_evals = (num_chunks - num_train) * 10 // ChunkParser.BATCH_SIZE
    print("Using {} evaluation batches".format(num_evals))

    for _ in range(cfg['training']['total_steps']):
        tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)

    tfprocess.save_leelaz_weights(cmd.output)

    tfprocess.session.close()
    train_parser.shutdown()
    test_parser.shutdown()
Exemplo n.º 2
0
def main():
    batch = [
        tf.placeholder(tf.float32, [None, 120, 8 * 8]),
        tf.placeholder(tf.float32, [None, 1924]),
        tf.placeholder(tf.float32, [None, 1]),
    ]
    tfprocess = TFProcess(batch)
    tfprocess.save_leelaz_weights('weights.txt')
Exemplo n.º 3
0
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)

    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks*train_ratio)
    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
            shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        train_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    shuffle_size = int(shuffle_size*(1.0-train_ratio))
    test_parser = ChunkParser(FileDataSrc(chunks[num_train:]), 
            shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = get_checkpoint(root_dir)
        tfprocess.restore(cp)

    # Sweeps through all test chunks statistically
    num_evals = (num_chunks-num_train)*10 // ChunkParser.BATCH_SIZE
    print("Using {} evaluation batches".format(num_evals))

    for _ in range(cfg['training']['total_steps']):
        tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)

    tfprocess.save_leelaz_weights(cmd.output)

    tfprocess.session.close()
    train_parser.shutdown()
    test_parser.shutdown()
Exemplo n.º 4
0
def main():
    if len(sys.argv) != 2:
        print("Usage: {} config.yaml".format(sys.argv[0]))
        return 1

    cfg = yaml.safe_load(open(sys.argv[1], 'r').read())
    print(yaml.dump(cfg, default_flow_style=False))

    batch = [
        tf.placeholder(tf.float32, [None, 120, 8 * 8]),
        tf.placeholder(tf.float32, [None, 1924]),
        tf.placeholder(tf.float32, [None, 1]),
    ]
    tfprocess = TFProcess(cfg, batch)
    tfprocess.save_leelaz_weights('weights.txt')
Exemplo n.º 5
0
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)

    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks*train_ratio)
    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
            shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        train_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    shuffle_size = int(shuffle_size*(1.0-train_ratio))
    test_parser = ChunkParser(FileDataSrc(chunks[num_train:]), 
            shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = get_checkpoint(root_dir)
        tfprocess.restore(cp)

    # Sweeps through all test chunks statistically
    num_evals = (num_chunks-num_train)*10 // ChunkParser.BATCH_SIZE
    print("Using {} evaluation batches".format(num_evals))

    for _ in range(cfg['training']['total_steps']):
        tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)

    tfprocess.save_leelaz_weights('/tmp/weights.txt')

    with open('/tmp/weights.txt', 'rb') as f:
        m = hashlib.sha256()
        w = f.read()
        m.update(w)
        digest = m.hexdigest()

    filename = '/tmp/{}.gz'.format(digest)
    with gzip.open(filename, 'wb') as f:
        f.write(w)

    if cmd.upload:
        metadata = {'training_id':'1', 'layers':cfg['model']['residual_blocks'],
                'filters':cfg['model']['filters']}
        print("\nUploading `{}'...".format(digest[:8]), end='')
        upload(cmd.upload, metadata, filename)
        print("[done]\n")
    else:
        print("\nStored `{}'\n".format(filename))
Exemplo n.º 6
0
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks*train_ratio)
    num_test = num_chunks - num_train
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'], num_train)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'], num_test)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # Load data with split batch size, which will be combined to the total batch size in tfprocess.
    ChunkParser.BATCH_SIZE = split_batch_size

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(FileDataSrc(train_chunks),
            shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        train_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    shuffle_size = int(shuffle_size*(1.0-train_ratio))
    test_parser = ChunkParser(FileDataSrc(test_chunks),
            shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = tf.train.latest_checkpoint(root_dir)
        tfprocess.restore(cp)

    # If number of test positions is not given
    # sweeps through all test chunks statistically
    # Assumes average of 10 samples per test game.
    # For simplicity, testing can use the split batch size instead of total batch size.
    # This does not affect results, because test results are simple averages that are independent of batch size.
    num_evals = cfg['training'].get('num_test_positions', num_test * 10)
    num_evals = max(1, num_evals // ChunkParser.BATCH_SIZE)
    print("Using {} evaluation batches".format(num_evals))

    tfprocess.process_loop(total_batch_size, num_evals, batch_splits=batch_splits)

    if cmd.output is not None:
        tfprocess.save_leelaz_weights(cmd.output)

    tfprocess.session.close()
    train_parser.shutdown()
    test_parser.shutdown()
Exemplo n.º 7
0
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    allow_less = cfg['dataset'].get('allow_less_chunks', False)
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train
    sort_type = cfg['dataset'].get('sort_type', 'mtime')
    if sort_type == 'mtime':
        sort_key_fn = os.path.getmtime
    elif sort_type == 'number':
        sort_key_fn = game_number_for_name
    elif sort_type == 'name':
        sort_key_fn = identity_function
    else:
        raise ValueError('Unknown dataset sort_type: {}'.format(sort_type))
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'],
                                         num_train, allow_less, sort_key_fn)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'], num_test,
                                        allow_less, sort_key_fn)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks,
                                   allow_less, sort_key_fn)
        if allow_less:
            num_train = int(len(chunks) * train_ratio)
            num_test = len(chunks) - num_train
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    train_workers = cfg['dataset'].get('train_workers', None)
    test_workers = cfg['dataset'].get('test_workers', None)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits

    diff_focus_min = cfg['training'].get('diff_focus_min', 1)
    diff_focus_slope = cfg['training'].get('diff_focus_slope', 0)
    diff_focus_q_weight = cfg['training'].get('diff_focus_q_weight', 6.0)
    diff_focus_pol_scale = cfg['training'].get('diff_focus_pol_scale', 3.5)

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(train_chunks,
                               get_input_mode(cfg),
                               shuffle_size=shuffle_size,
                               sample=SKIP,
                               batch_size=split_batch_size,
                               diff_focus_min=diff_focus_min,
                               diff_focus_slope=diff_focus_slope,
                               diff_focus_q_weight=diff_focus_q_weight,
                               diff_focus_pol_scale=diff_focus_pol_scale,
                               workers=train_workers)
    test_shuffle_size = int(shuffle_size * (1.0 - train_ratio))
    # no diff focus for test_parser
    test_parser = ChunkParser(test_chunks,
                              get_input_mode(cfg),
                              shuffle_size=test_shuffle_size,
                              sample=SKIP,
                              batch_size=split_batch_size,
                              workers=test_workers)
    if 'input_validation' in cfg['dataset']:
        valid_chunks = get_all_chunks(cfg['dataset']['input_validation'])
        validation_parser = ChunkParser(valid_chunks,
                                        get_input_mode(cfg),
                                        sample=1,
                                        batch_size=split_batch_size,
                                        workers=0)

    import tensorflow as tf
    from chunkparsefunc import parse_function
    from tfprocess import TFProcess
    tfprocess = TFProcess(cfg)
    train_dataset = tf.data.Dataset.from_generator(
        train_parser.parse,
        output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
    train_dataset = train_dataset.map(parse_function)
    test_dataset = tf.data.Dataset.from_generator(
        test_parser.parse,
        output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
    test_dataset = test_dataset.map(parse_function)

    validation_dataset = None
    if 'input_validation' in cfg['dataset']:
        validation_dataset = tf.data.Dataset.from_generator(
            validation_parser.sequential,
            output_types=(tf.string, tf.string, tf.string, tf.string,
                          tf.string))
        validation_dataset = validation_dataset.map(parse_function)

    if tfprocess.strategy is None:  #Mirrored strategy appends prefetch itself with a value depending on number of replicas
        train_dataset = train_dataset.prefetch(4)
        test_dataset = test_dataset.prefetch(4)
        if validation_dataset is not None:
            validation_dataset = validation_dataset.prefetch(4)
    else:
        options = tf.data.Options()
        options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
        train_dataset = train_dataset.with_options(options)
        test_dataset = test_dataset.with_options(options)
        if validation_dataset is not None:
            validation_dataset = validation_dataset.with_options(options)
    tfprocess.init(train_dataset, test_dataset, validation_dataset)

    tfprocess.restore()

    # If number of test positions is not given
    # sweeps through all test chunks statistically
    # Assumes average of 10 samples per test game.
    # For simplicity, testing can use the split batch size instead of total batch size.
    # This does not affect results, because test results are simple averages that are independent of batch size.
    num_evals = cfg['training'].get('num_test_positions',
                                    len(test_chunks) * 10)
    num_evals = max(1, num_evals // split_batch_size)
    print("Using {} evaluation batches".format(num_evals))
    tfprocess.total_batch_size = total_batch_size
    tfprocess.process_loop(total_batch_size,
                           num_evals,
                           batch_splits=batch_splits)

    if cmd.output is not None:
        if cfg['training'].get('swa_output', False):
            tfprocess.save_swa_weights(cmd.output)
        else:
            tfprocess.save_leelaz_weights(cmd.output)

    train_parser.shutdown()
    test_parser.shutdown()
Exemplo n.º 8
0
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'],
                                         num_train)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'], num_test)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # Load data with split batch size, which will be combined to the total batch size in tfprocess.
    ChunkParser.BATCH_SIZE = split_batch_size

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    def map_fn(example_proto):
        """
        planes <tf.Tensor 'Reshape:0' shape=(2048, 112, 64) dtype=float32>
        probs <tf.Tensor 'Reshape_1:0' shape=(2048, 1858) dtype=float32>
        winner <tf.Tensor 'Reshape_2:0' shape=(2048, 1) dtype=float32>
        """

        tfrecord_features = {
            'planes': tf.FixedLenFeature((112, 64), tf.float32),
            'probs': tf.FixedLenFeature((1858), tf.float32),
            'winner': tf.FixedLenFeature([], tf.float32)
        }
        parsed_features = tf.parse_single_example(example_proto,
                                                  tfrecord_features)
        planes, probs, winner = parsed_features["planes"], parsed_features[
            "probs"], parsed_features["winner"]

        return planes, probs, winner

    filenames = [
        "gs://jeremylorino-staging-bq-data/chess/lczero_data/train.tfrecords",
        "gs://jeremylorino-staging-bq-data/chess/lczero_data/train.tfrecords"
    ]
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(map_fn, num_parallel_calls=10)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    # shuffle_size = int(shuffle_size*(1.0-train_ratio))
    filenames = [
        "gs://jeremylorino-staging-bq-data/chess/lczero_data/test.tfrecords",
        "gs://jeremylorino-staging-bq-data/chess/lczero_data/test.tfrecords"
    ]
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(
        map_fn,
        num_parallel_calls=10)  # .take(int(num_chunks*(1.0-train_ratio)))
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    # if os.path.exists(os.path.join(root_dir, 'checkpoint')):
    cp = tf.train.latest_checkpoint(
        'gs://jeremylorino-staging-bq-data/chess/lczero_model/64x6-test-6/')
    tfprocess.restore(cp)

    # Sweeps through all test chunks statistically
    # Assumes average of 10 samples per test game.
    # For simplicity, testing can use the split batch size instead of total batch size.
    # This does not affect results, because test results are simple averages that are independent of batch size.
    num_evals = num_test * 10 // ChunkParser.BATCH_SIZE
    print("Using {} evaluation batches".format(num_evals))

    tfprocess.process_loop(total_batch_size,
                           num_evals,
                           batch_splits=batch_splits)

    if cmd.output is not None:
        tfprocess.save_leelaz_weights(cmd.output)

    tfprocess.session.close()
Exemplo n.º 9
0
#!/usr/bin/env python3
import os
import sys
from tfprocess import TFProcess
import setting

blocks = 6
channels = 64
tfprocess = TFProcess(setting.RESIDUAL_BLOCKS, setting.RESIDUAL_FILTERS)
tfprocess.init(batch_size=1, gpus_num=1)
# tfprocess.replace_weights(weights)
path = os.path.join(os.getcwd(), "leelaz-model")
tfprocess.save_leelaz_weights('restored.txt')
# save_path = tfprocess.saver.save(tfprocess.session, path, global_step=0)
Exemplo n.º 10
0
                print("Version", line.strip())
                if line != '1\n':
                    raise ValueError("Unknown version {}".format(line.strip()))
            else:
                weights.append(list(map(float, line.split(' '))))
            if e == 2:
                channels = len(line.split(' '))
                print("Channels", channels)
        blocks = e - (4 + 14)
        if blocks % 8 != 0:
            raise ValueError("Inconsistent number of weights in the file")
        blocks /= 8
        print("Blocks", blocks)
    return weights


if __name__ == '__main__':
    gpu_num = 2
    x = [[
        tf.placeholder(tf.float32, [None, 18, 19 * 19]),
        tf.placeholder(tf.float32, [None, 362]),
        tf.placeholder(tf.float32, [None, 1])
    ] for _ in range(gpu_num)]

    tfprocess = TFProcess(x)
    tfprocess.save_leelaz_weights("./save/random.txt")
    path = os.path.join(leela_conf.SAVE_DIR, "leelaz-model")
    tfprocess.replace_weights(get_weights(sys.argv[1]))
    print("saved to: ", path)
    save_path = tfprocess.save(0, path)