Example #1
def print_ckpt_tensor_name(checkpoint_path):
    num_tensor = 0
    ckpt = 'ckpt'
    checkpoint_name = None
    os.chdir(checkpoint_path)
    # Recover the checkpoint prefix (e.g. model.ckpt-1000) from the first matching file
    for each_file in os.listdir(os.curdir):
        if ckpt in each_file:
            checkpoint_name = each_file.split(
                ckpt)[0] + ckpt + each_file.split(ckpt)[1].split('.')[0]
            break

    if checkpoint_name is None:
        return

    model_reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_name)
    var_dict = model_reader.get_variable_to_shape_map()
    for key in var_dict:
        num_tensor = num_tensor + 1
        print(key + " " + str(model_reader.get_tensor(key).shape))
        if key == 'bert/encoder/layer_2/attention/self/qkv_weight'\
        or key == 'bert/embeddings/word_embeddings'\
        or key == 'bert/pooler/dense/kernel':
            print(model_reader.get_tensor(key))
    print(num_tensor)
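
The same listing can also be produced without pywrap_tensorflow; a minimal sketch using tf.train.list_variables and tf.train.load_checkpoint (the checkpoint prefix and tensor name below are placeholders):

import tensorflow as tf

def list_ckpt_tensors(checkpoint_prefix):
    # (name, shape) pairs, read from the checkpoint index without loading values
    for name, shape in tf.train.list_variables(checkpoint_prefix):
        print(name, shape)
    # load_checkpoint returns a CheckpointReader with the same get_tensor interface
    reader = tf.train.load_checkpoint(checkpoint_prefix)
    return reader.get_tensor("bert/embeddings/word_embeddings")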
Example #2
def get_embedding():
    model_dir = '/home/zpl/Model_rnn/whole_256_3/model_longtime' + '-%d' % 16
    reader = pywrap_tensorflow.NewCheckpointReader(model_dir)
    var_to_shape_map = reader.get_variable_to_shape_map()
    for key in var_to_shape_map.keys():
        # print(var_to_shape_map[key])
        if key == 'model/embedding':
            # print("tensor_name: ", key)
            embedding = reader.get_tensor(key)
            return embedding
Example #3
def convert_compare_ipu_gpu(ckpt_a, ckpt_b):
    reader_a = pywrap_tensorflow.NewCheckpointReader(ckpt_a)
    reader_b = pywrap_tensorflow.NewCheckpointReader(ckpt_b)
    var_to_shape_map_a = reader_a.get_variable_to_shape_map()
    for tensor_name in var_to_shape_map_a:
        try:
            tensor_value_a = reader_a.get_tensor(tensor_name)
            tensor_value_b = reader_b.get_tensor(tensor_name)
            # Compare element-wise; .any() only reports whether any element is non-zero
            if not np.array_equal(tensor_value_a, tensor_value_b):
                print(tensor_name)
        except Exception:
            print("Not found tensor: {}".format(tensor_name))

    print("finish compare!")
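
Exact equality can be too strict when the two checkpoints come from different devices; a tolerance-based sketch (assuming both checkpoints share variable names) could use np.allclose instead:

import numpy as np
from tensorflow.python import pywrap_tensorflow

def compare_ckpt_with_tolerance(ckpt_a, ckpt_b, rtol=1e-5, atol=1e-8):
    reader_a = pywrap_tensorflow.NewCheckpointReader(ckpt_a)
    reader_b = pywrap_tensorflow.NewCheckpointReader(ckpt_b)
    mismatched = []
    for name in reader_a.get_variable_to_shape_map():
        if not reader_b.has_tensor(name):
            print("Not found tensor: {}".format(name))
            continue
        # np.allclose tolerates small numeric drift between devices
        if not np.allclose(reader_a.get_tensor(name), reader_b.get_tensor(name),
                           rtol=rtol, atol=atol):
            mismatched.append(name)
    return mismatched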
Example #4
def print_vars(data_type=np.float16):
    checkpoint_name = 'ckpt_noshuffDIEN3'
    current_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) + '/'
    out_dir = current_dir + "model-F16" + '/'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # `args` is defined elsewhere in the original script; args.ckpt is the source checkpoint path
    reader = pywrap_tensorflow.NewCheckpointReader(args.ckpt)
    var_to_map = reader.get_variable_to_shape_map()
    val_f = {}
    for key, dim in var_to_map.items():
        val_f[key.strip(":0")] = tf.Variable(reader.get_tensor(key).astype(data_type))

    # log parameters before convert
    param_log_origin = ''
    for key in var_to_map:
        param_log_origin += "tensor_name: " + key + "  shape:" + str(reader.get_tensor(key).shape) + "\r\n"
        param_log_origin += str(reader.get_tensor(key)) + "\r\n"
    with open(out_dir + 'Param-' + str(reader.get_tensor(key).dtype) + '.txt', 'w', encoding="utf-8") as writer:
        writer.write(param_log_origin)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        new_saver = tf.train.import_meta_graph(args.ckpt + '.meta')
        new_saver.restore(sess, args.ckpt)
        saver = tf.train.Saver(val_f)
        saver.save(sess, out_dir + checkpoint_name)

    # save parameters after convert
    reader_convert = pywrap_tensorflow.NewCheckpointReader(out_dir + checkpoint_name)
    var_to_map_convert = reader_convert.get_variable_to_shape_map()
    param_log_convert = ''
    for item in var_to_map_convert:
        param_log_convert += "tensor_name: " + item + "  shape:" + str(reader_convert.get_tensor(item).shape) + "\r\n"
        param_log_convert += str(reader_convert.get_tensor(item)) + "\r\n"
    with open(out_dir + 'Param-' + str(reader_convert.get_tensor(item).dtype) + '.txt', 'w', encoding="utf-8") as writer:
        writer.write(param_log_convert)

    print("Convert Finish!")
    print("Save to path: " + out_dir)
Example #5
def get_embeding(
    path="/home/xihuaiwen/chinese/CLUE_B/baselines/models/bert/prev_trained_model/chinese_L-12_H-768_A-12/gc_ckpt/model.ckpt-525000"
):
    reader = pywrap_tensorflow.NewCheckpointReader(path)
    var_to_shape_map = reader.get_variable_to_shape_map()
    val = None
    for key in var_to_shape_map:
        # Skip optimizer slots and keep the word embedding table
        if "adam" not in key and "Momentum" not in key:
            if 'word_embeddings' in key:
                val = reader.get_tensor(key)
    return val
Example #6
File: log.py  Project: graphcore/examples
def load_initializers_from_checkpoint(checkpoint_path):
    initializers = {}
    reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_path)
    var_to_map = reader.get_variable_to_shape_map()
    for key, dim in var_to_map.items():
        if key == 'global_step':
            continue
        # if reader.get_tensor(key).dtype.name == 'float16':
        #     int_data = np.asarray(reader.get_tensor(key), np.int32)
        #     np_weight = int_data.view(dtype=np.float16).reshape(dim)
        # else:
        np_weight = reader.get_tensor(key)
        initializers[key] = np_weight
    return initializers
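
A short sketch of how the returned dictionary might be consumed (the checkpoint path and variable name are placeholders):

initializers = load_initializers_from_checkpoint("/path/to/model.ckpt-1000")
for name, np_weight in initializers.items():
    print(name, np_weight.shape, np_weight.dtype)
# e.g. wrap one entry as an initializer for a freshly built variable
embedding_init = tf.constant_initializer(initializers["bert/embeddings/word_embeddings"])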
Example #7
def print_ckpt_tensor_name(checkpoint_path):
    ckpt = '.ckpt'
    checkpoint_name = None
    os.chdir(checkpoint_path)
    for each_file in os.listdir(os.curdir):
        if ckpt in each_file:
            checkpoint_name = each_file.split(ckpt)[0] + ckpt
            break

    if checkpoint_name is None:
        return

    model_reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_name)
    var_dict = model_reader.get_variable_to_shape_map()
    for key in var_dict:
        print(key)
        if key == 'bert/encoder/layer_1/attention/output/dense/bias':
            print(model_reader.get_tensor(key))
Example #8
def convert_ckpt_to_fp16(ckpt_file: str) -> tf.train.Saver:
    """Convert checkpoint to fp16 weights and return saver.

    Args:
        ckpt_file: Path to checkpoint file.

    Returns: tf.train.Saver object initialized with dictionary of fp16 variables.

    """
    # Strip .data-xxxx-xxxx
    if not ckpt_file.endswith(".ckpt"):
        ckpt_file = ckpt_file.rsplit('.', 1)[0]

    reader = pywrap_tensorflow.NewCheckpointReader(ckpt_file)
    var_to_map = reader.get_variable_to_shape_map()

    val_f16 = {}
    for key, _ in var_to_map.items():
        val_f16[key.strip(":0")] = tf.Variable(reader.get_tensor(key).astype(np.float16))
    saver = tf.train.Saver(val_f16)
    return saver
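
The returned Saver only references the newly created fp16 Variables, so writing them out still needs a session; a minimal usage sketch (paths are placeholders):

with tf.Session() as sess:
    # Variables and Saver must exist before the initializer op is run
    fp16_saver = convert_ckpt_to_fp16("/path/to/model.ckpt")
    sess.run(tf.global_variables_initializer())
    fp16_saver.save(sess, "/path/to/model_fp16.ckpt")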
Example #9
def convert_ckpt_to_fp(checkpoint_path, data_type=np.float16):
    """Convert checkpoint weights to the given floating-point type and save a copy.

    Args:
        checkpoint_path: Directory containing the checkpoint files.
        data_type: np.float16, np.float32, or np.float64.
    """
    ckpt = '.ckpt'
    checkpoint_name = None
    os.chdir(checkpoint_path)
    for each_file in os.listdir(os.curdir):
        if ckpt in each_file:
            checkpoint_name = each_file.split(ckpt)[0] + ckpt
            break

    if checkpoint_name is None:
        return

    current_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) + '/'
    out_dir = current_dir + checkpoint_path + "-F16" + '/'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # copy .txt/.json files (e.g. vocab and config) next to the converted checkpoint
    for each_file in os.listdir(os.curdir):
        ext = os.path.splitext(each_file)[1]
        if ext in ['.txt', '.json']:
            copyfile(current_dir + checkpoint_path + '/' + each_file, out_dir + each_file)

    reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_name)
    var_to_map = reader.get_variable_to_shape_map()
    val_f = {}
    for key, dim in var_to_map.items():
        val_f[key.strip(":0")] = tf.Variable(reader.get_tensor(key).astype(data_type))
        '''
        if 'word_embeddings' in key:
            temp = reader.get_tensor(key)[:2896,:]
            val_f[key.strip(":0")] =  tf.Variable(temp.astype(data_type))#119547
        if 'dense' in key:
            if len(dim)>1:
                need_split_dim1 = False
                need_split_dim2 = False
                need_split_dim1 = True if dim[0]==3072 else False
                need_split_dim2 = True if dim[1]==3072 else False
                if need_split_dim1:
                    temp = reader.get_tensor(key)[:2048,:]
                    val_f[key.strip(":0")] =  tf.Variable(temp.astype(data_type))
                elif need_split_dim2:
                    temp = reader.get_tensor(key)[:,:2048]
                    val_f[key.strip(":0")] =  tf.Variable(temp.astype(data_type))
                elif need_split_dim1 and need_split_dim2:
                    temp = reader.get_tensor(key)[:2048,:2048]
                    val_f[key.strip(":0")] =  tf.Variable(temp.astype(data_type))
            else:
                if dim[0]==3072:
                    temp = reader.get_tensor(key)[:2048]
                    val_f[key.strip(":0")] =  tf.Variable(temp.astype(data_type))
        '''
        
    # log parameters before convert
    param_log_origin = ''
    for key in var_to_map:
        param_log_origin += "tensor_name: " + key + "  shape:" + str(reader.get_tensor(key).shape) + "\r\n"
        param_log_origin += str(reader.get_tensor(key)) + "\r\n"
    with open(out_dir + 'Param-' + str(reader.get_tensor(key).dtype) + '.txt', 'w', encoding="utf-8") as writer:
        writer.write(param_log_origin)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        new_saver = tf.train.import_meta_graph(current_dir + checkpoint_path + '/' + checkpoint_name + '.meta')
        new_saver.restore(sess, current_dir + checkpoint_path + '/' + checkpoint_name)
        saver = tf.train.Saver(val_f)
        saver.save(sess, out_dir + checkpoint_name)

    # save parameters after convert
    reader_convert = pywrap_tensorflow.NewCheckpointReader(out_dir + checkpoint_name)
    var_to_map_convert = reader_convert.get_variable_to_shape_map()
    param_log_convert = ''
    for item in var_to_map_convert:
        param_log_convert += "tensor_name: " + item + "  shape:" + str(reader_convert.get_tensor(item).shape) + "\r\n"
        param_log_convert += str(reader_convert.get_tensor(item)) + "\r\n"
    with open(out_dir + 'Param-' + str(reader_convert.get_tensor(item).dtype) + '.txt', 'w', encoding="utf-8") as writer:
        writer.write(param_log_convert)

    print("Convert Finish!")
    print("Save to path: " + out_dir)
Example #10
if os.path.isfile(config_para["test"]):
    dt_sentences = dev_sentences + test_sentences
else:
    dt_sentences = dev_sentences

if 'bin' in parameters['pre_emb']:
    wordmodel = gensim.models.KeyedVectors.load_word2vec_format(
        parameters['pre_emb'], binary=True)
else:
    wordmodel = gensim.models.KeyedVectors.load_word2vec_format(
        parameters['pre_emb'], binary=False)

# load bioBert embedding
bert_word_embedding = None
word_index_dic = {}
reader = pywrap_tensorflow.NewCheckpointReader(
    parameters['bio_bert_embedding'])
bert_word_embedding = reader.get_tensor("bert/embeddings/word_embeddings")
with open(parameters['bio_bert_vocab'], "r", encoding="utf8") as f:
    vocab = f.readlines()
    for index, i in enumerate(vocab):
        word_index_dic[i.strip()] = index
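
# A possible lookup helper for the table above; the [UNK] fallback is an assumption
# about the bioBERT vocabulary.
def lookup_bert_embedding(word, unk_token="[UNK]"):
    index = word_index_dic.get(word, word_index_dic.get(unk_token, 0))
    return bert_word_embedding[index]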

# Create a dictionary / mapping of words
# If we use pretrained embeddings, we add them to the dictionary.
word_to_id = []
char_to_id = []
pt_to_id = []
tag_to_id = []
if not parameters['reload']:
    if parameters['pre_emb']:
        # mapping of words by decreasing frequency
Example #11
def convert_research_ckpt_to_apps(
    ckpt_file,
    output_dir,
    num_embed_split,
    vocab_size,
    use_attention_bias,
    use_qkv_bias,
    use_cls_layer,
    baseline,
    dtype,
):

    saved_variables = []
    split_embeddings = []

    def add_variable(old_tensor, new_tensor):
        logging.info(f"{old_tensor} -> {new_tensor}")
        saved_variables.append(new_tensor)

    graph = tf.Graph()
    reader = pywrap_tensorflow.NewCheckpointReader(ckpt_file)
    var_to_shape_map = reader.get_variable_to_shape_map()

    with graph.as_default():
        sess = tf.compat.v1.Session()
        for old_tensor_name in sorted(var_to_shape_map):
            # Filter out the optimizer variables
            if 'global_step' in old_tensor_name:
                continue
            if filter_optimizer(old_tensor_name):
                continue
            if not use_cls_layer and "transform" in old_tensor_name:
                logging.info("Discarding dense layer before MLM loss.")
                continue
            if not use_attention_bias and "output/dense/bias" in old_tensor_name:
                logging.info("Discarding attention biases.")
                continue

            this_tensor_dtype = dtype

            tensor_value = tf.cast(reader.get_tensor(old_tensor_name),
                                   dtype=this_tensor_dtype)

            new_name = old_tensor_name

            if new_name.startswith('all/'):
                new_name = new_name[4:]

            if "Norm" in new_name:
                new_name = new_name.replace("GroupNorm", "LayerNorm")

            if "/layer_" in new_name and "encoder" in new_name:

                if new_name.endswith('weight') and '/dwconv/' not in new_name:
                    new_name = new_name.replace("weight", "dense/kernel")
                if new_name.endswith('bias'):
                    new_name = new_name.replace("bias", "dense/bias")

                if '/boom' in new_name:
                    new_name = new_name.replace("boom", "feed_forward_")

                    if '/up/' in new_name:
                        new_name = new_name.replace("/up/", "/intermediate/")
                    if '/down/' in new_name:
                        new_name = new_name.replace("/down/", "/output/")
                    if '/mixer/' in new_name:
                        new_name = new_name.replace("/mixer/",
                                                    "/output/mixer/")

                    if baseline:
                        new_name = new_name.replace("/feed_forward_", "")
                        new_name = new_name.replace("/postnorm/", "/output/")

                if '/conv/' in new_name:
                    new_name = new_name.replace("/conv/", "/convolution/")

                    if "/pre/" in new_name:
                        hidden_dim = tensor_value.shape[-1] // 2
                        values = tensor_value[..., :hidden_dim]
                        gates = tensor_value[..., hidden_dim:]
                        values_name = new_name.replace("/pre/",
                                                       "/pre/glu/values/")
                        gates_name = new_name.replace("/pre/",
                                                      "/pre/glu/gates/")
                        values_var = tf.Variable(values, name=values_name)
                        gates_var = tf.Variable(gates, name=gates_name)
                        add_variable(old_tensor_name, values_var)
                        add_variable(old_tensor_name, gates_var)
                        continue

                if '/attention/' in new_name:
                    new_name = new_name.replace("attention/output",
                                                "attention/projection")
                    new_name = new_name.replace("/qkv/", "/self/")
                    if '/self/' in new_name:
                        new_name = new_name.replace("dense/bias", "bias")
                        if 'self/dense/kernel' in new_name:
                            hidden_dim = tensor_value.shape[-1] // 3
                            q = tensor_value[..., :hidden_dim]
                            k = tensor_value[..., hidden_dim:2 * hidden_dim]
                            v = tensor_value[...,
                                             2 * hidden_dim:3 * hidden_dim]
                            q_name = new_name.replace("/dense/kernel",
                                                      "/query/kernel")
                            k_name = new_name.replace("/dense/kernel",
                                                      "/key/kernel")
                            v_name = new_name.replace("/dense/kernel",
                                                      "/value/kernel")
                            q_var = tf.Variable(q, name=q_name)
                            add_variable(old_tensor_name, q_var)
                            k_var = tf.Variable(k, name=k_name)
                            add_variable(old_tensor_name, k_var)
                            v_var = tf.Variable(v, name=v_name)
                            add_variable(old_tensor_name, v_var)
                            continue

                    if baseline:
                        new_name = new_name.replace("/postnorm/",
                                                    "/projection/")

            elif 'encoder/post_layers' in new_name:
                new_name = new_name.replace("bert/encoder/post_layers/", "")

            elif "word_embeddings" in new_name:
                split_match = re.search(r"/s\d/", new_name)
                if split_match and num_embed_split == 1:
                    # Collect embedding shards here; they are merged after the loop
                    split_embeddings.append(tensor_value)
                    continue

            new_var = tf.Variable(tensor_value, name=new_name)
            add_variable(old_tensor_name, new_var)

        if split_embeddings and num_embed_split == 1:
            merged_embedding = tf.concat(split_embeddings, axis=0)
            new_var = tf.Variable(merged_embedding,
                                  name="bert/embeddings/word_embeddings")
            add_variable(split_embeddings[0], new_var)

        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver()
        _dir_name, ckpt_name = os.path.split(ckpt_file)
        output_file = os.path.join(output_dir, ckpt_name)
        saver.save(sess, output_file)

        num_params = np.sum([np.prod(v.shape) for v in saved_variables])
        print(f"Number of parameters saved: {num_params}")
Example #12
def convert_gc_ckpt_to_google(ckpt_file,
                              output_dir=None,
                              include_qkv_bias=False,
                              dtype=tf.float32):
    graph = tf.Graph()
    dir_name, ckpt_name = os.path.split(os.path.abspath(ckpt_file))
    reader = pywrap_tensorflow.NewCheckpointReader(ckpt_file)
    var_to_shape_map = reader.get_variable_to_shape_map()
    with graph.as_default():
        sess = tf.Session()
        num_hidden_layers = 0
        word_embeddings = []
        new_variables = []
        keep_variables = []
        for tensor_name in var_to_shape_map:
            logging.info(f"Loading {tensor_name}")
            # Filter the optimizer variables
            if filter_optimizer(tensor_name):
                continue

            tensor_value = tf.cast(reader.get_tensor(tensor_name), dtype=dtype)
            if "word_embeddings" in tensor_name:
                word_embeddings.append(tensor_name)
            elif "attention" in tensor_name:
                layer_idx = int(tensor_name.split("/")[2].split("_")[-1])
                num_hidden_layers = max(layer_idx, num_hidden_layers)
                if "qkv_bias" in tensor_name and include_qkv_bias:
                    hidden_size = tensor_value.shape[0] // 3
                    query_bias = tensor_value[:hidden_size]
                    key_bias = tensor_value[hidden_size:2 * hidden_size]
                    value_bias = tensor_value[2 * hidden_size:]
                    qb = tf.Variable(query_bias,
                                     name=tensor_name.replace(
                                         "qkv_bias", "query/bias"))
                    kb = tf.Variable(key_bias,
                                     name=tensor_name.replace(
                                         "qkv_bias", "key/bias"))
                    vb = tf.Variable(value_bias,
                                     name=tensor_name.replace(
                                         "qkv_bias", "value/bias"))
                    new_variables.extend([qb, kb, vb])
                # rename projection to output
                elif "projection" in tensor_name:
                    new_name = tensor_name.replace("projection", "output")

                    proj = tf.Variable(tensor_value, name=new_name)
                    new_variables.append(proj)
            else:
                # Keep the tensor value unchanged for all other variables
                var = tf.Variable(tensor_value, name=tensor_name)
                keep_variables.append(var)

        # Combine split embeddings
        word_embeddings = np.sort(word_embeddings)
        embeddings_vals = [reader.get_tensor(k) for k in word_embeddings]
        unit_embeddings = np.vstack(embeddings_vals)
        logging.debug(
            f"Concated word_embeddings shape: {unit_embeddings.shape}")
        we = tf.Variable(
            unit_embeddings,
            dtype=dtype,
            shape=unit_embeddings.shape,
            name="bert/embeddings/word_embeddings",
        )
        new_variables.append(we)
        saved_variables = new_variables + keep_variables
        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver(var_list=saved_variables)
        output_file = os.path.join(output_dir, ckpt_name)
        saver.save(sess, output_file)
        print("Saved to :" + output_file)
Example #13
def convert_google_ckpt_to_gc(
    ckpt_file,
    output_dir,
    num_embed_split,
    vocab_size,
    use_attention_bias,
    use_qkv_bias,
    use_cls_layer,
    dtype,
):

    saved_variables = []

    def add_variable(old_tensor, new_tensor):
        logging.info(f"{old_tensor} -> {new_tensor}")
        saved_variables.append(new_tensor)

    graph = tf.Graph()
    reader = pywrap_tensorflow.NewCheckpointReader(ckpt_file)
    var_to_shape_map = reader.get_variable_to_shape_map()
    with graph.as_default():
        sess = tf.compat.v1.Session()
        for tensor_name in sorted(var_to_shape_map):
            # Filter out the optimizer variables
            if filter_optimizer(tensor_name):
                continue
            if not use_cls_layer and "transform" in tensor_name:
                logging.info("Discarding dense layer before MLM loss.")
                continue
            if not use_attention_bias and "output/dense/bias" in tensor_name:
                logging.info("Discarding attention biases.")
                continue

            this_tensor_dtype = dtype
            if "cls/squad/" in tensor_name:
                # Keep SQuAD output dense layer weights as float32
                this_tensor_dtype = tf.float32
            # Cast to the required precision
            tensor_value = tf.cast(reader.get_tensor(tensor_name),
                                   dtype=this_tensor_dtype)

            if "word_embeddings" in tensor_name and num_embed_split > 1:
                # Split word_embeddings when num_split>1
                logging.info(
                    f"Splitting word embeddings into {num_embed_split} splits."
                )
                word_embeddings = truncate_vocab(tensor_value, vocab_size)
                hidden_size = np.shape(word_embeddings)[1]
                assert vocab_size % num_embed_split == 0
                size_per_slice = int(vocab_size / num_embed_split)
                for i in range(num_embed_split):
                    start_idx = i * size_per_slice
                    end_idx = (i + 1) * size_per_slice
                    we_pieces = tf.Variable(
                        word_embeddings[start_idx:end_idx, :],
                        shape=(size_per_slice, hidden_size),
                        name=f"bert/embeddings/s{i}/word_embeddings",
                    )
                    add_variable(tensor_name, we_pieces)

            #  Truncate word embeddings to  vocab_size
            elif "word_embeddings" in tensor_name:
                full_word_embeddings = tf.Variable(truncate_vocab(
                    tensor_value, vocab_size),
                                                   name=tensor_name)
                add_variable(tensor_name, full_word_embeddings)

            # Rename tensor
            elif "attention/output" in tensor_name:
                new_name = tensor_name.replace("attention/output",
                                               "attention/projection")
                proj = tf.Variable(tensor_value, name=new_name)
                add_variable(tensor_name, proj)

            elif is_qkv_tensor(tensor_name):
                # We will process self-attention parameters outside the loop
                continue

            else:
                others_var = tf.Variable(tensor_value, name=tensor_name)
                add_variable(tensor_name, others_var)

        # Concatenate or split QKV
        layer_re = re.compile('.*/layer_([0-9]+)/.*')
        matches = [layer_re.match(k) for k in var_to_shape_map.keys()]
        num_hidden_layers = max(
            [int(m.group(1)) for m in matches if m is not None]) + 1

        logging.info("Concatenate query, key, value layers into one.")
        for i in range(num_hidden_layers):
            layer_name = f"bert/encoder/layer_{i}/attention/self"
            # Only the bias is concatenated; the query/key/value kernels keep their original names
            qkv_bias = []
            for name in ["query", "key", "value"]:
                weight_name = layer_name + f"/{name}/kernel"
                bias_name = layer_name + f"/{name}/bias"
                weight = tf.cast(reader.get_tensor(weight_name), dtype=dtype)
                bias = tf.cast(reader.get_tensor(bias_name), dtype=dtype)

                add_variable(weight_name, tf.Variable(weight,
                                                      name=weight_name))

                # The QKV bias is always concatenated
                qkv_bias.append(bias)

            if use_qkv_bias:
                qkv_bias = tf.concat(qkv_bias, axis=0)
                qkv_b = tf.Variable(qkv_bias,
                                    shape=qkv_bias.shape,
                                    name=layer_name + "/qkv_bias")
                add_variable("qkv_bias", qkv_b)

        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver()
        _dir_name, ckpt_name = os.path.split(ckpt_file)
        output_file = os.path.join(output_dir, ckpt_name)
        saver.save(sess, output_file)

        num_params = np.sum([np.prod(v.shape) for v in saved_variables])
        print(f"Number of parameters saved: {num_params}")
Example #14
def convert_gc_ckpt_to_google(ckpt_file,
                              output_dir=None,
                              include_qkv_bias=False,
                              dtype=tf.float32):
    """ Convert GC bert checkpoint to Google original checkpoint
        1. combine `word_embeddings` if split
        2. rename scope `bert/encoder/layer_x/attention/projection/` to `bert/encoder/layer_x/attention/output/`
        3. add back attention_projection_bias.
        4. split `qkv_weight` into query, key, value, and add the corresponding biases.
        5. rename `GroupNorm` to `LayerNorm`.
        6. add back the dense layer before the mlm loss.
    Args:
        ckpt_file: str, GC checkpoint to convert.
        output_dir: str, path to save the converted Google-format checkpoint.
        include_qkv_bias: bool, whether the attention layers contain bias weights.
        dtype: tf.float32 or tf.float16, data type of tensors in the output ckpt file.

    Returns:
        None
    """
    graph = tf.Graph()
    dir_name, ckpt_name = os.path.split(os.path.abspath(ckpt_file))
    if not output_dir:
        output_dir = os.path.join(dir_name, "google_ckpt")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    reader = pywrap_tensorflow.NewCheckpointReader(ckpt_file)
    var_to_shape_map = reader.get_variable_to_shape_map()
    with graph.as_default():
        sess = tf.Session()
        num_hidden_layers = 0
        optimizer_names = ["adam", "Momentum", "lamb"]  # optimizer weights
        word_embeddings = []
        new_variables = []
        keep_variables = []
        for tensor_name in var_to_shape_map:
            # logger.info(f"Load {tensor_name}......")
            # Filter the optimizer variables
            if filter_optimizer(tensor_name, optimizer_names):
                continue

            tensor_value = tf.cast(reader.get_tensor(tensor_name), dtype=dtype)

            if tensor_name == 'bert/encoder/layer_0/intermediate/dense/kernel' or tensor_name == 'bert/pooler/dense/kernel':
                print(tensor_name)
                print(tensor_value)
            # logger.info(f"Shape is {tensor_value.shape}")
            if "word_embeddings" in tensor_name:
                word_embeddings.append(tensor_name)
            elif "attention" in tensor_name:
                layer_idx = int(tensor_name.split("/")[2].split("_")[-1])
                num_hidden_layers = max(layer_idx, num_hidden_layers)
                # split query, key, value.
                if "qkv_weight" in tensor_name:
                    hidden_size = tensor_value.shape[1] // 3
                    query = tensor_value[:, :hidden_size]
                    key = tensor_value[:, hidden_size:2 * hidden_size]
                    value = tensor_value[:, 2 * hidden_size:]

                    qw = tf.Variable(query,
                                     name=tensor_name.replace(
                                         "qkv_weight", "query/kernel"))
                    kw = tf.Variable(key,
                                     name=tensor_name.replace(
                                         "qkv_weight", "key/kernel"))
                    vw = tf.Variable(value,
                                     name=tensor_name.replace(
                                         "qkv_weight", "value/kernel"))
                    new_variables.extend([qw, kw, vw])
                elif "qkv_bias" in tensor_name and include_qkv_bias:
                    hidden_size = tensor_value.shape[0] // 3
                    query_bias = tensor_value[:hidden_size]
                    key_bias = tensor_value[hidden_size:2 * hidden_size]
                    value_bias = tensor_value[2 * hidden_size:]
                    qb = tf.Variable(query_bias,
                                     name=tensor_name.replace(
                                         "qkv_bias", "query/bias"))
                    kb = tf.Variable(key_bias,
                                     name=tensor_name.replace(
                                         "qkv_bias", "key/bias"))
                    vb = tf.Variable(value_bias,
                                     name=tensor_name.replace(
                                         "qkv_bias", "value/bias"))
                    new_variables.extend([qb, kb, vb])
                # rename projection to output
                elif "projection" in tensor_name:
                    # logger.debug(f"Rename projection......")
                    new_name = tensor_name.replace("projection", "output")
                    if "GroupNorm" in tensor_name:
                        # logger.debug(f"Rename GroupNorm in attention ......")
                        new_name = new_name.replace("GroupNorm", "LayerNorm")

                    proj = tf.Variable(tensor_value, name=new_name)
                    new_variables.append(proj)
            # rename other GroupNorm
            elif "GroupNorm" in tensor_name:
                # logger.debug(f"Rename GroupNorm ......")
                gn = tf.Variable(tensor_value,
                                 name=tensor_name.replace(
                                     "GroupNorm", "LayerNorm"))
                new_variables.append(gn)
            else:
                # Keep the tensor value unchanged for all other variables
                var = tf.Variable(tensor_value, name=tensor_name)
                keep_variables.append(var)

        # Combine split embeddings
        word_embeddings = np.sort(word_embeddings)
        embeddings_vals = [reader.get_tensor(k) for k in word_embeddings]
        unit_embeddings = np.vstack(embeddings_vals)
        # logger.debug(f"Concated word_embeddings shape: {unit_embeddings.shape}")
        we = tf.Variable(unit_embeddings,
                         dtype=dtype,
                         shape=unit_embeddings.shape,
                         name="bert/embeddings/word_embeddings")
        new_variables.append(we)
        saved_variables = new_variables + keep_variables
        # logger.info("Finish concat word embeddings.")
        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver()
        output_file = os.path.join(output_dir, ckpt_name)
        saver.save(sess, output_file)
Example #15
def convert_ipu_ckpt_to_gc(ckpt_file,
                           output_dir=None,
                           num_embed_split=1,
                           vocab_size=30400,
                           use_attention_bias=False,
                           use_qkv_bias=False,
                           use_cls_layer=False,
                           dtype=tf.float16,
                           label_num=1):
    """ Convert Google original checkpoint to GC bert checkpoint
    There are several differences between GC BERT and the original Google BERT:
        1. gc_bert does not have attention_probs_dropout_prob
        2. gc_bert does not have the mlm projection layer
        3. gc_bert does not have attention_projection_bias
        4. the scope `bert/encoder/layer_x/attention/output/` is renamed to `bert/encoder/layer_x/attention/projection/`
        5. the query, key, value layers are combined into qkv_weight and qkv_bias layers. This change might cause different performance
           with the LAMB optimizer, so the optimizer has been modified accordingly.
        6. In some cases, gc_bert supports splitting the word embedding and renames the scope to `bert/embeddings/s{i}/word_embeddings`.
    Args:
        ckpt_file: str, Google checkpoint.
        output_dir: str, path to save the converted GC checkpoint.
        num_embed_split: int, number of word-embedding splits. Only used when loading the original Google checkpoint.
        vocab_size: int, vocabulary size. GC BERT cuts the original 30522 down to 30400 for better performance.
        use_attention_bias: bool, whether to use attention bias. Defaults to False.
        use_qkv_bias: bool, whether to use bias in qkv layers. Defaults to False.
        use_cls_layer: bool, whether to use the dense layer before the mlm loss. Defaults to False.
        dtype: tf.float32 or tf.float16, data type of tensors in the output ckpt file. Only used when loading the original Google checkpoint.

    Returns:
        None
    """
    graph = tf.Graph()
    dir_name, ckpt_name = os.path.split(ckpt_file)
    if not output_dir:
        output_dir = os.path.join(dir_name, "gc_ckpt")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    reader = pywrap_tensorflow.NewCheckpointReader(ckpt_file)
    var_to_shape_map = reader.get_variable_to_shape_map()
    with graph.as_default():
        sess = tf.Session()
        num_hidden_layers = 0
        optimizer_names = ["adam", "Momentum", "lamb"]  # optimizer weights
        qkv_layers = defaultdict(dict)
        saved_variables = []
        emb_list = []
        for tensor_name in var_to_shape_map:
            # Filter the optimizer variables
            if filter_optimizer(tensor_name, optimizer_names):
                continue
            if not use_cls_layer and "transform" in tensor_name:
                # print("Abandon dense layer before mlm loss.")
                continue
            if not use_attention_bias and "output/dense/bias" in tensor_name:
                # print("Abandon attention bias")
                continue
            tensor_value = tf.cast(reader.get_tensor(tensor_name), dtype=dtype)
            if "word_embeddings" in tensor_name:
                emb_list.append(tensor_name)
                # split word_embeddings when num_split>1
                '''
                word_embeddings = tensor_value[:vocab_size, :]
                hidden_size = np.shape(word_embeddings)[1]
                assert vocab_size % num_embed_split == 0
                size_per_slice = int(vocab_size / num_embed_split)
                for i in range(num_embed_split):
                    start_idx = i * size_per_slice
                    end_idx = (i+1) * size_per_slice	
                    we_pieces = tf.Variable(	
                        word_embeddings[start_idx:end_idx, :],
                        shape=(size_per_slice, hidden_size),
                        name=f"bert/embeddings/s{i}/word_embeddings")	
                    saved_variables.append(we_pieces)	
                '''

            # Rename tensor
            elif "attention/output" in tensor_name:
                new_name = tensor_name.replace("attention/output",
                                               "attention/projection")
                if "LayerNorm" in tensor_name:
                    new_name = new_name.replace("LayerNorm", "GroupNorm")
                proj = tf.Variable(tensor_value, name=new_name)
                saved_variables.append(proj)
            elif "LayerNorm" in tensor_name:
                ln = tf.Variable(tensor_value,
                                 name=tensor_name.replace(
                                     "LayerNorm", "GroupNorm"))
                saved_variables.append(ln)
            # Find query, key, value.
            elif "query" in tensor_name or \
                "key" in tensor_name or \
                    "value" in tensor_name:
                layer_idx = int(tensor_name.split("/")[2].split("_")
                                [1])  # get the layer_{i}
                num_hidden_layers = max(layer_idx, num_hidden_layers)
                qkv_layers[layer_idx][tensor_name] = tensor_value
            else:
                others_var = tf.Variable(tensor_value, name=tensor_name)
                saved_variables.append(others_var)

        print("Start to combine query,key,value layers to qkv layer...")
        print("Start to combine word embedding ...")
        word_embeddings = np.sort(emb_list)
        embedding_vals = [reader.get_tensor(key) for key in word_embeddings]
        unit_embedding = np.vstack(embedding_vals)

        # unit_embedding = get_embeding()
        # word_embedding = tf.concat(emb_list, axis=0)
        word = tf.Variable(unit_embedding,
                           shape=unit_embedding.shape,
                           name="bert/embeddings/word_embeddings",
                           dtype=tf.float16)
        saved_variables.append(word)
        '''
        for i in range(num_hidden_layers+1):
            layer_name = f"bert/encoder/layer_{i}/attention/self"
            # Combine query,key,value to qkv_weight
            layer_tensors = qkv_layers[i]
            qkv_weight = []
            qkv_bias = []
            for name in ["query", "key", "value"]:
                weight_name = layer_name + f"/{name}/kernel"
                bias_name = layer_name + f"/{name}/bias"
                qkv_weight.append(layer_tensors[weight_name])
                qkv_bias.append(layer_tensors[bias_name])

            qkv_weight = tf.concat(qkv_weight, axis=1)
            qkv = tf.Variable(qkv_weight, shape=qkv_weight.shape,	
                            name=layer_name+"/qkv_weight")	
            saved_variables.append(qkv)

            if use_qkv_bias:
                qkv_bias = tf.concat(qkv_bias, axis=0)	
                qkv_b = tf.Variable(qkv_bias, shape=qkv_bias.shape,	
                                name=layer_name+"/qkv_bias")
                saved_variables.append(qkv_b)
            else:	
                print(f"Abandon QKV bias in layer_{i}")	
        '''

        # loss-layer output weights and bias
        loss_weight = tf.get_variable(
            shape=(label_num, 768),
            dtype=tf.float16,
            initializer=tf.truncated_normal_initializer(stddev=0.02),
            name="output_weights")
        saved_variables.append(loss_weight)
        loss_bias = tf.get_variable(shape=(label_num, ),
                                    dtype=tf.float16,
                                    initializer=tf.zeros_initializer(),
                                    name="output_bias")
        saved_variables.append(loss_bias)

        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver()
        output_file = os.path.join(output_dir, ckpt_name)
        saver.save(sess, output_file)
        print("Save to :" + output_file)
Example #16
def visualization(f, cfmtrx, maxpnt, tsne, weights_hist):
    pred_and_label = np.empty([0, 2], dtype=np.int32)
    max_idx = np.empty([0, 512], dtype=np.int32)
    global_feature = np.empty([0, 512])

    # CONCAT data from all files
    for fn in range(len(DUMP_FILES)):
        dump_file = DUMP_FILES[fn]
        log_string('V---- %d/%d -----' % (fn + 1, len(DUMP_FILES)))
        # load dump file (read-only)
        fin = h5py.File(os.path.join(f['dump_dir'], dump_file), 'r')
        # concatenate
        pred_and_label = np.concatenate(
            (pred_and_label, fin['pred_and_label'][:]), axis=0)
        max_idx = np.concatenate((max_idx, fin['max_idx'][:]), axis=0)
        global_feature = np.concatenate(
            (global_feature, fin['global_feature'][:]), axis=0)
        fin.close()
    log_string('pred_and_label {}'.format(pred_and_label.shape))  # (N, 2)
    log_string('max_idx {}'.format(max_idx.shape))  # (N, C)
    log_string('global_feature {}'.format(global_feature.shape))  # (N, 512)

    # PLOT confusion matrix
    if cfmtrx:
        log_string('PLOT confusion matrix')
        cmat = np.zeros([f['num_class'], f['num_class']])
        for i in range(pred_and_label.shape[0]):
            pred_val = pred_and_label[i][0]
            true_val = pred_and_label[i][1]
            cmat[true_val, pred_val] += 1
        plot_confusion_matrix(cmat,
                              class_names=label_modelnet.keys(),
                              normalize=True,
                              title='')

    # PLOT max point
    if maxpnt:
        log_string('PLOT maximum point')
        fdump = h5py.File(os.path.join(f['dump_dir'], DUMP_FILES[0]), 'r')
        ftest = h5py.File(os.path.join(f['dataset_path'], TEST_FILES[0]), 'r')
        max_idx = fdump['max_idx'][:]
        points = ftest['data'][:, 0:f['num_point'], :]
        assert max_idx.shape[0] == points.shape[0]
        # randomly choose samples to show
        shows = np.random.randint(0, max_idx.shape[0], 20)
        pc_list = []
        for s in range(shows.shape[0]):
            i = shows[s]
            pc = points[i, :, :]
            pidx = np.unique(max_idx[i, :])
            color_tab = np.full((f['num_point']), 35)
            color_tab[pidx] = 99
            plot_point_cloud(pc, color_tab)
        fdump.close()
        ftest.close()

    # PLOT T-SNE
    if tsne:
        log_string('PLOT T-SNE')
        tlabel = []
        tfeature = []
        for i in range(pred_and_label.shape[0]):
            if pred_and_label[i][0] == pred_and_label[i][1]:
                tlabel.append(pred_and_label[i][0])
                tfeature.append(global_feature[i])
        tlabel = np.array(tlabel)
        tfeature = np.array(tfeature)
        log_string('tlabel {}'.format(tlabel.shape))  # (N,)
        log_string('tfeature {}'.format(tfeature.shape))  # (N, C)
        plot_TSNE(tlabel, tfeature, f['num_class'])

    # PLOT
    if weights_hist:
        import tensorflow as tf
        from tensorflow import pywrap_tensorflow
        checkpoint_file = tf.train.latest_checkpoint(f['model_path'])
        # Read data from checkpoint file
        reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_file)
        var_to_shape_map = reader.get_variable_to_shape_map()

        def plot_weights_hist(ax, data):
            data = np.abs(data)
            data = np.squeeze(data)
            data = np.sum(data, axis=1, keepdims=False)
            data = data / np.sum(data)
            print(data.shape)
            s = [
                '$x$', '$y$', '$z$', '$x^2$', '$y^2$', '$z^2$', '$x^3$',
                '$y^3$', '$z^3$', r'$\overline{X}$', r'$\overline{Y}$',
                r'$\overline{Z}$', r'$\overline{X^2}$', r'$\overline{Y^2}$',
                r'$\overline{Z^2}$', r'$\overline{X^3}$', r'$\overline{Y^3}$',
                r'$\overline{Z^3}$', '$xy$', '$yz$', '$zx$', '$x^2y$', '$y^2z$',
                '$z^2x$', '$x^2z$', '$y^2x$', '$z^2y$', '$l2$', '$d_x$',
                '$d_y$', '$d_z$', r'$\theta_x$', r'$\theta_y$', r'$\theta_z$'
            ]
            ax.bar(s, data)

        # Print tensor name and values
        fig = plt.figure()
        ax1 = fig.add_subplot(211)
        ax1.set_xlabel('(a)')
        ax2 = fig.add_subplot(212)
        ax2.set_xlabel('(b)')
        for key in var_to_shape_map:
            if key == 'LinearCombLayer/conv2d_128_pc/weights':
                print(key, reader.get_tensor(key).shape)
                plot_weights_hist(ax1, reader.get_tensor(key))
            if key == 'LinearCombLayer/conv2d_128_nn/weights':
                print(key, reader.get_tensor(key).shape)
                plot_weights_hist(ax2, reader.get_tensor(key))
        #plt.tight_layout()
        plt.show()