Example 1
 def train(self):
     embedded = self.creat_model()  # build the d-vector embedding network (method defined elsewhere in this class)
     lr = tf.placeholder(dtype=tf.float32, name="learning_rate")  # learning rate
     global_step = tf.Variable(0, name='global_step', trainable=False)
     w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
     b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))
     sim_matrix = similarity(embedded, w, b)
     loss = loss_cal(sim_matrix, type=config.loss)
     trainable_vars = tf.trainable_variables()  # get variable list
     optimizer = optim(lr)  # get optimizer (type is determined by configuration)
     grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients of the loss w.r.t. the trainable variables
     grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # clip gradients to a global L2 norm of 3.0
     grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # 0.01x gradient scale for w and b (the first two variables)
     train_op = optimizer.apply_gradients(zip(grads_rescale, vars),
                                          global_step=global_step)  # gradient update operation
     # count trainable parameters (rough check of variable memory)
     variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars]))
     print("total variables :", variable_count)
     tf.summary.scalar("loss", loss)
     merged = tf.summary.merge_all()
     saver = tf.train.Saver()
     with tf.Session() as sess:
         tf.global_variables_initializer().run()
         os.makedirs(os.path.join(config.model_path, "Check_Point"), exist_ok=True)  # make folder to save model
         os.makedirs(os.path.join(config.model_path, "logs"), exist_ok=True)  # make folder to save log
         writer = tf.summary.FileWriter(os.path.join(config.model_path, "logs"), sess.graph)
         lr_factor = 1  # lr decay factor (halved every 1,000 iterations below)
         loss_acc = 0  # accumulated loss ( for running average of loss)
         for iter in range(config.iteration):
             # run forward and backward propagation and update parameters
             _, loss_cur, summary = sess.run([train_op, loss, merged],
                                             feed_dict={self.fingerprint_input: random_batch(),
                                                        lr: config.lr * lr_factor})
             loss_acc += loss_cur  # accumulated loss for each 100 iteration
             if iter % 10 == 0:
                 writer.add_summary(summary, iter)  # write at tensorboard
             if (iter + 1) % 100 == 0:
                 print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                 loss_acc = 0  # reset accumulated loss
             if (iter + 1) % 1000 == 0:
                 lr_factor /= 2  # lr decay
                 print("learning rate is decayed! current lr : ", config.lr * lr_factor)
             if (iter + 1) % 1000 == 0:
                 saver.save(sess, os.path.join(config.model_path, "./Check_Point/model.ckpt"),
                            global_step=iter // 1000)
                 print("model is saved!")
Example 2
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(
        shape=[None, config.N * config.M,
               40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-SV a dynamic rnn must be used
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients of the loss w.r.t. the trainable variables
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # clip gradients to a global L2 norm of 3.0
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # 0.01x gradient scale for w and b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars), global_step=global_step)  # gradient update operation

    # check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        if (os.path.exists(path)):
            print("Restore from {}".format(
                os.path.join(path, "Check_Point/model.ckpt-2")))
            saver.restore(sess, os.path.join(path, "Check_Point/model.ckpt-2")
                          )  # restore variables from selected ckpt file
        else:
            os.makedirs(os.path.join(path, "Check_Point"),
                        exist_ok=True)  # make folder to save model
            os.makedirs(os.path.join(path, "logs"),
                        exist_ok=True)  # make folder to save log

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor (halved every 3,000 iterations below)
        loss_acc = 0  # accumulated loss ( for running average of loss)

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={
                                                batch: random_batch(),
                                                lr: config.lr * lr_factor
                                            })

            loss_acc += loss_cur  # accumulated loss for each 100 iteration

            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write at tensorboard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
            if (iter + 1) % 3000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ",
                      config.lr * lr_factor)
            if (iter + 1) % 2500 == 0:
                saver.save(sess,
                           os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=iter // 2500)
                print("model is saved!")
Example 3
import time

if __name__ == '__main__':
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.1
    sess = tf.Session(config=tf_config)

    N, M = 4, 5
    embed = tf.placeholder(dtype=tf.float32, shape=(N * 2 * M, 3))

    # new loss
    embed_1 = embed[:N * M]
    embed_2 = embed[N * M:]
    center_1 = embedd2center(embed_1, N, M)
    center_2 = embedd2center(embed_2, N, M)
    new_loss = loss_cal(similarity(embed_1, 1.0, 0.0, N, M, center_2), name='softmax', N=N, M=M) + \
               loss_cal(similarity(embed_2, 1.0, 0.0, N, M, center_1), name='softmax', N=N, M=M)

    # oldloss
    old_loss = loss_cal(similarity(embed, 1.0, 0.0, N, M * 2), N=N, M=M * 2)
    sess.run(tf.global_variables_initializer())

    arr = np.random.rand(N * 2 * M, 3)  # random embeddings; must match the placeholder shape (N * 2 * M, 3)

    times = []
    print('Calculating old loss')
    x = sess.run(old_loss, feed_dict={embed: arr})
    print(x)

    times = []
    print('Calculating new loss')
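Example 3 also calls `embedd2center(embed, N, M)`, which is defined elsewhere in the project. Presumably it collapses the N*M utterance embeddings into one centroid per speaker; a sketch of that assumed behavior in the same TF1 style as the snippet:

def embedd2center(embed, N, M):
    # average the M utterance embeddings of each speaker into one centroid,
    # then re-normalize so cosine similarity stays well-scaled (assumed behavior)
    centers = tf.reduce_mean(tf.reshape(embed, [N, M, -1]), axis=1)   # [N, P]
    return centers / (tf.norm(centers, axis=1, keepdims=True) + 1e-6)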
Example 4
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(
        shape=[None, config.N * config.M,
               40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-SV a dynamic rnn must be used
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients of the loss w.r.t. the trainable variables
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # clip gradients to a global L2 norm of 3.0
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # 0.01x gradient scale for w and b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars), global_step=global_step)  # gradient update operation

    # check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    # with tf.Session() as sess:
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        tf.global_variables_initializer().run()
        os.makedirs(os.path.join(path, "Check_Point"),
                    exist_ok=True)  # make folder to save model
        os.makedirs(os.path.join(path, "logs"),
                    exist_ok=True)  # make folder to save log
        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor ( 1/2 per 10000 iteration)
        loss_acc = 0  # accumulated loss ( for running average of loss)

        train_times = []  # per-iteration timing records (added 2020/05/20)
        total_times = 0  # total training time in seconds (added 2020/05/20)
        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            # record the start time of this training iteration
            begin_time = time.perf_counter()  # time.clock() was removed in Python 3.8
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={
                                                batch: random_batch(),
                                                lr: config.lr * lr_factor
                                            })
            # record the end time of this training iteration
            end_time = time.perf_counter()
            total_times += end_time - begin_time
            train_times.append(
                str(begin_time) + '_' + str(end_time) + '_' +
                str(end_time - begin_time))  # "start_end_elapsed" record for the timing log

            loss_acc += loss_cur  # accumulated loss for each 100 iteration

            if iter % 10 == 0:
                writer.add_summary(summary, iter)  # write at tensorboard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss
                print("iter: {}, elapsed: {}s".format(iter, str(end_time - begin_time)))
            if (iter + 1) % 10000 == 0:
                lr_factor /= 2  # lr decay
                print("learning rate is decayed! current lr : ",
                      config.lr * lr_factor)
            if (iter + 1) % 10000 == 0:
                saver.save(sess,
                           os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=iter // 10000)
                print("model is saved!")
        # save the final model
        saver.save(sess,
                   os.path.join(path, "./Check_Point/model.ckpt"),
                   global_step=iter)
        print("model is saved!")

        # write the per-iteration timings to a file
        with open('GE2E_epoch_speakers{}_batch_speakers{}_utts_per_speaker{}_iter{}_iteration_times.txt'.format(
                config.spk_num, config.N, config.M, config.iteration),
                  mode='w',
                  encoding='utf-8') as wf:
            wf.write("epoch speakers: {}; batch speakers: {}; utterances per speaker: {}; "
                     "total iterations: {}; average time per training iteration: {}\n".format(
                         config.spk_num, config.N, config.M, config.iteration,
                         total_times / config.iteration))
            wf.write("start_time_end_time_elapsed\n")
            for line in train_times:
                wf.write(line + '\n')
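Iteration timing in this example uses time.perf_counter() (time.clock() was removed in Python 3.8). A small helper that keeps the timing bookkeeping out of the training loop might look like this (illustrative only, not part of the original code):

import time
from contextlib import contextmanager

@contextmanager
def timed(records):
    """Record (start, end, elapsed) wall-clock timings into `records`."""
    start = time.perf_counter()
    yield
    end = time.perf_counter()
    records.append((start, end, end - start))

# usage inside the training loop:
#   with timed(train_times):
#       sess.run([train_op, loss, merged], feed_dict={...})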
Example 5
def train(path):
    tf.reset_default_graph()    # reset graph

    # draw graph
    batch = tf.placeholder(shape=[None, config.N*config.M, 40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj) for i in range(config.num_layer)]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)    # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=batch, dtype=tf.float32, time_major=True)   # for TI-SV a dynamic rnn must be used
        embedded = outputs[-1]                            # the last output is the embedded d-vector
        embedded = normalize(embedded)                    # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()               # get variable list
    optimizer = optim(lr)                                   # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))   # gradients of the loss w.r.t. the trainable variables
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)      # clip gradients to a global L2 norm of 3.0
    grads_rescale = [0.01*grad for grad in grads_clip[:2]] + grads_clip[2:]   # 0.01x gradient scale for w and b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars), global_step=global_step)   # gradient update operation

    # check variables memory
    variable_count = np.sum(np.array([np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    # training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        if(os.path.exists(path)):
            print("Restore from {}".format(os.path.join(path, "Check_Point/model.ckpt-2")))
            saver.restore(sess, os.path.join(path, "Check_Point/model.ckpt-2"))  # restore variables from selected ckpt file
        else:
            os.makedirs(os.path.join(path, "Check_Point"), exist_ok=True)  # make folder to save model
            os.makedirs(os.path.join(path, "logs"), exist_ok=True)          # make folder to save log

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1   # lr decay factor ( 1/2 per 10000 iteration)
        loss_acc = 0    # accumulated loss ( for running average of loss)

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                  feed_dict={batch: random_batch(), lr: config.lr*lr_factor})

            loss_acc += loss_cur    # accumulated loss for each 100 iteration

            if iter % 10 == 0:
                writer.add_summary(summary, iter)   # write at tensorboard
            if (iter+1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter+1),loss_acc/100))
                loss_acc = 0                        # reset accumulated loss
            if (iter+1) % 10000 == 0:
                lr_factor /= 2                      # lr decay
                print("learning rate is decayed! current lr : ", config.lr*lr_factor)
            if (iter+1) % 10000 == 0:
                saver.save(sess, os.path.join(path, "./Check_Point/model.ckpt"), global_step=iter//10000)
                print("model is saved!")
Example 6
def test(path):
    tf.reset_default_graph()

    # draw graph
    enroll = tf.placeholder(
        shape=[None, config.N * config.M, 40],
        dtype=tf.float32)  # enrollment batch (time x batch x n_mel)
    verif = tf.placeholder(
        shape=[None, config.N * config.M, 40],
        dtype=tf.float32)  # verification batch (time x batch x n_mel)
    batch = tf.concat([enroll, verif], axis=1)

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # make lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-SV a dynamic rnn must be used
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize

    print("embedded size: ", embedded.shape)

    # enrollment embedded vectors (speaker model)
    enroll_embed = normalize(
        tf.reduce_mean(tf.reshape(embedded[:config.N * config.M, :],
                                  shape=[config.N, config.M, -1]),
                       axis=1))
    # verification embedded vectors
    verif_embed = embedded[config.N * config.M:, :]

    similarity_matrix = similarity(embedded=verif_embed,
                                   w=1.,
                                   b=0.,
                                   center=enroll_embed)
    loss = loss_cal(similarity_matrix, type=config.loss)

    saver = tf.train.Saver(var_list=tf.global_variables())
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # load model

        #ckpt = tf.train.get_checkpoint_state(path)
        #checkpoints =  ckpt.all_model_checkpoint_paths
        i = 139999  # first checkpoint step to evaluate
        least_loss = 99999  # lowest loss seen so far
        #print("checkpoints : ",checkpoints)

        while i < 399999:  # step through checkpoints saved every 2500 iterations
            saver.restore(sess, os.path.join(path, "model.ckpt-" + str(i)))

            S, L = sess.run(
                [similarity_matrix, loss],
                feed_dict={
                    enroll: random_batch(shuffle=False),
                    verif: random_batch(shuffle=False, utter_start=config.M)
                })
            S = S.reshape([config.N, config.M, -1])
            print("test file path : ", config.test_path)
            np.set_printoptions(precision=2)
            #print(S)

            if L < least_loss:
                #diff = abs(FAR-FRR)
                perfect_step = i
                least_loss = L

            print(i)
            print(str(L / (config.N * config.M)))
            i = i + 2500

        print("\ncheckpoint: " + str(perfect_step) + " (loss:%0.2f)" %
              (least_loss))
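`loss_cal(sim_matrix, type=config.loss)` turns the [N, M, N] similarity matrix into the GE2E training loss. For the 'softmax' variant of the GE2E paper, each utterance contributes -S[j, i, j] + log(sum_k exp(S[j, i, k])); a NumPy sketch (the project may average rather than sum, and it also offers a 'contrast' variant not shown here):

import numpy as np

def ge2e_softmax_loss(S):
    """GE2E softmax loss for a similarity matrix S of shape [N, M, N]:
    utterance i of speaker j scored against every speaker centroid k."""
    N, M, _ = S.shape
    mx = S.max(axis=2, keepdims=True)                        # stabilize the log-sum-exp
    lse = mx[..., 0] + np.log(np.exp(S - mx).sum(axis=2))    # [N, M]
    pos = S[np.arange(N)[:, None], np.arange(M)[None, :], np.arange(N)[:, None]]  # S[j, i, j]
    return float(np.sum(lse - pos))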
Example 7
def train(path):
    tf.reset_default_graph()  # reset graph

    # draw graph
    batch = tf.placeholder(
        shape=[None, config.N * config.M,
               40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-SV a dynamic rnn must be used
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients of the loss w.r.t. the trainable variables
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # clip gradients to a global L2 norm of 3.0
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # 0.01x gradient scale for w and b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars), global_step=global_step)  # gradient update operation

    # check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    iter = 0

    # training session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        if config.restore:

            # Restore saved model if the user requested it, default = True
            try:
                ckpt = tf.train.latest_checkpoint(
                    checkpoint_dir=os.path.join(path, "Check_Point"))

                #                if (checkpoint_state and checkpoint_state.model_checkpoint_path):
                #                    print('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
                #saver = tf.train.import_meta_graph(os.path.join(path,"Check_Point/model.cpkt.meta"))

                #ckpt = tf.train.load_checkpoint(os.path.join(path,"Check_Point/model"))
                saver.restore(sess, ckpt)

#                else:
#                    print('No model to load at {}'.format(save_dir))

#                    saver.save(sess, checkpoint_path, global_step=global_step)

            except Exception as e:
                print('Cannot restore checkpoint:', e)

        #if loaded == 0:
        #    raise AssertionError("ckpt file does not exist! Check config.model_num or config.model_path.")

        #print("train file path : ", config.test_path)

        else:

            os.makedirs(os.path.join(path, "Check_Point"),
                        exist_ok=True)  # make folder to save model
            os.makedirs(os.path.join(path, "logs"),
                        exist_ok=True)  # make folder to save log

        writer = tf.summary.FileWriter(os.path.join(path, "logs"), sess.graph)
        epoch = 0
        lr_factor = 1  # lr decay factor ( 1/2 per 10000 iteration)
        loss_acc = 0  # accumulated loss ( for running average of loss)
        iter = 0
        training_data_size = len(os.listdir(config.train_path))
        print("train_size: ", training_data_size)
        prev_iter = -1

        while iter < config.iteration:
            prev_iter = iter

            # run forward and backward propagation and update parameters
            iter, _, loss_cur, summary = sess.run(
                [global_step, train_op, loss, merged],
                feed_dict={
                    batch: random_batch(),
                    lr: config.lr * lr_factor
                })

            loss_acc += loss_cur  # accumulated loss for each 100 iteration

            if (iter - prev_iter > 1):
                epoch = config.N * (iter + 1) // training_data_size
                #lr_factor = lr_factor / (2**(epoch//100))
                lr_factor = lr_factor / (2**(iter // 10000))
                print("restored epoch:", epoch)
                print("restored learning rate:", lr_factor * config.lr)

            #if iter % 1000 == 0:
            #    writer.add_summary(summary, iter)   # write at tensorboard
            if (iter + 1) % 100 == 0:
                print("(iter : %d) loss: %.4f" % ((iter + 1), loss_acc / 100))
                loss_acc = 0  # reset accumulated loss

            #if config.N * (iter+1) % training_data_size == 0:
            #    epoch = epoch + 1
            #    print("epoch: ", epoch)

            if (iter + 1) % 10000 == 0:
                lr_factor /= 2
                print("learning rate is decayed! current lr : ",
                      config.lr * lr_factor)

            #if ((config.N * (iter+1)) / training_data_size)%100  == 0:
            #    lr_factor = lr_factor / 2
            #    print("learning factor: " , lr_factor)
            #    print("learning rate is decayed! current lr : ", config.lr*lr_factor)

            if (iter + 1) % 5000 == 0:
                saver.save(sess,
                           os.path.join(path, "Check_Point/model.ckpt"),
                           global_step=iter)
                writer.add_summary(summary, iter)  # write at tensorboard
                print("model is saved!")
Example 8
def train(path, args):
    tf.reset_default_graph()  # reset graph
    timestamp = time_string() if args.time_string is None else args.time_string

    # draw graph
    feeder = Feeder(args.train_filename, args, hparams)

    output_classes = (max([int(f) for f in feeder.total_emt]) + 1
                      if args.model_type in ['emt', 'accent']
                      else max([int(f) for f in feeder.total_spk]) + 1)

    batch = tf.placeholder(
        shape=[args.N * args.M, None, config.n_mels],
        dtype=tf.float32)  # input batch (time x batch x n_mel)
    labels = tf.placeholder(shape=[args.N * args.M], dtype=tf.int32)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedded = triple_lstm(batch)
    print("Training {} Discriminator Model".format(args.model_type))
    encoder = ReferenceEncoder(
        filters=hparams.reference_filters,
        kernel_size=(3, 3),
        strides=(2, 2),
        is_training=True,
        scope='Tacotron_model/inference/pretrained_ref_enc_{}'.format(
            args.model_type),
        depth=hparams.reference_depth)  # [N, 128])
    embedded = encoder(batch)
    embedded = normalize(embedded)

    if args.discriminator:
        logit = tf.layers.dense(
            embedded,
            output_classes,
            name='Tacotron_model/inference/pretrained_ref_enc_{}_dense'.format(
                args.model_type))
        labels_one_hot = tf.one_hot(tf.to_int32(labels), output_classes)
        # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit,labels=labels_one_hot))
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logit,
                                                    labels=labels_one_hot))
        acc, acc_op = tf.metrics.accuracy(labels=tf.argmax(labels_one_hot, 1),
                                          predictions=tf.argmax(logit, 1))
        val_acc, val_acc_op = tf.metrics.accuracy(
            labels=tf.argmax(labels_one_hot, 1),
            predictions=tf.argmax(logit, 1))
    else:
        # loss
        sim_matrix = similarity(embedded,
                                w,
                                b,
                                args.N,
                                args.M,
                                P=hparams.reference_depth)
        print("similarity matrix size: ", sim_matrix.shape)
        loss = loss_cal(sim_matrix, args.N, args.M, type=config.loss)
        val_acc_op = tf.constant(1.)

    # optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients of the loss w.r.t. the trainable variables

    if args.discriminator:
        grads_rescale = grads
    else:
        grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # clip gradients to a global L2 norm of 3.0
        grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # 0.01x gradient scale for w and b

    train_op = optimizer.apply_gradients(
        zip(grads_rescale,
            vars), global_step=global_step)  # gradient update operation

    # check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # record loss
    loss_summary = tf.summary.scalar("loss", loss)
    merged = tf.summary.merge_all()
    saver = tf.train.Saver(max_to_keep=20)
    loss_window = ValueWindow(100)
    acc_window = ValueWindow(100)
    val_loss_window = ValueWindow(5)
    val_acc_window = ValueWindow(5)

    # training session
    with tf.Session() as sess:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        checkpoint_folder = os.path.join(path, "checkpoints", timestamp)
        logs_folder = os.path.join(path, "logs", timestamp)
        os.makedirs(checkpoint_folder,
                    exist_ok=True)  # make folder to save model
        os.makedirs(logs_folder, exist_ok=True)  # make folder to save log
        model_name = '{}_disc_model.ckpt'.format(args.model_type)
        checkpoint_path = os.path.join(checkpoint_folder, model_name)

        if args.restore:
            checkpoint_state = tf.train.get_checkpoint_state(checkpoint_folder)
            if (checkpoint_state and checkpoint_state.model_checkpoint_path):
                print('Loading checkpoint {}'.format(
                    checkpoint_state.model_checkpoint_path))
                saver.restore(sess, checkpoint_state.model_checkpoint_path)
            else:
                print('No model to load at {}'.format(checkpoint_folder))
                saver.save(sess, checkpoint_path, global_step=global_step)
        else:
            print('Starting new training!')
            saver.save(sess, checkpoint_path, global_step=global_step)

        writer = tf.summary.FileWriter(logs_folder, sess.graph)
        lr_factor = 1  # lr decay factor (adjusted by the step-based schedule below)

        iterations = 30000 if args.model_type == 'emt' else config.iteration
        for iter in range(iterations):
            if args.discriminator:
                batch_iter, _, labels_iter = feeder.random_batch_disc()
            else:
                batch_iter, _, labels_iter = feeder.random_batch()
            # run forward and backward propagation and update parameters
            step, _, loss_cur, summary, acc_cur = sess.run(
                [global_step, train_op, loss, merged, acc_op],
                feed_dict={
                    batch: batch_iter,
                    labels: labels_iter,
                    lr: config.lr * lr_factor
                })

            loss_window.append(loss_cur)
            acc_window.append(acc_cur)

            if step % 10 == 0:
                writer.add_summary(summary, step)  # write at tensorboard
            if (step + 1) % 20 == 0:
                val_loss_cur_batch = 0
                val_acc_cur_batch = 0
                for val_iter in range(VAL_ITERS):  # run a few validation mini-batches
                    if args.discriminator:
                        batch_iter, _, labels_iter = feeder.random_batch_disc(
                            TEST=True)
                    else:
                        batch_iter, _, labels_iter = feeder.random_batch(
                            TEST=True)
                    # forward pass only: compute validation loss and accuracy (no parameter update)
                    val_loss_cur, val_acc_cur = sess.run([loss, val_acc_op],
                                                         feed_dict={
                                                             batch: batch_iter,
                                                             labels:
                                                             labels_iter
                                                         })
                    val_loss_cur_batch += val_loss_cur
                    val_acc_cur_batch += val_acc_cur
                val_loss_cur_batch /= VAL_ITERS
                val_acc_cur_batch /= VAL_ITERS
                val_loss_window.append(val_loss_cur_batch)
                val_acc_window.append(val_acc_cur_batch)

                message = "(iter : %d) loss: %.4f" % (
                    (step + 1), loss_window.average)
                if args.discriminator:
                    message += ', acc: {:.2f}%'.format(acc_window.average)
                message += ", val_loss: %.4f" % (val_loss_window.average)
                if args.discriminator:
                    message += ', val_acc: {:.2f}%'.format(
                        val_acc_window.average)
                print(message)

            lr_changed = False
            if args.model_type == 'emt':
                if step > 6000:
                    lr_changed = True if lr_factor != .01 else False
                    lr_factor = .01
                elif step > 4000:
                    lr_changed = True if lr_factor != .1 else False
                    lr_factor = .1
                if lr_changed:
                    print("learning rate is decayed! current lr : ",
                          config.lr * lr_factor)
            elif args.model_type == 'spk':
                if step > 300:  #4000:
                    lr_changed = True if lr_factor != .01 else False
                    lr_factor = .01
                elif step > 180:  #2500:
                    lr_changed = True if lr_factor != .1 else False
                    lr_factor = .1
                if lr_changed:
                    print("learning rate is decayed! current lr : ",
                          config.lr * lr_factor)
            if step % config.save_checkpoint_iters == 0:
                saver.save(sess, checkpoint_path, global_step=global_step)
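ValueWindow is imported from the surrounding project (it also appears in Tacotron-style training scripts) and simply keeps a running average of the last K values for logging. A minimal sketch of what such a class looks like (assumed, not the project's exact implementation):

from collections import deque

class ValueWindow:
    def __init__(self, window_size=100):
        self._values = deque(maxlen=window_size)

    def append(self, x):
        self._values.append(x)

    @property
    def average(self):
        return sum(self._values) / max(len(self._values), 1)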
Example 9
def get_embeddings(path, args):
    tf.reset_default_graph()  # reset graph
    if args.time_string is None:
        raise ValueError('must provide valid time_string')

    emb_dir = os.path.join(path, 'embeddings')
    os.makedirs(emb_dir, exist_ok=True)
    meta_path = os.path.join(emb_dir, 'meta.tsv')

    emb_path = os.path.join(
        emb_dir, 'emb_emt.tsv') if args.model_type == 'emt' else os.path.join(
            emb_dir, 'emb_spk.tsv')

    # draw graph
    feeder = Feeder(args.train_filename, args, hparams)
    datasets = ['emt4', 'vctk'] if args.model_type == 'emt' else ['vctk']
    num_datasets = len(datasets)

    batch = tf.placeholder(
        shape=[num_datasets * args.N * args.M, None, config.n_mels],
        dtype=tf.float32)  # input batch (time x batch x n_mel)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # embedded = triple_lstm(batch)
    print("{} Discriminator Model".format(args.model_type))
    encoder = ReferenceEncoder(
        filters=hparams.reference_filters,
        kernel_size=(3, 3),
        strides=(2, 2),
        is_training=True,
        scope='Tacotron_model/inference/pretrained_ref_enc_{}'.format(
            args.model_type),
        depth=hparams.reference_depth)  # [N, 128])
    embedded = encoder(batch)

    # loss
    sim_matrix = similarity(embedded,
                            w,
                            b,
                            num_datasets * args.N,
                            args.M,
                            P=hparams.reference_depth)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix,
                    num_datasets * args.N,
                    args.M,
                    type=config.loss)

    saver = tf.train.Saver()

    # training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        checkpoint_folder = os.path.join(path, "checkpoints", args.time_string)

        checkpoint_state = tf.train.get_checkpoint_state(checkpoint_folder)
        if (checkpoint_state and checkpoint_state.model_checkpoint_path):
            print('Loading checkpoint {}'.format(
                checkpoint_state.model_checkpoint_path))
            saver.restore(sess, checkpoint_state.model_checkpoint_path)
        else:
            raise ValueError(
                'No model to load at {}'.format(checkpoint_folder))
        feeder_batch, meta = feeder.emb_batch(make_meta=True,
                                              datasets=datasets)
        emb, loss_val = sess.run([embedded, loss], feed_dict={batch: feeder_batch})
        print("loss: {:.4f}".format(loss_val))
        meta.to_csv(meta_path, sep='\t', index=False)
        pd.DataFrame(emb).to_csv(emb_path, sep='\t', index=False, header=False)
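The embeddings and metadata are written as tab-separated files, which is the format the TensorBoard embedding projector loads. Reading them back for a quick sanity check could look like this (paths follow the snippet above):

import os
import pandas as pd

emb_dir = os.path.join(path, 'embeddings')
emb = pd.read_csv(os.path.join(emb_dir, 'emb_spk.tsv'), sep='\t', header=None)
meta = pd.read_csv(os.path.join(emb_dir, 'meta.tsv'), sep='\t')
print(emb.shape, len(meta))   # expect one metadata row per embedding row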
Example 10
def train(path):
    tf.reset_default_graph()  # reset graph

    # Draw train graph
    train_batch = tf.placeholder(
        shape=[None, config.N * config.M,
               40], dtype=tf.float32)  # input batch (time x batch x n_mel)
    lr = tf.placeholder(dtype=tf.float32)  # learning rate

    global_step = tf.Variable(0, name='global_step', trainable=False)
    w = tf.get_variable("w", initializer=np.array([10], dtype=np.float32))
    b = tf.get_variable("b", initializer=np.array([-5], dtype=np.float32))

    # Embedding LSTM (3-layer default)
    with tf.variable_scope("lstm", reuse=None):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                    num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        print(config.num_layer)
        lstm = tf.contrib.rnn.MultiRNNCell(
            lstm_cells)  # define lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=train_batch, dtype=tf.float32,
            time_major=True)  # for TI-SV a dynamic rnn must be used
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize
    print("embedded size: ", embedded.shape)

    # Define loss
    sim_matrix = similarity(embedded, w, b)
    print("similarity matrix size: ", sim_matrix.shape)
    loss = loss_cal(sim_matrix, type=config.loss)

    # Optimizer operation
    trainable_vars = tf.trainable_variables()  # get variable list
    optimizer = optim(lr)  # get optimizer (type is determined by configuration)
    grads, vars = zip(*optimizer.compute_gradients(loss))  # gradients of the loss w.r.t. the trainable variables
    grads_clip, _ = tf.clip_by_global_norm(grads, 3.0)  # clip gradients to a global L2 norm of 3.0
    grads_rescale = [0.01 * grad for grad in grads_clip[:2]] + grads_clip[2:]  # 0.01x gradient scale for w and b
    train_op = optimizer.apply_gradients(zip(grads_rescale, vars), global_step=global_step)  # gradient update operation

    # Check variables memory
    variable_count = np.sum(
        np.array([
            np.prod(np.array(v.get_shape().as_list())) for v in trainable_vars
        ]))
    print("total variables :", variable_count)

    # TensorBoard vars declaration
    lr_summ = tf.summary.scalar(name='My_LR', tensor=lr)
    loss_summary = tf.summary.scalar("loss_ORIG", loss)
    w_summary = tf.summary.histogram('My_Weights', w)
    b_summary = tf.summary.histogram('My_Bias', b)
    merged = tf.summary.merge_all()  # merge all TB vars into one
    saver = tf.train.Saver(
        max_to_keep=40
    )  # keep up to 40 checkpoints; saving every 5,000 steps, this covers 200,000 iterations

    # Training session
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        os.makedirs(os.path.join(path, "Check_Point"),
                    exist_ok=True)  # make folder to save model
        os.makedirs(os.path.join(path, "logs"),
                    exist_ok=True)  # make folder to save TensorBoard logs
        os.makedirs(
            "./Plots/",
            exist_ok=True)  # make folder to save all plots and .txt logs
        os.makedirs("./Plots/" + path[11:],
                    exist_ok=True)  # makes the subdirs for individual plots
        log_path = "./Plots/" + path[11:] + "/" + path[
            11:] + ".txt"  # declares .txt log files naming convention

        # Block of code to make folders of runs for TensorBoard visualization
        logspath = os.path.join(path, "logs")
        num_previous_runs = os.listdir('./tisv_model/logs')
        if len(num_previous_runs) == 0:
            run_number = 1
        else:
            run_number = max(
                [int(s.split('run_')[1]) for s in num_previous_runs]) + 1
        curr_logdir = 'run_%02d' % run_number
        writer = tf.summary.FileWriter(
            os.path.join(logspath, curr_logdir),
            sess.graph)  # Define writer for TensorBoard
        # END of Block

        # epoch = 0      # not used
        lr_factor = 1  # LR decay factor (halved every 5,000 iterations below)
        loss_acc = 0  # accumulated loss (for calculating average of loss)

        # declares lists for figure creation
        EER_list = []  # collects the EER results every 100 steps for plotting
        train_loss_list = [
        ]  # collects the training loss results every 100 steps for plotting
        # LR_decay_list = []  # not used

        for iter in range(config.iteration):
            # run forward and backward propagation and update parameters
            _, loss_cur, summary = sess.run([train_op, loss, merged],
                                            feed_dict={
                                                train_batch: random_batch(),
                                                lr: config.lr * lr_factor
                                            })

            loss_acc += loss_cur  # accumulated loss for each 100 iteration

            # write train_loss to TensorBoard
            if iter % 10 == 0:
                writer.add_summary(summary, iter)
            # perform validation
            if (iter + 1) % 100 == 0:
                # print("(iter : %d) loss: %.4f" % ((iter+1),loss_acc/100))
                # print("==============VALIDATION START!============")

                # Draw validation graph (note: these ops are re-created on every
                # validation pass, which keeps growing the default graph)
                enroll = tf.placeholder(
                    shape=[None, config.N * config.M, 40], dtype=tf.float32
                )  # enrollment batch (time x batch x n_mel)
                valid = tf.placeholder(
                    shape=[None, config.N * config.M, 40], dtype=tf.float32
                )  # validation batch (time x batch x n_mel)
                val_batch = tf.concat([enroll, valid], axis=1)

                # Embedding LSTM (3-layer default)
                with tf.variable_scope("lstm", reuse=tf.AUTO_REUSE):
                    lstm_cells = [
                        tf.contrib.rnn.LSTMCell(num_units=config.hidden,
                                                num_proj=config.proj)
                        for i in range(config.num_layer)
                    ]
                    lstm = tf.contrib.rnn.MultiRNNCell(
                        lstm_cells)  # make lstm op and variables
                    outputs, _ = tf.nn.dynamic_rnn(
                        cell=lstm,
                        inputs=val_batch,
                        dtype=tf.float32,
                        time_major=True)  # for TI-SV a dynamic rnn must be used
                    embedded = outputs[-1]  # the last output is the embedded d-vector
                    embedded = normalize(embedded)  # normalize
                # print("embedded size: ", embedded.shape)

                # enrollment embedded vectors (speaker model)
                enroll_embed = normalize(
                    tf.reduce_mean(tf.reshape(embedded[:config.N *
                                                       config.M, :],
                                              shape=[config.N, config.M, -1]),
                                   axis=1))
                # validation embedded vectors
                valid_embed = embedded[config.N * config.M:, :]
                similarity_matrix = similarity(embedded=valid_embed,
                                               w=1.,
                                               b=0.,
                                               center=enroll_embed)

                # print("test file path : ", config.test_path)

                # Return similarity matrix (SM) after enrollment and validation
                time1 = time.time()  # for check inference time
                S = sess.run(similarity_matrix,
                             feed_dict={
                                 enroll:
                                 random_batch(shuffle=False,
                                              forceValidation=True),
                                 valid:
                                 random_batch(shuffle=False,
                                              utter_start=config.M,
                                              forceValidation=True)
                             })
                S = S.reshape([config.N, config.M, -1])
                time2 = time.time()

                np.set_printoptions(precision=4)
                # print("inference time for %d utterences : %0.2fs" % (2 * config.M * config.N, time2 - time1))
                # print(S)  # print similarity matrix

                # calculating EER
                diff = 1
                EER = 0
                EER_thres = 0
                EER_FAR = 0
                EER_FRR = 0

                # through thresholds calculate false acceptance ratio (FAR) and false reject ratio (FRR)
                for thres in [0.01 * i + 0.5 for i in range(50)]:
                    S_thres = S > thres

                    # False acceptance ratio = false acceptance / mismatched population (enroll speaker != validation speaker)
                    FAR = sum([
                        np.sum(S_thres[i]) - np.sum(S_thres[i, :, i])
                        for i in range(config.N)
                    ]) / (config.N - 1) / config.M / config.N

                    # False reject ratio = false reject / matched population (enroll speaker = validation speaker)
                    FRR = sum([
                        config.M - np.sum(S_thres[i][:, i])
                        for i in range(config.N)
                    ]) / config.M / config.N

                    # Save threshold when FAR = FRR (=EER)
                    if diff > abs(FAR - FRR):
                        diff = abs(FAR - FRR)
                        EER = (FAR + FRR) / 2
                        EER_thres = thres
                        EER_FAR = FAR
                        EER_FRR = FRR

                print(
                    "\n(iter : %d) loss: %.4f || EER : %0.4f (thres:%0.4f, FAR:%0.4f, FRR:%0.4f) || inference time for %d utterences: %0.2fs"
                    % ((iter + 1), loss_acc / 100, EER, EER_thres, EER_FAR,
                       EER_FRR, 2 * config.M * config.N, time2 - time1))
                EER_list.append(EER)
                # print("==============VALIDATION END!==============")
                train_loss_list.append(loss_acc / 100)

                # save figures
                if (iter + 1) % 500 == 0:
                    plt.ioff()
                    fig_EER = plt.figure()
                    iter_list = [(i + 1) * 100 for i in range(len(EER_list))]
                    plt.plot(iter_list, EER_list, label="EER")
                    plt.xlabel("Steps")
                    plt.ylabel("EER")
                    plt.title("Equal error rate progress")
                    plt.grid(True)
                    plot_path = "./Plots/" + path[11:] + "/" + path[
                        11:] + ".png"
                    print("Saving plot as: %s" % plot_path)
                    plt.savefig(plot_path)
                    plt.close(fig_EER)

                    plt.ioff()
                    fig_LOSS = plt.figure()
                    iter_list = [(i + 1) * 100 for i in range(len(train_loss_list))]
                    plt.plot(iter_list,
                             train_loss_list,
                             color="orange",
                             label="train_loss")
                    plt.xlabel("Steps")
                    plt.ylabel("Training loss")
                    plt.title("Training progress")
                    plt.grid(True)
                    plot_path = "./Plots/" + path[11:] + "/" + path[
                        11:] + "_LOSS.png"
                    print("Saving plot as: %s" % plot_path)
                    plt.savefig(plot_path)
                    plt.close(fig_LOSS)

                # Every 100 iterations, save a log of training progress
                with open(log_path, "a") as file:
                    file.write(
                        str(iter + 1) + "," + str(loss_acc / 100) + "," +
                        str(EER) + "," + str(EER_thres) + "," + str(EER_FAR) +
                        "," + str(EER_FRR) + "\n")

                loss_acc = 0  # reset accumulated loss
            # decay learning rate
            if (iter + 1) % 5000 == 0:
                lr_factor /= 2  # lr decay
                print("Learning Rate (LR) decayed! Current LR: ",
                      config.lr * lr_factor)
            # save model checkpoint
            if (iter + 1) % 5000 == 0:
                saver.save(sess,
                           os.path.join(path, "./Check_Point/model.ckpt"),
                           global_step=iter // 5000)  # checkpoint index = iteration / save interval
                print("Model checkpoint saved!")