Exemplo n.º 1
0
    def test(self):
        """Run the model over every test batch and collect the predictions.

        Each ``(batch_y, batch_x)`` pair from
        ``self.__data_manager.test_batches`` is fed through ``self.__decoded``.
        For every sample the picture name and the decoded text are printed and
        a newline-terminated ``name:prediction`` line is collected.

        Note: the default TensorFlow graph is reset before returning.

        Returns:
            list of str: one ``name:prediction`` line per test sample.
        """
        predict_result = []
        with self.__session.as_default():
            print('Testing')
            for batch_y, batch_x in self.__data_manager.test_batches:
                decoded = self.__session.run(
                    self.__decoded,
                    feed_dict={
                        self.__inputs: batch_x,
                        # Every sample is declared with the maximum length.
                        self.__seq_len: [self.__max_char_count] *
                        self.__data_manager.batch_size,
                    })

                for i, label in enumerate(batch_y):
                    print('pic_name:')
                    print(label)
                    print('predict:')
                    # Decode once and reuse the result for both the printout
                    # and the collected line.
                    prediction = ground_truth_to_word(decoded[i])
                    print(prediction)
                    # Named ``line`` so the builtin ``str`` is not shadowed.
                    line = ''.join(label) + ':' + ''.join(prediction) + '\n'
                    predict_result.append(line)
        tf.reset_default_graph()
        return predict_result
Exemplo n.º 2
0
    def train(self, iteration_count):
        """Train for ``iteration_count`` epochs on the lines of the train file.

        Every epoch re-reads and shuffles ``self.__train_file``, splits it into
        ``len(lines) // self.__bs`` batches built by ``self.get_batch``, runs
        the optimizer on each batch, writes the loss summary to ``./logs`` and
        saves a checkpoint at the end of the epoch.

        Args:
            iteration_count: number of epochs to run.

        Returns:
            None. ``self.step`` is advanced once on entry and once per epoch.
        """
        self.step += 1
        print("Step: ", self.step)
        with self.__session.as_default():
            train_writer = tf.summary.FileWriter('./logs',
                                                 self.__session.graph)
            try:
                step_summary = self.step
                print('Training')
                for epoch in range(self.step, iteration_count + self.step):
                    iter_loss = 0
                    with open(self.__train_file, 'r') as train_file:
                        lines = [line.strip('\n') for line in train_file]
                    random.shuffle(lines)
                    num_batch = len(lines) // self.__bs
                    # Preview roughly three times per epoch. Clamp to 1 so that
                    # epochs with fewer than three batches do not divide by
                    # zero (which previously skipped the summary and the loss
                    # accumulation for every batch).
                    log_every = max(1, num_batch // 3)

                    for i in range(num_batch):
                        # Best-effort: a failing batch is reported and skipped.
                        try:
                            batch_x, batch_y, batch_z = self.get_batch(
                                self.__bs, i, lines, self.__ex)
                            _, loss_value, probabilities, loss_sum, _ = self.__session.run(
                                [
                                    self.__optimizer, self.__loss, self.__prob,
                                    self.__loss_summary, self.__word_acc
                                ],
                                feed_dict={
                                    self.__inputs: batch_x,
                                    self.__output: batch_y,
                                    self.__length: batch_z
                                })
                            if i % log_every == 0:
                                # Show the first sample of the batch next to
                                # its arg-max prediction.
                                print(ground_truth_to_word(batch_y[0]))
                                prob = np.argmax(probabilities[0], axis=-1)
                                print(ground_truth_to_word(prob))
                                print("loss: ", loss_value)
                                print("step: {}/{}".format(i, num_batch))
                            train_writer.add_summary(loss_sum, step_summary)
                            step_summary += 1
                            iter_loss += loss_value
                        except Exception as e:
                            print(str(e))
                            continue
                    self.__saver.save(self.__session,
                                      self.__save_path,
                                      global_step=epoch)
                    print('[{}] Iteration loss: {}'.format(epoch, iter_loss))
                    self.step += 1
            finally:
                # Flush pending events and release the log file handle.
                train_writer.close()
        return None
Exemplo n.º 3
0
    def train(self, iteration_count):
        """Train for ``iteration_count`` epochs, checkpointing after each one.

        Every batch from ``self.__data_manager.train_batches`` is run through
        the optimizer; the per-batch cost is summed into the epoch loss, which
        is printed after the checkpoint is saved.

        Args:
            iteration_count: number of epochs to run, starting at
                ``self.step``.

        Returns:
            None. ``self.step`` is incremented once per epoch.
        """
        with self.__session.as_default():
            print('Training')
            for i in range(self.step, iteration_count + self.step):
                iter_loss = 0
                for batch_y, batch_dt, batch_x in self.__data_manager.train_batches:
                    _, decoded, loss_value = self.__session.run(
                        [self.__optimizer, self.__decoded, self.__cost],
                        feed_dict={
                            self.__inputs: batch_x,
                            self.__seq_len: [self.__max_char_count] * self.__data_manager.batch_size,
                            self.__targets: batch_dt
                        }
                    )

                    # Every 10th epoch, preview up to two samples per batch.
                    if i % 10 == 0:
                        # Bounded by the batch length so a batch with a single
                        # sample does not raise IndexError.
                        for j in range(min(2, len(batch_y))):
                            print(batch_y[j])
                            print(ground_truth_to_word(decoded[j]))

                    iter_loss += loss_value

                self.__saver.save(
                    self.__session,
                    self.__save_path,
                    global_step=self.step
                )

                print('[{}] Iteration loss: {}'.format(self.step, iter_loss))

                self.step += 1
        return None
Exemplo n.º 4
0
    def train(self, iteration_count):
        """Run ``iteration_count`` training epochs and save a checkpoint each.

        For each batch yielded by ``self.__data_manager.train_batches`` the
        optimizer, decoder and cost are evaluated in one session call. The
        batch costs are accumulated and reported once per epoch.

        Args:
            iteration_count: number of epochs to run, counted from
                ``self.step``.

        Returns:
            None. ``self.step`` grows by one per completed epoch.
        """
        with self.__session.as_default():
            print('Training')
            for i in range(self.step, iteration_count + self.step):
                iter_loss = 0
                for batch_y, batch_dt, batch_x in self.__data_manager.train_batches:
                    feed = {
                        self.__inputs: batch_x,
                        # All samples are declared with the maximum length.
                        self.__seq_len: [self.__max_char_count] *
                        self.__data_manager.batch_size,
                        self.__targets: batch_dt,
                    }
                    _, decoded, loss_value = self.__session.run(
                        [self.__optimizer, self.__decoded, self.__cost],
                        feed_dict=feed)

                    if i % 10 == 0:
                        # Preview at most two samples; never index past the
                        # end of a short batch.
                        for j in range(min(2, len(batch_y))):
                            print(batch_y[j])
                            print(ground_truth_to_word(decoded[j]))

                    iter_loss += loss_value

                self.__saver.save(self.__session,
                                  self.__save_path,
                                  global_step=self.step)

                print('[{}] Iteration loss: {}'.format(self.step, iter_loss))

                self.step += 1
        return None
Exemplo n.º 5
0
    def train(self, iteration_count):
        """Train for ``iteration_count`` epochs with TensorBoard summaries.

        Merged summaries are written to ``tensorboard/dir`` after every batch
        (tagged with the epoch index). Every 50th epoch up to two samples per
        batch are decoded and printed with the batch accuracy. A checkpoint is
        saved every 500th epoch, and the graph is added to the event file once
        training finishes.

        Args:
            iteration_count: number of epochs to run, starting at
                ``self.step``.

        Returns:
            None. ``self.step`` is incremented once per epoch.
        """
        tensorboard_dir = 'tensorboard/dir'
        merged = tf.summary.merge_all()
        if not os.path.exists(tensorboard_dir):
            os.makedirs(tensorboard_dir)
        writer = tf.summary.FileWriter(tensorboard_dir)
        # Only the index-to-character mapping is needed for decoding.
        _, inverse_dictionary, _ = read_dictionary(
            self.__data_manager.dictionary_path)
        try:
            with self.__session.as_default():
                print('Training')

                for i in range(self.step, iteration_count + self.step):
                    iter_loss = 0

                    for batch_y, batch_dt, batch_x in self.__data_manager.train_batches:
                        feed = {
                            self.__inputs: batch_x,
                            self.__seq_len: [self.__max_char_count] *
                            self.__data_manager.batch_size,
                            self.__targets: batch_dt,
                        }
                        _, loss_value, summary = self.__session.run(
                            [self.__optimizer, self.__cost, merged],
                            feed_dict=feed)

                        if i % 50 == 0:
                            # One decode pass serves every previewed sample;
                            # the feed is identical for all of them.
                            decoded, acc = self.__session.run(
                                [self.__decoded, self.__acc], feed_dict=feed)
                            for j in range(min(2, len(batch_y))):
                                print(batch_y[j])
                                print(acc)
                                print(
                                    ground_truth_to_word(decoded[j],
                                                         inverse_dictionary))

                        iter_loss += loss_value
                        # Write the summary exactly as returned. It used to be
                        # concatenated with itself first, which duplicated
                        # every value in the event file.
                        writer.add_summary(summary, i)

                    if i % 500 == 0:
                        self.__saver.save(self.__session,
                                          self.__save_path,
                                          global_step=self.step)

                    print('[{}] Iteration loss: {}'.format(
                        self.step, iter_loss))

                    self.step += 1
                writer.add_graph(self.__session.graph)
        finally:
            # Flush buffered events and release the event file.
            writer.close()
        return None
Exemplo n.º 6
0
 def test(self):
     """Evaluate one test batch and print the mean edit distance.

     Fetches a single batch of ``self.batch_size`` samples from
     ``self.data.get_next_test_batch``, decodes it and prints the summed
     Levenshtein distance divided by the number of samples.

     Returns:
         None.
     """
     with self.sess.as_default():
         example_count = 0
         total_error = 0
         batch_x, batch_y, batch_length = self.data.get_next_test_batch(
             self.batch_size)
         # The result was previously bound to an unrelated name, so the
         # ``decoded`` lookup below raised NameError.
         # NOTE(review): fetching ``[self.decoded]`` yields a one-element
         # list; confirm that is what ground_truth_to_word expects.
         decoded = self.sess.run([self.decoded],
                                 feed_dict={
                                     self.inputs: batch_x,
                                     self.seq_len: batch_length
                                 })
         example_count += len(batch_y)
         total_error += np.sum(
             levenshtein(ground_truth_to_word(batch_y),
                         ground_truth_to_word(decoded)))
         # Parenthesised print works on Python 2 and 3; float() keeps the
         # mean from being truncated by integer division.
         print("Error on test set: {}".format(
             float(total_error) / example_count))
     return None
Exemplo n.º 7
0
Arquivo: crnn.py Projeto: Cospel/CRNN
    def train(self, iteration_count, learning_rate_decay):
        """Train for ``iteration_count`` epochs with step-wise LR decay.

        Each epoch shows a tqdm progress bar with running mean cost values,
        previews predictions for the last few batches, then saves both a
        checkpoint and a frozen model at ``save/frozen.pb``.

        Args:
            iteration_count: number of epochs to run.
            learning_rate_decay: every this many epochs the learning rate is
                multiplied by 0.1; ``0`` disables the decay.

        Returns:
            None. ``self.step`` is incremented once per epoch.
        """
        with self.__session.as_default():
            print('Training')

            for i in range(iteration_count):
                iter_loss, len_loss, k = 0, 0, 0
                # Defined up front so the end-of-epoch report still works when
                # the generator yields no batches.
                acc = None

                if learning_rate_decay != 0 and i != 0 and i % learning_rate_decay == 0:
                    self.__start_learning_rate *= 0.1

                batch_total = len(self.__data_manager.train_batches)
                # The context manager closes the bar at the end of each epoch;
                # previously one unclosed bar was left behind per epoch.
                with tqdm(total=batch_total, ncols=150) as pbar:
                    for batch_y, batch_dt, batch_x in self.__data_manager._generate_all_train_batches():
                        _, decoded, loss_value, acc, _, _, cost_len = self.__session.run(
                            [self.__optimizer, self.__decoded, self.__cost, self.__acc, self.__logits, self.__output_len, self.__cost_len],
                            feed_dict={
                                self.__inputs: batch_x,
                                self.__targets: batch_dt,
                                self.__learning_rate: self.__start_learning_rate
                            }
                        )

                        # Preview predictions only near the end of the epoch.
                        if k > batch_total - 5:
                            # Bounded so a short batch cannot raise IndexError.
                            for j in range(min(2, len(batch_y))):
                                print('GT:', batch_y[j])
                                print('PREDICT:', ground_truth_to_word(decoded[j], self.CHAR_VECTOR))
                                print(f'---- {i} ----')
                                sys.stdout.flush()

                        pbar.update(1)
                        k += 1
                        iter_loss += loss_value
                        len_loss += cost_len

                        pbar.set_postfix(
                            epoch=str(i)+'/'+str(iteration_count),
                            step=str(k),
                            cost="{:.2f}".format(iter_loss/float(k)),
                            cost_len="{:.2f}".format(len_loss/float(k)),
                            lr=str(self.__start_learning_rate)
                        )

                self.__saver.save(
                    self.__session,
                    self.__save_path,
                    global_step=self.step
                )

                self.save_frozen_model("save/frozen.pb")

                print('[{}] Iteration loss: {} Error rate: {}'.format(self.step, iter_loss, acc))
                self.step += 1
        return None
Exemplo n.º 8
0
    def train(self, iteration_count):
        """Train for ``iteration_count`` epochs of at most 100 batches each.

        Every batch is optimized, its merged summary is written through
        ``self.train_summary_writer`` and up to two predictions are printed.
        After each epoch a checkpoint and a frozen model are saved and the
        largest value of ``self.weight_matrix`` from the last batch is
        printed. The summary writer is closed when training ends.

        Args:
            iteration_count: number of epochs to run, starting at
                ``self.step``.

        Returns:
            None. ``self.step`` is incremented once per epoch.
        """
        with self.session.as_default():
            print("Training")
            self.max_weight = tf.math.reduce_max(self.weight_matrix)
            merged = tf.summary.merge_all()

            for i in range(self.step, iteration_count + self.step):
                print("Processing iteration ::", i)
                batch_count = 0
                iter_loss = 0
                # Defined up front so the end-of-epoch report does not raise
                # NameError when there are no training batches.
                acc = None
                max_weight = None

                for batch_y, batch_dt, batch_x in self.data_manager.train_batches:
                    _, decoded, loss_value, acc, max_weight, summary = self.session.run(
                        [
                            self.optimizer, self.decoded, self.cost, self.acc,
                            self.max_weight, merged
                        ],
                        feed_dict={
                            self.inputs:
                            batch_x,
                            self.seq_len: [self.max_char_count] *
                            self.data_manager.batch_size,
                            self.targets:
                            batch_dt,
                        },
                    )
                    self.train_summary_writer.add_summary(summary, self.step)

                    # Preview up to two samples of every batch; bounded so a
                    # short batch cannot raise IndexError.
                    for j in range(min(2, len(batch_y))):
                        print(f"decoded ...{decoded[0]}")
                        pred = ground_truth_to_word(
                            decoded[j], self.CHAR_VECTOR)
                        print("{} | {}".format(batch_y[j], pred))
                    print("---- {} | {} ----".format(i, batch_count))

                    iter_loss += loss_value
                    batch_count += 1
                    # Cap each epoch at 100 batches.
                    if batch_count >= 100:
                        break

                self.saver.save(self.session,
                                self.save_path,
                                global_step=self.step)

                self.train_summary_writer.flush()
                self.save_frozen_model("save/frozen.pb")

                print("[{}] Iteration loss: {} Error rate: {}".format(
                    self.step, iter_loss, acc))

                print("max weight", max_weight)
                self.step += 1
            self.train_summary_writer.close()
        return None
Exemplo n.º 9
0
    def test(self):
        """Print each test label followed by the model's decoded output.

        Every batch from ``self.__data_manager.test_batches`` is evaluated
        with ``self.__is_training`` fed as ``False``.
        """
        session = self.__session
        with session.as_default():
            print('Testing')
            for labels, _, images in self.__data_manager.test_batches:
                feed = {self.__inputs: images, self.__is_training: False}
                output = session.run(self.__output, feed_dict=feed)

                for idx, truth in enumerate(labels):
                    print(truth)
                    print(ground_truth_to_word(output[idx]))
Exemplo n.º 10
0
 def test(self):
     """Evaluate the test set and print the mean edit distance per example.

     Each batch from ``self.__data_manager.get_next_test_batch()`` is
     decoded and the Levenshtein distance between the labels and the
     decoded output is accumulated over all batches.

     Returns:
         None.
     """
     with self.__session.as_default():
         print('Testing')
         total_error = 0
         example_count = 0
         for batch_y, batch_sl, batch_x in self.__data_manager.get_next_test_batch(
         ):
             # Only the inputs and sequence lengths are fed; the sparse
             # target tensor that used to be built here was never used.
             # NOTE(review): fetching ``[self.__decoded]`` yields a
             # one-element list; confirm ground_truth_to_word expects that.
             decoded = self.__session.run([self.__decoded],
                                          feed_dict={
                                              self.__inputs: batch_x,
                                              self.__seq_len: batch_sl
                                          })
             example_count += len(batch_y)
             total_error += np.sum(
                 levenshtein(ground_truth_to_word(batch_y),
                             ground_truth_to_word(decoded)))
         # Report the per-example rate. The format string used to have one
         # placeholder for two arguments, so the rate was computed and then
         # silently dropped in favour of the raw total.
         print('Error on test set: {}'.format(total_error / example_count))
     return None
Exemplo n.º 11
0
    def train(self, iteration_count):
        """Train the two-target model for ``iteration_count`` epochs.

        Each batch supplies its own sequence lengths and two target tensors.
        The current epoch number (offset by one) is fed as
        ``self.__iteration_n`` and ``self.__is_training`` is fed as ``True``.
        A checkpoint is saved after every epoch.

        Args:
            iteration_count: number of epochs to run, starting at
                ``self.step``.

        Returns:
            None. ``self.step`` is incremented once per epoch.
        """
        with self.__session.as_default():
            print('Training')
            for i in range(self.step, iteration_count + self.step):
                iter_loss = 0
                for batch_y, batch_seq_len, batch_dt, batch_dt_2, batch_x in self.__data_manager.train_batches:
                    # The learning rate, both intermediate outputs and the
                    # weight mask are still fetched, as before, but only the
                    # output and the loss are used here.
                    _, output, loss_value, _, _, _, _ = self.__session.run(
                        [
                            self.__optimizer, self.__output, self.__loss,
                            self.__learning_rate, self.__a1, self.__a2,
                            self.__weight_mask
                        ],
                        feed_dict={
                            self.__inputs: batch_x,
                            self.__seq_len: batch_seq_len,
                            self.__targets_1: batch_dt,
                            self.__targets_2: batch_dt_2,
                            # Offset by one so the fed iteration number is
                            # never zero (original author's note: a power of
                            # zero would otherwise be undefined).
                            self.__iteration_n: [float(i) + 1.],
                            self.__is_training: True
                        })

                    if i % 10 == 0:
                        # Preview at most two samples; bounded so a short
                        # batch cannot raise IndexError.
                        for j in range(min(2, len(batch_y))):
                            print(batch_y[j])
                            print(batch_dt[j])
                            print(ground_truth_to_word(output[j]))

                    iter_loss += loss_value

                self.__saver.save(self.__session,
                                  self.__save_path,
                                  global_step=self.step)

                print('[{}] Iteration loss: {}'.format(self.step, iter_loss))

                self.step += 1
        return None
Exemplo n.º 12
0
    def test(self):
        """Decode every test batch and print label / prediction pairs.

        Returns:
            None.
        """
        session = self.__session
        with session.as_default():
            print('Testing')
            for labels, _, images in self.__data_manager.test_batches:
                # Each sample is declared with the maximum character count.
                seq_lengths = [self.__max_char_count] * self.__data_manager.batch_size
                feed = {self.__inputs: images, self.__seq_len: seq_lengths}
                decoded = session.run(self.__decoded, feed_dict=feed)

                for idx, truth in enumerate(labels):
                    print(truth)
                    print(ground_truth_to_word(decoded[idx]))
        return None
Exemplo n.º 13
0
Arquivo: crnn.py Projeto: Cospel/CRNN
    def test(self):
        """Decode every test batch and print label / prediction pairs.

        Only the input images are fed; no sequence length is supplied.
        Predictions are mapped to text with ``self.CHAR_VECTOR``.

        Returns:
            None.
        """
        session = self.__session
        with session.as_default():
            print('Testing')
            for labels, _, images in self.__data_manager.test_batches:
                feed = {self.__inputs: images}
                decoded = session.run(self.__decoded, feed_dict=feed)

                for idx, truth in enumerate(labels):
                    print(truth)
                    print(ground_truth_to_word(decoded[idx], self.CHAR_VECTOR))
        return None
Exemplo n.º 14
0
    def test(self, b_savePb=False, b_test=True, b_saveNG=True):
        """Optionally export the model, then measure exact-match accuracy.

        Args:
            b_savePb: when true, call ``self.save_PbModel()`` first.
            b_test: when true, run the model over all test batches and print
                the number of samples and the fraction predicted exactly.
            b_saveNG: when true, copy every misclassified example file into a
                sibling ``<examples_path>_NG`` directory.

        Returns:
            None.
        """
        if b_saveNG:
            # Directory that collects the misclassified samples.
            NG_path = self.__data_manager.examples_path + '_NG'
            if not os.path.exists(NG_path):
                os.mkdir(NG_path)

        with self.__session.as_default():
            if b_savePb:
                self.save_PbModel()
            if b_test:
                count = 0
                r = 0
                for batch_y, _, batch_x in self.__data_manager.test_batches:
                    decoded = self.__session.run(
                        self.__decoded,
                        feed_dict={
                            self.__inputs:
                            batch_x,
                            self.__seq_len: [self.__max_char_count] *
                            self.__data_manager.batch_size
                        })
                    for i, name in enumerate(batch_y):
                        count += 1
                        label = self.__data_manager.get_label(name)
                        predict = ground_truth_to_word(decoded[i])

                        if label == predict:
                            r += 1
                        else:
                            print(label)
                            print(predict)
                            if b_saveNG:
                                wrong_example = os.path.join(
                                    self.__data_manager.examples_path, name)
                                object_path = os.path.join(NG_path, name)
                                shutil.copyfile(wrong_example, object_path)

                # An empty test set reports 0.0 instead of raising
                # ZeroDivisionError.
                acc = r / count if count else 0.0
                print("Testing ,count:{},acc:{}".format(count, acc))
                if b_saveNG:
                    print("错误样本保存到文件:{}".format(NG_path))

        return None
Exemplo n.º 15
0
    def train(self, iteration_count):
        """Train for ``iteration_count`` epochs, exporting after each one.

        After every epoch a checkpoint is saved, the frozen model is written
        to ``save/frozen.pb`` and the summed batch cost is printed together
        with the accuracy value of the last batch.

        Args:
            iteration_count: number of epochs to run, starting at
                ``self.step``.

        Returns:
            None. ``self.step`` is incremented once per epoch.
        """
        with self.__session.as_default():
            print("Training")
            for i in range(self.step, iteration_count + self.step):
                iter_loss = 0
                # Defined up front so the end-of-epoch report does not raise
                # NameError when there are no training batches.
                acc = None
                for batch_y, batch_dt, batch_x in self.__data_manager.train_batches:
                    _, decoded, loss_value, acc = self.__session.run(
                        [
                            self.__optimizer, self.__decoded, self.__cost,
                            self.__acc
                        ],
                        feed_dict={
                            self.__inputs:
                            batch_x,
                            self.__seq_len: [self.__max_char_count] *
                            self.__data_manager.batch_size,
                            self.__targets:
                            batch_dt,
                        },
                    )

                    if i % 10 == 0:
                        # Preview at most two samples; bounded so a short
                        # batch cannot raise IndexError.
                        for j in range(min(2, len(batch_y))):
                            print("GT:", batch_y[j])
                            print(
                                "PREDICT:",
                                ground_truth_to_word(decoded[j],
                                                     self.CHAR_VECTOR),
                            )
                            print(f"---- {i} ----")

                    iter_loss += loss_value

                self.__saver.save(self.__session,
                                  self.__save_path,
                                  global_step=self.step)

                self.save_frozen_model("save/frozen.pb")

                print("[{}] Iteration loss: {} Error rate: {}".format(
                    self.step, iter_loss, acc))

                self.step += 1
        return None
Exemplo n.º 16
0
    def train(self, iteration_count):
        """Train for ``iteration_count`` epochs of at most 100 batches each.

        After every epoch a checkpoint is saved, the frozen model is written
        to ``save/frozen.pb`` and the summed batch cost is printed together
        with the accuracy value of the last batch.

        Args:
            iteration_count: number of epochs to run, starting at
                ``self.step``.

        Returns:
            None. ``self.step`` is incremented once per epoch.
        """
        with self.session.as_default():
            print("Training")
            for i in range(self.step, iteration_count + self.step):
                batch_count = 0
                iter_loss = 0
                # Defined up front so the end-of-epoch report does not raise
                # NameError when there are no training batches.
                acc = None
                for batch_y, batch_dt, batch_x in self.data_manager.train_batches:
                    _, decoded, loss_value, acc = self.session.run(
                        [self.optimizer, self.decoded, self.cost, self.acc],
                        feed_dict={
                            self.inputs:
                            batch_x,
                            self.seq_len: [self.max_char_count] *
                            self.data_manager.batch_size,
                            self.targets:
                            batch_dt,
                        },
                    )

                    if i % 10 == 0:
                        # Preview at most two samples; bounded so a short
                        # batch cannot raise IndexError.
                        for j in range(min(2, len(batch_y))):
                            pred = ground_truth_to_word(
                                decoded[j], self.CHAR_VECTOR)
                            print(f"{batch_y[j]} | {pred}")
                        print(f"---- {i} | {batch_count} ----")

                    iter_loss += loss_value
                    batch_count += 1
                    # Cap each epoch at 100 batches.
                    if batch_count >= 100:
                        break

                self.saver.save(self.session,
                                self.save_path,
                                global_step=self.step)

                self.save_frozen_model("save/frozen.pb")

                print(
                    f"[{self.step}] Iteration loss: {iter_loss} Error rate: {acc}"
                )

                self.step += 1
        return None
Exemplo n.º 17
0
    def test(self):
        """Decode every test batch and print truth, output shape and result.

        Returns:
            None.
        """
        session = self.session
        with session.as_default():
            print("Testing")
            for labels, _, images in self.data_manager.test_batches:
                # Each sample is declared with the maximum character count.
                seq_lengths = [self.max_char_count] * self.data_manager.batch_size
                feed = {self.inputs: images, self.seq_len: seq_lengths}
                decoded = session.run(self.decoded, feed_dict=feed)

                for i, truth in enumerate(labels):
                    word = ground_truth_to_word(decoded[i], self.CHAR_VECTOR)
                    print("Ground truth", truth)
                    print(f"decode batch:{i}", decoded.shape)
                    print("Test result", word)
        return None
Exemplo n.º 18
0
 def valid(self):
     """Print the exact-match accuracy over all test batches.

     A sample counts as correct when the label returned by
     ``self.__data_manager.get_label`` equals the decoded prediction.
     """
     count = 0
     r = 0
     for batch_y, _, batch_x in self.__data_manager.test_batches:
         decoded = self.__session.run(
             self.__decoded,
             feed_dict={
                 self.__inputs:
                 batch_x,
                 self.__seq_len:
                 [self.__max_char_count] * self.__data_manager.batch_size
             })
         for i, name in enumerate(batch_y):
             count += 1
             label = self.__data_manager.get_label(name)
             predict = ground_truth_to_word(decoded[i])
             if label == predict:
                 r += 1
     # An empty test set reports 0.0 instead of raising ZeroDivisionError.
     acc = r / count if count else 0.0
     print("validation, count:{}, acc:{}".format(count, acc))
Exemplo n.º 19
0
    def train(self, iteration_count):
        """Train for ``iteration_count`` epochs, building targets per batch.

        For each batch from ``self.__data_manager.get_next_train_batch()`` the
        labels are converted with ``label_to_array`` and
        ``sparse_tuple_from`` and fed as ``self.__targets``. A checkpoint is
        saved after every epoch.

        Args:
            iteration_count: number of epochs to run, starting at
                ``self.step``.

        Returns:
            None. ``self.step`` is incremented once per epoch.
        """
        with self.__session.as_default():
            print('Training')
            for i in range(self.step, iteration_count + self.step):
                iter_loss = 0
                # NOTE(review): the per-batch sequence lengths (second tuple
                # item) are ignored and the maximum length is fed instead;
                # confirm this is intended.
                for batch_y, batch_sl, batch_x in self.__data_manager.get_next_train_batch(
                ):
                    data_targets = np.asarray([
                        label_to_array(lbl, config.CHAR_VECTOR)
                        for lbl in batch_y
                    ])
                    data_targets = sparse_tuple_from(data_targets)
                    _, decoded, loss_value = self.__session.run(
                        [self.__optimizer, self.__decoded, self.__cost],
                        feed_dict={
                            self.__inputs:
                            batch_x,
                            self.__seq_len: [self.__max_char_count] *
                            self.__data_manager.batch_size,
                            self.__targets:
                            data_targets
                        })

                    if i % 10 == 0:
                        # Preview at most two samples; bounded so a short
                        # batch cannot raise IndexError.
                        for j in range(min(2, len(batch_y))):
                            print(batch_y[j])
                            print(ground_truth_to_word(decoded[j]))

                    iter_loss += loss_value

                self.__saver.save(self.__session,
                                  self.__save_path,
                                  global_step=self.step)

                print('[{}] Iteration loss: {}'.format(self.step, iter_loss))

                self.step += 1
        return None
Exemplo n.º 20
0
    def train(self, iteration_count):
        with self.__session.as_default():
            print('Training')
            train_iteration_card_acc = []
            train_iteration_num_acc = []
            train_batch_card_acc = []
            train_batch_num_acc = []
            test_iteration_card_acc = []
            test_iteration_num_acc = []
            counts_train = 0
            counts_test = 0
            for i in range(self.step, iteration_count + self.step):
                #iter_loss = 0
                acc_card_train_ = 0
                sum_card_train_ = 0
                acc_num_train_ = 0
                sum_num_train_ = 0
                iter_loss_train = 0

                acc_card_test_ = 0
                sum_card_test_ = 0
                acc_num_test_ = 0
                sum_num_test_ = 0
                iter_loss_test = 0

                #train
                #for k in range(1):
                #batch_y,batch_dt,batch_x = self.__data_manager.train_batches[0]
                for batch_y, batch_dt, batch_x in self.__data_manager.train_batches:
                    counts_train = counts_train + 1
                    acc_card_train = 0
                    sum_card_train = 0
                    acc_num_train = 0
                    sum_num_train = 0
                    #for batch_y, batch_dt, batch_x in self.__data_manager.train_batches:
                    op, decoded, loss_value, train_summary = self.__session.run(
                        [
                            self.__optimizer, self.__decoded, self.__cost,
                            self.__merged
                        ],
                        feed_dict={
                            self.__inputs:
                            batch_x,
                            self.__seq_len: [self.__max_char_count] *
                            self.__data_manager.batch_size,
                            self.__targets:
                            batch_dt
                        })
                    #print(decoded)
                    print(batch_y[0])
                    print(ground_truth_to_word(decoded[0]))
                    for j in range(self.__data_manager.batch_size):
                        true_label = batch_y[j]
                        predict_label = ground_truth_to_word(decoded[j])
                        #print(true_label)
                        #print(predict_label)
                        if true_label == predict_label:
                            acc_card_train = acc_card_train + 1
                            acc_num_train = acc_num_train + len(true_label)
                            sum_num_train = sum_num_train + len(true_label)
                        else:
                            sum_num_train = sum_num_train + len(true_label)
                            for i in range(
                                    min(len(true_label), len(predict_label))):
                                if true_label[i] == predict_label[i]:
                                    acc_num_train = acc_num_train + 1
                        sum_card_train = sum_card_train + 1

                    iter_loss_train += loss_value
                    acc_card_train_ += acc_card_train
                    sum_card_train_ += sum_card_train
                    acc_num_train_ += acc_num_train
                    sum_num_train_ += sum_num_train

                    train_batch_card_acc.append(acc_card_train /
                                                sum_card_train)
                    train_batch_num_acc.append(acc_num_train / sum_num_train)
                    print('[{}] batch train_card_acc: {}'.format(
                        self.step, acc_card_train / sum_card_train))
                    print('[{}] batch train_number_acc: {}'.format(
                        self.step, acc_num_train / sum_num_train))

                    self.train_writer.add_summary(train_summary, counts_train)

                train_iteration_card_acc.append(acc_card_train_ /
                                                sum_card_train_)
                train_iteration_num_acc.append(acc_num_train_ / sum_num_train_)
                print('[{}] iteration train_loss: {}'.format(
                    self.step, iter_loss_train))
                print('[{}] iteration train_card_acc: {}'.format(
                    self.step, acc_card_train_ / sum_card_train_))
                print('[{}] iteration train_number_acc: {}'.format(
                    self.step, acc_num_train_ / sum_num_train_))
                print('\n')

                #test
                for batch_y, batch_dt, batch_x in self.__data_manager.test_batches:
                    counts_test = counts_test + 1
                    acc_card_test = 0
                    sum_card_test = 0
                    acc_num_test = 0
                    sum_num_test = 0
                    #for batch_y, batch_dt, batch_x in self.__data_manager.train_batches:
                    decoded, loss_value, test_summary = self.__session.run(
                        [self.__decoded, self.__cost, self.__merged],
                        feed_dict={
                            self.__inputs:
                            batch_x,
                            self.__seq_len: [self.__max_char_count] *
                            self.__data_manager.batch_size,
                            self.__targets:
                            batch_dt
                        })
                    for j in range(self.__data_manager.batch_size):
                        true_label = batch_y[j]
                        predit_label = ground_truth_to_word(decoded[j])
                        #print(true_label)
                        #print(predit_label)
                        if true_label == predit_label:
                            acc_card_test = acc_card_test + 1
                            acc_num_test = acc_num_test + len(true_label)
                            sum_num_test = sum_num_test + len(true_label)
                        else:
                            sum_num_test = sum_num_test + len(true_label)
                            for i in range(
                                    min(len(true_label), len(predict_label))):
                                if true_label[i] == predict_label[i]:
                                    acc_num_test = acc_num_test + 1
                        sum_card_test = sum_card_test + 1
                    iter_loss_test += loss_value
                    acc_card_test_ += acc_card_test
                    sum_card_test_ += sum_card_test
                    acc_num_test_ += acc_num_test
                    sum_num_test_ += sum_num_test

                    self.test_writer.add_summary(test_summary, counts_test)

                test_iteration_card_acc.append(acc_card_test_ / sum_card_test_)
                test_iteration_num_acc.append(acc_num_test_ / sum_num_test_)
                print('[{}] iteration test_loss: {}'.format(
                    self.step, iter_loss_test))
                print('[{}] iteration test_card_acc: {}'.format(
                    self.step, acc_card_test_ / sum_card_test_))
                print('[{}] iteration test_number_acc: {}'.format(
                    self.step, acc_num_test_ / sum_num_test_))
                print('\n')

                if self.step % 1 == 0:
                    self.__saver_whole.save(self.__session,
                                            self.__save_path,
                                            global_step=self.step)

                self.step += 1

                #iter_loss_tensor = tf.constant(iter_loss,tf.float32)
                #test_acc_tensor = tf.constant(acc_num*1.0/sum_num,tf.float32)
                #loss_sum = tf.summary.scalar("total_loss",iter_loss_tensor)
                #acc_sum = tf.summary.scalar("test_acc",test_acc_tensor)
                #merged = self.__session.run(tf.summary.merge_all())

        np.savetxt("./train_batch_num_acc.txt", train_batch_num_acc)
        np.savetxt("./train_batch_card_acc.txt", train_batch_card_acc)
        np.savetxt("./train_iteration_num_acc.txt", train_iteration_num_acc)
        np.savetxt("./train_iteration_card_acc.txt", train_iteration_card_acc)
        np.savetxt("./test_iteration_num_acc.txt", test_iteration_num_acc)
        np.savetxt("./test_iteration_card_acc.txt", test_iteration_card_acc)
        self.train_writer.close()
        self.test_writer.close()
        return None
Exemplo n.º 21
0
    def train(self):
        """Run the training loop, logging one summary and one checkpoint per epoch.

        The training batches are loaded once via ``utils.load_train_batches()``
        before the epoch loop starts. Epochs are numbered from the current
        ``self.step`` up to (but not including) ``config.EPOCHS``. For each
        batch the optimizer op is run together with the decoded output, the
        cost, the edit distance and the learning rate. After each epoch the
        mean edit distance, the mean learning rate and the summed loss are
        written to ``self.__train_writer``, ``self.step`` is incremented and
        the session is saved through ``self.__saver``.
        """
        # Each element is unpacked below as (batch_y, batch_dt, batch_x).
        train_batches = utils.load_train_batches()

        with self.__session.as_default():
            print('Start train')
            self.__train_writer.add_graph(self.__session.graph)
            # range() is evaluated once here, so the `self.step += 1` at the
            # end of every epoch does not change how many epochs are run.
            for i in range(self.step, config.EPOCHS):
                # if self.__debug:
                #     print('trining step ' + str(i) + ':----------------------------------------------------')
                # Per-epoch accumulators: summed loss, plus the per-batch edit
                # distances and learning rates that are averaged after the
                # batch loop.
                iter_loss = 0
                dists = []
                learning_rates = []
                batch_count = 0
                batch_num = len(train_batches)
                for batch_y, batch_dt, batch_x in train_batches:
                    batch_count += 1
                    # Timestamp taken per batch, so `elapsed` below covers
                    # only the current batch's session.run call.
                    start_time = time.time()
                    # if self.__debug:
                    # print('next batch:')
                    # print('batch_y.shape: ', batch_y.shape)
                    # print(batch_dt)
                    # print(batch_x.shape)

                    # options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    # run_metadata = tf.RunMetadata()
                    # One training step. `op` is the result of fetching the
                    # optimizer and is not used afterwards. The sequence-length
                    # feed is a list holding `__max_char_count` repeated
                    # `__batch_size` times.
                    # NOTE(review): feeding self.step as the global step
                    # presumably drives the learning-rate schedule -- confirm
                    # against the graph definition.
                    op, decoded, loss_value, dist, learning_rate = self.__session.run(
                        [
                            self.__optimizer, self.__decoded, self.__cost,
                            self.__dist, self.__learning_rate
                        ],
                        feed_dict={
                            self.__inputs: batch_x,
                            self.__seq_len:
                            [self.__max_char_count] * self.__batch_size,
                            self.__targets: batch_dt,
                            self.__global_step: self.step
                        }
                        # options=options,
                        # run_metadata=run_metadata
                    )

                    # Progress line, printed every config.REPORT_STEPS batches.
                    if batch_count % config.REPORT_STEPS == 0:
                        elapsed = time.time() - start_time
                        print('training step [' + str(i) + '], batch ' +
                              str(batch_count) + '/' + str(batch_num) +
                              ', elapsed: ' + str(elapsed) + ', loss_value: ' +
                              str(loss_value) + ', learning_rate: ' +
                              str(learning_rate))
                    # print real label and prediction label
                    # if i % config.REPORT_STEPS == 0 and batch_count%10 == 0:
                    # Shows the first two samples of the batch; indexing 0 and
                    # 1 requires batch_y and decoded to hold at least two
                    # entries.
                    if batch_count % config.REPORT_STEPS == 0:
                        for j in range(2):
                            print('batch_y[' + str(j) + ']:', batch_y[j])
                            print('ground_truth_to_word:',
                                  utils.ground_truth_to_word(decoded[j]))

                    # The loss is summed (not averaged) over the epoch.
                    iter_loss += loss_value
                    dists.append(dist)
                    learning_rates.append(learning_rate)

                    # if self.__debug:
                    #     #time line trace
                    #     fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                    #     chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    #     with open('trace/timeline_batch_%d.json' % batch_count, 'w') as f:
                    #         f.write(chrome_trace)

                # Epoch-level summary. `dist` is rebound here from the last
                # batch's value to the mean over all batches of the epoch.
                dist = np.mean(dists)
                rate = np.mean(learning_rates)
                summary = tf.Summary()
                summary.value.add(tag="Edit Distance", simple_value=dist)
                summary.value.add(tag="Learning Rate", simple_value=rate)
                summary.value.add(tag="Loss", simple_value=iter_loss)
                self.__train_writer.add_summary(summary=summary, global_step=i)

                print('[{}] Iteration loss: {}, edit distance: {}----------'.
                      format(self.step, iter_loss, dist))
                # The step is incremented before saving, so the checkpoint is
                # tagged with a global_step one higher than the value printed
                # in the line above.
                self.step += 1
                self.__saver.save(self.__session,
                                  self.__save_path,
                                  global_step=self.step)