def read_csv(input_file):
    """Read a label/content file and return shuffled [label_key, content] pairs.

    Each row's raw label value is mapped back to its key in the module-level
    ``dict_label`` (reverse lookup), then the rows are shuffled via
    ``shuffle_one``.

    Args:
        input_file: Path passed straight to ``load_csv`` (read with header=0).

    Returns:
        A shuffled list of two-element lists: [label_key, content_text].
    """
    df = load_csv(input_file, header=0)
    contents = list(df['content'])
    labels = list(df['label'])
    rows = [[str(lab), str(txt)] for lab, txt in zip(labels, contents)]
    print('Read csv finished!(1)')
    # Invert dict_label once instead of rebuilding two lists and doing an
    # O(k) .index() scan per row.  setdefault keeps the FIRST key for a
    # duplicated value, matching list.index semantics of the old code.
    # NOTE(review): an unknown label now raises KeyError rather than the
    # old ValueError from .index() — both were unhandled crashes here.
    value_to_key = {}
    for key, val in dict_label.items():
        value_to_key.setdefault(val, key)
    mapped = [[value_to_key[lab], txt]
              for lab, txt in rows if isinstance(txt, str)]
    return shuffle_one(mapped)
Exemplo n.º 2
0
 def _read_csv(cls, input_file):  # project data loader
     """Read a label/content file and return shuffled [label_key, content] pairs.

     Same pipeline as the module-level ``read_csv``: load with ``load_csv``,
     map each raw label value back to its key in ``dict_label``, drop rows
     whose content is not a string, shuffle with ``shuffle_one``.

     Args:
         input_file: Path passed straight to ``load_csv`` (read with header=0).

     Returns:
         A shuffled list of two-element lists: [label_key, content_text].
     """
     df = load_csv(input_file, header=0)
     contents = list(df['content'])
     labels = list(df['label'])
     rows = [[str(lab), str(txt)] for lab, txt in zip(labels, contents)]
     # Invert dict_label once instead of an O(k) .index() scan per row.
     # setdefault keeps the FIRST key for a duplicated value, matching the
     # old list.index semantics.
     value_to_key = {}
     for key, val in dict_label.items():
         value_to_key.setdefault(val, key)
     lines2 = [[value_to_key[lab], txt]
               for lab, txt in rows if isinstance(txt, str)]
     lines3 = shuffle_one(lines2)
     print('Head data:', lines2[0:2])
     print('Length of data:', len(rows))
     print('Read csv finished(2)!')
     return lines3
Exemplo n.º 3
0
# Index array over the test set (used for batching elsewhere).
ids_test = np.arange(len(input_ids_test))
# Launch the graph and start training (TF1-style session).
saver = tf.train.Saver(max_to_keep=100)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Restore model parameters from the latest checkpoint, if one exists.
MODEL_SAVE_PATH = os.path.join(pwd, 'model')
ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
    print('Restored model!')

with sess.as_default():
    for i in range(hp.num_train_epochs):
        # Reshuffle training indices each epoch.
        indexs = shuffle_one(ids_train)
        # NOTE(review): range(num_batches - 1) skips the final (possibly
        # partial) batch — confirm this is intentional.
        for batch_num in range(num_batches - 1):
            # Slice this batch's indices, clamped to the training-set size.
            i1 = indexs[batch_num * hp.batch_size:min((batch_num + 1) *
                                                      hp.batch_size, N_train)]
            # Get features
            input_id_ = select(input_ids, i1)
            input_mask_ = select(input_masks, i1)
            segment_id_ = select(segment_ids, i1)
            label_id_ = select(label_ids, i1)
            # Feed dict mapping the model's placeholders to this batch.
            # (The training op that consumes `fd` is outside this excerpt.)
            fd = {
                MODEL.input_ids: input_id_,
                MODEL.input_masks: input_mask_,
                MODEL.segment_ids: segment_id_,
                MODEL.label_ids: label_id_
            }
Exemplo n.º 4
0
# TF1-style session setup: saver, session, variable initialization.
saver = tf.train.Saver(max_to_keep=hp.max_to_keep)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Load model saved before (restore latest checkpoint if present).
MODEL_SAVE_PATH = os.path.join(pwd, hp.file_save_model)
ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
    print('Restored model!')

with sess.as_default():
    # Tensorboard writer
    writer = tf.summary.FileWriter(hp.logdir, sess.graph)
    for i in range(hp.num_train_epochs):
        # Reshuffle training indices each epoch.
        indexs = shuffle_one(arr)
        # NOTE(review): range(num_batchs - 1) skips the final (possibly
        # partial) batch — confirm this is intentional.
        for j in range(num_batchs - 1):
            # Slice this batch's indices, clamped to the training-set size.
            i1 = indexs[j *
                        hp.batch_size:min((j + 1) *
                                          hp.batch_size, num_train_samples)]
            # Get features
            input_id_ = select(input_ids, i1)
            input_mask_ = select(input_masks, i1)
            segment_id_ = select(segment_ids, i1)
            label_id_ = select(label_ids, i1)
            # Feed dict mapping the model's placeholders to this batch
            # (literal continues past this excerpt).
            fd = {
                MODEL.input_ids: input_id_,
                MODEL.input_masks: input_mask_,
                MODEL.segment_ids: segment_id_,
                MODEL.label_ids: label_id_