Exemplo n.º 1
0
def sample_0():
    """Smoke-test the BERT/Juman wrapper on one fixed Japanese sentence.

    Builds a ``BertWithJumanModel`` from ``DIR_PATH`` with ``use_cuda=True``,
    requests the sentence embedding of a hard-coded sentence and prints the
    first ten items of the result. Progress messages are printed before and
    after the model is constructed.

    Returns:
        None. The function is only useful for its printed output.
    """
    print("now loading model...")
    bert = bert_juman.BertWithJumanModel(DIR_PATH, use_cuda=True)
    print("loading done")

    v = bert.get_sentence_embedding("吾 輩は猫である。")
    print(v[:10])
    # NOTE: a former tail (`lines = lines[3:]` / `return lines`) read the
    # local name `lines` before any assignment, so every call ended in
    # UnboundLocalError. Nothing in this function produces `lines`, so the
    # tail was removed rather than repaired.


def convert_to_embeddings(bert, lines):
    """Embed every entry of ``lines`` and stack the results row by row.

    Args:
        bert: Object exposing
            ``get_sentence_embedding(text, pooling_strategy=...)``.
        lines: Sized, iterable collection of inputs handed to ``bert`` one
            at a time.

    Returns:
        A NumPy array of shape ``(len(lines), params.DIM)`` whose ``i``-th
        row holds the embedding computed for ``lines[i]``.
    """
    # Uninitialised buffer: every row is overwritten in the loop below.
    embeddings = np.empty((len(lines), params.DIM))
    for row, sentence in enumerate(lines):
        vector = bert.get_sentence_embedding(
            sentence,
            pooling_strategy=params.POOLING_STRATEGY,
        )
        embeddings[row] = vector
    return embeddings


if __name__ == "__main__":
    print("loading model...")
    bert = bert_juman.BertWithJumanModel(params.DIR_PATH, use_cuda=True)
    print("model loading done!")
    for (root, dirs, files) in os.walk(ROOT_DIR_PATH):
        for dir in dirs:
            # if dir == "smax" or dir == "sports-watch" or dir == "topic-news" or dir == "livedoor-homme" or dir == "movie-enter":
            #    continue

            dst_dir_path = os.path.join(DST_DIR_PATH, dir)
            if not os.path.isdir(dst_dir_path):
                os.makedirs(dst_dir_path)

            dir_path = os.path.join(root, dir)
            for i, line in enumerate(os.listdir(dir_path)):
                if dir in line:
                    path = os.path.join(root, dir, line)
                    lines = read_document(path)