def comment_to_word_vectors(comment):
    """Convert a comment into the word vectors of its in-vocabulary tokens.

    The comment is tokenized with ``jieba_utils.cut``. Tokens that are not
    contained in ``word_vector_model.wv`` are skipped.

    Args:
        comment: Text to tokenize.

    Returns:
        A list holding ``word_vector_model.wv[token]`` for every token found
        in ``word_vector_model.wv``, in token order.
    """
    tokens = jieba_utils.cut(comment)
    return [
        word_vector_model.wv[token]
        for token in tokens
        if token in word_vector_model.wv
    ]
def test(comment):
    """Print a comment's tokens and report those missing from the model.

    Debugging helper: tokenizes ``comment`` with ``jieba_utils.cut``, prints
    the tokenizer result, then prints one line for every token that is not
    contained in ``word_vector_model.wv``.

    Args:
        comment: Text to tokenize.

    Returns:
        None. The function only prints.
    """
    sentence_words = jieba_utils.cut(comment)
    print(sentence_words)
    for word in sentence_words:
        # Only membership matters here; the vectors themselves were never
        # used, so they are no longer looked up and collected.
        if word not in word_vector_model.wv:
            # The message reads roughly "is not in the model".
            print(word, ' 沒有在模組中')
def comment_to_indices(comment, word_to_index):
    """Map each token of a comment to its index in ``word_to_index``.

    Tokens missing from ``word_to_index`` are mapped to
    ``len(word_to_index)``, which acts as the shared "other" index. The
    comment and its comma-joined tokens are also printed.

    Args:
        comment: Text to tokenize with ``jieba_utils.cut``.
        word_to_index: Mapping from token to its index.

    Returns:
        A list with one index per token, in token order.
    """
    other_index = len(word_to_index)
    # Materialize the tokens first: they are consumed twice (by the print
    # below and by the lookup). If ``cut`` returned a one-shot iterator, the
    # join would exhaust it and no indices would be produced.
    words = list(jieba_utils.cut(comment))
    print('Comment: {}\n Terms: {}'.format(comment, ','.join(words)))
    return [word_to_index.get(word, other_index) for word in words]
def comment_to_one_hot(comment, word_index_dict):
    """Encode every token of a comment as a one-hot list.

    Each encoding has ``len(word_index_dict) + 1`` entries. A token found in
    ``word_index_dict`` sets the entry at its mapped index; any other token
    sets the last entry.

    Args:
        comment: Text to tokenize with ``jieba_utils.cut``.
        word_index_dict: Mapping from token to its position in the encoding.

    Returns:
        A list with one one-hot list (NumPy array converted via ``tolist``)
        per token, in token order.
    """
    encoded = []
    for token in jieba_utils.cut(comment):
        row = np.zeros(len(word_index_dict) + 1)
        # -1 addresses the trailing slot reserved for unknown tokens.
        row[word_index_dict.get(token, -1)] = 1
        encoded.append(row.tolist())
    return encoded
def build_up_word_list(comments, word_list_file_name=None, output_file_name=None):
    """Load a word list from a file, or build one from the given comments.

    Args:
        comments: Iterable of comments. It is only tokenized (with
            ``jieba_utils.cut``) when ``word_list_file_name`` is not given.
        word_list_file_name: Optional path of a UTF-8 text file with one word
            per line. Lines are stripped and blank lines are ignored.
        output_file_name: Optional path. When given, the resulting word list
            is written there as UTF-8, one word per line.

    Returns:
        The word list. When loaded from a file it keeps the file order; when
        built from ``comments`` it holds the unique tokens in sorted order.
    """
    if word_list_file_name:
        print('Loading word list from file ' + word_list_file_name)
        with open(word_list_file_name, 'r', encoding='utf-8') as fr:
            stripped_lines = (line.strip() for line in fr)
            word_list = [word for word in stripped_lines if word]
    else:
        print('No word list file specified, creating new word list.')
        wordset = set()
        for comment in comments:
            wordset.update(jieba_utils.cut(comment))
        # Sort so the list (and any index derived from it) is identical on
        # every run; plain set iteration order for strings varies between
        # interpreter runs because of hash randomization.
        word_list = sorted(wordset)

    if output_file_name:
        with open(output_file_name, 'w', encoding='utf-8') as fw:
            fw.writelines(word + '\n' for word in word_list)
        # Report success only after the file has been closed.
        print('Word list saved to ' + output_file_name + '.')

    return word_list
def comment_to_n_of_words(comment):
    """Tokenize a comment.

    Thin wrapper that returns whatever ``jieba_utils.cut(comment)`` produces,
    unmodified.

    Args:
        comment: Text to tokenize.

    Returns:
        The result of ``jieba_utils.cut(comment)``.
    """
    tokens = jieba_utils.cut(comment)
    return tokens