def tfidf_optimal(news):
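    # builds one tf-idf vector per document: keys are the document labels
    # returned by token__.tokenize, columns follow the idf dictionary's word order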
    c = idf_optimal(news)             # word -> idf weight
    wordlist = list(c.keys())         # fixed column order for the tf-idf vectors
    tok = token__.tokenize(news)      # document label -> token list
    label = tok.keys()
    matrix = {}

    # one zero-filled slot per vocabulary word for every document
    for t in label:
        matrix[t] = [0] * len(wordlist)

    # tf-idf weight = raw count of the word in the document * the word's idf
    for t in label:
        for word in set(tok[t]):
            if word in c:
                matrix[t][wordlist.index(word)] = tok[t].count(word) * c[word]

    # scale every document vector (normalize() is assumed to be defined elsewhere)
    for t in matrix:
        normalize(matrix[t])

    # persist the vectors, one document per line; the with block closes the file
    with open('./tf.txt', 'w') as f:
        for t in matrix:
            f.write(str(matrix[t]) + '\n')
    return matrix


def idf(news):
    tok = token__.tokenize(news)      # document label -> token list

    # collect each document's distinct words, so text.count(word) below is the
    # number of documents containing that word (its document frequency)
    text = []
    for t in tok:
        text += list(set(tok[t]))

    # idf = log2(number of documents / document frequency)
    idf_weights = {}
    for word in set(text):
        idf_weights[word] = math.log(float(len(tok)) / text.count(word), 2)

    with open('./idf.txt', 'w') as f:
        f.write(str(idf_weights))
    return idf_weights
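

# A minimal standalone sketch of the same tf-idf weighting used above, run on a
# hard-coded toy corpus so it needs neither the token__ module nor the external
# normalize() helper. The corpus, its labels, and _tfidf_sketch() itself are
# illustrative assumptions, not part of the original pipeline.
def _tfidf_sketch():
    import math  # same dependency idf() relies on

    # toy stand-in for token__.tokenize output: document label -> token list
    docs = {
        'doc1': ['sports', 'match', 'goal', 'match'],
        'doc2': ['market', 'stock', 'goal'],
        'doc3': ['stock', 'market', 'market'],
    }

    # document frequency: number of documents containing each word
    df = {}
    for tokens in docs.values():
        for word in set(tokens):
            df[word] = df.get(word, 0) + 1

    # idf = log2(number of documents / document frequency)
    idf_weights = {w: math.log(float(len(docs)) / n, 2) for w, n in df.items()}

    # tf-idf vector per document: raw term count times the idf weight,
    # columns in a fixed (here alphabetical) word order
    wordlist = sorted(idf_weights)
    return {label: [tokens.count(w) * idf_weights[w] for w in wordlist]
            for label, tokens in docs.items()}


# Example: _tfidf_sketch()['doc1'] gives doc1's vector over sorted(vocabulary).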