Пример #1
0
# Walk through the attributes and methods of a Document instance.
# NOTE(review): `document` is not created in this excerpt; it is assumed to
# exist already (e.g. built earlier from a text) -- confirm.
# Single-argument print(...) calls are used so the snippet parses on
# Python 3 while producing the same output on Python 2.
print(document.description)  # Document info.
print(document.model)  # The parent Model, or None.
print(document.features)  # List of words from Document.words.keys().
print(document.words)  # Dictionary of (word, count)-items (read-only).
print(document.wordcount)  # Total word count.
print(document.vector)  # Cached Vector (read-only dict).
# Frequency of a word as a number between 0.0-1.0.
print(document.tf('conclude'))
# The word's relevancy as tf-idf. Note: simply yields tf if model is None.
print(document.tfidf('conclude'))
# Sorted list of (weight, word)-tuples. With normalized=True the weights
# will be between 0.0-1.0 (their sum is 1.0).
print(document.keywords(top=10, normalized=True))
print(document.copy())

# Document vectors: two hand-built Vectors that share only the feature
# "curiosity".
v1 = Vector({"curiosity": 1, "kill": 1, "cat": 1})
v2 = Vector({"curiosity": 1, "explore": 1, "mars": 1})
# Prints the complement of the distance between the two vectors.
# NOTE(review): presumably this is meant as a similarity score -- confirm
# which metric `distance` uses by default.
print(1 - distance(v1, v2))

# Model: three short documents, each labelled with a type.
d1 = Document('A tiger is a big yellow cat with stripes.', type='tiger')
d2 = Document('A lion is a big yellow cat with manes.', type='lion')
d3 = Document('An elephant is a big grey animal with a slurf.',
              type='elephant')
# Print d1's vector before and after the documents are placed in a
# TFIDF-weighted Model, so the two outputs can be compared.
# NOTE(review): presumably the weights differ once d1 belongs to the model
# -- confirm against the library documentation.
print(d1.vector)
m = Model(documents=[d1, d2, d3], weight=TFIDF)
print(d1.vector)