# Walk-through of the Document / Vector / Model API.
#
# NOTE(review): `document`, `Document`, `Vector`, `Model`, `distance` and
# `TFIDF` are not defined in this snippet; they are assumed to be provided
# earlier in the file (e.g. imported / constructed there) -- confirm.
#
# Each statement lives on its own line so that the trailing comments no longer
# swallow the code that follows them. print(...) with a single argument gives
# the same output on Python 2 and Python 3.

# document
print(document.description)  # Document info.
print(document.model)  # The parent Model, or None.
print(document.features)  # List of words from Document.words.keys().
print(document.words)  # Dictionary of (word, count)-items (read-only).
print(document.wordcount)  # Total word count.
print(document.vector)  # Cached Vector (read-only dict).

# Returns the frequency of a word as a number between 0.0-1.0.
print(document.tf('conclude'))

# Returns the word's relevancy as tf-idf.
# Note: simply yields tf if model is None.
print(document.tfidf('conclude'))

# Returns a sorted list of (weight, word)-tuples. With normalized=True
# the weights will be between 0.0-1.0 (their sum is 1.0).
print(document.keywords(top=10, normalized=True))

print(document.copy())

# document vector
v1 = Vector({"curiosity": 1, "kill": 1, "cat": 1})
v2 = Vector({"curiosity": 1, "explore": 1, "mars": 1})
# Complement of the distance between the two vectors (presumably a
# similarity score; the metric used by distance() is defined elsewhere).
print(1 - distance(v1, v2))

# model
d1 = Document('A tiger is a big yellow cat with stripes.', type='tiger')
d2 = Document('A lion is a big yellow cat with manes.', type='lion')
d3 = Document('An elephant is a big grey animal with a slurf.', type='elephant')

# Print d1's vector before and after the documents are placed in a Model
# built with weight=TFIDF, so the two outputs can be compared.
print(d1.vector)
m = Model(documents=[d1, d2, d3], weight=TFIDF)
print(d1.vector)