示例#1
0
# Compute the TFIDF representation of the documents in the corpus.
# A parenthesised single-argument print behaves identically as the
# Python 2 statement and as the Python 3 function.
print('Computing TFIDF representations of documents in the corpus')
TFIDFs = TFIDF.compute_all_TFIDFs(inverted_index, idf_threshold)

# Debug aid (disabled): print all cosine similarities between documents,
# then stop the script.
# similarities = [[TFIDF.cosine_similarity(d1, d2) for d2 in TFIDFs] for d1 in TFIDFs]
# print(similarities)
# exit()

# Given a query, compute its TFIDF representation against the same
# inverted index and IDF threshold used for the corpus.
print('Computing query\'s TFIDF representation')

query = 'business became meaningful'
query_terms = parser.extract_terms(query)
q_TFIDF = TFIDF.compute_new_TFIDF(query_terms, inverted_index, idf_threshold)

# Warn if the query representation is empty, which is attributed here to
# an IDF threshold that is too high.
if len(q_TFIDF) == 0:
    print('*** WARNING *** Empty query, IDf threshold too high')

# Debug aid (disabled): dump the query representation, then stop the script.
# print(q_TFIDF)
# exit()

######################################################################

# Compute the set representation of the documents in the corpus (handled
# by the jaccard module), from the same inverted index and IDF threshold.
print('Computing set representations of documents in the corpus')
sets = jaccard.compute_all_sets(inverted_index, idf_threshold)
示例#2
0
###############################################################################

# Compute the TFIDF representations from the inverted index.
# A parenthesised single-argument print behaves identically as the
# Python 2 statement and as the Python 3 function.
print('Computing TFIDF representations...')
TFIDFs = TFIDF.compute_all_TFIDFs(inverted_index, idf_threshold)
# Debug aid (disabled):
# print("TFIDF  " + str(TFIDFs))

###############################################################################

# Key of the targeted item in `items`; stays None when a free-text search
# is used instead of an ASIN lookup.
target_index = None

if search_text:
    # Given a search_text, compute its TFIDF representation
    print('Computing search text\'s TFIDF representation...')
    search_terms = parser.extract_terms(search_text)
    searched_TFIDF = TFIDF.compute_new_TFIDF(search_terms, inverted_index, idf_threshold)

    # Warn if the search representation is empty, which is attributed
    # here to an IDF threshold that is too high
    if len(searched_TFIDF) == 0:
        print('*** WARNING *** Empty search, IDF threshold is too high!')
else:
    # Otherwise, ask for a target item.
    # NOTE(review): raw_input is the Python 2 name; on Python 3 it is input().
    asin = raw_input("Please enter an item ASIN (e.g. 1603112251): ")

    print('Find TFIDF representation...')
    # Take the first key of `items` whose value equals the entered ASIN,
    # or None when no entry matches.
    target_index = next(
        (item_id for item_id, code in items.items() if code == asin),
        None,
    )

    # Error if the targeted ASIN is not found
    if target_index is None:
        sys.exit('*** FATAL *** Item not found!')
    else: