# --- TFIDF representations of the corpus and of a sample query --------------
# Reads `inverted_index` and `idf_threshold` from the enclosing script and
# leaves `TFIDFs`, `query`, `query_terms`, `q_TFIDF` and `sets` bound for
# whatever follows.

print('Computing TFIDF representations of documents in the corpus')
TFIDFs = TFIDF.compute_all_TFIDFs(inverted_index, idf_threshold)

# Debug aid (disabled): dump the cosine similarity of every document pair,
# then stop.
# similarities = [[TFIDF.cosine_similarity(d1, d2) for d2 in TFIDFs]
#                 for d1 in TFIDFs]
# print(similarities)
# exit()

# The query is hard-coded here; it is split into terms by the parser and then
# turned into a TFIDF representation with the same index and threshold that
# were used for the corpus.
print("Computing query's TFIDF representation")
query = 'business became meaningful'
query_terms = parser.extract_terms(query)
q_TFIDF = TFIDF.compute_new_TFIDF(
    query_terms,
    inverted_index,
    idf_threshold,
)

# A zero-length query representation is reported as an IDF threshold that is
# too high. This is only a warning: the script keeps going afterwards.
if not len(q_TFIDF):
    print('*** WARNING *** Empty query, IDf threshold too high')

# Debug aid (disabled): show the query representation, then stop.
# print(q_TFIDF)
# exit()

######################################################################

print('Computing set representations of documents in the corpus')
sets = jaccard.compute_all_sets(inverted_index, idf_threshold)
############################################################################### print 'Computing TFIDF representations...' TFIDFs = TFIDF.compute_all_TFIDFs(inverted_index, idf_threshold) #print "TFIDF " + str(TFIDFs) ############################################################################### target_index = None if search_text: # Given a search_text, compute its TFIDF representation print 'Computing search text\'s TFIDF representation...' search_terms = parser.extract_terms(search_text) searched_TFIDF = TFIDF.compute_new_TFIDF(search_terms, inverted_index, idf_threshold) # Warn if the search_text is empty due to an high IDF threshold if len(searched_TFIDF) == 0: print '*** WARNING *** Empty search, IDF threshold is too high!' else: # Otherwise, ask for a target item: asin = raw_input("Please enter an item ASIN (e.g. 1603112251): ") print 'Find TFIDF representation...' target_index = next((id for id, code in items.items() if code == asin), None) # Error if the targeted ASIN is not found if target_index == None: sys.exit('*** FATAL *** Item not found!') else: