def classifiy_wiki_article(search_str, tfidfs_per_doc, idfs, lem_flag=False):
	'''
	Get an article from Wikipedia and classify it against the provided data.
	TODO: not working...

	-search_str: article title; spaces are replaced by underscores and the
	 result is appended to the English Wikipedia base URL.
	-tfidfs_per_doc, idfs: passed through unchanged to classify_article_words.
	-lem_flag: passed through to str_corpus_cleaner.get_clean_terms.
	-Return: whatever classify_article_words returns for the article's terms.
	'''
	base_wiki = 'http://en.wikipedia.org/wiki/'
	# NOTE(review): search_str is not percent-encoded, only spaces are
	# replaced -- confirm whether the fetch helper escapes the URL itself.
	wiki_url = base_wiki + search_str.replace(' ', '_')

	ar_text = get_wiki.get_specific_wikipedia_article(wiki_url, markup=False)
	# Parenthesised single-argument form prints identically on Python 2 and
	# is valid syntax on Python 3 (the bare print statement is not).
	print(ar_text)
	article_words = str_corpus_cleaner.get_clean_terms(ar_text, lem_flag)

	return classify_article_words(article_words, tfidfs_per_doc, idfs)
def classify_article_file(article_path, tfidfs_per_doc, idfs, lem_flag=False):
	'''
	Classify a single article read from a file.

	-article_path: path of the text file holding the article.
	-tfidfs_per_doc, idfs: passed through unchanged to classify_article_words.
	-lem_flag: passed through to str_corpus_cleaner.get_clean_terms.
	-Return: matched category and similarity scores for all categories.
	 The matched name is cut at its first '.', which drops a file
	 extension if there is one.
	'''
	# Use a context manager so the file handle is closed even if read() raises.
	with codecs.open(article_path, 'rU') as article_file:
		ar_text = article_file.read()
	article_words = str_corpus_cleaner.get_clean_terms(ar_text, lem_flag)

	# Classify article
	match, all_scores = classify_article_words(
		article_words, tfidfs_per_doc, idfs)
	match = match.split('.')[0]  # remove file extension, if any

	return match, all_scores