示例#1
0
	def create_wordpostuples(self, array):
		""" Create tokens and POS tags for tweets.

		For every item in ``array`` the result of ``self.frog_tweets`` is
		appended to ``self.tuples``; the full list is then passed to
		``helpers.dump_to_file`` under ``<topicfile prefix>_wordpos.txt``.
		When ``self.debug`` is truthy the tuples are loaded from that file
		instead, falling back to a fresh Frog run if loading fails.
		"""
		# NOTE(review): this keeps only the text before the FIRST '.', so a
		# TOPICFILE with dots in its directory part (e.g. "./data/x.txt")
		# yields a truncated prefix -- confirm TOPICFILE is a bare file name.
		filename = self.TOPICFILE.split('.')[0]
		wordpos_filename = filename + "_wordpos.txt"

		readfromfile = self.debug
		if readfromfile:
			try:
				self.tuples = helpers.read_from_file(wordpos_filename)
			except Exception:
				# Best-effort cache read: any ordinary failure means we
				# recompute. KeyboardInterrupt/SystemExit are deliberately
				# not caught so the user can still abort the run.
				print("! Error in reading from file. Redo posword tuples")
				readfromfile = False

		if not readfromfile:
			self.startFrogServer('start')
			try:
				time.sleep(20)  # Time for startup server
				frogclient = FrogClient('localhost', self.PORTNUMBER)
				print("** START frog analysis.")
				print("** Creating POS tags.. (This may take a while)")
				for item in array:
					lemmapos_array = self.frog_tweets(frogclient, item)
					self.tuples.append(lemmapos_array)

				helpers.dump_to_file(wordpos_filename, self.tuples)
			finally:
				# Stop the server even when analysis or dumping fails, so a
				# crashed run does not leave it running.
				self.startFrogServer('stop')
	def __init__(self, mode, corpusfile, referencefile):
		""" Load corpus/reference tweets, then build dictionaries and log-likelihood.

		If '--debug' is contained in ``mode`` the tweets are read back from the
		two "*_lda_testing.txt" files; otherwise they are obtained through
		``self.get_tweets`` and written to those same files.
		"""
		corpus_cache = "corpusfile_lda_testing.txt"
		reference_cache = "referencefile_lda_testing.txt"

		self.load_stopword_file()

		use_cached = '--debug' in mode
		if not use_cached:
			# Regular run: fetch each tweet set and dump it right away.
			self.corpusfile_tweets = self.get_tweets(corpusfile)
			helpers.dump_to_file(corpus_cache, self.corpusfile_tweets)
			self.referencefile_tweets = self.get_tweets(referencefile)
			helpers.dump_to_file(reference_cache, self.referencefile_tweets)
		else:
			# Debug run: reuse what an earlier regular run dumped.
			self.corpusfile_tweets = helpers.read_from_file(corpus_cache)
			self.referencefile_tweets = helpers.read_from_file(reference_cache)

		self.corpus = self.create_dictionary(self.corpusfile_tweets)
		self.referencecorpus = self.create_dictionary(self.referencefile_tweets)

		self.loglikelihood = self.calculate_loglikelihood(
			self.corpus, self.referencecorpus)
示例#3
0
	def load_classifier(self, filename):
		""" Read a (classifier, scaler) pair from file and store both on the instance """
		loaded_pair = helpers.read_from_file(filename)
		# The stored object must unpack into exactly two values.
		self.classifier, self.scaler = loaded_pair
示例#4
0
 def test_all(self):
     """Run the whole pipeline on '../input.txt' and check the final text.

     Each stage receives the actual output of the previous stage, so a
     regression in preprocess is visible here instead of being masked by
     fixture data.
     """
     text = read_from_file('../input.txt')
     messages, customer = preprocess(text)
     # Use the real preprocess output; the previous version passed the
     # PREPROCESSED_MESSAGES / CUSTOMER fixtures and left `messages` unused.
     suggestions = algorithm(messages, customer)
     postprocessed = postprocess(suggestions, customer)
     self.assertEqual(postprocessed, POSTPROCESSED_TEXT)