def create_wordpostuples(self, array):
    """Create tokens and POS tags for tweets.

    When ``self.debug`` is truthy, the tuples are first loaded from the
    ``<topic>_wordpos.txt`` cache file. If debug mode is off, or the cache
    cannot be read, a Frog server is started, every item of *array* is
    analysed with ``self.frog_tweets``, the results are appended to
    ``self.tuples`` and the full list is dumped to the cache file. The
    server is stopped afterwards, also when the analysis fails.

    Args:
        array: Iterable of items passed one by one to ``self.frog_tweets``.
    """
    # The cache file stem is everything before the first '.' of TOPICFILE.
    # NOTE(review): a TOPICFILE with a dot in its directory part (e.g.
    # "./topics.txt") yields a truncated stem -- confirm this never occurs.
    filename = self.TOPICFILE.split('.')[0]
    wordpos_filename = filename + "_wordpos.txt"

    if self.debug:
        try:
            self.tuples = helpers.read_from_file(wordpos_filename)
        except Exception:
            # Best effort: any read problem falls back to a fresh analysis,
            # but KeyboardInterrupt / SystemExit are no longer swallowed.
            print("! Error in reading from file. Redo posword tuples")
        else:
            return

    self.startFrogServer('start')
    try:
        time.sleep(20)  # Time for startup server
        frogclient = FrogClient('localhost', self.PORTNUMBER)
        print("** START frog analysis.")
        print("** Creating POS tags.. (This may take a while)")
        for item in array:
            lemmapos_array = self.frog_tweets(frogclient, item)
            self.tuples.append(lemmapos_array)
        helpers.dump_to_file(wordpos_filename, self.tuples)
    finally:
        # Always shut the server down, even if analysis or dumping raised.
        self.startFrogServer('stop')
def __init__(self, mode, corpusfile, referencefile):
    """Load the corpus and reference tweets and derive their statistics.

    The stopword file is loaded first. If ``'--debug'`` is in *mode*, both
    tweet collections are read back from the ``*_lda_testing.txt`` files;
    otherwise they are obtained with ``self.get_tweets`` and dumped to
    those same files. Finally a dictionary is created for each collection
    and the log-likelihood between the two is calculated.

    Args:
        mode: Container (or string) checked for the ``'--debug'`` flag.
        corpusfile: Source passed to ``self.get_tweets`` for the corpus.
        referencefile: Source passed to ``self.get_tweets`` for the
            reference corpus.
    """
    corpus_cache = "corpusfile_lda_testing.txt"
    reference_cache = "referencefile_lda_testing.txt"

    self.load_stopword_file()

    use_cached_tweets = '--debug' in mode
    if not use_cached_tweets:
        # Normal run: fetch the tweets and store them for later debug runs.
        self.corpusfile_tweets = self.get_tweets(corpusfile)
        helpers.dump_to_file(corpus_cache, self.corpusfile_tweets)
        self.referencefile_tweets = self.get_tweets(referencefile)
        helpers.dump_to_file(reference_cache, self.referencefile_tweets)
    else:
        # Debug run: reuse the tweets stored by an earlier normal run.
        self.corpusfile_tweets = helpers.read_from_file(corpus_cache)
        self.referencefile_tweets = helpers.read_from_file(reference_cache)

    self.corpus = self.create_dictionary(self.corpusfile_tweets)
    self.referencecorpus = self.create_dictionary(self.referencefile_tweets)
    self.loglikelihood = self.calculate_loglikelihood(
        self.corpus, self.referencecorpus
    )
def load_classifier(self, filename):
    """Load a classifier and scaler from *filename* onto this instance.

    The value returned by ``helpers.read_from_file`` is unpacked as a
    two-item ``(classifier, scaler)`` pair and stored in
    ``self.classifier`` and ``self.scaler``.

    Args:
        filename: Path handed to ``helpers.read_from_file``.
    """
    loaded_pair = helpers.read_from_file(filename)
    # Unpacking completes before either attribute is stored, so a
    # malformed pair leaves the instance untouched.
    self.classifier, self.scaler = loaded_pair
def test_all(self):
    """Run the whole pipeline on ``../input.txt`` and check the result.

    The text is preprocessed, the preprocessing output is passed to
    ``algorithm``, the suggestions are postprocessed, and the outcome
    must equal ``POSTPROCESSED_TEXT``.
    """
    text = read_from_file('../input.txt')
    messages, customer = preprocess(text)
    # Feed the actual preprocessing output into the algorithm (rather than
    # the PREPROCESSED_MESSAGES / CUSTOMER fixtures) so this test really
    # covers the stages end to end and `messages` is not left unused.
    suggestions = algorithm(messages, customer)
    postprocessed = postprocess(suggestions, customer)
    self.assertEqual(postprocessed, POSTPROCESSED_TEXT)