class RemoveStopwordsTest(unittest.TestCase):
    """Unit tests for the ``RemoveStopwords`` operation."""

    def setUp(self):
        """Create the operation under test and store it on ``self.op``."""
        self.op = RemoveStopwords()

    def test_run(self):
        """Run the operation on ``self.test_data`` and compare the output.

        NOTE(review): the input list is empty while the expected output
        holds seven tokens -- confirm that this is the intended fixture.
        """
        # setUp has already assigned self.op; it is rebuilt here as before.
        self.op = RemoveStopwords()
        self.test_data = []

        expected_tokens = [
            'quick,', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dogs',
        ]
        produced_tokens = self.op.run(self.test_data)

        self.assertEqual(produced_tokens, expected_tokens)
class RemoveStopwordsTest(unittest.TestCase):
    """Test case exercising ``RemoveStopwords.run``."""

    def setUp(self):
        """Instantiate ``RemoveStopwords`` and keep it as ``self.op``."""
        stopword_remover = RemoveStopwords()
        self.op = stopword_remover

    def test_run(self):
        """Check the list returned by ``run`` against a fixed token list.

        NOTE(review): ``self.test_data`` is an empty list, yet a non-empty
        result is expected -- verify the fixture against the operation.
        """
        # The operation is re-created even though setUp already built one.
        self.op = RemoveStopwords()
        self.test_data = []

        outcome = self.op.run(self.test_data)
        wanted = ['quick,', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dogs']

        self.assertEqual(outcome, wanted)
class RemoveStopwordsTest(unittest.TestCase):
    """Unit tests for ``RemoveStopwords`` applied to tokenized corpora."""

    def setUp(self):
        """Create the operation under test and store it on ``self.op``."""
        self.op = RemoveStopwords()

    def test_run(self):
        """Tokenize one corpus, run the operation, and check each result.

        Every returned item's ``contents`` is compared with the expected
        string looked up by the item's ``id``. Only ``None`` is rejected
        up front, so an empty result would pass the loop vacuously.
        """
        expected_by_id = {"0": "quick brown fox jumps lazy dog"}

        # presumably the first Corpus argument is its id ("0") -- confirm
        raw_corpora = [
            Corpus("0", "", "the quick brown fox jumps over the lazy dog"),
        ]
        tokenized_corpora = WordTokenizeWhitespacePunct().run(raw_corpora)

        results = self.op.run(tokenized_corpora)

        self.assertIsNotNone(results)
        for processed in results:
            self.assertEqual(processed.contents, expected_by_id[processed.id])
def get_operation_handler(operation):
    """Return a new handler instance for the requested operation.

    ``operation`` is compared with ``==`` against each known operation
    name in turn; the first match decides which handler is constructed.

    Raises:
        TransactionException: if ``operation`` matches no known name.
    """
    if operation == 'lemmatize_wordnet':
        return LemmatizerWordNet()
    if operation == 'pos_tag':
        return PosTag()
    if operation == 'removecapsgreedy':
        return RemoveCapsGreedy()
    if operation == 'removecapsnnp':
        return RemoveCapsPreserveNNP()
    if operation == 'removepunct':
        return RemovePunct()
    if operation == 'remove_stopwords':
        return RemoveStopwords()
    if operation == 'sentence_tokenize':
        return SentenceTokenize()
    if operation == 'stem_porter':
        return StemmerPorter()
    if operation == 'stem_lancaster':
        return StemmerLancaster()
    if operation == 'stem_snowball':
        return StemmerSnowball()
    if operation == 'tfidf':
        return Tfidf()
    if operation == 'topic_model':
        return TopicModel()
    if operation == 'wordcloudop':
        return WordCloudOp()
    if operation == 'word_tokenize_treebank':
        return WordTokenizeTreebank()
    if operation == 'word_tokenize_whitespace_punct':
        return WordTokenizeWhitespacePunct()
    if operation == 'word_tokenize_stanford':
        return WordTokenizeStanford()
    if operation == 'word_tokenize_spaces':
        return WordTokenizeSpaces()
    if operation == 'word_tokenize_tabs':
        return WordTokenizeTabs()

    # The StanfordCoreNLP handlers differ only in the list they receive.
    if operation == 'nlp-pos':
        return StanfordCoreNLP(['pos'])
    if operation == 'nlp-ner':
        return StanfordCoreNLP(['pos', 'ner'])

    if operation == 'noop':
        return NoOp()

    # Nothing matched: report the failure to the caller.
    raise TransactionException("The requested operation does not exist.")
def get_operation_handler(operation):
    """Return a new handler instance for the requested operation.

    ``operation`` is compared with ``==`` against each known operation
    name in turn; the first match decides which handler is constructed.
    The ``splat-*`` operations also print a marker line to stdout before
    the handler is built.

    Raises:
        TransactionException: if ``operation`` matches no known name; the
            message includes the rejected value.
    """
    if operation == 'lemmatize_wordnet':
        return LemmatizerWordNet()
    if operation == 'removecapsgreedy':
        return RemoveCapsGreedy()
    if operation == 'removecapsnnp':
        return RemoveCapsPreserveNNP()
    if operation == 'removepunct':
        return RemovePunct()
    if operation == 'removesilence':
        return RemoveSilence()
    if operation == 'remove_stopwords':
        return RemoveStopwords()
    if operation == 'sentence_tokenize':
        return SentenceTokenize()
    if operation == 'removehashtags':
        return RemoveHashtags()
    if operation == 'removequotes':
        return RemoveQuotes()
    if operation == 'stem_porter':
        return StemmerPorter()
    # 'stop_words' yields the same handler type as 'remove_stopwords'.
    if operation == 'stop_words':
        return RemoveStopwords()
    if operation == 'tfidf':
        return Tfidf()
    if operation == 'wordcloudop':
        return WordCloudOp()
    if operation == 'word_tokenize_treebank':
        return WordTokenizeTreebank()
    if operation == 'word_tokenize_whitespace_punct':
        return WordTokenizeWhitespacePunct()
    if operation == 'word_tokenize_stanford':
        return WordTokenizeStanford()

    # The StanfordCoreNLP handlers differ only in the string they receive.
    if operation == 'nlp-pos':
        return StanfordCoreNLP('pos')
    if operation == 'nlp-ner':
        return StanfordCoreNLP('ner')
    if operation == 'nlp-sentiment':
        return StanfordCoreNLP('sentiment')
    if operation == 'nlp-coref':
        return StanfordCoreNLP('coref')
    if operation == 'nlp-relation':
        return StanfordCoreNLP('relation')

    # Each splat-* branch prints its marker before constructing the handler.
    if operation == 'splat-disfluency':
        print("YOU GOT SPLATTED")
        return SplatDisfluency()
    if operation == 'splat-ngrams':
        print("YOU GOT SPLATTED")
        return SplatNGrams()
    if operation == 'splat-complexity':
        print("YOU GOT SPLATTED")
        return SplatComplexity()
    if operation == 'splat-pos':
        print("YOU GOT SPLATTED")
        return SplatPOSFrequencies()
    if operation == 'splat-syllables':
        print("YOU GOT SPLATTED")
        return SplatSyllables()
    if operation == 'splat-pronouns':
        print("YOU GOT SPLATTED")
        return SplatPronouns()

    if operation == 'char-ngrams':
        return CharNgrams()
    if operation == 'length-stats':
        return LengthStatistics()
    # Two TopicModel variants, built with 10 and 30 respectively.
    if operation == 'topic-model-10':
        return TopicModel(10)
    if operation == 'topic-model-30':
        return TopicModel(30)
    if operation == 'word-vector':
        return WordVector()
    if operation == 'unsup-morph':
        return UnsupervisedMorphology()
    if operation == 'bigram-array':
        return BigramArray()
    if operation == 'speech-token-stats':
        return SpeechTokenStatistics()
    if operation == 'extract_transcript':
        return ExtractTranscript()
    if operation == 'noop':
        return NoOp()

    # Nothing matched: report the failure, naming the rejected value.
    raise TransactionException(
        f'The requested operation "{operation}" does not exist.')
def get_operation_handler(operation):
    """Return a new handler instance for the requested operation.

    ``operation`` is compared with ``==`` against each known operation
    name in turn; the first match decides which handler is constructed.
    The ``splat-*`` operations also print a marker line to stdout before
    the handler is built.

    Raises:
        TransactionException: if ``operation`` matches no known name.
    """
    if operation == 'lemmatize_wordnet':
        return LemmatizerWordNet()
    if operation == 'pos_tag':
        return PosTag()
    if operation == 'removecapsgreedy':
        return RemoveCapsGreedy()
    if operation == 'removecapsnnp':
        return RemoveCapsPreserveNNP()
    if operation == 'removepunct':
        return RemovePunct()
    if operation == 'removesilence':
        return RemoveSilence()
    if operation == 'remove_stopwords':
        return RemoveStopwords()
    if operation == 'sentence_tokenize':
        return SentenceTokenize()
    if operation == 'stem_porter':
        return StemmerPorter()
    # 'stop_words' yields the same handler type as 'remove_stopwords'.
    if operation == 'stop_words':
        return RemoveStopwords()
    if operation == 'tfidf':
        return Tfidf()
    if operation == 'topic_model':
        return TopicModel()
    if operation == 'wordcloudop':
        return WordCloudOp()
    if operation == 'word_tokenize_treebank':
        return WordTokenizeTreebank()
    if operation == 'word_tokenize_whitespace_punct':
        return WordTokenizeWhitespacePunct()
    if operation == 'word_tokenize_stanford':
        return WordTokenizeStanford()
    if operation == 'word_tokenize_spaces':
        return WordTokenizeSpaces()
    if operation == 'word_tokenize_tabs':
        return WordTokenizeTabs()

    # The StanfordCoreNLP handlers differ only in the list they receive.
    if operation == 'nlp-pos':
        return StanfordCoreNLP(['pos'])
    if operation == 'nlp-ner':
        return StanfordCoreNLP(['pos', 'ner'])
    if operation == 'nlp-sentiment':
        return StanfordCoreNLP(['parse', 'sentiment'])
    if operation == 'nlp-parse':
        return StanfordCoreNLP(['parse'])
    if operation == 'nlp-coref':
        return StanfordCoreNLP(['tokenize', 'ssplit', 'coref'])
    if operation == 'nlp-relation':
        return StanfordCoreNLP(['parse', 'relation'])

    # Each splat-* branch prints its marker before constructing the handler.
    if operation == 'splat-disfluency':
        print("YOU GOT SPLATTED")
        return SplatDisfluency()
    if operation == 'splat-ngrams':
        print("YOU GOT SPLATTED")
        return SplatNGrams()
    if operation == 'splat-complexity':
        print("YOU GOT SPLATTED")
        return SplatComplexity()
    if operation == 'splat-pos':
        print("YOU GOT SPLATTED")
        return SplatPOSFrequencies()
    if operation == 'splat-syllables':
        print("YOU GOT SPLATTED")
        return SplatSyllables()
    if operation == 'splat-pronouns':
        print("YOU GOT SPLATTED")
        return SplatPronouns()

    if operation == 'noop':
        return NoOp()

    # Nothing matched: report the failure to the caller.
    raise TransactionException("The requested operation does not exist.")
def setUp(self):
    """Create a ``RemoveStopwords`` instance and store it on ``self.op``."""
    stopword_remover = RemoveStopwords()
    self.op = stopword_remover
def test_run(self):
    """Run the operation on ``self.test_data`` and compare the output.

    NOTE(review): the input list is empty while the expected output holds
    seven tokens -- confirm that this is the intended fixture.
    """
    # A new operation is assigned here regardless of any earlier self.op.
    self.op = RemoveStopwords()
    self.test_data = []

    expected_tokens = [
        'quick,', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dogs',
    ]
    produced_tokens = self.op.run(self.test_data)

    self.assertEqual(produced_tokens, expected_tokens)
def test_run(self):
    """Check the list returned by ``run`` against a fixed token list.

    NOTE(review): ``self.test_data`` is an empty list, yet a non-empty
    result is expected -- verify the fixture against the operation.
    """
    # A new operation is assigned here regardless of any earlier self.op.
    self.op = RemoveStopwords()
    self.test_data = []

    outcome = self.op.run(self.test_data)
    wanted = ['quick,', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dogs']

    self.assertEqual(outcome, wanted)