class RemoveCapsTest(unittest.TestCase):
    """Run both capital-removal operations over the same sample text."""

    def setUp(self):
        """Give every test a greedy remover; a test may replace it."""
        self.op = RemoveCapsGreedy()

    def test_run_greedy(self):
        """The greedy operation lowercases everything, names included."""
        expected = (
            'removes all non-proper-noun capitals from a given text. '
            'removes capital letters from text, even for bill clinton. '
            'accepts as input a non-tokenized string.'
        )
        self.op = RemoveCapsGreedy()
        self.test_data = (
            'Removes all non-proper-noun capitals from a given text. '
            'Removes capital letters from text, even for Bill Clinton. '
            'Accepts as input a non-tokenized string.'
        )
        actual = self.op.run(self.test_data)
        self.assertEqual(actual, expected)

    def test_run_preserve_nnp(self):
        """The NNP-preserving operation keeps 'Bill Clinton' capitalised."""
        expected = (
            'removes all non-proper-noun capitals from a given text. '
            'removes capital letters from text, even for Bill Clinton. '
            'accepts as input a non-tokenized string.'
        )
        self.op = RemoveCapsPreserveNNP()
        self.test_data = (
            'Removes all non-proper-noun capitals from a given text. '
            'Removes capital letters from text, even for Bill Clinton. '
            'Accepts as input a non-tokenized string.'
        )
        actual = self.op.run(self.test_data)
        self.assertEqual(actual, expected)
def test_run_preserve_nnp(self):
    """Check that each returned corpus matches the expected text for its id.

    The expected text is fully lowercased except for 'Bill Clinton'.
    """
    self.op = RemoveCapsPreserveNNP()

    sample_text = (
        "Removes all non-proper-noun capitals from a given text. "
        "Removes capital letters from text, even for Bill Clinton. "
        "Accepts as input a non-tokenized string."
    )
    corpora = [Corpus("0", "", sample_text)]

    # Expected contents, keyed by the id of the corpus they belong to.
    expected_by_id = {
        "0": (
            "removes all non-proper-noun capitals from a given text. "
            "removes capital letters from text, even for Bill Clinton. "
            "accepts as input a non-tokenized string."
        ),
    }

    processed = self.op.run(corpora)
    self.assertIsNotNone(processed)

    for item in processed:
        self.assertEqual(item.contents, expected_by_id[item.id])
def get_operation_handler(operation):
    """Build and return a fresh handler object for the named operation.

    Args:
        operation: Name of the requested operation; compared with ``==``
            against each known name, in the order listed below.

    Returns:
        A newly constructed handler for the first matching name.

    Raises:
        TransactionException: If ``operation`` matches none of the names.
    """
    # Each factory is a lambda so that a handler class is only looked up
    # (and instantiated) when its operation is actually requested.
    registry = (
        ('lemmatize_wordnet', lambda: LemmatizerWordNet()),
        ('pos_tag', lambda: PosTag()),
        ('removecapsgreedy', lambda: RemoveCapsGreedy()),
        ('removecapsnnp', lambda: RemoveCapsPreserveNNP()),
        ('removepunct', lambda: RemovePunct()),
        ('remove_stopwords', lambda: RemoveStopwords()),
        ('sentence_tokenize', lambda: SentenceTokenize()),
        ('stem_porter', lambda: StemmerPorter()),
        ('stem_lancaster', lambda: StemmerLancaster()),
        ('stem_snowball', lambda: StemmerSnowball()),
        ('tfidf', lambda: Tfidf()),
        ('topic_model', lambda: TopicModel()),
        ('wordcloudop', lambda: WordCloudOp()),
        ('word_tokenize_treebank', lambda: WordTokenizeTreebank()),
        ('word_tokenize_whitespace_punct',
         lambda: WordTokenizeWhitespacePunct()),
        ('word_tokenize_stanford', lambda: WordTokenizeStanford()),
        ('word_tokenize_spaces', lambda: WordTokenizeSpaces()),
        ('word_tokenize_tabs', lambda: WordTokenizeTabs()),
        ('nlp-pos', lambda: StanfordCoreNLP(['pos'])),
        ('nlp-ner', lambda: StanfordCoreNLP(['pos', 'ner'])),
        ('noop', lambda: NoOp()),
    )

    for known_name, build in registry:
        if operation == known_name:
            return build()

    raise TransactionException("The requested operation does not exist.")
def get_operation_handler(operation):
    """Build and return a fresh handler object for the named operation.

    Args:
        operation: Name of the requested operation; compared with ``==``
            against each known name, in the order listed below.

    Returns:
        A newly constructed handler for the first matching name. The
        ``splat-*`` operations also print "YOU GOT SPLATTED" to stdout
        before their handler is constructed.

    Raises:
        TransactionException: If ``operation`` matches none of the names;
            the message quotes the requested name.
    """
    def announced(build):
        # Wrap a factory so the splat banner is printed first, then the
        # handler is constructed.
        def build_with_banner():
            print("YOU GOT SPLATTED")
            return build()
        return build_with_banner

    # Each factory is a lambda so that a handler class is only looked up
    # (and instantiated) when its operation is actually requested.
    registry = (
        ('lemmatize_wordnet', lambda: LemmatizerWordNet()),
        ('removecapsgreedy', lambda: RemoveCapsGreedy()),
        ('removecapsnnp', lambda: RemoveCapsPreserveNNP()),
        ('removepunct', lambda: RemovePunct()),
        ('removesilence', lambda: RemoveSilence()),
        ('remove_stopwords', lambda: RemoveStopwords()),
        ('sentence_tokenize', lambda: SentenceTokenize()),
        ('removehashtags', lambda: RemoveHashtags()),
        ('removequotes', lambda: RemoveQuotes()),
        ('stem_porter', lambda: StemmerPorter()),
        ('stop_words', lambda: RemoveStopwords()),
        ('tfidf', lambda: Tfidf()),
        ('wordcloudop', lambda: WordCloudOp()),
        ('word_tokenize_treebank', lambda: WordTokenizeTreebank()),
        ('word_tokenize_whitespace_punct',
         lambda: WordTokenizeWhitespacePunct()),
        ('word_tokenize_stanford', lambda: WordTokenizeStanford()),
        ('nlp-pos', lambda: StanfordCoreNLP('pos')),
        ('nlp-ner', lambda: StanfordCoreNLP('ner')),
        ('nlp-sentiment', lambda: StanfordCoreNLP('sentiment')),
        ('nlp-coref', lambda: StanfordCoreNLP('coref')),
        ('nlp-relation', lambda: StanfordCoreNLP('relation')),
        ('splat-disfluency', announced(lambda: SplatDisfluency())),
        ('splat-ngrams', announced(lambda: SplatNGrams())),
        ('splat-complexity', announced(lambda: SplatComplexity())),
        ('splat-pos', announced(lambda: SplatPOSFrequencies())),
        ('splat-syllables', announced(lambda: SplatSyllables())),
        ('splat-pronouns', announced(lambda: SplatPronouns())),
        ('char-ngrams', lambda: CharNgrams()),
        ('length-stats', lambda: LengthStatistics()),
        ('topic-model-10', lambda: TopicModel(10)),
        ('topic-model-30', lambda: TopicModel(30)),
        ('word-vector', lambda: WordVector()),
        ('unsup-morph', lambda: UnsupervisedMorphology()),
        ('bigram-array', lambda: BigramArray()),
        ('speech-token-stats', lambda: SpeechTokenStatistics()),
        ('extract_transcript', lambda: ExtractTranscript()),
        ('noop', lambda: NoOp()),
    )

    for known_name, build in registry:
        if operation == known_name:
            return build()

    raise TransactionException(
        f'The requested operation "{operation}" does not exist.')
def test_run_greedy(self):
    """The greedy operation lowercases everything, names included."""
    expected = (
        'removes all non-proper-noun capitals from a given text. '
        'removes capital letters from text, even for bill clinton. '
        'accepts as input a non-tokenized string.'
    )
    self.op = RemoveCapsGreedy()
    self.test_data = (
        'Removes all non-proper-noun capitals from a given text. '
        'Removes capital letters from text, even for Bill Clinton. '
        'Accepts as input a non-tokenized string.'
    )
    actual = self.op.run(self.test_data)
    self.assertEqual(actual, expected)
def setUp(self):
    """Create the greedy capital-removal operation as ``self.op``."""
    self.op = RemoveCapsGreedy()
def get_operation_handler(operation):
    """Build and return a fresh handler object for the named operation.

    Args:
        operation: Name of the requested operation; compared with ``==``
            against each known name, in the order listed below.

    Returns:
        A newly constructed handler for the first matching name. The
        ``splat-*`` operations also print "YOU GOT SPLATTED" to stdout
        before their handler is constructed.

    Raises:
        TransactionException: If ``operation`` matches none of the names.
    """
    def announced(build):
        # Wrap a factory so the splat banner is printed first, then the
        # handler is constructed.
        def build_with_banner():
            print("YOU GOT SPLATTED")
            return build()
        return build_with_banner

    # Each factory is a lambda so that a handler class is only looked up
    # (and instantiated) when its operation is actually requested.
    registry = (
        ('lemmatize_wordnet', lambda: LemmatizerWordNet()),
        ('pos_tag', lambda: PosTag()),
        ('removecapsgreedy', lambda: RemoveCapsGreedy()),
        ('removecapsnnp', lambda: RemoveCapsPreserveNNP()),
        ('removepunct', lambda: RemovePunct()),
        ('removesilence', lambda: RemoveSilence()),
        ('remove_stopwords', lambda: RemoveStopwords()),
        ('sentence_tokenize', lambda: SentenceTokenize()),
        ('stem_porter', lambda: StemmerPorter()),
        ('stop_words', lambda: RemoveStopwords()),
        ('tfidf', lambda: Tfidf()),
        ('topic_model', lambda: TopicModel()),
        ('wordcloudop', lambda: WordCloudOp()),
        ('word_tokenize_treebank', lambda: WordTokenizeTreebank()),
        ('word_tokenize_whitespace_punct',
         lambda: WordTokenizeWhitespacePunct()),
        ('word_tokenize_stanford', lambda: WordTokenizeStanford()),
        ('word_tokenize_spaces', lambda: WordTokenizeSpaces()),
        ('word_tokenize_tabs', lambda: WordTokenizeTabs()),
        ('nlp-pos', lambda: StanfordCoreNLP(['pos'])),
        ('nlp-ner', lambda: StanfordCoreNLP(['pos', 'ner'])),
        ('nlp-sentiment', lambda: StanfordCoreNLP(['parse', 'sentiment'])),
        ('nlp-parse', lambda: StanfordCoreNLP(['parse'])),
        ('nlp-coref',
         lambda: StanfordCoreNLP(['tokenize', 'ssplit', 'coref'])),
        ('nlp-relation', lambda: StanfordCoreNLP(['parse', 'relation'])),
        ('splat-disfluency', announced(lambda: SplatDisfluency())),
        ('splat-ngrams', announced(lambda: SplatNGrams())),
        ('splat-complexity', announced(lambda: SplatComplexity())),
        ('splat-pos', announced(lambda: SplatPOSFrequencies())),
        ('splat-syllables', announced(lambda: SplatSyllables())),
        ('splat-pronouns', announced(lambda: SplatPronouns())),
        ('noop', lambda: NoOp()),
    )

    for known_name, build in registry:
        if operation == known_name:
            return build()

    raise TransactionException("The requested operation does not exist.")