Example #1
    def setUp(self):
        self.user = User.objects.create_user(username='******',
                                             password='******',
                                             email='*****@*****.**')
        self.text_file = TextFile.objects.create(user=self.user, file=None)
        self.corpus_item = CorpusItem.objects.create(title='test_ci',
                                                     text_file=self.text_file,
                                                     user=self.user)

        # Load a canned parse-response fixture.
        f_path = abspath("tests/core/parse_response.json")
        with open(f_path) as json_file:
            parsed_text = json.load(json_file)
        sentences = parsed_text['sentences']

        # Collect the unsaved WordToken instances from every sentence
        # so they can all be written in one query.
        words_to_save = []
        for sentence in sentences:
            handler = SentenceHandler(sentence, self.corpus_item)
            words_to_save.extend(handler.process_sentence())
        # Bulk save the words.
        WordToken.objects.bulk_create(words_to_save)

        self.collection = CorpusItemCollection.objects.create(user=self.user,
                                                              title='test')
        self.collection.corpus_items.add(self.corpus_item)
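This fixture pushes each parsed sentence dict through SentenceHandler and collects the unsaved WordToken rows for a single bulk_create call. The handler itself is not shown on this page; the sketch below is a guess at its shape, assuming a CoreNLP-style sentence dict with a 'tokens' list and assuming WordToken has word/lemma/pos/ner fields (suggested by the lemma/ner/pos filter in Example #3):

    # Hypothetical sketch only: the real SentenceHandler is not shown in
    # these examples, and the field names on Sentence and WordToken are
    # assumptions.
    class SentenceHandler(object):
        def __init__(self, sentence, corpus_item):
            self.sentence = sentence        # one parsed sentence (dict)
            self.corpus_item = corpus_item  # owning CorpusItem

        def create_sentence(self):
            # Persist the Sentence row the tokens will point at. Guarded so
            # process_sentence() can call it even if the test already did.
            if not hasattr(self, 'db_sentence'):
                self.db_sentence = Sentence.objects.create(
                    corpus_item=self.corpus_item)
            return self.db_sentence

        def process_sentence(self):
            # Return unsaved WordToken instances for a later bulk_create().
            self.create_sentence()
            return [WordToken(sentence=self.db_sentence,
                              word=tok.get('word'),
                              lemma=tok.get('lemma'),
                              pos=tok.get('pos'),
                              ner=tok.get('ner'))
                    for tok in self.sentence.get('tokens', [])]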
Example #2
    def test_that_sentence_can_be_rebuilt(self):
        handler = SentenceHandler(self.test_sentence, self.corpus_item)
        handler.create_sentence()
        handler.save_word_tokens()
        sentences = Sentence.objects.all()
        words = WordToken.objects.filter(sentence=sentences[0])
        sentence = rebuild_sentence_from_tokens(words)
        self.assertEqual(
            sentence,
            'I had long been familiar with the area around the Boulevard Ornano.')
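rebuild_sentence_from_tokens has to reproduce the original spacing exactly, including the period with no space before it. A minimal sketch, assuming each WordToken stores its surface text plus the trailing whitespace the parser reported for it (CoreNLP calls this 'after'); neither field name is confirmed by these examples:

    # Hypothetical sketch: assumes WordToken keeps the token text ('word')
    # and its trailing whitespace ('after'), and that the queryset is in
    # token order.
    def rebuild_sentence_from_tokens(words):
        return ''.join(tok.word + tok.after for tok in words).strip()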
Example #3
    def setUp(self):
        self.user = User.objects.create_user(username='******',
                                             password='******',
                                             email='*****@*****.**')
        self.line_file = create_doc_with_x_lines(100)
        # A 100-line file with one number per line.
        numbered_lines = ''.join('{0}\n'.format(num) for num in range(100))
        self.text_file = TextFile.objects.create(
            user=self.user,
            file=SimpleUploadedFile('best_file_eva.txt',
                                    numbered_lines.encode()))
        sentences = "I am a pony. I am a frong. I am a dog.  I go to the zoo."
        self.sentence_file = TextFile.objects.create(
            user=self.user,
            file=SimpleUploadedFile('best_file.txt', sentences.encode()))
        self.corpus_item = CorpusItem.objects.create(title='test_ci',
                                                     text_file=self.text_file,
                                                     user=self.user)

        # Load a canned parse-response fixture.
        f_path = abspath("tests/core/parse_response.json")
        self.parse_lock_path = abspath("tests/core/parse_locked.txt")

        with open(f_path) as json_file:
            parsed_text = json.load(json_file)
        sentences = parsed_text['sentences']

        # Collect the unsaved WordToken instances from every sentence
        # so they can all be written in one query.
        words_to_save = []
        for sentence in sentences:
            handler = SentenceHandler(sentence, self.corpus_item)
            words_to_save.extend(handler.process_sentence())
        # Bulk save the words.
        WordToken.objects.bulk_create(words_to_save)

        self.collection = CorpusItemCollection.objects.create(user=self.user,
                                                              title='test')
        self.collection.corpus_items.add(self.corpus_item)

        settings.DEFAULT_FILTER = {
            "name": "bob",
            "filter_data": {
                "lemma": True,
                "ner": False,
                "pos": ['CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS',
                        'LS', 'MD', 'NN', 'NNS', 'NNP', 'NNPS', 'PDT', 'POS',
                        'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO',
                        'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT',
                        'WP', 'WP$', 'WRB'],
                "stopwords": (
                    "I,i,me,my,myself,we,our,ours,ourselves,you,your,yours,yourself,yourselves,he,him,his,"
                    "himself,she,her,hers,herself,it,its,itself,they,them,their,theirs,themselves,what,"
                    "which,who,whom,this,that,these,those,am,is,are,was,were,be,been,being,have,has,had,"
                    "having,do,does,did,doing,a,an,the,and,but,if,or,because,as,until,while,of,at,by,"
                    "for,with,about,against,between,into,through,during,before,after,above,below,to,"
                    "from,up,down,in,out,on,off,over,under,again,further,then,once,here,there,when,"
                    "where,why,how,all,any,both,each,few,more,most,other,some,such,no,nor,not,only,own,"
                    "same,so,than,too,very,s,t,can,will,just,don,should,now")
            }
        }
        self.filter = settings.DEFAULT_FILTER
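The filter is only configured here, not applied. For reference, a minimal sketch of how such a spec could drive a WordToken queryset, assuming 'pos' and 'word' fields on the model; this is an illustration, not the project's actual filtering code:

    # Hypothetical sketch of applying DEFAULT_FILTER to a WordToken
    # queryset. The field names ('pos', 'word') are assumptions.
    def apply_filter(tokens, spec):
        data = spec['filter_data']
        stopwords = set(data['stopwords'].split(','))
        tokens = tokens.filter(pos__in=data['pos'])  # keep whitelisted POS tags
        return tokens.exclude(word__in=stopwords)    # drop stopword tokens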
Example #4
    def test_that_handler_is_saving_dependency_parse(self):
        handler = SentenceHandler(self.test_sentence, self.corpus_item)
        handler.create_sentence()
        handler.save_sentence_dependecy_parses("basic-dependencies")
        sd = SentenceDependency.objects.all()
        self.assertEqual(sd.count(), 13)
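"basic-dependencies" names which dependency representation to persist; in a basic dependency tree every token has exactly one incoming arc (ROOT included), which lines up with the 13 tokens asserted in Example #5. A guess at the handler method, assuming CoreNLP-style arc dicts and matching SentenceDependency fields:

    # Hypothetical sketch: assumes the parsed sentence dict keeps a list of
    # {dep, governorGloss, dependentGloss} arcs under the given key, and
    # that SentenceDependency mirrors those fields. All names here are
    # assumptions, not the project's actual schema.
    def save_sentence_dependecy_parses(self, dep_key):
        for arc in self.sentence.get(dep_key, []):
            SentenceDependency.objects.create(
                sentence=self.db_sentence,
                relation=arc['dep'],
                governor=arc['governorGloss'],
                dependent=arc['dependentGloss'])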
Example #5
    def test_that_handler_is_saving_tokens(self):
        handler = SentenceHandler(self.test_sentence, self.corpus_item)
        handler.create_sentence()
        handler.save_word_tokens()
        wd = WordToken.objects.all()
        self.assertEqual(wd.count(), 13)
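Given the process_sentence sketch after Example #1, save_word_tokens could be a one-liner; again an assumption rather than the project's code:

    # Hypothetical sketch, consistent with the process_sentence sketch
    # above: persist the unsaved tokens in a single query.
    def save_word_tokens(self):
        WordToken.objects.bulk_create(self.process_sentence())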
Example #6
    def test_sentence_handler_create_sentence(self):
        handler = SentenceHandler(self.test_sentence, self.corpus_item)
        handler.create_sentence()
        sentences = Sentence.objects.all()
        self.assertEqual(sentences.count(), 1)