def setUp(self):
    """Build test fixtures: a user, a CorpusItem backed by a TextFile,
    its parsed WordTokens (loaded from a canned CoreNLP JSON response),
    and a CorpusItemCollection containing the item.
    """
    self.user = User.objects.create_user(
        username='******', password='******', email='*****@*****.**')
    TextFile(user=self.user, file=None).save()
    self.text_file = TextFile.objects.last()
    CorpusItem(title='test_ci', text_file=self.text_file,
               user=self.user).save()
    self.corpus_item = CorpusItem.objects.last()
    # Canned parser output used instead of calling the real parse service.
    f_path = abspath("tests/core/parse_response.json")
    with open(f_path) as json_file:
        parsed_text = json.load(json_file)
    sentences = parsed_text['sentences']
    # Accumulate tokens from every sentence so they can be written in a
    # single bulk query (extend avoids re-allocating the list each pass).
    words_to_save = []
    for sentence in sentences:
        handler = SentenceHandler(sentence, self.corpus_item)
        words_to_save.extend(handler.process_sentence())
    # Bulk-save the words.
    WordToken.objects.bulk_create(words_to_save)
    CorpusItemCollection(user=self.user, title='test').save()
    self.collection = CorpusItemCollection.objects.last()
    self.collection.corpus_items.add(self.corpus_item)
def setUp(self):
    """Build test fixtures: a user, line/sentence TextFiles, a CorpusItem
    with parsed WordTokens (from a canned CoreNLP JSON response), a
    CorpusItemCollection, and a default filter in settings.
    """
    self.user = User.objects.create_user(
        username='******', password='******', email='*****@*****.**')
    self.line_file = create_doc_with_x_lines(100)
    # NOTE(review): str([...]) stores the repr of the list, not joined
    # lines — '\n'.join(...) may have been intended; confirm before changing.
    TextFile(user=self.user, file=SimpleUploadedFile(
        'best_file_eva.txt',
        str([str(num) + '\n' for num in xrange(100)]))).save()
    self.text_file = TextFile.objects.last()
    sentences = "I am a pony. I am a frong. I am a dog. I go to the zoo."
    TextFile(user=self.user,
             file=SimpleUploadedFile('best_file.txt', sentences)).save()
    self.sentence_file = TextFile.objects.last()
    CorpusItem(title='test_ci', text_file=self.text_file,
               user=self.user).save()
    self.corpus_item = CorpusItem.objects.last()
    # Canned parser output used instead of calling the real parse service.
    f_path = abspath("tests/core/parse_response.json")
    self.parse_lock_path = abspath("tests/core/parse_locked.txt")
    with open(f_path) as json_file:
        parsed_text = json.load(json_file)
    sentences = parsed_text['sentences']
    # Accumulate tokens from every sentence so they can be written in a
    # single bulk query (extend avoids re-allocating the list each pass).
    words_to_save = []
    for sentence in sentences:
        handler = SentenceHandler(sentence, self.corpus_item)
        words_to_save.extend(handler.process_sentence())
    # Bulk-save the words.
    WordToken.objects.bulk_create(words_to_save)
    CorpusItemCollection(user=self.user, title='test').save()
    self.collection = CorpusItemCollection.objects.last()
    self.collection.corpus_items.add(self.corpus_item)
    # Default word filter: lemmatize, skip NER, accept all Penn Treebank
    # POS tags, and drop common English stopwords.
    settings.DEFAULT_FILTER = {
        "name": "bob",
        "filter_data": {
            "lemma": True,
            "ner": False,
            "pos": ['CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR',
                    'JJS', 'LS', 'MD', 'NN', 'NNS', 'NNP', 'NNPS',
                    'PDT', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR',
                    'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG',
                    'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'],
            "stopwords": (
                "I,i,me,my,myself,we,our,ours,ourselves,you,your,yours,"
                "yourself,yourselves,he,him,his,himself,she,her,hers,"
                "herself,it,its,itself,they,them,their,theirs,themselves,"
                "what,which,who,whom,this,that,these,those,am,is,are,was,"
                "were,be,been,being,have,has,had,having,do,does,did,doing,"
                "a,an,the,and,but,if,or,because,as,until,while,of,at,by,"
                "for,with,about,against,between,into,through,during,"
                "before,after,above,below,to,from,up,down,in,out,on,off,"
                "over,under,again,further,then,once,here,there,when,"
                "where,why,how,all,any,both,each,few,more,most,other,"
                "some,such,no,nor,not,only,own,same,so,than,too,very,"
                "s,t,can,will,just,don,should,now")
        }
    }
    self.filter = settings.DEFAULT_FILTER
def setUp(self):
    """Build test fixtures: a user, line/sentence TextFiles, a CorpusItem
    with parsed WordTokens (from a canned CoreNLP JSON response), a
    CorpusItemCollection, and a default filter in settings.
    """
    self.user = User.objects.create_user(
        username='******', password='******', email='*****@*****.**')
    self.line_file = create_doc_with_x_lines(100)
    # NOTE(review): str([...]) stores the repr of the list, not joined
    # lines — '\n'.join(...) may have been intended; confirm before changing.
    TextFile(user=self.user, file=SimpleUploadedFile(
        'best_file_eva.txt',
        str([str(num) + '\n' for num in xrange(100)]))).save()
    self.text_file = TextFile.objects.last()
    sentences = "I am a pony. I am a frong. I am a dog. I go to the zoo."
    TextFile(user=self.user,
             file=SimpleUploadedFile('best_file.txt', sentences)).save()
    self.sentence_file = TextFile.objects.last()
    CorpusItem(title='test_ci', text_file=self.text_file,
               user=self.user).save()
    self.corpus_item = CorpusItem.objects.last()
    # Canned parser output used instead of calling the real parse service.
    f_path = abspath("tests/core/parse_response.json")
    self.parse_lock_path = abspath("tests/core/parse_locked.txt")
    with open(f_path) as json_file:
        parsed_text = json.load(json_file)
    sentences = parsed_text['sentences']
    # Accumulate tokens from every sentence so they can be written in a
    # single bulk query (extend avoids re-allocating the list each pass).
    words_to_save = []
    for sentence in sentences:
        handler = SentenceHandler(sentence, self.corpus_item)
        words_to_save.extend(handler.process_sentence())
    # Bulk-save the words.
    WordToken.objects.bulk_create(words_to_save)
    CorpusItemCollection(user=self.user, title='test').save()
    self.collection = CorpusItemCollection.objects.last()
    self.collection.corpus_items.add(self.corpus_item)
    # Default word filter: lemmatize, skip NER, accept all Penn Treebank
    # POS tags, and drop common English stopwords.
    settings.DEFAULT_FILTER = {
        "name": "bob",
        "filter_data": {
            "lemma": True,
            "ner": False,
            "pos": ['CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR',
                    'JJS', 'LS', 'MD', 'NN', 'NNS', 'NNP', 'NNPS',
                    'PDT', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR',
                    'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG',
                    'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'],
            "stopwords": (
                "I,i,me,my,myself,we,our,ours,ourselves,you,your,yours,"
                "yourself,yourselves,he,him,his,himself,she,her,hers,"
                "herself,it,its,itself,they,them,their,theirs,themselves,"
                "what,which,who,whom,this,that,these,those,am,is,are,was,"
                "were,be,been,being,have,has,had,having,do,does,did,doing,"
                "a,an,the,and,but,if,or,because,as,until,while,of,at,by,"
                "for,with,about,against,between,into,through,during,"
                "before,after,above,below,to,from,up,down,in,out,on,off,"
                "over,under,again,further,then,once,here,there,when,"
                "where,why,how,all,any,both,each,few,more,most,other,"
                "some,such,no,nor,not,only,own,same,so,than,too,very,"
                "s,t,can,will,just,don,should,now")
        }
    }
    self.filter = settings.DEFAULT_FILTER