예제 #1
0
def test_word_guess_category():
    """
    Saving a Word should leave ``category`` set according to its text.

    Each case below creates a fresh Word, assigns the text, saves it and
    then checks the resulting ``category`` attribute.
    """
    expectations = (
        ('word', 'word'),
        ('come up with', 'phrasal_verb'),
        ('get over', 'phrasal_verb'),
        ('to put it mildly', 'phrase'),
    )

    for text, expected_category in expectations:
        entry = Word()
        entry.word = text
        entry.save()

        assert entry.category == expected_category
예제 #2
0
    def test_saving_and_retrieving_items(self):
        """Save one Word with two Items and read everything back.

        Checks that the first stored Word equals the one saved, that exactly
        two Items exist, and that they come back as 'kaizen' then 'genki',
        each pointing at the saved Word.
        """
        parent_word = Word()
        parent_word.save()

        # Create the two items in the order they are later expected back.
        for item_text in ('kaizen', 'genki'):
            item = Item()
            item.text = item_text
            item.word = parent_word
            item.save()

        self.assertEqual(Word.objects.first(), parent_word)

        stored_items = Item.objects.all()
        self.assertEqual(2, stored_items.count())

        first_stored = stored_items[0]
        second_stored = stored_items[1]

        self.assertEqual(first_stored.text, 'kaizen')
        self.assertEqual(second_stored.text, 'genki')
        self.assertEqual(first_stored.word, parent_word)
        self.assertEqual(second_stored.word, parent_word)
예제 #3
0
    def handle(self, *args, **options):
        """Replace every Word row with one row per file in STATIC_AUDIO.

        All existing Word objects are deleted first. Then each regular file
        directly inside ``settings.STATIC_AUDIO`` becomes a Word whose
        ``word`` is the file name up to its first dot, whose
        ``transcription`` is the placeholder '[...]', and whose
        ``audio_path`` is the file's path under STATIC_AUDIO.
        """
        Word.objects.all().delete()

        audio_dir = settings.STATIC_AUDIO
        # Collect the plain files up front; sub-directories are ignored.
        audio_files = [
            name for name in listdir(audio_dir)
            if isfile(join(audio_dir, name))
        ]

        for name in audio_files:
            # NOTE(review): the name is cut at the FIRST dot, so "a.b.mp3"
            # yields "a" and a dot-file yields "" - confirm that is intended.
            stem, _, _ = name.partition('.')
            Word(
                word=stem,
                transcription='[...]',
                audio_path=os.path.join(audio_dir, name),
            ).save()
예제 #4
0
    def handle(self, *args, **options):
        """Load the tab-separated ``lexicon`` file beside this module into Word rows.

        The first line of the file is skipped (presumably a header row -
        TODO confirm). Every other non-blank line is split on tabs, padded
        with empty strings up to ten columns, and mapped positionally onto
        the Word fields. All rows are inserted with one ``bulk_create`` call.

        Raises:
            ValueError: if the eighth column (``no``) of a row cannot be
                parsed as an integer, which includes rows too short to
                contain that column.
        """
        lexicon_path = path.join(path.dirname(path.abspath(__file__)), 'lexicon')
        with open(lexicon_path, 'r') as f:
            lines = f.readlines()[1:]

        words = []
        for line in lines:
            # readlines() keeps the line terminator; drop it so it is not
            # stored as part of the last populated column.
            line = line.rstrip('\n')
            if not line:
                # A blank line (e.g. a trailing one at end of file) carries
                # no data and would otherwise fail on int('').
                continue

            columns = line.split("\t")
            # Pad short rows so every positional lookup below is valid.
            columns += [''] * (10 - len(columns))

            words.append(Word(
                word=columns[0],
                prefix=columns[1],
                stem=columns[2],
                tone=columns[3],
                POS=columns[4],
                word_class=columns[5],
                gloss=columns[6],
                no=int(columns[7]),
                note=columns[8],
                dialect=columns[9],
            ))

        Word.objects.bulk_create(words)
예제 #5
0
    # Aggregate the rows from `reader` by word text, summing the frequency of
    # any text that appears more than once. Each row is read through its
    # 'Word' and 'Frequency' keys (presumably a csv.DictReader - TODO confirm).
    words = {}

    for row in reader:
        word = dict(text=row['Word'], language=english, frequency=int(row['Frequency']))
        if word['text'] not in words:
            words[word['text']] = word
        else:
            # Repeated text: fold this row's frequency into the first entry.
            words[word['text']]['frequency'] += int(word['frequency'])

    # From here on `words` is a list of dicts, highest frequency first.
    words = words.values()
    words = sorted(words, key=lambda w: w['frequency'], reverse=True)
    # Index the Word rows already in the database by their text.
    existing_words_objects = {word.text: word for word in Word.objects.all()}
    # NOTE(review): this is a list, so each `not in` test below is a linear
    # scan; testing against existing_words_objects would be a dict lookup.
    existing_words_text = [word.text for word in existing_words_objects.values()]
    for word in words:
        if word['text'] not in existing_words_text:
            # Text not in the database yet: create and save a Word for it.
            word['word'] = Word(text=word['text'])
            word['word'].save()
        else:
            word['word'] = existing_words_objects[word['text']]

    # Word objects created above. NOTE(review): not used in the lines visible
    # here - confirm whether later code needs it.
    raw_words = [word['word'] for word in words if word['text'] not in existing_words_text]
    # Rank is the 1-based position in the frequency-sorted list.
    for i in range(len(words)):
        words[i]['rank'] = i + 1
    for word in words:
        print(word['rank'], word['text'], word['frequency'])

    # Build (but, within the visible lines, do not save) one WordEntry per word.
    word_entries = []

    for word in words:
        word_entry = WordEntry(word=word['word'], language=english, frequency=word['frequency'], rank=word['rank'])
        word_entries.append(word_entry)
예제 #6
0
def _save_data_to_db(word, spelling, raw_json):
    """Create and save a Word from the given values.

    ``word`` is stored as ``value``, ``spelling`` as ``spelling`` and
    ``raw_json`` as ``raw_od_article``. Nothing is returned.
    """
    entry = Word()
    entry.value, entry.spelling, entry.raw_od_article = word, spelling, raw_json
    entry.save()
예제 #7
0
        # Body of a read loop (the loop header is outside this view): parse one
        # line of `f` into a name and an explanation and save them as a Word.
        line = f.readline()  # read one line per iteration
        lst = line.split()  # split on whitespace into a list of tokens
        size = len(lst)
        # readline() returns an empty string at end of file: stop reading.
        if not line:
            break
        # A line should contain at least two items: the name and the explanation.
        # Find the position of the first explanation token; the explanation always
        # contains "." because it states the part of speech, e.g. "v."
        # NOTE(review): if no token contains ".", the loop ends with
        # k == size - 1, so only the last token is treated as the explanation.
        # NOTE(review): a whitespace-only line gives an empty list, k stays 0,
        # and a Word with empty name and explain is saved.
        k = 0
        for k in range(size):
            if lst[k].find(".") != -1:
                break
        # All tokens before the first explanation token, joined by spaces, form the name.
        name = ""
        for i in range(k):
            if i != 0:
                name += " "
            name += lst[i]
        # The tokens from the first explanation token to the end, joined by spaces,
        # form the explanation.
        explain = ""
        for i in range(k, size):
            if i != k:
                explain += " "
            explain += lst[i]
        # Save to the database.
        word = Word()
        word.name = name
        word.explain = explain
        word.save()

# Report completion (the message is user-facing and left as written).
print("生成完成,见words_word表")
예제 #8
0
 def __yield_words(self, dictionary):
     """Lazily yield one Word per entry of *dictionary*.

     Each key becomes the Word's ``lemma`` and the value stored under it
     becomes its ``description``. The Word objects are only constructed
     here, not saved.
     """
     for lemma in dictionary.keys():
         description = dictionary[lemma]
         yield Word(lemma=lemma, description=description)