示例#1
0
 def addItemWithExample(self, key, example):
     """Add an item for ``key`` together with a fact and one example.

     The item comes from ``self.addItem(key)``.  When that call yields a
     falsy value nothing else is created and ``None`` is returned.
     Otherwise a ``Fact`` keyed by the item is built, ``example`` is decoded
     as UTF-8 and saved as an ``Example``, the example is appended to the
     fact's examples and the fact is saved.

     Returns the tuple ``(item, fact, example)``, where ``example`` is the
     value returned by ``Example.save()``, or ``None``.
     """
     item = self.addItem(key)
     if not item:
         return None

     fact = Fact(key=item)
     # TODO: add fact to item itself!
     decoded_text = unicode(example, 'utf-8')
     stored_example = Example(example=decoded_text).save()
     fact.examples.append(stored_example)
     fact.save()
     return item, fact, stored_example
示例#2
0
    def addExampleWithItems(self, example, keys):
        """Save one example and reference it from a fact per key.

        ``example`` is decoded as UTF-8 and saved once as an ``Example``.
        For each entry of ``keys`` an item is requested via
        ``self.addItem``; keys for which that call yields a falsy value are
        skipped.  Every remaining item gets a new ``Fact`` that lists the
        shared example and is then saved.

        Returns a list of ``(item, fact)`` tuples in the order of ``keys``.
        """
        decoded_text = unicode(example, 'utf-8')
        shared_example = Example(example=decoded_text).save()

        item_fact_pairs = []
        for key in keys:
            item = self.addItem(key)
            if not item:
                continue

            fact = Fact(key=item)
            # TODO: add fact to item itself!
            fact.examples.append(shared_example)
            fact.save()
            item_fact_pairs.append((item, fact))

        return item_fact_pairs
示例#3
0
    def process(self, category='kanji', limit=100):
        """Process all new & unprocessed kanji keys"""
        wn = Wordnet()
        mc = MeCab()
        ji = Jisho()
        # 0. Find unprocessed kanji key
        try:
            for key in Key.objects(
                category=category, status='new'
            ).timeout(False).limit(limit):

                print 'Processing ', key.value

                # 0a. Get reading for kanji itself
                key_reading = mc.reading(key.value)
                key_gloss = Gloss()
                key_gloss.readings.update({'default': key_reading})
                key_gloss.save()

                # 0b. Initialize corresponding Fact
                key_fact = Fact(key=key, gloss=key_gloss)

                # 1. Get usages from WordNet
                words = wn.complete(key.value)
                if words:
                    for word in words[:7]:
                        # 2. Check, if reading is found
                        reading = mc.reading(word)
                        if(not reading):
                            continue

                        # 3. Check, if definition is found
                        definitions = wn.lookup(word)
                        if(not definitions):
                            continue

                        # 4. Create new Key and corresponding Fact entities
                        try:
                            # Check if such item already exists
                            existing_key = Key.objects.get(value=word)
                            fact = existing_key.fact
                        except (DoesNotExist, MultipleObjectsReturned):
                            # 5a. Create Gloss entity for most common definitions
                            gloss = Gloss()
                            # No more than 2-4 definitions!
                            for definition in definitions[:3]:
                                gloss.translations.append(definition['gloss'])
                            gloss.readings.update({'default': reading})
                            gloss.save()

                            # 5b. Create corresponding key & fact
                            new_key = Key(
                                value=word,
                                category='word',
                                tags=['minor']
                            ).save()
                            fact = Fact(key=new_key, gloss=gloss).save()
                            new_key.fact = fact
                            new_key.status = 'processed'
                            new_key.save()

                        # TODO: add synonyms based on 'words'?
                        # TODO: parse components?
                        # TODO: find advanced examples?

                        #6. Link fact to key-fact as usages
                        key_fact.usages.append(fact)

                # 1a. If still no usages found (or not enough)
                if len(key_fact.usages) < 2:
                    words = ji.define(key.value, 7)
                    for word, info in words:
                        # 4. Create new Key and corresponding Fact entities
                        try:
                            # Check if such item already exists
                            existing_key = Key.objects.get(value=word)
                            fact = existing_key.fact
                        except (DoesNotExist, MultipleObjectsReturned):
                            # 5a. Create Gloss entity for most common definitions
                            gloss = Gloss()
                            gloss.translations.append(info['meaning'])
                            gloss.readings.update({'default': info['kana']})
                            gloss.save()

                            # 5b. Create corresponding key & fact
                            new_key = Key(
                                value=word,
                                category='word',
                                tags=['minor']
                            ).save()
                            fact = Fact(key=new_key, gloss=gloss).save()
                            new_key.fact = fact
                            new_key.status = 'processed'
                            new_key.save()

                            #6. Link fact to key-fact as usages
                            key_fact.usages.append(fact)

                #7. Save key fact and corresponding key (bi-directional link)
                key_fact.save()
                key.fact = key_fact
                if len(key_fact.usages) > 0:
                    # todo: if still nothing found -> lookup in names
                    # dictionary (jisho)
                    key.status = 'processed'
                key.save()

                print 'Total usages: ', len(key.usages())
                print '----------------'
        except OperationFailure as e:
            print 'There was an error querying mongo db: %s' % e