예제 #1
0
 def get_highest_rated(self):
     """Return the article with the highest ``star`` value, trimmed for output.

     The document is read from the ``mh.articles`` collection, sorted by
     ``star`` in descending order. Before it is returned, the ``text`` and
     ``keywords`` fields are removed, ``_id`` is converted to ``str``,
     ``site`` is set to ``get_site(url)`` and ``title`` is replaced by
     ``strip_title(title)``.

     Returns:
         The trimmed article dict, or ``False`` when the collection holds
         no articles (the same "nothing found" value that
         ``get_best_match`` returns).
     """
     artcol = self.client['mh']['articles']
     highest = artcol.find_one(sort=[("star", -1)])
     if highest is None:
         # find_one yields None for an empty collection; without this guard
         # the cleanup below would fail with AttributeError.
         return False

     # A default makes the removal tolerant of documents that were stored
     # without these optional, bulky fields.
     highest.pop('text', None)
     highest.pop('keywords', None)
     highest['_id'] = str(highest['_id'])
     highest['site'] = get_site(highest['url'])
     highest['title'] = strip_title(highest['title'])
     return highest
예제 #2
0
 def get_random_article(self):
     """Return a randomly chosen article, trimmed for output.

     A random offset into the ``mh.articles`` collection is picked and the
     document at that position is fetched. Before it is returned, the
     ``text`` and ``keywords`` fields are removed, ``_id`` is converted to
     ``str``, ``site`` is set to ``get_site(url)`` and ``title`` is
     replaced by ``strip_title(title)``.

     Returns:
         The trimmed article dict, or ``False`` when the collection holds
         no articles (the same "nothing found" value that
         ``get_best_match`` returns).
     """
     artcol = self.client['mh']['articles']

     # Collection.count() was deprecated in PyMongo 3.7 and removed in 4.0,
     # so prefer count_documents(). On older drivers an unknown attribute
     # resolves to a (non-callable) sub-collection, whose call raises
     # TypeError; fall back to the legacy count() there.
     try:
         colsize = artcol.count_documents({})
     except TypeError:
         colsize = artcol.count()

     if colsize == 0:
         # random.randrange(0) raises ValueError; report "nothing found".
         return False

     randart = artcol.find()[random.randrange(colsize)]

     # A default makes the removal tolerant of documents that were stored
     # without these optional, bulky fields.
     randart.pop('text', None)
     randart.pop('keywords', None)
     randart['_id'] = str(randart['_id'])
     randart['site'] = get_site(randart['url'])
     randart['title'] = strip_title(randart['title'])
     return randart
예제 #3
0
    def get_best_match(self, w):
        """Return the article whose keywords best match the phrase ``w``.

        The phrase is lower-cased and split on whitespace. Each query word
        that appears in an article's ``keywords`` adds 3 points to that
        article; each synonym (as returned by ``find_syns`` for the query
        words) that appears in its ``keywords`` adds 1 point. The article
        with the highest total is selected.

        Args:
            w: The search phrase.

        Returns:
            The best-matching article dict with ``text`` and ``keywords``
            removed, ``_id`` converted to ``str``, ``site`` set to
            ``get_site(url)`` and ``title`` replaced by
            ``strip_title(title)``; or ``False`` when no article matches.
        """
        # split() with no argument ignores leading, trailing and repeated
        # whitespace, so no empty "words" are looked up.
        splitword = w.lower().split()

        synlist = []
        for word in splitword:
            synlist.extend(find_syns(word))

        artcol = self.client['mh']['articles']

        # Article _id -> accumulated relevance score. Direct word hits
        # weigh three times as much as synonym hits.
        relate_dict = {}
        for terms, weight in ((splitword, 3), (synlist, 1)):
            for term in terms:
                for art in artcol.find({'keywords': {'$in': [term]}}):
                    art_id = art['_id']
                    relate_dict[art_id] = relate_dict.get(art_id, 0) + weight

        if not relate_dict:
            return False

        # max() over the dict works on both Python 2 and 3 (dict views are
        # not indexable on Python 3). Ties go to whichever id the dict
        # yields first.
        best_id = max(relate_dict, key=relate_dict.get)

        best_article = artcol.find_one({'_id': best_id})
        if best_article is None:
            # The article disappeared between scoring and lookup.
            return False

        # A default makes the removal tolerant of documents that were
        # stored without these optional, bulky fields.
        best_article.pop('text', None)
        best_article.pop('keywords', None)
        best_article['_id'] = str(best_article['_id'])
        best_article['site'] = get_site(best_article['url'])
        best_article['title'] = strip_title(best_article['title'])
        return best_article