def runTest(self):
        """
        Check the concepts that TextSemantics tags for each sample text.

        Every entry of ``self.test1`` is analysed with
        ``TextSemantics.find_related_concepts()`` and the result is compared
        with the set of labels expected for that position. Only three
        samples have an expected answer; a fourth sample fails the test.
        """
        from flankers.textsemantics import TextSemantics

        # Expected labels, indexed by the position of the sample in self.test1.
        expected_by_position = (
            {
                "remote exploration of planets",
                "craters (extraterrestrial)",
                "interplanetary trajectories",
                "ice cover (climatology)",
                "economic impacts",
                "snow and ice observations",
                "planet location",
                "craters (earth)",
                "appropriations hearings (nasa)",
                "interplanetary shock waves",
                "dwarf planets",
                "interplanetary gases",
            },
            {
                "electric power units (electrical design)",
                "precision time and time interval (ptti)",
                "electric power units (aircraft)",
                "auxiliary power units (apu) (aircraft)",
                "electric power units (spacecraft)",
                "manned maneuvering units",
                "auxiliary power units (apu) (spacecraft)",
                "extravehicular activity (eva) (operations)",
                "extravehicular activity (eva) (equipment)",
                "inertial sensors and measurement units (spacecraft)",
                "inertial sensors and measurement units (aircraft)",
                "extravehicular activity (physiological effects)",
            },
            set(),
        )

        for position, sample in enumerate(self.test1):
            found = TextSemantics(sample).find_related_concepts()
            # Show the sample position next to what was tagged for it.
            print("%s %s" % (position, found))
            # A sample without an expected answer is itself a failure.
            assert position < len(expected_by_position)
            assert found == expected_by_position[position]
    def memcache_indexer_keywords_distinct(self, term=None):
        """
        Get or set in the memcache the keywords indexed with count.

        If term is not set, it returns the full index, else returns the
        ancestorship of a term. The value is looked up in memcache first
        and is computed and stored only when no entry exists for the key.

        :param term: keyword whose ancestorship is wanted; a falsy value
            (the default None included) selects the full index
        :return: without a term, a dict with "indexed" (a list of
            {"keyword": ..., "count": ...} dicts, one per distinct keyword)
            and "n_indexed" (the number of distinct keywords); with a term,
            whatever TextSemantics.find_term_ancestorship(term) returns
        :raises ValueError: if find_term_ancestorship() raises; the original
            error text is kept as the message
        """
        mkey = _MEMCACHE_SLUGS['INDEXER_DISTINCT'] + str(term)

        # Read the cache exactly once. Checking and then fetching with two
        # separate get() calls could return None if the entry was evicted in
        # between, and a truthiness check would treat a cached empty value
        # as a miss and recompute it on every call.
        # NOTE(review): relies on memcache.get() returning None on a miss.
        results = memcache.get(key=mkey)
        if results is not None:
            return results

        if not term:
            query = Indexer.query(projection=[Indexer.keyword], distinct=True)
            results = {
                "indexed": [
                    {
                        "keyword": q.keyword,
                        # one count query per distinct keyword
                        "count": Indexer.query(Indexer.keyword == q.keyword).count()
                    }
                    for q in query
                ],
                "n_indexed": query.count()
            }
        else:
            try:
                results = TextSemantics.find_term_ancestorship(term)
            except Exception as e:
                raise ValueError(str(e))

        memcache.add(key=mkey, value=results)
        return results
# Example #3
# 0
 def execute_task(self, *args):
     """
     Index an article: store one Indexer entry per concept found in its text.

     The analysed text is the abstract, or the title when the abstract is
     empty, cut to at most 1799 characters. Nothing is stored when Indexer
     entries already exist for the given key.
     See Indexer class in models.

     :param args: two positional values, the object to index and its key
     :return: None
     :raises Exception: when both item.title and item.abstract are empty
     """
     item, key = args
     from flankers.textsemantics import TextSemantics

     if item.title == '' and item.abstract == '':
         # A media item or a link from Twitter carries no text to analyse.
         # NOTE(review): the message says "already indexed", but this branch
         # is reached when title and abstract are both empty - confirm intent.
         raise Exception("storeIndexer(): Resource already indexed")

     # From here on the item is either a feed entry or a tweet.
     if len(item.abstract) == 0:
         text = item.title
     else:
         text = item.abstract
     # Slicing leaves shorter text untouched, so no length check is needed.
     text = text[:1799]

     stored_entries = Indexer.query().filter(Indexer.webres == key).count()
     if stored_entries != 0:
         return

     for concept in TextSemantics(text).find_related_concepts():
         Indexer(keyword=concept.strip(), webres=key).put()
         print("indexing stored: " + item.url + ">" + concept)