def runTest(self):
    """
    Check the labels that TextSemantics relates to each sample text.

    Every text in ``self.test1`` is passed to ``TextSemantics`` and the
    outcome of ``find_related_concepts()`` is compared with the set of
    labels expected for that position. Expectations exist for the first
    three texts only; any further text makes the test fail.
    """
    from flankers.textsemantics import TextSemantics

    # Expected labels, indexed by the position of the text in self.test1.
    expected_by_position = (
        {
            "remote exploration of planets",
            "craters (extraterrestrial)",
            "interplanetary trajectories",
            "ice cover (climatology)",
            "economic impacts",
            "snow and ice observations",
            "planet location",
            "craters (earth)",
            "appropriations hearings (nasa)",
            "interplanetary shock waves",
            "dwarf planets",
            "interplanetary gases",
        },
        {
            "electric power units (electrical design)",
            "precision time and time interval (ptti)",
            "electric power units (aircraft)",
            "auxiliary power units (apu) (aircraft)",
            "electric power units (spacecraft)",
            "manned maneuvering units",
            "auxiliary power units (apu) (spacecraft)",
            "extravehicular activity (eva) (operations)",
            "extravehicular activity (eva) (equipment)",
            "inertial sensors and measurement units (spacecraft)",
            "inertial sensors and measurement units (aircraft)",
            "extravehicular activity (physiological effects)",
        },
        set(),
    )

    for position, sample in enumerate(self.test1):
        found = TextSemantics(sample).find_related_concepts()
        # Echo the position and the raw result before checking it, so the
        # output is available even when the assertion below fails.
        print("%s %s" % (position, found))

        if position >= len(expected_by_position):
            # No expectation is defined for this position.
            assert False
        assert found == expected_by_position[position]
def execute_task(self, *args):
    """
    Index an article. See Indexer class in models.

    The text to analyse is the item's abstract or, when that is empty, its
    title. Only its first 1799 characters are used. The labels returned by
    ``TextSemantics.find_related_concepts()`` are stored one by one as
    ``Indexer(keyword=<stripped label>, webres=key)``. Items that have
    neither an abstract nor a title are skipped without touching the index.

    :param args: single object to index and its key
    :return: None
    :raise ValueError: if ``args`` does not hold exactly two values
    :raise Exception: if an Indexer entry already exists for ``key``
    """
    item, key = args

    # Prefer the abstract and fall back to the title. Using `or` instead of
    # len() also copes with a None abstract/title, which used to raise
    # TypeError.
    text = item.abstract or item.title
    if not text:
        # Nothing to analyse: per the original note, such an item is a media
        # or a link from Twitter rather than a feed entry or a tweet.
        return

    if Indexer.query().filter(Indexer.webres == key).count() != 0:
        # NOTE(review): the message names storeIndexer(), not execute_task();
        # it is left untouched in case something matches on it.
        raise Exception("storeIndexer(): Resource already indexed")

    # Imported only once there is something to analyse, so skipped items do
    # not pay for (or depend on) the import.
    from flankers.textsemantics import TextSemantics

    # The original truncated texts of 1800+ characters down to 1799; slicing
    # unconditionally yields the same string for every length.
    labels = TextSemantics(text[:1799]).find_related_concepts()
    for label in labels:
        Indexer(keyword=label.strip(), webres=key).put()
        # Single parenthesized argument: prints the same under Python 2 and 3.
        print("indexing stored: " + item.url + ">" + label)