def __process_term(self, terms, node):
    """Count one tagged token in the terms table if it is a noun or a verb.

    Args:
        terms: Mutable mapping of feature key -> occurrence count. It is
            updated in place.
        node: Indexable pair; ``node[0]`` is the raw term and ``node[1]``
            is its part-of-speech tag.

    Returns:
        None. The token is skipped (``terms`` left untouched) when its tag
        is missing, when the tag does not start with ``n``/``v``
        (case-insensitive), when the lemmatizer returns ``None``, or when
        the normalized term is in ``self._stopwords``.
    """
    term, pos = node[0], node[1]

    # A missing tag (None or empty string) cannot denote a noun or a verb;
    # skip the token instead of failing on ``pos[0]`` below.
    if not pos:
        return

    # Only the first character of the tag decides the word class.
    pos_key = pos[0].lower()

    # continue only if we have a noun or a verb
    if pos_key not in ('n', 'v'):
        return

    term = self._lemmatizer.lemmatize(term, pos_key)
    if term is None:
        return

    term = normalize_feature_name(term)
    if term in self._stopwords:
        return

    key = make_feature_key(term)
    terms[key] = terms.get(key, 0) + 1
raise ProcessingException(str(e)) self._cache.put(resource.uri, result) else: self._logger.debug('Read from cache.') # extracted entities entities = {} # parse the XML data tree = parse_xml(result) entity_ets = tree.xpath('//entity') if len(entity_ets) < self._min_entity_count: raise ProcessingException('Insufficient entities') for entity_et in entity_ets: type = entity_et.find('type').text.lower() name = entity_et.find('text').text score = float(entity_et.find('relevance').text) disambiguated_et = entity_et.find('disambiguated') if disambiguated_et is not None: name = disambiguated_et.find('name').text name = normalize_feature_name(name) key = make_feature_key((type, name)) entities[key] = score context[ProcessingContext.EXTRACTED_ENTITIES] = entities