def lookup_in_taxonomy(self):
    """
    Collect the labels of the taxonomy concepts related to the spotted slugs.

    Every slug in ``self.spotted`` is looked up in the taxonomy's
    ``dbpediadocs`` endpoint; each entry of the returned document's
    'relatedConcepts' is then fetched and its 'label' is collected.
    Fetch failures are printed and skipped, never raised.

    :return: a set of related concept labels (as ``str``)
    """
    base_url = "http://taxonomy.projectchronos.eu/space/dbpediadocs/{}"
    concept_url = "http://taxonomy.projectchronos.eu/concepts/c/{}"
    labels = set()

    for res in self.spotted:
        try:
            resource = retrieve_json(base_url.format(res))
        except Exception as e:
            print('lookup_in_taxonomy(): Cannot fetch taxonomy: ' + res.encode('ascii', 'replace') + ' ' + str(e))
            # skip this slug: never fall back on a document fetched for a
            # previous slug
            continue

        if not resource or 'relatedConcepts' not in resource:
            continue

        for c in resource['relatedConcepts']:
            if not c:
                continue
            try:
                # kept in its own name so the slug document being iterated
                # is not overwritten
                concept = retrieve_json(c)
            except Exception:
                # direct fetch failed: rebuild the concept URL from the last
                # path segment, applying the known spelling corrections
                # (to_be_corrected is defined elsewhere in the module)
                try:
                    kwd = c[c.rfind('/') + 1:].replace("+", " ").strip()
                    if kwd in to_be_corrected:
                        kwd = to_be_corrected[kwd]
                    kwd = kwd.replace(" ", "+")
                except Exception as e:
                    print('lookup_in_taxonomy(): ' + c + ' Concept is not in the space api ' + str(e))
                    continue
                try:
                    concept = retrieve_json(concept_url.format(kwd))
                except Exception as e:
                    print("Cannot deduce keyword. Cannot fetch relatedConcepts: " + str(e))
                    continue

            # Resource is found in the taxonomy
            labels.add(str(concept['label']))

    return labels
def get_ancestor(obj, ancestors=tuple()):
    """
    Follow the 'ancestor' links of a taxonomy object and gather the labels.

    :param obj: a mapping describing a concept; the walk continues while it
        has an 'ancestor' key
    :param ancestors: labels collected so far, nearest ancestor first
    :return: ``ancestors`` extended with the 'label' of every ancestor reached
    """
    if 'ancestor' in obj.keys():
        # load the parent through the value stored under 'ancestor'
        parent = retrieve_json(obj['ancestor'])
        ancestors += (parent['label'], )
        return get_ancestor(parent, ancestors)

    # no 'ancestor' key: the chain ends here
    return ancestors
def find_term_ancestorship(cls, term):
    """
    Return the genealogy of a term in the Taxonomy.

    The term is lower-cased and its spaces are replaced by '+' to build the
    slug used in the concept URL; the slug is also printed.

    :param term: a term in the taxonomy
    :return: a dictionary with the keys "slug" (the '+'-joined slug), "term"
        (the slug with '+' turned back into spaces) and "ancestors" (the
        ancestor labels in the reverse of the order they were fetched)
    :raise ValueError: if the concept document cannot be fetched
    """
    def get_ancestor(obj, ancestors=tuple()):
        # climb one 'ancestor' link per call, accumulating the labels
        if 'ancestor' in obj.keys():
            parent = retrieve_json(obj['ancestor'])
            ancestors += (parent['label'], )
            return get_ancestor(parent, ancestors)
        return ancestors

    term = term.replace(" ", "+").lower()
    print(term)

    base_url = "http://taxonomy.projectchronos.eu/concepts/c/{}"
    try:
        resource = retrieve_json(base_url.format(term))
    except Exception:
        raise ValueError("find_term_ancestorship(): term is not in the taxonomy. Wrong term.")

    # labels are collected nearest-first; the result lists them reversed
    lineage = list(get_ancestor(resource))
    lineage.reverse()

    genealogy = {}
    genealogy["slug"] = term
    genealogy["term"] = term.replace("+", " ")
    genealogy["ancestors"] = lineage
    return genealogy
def lookup_in_taxonomy(results):
    """
    From wikipedia slugs, fetch the relatedConcepts listed in the taxonomy.

    Each slug is transliterated with ``unidecode`` and looked up in the
    taxonomy's ``dbpediadocs`` endpoint. The label of a related concept is
    the last path segment of its URL, with '+' replaced by spaces.
    Fetch failures are printed and skipped, never raised.

    :param results: a list of wikipedia slugs
    :return: a set of related concept labels (as ``str``)
    """
    from unidecode import unidecode

    base_url = "http://taxonomy.projectchronos.eu/space/dbpediadocs/{}"
    labels = set()

    for res in results:
        res = unidecode(res)
        try:
            resource = retrieve_json(base_url.format(res))
        except Exception as e:
            print('Cannot fetch taxonomy: ' + res.encode('ascii', 'replace') + ' ' + str(e))
            # skip this slug instead of re-processing the document fetched
            # for a previous one
            continue

        if not resource or 'relatedConcepts' not in resource:
            continue

        for c in resource['relatedConcepts']:
            if c:
                # last path segment of the concept URL is its label
                labels.add(str(c.rsplit('/', 1)[-1].replace('+', ' ')))

    return labels
def relate(cls, titles, comparing=None, min_rho=0.42):
    """
    Implement the TagMe's 'relating API'.

    Builds "<title> <other>" pairs from ``titles`` and ``comparing``, sends
    them as the repeated ``tt`` query parameter and keeps the results whose
    "rel" is greater than ``min_rho``.

    :param titles: a string that is wikipedia title or a list of titles
    :param comparing: a string that is wikipedia title or a list of titles;
        when None, the value of ``cls.return_gen_scopes()`` is used
    :param min_rho: the minimum rho to filter the relation
    :return: a list of results objects with "rel" > min_rho, or None when
        the response reports errors or carries no result (entries holding an
        'err' key are printed)
    :raise ConceptNotInDBpedia: if ``titles`` is an empty list
    :raise BadRequest: if the request or the JSON parsing fails
    """
    endpoint = "http://tagme.di.unipi.it/rel"
    if comparing is None:
        comparing = cls.return_gen_scopes()

    if isinstance(titles, list):
        if not titles:
            raise ConceptNotInDBpedia('This term does not have annotations, so it cannot be searched for related')
        subjects = titles
    else:
        subjects = [titles]

    if isinstance(comparing, list):
        pairs = [t + ' ' + c for c in comparing for t in subjects if t != c]
    elif isinstance(comparing, str):
        # NOTE(review): a lone title is paired with `comparing` even when the
        # two are equal, while several titles skip the equal one; kept as-is
        # for backward compatibility.
        skip_equal = len(subjects) > 1
        pairs = [t + ' ' + comparing for t in subjects
                 if not (skip_equal and t == comparing)]
    else:
        # any other type of `comparing` contributes no pair
        pairs = []

    params = {
        "key": api_key,
        "tt": pairs
    }
    # doseq=True expands the list into one 'tt' parameter per pair
    url = endpoint + '?' + urllib.urlencode(params, True)

    try:
        results = retrieve_json(url)
    except Exception:
        raise BadRequest('Error in connection or in JSON parsing the response from TagMe Relating API')

    # errors raised while reading the response (missing keys, values that
    # are not numeric) propagate to the caller unchanged
    if int(results["errors"]) == 0 and len(results["result"]) != 0:
        return [r for r in results["result"] if float(r["rel"]) > min_rho]

    for r in results["result"]:
        if 'err' in r:
            print(BadRequest("Error in request data to TagMe: " + str(r['err']) + " in " + str(r)))
    return None
def retrieve_taggings(term, method='GET'):
    """
    Ask the TagMe tagging endpoint for the annotations of a text.

    :param term: the text to analyze as a byte (b'string'); a value whose
        ``str()`` is 'None' yields an empty response without any request
    :param method: 'GET' (parameters in the query string) or 'POST'
        (parameters passed as request data)
    :return: dictionary with loaded JSON from response, or an empty response
        (no annotations) if fetching or parsing fails
    :raise BadRequest: on an unsupported ``method``, or when the response
        carries an "errors" value different from 0
    """
    def _empty_response():
        # same shape as a TagMe answer, with no annotations
        return {"timestamp": time.time(), "time": 0, "api": "tag", "annotations": [], "lang": "en"}

    if str(term) == 'None':
        return _empty_response()

    endpoint = "http://tagme.di.unipi.it/tag"
    query = {
        "key": api_key,
        "text": term,
        "include_categories": True,
        "include_abstract": True,
        "include_all_spots": True
    }

    if method == 'GET':
        url, payload = endpoint + '?' + urllib.urlencode(query), None
    elif method == 'POST':
        url, payload = endpoint, query
    else:
        raise BadRequest('retrieve_taggings(): Wrong HTTP Verb')

    # prepared before the request so a failure can be answered right away
    fallback = _empty_response()
    try:
        response = retrieve_json(url, method=method, data=payload)
    except Exception:
        return fallback

    if "errors" in response.keys() and response["errors"] != 0:
        raise BadRequest('retrieve_taggings() Failed')
    return response