def get_data(wid):
    """Collect the combined strings for every document of a wiki.

    For each document id returned by ``WikiToPageHeadsService`` the value
    is a list made of, in order: the ``preprocess``-ed entity titles,
    redirect keys, redirect values and de-duplicated heads, followed by the
    (unprocessed) entries stored for that document id in the S3 object
    ``feature-data/page-<wid>.json``.

    A missing heads or entities response is logged and processing
    continues. A missing S3 object is likewise logged and treated as
    "no extra features" instead of raising ``AttributeError``.

    :param wid: wiki id; passed to both services and interpolated into the
        S3 key name
    :return: ``items()`` of the dict mapping document id to its combined list
    """
    log(wid)
    use_caching(shouldnt_compute=True)

    # Original author's note: this should be CombinedEntitiesService.
    doc_ids_to_heads = WikiToPageHeadsService().get_value(wid, {})
    doc_ids_to_entities = WikiPageToEntitiesService().get_value(wid, {})

    if doc_ids_to_heads == {}:
        log(wid, "no heads")
    if doc_ids_to_entities == {}:
        log(wid, "no entities")

    # Assuming `bucket` is a boto bucket: get_key() returns None when the
    # key does not exist, so guard before reading its contents.
    key = bucket.get_key('feature-data/page-%s.json' % wid)
    if key is None:
        log(wid, "no feature data")
        from_s3 = {}
    else:
        from_s3 = json.loads(key.get_contents_as_string())

    # Only ever read, never mutated, so one shared default is safe.
    empty_entities = {'titles': [], 'redirects': {}}

    doc_ids_combined = {}
    for doc_id in doc_ids_to_heads:
        entity_response = doc_ids_to_entities.get(doc_id, empty_entities)
        redirects = entity_response['redirects']
        raw_strings = (entity_response['titles']
                       + list(redirects.keys())
                       + list(redirects.values())
                       # set() drops duplicate heads; their order is not kept
                       + list(set(doc_ids_to_heads.get(doc_id, []))))
        # The S3 entries are appended as stored, without preprocess().
        doc_ids_combined[doc_id] = ([preprocess(s) for s in raw_strings]
                                    + from_s3.get(doc_id, []))
    return doc_ids_combined.items()
def get_data_wid(wid):
    """Collect the preprocessed entity and head strings for a wiki's documents.

    For each document id returned by ``WikiToPageHeadsService`` the value
    is the list of ``preprocess``-ed entity titles, redirect keys, redirect
    values and de-duplicated heads, in that order. Progress and missing
    data are reported on stdout.

    :param wid: wiki id passed to both services
    :return: ``items()`` of the dict mapping document id to its string list
    """
    # A single parenthesised value prints the same text under the
    # Python 2 print statement as the bare form does.
    print(wid)
    use_caching(shouldnt_compute=True)

    # Original author's note: this should be CombinedEntitiesService.
    heads_by_doc = WikiToPageHeadsService().get_value(wid, {})
    entities_by_doc = WikiPageToEntitiesService().get_value(wid, {})

    if heads_by_doc == {}:
        print("%s %s" % (wid, "no heads"))
    if entities_by_doc == {}:
        print("%s %s" % (wid, "no entities"))

    combined = {}
    for doc_id in heads_by_doc:
        entities = entities_by_doc.get(
            doc_id, {'titles': [], 'redirects': {}})
        redirects = entities['redirects']

        strings = entities['titles'] + list(redirects.keys())
        strings += list(redirects.values())
        # set() drops duplicate heads; their order is not kept
        strings += list(set(heads_by_doc.get(doc_id, [])))

        combined[doc_id] = [preprocess(text) for text in strings]
    return combined.items()
def heads(wid):
    """Return the ``WikiToPageHeadsService`` value for a wiki.

    :param wid: wiki id to look up
    :return: whatever ``get_value(wid, {})`` yields; ``{}`` is passed as
        the second argument
    """
    service = WikiToPageHeadsService()
    page_heads = service.get_value(wid, {})
    return page_heads