def umls_semantic_type_word( umls_string_cache , sentence ): # Already cached? if False and umls_string_cache.has_key( sentence ): mapping = umls_string_cache.get_map( sentence ) else: concepts = interface_umls.string_lookup( sentence ) concepts = [ singleton[0] for singleton in set(concepts) ] umls_string_cache.add_map(sentence , concepts) mapping = umls_string_cache.get_map(sentence) return mapping
def span2concept(span): rawstring = ' '.join(sentence[span[0]:span[1]+1]) # Already cached? if cache.has_key( rawstring ): return cache.get_map( rawstring ) else: concept = interface_umls.string_lookup( rawstring ) if concept: cache.add_map( rawstring , concept ) else: cache.add_map( rawstring , [] ) return cache.get_map( rawstring )
def umls_semantic_context_of_words( umls_string_cache, sentence ): #Defines the largest string span for the sentence. WINDOW_SIZE = 7 # span of the umls concept of the largest substring umls_context_list = [] # keys: tuple of (start,end) index of a substring concept_span_dict = {} # Each sublist functions as the mappings for each word. for i in sentence: umls_context_list.append( [] ) # finds the span for each substring of length 1 to currentWindowSize. for currentWindowSize in range( 1 , WINDOW_SIZE ): for ti in range( 0 , ( len(sentence) - currentWindowSize ) + 1 ): rawstring = "" for tj in range( ti , ti + currentWindowSize): rawstring += ( sentence[tj] + " " ) #Each string is of length 1 to currentWindowSize. rawstring = rawstring.strip() # Not in cache yet? if not( umls_string_cache.has_key( rawstring ) ): # returns a tuple if there is a result or None is there is not. concept = interface_umls.string_lookup( rawstring ) if not concept: umls_string_cache.add_map( rawstring, None ) else: umls_string_cache.add_map( rawstring, concept ) ; #Store the concept into concept_span_dict with its span as a key. concept_span_dict[(ti,ti+currentWindowSize-1)] = umls_string_cache.get_map( rawstring ) # For each substring if there is a span, then # assign the concept to every word that is within in the substring if umls_string_cache.get_map(rawstring): for i in range( ti , ti + currentWindowSize ): if len( umls_context_list[i] ) == 0: umls_context_list[i].append([ti,ti+currentWindowSize-1]) else: updated = 0 for j in umls_context_list[i]: if j[0] >= ti and j[1] <= (ti+currentWindowSize-1): j[0] = ti j[1] = ( ti + currentWindowSize - 1 ) updated += 1 if not(updated): val = [ti,ti+currentWindowSize-1] if umls_context_list[i].count(val)== 0: umls_context_list[i].append(val) #create a list of sublists # each sublist represents the contexts for which the word appears mappings = [] for i in umls_context_list: spans = i if len(spans) == 0: mappings.append( None ) else: sub_mappings = [] for j in spans: sub_mappings.append( concept_span_dict[tuple(j)]) # FIXME - Decided to concat rather than append (not sure why) mappings += sub_mappings return mappings