def getAUIMapCUI(self):
    """Map the 'AUI' field of every MRCONSO record to that record's 'CUI' field.

    The records are read from the file at ``self.MRCONSO_RRF``. If an AUI
    occurs on several records, the last one read wins.

    Returns:
        dict: {AUI: CUI}
    """
    auiToCui = {}
    for record in readMRCONSO_RRF(self.MRCONSO_RRF):
        aui = record['AUI']
        auiToCui[aui] = record['CUI']
    return auiToCui
def getAUIMapDict(self):
    """Collect the MRCONSO records whose 'LAT' field is 'CHI', keyed by AUI.

    Both 'eng' and 'cns' are filled from the same 'STR' field, so the 'eng'
    entry actually holds the Chinese string as well.

    Returns:
        dict: {AUI: {'code': CODE, 'eng': STR, 'cns': STR}}

    Raises:
        AssertionError: if the same AUI appears on more than one 'CHI' record.
    """
    # NOTE(review): the sibling methods read self.MRCONSO_RRF, while this one
    # uses a hard-coded path under DATA_PATH -- confirm whether the two are
    # meant to be the same file before unifying them.
    UMLSDataList = readMRCONSO_RRF(DATA_PATH + '/umlsMT/umls/MRCONSO.RRF')
    mapDict = {}
    for uTermDict in UMLSDataList:
        if uTermDict['LAT'] != 'CHI':
            continue
        AUI = uTermDict['AUI']
        code = uTermDict['CODE']
        term = uTermDict['STR']
        # Each AUI must appear at most once among the 'CHI' records. Raised
        # explicitly instead of via `assert` so the check still runs under
        # `python -O`; the exception type callers see is unchanged.
        if AUI in mapDict:
            raise AssertionError('duplicate AUI among CHI records: %r' % (AUI,))
        mapDict[AUI] = {'code': code, 'eng': term, 'cns': term}
    return mapDict
def getAUIMapUmlsTerm(self):
    """Map the 'AUI' field of every MRCONSO record to that record's 'STR' field.

    The records are read from the file at ``self.MRCONSO_RRF``. If an AUI
    occurs on several records, the last one read wins.

    Returns:
        dict: {AUI: STR}
    """
    records = readMRCONSO_RRF(self.MRCONSO_RRF)
    return dict((record['AUI'], record['STR']) for record in records)
for termDict in dataList: if termDict['ISPREF'] == 'Y': countPref += 1 countPrefCharacter += len(termDict['STR']) return countPref, countPrefCharacter if __name__ == '__main__': pass from config import DATA_PATH, JSON_FILE_FORMAT from common import getSaveFunc from read.UMLS.ReadMrConso import readMRCONSO_RRF sourcePath = DATA_PATH + '/umlsMT/umls/MRCONSO.RRF' print('reading', sourcePath) dataList = readMRCONSO_RRF(sourcePath) print('read complete, total terms:', len(dataList)) # print 'counting charactersNum' # print 'characters number:', countCharactersNum([termDict['STR'] for termDict in dataList]) # print 'examining AUI...' # examineAUI(dataList) # print 'done' # print 'counting CUI' # print 'CUI Number:', countCUI(dataList) # # print 'counting isPreffer' # prefNum, prefCharacNum = countIsPref(dataList) # print 'isPreffer number:', prefNum