forked from chb/py-umls
-
Notifications
You must be signed in to change notification settings - Fork 0
/
snomed.py
262 lines (210 loc) · 7.48 KB
/
snomed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# SNOMED import utilities, extracted from umls.py
#
# 2014-01-20 Created by Pascal Pfiffner
#
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
import csv
import logging
from sqlite import SQLite # for py-umls standalone
class SNOMED (object):
""" A class for importing UMLS terminologies into an SQLite database.
"""
sqlite_handle = None
@classmethod
def check_database(cls):
""" Check if our database is in place and if not, prompts to create it.
Will raise on errors!
SNOMED: (snomed.db)
Read SNOMED CT from tab-separated files and create an SQLite database.
"""
snomed_db = os.path.join('databases', 'snomed.db')
if not os.path.exists(snomed_db):
raise Exception("The SNOMED database at {} does not exist. Run the script `snomed.py`."
.format(os.path.abspath(snomed_db)))
@classmethod
def find_needed_files(cls, snomed_dir):
# table to file mapping
prefixes = {
'descriptions': 'sct2_Description_Full-en_INT_',
'relationships': 'sct2_Relationship_Full_INT_'
}
found = {}
snomed_dir = sys.argv[1]
# try to find the files
for table, prefix in prefixes.items():
found_file = _find_files(snomed_dir, prefix)
if found_file is None:
raise Exception('Unable to locate file starting with "{}" in SNOMED directory at {}'.format(prefix, snomed_dir))
found[table] = found_file
return found
@classmethod
def import_from_files(cls, rx_map):
for table, filepath in rx_map.items():
num_query = 'SELECT COUNT(*) FROM {}'.format(table)
num_existing = cls.sqlite_handle.executeOne(num_query, ())[0]
if num_existing > 0:
continue
cls.import_csv_into_table(filepath, table)
@classmethod
def import_csv_into_table(cls, snomed_file, table_name):
""" Import SNOMED CSV into our SQLite database.
The SNOMED CSV files can be parsed by Python's CSV parser with the
"excel-tab" flavor.
"""
logging.debug('..> Importing SNOMED {} into snomed.db...'.format(table_name))
# not yet imported, parse tab-separated file and import
with open(snomed_file, encoding='utf-8') as csv_handle:
cls.sqlite_handle.isolation_level = 'EXCLUSIVE'
sql = cls.insert_query_for(table_name)
reader = csv.reader(csv_handle, dialect='excel-tab')
i = 0
try:
for row in reader:
if i > 0: # first row is the header row
# execute SQL (we just ignore duplicates)
params = cls.insert_tuple_from_csv_row_for(table_name, row)
try:
cls.sqlite_handle.execute(sql, params)
except Exception as e:
sys.exit('Cannot insert {}: {}'.format(params, e))
i += 1
# commit to file
cls.sqlite_handle.commit()
cls.did_import(table_name)
cls.sqlite_handle.isolation_level = None
except csv.Error as e:
cls.sqlite_handle.rollback()
sys.exit('CSV error on line {}: {}'.format(reader.line_num, e))
logging.debug('..> {} concepts parsed'.format(i-1))
@classmethod
def setup_tables(cls):
""" Creates the SQLite tables we need, not the tables we deserve.
Does nothing if the tables/indexes already exist
"""
if cls.sqlite_handle is None:
cls.sqlite_handle = SQLite.get(os.path.join('databases', 'snomed.db'))
# descriptions
cls.sqlite_handle.create('descriptions', '''(
concept_id INTEGER PRIMARY KEY,
lang TEXT,
term TEXT,
isa VARCHAR,
active INT
)''')
cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS isa_index ON descriptions (isa)")
# relationships
cls.sqlite_handle.create('relationships', '''(
relationship_id INTEGER PRIMARY KEY,
source_id INT,
destination_id INT,
rel_type INT,
rel_text VARCHAR,
active INT
)''')
cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS source_index ON relationships (source_id)")
cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS destination_index ON relationships (destination_id)")
cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_type_index ON relationships (rel_type)")
cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_text_index ON relationships (rel_text)")
@classmethod
def insert_query_for(cls, table_name):
""" Returns the insert query needed for the given table
"""
if 'descriptions' == table_name:
return '''INSERT OR IGNORE INTO descriptions
(concept_id, lang, term, isa, active)
VALUES
(?, ?, ?, ?, ?)'''
if 'relationships' == table_name:
return '''INSERT OR IGNORE INTO relationships
(relationship_id, source_id, destination_id, rel_type, active)
VALUES
(?, ?, ?, ?, ?)'''
return None
@classmethod
def insert_tuple_from_csv_row_for(cls, table_name, row):
if 'descriptions' == table_name:
isa = ''
if len(row) > 6:
if '900000000000013009' == row[6]:
isa = 'synonym'
elif '900000000000003001' == row[6]:
isa = 'full'
return (int(row[4]), row[5], row[7], isa, int(row[2]))
if 'relationships' == table_name:
return (int(row[0]), int(row[4]), int(row[5]), int(row[7]), int(row[2]))
return None
@classmethod
def did_import(cls, table_name):
""" Allows us to set hooks after tables have been imported
"""
if 'relationships' == table_name:
cls.sqlite_handle.execute('''
UPDATE relationships SET rel_text = 'isa' WHERE rel_type = 116680003
''')
cls.sqlite_handle.execute('''
UPDATE relationships SET rel_text = 'finding_site' WHERE rel_type = 363698007
''')
class SNOMEDLookup (object):
""" SNOMED lookup """
sqlite = None
def __init__(self):
absolute = os.path.dirname(os.path.realpath(__file__))
self.sqlite = SQLite.get(os.path.join(absolute, 'databases/snomed.db'))
def lookup_code_meaning(self, snomed_id, preferred=True, no_html=True):
""" Returns HTML for all matches of the given SNOMED id.
The "preferred" flag here currently has no function.
"""
if snomed_id is None or len(snomed_id) < 1:
return ''
sql = 'SELECT term, isa, active FROM descriptions WHERE concept_id = ?'
names = []
# loop over results
for res in self.sqlite.execute(sql, (snomed_id,)):
if not no_html and ('synonym' == res[1] or 0 == res[2]):
names.append("<span style=\"color:#888;\">{}</span>".format(res[0]))
else:
names.append(res[0])
if no_html:
return ", ".join(names) if len(names) > 0 else ''
return "<br/>\n".join(names) if len(names) > 0 else ''
# find file function
def _find_files(directory, prefix):
for root, dirs, files in os.walk(directory):
for name in files:
if name.startswith(prefix):
return os.path.join(directory, name)
for name in dirs:
found = _find_files(os.path.join(directory, name), prefix)
if found:
return found
return None
# running this as a script does the database setup/check
if '__main__' == __name__:
logging.basicConfig(level=logging.DEBUG)
# if the database check fails, run import commands
try:
SNOMED.check_database()
except Exception as e:
if len(sys.argv) < 2:
print("Provide the path to the extracted SNOMED directory as first argument.")
print("Download SNOMED from http://www.nlm.nih.gov/research/umls/licensedcontent/snomedctfiles.html""")
sys.exit(0)
# import from files
try:
SNOMED.sqlite_handle = None
SNOMED.setup_tables()
found = SNOMED.find_needed_files(sys.argv[1])
SNOMED.import_from_files(found)
except Exception as e:
raise Exception("SNOMED import failed: {}".format(e))
sys.exit(0)
# examples
look = SNOMEDLookup()
code = '215350009'
meaning = look.lookup_code_meaning(code)
print('SNOMED code "{0}": {1}'.format(code, meaning))