Exemplo n.º 1
0
 def parse(self, text):
     """Convert raw query text into a list of non-stopword tokens.

     The text is lowercased, split with ``tokenize``, and every token
     that appears in ``self.stopwords`` is dropped. The surviving tokens
     are returned in their original order.
     """
     lowered = text.lower()
     kept = []
     for word in tokenize(lowered):
         # stopwords carry no query meaning, so leave them out
         if word in self.stopwords:
             continue
         kept.append(word)
     return kept
Exemplo n.º 2
0
 def __init__(self):
     """Create the "disease" trie, filling it from the data file if empty.

     A ``TokenTrie`` named "disease" is instantiated. When it reports
     itself as empty, each line of ``data/UMLS/diseases.txt`` (resolved
     relative to this module's directory) is tokenized and added to the
     trie, each line is echoed to stdout, and ``save_to_cache()`` is
     called on the trie afterwards.
     """
     self.trie = TokenTrie(name="disease")
     if self.trie.is_empty():
         diseases_path = os.path.join(
             os.path.dirname(__file__), 'data', 'UMLS', 'diseases.txt')
         # The context manager closes the handle even if reading fails;
         # previously the file object was never closed.
         with open(diseases_path) as diseases_file:
             disease_names = diseases_file.read().split('\n')
         for disease in disease_names:
             self.trie.add(tokenize(disease))
             # Single-argument parenthesized form prints the same text
             # under both the print statement and the print function.
             print(disease)
         self.trie.save_to_cache()
Exemplo n.º 3
0
 def __init__(self):
     """Create the "symptoms" trie, filling it from the data file if empty.

     A ``TokenTrie`` named "symptoms" is instantiated. When it reports
     itself as empty, the data file is read line by line; each non-empty
     line is split on a tab into ``(code, name)``, the name is tokenized
     and added to the trie and echoed to stdout, and ``save_to_cache()``
     is called on the trie afterwards.

     Raises:
         ValueError: if a non-empty line does not contain exactly one tab.
     """
     self.trie = TokenTrie(name="symptoms")
     if self.trie.is_empty():
         # NOTE(review): the trie is named "symptoms" but the file loaded
         # is 'diseases.txt' - confirm this is the intended data file.
         symptoms_path = os.path.join(
             os.path.dirname(__file__), 'data', 'UMLS', 'diseases.txt')
         # The context manager closes the handle even if reading fails;
         # previously the file object was never closed.
         with open(symptoms_path) as symptoms_file:
             symptoms = symptoms_file.read().split('\n')
         for symptom in symptoms:
             if not symptom:
                 # split('\n') yields '' for blank lines and after a
                 # trailing newline; unpacking that would raise ValueError.
                 continue
             _code, name = symptom.split("\t")
             self.trie.add(tokenize(name))
             # Single-argument parenthesized form prints the same text
             # under both the print statement and the print function.
             print(name)
         self.trie.save_to_cache()
Exemplo n.º 4
0
 def __init__(self):
     """Set up the "disease" trie, populating it on first use.

     Builds a ``TokenTrie`` named "disease". If ``is_empty()`` is true,
     the lines of ``data/UMLS/diseases.txt`` (located under this module's
     directory) are tokenized and added one by one, each line is printed,
     and the trie's ``save_to_cache()`` is invoked once all lines are in.
     """
     self.trie = TokenTrie(name="disease")
     if self.trie.is_empty():
         data_path = os.path.join(
             os.path.dirname(__file__), 'data', 'UMLS', 'diseases.txt')
         # Read inside a ``with`` block so the file is always closed;
         # the earlier code leaked the open handle.
         with open(data_path) as diseases_file:
             disease_names = diseases_file.read().split('\n')
         for disease in disease_names:
             self.trie.add(tokenize(disease))
             # print(x) with one argument emits identical output whether
             # print is a statement or a function.
             print(disease)
         self.trie.save_to_cache()
Exemplo n.º 5
0
 def __init__(self):
     """Set up the "symptoms" trie, populating it on first use.

     Builds a ``TokenTrie`` named "symptoms". If ``is_empty()`` is true,
     the data file is read; every non-empty line is split on a tab into
     ``(code, name)``, the name is tokenized and added to the trie and
     printed, and the trie's ``save_to_cache()`` is invoked at the end.

     Raises:
         ValueError: if a non-empty line does not contain exactly one tab.
     """
     self.trie = TokenTrie(name="symptoms")
     if self.trie.is_empty():
         # NOTE(review): a trie named "symptoms" is being fed from
         # 'diseases.txt' - verify that this path is intentional.
         data_path = os.path.join(
             os.path.dirname(__file__), 'data', 'UMLS', 'diseases.txt')
         # Read inside a ``with`` block so the file is always closed;
         # the earlier code leaked the open handle.
         with open(data_path) as symptoms_file:
             symptoms = symptoms_file.read().split('\n')
         for symptom in symptoms:
             # An empty string (blank line or the piece after a trailing
             # newline) cannot be unpacked into two fields, so skip it.
             if not symptom:
                 continue
             _code, name = symptom.split("\t")
             self.trie.add(tokenize(name))
             # print(x) with one argument emits identical output whether
             # print is a statement or a function.
             print(name)
         self.trie.save_to_cache()
Exemplo n.º 6
0
 def extract(self, text):
     """Return the phrases that ``self.trie`` finds in ``text``.

     The text is tokenized and handed to ``self.trie.scan``; each
     sequence produced by the scan is joined with single spaces into
     one string.
     """
     phrases = []
     for matched in self.trie.scan(tokenize(text)):
         # collapse the token sequence back into a readable phrase
         phrases.append(" ".join(matched))
     return phrases
Exemplo n.º 7
0
 def extract(self, text):
     """Scan ``text`` with the trie and return the hits as strings.

     ``tokenize(text)`` is passed to ``self.trie.scan``; every sequence
     the scan yields becomes one space-separated string in the result
     list.
     """
     token_stream = tokenize(text)
     hits = self.trie.scan(token_stream)
     # " ".join is applied to each sequence to rebuild the phrase
     return list(map(" ".join, hits))