def _query_parser_help(self):
    """Build the help snippets shown for each query-parser plugin.

    Returns a dict keyed by plugin class (taken from ``plugins``).  Each
    value is an HTML fragment, or ``None`` when the plugin has no help
    entry.  The ``FieldsPlugin`` fragment embeds a ``<dl>`` with one entry
    per indexed field of ``self.project_schema``: fields that are missing
    from the table below are listed as "Undocumented", fields whose
    description is ``None`` are left out.
    """
    field_docs = {
        "classifiers": (
            ' The <a href="https://pypi.org/pypi?%3Aaction=list_classifiers"'
            ' target="_blank">trove classifiers</a> of a package.'
            " Use single quotes to specify a classifier, as they contain spaces:"
            " <code>classifiers:'Programming Language :: Python :: 3'</code>"
        ),
        "index": (
            "The name of the index. This is only the name part, "
            "without the user. For example: <code>index:pypi</code>"
        ),
        "keywords": "The keywords of a package.",
        "name": "The package name. For example: <code>name:devpi-client</code>",
        "path": (
            "The path of the package in the form '/{user}/{index}/{name}'. "
            "For example: <code>path:/root/pypi/devpi-server</code>"
        ),
        # ``None`` hides the field from the generated help.
        "text": None,
        "type": (
            " The type of text."
            " One of <code>project</code> for the project name,"
            " <code>title</code> for the title of a documentation page,"
            " <code>page</code> for a documentation page,"
            " or one of the following project metadata fields:"
            " <code>author</code>, <code>author_email</code>,"
            " <code>description</code>, <code>keywords</code>,"
            " <code>summary</code>. \n"
            "For example: <code>type:page</code> "
        ),
        "user": "******",
    }

    schema = self.project_schema
    documented = []
    for name in schema.names():
        if not schema[name].indexed:
            continue
        description = field_docs.get(name, "Undocumented")
        if description is not None:
            documented.append((name, description))

    entries = ''.join(
        "<dt><code>%s</code></dt><dd>%s</dd>" % entry for entry in documented)
    fields_doc = "<dl>%s</dl>" % entries

    return {
        plugins.WhitespacePlugin: None,
        plugins.SingleQuotePlugin: (
            " To specify a term which contains spaces, use single quotes"
            " like this: <code>'term with spaces'</code>"
        ),
        plugins.FieldsPlugin: (
            " By using a search like <code>fieldname:term</code>,"
            " you can search in the following fields:<br />%s"
        ) % fields_doc,
        plugins.PrefixPlugin: (
            " End a term with an asterisk to search by prefix like this:"
            " <code>path:/fschulze/*</code>"
        ),
        plugins.GroupPlugin: " Group query clauses with parentheses.",
        plugins.OperatorsPlugin: (
            " Use the <code>AND</code>, <code>OR</code>, <code>ANDNOT</code>,"
            " <code>ANDMAYBE</code>, and <code>NOT</code><br />"
            " operators to further refine your search.<br />"
            " Write them in all capital letters, otherwise they will be"
            " interpreted as search terms.<br />"
            " An example search would be: <code>devpi ANDNOT client</code>"
        ),
        plugins.BoostPlugin: (
            " Boost a term by adding a circumflex followed by the boost value"
            " like this: <code>term^2</code>"
        ),
    }
def _escape_html(value):
    """Format ``value`` with ``%s`` and escape HTML metacharacters and quotes."""
    try:
        from html import escape
    except ImportError:  # Python 2 has no ``html`` module; cgi.escape is the equivalent
        from cgi import escape
    return escape("%s" % (value,), True)


def _mark_matches(text, pattern):
    """Escape ``text`` for HTML and wrap every ``pattern`` match in ``<mark>``.

    Matching is done on the raw text in a single pass and each piece is
    escaped separately, so a search word can never match inside markup or an
    HTML entity that this function itself produced.
    """
    if pattern is None:
        return _escape_html(text)
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(_escape_html(text[cursor:match.start()]))
        pieces.append(
            "<mark class='bg-danger'>%s</mark>" % _escape_html(match.group(0)))
        cursor = match.end()
    pieces.append(_escape_html(text[cursor:]))
    return "".join(pieces)


def instant_search():
    """Search every model index for the submitted term and render table rows.

    The term is read from ``request.form["search"]``.  Surrounding
    whitespace and ``*`` are stripped and every word is wrapped in ``*``
    wildcards before it is parsed with a ``MultifieldParser`` over the
    indexable columns (except ``id``) plus ``model_type`` of each class
    returned by ``get_model_classes()``.

    Returns:
        A string of ``<tr>`` elements, ordered by descending hit score.
        Hits whose ``model_id`` is ``None`` or the text ``"None"`` are
        skipped.  Each row links to ``/assets/edit/<model type>/<model id>``
        and lists the matched terms with the searched words highlighted.
    """
    import re

    search_term_original = request.form["search"].strip().strip("*")
    search_term = search_term_original.lower()
    if " " in search_term:
        # Wrap each word exactly once.  Re-scanning the partially wrapped
        # string (as a split/join per word does) mangles terms where one
        # word is contained in, or equal to, another one.
        search_term = " ".join("*%s*" % word for word in search_term.split())
    else:
        # NOTE(review): the single-word query keeps the user's casing while
        # the multi-word query is lower-cased -- confirm which is intended.
        search_term = "*%s*" % search_term_original

    # Longest words first so the alternation prefers the longest match.
    words = sorted(search_term_original.split(), key=len, reverse=True)
    if words:
        highlight = re.compile("|".join(re.escape(word) for word in words))
    else:
        highlight = None

    results = []
    for model in get_model_classes():
        instance = model()
        fields = [column for column in instance._get_indexable_columns()
                  if column != "id"]
        fields.append("model_type")
        mparser = MultifieldParser(fields, schema=instance._get_schema())
        query = mparser.parse(search_term)
        with instance._get_index().searcher() as searcher:
            for h in searcher.search(query, terms=True):
                model_type = h["model_name"]
                matched_terms = [
                    ": ".join(["<code>%s</code>" % pretty_print(field),
                               _mark_matches(text, highlight)])
                    for field, text in h.matched_terms()
                ]
                results.append((h.score, h["model_id"],
                                pretty_print(model_type), model_type,
                                h["name"], matched_terms))

    results.sort(key=lambda row: row[0], reverse=True)

    rows = []
    for score, model_id, model_name, model_type, ci_name, matched_terms in results:
        if model_id is None or model_id == u"None":
            continue
        edit_url = "/assets/edit/%s/%s" % (
            _escape_html(model_type.lower()), _escape_html(model_id))
        rows.append("<tr>")
        # NOTE(review): pretty_print() output is inserted as-is; confirm it
        # cannot contain characters that are significant in HTML.
        rows.append('<td><a href="%s">%s</a></td>' % (edit_url, model_name))
        rows.append('<td><a href="%s">%s</a></td>'
                    % (edit_url, _escape_html(ci_name)))
        rows.append('<td><a href="%s">' % edit_url)
        rows.extend("<p>" + matched_term + "</p>"
                    for matched_term in matched_terms)
        rows.append("</a></td></tr>")
    return "".join(rows)
def search_index(self, search_term):
    """Yield the index hits for ``search_term`` across this model's columns.

    The query is parsed with a ``MultifieldParser`` over the columns named
    by ``_get_indexable_columns()``; ``"id"`` is looked up as ``"model_id"``
    and any column whose attribute reports ``primary_key`` is excluded.
    The searcher stays open while the results are being iterated.
    """
    model_index = self._get_index()
    schema = self._get_schema()

    candidates = (
        "model_id" if column == "id" else column
        for column in self._get_indexable_columns()
    )
    # Do not search the primary key
    searchable = [column for column in candidates
                  if not getattr(self, column).primary_key]

    query = qparser.MultifieldParser(searchable, schema).parse(search_term)
    with model_index.searcher() as searcher:
        for hit in searcher.search(query):
            yield hit
def _escape_html(value):
    """Format ``value`` with ``%s`` and escape HTML metacharacters and quotes."""
    try:
        from html import escape
    except ImportError:  # Python 2 has no ``html`` module; cgi.escape is the equivalent
        from cgi import escape
    return escape("%s" % (value,), True)


def _mark_matches(text, pattern):
    """Escape ``text`` for HTML and wrap every ``pattern`` match in ``<mark>``.

    Matching is done on the raw text in a single pass and each piece is
    escaped separately, so a search word can never match inside markup or an
    HTML entity that this function itself produced.
    """
    if pattern is None:
        return _escape_html(text)
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(_escape_html(text[cursor:match.start()]))
        pieces.append(
            "<mark class='bg-danger'>%s</mark>" % _escape_html(match.group(0)))
        cursor = match.end()
    pieces.append(_escape_html(text[cursor:]))
    return "".join(pieces)


def instant_search():
    """Search every model index for the submitted term and render table rows.

    The term is read from ``request.form["search"]``.  Surrounding
    whitespace and ``*`` are stripped and every word is wrapped in ``*``
    wildcards before it is parsed with a ``MultifieldParser`` over the
    indexable columns (except ``id``) plus ``model_type`` of each class
    returned by ``get_model_classes()``.

    Returns:
        A string of ``<tr>`` elements, ordered by descending hit score.
        Hits whose ``model_id`` is ``None`` or the text ``"None"`` are
        skipped.  Each row links to ``/assets/edit/<model type>/<model id>``
        and lists the matched terms with the searched words highlighted.
    """
    import re

    search_term_original = request.form["search"].strip().strip("*")
    search_term = search_term_original.lower()
    if " " in search_term:
        # Wrap each word exactly once.  Re-scanning the partially wrapped
        # string (as a split/join per word does) mangles terms where one
        # word is contained in, or equal to, another one.
        search_term = " ".join("*%s*" % word for word in search_term.split())
    else:
        # NOTE(review): the single-word query keeps the user's casing while
        # the multi-word query is lower-cased -- confirm which is intended.
        search_term = "*%s*" % search_term_original

    # Longest words first so the alternation prefers the longest match.
    words = sorted(search_term_original.split(), key=len, reverse=True)
    if words:
        highlight = re.compile("|".join(re.escape(word) for word in words))
    else:
        highlight = None

    results = []
    for model in get_model_classes():
        instance = model()
        fields = [column for column in instance._get_indexable_columns()
                  if column != "id"]
        fields.append("model_type")
        mparser = MultifieldParser(fields, schema=instance._get_schema())
        query = mparser.parse(search_term)
        with instance._get_index().searcher() as searcher:
            for h in searcher.search(query, terms=True):
                model_type = h["model_name"]
                matched_terms = [
                    ": ".join(["<code>%s</code>" % pretty_print(field),
                               _mark_matches(text, highlight)])
                    for field, text in h.matched_terms()
                ]
                results.append((h.score, h["model_id"],
                                pretty_print(model_type), model_type,
                                h["name"], matched_terms))

    results.sort(key=lambda row: row[0], reverse=True)

    rows = []
    for score, model_id, model_name, model_type, ci_name, matched_terms in results:
        if model_id is None or model_id == u"None":
            continue
        edit_url = "/assets/edit/%s/%s" % (
            _escape_html(model_type.lower()), _escape_html(model_id))
        rows.append("<tr>")
        # NOTE(review): pretty_print() output is inserted as-is; confirm it
        # cannot contain characters that are significant in HTML.
        rows.append('<td><a href="%s">%s</a></td>' % (edit_url, model_name))
        rows.append('<td><a href="%s">%s</a></td>'
                    % (edit_url, _escape_html(ci_name)))
        rows.append('<td><a href="%s">' % edit_url)
        rows.extend("<p>" + matched_term + "</p>"
                    for matched_term in matched_terms)
        rows.append("</a></td></tr>")
    return "".join(rows)
# UI labels for study designs -> value stored in the ``Type`` index field.
_STUDY_DESIGN_ALIASES = {
    'Case-Control Study': 'Case-Control',
    'Cross Sectional Study': 'Cross Sectional',
    'Double Blind Study': 'Double Blind',
    'Interventional Studies': 'Interventional',
    'Longitudinal Cohort Study': 'Longitudinal',
    'Mendelian Randomized': 'Mendelian',
    'Multicenter Study': 'Multicenter',
    'Nested Case Control Study': 'Nested Case Control',
    'Observational Studies': 'Observational',
    'Partial Factorial Randomized Trial': 'Partial Factorial Randomized',
    'Placebo Controlled Study': 'Placebo Controlled',
    'Population Based Study': 'Population Based Control',
    'Prospective Cohort Study': 'Prospective',
    'Quantitative Cross Sectional Study': 'Quantitative Cross Sectional',
    'Randomized Trial': 'Randomized',
    'Phase 1': 'Phase1',
    'Phase 2': 'Phase2',
    'Phase 3': 'Phase3',
}

# Value of a ``Sex`` term -> clause over the per-gender participant counts.
_SEX_CLAUSES = {
    'Male': 'MaleNum:[1 to]',
    'Female': 'FemaleNum:[1 to]',
    'Both': 'MaleNum:[1 to] AND FemaleNum:[1 to]',
    'Either': 'MaleNum:[1 to] OR FemaleNum:[1 to]',
}

# Field labels that are all searched in the ``platform`` index field.
_PLATFORM_FIELD_MARKERS = (
    'Platform', 'DataAnalysisMethod', 'Machine', 'SequencingTechnique')


def _split_fielded_query(Qinput):
    """Split ``term[Field] OP term[Field] ...`` into (terms, fields, conj)."""
    import re

    # Lowercase connectives are noise words and are dropped.  Only whole
    # words are removed so that e.g. " android" or " organ" stay intact.
    Qinput = re.sub(r' (?:and|or)\b', ' ', Qinput)

    # A dangling operator at the very end has nothing to connect.
    words = Qinput.strip().split()
    if words[-1] in ('AND', 'OR'):
        Qinput = ' '.join(words[:-1])

    # Even positions hold free text (operator + term), odd positions hold
    # the field name that was written between brackets.
    segments = re.split(r'[\[\]|]', Qinput)
    terms, fields, conj = [], [], []
    for position in range(0, len(segments), 2):
        text = segments[position]
        if not text.strip():
            continue
        # AND is checked before OR; the operator must be a whole word so
        # that terms such as "HORMONE" or "BRAND" are not mistaken for one.
        for operator in ('AND', 'OR'):
            pattern = r'\b%s\b' % operator
            if re.search(pattern, text):
                conj.append(operator)
                text = re.sub(pattern, '', text)
                break
        # Parentheses typed by the user are discarded; grouping is rebuilt
        # from runs of identical fields when the query is assembled.
        terms.append(text.replace('(', '').replace(')', '').strip())
        if position + 1 < len(segments):
            fields.append(segments[position + 1])

    # A trailing ")" leaves an empty last term behind.
    if terms and terms[-1] == '':
        terms.pop()
    return terms, fields, conj


def _range_bounds(term, open_low, open_high):
    """Parse ``"low,high"``; a ``*`` bound is replaced by the open default."""
    pieces = term.strip().split(',')
    low = pieces[0].strip('(')
    high = pieces[1].strip(')')
    low_n = open_low if '*' in low else int(low)
    high_n = open_high if '*' in high else int(high)
    return low_n, high_n


def _append_clause(tokens, clause, count, conj, term_count, fields=None):
    """Append ``clause`` and, unless it is the last term, its operator.

    When ``fields`` is given, consecutive terms on the same field are
    grouped: the first term of a run gets an opening parenthesis and the
    last term of a run gets the closing one.
    """
    if fields is not None:
        same_as_previous = count > 0 and fields[count - 1] == fields[count]
        same_as_next = (count < term_count - 1
                        and fields[count + 1] == fields[count])
        if same_as_next and not same_as_previous:
            clause = '(' + clause
        elif same_as_previous and not same_as_next:
            clause = clause + ')'
    tokens.append(clause)
    if count < term_count - 1:
        tokens.append(conj[count])


def Map(Qinput, concept):
    """Translate a ``term[Field] AND/OR term[Field] ...`` query for Whoosh.

    Args:
        Qinput: the query text.  Field names are written in square brackets
            after each term; terms are connected with uppercase ``AND`` /
            ``OR``.
        concept: when equal to ``True``, ``All Fields`` terms are passed
            through ``QueryAnalys`` before being added to the query.

    Returns:
        A ``(query, terms)`` tuple.  ``query`` is the assembled query string
        with clauses and operators separated by single spaces; ``terms`` is
        the list of parsed terms after the per-field value normalisation.
        ``('top', ['top'])`` is returned when ``Qinput`` contains no
        bracketed field.

    Raises:
        IndexError: when a term has no field, when two terms are not
            connected by an operator, or when a range term has no comma.
        ValueError: when a ``SampleSize`` / ``Age`` bound is not a number.
    """
    # Without a bracketed field there is nothing to map.
    if '[' not in Qinput or ']' not in Qinput:
        return ('top', ['top'])

    terms, fields, conj = _split_fielded_query(Qinput)
    # NOTE(review): lowercase "and"/"or" are removed, so "a[X] and b[Y]"
    # ends up without an operator and raises IndexError below -- confirm
    # whether a default operator should be used instead.

    schema = Schema(
        IDName=TEXT(stored=True), path=ID(stored=True),
        title=TEXT(stored=True), desc=TEXT(stored=True),
        Type=TEXT(stored=True), cohort=NUMERIC(stored=True),
        inexclude=TEXT(stored=True), platform=TEXT(stored=True),
        MESHterm=TEXT(stored=True), history=TEXT(stored=True),
        attributes=TEXT(stored=True), topic=TEXT(stored=True),
        disease=TEXT(stored=True), measurement=TEXT(stored=True),
        demographics=TEXT(stored=True), geography=TEXT(stored=True),
        age=TEXT(stored=True), gender=TEXT(stored=True),
        category=TEXT(stored=True), IRB=TEXT(stored=True),
        ConsentType=TEXT(stored=True), phen=TEXT(stored=True),
        phenID=TEXT(stored=True), phenDesc=TEXT(stored=True),
        phenName=TEXT(stored=True), phenCUI=TEXT(stored=True),
        phenMap=TEXT(stored=True), AgeMin=NUMERIC(stored=True),
        AgeMax=NUMERIC(stored=True), MaleNum=NUMERIC(stored=True),
        FemaleNum=NUMERIC(stored=True), OtherGenderNum=NUMERIC(stored=True),
        UnknownGenderNum=NUMERIC(stored=True), Demographics=TEXT(stored=True),
        phenType=TEXT(stored=True))

    tokens = []
    term_count = len(terms)

    def plain(count, clause):
        _append_clause(tokens, clause, count, conj, term_count)

    def grouped(count, clause):
        _append_clause(tokens, clause, count, conj, term_count, fields)

    # The checks below are deliberately independent ``if`` statements: a
    # field label may match more than one of them, and value rewrites made
    # by an earlier check are visible to the later ones.
    for count, iQuery in enumerate(terms):
        field = fields[count]

        # Numeric ranges
        if 'SampleSize' in field:
            low, high = _range_bounds(terms[count], 0, 100000)
            plain(count, 'cohort:[%d to %d]' % (low, high))

        if field == 'Age':
            low, high = _range_bounds(terms[count], 0, 150)
            plain(count,
                  'AgeMin:[%d to 150] AND AgeMax:[0 to %d]' % (low, high))

        # Keyword fields, grouped when the same field repeats
        if 'StudySubject' in field:
            grouped(count, 'Type:' + terms[count].strip())

        if 'Ethnicity' in field:
            if terms[count].startswith('Hispanic'):
                terms[count] = 'Hispanic'
            elif terms[count].startswith('Not Hispanic'):
                terms[count] = 'NonLatino'
            else:
                terms[count] = 'Hispanic OR Demographics:NonLatino'
            grouped(count, 'Demographics:' + terms[count].strip())

        for marker in _PLATFORM_FIELD_MARKERS:
            if marker in field:
                grouped(count, 'platform:"' + terms[count].strip() + '"')

        if 'IRB' in field:
            if terms[count] == 'Not Required':
                terms[count] = 'No'
            grouped(count, 'IRB:' + terms[count].strip())

        if 'Consent' in field:
            if terms[count] == 'Unrestricted':
                terms[count] = 'No'
            grouped(count, 'ConsentType:' + terms[count].strip())

        if 'Nationality' in field:
            mparser = MultifieldParser(["Demographics", "demographics"], schema)
            grouped(count, str(mparser.parse(unicode(iQuery))))

        if 'Sex' in field:
            sex_clause = _SEX_CLAUSES.get(terms[count])
            if sex_clause is not None:
                grouped(count, sex_clause)

        if 'StudyDesign' in field:
            if 'Genome Wide Association Study' in terms[count]:
                terms[count] = 'gwas'
            terms[count] = _STUDY_DESIGN_ALIASES.get(terms[count], terms[count])
            grouped(count, 'Type:' + terms[count].strip())

        if 'Race' in field:
            if terms[count] == 'Mixed Race':
                terms[count] = 'Multiple'
            grouped(count, 'Demographics:' + terms[count].strip())

        # Free-text fields, parsed by Whoosh against the listed index fields
        parsed_targets = (
            (field.strip() == 'Study', ["IDName", "path", "title", "desc"]),
            ('Study Name' in field, ["title"]),
            ('Study ID' in field, ["IDName"]),
            (field == 'Variable',
             ["phenID", "phenName", "phenType", "phenDesc"]),
            ('Variable Desc' in field, ["phenDesc"]),
            ('Variable Name' in field, ["phenName"]),
            ('Variable ID' in field, ["phenID"]),
        )
        for matched, fieldnames in parsed_targets:
            if matched:
                mparser = MultifieldParser(fieldnames, schema)
                plain(count, str(mparser.parse(unicode(iQuery))))

        if 'All Fields' in field:
            # ``== True`` (not truthiness) is kept on purpose so that only
            # True/1 enable the concept expansion, as before.
            if concept == True:
                plain(count, QueryAnalys(iQuery, concept))
            else:
                plain(count, iQuery)

    MainQuery = ' '.join(tokens)
    return (MainQuery, terms)
def Map(Qinput,concept): # Check if not correct syntax # IF THERE ARE NOT FIELDS if Qinput.find('[')==-1 or Qinput.find(']')==-1: return ('top',['top']) # ========== # Replace 'and','or' Qinput=Qinput.replace(' and',' ') Qinput=Qinput.replace(' or',' ') # Replace 'AND', 'OR' if Qinput.strip().split()[-1]=='AND' or Qinput.strip().split()[-1]=='OR': Qinput = ' '.join(Qinput.strip().split()[0:-1]) #print Qinput map1 = Qinput map1 = map1.replace('[','|') map1 = map1.replace(']','|') map2 = map1.split('|') #print map2 #print len(map2) terms=[] fields=[] conj = [] for index1 in range(0,len(map2),2): if len(map2[index1].strip())>0: text = map2[index1] if text.find('AND')>=0: conj.append('AND') text_str = text.replace('AND','') elif text.find('OR')>=0: conj.append('OR') text_str = text.replace('OR','') else: text_str = text terms.append(text_str.strip()) if index1+1<len(map2): fields.append(map2[index1+1]) #print terms #print fields #print conj # Fix parenthesis, first remove all parenthesis for i in range(0,len(terms)): terms[i]=terms[i].replace("(",'').strip() terms[i]=terms[i].replace(")",'').strip() if terms[-1]=='': terms=terms[0:-1] #print "==========" #print terms #print fields #print conj # ============================================== # Convert to Whoosh query # ============================================== schema = Schema(IDName=TEXT(stored=True),path=ID(stored=True), title=TEXT(stored=True), desc=TEXT(stored=True), Type=TEXT(stored=True), cohort=NUMERIC(stored=True), inexclude=TEXT(stored=True), platform=TEXT(stored=True), MESHterm=TEXT(stored=True), history=TEXT(stored=True), attributes=TEXT(stored=True), 
topic=TEXT(stored=True),disease=TEXT(stored=True),measurement=TEXT(stored=True),demographics=TEXT(stored=True),geography=TEXT(stored=True),age=TEXT(stored=True),gender=TEXT(stored=True),category=TEXT(stored=True),IRB=TEXT(stored=True),ConsentType=TEXT(stored=True),phen=TEXT(stored=True),phenID=TEXT(stored=True),phenDesc=TEXT(stored=True),phenName=TEXT(stored=True),phenCUI=TEXT(stored=True),phenMap=TEXT(stored=True), AgeMin=NUMERIC(stored=True), AgeMax=NUMERIC(stored=True), MaleNum=NUMERIC(stored=True), FemaleNum=NUMERIC(stored=True), OtherGenderNum=NUMERIC(stored=True), UnknownGenderNum=NUMERIC(stored=True), Demographics=TEXT(stored=True), phenType=TEXT(stored=True)) ## Convert into Advanced Search MainQuery='' count = 0 for iQuery in terms: # Default MParser mparser = MultifieldParser(["IDName","path","title","desc","Type","cohort","platform","topic","disease","measurement","demographics","geography","age","gender","category","phenID","phenName","phenDesc","phenCUI","phenMap","Demographics","phenType"], schema) # Work with SampleSize if fields[count].find('SampleSize')>=0: MinSize = terms[count].split(',')[0].strip('(') MaxSize = terms[count].split(',')[1].strip(')') if MinSize.find('*')>=0: MinSizeN=0 else: MinSizeN=int(MinSize) if MaxSize.find('*')>=0: MaxSizeN=100000 else: MaxSizeN=int(MaxSize) if count<len(terms)-1: MainQuery+= 'cohort:' + '[' + str(MinSizeN) + ' to ' + str(MaxSizeN) + '] ' + conj[count] + ' ' else: MainQuery+= 'cohort:' + '[' + str(MinSizeN) + ' to ' + str(MaxSizeN) + ']' # Work with Age if fields[count]=='Age': #print terms MinAge = terms[count].strip().split(',')[0].strip('(') MaxAge = terms[count].strip().split(',')[1].strip(')') if MinAge.find('*')>=0: MinAgeN=0 else: MinAgeN=int(MinAge) if MaxAge.find('*')>=0: MaxAgeN=150 else: MaxAgeN=int(MaxAge) if count<len(terms)-1: MainQuery+= 'AgeMin:' + '[' + str(MinAgeN) + ' to 150] ' + ' AND AgeMax:' + '[0 to ' + str(MaxAgeN) + '] ' + conj[count] + ' ' else: MainQuery+= 'AgeMin:' + '[' + 
str(MinAgeN) + ' to 150] ' + ' AND AgeMax:' + '[0 to ' + str(MaxAgeN) + ']' # Work with StudySubject if fields[count].find('StudySubject')>=0: if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (Type:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' Type:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' Type:' + terms[count].strip() + ') ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' Type:' + terms[count].strip() + ') ' else: MainQuery+=' Type:' + terms[count].strip() + ' ' # Work with Ethnicity if fields[count].find('Ethnicity')>=0: #print terms[count] if terms[count].find('Hispanic')==0: terms[count]='Hispanic' elif terms[count].find('Not Hispanic')==0: terms[count]='NonLatino' else: terms[count]='Hispanic OR Demographics:NonLatino' if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (Demographics:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' Demographics:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' Demographics:' + terms[count].strip() + ') ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' Demographics:' + terms[count].strip() + ') ' else: MainQuery+=' Demographics:' + terms[count].strip() + ' ' # Work with Platform if fields[count].find('Platform')>=0: if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (platform:"' + terms[count].strip() + '" ' +conj[count] else: MainQuery+=' platform:"' + terms[count].strip() + '" ' +conj[count] else: MainQuery+=' platform:"' + terms[count].strip() + '") ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' platform:"' + terms[count].strip() + '") ' else: MainQuery+=' platform:"' + terms[count].strip() + '" ' # Work with DataAnalysisMethod if fields[count].find('DataAnalysisMethod')>=0: if count<=len(terms)-2: if 
fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (platform:"' + terms[count].strip() + '" ' +conj[count] else: MainQuery+=' platform:"' + terms[count].strip() + '" ' +conj[count] else: MainQuery+=' platform:"' + terms[count].strip() + '") ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' platform:"' + terms[count].strip() + '") ' else: MainQuery+=' platform:"' + terms[count].strip() + '" ' # Work with Machine if fields[count].find('Machine')>=0: if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (platform:"' + terms[count].strip() + '" ' +conj[count] else: MainQuery+=' platform:"' + terms[count].strip() + '" ' +conj[count] else: MainQuery+=' platform:"' + terms[count].strip() + '") ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' platform:"' + terms[count].strip() + '") ' else: MainQuery+=' platform:"' + terms[count].strip() + '" ' # Work with SenquencingTechnique if fields[count].find('SequencingTechnique')>=0: if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (platform:"' + terms[count].strip() + '" ' +conj[count] else: MainQuery+=' platform:"' + terms[count].strip() + '" ' +conj[count] else: MainQuery+=' platform:"' + terms[count].strip() + '") ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' platform:"' + terms[count].strip() + '") ' else: MainQuery+=' platform:"' + terms[count].strip() + '" ' # Work with IRB if fields[count].find('IRB')>=0: if terms[count]=='Not Required': terms[count]='No' if count<len(terms)-1: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= '(IRB:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+= ' IRB:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+= ' IRB:' + terms[count].strip() + ') ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' IRB:' + 
terms[count].strip() + ') ' else: MainQuery+= ' IRB:' + terms[count].strip() + ' ' # Work with Consent Type if fields[count].find('Consent')>=0: #print terms if terms[count]=='Unrestricted': terms[count]='No' if terms[count]=='Restricted': terms[count]='Restricted' if terms[count]=='Unspecified': terms[count]='Unspecified' if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (ConsentType:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' ConsentType:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' ConsentType:' + terms[count].strip() + ') ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' ConsentType:' + terms[count].strip() + ') ' else: MainQuery+=' ConsentType:' + terms[count].strip() + ' ' #if count<len(terms)-1: # MainQuery+= ' ConsentType:' + terms[count].strip() + ' ' +conj[count] #else: # MainQuery+= ' ConsentType:' + terms[count].strip() # Work with Nationality if fields[count].find('Nationality')>=0: mparser = MultifieldParser(["Demographics","demographics"], schema) query= mparser.parse(unicode(iQuery)) # Combine all queries together #if count<=len(terms)-2: # MainQuery+=str(query) + ' ' +conj[count] + ' ' #else: # MainQuery+=str(query) if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' ( ' + str(query) + ' ' +conj[count] else: MainQuery+= ' ' + str(query) + ' ' +conj[count] else: MainQuery+=' ' + str(query) + ') ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' ' + str(query) + ') ' else: MainQuery+=' ' + str(query) + ' ' # Work with Gender if fields[count].find('Sex')>=0: if terms[count]=='Male': #if count<len(terms)-1: # MainQuery+= ' MaleNum:[1 to] ' +conj[count] #else: # MainQuery+= ' MaleNum:[1 to] ' if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+=' (MaleNum:[1 to] ' +conj[count] else: MainQuery+=' MaleNum:[1 to] ' 
+conj[count] else: MainQuery+=' MaleNum:[1 to]) ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+=' MaleNum:[1 to]) ' else: MainQuery+=' MaleNum:[1 to] ' if terms[count]=='Female': #if count<len(terms)-1: # MainQuery+= ' FemaleNum:[1 to] ' +conj[count] #else: # MainQuery+= ' FemaleNum:[1 to] ' if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+=' (FemaleNum:[1 to] ' +conj[count] else: MainQuery+=' FemaleNum:[1 to] ' +conj[count] else: MainQuery+=' FemaleNum:[1 to]) ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+=' FemaleNum:[1 to]) ' else: MainQuery+=' FemaleNum:[1 to] ' if terms[count]=='Both': #if count<len(terms)-1: # MainQuery+= ' MaleNum:[1 to] AND FemaleNum:[1 to] ' +conj[count] #else: # MainQuery+= ' MaleNum:[1 to] AND FemaleNum:[1 to] ' if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+=' (MaleNum:[1 to] AND FemaleNum:[1 to] ' +conj[count] else: MainQuery+=' MaleNum:[1 to] AND FemaleNum:[1 to] ' +conj[count] else: MainQuery+=' MaleNum:[1 to] AND FemaleNum:[1 to]) ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+=' MaleNum:[1 to] AND FemaleNum:[1 to]) ' else: MainQuery+=' MaleNum:[1 to] AND FemaleNum:[1 to] ' if terms[count]=='Either': #if count<len(terms)-1: # MainQuery+= ' MaleNum:[1 to] OR FemaleNum:[1 to] ' +conj[count] #else: # MainQuery+= ' MaleNum:[1 to] OR FemaleNum:[1 to] ' if count<=len(terms)-2: if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+=' (MaleNum:[1 to] OR FemaleNum:[1 to] ' +conj[count] else: MainQuery+=' MaleNum:[1 to] OR FemaleNum:[1 to] ' +conj[count] else: MainQuery+=' MaleNum:[1 to] OR FemaleNum:[1 to]) ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+=' MaleNum:[1 to] OR FemaleNum:[1 to]) ' else: MainQuery+=' MaleNum:[1 to] OR FemaleNum:[1 to] ' # Work with Study Design if fields[count].find('StudyDesign')>=0: if 
terms[count].find('Genome Wide Association Study')>=0: terms[count]= 'gwas' if terms[count]=='Case-Control Study': terms[count]= 'Case-Control' if terms[count]=='Cross Sectional Study': terms[count]= 'Cross Sectional' if terms[count]=='Double Blind Study': terms[count]= 'Double Blind' if terms[count]=='Interventional Studies': terms[count]='Interventional' if terms[count]=='Longitudinal Cohort Study': terms[count]='Longitudinal' if terms[count]=='Mendelian Randomized': terms[count]='Mendelian' if terms[count]=='Multicenter Study': terms[count]='Multicenter' if terms[count]=='Nested Case Control Study': terms[count]='Nested Case Control' if terms[count]=='Observational Studies': terms[count]='Observational' if terms[count]=='Partial Factorial Randomized Trial': terms[count]='Partial Factorial Randomized' if terms[count]=='Placebo Controlled Study': terms[count]='Placebo Controlled' if terms[count]=='Population Based Study': terms[count]='Population Based Control' if terms[count]=='Prospective Cohort Study': terms[count]='Prospective' if terms[count]=='Quantitative Cross Sectional Study': terms[count]='Quantitative Cross Sectional' if terms[count]=='Randomized Trial': terms[count]='Randomized' if terms[count]=='Phase 1': terms[count]='Phase1' if terms[count]=='Phase 2': terms[count]='Phase2' if terms[count]=='Phase 3': terms[count]='Phase3' if count<=len(terms)-2: #MainQuery+= ' Type:' + terms[count].strip() + ' ' +conj[count] if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (Type:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' Type:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' Type:' + terms[count].strip() + ') ' +conj[count] else: #MainQuery+= ' Type:' + terms[count].strip() + ' ' if fields[count]==fields[count-1]: MainQuery+= ' Type:' + terms[count].strip() + ') ' else: MainQuery+=' Type:' + terms[count].strip() + ' ' #print MainQuery # Work with Race if fields[count].find('Race')>=0: #if 
terms[count]=='White': # terms[count]='white OR Demographics:cacausian ' if terms[count]=='Mixed Race': terms[count]='Multiple' # Combine all queries together if count<=len(terms)-2: #MainQuery+= ' Demographics:' + terms[count].strip() + ' ' +conj[count] if fields[count]==fields[count+1]: if fields[count-1]!=fields[count]: MainQuery+= ' (Demographics:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+=' Demographics:' + terms[count].strip() + ' ' +conj[count] else: MainQuery+='Demographics:' + terms[count].strip() + ') ' +conj[count] else: if fields[count]==fields[count-1]: MainQuery+= ' Demographics:' + terms[count].strip() + ') ' else: MainQuery+=' Demographics:' + terms[count].strip() + ' ' #else: # MainQuery+= ' Demographics:' + terms[count].strip() #print MainQuery # ====================================== # Work with Study if fields[count].strip()=='Study': mparser = MultifieldParser(["IDName","path","title","desc"], schema) query= mparser.parse(unicode(iQuery)) # Combine all queries together if count<=len(terms)-2: MainQuery+=str(query) + ' ' +conj[count] + ' ' else: MainQuery+=str(query) # Work with Study Name if fields[count].find('Study Name')>=0: mparser = MultifieldParser(["title"], schema) query= mparser.parse(unicode(iQuery)) # Combine all queries together if count<=len(terms)-2: MainQuery+=str(query) + ' ' +conj[count] + ' ' else: MainQuery+=str(query) # Work with Study ID if fields[count].find('Study ID')>=0: mparser = MultifieldParser(["IDName"], schema) query= mparser.parse(unicode(iQuery)) # Combine all queries together if count<=len(terms)-2: MainQuery+=str(query) + ' ' +conj[count] + ' ' else: MainQuery+=str(query) # ====================================== # Work with Variable if fields[count]=='Variable': mparser = MultifieldParser(["phenID","phenName","phenType","phenDesc"], schema) query= mparser.parse(unicode(iQuery)) if count<len(terms)-1: MainQuery+= str(query) + ' ' +conj[count] + ' ' else: MainQuery+= str(query) # Work with 
Variable Description if fields[count].find('Variable Desc')>=0: mparser = MultifieldParser(["phenDesc"], schema) query= mparser.parse(unicode(iQuery)) # Combine all queries together if count<=len(terms)-2: MainQuery+=str(query) + ' ' +conj[count] + ' ' else: MainQuery+=str(query) # Work with Variable Name if fields[count].find('Variable Name')>=0: mparser = MultifieldParser(["phenName"], schema) query= mparser.parse(unicode(iQuery)) # Combine all queries together if count<=len(terms)-2: MainQuery+=str(query) + ' ' +conj[count] + ' ' else: MainQuery+=str(query) # Work with Variable ID if fields[count].find('Variable ID')>=0: mparser = MultifieldParser(["phenID"], schema) query= mparser.parse(unicode(iQuery)) # Combine all queries together if count<=len(terms)-2: MainQuery+=str(query) + ' ' +conj[count] + ' ' else: MainQuery+=str(query) # ====================================== # Work with All Fields if fields[count].find('All Fields')>=0: # Added on May 21, 2013 query = iQuery if concept==True: query_temp = QueryAnalys(query, concept) else: query_temp = query # Combine all queries together #if count<=len(terms)-2: # MainQuery+=str(query) + ' ' +conj[count] + ' ' #else: # MainQuery+=str(query) if count<=len(terms)-2: MainQuery+=query_temp + ' ' + conj[count] + ' ' else: MainQuery+=query_temp + ' ' count+=1 return (MainQuery,terms)