def test_one_case(self, input_query, expected):
    # Run a single query through transform_search and assert that the
    # output equals `expected`.
    result = transform_search(input_query)

    # Failure message listing the input, the expected value and the value
    # actually produced. Starting from a unicode literal keeps the whole
    # message unicode.
    failure_message = (
        u""
        + "\n input: " + input_query
        + "\n expected: " + expected
        + "\n got: " + unicode(result)
    )

    self.assertEqual(expected, result, failure_message)

    # Only reached when the assertion above passed.
    logging.info("Correct: %s -> %s", input_query, expected)
def test_one_case(self, input_query, expected):
    # Run a single query through transform_search and assert that the
    # output equals `expected`.
    result = transform_search(input_query)

    # Failure message listing the input, the expected value and the value
    # actually produced. Starting from a unicode literal keeps the whole
    # message unicode.
    failure_message = (
        u""
        + "\n input: " + input_query
        + "\n expected: " + expected
        + "\n got: " + unicode(result)
    )

    self.assertEqual(expected, result, failure_message)

    # Only reached when the assertion above passed.
    logger.info("Correct: %s -> %s", input_query, expected)
def prepare_sqs_from_search_params(params, sqs=None):
    """Apply the search parameters in ``params`` as filters on a query set.

    Reads the ``q``, ``location``, ``moc``, ``moc_id``, ``company`` and
    ``exact_title`` entries of ``params`` (via ``params.get``). When ``sqs``
    is None a fresh ``DESearchQuerySet`` is used as the starting point.

    Returns the filtered query set with ``highlight()`` applied.
    """
    # Description is usually searched twice, so the title needs a larger
    # boost to outweigh it.
    title_boost = 10

    query_text = params.get('q')
    location = params.get('location')
    moc = params.get('moc')
    moc_id = params.get('moc_id')
    company = params.get('company')
    exact_title = bool(params.get('exact_title'))

    if sqs is None:
        sqs = DESearchQuerySet()

    # Clean every supplied (truthy) value. The cleaned results are keyed by
    # the raw value so they can be looked up again just below; a missing
    # parameter (None) simply yields None from .get().
    cleaned = {}
    for raw_value in (query_text, location, moc, moc_id, company):
        if raw_value:
            cleaned[raw_value] = _clean(raw_value)

    q_val = cleaned.get(query_text)
    moc_val = cleaned.get(moc)
    moc_id_val = cleaned.get(moc_id)
    loc_val = cleaned.get(location)

    if q_val:
        # Per the original author's note, the Haystack API offers no way to
        # boost a term in a single field, so the Solr boost syntax
        # (e.g. 'title:(Accountant)^2') is assembled by hand and passed
        # through Raw(). Parentheses rather than quotes let Solr parse more
        # complex title searches. The aim is for title matches to rank above
        # documents that only mention the term in their text.
        #
        # Dashes surrounded by spaces are escaped because they probably
        # aren't intended as negation:
        #   Retail -Sales   searches for Retail excluding Sales
        #   Retail - Sales  searches for 'Retail - Sales'
        # Note the raw query text is transformed here; the cleaned value is
        # only used to decide whether to filter at all.
        escaped_query = query_text.replace(' - ', ' \\- ')
        title_expr = "(%s)" % transform_search(escaped_query)
        boosted_title = u"({t})^{b}".format(t=title_expr, b=title_boost)

        if exact_title:
            # NOTE(review): the parenthesised expression is what gets passed
            # to the exact filter -- confirm that is intended.
            sqs = sqs.filter(title_exact__exact=title_expr)
        else:
            # Description is queried as well so that highlighting matches
            # the exact term and not a stem.
            content_match = SQ(content=Raw("((%s))^1" % title_expr))
            title_match = SQ(title=Raw(boosted_title))
            description_match = SQ(description=Raw(title_expr))
            sqs = sqs.filter(
                content_match | title_match | description_match).highlight()

    # Restrict to the cleaned location via the full_loc field.
    if loc_val:
        sqs = sqs.filter(full_loc=loc_val)

    # NOTE(review): the raw company value is used here, not the cleaned
    # one -- presumably deliberate for the exact match; confirm.
    if company:
        sqs = sqs.filter(company_exact__exact=company)

    if moc_val:
        # When settings.SITE_BUIDS is truthy the mapped_moc* fields are
        # queried instead of the plain ones (custom MOC-Onet mappings,
        # according to the original note).
        if settings.SITE_BUIDS:
            prefix = 'mapped_'
        else:
            prefix = ''

        if moc_id_val:
            moc_filter = SQ(**{'%smocid' % prefix: moc_id_val})
        else:
            by_moc = SQ(**{'%smoc' % prefix: moc_val})
            by_moc_slab = SQ(**{'%smoc_slab' % prefix: moc_val})
            moc_filter = SQ(by_moc | by_moc_slab)

        sqs = sqs.filter(moc_filter)

    return sqs.highlight()
def prepare_sqs_from_search_params(params, sqs=None):
    """Apply the search parameters in ``params`` as filters on a query set.

    Reads the ``q``, ``location``, ``moc``, ``moc_id``, ``company`` and
    ``exact_title`` entries of ``params`` (via ``params.get``). When ``sqs``
    is None a fresh ``DESearchQuerySet`` is used as the starting point.

    Returns the filtered query set with ``highlight()`` applied.
    """
    # Description is usually searched twice, so the title needs a larger
    # boost to outweigh it.
    title_boost = 10

    query_text = params.get('q')
    location = params.get('location')
    moc = params.get('moc')
    moc_id = params.get('moc_id')
    company = params.get('company')
    exact_title = bool(params.get('exact_title'))

    if sqs is None:
        sqs = DESearchQuerySet()

    # Clean every supplied (truthy) value. The cleaned results are keyed by
    # the raw value so they can be looked up again just below; a missing
    # parameter (None) simply yields None from .get().
    cleaned = {}
    for raw_value in (query_text, location, moc, moc_id, company):
        if raw_value:
            cleaned[raw_value] = _clean(raw_value)

    q_val = cleaned.get(query_text)
    moc_val = cleaned.get(moc)
    moc_id_val = cleaned.get(moc_id)
    loc_val = cleaned.get(location)

    if q_val:
        # Per the original author's note, the Haystack API offers no way to
        # boost a term in a single field, so the Solr boost syntax
        # (e.g. 'title:(Accountant)^2') is assembled by hand and passed
        # through Raw(). Parentheses rather than quotes let Solr parse more
        # complex title searches. The aim is for title matches to rank above
        # documents that only mention the term in their text.
        #
        # Dashes surrounded by spaces are escaped because they probably
        # aren't intended as negation:
        #   Retail -Sales   searches for Retail excluding Sales
        #   Retail - Sales  searches for 'Retail - Sales'
        # Note the raw query text is transformed here; the cleaned value is
        # only used to decide whether to filter at all.
        escaped_query = query_text.replace(' - ', ' \\- ')
        title_expr = "(%s)" % transform_search(escaped_query)
        boosted_title = u"({t})^{b}".format(t=title_expr, b=title_boost)

        if exact_title:
            # NOTE(review): the parenthesised expression is what gets passed
            # to the exact filter -- confirm that is intended.
            sqs = sqs.filter(title_exact__exact=title_expr)
        else:
            # Description is queried as well so that highlighting matches
            # the exact term and not a stem.
            content_match = SQ(content=Raw("((%s))^1" % title_expr))
            title_match = SQ(title=Raw(boosted_title))
            description_match = SQ(description=Raw(title_expr))
            sqs = sqs.filter(
                content_match | title_match | description_match).highlight()

    # Restrict to the cleaned location via the full_loc field.
    if loc_val:
        sqs = sqs.filter(full_loc=loc_val)

    # NOTE(review): the raw company value is used here, not the cleaned
    # one -- presumably deliberate for the exact match; confirm.
    if company:
        sqs = sqs.filter(company_exact__exact=company)

    if moc_val:
        # When settings.SITE_BUIDS is truthy the mapped_moc* fields are
        # queried instead of the plain ones (custom MOC-Onet mappings,
        # according to the original note).
        if settings.SITE_BUIDS:
            prefix = 'mapped_'
        else:
            prefix = ''

        if moc_id_val:
            moc_filter = SQ(**{'%smocid' % prefix: moc_id_val})
        else:
            by_moc = SQ(**{'%smoc' % prefix: moc_val})
            by_moc_slab = SQ(**{'%smoc_slab' % prefix: moc_val})
            moc_filter = SQ(by_moc | by_moc_slab)

        sqs = sqs.filter(moc_filter)

    return sqs.highlight()