def generate_name_queries(name):
    """
    Yield display-name queries for a name the user typed with a wildcard.

    Callers are expected to use this only when the name contains a
    wildcard; for all other names the wildcard/reducer strategy is used
    instead.

    :param name: The name exactly as the user entered it.
    :return: A generator of GeneratedQuery objects.
    """
    has_wildcard = "*" in name
    if has_wildcard:
        # Pass the user's own wildcard pattern through untouched.
        yield GeneratedQuery(
            description=f'Name matches "{name}"',
            request_input=ListPersonsInput(display_name=name),
        )

    # A pattern that already leads with "*" needs no extra prefix.
    if name.startswith("*"):
        return

    yield GeneratedQuery(
        description=f'Name includes "{name}"',
        request_input=ListPersonsInput(display_name=f"*{name}"),
    )
def generate_box_number_queries(
    box_number: str,
) -> Tuple[str, ListPersonsInput]:
    """
    Yield mailstop queries for a box number.

    Two queries are always produced: one for the box number as given, and
    one with "35" prepended, because users commonly omit that prefix.

    :param box_number: The box number as supplied by the user.
    :return: A generator of GeneratedQuery objects (one per mailstop variant).
    """
    # PWS only ever returns "begins with" results for mailstop.
    yield GeneratedQuery(
        description=f'Mailstop begins with "{box_number}"',
        request_input=ListPersonsInput(mail_stop=box_number),
    )

    # All (most?) UW mail stops start with '35', and so it is considered
    # shorthand to omit it at times. To be sure we account for shorthand
    # input, we also always try adding '35' to every query.
    alt_number = f"35{box_number}"
    yield GeneratedQuery(
        # alt_number already carries the prefix; do not prepend "35" again,
        # or the description would not match the query actually sent.
        description=f'Mailstop begins with "{alt_number}"',
        request_input=ListPersonsInput(mail_stop=alt_number),
    )
def generate_department_queries(
    self,
    department: str,
    include_alt_queries: bool = True,
) -> Tuple[str, ListPersonsInput]:
    """
    Yield the queries to run for a department search.

    :param department: The department text to search for.
    :param include_alt_queries: When True, the search is widened beyond the
        literal input by swapping " and " for " & " (or "&" for " and ") and
        generating the same set of queries for the swapped text, so users
        do not have to know which spelling a department uses.
    :return: A generator of GeneratedQuery objects.
    """
    yield GeneratedQuery(
        description=f'Department matches "{department}"',
        request_input=ListPersonsInput(department=department),
    )

    # If the user provides a wildcard, we'll let PWS do the rest of the work.
    if "*" in department:
        return

    yield GeneratedQuery(
        description=f'Department begins with "{department}"',
        request_input=ListPersonsInput(
            department=ArgFmt.begins_with(department)
        ),
    )
    yield GeneratedQuery(
        description=f'Department contains "{department}"',
        request_input=ListPersonsInput(
            department=ArgFmt.contains(department)
        ),
    )

    if not include_alt_queries:
        return

    # The surrounding spaces keep us from rewriting words that merely
    # contain 'and' (e.g. "Scandinavian").
    if " and " in department:
        swapped = department.replace(" and ", " & ")
    elif "&" in department:
        swapped = department.replace("&", " and ")
    else:
        # Nothing to swap, so there are no alternate queries to run.
        return

    # Collapse any runs of whitespace the replacement may have introduced.
    normalized = " ".join(swapped.split())

    # Disable alt queries on the recursive pass so the swap happens once.
    yield from self.generate_department_queries(
        normalized, include_alt_queries=False
    )
def generate_email_queries(partial: str) -> Tuple[str, ListPersonsInput]:
    """
    Yield the queries to run for an email search.

    Three cases are handled:

    - ``partial`` validates as a complete email address: search for exactly
      that address, plus the same username at the sibling domain when the
      address ends in "@uw.edu" or "@washington.edu".
    - ``partial`` contains "@" or "*" but is not a valid address: run only
      the query as typed.
    - anything else (e.g. 'foo123'): search for addresses that begin with,
      and addresses that contain, the input.

    :param partial: The email text supplied by the user.
    :return: A generator of GeneratedQuery objects.
    """
    sibling_domains = {
        "@uw.edu": "washington.edu",
        "@washington.edu": "uw.edu",
    }

    try:
        username, _ = validate_email(partial)

        # Decide whether to help the user by also searching the sibling domain.
        alternate = next(
            (
                domain
                for suffix, domain in sibling_domains.items()
                if partial.endswith(suffix)
            ),
            None,
        )

        yield GeneratedQuery(
            description=f'Email is "{partial}"',
            request_input=ListPersonsInput(email=partial),
        )
        if alternate:
            alternate_email = f"{username}@{alternate}"
            yield GeneratedQuery(
                description=f'Email is "{alternate_email}"',
                request_input=ListPersonsInput(email=alternate_email),
            )
        return
    except EmailError:
        # Not a complete, valid address; fall through to the partial cases.
        pass

    if "@" not in partial and "*" not in partial:
        # The user supplied something like 'foo123'; try a couple of
        # wildcard combinations on their behalf.
        yield GeneratedQuery(
            description=f'Email begins with "{partial}"',
            request_input=ListPersonsInput(
                email=WildcardFormat.begins_with(partial)
            ),
        )
        yield GeneratedQuery(
            description=f'Email contains "{partial}"',
            request_input=ListPersonsInput(
                email=WildcardFormat.contains(partial)
            ),
        )
        return

    # An '@' or '*' signals the user wants this specific query, so no
    # additional results are forcibly included.
    yield GeneratedQuery(
        description=f'Email matches "{partial}"',
        request_input=ListPersonsInput(email=partial),
    )
def test_list_persons(self):
    """list_persons sends exactly one request, addressed to the /person URL."""
    # Arrange: a display-name query and a canned search response.
    query = ListPersonsInput(display_name="test")
    canned_response = self.mock_people.as_search_output()
    self.mock_send_request.return_value = canned_response
    expected_url = f"{self.client.pws_url}/person"

    # Act
    self.client.list_persons(query)

    # Assert: one call, whose first positional argument is the URL.
    self.mock_send_request.assert_called_once()
    positional_args = self.mock_send_request.call_args[0]
    assert positional_args[0] == expected_url
def _set_up_multipage_search(self):
    """
    Arrange a two-page search result.

    The first request returns a page whose ``next`` points at a second
    page; following that link via ``get_explicit_href`` returns whatever
    ``mock_send_request`` was returning before this helper ran.
    """
    next_link = ListPersonsInput(href="https://foo/page-2")
    first_page = self.mock_people.as_search_output(next_=next_link)

    # Capture the current return value BEFORE overwriting it; it becomes
    # the second page.
    second_page = self.mock_send_request.return_value
    self.mock_send_request.return_value = first_page

    # NOTE(review): the patcher is started here and never stopped in this
    # helper; presumably teardown elsewhere undoes it - confirm.
    patcher = mock.patch.object(self.pws_client, "get_explicit_href")
    follow_href = patcher.start()
    follow_href.return_value = ListPersonsOutput.parse_obj(second_page)
def as_search_output(
    *people: PersonOutput,
    next_: Optional["ListPersonsInput"] = None,
) -> Dict:
    """
    Wrap the given people in a single-page ListPersonsOutput and return it
    as a dict using field aliases.

    :param people: The person records to include in the page.
    :param next_: Optional link to a following page. Callers pass a
        ListPersonsInput carrying an ``href`` (not a bare string), which is
        what consumers read via ``output.next.href``.
    :return: The output model serialized with ``dict(by_alias=True)``.
    """
    return ListPersonsOutput(
        persons=list(people),
        current=ListPersonsInput(),  # Not used
        # Everything fits on one page, so the size and total are both
        # simply the number of people supplied.
        page_size=len(people),
        page_start=1,
        total_count=len(people),
        next=next_,
    ).dict(by_alias=True)
def generate_sanitized_phone_queries(
    phone: str,
) -> Tuple[str, ListPersonsInput]:
    """
    Yield phone-number queries for an already-sanitized phone string.

    The number is always searched exactly as provided. When it is longer
    than 10 characters, a second query is issued for just the trailing 10,
    on the assumption that the leading part is a country code.

    Right now, PWS only supports phone number searches and will not return
    results for pagers, faxes, etc. This is a regression from the previous
    directory product, which allowed pager searches.

    :param phone: The phone number (digits only).
    :return: A generator of GeneratedQuery objects.
    """
    yield GeneratedQuery(
        description=f'Phone matches "{phone}"',
        request_input=ListPersonsInput(phone_number=phone),
    )

    if len(phone) <= 10:
        return

    # Keep only the trailing "XXX YYY-ZZZZ" portion.
    no_country_code = phone[-10:]
    yield GeneratedQuery(
        description=f'Phone matches "{no_country_code}"',
        request_input=ListPersonsInput(phone_number=no_country_code),
    )
def search_directory_experimental(
    self, request_input: SearchDirectoryInput
) -> SearchDirectoryOutput:
    """
    Search the directory by name using one wildcard query per population.

    This query function is intended to improve performance significantly,
    but is still being tested for accuracy and edge cases.

    Only one query is sent to PWS per requested population (plus one
    request per additional result page). The query wraps EVERY
    whitespace-separated token of the user's input in wildcards. For
    example, "buffy summers" becomes a display-name query for:

        "*buffy* *summers*"

    and "buffy anne summers" becomes "*buffy* *anne* *summers*".

    Per the original author's notes, for "*buffy* *summers*" PWS is
    expected to return results such as (TODO confirm against PWS):
        - buffy anne summers
        - buffy "the vampire slayer" summers
        - ubuffya alsummersia
        - buffy-anne summers
        - buffy anne summers-finn

    Every page of results is handed to ``self.reducer``, which is
    responsible for sorting the names into buckets by relevance.

    :param request_input: The search request; ``name`` and
        ``requested_populations`` are read from it.
    :return: One scenario per bucket produced by the reducer.
    """
    timer_context = {
        "query": request_input.dict(
            exclude_none=True,
            by_alias=True,
            exclude_properties=True,
            exclude_unset=True,
        ),
        "statistics": {},
    }
    timer = Timer("search_directory", context=timer_context).start()

    # Tokenize once; the same tokens feed both the statistics and the query.
    tokens = request_input.name.split()
    statistics = ListPersonsRequestStatistics(
        num_queries_generated=1,
        num_user_search_tokens=len(tokens),
    )
    query = " ".join(f"*{token}*" for token in tokens)

    results = {}
    for population in request_input.requested_populations:
        # Only the affiliation state matching this population is set; the
        # other is left as None.
        pws_output: ListPersonsOutput = self._pws.list_persons(
            ListPersonsInput(
                display_name=query,
                employee_affiliation_state=(
                    AffiliationState.current
                    if population == "employees"
                    else None
                ),
                student_affiliation_state=(
                    AffiliationState.current
                    if population == "students"
                    else None
                ),
            ),
            populations=request_input.requested_populations,
        )
        statistics.aggregate(pws_output.request_statistics)
        results = self.reducer.reduce_output(
            pws_output, request_input.name, results
        )

        # Follow pagination links until PWS reports no further page.
        while pws_output.next:
            pws_output = self._pws.get_explicit_href(
                pws_output.next.href, output_type=ListPersonsOutput
            )
            results = self.reducer.reduce_output(
                pws_output, request_input.name, results
            )
            statistics.aggregate(pws_output.request_statistics)

    statistics.num_duplicates_found = self.reducer.duplicate_hit_count
    timer.context["statistics"] = statistics.dict(by_alias=True)
    timer.stop(emit_log=True)

    return SearchDirectoryOutput(
        scenarios=[
            DirectoryQueryScenarioOutput(
                description=b.description,
                populations=self.pws_translator.translate_bucket(b),
            )
            for b in results.values()
        ]
    )