def generate_name_queries(name):
     """
     Yield display-name queries for a user-supplied name.

     Per the original author's note, this generator is only meant to run
     when the user's input contains a wildcard; otherwise the
     wildcard/reducer strategy is used instead.

     Two queries may be produced, in this order:
       1. The name exactly as typed, only when it contains a "*".
       2. The name with a leading "*" prepended, only when the name does
          not already start with one.
     """
     has_wildcard = "*" in name
     if has_wildcard:
         exact_input = ListPersonsInput(display_name=name)
         yield GeneratedQuery(
             description=f'Name matches "{name}"',
             request_input=exact_input,
         )

     # A leading wildcard is already present; prepending another adds nothing.
     if name.startswith("*"):
         return

     prefixed_input = ListPersonsInput(display_name=f"*{name}")
     yield GeneratedQuery(
         description=f'Name includes "{name}"',
         request_input=prefixed_input,
     )
 def generate_box_number_queries(
         box_number: str) -> Tuple[str, ListPersonsInput]:
     """
     Yield mail stop queries for the given box number.

     Two GeneratedQuery objects are yielded, in order:
       1. The box number exactly as supplied.
       2. The box number prefixed with "35".

     :param box_number: The box number (mail stop) the user typed.
     """
     # PWS only ever returns "begins with" results for mailstop.
     yield GeneratedQuery(
         description=f'Mailstop begins with "{box_number}"',
         request_input=ListPersonsInput(mail_stop=box_number),
     )
     # All (most?) UW mail stops start with '35,' and so it is considered shorthand to omit
     # them at times. To be sure we account for shorthand input, we will also always try
     # adding '35' to every query.
     alt_number = f"35{box_number}"
     # alt_number already carries the "35" prefix, so it is interpolated as-is;
     # the description must show the same value that is sent as mail_stop.
     yield GeneratedQuery(
         description=f'Mailstop begins with "{alt_number}"',
         request_input=ListPersonsInput(mail_stop=alt_number),
     )
    def generate_department_queries(
            self,
            department: str,
            include_alt_queries: bool = True) -> Tuple[str, ListPersonsInput]:
        """
        Yield the queries to run for a department search.

        The exact "matches" query is always yielded first. If the input
        contains a wildcard, nothing else is generated. Otherwise a
        "begins with" and a "contains" query follow.

        :param department:  The department text supplied by the user.
        :param include_alt_queries:  When True, the same set of queries is
            additionally generated for an alternate spelling of the input in
            which " and " is swapped for " & " (or "&" for " and "), so users
            do not have to know which form a department uses. The alternate
            pass is run with this flag turned off, so it happens only once.
        :return: A generator of GeneratedQuery objects.
        """
        exact = ListPersonsInput(department=department)
        yield GeneratedQuery(
            description=f'Department matches "{department}"',
            request_input=exact,
        )

        # A user-supplied wildcard means PWS does the rest of the work.
        if "*" in department:
            return

        yield GeneratedQuery(
            description=f'Department begins with "{department}"',
            request_input=ListPersonsInput(
                department=ArgFmt.begins_with(department)),
        )
        yield GeneratedQuery(
            description=f'Department contains "{department}"',
            request_input=ListPersonsInput(
                department=ArgFmt.contains(department)),
        )

        if not include_alt_queries:
            return

        # The surrounding spaces on " and " avoid touching words that merely
        # contain the letters 'and'.
        if " and " in department:
            alternate = department.replace(" and ", " & ")
        elif "&" in department:
            alternate = department.replace("&", " and ")
        else:
            # No conjunction in the input, so there is no alternate spelling.
            return

        # Collapse any runs of whitespace introduced by the substitution.
        alternate = " ".join(alternate.split())
        yield from self.generate_department_queries(
            alternate, include_alt_queries=False)
    def generate_email_queries(partial: str) -> Tuple[str, ListPersonsInput]:
        """
        Yield the queries to run for an email search.

        If ``validate_email`` accepts the input, only exact ("is") queries
        are produced: one for the address as typed and, for addresses ending
        in "@uw.edu" or "@washington.edu", one for the same username at the
        other of those two domains.

        If ``validate_email`` raises ``EmailError``, the input is treated as
        a partial: input containing "@" or "*" is run as a single query
        as-is; anything else produces a "begins with" and a "contains" query.

        :param partial: The email text supplied by the user.
        """
        # Suffix the user typed -> domain to additionally try.
        domain_swaps = {
            "@uw.edu": "washington.edu",
            "@washington.edu": "uw.edu",
        }

        try:
            username, _ = validate_email(partial)
            # Decide whether to help the user by also querying the
            # alternate domain.
            other_domain = None
            for suffix, swap in domain_swaps.items():
                if partial.endswith(suffix):
                    other_domain = swap
                    break

            yield GeneratedQuery(
                description=f'Email is "{partial}"',
                request_input=ListPersonsInput(email=partial),
            )
            if other_domain:
                alternate_email = f"{username}@{other_domain}"
                yield GeneratedQuery(
                    description=f'Email is "{alternate_email}"',
                    request_input=ListPersonsInput(email=alternate_email),
                )
            return
        except EmailError:
            # Not a complete address; fall through to partial handling.
            pass

        # A partial containing '@' or '*' is assumed to be exactly the query
        # the user wants, so no additional queries are forced on them.
        user_was_specific = "@" in partial or "*" in partial
        if user_was_specific:
            yield GeneratedQuery(
                description=f'Email matches "{partial}"',
                request_input=ListPersonsInput(email=partial),
            )
            return

        # For a bare fragment such as 'foo123', try a couple of combinations.
        yield GeneratedQuery(
            description=f'Email begins with "{partial}"',
            request_input=ListPersonsInput(
                email=WildcardFormat.begins_with(partial)),
        )
        yield GeneratedQuery(
            description=f'Email contains "{partial}"',
            request_input=ListPersonsInput(
                email=WildcardFormat.contains(partial)),
        )
# Example #5
# 0
 def test_list_persons(self):
     """list_persons should send exactly one request, addressed to the /person endpoint."""
     query = ListPersonsInput(display_name="test")
     canned_response = self.mock_people.as_search_output()
     self.mock_send_request.return_value = canned_response
     expected_url = f"{self.client.pws_url}/person"

     self.client.list_persons(query)

     self.mock_send_request.assert_called_once()
     # First positional argument of the recorded call is the URL.
     positional_args = self.mock_send_request.call_args[0]
     assert positional_args[0] == expected_url
 def _set_up_multipage_search(self):
     """
     Arrange the mocks so that a search spans two pages.

     The response currently configured on ``mock_send_request`` becomes the
     second page (served through a patched ``get_explicit_href``), and a new
     first page whose ``next`` points at "https://foo/page-2" takes its place.
     """
     next_link = ListPersonsInput(href="https://foo/page-2")
     first_page = self.mock_people.as_search_output(next_=next_link)
     # Must be captured before return_value is overwritten below.
     second_page = self.mock_send_request.return_value
     self.mock_send_request.return_value = first_page

     href_patcher = mock.patch.object(self.pws_client, "get_explicit_href")
     patched_get_href = href_patcher.start()
     patched_get_href.return_value = ListPersonsOutput.parse_obj(second_page)
# Example #7
# 0
 def as_search_output(*people: PersonOutput,
                      next_: Optional[str] = None) -> Dict:
     """
     Wrap the given people in a single-page ListPersonsOutput and return it
     as a dict keyed by field aliases.

     :param people: The PersonOutput entries to include in the page.
     :param next_: Value for the output's ``next`` field; None by default.
     """
     roster = list(people)
     search_page = ListPersonsOutput(
         persons=roster,
         current=ListPersonsInput(),  # Not used
         page_size=len(people),
         page_start=1,
         total_count=len(people),
         next=next_,
     )
     return search_page.dict(by_alias=True)
    def generate_sanitized_phone_queries(
            phone: str) -> Tuple[str, ListPersonsInput]:
        """
        Yield phone queries: first the number exactly as provided, then, when
        the number is longer than 10 characters, its last 10 characters only.

        Right now, PWS only supports phone number searches, and won't return results for pagers, faxes, etc.
        This is a regression from the previous directory product that allowed pager searches.

        :param phone: The phone number (digits only)
        """
        yield GeneratedQuery(
            description=f'Phone matches "{phone}"',
            request_input=ListPersonsInput(phone_number=phone),
        )

        # 10 digits is a full XXX YYY-ZZZZ number; nothing to trim.
        if len(phone) <= 10:
            return

        # Anything in front of the last 10 digits is treated as a country code.
        no_country_code = phone[-10:]
        trimmed_input = ListPersonsInput(phone_number=no_country_code)
        yield GeneratedQuery(
            description=f'Phone matches "{no_country_code}"',
            request_input=trimmed_input,
        )
# Example #9
# 0
    def search_directory_experimental(
            self,
            request_input: SearchDirectoryInput) -> SearchDirectoryOutput:
        """
        This new query function improves performance significantly, but is still
        being tested for accuracy and edge cases.

        Only one initial query is sent to PWS per requested population (plus
        one request per additional result page). The query wraps every
        whitespace-separated token of the user's input in wildcards.

        For example: "buffy summers" becomes a query for display names
        matching:
            "*buffy* *summers*"

        and "buffy anne summers" becomes:
            "*buffy* *anne* *summers*"

        Every page of PWS output is passed to ``self.reducer.reduce_output``
        together with the original name and the results accumulated so far;
        the reducer's buckets are then translated into the scenarios of the
        returned SearchDirectoryOutput.

        Timing and request statistics are attached to a "search_directory"
        Timer, which is stopped with ``emit_log=True`` once all populations
        have been processed.
        """
        query_snapshot = request_input.dict(
            exclude_none=True,
            by_alias=True,
            exclude_properties=True,
            exclude_unset=True,
        )
        timer = Timer(
            "search_directory",
            context={"query": query_snapshot, "statistics": {}},
        ).start()

        tokens = request_input.name.split()
        statistics = ListPersonsRequestStatistics(
            num_queries_generated=1,
            num_user_search_tokens=len(tokens),
        )
        wildcard_query = " ".join(f"*{token}*" for token in tokens)
        buckets = {}

        for population in request_input.requested_populations:
            # Only the affiliation matching this population is constrained.
            employee_state = (AffiliationState.current
                              if population == "employees" else None)
            student_state = (AffiliationState.current
                             if population == "students" else None)
            page: ListPersonsOutput = self._pws.list_persons(
                ListPersonsInput(
                    display_name=wildcard_query,
                    employee_affiliation_state=employee_state,
                    student_affiliation_state=student_state,
                ),
                populations=request_input.requested_populations,
            )

            statistics.aggregate(page.request_statistics)
            buckets = self.reducer.reduce_output(page, request_input.name,
                                                 buckets)

            # Follow the "next" links until PWS reports no further pages.
            while page.next:
                page = self._pws.get_explicit_href(
                    page.next.href, output_type=ListPersonsOutput)
                buckets = self.reducer.reduce_output(page,
                                                     request_input.name,
                                                     buckets)
                statistics.aggregate(page.request_statistics)

        statistics.num_duplicates_found = self.reducer.duplicate_hit_count
        timer.context["statistics"] = statistics.dict(by_alias=True)
        timer.stop(emit_log=True)

        scenarios = [
            DirectoryQueryScenarioOutput(
                description=bucket.description,
                populations=self.pws_translator.translate_bucket(bucket),
            ) for bucket in buckets.values()
        ]
        return SearchDirectoryOutput(scenarios=scenarios)