Example #1
    def mephistoDBReader():
        from mephisto.abstractions.databases.local_database import LocalMephistoDB
        from mephisto.tools.data_browser import DataBrowser as MephistoDataBrowser

        db = LocalMephistoDB()
        mephisto_data_browser = MephistoDataBrowser(db=db)

        # `database_task_name` is expected to be defined in the enclosing scope
        # (the name of the Mephisto task whose units should be reviewed).
        units = mephisto_data_browser.get_units_for_task_name(database_task_name)
        for unit in units:
            yield mephisto_data_browser.get_data_from_unit(unit)
    def mephistoDBReader():
        from mephisto.abstractions.databases.local_database import LocalMephistoDB
        from mephisto.tools.data_browser import DataBrowser as MephistoDataBrowser

        db = LocalMephistoDB()
        mephisto_data_browser = MephistoDataBrowser(db=db)

        def format_data_for_review(data):
            # Placeholder formatter: `contents` holds the unit's saved agent data.
            # A real task would build a readable string from it; here the whole
            # record is simply dumped.
            contents = data["data"]
            return f"{data}"

        units = mephisto_data_browser.get_units_for_task_name(
            database_task_name)
        for unit in units:
            yield format_data_for_review(
                mephisto_data_browser.get_data_from_unit(unit))
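A minimal consumption sketch for the reader above, assuming `mephistoDBReader` is in scope; the task name is a hypothetical placeholder:

database_task_name = "my-task-name"  # placeholder: must match the launched Mephisto task
for formatted_unit in mephistoDBReader():
    print(formatted_unit)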
def print_results(
    db: "MephistoDB",
    task_name: str,
    format_data_for_printing: Callable[[Dict[str, Any]], str],
    start: Optional[int] = None,
    end: Optional[int] = None,
) -> None:
    """
    Script to write out to stdout from start to end results from the task with the given task name
    """
    data_browser = DataBrowser(db=db)
    units = data_browser.get_units_for_task_name(task_name)
    if end is None:
        end = len(units)
    if start is None:
        start = 0
    units.reverse()
    for unit in units[start:end]:
        print(_get_and_format_data(data_browser, format_data_for_printing, unit))
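A hedged usage sketch for `print_results`; the task name is a placeholder and the lambda stands in for a task-specific formatter:

from mephisto.abstractions.databases.local_database import LocalMephistoDB

db = LocalMephistoDB()
# Dump the saved agent data for up to ten units of the task.
print_results(db, "my-task-name", lambda data: str(data["data"]), start=0, end=10)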
Example #4
class AcuteAnalyzer(object):
    """
    Analyzer.

    Given a run_id, we can do lots of fun things!
    """

    def __init__(self, opt: Dict, remove_failed: bool = True):
        """
        Initialize the analyzer.

        Builds up the dataframe

        :param opt:
            opt dict

        :param remove_failed:
            Whether to remove ratings from turkers who failed onboarding
        """
        self.root_dir = opt['root_dir']
        assert os.path.isdir(self.root_dir), '--root-dir must be a real directory!'
        self.run_id = opt['run_id']
        self.outdir = opt['outdir']
        # Get task for loading pairing files
        self.task = opt.get('task', 'q')
        if opt.get('model_ordering') is not None:
            self.custom_model_ordering = opt['model_ordering'].split(',')
        else:
            self.custom_model_ordering = None
        if not self.outdir:
            self.outdir = os.path.join(self.root_dir, f'{self.run_id}-results')
        if not os.path.exists(self.root_dir):
            os.makedirs(self.root_dir, exist_ok=True)
        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir, exist_ok=True)
        mephisto_root_path = opt['mephisto_root']
        if not mephisto_root_path:
            mephisto_root_path = None
        mephisto_db = LocalMephistoDB(database_path=mephisto_root_path)
        self.mephisto_data_browser = MephistoDataBrowser(db=mephisto_db)
        self.checkbox_prefix = CHECKBOX_PREFIX
        # Prepended to checkbox columns in self.dataframe
        self.dataframe = self._extract_to_dataframe()
        if remove_failed:
            self._remove_failed_onboarding()
        if self.dataframe.index.size == 0:
            raise ValueError('No valid results found!')
        self._get_model_nick_names()
        self._load_pairing_files()

    def _extract_response_by_index(
        self, unit_details: Dict[str, Any], idx: int
    ) -> Optional[Dict[str, Any]]:
        """
        Extract response data from task data.

        :param unit_details:
            full extracted data from a unit
        :param idx:
            index of the singular evaluation within unit_details to extract

        :return response:
            Formatted worker's response data from the task
        """
        task_data = unit_details['data'][idx]
        response: Dict[str, Any] = {
            'run_id': self.run_id,
            'worker': unit_details['worker_id'],
            'time_taken': unit_details['task_end'] - unit_details['task_start'],
            'question': task_data['task_specs']['question'],
            'unit_id': unit_details['unit_id'],
            'task_start': unit_details['task_start'],
        }
        onboarding = task_data['task_specs'].get('is_onboarding', False)
        if 'speakerChoice' not in task_data or task_data['speakerChoice'] == '':
            print('speakerChoice not in task data!')
            return None
        choice = task_data['speakerChoice']
        if onboarding:
            response['correct'] = choice == task_data['pairing_dict']['correct_answer']
        else:
            response['correct'] = -1

        speakers_to_eval = sorted(task_data["pairing_dict"]["speakers_to_eval"])
        response.update(
            {
                'winner': choice,
                'loser': speakers_to_eval[1 - (speakers_to_eval.index(choice))],
                'eval_choice_0': speakers_to_eval[0],
                'eval_choice_1': speakers_to_eval[1],
                'reason': task_data['textReason'],
                'is_onboarding': onboarding,
                'matchup': f"{'__vs__'.join(speakers_to_eval)}",
                'pairing_id': task_data['pair_id'],
            }
        )

        # If it exists, add in which checkboxes of possible reasons the Turkers checked
        if len(task_data.get('speakerReasons', {})) > 0:
            response.update(
                {
                    self.checkbox_prefix + reason: checked
                    for reason, checked in task_data['speakerReasons'].items()
                }
            )
        return response

    def _parse_unit(self, unit: MephistoUnit) -> Optional[Dict[str, Any]]:
        """
        Return data for a given unit.

        If the data is corrupt for whatever reason, we return None

        :param unit:
            MephistoUnit of what should be a completed task by a worker

        :return data:
            Optional dict with the task's formatted data
        """
        try:
            return self.mephisto_data_browser.get_data_from_unit(unit)
        except AssertionError:
            print(
                f"WARNING: Data for run_id `{self.run_id}` not found for "
                f"unit id {unit.db_id}"
            )
            return None

    def _extract_to_dataframe(self) -> pd.DataFrame:
        """
        Extract the data from the run to a pandas dataframe.
        """
        units = self.mephisto_data_browser.get_units_for_task_name(self.run_id)
        responses: List[Dict[str, Any]] = []
        for unit in units:
            unit_details = self._parse_unit(unit)
            if unit_details is None:
                continue
            for idx in range(len(unit_details['data'])):
                response = self._extract_response_by_index(unit_details, idx)
                if response is not None:
                    responses.append(response)

        if len(responses) == 0:
            raise ValueError('No valid results found!')
        else:
            return pd.DataFrame(responses)

    def _remove_failed_onboarding(self):
        """
        Remove workers who failed onboarding.
        """
        df = self.dataframe

        all_workers_failing_onboarding = df.loc[
            df['is_onboarding'] & (df['correct'] == False), 'worker'  # noqa: E712
        ].values

        workers_failing_onboarding = sorted(
            np.unique(all_workers_failing_onboarding).tolist()
        )

        self.dataframe = df[
            ~df["worker"].isin(workers_failing_onboarding) & ~df["is_onboarding"]
        ]
        print(
            f'{self.dataframe.size:d} dataframe entries remaining after removing users who failed onboarding.'
        )

    def _load_pairing_files(self):
        df = self.dataframe
        self.pairings_filepath = get_hashed_combo_path(
            root_dir=self.root_dir,
            subdir='pairings_files',
            task=self.task,
            combos=self.combos,
        )
        if not os.path.exists(self.pairings_filepath):
            print(
                f'WARNING: Pairings filepath {self.pairings_filepath} could not be found.'
            )
            self.pairings_filepath = os.path.join(
                self.root_dir,
                'pairings_files',
                hashlib.sha1(
                    '___vs___'.join(
                        [f"{m}.{'q'.replace(':', '_')}" for m in self.models]
                    ).encode('utf-8')
                ).hexdigest()[:10],
            )
        if not os.path.exists(self.pairings_filepath):
            # For backward compatibility
            print(
                f'WARNING: Pairings filepath {self.pairings_filepath} could not be found.'
            )
            self.pairings_filepath = os.path.join(
                self.root_dir,
                'pairings_files',
                '___vs___'.join(
                    [f"{m}.{self.task.replace(':', '_')}" for m in self.models]
                ),
            )
        if not os.path.exists(self.pairings_filepath):
            print(
                f'NOTE: Pairings filepath {self.pairings_filepath} could not be found!'
            )
            return
        self.pairings = []
        with open(self.pairings_filepath, 'r') as f:
            for line in f:
                pair = json.loads(line)
                model1, model2 = pair['speakers_to_eval']
                pair[model1] = pair['dialogue_dicts'][0]
                pair[model2] = pair['dialogue_dicts'][1]
                del pair['dialogue_dicts']
                self.pairings.append(pair)
        self.pairs_to_eval = [self.pairings[i] for i in df.pairing_id.values.tolist()]
        # Build dialogue_ids => dialogue mappings

        winner_dialogues = []
        loser_dialogues = []
        for i, (_, row) in enumerate(df.iterrows()):
            winner = row['winner']
            loser = row['loser']
            winner_dialogues.append(self.pairs_to_eval[i][winner])
            loser_dialogues.append(self.pairs_to_eval[i][loser])
        df['pairs_to_eval'] = pd.Series(self.pairs_to_eval, index=df.index)
        df['winner_dialogue'] = pd.Series(winner_dialogues, index=df.index)
        df['loser_dialogue'] = pd.Series(loser_dialogues, index=df.index)
        self.dataframe = df

    def _get_model_nick_names(self):
        df = self.dataframe
        df = df[df['run_id'] == self.run_id]
        matchups = list(df.matchup.unique())
        models = set()
        combos = set()
        for matchup in matchups:
            model1, model2 = matchup.split('__vs__')
            models.add(model1)
            models.add(model2)
            combos.add(tuple(sorted((model1, model2))))
        self.models = list(models)
        self.models.sort()
        self.combos = list(combos)
        self.combos.sort()

    def get_reasons(self) -> List[str]:
        """
        Return dataframe reasons.
        """
        return self.dataframe['reason'].values.tolist()

    def get_max_hits_per_worker(self) -> int:
        """
        Get the maximum number of HITs completed by any single worker.
        """
        return self.dataframe.groupby('worker')['run_id'].count().max()

    def get_wins_per_model_matchup(self) -> pd.DataFrame:
        """
        Return the wins for each model by matchup.
        """
        self.matchup_total_df = (
            self.dataframe.groupby(['eval_choice_0', 'eval_choice_1'])['run_id']
            .count()
            .to_frame('matchup_total')
        )
        self.win_total_df = (
            self.dataframe.groupby(
                ['eval_choice_0', 'eval_choice_1', 'winner', 'loser']
            )['loser']
            .count()
            .to_frame('win_total')
            .reset_index()
            .set_index(['eval_choice_0', 'eval_choice_1'])
        )
        return self.win_total_df

    def get_win_fractions(self) -> pd.DataFrame:
        """
        Return the joined matchup + win totals, get win fractions.

        Sorted according to win percentage
        """
        if not hasattr(self, 'win_total_df'):
            self.get_wins_per_model_matchup()

        self.win_fraction_df = self.matchup_total_df.join(self.win_total_df).assign(
            win_frac=lambda df: df['win_total'] / df['matchup_total']
        )

        pivoted_df = self.win_fraction_df.pivot(
            index="loser", columns="winner", values="win_frac"
        )
        if self.custom_model_ordering is not None:
            # Use the ordering of the models supplied by the user
            assert set(self.custom_model_ordering) == set(pivoted_df.columns)
            self.model_ordering = self.custom_model_ordering
        else:
            self.model_ordering = (
                self.win_fraction_df.groupby("winner")["win_frac"]
                .mean()
                .sort_values()
                .index.values.tolist()
            )
        self.sorted_win_frac_df = pivoted_df.reindex(
            index=self.model_ordering, columns=self.model_ordering
        )
        return self.sorted_win_frac_df

    def get_num_hits_per_matchup(self):
        """
        Return the number of hits per matchup.
        """
        matchup_total_1_df = self.matchup_total_df.reset_index()
        matchup_total_2_df = matchup_total_1_df.rename(
            columns={'eval_choice_0': 'eval_choice_1', 'eval_choice_1': 'eval_choice_0'}
        )
        self.num_hits_per_matchup_df = (
            pd.concat([matchup_total_1_df, matchup_total_2_df], axis=0)
            .pivot(
                index='eval_choice_0', columns='eval_choice_1', values='matchup_total'
            )
            .reindex(index=self.model_ordering, columns=self.model_ordering)
        )
        return self.num_hits_per_matchup_df

    def _compile_checkbox_stats(self) -> Dict[str, pd.DataFrame]:
        """
        Return the fraction of time that Turkers selected each checkbox.

        Results are cut both (1) by matchup and winner and (2) by just the winner. Each
        checkbox represents one reason that the Turkers could have chosen the speaker
        that they did.
        """
        checkbox_columns = [
            col
            for col in self.dataframe.columns
            if col.startswith(self.checkbox_prefix)
        ]
        group_column_types = {
            'matchup_and_winner': ['matchup', 'winner'],
            'winner': ['winner'],
        }
        grouped_dataframes = {}
        for group_type, group_columns in group_column_types.items():
            selected_columns = (
                self.dataframe[group_columns + checkbox_columns]
                .rename(
                    columns={
                        col: col[len(self.checkbox_prefix) :]
                        for col in checkbox_columns
                    }
                )
                .set_index(group_columns)
                .fillna(False)
            )
            grouped_dataframes[group_type] = selected_columns.groupby(
                group_columns
            ).mean()
        return grouped_dataframes

    def _compile_convos_and_reasons(self) -> str:
        """
        Create a human-readable string of all pairs of conversations, as well as which
        conversation each Turker chose and their reason for choosing it.
        """

        pairing_outputs = []

        for _, pairing_sr in self.dataframe.iterrows():
            winning_dialogue = self._dialogue_to_string(
                pairing_sr['winner_dialogue']['dialogue']
            )
            loser_dialogue = self._dialogue_to_string(
                pairing_sr['loser_dialogue']['dialogue']
            )
            pairing_output = f"""CONVO PAIR ID: {pairing_sr['pairing_id']}

WINNING DIALOGUE: {pairing_sr['winner']}
{winning_dialogue}

LOSING DIALOGUE: {pairing_sr['loser']}
{loser_dialogue}

QUESTION: {pairing_sr['question']}
TURKER'S CHOICE: {pairing_sr['winner']}
REASON: {pairing_sr['reason']}



"""
            pairing_outputs.append(pairing_output)

        return ''.join(pairing_outputs)

    @staticmethod
    def _dialogue_to_string(dialogue: List[dict]) -> str:
        """
        Convert a list of dictionaries into a human-readable conversation.

        Each dictionary represents one utterance.
        """
        utterance_strings = []
        for utterance_dict in dialogue:
            if utterance_dict["id"] == "human_evaluator":
                speaker_string = "HUMAN"
            else:
                speaker_string = utterance_dict["id"]
            utterance = utterance_dict["text"]
            utterance_strings.append(f"[{speaker_string}]: {utterance}")
        return "\n".join(utterance_strings)

    def get_matchup_totals_with_significance(self) -> pd.DataFrame:
        """
        Return dataframe with matchup win totals + significance.
        """

        def _signf_level(p):
            if p < 0.001:
                return "***", "p<.001"
            elif p < 0.01:
                return "**", "p<.01"
            elif p < 0.05:
                return "*", "p<.05"
            else:
                return "", "p>.05"

        output = []
        for _, run_annotations in self.dataframe.groupby('run_id'):
            question = list(run_annotations.question)[0]
            for matchup, annotations in run_annotations.groupby('matchup'):
                model1, model2 = matchup.split('__vs__')
                wincount1 = np.sum(annotations['winner'] == model1)
                wincount2 = np.sum(annotations['winner'] == model2)
                numratings = wincount1 + wincount2
                winrate1 = np.mean(annotations['winner'] == model1)
                winrate2 = np.mean(annotations['winner'] == model2)
                p = binom_test([wincount1, wincount2])

                stars, plevel = _signf_level(p)

                agreements = []
                for _, pairing_annotations in annotations.groupby('pairing_id'):
                    pair_wincount1 = np.sum(pairing_annotations['winner'] == model1)
                    pair_wincount2 = np.sum(pairing_annotations['winner'] == model2)
                    if pair_wincount1 < 2 and pair_wincount2 < 2:
                        if pair_wincount1 == 1 and pair_wincount2 == 1:
                            agreements.append(0)
                    else:
                        majority_wincount = max(pair_wincount1, pair_wincount2)
                        num_pair_annotations = pair_wincount1 + pair_wincount2
                        pair_agreement = majority_wincount / num_pair_annotations
                        agreements.append(pair_agreement)
                total_agreement = np.mean(agreements)

                output.append(
                    {
                        'question': question,
                        'matchup': matchup,
                        'model1': model1,
                        'model2': model2,
                        'numwins1': wincount1,
                        'numwins2': wincount2,
                        'winrate1': winrate1,
                        'winrate2': winrate2,
                        'numratings': numratings,
                        'p': p,
                        'stars': stars,
                        'sigf': plevel,
                        'agree': total_agreement,
                    }
                )
        output = pd.DataFrame(output)
        # order the columns how we want
        self.significance_df = output[
            [
                'question',
                'matchup',
                'model1',
                'numwins1',
                'winrate1',
                'model2',
                'numwins2',
                'winrate2',
                'numratings',
                'sigf',
                'stars',
                'p',
                'agree',
            ]
        ]
        return self.significance_df

    def save_results(self, path: Optional[str] = None):
        """
        Save results to the given path (defaults to self.outdir).
        """
        if not hasattr(self, 'significance_df'):
            self.get_matchup_totals_with_significance()
        if path is None:
            path = self.outdir

        # Save raw dataframe
        self.dataframe.to_csv(f'{path}/{self.run_id}.full.csv', index=False)

        significance_path = f'{path}/{self.run_id}.significance.csv'
        with open(significance_path, 'w') as f:
            f.write(self.significance_df.to_csv(index=False))
        print(
            f'To visualize significance result, try cat {significance_path} '
            '| column -t -s, | less -S'
        )
        grid_path = f'{path}/{self.run_id}.grid.csv'
        with open(grid_path, 'w') as f:
            f.write(self.get_win_fractions().to_csv(index=True))
        with open(f'{path}/{self.run_id}.grid.winners_as_rows.csv', 'w') as f:
            f.write(self.get_win_fractions().transpose().to_csv(index=True))
        print(
            f'To visualize grid result, try cat {grid_path} | column -t -s, | less -S'
        )

        # Save stats on how many ratings each worker did
        ratings_per_worker = (
            self.dataframe.groupby('worker')['run_id']
            .count()
            .sort_values(ascending=False)
        )
        ratings_per_worker.to_csv(f'{path}/{self.run_id}.ratings_per_worker.csv')

        # Save stats on how often Turkers selected each checkbox that represents one
        # reason to pick the speaker they did
        if any(col.startswith(self.checkbox_prefix) for col in self.dataframe.columns):
            checkbox_stats_dataframes = self._compile_checkbox_stats()
            for group_type, stats in checkbox_stats_dataframes.items():
                stats.to_csv(f'{path}/{self.run_id}.checkbox_stats.{group_type}.csv')

        if not hasattr(self, 'pairings'):
            print('No pairing file found, skipping conversation visualizations.')
        else:
            reason_html_path = f'{path}/{self.run_id}.reason.html'
            with open(reason_html_path, 'w') as f:
                f.write(render_conversations_per_matchups(self.dataframe, True).data)
            print(
                'To visualize conversations with reasons only result, try '
                f'scp username@devfair:{reason_html_path} to your local machine'
            )
            all_html_path = f'{path}/{self.run_id}.all.html'
            with open(all_html_path, 'w') as f:
                f.write(render_conversations_per_matchups(self.dataframe, False).data)
            print(
                'To visualize conversations result, try '
                f'scp username@devfair:{all_html_path} to your local machine'
            )

            # Write all pairs of dialogues, as well as the Turkers' choices and reasons, as
            # a text file
            compiled_text = self._compile_convos_and_reasons()
            with open(f'{path}/{self.run_id}.all_convo_pairs.txt', 'w') as f:
                f.write(compiled_text)
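For orientation, a hedged usage sketch of `AcuteAnalyzer`; the opt keys mirror what `__init__` reads, while the paths and run ID are placeholders:

opt = {
    'root_dir': '/path/to/acute_results',  # placeholder results directory
    'run_id': 'example_run',               # placeholder: Mephisto task name of the run
    'outdir': None,                        # falsy -> defaults to <root_dir>/<run_id>-results
    'task': 'q',
    'model_ordering': None,                # or a comma-separated string of model names
    'mephisto_root': '',                   # falsy -> use the default Mephisto database
}
analyzer = AcuteAnalyzer(opt, remove_failed=True)
print(analyzer.get_win_fractions())        # matchup grid of win fractions
analyzer.save_results()                    # writes the CSV/HTML summaries to analyzer.outdir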
Example #5
    # `db`, `task_run`, `worker`, `annotation`, and `assignment_id` are assumed to
    # come from the surrounding script that builds the mock task data.
    assignment = Assignment(db, assignment_id)
    assignment.write_assignment_data(
        InitializationData(unit_data={}, shared=annotation["inputs"])
    )

    unit_id = db.new_unit(
        task_run.task_id,
        task_run.db_id,
        task_run.requester_id,
        assignment_id,
        0,  # Unit_index
        0,  # reward
        task_run.provider_type,
        task_run.task_type,
        task_run.sandbox,
    )

    unit = Unit(db, unit_id)
    agent = MockAgent.new(db, worker, unit)
    agent.state.state["inputs"] = annotation["inputs"]
    agent.state.state["outputs"] = annotation["outputs"]
    agent.state.save_data()
    agent.mark_done()
    agent.update_status(AgentState.STATUS_COMPLETED)

# Show that the created tasks now appear in MephistoDB:
mephisto_data_browser = MephistoDataBrowser(db=db)
units = mephisto_data_browser.get_units_for_task_name(input("Input task name: "))
for unit in units:
    print(mephisto_data_browser.get_data_from_unit(unit))
def run_examine_by_worker(
    db: "MephistoDB",
    format_data_for_printing: Callable[[Dict[str, Any]], str],
    task_name: Optional[str] = None,
    block_qualification: Optional[str] = None,
    approve_qualification: Optional[str] = None,
):
    """
    Basic script for reviewing work, grouped by worker for convenience. First gets
    the required information to run a review, then
    """
    data_browser = DataBrowser(db=db)

    # Get initial arguments
    if task_name is None:
        task_name, block_qualification, approve_qualification = prompt_for_options(
            task_name, block_qualification, approve_qualification
        )

    tasks = db.find_tasks(task_name=task_name)
    assert len(tasks) >= 1, f"No task found under name {task_name}"

    print(
        "You will be reviewing actual tasks with this flow. Tasks that you either Accept or Pass "
        "will be paid out to the worker, while rejected tasks will not. Passed tasks will be "
        "specially marked such that you can leave them out of your dataset. \n"
        "You may enter the option in caps to apply it to the rest of the units for a given worker."
    )
    if block_qualification is not None:
        created_block_qual = find_or_create_qualification(db, block_qualification)
        print(
            "When you pass or reject a task, the script gives you an option to disqualify the worker "
            "from future tasks by assigning a qualification. If provided, this worker will no "
            "longer be able to work on tasks where the set --block-qualification shares the same name "
            f"you provided above: {block_qualification}\n"
        )
    if approve_qualification is not None:
        created_approve_qual = find_or_create_qualification(db, approve_qualification)
        print(
            "You may use this script to establish a qualified worker pool by granting the provided "
            f"approve qualification {approve_qualification} to workers you think understand the task "
            "well. This will be provided as an option for workers you (A)pprove all on. "
            "Future tasks can use this qual as a required qualification, as described in the "
            "common qualification flows document."
        )
    print(
        "**************\n"
        "You should only reject tasks when it is clear the worker has acted in bad faith, and "
        "didn't actually do the task. Prefer to pass on tasks that were misunderstandings.\n"
        "**************\n"
    )

    units = data_browser.get_units_for_task_name(task_name)

    others = [u for u in units if u.get_status() != "completed"]
    units = [u for u in units if u.get_status() == "completed"]
    reviews_left = len(units)
    previous_work_by_worker = get_worker_stats(others)

    # Determine allowed options
    options = ["a", "p", "r"]
    options_string = "Do you want to accept this work? (a)ccept, (r)eject, (p)ass:"

    units_by_worker: Dict[str, List["Unit"]] = {}

    for u in units:
        w_id = u.worker_id
        if w_id not in units_by_worker:
            units_by_worker[w_id] = []
        units_by_worker[w_id].append(u)

    # Run the review
    for w_id, w_units in units_by_worker.items():
        worker = Worker.get(db, w_id)
        worker_name = worker.worker_name
        apply_all_decision = None
        reason = None
        for idx, unit in enumerate(w_units):

            print(
                f"Reviewing for worker {worker_name}, ({idx+1}/{len(w_units)}), "
                f"Previous {format_worker_stats(w_id, previous_work_by_worker)} "
                f"(total remaining: {reviews_left})"
            )
            reviews_left -= 1
            print(format_data_for_printing(data_browser.get_data_from_unit(unit)))
            if apply_all_decision is not None:
                decision = apply_all_decision
            else:
                decision = input(
                    "Do you want to accept this work? (a)ccept, (r)eject, (p)ass: "
                )
            while decision.lower() not in options:
                decision = input(
                    "Decision must be one of a, p, r. Use CAPS to apply to all remaining for worker: "
                )

            agent = unit.get_assigned_agent()
            assert (
                agent is not None
            ), f"Can't make decision on None agent... issue with {unit}"
            if decision.lower() == "a":
                agent.approve_work()
                if decision == "A" and approve_qualification is not None:
                    should_special_qualify = input(
                        "Do you want to approve qualify this worker? (y)es/(n)o: "
                    )
                    if should_special_qualify.lower() in ["y", "yes"]:
                        worker.grant_qualification(approve_qualification, 1)
            elif decision.lower() == "p":
                agent.soft_reject_work()
                if apply_all_decision is None and block_qualification is not None:
                    should_soft_block = input(
                        "Do you want to soft block this worker? (y)es/(n)o: "
                    )
                    if should_soft_block.lower() in ["y", "yes"]:
                        worker.grant_qualification(block_qualification, 1)
            else:  # decision = 'r'
                if apply_all_decision is None:
                    reason = input("Why are you rejecting this work? ")
                    should_block = input(
                        "Do you want to hard block this worker? (y)es/(n)o: "
                    )
                    if should_block.lower() in ["y", "yes"]:
                        block_reason = input("Why permanently block this worker? ")
                        worker.block_worker(block_reason)
                agent.reject_work(reason)

            if decision.lower() != decision:
                apply_all_decision = decision.lower()
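Finally, a hedged usage sketch of the review loop above; the task and qualification names are placeholders, and the formatter only assumes that the unit's saved data sits under data["data"] (the exact structure depends on the task blueprint):

from mephisto.abstractions.databases.local_database import LocalMephistoDB


def simple_format(data):
    # The structure under data["data"] depends on the blueprint; mock/static tasks
    # typically expose "inputs" and "outputs" as in the earlier example.
    contents = data["data"]
    return f"Inputs: {contents.get('inputs')}\nOutputs: {contents.get('outputs')}"


db = LocalMephistoDB()
run_examine_by_worker(
    db,
    simple_format,
    task_name="my-task-name",             # placeholder
    block_qualification="my-task-block",  # placeholder soft-block qualification
)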