Пример #1
0
    def get(self, assignment_target):
        """
        Write a JSON description of a single assignment to the response.

        The payload has three keys: ``assignment_details`` (the assignment
        merged with its pairing), ``assignment_content`` (saved onboarding and
        task conversation data plus the task name) and
        ``assignment_instructions`` (the task description, or ``None`` when
        the task config cannot be located).

        :param assignment_target:
            id of the assignment to look up
        """
        # Extract assignment
        assignments = [
            self.data_handler.get_assignment_data(assignment_target)
        ]
        pairings = self.data_handler.get_pairings_for_assignment(
            assignment_target)
        processed_assignments = merge_assignments_with_pairings(
            assignments, pairings, 'assignment {}'.format(assignment_target))
        assignment = processed_assignments[0]

        # Get assignment details to retrieve assignment content
        run_id = assignment['run_id']
        onboarding_id = assignment['onboarding_id']
        conversation_id = assignment['conversation_id']
        worker_id = assignment['worker_id']
        # The task name is the run id with its last `_`-separated field
        # removed; compute it once and reuse it below.
        taskname = '_'.join(run_id.split('_')[:-1])

        onboard_data = None
        if onboarding_id is not None:
            onboard_data = MTurkDataHandler.get_conversation_data(
                run_id, onboarding_id, worker_id, self.state['is_sandbox'])

        assignment_content = {
            'onboarding': onboard_data,
            'task': MTurkDataHandler.get_conversation_data(
                run_id, conversation_id, worker_id, self.state['is_sandbox']),
            'task_name': taskname,
        }

        # Get assignment instruction html. This can be much improved
        task_instructions = None
        try:
            guess_loc = tasks[taskname].split('tasks/')[1]
        except (KeyError, IndexError):
            # Unknown task name, or a registered location without a `tasks/`
            # component: instructions are optional, so fall back to None
            # rather than failing the whole request.
            guess_loc = None

        if guess_loc is not None:
            guess_class = '.'.join(guess_loc.split('/'))
            base_format = 'parlai.mturk.tasks.{}.task_config'
            if 'parlai_internal' in guess_loc:
                base_format = 'parlai_internal.mturk.tasks.{}.task_config'
            find_location = base_format.format(guess_class)
            try:
                # Try to find the task at specified location
                t = importlib.import_module(find_location)
                task_instructions = t.task_config['task_description']
            except ImportError:
                task_instructions = None

        data = {
            'assignment_details': assignment,
            'assignment_content': assignment_content,
            'assignment_instructions': task_instructions,
        }

        self.write(json.dumps(data))
Пример #2
0
    def get(self, assignment_target):
        """
        Respond with the JSON details, content and instructions of one
        assignment.

        :param assignment_target:
            id of the assignment to look up
        """
        # Look the assignment up and merge it with its pairing rows
        target_rows = [
            self.data_handler.get_assignment_data(assignment_target),
        ]
        target_pairings = self.data_handler.get_pairings_for_assignment(
            assignment_target)
        label = 'assignment {}'.format(assignment_target)
        assignment = merge_assignments_with_pairings(
            target_rows, target_pairings, label)[0]

        # Identifiers needed to locate the saved conversations
        run_id = assignment['run_id']
        onboarding_id = assignment['onboarding_id']
        conversation_id = assignment['conversation_id']
        worker_id = assignment['worker_id']

        def load_conversation(conv_id):
            # Saved data for this worker in the given conversation
            return MTurkDataHandler.get_conversation_data(
                run_id, conv_id, worker_id, self.state['is_sandbox'])

        if onboarding_id is None:
            onboard_data = None
        else:
            onboard_data = load_conversation(onboarding_id)

        assignment_content = {
            'onboarding': onboard_data,
            'task': load_conversation(conversation_id),
        }

        # Get assignment instruction html: public tasks first, then the
        # internal ones; the first importable task config wins.
        taskname = '_'.join(run_id.split('_')[:-1])
        candidate_modules = [
            template.format(taskname) for template in (
                'parlai.mturk.tasks.{}.task_config',
                'parlai_internal.mturk.tasks.{}.task_config',
            )
        ]
        for module_path in candidate_modules:
            try:
                t = importlib.import_module(module_path)
                task_instructions = t.task_config['task_description']
                break
            except ImportError:
                continue
        else:
            # Neither location could be imported
            task_instructions = None

        self.write(json.dumps({
            'assignment_details': assignment,
            'assignment_content': assignment_content,
            'assignment_instructions': task_instructions,
        }))
Пример #3
0
 def test_init_db(self):
     """Each of the five tables can be queried and starts out empty."""
     db_logger = MTurkDataHandler('test1', file_name=self.DB_NAME)
     conn = db_logger._get_connection()
     cursor = conn.cursor()
     table_names = ('runs', 'hits', 'assignments', 'workers', 'pairings')
     for table_name in table_names:
         cursor.execute('SELECT COUNT(*) FROM {};'.format(table_name))
         row_count = cursor.fetchone()[0]
         self.assertEqual(row_count, 0)
Пример #4
0
    def _get_hit_data(self, hit: Dict[str, Any],
                      logger: MTurkDataHandler) -> Optional[Dict[str, Any]]:
        """
        Return data for a given hit.

        If the HIT is corrupt for whatever reason, we return None

        :param hit:
            HIT information dict
        :param logger:
            Data handler

        :return data:
            Optional dict with the hit data
        """
        try:
            full_data: Dict[str, Any] = logger.get_full_conversation_data(
                self.run_id, hit['conversation_id'], self.is_sandbox)
        except FileNotFoundError:
            print(f"WARNING: Data for run_id `{self.run_id}` not found for "
                  f"conversation id {hit['conversation_id']}")
            return None

        data: Dict[str, Any] = next(iter(full_data['worker_data'].values()))
        if not ('task_data' in data['response']
                and len(data['response']['task_data']) > 0):
            # worker abandoned task, drop their annotations
            return None
        elif len(data['response']['task_data']) != len(data['task_data']):
            raise ValueError(
                'Saved task data does not match response task data')

        return data
Пример #5
0
    def __init__(self, port=DEFAULT_PORT, db_file=DEFAULT_DB_FILE,
                 is_sandbox=False):
        """
        Set up application state, the data handler, a taskless MTurk manager
        and the URL routing table.

        :param port:
            stored on ``self.port``
        :param db_file:
            file name handed to ``MTurkDataHandler``
        :param is_sandbox:
            recorded in ``self.state`` and passed to the taskless manager
        """
        self.port = port
        self.state = {'is_sandbox': is_sandbox}
        self.subs = {}
        self.sources = {}

        # The manager logs through the same data handler as the app
        self.data_handler = MTurkDataHandler(file_name=db_file)
        self.mturk_manager = MTurkManager.make_taskless_instance(is_sandbox)
        self.mturk_manager.db_logger = self.data_handler

        # TODO load some state from DB

        # Routes whose handlers receive a reference to this application
        app_routes = (
            (r"/app/(.*)", AppHandler),
            (r"/tasks", TaskListHandler),
            (r"/workers", WorkerListHandler),
            (r"/runs/(.*)", RunHandler),
            (r"/workers/(.*)", WorkerHandler),
            (r"/assignments/(.*)", AssignmentHandler),
            (r"/approve/(.*)", ApprovalHandler),
            (r"/reject/(.*)", RejectionHandler),
            (r"/reverse_rejection/(.*)", ReverseHandler),
            (r"/block/(.*)", BlockHandler),
            (r"/bonus/(.*)", BonusHandler),
            (r"/error/(.*)", ErrorHandler),
            (r"/socket", SocketHandler),
        )
        handlers = [
            (pattern, handler_class, {'app': self})
            for pattern, handler_class in app_routes
        ]
        # The root route takes no init arguments
        handlers.append((r"/", RedirectHandler))
        super(Application, self).__init__(handlers, **tornado_settings)
Пример #6
0
    def get(self, task_target):
        """
        Respond with a JSON summary of one run: its details, its HITs and its
        assignments, each annotated with any feedback that was received.

        :param task_target:
            id of the run to describe
        """
        hit_rows = self.data_handler.get_hits_for_run(task_target)
        processed_hits = [row_to_dict(hit_row) for hit_row in hit_rows]

        run_assignments = self.data_handler.get_assignments_for_run(
            task_target)
        run_pairings = self.data_handler.get_pairings_for_run(task_target)
        processed_assignments = merge_assignments_with_pairings(
            run_assignments, run_pairings, 'task {}'.format(task_target))

        # get feedback data and put into assignments if present
        for entry in processed_assignments:
            entry['received_feedback'] = None
            run_id = entry['run_id']
            conversation_id = entry['conversation_id']
            worker_id = entry['worker_id']
            if conversation_id is None:
                # No conversation recorded, so there is nothing to load
                continue
            task_data = MTurkDataHandler.get_conversation_data(
                run_id, conversation_id, worker_id, self.state['is_sandbox'])
            if task_data['data'] is None:
                continue
            entry['received_feedback'] = task_data['data'].get(
                'received_feedback')

        run_details = row_to_dict(self.data_handler.get_run_data(task_target))
        # TODO implement run status determination
        run_details['run_status'] = 'unimplemented'

        self.write(json.dumps({
            'run_details': run_details,
            'assignments': processed_assignments,
            'hits': processed_hits,
        }))
Пример #7
0
    def __init__(self, port=DEFAULT_PORT, db_file=DEFAULT_DB_FILE):
        """
        Set up application state, the data handler and the URL routing table.

        :param port:
            stored on ``self.port``
        :param db_file:
            file name handed to ``MTurkDataHandler``
        """
        self.port = port
        self.state = {}
        self.subs = {}
        self.sources = {}
        self.data_handler = MTurkDataHandler(file_name=db_file)

        # TODO load some state from DB

        # Every route except the root one hands this application to its
        # handler through the `app` init argument.
        handlers = []
        for pattern, handler_class in (
                (r"/app/(.*)", AppHandler),
                (r"/tasks", TaskListHandler),
                (r"/workers", WorkerListHandler),
                (r"/runs/(.*)", RunHandler),
                (r"/workers/(.*)", WorkerHandler),
                (r"/error/(.*)", ErrorHandler),
                (r"/socket", SocketHandler),
        ):
            handlers.append((pattern, handler_class, {'app': self}))
        handlers.append((r"/", RedirectHandler))

        super(Application, self).__init__(handlers, **tornado_settings)
Пример #8
0
 def _extract_to_dataframe(self) -> pd.DataFrame:
     """
     Collect the task responses of the run into a pandas dataframe.

     Pairings without a conversation id, HITs for which ``_get_hit_data``
     returns None, and individual responses that are None are skipped.
     """
     logger = MTurkDataHandler(file_name=self.db_path)
     rows: List[Dict[str, Any]] = []
     for hit in logger.get_pairings_for_run(self.run_id):
         data = None
         if hit['conversation_id'] is not None:
             data = self._get_hit_data(hit, logger)
         if data is None:
             continue
         # Responses are matched to saved tasks by position
         for r_idx, task_data in enumerate(data['task_data']):
             response_data = data['response']['task_data'][r_idx]
             if response_data is not None:
                 rows.append(
                     self._extract_response_data(data, task_data, hit,
                                                 response_data))
     return pd.DataFrame(rows)
Пример #9
0
    def test_create_get_run(self):
        """A logged run can be read back; an unknown run id yields None."""
        run_id = 'Test_run_1'
        hits_created = 10
        db_logger = MTurkDataHandler('test2', file_name=self.DB_NAME)

        # Ensure a run logged can be retrieved
        db_logger.log_new_run(hits_created, run_id)
        run_data = db_logger.get_run_data(run_id)
        expected_fields = (
            ('run_id', run_id),
            ('created', 0),
            ('completed', 0),
            ('maximum', hits_created),
            ('failed', 0),
        )
        for field, expected in expected_fields:
            self.assertEqual(run_data[field], expected)

        # Assert missed entries are None
        missing_run = db_logger.get_run_data('fake_id')
        self.assertIsNone(missing_run)
Пример #10
0
    def test_create_update_hits(self):
        """
        HITs can be logged and updated, and the run counters only change when
        a new HIT is logged.
        """
        run_id = 'Test_run_2'
        hits_created = 10
        db_logger = MTurkDataHandler(file_name=self.DB_NAME)
        db_logger.log_new_run(hits_created, run_id)
        hit_1 = self.create_hit()
        hit_2 = self.create_hit()
        hit_3 = self.create_hit()
        all_hits = [hit_1, hit_2, hit_3]

        def check_run_and_hits(handler):
            # The run shows three created hits and nothing else...
            run_data = handler.get_run_data(run_id)
            self.assertEqual(run_data['run_id'], run_id)
            self.assertEqual(run_data['created'], 3)
            self.assertEqual(run_data['completed'], 0)
            self.assertEqual(run_data['maximum'], hits_created)
            self.assertEqual(run_data['failed'], 0)
            # ...and every stored hit matches its in-memory counterpart
            for hit in all_hits:
                stored_hit = handler.get_hit_data(hit['HIT']['HITId'])
                self.assertHITEqual(hit, stored_hit, run_id)

        # Ensure logging without group id fails
        with self.assertRaises(AssertionError):
            db_logger.log_hit_status(hit_1)

        # Log created hits through one logger
        for hit in (hit_1, hit_2):
            db_logger.log_hit_status(hit, run_id)

        # Create new handler, this one with the group id created, ensure
        # the log works fine
        db_logger = MTurkDataHandler(run_id, file_name=self.DB_NAME)
        db_logger.log_hit_status(hit_3)

        # Ensure all of the expected hits are there with correct details
        check_run_and_hits(db_logger)

        # Update the data on a HIT, ensure that the run data stays the same
        # but the HIT data updates
        test_status = 'TEST_STATUS'
        hit_2['HIT']['HITStatus'] = test_status
        db_logger.log_hit_status(hit_2)

        # Same run counters, refreshed hit details
        check_run_and_hits(db_logger)

        # Ensure requesting a hit that doesn't exist returns none
        missing_hit = db_logger.get_hit_data('fake_id')
        self.assertIsNone(missing_hit)
Пример #11
0
    def test_worker_workflows(self):
        run_id = 'Test_run_3'
        hits_created = 10
        db_logger = MTurkDataHandler(run_id, file_name=self.DB_NAME)
        db_logger.log_new_run(hits_created, run_id)
        HIT1 = self.create_hit()
        HIT2 = self.create_hit()
        HIT3 = self.create_hit()
        db_logger.log_hit_status(HIT1)
        db_logger.log_hit_status(HIT2)
        db_logger.log_hit_status(HIT3)

        worker_id_1 = 'TEST_WORKER_ID_1'
        worker_id_2 = 'TEST_WORKER_ID_2'
        assignment_id_1 = 'TEST_ASSIGNMENT_ID_1'
        assignment_id_2 = 'TEST_ASSIGNMENT_ID_2'
        assignment_id_3 = 'TEST_ASSIGNMENT_ID_3'

        # Create two workers and assign the 3 assignments to them
        db_logger.log_worker_accept_assignment(worker_id_1, assignment_id_1,
                                               HIT1['HIT']['HITId'])
        db_logger.log_worker_accept_assignment(worker_id_2, assignment_id_2,
                                               HIT2['HIT']['HITId'])
        db_logger.log_worker_accept_assignment(worker_id_2, assignment_id_3,
                                               HIT3['HIT']['HITId'])

        # Ensure two workers have been created
        conn = db_logger._get_connection()
        c = conn.cursor()
        c.execute('SELECT COUNT(*) FROM workers;')
        self.assertEqual(c.fetchone()[0], 2)

        # Ensure non-existent worker is None
        self.assertIsNone(db_logger.get_worker_data('fake_id'))

        # Ensure the two workers have the correct expected values
        worker_1_data = db_logger.get_worker_data(worker_id_1)
        worker_2_data = db_logger.get_worker_data(worker_id_2)
        self.assertEqual(worker_1_data['worker_id'], worker_id_1)
        self.assertEqual(worker_1_data['accepted'], 1)
        self.assertEqual(worker_1_data['disconnected'], 0)
        self.assertEqual(worker_1_data['completed'], 0)
        self.assertEqual(worker_1_data['approved'], 0)
        self.assertEqual(worker_1_data['rejected'], 0)
        self.assertEqual(worker_2_data['worker_id'], worker_id_2)
        self.assertEqual(worker_2_data['accepted'], 2)
        self.assertEqual(worker_2_data['disconnected'], 0)
        self.assertEqual(worker_2_data['completed'], 0)
        self.assertEqual(worker_2_data['approved'], 0)
        self.assertEqual(worker_2_data['rejected'], 0)

        # Ensure all the assignments are marked as accepted
        c.execute('SELECT COUNT(*) FROM assignments WHERE status = ?;',
                  ('Accepted', ))
        self.assertEqual(c.fetchone()[0], 3)

        # Ensure non-existing assign is None
        self.assertIsNone(db_logger.get_assignment_data('fake_id'))

        # Check each of the assignments
        assignment_1_data = db_logger.get_assignment_data(assignment_id_1)
        assignment_2_data = db_logger.get_assignment_data(assignment_id_2)
        assignment_3_data = db_logger.get_assignment_data(assignment_id_3)
        self.assertEqual(assignment_1_data['assignment_id'], assignment_id_1)
        self.assertEqual(assignment_1_data['status'], 'Accepted')
        self.assertEqual(assignment_1_data['approve_time'], None)
        self.assertEqual(assignment_1_data['worker_id'], worker_id_1)
        self.assertEqual(assignment_1_data['hit_id'], HIT1['HIT']['HITId'])
        self.assertEqual(assignment_2_data['assignment_id'], assignment_id_2)
        self.assertEqual(assignment_2_data['status'], 'Accepted')
        self.assertEqual(assignment_2_data['approve_time'], None)
        self.assertEqual(assignment_2_data['worker_id'], worker_id_2)
        self.assertEqual(assignment_2_data['hit_id'], HIT2['HIT']['HITId'])
        self.assertEqual(assignment_3_data['assignment_id'], assignment_id_3)
        self.assertEqual(assignment_3_data['status'], 'Accepted')
        self.assertEqual(assignment_3_data['approve_time'], None)
        self.assertEqual(assignment_3_data['worker_id'], worker_id_2)
        self.assertEqual(assignment_3_data['hit_id'], HIT3['HIT']['HITId'])

        # Ensure three pairings have been created, one for each assignment
        c.execute('SELECT COUNT(*) FROM pairings')
        self.assertEqual(c.fetchone()[0], 3)

        # Ensure pairings are accurate
        self.assertIsNone(
            db_logger.get_worker_assignment_pairing(worker_id_1,
                                                    assignment_id_3))

        pair_1 = db_logger.get_worker_assignment_pairing(
            worker_id_1, assignment_id_1)
        pair_2 = db_logger.get_worker_assignment_pairing(
            worker_id_2, assignment_id_2)
        pair_3 = db_logger.get_worker_assignment_pairing(
            worker_id_2, assignment_id_3)
        for f in [
                'onboarding_start', 'onboarding_end', 'task_start', 'task_end',
                'conversation_id'
        ]:
            for pair in [pair_1, pair_2, pair_3]:
                self.assertIsNone(pair[f])
        self.assertEqual(pair_1['status'], AssignState.STATUS_NONE)
        self.assertEqual(pair_2['status'], AssignState.STATUS_NONE)
        self.assertEqual(pair_3['status'], AssignState.STATUS_NONE)
        self.assertEqual(pair_1['worker_id'], worker_id_1)
        self.assertEqual(pair_2['worker_id'], worker_id_2)
        self.assertEqual(pair_3['worker_id'], worker_id_2)
        self.assertEqual(pair_1['assignment_id'], assignment_id_1)
        self.assertEqual(pair_2['assignment_id'], assignment_id_2)
        self.assertEqual(pair_3['assignment_id'], assignment_id_3)
        self.assertEqual(pair_1['run_id'], run_id)
        self.assertEqual(pair_2['run_id'], run_id)
        self.assertEqual(pair_3['run_id'], run_id)
        self.assertEqual(pair_1['bonus_amount'], 0)
        self.assertEqual(pair_2['bonus_amount'], 0)
        self.assertEqual(pair_3['bonus_amount'], 0)
        self.assertEqual(pair_1['bonus_text'], '')
        self.assertEqual(pair_2['bonus_text'], '')
        self.assertEqual(pair_3['bonus_text'], '')
        self.assertFalse(pair_1['bonus_paid'])
        self.assertFalse(pair_2['bonus_paid'])
        self.assertFalse(pair_3['bonus_paid'])

        # Ensure get_pairings_for_assignment works
        pair_4 = db_logger.get_pairings_for_assignment(assignment_id_2)[0]
        self.assertEqual(pair_2, pair_4)
        self.assertListEqual([],
                             db_logger.get_pairings_for_assignment('fake_id'))

        # Ensure get_all_<thing>_for_worker works
        self.assertListEqual(
            [], db_logger.get_all_assignments_for_worker('fake_id'))
        self.assertListEqual([],
                             db_logger.get_all_pairings_for_worker('fake_id'))
        self.assertEqual(
            db_logger.get_all_assignments_for_worker(worker_id_1)[0],
            assignment_1_data)
        self.assertEqual(
            len(db_logger.get_all_assignments_for_worker(worker_id_2)), 2)
        self.assertEqual(
            db_logger.get_all_pairings_for_worker(worker_id_1)[0], pair_1)
        self.assertEqual(
            len(db_logger.get_all_pairings_for_worker(worker_id_2)), 2)

        # test task_restricted gets
        self.assertEqual(
            db_logger.get_all_task_assignments_for_worker(worker_id_1)[0],
            assignment_1_data)
        self.assertEqual(
            len(db_logger.get_all_task_assignments_for_worker(worker_id_2)), 2)
        self.assertEqual(
            len(
                db_logger.get_all_task_assignments_for_worker(
                    worker_id_1, 'fake_id')), 0)
        self.assertEqual(
            db_logger.get_all_task_pairings_for_worker(worker_id_1)[0], pair_1)
        self.assertEqual(
            len(db_logger.get_all_task_pairings_for_worker(worker_id_2)), 2)
        self.assertEqual(
            len(
                db_logger.get_all_task_pairings_for_worker(
                    worker_id_1, 'fake_id')), 0)

        conversation_id_1 = "CONV_ID_1"
        conversation_id_2 = "CONV_ID_2"

        db_logger.log_start_onboard(worker_id_1, assignment_id_1)
        db_logger.log_start_onboard(worker_id_2, assignment_id_2)
        db_logger.log_start_onboard(worker_id_2, assignment_id_3)
        db_logger.log_finish_onboard(worker_id_1, assignment_id_1)
        db_logger.log_finish_onboard(worker_id_2, assignment_id_2)
        db_logger.log_finish_onboard(worker_id_2, assignment_id_3)
        db_logger.log_start_task(worker_id_1, assignment_id_1,
                                 conversation_id_1)
        db_logger.log_start_task(worker_id_2, assignment_id_2,
                                 conversation_id_1)
        db_logger.log_start_task(worker_id_2, assignment_id_3,
                                 conversation_id_2)

        # Check to see retrieval by conversation
        pairs_1 = db_logger.get_pairings_for_conversation(conversation_id_1)
        pairs_2 = db_logger.get_pairings_for_conversation(conversation_id_2)
        pairs_3 = db_logger.get_pairings_for_conversation('fake_id')
        pair_1 = db_logger.get_worker_assignment_pairing(
            worker_id_1, assignment_id_1)
        pair_2 = db_logger.get_worker_assignment_pairing(
            worker_id_2, assignment_id_2)
        pair_3 = db_logger.get_worker_assignment_pairing(
            worker_id_2, assignment_id_3)
        self.assertEqual(pairs_1[0], pair_1)
        self.assertEqual(pairs_1[1], pair_2)
        self.assertEqual(pairs_2[0], pair_3)
        self.assertEqual(len(pairs_3), 0)

        # Do some final processing on assignments
        db_logger.log_complete_assignment(
            worker_id_1, assignment_id_1, time.time(),
            AssignState.STATUS_PARTNER_DISCONNECT)
        db_logger.log_disconnect_assignment(worker_id_2, assignment_id_2,
                                            time.time(),
                                            AssignState.STATUS_DISCONNECT)
        db_logger.log_complete_assignment(worker_id_2, assignment_id_3,
                                          time.time(), AssignState.STATUS_DONE)

        # Assignment state consistent
        assignment_1_data = db_logger.get_assignment_data(assignment_id_1)
        assignment_2_data = db_logger.get_assignment_data(assignment_id_2)
        assignment_3_data = db_logger.get_assignment_data(assignment_id_3)
        self.assertEqual(assignment_1_data['assignment_id'], assignment_id_1)
        self.assertEqual(assignment_1_data['status'], 'Completed')
        self.assertIsNotNone(assignment_1_data['approve_time'])
        self.assertEqual(assignment_1_data['worker_id'], worker_id_1)
        self.assertEqual(assignment_1_data['hit_id'], HIT1['HIT']['HITId'])
        self.assertEqual(assignment_2_data['assignment_id'], assignment_id_2)
        self.assertEqual(assignment_2_data['status'], 'Completed')
        self.assertIsNotNone(assignment_2_data['approve_time'])
        self.assertEqual(assignment_2_data['worker_id'], worker_id_2)
        self.assertEqual(assignment_2_data['hit_id'], HIT2['HIT']['HITId'])
        self.assertEqual(assignment_3_data['assignment_id'], assignment_id_3)
        self.assertEqual(assignment_3_data['status'], 'Completed')
        self.assertIsNotNone(assignment_3_data['approve_time'])
        self.assertEqual(assignment_3_data['worker_id'], worker_id_2)
        self.assertEqual(assignment_3_data['hit_id'], HIT3['HIT']['HITId'])

        # Worker state consistent
        worker_1_data = db_logger.get_worker_data(worker_id_1)
        worker_2_data = db_logger.get_worker_data(worker_id_2)
        self.assertEqual(worker_1_data['worker_id'], worker_id_1)
        self.assertEqual(worker_1_data['accepted'], 1)
        self.assertEqual(worker_1_data['disconnected'], 0)
        self.assertEqual(worker_1_data['completed'], 1)
        self.assertEqual(worker_1_data['approved'], 0)
        self.assertEqual(worker_1_data['rejected'], 0)
        self.assertEqual(worker_2_data['worker_id'], worker_id_2)
        self.assertEqual(worker_2_data['accepted'], 2)
        self.assertEqual(worker_2_data['disconnected'], 1)
        self.assertEqual(worker_2_data['completed'], 1)
        self.assertEqual(worker_2_data['approved'], 0)
        self.assertEqual(worker_2_data['rejected'], 0)

        # Ensure Pairing state is consistent
        pair_1 = db_logger.get_worker_assignment_pairing(
            worker_id_1, assignment_id_1)
        pair_2 = db_logger.get_worker_assignment_pairing(
            worker_id_2, assignment_id_2)
        pair_3 = db_logger.get_worker_assignment_pairing(
            worker_id_2, assignment_id_3)
        self.assertEqual(pair_1['status'],
                         AssignState.STATUS_PARTNER_DISCONNECT)
        self.assertEqual(pair_2['status'], AssignState.STATUS_DISCONNECT)
        self.assertEqual(pair_3['status'], AssignState.STATUS_DONE)
        self.assertEqual(pair_1['worker_id'], worker_id_1)
        self.assertEqual(pair_2['worker_id'], worker_id_2)
        self.assertEqual(pair_3['worker_id'], worker_id_2)
        self.assertEqual(pair_1['assignment_id'], assignment_id_1)
        self.assertEqual(pair_2['assignment_id'], assignment_id_2)
        self.assertEqual(pair_3['assignment_id'], assignment_id_3)
        self.assertEqual(pair_1['conversation_id'], conversation_id_1)
        self.assertEqual(pair_2['conversation_id'], conversation_id_1)
        self.assertEqual(pair_3['conversation_id'], conversation_id_2)
        self.assertGreaterEqual(pair_1['onboarding_end'],
                                pair_1['onboarding_start'])
        self.assertGreaterEqual(pair_2['onboarding_end'],
                                pair_2['onboarding_start'])
        self.assertGreaterEqual(pair_3['onboarding_end'],
                                pair_3['onboarding_start'])
        self.assertGreaterEqual(pair_1['task_start'], pair_1['onboarding_end'])
        self.assertGreaterEqual(pair_2['task_start'], pair_2['onboarding_end'])
        self.assertGreaterEqual(pair_3['task_start'], pair_3['onboarding_end'])
        self.assertGreaterEqual(pair_1['task_end'], pair_1['onboarding_start'])
        self.assertGreaterEqual(pair_2['task_end'], pair_2['onboarding_start'])
        self.assertGreaterEqual(pair_3['task_end'], pair_3['onboarding_start'])
        self.assertEqual(pair_1['run_id'], run_id)
        self.assertEqual(pair_2['run_id'], run_id)
        self.assertEqual(pair_3['run_id'], run_id)
        self.assertEqual(pair_1['bonus_amount'], 0)
        self.assertEqual(pair_2['bonus_amount'], 0)
        self.assertEqual(pair_3['bonus_amount'], 0)
        self.assertEqual(pair_1['bonus_text'], '')
        self.assertEqual(pair_2['bonus_text'], '')
        self.assertEqual(pair_3['bonus_text'], '')
        self.assertFalse(pair_1['bonus_paid'])
        self.assertFalse(pair_2['bonus_paid'])
        self.assertFalse(pair_3['bonus_paid'])

        # Ensure run state is consistent
        run_data = db_logger.get_run_data(run_id)
        self.assertEqual(run_data['run_id'], run_id)
        self.assertEqual(run_data['created'], 3)
        self.assertEqual(run_data['completed'], 2)
        self.assertEqual(run_data['maximum'], hits_created)
        self.assertEqual(run_data['failed'], 1)

        # Test "submitting" and abandoning hits
        db_logger.log_submit_assignment(worker_id_1, assignment_id_1)
        db_logger.log_abandon_assignment(worker_id_2, assignment_id_2)
        db_logger.log_submit_assignment(worker_id_2, assignment_id_3)
        assignment_1_data = db_logger.get_assignment_data(assignment_id_1)
        assignment_2_data = db_logger.get_assignment_data(assignment_id_2)
        assignment_3_data = db_logger.get_assignment_data(assignment_id_3)
        self.assertEqual(assignment_1_data['assignment_id'], assignment_id_1)
        self.assertEqual(assignment_1_data['status'], 'Reviewable')
        self.assertIsNotNone(assignment_1_data['approve_time'])
        self.assertEqual(assignment_1_data['worker_id'], worker_id_1)
        self.assertEqual(assignment_1_data['hit_id'], HIT1['HIT']['HITId'])
        self.assertEqual(assignment_2_data['assignment_id'], assignment_id_2)
        self.assertEqual(assignment_2_data['status'], 'Abandoned')
        self.assertIsNotNone(assignment_2_data['approve_time'])
        self.assertEqual(assignment_2_data['worker_id'], worker_id_2)
        self.assertEqual(assignment_2_data['hit_id'], HIT2['HIT']['HITId'])
        self.assertEqual(assignment_3_data['assignment_id'], assignment_id_3)
        self.assertEqual(assignment_3_data['status'], 'Reviewable')
        self.assertIsNotNone(assignment_3_data['approve_time'])
        self.assertEqual(assignment_3_data['worker_id'], worker_id_2)
        self.assertEqual(assignment_3_data['hit_id'], HIT3['HIT']['HITId'])

        # Test approving and rejecting
        amount_use = 3
        reason_use = 'Just because'
        db_logger.log_award_amount(worker_id_1, assignment_id_1, amount_use,
                                   reason_use)
        db_logger.log_award_amount(worker_id_2, assignment_id_2, amount_use,
                                   reason_use)
        db_logger.log_bonus_paid(worker_id_1, assignment_id_1)
        db_logger.log_approve_assignment(worker_id_1, assignment_id_1)
        db_logger.log_approve_assignment(worker_id_2, assignment_id_2)
        db_logger.log_reject_assignment(worker_id_2, assignment_id_3)

        # Ensure state is valid again
        assignment_1_data = db_logger.get_assignment_data(assignment_id_1)
        assignment_2_data = db_logger.get_assignment_data(assignment_id_2)
        assignment_3_data = db_logger.get_assignment_data(assignment_id_3)
        self.assertEqual(assignment_1_data['assignment_id'], assignment_id_1)
        self.assertEqual(assignment_1_data['status'], 'Approved')
        self.assertIsNotNone(assignment_1_data['approve_time'])
        self.assertEqual(assignment_1_data['worker_id'], worker_id_1)
        self.assertEqual(assignment_1_data['hit_id'], HIT1['HIT']['HITId'])
        self.assertEqual(assignment_2_data['assignment_id'], assignment_id_2)
        self.assertEqual(assignment_2_data['status'], 'Approved')
        self.assertIsNotNone(assignment_2_data['approve_time'])
        self.assertEqual(assignment_2_data['worker_id'], worker_id_2)
        self.assertEqual(assignment_2_data['hit_id'], HIT2['HIT']['HITId'])
        self.assertEqual(assignment_3_data['assignment_id'], assignment_id_3)
        self.assertEqual(assignment_3_data['status'], 'Rejected')
        self.assertIsNotNone(assignment_3_data['approve_time'])
        self.assertEqual(assignment_3_data['worker_id'], worker_id_2)
        self.assertEqual(assignment_3_data['hit_id'], HIT3['HIT']['HITId'])

        worker_1_data = db_logger.get_worker_data(worker_id_1)
        worker_2_data = db_logger.get_worker_data(worker_id_2)
        self.assertEqual(worker_1_data['worker_id'], worker_id_1)
        self.assertEqual(worker_1_data['accepted'], 1)
        self.assertEqual(worker_1_data['disconnected'], 0)
        self.assertEqual(worker_1_data['completed'], 1)
        self.assertEqual(worker_1_data['approved'], 1)
        self.assertEqual(worker_1_data['rejected'], 0)
        self.assertEqual(worker_2_data['worker_id'], worker_id_2)
        self.assertEqual(worker_2_data['accepted'], 2)
        self.assertEqual(worker_2_data['disconnected'], 1)
        self.assertEqual(worker_2_data['completed'], 1)
        self.assertEqual(worker_2_data['approved'], 1)
        self.assertEqual(worker_2_data['rejected'], 1)

        pair_1 = db_logger.get_worker_assignment_pairing(
            worker_id_1, assignment_id_1)
        pair_2 = db_logger.get_worker_assignment_pairing(
            worker_id_2, assignment_id_2)
        pair_3 = db_logger.get_worker_assignment_pairing(
            worker_id_2, assignment_id_3)
        self.assertEqual(pair_1['status'],
                         AssignState.STATUS_PARTNER_DISCONNECT)
        self.assertEqual(pair_2['status'], AssignState.STATUS_DISCONNECT)
        self.assertEqual(pair_3['status'], AssignState.STATUS_DONE)
        self.assertEqual(pair_1['worker_id'], worker_id_1)
        self.assertEqual(pair_2['worker_id'], worker_id_2)
        self.assertEqual(pair_3['worker_id'], worker_id_2)
        self.assertEqual(pair_1['assignment_id'], assignment_id_1)
        self.assertEqual(pair_2['assignment_id'], assignment_id_2)
        self.assertEqual(pair_3['assignment_id'], assignment_id_3)
        self.assertEqual(pair_1['conversation_id'], conversation_id_1)
        self.assertEqual(pair_2['conversation_id'], conversation_id_1)
        self.assertEqual(pair_3['conversation_id'], conversation_id_2)
        self.assertGreaterEqual(pair_1['onboarding_end'],
                                pair_1['onboarding_start'])
        self.assertGreaterEqual(pair_2['onboarding_end'],
                                pair_2['onboarding_start'])
        self.assertGreaterEqual(pair_3['onboarding_end'],
                                pair_3['onboarding_start'])
        self.assertGreaterEqual(pair_1['task_start'], pair_1['onboarding_end'])
        self.assertGreaterEqual(pair_2['task_start'], pair_2['onboarding_end'])
        self.assertGreaterEqual(pair_3['task_start'], pair_3['onboarding_end'])
        self.assertGreaterEqual(pair_1['task_end'], pair_1['onboarding_start'])
        self.assertGreaterEqual(pair_2['task_end'], pair_2['onboarding_start'])
        self.assertGreaterEqual(pair_3['task_end'], pair_3['onboarding_start'])
        self.assertEqual(pair_1['run_id'], run_id)
        self.assertEqual(pair_2['run_id'], run_id)
        self.assertEqual(pair_3['run_id'], run_id)
        self.assertEqual(pair_1['bonus_amount'], amount_use)
        self.assertEqual(pair_2['bonus_amount'], amount_use)
        self.assertEqual(pair_3['bonus_amount'], 0)
        self.assertEqual(pair_1['bonus_text'], reason_use)
        self.assertEqual(pair_2['bonus_text'], reason_use)
        self.assertEqual(pair_3['bonus_text'], '')
        self.assertTrue(pair_1['bonus_paid'])
        self.assertFalse(pair_2['bonus_paid'])
        self.assertFalse(pair_3['bonus_paid'])
Пример #12
0
def main(opt, task_config):
    """Handles setting up and running a ParlAI-MTurk task by instantiating
    an MTurk manager and configuring it for the qa_data_collection task.

    Args:
        opt: Option dictionary. It is mutated in place: ``opt['task']`` is
            set to the name of the folder containing this file and the
            contents of ``task_config`` are merged in.
        task_config: Contents of task_config.py, merged into ``opt``.

    Raises:
        Exception: If ``opt['block_on_onboarding']`` is truthy while
            ``opt['block_qualification']`` is None.
    """

    np.random.seed(opt['seed'])

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    # set up the HITs, which I think doesn't require a server
    setup_task_queue(opt)

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    mturk_manager = StaticMTurkManager(opt=opt)

    # Set up Heroku server
    mturk_manager.setup_server(
        task_directory_path=os.path.dirname(os.path.abspath(__file__)))

    # No onboarding function supported for static worlds at the moment,
    # should filter by making the first task against a "gold" example
    # which is processed in run_conversation at the moment.
    # Soon will support this behavior automatically
    mturk_manager.set_onboard_function(onboard_function=None)

    data_handler = MTurkDataHandler(
        task_group_id=mturk_manager.task_group_id,
        file_name='pmt_sbdata.db' if opt['is_sandbox'] else 'pmt_data.db')

    if opt['block_on_onboarding'] and opt['block_qualification'] is None:
        raise Exception(
            "You must set block_qualification or set block_on_onboarding to False"
        )
    qualifications = [
        {  # number of HITS approved
            'QualificationTypeId': '00000000000000000040',
            'Comparator': 'GreaterThan',
            'IntegerValues': [opt['qual_n_hits_approved']]
        },
        {  # percent approved
            'QualificationTypeId': '000000000000000000L0',
            'Comparator': 'GreaterThan',
            'IntegerValues': [opt['qual_percent_hits_approved']]
        },
    ]
    if opt['is_sandbox']:
        # Disabled qualification kept for reference:
        #qualifications.append(
        #    {
        #        'QualificationTypeId': '00000000000000000071',
        #        'Comparator': 'In',
        #        'LocaleValues': [
        #            {'Country': 'US', 'Subdivision': 'NY'},
        #            {'Country': 'CA'},
        #            {'Country': 'GB'},
        #            {'Country': 'AU'},
        #            {'Country': 'NZ'},
        #        ],
        #        'RequiredToPreview': True,
        #    })
        # Sandbox runs use no qualification requirements at all.
        qualifications = []
    print(f"Qualifications: {qualifications}")

    out_fh = open(opt['out_file'], 'w')

    # Bind every optional resource up front so that the nested task function
    # and the cleanup code below never hit an unbound name.
    workers_to_block = []
    workers_to_allow = []
    bad_worker_fh = None
    bonus_fh = None

    if opt['bad_worker_file'] is not None and not opt['is_sandbox']:
        print(f"Logging bad workers in {opt['bad_worker_file']}.")
        if os.path.exists(opt['bad_worker_file']):
            with open(opt['bad_worker_file'], 'r') as prev_bad_fh:
                workers_to_block = list(
                    {worker.strip() for worker in prev_bad_fh})
            print(
                f"\tLoaded {len(workers_to_block)} bad workers from {opt['bad_worker_file']}."
            )
        else:
            print(f"\tNo previous bad workers from {opt['bad_worker_file']}.")
        # Re-open in append mode so newly detected bad workers are added.
        bad_worker_fh = open(opt['bad_worker_file'], 'a')

    if (opt['ok_worker_file'] is not None and not opt['is_sandbox']
            and os.path.exists(opt['ok_worker_file'])):
        with open(opt['ok_worker_file'], 'r') as worker_fh:
            workers_to_allow = list(
                {worker.strip() for worker in worker_fh})
        print(
            f"\tLoaded {len(workers_to_allow)} ok workers from {opt['ok_worker_file']}."
        )

    if opt['bonus_file'] is not None:
        bonus_fh = open(opt['bonus_file'], 'a')
        print(f"Logging bonuses awarded to {opt['bonus_file']}.")

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # (Soft) block bad workers
        # NOTE(review): blocked_workers is not bound in this function; it is
        # presumably a module-level list -- confirm.
        for worker_id in workers_to_block:
            try:
                mturk_manager.soft_block_worker(worker_id)
                blocked_workers.append(worker_id)
            except Exception:
                # Best effort: one failed block must not abort the run, but
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                print(f"Failed to block {worker_id}")
        if opt['is_sandbox']:
            mturk_manager.un_soft_block_worker(ALEX_ID)

        # Explicitly allowed workers are un-blocked. The list is only
        # non-empty for live runs with an existing ok_worker_file.
        for worker in workers_to_allow:
            mturk_manager.un_soft_block_worker(worker)

        # Set up the sockets and threads to recieve workers
        mturk_manager.ready_to_accept_workers()

        # Create the hits as specified by command line arguments
        mturk_manager.create_hits(qualifications=qualifications)

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            workers[0].id = display_agent_name

        # This function may be automatically implemented by StaticMTurkManager
        # soon, in which case you just need to provide get_new_task_data() and
        # return_task_data()
        def run_conversation(mturk_manager, opt, workers):
            task_data = get_new_task_data(workers[0],
                                          opt['comparisons_per_hit'])

            print("Started task...")
            world = StaticMTurkTaskWorld(
                opt,
                mturk_agent=workers[0],
                task_data=task_data,
            )
            while not world.episode_done():
                world.parley()
            print("\tFinished running task.")

            world.shutdown()

            to_save_data = world.prep_save_data(workers)

            if not world.did_complete():
                print("\tDidn't finish HIT. Returning task data...")
                return_task_data(workers[0].worker_id, task_data)
            elif opt['block_on_onboarding']:
                print("\tFinished HIT. Checking work...")
                # bad_worker_fh / bonus_fh are None when the corresponding
                # file is not configured.
                did_fail = check_work(
                    mturk_manager,
                    data_handler,
                    to_save_data,
                    bad_worker_fh=bad_worker_fh,
                    onboard_threshold=opt['onboarding_threshold'],
                    min_time_threshold=opt['min_time_threshold'],
                    bonus_amount=opt['bonus_reward'],
                    bonus_fh=bonus_fh)
                to_save_data['did_fail'] = did_fail

            save_data(to_save_data, out_fh)
            return to_save_data

        print("This run id: {}".format(mturk_manager.task_group_id))

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    finally:
        # Any hits that aren't claimed or completed have to be shut down. Must
        # keep the world running until that point.
        mturk_manager.expire_all_unassigned_hits()

        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()

        # Close file handles
        out_fh.close()
        if bad_worker_fh is not None:
            bad_worker_fh.close()
        if bonus_fh is not None:
            bonus_fh.close()

        print(f"SOFTBLOCKED WORKERS: {blocked_workers}")
Пример #13
0
def main(opt):
    setup_aws_credentials()
    if opt['no_sandbox']:
        db_file, all_runs_dir = PATHS['live']
        opt['is_sandbox'] = False
    else:
        db_file, all_runs_dir = PATHS['sandbox']
    assert os.path.exists(db_file), f"DB file {db_file} doesn't exist!"
    assert os.path.isdir(
        all_runs_dir), f"run directory {all_runs_dir} doesn't exist!"
    db = MTurkDataHandler(file_name=db_file)
    mturk_manager = MTurkManager(opt, [])
    client = mturk_utils.get_mturk_client(not opt['no_sandbox'])

    # Get run IDs
    if opt['run_ids'] is None:
        run_ids = list(os.listdir(all_runs_dir))
        run2worker = defaultdict(lambda: dict())
        worker2run = defaultdict(lambda: dict())
        for run_id in run_ids:
            run_dir = os.path.join(all_runs_dir, run_id)
            hits = os.listdir(run_dir)
            for hit in hits:
                # t_*/workers/{WORKER_ID}.json
                resps = os.listdir(f"{run_dir}/{hit}/workers/")
                assert len(resps) == 1, "More than one response found!"
                worker_id = resps[0].split('.')[0]
                worker_data = json.load(
                    open(os.path.join(run_dir, hit, "workers", resps[0])))
                run2worker[run_id][worker_id] = worker_data
                worker2run[worker_id][run_id] = worker_data

    else:
        run_ids = opt['run_ids'].split(',')

    def get_all_hits():
        """Return every HIT reported by ``client.list_hits`` as one flat list.

        Follows the ``NextToken`` pagination of the response and sleeps for
        0.5 seconds after each follow-up request.
        """
        resp = client.list_hits()
        # Copy the items of the first page; appending the page itself would
        # nest it as a single list element in front of the later pages.
        all_hits = list(resp['HITs'])
        while 'NextToken' in resp and resp['NextToken']:
            resp = client.list_hits(NextToken=resp['NextToken'])
            all_hits += resp['HITs']
            time.sleep(0.5)
        return all_hits

    def get_run_id_data(run_ids):
        """Print a summary line per run, ordered by launch time.

        For each run ID the run record and its pairings are fetched from
        ``db``; the number of pairings and the launch time are printed,
        followed by a grand total.
        """
        print(f"Found following run IDs: ")
        # Fetch every run record first, then order the runs chronologically.
        records = sorted(
            [(rid, db.get_run_data(rid)) for rid in run_ids],
            key=lambda entry: entry[1]['launch_time'])
        total_hits = 0
        for rid, record in records:
            launched_at = datetime.fromtimestamp(record['launch_time'])
            pairings = db.get_pairings_for_run(rid)
            total_hits += len(pairings)
            print(f"{rid} {len(pairings)} HITS, started {launched_at}")
        print(f"Total {total_hits} HITS over {len(run_ids)} runs")

    def approve_run_hits(run_id):
        """Interactively approve the assignments of one run.

        Collects the assignment ID of every pairing of ``run_id`` that has a
        conversation, whose conversation data can be loaded, and whose
        response text is not in ``BAD_RESPONSES``. After the user confirms
        with "y", each collected assignment is approved through
        ``mturk_manager``; failures are reported and do not stop the loop.
        """
        to_approve = []
        for hit in db.get_pairings_for_run(run_id):
            if hit['conversation_id'] is None:
                continue
            try:
                # NOTE(review): the literal False is presumably the
                # is_sandbox flag -- confirm for sandbox runs.
                full_data = db.get_full_conversation_data(
                    run_id, hit['conversation_id'], False)
            except FileNotFoundError:
                continue

            # Only the first worker entry of the conversation is considered.
            datum = next(iter(full_data['worker_data'].values()))
            if datum['response']['text'] in BAD_RESPONSES:
                continue
            to_approve.append(datum['assignment_id'])
            print(f"To approve: {datum['assignment_id']}")

        print(f"Run ID {run_id}: to approve {len(to_approve)} HITs")
        conf = input("Confirm? (y/n): ")
        if conf != "y":
            print("\tCancelled approvals")
            return

        n_approved = 0
        didnt_approve = []
        for asgn_id in to_approve:
            try:
                mturk_manager.approve_work(asgn_id)
            except Exception:
                # Keep going on individual failures, but let Ctrl-C
                # (KeyboardInterrupt) abort the bulk approval.
                didnt_approve.append(asgn_id)
                print(f"Failed to approve: {asgn_id}")
            else:
                n_approved += 1
                print(f"Approved {asgn_id}")
        print(f"\tApproved {n_approved} HITs")
        if didnt_approve:
            print(
                f"\tFailed to approve assignments {','.join(didnt_approve)}"
            )

    def approve_assignment(asgn_id):
        """Approve a single assignment after interactive confirmation.

        The approval is issued with ``override_rejection=True``. A failure is
        reported together with the error instead of being raised.
        """
        conf = input(f"Confirm approving assignment {asgn_id}? (y/n): ")
        if conf != "y":
            print("\tCancelled approvals.")
            return

        try:
            mturk_manager.approve_work(asgn_id, override_rejection=True)
        except Exception as err:
            # Report why the approval failed; KeyboardInterrupt/SystemExit
            # are deliberately not caught.
            print(f"\tFailed to approve.")
            print(f"\t{err}")
        else:
            print(f"\tSuccessfully approved!")

    def award_from_file(bonus_file, msg):
        """Pay the bonuses listed in ``bonus_file`` after confirmation.

        Each non-blank line of the file is expected to hold four
        comma-separated fields: worker ID, assignment ID, unique request
        token and bonus amount. Blank lines are ignored; lines with a
        different number of fields or a non-numeric amount are reported and
        skipped. Bonuses that could not be paid are printed in the same
        comma-separated format.

        Args:
            bonus_file: Path of the UTF-8 encoded bonus file.
            msg: Reason text passed along with every bonus.
        """
        awards = []
        with open(bonus_file, encoding="utf-8") as award_fh:
            for line_no, raw_line in enumerate(award_fh, start=1):
                line = raw_line.strip()
                if not line:
                    continue
                fields = line.split(',')
                if len(fields) != 4:
                    print(f"\tSkipping malformed line {line_no}: {line}")
                    continue
                worker_id, asgn_id, request_tok, bonus_amt = fields
                try:
                    bonus_amt = float(bonus_amt)
                except ValueError:
                    print(f"\tSkipping malformed line {line_no}: {line}")
                    continue
                awards.append((worker_id, asgn_id, request_tok, bonus_amt))

        total_bonus = sum(award[-1] for award in awards)
        conf = input(
            f"Confirm awarding total bonus ${total_bonus} to {len(awards)} workers? "
        )
        if conf != "y":
            print("\tCancelled bonus.")
            return

        n_awarded = 0
        amt_awarded = 0.0
        didnt_award = []
        for worker_id, asgn_id, request_tok, bonus_amt in tqdm(awards):
            try:
                mturk_manager.pay_bonus(worker_id=worker_id,
                                        bonus_amount=bonus_amt,
                                        assignment_id=asgn_id,
                                        reason=msg,
                                        unique_request_token=request_tok)
            except Exception:
                # Remember the failure and continue with the next award;
                # Ctrl-C still aborts the whole batch.
                didnt_award.append(
                    (worker_id, asgn_id, request_tok, bonus_amt))
            else:
                n_awarded += 1
                amt_awarded += bonus_amt
        print(f"Awarded {amt_awarded} to {n_awarded} workers.")
        if didnt_award:
            print("Failed on:")
            # Same layout as the input file, so the output can be retried.
            for worker_id, asgn_id, request_tok, bonus_amt in didnt_award:
                print(f"{worker_id},{asgn_id},{request_tok},{bonus_amt}")

    def award_bonus(worker_id, bonus_amt, asgn_id, msg, request_tok):
        """Pay a single bonus after interactive confirmation.

        Args:
            worker_id: Worker receiving the bonus.
            bonus_amt: Amount passed to ``mturk_manager.pay_bonus``.
            asgn_id: Assignment the bonus is attached to.
            msg: Reason text passed along with the bonus.
            request_tok: Unique request token for the payment.
        """
        conf = input(f"Confirm awarding ${bonus_amt} to {worker_id}?")
        if conf != "y":
            print("\tCancelled bonus.")
            return

        try:
            mturk_manager.pay_bonus(worker_id=worker_id,
                                    bonus_amount=bonus_amt,
                                    assignment_id=asgn_id,
                                    reason=msg,
                                    unique_request_token=request_tok)
        except Exception as err:
            # The messages name the bonus: this function pays a bonus and
            # does not approve anything.
            print(f"\tFailed to award bonus.")
            print(f"\t{err}")
        else:
            print(f"\tSuccessfully awarded bonus!")

    def inspect_assignment(asgn_id):
        """Placeholder for inspecting a single assignment.

        Raises:
            NotImplementedError: Always; the lookup is not implemented yet.
        """
        # TODO: fetch the assignment with db.get_assignment_data(asgn_id) and
        # report when the assignment ID is not found.
        raise NotImplementedError

    def inspect_hit(hit_id):
        """Placeholder for inspecting a single HIT.

        Raises:
            NotImplementedError: Always; the lookup is not implemented yet.
        """
        # TODO: fetch the HIT with db.get_hit_data(hit_id) and report when
        # the HIT ID is not found.
        raise NotImplementedError

    def inspect_run_worker_pair(run_id, worker_id):
        """Show one worker's responses for a run and optionally approve them.

        Prints the context, then each question with its reference answer
        (None when absent), the worker's choice and reason, and the time
        spent on the HIT. If the user answers "y", the assignment is approved
        with ``override_rejection=True``.

        Nothing is approved when the run/worker pair is unknown or when the
        worker's response lacks the expected keys.
        """
        # .get() keeps the lookup from inserting an empty entry into the
        # run2worker defaultdict when the run ID is unknown.
        worker_data = run2worker.get(run_id, {}).get(worker_id)
        if worker_data is None:
            print(f"No data found for worker {worker_id} in run {run_id}.")
            return

        asgn_id = worker_data['assignment_id']
        ctx = worker_data['task_data'][0]['conversations'][0]['dialog'][0][
            'text']
        qsts = []
        answers = []
        for task_datum in worker_data['task_data']:
            qst_d = task_datum['conversations'][1]
            qsts.append(qst_d['dialog'][0]['text'])
            # A missing answer and an answer of None are both shown as None.
            answers.append(qst_d.get('answer'))

        try:
            responses = worker_data['response']['task_data']
            choices = [CHOICE2ANS[r['speakerChoice']] for r in responses]
            reasons = [r['textReason'] for r in responses]
        except KeyError:
            print("Key error!")
            print("task_data not in worker response!")
            return

        try:
            pair = db.get_worker_assignment_pairing(worker_id, asgn_id)
            hit_time = pair['task_end'] - pair['task_start']
        except Exception as err:
            # The duration is informational only; keep going without it.
            print(f"Could not compute HIT time: {err}")
            hit_time = None

        print(f"\nAssignment ID: {worker_data['assignment_id']}")
        print(f"CONTEXT: {ctx}\n")
        for qst, ans, choice, reason in zip(qsts, answers, choices, reasons):
            print(f"QUESTION: {qst}")
            print(f"ANSWER: {ans}")
            print(f"CHOICE: {choice}")
            print(f"REASON: {reason}")
            print()
        print(f"HIT time: {hit_time}")
        resp = input("Accept (y/n) ? ")
        if resp == "y":
            mturk_manager.approve_work(asgn_id, override_rejection=True)
            print("\tApproved!")

    def inspect_hit_worker_pair(hit_id, worker_id):
        """Print the assignment ID and submit date of a HIT's assignment.

        All assignments of ``hit_id`` are fetched through
        ``client.list_assignments_for_hit`` (following ``NextToken``
        pagination). The HIT is expected to have exactly one assignment;
        otherwise a message is printed and nothing else happens.

        Args:
            hit_id: ID of the HIT to look up.
            worker_id: Currently unused; kept so callers stay unchanged.
        """
        resp = client.list_assignments_for_hit(HITId=hit_id)
        all_asgns = list(resp['Assignments'])
        while 'NextToken' in resp and resp['NextToken']:
            resp = client.list_assignments_for_hit(HITId=hit_id,
                                                   NextToken=resp['NextToken'])
            # extend, not append: every element of all_asgns must be an
            # assignment record, not a page (list) of records.
            all_asgns.extend(resp['Assignments'])
            time.sleep(0.5)

        if len(all_asgns) != 1:
            print(f"Expected exactly one assignment for HIT {hit_id}, "
                  f"found {len(all_asgns)}.")
            return

        asgn = all_asgns[0]
        print(f"Assignment ID: {asgn['AssignmentId']}")
        print(f"Submit date: {asgn['SubmitTime'].strftime('%m/%d')}")
        # TODO: map the assignment back to its run via worker2run[worker_id]
        # and show the worker's responses the way inspect_run_worker_pair
        # does (context, questions, answers, choices, reasons, HIT time,
        # optional approval).

    # main loop
    while True:
        print("Enter 'p' to print runs")
        cmd = input("Enter command: ")
        if len(cmd) == 0 or cmd == "exit":
            break
        cmd_parts = cmd.split()
        if cmd_parts[0] == "p":
            get_run_id_data(run_ids)
        elif cmd_parts[0] == "inspect":
            assert len(cmd_parts) == 3, "Insufficient arguments!"
            inspect_run_worker_pair(cmd_parts[1], cmd_parts[2])
        elif cmd_parts[0] in ["get-asgn", 'ga']:
            assert len(
                cmd_parts
            ) == 3, "Insufficient arguments! Please provide worker_id and ..."
            inspect_hit_worker_pair(cmd_parts[1], cmd_parts[2])
        elif cmd_parts[0] == "inspect-asgn":
            assert len(cmd_parts) > 1, "No assignment ID provided."
            inspect_assignment(cmd_parts[1])
        elif cmd_parts[0] == "inspect-hit":
            assert len(cmd_parts) > 1, "No HIT ID provided."
            inspect_hit(cmd_parts[1])
        elif cmd_parts[0] == "approve":
            assert len(cmd_parts) > 1, "No run ID provided."
            run_id = cmd_parts[1]
            if run_id in run_ids:
                approve_run_hits(run_id)
            else:
                print(f"Run ID {run_id} not found!")
        elif cmd_parts[0] == "approve-asgn":
            assert len(cmd_parts) > 1, "No assignment ID provided."
            approve_assignment(cmd_parts[1])
        elif cmd_parts[0] == "award-from-file":
            assert len(cmd_parts) > 1, "No file provided."
            if not os.path.exists(cmd_parts[1]):
                print(f"File {cmd_parts[1]} not found!")
                continue
            award_from_file(cmd_parts[1], BONUS_MSG)
        elif cmd_parts[0] in ["d", "debug"]:
            ipdb.set_trace()
        else:
            print(f"Command `{cmd}` not understood.")