def main():
    """
    Human Evaluation of various responses to comments on images.

    A turker is shown an image and some dialog history. Then, the turker is
    asked to choose which response they think is more engaging.

    If no `--eval-data-path` is given, the data from the original Image-Chat
    dataset is used. To use your own data, please specify `--eval-data-path`,
    a path to an appropriate json file with a list of examples, where each
    example has the following structure:
        {
            'image_hash': <hash of image>,
            'dialog': [(personality, text), ...] - list of personality, text tuples
            'personality': <personality of responses to compare>
            '<compare_key_1>': <first response to compare>,
            '<compare_key_2>': <second option to compare>,
            .
            .
            .
        }
    Note that compare_key_1 and compare_key_2 can be any field, as long as
    they map to a string response.

    Example Scenario:
        Suppose you have the original Image-Chat dataset, and you would like
        to compare the outputs of your model called `model`.

        Your data may look like the following:
        [{
            'image_hash': hashforimageofcat,
            'dialog': [
                ('Sweet', 'What a cute cat!'),
                ('Neutral', 'Just looks like a plain cat to me')
            ]
            'personality': 'Sweet',
            'comment': 'It really is adorable if you look!',  # Human Comment
            'model_comment': 'You'll love it if you pet it!'  # Model Comment
        }, ...]

        Thus, you would specify `-ck1 comment -ck2 model_comment` to evaluate
        the outputs of the model vs. the human comments from
        Personality-Captions
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-min_t', '--min_turns', default=3, type=int, help='minimum number of turns'
    )
    argparser.add_argument(
        '-mt', '--max_turns', default=5, type=int, help='maximal number of chat turns'
    )
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        # BUG FIX: the adjacent literals 'time limit for turker' 'in onboarding'
        # concatenated to "...turkerin onboarding" — add the missing space.
        help='time limit for turker in onboarding',
    )
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=10,
        help='number of images to show to turker',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--data-path', type=str, default='', help='where to save data'
    )
    argparser.add_argument(
        '--eval-data-path',
        type=str,
        default='',
        help='where to load data to rank from. Leave blank to use Image-Chat data',
    )
    argparser.add_argument(
        '-ck1',
        '--compare-key-1',
        type=str,
        default='comment',
        help='key of first comparable',
    )
    argparser.add_argument(
        '-ck2',
        '--compare-key-2',
        type=str,
        default='comment',
        # BUG FIX: help text previously said 'key of first comparable'
        # (copy-paste from -ck1); this flag selects the SECOND comparable.
        help='key of second comparable',
    )
    argparser.add_argument(
        '-rnd',
        '--dialog-round',
        type=str,
        default='first_response',
        choices=round_choices,
        help='which dialog round to show',
    )
    argparser.add_argument(
        '--show-personality',
        default=True,
        type='bool',
        help='whether to show the personality',
    )
    ImageChatTeacher.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    build_ic(opt)

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    # Fall back to a default save location when --data-path was left blank.
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    # Default to ranking the Image-Chat test set when no custom data is given.
    if opt.get('eval_data_path') == '':
        opt['eval_data_path'] = os.path.join(opt['datapath'], 'image_chat/test.json')
    config = config_first if opt['dialog_round'] == 'first_response' else config_second
    opt.update(config)

    mturk_agent_ids = [CHOOSER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Give every worker access to the shared example stream before
            # running the onboarding world.
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # All workers are eligible; onboarding gates quality instead.
            return True

        def assign_worker_roles(workers):
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkImageChatStackRankWorld(
                opt, agents=agents, world_tag='conversation t_{}'.format(conv_idx)
            )
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs, even if the run crashed.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Human Evaluation of various image captions/comments.

    A turker is shown an image and two possible comments/captions, and
    optionally the personality used to create these captions. Then, the
    turker is asked to choose which caption they think is more engaging.

    In this example, we will just be comparing the original comment twice
    (this is just to demonstrate the task for future use).

    To use your own data, please specify `--eval-data-path` to an appropriate
    json file with a list of examples, where each example has the following
    structure:
        {
            'image_hash': <hash of image>,
            'personality': <personality, if applicable>,
            '<compare_key_1>': <first option to compare>,
            '<compare_key_2>': <second option to compare>,
            .
            .
            .
        }
    Note that compare_key_1 and compare_key_2 can be any field, as long as
    they map to a string comment/caption.

    Example Scenario:
        Suppose you have the original Personality-Captions dataset, and you
        would like to compare the outputs of your model called `model`.

        Your data may look like the following:
        [{
            'image_hash': hashforimageofcat,
            'personality': 'Sweet',
            'comment': 'Look at the cute cat!',  # Human Comment
            'model_comment': 'That's a weird looking dog'  # Model Comment
        }, ...]

        Thus, you would specify `-ck1 comment -ck2 model_comment` to evaluate
        the outputs of the model vs. the human comments from
        Personality-Captions
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        # BUG FIX: the adjacent literals 'time limit for turker' 'in onboarding'
        # concatenated to "...turkerin onboarding" — add the missing space.
        help='time limit for turker in onboarding',
    )
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=10,
        help='number of images to show to turker',
    )
    argparser.add_argument(
        '--data-path', type=str, default='', help='where to save data'
    )
    argparser.add_argument(
        '--eval-data-path',
        type=str,
        default='',
        help='where to load data to rank from. Leave blank to use '
        'Personality-Captions data',
    )
    argparser.add_argument(
        '-ck1',
        '--compare-key-1',
        type=str,
        default='comment',
        help='key of first option to compare',
    )
    argparser.add_argument(
        '-ck2',
        '--compare-key-2',
        type=str,
        default='comment',
        help='key of second option to compare',
    )
    argparser.add_argument(
        '--show-personality',
        default=True,
        type='bool',
        help='whether to show the personality',
    )
    PersonalityCaptionsTeacher.add_cmdline_args(argparser)
    opt = argparser.parse_args()

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    # Fall back to a default save location when --data-path was left blank.
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    # Default to the Personality-Captions training data when no custom eval
    # data was supplied.
    if opt.get('eval_data_path') == '':
        opt['eval_data_path'] = os.path.join(
            opt['datapath'], 'personality_captions/train.json'
        )
    opt.update(task_config)

    mturk_agent_ids = [CHOOSER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Give every worker access to the shared example stream before
            # running the onboarding world.
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # All workers are eligible; onboarding gates quality instead.
            return True

        def assign_worker_roles(workers):
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkPersonalityCaptionsStackRankWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx),
            )
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs, even if the run crashed.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Image Chat data collection task.

    A worker is shown an image and part of a conversation, and is given a
    personality with which the worker should continue the conversation.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-min_t', '--min_turns', default=3, type=int, help='minimum number of turns'
    )
    argparser.add_argument(
        '-mt', '--max_turns', default=5, type=int, help='maximal number of chat turns'
    )
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        # BUG FIX: the adjacent literals 'time limit for turker' 'in onboarding'
        # concatenated to "...turkerin onboarding" — add the missing space.
        help='time limit for turker in onboarding',
    )
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=10,
        help='number of images to show to turker',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24 * 5,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--second-response',
        type='bool',
        default=False,
        help='Specify if getting responses to responses to original comment',
    )
    ImageChatTeacher.add_cmdline_args(argparser)
    opt = argparser.parse_args()

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    # BUG FIX: --data-path defaults to '', so after parse_args the 'data_path'
    # key is ALWAYS present and the original `if 'data_path' not in opt:` check
    # never fired. Also test for the empty string (matching the sibling eval
    # scripts) so the fallback save location is actually applied.
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    # Second-response runs show a different task config than first-response.
    opt.update(config_second if opt['second_response'] else config_first)

    mturk_agent_ids = [RESPONDER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    personality_generator = PersonalityGenerator(opt)
    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()
        mturk_manager.ready_to_accept_workers()

        def run_onboard(worker):
            # Attach the shared generators so each worker draws from the same
            # personality/example pools during onboarding and the task.
            worker.personality_generator = personality_generator
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # All workers are eligible; onboarding gates quality instead.
            return True

        def assign_worker_roles(workers):
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkImageChatWorld(
                opt, agents=agents, world_tag='conversation t_{}'.format(conv_idx)
            )
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs, even if the run crashed.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()