def main():
    """Print a handful of example dialogs (with images) from the configured task."""
    random.seed(42)

    # Build the command-line parser; force ordered training data and
    # register the image-loader options.
    parser = ParlaiParser()
    parser.add_argument('-n', '--num-examples', default=10)
    parser.set_defaults(datatype='train:ordered')
    ImageLoader.add_cmdline_args(parser)
    opt = parser.parse_args()
    opt['no_cuda'] = False
    opt['gpu'] = 0

    # A repeat-label agent just echoes the labels; pair it with the task world.
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Show example dialogs until the requested count or the epoch ends.
    with world:
        requested = int(opt['num_examples'])
        for _ in range(requested):
            world.parley()
            print(world.display() + '\n~~')
            if world.epoch_done():
                print('EPOCH DONE')
                break
def main():
    """Run a tiny train/validate demo loop and print timing."""
    argparser = ParlaiParser()
    argparser.add_argument('-n', '--num-examples', default=10)
    opt = argparser.parse_args()

    agent = Agent(opt)

    # Build one world per datatype, sharing the same agent.
    opt['datatype'] = 'train'
    train_world = create_task(opt, agent)
    opt['datatype'] = 'valid'
    valid_world = create_task(opt, agent)

    start_time = time.time()
    # train / valid loop
    for _ in range(1):
        print('[ training ]')
        for _ in range(10):
            # train for a bit
            train_world.parley()
        print('[ training summary. ]')
        print(train_world.report())

        print('[ validating ]')
        for _ in range(1):
            # check valid accuracy
            valid_world.parley()
        print('[ validation summary. ]')
        print(valid_world.report())

    print('finished in {} s'.format(round(time.time() - start_time, 2)))
def main():
    """Parse command-line options and hand them to display_data."""
    random.seed(42)
    # -n is parsed directly as an int.
    argparser = ParlaiParser()
    argparser.add_argument('-n', '--num-examples', default=10, type=int)
    options = argparser.parse_args()
    display_data(options)
def main():
    """Display example dialogs from the task using a repeat-label agent."""
    random.seed(42)

    # Command-line setup: -n is already an int thanks to type=int.
    parser = ParlaiParser()
    parser.add_argument('-n', '--num-examples', default=10, type=int)
    opt = parser.parse_args()

    # The repeat-label agent echoes labels back; attach it to the task.
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Show example dialogs; stop early if the epoch finishes.
    with world:
        shown = 0
        while shown < opt['num_examples']:
            world.parley()
            print(world.display() + '\n~~')
            shown += 1
            if world.epoch_done():
                print('EPOCH DONE')
                break
def main():
    """Display example dialogs produced by a model agent on the given task."""
    random.seed(42)

    # Get command line arguments.  -n is parsed as an int directly, instead
    # of being parsed as a string and cast with int() at the use site below
    # (the previous behavior).
    parser = ParlaiParser(True, True)
    parser.add_argument('-n', '--num-examples', default=10, type=int)
    opt = parser.parse_args()

    # Create model and assign it to the specified task.
    agent = create_agent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs.
    with world:
        for _ in range(opt['num_examples']):
            world.parley()
            print(world.display() + "\n~~")
            if world.epoch_done():
                print("EPOCH DONE")
                break
def main():
    """Chat interactively with a model from the command line."""
    random.seed(42)

    # Command-line setup.
    argparser = ParlaiParser(True, True)
    argparser.add_argument('-d', '--display-examples', type='bool', default=False)
    opt = argparser.parse_args()
    # The "task" side of the world is a local human typing at the terminal.
    opt['task'] = 'parlai.agents.local_human.local_human:LocalHumanAgent'
    print(opt)

    # Create model and assign it to the specified task.
    agent = create_agent(opt)
    world = create_task(opt, agent)

    # Chat until the conversation (epoch) is done.
    while True:
        world.parley()
        if opt['display_examples']:
            print("---")
            print(world.display() + "\n~~")
        if world.epoch_done():
            print("EPOCH DONE")
            break
def main():
    """Evaluate a model on a task, printing per-example reports.

    Runs up to -n examples on the 'valid' datatype (default), printing the
    running metric report after each parley, then shuts the world down.
    """
    random.seed(42)

    # Get command line arguments.  -n is parsed as an int directly, instead
    # of being parsed as a string and cast with int() at the use site below
    # (the previous behavior).
    parser = ParlaiParser(True, True)
    parser.add_argument('-n', '--num-examples', default=100000000, type=int)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.set_defaults(datatype='valid')
    opt = parser.parse_args()

    # Create model and assign it to the specified task.
    agent = create_agent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs:
    for _ in range(opt['num_examples']):
        world.parley()
        print("---")
        if opt['display_examples']:
            print(world.display() + "\n~~")
        print(world.report())
        if world.epoch_done():
            print("EPOCH DONE")
            break
    world.shutdown()
def main():
    """Collect evaluation options and delegate to eval_model."""
    random.seed(42)

    # Command-line setup; defaults to the validation split.
    argparser = ParlaiParser(True, True)
    argparser.add_argument('-n', '--num-examples', default=100000000)
    argparser.add_argument('-d', '--display-examples', type='bool', default=False)
    argparser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    argparser.set_defaults(datatype='valid')

    # Suppress the automatic argument dump; eval_model handles reporting.
    opt = argparser.parse_args(print_args=False)
    eval_model(opt, argparser)
def setup_args(parser=None) -> ParlaiParser:
    """
    Setup args.

    :param parser: an existing ParlaiParser to extend; a fresh one is created
        only when none is supplied.  (Previously this argument was accepted
        but unconditionally overwritten, so callers passing a parser had it
        silently ignored.)
    :return: the parser with ACUTE-Eval arguments added.
    """
    if parser is None:
        parser = ParlaiParser(True, False)
    parser.add_argument(
        '--ids',
        type='nonestr',
        help='Comma separated list of CONFIG ids for round robin evaluation (must be at least 2)',
        default=None,
    )
    parser.add_argument(
        '--id-pairs',
        type='nonestr',
        help='Comma separated, colon-delimited list of CONFIG pairs for evaluation, '
        'e.g. model1:model2,model1:model3',
        default=None,
    )
    parser.add_argument(
        '-eval',
        '--acute-eval-type',
        type=str,
        default='engaging',
        choices=list(ACUTE_EVAL_TYPES.keys()),
        help='which evaluation to run for acute',
    )
    parser.add_argument(
        '-mpp',
        '--matchups-per-pair',
        type=int,
        default=MATCHUPS_PER_PAIR,
        help='How many matchups to generate for each pair of ids.',
    )
    parser.add_argument(
        '--live-acute',
        type='bool',
        default=False,
        help='whether this is a LIVE acute run. ',
    )
    parser.add_argument(
        '--onboarding-path',
        type=str,
        default=os.path.join(EXAMPLE_PATH, 'onboarding.jsonl'),
        help='path to onboarding pair',
    )
    parser.set_defaults(selfchat_task=True, task='self_chat')
    return parser
def main():
    """
    Run the deal-or-no-deal MTurk data-collection task.

    One agent (a model, a local human, or a second MTurk worker, depending
    on --two_mturk_agents / the presence of a model in opt) talks to an
    MTurk worker to negotiate a deal.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--two_mturk_agents', dest='two_mturk_agents',
                           action='store_true', help='data collection mode '
                           'with converations between two MTurk agents')
    opt = argparser.parse_args()
    # Fixed task settings for this run.
    opt['task'] = 'dealnodeal'
    opt['datatype'] = 'valid'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()
        # This task has no onboarding world.
        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker is eligible.
            return True

        def assign_worker_roles(workers):
            # Round-robin assignment of the configured agent ids.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]

            # Create a local agent when only one MTurk worker participates:
            # a model agent if one is configured, otherwise a local human.
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)
                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDealNoDealDialogWorld(
                opt=opt,
                agents=agents
            )

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_args(cls):
    """Build the minimal parser used by the short-option test."""
    p = ParlaiParser(False, False, description="Short opt test")
    # Two options whose short flags deliberately share a prefix.
    for flags in (('-m', '--model'), ('-mxx', '--my-other-option')):
        p.add_argument(*flags)
    return p
def add_cmdline_args(
    cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
    """
    Add TF-IDF retriever command-line arguments.

    :param parser: parser to extend; options are grouped under
        'Retriever Arguments'.
    :param partial_opt: accepted for API compatibility; unused here.
    :return: the parser.  (The original implementation returned None
        despite the declared ``-> ParlaiParser`` annotation; the missing
        ``return parser`` is added here.)
    """
    retriever = parser.add_argument_group('Retriever Arguments')
    retriever.add_argument(
        '--retriever-numworkers',
        type=int,
        default=None,
        help='Number of CPU processes (for tokenizing, etc)',
    )
    retriever.add_argument(
        '--retriever-ngram',
        type=int,
        default=2,
        help='Use up to N-size n-grams (e.g. 2 = unigrams + bigrams)',
    )
    retriever.add_argument(
        '--retriever-hashsize',
        type=int,
        default=int(math.pow(2, 24)),
        help='Number of buckets to use for hashing ngrams',
    )
    retriever.add_argument(
        '--retriever-tokenizer',
        type=str,
        default='simple',
        help='String option specifying tokenizer type to use.',
    )
    retriever.add_argument(
        '--retriever-num-retrieved',
        default=5,
        type=int,
        help='How many docs to retrieve.',
    )
    retriever.add_argument(
        '--remove-title',
        type='bool',
        default=False,
        help='Whether to remove the title from the retrieved passage',
    )
    retriever.add_argument(
        '--retriever-mode',
        choices=['keys', 'values'],
        default='values',
        help='Whether to retrieve the stored key or the stored value. For '
        'example, if you want to return the text of an example, use '
        'keys here; if you want to return the label, use values here.',
    )
    retriever.add_argument(
        '--index-by-int-id',
        type='bool',
        default=True,
        help=(
            'Whether to index into database by doc id as an integer. This '
            'defaults to true for DBs built using ParlAI.'
        ),
    )
    retriever.add_argument(
        '--tfidf-context-length',
        default=-1,
        type=int,
        help='Number of past utterances to remember when '
        'building flattened batches of data in multi-'
        'example episodes.',
    )
    retriever.add_argument(
        '--tfidf-include-labels',
        default=True,
        type='bool',
        help='Specifies whether or not to include labels '
        'as past utterances when building flattened '
        'batches of data in multi-example episodes.',
    )
    return parser
def setup_args(cls) -> ParlaiParser:
    """Build the parser for converting a fairseq checkpoint to ParlAI format."""
    p = ParlaiParser()
    # Input/output model paths.
    p.add_argument(
        '--input',
        type=str,
        nargs='+',
        help='The input fairseq model path. Specify multiple to imply a join is necessary',
    )
    p.add_argument('--output', type=str, help='The output ParlAI model path')
    # HuggingFace tokenizer assets.
    p.add_argument('--vocab', type=str, help='The hugging face vocab file path, if applicable')
    p.add_argument('--merge', type=str, help='The hugging face merge file path, if applicable')
    p.add_argument(
        '--add-prefix-space',
        type='bool',
        default=True,
        help='Add prefix space for hugging face bpe',
    )
    # Architecture / dictionary options.
    p.add_argument(
        '--activation',
        type=str,
        help='Activation function',
        choices=['relu', 'gelu'],
        default='gelu',
    )
    p.add_argument(
        '--tokenizer',
        type=str,
        help='Dict tokenizer',
        choices=['bytelevelbpe', 'gpt2'],
        default='bytelevelbpe',
    )
    p.add_argument('--delimiter', type=str, default=' ', help='Delimiter')
    p.add_argument(
        '--retain-bos-emb',
        type='bool',
        default=False,
        help='Retain the BOS embedding.',
    )
    p.add_argument(
        '--model',
        type=str,
        default='transformer/generator',
        help='Which ParlAI agent to use.',
    )
    p.add_argument('--fp16', type='bool', default=False, help='Whether to initialize with fp16')
    p.add_argument(
        '--history-add-global-end-token',
        type='nonestr',
        default='end',
        hidden=True,
        choices=[None, 'end'],
        help='Add special token to the end of history encoding.',
    )
    return p
def main():
    """
    Run the PersonaChat model-evaluation MTurk task.

    A single MTurk worker (PERSON_1) chats with a model (configured via
    ``model_agent_opt`` below) inside a PersonaChatEvalWorld, after an
    onboarding step that assigns the worker a persona.

    NOTE(review): the original docstring described a deal-negotiation task;
    it appears to have been copy-pasted and did not match this code.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t', '--min_turns', default=5, type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt', '--max_turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('-mx_rsp_time', '--max_resp_time', default=150,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-mx_psn_time', '--max_persona_time', type=int,
                           default=300, help='time limit for turker'
                           'entering the persona')
    argparser.add_argument('--ag_shutdown_time', default=120, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type', default='both', type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised', default=True, type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt', '--range_turn', default='5,7',
                           help='sample range of number of turns')
    opt = argparser.parse_args()
    # Task name defaults to the directory this file lives in.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    # SET MODEL AGENT OPT HERE
    model_agent_opt = {}

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        # Block previously-flagged workers (live runs only).
        if not opt['is_sandbox']:
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w,
                    'We found that you have unexpected behaviors in our previous HITs. For more questions please email us.'
                )

        def run_onboard(worker):
            # Assign the worker a persona before the main conversation.
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker is eligible.
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = PersonaChatEvalWorld(
                opt=opt,
                agents=[agents],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=model_agent_opt,
                world_tag='conversation t_{}'.format(conv_idx))
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Run the deal-or-no-deal MTurk data-collection task.

    One agent (a model, a local human, or a second MTurk worker, depending
    on --two_mturk_agents / the presence of a model in opt) talks to an
    MTurk worker to negotiate a deal.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--two_mturk_agents', dest='two_mturk_agents',
                           action='store_true', help='data collection mode '
                           'with converations between two MTurk agents')
    opt = argparser.parse_args()
    # Fixed task settings for this run.
    opt['task'] = 'dealnodeal'
    opt['datatype'] = 'valid'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()
        # This task has no onboarding world.
        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker is eligible.
            return True

        def assign_worker_roles(workers):
            # Round-robin assignment of the configured agent ids.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]

            # Create a local agent when only one MTurk worker participates:
            # a model agent if one is configured, otherwise a local human.
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)
                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDealNoDealDialogWorld(opt=opt, agents=agents)

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
print(text) if world.epoch_done(): print('EPOCH DONE') break fw.close() if __name__ == '__main__': random.seed(42) # Get command line arguments parser = ParlaiParser() parser.add_argument( '-n', '--num-examples', default=-1, type=int, help='Total number of exs to convert, -1 to convert \ all examples', ) parser.add_argument( '-of', '--outfile', default=None, type=str, help='Output file where to save, by default will be \ created in /tmp', ) parser.add_argument( '-if', '--ignore-fields', default='id',
def add_cmdline_args(
    cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
    """
    Add command-line arguments specifically for this agent.

    :param parser: parser to extend; transformer options are grouped under
        'Transformer Arguments'.
    :param partial_opt: partially-parsed options, forwarded to the parent
        class and the dictionary class.
    :return: the 'Transformer Arguments' argument group.
    """
    super().add_cmdline_args(parser, partial_opt=partial_opt)
    agent = parser.add_argument_group('Transformer Arguments')
    add_common_cmdline_args(agent)
    # memory and knowledge arguments
    agent.add_argument(
        '--use-memories',
        type='bool',
        default=False,
        help='use memories: must implement the function '
        '`_vectorize_memories` to use this',
    )
    agent.add_argument(
        '--wrap-memory-encoder',
        type='bool',
        default=False,
        help='wrap memory encoder with MLP',
    )
    agent.add_argument(
        '--memory-attention',
        type=str,
        default='sqrt',
        choices=['cosine', 'dot', 'sqrt'],
        help='similarity for basic attention mechanism '
        'when using transformer to encode memories',
    )
    # model specific arguments
    agent.add_argument('--normalize-sent-emb', type='bool', default=False)
    agent.add_argument('--share-encoders', type='bool', default=True)
    # FIX: this option was previously added via `parser.add_argument`,
    # leaving it outside the 'Transformer Arguments' group — inconsistent
    # with every other option registered here.
    agent.add_argument(
        '--share-word-embeddings',
        type='bool',
        default=True,
        help='Share word embeddings table for candidate and context'
        'in the memory network',
    )
    agent.add_argument(
        '--learn-embeddings', type='bool', default=True, help='learn embeddings'
    )
    agent.add_argument(
        '--data-parallel',
        type='bool',
        default=False,
        help='use model in data parallel, requires ' 'multiple gpus',
    )
    agent.add_argument(
        '--reduction-type',
        type=str,
        default='mean',
        choices=['first', 'max', 'mean'],
        help='Type of reduction at the end of transformer',
    )
    parser.set_defaults(learningrate=0.0001, optimizer='adamax', truncate=1024)
    cls.dictionary_class().add_cmdline_args(parser, partial_opt=partial_opt)
    return agent
print(f"Run ID {run_id} not found!") elif cmd_parts[0] == "approve-asgn": assert len(cmd_parts) > 1, "No assignment ID provided." approve_assignment(cmd_parts[1]) elif cmd_parts[0] == "award-from-file": assert len(cmd_parts) > 1, "No file provided." if not os.path.exists(cmd_parts[1]): print(f"File {cmd_parts[1]} not found!") continue award_from_file(cmd_parts[1], BONUS_MSG) elif cmd_parts[0] in ["d", "debug"]: ipdb.set_trace() else: print(f"Command `{cmd}` not understood.") if __name__ == '__main__': parser = ParlaiParser(False, False) parser.add_mturk_args() parser.add_argument('--run_ids', type=str, default=None, help='comma separated run ids') parser.add_argument('--no_sandbox', action='store_true', help='If given, run against live data') opt = parser.parse_args() main(opt) # python parlai_internal/mturk/tasks/pairwise_dialogue_eval/scripts/accept_all_hits.py --run_ids pairwise_dialogue_eval_1556568703,pairwise_dialogue_eval_1556853821
def add_cmdline_args(
    cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
    """
    Register the Torch classifier command-line options.

    Adds class, evaluation, interactive-mode, and miscellaneous options
    under the 'Torch Classifier Arguments' group and returns that group.
    """
    super().add_cmdline_args(parser, partial_opt=partial_opt)
    group = parser.add_argument_group('Torch Classifier Arguments')
    # class arguments
    group.add_argument(
        '--classes',
        type=str,
        nargs='*',
        default=None,
        help='the name of the classes.',
    )
    group.add_argument(
        '--class-weights',
        type=float,
        nargs='*',
        default=None,
        help='weight of each of the classes for the softmax',
    )
    group.add_argument(
        '--ref-class',
        type=str,
        default=None,
        hidden=True,
        help='the class that will be used to compute '
        'precision and recall. By default the first '
        'class.',
    )
    group.add_argument(
        '--threshold',
        type=float,
        default=0.5,
        help='during evaluation, threshold for choosing '
        'ref class; only applies to binary '
        'classification',
    )
    # interactive mode
    group.add_argument(
        '--print-scores',
        type='bool',
        default=False,
        help='print probability of chosen class during '
        'interactive mode',
    )
    # miscellaneous arguments
    group.add_argument(
        '--data-parallel',
        type='bool',
        default=False,
        help='uses nn.DataParallel for multi GPU',
    )
    group.add_argument(
        '--classes-from-file',
        type=str,
        default=None,
        help='loads the list of classes from a file',
    )
    group.add_argument(
        '--ignore-labels',
        type='bool',
        default=None,
        help='Ignore labels provided to model',
    )
    group.add_argument(
        '--update-classifier-head-only',
        type='bool',
        default=False,
        help='Freeze the encoder and update the classifier head only',
    )
    group.set_defaults(use_reply='none')
    return group
def main():
    """
    Run the MTurk evaluation of Controllable Dialog models.

    An MTurk worker chats with one model per HIT; each worker is routed to a
    model setting (from SETTINGS_TO_RUN) they have not evaluated yet, with
    bookkeeping protected by a lock so concurrent conversations stay
    consistent.
    """
    start_time = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--max-resp-time', default=240, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--max-choice-time', type=int, default=300,
                           help='time limit for turker'
                           'choosing the topic')
    argparser.add_argument('--ag-shutdown-time', default=120, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--num-turns', default=6, type=int,
                           help='number of turns of dialogue')
    argparser.add_argument('--human-eval', type='bool', default=False,
                           help='human vs human eval, no models involved')
    argparser.add_argument('--auto-approve-delay', type=int,
                           default=3600 * 24 * 2,
                           help='how long to wait for auto approval')
    argparser.add_argument('--only-masters', type='bool', default=False,
                           help='Set to true to use only master turks for '
                           'this test eval')
    argparser.add_argument('--create-model-qualif', type='bool', default=True,
                           help='Create model qualif so unique eval between'
                           'models.')
    argparser.add_argument('--limit-workers', type=int,
                           default=len(SETTINGS_TO_RUN),
                           help='max HITs a worker can complete')
    argparser.add_argument(
        '--mturk-log', type=str,
        default=('$HOME/ParlAI/data/mturklogs/controllable/{}.log'.format(
            start_time)))
    argparser.add_argument('--short-eval', type='bool', default=True,
                           help='Only ask engagingness question and persona'
                           'question.')
    # persona specific arguments
    argparser.add_argument('--persona-type', type=str, default='self',
                           choices=['self', 'other', 'none'])
    argparser.add_argument('--persona-datatype', type=str, default='valid',
                           choices=['train', 'test', 'valid'])
    argparser.add_argument('--max-persona-time', type=int, default=360,
                           help='max time to view persona')

    def get_logger(opt):
        # Configure the root logger: console always, plus a file handler
        # when --mturk-log is set ($HOME is expanded manually).
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        fmt = logging.Formatter('%(asctime)s: [ %(message)s ]',
                                '%m/%d/%Y %I:%M:%S %p')
        console = logging.StreamHandler()
        console.setFormatter(fmt)
        logger.addHandler(console)
        if 'mturk_log' in opt:
            logfn = opt['mturk_log'].replace('$HOME', os.environ['HOME'])
            if not os.path.isdir(os.path.dirname(logfn)):
                raise OSError("Please run `mkdir -p {}`".format(
                    os.path.dirname(logfn)))
            logfile = logging.FileHandler(logfn, 'a')
            logfile.setFormatter(fmt)
            logger.addHandler(logfile)
        logger.info('COMMAND: %s' % ' '.join(sys.argv))
        logger.info('-' * 100)
        logger.info('CONFIG:\n%s' % json.dumps(opt, indent=4, sort_keys=True))
        return logger

    start_opt = argparser.parse_args()

    task_config['task_description'] = task_config['task_description'].format(
        start_opt['reward'])

    # set options
    start_opt['limit_workers'] = len(SETTINGS_TO_RUN)
    start_opt['allowed_conversations'] = 1
    start_opt['max_hits_per_worker'] = start_opt['limit_workers']
    start_opt['task'] = os.path.basename(
        os.path.dirname(os.path.abspath(__file__)))

    start_opt.update(task_config)

    get_logger(start_opt)

    # Shared mutable state across conversations; guarded by `lock` below.
    model_share_params = {}
    worker_models_seen = {}
    model_opts = {}
    model_counts = {}

    lock = Lock()

    # Pre-build one agent per model setting and keep its shared params so
    # each conversation can instantiate a cheap copy.
    for setup in SETTINGS_TO_RUN:
        assert 'human' not in setup
        model_counts[setup] = 0
        agent_config = getattr(mcf, setup)
        combined_config = copy.deepcopy(start_opt)
        for k, v in agent_config.items():
            combined_config[k] = v
            combined_config['override'][k] = v
        folder_name = '{}-{}'.format(setup, start_time)
        combined_config['save_data_path'] = os.path.join(
            start_opt['datapath'], 'local_controllable_dialogue', folder_name)
        model_opts[setup] = combined_config
        bot = create_agent(combined_config, True)
        model_share_params[setup] = bot.share()

    if not start_opt.get('human_eval'):
        mturk_agent_ids = ['PERSON_1']
    else:
        mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=start_opt,
                                 mturk_agent_ids=mturk_agent_ids)

    personas_generator = PersonasGenerator(start_opt)

    directory_path = os.path.dirname(os.path.abspath(__file__))

    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []

        # assign qualifications so each worker only evaluates each model once
        if start_opt['create_model_qualif']:
            qual_name = 'ControlEvalRound2'
            qual_desc = (
                'Qualification to ensure workers complete only a certain'
                'number of these HITs')
            qualification_id = mturk_utils.find_or_create_qualification(
                qual_name, qual_desc, False)
            print('Created qualification: ', qualification_id)
            start_opt['unique_qualif_id'] = qualification_id

        def run_onboard(worker):
            # Assign the worker a persona before the main conversation.
            worker.personas_generator = personas_generator
            world = PersonaAssignWorld(start_opt, worker)
            world.parley()
            world.shutdown()

        def check_worker_eligibility(worker):
            # Eligible while the worker still has unseen model settings.
            worker_id = worker.worker_id
            lock.acquire()
            retval = len(worker_models_seen.get(worker_id, [])) < len(
                SETTINGS_TO_RUN)
            lock.release()
            return retval

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits(qualifications=agent_qualifications)

        def run_conversation(mturk_manager, opt, workers):
            conv_idx = mturk_manager.conversation_index

            # gotta find a bot this worker hasn't seen yet
            assert len(workers) == 1
            worker_id = workers[0].worker_id
            lock.acquire()
            if worker_id not in worker_models_seen:
                worker_models_seen[worker_id] = set()
            print("MODELCOUNTS:")
            print(pprint.pformat(model_counts))
            logging.info("MODELCOUNTS\n" + pprint.pformat(model_counts))
            # Prefer less-used settings; random jitter breaks ties.
            model_options = [
                (model_counts[setup_name] + 10 * random.random(), setup_name)
                for setup_name in SETTINGS_TO_RUN
                if setup_name not in worker_models_seen[worker_id]
            ]
            if not model_options:
                lock.release()
                logging.error(
                    "Worker {} already finished all settings! Returning none".
                    format(worker_id))
                return None
            _, model_choice = min(model_options)

            worker_models_seen[worker_id].add(model_choice)
            model_counts[model_choice] += 1
            lock.release()

            world = ControllableDialogEval(
                opt=model_opts[model_choice],
                agents=workers,
                num_turns=start_opt['num_turns'],
                max_resp_time=start_opt['max_resp_time'],
                model_agent_opt=model_share_params[model_choice],
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
                model_config=model_choice,
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            # Roll back the bookkeeping if the conversation did not finish,
            # so the worker can be matched with this setting again.
            lock.acquire()
            if not world.convo_finished:
                model_counts[model_choice] -= 1
                worker_models_seen[worker_id].remove(model_choice)
            lock.release()

            world.shutdown()
            gc.collect()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Human Evaluation of various image captions/comments.

    A turker is shown an image and two possible comments/captions, and
    optionally the personality used to create these captions. Then, the
    turker is asked to choose which caption they think is more engaging.

    In this example, we will just be comparing the original comment twice
    (this is just to demonstrate the task for future use).

    To use your own data, please specify `--eval-data-path` to an appropriate
    json file with a list of examples, where each example has the following
    structure::

        {
            'image_hash': <hash of image>,
            'personality': <personality, if applicable>,
            '<compare_key_1>': <first option to compare>,
            '<compare_key_2>': <second option to compare>,
            ...
        }

    Note that compare_key_1 and compare_key_2 can be any field, as long as
    they map to a string comment/caption.

    Example Scenario: suppose you have the original Personality-Captions
    dataset, and you would like to compare the outputs of your model called
    `model`. Your data may look like the following::

        [{
            'image_hash': hashforimageofcat,
            'personality': 'Sweet',
            'comment': 'Look at the cute cat!',            # Human Comment
            'model_comment': "That's a weird looking dog"  # Model Comment
        }, ...]

    Thus, you would specify `-ck1 comment -ck2 model_comment` to evaluate
    the outputs of the model vs. the human comments from
    Personality-Captions.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        help='time limit for turker'
        'in onboarding',
    )
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=10,
        help='number of images to show \
        to turker',
    )
    argparser.add_argument('--data-path', type=str, default='',
                           help='where to save data')
    argparser.add_argument(
        '--eval-data-path',
        type=str,
        default='',
        help='where to load data to rank from. Leave '
        'blank to use Personality-Captions data',
    )
    argparser.add_argument(
        '-ck1',
        '--compare-key-1',
        type=str,
        default='comment',
        help='key of first option to compare',
    )
    argparser.add_argument(
        '-ck2',
        '--compare-key-2',
        type=str,
        default='comment',
        help='key of second option to compare',
    )
    argparser.add_argument(
        '--show-personality',
        default=True,
        type='bool',
        help='whether to show the personality',
    )
    PersonalityCaptionsTeacher.add_cmdline_args(argparser)
    opt = argparser.parse_args()

    # Task name and save path default to the directory this file lives in.
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    # Fall back to the Personality-Captions training data when no custom
    # evaluation data is supplied.
    if opt.get('eval_data_path') == '':
        opt['eval_data_path'] = os.path.join(
            opt['datapath'], 'personality_captions/train.json')
    opt.update(task_config)

    mturk_agent_ids = [CHOOSER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Each worker gets an example generator for their onboarding.
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # Every worker is eligible.
            return True

        def assign_worker_roles(workers):
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkPersonalityCaptionsStackRankWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx))
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# Data/model file paths and training switches (loader-style add_arg calls).
argparser.add_arg('--train_data_file', type=str, default='')
argparser.add_arg('--valid_data_file', type=str, default='')
argparser.add_arg('--perf_out_file', type=str, default='')
argparser.add_arg('--weight_file', type=str, default='')
argparser.add_arg('--model_file', type=str, default='')
argparser.add_arg('--data_agent_file', type=str, default='')
argparser.add_arg('--wrong_data_file', type=str, default='')
# NOTE(review): argparse `type=bool` treats any non-empty string as True;
# confirm these flags are only ever set programmatically, not from the CLI.
argparser.add_arg('--once', type=bool, default=False)
argparser.add_arg('--job_num', type=int)
argparser.add_arg('--counter_ablation', type=bool, default=False)
argparser.add_arg('--room_ablation', type=bool, default=False)
# ============ above copied from projects/graph_world2/train.py ============
# Job scheduling and evaluation-mode switches for this script.
argparser.add_argument('--num_machines', type=int, default=1)
argparser.add_argument('--job_timeout', type=float, default=3600 * 4)
argparser.add_argument('--split', action='store_true', default=False)
argparser.add_argument('--train', action='store_true', default=False)
argparser.add_argument('--eval', action='store_true', default=False)
argparser.add_argument('--seq2seq', action='store_true', default=False)
argparser.add_argument('--constrain', action='store_true', default=False)
argparser.add_argument('--rounds_breakdown', action='store_true', default=False)
argparser.add_argument('--data_breakdown', action='store_true', default=False)
argparser.add_argument('--ablation', action='store_true', default=False)
# Data/model file paths and training switches (loader-style add_arg calls).
argparser.add_arg('--train_data_file', type=str, default='')
argparser.add_arg('--valid_data_file', type=str, default='')
argparser.add_arg('--perf_out_file', type=str, default='')
argparser.add_arg('--weight_file', type=str, default='')
argparser.add_arg('--model_file', type=str, default='')
argparser.add_arg('--data_agent_file', type=str, default='')
argparser.add_arg('--wrong_data_file', type=str, default='')
# NOTE(review): argparse `type=bool` treats any non-empty string as True;
# confirm these flags are only ever set programmatically, not from the CLI.
argparser.add_arg('--once', type=bool, default=False)
argparser.add_arg('--job_num', type=int)
argparser.add_arg('--counter_ablation', type=bool, default=False)
argparser.add_arg('--room_ablation', type=bool, default=False)
# ============ above copied from projects/graph_world2/train.py ============
# Job scheduling and evaluation-mode switches for this script.
argparser.add_argument('--num_machines', type=int, default=1)
argparser.add_argument('--job_timeout', type=float, default=3600*4)
argparser.add_argument('--split', action='store_true', default=False)
argparser.add_argument('--train', action='store_true', default=False)
argparser.add_argument('--eval', action='store_true', default=False)
argparser.add_argument('--seq2seq', action='store_true', default=False)
argparser.add_argument('--constrain', action='store_true', default=False)
argparser.add_argument('--rounds_breakdown', action='store_true', default=False)
argparser.add_argument('--data_breakdown', action='store_true', default=False)
argparser.add_argument('--ablation', action='store_true', default=False)
# Standard ParlAI data/MTurk options, then parse.
argparser.add_parlai_data_path()
argparser.add_mturk_args()
opt = argparser.parse_args()
print('---Finished extracting and saving personas, to {}'.format( personas_path)) def main(opt): print('---Extracting and saving personas---') teacher_name = 'personachat:{}'.format(opt.get('persona_type')) teacher_name += 'Revised' if opt.get('revised') else 'Original' opt['task'] = teacher_name assert 'personas_path' in opt, 'Must specify personas path' opt['datatype'] = 'train:ordered:stream' opt['numthreads'] = 1 opt['batchsize'] = 1 extract_and_save(opt) if __name__ == '__main__': parser = ParlaiParser() parser.add_argument( '--persona-type', default='both', type=str, choices=['both', 'self', 'other'], help='Which personas to load from personachat', ) parser.add_argument('--revised', default=False, type='bool', help='Whether to use revised personas') opt = parser.parse_args()
def main():
    """
    IGC Human Evaluation.

    Specify the `--eval-data-path` to load examples for evaluation.

    The data in `--eval-data-path` should be formatted as a dictionary
    mapping IGC image ids to dicts with the following fields:
        {
            'questions': list of (<generator_name>, <generated_question>) tuples,
            'responses': list of (<generator_name>, <generated_response>) tuples,
            'question': question to use when evaluating responses,
            'context': context for the image
        }

    If not data path specified, loads a demo_example specified in worlds.py

    Specify `--image-path` for the path to the IGC images, where each example
    is saved as <image_id>.jpg

    NOTE: You can download the IGC Test Set from
    https://www.microsoft.com/en-us/download/details.aspx?id=55324

    And you can use the `download_igc_images.py` script to download the images
    (please put the IGC_crowd_test.csv file in this directory to use the script)
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t', '--min_turns', default=3, type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt', '--max_turns', default=5, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        help='time limit for turker'
        'in onboarding',
    )
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=5,
        help='number of images to show \
                        to turker',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24,
        help='how long to wait for \
                        auto approval',
    )
    argparser.add_argument('--data-path', type=str, default='',
                           help='where to save data')
    argparser.add_argument(
        '--eval-data-path',
        type=str,
        default='',
        help='path to file with candidates to '
        'evaluate',
    )
    argparser.add_argument('--image-path', type=str, default='',
                           help='path to IGC images')
    argparser.add_argument(
        '-rnd',
        '--dialog-round',
        type=str,
        default='questions',
        choices=round_choices,
        help='which dialog round to show',
    )
    opt = argparser.parse_args()

    # The MTurk task name is derived from this script's directory name.
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = "{}/data/{}_evals".format(os.getcwd(),
                                                     opt['dialog_round'])
    opt['task_dir'] = os.getcwd()
    # Merge in the round-specific task configuration.
    if opt['dialog_round'] == 'questions':
        opt.update(tc_questions)
    else:
        opt.update(tc_responses)

    mturk_agent_ids = [RATER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    example_generator = IGCExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Attach the shared example generator before onboarding.
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # All workers are eligible for this task.
            return True

        def assign_worker_roles(workers):
            # Single-role task: everyone is a rater.
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkIGCEvalWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx))
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_args(parser=None): if parser is None: parser = ParlaiParser(True, True, 'compute statistics from model predictions') DictionaryAgent.add_cmdline_args(parser) # Get command line arguments parser.add_argument('-ne', '--num-examples', type=int, default=-1) parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2) parser.add_argument( '-ed', '--external-dict', type=str, default=None, help='External dictionary for stat computation', ) parser.add_argument( '-fb', '--freq-bins', type=str, default='0,100,1000,10000', help='Bins boundaries for rare words stat', ) parser.add_argument( '-dup', '--dump-predictions-path', type=str, default=None, help='Dump predictions into file', ) parser.add_argument( '-cun', '--compute-unique', type='bool', default=True, help='Compute %% of unique responses from the model', ) parser.set_defaults(datatype='valid') TensorboardLogger.add_cmdline_args(parser) return parser
def setup_args(parser=None): if parser is None: parser = ParlaiParser(True, True, 'Evaluate a model') # Get command line arguments parser.add_argument( '-rf', '--report-filename', type=str, default='', help='Saves a json file of the evaluation report either as an ' 'extension to the model-file (if begins with a ".") or a whole ' 'file path. Set to the empty string to not save at all.', ) parser.add_argument( '--world-logs', type=str, default='', help='Saves a jsonl file of the world logs.' 'Set to the empty string to not save at all.', ) parser.add_argument( '--save-format', type=str, default='conversations', choices=['conversations', 'parlai'], ) parser.add_argument( '--area-under-curve-digits', '-auc', type=int, default=-1, help='a positive number indicates to calculate the area under the roc curve and it also determines how many decimal digits of the predictions to keep (higher numbers->more precise); also used to determine whether or not to calculate the AUC metric', ) parser.add_argument( '--area-under-curve-class', '-auclass', type=str, default=None, nargs='*', help='the name(s) of the class to calculate the auc for', ) parser.add_argument('-ne', '--num-examples', type=int, default=-1) parser.add_argument('-d', '--display-examples', type='bool', default=False) parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=10) parser.add_argument( '-mcs', '--metrics', type=str, default='default', help='list of metrics to show/compute, e.g. all, default,' 'or give a list split by , like ' 'ppl,f1,accuracy,hits@1,rouge,bleu' 'the rouge metrics will be computed as rouge-1, rouge-2 and rouge-l', ) parser.add_argument( '-micro', '--aggregate-micro', type='bool', default=False, help='Report micro-averaged metrics instead of macro averaged metrics.', recommended=False, ) WorldLogger.add_cmdline_args(parser, partial_opt=None) TensorboardLogger.add_cmdline_args(parser, partial_opt=None) parser.set_params(datatype='valid') return parser
def setup_args(parser=None): if parser is None: parser = ParlaiParser(True, True, 'Self chat with a model') parser.add_argument('--seed', type=int, default=42) parser.add_argument('-d', '--display-examples', type='bool', default=True) parser.add_argument('-n', '-ne', '--num-examples', type=int, default=10) parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2) parser.add_argument( '--display-ignore-fields', type=str, default='label_candidates,text_candidates', help='Do not display these fields', ) parser.add_argument( '-it', '--interactive-task', type='bool', default=True, help='Create interactive version of task', ) parser.add_argument( '--selfchat-max-turns', type=int, default=10, help="The number of dialogue turns before self chat ends.", ) parser.add_argument( '--seed-messages-from-task', action='store_true', help="Automatically seed conversation with messages from task dataset.", ) parser.add_argument('--outfile', type=str, default='/tmp/selfchat.json') parser.add_argument('--format', type=str, default='json', choices={'parlai', 'json'}) parser.set_defaults(interactive_mode=True, task='self_chat') WorldLogger.add_cmdline_args(parser) return parser
def main():
    """This task consists of an MTurk agent evaluating a wizard model.

    They are assigned a topic and asked to chat.
    """
    start_time = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt', '--max-turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument(
        '--max-resp-time',
        default=240,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--max-choice-time',
        type=int,
        default=300,
        help='time limit for turker'
        'choosing the topic',
    )
    argparser.add_argument(
        '--ag-shutdown-time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument('-rt', '--range-turn', default='3,5',
                           help='sample range of number of turns')
    argparser.add_argument(
        '--human-eval',
        type='bool',
        default=False,
        help='human vs human eval, no models involved',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24 * 1,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--only-masters',
        type='bool',
        default=False,
        help='Set to true to use only master turks for '
        'this test eval',
    )
    argparser.add_argument(
        '--unique-workers',
        type='bool',
        default=False,
        help='Each worker must be unique',
    )
    argparser.add_argument('--mturk-log', type=str,
                           default='data/mturklogs/{}.log'.format(start_time))

    def inject_override(opt, override_dict):
        # Push each override into both the 'override' slot and the top
        # level of opt so downstream agent creation sees the values.
        opt['override'] = override_dict
        for k, v in override_dict.items():
            opt[k] = v

    def get_logger(opt):
        # Root logger: console always; file handler only when a log path
        # is configured.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        fmt = logging.Formatter('%(asctime)s: [ %(message)s ]',
                                '%m/%d/%Y %I:%M:%S %p')
        console = logging.StreamHandler()
        console.setFormatter(fmt)
        logger.addHandler(console)
        if 'mturk_log' in opt:
            logfile = logging.FileHandler(opt['mturk_log'], 'a')
            logfile.setFormatter(fmt)
            logger.addHandler(logfile)
        logger.info('COMMAND: %s' % ' '.join(sys.argv))
        logger.info('-' * 100)
        logger.info('CONFIG:\n%s' % json.dumps(opt, indent=4, sort_keys=True))
        return logger

    # MODEL CONFIG
    # NOTE: please edit this to test your own models
    config = {
        'model': 'projects:wizard_of_wikipedia:interactive_retrieval',
        'retriever_model_file': 'models:wikipedia_full/tfidf_retriever/model',
        'responder_model_file':
        'models:wizard_of_wikipedia/full_dialogue_retrieval_model/model',
    }

    argparser.add_model_subargs(config['model'])  # add model args to opt
    start_opt = argparser.parse_args()
    inject_override(start_opt, config)

    # In human-human eval no model agent is created.
    if not start_opt.get('human_eval'):
        bot = create_agent(start_opt)
        shared_bot_params = bot.share()
    else:
        shared_bot_params = None

    if not start_opt['human_eval']:
        get_logger(bot.opt)
    else:
        get_logger(start_opt)

    if start_opt['human_eval']:
        folder_name = 'human_eval-{}'.format(start_time)
    else:
        folder_name = '{}-{}'.format(start_opt['model'], start_time)

    start_opt['task'] = os.path.basename(
        os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in start_opt:
        start_opt['data_path'] = os.path.join(os.getcwd(), 'data',
                                              'wizard_eval', folder_name)
    start_opt.update(task_config)

    # Human-human eval pairs two turkers; otherwise one turker chats with
    # the bot.
    if not start_opt.get('human_eval'):
        mturk_agent_ids = ['PERSON_1']
    else:
        mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=start_opt,
                                 mturk_agent_ids=mturk_agent_ids)

    topics_generator = TopicsGenerator(start_opt)

    directory_path = os.path.dirname(os.path.abspath(__file__))

    mturk_manager.setup_server(task_directory_path=directory_path)

    worker_roles = {}
    connect_counter = AttrDict(value=0)

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []
        if not start_opt['is_sandbox']:
            # assign qualifications
            if start_opt['only_masters']:
                agent_qualifications.append(MASTER_QUALIF)
            if start_opt['unique_workers']:
                # One-HIT-per-worker qualification: worker must NOT already
                # hold it to preview/accept.
                qual_name = 'UniqueChatEval'
                qual_desc = (
                    'Qualification to ensure each worker completes a maximum '
                    'of one of these chat/eval HITs')
                qualification_id = mturk_utils.find_or_create_qualification(
                    qual_name, qual_desc, False)
                print('Created qualification: ', qualification_id)
                UNIQUE_QUALIF = {
                    'QualificationTypeId': qualification_id,
                    'Comparator': 'DoesNotExist',
                    'RequiredToPreview': True,
                }
                start_opt['unique_qualif_id'] = qualification_id
                agent_qualifications.append(UNIQUE_QUALIF)
        mturk_manager.create_hits(qualifications=agent_qualifications)

        def run_onboard(worker):
            # Round-robin role assignment in human-human mode.
            if start_opt['human_eval']:
                role = mturk_agent_ids[connect_counter.value %
                                       len(mturk_agent_ids)]
                connect_counter.value += 1
                worker_roles[worker.worker_id] = role
            else:
                role = 'PERSON_1'
            worker.topics_generator = topics_generator
            world = TopicChooseWorld(start_opt, worker, role=role)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_single_worker_eligibility(worker):
            return True

        def check_multiple_workers_eligibility(workers):
            # Only start when one worker of each role is available.
            valid_workers = {}
            for worker in workers:
                worker_id = worker.worker_id
                if worker_id not in worker_roles:
                    print('Something went wrong')
                    continue
                role = worker_roles[worker_id]
                if role not in valid_workers:
                    valid_workers[role] = worker
                if len(valid_workers) == 2:
                    break
            return valid_workers.values() if len(valid_workers) == 2 else []

        if not start_opt['human_eval']:
            eligibility_function = {
                'func': check_single_worker_eligibility,
                'multiple': False,
            }
        else:
            eligibility_function = {
                'func': check_multiple_workers_eligibility,
                'multiple': True,
            }

        def assign_worker_roles(workers):
            if start_opt['human_eval']:
                for worker in workers:
                    worker.id = worker_roles[worker.worker_id]
            else:
                for index, worker in enumerate(workers):
                    worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            conv_idx = mturk_manager.conversation_index
            world = WizardEval(
                opt=start_opt,
                agents=workers,
                range_turn=[
                    int(s) for s in start_opt['range_turn'].split(',')
                ],
                max_turn=start_opt['max_turns'],
                max_resp_time=start_opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            gc.collect()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
"""Evaluate pre-trained retrieval model on the full Wizard Dialogue task. NOTE: Metrics here differ slightly to those reported in the paper as a result of code changes. Results on seen test set: Hits@1/100: 86.7 Results on unseen test set (run with flag `-t wizard_of_wikipedia:WizardDialogKnowledge:topic_split`): Hits@1/100: 68.96 """ if __name__ == '__main__': parser = ParlaiParser(add_model_args=True) parser.add_argument('-n', '--num-examples', default=100000000) parser.add_argument('-d', '--display-examples', type='bool', default=False) parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2) WizardTransformerRankerAgent.add_cmdline_args(parser) parser.set_params( task='wizard_of_wikipedia', model='projects:wizard_of_wikipedia:wizard_transformer_ranker', model_file= 'models:wizard_of_wikipedia/full_dialogue_retrieval_model/model', datatype='test', n_heads=6, ffn_size=1200, embeddings_scale=False, delimiter=' __SOC__ ', n_positions=1000, legacy=True,
def main():
    """This task consists of an MTurk agent evaluating a chit-chat model.

    They are asked to chat to the model adopting a specific persona. After
    their conversation, they are asked to evaluate their partner on several
    metrics.
    """
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt', '--max-turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('--max-resp-time', default=180, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--max-persona-time', type=int, default=300,
                           help='time limit for turker'
                           'entering the persona')
    argparser.add_argument('--ag-shutdown-time', default=120, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type', default='both', type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised', default=False, type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt', '--range-turn', default='5,6',
                           help='sample range of number of turns')
    argparser.add_argument('--auto-approve-delay', type=int,
                           default=3600 * 24 * 1,
                           help='how long to wait for \
                           auto approval')

    # ADD MODEL ARGS HERE (KVMEMNN ADDED AS AN EXAMPLE)
    argparser.set_defaults(
        model='projects.personachat.kvmemnn.kvmemnn:Kvmemnn',
        model_file='models:convai2/kvmemnn/model',
    )
    opt = argparser.parse_args()

    # add additional model args
    opt['no_cuda'] = True
    # NOTE(review): 'override' is a list here, but sibling scripts use a
    # dict of key -> value — presumably an older ParlAI override format;
    # verify against the ParlAI version in use before changing.
    opt['override'] = ['interactive_mode']
    opt['interactive_mode'] = True

    bot = create_agent(opt)
    # Shared params let each conversation spawn its own bot copy.
    shared_bot_params = bot.share()

    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w, 'We found that you have unexpected behaviors in our \
                    previous HITs. For more questions please email us.')

        def run_onboard(worker):
            # Attach the shared persona generator before onboarding.
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = Convai2EvalWorld(
                opt=opt,
                agents=[agents],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """This task consists of an MTurk agent evaluating a chit-chat model.

    They are asked to chat to the model adopting a specific persona. After
    their conversation, they are asked to evaluate their partner on several
    metrics.
    """
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt', '--max-turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument(
        '--max-resp-time',
        default=240,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--max-persona-time',
        type=int,
        default=300,
        help='time limit for turker'
        'entering the persona',
    )
    argparser.add_argument(
        '--ag-shutdown-time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--persona-type',
        default='both',
        type=str,
        choices=['both', 'self', 'other'],
        help='Which personas to load from personachat',
    )
    argparser.add_argument('--revised', default=False, type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt', '--range-turn', default='5,6',
                           help='sample range of number of turns')
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24 * 1,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--only-masters',
        type='bool',
        default=False,
        help='Set to True to use only master turks for this' +
        ' test eval, default is %(default)s',
    )

    # ADD MODEL ARGS HERE, UNCOMMENT TO USE KVMEMNN MODEL AS AN EXAMPLE
    # argparser.set_defaults(
    #     model='projects.personachat.kvmemnn.kvmemnn:Kvmemnn',
    #     model_file='models:convai2/kvmemnn/model',
    # )

    opt = argparser.parse_args()

    # add additional model args
    opt['override'] = {
        'no_cuda': True,
        'interactive_mode': True,
        'tensorboard_log': False,
    }

    bot = create_agent(opt)
    # Shared params let each conversation spawn its own bot copy.
    shared_bot_params = bot.share()
    print('=== Actual bot opt === :\n {}'.format('\n'.join(
        ["[{}] : {}".format(k, v) for k, v in bot.opt.items()])))
    folder_name = 'master_{}_YOURCOMMENT__'.format(
        opt['only_masters']) + '__'.join(
        ['{}_{}'.format(k, v) for k, v in opt['override'].items()])

    # this is mturk task, not convai2 task from ParlAI
    opt['task'] = 'convai2:self'
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + folder_name
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []
        # Masters qualification differs between sandbox and live.
        if opt['only_masters']:
            if opt['is_sandbox']:
                agent_qualifications.append(MASTER_QUALIF_SDBOX)
            else:
                agent_qualifications.append(MASTER_QUALIF)

        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits(qualifications=agent_qualifications)

        if not opt['is_sandbox']:
            # ADD SOFT-BLOCKED WORKERS HERE
            # NOTE: blocking qual *must be* specified
            blocked_worker_list = []
            for w in blocked_worker_list:
                print('Soft Blocking {}\n'.format(w))
                mturk_manager.soft_block_worker(w)
                time.sleep(0.1)  # do the sleep to prevent amazon query drop

        def run_onboard(worker):
            # Attach the shared persona generator before onboarding.
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = Convai2EvalWorld(
                opt=opt,
                agents=[agents],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_args(): """ Setup appropriate args. """ parser = ParlaiParser(True, False) parser.add_argument('-id', '--run-id', type=str, default=None, help='run id to analyze') parser.add_argument( '--is-sandbox', type='bool', default=True, help='whether the run is a sandbox run or not', ) parser.add_argument('--outdir', type=str, default=None, help='where to save the results') parser.add_argument( '--pairings-filepath', type=str, default=None, help='path to the acute analysis pairs for the corresponding run id', ) parser.add_argument( '--rounding-digit', type=int, default=2, help='number of digits for rounding displayed table', ) parser.add_argument( '--max-matchups-html', type=int, default=10, help='max number of matchups to display per model pair in html', ) parser.add_argument( '--min-dialogue-length', type=int, default=-1, help= 'the minimum number of turns for both dialogues in a matchup to be counted as valid for analysis', ) parser.add_argument( '--annotate-convo', type='bool', default=False, help= 'whether to include a checkbox column for annotating the conversation pairs', ) return parser
def setup_args(cls): parser = ParlaiParser(True, False, description='My Description') parser.add_argument('--foo', default='defaultvalue') parser.add_argument('--bar', default='sneaky', hidden=True) return parser
def setup_args(parser=None): if parser is None: parser = ParlaiParser(True, True, 'Self chat with a model') parser.add_argument('--seed', type=int, default=42) parser.add_argument('-d', '--display-examples', type='bool', default=True) parser.add_argument( '--display-ignore-fields', type=str, default='label_candidates,text_candidates', help='Do not display these fields', ) parser.add_argument( '-st', '--selfchat-task', type='bool', default=True, help='Create a self chat version of the task', ) parser.add_argument( '--num-self-chats', type=int, default=1, help='Number of self chats to run' ) parser.add_argument( '--selfchat-max-turns', type=int, default=6, help='The number of dialogue turns before self chat ends', ) parser.add_argument( '--seed-messages-from-task', action='store_true', help='Automatically seed conversation with messages from task dataset.', ) parser.add_argument( '--outfile', type=str, default=None, help='File to save self chat logs' ) parser.add_argument( '--save-format', type=str, default='conversations', choices=['conversations', 'parlai', 'jsonl'], help='Format to save logs in', ) parser.set_defaults(interactive_mode=True, task='self_chat') WorldLogger.add_cmdline_args(parser) return parser
def setup_args(parser=None): if parser is None: parser = ParlaiParser( True, True, 'Interactive chat with a model on the command line') parser.add_argument('-d', '--display-examples', type='bool', default=False) parser.add_argument( '--display-prettify', type='bool', default=False, help='Set to use a prettytable when displaying ' 'examples with text candidates', ) parser.add_argument( '--display-add-fields', type=str, default='', help= 'Display these fields when verbose is off (e.g., "--display-add-fields label_candidates,beam_texts")', ) parser.add_argument( '-it', '--interactive-task', type='bool', default=True, help='Create interactive version of task', ) parser.add_argument( '--outfile', type=str, default='', help='Saves a jsonl file containing all of the task examples and ' 'model replies. Set to the empty string to not save at all', ) parser.add_argument( '--save-format', type=str, default='conversations', choices=['conversations', 'parlai'], help= 'Format to save logs in. conversations is a jsonl format, parlai is a text format.', ) parser.set_defaults(interactive_mode=True, task='interactive') LocalHumanAgent.add_cmdline_args(parser) WorldLogger.add_cmdline_args(parser) return parser
def setup_args(parser=None): if parser is None: parser = ParlaiParser(True, True, 'Evaluate a model') # Get command line arguments parser.add_argument( '-rf', '--report-filename', type=str, default='', help='Saves a json file of the evaluation report either as an ' 'extension to the model-file (if begins with a ".") or a whole ' 'file path. Set to the empty string to not save at all.', ) parser.add_argument( '--save-world-logs', type='bool', default=False, help='Saves a jsonl file containing all of the task examples and ' 'model replies. Must also specify --report-filename.', ) parser.add_argument( '--save-format', type=str, default='conversations', choices=['conversations', 'parlai'], ) parser.add_argument('-ne', '--num-examples', type=int, default=-1) parser.add_argument('-d', '--display-examples', type='bool', default=False) parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=10) parser.add_argument( '-mcs', '--metrics', type=str, default='default', help='list of metrics to show/compute, e.g. all, default,' 'or give a list split by , like ' 'ppl,f1,accuracy,hits@1,rouge,bleu' 'the rouge metrics will be computed as rouge-1, rouge-2 and rouge-l', ) parser.add_argument( '-micro', '--aggregate-micro', type='bool', default=False, help='Report micro-averaged metrics instead of macro averaged metrics.', recommended=False, ) WorldLogger.add_cmdline_args(parser) TensorboardLogger.add_cmdline_args(parser) parser.set_params(datatype='valid') return parser
def main():
    # Get command line arguments
    argparser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(argparser)
    ParsedRemoteAgent.add_cmdline_args(argparser)
    argparser.add_argument('--num-examples', default=1000, type=int)
    argparser.add_argument('--num-its', default=100, type=int)
    argparser.add_argument('--dict-max-exs', default=10000, type=int)
    parlai_home = os.environ['PARLAI_HOME']
    # Default the remote command/args to the bundled luatorch memnn scripts
    # by appending to sys.argv before parsing, unless the caller set them.
    if '--remote-cmd' not in sys.argv:
        if os.system('which luajit') != 0:
            raise RuntimeError('Could not detect torch luajit installed: ' +
                               'please install torch from http://torch.ch ' +
                               'or manually set --remote-cmd for this example.')
        sys.argv.append('--remote-cmd')
        sys.argv.append('luajit {}/parlai/agents/'.format(parlai_home) +
                        'memnn_luatorch_cpu/memnn_zmq_parsed.lua')
    if '--remote-args' not in sys.argv:
        sys.argv.append('--remote-args')
        sys.argv.append('{}/examples/'.format(parlai_home) +
                        'memnn_luatorch_cpu/params_default.lua')

    opt = argparser.parse_args()

    # set up dictionary
    print('Setting up dictionary.')
    dictionary = DictionaryAgent(opt)
    if not opt.get('dict_file'):
        # build dictionary since we didn't load it
        ordered_opt = copy.deepcopy(opt)
        ordered_opt['datatype'] = 'train:ordered'
        ordered_opt['numthreads'] = 1
        world_dict = create_task(ordered_opt, dictionary)

        print('Dictionary building on training data.')
        cnt = 0
        # pass examples to dictionary
        for _ in world_dict:
            cnt += 1
            if cnt > opt['dict_max_exs'] and opt['dict_max_exs'] > 0:
                print('Processed {} exs, moving on.'.format(
                    opt['dict_max_exs']))
                # don't wait too long...
                break
            world_dict.parley()

        # we need to save the dictionary to load it in memnn (sort it by freq)
        dictionary.sort()
        dictionary.save('/tmp/dict.txt', sort=True)

    print('Dictionary ready, moving on to training.')

    opt['datatype'] = 'train'
    agent = ParsedRemoteAgent(opt, {'dictionary_shared': dictionary.share()})
    world_train = create_task(opt, agent)
    opt['datatype'] = 'valid'
    world_valid = create_task(opt, agent)

    start = time.time()
    with world_train:
        for _ in range(opt['num_its']):
            print('[ training ]')
            for _ in range(opt['num_examples'] * opt.get('numthreads', 1)):
                world_train.parley()
            world_train.synchronize()

            print('[ validating ]')
            world_valid.reset()
            for _ in world_valid:  # check valid accuracy
                world_valid.parley()

            print('[ validation summary. ]')
            report_valid = world_valid.report()
            print(report_valid)
            # early-stop once validation accuracy is high enough
            if report_valid['accuracy'] > 0.95:
                break

        # show some example dialogs after training:
        world_valid = create_task(opt, agent)
        for _k in range(3):
            world_valid.parley()
            print(world_valid.display())

    print('finished in {} s'.format(round(time.time() - start, 2)))
def setup_args(parser=None):
    """Build (or extend) the argument parser for interactive model chat.

    If no parser is supplied, a fresh ParlaiParser is created. Returns the
    parser with all interactive-mode options, world-logger options, and the
    local human agent's options registered.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Interactive chat with a model')

    # Display / reporting options
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.add_argument(
        '-rf',
        '--report-filename',
        type=str,
        default='',
        help='Saves a json file of the evaluation report either as an extension to the model-file (if begins with a ".") or a whole file path. Set to the empty string to not save at all.',
    )
    parser.add_argument(
        '--save-world-logs',
        type='bool',
        default=False,
        help='Saves a jsonl file containing all of the task examples and model replies. Must also specify --report-filename.',
    )
    parser.add_argument(
        '--display-prettify',
        type='bool',
        default=False,
        help='Set to use a prettytable when displaying examples with text candidates',
    )
    parser.add_argument(
        '--display-ignore-fields',
        type=str,
        default='label_candidates,text_candidates',
        help='Do not display these fields',
    )
    parser.add_argument(
        '-it',
        '--interactive-task',
        type='bool',
        default=True,
        help='Create interactive version of task',
    )

    # Register options from collaborating components and pin interactive defaults
    WorldLogger.add_cmdline_args(parser)
    parser.set_defaults(interactive_mode=True, task='interactive')
    LocalHumanAgent.add_cmdline_args(parser)
    return parser
# Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. from parlai.core.build_data import download_models from parlai.core.params import ParlaiParser from parlai.scripts.interactive import interactive from projects.personachat.persona_seq2seq import PersonachatSeqseqAgentBasic '''Interact with pre-trained model Generative model trained on personachat using persona 'self' Run from ParlAI directory ''' if __name__ == '__main__': parser = ParlaiParser(add_model_args=True) parser.add_argument('-d', '--display-examples', type='bool', default=False) PersonachatSeqseqAgentBasic.add_cmdline_args(parser) parser.set_defaults( dict_file='models:personachat/profile_memory/fulldict.dict', interactive_mode=True, task='parlai.agents.local_human.local_human:LocalHumanAgent', model= 'projects.personachat.persona_seq2seq:PersonachatSeqseqAgentBasic', model_file= 'models:personachat/seq2seq_personachat/seq2seq_no_dropout0.2_lstm_1024_1e-3' ) opt = parser.parse_args() opt['model_type'] = 'seq2seq_personachat' # for builder # build all profile memory models fnames = ['seq2seq_no_dropout0.2_lstm_1024_1e-3', 'fulldict.dict']
def run_task(override_opt: Optional[dict] = None):
    """
    This task consists of an MTurk worker talking to a model and MTurker also
    evaluates each utterance of the bot for various buckets (see constants).

    :param override_opt: if provided, these values are set as parser defaults
        and the command line is ignored (``parse_args([])``); otherwise options
        come from the real command line.

    Side effects: reads config/blocklist files, creates save folders, loads
    bot models, launches an MTurk server, and blocks until the run finishes
    (HITs are expired and the manager shut down in ``finally``).
    """
    config_folder = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'task_config'
    )
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    default_task_folder = os.path.join(
        argparser.parlai_home, 'data', 'turn_annotations'
    )
    argparser.add_mturk_args()
    argparser.add_argument(
        '-num_t', '--num_turns', default=6, type=int, help='minimum number of turns'
    )
    argparser.add_argument(
        '--conversations-needed',
        dest='conversations_needed_string',
        default=None,
        type=str,
        help='Number of convos needed for each model. For example: "modelA:50,modelB:20"',
    )
    # FIX: was `type=bool` — with argparse, bool('False') is True, so the flag
    # could never be disabled from the command line. Use ParlaiParser's
    # registered 'bool' type (the convention used elsewhere in this codebase).
    argparser.add_argument(
        '--task-model-parallel',
        default=True,
        type='bool',
        help='Whether to load models to be used with model_parallel True.',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        dest='auto_approve_delay',
        type=int,
        default=3600 * 24 * 5,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--max-resp-time',
        type=int,
        default=180,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--max-onboard-time',
        type=int,
        default=300,
        help='time limit accepting onboarding',
    )
    argparser.add_argument(
        '--base-save-folder',
        default=default_task_folder,
        type=str,
        help='base folder for saving all crowdsourcing results',
    )
    argparser.add_argument(
        '--base-model-folder',
        default=None,
        type=str,
        help='base folder for loading model files from',
    )
    argparser.add_argument(
        '--onboard-worker-answer-folder',
        default=os.path.join(default_task_folder, 'onboard_answers'),
        type=str,
        help='base folder for saving all worker answer results during onboarding',
    )
    argparser.add_argument(
        '--worker-blocklist-paths',
        default=None,
        type=str,
        help='Path(s) to a list of IDs of workers to soft-block, separated by newlines. Use commas to indicate multiple lists',
    )
    # FIX: was `type=bool` (see note above)
    argparser.add_argument(
        '--check-acceptability',
        default=False,
        type='bool',
        help="Check worker's responses against several metrics of acceptability",
    )
    # FIX: was `type=bool` (see note above)
    argparser.add_argument(
        '--include-persona', default=False, type='bool', help="Show persona to the bot"
    )
    argparser.add_argument(
        '--conversation-start-mode',
        default='hi',
        type=str,
        choices=['hi', 'bst'],
        help='Whether to show "Hi!" or two previous utterances (as in BlendedSkillTalk) at the beginning of the conversation',
    )
    argparser.add_argument(
        '--context-seed',
        default=None,
        type=int,
        help="Set seed for pulling the context info (for testing)",
    )
    argparser.add_argument(
        '--hit-config-path',
        default=os.path.join(config_folder, 'hit_config.json'),
        type=str,
        help='Path to file of parameters describing how MTurk will describe the HIT to the workers',
    )
    argparser.add_argument(
        '--task-description-path',
        default=os.path.join(config_folder, 'task_description.html'),
        type=str,
        help='Path to file of HTML to show on the task-description page',
    )
    argparser.add_argument(
        '--left-pane-text-path',
        default=os.path.join(config_folder, 'left_pane_text.html'),
        type=str,
        help='Path to file of HTML to show on the left-hand pane of the chat window',
    )
    argparser.add_argument(
        '--annotations-intro',
        default='Does this comment from your partner have any of the following attributes? (Check all that apply)',
        type=str,
        help='Text shown to worker before they fill out annotation form',
    )
    argparser.add_argument(
        '--annotations-config-path',
        default=os.path.join(config_folder, 'annotations_config.json'),
        type=str,
        help='Path to JSON of annotation categories',
    )
    argparser.add_argument(
        '--onboard-task-data-path',
        default=os.path.join(config_folder, 'onboard_task_data.json'),
        type=str,
        help='Path to JSON containing settings for running onboarding',
    )
    argparser.add_argument(
        '--final-rating-question',
        default='Please rate your partner on a scale of 1-5.',
        type=str,
        help='Text to show when asking worker to make their final rating',
    )

    # NOTE: you have to set all three of these opts to enforce the MTurk core
    # param max_hits_per_worker.
    # - Without unique_qual_name, MTurkManager creates different qualification
    #   for each run (so a worker could do N hits per run) Also, the
    #   worker has to get to N HITs in at least one run or they won't be given
    #   the qualification.
    # - allowed_conversations is like max concurrent conversations
    #   allowed_conversations needs to be 1 or the actual max would be N +
    #   allowed_conversations. Worker gets notified via frontend message that
    #   they aren't eligible (second description screen), UNLESS the frontend
    #   overwrites that functionality.
    # There's also still a race condition where the worker might be able to open
    # 1 extra task
    argparser.set_defaults(
        unique_qual_name='turn_annotations_max_submissions',
        max_hits_per_worker=10,
        allowed_conversations=3,
    )

    if override_opt is not None:
        # Programmatic invocation: take values from override_opt, not the CLI
        argparser.set_params(**override_opt)
        opt = argparser.parse_args([])
    else:
        opt = argparser.parse_args()
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)

    # Set the number of conversations needed, parsed from "modelA:50,modelB:20"
    if opt.get('conversations_needed_string') is not None:
        parts = opt['conversations_needed_string'].split(',')
        conversations_needed = {}
        for part in parts:
            model_name, num_string = part.split(':')
            conversations_needed[model_name] = int(num_string)
        opt['conversations_needed'] = conversations_needed

    # Read in workers to soft-block (union of all newline-separated ID files)
    if opt.get('worker_blocklist_paths') is not None:
        blocklist_paths = opt['worker_blocklist_paths'].split(',')
        worker_blocklist = set()
        for path in blocklist_paths:
            with open(path) as f:
                worker_blocklist |= set(f.read().strip().split('\n'))
        opt['worker_blocklist'] = worker_blocklist

    # Read in and define text shown to users (each may be pre-set via override_opt)
    if opt.get('hit_config') is None:
        with open(opt['hit_config_path']) as f:
            opt['hit_config'] = json.load(f)
        # Add all of the settings in hit_config into the base opt
        opt.update(opt['hit_config'])
    if opt.get('task_description') is None:
        with open(opt['task_description_path']) as f:
            opt['task_description'] = f.readlines()
    if opt.get('left_pane_text') is None:
        with open(opt['left_pane_text_path']) as f:
            opt['left_pane_text'] = f.readlines()
    if opt.get('annotations_config') is None:
        with open(opt['annotations_config_path']) as f:
            opt['annotations_config'] = json.load(f)
    if opt.get('onboard_task_data') is None:
        with open(opt['onboard_task_data_path']) as f:
            opt['onboard_task_data'] = json.load(f)

    # Limits the number of models that can generate at once
    max_concurrent_responses = 1
    semaphore = threading.Semaphore(max_concurrent_responses)

    # Per-model completed-conversation counters, starting at zero
    run_statistics = copy.deepcopy(opt['conversations_needed'])
    run_statistics = {r: 0 for (r, v) in run_statistics.items()}
    onboard_statistics = {}

    save_folder = 'sandbox' if opt['is_sandbox'] else 'live'
    opt['save_folder'] = os.path.join(
        opt['base_save_folder'], save_folder, time.strftime("%Y_%m_%d")
    )
    os.makedirs(opt['save_folder'], exist_ok=True)

    print(
        f'Going to start collecting {opt["num_conversations"]} conversations, max_hits_per_worker: {opt["max_hits_per_worker"]}, reward: {opt["reward"]}, is_sandbox: {opt["is_sandbox"]}.'
    )

    # Create the models before it launches Heroku backend b/c takes a while
    models_needed = list(opt['conversations_needed'].keys())
    active_models = [m for m in models_needed if opt['conversations_needed'][m] > 0]
    shared_bot_agents = TurkLikeAgent.get_bot_agents(opt, active_models)

    mturk_agent_ids = [AGENT_0]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server(task_directory_path=directory_path)

    if opt['include_persona'] or opt['conversation_start_mode'] == 'bst':
        # We pull from the test set so that the model can't regurgitate
        # memorized conversations
        context_generator = ContextGenerator(opt, datatype='test', seed=0)
    else:
        context_generator = None

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # Soft-block all chosen workers
            if len(opt['worker_blocklist']) > 0:
                print(f"About to soft-block {len(opt['worker_blocklist'])} workers.")
                for w in set(opt['worker_blocklist']):
                    try:
                        print('Soft Blocking {}\n'.format(w))
                        mturk_manager.soft_block_worker(w)
                    except Exception as e:
                        # best-effort: log and keep going
                        print(f'Did not soft block worker {w}: {e}')
                    time.sleep(0.1)  # avoid hammering the MTurk API
            else:
                print(
                    'WARNING: We are in live mode, but a list of workers to soft-block '
                    'has not been passed in.'
                )

        def run_onboard(worker):
            # Run the onboarding world for one worker and tally its exit status
            world = TurnAnnotationsOnboardWorld(opt, worker)
            status = world.parley()
            if status not in onboard_statistics:
                onboard_statistics[status] = 0
            onboard_statistics[status] += 1
            print(
                f'After onboard world parley. About to shutdown onboard world for {worker.worker_id}, status was: {status}. Total onboard statistics for this run are: {onboard_statistics}.'
            )
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Everyone who passes onboarding is eligible
            return True

        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            # Pick the model that still needs the most conversations
            remaining_counts_needed = [
                (m, c - run_statistics[m])
                for (m, c) in opt['conversations_needed'].items()
            ]
            remaining_counts_needed.sort(reverse=True, key=lambda x: x[1])
            model_name = remaining_counts_needed[0][0]
            print(f'Remaining conversation counts needed: {remaining_counts_needed}')

            # Get a bot and add it to the list of "workers"
            print(f'Choosing the "{model_name}" model for the bot.')
            agent = create_agent_from_shared(shared_bot_agents[model_name])
            bot_worker = TurkLikeAgent(
                opt,
                model_name=model_name,
                model_agent=agent,
                num_turns=opt['num_turns'],
                semaphore=semaphore,
            )
            workers_including_bot = workers + [bot_worker]

            assert len(workers_including_bot) == 2

            # Get context: personas, previous utterances, etc.
            if context_generator is not None:
                context_info = context_generator.get_context()
            else:
                context_info = None

            conv_idx = mturk_manager.conversation_index
            world = TurnAnnotationsChatWorld(
                opt=opt,
                agents=workers_including_bot,
                num_turns=opt['num_turns'],
                max_resp_time=opt['max_resp_time'],
                tag='conversation t_{}'.format(conv_idx),
                context_info=context_info,
            )
            while not world.episode_done():
                print('About to parley')
                world.parley()
            model_nickname, worker_is_unacceptable, convo_finished = world.save_data()

            if worker_is_unacceptable:
                print(f'Soft-blocking worker {workers[0].worker_id}')
                mturk_manager.soft_block_worker(workers[0].worker_id)
                time.sleep(0.1)
            # Only count conversations from acceptable workers toward quotas
            if not worker_is_unacceptable and convo_finished:
                run_statistics[model_nickname] += 1

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        # Re-raise (including KeyboardInterrupt); cleanup happens in finally
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()