def test_beamsearch_return_all_texts(self):
    """
    Test beam_texts for beam_size > 1.
    """
    size = 3
    agent = create_agent_from_model_file(
        'zoo:unittest/beam_blocking/model',
        opt_overrides={"beam_size": size, "inference": "beam"},
    )
    agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
    response = agent.act()
    self.assertIn("beam_texts", response)
    self.assertGreaterEqual(len(response["beam_texts"]), size)
    hyp, score = response["beam_texts"][0]
    self.assertIsInstance(hyp, str)
    self.assertIsInstance(score, float)

    agent = create_agent_from_model_file(
        'zoo:unittest/beam_blocking/model',
        opt_overrides={"beam_size": size, "inference": "topk"},
    )
    agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
    response = agent.act()
    self.assertIn("beam_texts", response)
    self.assertEqual(len(response["beam_texts"]), size)
def test_beamsearch_contextblocking(self):
    """
    Test beamsearch context blocking.
    """
    agent = create_agent_from_model_file('zoo:unittest/context_blocking/model')
    agent.observe({'text': '5 4 3 2', 'episode_done': True})
    assert agent.act()['text'] == '5 4 3 2'

    agent = create_agent_from_model_file(
        'zoo:unittest/context_blocking/model', Opt(beam_context_block_ngram=1)
    )
    agent.observe({'text': '5 4 3 2', 'episode_done': True})
    text = agent.act()['text']
    assert '5' not in text
    assert '4' not in text
    assert '3' not in text
    assert '2' not in text

    agent = create_agent_from_model_file(
        'zoo:unittest/context_blocking/model', Opt(beam_context_block_ngram=2)
    )
    agent.observe({'text': '5 4 3 2', 'episode_done': True})
    text = agent.act()['text']
    assert '5' in text
    assert '5 4' not in text
    assert '4 3' not in text
    assert '3 2' not in text
def __init__(self, opt: Opt):
    self.opt = opt
    self.agents = []
    self.agent_dict = None
    self.generations = []
    self.input_type = 'Memory'
    self.delimiter = opt.get('memory_decoder_delimiter', '\n')
    self.one_line_memories = opt.get('memory_decoder_one_line_memories', False)
    model_file = modelzoo_path(opt['datapath'], opt['memory_decoder_model_file'])
    if model_file and os.path.exists(model_file):
        logging.info(f'Building Memory Decoder from file: {model_file}')
        logging.disable()
        overrides = {
            'skip_generation': False,
            'inference': 'beam',
            'beam_size': opt.get('memory_decoder_beam_size', 3),
            'beam_min_length': opt.get('memory_decoder_beam_min_length', 10),
            'beam_block_ngram': 3,
        }
        if self.opt.get('memory_decoder_truncate', -1) > 0:
            overrides['text_truncate'] = self.opt['memory_decoder_truncate']
            overrides['truncate'] = self.opt['memory_decoder_truncate']
        base_agent = create_agent_from_model_file(model_file, opt_overrides=overrides)
        assert isinstance(base_agent, TorchAgent)
        self.agents = [base_agent]
        copies = max(100, opt['batchsize'] * opt.get('rag_turn_n_turns', 1))
        self.agents += [
            create_agent_from_shared(self.agents[0].share()) for _ in range(copies)
        ]
        self.agent_dict = self.agents[0].build_dictionary()
        logging.enable()
def get_classifier_model_and_dict(
    opt: Opt,
) -> Tuple[Optional[TorchAgent], Optional[DictionaryAgent]]:
    """
    Build classifier model and dictionary.
    """
    model_file = modelzoo_path(
        opt['datapath'], opt['expanded_attention_classifier_model_file']
    )
    model, dictionary = None, None
    if model_file and os.path.exists(model_file):
        logging.info(f'Building polyencoder from path: {model_file}')
        logging.disable()
        overrides = {
            'model': 'return_code_weights_agent',
            'data_parallel': opt.get('data_parallel', False),
            'model_parallel': opt['model_parallel'],
            'delimiter': opt['delimiter'],
            'no_cuda': opt['no_cuda'],
            'fp16': opt['fp16'],
        }
        poly_agent = create_agent_from_model_file(model_file, overrides)
        logging.enable()
        logging.info('Poly Build Complete')
        dictionary = poly_agent.build_dictionary()
        model = poly_agent.model
    return model, dictionary
def __init__(self, opt, shared=None):
    # Define loss coefficients
    self.task_loss_coeff = opt['task_loss_coeff']
    self.encoder_loss_coeff = opt['encoder_loss_coeff']
    self.hidden_loss_coeff = opt['hidden_loss_coeff']
    self.pred_loss_coeff = opt['pred_loss_coeff']
    assert (
        opt.get('model_parallel', False) is False
    ), 'model_parallel is not currently supported for distillation!'
    # Create teacher model
    if shared is None:
        to_copy = {'no_cuda', 'model_parallel', 'fp16', 'fp16_impl', 'gpu'}
        override = {k: opt[k] for k in to_copy}
        override['datatype'] = 'train:evalmode'  # Don't initialize the optimizer
        teacher_agent = create_agent_from_model_file(opt['teacher_model'], override)
        self.teacher_agent_opt = teacher_agent.opt
        self.teacher_model = teacher_agent.model
        self.teacher_model.eval()
    super().__init__(opt, shared)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_type', default='blender',
                        help='Either `blender` or `dialogpt`.')
    parser.add_argument('--device', default='cpu',
                        help='Either `cpu` or `cuda` for dialogpt.')
    parser.add_argument('--model_path', help='Path for dialogpt.')
    parser.add_argument('--tokenizer_path', help='Path for dialogpt.')
    parser.add_argument('--generated_file', default='',
                        help='Evaluate previously generated samples.')
    parser.add_argument('--include_persona', action='store_true',
                        help='Whether to include personas.')
    args = parser.parse_args()
    print(args)

    tokenizer = None
    model = None
    if args.model_type == 'blender':
        model = create_agent_from_model_file('zoo:blender/blender_90M/model')
    elif args.model_type == 'dialogpt':
        tokenizer = AutoTokenizer.from_pretrained(
            args.tokenizer_path if args.tokenizer_path else args.model_path
        )
        model = AutoModelWithLMHead.from_pretrained(args.model_path)
        model = model.to(args.device)
    args.model = model
    args.tokenizer = tokenizer

    global checklist
    checklist = BiasChecklist(
        args,
        util.generate_response_fn,
        personas=constants.PERSONAS,
        generated_file=args.generated_file,
    )

    # Test cases.
    case = TestCase(
        'Test: Harmful Affirmation (XYZs are [DESCRIPTION].)',
        negative_generator,
        neg_score,
    )
    checklist.add(case)
    case = TestCase(
        'Test: Harmful Refutations (XYZs are [DESCRIPTION].)',
        positive_generator,
        pos_score,
    )
    checklist.add(case)

    # Report.
    result, samples = checklist.report(with_persona=args.include_persona)
    fi = './' + args.model_type + '_harmful_affirm_refute'
    if args.include_persona:
        fi += '_persona'
    else:
        fi += '_nopersona'
    if args.generated_file:
        fi += '_regenerated'
    with open(fi + '.txt', 'w') as f:
        f.writelines('\n'.join(result))
    with open(fi + '_samples.tsv', 'w') as f:
        writer = csv.writer(f, delimiter='\t')  # csv.writer (was misnamed `reader`)
        for sample in samples:
            writer.writerow(sample)
def __init__(self, opt: Opt):
    self.opt = opt
    self.agents = []
    self.agent_dict = None
    self.generations = []
    self.input_type = 'Search'
    self.knowledge_access_method = KnowledgeAccessMethod(
        opt['knowledge_access_method']
    )
    model_file = modelzoo_path(opt['datapath'], opt['query_generator_model_file'])
    if model_file and os.path.exists(model_file):
        logging.info(f'Building Query Generator from file: {model_file}')
        logging.disable()
        overrides: Dict[str, Any] = {'skip_generation': False}
        overrides['inference'] = opt['query_generator_inference']
        overrides['beam_size'] = opt.get('query_generator_beam_size', 3)
        overrides['beam_min_length'] = opt.get('query_generator_beam_min_length', 2)
        if self.opt['query_generator_truncate'] > 0:
            overrides['text_truncate'] = self.opt['query_generator_truncate']
            overrides['truncate'] = self.opt['query_generator_truncate']
        base_agent = create_agent_from_model_file(model_file, opt_overrides=overrides)
        assert isinstance(base_agent, TorchAgent)
        self.agents = [base_agent]
        bsz = opt.get('batchsize', 1)
        rag_turn_n_turns = opt.get('rag_turn_n_turns', 1)
        if bsz > 1 or rag_turn_n_turns > 1:
            self.agents += [
                create_agent_from_shared(self.agents[0].share())
                for _ in range((bsz * rag_turn_n_turns) - 1)
            ]
        self.agent_dict = self.agents[0].build_dictionary()
        logging.enable()
def run(self):
    """
    1) Load the model.
    2) Generate embeddings.
    3) Save the embeddings.
    """
    self.use_cuda = not self.opt.get('no_cuda') and torch.cuda.is_available()
    overrides = {'interactive_mode': True, 'interactive_candidates': 'inline'}
    if self.opt['dpr_model']:
        overrides.update(
            {
                'model': 'dpr_agent',
                'model_file': self.opt['model_file'],
                'override': {
                    'model': 'dpr_agent',
                    'interactive_candidates': 'inline',
                },
            }
        )
        agent = create_agent(Opt(overrides))
    else:
        agent = create_agent_from_model_file(self.opt['model_file'], overrides)
    model = agent.model.module if hasattr(agent.model, 'module') else agent.model
    assert hasattr(model, 'encoder_cand') or hasattr(model, 'cand_encoder')
    assert isinstance(agent, TorchRankerAgent)
    passages = self.load_passages()
    data = self.encode_passages(agent, passages)
    self.save_data(data)
def self_chat(opt):
    random.seed(opt['seed'])
    partner = opt['partner_model_file']
    partner_opt_file = opt.get('partner_opt_file')

    # Create agents
    agent1 = create_agent(opt, requireModelExists=True)
    if partner is None:
        # Self chat with the same model
        agent2 = agent1.clone()
    else:
        # Self chat with a different model
        if partner_opt_file:
            print(f"WARNING: Loading override opts from: {partner_opt_file}")
            with open(partner_opt_file) as f:
                partner_opt = json.load(f)
        else:
            partner_opt = {}
        partner_opt['interactive_mode'] = opt.get('interactive_mode', True)
        print(
            f"WARNING: Setting partner interactive mode to: "
            f"{partner_opt['interactive_mode']}"
        )
        agent2 = create_agent_from_model_file(partner, partner_opt)

    # Set IDs
    agent1.id = agent1.id + "_1"
    agent2.id = agent2.id + "_2"
    model_id = agent1.id + "_" + agent2.id

    world = create_task(opt, user_agents=[agent1, agent2])

    # Set up world logging
    logger = WorldLogger(opt)
    log_time = TimeLogger()

    # Run some self chats.
    for i in range(opt['num_self_chats']):
        _run_self_chat_episode(opt, world, logger)
        report = world.report()
        text, report = log_time.log(i + 1, opt['num_self_chats'], report)
        logging.info(text)

    # Save chats
    if opt['outfile'] is None:
        outfile = '/tmp/{}_selfchat'.format(model_id)
    else:
        outfile = opt['outfile']
    if opt['save_format'] == 'conversations' and hasattr(world, 'write'):
        # Use the self-chat-specific world to write the conversation;
        # this can be useful for logging extra contextual information
        # (like personas).
        world.write(logger, outfile)
    else:
        # Use the default logger write function.
        logger.write(outfile, world, opt['save_format'])

    return logger.get_logs()
def __init__(self): # Load the model from the model zoo via ParlAI overrides = { "skip_generation": False, "interactive_mode": True, "init_opt": "gen/seeker_dialogue", "all_model_path": "zoo:seeker/seeker_dialogue_3B/model", # seeker_dialogue "beam_disregard_knowledge_for_context_blocking": False, "drm_beam_block_full_context": True, "drm_beam_block_ngram": 3, "drm_beam_context_block_ngram": 3, "drm_beam_min_length": 20, "drm_beam_size": 10, "drm_inference": "beam", "drm_message_mutators": None, "drm_model": "projects.seeker.agents.seeker:ComboFidSearchQueryAgent", "exclude_context_in_krm_context_blocking": False, "include_knowledge_in_krm_context_blocking": True, "inject_query_string": None, "knowledge_response_control_token": None, "krm_beam_block_ngram": 3, "krm_beam_context_block_ngram": 3, "krm_beam_min_length": 1, "krm_beam_size": 3, "krm_doc_chunks_ranker": "woi_chunk_retrieved_docs", "krm_inference": "beam", "krm_message_mutators": None, "krm_model": "projects.seeker.agents.seeker:ComboFidSearchQueryAgent", "krm_n_ranked_doc_chunks": 1, "krm_rag_retriever_type": "search_engine", "krm_search_query_generator_model_file": "''", "loglevel": "debug", "min_knowledge_length_when_search": 10, "model": "projects.seeker.agents.seeker:SeekerAgent", "model_file": "zoo:seeker/seeker_dialogue_3B/model", "sdm_beam_block_ngram": -1, "sdm_beam_min_length": 1, "sdm_beam_size": 1, "sdm_history_size": 1, "sdm_inference": "greedy", "sdm_model": "projects.seeker.agents.seeker:ComboFidSearchQueryAgent", "search_decision": "always", "search_decision_control_token": "__is-search-required__", "search_decision_do_search_reply": "__do-search__", "search_decision_dont_search_reply": "__do-not-search__", "search_query_control_token": "__generate-query__", "sqm_beam_block_ngram": -1, "sqm_beam_min_length": 2, "sqm_beam_size": 1, "sqm_inference": "beam", "sqm_model": "projects.seeker.agents.seeker:ComboFidSearchQueryAgent", } self.model = create_agent_from_model_file(self.zoo_path, overrides)
def load_model(model_checkpoint, gpu_num):
    opt_overrides = {
        'gpu': gpu_num,
        'datatype': 'test',
        'inference': 'nucleus',
        'skip_generation': False,
    }
    model = create_agent_from_model_file(model_checkpoint, opt_overrides=opt_overrides)
    logging.info("Loaded raw Blender model from: {}".format(model_checkpoint))
    logging.info("Allocated raw Blender model to gpu_{}".format(gpu_num))
    return model
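# A minimal usage sketch for load_model above; the checkpoint path here is
# hypothetical, not taken from the source. ParlAI agents consume a message
# dict via observe() and return a reply dict with a 'text' field from act().
blender = load_model('checkpoints/blender_90M/model', gpu_num=0)
blender.observe({'text': 'Hello! How was your day?', 'episode_done': False})
reply = blender.act()
print(reply['text'])  # nucleus-sampled response, per the overrides above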
def test_beamsearch_blocking(self):
    """
    Test beamsearch blocking.
    """
    with testing_utils.tempdir() as tmpdir:
        agent = create_agent_from_model_file('zoo:unittest/beam_blocking/model')
        agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
        assert agent.act()['text'] == '5 5 5 5 5 5 5'

        agent = create_agent_from_model_file(
            'zoo:unittest/beam_blocking/model', Opt(beam_block_ngram=1)
        )
        agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
        assert '5 5' not in agent.act()['text']

        agent = create_agent_from_model_file(
            'zoo:unittest/beam_blocking/model', Opt(beam_block_ngram=2)
        )
        agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
        assert '5 5 5' not in agent.act()['text']

        with open(os.path.join(tmpdir, 'blocklist.txt'), 'w') as f:
            f.write("38\n62\n34 34\n")

        agent = create_agent_from_model_file(
            'zoo:unittest/beam_blocking/model',
            Opt(beam_block_list_filename=os.path.join(tmpdir, 'blocklist.txt')),
        )
        agent.observe({'text': '4 4 4', 'episode_done': True})
        assert agent.act()['text'] == '4 4 4'
        agent.observe({'text': '38 38 38', 'episode_done': True})
        assert '38' not in agent.act()['text']
        agent.observe({'text': '62 62 62', 'episode_done': True})
        assert '62' not in agent.act()['text']
        agent.observe({'text': '34 34 34', 'episode_done': True})
        text = agent.act()['text']
        assert '34' in text
        assert '34 34' not in text
def init_predictor(self, opt: Opt, shared=None): """ Initializes Predictor Module """ if not shared: if not opt.get("predictor_model_file"): logging.warn( 'Reranker MUST specify predictor_model_file unless subclass __init__() sets up the model in its own way (unusual). Skipping predictor setup!' ) else: self.predictor = create_agent_from_model_file( self.predictor_model_file) else: self.predictor = shared['predictor']
def init_predictor(self, opt: Opt, shared=None):
    if not shared:
        override = {
            'return_cand_scores': True,
            'datatype': 'valid',  # to not init optim
            'interactive_mode': opt.get('interactive_mode', True),
            'ignore_bad_candidates': True,
            'encode_candidate_vecs': True,
            'interactive_candidates': 'inline',
        }
        self.predictor = create_agent_from_model_file(
            self.predictor_model_file, opt_overrides=override
        )
    else:
        self.predictor = shared['predictor']
def eval_single(opt, tgt_agent, ref_agent, save_dir):
    eval_file_path = opt['eval_dir'] + ref_agent + '/' + opt['log_file']
    save_file_path = os.path.join(save_dir, ref_agent + '.jsonl')

    model_mf = 'outputs/agent_' + tgt_agent + '/model'
    model_optf = 'outputs/agent_' + tgt_agent + '/model.opt'
    with open(model_optf) as f:
        model_opt = json.load(f)
    model_opt['interactive_mode'] = True
    tgt_agent = create_agent_from_model_file(model_mf, model_opt)

    model_mf = 'outputs/agent_' + ref_agent + '/model'
    model_optf = 'outputs/agent_' + ref_agent + '/model.opt'
    with open(model_optf) as f:
        model_opt = json.load(f)
    model_opt['interactive_mode'] = True
    ref_agent = create_agent_from_model_file(model_mf, model_opt)

    with open(eval_file_path) as eval_file, open(save_file_path, 'w') as save_file:
        num_match = 0
        errorids = []
        for i, line in tqdm(enumerate(eval_file)):
            if not line.strip():
                continue
            conversation = json.loads(line)
            if _run_conversation(i, conversation, tgt_agent, ref_agent):
                num_match += 1
                assert conversation['dialog'][-1]['speaker'] == 'tgt_model'
                assert len(conversation['dialog']) % 3 == 0
                conversation['reward_ref'] = conversation.pop('report')
                save_file.write(json.dumps(conversation) + '\n')
            else:
                errorids.append(i)
    print('Matched: {}/{}'.format(num_match, num_match + len(errorids)))
    print('Error IDs: ', errorids)
def test_sparse_tfidf_retriever_singlethread(self):
    with testing_utils.tempdir() as tmpdir:
        MODEL_FILE = os.path.join(tmpdir, 'tmp_test_babi')
        testing_utils.train_model(
            dict(
                model='tfidf_retriever',
                task='babi:task1k:1',
                model_file=MODEL_FILE,
                retriever_numworkers=1,
                retriever_hashsize=2**8,
                retriever_tokenizer='simple',
                datatype='train:ordered',
                batchsize=1,
                num_epochs=1,
            )
        )
        agent = create_agent_from_model_file(MODEL_FILE)
        obs = {
            'text': (
                'Mary moved to the bathroom. John went to the hallway. '
                'Where is Mary?'
            ),
            'episode_done': True,
        }
        agent.observe(obs)
        reply = agent.act()
        assert reply['text'] == 'bathroom'

        ANS = 'The one true label.'
        new_example = {
            'text': (
                'A bunch of new words that are not in the other task, '
                'which the model should be able to use to identify '
                'this label.'
            ),
            'labels': [ANS],
            'episode_done': True,
        }
        agent.observe(new_example)
        reply = agent.act()
        assert 'text' in reply and reply['text'] == ANS

        new_example.pop('labels')
        agent.observe(new_example)
        reply = agent.act()
        assert reply['text'] == ANS
def init_search_query_generator(self, opt) -> TorchGeneratorAgent:
    model_file = opt['search_query_generator_model_file']
    logging.info('Loading search generator model')
    logging.disable()
    search_query_gen_agent = create_agent_from_model_file(
        model_file,
        opt_overrides={
            'skip_generation': False,
            'inference': opt['search_query_generator_inference'],
            'beam_min_length': opt['search_query_generator_beam_min_length'],
            'beam_size': opt['search_query_generator_beam_size'],
            'text_truncate': opt['search_query_generator_text_truncate'],
        },
    )
    logging.enable()
    logging.info('Search query generator model loading completed!')
    return search_query_gen_agent
def __init__(self, model, device, maxlen=-1):
    model = self.check_agent(model)
    maxlen = maxlen if maxlen > 0 else self.default_maxlen()
    if "end2end_generator" in model:
        name = "end2end_generator"
    else:
        raise Exception(f"Unsupported model: {model}")
    super().__init__(
        name=model,
        suffix="\n",
        device=device,
        maxlen=maxlen,
        model=create_agent_from_model_file(f"zoo:wizard_of_wikipedia/{name}/model"),
    )
def __init__(self, opt: Opt):
    self.opt = opt
    self.agents = []
    self.agent_dict = None
    self.generations = []
    self.input_type = 'Search'
    self.knowledge_access_method = KnowledgeAccessMethod(
        opt['knowledge_access_method']
    )
    model_file = modelzoo_path(opt['datapath'], opt['query_generator_model_file'])
    if (
        self.knowledge_access_method is KnowledgeAccessMethod.SEARCH_ONLY
        and 'blenderbot2/query_generator/model' in model_file
    ):
        raise ValueError(
            'You cannot use the blenderbot2 query generator with search_only. '
            'Please consider setting --query-generator-model-file '
            'zoo:sea/bart_sq_gen/model instead.'
        )
    if model_file and os.path.exists(model_file):
        logging.info(f'Building Query Generator from file: {model_file}')
        logging.disable()
        overrides: Dict[str, Any] = {'skip_generation': False}
        overrides['inference'] = opt['query_generator_inference']
        overrides['beam_size'] = opt.get('query_generator_beam_size', 3)
        overrides['beam_min_length'] = opt.get('query_generator_beam_min_length', 2)
        overrides['model_parallel'] = opt['model_parallel']
        overrides['no_cuda'] = opt['no_cuda']
        if self.opt['query_generator_truncate'] > 0:
            overrides['text_truncate'] = self.opt['query_generator_truncate']
            overrides['truncate'] = self.opt['query_generator_truncate']
        base_agent = create_agent_from_model_file(model_file, opt_overrides=overrides)
        assert isinstance(base_agent, TorchAgent)
        self.agents = [base_agent]
        bsz = max(opt.get('batchsize') or 1, opt.get('eval_batchsize') or 1)
        rag_turn_n_turns = opt.get('rag_turn_n_turns', 1)
        if bsz > 1 or rag_turn_n_turns > 1:
            self.agents += [
                create_agent_from_shared(self.agents[0].share())
                for _ in range((bsz * rag_turn_n_turns) - 1)
            ]
        self.agent_dict = self.agents[0].build_dictionary()
        logging.enable()
def init_predictor(self, opt: Opt, shared=None):
    if not shared:
        override = {
            'return_cand_scores': True,
            'datatype': 'valid',  # to not init optim
            'no_cuda': opt['reranker_no_cuda'],
            'interactive_mode': opt.get('interactive_mode', True),
            'ignore_bad_candidates': True,
            'encode_candidate_vecs': True,
            'interactive_candidates': 'inline',
        }
        if opt.get('predictor_characters_file'):
            override['fixed_candidates_path'] = opt['predictor_characters_file']
        self.predictor = create_agent_from_model_file(
            self.predictor_model_file, opt_overrides=override
        )
    else:
        self.predictor = shared['predictor']
def load_classifier(self, gpu_num):
    if self.has_classifier:
        if self.classifier is None:
            self.classifier_gpu_num = gpu_num
            opt_overrides = {
                'gpu': gpu_num,
                'datatype': 'test',
                # 'inference': 'nucleus',
                'skip_generation': False,
            }
            self.classifier = create_agent_from_model_file(
                self.classifier_checkpoint, opt_overrides=opt_overrides
            )
            teacher_for_classifier_opt = deepcopy(self.classifier.opt)
            teacher_for_classifier_opt.update({"build_data_or_not": False})
            self.teacher_for_classifier = create_task_agent_from_taskname(
                teacher_for_classifier_opt
            )[0]
            logging.info(
                "Loaded classifier from: {}".format(self.classifier_checkpoint)
            )
            logging.info("Allocated classifier to gpu_{}".format(gpu_num))
    else:
        self.classifier = None
def __init__(self): # Load the model from the model zoo via ParlAI overrides = {"skip_generation": False, "interactive_mode": True} self.model = create_agent_from_model_file(self.zoo_path, overrides)
def create_agent_and_persona(personas=()):
    # Note: the default was previously '' — iterating a non-empty string would
    # feed single characters as personas, so an empty tuple is safer.
    blender_bot = create_agent_from_model_file("zoo:blender/blender_90M/model")
    for persona in personas:
        blender_bot.observe({'text': persona})
    return blender_bot
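# Usage sketch: seed the agent with persona lines before the first user turn.
# The persona strings below are illustrative, following ParlAI's
# "your persona: ..." convention.
bot = create_agent_and_persona(
    personas=[
        'your persona: I love gardening.',
        'your persona: I have two dogs.',
    ]
)
bot.observe({'text': 'Do you have any pets?', 'episode_done': False})
print(bot.act()['text'])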
def self_chat(opt):
    random.seed(opt['seed'])
    partner = opt['partner_model_file']
    assert partner is not None
    partner_opt_file = opt.get('partner_opt_file')
    if partner_opt_file:
        assert partner_opt_file == partner + '.opt', (
            'If you are sure this is safe, you can remove this assert.'
        )
    else:
        partner_opt_file = partner + '.opt'

    # Create agents
    if opt['model_file'].split(':')[0] == 'human':
        agent1 = MyLocalHumanAgent(opt)
        assert partner is not None
    else:
        agent1 = create_agent(opt, requireModelExists=True)
    if partner is None:
        # Self chat with the same model
        agent2 = agent1.clone()
    else:
        # Self chat with a different model
        if partner_opt_file:
            print(f"WARNING: Loading override opts from: {partner_opt_file}")
            with open(partner_opt_file) as f:
                partner_opt = json.load(f)
        else:
            partner_opt = {}
        partner_opt['interactive_mode'] = opt.get('interactive_mode', True)
        print(
            f"WARNING: Setting partner interactive mode to: "
            f"{partner_opt['interactive_mode']}"
        )
        agent2 = create_agent_from_model_file(partner, partner_opt)

    # Set IDs
    agent1.id = agent1.id + '_1'
    agent2.id = agent2.id + '_2'
    model_id = agent1.id + '_' + agent2.id

    world = create_task(opt, user_agents=[agent1, agent2])

    # Set up world logging
    logger = WorldLogger(opt)
    log_time = TimeLogger()

    # Run some self chats.
    all_report = []
    if opt['num_self_chats'] < 0:
        opt['num_self_chats'] = len(world.messages)
    for i in range(opt['num_self_chats']):
        _run_self_chat_episode(opt, world, logger)
        report = world.report()
        text, report = log_time.log(i + 1, opt['num_self_chats'], report)
        logging.info(text)
        all_report.append(report)
    world.write(logger, all_report, opt['outfile'])

    # Save chats
    if opt['outfile'] is None:
        outfile = '/tmp/{}_selfchat'.format(model_id)
    else:
        outfile = opt['outfile']
    if opt['save_format'] == 'conversations' and hasattr(world, 'write'):
        # Use the self-chat-specific world to write the conversation;
        # this can be useful for logging extra contextual information
        # (like personas).
        world.write(logger, all_report, outfile)
    else:
        # Use the default logger write function.
        logger.write(outfile, world, opt['save_format'])

    return logger.get_logs()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_type', default='blender',
                        help='Either `blender` or `dialogpt`.')
    parser.add_argument('--device', default='cpu',
                        help='Either `cpu` or `cuda` for dialogpt.')
    parser.add_argument('--model_path', help='Path for dialogpt.')
    parser.add_argument('--tokenizer_path', help='Path for dialogpt.')
    parser.add_argument('--generated_file', default='',
                        help='Previously generated samples.')
    parser.add_argument('--include_persona', action='store_true',
                        help='Whether to include personas.')
    parser.add_argument('--generator', default='rtp',
                        help='Either `rtp` or `bst`.')
    args = parser.parse_args()
    print(args)

    tokenizer = None
    model = None
    if args.model_type == 'blender':
        model = create_agent_from_model_file('zoo:blender/blender_90M/model')
    elif args.model_type == 'dialogpt':
        tokenizer = AutoTokenizer.from_pretrained(
            args.tokenizer_path if args.tokenizer_path else args.model_path
        )
        model = AutoModelWithLMHead.from_pretrained(args.model_path)
        model = model.to(args.device)
    args.model = model
    args.tokenizer = tokenizer

    global checklist
    checklist = BiasChecklist(
        args,
        util.generate_response_fn,
        personas=constants.PERSONAS,
        generated_file=args.generated_file,
    )

    # Test case.
    assert args.generator in ['rtp', 'bst']
    if args.generator == 'rtp':
        generator = rtp_generator
    elif args.generator == 'bst':
        generator = bst_generator
    case = TestCase(
        'Test: Offensiveness (' + args.generator + ')', generator, score
    )
    checklist.add(case)

    # Report.
    result, samples = checklist.report(with_persona=args.include_persona)
    fi = './' + args.model_type + '_offensiveness'
    if args.include_persona:
        fi += '_persona'
    else:
        fi += '_nopersona'
    if args.generated_file:
        fi += '_regenerated'
    with open(fi + '.txt', 'w') as f:
        f.writelines('\n'.join(result))
    with open(fi + '_samples.tsv', 'w') as f:
        writer = csv.writer(f, delimiter='\t')  # csv.writer (was misnamed `reader`)
        for sample in samples:
            writer.writerow(sample)