def write_conversations_format(self, outfile, world):
    """
    Save the collected logs to ``outfile`` through ``Conversations``.

    :param outfile: destination handed unchanged to
        ``Conversations.save_conversations``.
    :param world: object whose ``opt`` mapping is stored alongside the logs;
        its ``'selfchat_task'`` entry (default ``False``) is forwarded as the
        ``self_chat`` flag.
    """
    logs = self._logs
    opt = world.opt
    is_self_chat = opt.get('selfchat_task', False)
    Conversations.save_conversations(logs, outfile, opt, self_chat=is_self_chat)
def write_conversations_format(self, outfile, world):
    """
    Log the destination, then save the collected logs through ``Conversations``.

    :param outfile: destination handed unchanged to
        ``Conversations.save_conversations``; also named in the log message.
    :param world: object whose ``opt`` mapping is stored alongside the logs;
        its ``'selfchat_task'`` entry (default ``False``) is forwarded as the
        ``self_chat`` flag.
    """
    logging.info(f'Saving log to {outfile} in Conversations format')
    logs = self._logs
    opt = world.opt
    is_self_chat = opt.get('selfchat_task', False)
    Conversations.save_conversations(logs, outfile, opt, self_chat=is_self_chat)
def test_conversations(self):
    """
    Round-trip two small conversations through ``Conversations``.

    Saves two two-turn dialogues under ``self.datapath``, reloads the
    ``.jsonl`` file, and checks the conversation count, the recorded speakers,
    the saved opt, the extra keyword data, and the text printed when reading
    the first conversation.
    """
    emily_stephen_chat = [
        [
            {'id': 'Emily', 'text': 'Hello, do you like this test?'},
            {'id': 'Stephen', 'text': 'Why yes! I love this test!'},
        ],
        [
            {'id': 'Emily', 'text': 'So will you stamp this diff?'},
            {'id': 'Stephen', 'text': 'Yes, I will do it right now!'},
        ],
    ]
    a_b_chat = [
        [
            {
                'id': 'A',
                'text': 'Somebody once told me the world is gonna roll me',
            },
            {'id': 'B', 'text': 'I aint the sharpest tool in the shed'},
        ],
        [
            {
                'id': 'A',
                'text': 'She was looking kind of dumb with her finger and her thumb',
            },
            {'id': 'B', 'text': 'In the shape of an L on her forehead'},
        ],
    ]

    self.opt = {'A': 'B', 'C': 'D', 'E': 'F'}
    self.convo_datapath = os.path.join(self.datapath, 'convo1')
    Conversations.save_conversations(
        [emily_stephen_chat, a_b_chat],
        self.convo_datapath,
        self.opt,
        self_chat=False,
        other_info='Blah blah blah',
    )

    # Both output files must exist after saving.
    for suffix in ('.jsonl', '.metadata'):
        assert os.path.exists(self.convo_datapath + suffix)

    loaded = Conversations(self.convo_datapath + '.jsonl')

    # test conversations loaded
    self.assertEqual(loaded.num_conversations, 2)

    # test speakers saved
    expected_speakers = {'Stephen', 'Emily', 'A', 'B'}
    self.assertEqual(set(loaded.metadata.speakers), expected_speakers)

    # test opt saved
    for key in ('A', 'C', 'E'):
        self.assertEqual(self.opt[key], loaded.metadata.opt[key])

    # test kwargs
    self.assertEqual({'other_info': 'Blah blah blah'}, loaded.metadata.extra_data)

    # test reading conversations
    expected_transcript = (
        'Emily: Hello, do you like this test?\n'
        'Stephen: Why yes! I love this test!\n'
        'Emily: So will you stamp this diff?\n'
        'Stephen: Yes, I will do it right now!\n'
    )
    with testing_utils.capture_output() as captured:
        loaded.read_conv_idx(0)
    self.assertIn(expected_transcript, captured.getvalue())
def test_conversations(self):
    """
    Round-trip two small conversations through ``Conversations``.

    Saves two two-turn dialogues under ``self.datapath``, reloads the
    ``.jsonl`` file, and checks the length, the recorded speakers, the saved
    opt, the extra keyword data, the text logged when reading the first
    conversation, and indexed access to individual turns.
    """
    emily_stephen_chat = [
        [
            {'id': 'Emily', 'text': 'Hello, do you like this test?'},
            {'id': 'Stephen', 'text': 'Why yes! I love this test!'},
        ],
        [
            {'id': 'Emily', 'text': 'So will you stamp this diff?'},
            {'id': 'Stephen', 'text': 'Yes, I will do it right now!'},
        ],
    ]
    a_b_chat = [
        [
            {
                'id': 'A',
                'text': 'Somebody once told me the world is gonna roll me',
            },
            {'id': 'B', 'text': 'I aint the sharpest tool in the shed'},
        ],
        [
            {
                'id': 'A',
                'text': 'She was looking kind of dumb with her finger and her thumb',
            },
            {'id': 'B', 'text': 'In the shape of an L on her forehead'},
        ],
    ]

    self.opt = {'A': 'B', 'C': 'D', 'E': 'F'}
    self.convo_datapath = os.path.join(self.datapath, 'convo1')
    Conversations.save_conversations(
        [emily_stephen_chat, a_b_chat],
        self.convo_datapath,
        self.opt,
        self_chat=False,
        other_info='Blah blah blah',
    )

    # Both output files must exist after saving.
    for suffix in ('.jsonl', '.metadata'):
        assert os.path.exists(self.convo_datapath + suffix)

    loaded = Conversations(self.convo_datapath + '.jsonl')

    # test conversations loaded
    self.assertEqual(len(loaded), 2)

    # test speakers saved
    expected_speakers = {'Stephen', 'Emily', 'A', 'B'}
    self.assertEqual(set(loaded.metadata.speakers), expected_speakers)

    # test opt saved
    for key in ('A', 'C', 'E'):
        self.assertEqual(self.opt[key], loaded.metadata.opt[key])

    # test kwargs
    self.assertEqual({'other_info': 'Blah blah blah'}, loaded.metadata.extra_data)

    # test reading conversations
    expected_transcript = (
        'Emily: Hello, do you like this test?\n'
        'Stephen: Why yes! I love this test!\n'
        'Emily: So will you stamp this diff?\n'
        'Stephen: Yes, I will do it right now!\n'
    )
    with self.assertLogs(logger=logging.logger, level='DEBUG') as log_ctx:
        loaded.read_conv_idx(0)
        logged_text = "\n".join(log_ctx.output)
        self.assertIn(expected_transcript, logged_text)

    # test getting a specific turn
    first_conversation = loaded[0]  # Conversation
    opening_turn = first_conversation[0]
    closing_turn = first_conversation[3]
    self.assertEqual(opening_turn.id, 'Emily')
    self.assertEqual(closing_turn.text, 'Yes, I will do it right now!')
def dump_data(opt):
    """
    Dump task data to ACUTE-Eval.

    Creates a world for the task in ``opt`` driven by a ``RepeatLabelAgent``,
    steps through the episodes, and turns every exchange into a two-message
    turn: the task text attributed to speaker 0 and one randomly chosen label
    attributed to speaker 1. The collected dialogues are then written with
    ``Conversations.save_conversations``.

    :param opt: option mapping. Keys read here: ``'task'``, ``'datatype'``,
        ``'outfile'`` (``None`` means a fresh temporary ``.txt`` path is
        created), ``'num_episodes'`` (``-1`` means every episode in the
        world), ``'log_every_n_secs'``, and optionally ``'speaker_0_id'``,
        ``'speaker_1_id'`` and ``'prepended_context'``.
    """
    # Function-scope import: only needed to close the mkstemp descriptor.
    import os

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    task = opt.get('task')
    speaker_0_id = opt.get('speaker_0_id') or f'{task}_as_human'
    speaker_1_id = opt.get('speaker_1_id') or f'{task}_as_model'

    outfile = opt['outfile']
    if outfile is None:
        # mkstemp returns (open OS-level descriptor, path). Only the path is
        # needed, so close the descriptor right away instead of leaking it.
        fd, outfile = tempfile.mkstemp(
            prefix='{}_{}_'.format(opt['task'], opt['datatype']), suffix='.txt'
        )
        os.close(fd)

    num_episodes = (
        world.num_episodes()
        if opt['num_episodes'] == -1
        else min(opt['num_episodes'], world.num_episodes())
    )
    log_timer = TimeLogger()

    print(f'[ starting to convert, saving output to {outfile} ]')
    dialogues = []
    for _ in range(num_episodes):
        episode = []
        episode_done = False
        while not episode_done:
            world.parley()
            acts = world.get_acts()
            text = acts[0].get('text')
            split_text = text.split('\n')
            # NOTE: the fallback argument is evaluated eagerly, so
            # 'eval_labels' is popped from the act even when 'labels' exists.
            # If the act has neither key, random.choice receives None and fails.
            label = random.choice(
                acts[0].get('labels', acts[0].pop('eval_labels', None))
            )
            if not episode and opt.get('prepended_context'):
                # first turn: every line but the last is context, emitted as
                # its own turn with one identical message per speaker slot
                context = split_text[:-1]
                text = split_text[-1]
                context_turn = [
                    {'text': context, 'episode_done': False, 'id': 'context'}
                    for _ in range(2)
                ]
                episode.append(context_turn)
            turn = [
                {'text': text, 'episode_done': False, 'id': speaker_0_id},
                {'text': label, 'episode_done': False, 'id': speaker_1_id},
            ]
            episode.append(turn)
            if acts[0].get('episode_done', False):
                # mark the final message of the episode and stop stepping
                episode[-1][-1]['episode_done'] = True
                episode_done = True
                dialogues.append(episode)

            if log_timer.time() > opt['log_every_n_secs']:
                text, _log = log_timer.log(world.total_parleys, world.num_examples())
                print(text)

        if world.epoch_done():
            break

    Conversations.save_conversations(dialogues, outfile, opt)