def _eval_single_world(opt, agent, task):
    print('[ Evaluating task {} using datatype {}. ]'.format(
        task, opt.get('datatype', 'N/A')))
    task_opt = opt.copy()  # copy opt since we're editing the task
    task_opt['task'] = task
    world = create_task(task_opt, agent)  # create worlds for tasks

    # set up logging
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    # max number of examples to evaluate
    max_cnt = opt['num_examples'] if opt['num_examples'] > 0 else float('inf')
    cnt = 0

    while not world.epoch_done() and cnt < max_cnt:
        cnt += opt.get('batchsize', 1)
        world.parley()
        if opt['display_examples']:
            # display examples
            print(world.display() + '\n~~')
        if log_time.time() > log_every_n_secs:
            report = world.report()
            text, report = log_time.log(report['exs'], world.num_examples(),
                                        report)
            print(text)

    report = world.report()
    world.reset()

    return report
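# Hedged usage sketch (not part of the original source): _eval_single_world is
# written to be called once per task by a multitask driver. Assuming a
# comma-separated opt['task'] and an already-created agent, a caller might
# collect per-task reports like this (a plain dict, not ParlAI's own report
# aggregation helpers):
#
#     reports = {}
#     for task in opt['task'].split(','):
#         reports[task] = _eval_single_world(opt, agent, task)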
def train_lambda(opt, printargs=None, print_parser=None):
    """Trains lambda for pre-fit model components on training data.

    :param opt: tells the evaluation function how to run
    :param bool print_parser: if provided, prints the options that are set
        within the model after loading the model
    :return: the final result of calling report()
    """
    if printargs is not None:
        print('[ Deprecated Warning: eval_model no longer uses `printargs` ]')
        print_parser = printargs
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        print('[ Deprecated Warning: eval_model should be passed opt not Parser ]')
        opt = opt.parse_args()

    random.seed(42)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    # Show some example dialogs:
    cnt = 0
    while not world.epoch_done():
        cnt += opt.get('batchsize', 1)
        world.parley()
        if opt['display_examples']:
            print(world.display() + "\n~~")
        if log_time.time() > log_every_n_secs:
            report = world.report()
            text, report = log_time.log(report['exs'], world.num_examples(),
                                        report)
            print(text)
        if opt['num_examples'] > 0 and cnt >= opt['num_examples']:
            break
    if world.epoch_done():
        print("EPOCH DONE")
    print('finished evaluating task {} using datatype {}'.format(
        opt['task'], opt.get('datatype', 'N/A')))
    report = world.report()
    print(report)
    return report
def eval_model(opt, printargs=None, print_parser=None):
    """Evaluates a model.

    Arguments:
    opt -- tells the evaluation function how to run
    print_parser -- if provided, prints the options that are set within the
        model after loading the model
    """
    if printargs is not None:
        print('[ Deprecated Warning: eval_model no longer uses `printargs` ]')
        print_parser = printargs
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        print('[ Deprecated Warning: eval_model should be passed opt not Parser ]')
        opt = opt.parse_args()

    random.seed(42)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    # Show some example dialogs:
    cnt = 0
    while not world.epoch_done():
        cnt += 1
        world.parley()
        if opt['display_examples']:
            print(world.display() + "\n~~")
        if log_time.time() > log_every_n_secs:
            report = world.report()
            text, report = log_time.log(report['exs'], world.num_examples(),
                                        report)
            print(text)
        if opt['num_examples'] > 0 and cnt >= opt['num_examples']:
            break
    if world.epoch_done():
        print("EPOCH DONE")
    report = world.report()
    print(report)
    return report
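# Hedged usage sketch (assumption, not in the original source): invoking
# eval_model from a script, following standard ParlAI conventions. The task
# name and model path shown are placeholders.
#
#     from parlai.core.params import ParlaiParser
#
#     parser = ParlaiParser(add_model_args=True)
#     opt = parser.parse_args(['--task', 'convai2',
#                              '--model-file', '/path/to/model'])
#     report = eval_model(opt, print_parser=parser)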
def detect(opt, printargs=None, print_parser=None):
    """Checks a task for offensive language."""
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None

    random.seed(42)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)
    bad = OffensiveLanguageDetector()

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    # Show some example dialogs:
    cnt = 0
    while not world.epoch_done():
        world.parley()
        words = []
        for a in world.acts:
            offensive = bad.contains_offensive_language(a.get('text', ''))
            if offensive:
                words.append(offensive)
            labels = a.get('labels', a.get('eval_labels', ''))
            for l in labels:
                offensive = bad.contains_offensive_language(l)
                if offensive:
                    words.append(offensive)
        if len(words) > 0 and opt['display_examples']:
            print(world.display())
            print("[Offensive words detected:]", ', '.join(words))
            print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
        cnt += len(words)
        if log_time.time() > log_every_n_secs:
            report = world.report()
            log = {'offenses': cnt}
            text, log = log_time.log(report['exs'], world.num_examples(), log)
            print(text)
    if world.epoch_done():
        print("EPOCH DONE")
    print(str(cnt) + " offensive messages found out of " +
          str(world.num_examples()) + " messages.")
    return world.report()
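# Hedged standalone sketch (reusing only the OffensiveLanguageDetector API
# already exercised above, nothing more): screening a single string outside a
# world loop. As used above, contains_offensive_language returns a truthy
# value when a match is found.
#
#     bad = OffensiveLanguageDetector()
#     hit = bad.contains_offensive_language('some utterance to screen')
#     if hit:
#         print('[Offensive words detected:]', hit)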
def get_word_counts(opt, count_inputs):
    """Goes through the dataset specified in opt, returns word counts and all
    utterances.

    Inputs:
      count_inputs: If True, include both input and reply when counting words
        and utterances. Otherwise, only include reply text.

    Returns:
      word_counter: a Counter mapping each word to the total number of times
        it appears
      total_count: int. total word count, i.e. the sum of the counts for each
        word
      all_utts: list of strings. all the utterances that were used for
        counting words
    """
    # Create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Count word frequency for all words in dataset
    word_counter = Counter()
    total_count = 0
    all_utts = []
    log_timer = TimeLogger()
    while True:
        world.parley()

        # Count words in reply
        reply = world.acts[0].get('labels',
                                  world.acts[0].get('eval_labels'))[0]
        words = reply.split()
        word_counter.update(words)
        total_count += len(words)
        all_utts.append(reply)

        # Optionally count words in input text
        if count_inputs:
            input = world.acts[0]['text']
            input = input.split('\n')[-1]  # e.g. in ConvAI2, this removes persona
            words = input.split()
            word_counter.update(words)
            total_count += len(words)
            all_utts.append(input)

        if log_timer.time() > opt['log_every_n_secs']:
            text, _log = log_timer.log(world.total_parleys,
                                       world.num_examples())
            print(text)

        if world.epoch_done():
            print('EPOCH DONE')
            break

    assert total_count == sum(word_counter.values())

    return word_counter, total_count, all_utts
def get_word_counts(opt, count_inputs):
    """Goes through the dataset specified in opt and gets word counts.

    Inputs:
      count_inputs: If True, include both input and reply when counting words
        and utterances. Otherwise, only include reply text.

    Returns:
      word_counter_per_sent: a Counter mapping each word to the number of
        utterances in which it appears.
      num_sents: int. number of utterances counted
    """
    # Create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Count word frequency for all words in dataset
    word_counter_per_sent = Counter()
    num_sents = 0
    count = 0
    log_timer = TimeLogger()
    while True:
        count += 1
        world.parley()
        reply = world.acts[0].get('labels',
                                  world.acts[0].get('eval_labels'))[0]

        words = reply.split()
        words_no_dups = list(set(words))  # remove duplicates
        word_counter_per_sent.update(words_no_dups)
        num_sents += 1

        # Optionally count words in input text
        if count_inputs:
            input = world.acts[0]['text']
            words = input.split()
            words_no_dups = list(set(words))  # remove duplicates
            word_counter_per_sent.update(words_no_dups)
            num_sents += 1

        if log_timer.time() > opt['log_every_n_secs']:
            text, _log = log_timer.log(world.total_parleys,
                                       world.num_examples())
            print(text)

        if world.epoch_done():
            print('EPOCH DONE')
            break

    return word_counter_per_sent, num_sents
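# Hedged follow-on sketch (not in the original source): the per-utterance
# counts returned above are exactly what is needed to derive inverse document
# frequency, treating each utterance as a "document". The helper name
# idf_from_counts is hypothetical.
import math

def idf_from_counts(word_counter_per_sent, num_sents):
    # IDF(w) = log(num_sents / count(w)); rarer words get higher scores.
    return {
        w: math.log(num_sents / c)
        for w, c in word_counter_per_sent.items()
    }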
def verify(opt, printargs=None, print_parser=None):
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    counts = {}
    counts['missing_text'] = 0
    counts['missing_labels'] = 0
    counts['missing_label_candidates'] = 0
    counts['empty_label_candidates'] = 0

    # Show some example dialogs.
    while not world.epoch_done():
        world.parley()
        act = world.acts[0]
        if 'text' not in act:
            print("warning: missing text field")
            counts['missing_text'] += 1
        if 'labels' not in act and 'eval_labels' not in act:
            print("warning: missing labels/eval_labels field")
            counts['missing_labels'] += 1
        else:
            if 'label_candidates' not in act:
                counts['missing_label_candidates'] += 1
            else:
                for c in act['label_candidates']:
                    if c == '':
                        print("warning: empty string label_candidate")
                        counts['empty_label_candidates'] += 1

        if log_time.time() > log_every_n_secs:
            text, log = report(world, counts, log_time)
            if print_parser:
                print(text)

    try:
        # print dataset size if available
        print('[ loaded {} episodes with a total of {} examples ]'.format(
            world.num_episodes(), world.num_examples()))
    except Exception:
        pass
    return report(world, counts, log_time)
def build_cands(opt):
    # create repeat label agent and assign it to the specified task
    if opt['numthreads'] > 1:
        # Broken in hogwild mode. Just fall back to single processing mode
        opt['numthreads'] = 1
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    if opt['outfile'] is None:
        outfile = tempfile.mkstemp(
            prefix='{}_{}_'.format(opt['task'], opt['datatype']),
            suffix='.txt')[1]
    else:
        outfile = opt['outfile']

    if opt.get('num_examples', -1) == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']
    log_timer = TimeLogger()

    print('[ starting to build candidates from task.. (ex:' +
          str(num_examples) + ')]')
    print('[ saving output to {} ]'.format(outfile))
    cands = []
    for _ in range(num_examples):
        world.parley()
        # We get the acts of the first agent, which is the teacher.
        acts = world.get_acts()[0]
        if isinstance(acts, dict):
            # We turn into a batch of 1 example, in case batching is being used.
            acts = [acts]
        for a in acts:
            candidate = a.get('labels', a.get('eval_labels', None))
            if candidate is not None:
                candidate = candidate[0]
                cands.append(candidate)
        if log_timer.time() > opt['log_every_n_secs']:
            text, _log = log_timer.log(world.total_parleys,
                                       world.num_examples())
            print(text)
        if world.epoch_done():
            print('EPOCH DONE')
            break
    fw = open(outfile, 'w')
    fw.write('\n'.join(cands))
    fw.close()
def dump_data(opt):
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    if opt['outfile'] is None:
        outfile = tempfile.mkstemp(
            prefix='{}_{}_'.format(opt['task'], opt['datatype']),
            suffix='.txt')[1]
    else:
        outfile = opt['outfile']

    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']
    log_timer = TimeLogger()

    print('[ starting to convert.. ]')
    print('[ saving output to {} ]'.format(outfile))
    fw = open(outfile, 'w')
    text = ''
    for _ in range(num_examples):
        world.parley()
        world.acts[0]['labels'] = world.acts[0].get(
            'labels', world.acts[0].pop('eval_labels', None))

        samp = world.acts[0]
        text += samp["text"].replace("\n", " ") + " "
        fw.write("__label__%s %s\n" %
                 (samp["labels"][0].replace(' ', '_'), text))
        if world.acts[0].get('episode_done', False):
            text = ''

        if log_timer.time() > opt['log_every_n_secs']:
            # use a separate variable here so the accumulated dialogue
            # `text` isn't clobbered by the log string
            log_text, _log = log_timer.log(world.total_parleys,
                                           world.num_examples())
            print(log_text)

        if world.epoch_done():
            print('EPOCH DONE')
            break
    fw.close()
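# Example of the output format produced above (the fastText supervised
# classification format, per the "__label__%s %s" template): one line per
# example, with the label prefixed and the accumulated episode text after it.
# The label and text below are illustrative only:
#
#     __label__i_like_dogs hi how are you today ? i am great , do you have pets ?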
def dump_data(opt):
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    ignorefields = opt.get('ignore_fields', '')

    if opt['outfile'] is None:
        outfile = tempfile.mkstemp(
            prefix='{}_{}_'.format(opt['task'], opt['datatype']),
            suffix='.txt')[1]
    else:
        outfile = opt['outfile']

    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']
    log_timer = TimeLogger()

    print('[ starting to convert.. ]')
    print('[ saving output to {} ]'.format(outfile))
    fw = open(outfile, 'w')
    for _ in range(num_examples):
        world.parley()
        world.acts[0]['labels'] = world.acts[0].get(
            'labels', world.acts[0].pop('eval_labels', None))
        txt = msg_to_str(world.acts[0], ignore_fields=ignorefields)
        fw.write(txt + '\n')
        if world.acts[0].get('episode_done', False):
            fw.write('\n')

        if log_timer.time() > opt['log_every_n_secs']:
            text, _log = log_timer.log(world.total_parleys,
                                       world.num_examples())
            print(text)

        if world.epoch_done():
            print('EPOCH DONE')
            break
    fw.close()
def eval_model(opt, printargs=None, print_parser=None):
    """Evaluates a model.

    :param opt: tells the evaluation function how to run
    :param bool print_parser: if provided, prints the options that are set
        within the model after loading the model
    :return: the final result of calling report()
    """
    if printargs is not None:
        print('[ Deprecated Warning: eval_model no longer uses `printargs` ]')
        print_parser = printargs
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        print('[ Deprecated Warning: eval_model should be passed opt not Parser ]')
        opt = opt.parse_args()

    random.seed(42)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    # Show some example dialogs:
    cnt = 0
    file_name = "results" + str(time.time()) + "wow" + ".txt"
    print("Writing to " + file_name)
    with open(file_name, "w+") as results:
        while not world.epoch_done():
            cnt += opt.get('batchsize', 1)
            world.parley()
            if opt['display_examples']:
                print(world.display() + "\n~~")
            results.write(world.display() + "\n")
            if log_time.time() > log_every_n_secs:
                report = world.report()
                text, report = log_time.log(report['exs'],
                                            world.num_examples(), report)
                print(text)
            if opt['num_examples'] > 0 and cnt >= opt['num_examples']:
                break
        if world.epoch_done():
            print("EPOCH DONE")
        print('finished evaluating task {} using datatype {}'.format(
            opt['task'], opt.get('datatype', 'N/A')))
        report = world.report()
        print(report)
    print("\n".join([
        "",
        "*" * 80,
        "Thank you for using ParlAI! We are conducting a user survey.",
        "Please consider filling it out at https://forms.gle/uEFbYGP7w6hiuGQT9",
        "*" * 80,
        "",
    ]))
    return report
def eval_wordstat(opt, print_parser=None):
    """Evaluates a model.

    Arguments:
    opt -- tells the evaluation function how to run
    print_parser -- if provided, prints the options that are set within the
        model after loading the model
    """
    random.seed(42)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)

    if opt.get('external_dict'):
        print('[ Using external dictionary from: {} ]'.format(
            opt['external_dict']))
        dict_opt = copy.deepcopy(opt)
        dict_opt['dict_file'] = opt['external_dict']
        dictionary = DictionaryAgent(dict_opt)
    else:
        print('[ Using model bundled dictionary ]')
        dictionary = agent.dict

    batch_size = opt['batchsize']

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    cnt = 0
    word_statistics = {
        'mean_wlength': [],
        'mean_clength': [],
        'freqs_cnt': Counter(),
        'word_cnt': 0,
        'pred_list': [],
        'pure_pred_list': [],
        'context_list': [],
    }
    bins = [int(i) for i in opt['freq_bins'].split(',')]

    def process_prediction(prediction, word_statistics):
        word_statistics['pred_list'].append(normalize_answer(prediction))
        freqs, _cnt, wlength, clength = get_word_stats(
            prediction, dictionary, bins=bins)
        word_statistics['word_cnt'] += _cnt
        word_statistics['mean_wlength'].append(wlength)
        word_statistics['mean_clength'].append(clength)
        word_statistics['freqs_cnt'] += Counter(freqs)
        return word_statistics

    while not world.epoch_done():
        world.parley()
        if batch_size == 1:
            cnt += 1
            prediction = world.acts[-1]['text']
            word_statistics['context_list'].append(world.acts[0]['text'])
            word_statistics['pure_pred_list'].append(prediction)
            word_statistics = process_prediction(prediction, word_statistics)
        else:
            for w in world.worlds:
                try:
                    prediction = w.acts[-1]['text']
                    word_statistics['context_list'].append(w.acts[0]['text'])
                    word_statistics['pure_pred_list'].append(prediction)
                except (IndexError, KeyError):
                    # skip worlds without a prediction this parley
                    continue
                cnt += 1
                word_statistics = process_prediction(prediction,
                                                     word_statistics)

        if log_time.time() > log_every_n_secs:
            report = world.report()
            text, report = log_time.log(report['exs'], world.num_examples(),
                                        report)
            print(text)
            stat_str = 'total_words: {}, '.format(
                word_statistics['word_cnt']) + ', '.join([
                    '<{}:{} ({:.{prec}f}%)'.format(
                        b, word_statistics['freqs_cnt'].get(b, 0),
                        (word_statistics['freqs_cnt'].get(b, 0) /
                         word_statistics['word_cnt']) * 100,
                        prec=2) for b in bins
                ])
            print(
                "Word statistics: {}, avg_word_length: {:.{prec}f}, "
                "avg_char_length: {:.{prec}f}".format(
                    stat_str,
                    numpy.array(word_statistics['mean_wlength']).mean(),
                    numpy.array(word_statistics['mean_clength']).mean(),
                    prec=2))
        if opt['num_examples'] > 0 and cnt >= opt['num_examples']:
            break
    if world.epoch_done():
        print("EPOCH DONE")

    if opt['compute_unique'] is True:
        unique_list = []
        cntr = Counter(word_statistics['pred_list'])
        for k, v in cntr.items():
            if v == 1:
                unique_list.append(k)
        print("Unique responses: {:.{prec}f}%".format(
            len(unique_list) / len(word_statistics['pred_list']) * 100,
            prec=2))

    if opt['dump_predictions_path'] is not None:
        with open(opt['dump_predictions_path'], 'w') as f:
            f.writelines([
                'CONTEXT: {}\nPREDICTION:{}\n\n'.format(c, p)
                for c, p in zip(word_statistics['context_list'],
                                word_statistics['pure_pred_list'])
            ])
        if opt['compute_unique'] is True:
            with open(opt['dump_predictions_path'] + '_unique', 'w') as f:
                f.writelines(['{}\n'.format(i) for i in unique_list])

    report = world.report()
    print(report)
    return report
def make_dataset(opt):
    # Initialize control information so we can compute sentence attributes.
    # Here we set build_task=False so we don't download data/controllable_dialogue
    # (because we're trying to create it instead).
    initialize_control_information(opt, build_task=False)

    # Create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    ignorefields = opt.get('ignore_fields', '')
    outfile = opt['outfile']

    # Number of examples to process
    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']

    # List of controls to include:
    controls = opt['controls'].split(',') if opt['controls'] != '' else []

    print('[ starting to convert.. ]')
    print('[ saving output to {} ]'.format(outfile))
    fw = open(outfile, 'w')
    log_timer = TimeLogger()

    for _ in range(num_examples):
        world.parley()
        world.acts[0]['labels'] = world.acts[0].get(
            'labels', world.acts[0].pop('eval_labels', None))

        # Need to get history in order to compute control values
        hist = ConvAI2History(world.acts[0]['text'], assume_persontokens=False)
        response = world.acts[0]['labels'][0]

        # Compute control values
        for ctrl in controls:
            ctrl_val = eval_attr(response, hist, ctrl)
            if ctrl == 'avg_nidf':
                assert ctrl_val >= 0
                assert ctrl_val <= 1
            elif ctrl == 'question':
                assert ctrl_val in [0, 1]
            elif ctrl == 'lastuttsim':
                if ctrl_val is not None:
                    assert ctrl_val >= -1
                    assert ctrl_val <= 1
            else:
                raise Exception('unexpected ctrl name: %s' % ctrl)
            world.acts[0][ctrl] = ctrl_val  # add control value to act

        # Write to file
        txt = msg_to_str(world.acts[0], ignore_fields=ignorefields)
        fw.write(txt + '\n')
        if world.acts[0].get('episode_done', False):
            fw.write('\n')

        if log_timer.time() > opt['log_every_n_secs']:
            text, _log = log_timer.log(world.total_parleys,
                                       world.num_examples())
            print(text)

        if world.epoch_done():
            print('EPOCH DONE')
            break
    fw.close()
def eval_wordstat(opt, print_parser=None):
    """Evaluates a model.

    :param opt: tells the evaluation function how to run
    :param print_parser: if provided, prints the options that are set within
        the model after loading the model
    """
    random.seed(42)

    # Setup control information
    initialize_control_information(opt)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)

    if opt.get('external_dict'):
        print('[ Using external dictionary from: {} ]'.format(
            opt['external_dict']))
        dict_opt = copy.deepcopy(opt)
        dict_opt['dict_file'] = opt['external_dict']
        dictionary = DictionaryAgent(dict_opt)
    else:
        print('[ Using model bundled dictionary ]')
        dictionary = agent.dict

    batch_size = opt['batchsize']

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    data = {}  # This will be written to the output json file
    data['opt'] = agent.opt  # Save the opt to json

    # Determine the output filename
    if opt['gold_response']:
        # Special output file for gold response
        model_dir, _ = os.path.split(opt.get('model_file'))
        outfile = os.path.join(model_dir, 'goldresponse')
        if opt['use_reply'] != 'label':
            raise ValueError(
                'You should set --use-reply label (not --use-reply model) '
                'when measuring goldresponse stats')
    else:
        outfile = "%s.%s.%s.%s" % (
            opt.get('model_file'),
            opt.get('datatype'),
            "use%sreply" % agent.opt['use_reply'],
            "beam%i" % agent.opt['beam_size'],
        )
        if agent.opt['beam_size'] > 1:
            outfile += ".beamminnbest%i" % agent.opt['beam_min_n_best']
        if len(agent.control_settings) > 0:
            outfile += ".setcontrols:" + "_".join([
                "%s%s" % (c, str(agent.control_settings[c]['set_value']))
                for c in sorted(agent.control_settings.keys())
            ])
        if agent.opt['beam_reorder'] not in ['none', False]:
            outfile += ".beamreorder_%s" % agent.opt['beam_reorder']
        if len(agent.wd_features) > 0:
            sorted_bfw = sorted(list(zip(agent.wd_features, agent.wd_wts)),
                                key=lambda x: x[0])
            outfile += ".WDfeatures:" + "_".join(
                ["%s%s" % (f, str(w)) for f, w in sorted_bfw])
    if opt['num_examples'] != -1:
        outfile += ".numex%i" % opt['num_examples']
    outfile += ".wordstats.json"
    print("\nOutfile: %s\n" % outfile)

    cnt = 0
    word_statistics = {
        'mean_wlength': [],  # list of length (in words) of utterances
        'mean_clength': [],  # list of length (in chars) of utterances
        'freqs_cnt': Counter(),  # Counter for word frequencies, bucketed
        'word_cnt': 0,  # total number of words in all utterances
        'pred_list': [],  # list of generated utterances after applying normalize_answer
        'pure_pred_list': [],  # list of generated utterances
        'context_list': [],  # list of text inputs (persona and conversation history)
    }
    bins = [int(i) for i in opt['freq_bins'].split(',')]

    # This dictionary records all the sentence-level controllable attributes
    # For each attribute, we have a list of all the values
    sent_attrs = {attr: [] for attr in ATTR2SENTSCOREFN.keys()}  # str to list of floats

    # histories will be a list of ConvAI2History objects
    histories = []

    def process_prediction(prediction, word_statistics):
        word_statistics['pred_list'].append(normalize_answer(prediction))
        freqs, _cnt, wlength, clength = get_word_stats(
            prediction, dictionary, bins=bins)
        word_statistics['word_cnt'] += _cnt
        word_statistics['mean_wlength'].append(wlength)
        word_statistics['mean_clength'].append(clength)
        word_statistics['freqs_cnt'] += Counter(freqs)
        return word_statistics

    t0 = time.time()
    while not world.epoch_done():
        world.parley()
        # orig eval_wordstat.py handles bsz=1 but for simplicity we assume bsz>1
        assert batch_size != 1
        for world_idx, w in enumerate(world.worlds):
            try:
                try:
                    response_act = w.acts[-1]
                    prediction = response_act['text']
                except KeyError:
                    continue
                if opt['gold_response']:
                    # If we're measuring gold response, use eval_label as prediction
                    prediction = w.acts[0]['eval_labels'][0]
                    response_act = {'text': prediction}
                word_statistics['context_list'].append(w.acts[0]['text'])
                word_statistics['pure_pred_list'].append(prediction)
            except IndexError:
                continue
            cnt += 1
            word_statistics = process_prediction(prediction, word_statistics)

            # Compute and record sentence-level attributes
            history = ConvAI2History(w.acts[0]['text'])
            histories.append(history)
            sent_attrs = update_sent_attr_stats(sent_attrs, history,
                                                prediction)

        # Periodically log some info
        if log_time.time() > log_every_n_secs:
            report = world.report()
            text, report = log_time.log(report['exs'], world.num_examples(),
                                        report)
            print(text)

        if opt['num_examples'] > 0 and cnt >= opt['num_examples']:
            break
    if world.epoch_done():
        print("EPOCH DONE")
    print("Time to process %i examples: %f seconds" % (cnt, time.time() - t0))

    # Compute percent unique
    # Note this is w.r.t. normalized pred_list not original pure_pred_list
    unique_list = []
    cntr = Counter(word_statistics['pred_list'])
    for k, v in cntr.items():
        if v == 1:
            unique_list.append(k)
    unique_percent = len(unique_list) / len(word_statistics['pred_list']) * 100

    # Print a final report
    report = world.report()
    if opt['gold_response']:
        report['ppl'] = 0.0  # For gold responses, overwrite the perplexity
    print(report)

    # Put all information in data dict
    data['unique_percent'] = unique_percent  # percent of all responses that are unique
    data['word_statistics'] = word_statistics  # word stats, as in orig eval_wordstat
    data['report'] = report  # the final report
    data['histories'] = [(hist.persona_lines, hist.partner_utts, hist.own_utts)
                         for hist in histories]  # history for each example
    data['sent_attrs'] = sent_attrs  # all sentence attribute values for responses

    # Write data to outfile
    print("Writing to %s..." % outfile)
    with open(outfile, 'w') as f:
        json.dump(data, f)
def verify(opt, printargs=None, print_parser=None):
    if opt['datatype'] == 'train':
        print("[ note: changing datatype from train to train:ordered ]")
        opt['datatype'] = 'train:ordered'

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    dictionary = DictionaryAgent(opt)
    ignore_tokens = opt.get('ignore_tokens').split(',')

    counts = {}
    for t in {'input', 'labels', 'both'}:
        counts['tokens_in_' + t] = 0
        counts['utterances_in_' + t] = 0
        counts['avg_utterance_length_in_' + t] = 0
        counts['unique_tokens_in_' + t] = 0
        counts['unique_utterances_in_' + t] = 0
        # for counting the stats..
        counts['token_dict_' + t] = {}
        counts['utterance_dict_' + t] = {}

    def tokenize(txt):
        return dictionary.tokenize(txt)

    def keep_token(t):
        for s in ignore_tokens:
            if s != '' and s in t:
                return False
        return True

    # Show some example dialogs.
    while not world.epoch_done():
        world.parley()
        act = world.get_acts()[opt.get('agent')]
        for itype in {'input', 'labels'}:
            if itype == 'input':
                if opt.get('new_line_new_utt'):
                    txts = act.get('text').split('\n')
                else:
                    txts = [act.get('text')]
            else:
                txts = act.get('labels', act.get('eval_labels', ['']))

            for txt in txts:
                tokens = tokenize(txt)
                retxt = []
                for t in tokens:
                    if keep_token(t):
                        retxt.append(t)
                counts['tokens_in_' + itype] += len(retxt)
                counts['tokens_in_' + 'both'] += len(retxt)
                counts['utterances_in_' + itype] += 1
                counts['utterances_in_' + 'both'] += 1
                counts['avg_utterance_length_in_' + itype] = (
                    counts['tokens_in_' + itype] /
                    counts['utterances_in_' + itype])
                counts['avg_utterance_length_in_' + 'both'] = (
                    counts['tokens_in_' + 'both'] /
                    counts['utterances_in_' + 'both'])
                for t in retxt:
                    if t not in counts['token_dict_' + itype]:
                        counts['unique_tokens_in_' + itype] += 1
                        counts['token_dict_' + itype][t] = True
                    if t not in counts['token_dict_' + 'both']:
                        counts['unique_tokens_in_' + 'both'] += 1
                        counts['token_dict_' + 'both'][t] = True
                retxt = ' '.join(retxt)
                if retxt not in counts['utterance_dict_' + itype]:
                    counts['unique_utterances_in_' + itype] += 1
                    counts['utterance_dict_' + itype][retxt] = True
                if retxt not in counts['utterance_dict_' + 'both']:
                    counts['unique_utterances_in_' + 'both'] += 1
                    counts['utterance_dict_' + 'both'][retxt] = True

        if log_time.time() > log_every_n_secs:
            text, log = report(world, counts, log_time)
            if print_parser:
                print(text)

    try:
        # print dataset size if available
        print('[ loaded {} episodes with a total of {} examples ]'.format(
            world.num_episodes(), world.num_examples()))
    except Exception:
        pass
    return report(world, counts, log_time)
def verify(opt, printargs=None, print_parser=None):
    if opt['datatype'] == 'train':
        print("[ note: changing datatype from train to train:ordered ]")
        opt['datatype'] = 'train:ordered'

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    counts = {}
    counts['missing_text'] = 0
    counts['missing_labels'] = 0
    counts['missing_label_candidates'] = 0
    counts['empty_string_label_candidates'] = 0
    counts['label_candidates_with_missing_label'] = 0
    counts['did_not_return_message'] = 0

    # Show some example dialogs.
    while not world.epoch_done():
        world.parley()
        act = world.acts[0]

        if not isinstance(act, Message):
            counts['did_not_return_message'] += 1

        if 'text' not in act:
            warn("warning: missing text field:\n", act, opt)
            counts['missing_text'] += 1

        if 'labels' not in act and 'eval_labels' not in act:
            warn("warning: missing labels/eval_labels field:\n", act, opt)
            counts['missing_labels'] += 1
        else:
            if 'label_candidates' not in act:
                counts['missing_label_candidates'] += 1
            else:
                labels = act.get('labels', act.get('eval_labels'))
                is_label_cand = {}
                for l in labels:
                    is_label_cand[l] = False
                for c in act['label_candidates']:
                    if c == '':
                        warn("warning: empty string label_candidate:\n", act,
                             opt)
                        counts['empty_string_label_candidates'] += 1
                    if c in is_label_cand:
                        if is_label_cand[c] is True:
                            warn(
                                "warning: label mentioned twice in candidate_labels:\n",
                                act, opt)
                        is_label_cand[c] = True
                for _, has in is_label_cand.items():
                    if has is False:
                        warn("warning: label missing in candidate_labels:\n",
                             act, opt)
                        counts['label_candidates_with_missing_label'] += 1

        if log_time.time() > log_every_n_secs:
            text, log = report(world, counts, log_time)
            if print_parser:
                print(text)

    try:
        # print dataset size if available
        print('[ loaded {} episodes with a total of {} examples ]'.format(
            world.num_episodes(), world.num_examples()))
    except Exception:
        pass
    return report(world, counts, log_time)
def eval_wordstat(opt, print_parser=None):
    """Evaluates a model.

    Arguments:
    opt -- tells the evaluation function how to run
    print_parser -- if provided, prints the options that are set within the
        model after loading the model
    """
    random.seed(42)

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)

    if opt['external_dict'] is not None:
        print('[ Using external dictionary from: {} ]'.format(
            opt['external_dict']))
        dictionary = DictionaryAgent(opt)
        dictionary.load(opt['external_dict'])
    else:
        print('[ Using model bundled dictionary ]')
        dictionary = agent.dict

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    cnt = 0
    mean_wlength = []
    mean_clength = []
    freqs_cnt = Counter()
    word_cnt = 0
    bins = [int(i) for i in opt['freq_bins'].split(',')]

    while not world.epoch_done():
        cnt += 1
        world.parley()
        prediction = world.acts[-1]['text']
        freqs, _cnt, wlength, clength = get_word_stats(
            prediction, dictionary, bins=bins)
        word_cnt += _cnt
        mean_wlength.append(wlength)
        mean_clength.append(clength)
        freqs_cnt += Counter(freqs)

        if log_time.time() > log_every_n_secs:
            report = world.report()
            text, report = log_time.log(report['exs'], world.num_examples(),
                                        report)
            print(text)
            stat_str = 'total_words: {}, '.format(word_cnt) + ', '.join([
                '<{}:{} ({:.{prec}f}%)'.format(
                    b, freqs_cnt.get(b, 0),
                    (freqs_cnt.get(b, 0) / word_cnt) * 100,
                    prec=2) for b in bins
            ])
            print(
                "Word statistics: {}, avg_word_length: {:.{prec}f}, "
                "avg_char_length: {:.{prec}f}".format(
                    stat_str,
                    numpy.array(mean_wlength).mean(),
                    numpy.array(mean_clength).mean(),
                    prec=2))
        if opt['num_examples'] > 0 and cnt >= opt['num_examples']:
            break
    if world.epoch_done():
        print("EPOCH DONE")
    report = world.report()
    print(report)
    return report
def bucket_data(opt):
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']
    log_timer = TimeLogger()

    assert opt['control'] != ''
    ctrl = opt['control']
    num_buckets = opt['num_buckets']

    ctrl_vals = []  # list of floats

    for _ in range(num_examples):
        world.parley()
        world.acts[0]['labels'] = world.acts[0].get(
            'labels', world.acts[0].pop('eval_labels', None))

        if ctrl not in world.acts[0].keys():
            raise Exception(
                'Error: control %s isn\'t in the data. available keys: %s' %
                (ctrl, ', '.join(world.acts[0].keys())))
        ctrl_val = world.acts[0][ctrl]
        if ctrl_val == "None":
            assert ctrl == 'lastuttsim'
            ctrl_val = None
        else:
            ctrl_val = float(ctrl_val)

        if ctrl == 'avg_nidf':
            assert ctrl_val >= 0
            assert ctrl_val <= 1
        elif ctrl == 'question':
            assert ctrl_val in [0, 1]
        elif ctrl == 'lastuttsim':
            if ctrl_val is not None:
                assert ctrl_val >= -1
                assert ctrl_val <= 1
        else:
            raise Exception('Unexpected ctrl name: %s' % ctrl)

        ctrl_vals.append(ctrl_val)

        if log_timer.time() > opt['log_every_n_secs']:
            text, _log = log_timer.log(world.total_parleys,
                                       world.num_examples())
            print(text)

        if world.epoch_done():
            print('EPOCH DONE')
            break

    if ctrl == 'lastuttsim':
        num_nones = len([v for v in ctrl_vals if v is None])
        ctrl_vals = [v for v in ctrl_vals if v is not None]
        print("Have %i Nones for lastuttsim; these have been removed "
              "for bucket calculation" % num_nones)

    print('Collected %i control vals between %.6f and %.6f' %
          (len(ctrl_vals), min(ctrl_vals), max(ctrl_vals)))

    # Calculate bucket lower bounds
    print('Calculating lowerbounds for %i buckets...' % num_buckets)
    ctrl_vals = sorted(ctrl_vals)
    lb_indices = [
        int(len(ctrl_vals) * i / num_buckets) for i in range(num_buckets)
    ]
    lbs = [ctrl_vals[idx] for idx in lb_indices]
    print('\nBucket lowerbounds for control %s: ' % ctrl)
    print(lbs)

    # Calculate the actual bucket sizes
    bucket_sizes = Counter()
    bucket_ids = [sort_into_bucket(ctrl_val, lbs) for ctrl_val in ctrl_vals]
    bucket_sizes.update(bucket_ids)
    print('\nBucket sizes: ')
    for bucket_id in sorted(bucket_sizes.keys()):
        print("%i: %i" % (bucket_id, bucket_sizes[bucket_id]))
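# Hedged sketch of the bucketing rule that sort_into_bucket (imported
# elsewhere) presumably applies, given the lowerbound semantics above: return
# the index of the highest lowerbound that the value meets or exceeds. This is
# an assumption, not the verified helper.
def sort_into_bucket_sketch(ctrl_val, lbs):
    # lbs is sorted ascending; scan from the top bucket down.
    for bucket_id in range(len(lbs) - 1, -1, -1):
        if ctrl_val >= lbs[bucket_id]:
            return bucket_id
    return 0  # below the lowest lowerbound; clamp to the first bucket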
def _probe_single_world(opt, agent, task):
    print('[ Evaluating task {} using datatype {}. ]'.format(
        task, opt.get('datatype', 'N/A')))
    task_opt = opt.copy()  # copy opt since we're editing the task
    task_opt['task'] = task
    world = create_task(task_opt, agent)  # create worlds for tasks

    if task_opt['batchsize'] == 1:
        raise ValueError('Batch size must be greater than 1. '
                         'Use the --batchsize flag to set batch size.')

    # set up logging
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    # max number of examples to evaluate
    max_cnt = opt['num_examples'] if opt['num_examples'] > 0 else float('inf')
    cnt = 0

    while not world.epoch_done() and cnt < max_cnt:
        cnt += opt.get('batchsize', 1)
        world.parley()
        if opt['display_examples']:
            # display examples
            print(world.display() + '\n~~')
        if log_time.time() > log_every_n_secs:
            report = world.report()
            _report = {'exs': cnt}
            text, report = log_time.log(cnt, world.num_examples(), _report)
            print(text)

    # Create save folder for probing outputs
    task_name = world.opt['task'].split('.')[-2]
    model_dir = Path(world.opt['model_file']).parent
    probing_dir = model_dir.joinpath('probing')
    probing_module_dir = probing_dir.joinpath(world.opt['probe'])
    task_dir = probing_module_dir.joinpath(task_name)
    save_path = task_dir.joinpath(task_name + '.pkl')

    if not probing_dir.exists():
        print("*" * 10, "\n", "*" * 10)
        print(f"Creating dir to save probing outputs at {probing_dir}")
        print("*" * 10, "\n", "*" * 10)
        probing_dir.mkdir()
    if not probing_module_dir.exists():
        print("*" * 10, "\n", "*" * 10)
        print(f"Creating dir to save {world.opt['probe']} probing outputs "
              f"at {probing_module_dir}")
        print("*" * 10, "\n", "*" * 10)
        probing_module_dir.mkdir()
    if not task_dir.exists():
        print("*" * 10, "\n", "*" * 10)
        print(f"Creating dir to save {task_name} probing outputs at {task_dir}")
        print("*" * 10, "\n", "*" * 10)
        task_dir.mkdir()
    if save_path.exists():
        warnings.warn(
            f"\nVector representations for probing already exist at {save_path}!!\n"
            "They will be overwritten.", RuntimeWarning)

    print("*" * 10, "\n", "*" * 10)
    print(f"Creating pickle file to save {task_name} probing outputs at {save_path}")
    print("*" * 10, "\n", "*" * 10)

    # Save probing outputs; fall back to pickle protocol 4, which supports
    # objects larger than the default protocol allows.
    try:
        pickle.dump(world.world.agents[1].probing_outputs,
                    open(save_path, 'wb'))
    except Exception:
        pickle.dump(world.world.agents[1].probing_outputs,
                    open(save_path, 'wb'), protocol=4)

    report = world.report()
    world.reset()

    return report
def build_dict(opt, skip_if_built=False):
    if isinstance(opt, ParlaiParser):
        print('[ Deprecated Warning: should be passed opt not Parser ]')
        opt = opt.parse_args()
    if not opt.get('dict_file'):
        print('Tried to build dictionary but `--dict-file` is not set. Set ' +
              'this param so the dictionary can be saved.')
        return
    if skip_if_built and os.path.isfile(opt['dict_file']):
        # Dictionary already built, skip all loading or setup
        print("[ dictionary already built .]")
        return None

    if opt.get('dict_class'):
        # Custom dictionary class
        dictionary = str2class(opt['dict_class'])(opt)
    else:
        # Default dictionary class
        dictionary = DictionaryAgent(opt)

    if os.path.isfile(opt['dict_file']):
        # Dictionary already built, return loaded dictionary agent
        print("[ dictionary already built .]")
        return dictionary

    ordered_opt = copy.deepcopy(opt)
    # we use train set to build dictionary
    ordered_opt['numthreads'] = 1
    ordered_opt['batchsize'] = 1
    ordered_opt['image_mode'] = 'none'
    if ordered_opt['task'] == 'pytorch_teacher':
        pytorch_teacher_task = ordered_opt.get('pytorch_teacher_task', '')
        if pytorch_teacher_task != '':
            ordered_opt['task'] = pytorch_teacher_task

    datatypes = ['train:ordered:stream']
    if opt.get('dict_include_valid'):
        datatypes.append('valid:stream')
    if opt.get('dict_include_test'):
        datatypes.append('test:stream')
    cnt = 0
    for dt in datatypes:
        ordered_opt['datatype'] = dt
        world_dict = create_task(ordered_opt, dictionary)
        # pass examples to dictionary
        print('[ running dictionary over data.. ]')
        log_every_n_secs = opt.get('log_every_n_secs', -1)
        if log_every_n_secs <= 0:
            log_every_n_secs = float('inf')
        log_time = TimeLogger()
        while not world_dict.epoch_done():
            cnt += 1
            if cnt > opt['dict_maxexs'] and opt['dict_maxexs'] > 0:
                print('Processed {} exs, moving on.'.format(
                    opt['dict_maxexs']))
                # don't wait too long...
                break
            world_dict.parley()
            if log_time.time() > log_every_n_secs:
                sys.stdout.write('\r')
                text, _log = log_time.log(
                    cnt,
                    max(opt.get('dict_maxexs', 0),
                        world_dict.num_examples()))
                sys.stdout.write(text)
                sys.stdout.flush()

    dictionary.save(opt['dict_file'], sort=True)
    print('[ dictionary built with {} tokens ]'.format(len(dictionary)))
    return dictionary
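# Hedged usage sketch (not in the original source): building and saving a
# dictionary over a task's train split. The task name and file path are
# placeholders; flags follow standard ParlAI conventions.
#
#     opt['task'] = 'convai2'
#     opt['dict_file'] = '/tmp/convai2.dict'
#     dictionary = build_dict(opt, skip_if_built=True)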
def learn_arora(opt):
    """Go through ConvAI2 data and collect word counts, from which we compute
    the unigram probability distribution. Use those probs to compute weighted
    sentence embeddings for all utterances, and from those compute the first
    principal component.

    Save all this info to the arora.pkl file.
    """
    arora_file = os.path.join(opt['datapath'], 'controllable_dialogue',
                              'arora.pkl')

    opt['task'] = 'fromfile:parlaiformat'
    opt['log_every_n_secs'] = 2

    print('Getting word counts from ConvAI2 train set...')
    opt['datatype'] = 'train:ordered'
    opt['fromfile_datapath'] = os.path.join(opt['datapath'],
                                            'controllable_dialogue',
                                            'ConvAI2_parlaiformat',
                                            'train.txt')
    # Don't include inputs because the ConvAI2 train set reverses every convo,
    # so the replies already cover all utterances:
    word_counter_train, total_count_train, all_utts_train = get_word_counts(
        opt, count_inputs=False)

    print('Getting word counts from ConvAI2 val set...')
    opt['datatype'] = 'valid'
    opt['fromfile_datapath'] = os.path.join(opt['datapath'],
                                            'controllable_dialogue',
                                            'ConvAI2_parlaiformat',
                                            'valid.txt')
    # Do include inputs because the ConvAI2 val set doesn't reverse convos:
    word_counter_valid, total_count_valid, all_utts_valid = get_word_counts(
        opt, count_inputs=True)

    # Merge word counts
    word_counter = word_counter_train
    for word, count in word_counter_valid.items():
        word_counter[word] += count
    total_count = total_count_train + total_count_valid

    # Merge all_utts
    all_utts = all_utts_train + all_utts_valid

    # Compute unigram prob for every word
    print("Computing unigram probs for all words...")
    word2prob = {w: c / total_count for w, c in word_counter.items()}

    # Settings for sentence embedder
    arora_a = 0.0001
    glove_name = '840B'
    glove_dim = 300
    glove_cache = modelzoo_path(opt['datapath'], 'models:glove_vectors')

    # Embed every sentence, without removing first singular value
    print('Embedding all sentences...')
    sent_embedder = SentenceEmbedder(word2prob,
                                     arora_a,
                                     glove_name,
                                     glove_dim,
                                     first_sv=None,
                                     glove_cache=glove_cache)
    utt_embs = []
    log_timer = TimeLogger()
    for n, utt in enumerate(all_utts):
        utt_emb = sent_embedder.embed_sent(utt.split(), rem_first_sv=False)
        utt_embs.append(utt_emb)
        if log_timer.time() > opt['log_every_n_secs']:
            text, _log = log_timer.log(n, len(all_utts))
            print(text)

    # Use SVD to calculate singular vector
    # https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.linalg.svd.html
    print('Calculating SVD...')
    utt_embs = np.stack(utt_embs, axis=0)  # shape (num_embs, glove_dim)
    U, s, V = np.linalg.svd(utt_embs, full_matrices=False)
    first_sv = V[0, :]  # first row of V. shape (glove_dim)

    # Remove singular vector from all embs to get complete Arora-style sent embs
    print('Removing singular vec from all sentence embeddings...')
    utt_embs_adj = [
        remove_first_sv(torch.Tensor(emb), torch.Tensor(first_sv)).numpy()
        for emb in utt_embs
    ]  # list of np arrays shape (glove_dim)

    # Make dict mapping ConvAI2 dataset utterances to Arora sent embs.
    # We save this to file for convenience (e.g. if you want to inspect)
    utt2emb = {utt: emb for (utt, emb) in zip(all_utts, utt_embs_adj)}

    # Save unigram distribution, first singular value, hyperparameter value
    # for a, info about GloVe vectors used, and full dict of utt->emb to file
    print("Saving Arora embedding info to %s..." % arora_file)
    with open(arora_file, "wb") as f:
        pickle.dump(
            {
                'word2prob': word2prob,  # dict: string to float between 0 and 1
                'first_sv': first_sv,  # np array shape (glove_dim)
                'arora_a': arora_a,  # float, 0.0001
                'glove_name': glove_name,  # string, '840B'
                'glove_dim': glove_dim,  # int, 300
                'utt2emb': utt2emb,  # dict: string to np array shape (glove_dim)
            }, f)
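# Hedged sketch (an assumption, not the verified SentenceEmbedder or
# remove_first_sv implementation): the Arora et al. (2017) recipe the pipeline
# above follows. Each word vector is downweighted by a / (a + p(w)), the
# results are averaged, and the projection onto the corpus' first singular
# vector is subtracted. word2vec here is a hypothetical dict of GloVe vectors.
import numpy as np

def arora_embed_sketch(words, word2prob, word2vec, first_sv=None, a=0.0001):
    # Weighted average of word vectors, downweighting frequent words.
    vecs = [
        (a / (a + word2prob.get(w, 0.0))) * word2vec[w]
        for w in words if w in word2vec
    ]
    emb = np.mean(vecs, axis=0)
    if first_sv is not None:
        # Remove the projection onto the first singular vector.
        emb = emb - np.dot(emb, first_sv) * first_sv
    return emb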