def main():
    """Play a human-vs-machine game over the 'dev' fold questions.

    The machine agent pairs an ``RNNBuzzer`` with the first enabled guesser
    (loaded through ``ElasticSearchWikidataGuesser`` and wrapped in
    ``ESGuesserWrapper``); the game is then started with ``game.run(10)``.
    """
    buzzer = RNNBuzzer()

    # Questions: keep only the ones assigned to the 'dev' fold.
    every_question = list(QuestionDatabase().all_questions().values())
    dev_questions = [q for q in every_question if q.fold == 'dev']

    # Machine agent: guesser state is loaded from the first enabled spec's
    # output directory.
    first_spec = AbstractGuesser.list_enabled_guessers()[0]
    guesser_dir = AbstractGuesser.output_path(
        first_spec.guesser_module, first_spec.guesser_class, '')
    wrapped_guesser = ESGuesserWrapper(
        ElasticSearchWikidataGuesser.load(guesser_dir))
    machine_agent = GuesserBuzzerAgent(wrapped_guesser, buzzer)

    # Human agent.
    human_agent = HumanAgent()

    # Hooks, in the order they are handed to the game.  Only the visualizer
    # is instantiated here; the others are passed as-is.
    hooks = [
        hook.NotifyBuzzingHook,
        hook.GameInterfaceHook,
        hook.VisualizeGuesserBuzzerHook(machine_agent),
        hook.HighlightHook,
    ]

    # Game.
    game = Game(dev_questions, [human_agent, machine_agent], hooks)
    game.run(10)
def output(self):
    """Return one ``LocalTarget`` per enabled guesser.

    Each target points at ``output/guesser/best/<module>.<class>/best.touch``.
    """
    return [
        LocalTarget(
            f"output/guesser/best/"
            f"{spec.guesser_module}.{spec.guesser_class}"
            f"/best.touch")
        for spec in AbstractGuesser.list_enabled_guessers()
    ]
def merge_dfs():
    """Merge the guess DataFrames of all enabled guessers, one fold at a time.

    For every fold in ``c.BUZZER_INPUT_FOLDS`` the guesses of each enabled
    guesser are loaded from ``c.GUESSER_TARGET_PREFIX/<module>.<class>``,
    concatenated, and saved under ``c.GUESSER_TARGET_PREFIX/merged``.  A fold
    whose merged output already exists is skipped.
    """
    guesser_names = [
        "{0}.{1}".format(x.guesser_module, x.guesser_class)
        for x in AbstractGuesser.list_enabled_guessers()
    ]
    log.info("Merging guesser DataFrames.")
    merged_dir = os.path.join(c.GUESSER_TARGET_PREFIX, 'merged')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(merged_dir, exist_ok=True)

    for fold in c.BUZZER_INPUT_FOLDS:
        if os.path.exists(AbstractGuesser.guess_path(merged_dir, fold)):
            log.info("Merged {0} exists, skipping.".format(fold))
            continue

        # The empty template frame goes first so the merged result keeps this
        # column set/order even when no guesser contributes any rows.
        frames = [pd.DataFrame(columns=[
            'fold', 'guess', 'guesser', 'qnum', 'score', 'sentence', 'token'
        ], dtype='object')]
        for guesser in guesser_names:
            guesser_dir = os.path.join(c.GUESSER_TARGET_PREFIX, guesser)
            frames.append(
                AbstractGuesser.load_guesses(guesser_dir, folds=[fold]))

        # DataFrame.append was removed in pandas 2.0; a single concat is the
        # supported equivalent and avoids re-copying the frame per guesser.
        new_guesses = pd.concat(frames)

        # Concatenating with an object-dtype frame leaves these columns as
        # objects; convert them back to numeric dtypes before saving.
        for col in ['qnum', 'sentence', 'token', 'score']:
            new_guesses[col] = pd.to_numeric(
                new_guesses[col], downcast='integer')

        AbstractGuesser.save_guesses(new_guesses, merged_dir, folds=[fold])
        log.info("Merging: {0} finished.".format(fold))
def test():
    """Collect reader predictions and guesser guesses for bonus-pair examples.

    For every ``BonusPairsDataset`` example whose ``'start'`` is not ``-1``,
    store ``(top predictor output, top guesser guess, answer)``; the resulting
    list is pickled to ``results.pkl``.
    """
    first_spec = AbstractGuesser.list_enabled_guessers()[0]
    guesser_dir = AbstractGuesser.output_path(
        first_spec.guesser_module, first_spec.guesser_class, '')
    guesser = ElasticSearchWikidataGuesser.load(guesser_dir)

    torch.cuda.set_device(0)
    predictor = Predictor()
    predictor.cuda()

    dataset = BonusPairsDataset()
    usable = [ex for ex in dataset.examples if ex['start'] != -1]

    results = []
    for example in tqdm(usable):
        document = example['content']
        question = example['query']
        answer = example['answer']

        top_predictions = predictor.predict(document, question, top_n=1)
        prediction = top_predictions[0][0]

        scored = guesser.guess_single(example['query'])
        # Ascending sort by score: the last entry is the highest-scoring
        # guess (the same element a reversed list would have first).
        ranked = sorted(scored.items(), key=lambda pair: pair[1])
        best_guess = ranked[-1][0].replace('_', ' ')

        results.append((prediction, best_guess, answer))

    with open('results.pkl', 'wb') as f:
        pickle.dump(results, f)
def requires(self):
    """Yield a ``TrainGuesser`` task for every enabled guesser spec."""
    yield from (
        TrainGuesser(
            guesser_module=spec.guesser_module,
            guesser_class=spec.guesser_class,
            dependency_module=spec.dependency_module,
            dependency_class=spec.dependency_class,
        )
        for spec in AbstractGuesser.list_enabled_guessers()
    )
def requires(self):
    """Yield ``AllSingleGuesserReports`` and then the 'expo' guess tasks.

    After the reports task, one ``GenerateGuesses`` task with
    ``fold='expo'`` is yielded per enabled guesser spec.
    """
    yield AllSingleGuesserReports()
    yield from (
        GenerateGuesses(
            guesser_module=spec.guesser_module,
            guesser_class=spec.guesser_class,
            dependency_module=spec.dependency_module,
            dependency_class=spec.dependency_class,
            fold='expo',
        )
        for spec in AbstractGuesser.list_enabled_guessers()
    )
def requires(self):
    """Yield a ``GenerateGuesses`` task per (enabled guesser, fold) pair.

    Folds come from ``c.GUESSER_GENERATION_FOLDS``; all folds of one guesser
    are yielded before moving on to the next guesser.
    """
    yield from (
        GenerateGuesses(
            guesser_module=spec.guesser_module,
            guesser_class=spec.guesser_class,
            dependency_module=spec.dependency_module,
            dependency_class=spec.dependency_class,
            fold=fold,
        )
        for spec in AbstractGuesser.list_enabled_guessers()
        for fold in c.GUESSER_GENERATION_FOLDS
    )
def requires(self):
    """Yield a ``GuesserPerformance`` task for every enabled guesser spec."""
    for spec in AbstractGuesser.list_enabled_guessers():
        task_params = {
            'guesser_module': spec.guesser_module,
            'guesser_class': spec.guesser_class,
            'dependency_module': spec.dependency_module,
            'dependency_class': spec.dependency_class,
            'config_num': spec.config_num,
        }
        yield GuesserPerformance(**task_params)
def run(self):
    """Copy the best configuration of each guesser into ``output/guesser/best``.

    The enabled guesser names are passed to ``merge_reports``; the third
    element of its result is handed to ``find_best_guessers``, which maps a
    guesser name to a config number.  For each entry the config directory is
    marked with ``best.touch`` and its contents are copied to
    ``output/guesser/best/<guesser>/``.
    """
    guesser_types = {
        f"{spec.guesser_module}.{spec.guesser_class}"
        for spec in AbstractGuesser.list_enabled_guessers()
    }
    _, _, all_dfs, _ = merge_reports(guesser_types)

    winners = find_best_guessers(all_dfs)
    for guesser_name, config_num in winners.items():
        source_dir = f"output/guesser/{guesser_name}/{config_num}"
        best_dir = f"output/guesser/best/{guesser_name}/"
        # Mark first, so the marker file is part of what gets copied.
        shell(f"touch {source_dir}/best.touch")
        shell(f"mkdir -p {best_dir}")
        shell(f"cp -r {source_dir}/* {best_dir}")
def __init__(self, buzzer_model_dir='data/neo_0.npz'):
    """Load the guesser and pick a buzzer, then reset the per-question state.

    Args:
        buzzer_model_dir: passed to ``RNNBuzzer`` as ``model_dir``; only used
            when ``chainer.cuda.available`` is truthy, otherwise a
            ``StupidBuzzer`` is created instead.
    """
    # The looked-up spec is not used below; the lookup is kept so that the
    # constructor still fails when no guesser is enabled.
    _first_spec = AbstractGuesser.list_enabled_guessers()[0]

    self.guesser = ElasticSearchWikidataGuesser.load('data/guesser')

    self.buzzer = (
        RNNBuzzer(model_dir=buzzer_model_dir,
                  word_skip=conf['buzzer_word_skip'])
        if chainer.cuda.available
        else StupidBuzzer()
    )

    # Initial per-question state.
    self.ok_to_buzz = True
    self.answer = ''
    self.guesses = []
    self.evidence = dict()
def requires(self):
    """Yield ``GuesserReport`` tasks for every enabled guesser spec.

    Each spec gets a report for ``c.GUESSER_DEV_FOLD`` and
    ``c.GUESSER_TEST_FOLD``, plus one for ``c.EXPO_FOLD`` when the file at
    ``c.QANTA_EXPO_DATASET_PATH`` exists.
    """
    for spec in AbstractGuesser.list_enabled_guessers():
        shared = dict(
            guesser_module=spec.guesser_module,
            guesser_class=spec.guesser_class,
            dependency_module=spec.dependency_module,
            dependency_class=spec.dependency_class,
            config_num=spec.config_num,
        )
        for fold in (c.GUESSER_DEV_FOLD, c.GUESSER_TEST_FOLD):
            yield GuesserReport(fold=fold, **shared)
        # NOTE(review): the expo check is placed once per guesser, after the
        # dev/test folds; nesting was reconstructed from flattened source,
        # confirm against the original file.
        if os.path.exists(c.QANTA_EXPO_DATASET_PATH):
            yield GuesserReport(fold=c.EXPO_FOLD, **shared)
def test_buzzer():
    """Print the buzzer's output for growing prefixes of one question.

    The question is the fifth key of ``QuestionDatabase().all_questions()``;
    its flattened text is split on whitespace and, for each prefix length
    from 0 up to (but excluding) the full length, the wrapped guesser's
    guesses are fed to the buzzer and the result is printed.
    """
    questions = QuestionDatabase().all_questions()
    buzzer = RNNBuzzer(word_skip=conf['buzzer_word_skip'])

    # Machine agent: first enabled guesser, loaded and wrapped.
    first_spec = AbstractGuesser.list_enabled_guessers()[0]
    guesser_dir = AbstractGuesser.output_path(
        first_spec.guesser_module, first_spec.guesser_class, '')
    guesser = ESGuesserWrapper(ElasticSearchWikidataGuesser.load(guesser_dir))

    key = list(questions.keys())[4]
    words = questions[key].flatten_text().split()
    for prefix_len in range(len(words)):
        clue = ' '.join(words[:prefix_len])
        print(buzzer.buzz(guesser.guess(clue)))
def generate_guesser_slurm(slurm_config_file, task, output_dir):
    """Render one slurm script per enabled guesser plus a master script.

    Written into ``output_dir``: ``slurm-<i>.sh`` for the i-th enabled
    guesser, a copy of ``guesser-singleton.sh``, and ``slurm-master.sh``
    listing all of them.

    Args:
        slurm_config_file: path of a YAML file with a ``'default'`` section
            and optional sections keyed by guesser class name.
        task: task name handed to the per-guesser template; for
            ``'GuesserReport'`` the template also receives
            ``GUESSER_GENERATION_FOLDS`` as ``folds`` (otherwise ``[]``).
        output_dir: directory the scripts are written to.

    Raises:
        ValueError: if an enabled guesser's class is ``ElasticSearchGuesser``.
    """
    with open(slurm_config_file) as f:
        # yaml.load() without an explicit Loader raises TypeError on
        # PyYAML >= 6; safe_load parses plain-YAML config and never
        # constructs arbitrary Python objects.
        slurm_config = yaml.safe_load(f)
    default_slurm_config = slurm_config['default']

    env = Environment(loader=PackageLoader('qanta', 'slurm/templates'))
    template = env.get_template('guesser-luigi-template.sh')
    enabled_guessers = list(AbstractGuesser.list_enabled_guessers())

    # Settings resolved per guesser, in the order they were originally read.
    setting_names = ('partition', 'qos', 'mem_per_cpu', 'gres', 'max_time',
                     'cpus_per_task', 'account')
    folds = GUESSER_GENERATION_FOLDS if task == 'GuesserReport' else []

    settings = None
    for i, gs in enumerate(enabled_guessers):
        if gs.guesser_class == 'ElasticSearchGuesser':
            raise ValueError(
                'ElasticSearchGuesser is not compatible with slurm')
        # None when the config has no section for this guesser class.
        guesser_slurm_config = slurm_config.get(gs.guesser_class)
        settings = {
            name: get_slurm_config_value(name, default_slurm_config,
                                         guesser_slurm_config)
            for name in setting_names
        }

        script = template.render({
            'task': task,
            'guesser_module': gs.guesser_module,
            'guesser_class': gs.guesser_class,
            'dependency_module': gs.dependency_module,
            'dependency_class': gs.dependency_class,
            'config_num': gs.config_num,
            'folds': folds,
            **settings,
        })
        slurm_file = path.join(output_dir, f'slurm-{i}.sh')
        with safe_open(slurm_file, 'w') as f:
            f.write(script)

    if settings is None:
        # No guesser enabled: resolve the settings from the defaults alone
        # instead of failing on names that were never bound in the loop.
        settings = {
            name: get_slurm_config_value(name, default_slurm_config, None)
            for name in setting_names
        }

    singleton_path = 'qanta/slurm/templates/guesser-singleton.sh'
    singleton_output = path.join(output_dir, 'guesser-singleton.sh')
    shell(f'cp {singleton_path} {singleton_output}')

    # The master script reuses the settings of the last rendered guesser.
    master_template = env.get_template('guesser-master-template.sh')
    script_list = [
        path.join(output_dir, f'slurm-{i}.sh')
        for i in range(len(enabled_guessers))
    ] + [singleton_output]
    master_script = master_template.render({
        'script_list': script_list,
        **settings,
    })
    with safe_open(path.join(output_dir, 'slurm-master.sh'), 'w') as f:
        f.write(master_script)
def generate_guesser_slurm(slurm_config_file, task, output_dir):
    """Generate the slurm scripts for all enabled guessers.

    One ``slurm-<i>.sh`` is rendered per enabled guesser, the singleton
    script is copied next to them, and ``slurm-master.sh`` is rendered with
    the list of all generated scripts.

    Args:
        slurm_config_file: path of a YAML file with a ``'default'`` section
            and optional sections keyed by guesser class name.
        task: task name handed to the per-guesser template; for
            ``'GuesserReport'`` the template also receives
            ``GUESSER_GENERATION_FOLDS`` as ``folds`` (otherwise ``[]``).
        output_dir: directory the scripts are written to.

    Raises:
        ValueError: if an enabled guesser's class is ``ElasticSearchGuesser``.
    """
    with open(slurm_config_file) as f:
        # yaml.load() without an explicit Loader raises TypeError on
        # PyYAML >= 6; safe_load parses plain-YAML config and never
        # constructs arbitrary Python objects.
        slurm_config = yaml.safe_load(f)
    default_slurm_config = slurm_config['default']

    env = Environment(loader=PackageLoader('qanta', 'slurm/templates'))
    template = env.get_template('guesser-luigi-template.sh')
    enabled_guessers = list(AbstractGuesser.list_enabled_guessers())

    # Settings resolved per guesser, in the order they were originally read.
    setting_names = ('partition', 'qos', 'mem_per_cpu', 'gres', 'max_time',
                     'cpus_per_task', 'account')
    folds = GUESSER_GENERATION_FOLDS if task == 'GuesserReport' else []

    settings = None
    for i, gs in enumerate(enabled_guessers):
        if gs.guesser_class == 'ElasticSearchGuesser':
            raise ValueError('ElasticSearchGuesser is not compatible with slurm')
        # None when the config has no section for this guesser class.
        guesser_slurm_config = slurm_config.get(gs.guesser_class)
        settings = {
            name: get_slurm_config_value(name, default_slurm_config,
                                         guesser_slurm_config)
            for name in setting_names
        }

        script = template.render({
            'task': task,
            'guesser_module': gs.guesser_module,
            'guesser_class': gs.guesser_class,
            'dependency_module': gs.dependency_module,
            'dependency_class': gs.dependency_class,
            'config_num': gs.config_num,
            'folds': folds,
            **settings,
        })
        slurm_file = path.join(output_dir, f'slurm-{i}.sh')
        with safe_open(slurm_file, 'w') as f:
            f.write(script)

    if settings is None:
        # No guesser enabled: resolve the settings from the defaults alone
        # instead of failing on names that were never bound in the loop.
        settings = {
            name: get_slurm_config_value(name, default_slurm_config, None)
            for name in setting_names
        }

    singleton_path = 'qanta/slurm/templates/guesser-singleton.sh'
    singleton_output = path.join(output_dir, 'guesser-singleton.sh')
    shell(f'cp {singleton_path} {singleton_output}')

    # The master script reuses the settings of the last rendered guesser.
    master_template = env.get_template('guesser-master-template.sh')
    script_list = [
        path.join(output_dir, f'slurm-{i}.sh')
        for i in range(len(enabled_guessers))
    ] + [singleton_output]
    master_script = master_template.render({
        'script_list': script_list,
        **settings,
    })
    with safe_open(path.join(output_dir, 'slurm-master.sh'), 'w') as f:
        f.write(master_script)
import json import textwrap from collections import defaultdict, Counter, namedtuple import argparse import itertools from csv import DictReader from time import sleep import os from qanta.datasets.quiz_bowl import QuizBowlDataset, QuestionDatabase from qanta.guesser.abstract import AbstractGuesser GUESSERS = [x.guesser_class for x in AbstractGuesser.list_enabled_guessers()] kSHOW_RIGHT = False kPAUSE = .25 kSYSTEM = "OUSIA" kBIGNUMBERS = { -1: """ 88888888 88888888
from tqdm import tqdm from elasticsearch_dsl.connections import connections from elasticsearch_dsl import DocType, Text, Keyword, Search, Index from qanta.util.constants import GUESSER_DEV_FOLD from qanta.guesser.abstract import AbstractGuesser from qanta.datasets.quiz_bowl import QuizBowlDataset from qanta.guesser.experimental.elasticsearch_instance_of import ElasticSearchWikidataGuesser from qanta.guesser.experimental.elasticsearch_instance_of import ElasticSearchIndex INDEX_NAME = 'qb_ir_instance_of' gspec = AbstractGuesser.list_enabled_guessers()[0] guesser_dir = AbstractGuesser.output_path(gspec.guesser_module, gspec.guesser_class, '') guesser = ElasticSearchWikidataGuesser.load(guesser_dir) es_index = ElasticSearchIndex() def recursive_guess(question, k=0): p_class, p_prob = guesser.test_instance_of([question])[0] first_guesses = search_not(question, p_class) print('First round') for x in first_guesses: print(x) print() print('Second round') new_guesses = [] for i in range(k): guess = first_guesses[i][0] question += ' ' + ' '.join(guess.split('_')) guesses = es_index.search(question, p_class, p_prob, 0.6)
import random import pickle from qanta.config import conf from qanta.util.io import safe_path from qanta.util.multiprocess import _multiprocess from qanta.guesser.abstract import AbstractGuesser from qanta.datasets.quiz_bowl import QuizBowlDataset, Question from qanta.guesser.experimental.elasticsearch_instance_of import ElasticSearchWikidataGuesser '''Randomly shuffle the word order and see if it changes the guesses. ''' gspec = AbstractGuesser.list_enabled_guessers()[0] guesser_dir = AbstractGuesser.output_path(gspec.guesser_module, gspec.guesser_class, '') guesser = ElasticSearchWikidataGuesser.load(guesser_dir) def main(): fold = 'guessdev' db = QuizBowlDataset(1, guesser_train=True, buzzer_train=True) questions = db.questions_in_folds([fold]) first_n = lambda x: len(x) print(guesser.guess_single(' '.join(questions[0].text.values()))) ''' s = [0, 0, 0, 0, 0] for q in questions: sents = list(q.text.values()) text_before = ' '.join(sents[:first_n(sents)]) words = text.split()
def generate_guesser_slurm(slurm_config_file, task, output_dir):
    """Render one slurm script per enabled guesser plus a master script.

    Written into ``output_dir``: ``slurm-<i>.sh`` for the i-th enabled
    guesser, a copy of ``guesser-singleton.sh``, and ``slurm-master.sh``
    listing all of them.

    Args:
        slurm_config_file: path of a YAML file with a ``"default"`` section
            and optional sections keyed by guesser class name.
        task: task name handed to the per-guesser template; for
            ``"GuesserReport"`` the template also receives
            ``GUESSER_GENERATION_FOLDS`` as ``folds`` (otherwise ``[]``).
        output_dir: directory the scripts are written to.

    Raises:
        ValueError: if an enabled guesser's class is ``ElasticSearchGuesser``.
    """
    with open(slurm_config_file) as f:
        # yaml.load() without an explicit Loader raises TypeError on
        # PyYAML >= 6; safe_load parses plain-YAML config and never
        # constructs arbitrary Python objects.
        slurm_config = yaml.safe_load(f)
    default_slurm_config = slurm_config["default"]

    env = Environment(loader=PackageLoader("qanta", "slurm/templates"))
    template = env.get_template("guesser-luigi-template.sh")
    enabled_guessers = list(AbstractGuesser.list_enabled_guessers())

    # Settings resolved per guesser, in the order they were originally read.
    setting_names = ("partition", "qos", "mem_per_cpu", "gres", "max_time",
                     "cpus_per_task", "account")
    folds = GUESSER_GENERATION_FOLDS if task == "GuesserReport" else []

    settings = None
    for i, gs in enumerate(enabled_guessers):
        if gs.guesser_class == "ElasticSearchGuesser":
            raise ValueError(
                "ElasticSearchGuesser is not compatible with slurm")
        # None when the config has no section for this guesser class.
        guesser_slurm_config = slurm_config.get(gs.guesser_class)
        settings = {
            name: get_slurm_config_value(name, default_slurm_config,
                                         guesser_slurm_config)
            for name in setting_names
        }

        script = template.render({
            "task": task,
            "guesser_module": gs.guesser_module,
            "guesser_class": gs.guesser_class,
            "dependency_module": gs.dependency_module,
            "dependency_class": gs.dependency_class,
            "config_num": gs.config_num,
            "folds": folds,
            **settings,
        })
        slurm_file = path.join(output_dir, f"slurm-{i}.sh")
        with safe_open(slurm_file, "w") as f:
            f.write(script)

    if settings is None:
        # No guesser enabled: resolve the settings from the defaults alone
        # instead of failing on names that were never bound in the loop.
        settings = {
            name: get_slurm_config_value(name, default_slurm_config, None)
            for name in setting_names
        }

    singleton_path = "qanta/slurm/templates/guesser-singleton.sh"
    singleton_output = path.join(output_dir, "guesser-singleton.sh")
    shell(f"cp {singleton_path} {singleton_output}")

    # The master script reuses the settings of the last rendered guesser.
    master_template = env.get_template("guesser-master-template.sh")
    script_list = [
        path.join(output_dir, f"slurm-{i}.sh")
        for i in range(len(enabled_guessers))
    ] + [singleton_output]
    master_script = master_template.render({
        "script_list": script_list,
        **settings,
    })
    with safe_open(path.join(output_dir, "slurm-master.sh"), "w") as f:
        f.write(master_script)