示例#1
0
文件: test.py 项目: ymedhat95/qb
def main():
    """Set up a human agent and a machine agent and run a game between them.

    The machine agent pairs an RNN buzzer with the guesser loaded from the
    output path of the first enabled guesser spec. The game uses only the
    questions whose fold is 'dev' and is started with ``game.run(10)``.
    """
    buzzer = RNNBuzzer()

    # Restrict the question pool to the 'dev' fold.
    all_questions = QuestionDatabase().all_questions().values()
    dev_questions = [q for q in all_questions if q.fold == 'dev']

    # Machine agent: wrapped Elasticsearch guesser plus the buzzer above.
    spec = AbstractGuesser.list_enabled_guessers()[0]
    guesser_dir = AbstractGuesser.output_path(
        spec.guesser_module, spec.guesser_class, '')
    wrapped_guesser = ESGuesserWrapper(
        ElasticSearchWikidataGuesser.load(guesser_dir))
    machine_agent = GuesserBuzzerAgent(wrapped_guesser, buzzer)

    human_agent = HumanAgent()

    # Three hooks are passed as classes; only the visualisation hook is
    # constructed here because it needs the machine agent.
    hooks = [
        hook.NotifyBuzzingHook,
        hook.GameInterfaceHook,
        hook.VisualizeGuesserBuzzerHook(machine_agent),
        hook.HighlightHook,
    ]

    game = Game(dev_questions, [human_agent, machine_agent], hooks)
    game.run(10)
示例#2
0
文件: guesser.py 项目: NPSDC/qb
 def output(self):
     """Return one LocalTarget per enabled guesser.

     Each target points at ``output/guesser/best/<module>.<class>/best.touch``.
     """
     return [
         LocalTarget(
             f"output/guesser/best/"
             f"{spec.guesser_module}.{spec.guesser_class}/best.touch")
         for spec in AbstractGuesser.list_enabled_guessers()
     ]
示例#3
0
文件: util.py 项目: Agnon1573/qb
def merge_dfs():
    """Merge the guesses of all enabled guessers into one DataFrame per fold.

    For every fold in ``c.BUZZER_INPUT_FOLDS`` the guesses of each enabled
    guesser are loaded from its directory under ``c.GUESSER_TARGET_PREFIX``,
    concatenated, and saved under ``<prefix>/merged``. Folds whose merged
    file already exists are skipped.
    """
    guesser_names = [
        "{0}.{1}".format(x.guesser_module, x.guesser_class)
        for x in AbstractGuesser.list_enabled_guessers()
    ]
    log.info("Merging guesser DataFrames.")
    merged_dir = os.path.join(c.GUESSER_TARGET_PREFIX, 'merged')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(merged_dir, exist_ok=True)
    for fold in c.BUZZER_INPUT_FOLDS:
        if os.path.exists(AbstractGuesser.guess_path(merged_dir, fold)):
            log.info("Merged {0} exists, skipping.".format(fold))
            continue
        # Start from an empty template frame so the merged result carries the
        # expected columns even when no guesser contributes any rows.
        frames = [
            pd.DataFrame(columns=[
                'fold', 'guess', 'guesser', 'qnum', 'score', 'sentence',
                'token'
            ],
                         dtype='object')
        ]
        for guesser in guesser_names:
            guesser_dir = os.path.join(c.GUESSER_TARGET_PREFIX, guesser)
            frames.append(
                AbstractGuesser.load_guesses(guesser_dir, folds=[fold]))
        # DataFrame.append was removed in pandas 2.0; a single concat is the
        # supported replacement and avoids re-copying the frame per guesser.
        new_guesses = pd.concat(frames)
        for col in ['qnum', 'sentence', 'token', 'score']:
            new_guesses[col] = pd.to_numeric(new_guesses[col],
                                             downcast='integer')
        AbstractGuesser.save_guesses(new_guesses, merged_dir, folds=[fold])
        log.info("Merging: {0} finished.".format(fold))
示例#4
0
文件: main.py 项目: ymedhat95/qb
def test():
    """Collect reader predictions and guesser guesses, then pickle them.

    For every dataset example whose 'start' is not -1, stores a tuple of
    (top reader prediction, top guesser guess with underscores replaced by
    spaces, reference answer). The list is written to 'results.pkl'.
    """
    spec = AbstractGuesser.list_enabled_guessers()[0]
    guesser_dir = AbstractGuesser.output_path(
        spec.guesser_module, spec.guesser_class, '')
    guesser = ElasticSearchWikidataGuesser.load(guesser_dir)

    torch.cuda.set_device(0)
    predictor = Predictor()
    predictor.cuda()

    dataset = BonusPairsDataset()
    usable_examples = [ex for ex in dataset.examples if ex['start'] != -1]

    results = []
    for ex in tqdm(usable_examples):
        document = ex['content']
        question = ex['query']
        answer = ex['answer']
        top_prediction = predictor.predict(document, question, top_n=1)[0][0]

        scored = guesser.guess_single(ex['query'])
        # The last element of the ascending sort is the highest-scoring guess
        # (and the last of any tied guesses).
        ranked = sorted(scored.items(), key=lambda pair: pair[1])
        best_guess = ranked[-1][0].replace('_', ' ')

        results.append((top_prediction, best_guess, ex['answer']))

    with open('results.pkl', 'wb') as out:
        pickle.dump(results, out)
示例#5
0
文件: guesser.py 项目: NPSDC/qb
 def requires(self):
     """Yield one TrainGuesser task per enabled guesser spec."""
     yield from (
         TrainGuesser(
             guesser_module=spec.guesser_module,
             guesser_class=spec.guesser_class,
             dependency_module=spec.dependency_module,
             dependency_class=spec.dependency_class,
         )
         for spec in AbstractGuesser.list_enabled_guessers()
     )
示例#6
0
文件: __init__.py 项目: nadesai/qb
 def requires(self):
     """Yield AllSingleGuesserReports, then an 'expo' GenerateGuesses task
     for every enabled guesser spec."""
     yield AllSingleGuesserReports()
     for spec in AbstractGuesser.list_enabled_guessers():
         task_params = dict(
             guesser_module=spec.guesser_module,
             guesser_class=spec.guesser_class,
             dependency_module=spec.dependency_module,
             dependency_class=spec.dependency_class,
         )
         yield GenerateGuesses(fold='expo', **task_params)
示例#7
0
文件: __init__.py 项目: nadesai/qb
 def requires(self):
     """Yield a GenerateGuesses task for each (enabled guesser, fold) pair,
     with folds taken from c.GUESSER_GENERATION_FOLDS."""
     yield from (
         GenerateGuesses(
             guesser_module=spec.guesser_module,
             guesser_class=spec.guesser_class,
             dependency_module=spec.dependency_module,
             dependency_class=spec.dependency_class,
             fold=fold)
         for spec in AbstractGuesser.list_enabled_guessers()
         for fold in c.GUESSER_GENERATION_FOLDS
     )
示例#8
0
 def requires(self):
     """Yield one GuesserPerformance task per enabled guesser spec."""
     # Spec attributes forwarded unchanged as same-named task parameters.
     forwarded = (
         'guesser_module',
         'guesser_class',
         'dependency_module',
         'dependency_class',
         'config_num',
     )
     for spec in AbstractGuesser.list_enabled_guessers():
         yield GuesserPerformance(
             **{name: getattr(spec, name) for name in forwarded})
示例#9
0
文件: guesser.py 项目: NPSDC/qb
 def run(self):
     """Copy each guesser's best configuration into output/guesser/best/.

     The best configuration number per guesser comes from
     find_best_guessers() applied to the third value returned by
     merge_reports().
     """
     guesser_types = {
         f"{spec.guesser_module}.{spec.guesser_class}"
         for spec in AbstractGuesser.list_enabled_guessers()
     }
     _, _, all_dfs, _ = merge_reports(guesser_types)
     winners = find_best_guessers(all_dfs)
     for name, config_num in winners.items():
         src = f"output/guesser/{name}/{config_num}"
         dst = f"output/guesser/best/{name}/"
         # Mark the winning config, make sure the target exists, then copy.
         commands = (
             f"touch {src}/best.touch",
             f"mkdir -p {dst}",
             f"cp -r {src}/* {dst}",
         )
         for command in commands:
             shell(command)
    def __init__(self, buzzer_model_dir='data/neo_0.npz'):
        """Load the guesser and pick a buzzer, then reset per-question state.

        Args:
            buzzer_model_dir: Passed to RNNBuzzer as ``model_dir``; only used
                when ``chainer.cuda.available`` is truthy.
        """
        # The looked-up spec is not used afterwards; the call is kept so any
        # effect of the lookup (including failing when none is enabled) stays.
        _ = AbstractGuesser.list_enabled_guessers()[0]
        self.guesser = ElasticSearchWikidataGuesser.load('data/guesser')

        # Fall back to StupidBuzzer when CUDA is not available.
        self.buzzer = (
            RNNBuzzer(model_dir=buzzer_model_dir,
                      word_skip=conf['buzzer_word_skip'])
            if chainer.cuda.available
            else StupidBuzzer()
        )

        self.ok_to_buzz = True
        self.answer = ''
        self.guesses = []
        self.evidence = {}
示例#11
0
 def requires(self):
     """Yield GuesserReport tasks for every enabled guesser spec.

     Each spec gets a report for the dev and test folds, plus one for
     c.EXPO_FOLD when c.QANTA_EXPO_DATASET_PATH exists on disk.
     """
     for spec in AbstractGuesser.list_enabled_guessers():
         report_params = dict(
             guesser_module=spec.guesser_module,
             guesser_class=spec.guesser_class,
             dependency_module=spec.dependency_module,
             dependency_class=spec.dependency_class,
             config_num=spec.config_num,
         )
         for fold in [c.GUESSER_DEV_FOLD, c.GUESSER_TEST_FOLD]:
             yield GuesserReport(fold=fold, **report_params)
         # The existence check is repeated for every spec, as before.
         if os.path.exists(c.QANTA_EXPO_DATASET_PATH):
             yield GuesserReport(fold=c.EXPO_FOLD, **report_params)
示例#12
0
文件: test.py 项目: nadesai/qb
def test_buzzer():
    """Feed growing prefixes of one question to the guesser and print the
    buzzer's output for each prefix."""
    questions = QuestionDatabase().all_questions()
    buzzer = RNNBuzzer(word_skip=conf['buzzer_word_skip'])

    # Machine side: wrapped guesser loaded from the first enabled spec's path.
    spec = AbstractGuesser.list_enabled_guessers()[0]
    guesser_dir = AbstractGuesser.output_path(
        spec.guesser_module, spec.guesser_class, '')
    guesser = ESGuesserWrapper(ElasticSearchWikidataGuesser.load(guesser_dir))

    # Use the question stored under the fifth key.
    fifth_key = list(questions.keys())[4]
    words = questions[fifth_key].flatten_text().split()
    for end, _ in enumerate(words):
        # The prefix excludes the word at `end`: the first clue is the empty
        # string and the complete question is never submitted.
        prefix = ' '.join(words[:end])
        print(buzzer.buzz(guesser.guess(prefix)))
示例#13
0
def generate_guesser_slurm(slurm_config_file, task, output_dir):
    """Render one slurm script per enabled guesser plus a master script.

    Args:
        slurm_config_file: Path to a YAML file with a 'default' section and
            optional sections keyed by guesser class name.
        task: Task name passed to the per-guesser template. When it equals
            'GuesserReport' the template also receives
            GUESSER_GENERATION_FOLDS; otherwise an empty fold list.
        output_dir: Directory that receives ``slurm-<i>.sh``,
            ``guesser-singleton.sh`` and ``slurm-master.sh``.

    Raises:
        ValueError: If an enabled guesser's class is 'ElasticSearchGuesser'.
    """
    with open(slurm_config_file) as f:
        # yaml.load() without a Loader argument raises TypeError on
        # PyYAML >= 6 and can construct arbitrary objects on older releases;
        # safe_load avoids both for a plain configuration file.
        slurm_config = yaml.safe_load(f)
    default_slurm_config = slurm_config['default']
    env = Environment(loader=PackageLoader('qanta', 'slurm/templates'))
    template = env.get_template('guesser-luigi-template.sh')
    enabled_guessers = list(AbstractGuesser.list_enabled_guessers())

    option_names = ('partition', 'qos', 'mem_per_cpu', 'gres', 'max_time',
                    'cpus_per_task', 'account')

    def resolve_options(guesser_slurm_config):
        # Resolve every slurm option for one guesser-specific section (or
        # None when the guesser has no section of its own).
        return {
            name: get_slurm_config_value(name, default_slurm_config,
                                         guesser_slurm_config)
            for name in option_names
        }

    options = None
    for i, gs in enumerate(enabled_guessers):
        if gs.guesser_class == 'ElasticSearchGuesser':
            raise ValueError(
                'ElasticSearchGuesser is not compatible with slurm')
        options = resolve_options(slurm_config.get(gs.guesser_class))
        folds = GUESSER_GENERATION_FOLDS if task == 'GuesserReport' else []
        script = template.render({
            'task': task,
            'guesser_module': gs.guesser_module,
            'guesser_class': gs.guesser_class,
            'dependency_module': gs.dependency_module,
            'dependency_class': gs.dependency_class,
            'config_num': gs.config_num,
            **options,
            'folds': folds
        })
        slurm_file = path.join(output_dir, f'slurm-{i}.sh')
        with safe_open(slurm_file, 'w') as f:
            f.write(script)

    if options is None:
        # No guesser is enabled: use the default options for the master
        # script instead of failing on unbound variables.
        options = resolve_options(None)

    singleton_path = 'qanta/slurm/templates/guesser-singleton.sh'
    singleton_output = path.join(output_dir, 'guesser-singleton.sh')
    shell(f'cp {singleton_path} {singleton_output}')

    # The master script reuses the options resolved for the last guesser.
    master_template = env.get_template('guesser-master-template.sh')
    master_script = master_template.render({
        'script_list': [
            path.join(output_dir, f'slurm-{i}.sh')
            for i in range(len(enabled_guessers))
        ] + [singleton_output],
        **options
    })
    with safe_open(path.join(output_dir, 'slurm-master.sh'), 'w') as f:
        f.write(master_script)
示例#14
0
文件: cli.py 项目: Pinafore/qb
def generate_guesser_slurm(slurm_config_file, task, output_dir):
    """Render one slurm script per enabled guesser plus a master script.

    Args:
        slurm_config_file: Path to a YAML file with a 'default' section and
            optional sections keyed by guesser class name.
        task: Task name passed to the per-guesser template. When it equals
            'GuesserReport' the template also receives
            GUESSER_GENERATION_FOLDS; otherwise an empty fold list.
        output_dir: Directory that receives ``slurm-<i>.sh``,
            ``guesser-singleton.sh`` and ``slurm-master.sh``.

    Raises:
        ValueError: If an enabled guesser's class is 'ElasticSearchGuesser'.
    """
    with open(slurm_config_file) as f:
        # yaml.load() without a Loader argument raises TypeError on
        # PyYAML >= 6 and can construct arbitrary objects on older releases;
        # safe_load avoids both for a plain configuration file.
        slurm_config = yaml.safe_load(f)
    default_slurm_config = slurm_config['default']
    env = Environment(loader=PackageLoader('qanta', 'slurm/templates'))
    template = env.get_template('guesser-luigi-template.sh')
    enabled_guessers = list(AbstractGuesser.list_enabled_guessers())

    option_names = ('partition', 'qos', 'mem_per_cpu', 'gres', 'max_time', 'cpus_per_task', 'account')

    def resolve_options(guesser_slurm_config):
        # Resolve every slurm option for one guesser-specific section (or
        # None when the guesser has no section of its own).
        return {
            name: get_slurm_config_value(name, default_slurm_config, guesser_slurm_config)
            for name in option_names
        }

    options = None
    for i, gs in enumerate(enabled_guessers):
        if gs.guesser_class == 'ElasticSearchGuesser':
            raise ValueError('ElasticSearchGuesser is not compatible with slurm')
        options = resolve_options(slurm_config.get(gs.guesser_class))
        folds = GUESSER_GENERATION_FOLDS if task == 'GuesserReport' else []
        script = template.render({
            'task': task,
            'guesser_module': gs.guesser_module,
            'guesser_class': gs.guesser_class,
            'dependency_module': gs.dependency_module,
            'dependency_class': gs.dependency_class,
            'config_num': gs.config_num,
            **options,
            'folds': folds
        })
        slurm_file = path.join(output_dir, f'slurm-{i}.sh')
        with safe_open(slurm_file, 'w') as f:
            f.write(script)

    if options is None:
        # No guesser is enabled: use the default options for the master
        # script instead of failing on unbound variables.
        options = resolve_options(None)

    singleton_path = 'qanta/slurm/templates/guesser-singleton.sh'
    singleton_output = path.join(output_dir, 'guesser-singleton.sh')
    shell(f'cp {singleton_path} {singleton_output}')

    # The master script reuses the options resolved for the last guesser.
    master_template = env.get_template('guesser-master-template.sh')
    script_list = [path.join(output_dir, f'slurm-{i}.sh') for i in range(len(enabled_guessers))]
    master_script = master_template.render({
        'script_list': script_list + [singleton_output],
        **options
    })
    with safe_open(path.join(output_dir, 'slurm-master.sh'), 'w') as f:
        f.write(master_script)
示例#15
0
import json
import textwrap
from collections import defaultdict, Counter, namedtuple
import argparse
import itertools
from csv import DictReader
from time import sleep
import os

from qanta.datasets.quiz_bowl import QuizBowlDataset, QuestionDatabase
from qanta.guesser.abstract import AbstractGuesser

# Class names of all enabled guessers, computed once when this module loads.
GUESSERS = [x.guesser_class for x in AbstractGuesser.list_enabled_guessers()]

# NOTE(review): presumably toggles display of the correct answer — confirm
# against the code that reads it.
kSHOW_RIGHT = False
# NOTE(review): looks like a delay in seconds (`sleep` is imported above) —
# confirm against usage.
kPAUSE = .25
# NOTE(review): presumably the display name of the system — confirm.
kSYSTEM = "OUSIA"

kBIGNUMBERS = {
    -1:
    """








88888888
88888888
示例#16
0
from tqdm import tqdm
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import DocType, Text, Keyword, Search, Index
from qanta.util.constants import GUESSER_DEV_FOLD
from qanta.guesser.abstract import AbstractGuesser
from qanta.datasets.quiz_bowl import QuizBowlDataset
from qanta.guesser.experimental.elasticsearch_instance_of import ElasticSearchWikidataGuesser
from qanta.guesser.experimental.elasticsearch_instance_of import ElasticSearchIndex

# NOTE(review): presumably the Elasticsearch index name; it is not referenced
# in the visible part of this module — confirm against the rest of the file.
INDEX_NAME = 'qb_ir_instance_of'

# Module-level setup, executed on import: take the first enabled guesser spec,
# derive its output directory, and load an ElasticSearchWikidataGuesser from
# it. The loaded class is fixed regardless of which spec comes first.
gspec = AbstractGuesser.list_enabled_guessers()[0]
guesser_dir = AbstractGuesser.output_path(gspec.guesser_module,
        gspec.guesser_class, '')
guesser = ElasticSearchWikidataGuesser.load(guesser_dir)
es_index = ElasticSearchIndex()

def recursive_guess(question, k=0):
    """Print first-round guesses, then re-query with guesses appended.

    Args:
        question: Question text. It is extended in place (locally) with each
            of the first ``k`` first-round guesses.
        k: Number of first-round guesses to append and re-query with. With the
            default of 0 the second-round loop does not run.

    NOTE(review): this excerpt appears to be cut off; in the visible code
    ``new_guesses`` is never filled and nothing is returned.
    """
    # First result of test_instance_of for this question, unpacked into two
    # values. NOTE(review): presumably a predicted class and its probability.
    p_class, p_prob = guesser.test_instance_of([question])[0]
    # search_not is defined outside the visible excerpt.
    first_guesses = search_not(question, p_class)
    print('First round')
    for x in first_guesses:
        print(x)
    print()

    print('Second round')
    new_guesses = []
    for i in range(k):
        # Element 0 of each first-round entry is used as the guess string.
        guess = first_guesses[i][0]
        # Append the guess to the question with underscores turned into spaces.
        question += ' ' + ' '.join(guess.split('_'))
        guesses = es_index.search(question, p_class, p_prob, 0.6)
示例#17
0
import random
import pickle

from qanta.config import conf
from qanta.util.io import safe_path
from qanta.util.multiprocess import _multiprocess
from qanta.guesser.abstract import AbstractGuesser
from qanta.datasets.quiz_bowl import QuizBowlDataset, Question
from qanta.guesser.experimental.elasticsearch_instance_of import ElasticSearchWikidataGuesser
'''Randomly shuffle the word order and see if it changes the guesses.
'''

# Module-level setup, executed on import: take the first enabled guesser spec,
# derive its output directory, and load an ElasticSearchWikidataGuesser from
# it. The loaded class is fixed regardless of which spec comes first.
gspec = AbstractGuesser.list_enabled_guessers()[0]
guesser_dir = AbstractGuesser.output_path(gspec.guesser_module,
                                          gspec.guesser_class, '')
guesser = ElasticSearchWikidataGuesser.load(guesser_dir)


def main():
    fold = 'guessdev'
    db = QuizBowlDataset(1, guesser_train=True, buzzer_train=True)
    questions = db.questions_in_folds([fold])
    first_n = lambda x: len(x)

    print(guesser.guess_single(' '.join(questions[0].text.values())))
    '''
    s = [0, 0, 0, 0, 0]
    for q in questions:
        sents = list(q.text.values())
        text_before = ' '.join(sents[:first_n(sents)])
        words = text.split()
示例#18
0
文件: cli.py 项目: nhatsmrt/qb
def generate_guesser_slurm(slurm_config_file, task, output_dir):
    """Render one slurm script per enabled guesser plus a master script.

    Args:
        slurm_config_file: Path to a YAML file with a "default" section and
            optional sections keyed by guesser class name.
        task: Task name passed to the per-guesser template. When it equals
            "GuesserReport" the template also receives
            GUESSER_GENERATION_FOLDS; otherwise an empty fold list.
        output_dir: Directory that receives ``slurm-<i>.sh``,
            ``guesser-singleton.sh`` and ``slurm-master.sh``.

    Raises:
        ValueError: If an enabled guesser's class is "ElasticSearchGuesser".
    """
    with open(slurm_config_file) as f:
        # yaml.load() without a Loader argument raises TypeError on
        # PyYAML >= 6 and can construct arbitrary objects on older releases;
        # safe_load avoids both for a plain configuration file.
        slurm_config = yaml.safe_load(f)
    default_slurm_config = slurm_config["default"]
    env = Environment(loader=PackageLoader("qanta", "slurm/templates"))
    template = env.get_template("guesser-luigi-template.sh")
    enabled_guessers = list(AbstractGuesser.list_enabled_guessers())

    option_names = (
        "partition",
        "qos",
        "mem_per_cpu",
        "gres",
        "max_time",
        "cpus_per_task",
        "account",
    )

    def resolve_options(guesser_slurm_config):
        # Resolve every slurm option for one guesser-specific section (or
        # None when the guesser has no section of its own).
        return {
            name: get_slurm_config_value(name, default_slurm_config,
                                         guesser_slurm_config)
            for name in option_names
        }

    options = None
    for i, gs in enumerate(enabled_guessers):
        if gs.guesser_class == "ElasticSearchGuesser":
            raise ValueError(
                "ElasticSearchGuesser is not compatible with slurm")
        options = resolve_options(slurm_config.get(gs.guesser_class))
        folds = GUESSER_GENERATION_FOLDS if task == "GuesserReport" else []
        script = template.render({
            "task": task,
            "guesser_module": gs.guesser_module,
            "guesser_class": gs.guesser_class,
            "dependency_module": gs.dependency_module,
            "dependency_class": gs.dependency_class,
            "config_num": gs.config_num,
            **options,
            "folds": folds,
        })
        slurm_file = path.join(output_dir, f"slurm-{i}.sh")
        with safe_open(slurm_file, "w") as f:
            f.write(script)

    if options is None:
        # No guesser is enabled: use the default options for the master
        # script instead of failing on unbound variables.
        options = resolve_options(None)

    singleton_path = "qanta/slurm/templates/guesser-singleton.sh"
    singleton_output = path.join(output_dir, "guesser-singleton.sh")
    shell(f"cp {singleton_path} {singleton_output}")

    # The master script reuses the options resolved for the last guesser.
    master_template = env.get_template("guesser-master-template.sh")
    master_script = master_template.render({
        "script_list": [
            path.join(output_dir, f"slurm-{i}.sh")
            for i in range(len(enabled_guessers))
        ] + [singleton_output],
        **options,
    })
    with safe_open(path.join(output_dir, "slurm-master.sh"), "w") as f:
        f.write(master_script)