import yaml
from jinja2 import Environment, PackageLoader
import tqdm

from qanta import qlogging
from qanta.guesser.abstract import AbstractGuesser
from qanta.guesser.elasticsearch import elasticsearch_cli
from qanta.util.environment import ENVIRONMENT
from qanta.util.io import safe_open, shell, get_tmp_filename
from qanta.util.constants import QANTA_SQL_DATASET_PATH, GUESSER_GENERATION_FOLDS
from qanta.hyperparam import expand_config
from qanta.wikipedia.categories import categorylinks_cli
from qanta.wikipedia.vital import vital_cli
from qanta.ingestion.trickme import trick_cli

# Logger shared by everything defined in this CLI module.
log = qlogging.get('cli')

# Accept both -h and --help as help flags for the command group.
CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']}


# Root command group. Its callback writes one log line per ENVIRONMENT entry.
# Deliberately documented with comments rather than a docstring: click would
# surface a docstring as the group's --help text.
@click.group(context_settings=CONTEXT_SETTINGS)
def main():
    log.info("QANTA starting with configuration:")
    for name, value in ENVIRONMENT.items():
        log.info("{0}={1}".format(name, value))


# Register the imported sub-command groups under their public CLI names.
main.add_command(categorylinks_cli, name='categories')
main.add_command(vital_cli, name='vital')
main.add_command(elasticsearch_cli, name='elasticsearch')
main.add_command(trick_cli, name='trick')
import pickle
import os
import time

import luigi
from luigi import LocalTarget, Task, WrapperTask

from qanta.config import conf
from qanta.util import constants as c
from qanta.util.io import shell
from qanta.guesser.abstract import AbstractGuesser, get_class
from qanta.pipeline.preprocess import DownloadData
from qanta import qlogging

# Module-level logger named after this module.
log = qlogging.get(__name__)


class EmptyTask(luigi.Task):
    """Luigi task whose ``complete`` always returns True."""

    def complete(self):
        # Unconditionally report completion.
        return True


class TrainGuesser(Task):
    """Luigi task parameterised by a guesser and a dependency (module + class names).

    NOTE(review): only the parameter declarations and ``requires`` are visible
    in this excerpt; the dependency_* and config_num parameters are not used
    by the visible code, so the class presumably continues -- confirm against
    the full file before relying on this description.
    """
    guesser_module = luigi.Parameter()  # type: str
    guesser_class = luigi.Parameter()  # type: str
    dependency_module = luigi.Parameter()  # type: str
    dependency_class = luigi.Parameter()  # type: str
    config_num = luigi.IntParameter()  # type: int

    def requires(self):
        # The visible code declares DownloadData as a prerequisite.
        yield DownloadData()
from elasticsearch_dsl.connections import connections import elasticsearch import tqdm from nltk.tokenize import word_tokenize from jinja2 import Environment, PackageLoader from qanta.wikipedia.cached_wikipedia import Wikipedia from qanta.datasets.abstract import QuestionText from qanta.guesser.abstract import AbstractGuesser from qanta.spark import create_spark_context from qanta.config import conf from qanta.util.io import get_tmp_dir, safe_path from qanta import qlogging log = qlogging.get(__name__) ES_PARAMS = 'es_params.pickle' connections.create_connection(hosts=['localhost']) def create_es_config(output_path, host='localhost', port=9200, tmp_dir=None): if tmp_dir is None: tmp_dir = get_tmp_dir() data_dir = safe_path(os.path.join(tmp_dir, 'elasticsearch/data/')) log_dir = safe_path(os.path.join(tmp_dir, 'elasticsearch/log/')) env = Environment(loader=PackageLoader('qanta', 'templates')) template = env.get_template('elasticsearch.yml') config_content = template.render({ 'host': host, 'port': port, 'log_dir': log_dir,
from jinja2 import Environment, PackageLoader
import tqdm

from qanta import qlogging
from qanta.guesser.abstract import AbstractGuesser
from qanta.guesser.elasticsearch import elasticsearch_cli
from qanta.util.environment import ENVIRONMENT
from qanta.util.io import safe_open, shell, get_tmp_filename
from qanta.util.constants import QANTA_SQL_DATASET_PATH, GUESSER_GENERATION_FOLDS
from qanta.hyperparam import expand_config
from qanta.wikipedia.categories import categorylinks_cli
from qanta.wikipedia.vital import vital_cli
from qanta.ingestion.trickme import trick_cli
from qanta.ingestion.command import ingestion_cli

# Logger shared by everything defined in this CLI module.
log = qlogging.get('cli')

# Accept both -h and --help as help flags for the command group.
CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']}


# Root command group. Its callback writes one log line per ENVIRONMENT entry.
# Deliberately documented with comments rather than a docstring: click would
# surface a docstring as the group's --help text.
@click.group(context_settings=CONTEXT_SETTINGS)
def main():
    log.info("QANTA starting with configuration:")
    for name, value in ENVIRONMENT.items():
        log.info("{0}={1}".format(name, value))


# Register the imported sub-command groups under their public CLI names.
main.add_command(categorylinks_cli, name='categories')
main.add_command(vital_cli, name='vital')
main.add_command(elasticsearch_cli, name='elasticsearch')
main.add_command(trick_cli, name='trick')
from qanta import qlogging
from qanta.ingestion.answer_mapping import read_wiki_titles
from qanta.ingestion.annotated_mapping import PageAssigner

log = qlogging.get('validate_annotations')


def normalize(title):
    """Return *title* with every space replaced by an underscore."""
    underscored = title.replace(' ', '_')
    return underscored


def check_page(page, titles):
    """Log an error when the normalized form of *page* is not in *titles*."""
    if normalize(page) in titles:
        return
    log.error(f'Title not found: {page}')


def main():
    """Check every page in the assigner's three mapping tables against the wiki titles."""
    titles = read_wiki_titles()
    assigner = PageAssigner()

    # (log message, PageAssigner attribute) pairs, in the order they are checked.
    # The attribute is looked up inside the loop so each mapping is read only
    # after its own progress message has been logged.
    sections = (
        ('Checking direct protobowl mappings...', 'protobowl_direct'),
        ('Checking direct quizdb mappings...', 'quizdb_direct'),
        ('Checking unambiguous mappings...', 'unambiguous'),
    )
    for message, attribute in sections:
        log.info(message)
        for page in getattr(assigner, attribute).values():
            check_page(page, titles)
from jinja2 import Environment, PackageLoader
import tqdm

from qanta import qlogging
from qanta.guesser.abstract import AbstractGuesser
from qanta.guesser.elasticsearch import elasticsearch_cli
from qanta.util.environment import ENVIRONMENT
from qanta.util.io import safe_open, shell, get_tmp_filename
from qanta.util.constants import QANTA_SQL_DATASET_PATH, GUESSER_GENERATION_FOLDS
from qanta.hyperparam import expand_config
from qanta.wikipedia.categories import categorylinks_cli
from qanta.wikipedia.vital import vital_cli
from qanta.ingestion.trickme import trick_cli
from qanta.ingestion.command import ingestion_cli

# Logger shared by everything defined in this CLI module.
log = qlogging.get("cli")

# Accept both -h and --help as help flags for the command group.
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}


# Root command group. Its callback writes one log line per ENVIRONMENT entry.
# Deliberately documented with comments rather than a docstring: click would
# surface a docstring as the group's --help text.
@click.group(context_settings=CONTEXT_SETTINGS)
def main():
    log.info("QANTA starting with configuration:")
    for name, value in ENVIRONMENT.items():
        log.info("{0}={1}".format(name, value))


# Register the imported sub-command groups under their public CLI names.
main.add_command(categorylinks_cli, name="categories")
main.add_command(vital_cli, name="vital")
main.add_command(elasticsearch_cli, name="elasticsearch")
main.add_command(trick_cli, name="trick")
from qanta import qlogging
from qanta.ingestion.answer_mapping import read_wiki_titles
from qanta.ingestion.annotated_mapping import PageAssigner

log = qlogging.get("validate_annotations")


def normalize(title):
    """Return *title* with every space replaced by an underscore."""
    underscored = title.replace(" ", "_")
    return underscored


def check_page(page, titles):
    """Log an error when the normalized form of *page* is not in *titles*."""
    if normalize(page) in titles:
        return
    log.error(f"Title not found: {page}")


def main():
    """Check every page in the assigner's three mapping tables against the wiki titles."""
    titles = read_wiki_titles()
    assigner = PageAssigner()

    # (log message, PageAssigner attribute) pairs, in the order they are checked.
    # The attribute is looked up inside the loop so each mapping is read only
    # after its own progress message has been logged.
    sections = (
        ("Checking direct protobowl mappings...", "protobowl_direct"),
        ("Checking direct quizdb mappings...", "quizdb_direct"),
        ("Checking unambiguous mappings...", "unambiguous"),
    )
    for message, attribute in sections:
        log.info(message)
        for page in getattr(assigner, attribute).values():
            check_page(page, titles)