Example #1
0
    def test_dataset_values_assertion(self, tmp_path):
        """export() must raise when a dataset's value is not a list.

        Args:
            tmp_path: pytest-provided temporary directory used as the Run root.
        """
        runtask = Run(tmp_path)

        export_dict = {"era5": "not a list"}
        with pytest.raises(Exception) as exception_info:
            runtask.export(export_dict)
        error_message_contains = 'values to be a list. Got'
        # FIX: str(exception_info) is a file/line repr that only incidentally
        # includes the message; str(exception_info.value) is the message itself.
        assert error_message_contains in str(exception_info.value), \
            f'Got unexpected error message: {exception_info.value}'
Example #2
0
    def test_dataset_assertion(self, tmp_path):
        """export() must raise when asked for an unsupported dataset name.

        Args:
            tmp_path: pytest-provided temporary directory used as the Run root.
        """
        runtask = Run(tmp_path)

        export_dict = {"era42": ["towels"]}
        with pytest.raises(Exception) as exception_info:
            runtask.export(export_dict)
        error_message_contains = 'is not supported! Supported datasets are'
        # FIX: assert against the exception's message (exception_info.value),
        # not the ExceptionInfo repr, which is what str(exception_info) yields.
        assert error_message_contains in str(exception_info.value), \
            f'Got unexpected error message: {exception_info.value}'
Example #3
0
    def rank_article(self, query):
        """Create one run file per article matched by *query*, then bump the run counter."""
        raw_conf = self.conf.getConfig()
        # Wrap the 'run' section so keys are reachable as attributes.
        conf = namedtuple("Conf", raw_conf['run'].keys())(*raw_conf['run'].values())

        for article in self.__articles(query):
            runner = Run("".join(conf.staff), conf.step, conf.num,
                         conf.weighting, conf.granularity,
                         ['k' + str(conf.k), 'b' + str(conf.b)])
            # article[1]: document payload; article[0][0]: query/article identifier.
            runner.createRun("../runs", article[1], article[0][0])

        self.conf.incrementRun()
Example #4
0
    def rank_elem(self, query):
        """Rank XML elements with BM25 for each query and write one run file per query.

        For every query, scores flattened document elements, keeps up to
        `config.limit` results while suppressing nested/overlapping element
        paths from the same document, and writes the run via Run.createRunElem.
        """
        config = self.conf.getConfig()
        # Expose the 'run' section of the config as attribute access.
        config = namedtuple("Conf",
                            config['run'].keys())(*config['run'].values())
        # Flatten the two-level docs mapping: concatenated keys -> element value.
        docsEl = dict()
        for (k, v) in self.docs.items():
            for (w, x) in v.items():
                key = (k + w)
                docsEl[key] = x

        rank = XmlRank(docsEl)
        bm = []  # NOTE(review): never used below — candidate for removal
        # Resolve the weighting function by name from the src.rank module.
        func = getattr(importlib.import_module("src.rank"), config.weighting)
        for q in query:
            # q[1] presumably holds the query terms — TODO confirm against caller.
            req = clean_text(" ".join(q[1])).split()
            r = rank.getBm25(request=req,
                             func=func,
                             k=config.k,
                             b=config.b,
                             d=0)
            red = dict()   # doc id -> element paths already accepted
            fres = []      # final (doc id, element path, score) tuples
            lid = 0        # doc id of the most recently accepted result
            for t in r:
                # Stop once the configured result limit is reached.
                if (len(fres) == config.limit):
                    break
                # t[0] has the shape "<doc id>/<element path>"; split once.
                id, res = t[0].split("/", 1)
                if red.get(id) is None:
                    # First element seen for this document: always keep it.
                    red[id] = [res]
                    fres.append((id, res, t[1]))
                    lid = id
                else:
                    # Reject elements that contain, or are contained in, an
                    # element already kept for the same document.
                    rec = False
                    for z in red[id]:
                        if res in z or z in res:
                            rec = True
                    # Only extend a document while it is still the most
                    # recently accepted one (keeps results contiguous).
                    if not rec and id == lid:
                        red[id].append(res)
                        fres.append((id, res, t[1]))
                        lid = id

            run = Run("".join(config.staff), config.step, config.num,
                      config.weighting, config.granularity,
                      ['k' + str(config.k), 'b' + str(config.b)])
            file = run.createRunElem("../runs", fres, q[0])

        self.conf.incrementRun()
    def __init__(self):
        """Build the full pipeline: data prep, model construction, train/eval."""
        # Tokenize/pad the corpus according to the global Parameters namespace.
        self.data = self.prepare_data(Parameters.num_words, Parameters.seq_len)
        # Instantiate the classifier from the same hyper-parameter set.
        self.model = TextClassifier(Parameters)
        # Training and evaluation start immediately on construction.
        Run().train(self.model, self.data, Parameters)
Example #6
0
def main(input_args):
    """Load the default pipeline config, overlay any user config, and run.

    Args:
        input_args: parsed CLI namespace providing `.config` (optional path
            to a user JSON config) and `.run_from` (pipeline stage to start
            from).
    """
    base_dir = Path(os.path.dirname(os.path.abspath(__file__)))
    default_config_file = base_dir / "pipeline_config/minimal.json"

    with open(default_config_file, "r") as f:
        default_config = json.load(f)

    if input_args.config is not None:
        # BUG FIX: was `args.config` — `args` is undefined in this scope.
        with open(input_args.config, "r") as f:
            user_config = json.load(f)
    else:
        user_config = {}

    # User-supplied values take precedence; defaults fill missing keys.
    config = DictWithDefaults(user_config, default_config)

    data_path = Path(config["data"])
    runtask = Run(data_path)
    # BUG FIX: was `args.run_from` — `args` is undefined in this scope.
    runtask.run(config, input_args.run_from)
Example #7
0
from pathlib import Path
import os
import json
from src import DictWithDefaults, Run
from src.preprocess import VHIPreprocessor

# Resolve the project base directory and load the default pipeline config.
base_dir = Path(os.path.dirname(os.path.abspath("./ml_drought")))
default_config_file = base_dir / "pipeline_config/minimal.json"
with open(default_config_file, "r") as f:
    default_config = json.load(f)

# No user overrides here — run entirely on the defaults.
user_config = {}
config = DictWithDefaults(user_config, default_config)
data_path = Path(config["data"])

preprocess_args = config["preprocess"]
dataset2preprocessor = {"vhi": VHIPreprocessor}

r = Run(data_path)
# Take the first (dataset, variables) pair from the preprocess section.
dataset, variables = list(preprocess_args.items())[0]
preprocessor = dataset2preprocessor[dataset](r.data)
preprocessor.preprocess(**variables[0])