예제 #1
0
def test_keys_with_list_of_values():
    """Build a learner from a keyword whose value is a list and run it end to end."""
    # Neither construction nor learn/predict may raise for a key/list pair.
    learner = vw(quiet=True, q=["fa", "fb"])
    learner.learn('1 | a b c')
    result = learner.predict(' | a b c')
    assert isinstance(result, float)
    del learner
예제 #2
0
    def initialize(self, test, resume=False):
        """Set up ``self.actor_model`` according to ``self.model_class``.

        For ``'lookup'`` the actor model is an empty dict.  For ``'vw_python'``
        the model path is stored in ``self.actor_model_path`` and a ``pyvw.vw``
        instance is created: from the saved model with ``-t`` when ``test`` is
        truthy, from the saved model (reading and writing the same path) when
        ``resume`` is truthy, and otherwise from the settings in
        ``self.params``.  Any other ``model_class`` leaves the object untouched.

        Args:
            test: truthy to load the saved model with the ``-t`` flag.
            resume: truthy to continue from the saved model; only consulted
                when ``test`` is falsy.
        """
        kind = self.model_class
        if kind == 'lookup':
            self.actor_model = {}
            return
        if kind != 'vw_python':
            return

        self.actor_model_path = self.base_folder_name + "/model.vw"

        if test:
            command = "--quiet -t -i {0}".format(self.actor_model_path)
            self.actor_model = pyvw.vw(command)
        elif resume:
            command = "--quiet -f {0} -i {0}".format(self.actor_model_path)
            self.actor_model = pyvw.vw(command)
        else:
            settings = self.params
            self.actor_model = pyvw.vw(
                quiet=True,
                l2=settings['l2'],
                loss_function=settings['loss_function'],
                holdout_off=True,
                f=self.actor_model_path,
                b=settings['b'],
                lrq=settings['lrq'],
                l=settings['l'],
                k=True,
            )
예제 #3
0
def test_multilabel_prediction_type():
    """A ``multilabel_oaa`` learner reports pMULTILABELS and predicts a list."""
    learner = vw(multilabel_oaa=4, quiet=True)
    learner.learn('1 | a b c')
    reported_type = learner.get_prediction_type()
    assert reported_type == learner.pMULTILABELS
    result = learner.predict(' | a b c')
    assert isinstance(result, list)
    del learner
예제 #4
0
def test_scalar_prediction_type():
    """A default learner reports pSCALAR and predicts a float."""
    learner = vw(quiet=True)
    learner.learn('1 | a b c')
    reported_type = learner.get_prediction_type()
    assert reported_type == learner.pSCALAR
    result = learner.predict(' | a b c')
    assert isinstance(result, float)
    del learner
예제 #5
0
def test_prob_prediction_type():
    """A ``csoaa_ldf='mc'`` learner with probabilities reports pPROB and predicts a float."""
    options = dict(loss_function='logistic', csoaa_ldf='mc', probabilities=True, quiet=True)
    learner = vw(**options)
    learner.learn('1 | a b c')
    assert learner.get_prediction_type() == learner.pPROB
    result = learner.predict(' | a b c')
    assert isinstance(result, float)
    del learner
예제 #6
0
def test_action_probs_prediction_type():
    """A ``cb_explore`` learner reports pACTION_PROBS and predicts a list."""
    learner = vw(cb_explore=2, ngram=2, quiet=True)
    learner.learn('1 | a b c')
    reported_type = learner.get_prediction_type()
    assert reported_type == learner.pACTION_PROBS
    result = learner.predict(' | a b c')
    assert isinstance(result, list)
    del learner
예제 #7
0
def test_action_scores_prediction_type():
    """A ``csoaa_ldf='m'`` learner reports pMULTICLASS and predicts an int."""
    learner = vw(loss_function='logistic', csoaa_ldf='m', quiet=True)
    learner.learn('1 | a b c')
    reported_type = learner.get_prediction_type()
    assert reported_type == learner.pMULTICLASS
    result = learner.predict(' | a b c')
    assert isinstance(result, int)
    del learner
예제 #8
0
 def save_and_continue(self, thread_id, event):
     """Every 1000 epochs, let thread 1 save the actor model and reload it.

     When ``self.epochs`` is a multiple of 1000 and ``thread_id`` is 1, the
     event is cleared, the current model is finished, and a new ``pyvw.vw``
     instance is created that reads from and writes to
     ``self.actor_model_path``.  The event is set again afterwards.  In every
     other case the call does nothing.

     Args:
         thread_id: identifier of the calling thread; only thread 1 saves.
         event: object with ``clear()`` and ``set()`` (e.g. a
             ``threading.Event``) toggled around the save.
     """
     if self.epochs % 1000.0 == 0 and thread_id == 1:
         event.clear()
         try:
             # print() with a single argument behaves the same on Python 2 and 3
             print("saving model...")
             print("epochs: " + str(self.epochs))
             self.actor_model.finish()
             self.actor_model = pyvw.vw("--quiet --save_resume -f {0} -i {1}".format(self.actor_model_path, self.actor_model_path))
         finally:
             # always release waiters, even if saving or reloading failed
             event.set()
예제 #9
0
def test_multiclass_prediction_type():
    """An ``oaa`` learner reports pMULTICLASS and predicts an int."""
    num_classes = 3
    learner = vw(loss_function='logistic', oaa=num_classes, quiet=True)
    learner.learn('1 | a b c')
    reported_type = learner.get_prediction_type()
    assert reported_type == learner.pMULTICLASS
    result = learner.predict(' | a b c')
    assert isinstance(result, int)
    del learner
예제 #10
0
def test_action_scores_prediction_type():
    """A ``csoaa_ldf='m'`` learner fed multi-line examples reports pMULTICLASS and predicts an int."""
    learner = vw(loss_function="logistic", csoaa_ldf="m", quiet=True)

    training_lines = [
        learner.example("1:1 | a b c"),
        learner.example("2:-1  | a b c"),
    ]
    learner.learn(training_lines)
    assert learner.get_prediction_type() == learner.pMULTICLASS

    query_lines = [
        learner.example("1 | a b c"),
        learner.example("2 | a b c"),
    ]
    result = learner.predict(query_lines)
    assert isinstance(result, int)
    del learner
예제 #11
0
def test_scalars_prediction_type():
    """An ``oaa`` learner with probabilities reports pSCALARS and predicts one value per class."""
    num_classes = 3
    learner = vw(loss_function='logistic', oaa=num_classes, probabilities=True, quiet=True)
    learner.learn('1 | a b c')
    assert learner.get_prediction_type() == learner.pSCALARS
    result = learner.predict(' | a b c')
    assert isinstance(result, list)
    assert len(result) == num_classes
    del learner
예제 #12
0
def test_cost_sensitive_label():
    """Inspect the first cost entry parsed from a ``2:5`` cost-sensitive label."""
    learner = vw(csoaa=4, quiet=True)
    parsed = pyvw.cost_sensitive_label(learner.example('2:5 |'))
    first_cost = parsed.costs[0]
    assert first_cost.label == 2
    assert first_cost.wap_value == 0.0
    assert first_cost.partial_prediction == 0.0
    assert first_cost.cost == 5.0
    assert str(parsed) == '2:5.0'
    del learner
예제 #13
0
def test_cbandits_label():
    """Inspect the first cost entry parsed from a ``1:10:0.5`` contextual-bandit label."""
    learner = vw(cb=4, quiet=True)
    parsed = pyvw.cbandits_label(learner.example('1:10:0.5 |'))
    first_cost = parsed.costs[0]
    assert first_cost.action == 1
    assert first_cost.probability == 0.5
    assert first_cost.partial_prediction == 0
    assert first_cost.cost == 10.0
    assert str(parsed) == '1:10.0:0.5'
    del learner
예제 #14
0
def test_prob_prediction_type():
    """A ``csoaa_ldf='mc'`` learner fed multi-line examples reports pPROB and predicts a float."""
    learner = vw(loss_function='logistic', csoaa_ldf='mc', probabilities=True, quiet=True)

    training_lines = [
        learner.example('1:0.2 | a b c'),
        learner.example('2:0.8  | a b c'),
    ]
    learner.learn(training_lines)
    assert learner.get_prediction_type() == learner.pPROB

    query_lines = [
        learner.example('1 | a b c'),
        learner.example('2 | a b c'),
    ]
    result = learner.predict(query_lines)
    assert isinstance(result, float)
    del learner
예제 #15
0
def test_multiclass_label_example():
    """Read label, weight and prediction back from a multiclass example."""
    num_classes = 4
    learner = pyvw.vw(loss_function="logistic", oaa=num_classes, quiet=True)
    example = learner.example("1 | a b c d", 2)
    parsed = pyvw.multiclass_label(example)
    assert parsed.label == 1
    assert parsed.weight == 1.0
    assert parsed.prediction == 0
    assert str(parsed) == "1"
예제 #16
0
def test_action_scores_prediction_type():
    """A ``csoaa_ldf='m'`` learner fed multi-line examples reports pMULTICLASS and predicts an int."""
    learner = vw(loss_function='logistic', csoaa_ldf='m', quiet=True)

    training_lines = [
        learner.example('1:1 | a b c'),
        learner.example('2:-1  | a b c'),
    ]
    learner.learn(training_lines)
    assert learner.get_prediction_type() == learner.pMULTICLASS

    query_lines = [
        learner.example('1 | a b c'),
        learner.example('2 | a b c'),
    ]
    result = learner.predict(query_lines)
    assert isinstance(result, int)
    del learner
예제 #17
0
def test_simple_label_example():
    """Read label, weight, prediction and initial value back from a simple-label example."""
    learner = vw(quiet=True)
    example = learner.example("1 |a two features |b more features here")
    parsed = pyvw.simple_label(example)
    assert parsed.label == 1.0
    assert parsed.weight == 1.0
    assert parsed.prediction == 0.0
    assert parsed.initial == 0.0
    assert str(parsed) == "1.0"
예제 #18
0
def test_scalars_prediction_type():
    """With ``oaa`` and probabilities the prediction is a list with one entry per class."""
    class_count = 3
    learner = vw(loss_function='logistic', oaa=class_count, probabilities=True, quiet=True)
    learner.learn('1 | a b c')
    reported_type = learner.get_prediction_type()
    assert reported_type == learner.pSCALARS
    scores = learner.predict(' | a b c')
    assert isinstance(scores, list)
    assert len(scores) == class_count
    del learner
예제 #19
0
def train_lr(args):
    """Train a one-against-all VW model on query/category pairs and log hold-out metrics.

    Reads a two-column CSV (query, category) from ``args.input``, lemmatizes
    the queries, trains for two iterations on a stratified 80% split with
    ``-f ./models/lr.vw``, then reloads that model with ``-t`` and logs
    accuracy and weighted F1 on the remaining 20%.

    Args:
        args: object whose ``input`` attribute is the CSV path.
    """
    logger.info("Reading data")
    df = pd.read_csv(args.input, names=['query', 'category'])

    logger.info("Lemmatizing and preparing data")
    df['query_lem'] = df['query'].apply(lemmatize)

    # ensure that categories start from 1
    # (assumes the input categories are 0-based - TODO confirm)
    df.category = df.category + 1
    df['vw_train'] = df.category.astype(str) + ' | ' + df['query_lem'].values
    df['vw_test'] = '| ' + df['query_lem'].values

    category_count = len(df.category.unique())

    df_train, df_test = train_test_split(df,
                                         test_size=0.2,
                                         stratify=df.category.values,
                                         random_state=42)

    train_examples = list(df_train['vw_train'].values)
    # Hold-out predictions use the unlabelled strings prepared above; the
    # labelled 'vw_train' column is only for training.
    test_examples = list(df_test['vw_test'].values)

    logger.info("Training LR model")
    vw_command = "--oaa {} --random_seed 17 --cache_file ./tmp1 -b 27 -f ./models/lr.vw ".format(
        category_count)
    logger.info(vw_command)
    vw = pyvw.vw(vw_command)
    for iteration in range(2):
        logger.info("Iteration %s", iteration)
        for example in train_examples:
            vw.learn(example)
    vw.finish()
    logger.info("Finished model training")

    logger.info("Calculating accuracy and F1 score on hold out data set")
    vw = pyvw.vw("-i ./models/lr.vw  -t")
    pred = [vw.predict(sample) for sample in test_examples]
    # release the prediction model once all predictions are collected
    vw.finish()
    logger.info("LR holdout accuracy score is %s",
                np.round(accuracy_score(df_test.category.values, pred), 2))
    logger.info(
        "LR holdout F1 score is %s",
        np.round(f1_score(df_test.category.values, pred, average='weighted'),
                 2))
def _vw_run(args, data, predict_and_yield):
    """Feed ``data`` through a VW instance, either learning or yielding predictions.

    Because the body contains ``yield`` this is always a generator: nothing
    runs until it is iterated, including when ``predict_and_yield`` is falsy
    (in which case it yields nothing but still has to be exhausted for the
    learning to happen).

    Args:
        args: iterable of command-line fragments, joined with spaces.
        data: iterable of items with ``label``, ``features`` and ``tag``
            attributes; ``label`` and ``features`` are concatenated with
            bytes literals (a falsy ``label`` is replaced by ``b""``).
        predict_and_yield: truthy to yield ``(prediction, tag)`` per item,
            falsy to learn from each item instead.

    Yields:
        ``(prediction, d.tag)`` tuples when ``predict_and_yield`` is truthy.
    """
    vw = pyvw.vw(" ".join(args))
    log.info("Running: vw " + " ".join(args))
    try:
        for d in data:
            ex = vw.example((d.label or b"") + b" | " + d.features + b"\n")
            if predict_and_yield:
                yield vw.predict(ex, pylibvw.vw.lMulticlass), d.tag
            else:
                vw.learn(ex)
    finally:
        # also runs when the consumer stops early or an error is raised,
        # so the VW instance is always finished
        vw.finish()
예제 #21
0
def test_prob_prediction_type():
    """A ``csoaa_ldf='mc'`` learner with probabilities reports pPROB and predicts a float."""
    settings = {
        'loss_function': 'logistic',
        'csoaa_ldf': 'mc',
        'probabilities': True,
        'quiet': True,
    }
    learner = vw(**settings)
    learner.learn('1 | a b c')
    reported_type = learner.get_prediction_type()
    assert reported_type == learner.pPROB
    result = learner.predict(' | a b c')
    assert isinstance(result, float)
    del learner
예제 #22
0
def _test_helper_save_load(vw_arg: str, num_iterations=2000, seed=10, has_automl=False, log_filename=None):
    """Run a simulation, save and reload the model midway, and return the final result.

    The last 1500 iterations run on a second instance loaded from the model
    saved by the first one; the first instance runs the remaining
    ``num_iterations - 1500`` iterations.

    Args:
        vw_arg: argument string used for both instances.
        num_iterations: total number of simulated iterations.
        seed: seed handed to ``Simulator``.
        has_automl: overwritten below by what the first instance reports.
        log_filename: passed to ``Simulator`` as ``debug_logfile``.

    Returns:
        The value returned by the second ``run_simulation`` call.
    """
    steps_after_reload = 1500
    steps_before_save = num_iterations - steps_after_reload
    model_file = "test_save_load.vw"

    original = pyvw.vw(arg_str=vw_arg)
    has_automl = "automl" in original.get_enabled_reductions()
    sim = Simulator(seed=seed, has_automl=has_automl, debug_logfile=log_filename)

    # first chunk, then save and shut the first instance down
    sim.run_simulation(original, steps_before_save, sim.users, sim.times_of_day, sim.actions, sim.get_cost)
    original.save(model_file)
    original.finish()

    # reload in another instance (todo remove vw_arg from here) and continue
    restored = pyvw.vw(f"-i {model_file} {vw_arg}")
    return sim.run_simulation(restored, steps_after_reload, sim.users, sim.times_of_day, sim.actions, sim.get_cost, shift=steps_before_save+1)
예제 #23
0
def _vw_run(args, data, predict_and_yield):
    """Feed ``data`` through a VW instance, either learning or yielding predictions.

    Because the body contains ``yield`` this is always a generator: nothing
    runs until it is iterated, including when ``predict_and_yield`` is falsy
    (in which case it yields nothing but still has to be exhausted for the
    learning to happen).

    Args:
        args: iterable of command-line fragments, joined with spaces.
        data: iterable of items with ``label``, ``features`` and ``tag``
            attributes; ``label`` and ``features`` are concatenated with
            bytes literals (a falsy ``label`` is replaced by ``b''``).
        predict_and_yield: truthy to yield ``(prediction, tag)`` per item,
            falsy to learn from each item instead.

    Yields:
        ``(prediction, d.tag)`` tuples when ``predict_and_yield`` is truthy.
    """
    vw = pyvw.vw(' '.join(args))
    util.log.info('Running: vw ' + ' '.join(args))
    try:
        for d in data:
            ex = vw.example((d.label or b'') + b' | ' + d.features + b'\n')
            if predict_and_yield:
                yield vw.predict(ex, pylibvw.vw.lMulticlass), d.tag
            else:
                vw.learn(ex)
    finally:
        # also runs when the consumer stops early or an error is raised,
        # so the VW instance is always finished
        vw.finish()
예제 #24
0
def test_ccb_single_slot_and_cb_equivalence_no_slot_features():
    """Train a CCB and a CB workspace on matching examples and compare weight counts.

    Each workspace learns one example, is finished with a readable model
    file, and the number of weights read back from the two files must match.
    """
    ccb_model_path = "model_file_ccb_equiv.txt"
    cb_model_path = "model_file_cb_equiv.txt"

    # --- CCB
    ccb_learner = pyvw.vw(quiet=True,
                          ccb_explore_adf=True,
                          readable_model=ccb_model_path)
    ccb_text = """
    ccb shared |User b
    ccb action |Action d
    ccb action |Action e
    ccb action |Action f
    ccb action |Action ff
    ccb action |Action fff
    ccb slot 4:1:0.2 |
    """
    ccb_learner.learn(ccb_text)
    ccb_learner.finish()
    ccb_weight_count = count_weights_from_readable_model_file_for_equiv_test(
        ccb_model_path)

    # --- CB
    cb_learner = pyvw.vw(quiet=True,
                         cb_explore_adf=True,
                         readable_model=cb_model_path)
    cb_text = """
    shared |User b
    |Action d
    |Action e
    |Action f
    |Action ff
    4:1:0.2 |Action fff
    """
    cb_learner.learn(cb_text)
    cb_learner.finish()
    cb_weight_count = count_weights_from_readable_model_file_for_equiv_test(
        cb_model_path)

    assert ccb_weight_count == cb_weight_count
예제 #25
0
    def get_vw(self):
        """Return the ``pyvw.vw`` instance, creating it from ``self.params`` on first use.

        Returns
        -------
        pyvw.vw instance
        """
        existing = self.vw_
        if existing is not None:
            return existing

        self.vw_ = pyvw.vw(**self.params)
        return self.vw_
예제 #26
0
def test_get_weight_name():
    """Look weights up by feature name, with and without a namespace, after one update."""
    learner = vw(quiet=True)
    learner.learn("1 | a a b c |ns x")

    weight_a = learner.get_weight_from_name("a")
    weight_b = learner.get_weight_from_name("b")
    weight_c = learner.get_weight_from_name("c")
    weight_x_default_ns = learner.get_weight_from_name("x")
    weight_x_in_ns = learner.get_weight_from_name("x", "ns")

    assert weight_a != 0.
    assert weight_b != 0.
    assert weight_b == weight_c
    assert weight_a != weight_b
    # "x" only exists inside namespace "ns"
    assert weight_x_default_ns == 0.
    assert weight_x_in_ns != 0.
    assert weight_x_in_ns == weight_b
예제 #27
0
def test_multiclass_probabilities_label():
    """Build a multiclass-probabilities label from an example and from explicit lists."""
    num_classes = 4
    learner = pyvw.vw(
        loss_function="logistic", oaa=num_classes, probabilities=True, quiet=True
    )
    example = learner.example("1 | a b c d", 2)
    learner.learn(example)

    from_example = pyvw.multiclass_probabilities_label(example)
    assert str(from_example) == "1:0.25 2:0.25 3:0.25 4:0.25"

    from_lists = pyvw.multiclass_probabilities_label([1, 2, 3], [0.4, 0.3, 0.3])
    assert str(from_lists) == "1:0.4 2:0.3 3:0.3"
예제 #28
0
    def get_vw(self):
        """Lazily build the ``pyvw.vw`` instance from ``self.params`` and return it.

        Returns
        -------
        pyvw.vw instance
        """
        needs_creation = self.vw_ is None
        if needs_creation:
            instance = pyvw.vw(**self.params)
            self.vw_ = instance

        return self.vw_
예제 #29
0
def test_vw_config_manager():
    """Round-trip a VW configuration through its option list twice.

    The first instance is built from an argument string and its config is
    turned into a list of option strings.  A second instance is built from
    those strings, and its own config must yield the same set of options.
    """
    expected_set = {'--quiet', '--loss_function logistic', '--save_resume', '--data /root/vowpal_wabbit/test/train-sets/rcv1_small.dat'}

    vw = pyvw.vw(arg_str="--save_resume --loss_function logistic -d /root/vowpal_wabbit/test/train-sets/rcv1_small.dat --quiet")
    config = vw.get_config()

    cmd_str_list = helper_options_to_list_strings(config)
    assert set(cmd_str_list) == expected_set

    vw.finish()

    # do another iteration generating the cmd string from the output of previous
    new_args = " ".join(cmd_str_list)

    other_vw = pyvw.vw(new_args)
    # query the NEW instance; the first one has already been finished
    new_config = other_vw.get_config()
    new_cmd_str_list = helper_options_to_list_strings(new_config)

    assert set(new_cmd_str_list) == expected_set

    other_vw.finish()
예제 #30
0
    def load(self, verify_on_load=True):
        """
        Load the model by creating a ``pyvw.vw`` instance from ``self.command``.

        The instance is stored on ``self.process``; the parent class's ``load``
        is then called with the same ``verify_on_load`` flag.  The original
        description reads: verify model first, then stop process if status is
        not active - presumably that happens in the parent ``load``; confirm
        there.

        Args:
            verify_on_load (bool): flag to call verify when loading a model
        """

        self.process = pyvw.vw(self.command)

        # NOTE(review): super(self.__class__, self) recurses forever when this
        # method is inherited by a subclass, because self.__class__ is then the
        # subclass.  Naming the enclosing class explicitly (or zero-argument
        # super() on Python 3) avoids that - confirm and fix.
        super(self.__class__, self).load(verify_on_load=verify_on_load)
예제 #31
0
    def __init__(self, horizon, num_actions, policy, default_model="--power_t 0.0 -q la --quiet"):
        """
        Store the basic MDP information via the parent initializer and create
        one VW model per step of the horizon.

        Params:
            default_model: VW argument string used for every per-step Q model.
                You can try different settings to get the best model.
        """
        super().__init__(horizon, num_actions, policy)

        # We assume that each step is indexed from 1 to H (horizon)
        per_step_models = []
        for _ in range(self.horizon):
            per_step_models.append(pyvw.vw(default_model))
        self.models = per_step_models
예제 #32
0
 def _create_model(self, project, initial_params={}):
     """Create a VW model from the training file and save it to the model file.

     ``initial_params`` is copied (never mutated), extended with the training
     file path under ``'data'`` and passed through ``self._create_params``.
     When more than one pass is configured, the cache options are switched on.
     """
     train_file = os.path.join(self.datadir, self.TRAIN_FILE)
     # build a fresh dict so the caller's mapping is left untouched
     seeded_params = dict(initial_params, data=train_file)
     params = self._create_params(seeded_params)

     multiple_passes = params.get('passes', 1) > 1
     if multiple_passes:
         # need a cache file when there are multiple passes
         params['cache'] = True
         params['kill_cache'] = True

     self.debug("model parameters: {}".format(params))
     self._model = pyvw.vw(**params)
     self._model.save(os.path.join(self.datadir, self.MODEL_FILE))
예제 #33
0
 def test_vw_oml_problem_and_vanilla_vw(self):
     """Run the online learning loop with a plain VW built from the fixed config and print the mean loss."""
     problem_args, problem = get_vw_tuning_problem()
     fixed_config = problem_args["fixed_hp_config"]
     vanilla_vw = pyvw.vw(**fixed_config)

     losses = online_learning_loop(
         problem.max_iter_num,
         problem.vw_examples,
         vanilla_vw,
         loss_func=fixed_config.get("loss_function", "squared"),
     )
     average_loss = sum(losses) / len(losses)
     print("final average loss:", average_loss)
예제 #34
0
    def _create_bandit(self, num_actions, seed=None):
        """Create a quiet VW contextual-bandit learner over ``num_actions`` actions.

        Args:
            num_actions: value passed to ``--cb_explore``.
            seed: optional value for ``--random_seed``; ``None`` leaves the
                option off.  A seed of ``0`` is passed through explicitly.

        Returns:
            The ``pyvw.vw`` instance.
        """
        # --epsilon: Epsilon-Greedy exploration
        # --cover: Online Cover exploration
        # --nn N: use sigmoidal feedforward network w/ N hidden units
        from vowpalwabbit import pyvw
        cmd = "--nn 16 --epsilon 0.1 --cover 3 --cb_explore {}".format(
            num_actions)

        # compare against None so that an explicit seed of 0 is not dropped
        if seed is not None:
            cmd += " --random_seed {}".format(seed)

        bandit = pyvw.vw(cmd, quiet=True)
        return bandit
예제 #35
0
    def get_vw(self):
        """Return the vw instance, creating it and recording its label type on first use.

        Returns
        -------
        pyvw.vw instance
        """
        if self.vw_ is not None:
            return self.vw_

        self.vw_ = vw(**self.params)
        # set label type
        self.label_type_ = self.vw_.get_label_type()
        return self.vw_
예제 #36
0
def test_example_features():
    """Exercise feature lookup, feature pushing and namespace push/pop on an example."""
    learner = vw(quiet=True)
    example = learner.example("1 |a two features |b more features here")
    second_ns = pyvw.namespace_id(example, 1)
    assert example.get_feature_id(second_ns, "a") == 127530

    example.push_hashed_feature(second_ns, 1122)
    example.push_features("x", [("c", 1.0), "d"])
    example.push_feature(second_ns, 11000)
    assert example.num_features_in("x") == 2
    assert example.sum_feat_sq(second_ns) == 5.0

    third_ns = pyvw.namespace_id(example, 2)
    example.push_namespace(third_ns)
    assert example.pop_namespace()
예제 #37
0
    def get_vw(self):
        """Lazily create the vw instance (also storing its label type) and return it.

        Returns
        -------
        pyvw.vw instance
        """
        already_built = self.vw_ is not None
        if not already_built:
            self.vw_ = vw(**self.params)
            # set label type
            self.label_type_ = self.vw_.get_label_type()

        return self.vw_
예제 #38
0
def test_example_features():
    """Exercise feature lookup, feature pushing and namespace push/pop on an example."""
    learner = vw(quiet=True)
    example = learner.example('1 |a two features |b more features here')
    namespace_one = pyvw.namespace_id(example, 1)
    assert example.get_feature_id(namespace_one, 'a') == 127530

    example.push_hashed_feature(namespace_one, 1122)
    example.push_features('x', [('c', 1.), 'd'])
    example.push_feature(namespace_one, 11000)
    assert example.num_features_in('x') == 2
    assert example.sum_feat_sq(namespace_one) == 5.0

    namespace_two = pyvw.namespace_id(example, 2)
    example.push_namespace(namespace_two)
    assert example.pop_namespace()
예제 #39
0
def test_regressor_args():
    """Train from a data file, update in memory, save, reload and compare predictions."""
    # load and parse external data file
    here = os.path.dirname(os.path.realpath(__file__))
    data_file = os.path.join(here, 'resources', 'train.dat')
    query = '| feature1:2.5'
    saved_model = 'tmp.model'

    trained = vw(oaa=3, data=data_file, passes=30, c=True, k=True)
    assert trained.predict(query) == 1

    # update model in memory
    for _ in range(10):
        trained.learn('3 | feature1:2.5')
    assert trained.predict(query) == 3

    # save model
    trained.save(saved_model)
    del trained

    # load initial regressor and confirm updated prediction
    reloaded = vw(i=saved_model, quiet=True)
    assert reloaded.predict(query) == 3
    del reloaded

    # clean up
    os.remove('{}.cache'.format(data_file))
    os.remove(saved_model)
예제 #40
0
def model_pred(trn_tmp_xy, val_tmp_x, val_tmp_y, tst_x):
    """Train a VW model for a fixed number of passes and predict on two sets.

    The model is trained in memory on ``trn_tmp_xy`` and written to
    ``vw.model`` (plus ``vw.readable.model``) when finished; it is then
    reloaded with ``-t`` to produce the predictions.

    Args:
        trn_tmp_xy: iterable of training examples handed to ``vw.learn``.
        val_tmp_x: iterable of examples to predict (first returned list).
        val_tmp_y: unused; kept for interface compatibility.
        tst_x: iterable of examples to predict (second returned list).

    Returns:
        ``(pred_trn_tmp, pred_tst_tmp, best_iter)`` where the first two are
        lists of predictions and ``best_iter`` is the number of passes (400).
    """
    train_args = ' '.join([
        '-b 7',
        '--link logistic',
        '--loss_function logistic',
        '-l 0.2',
        '--l1 0',
        '--l2 0',
        '--holdout_off',
        '--total 32',
        '-f vw.model',
        '--readable_model vw.readable.model',
    ])
    vw = pyvw.vw(train_args)
    best_iter = 400
    # range() instead of xrange() so the code runs on Python 2 and 3
    for _ in range(best_iter):
        for example in trn_tmp_xy:
            vw.learn(example)
    vw.finish()

    vw = pyvw.vw("-i vw.model -t")
    pred_trn_tmp = [vw.predict(sample) for sample in val_tmp_x]
    pred_tst_tmp = [vw.predict(sample) for sample in tst_x]
    # release the prediction model once both lists are collected
    vw.finish()

    return pred_trn_tmp, pred_tst_tmp, best_iter
예제 #41
0
 def _create_model(self, project):
     """Create a VW model for ``project`` from the training file and save it.

     The parameters contain the training file path under ``'data'`` and the
     number of project subjects under the key named by ``self.algorithm``.
     When more than one pass is configured, the cache options are switched on.
     """
     self.info('creating VW model (algorithm: {})'.format(self.algorithm))
     train_file = os.path.join(self.datadir, self.TRAIN_FILE)
     seed_params = {
         'data': train_file,
         self.algorithm: len(project.subjects)
     }
     params = self._create_params(seed_params)

     multiple_passes = params.get('passes', 1) > 1
     if multiple_passes:
         # need a cache file when there are multiple passes
         params['cache'] = True
         params['kill_cache'] = True

     self.debug("model parameters: {}".format(params))
     self._model = pyvw.vw(**params)
     self._model.save(os.path.join(self.datadir, self.MODEL_FILE))
예제 #42
0
 def initialize(self):
     """Load the saved VW model into ``self._model`` unless one is already set.

     Raises:
         NotInitializedException: if the model file does not exist.
     """
     if self._model is not None:
         return

     path = os.path.join(self.datadir, self.MODEL_FILE)
     if not os.path.exists(path):
         raise NotInitializedException(
             'model {} not found'.format(path),
             backend_id=self.backend_id)

     self.debug('loading VW model from {}'.format(path))
     params = self._create_params({'i': path, 'quiet': True})
     # don't confuse the model with passes
     params.pop('passes', None)
     self.debug("model parameters: {}".format(params))
     self._model = pyvw.vw(**params)
     self.debug('loaded model {}'.format(str(self._model)))
예제 #43
0
def mini_vw(inputFile, numPasses, otherArgs):
    """Run ``numPasses`` learning passes over the lines of ``inputFile``.

    Each line is learned either directly as a string or through an example
    object, depending on the module-level ``learnFromStrings`` flag.

    Args:
        inputFile: path of the text file with one example per line.
        numPasses: number of times the file is read from start to end.
        otherArgs: argument string passed to ``pyvw.vw``.
    """
    vw = pyvw.vw(otherArgs)
    for p in range(numPasses):
        # single-argument print() gives the same output on Python 2 and 3
        print('pass %d' % (p + 1))
        # the context manager closes the file even if learning raises
        with open(inputFile, 'r') as h:
            for l in h:
                if learnFromStrings:
                    vw.learn(l)
                else:
                    ex = vw.example(l)
                    vw.learn(ex)
                    ex.finish()
    vw.finish()
예제 #44
0
def test_namespace_id():
    """Check id, ord_ns and ns of the namespace ids at positions 0, 1 and 2."""
    learner = vw(quiet=True)
    example = learner.example("1 |a two features |b more features here")
    namespace_ids = [pyvw.namespace_id(example, position) for position in range(3)]

    expectations = [
        (0, 97, "a"),
        (1, 98, "b"),
        (2, 128, "\x80"),  # Represents string of ord_ns
    ]
    for found, (want_id, want_ord, want_ns) in zip(namespace_ids, expectations):
        assert found.id == want_id
        assert found.ord_ns == want_ord
        assert found.ns == want_ns
예제 #45
0
def test_prob_prediction_type():
    """A ``csoaa_ldf='mc'`` learner fed multi-line examples reports pPROB and predicts a float."""
    learner = vw(loss_function='logistic',
                 csoaa_ldf='mc',
                 probabilities=True,
                 quiet=True)

    training_lines = [
        learner.example('1:0.2 | a b c'),
        learner.example('2:0.8  | a b c')
    ]
    learner.learn(training_lines)
    reported_type = learner.get_prediction_type()
    assert reported_type == learner.pPROB

    query_lines = [
        learner.example('1 | a b c'),
        learner.example('2 | a b c')
    ]
    result = learner.predict(query_lines)
    assert isinstance(result, float)
    del learner
예제 #46
0
 def test_basic(self):
     """Learn one example and compare the prediction on it with a fixed value."""
     learner = pyvw.vw(quiet=True)
     example = learner.example('1 | a b c')
     learner.learn(example)
     predicted = learner.predict(example)
     self.assertEqual(0.632030725479126, predicted)
예제 #47
0
ds = config['DecisionService']
cache_folder = ds['CacheFolder']

# Walk every folder below <cache_folder>/onlinetrainer.  A folder that holds
# both a 'model' and a 'model.trackback' file gets a 'modelid: ...' line
# prepended to the trackback file, unless its first line already has one.
for root, subdirs, files in os.walk(os.path.join(cache_folder, 'onlinetrainer')):
    print('looking at folder {0}'.format(root))
    model = None
    trackback = None
    for file in files:
        if file == 'model':
            model = os.path.join(root, file)
        elif file == 'model.trackback':
            trackback = os.path.join(root, file)

    # both files are required
    if model is None or trackback is None:
        continue

    print('looking at folder {0}'.format(root))

    with open(trackback, 'r') as f:
        first_line = f.readline()
    # nothing to do when the id has already been written
    if first_line.startswith('modelid:'):
        continue

    # load the model only to read its id, then drop the instance again
    vw = pyvw.vw("--quiet -i {0}".format(model))
    id = vw.get_id()
    del vw

    line_prepender(trackback, 'modelid: {0}\n'.format(id))
예제 #48
0
            sortedSpans = []
            for s in spans: sortedSpans.append(s)
            sortedSpans.sort()
            oracle = []
            for id in range(len(sortedSpans)):
                if sortedSpans[id][0] > sortedSpans[0][0]: break
                oracle.append( sortedSpans[id][1] )

            pred = self.sch.predict(examples  = examples,
                                    my_tag    = i+1,
                                    oracle    = oracle,
                                    condition = [ (i, 'p'), (i-1, 'q') ] )

            self.vw.finish_example(examples)

            output.append( spans[pred][2] )
            for j in spans[pred][2]:
                covered[j] = True

        return output


# Create a VW instance that uses the 'hook' search task and attach the
# WordAligner task class to it.
print('training LDF')
vw = pyvw.vw("--search 0 --csoaa_ldf m --search_task hook --ring_size 1024 --quiet -q ef -q ep")
task = vw.init_search_task(WordAligner)
# Run learn over my_dataset ten times.
for p in range(10):
    task.learn(my_dataset)
print('====== test ======')
# Predict on one hand-written input and print the result next to the expected output.
print(task.predict( ("the blue flower".split(), ([],[],[]), "la fleur bleue".split()) ))
print('should have printed [[0], [2], [1]]')
from vowpalwabbit import pyvw

# Create a VW instance with the --audit flag.
vw = pyvw.vw('--audit')
# Example built from a dict with two namespaces, 'a' and 'x'; learn on it once.
full = vw.example( { 'a': ['b'], 'x': ['y'] } )
full.learn()

# Example that starts with namespace 'a' only.
part = vw.example( {'a': ['b'] } )
part.learn()

# Push feature 'y' under namespace 'x', then learn again.
part.push_features('x', ['y'])
part.learn()

# Erase namespace 'x', push feature 'z' under it instead, and learn once more.
part.erase_namespace(ord('x'))
part.push_features('x', ['z'])
part.learn()
예제 #50
0
def test_cost_sensitive_label():
    """The first cost of a cost-sensitive label parsed from ``'1 |'`` has label 1."""
    learner = vw(csoaa=4, quiet=True)
    parsed = pyvw.cost_sensitive_label(learner.example('1 |'))
    first_cost = parsed.costs[0]
    assert first_cost.label == 1
    del learner
예제 #51
0
def test_cbandits_label():
    """The first cost of a contextual-bandit label parsed from ``'1 |'`` has label 1."""
    learner = vw(cb=4, quiet=True)
    parsed = pyvw.cbandits_label(learner.example('1 |'))
    first_cost = parsed.costs[0]
    assert first_cost.label == 1
    del learner
예제 #52
0
 def test_del(self):
     """Creating a default vw instance and deleting it must not raise."""
     instance = pyvw.vw()
     del instance
예제 #53
0
 def test_finish(self):
     """``finished`` is falsy on a fresh instance and truthy after ``finish()``."""
     instance = pyvw.vw()
     assert not instance.finished
     instance.finish()
     assert instance.finished
예제 #54
0
 def vw(self):
     """Return a new quiet vw instance using ``BIT_SIZE`` for the ``b`` option."""
     instance = pyvw.vw(quiet=True, b=BIT_SIZE)
     return instance
예제 #55
0
from vowpalwabbit import pyvw


def my_predict(vw, ex):
    """Return the sum of ``vw.get_weight(f) * v`` over the ``(f, v)`` pairs of ``ex.iter_features()``."""
    total = 0.
    for feature, value in ex.iter_features():
        weight = vw.get_weight(feature)
        total = total + weight * value
    return total

def ensure_close(a,b,eps=1e-6):
    """Raise ``Exception`` when ``a`` and ``b`` differ by more than ``eps``; otherwise return None."""
    difference = abs(a-b)
    if difference > eps:
        message = "".join([
            "test failed: expected ", str(a),
            " and ", str(b),
            " to be ", str(eps),
            "-close, but they differ by ", str(abs(a-b)),
        ])
        raise Exception(message)

###############################################################################
# Create a VW instance with the --quiet flag.
vw = pyvw.vw("--quiet")


###############################################################################
# Learn from one example given directly as a string.
vw.learn("1 |x a b")


###############################################################################
print('# do some stuff with a read example:')
# Build an example object with two namespaces, 'x' and 'y'.
ex = vw.example("1 |x a b |y c")
# Learn on the same example object four times in a row.
ex.learn() ; ex.learn() ; ex.learn() ; ex.learn()

# Read the example's updated prediction back and print it.
updated_pred = ex.get_updated_prediction()
print('current partial prediction =', updated_pred)

# compute our own prediction
예제 #56
0
                (NOUN, 'monster'),
                (VERB, 'ate'),
                (DET , 'a'),
                (ADJ , 'big'),
                (NOUN, 'sandwich')],
               [(DET , 'the'),
                (NOUN, 'sandwich'),
                (VERB, 'was'),
                (ADJ , 'tasty')],
               [(NOUN, 'it'),
                (VERB, 'ate'),
                (NOUN, 'it'),
                (ADJ , 'all')] ]


# initialize VW as usual, but use 'hook' as the search_task
vw = pyvw.vw("--search 4 --quiet --search_task hook --ring_size 1024")

# tell VW to construct your search task object
sequenceLabeler = vw.init_search_task(SequenceLabeler)

# train it on the above dataset ten times; the my_dataset.__iter__ feeds into _run above
# (progress messages go to stderr, results to stdout)
print('training!', file=sys.stderr)
for i in range(10):
    sequenceLabeler.learn(my_dataset)

# now see the predictions on a test sentence
# (every word is paired with the placeholder label 1 before predicting)
print('predicting!', file=sys.stderr)
print(sequenceLabeler.predict( [(1,w) for w in "the sandwich ate a monster".split()] ))
print('should have printed: [1, 2, 3, 1, 2]')
    vw_args = {
        'quiet': True,
        'passes': 10,
        'cache': True,
        'f':  "%s-predictor.vw" % topic,
        'k': True,
        'ngram': 2,
        'skips': 2,
        'ftrl': True,
        'decay_learning_rate': 0.99,
        'r': "%s-predictions.txt" % topic,
        # 'progressive_validation': "%s-validations.txt" % topic,
        'loss_function': 'hinge'
    }

    vw = pyvw.vw(**vw_args)

    for tweet in get_vw('%s-classified.train.vw' % topic):
        if (len(tweet) < 3):
            continue

        if tweet[:1] == '0':
            tweet = '-1' + tweet[1:]

        ex = vw.example(tweet)
        vw.learn(ex)
        # print(vw.predict(ex))
        # print(ex)
        print("%s" % (re.sub(r'\n', '', tweet)))
        # out.write(features + "\n")
        # counter += 1
예제 #58
0
def test_delete():
    """Check that ``del`` removes the local name bound to a vw instance."""
    model = vw(quiet=True, b=BIT_SIZE)
    # The assertions look the name up in locals(), so the variable must stay
    # named 'model'.
    assert 'model' in locals()
    del model
    # After ``del`` the name must no longer be among the function's locals.
    assert 'model' not in locals()