Example #1
def fg_inference(compile_fg):
    print('----------------------------------------------------------------')
    print('Weight inference')
    print('----------------------------------------------------------------')
    weight, variable, factor, ftv, domain_mask, n_edges = compile_fg
    fg = NumbSkull(
        n_inference_epoch=1000,
        n_learning_epoch=1000,
        stepsize=0.01,
        decay=0.95,
        reg_param=1e-6,
        regularization=2,
        truncation=10,
        quiet=True,
        verbose=False,
        learn_non_evidence=False,  # need to test
        sample_evidence=False,
        burn_in=10,
        nthreads=1)
    fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)
    fg.inference(out=True)
    for i in range(len(variable)):
        if fg.factorGraphs[0].marginals[i] > 0.5:
            variable[i]['initialValue'] = 1
        else:
            variable[i]['initialValue'] = 0

    weight_value = fg.factorGraphs[0].weight_value[0]
    return weight_value
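
The compile_fg argument is a six-element tuple of numbskull record arrays (weight, variable, factor, ftv, domain_mask, n_edges). Below is a minimal sketch of building one by hand, assuming the usual numbskull imports (the dtype records from numbskull.numbskulltypes and the FACTORS map from numbskull.inference, both of which Example #7 below relies on); the single propensity factor is purely illustrative:

import numpy as np
from numbskull.numbskulltypes import Weight, Variable, Factor, FactorToVar
from numbskull.inference import FACTORS

# One learnable weight
weight = np.zeros(1, Weight)
weight[0]['isFixed'] = False
weight[0]['initialValue'] = 0.0

# One boolean, non-evidence variable
variable = np.zeros(1, Variable)
variable[0]['isEvidence'] = 0
variable[0]['initialValue'] = 0
variable[0]['dataType'] = 0      # 0 = boolean
variable[0]['cardinality'] = 2

# One unary factor touching that variable
factor = np.zeros(1, Factor)
factor[0]['factorFunction'] = FACTORS["DP_GEN_LF_PROPENSITY"]
factor[0]['weightId'] = 0
factor[0]['featureValue'] = 1
factor[0]['arity'] = 1
factor[0]['ftv_offset'] = 0

ftv = np.zeros(1, FactorToVar)
ftv[0]['vid'] = 0

domain_mask = np.zeros(1, bool)

compile_fg = (weight, variable, factor, ftv, domain_mask, 1)
w_value = fg_inference(compile_fg)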
Example #2
    def train(self,
              V,
              cardinality,
              L,
              L_offset,
              y=None,
              deps=(),
              init_acc=1.0,
              init_deps=0.0,
              init_class_prior=-1.0,
              epochs=100,
              step_size=None,
              decay=0.99,
              reg_param=0.1,
              reg_type=2,
              verbose=False,
              truncation=10,
              burn_in=50,
              timer=None):

        n_data = V.shape[0]
        step_size = step_size or 1.0 / n_data
        reg_param_scaled = reg_param / n_data
        # self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges = self._compile(
            V, cardinality, L, L_offset, y, deps, init_acc,
            init_deps)  # , init_deps, init_class_prior)

        fg = NumbSkull(n_inference_epoch=0,
                       n_learning_epoch=epochs,
                       stepsize=step_size,
                       decay=decay,
                       reg_param=reg_param_scaled,
                       regularization=reg_type,
                       truncation=truncation,
                       quiet=(not verbose),
                       verbose=verbose,
                       learn_non_evidence=True,
                       burn_in=burn_in)
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()

        self.weights = fg.factorGraphs[0].weight_value[0][:len(L)]
        self.dep_weights = fg.factorGraphs[0].weight_value[0][len(L):]
        self.lf_accuracy = 1. / (1. + np.exp(-self.weights[:len(L)]))
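
The last line converts the learned log-scale weights into probability-scale LF accuracies via the logistic function acc = 1 / (1 + exp(-w)). A quick numeric check of that transform:

import numpy as np

w = np.array([-1.0, 0.0, 2.0])       # learned log-scale accuracy weights
acc = 1.0 / (1.0 + np.exp(-w))
print(acc)                           # approx. [0.269, 0.5, 0.881]
# w = 0 is a coin-flip LF (accuracy 0.5); larger weights mean higher accuracy.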
Example #3
    def marginals(self,
                  V,
                  cardinality,
                  L,
                  L_offset,
                  deps=(),
                  init_acc=1.0,
                  init_deps=1.0,
                  init_class_prior=-1.0,
                  epochs=100,
                  step_size=None,
                  decay=0.99,
                  verbose=False,
                  burn_in=50,
                  timer=None):
        if self.weights is None:
            raise ValueError(
                "Must fit model with train() before computing marginal probabilities."
            )

        y = None
        weight, variable, factor, ftv, domain_mask, n_edges = self._compile(
            V, cardinality, L, L_offset, y, deps, self.weights,
            self.dep_weights)

        fg = NumbSkull(n_inference_epoch=epochs,
                       n_learning_epoch=0,
                       stepsize=step_size,
                       decay=decay,
                       quiet=(not verbose),
                       verbose=verbose,
                       learn_non_evidence=True,
                       burn_in=burn_in,
                       sample_evidence=False)
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        fg.inference(out=False)
        marginals = fg.factorGraphs[0].marginals[:V.shape[0]]

        return marginals
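
Examples #2 and #3 form a pair: train() fits and stores the weights, and marginals() reuses them, raising if train() was never called. Since marginals() runs zero learning epochs, the unset step_size is never actually used. A hedged driver sketch, assuming a model object exposing both methods and inputs V, cardinality, L, L_offset of the expected shapes:

model.train(V, cardinality, L, L_offset, epochs=100)
p = model.marginals(V, cardinality, L, L_offset, epochs=100)
# p[i] estimates the probability that variable i takes the positive value.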
Example #4
def fg_learning(compile_fg):
    print('----------------------------------------------------------------')
    print('Weight learning')
    print('----------------------------------------------------------------')
    weight, variable, factor, ftv, domain_mask, n_edges = compile_fg
    fg = NumbSkull(
        n_inference_epoch=1000,
        n_learning_epoch=1000,
        stepsize=0.01,
        decay=0.95,
        reg_param=1e-6,
        regularization=2,
        truncation=10,
        quiet=True,
        verbose=False,
        learn_non_evidence=False,  # need to test
        sample_evidence=False,
        burn_in=10,
        nthreads=1)
    fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)
    fg.learning(out=True)
    weight_value = fg.factorGraphs[0].weight_value[0]
    return weight_value
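
fg_learning is the counterpart of fg_inference in Example #1: the same NumbSkull configuration, but it calls fg.learning() and returns the learned weight vector. Each helper builds a fresh NumbSkull, so learned weights only survive through the returned array; to run inference with them, copy them back into the weight array and fix them first, as Example #8 below also does. A short sketch reusing the compile_fg tuple from the sketch after Example #1:

# Learn, write the weights back as fixed initial values, then run inference.
learned = fg_learning(compile_fg)
weight, variable, factor, ftv, domain_mask, n_edges = compile_fg
weight['isFixed'] = True
weight['initialValue'] = learned
fg_inference(compile_fg)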
Example #5
    def train(self,
              L,
              y=None,
              deps=(),
              init_acc=1.0,
              epochs=100,
              step_size=None,
              decay=0.99,
              reg_param=0.1,
              reg_type=2,
              verbose=False,
              truncation=10,
              burn_in=50,
              timer=None):
        step_size = step_size or 1.0 / L.shape[0]
        reg_param_scaled = reg_param / L.shape[0]
        self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges = self._compile(
            L, y, init_acc)
        fg = NumbSkull(n_inference_epoch=0,
                       n_learning_epoch=epochs,
                       stepsize=step_size,
                       decay=decay,
                       reg_param=reg_param_scaled,
                       regularization=reg_type,
                       truncation=truncation,
                       quiet=(not verbose),
                       verbose=verbose,
                       learn_non_evidence=True,
                       burn_in=burn_in)
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)
        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()
        self._process_learned_weights(L, fg)
Example #6
    def train(self, L, deps=(), LF_acc_prior_weights=None,
        LF_acc_prior_weight_default=1, labels=None, label_prior_weight=5,
        init_deps=0.0, init_class_prior=-1.0, epochs=30, step_size=None, 
        decay=1.0, reg_param=0.1, reg_type=2, verbose=False, truncation=10, 
        burn_in=5, cardinality=None, timer=None, candidate_ranges=None, threads=1):
        """
        Fits the parameters of the model to a data set. By default, learns a
        conditionally independent model. Additional unary dependencies can be
        set to be included in the constructor. Additional pairwise and
        higher-order dependencies can be included as an argument.

        Results are stored as a member named weights, instance of
        snorkel.learning.gen_learning.GenerativeModelWeights.

        :param L: M x N csr_AnnotationMatrix-type label matrix, where there are 
            M candidates labeled by N labeling functions (LFs)
        :param deps: collection of dependencies to include in the model, each 
                     element is a tuple of the form 
                     (LF 1 index, LF 2 index, dependency type),
                     see snorkel.learning.constants
        :param LF_acc_prior_weights: An N-element list of prior weights for the
            LF accuracies (log scale)
        :param LF_acc_prior_weight_default: Default prior for the weight of each 
            LF accuracy; if LF_acc_prior_weights is unset, each LF will have 
            this accuracy prior weight (log scale)
        :param labels: Optional ground truth labels
        :param label_prior_weight: The prior weight (log scale) on the ground
            truth labels (if provided) being correct
        :param init_deps: initial weight for additional dependencies, except
                          class prior (log scale)
        :param init_class_prior: initial class prior (in log scale), note only
                                 used if class_prior=True in constructor
        :param epochs: number of training epochs
        :param step_size: gradient step size, default is 0.0001
        :param decay: multiplicative decay of step size,
                      step_size_(t+1) = step_size_(t) * decay
        :param reg_param: regularization strength
        :param reg_type: 1 = L1 regularization, 2 = L2 regularization
        :param verbose: whether to write debugging info to stdout
        :param truncation: number of iterations between truncation step for L1
                           regularization
        :param burn_in: number of burn-in samples to take before beginning
                        learning
        :param cardinality: number of possible classes; by default is inferred
            from the label matrix L
        :param timer: stopwatch for profiling, must implement start() and end()
        :param candidate_ranges: Optionally, a list of M sets of integer values,
            representing the possible categorical values that each of the M
            candidates can take. A label outside this range raises an error.
            If None, each candidate can take any value from 0 to cardinality.
        :param threads: the number of threads to use for sampling. Default is 1.
        """
        m, n = L.shape
        step_size = step_size or 0.0001

        # Check to make sure matrix is int-valued
        element_type = type(L[0,0])
        # Note: Other simpler forms of this check often don't work; still not
        # sure why...
        if not issubclass(element_type, np.integer):
            raise ValueError("""Label matrix must have int-type elements, 
                but elements have type %s""" % element_type)

        # Automatically infer cardinality
        # Binary: Values in {-1, 0, 1} [Default]
        # Categorical: Values in {0, 1, ..., K}
        if cardinality is None:
            # If candidate_ranges is provided, use this to determine cardinality
            if candidate_ranges is not None:
                cardinality = max(map(max, candidate_ranges))
            else:
                # This is just an annoying hack for LIL sparse matrices...
                try:
                    lmax = L.max()
                except AttributeError:
                    lmax = L.tocoo().max()

                if lmax > 2:
                    cardinality = lmax
                elif lmax < 2:
                    cardinality = 2
                else:
                    raise ValueError(
                        "L.max() == %s, cannot infer cardinality." % lmax)
            print("Inferred cardinality: %s" % cardinality)
        self.cardinality = cardinality

        # Priors for LFs default to fixed prior value
        # NOTE: Setting default != 0.5 creates a (fixed) factor which increases
        # runtime (by ~0.5x that of a non-fixed factor)...
        if LF_acc_prior_weights is None:
            LF_acc_prior_weights = [LF_acc_prior_weight_default for _ in range(n)]
        else:
            LF_acc_prior_weights = list(copy(LF_acc_prior_weights))

        # LF weights are un-fixed
        is_fixed = [False for _ in range(n)]

        # If supervised labels are provided, add them as a fixed LF with prior
        # Note: For large L this column stack operation could be very
        # inefficient, can consider refactoring...
        if labels is not None:
            labels = labels.reshape(m, 1)
            L = sparse.hstack([L, labels])
            is_fixed.append(True)
            LF_acc_prior_weights.append(label_prior_weight)
            n += 1

        # Reduce overhead of tracking indices by converting L to a CSR sparse matrix.
        L = sparse.csr_matrix(L).copy()

        # If candidate_ranges is provided, remap the values of L using
        # candidate_ranges. This "scoped categorical" approach allows learning
        # and inference to be efficient even with very large cardinality, as
        # we only sample relevant values for each candidate. Also set
        # per-candidate cardinalities according to candidate_ranges if not None,
        # else as constant value.
        self.cardinalities = self.cardinality * np.ones(m, dtype=np.int64)
        self.candidate_ranges = candidate_ranges
        if self.candidate_ranges is not None:
            L, self.cardinalities, _ = self._remap_scoped_categoricals(L, 
                self.candidate_ranges)

        # Shuffle the data points, cardinalities, and candidate_ranges
        # range() can't be shuffled in Python 3; draw a permutation instead
        idxs = self.rng.permutation(list(range(m)))
        L = L[idxs, :]
        if candidate_ranges is not None:
            self.cardinalities = self.cardinalities[idxs]
            c_ranges_reshuffled = []
            for i in idxs:
                c_ranges_reshuffled.append(self.candidate_ranges[i])
            self.candidate_ranges = c_ranges_reshuffled

        # Compile factor graph
        self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges = self._compile(
            L, init_deps, init_class_prior, LF_acc_prior_weights, is_fixed, self.cardinalities)
        fg = NumbSkull(
            n_inference_epoch=0,
            n_learning_epoch=epochs, 
            stepsize=step_size,
            decay=decay,
            reg_param=reg_param,
            regularization=reg_type,
            truncation=truncation,
            quiet=(not verbose),
            verbose=verbose, 
            learn_non_evidence=True,
            burn_in=burn_in,
            nthreads=threads
        )
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()
        self._process_learned_weights(L, fg, LF_acc_prior_weights, is_fixed)

        # Store info from factor graph
        if self.candidate_ranges is not None:
            self.cardinality_for_stats = int(max(self.cardinalities))
        else:
            self.cardinality_for_stats = self.cardinality
        self.learned_weights = fg.factorGraphs[0].weight_value
        weight, variable, factor, ftv, domain_mask, n_edges =\
            self._compile(sparse.coo_matrix((1, n), L.dtype), init_deps,
                init_class_prior, LF_acc_prior_weights, is_fixed,
                [self.cardinality_for_stats])

        variable["isEvidence"] = False
        weight["isFixed"] = True
        weight["initialValue"] = fg.factorGraphs[0].weight_value

        fg.factorGraphs = []
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        self.fg = fg
        self.nlf = n
        self.cardinality = cardinality
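
A hedged sketch of calling this train() variant, assuming snorkel's GenerativeModel and the dependency-type constants from snorkel.learning.constants that the docstring points to; L_train stands in for a real M x N integer label matrix:

from snorkel.learning import GenerativeModel
from snorkel.learning.constants import DEP_SIMILAR, DEP_FIXING

gen_model = GenerativeModel()
gen_model.train(
    L_train,
    # LFs 0 and 1 tend to agree; LF 3 fixes mistakes made by LF 2.
    deps=[(0, 1, DEP_SIMILAR), (2, 3, DEP_FIXING)],
    epochs=30,
    reg_param=0.1,
)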
Example #7
def generate_label_matrix(weights, m):
    # Compilation

    # Weights
    n_weights = 1 if weights.class_prior != 0.0 else 0

    n_weights += weights.n

    for optional_name in GenerativeModel.optional_names:
        for i in range(weights.n):
            if getattr(weights, optional_name)[i] != 0.0:
                n_weights += 1

    for dep_name in GenerativeModel.dep_names:
        for i in range(weights.n):
            for j in range(weights.n):
                if getattr(weights, dep_name)[i, j] != 0.0:
                    n_weights += 1

    weight = np.zeros(n_weights, Weight)
    for i in range(len(weight)):
        weight[i]['isFixed'] = True

    if weights.class_prior != 0.0:
        weight[0]['initialValue'] = np.float64(weights.class_prior)
        w_off = 1
    else:
        w_off = 0

    for i in range(weights.n):
        weight[w_off + i]['initialValue'] = np.float64(weights.lf_accuracy[i])
    w_off += weights.n

    for optional_name in GenerativeModel.optional_names:
        for i in range(weights.n):
            if getattr(weights, optional_name)[i] != 0.0:
                weight[w_off]['initialValue'] = np.float64(
                    getattr(weights, optional_name)[i])
                w_off += 1

    for dep_name in GenerativeModel.dep_names:
        for i in range(weights.n):
            for j in range(weights.n):
                if getattr(weights, dep_name)[i, j] != 0.0:
                    weight[w_off]['initialValue'] = np.float64(
                        getattr(weights, dep_name)[i, j])
                    w_off += 1

    # Variables
    variable = np.zeros(1 + weights.n, Variable)

    variable[0]['isEvidence'] = 0
    variable[0]['initialValue'] = 0
    variable[0]["dataType"] = 0
    variable[0]["cardinality"] = 2

    for i in range(weights.n):
        variable[1 + i]['isEvidence'] = 0
        variable[1 + i]['initialValue'] = 0
        variable[1 + i]["dataType"] = 0
        variable[1 + i]["cardinality"] = 3

    # Factors and FactorToVar
    n_edges = 1 if weights.class_prior != 0.0 else 0
    n_edges += 2 * weights.n
    for optional_name in GenerativeModel.optional_names:
        for i in range(weights.n):
            if getattr(weights, optional_name)[i] != 0.0:
                if optional_name == 'lf_prior' or optional_name == 'lf_propensity':
                    n_edges += 1
                elif optional_name == 'lf_class_propensity':
                    n_edges += 2
                else:
                    raise ValueError()
    for dep_name in GenerativeModel.dep_names:
        for i in range(weights.n):
            for j in range(weights.n):
                if getattr(weights, dep_name)[i, j] != 0.0:
                    if dep_name == 'dep_similar' or dep_name == 'dep_exclusive':
                        n_edges += 2
                    elif dep_name == 'dep_fixing' or dep_name == 'dep_reinforcing':
                        n_edges += 3
                    else:
                        raise ValueError()

    factor = np.zeros(n_weights, Factor)
    ftv = np.zeros(n_edges, FactorToVar)

    if weights.class_prior != 0.0:
        factor[0]["factorFunction"] = FACTORS["DP_GEN_CLASS_PRIOR"]
        factor[0]["weightId"] = 0
        factor[0]["featureValue"] = 1
        factor[0]["arity"] = 1
        factor[0]["ftv_offset"] = 0

        ftv[0]["vid"] = 0

        f_off = 1
        ftv_off = 1
    else:
        f_off = 0
        ftv_off = 0

    for i in range(weights.n):
        factor[f_off + i]["factorFunction"] = FACTORS["DP_GEN_LF_ACCURACY"]
        factor[f_off + i]["weightId"] = f_off + i
        factor[f_off + i]["featureValue"] = 1
        factor[f_off + i]["arity"] = 2
        factor[f_off + i]["ftv_offset"] = ftv_off + 2 * i

        ftv[ftv_off + 2 * i]["vid"] = 0
        ftv[ftv_off + 2 * i + 1]["vid"] = 1 + i
    f_off += weights.n
    ftv_off += 2 * weights.n

    for i in range(weights.n):
        if weights.lf_prior[i] != 0.0:
            factor[f_off]["factorFunction"] = FACTORS["DP_GEN_LF_PRIOR"]
            factor[f_off]["weightId"] = f_off
            factor[f_off]["featureValue"] = 1
            factor[f_off]["arity"] = 1
            factor[f_off]["ftv_offset"] = ftv_off

            ftv[ftv_off]["vid"] = 1 + i
            f_off += 1
            ftv_off += 1

    for i in range(weights.n):
        if weights.lf_propensity[i] != 0.0:
            factor[f_off]["factorFunction"] = FACTORS["DP_GEN_LF_PROPENSITY"]
            factor[f_off]["weightId"] = f_off
            factor[f_off]["featureValue"] = 1
            factor[f_off]["arity"] = 1
            factor[f_off]["ftv_offset"] = ftv_off

            ftv[ftv_off]["vid"] = 1 + i
            f_off += 1
            ftv_off += 1

    for i in range(weights.n):
        if weights.lf_class_propensity[i] != 0.0:
            factor[f_off]["factorFunction"] = FACTORS[
                "DP_GEN_LF_CLASS_PROPENSITY"]
            factor[f_off]["weightId"] = f_off
            factor[f_off]["featureValue"] = 1
            factor[f_off]["arity"] = 2
            factor[f_off]["ftv_offset"] = ftv_off

            ftv[ftv_off]["vid"] = 0
            ftv[ftv_off + 1]["vid"] = 1 + i

            f_off += 1
            ftv_off += 2

    for dep_name in GenerativeModel.dep_names:
        for i in range(weights.n):
            for j in range(weights.n):
                if getattr(weights, dep_name)[i, j] != 0.0:
                    if dep_name == 'dep_similar' or dep_name == 'dep_exclusive':
                        factor[f_off]["factorFunction"] = FACTORS[
                            "DP_GEN_DEP_SIMILAR"] if dep_name == 'dep_similar' else FACTORS[
                                "DP_GEN_DEP_EXCLUSIVE"]
                        factor[f_off]["weightId"] = f_off
                        factor[f_off]["featureValue"] = 1
                        factor[f_off]["arity"] = 2
                        factor[f_off]["ftv_offset"] = ftv_off

                        ftv[ftv_off]["vid"] = 1 + i
                        ftv[ftv_off + 1]["vid"] = 1 + j

                        f_off += 1
                        ftv_off += 2
                    elif dep_name == 'dep_fixing' or dep_name == 'dep_reinforcing':
                        factor[f_off]["factorFunction"] = FACTORS[
                            "DP_GEN_DEP_FIXING"] if dep_name == 'dep_fixing' else FACTORS[
                                "DP_GEN_DEP_REINFORCING"]

                        factor[f_off]["weightId"] = f_off
                        factor[f_off]["featureValue"] = 1
                        factor[f_off]["arity"] = 3
                        factor[f_off]["ftv_offset"] = ftv_off

                        ftv[ftv_off]["vid"] = 0
                        ftv[ftv_off + 1]["vid"] = 1 + i
                        ftv[ftv_off + 2]["vid"] = 1 + j

                        f_off += 1
                        ftv_off += 3
                    else:
                        raise ValueError()

    # Domain mask
    domain_mask = np.zeros(1 + weights.n, bool)  # np.bool was removed in NumPy 1.24+

    # Instantiates factor graph
    ns = NumbSkull(n_inference_epoch=100, quiet=True)
    ns.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)
    fg = ns.getFactorGraph()

    y = np.ndarray((m, ), np.int64)
    L = sparse.lil_matrix((m, weights.n), dtype=np.int64)
    for i in range(m):
        fg.burnIn(10, False)
        y[i] = 1 if fg.var_value[0, 0] == 0 else -1
        for j in range(weights.n):
            if fg.var_value[0, 1 + j] != 2:
                L[i, j] = 1 if fg.var_value[0, 1 + j] == 0 else -1

    return y, L.tocsr()
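
generate_label_matrix Gibbs-samples m synthetic data points from the generative model that weights encodes: variable 0 is the latent class (value 0 maps to label +1, value 1 to -1), and variables 1..n are the LF outputs (0 maps to +1, 1 to -1, 2 to abstain). A hedged usage sketch, where w is assumed to be a populated GenerativeModelWeights-style object with the fields read above (class_prior, lf_accuracy, lf_prior, ...):

y, L = generate_label_matrix(w, 1000)
# y: length-1000 array of gold labels in {-1, +1}
# L: 1000 x w.n CSR matrix of LF votes in {-1, 0, +1} (0 = abstained)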
Example #8
from create_factorgraph import create_fg
from numbskull import NumbSkull
weight, variable, factor, fmap, domain_mask, edges = create_fg()

ns_learing = NumbSkull(
    n_inference_epoch=1000,
    n_learning_epoch=1000,
    stepsize=0.01,
    decay=0.95,
    reg_param=1e-6,
    regularization=2,
    truncation=10,
    quiet=True,
    verbose=False,
    learn_non_evidence=False,  # need to test
    sample_evidence=False,
    burn_in=10,
    nthreads=1)
subgraph = weight, variable, factor, fmap, domain_mask, edges
ns_learing.loadFactorGraph(*subgraph)
# Factor graph parameter learning
ns_learing.learning()
# Factor graph inference
# After parameter learning finishes, fix each weight (isFixed=True) and seed
# it with its learned value; the learned values live in weight_value, not in
# the weight array's initialValue field.
for index, w in enumerate(weight):
    w["isFixed"] = True
    w['initialValue'] = ns_learing.factorGraphs[0].weight_value[0][index]
ns_inference = NumbSkull(
    n_inference_epoch=1000,
    n_learning_epoch=1000,
    # The original snippet breaks off at this point; the arguments below are
    # an assumed continuation mirroring the learning configuration above.
    stepsize=0.01,
    decay=0.95,
    reg_param=1e-6,
    regularization=2,
    truncation=10,
    quiet=True,
    verbose=False,
    learn_non_evidence=False,
    sample_evidence=False,
    burn_in=10,
    nthreads=1)
ns_inference.loadFactorGraph(*subgraph)
# Run inference with the learned, now-fixed weights
ns_inference.inference()
Example #9
    def train(self, L, deps=(), LF_acc_prior_weights=None,
        LF_acc_prior_weight_default=1, labels=None, label_prior_weight=5,
        init_deps=0.0, init_class_prior=-1.0, epochs=30, step_size=None, 
        decay=1.0, reg_param=0.1, reg_type=2, verbose=False, truncation=10, 
        burn_in=5, cardinality=None, timer=None, candidate_ranges=None, threads=1):
        """
        Fits the parameters of the model to a data set. By default, learns a
        conditionally independent model. Additional unary dependencies can be
        set to be included in the constructor. Additional pairwise and
        higher-order dependencies can be included as an argument.

        Results are stored as a member named weights, instance of
        snorkel.learning.gen_learning.GenerativeModelWeights.

        :param L: M x N csr_AnnotationMatrix-type label matrix, where there are 
            M candidates labeled by N labeling functions (LFs)
        :param deps: collection of dependencies to include in the model, each 
                     element is a tuple of the form 
                     (LF 1 index, LF 2 index, dependency type),
                     see snorkel.learning.constants
        :param LF_acc_prior_weights: An N-element list of prior weights for the
            LF accuracies (log scale)
        :param LF_acc_prior_weight_default: Default prior for the weight of each 
            LF accuracy; if LF_acc_prior_weights is unset, each LF will have 
            this accuracy prior weight (log scale)
        :param labels: Optional ground truth labels
        :param label_prior_weight: The prior weight (log scale) on the ground
            truth labels (if provided) being correct
        :param init_deps: initial weight for additional dependencies, except
                          class prior (log scale)
        :param init_class_prior: initial class prior (in log scale), note only
                                 used if class_prior=True in constructor
        :param epochs: number of training epochs
        :param step_size: gradient step size, default is 0.0001
        :param decay: multiplicative decay of step size,
                      step_size_(t+1) = step_size_(t) * decay
        :param reg_param: regularization strength
        :param reg_type: 1 = L1 regularization, 2 = L2 regularization
        :param verbose: whether to write debugging info to stdout
        :param truncation: number of iterations between truncation step for L1
                           regularization
        :param burn_in: number of burn-in samples to take before beginning
                        learning
        :param cardinality: number of possible classes; by default is inferred
            from the label matrix L
        :param timer: stopwatch for profiling, must implement start() and end()
        :param candidate_ranges: Optionally, a list of M sets of integer values,
            representing the possible categorical values that each of the M
            candidates can take. A label outside this range raises an error.
            If None, each candidate can take any value from 0 to cardinality.
        :param threads: the number of threads to use for sampling. Default is 1.
        """
        m, n = L.shape
        step_size = step_size or 0.0001

        # Check to make sure matrix is int-valued
        element_type = type(L[0,0])
        # Note: Other simpler forms of this check often don't work; still not
        # sure why...
        if not issubclass(element_type, np.integer):
            raise ValueError("""Label matrix must have int-type elements, 
                but elements have type %s""" % element_type)

        # Automatically infer cardinality
        # Binary: Values in {-1, 0, 1} [Default]
        # Categorical: Values in {0, 1, ..., K}
        if cardinality is None:
            # If candidate_ranges is provided, use this to determine cardinality
            if candidate_ranges is not None:
                cardinality = max(map(max, candidate_ranges))
            else:
                # This is just an annoying hack for LIL sparse matrices...
                try:
                    lmax = L.max()
                except AttributeError:
                    lmax = L.tocoo().max()

                if lmax > 2:
                    cardinality = lmax
                elif lmax < 2:
                    cardinality = 2
                else:
                    raise ValueError(
                        "L.max() == %s, cannot infer cardinality." % lmax)
            print("Inferred cardinality: %s" % cardinality)
        self.cardinality = cardinality

        # Priors for LFs default to fixed prior value
        # NOTE: Setting default != 0.5 creates a (fixed) factor which increases
        # runtime (by ~0.5x that of a non-fixed factor)...
        if LF_acc_prior_weights is None:
            LF_acc_prior_weights = [LF_acc_prior_weight_default for _ in range(n)]
        else:
            LF_acc_prior_weights = list(copy(LF_acc_prior_weights))

        # LF weights are un-fixed
        is_fixed = [False for _ in range(n)]

        # If supervised labels are provided, add them as a fixed LF with prior
        # Note: For large L this column stack operation could be very
        # inefficient, can consider refactoring...
        if labels is not None:
            labels = labels.reshape(m, 1)
            L = sparse.hstack([L, labels])
            is_fixed.append(True)
            LF_acc_prior_weights.append(label_prior_weight)
            n += 1

        # Reduce overhead of tracking indices by converting L to a CSR sparse matrix.
        L = sparse.csr_matrix(L).copy()

        # If candidate_ranges is provided, remap the values of L using
        # candidate_ranges. This "scoped categorical" approach allows learning
        # and inference to be efficient even with very large cardinality, as
        # we only sample relevant values for each candidate. Also set
        # per-candidate cardinalities according to candidate_ranges if not None,
        # else as constant value.
        self.cardinalities = self.cardinality * np.ones(m, dtype=np.int64)
        self.candidate_ranges = candidate_ranges
        if self.candidate_ranges is not None:
            L, self.cardinalities, _ = self._remap_scoped_categoricals(L, 
                self.candidate_ranges)

        # Shuffle the data points, cardinalities, and candidate_ranges
        idxs = self.rng.permutation(list(range(m)))
        L = L[idxs, :]
        if candidate_ranges is not None:
            self.cardinalities = self.cardinalities[idxs]
            c_ranges_reshuffled = []
            for i in idxs:
                c_ranges_reshuffled.append(self.candidate_ranges[i])
            self.candidate_ranges = c_ranges_reshuffled

        # Compile factor graph
        self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges = self._compile(
            L, init_deps, init_class_prior, LF_acc_prior_weights, is_fixed, self.cardinalities)
        fg = NumbSkull(
            n_inference_epoch=0,
            n_learning_epoch=epochs, 
            stepsize=step_size,
            decay=decay,
            reg_param=reg_param,
            regularization=reg_type,
            truncation=truncation,
            quiet=(not verbose),
            verbose=verbose, 
            learn_non_evidence=True,
            burn_in=burn_in,
            nthreads=threads
        )
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()
        self._process_learned_weights(L, fg, LF_acc_prior_weights, is_fixed)

        # Store info from factor graph
        if self.candidate_ranges is not None:
            self.cardinality_for_stats = int(max(self.cardinalities))
        else:
            self.cardinality_for_stats = self.cardinality
        self.learned_weights = fg.factorGraphs[0].weight_value
        weight, variable, factor, ftv, domain_mask, n_edges =\
            self._compile(sparse.coo_matrix((1, n), L.dtype), init_deps,
                init_class_prior, LF_acc_prior_weights, is_fixed,
                [self.cardinality_for_stats])

        variable["isEvidence"] = False
        weight["isFixed"] = True
        weight["initialValue"] = fg.factorGraphs[0].weight_value

        fg.factorGraphs = []
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        self.fg = fg
        self.nlf = n
        self.cardinality = cardinality
Example #10
    def train(self, L, deps=(), LF_acc_priors=None, LF_acc_features=None, LF_acc_prior_default=0.7, 
        labels=None, label_prior=0.99, init_deps=0.0,
        init_class_prior=-1.0, epochs=30, step_size=None, decay=1.0,
        reg_param=0.1, reg_type=2, verbose=False, truncation=10, burn_in=5,
        cardinality=None, timer=None):
        """
        Fits the parameters of the model to a data set. By default, learns a
        conditionally independent model with featurized accuracies. Additional unary dependencies can be
        set to be included in the constructor. Additional pairwise and
        higher-order dependencies can be included as an argument.

        Results are stored as a member named weights, instance of
        snorkel.learning.gen_learning.GenerativeModelWeights.

        :param L: M x N csr_AnnotationMatrix-type label matrix, where there are 
            M candidates labeled by N labeling functions (LFs)
        :param deps: collection of dependencies to include in the model, each 
                     element is a tuple of the form 
                     (LF 1 index, LF 2 index, dependency type),
                     see snorkel.learning.constants
        :param LF_acc_priors: An N-element list of prior probabilities for the
            LF accuracies
        :param LF_acc_features: An N-element list of feature names, one per
            labeling function; each LF has exactly one feature, and LFs that
            share a feature share (couple) a single accuracy weight
        :param LF_acc_prior_default: Default prior probability for each LF 
            accuracy; if LF_acc_priors is unset, each LF will have this prior
        :param labels: Optional ground truth labels
        :param label_prior: The prior probability that the ground truth labels
            (if provided) are correct
        :param init_deps: initial weight for additional dependencies, except
                          class prior (in log scale)
        :param init_class_prior: initial class prior (in log scale), note only
                                 used if class_prior=True in constructor
        :param epochs: number of training epochs
        :param step_size: gradient step size, default is 0.0001
        :param decay: multiplicative decay of step size,
                      step_size_(t+1) = step_size_(t) * decay
        :param reg_param: regularization strength
        :param reg_type: 1 = L1 regularization, 2 = L2 regularization
        :param verbose: whether to write debugging info to stdout
        :param truncation: number of iterations between truncation step for L1
                           regularization
        :param burn_in: number of burn-in samples to take before beginning
                        learning
        :param cardinality: number of possible classes; by default is inferred
            from the label matrix L
        :param timer: stopwatch for profiling, must implement start() and end()
        """
        m, n = L.shape
        step_size = step_size or 0.0001
        reg_param_scaled = reg_param / L.shape[0]

        # Automatically infer cardinality
        # Binary: Values in {-1, 0, 1} [Default]
        # Categorical: Values in {0, 1, ..., K}
        if cardinality is None:
            # This is just an annoying hack for LIL sparse matrices...
            try:
                lmax = L.max()
            except AttributeError:
                lmax = L.tocoo().max()
            if lmax > 2:
                cardinality = lmax
            elif lmax < 2:
                cardinality = 2
            else:
                raise ValueError(
                    "L.max() == %s, cannot infer cardinality." % lmax)
            print("Inferred cardinality: %s" % cardinality)

        # Priors for LFs default to fixed prior value
        # NOTE: Setting default != 0.5 creates a (fixed) factor which increases
        # runtime (by ~0.5x that of a non-fixed factor)...
        if LF_acc_priors is None:
            LF_acc_priors = [LF_acc_prior_default for _ in range(n)]
        else:
            LF_acc_priors = list(copy(LF_acc_priors))
            
        if LF_acc_features is None:
            LF_acc_features = [str(i) for i in range(n)]
        else:
            LF_acc_features = list(copy(LF_acc_features))                

        # LF weights are un-fixed
        is_fixed = [False for _ in range(n)]

        # If supervised labels are provided, add them as a fixed LF with prior
        # Note: For large L this column stack operation could be very
        # inefficient, can consider refactoring...
        if labels is not None:
            labels = labels.reshape(m, 1)
            L = sparse.hstack([L.copy(), labels])
            is_fixed.append(True)
            LF_acc_priors.append(label_prior)
            n += 1

        # Shuffle the data points
        # range() can't be shuffled in Python 3; draw a permutation instead
        idxs = np.random.permutation(m)
        if not isinstance(L, sparse.csr_matrix):
            L = sparse.csr_matrix(L)
        L = L[idxs, :]

        # Compile factor graph
        self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges, feature2WoffMap =\
            self._compile(L, init_deps, init_class_prior, LF_acc_priors, LF_acc_features,
                is_fixed, cardinality)
        fg = NumbSkull(
            n_inference_epoch=0,
            n_learning_epoch=epochs, 
            stepsize=step_size,
            decay=decay,
            reg_param=reg_param_scaled,
            regularization=reg_type,
            truncation=truncation,
            quiet=(not verbose),
            verbose=verbose, 
            learn_non_evidence=True,
            burn_in=burn_in
        )
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()
        self._process_learned_weights(L, fg, LF_acc_priors, LF_acc_features, feature2WoffMap, is_fixed)        

        # Store info from factor graph
        weight, variable, factor, ftv, domain_mask, n_edges, feature2WoffMap =\
            self._compile(sparse.coo_matrix((1, n), L.dtype), init_deps,
                init_class_prior, LF_acc_priors, LF_acc_features, is_fixed, cardinality)

        variable["isEvidence"] = False
        weight["isFixed"] = True
        weight["initialValue"] = fg.factorGraphs[0].weight_value

        fg.factorGraphs = []
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        self.fg = fg
        self.nlf = n
        self.cardinality = cardinality
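
This variant couples LF accuracies through LF_acc_features: labeling functions that share a feature string share a single accuracy weight, and the priors here are probability-scale rather than log-scale. A hedged call sketch, assuming the surrounding model class and a label matrix L_train for three LFs:

# Hypothetical: LFs 0 and 1 are both regex-based and share an accuracy
# weight; LF 2 gets its own.
model.train(
    L_train,
    LF_acc_features=["regex", "regex", "distant_supervision"],
    LF_acc_prior_default=0.7,
    epochs=30,
)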
Example #11
    def train(self,
              L,
              deps=(),
              LF_acc_priors=None,
              LF_acc_features=None,
              LF_acc_prior_default=0.7,
              labels=None,
              label_prior=0.99,
              init_deps=0.0,
              init_class_prior=-1.0,
              epochs=30,
              step_size=None,
              decay=1.0,
              reg_param=0.1,
              reg_type=2,
              verbose=False,
              truncation=10,
              burn_in=5,
              cardinality=None,
              timer=None):
        """
        Fits the parameters of the model to a data set. By default, learns a
        conditionally independent model with featurized accuracies. Additional unary dependencies can be
        set to be included in the constructor. Additional pairwise and
        higher-order dependencies can be included as an argument.

        Results are stored as a member named weights, instance of
        snorkel.learning.gen_learning.GenerativeModelWeights.

        :param L: M x N csr_AnnotationMatrix-type label matrix, where there are 
            M candidates labeled by N labeling functions (LFs)
        :param deps: collection of dependencies to include in the model, each 
                     element is a tuple of the form 
                     (LF 1 index, LF 2 index, dependency type),
                     see snorkel.learning.constants
        :param LF_acc_priors: An N-element list of prior probabilities for the
            LF accuracies
        :param LF_acc_features: An N-element list of feature names, one per
            labeling function; each LF has exactly one feature, and LFs that
            share a feature share (couple) a single accuracy weight
        :param LF_acc_prior_default: Default prior probability for each LF 
            accuracy; if LF_acc_priors is unset, each LF will have this prior
        :param labels: Optional ground truth labels
        :param label_prior: The prior probability that the ground truth labels
            (if provided) are correct
        :param init_deps: initial weight for additional dependencies, except
                          class prior (in log scale)
        :param init_class_prior: initial class prior (in log scale), note only
                                 used if class_prior=True in constructor
        :param epochs: number of training epochs
        :param step_size: gradient step size, default is 0.0001
        :param decay: multiplicative decay of step size,
                      step_size_(t+1) = step_size_(t) * decay
        :param reg_param: regularization strength
        :param reg_type: 1 = L1 regularization, 2 = L2 regularization
        :param verbose: whether to write debugging info to stdout
        :param truncation: number of iterations between truncation step for L1
                           regularization
        :param burn_in: number of burn-in samples to take before beginning
                        learning
        :param cardinality: number of possible classes; by default is inferred
            from the label matrix L
        :param timer: stopwatch for profiling, must implement start() and end()
        """
        m, n = L.shape
        step_size = step_size or 0.0001
        reg_param_scaled = reg_param / L.shape[0]

        # Automatically infer cardinality
        # Binary: Values in {-1, 0, 1} [Default]
        # Categorical: Values in {0, 1, ..., K}
        if cardinality is None:
            # This is just an annoying hack for LIL sparse matrices...
            try:
                lmax = L.max()
            except AttributeError:
                lmax = L.tocoo().max()
            if lmax > 2:
                cardinality = lmax
            elif lmax < 2:
                cardinality = 2
            else:
                raise ValueError("L.max() == %s, cannot infer cardinality." %
                                 lmax)
            print("Inferred cardinality: %s" % cardinality)

        # Priors for LFs default to fixed prior value
        # NOTE: Setting default != 0.5 creates a (fixed) factor which increases
        # runtime (by ~0.5x that of a non-fixed factor)...
        if LF_acc_priors is None:
            LF_acc_priors = [LF_acc_prior_default for _ in range(n)]
        else:
            LF_acc_priors = list(copy(LF_acc_priors))

        if LF_acc_features is None:
            LF_acc_features = [str(i) for i in range(n)]
        else:
            LF_acc_features = list(copy(LF_acc_features))

        # LF weights are un-fixed
        is_fixed = [False for _ in range(n)]

        # If supervised labels are provided, add them as a fixed LF with prior
        # Note: For large L this column stack operation could be very
        # inefficient, can consider refactoring...
        if labels is not None:
            labels = labels.reshape(m, 1)
            L = sparse.hstack([L.copy(), labels])
            is_fixed.append(True)
            LF_acc_priors.append(label_prior)
            n += 1

        # Shuffle the data points
        # range() can't be shuffled in Python 3; draw a permutation instead
        idxs = np.random.permutation(m)
        if not isinstance(L, sparse.csr_matrix):
            L = sparse.csr_matrix(L)
        L = L[idxs, :]

        # Compile factor graph
        self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges, feature2WoffMap =\
            self._compile(L, init_deps, init_class_prior, LF_acc_priors, LF_acc_features,
                is_fixed, cardinality)
        fg = NumbSkull(n_inference_epoch=0,
                       n_learning_epoch=epochs,
                       stepsize=step_size,
                       decay=decay,
                       reg_param=reg_param_scaled,
                       regularization=reg_type,
                       truncation=truncation,
                       quiet=(not verbose),
                       verbose=verbose,
                       learn_non_evidence=True,
                       burn_in=burn_in)
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()
        self._process_learned_weights(L, fg, LF_acc_priors, LF_acc_features,
                                      feature2WoffMap, is_fixed)

        # Store info from factor graph
        weight, variable, factor, ftv, domain_mask, n_edges, feature2WoffMap =\
            self._compile(sparse.coo_matrix((1, n), L.dtype), init_deps,
                init_class_prior, LF_acc_priors, LF_acc_features, is_fixed, cardinality)

        variable["isEvidence"] = False
        weight["isFixed"] = True
        weight["initialValue"] = fg.factorGraphs[0].weight_value

        fg.factorGraphs = []
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        self.fg = fg
        self.nlf = n
        self.cardinality = cardinality
Example #12
    def train(self,
              L,
              y=None,
              deps=(),
              init_acc=1.0,
              init_deps=1.0,
              init_class_prior=-1.0,
              epochs=10,
              step_size=None,
              decay=0.99,
              reg_param=0.1,
              reg_type=2,
              verbose=False,
              truncation=10,
              burn_in=5,
              timer=None):
        """
        Fits the parameters of the model to a data set. By default, learns a conditionally independent model.
        Additional unary dependencies can be set to be included in the constructor. Additional pairwise and higher-order
        dependencies can be included as an argument.

        Results are stored as a member named weights, instance of snorkel.learning.gen_learning.GenerativeModelWeights.

        :param L: labeling function output matrix
        :param y: optional ground truth labels
        :param deps: collection of dependencies to include in the model, each element is a tuple of the form
                     (LF 1 index, LF 2 index, dependency type), see snorkel.learning.constants
        :param init_acc: initial weight for accuracy dependencies (in log scale)
        :param init_deps: initial weight for additional dependencies, except class prior (in log scale)
        :param init_class_prior: initial class prior (in log scale), note only used if class_prior=True in constructor
        :param epochs: number of training epochs
        :param step_size: gradient step size, default is 1 / L.shape[0]
        :param decay: multiplicative decay of step size, step_size_(t+1) = step_size_(t) * decay
        :param reg_param: regularization strength
        :param reg_type: 1 = L1 regularization, 2 = L2 regularization
        :param verbose: whether to write debugging info to stdout
        :param truncation: number of iterations between truncation step for L1 regularization
        :param burn_in: number of burn-in samples to take before beginning learning
        :param timer: stopwatch for profiling, must implement start() and end()
        """

        step_size = step_size or 1.0 / L.shape[0]
        reg_param_scaled = reg_param / L.shape[0]
        self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges = self._compile(
            L, y, init_acc, init_deps, init_class_prior)
        fg = NumbSkull(n_inference_epoch=0,
                       n_learning_epoch=epochs,
                       stepsize=step_size,
                       decay=decay,
                       reg_param=reg_param_scaled,
                       regularization=reg_type,
                       truncation=truncation,
                       quiet=(not verbose),
                       verbose=verbose,
                       learn_non_evidence=True,
                       burn_in=burn_in)
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)
        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()
        self._process_learned_weights(L, fg)
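
As in Example #6, this train() is normally followed by an inference pass over the label matrix to obtain per-candidate marginals; a hedged closing sketch, assuming the snorkel-style GenerativeModel API used throughout these examples:

gen_model = GenerativeModel()
gen_model.train(L_train, epochs=10, decay=0.99)
train_marginals = gen_model.marginals(L_train)   # P(y = +1) per candidate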