def Bootstrap_cv(estimator1, estimator2, X, y, score_func, cv=None, n_jobs=1,
                 verbose=0, ratio=.5):
    """Evaluate a pair of estimators on every fold of a CV scheme.

    Each fold is handed to ``dual_cross_val_score`` together with fresh
    clones of both estimators; the folds are dispatched through
    ``cross_validation.Parallel``.

    Parameters
    ----------
    estimator1, estimator2 : estimator objects
        Cloned once per fold. Only ``estimator1`` is consulted when deciding
        whether ``check_cv`` should treat the task as classification.
    X, y : array-like
        Validated with ``cross_validation.check_arrays`` (CSR sparse format).
    score_func : callable or None
        Forwarded to ``dual_cross_val_score``. When None, both estimators
        must have a ``score`` attribute.
    cv : object or None
        Forwarded to ``cross_validation.check_cv``.
    n_jobs, verbose : int
        Forwarded to ``Parallel``; ``verbose`` is also passed to every fold.
    ratio : float
        Forwarded unchanged to ``dual_cross_val_score``
        (presumably the blending weight between the two estimators --
        TODO confirm against ``dual_cross_val_score``).

    Returns
    -------
    numpy.ndarray
        The per-fold results of ``dual_cross_val_score``.

    Raises
    ------
    TypeError
        If ``score_func`` is None and one of the estimators has no ``score``
        attribute. The message names the estimator that is actually missing
        it.
    """
    X, y = cross_validation.check_arrays(X, y, sparse_format='csr')
    cv = cross_validation.check_cv(
        cv, X, y,
        classifier=cross_validation.is_classifier(estimator1))
    if score_func is None:
        # Report the estimator that really lacks `score`; previously the
        # message always blamed estimator1, even when estimator2 was at fault.
        for candidate in (estimator1, estimator2):
            if not hasattr(candidate, 'score'):
                raise TypeError(
                    "If no score_func is specified, the estimator passed "
                    "should have a 'score' method. The estimator %s "
                    "does not." % (candidate,))
    # We clone the estimators to make sure that all the folds are
    # independent, and that they are pickle-able.
    parallel = cross_validation.Parallel(n_jobs=n_jobs, verbose=verbose)
    scores = parallel(
        cross_validation.delayed(dual_cross_val_score)(
            cross_validation.clone(estimator1),
            cross_validation.clone(estimator2),
            X, y, score_func, train, test, verbose, ratio)
        for train, test in cv)
    return np.array(scores)
示例#2
0
def cross_val_score(estimator,
                    X,
                    y=None,
                    score_func=None,
                    cv=None,
                    n_jobs=-1,
                    verbose=0,
                    as_dvalues=False):
    """Compute one cross-validation result per fold.

    Stand-in for :func:`sklearn.cross_validation.cross_val_score` that can
    also produce decision values: ``as_dvalues`` is forwarded to
    ``_cross_val_score`` for every fold.

    Raises ``TypeError`` when ``score_func`` is None and ``estimator`` has no
    ``score`` attribute. Returns the per-fold results as a numpy array.
    """
    X, y = check_arrays(X, y, sparse_format='csr')
    folds = check_cv(cv, X, y, classifier=is_classifier(estimator))

    # Without an explicit scoring function the estimator has to score itself.
    if score_func is None and not hasattr(estimator, 'score'):
        raise TypeError(
            "If no score_func is specified, the estimator passed "
            "should have a 'score' method. The estimator %s "
            "does not." % estimator)

    pool = Parallel(n_jobs=n_jobs, verbose=verbose)
    # A fresh clone per fold keeps the folds independent and pickle-able.
    fold_jobs = (
        delayed(_cross_val_score)(
            clone(estimator), X, y, score_func,
            train_idx, test_idx, verbose, as_dvalues)
        for train_idx, test_idx in folds
    )
    return np.array(pool(fold_jobs))
示例#3
0
def dynamic_cross_val_predict(estimator, fv, esa_feature_list, unigram_feature_list, dynamic_X, y=None, cv=None,
                              verbose=0, fit_params=None):
    """Cross-validated prediction with a feature matrix rebuilt for each fold.

    For every fold the feature dicts in ``fv`` are deep-copied, extended with
    the fold-specific "dynamic" features taken from ``dynamic_X``, and
    vectorized again before a fresh clone of ``estimator`` is fitted and used
    to predict the fold's test samples.

    Parameters
    ----------
    estimator : estimator object
        Cloned for every fold.
    fv : sequence of dict
        One feature dict per sample; never mutated (a deep copy is made for
        each fold).
    esa_feature_list, unigram_feature_list : iterable
        Names of the dynamic features to merge in. Each name is mapped to a
        position in the fold's entry of ``dynamic_X`` through
        ``find_index_for_dynamic_feature``.
    dynamic_X : sequence
        Indexed as ``dynamic_X[fold][feature_index][sample]``; every such
        item is passed to ``dict.update`` on the sample's feature dict.
    y : array-like, optional
        Targets, forwarded to the CV splitter and to fitting.
    cv : object or None
        Forwarded to ``cross_validation.check_cv``.
    verbose : int
        Forwarded to ``cross_validation._fit_and_predict``.
    fit_params : dict or None
        Forwarded to ``cross_validation._fit_and_predict``.

    Returns
    -------
    Predictions for all samples, reordered to the original sample order.

    Raises
    ------
    ValueError
        If the test sets of ``cv`` do not form a partition of the samples.
    """
    dynamic_features = list(esa_feature_list) + list(unigram_feature_list)
    # `%` binds tighter than `+`: the former `"..." % a + b` tried to add a
    # list to the already formatted string. Format the combined list instead.
    print("dynamic predict cross val mit %s" % (dynamic_features,))

    vec = DictVectorizer()

    # The initial matrix is only needed to set up the CV splitter.
    X = vec.fit_transform(fv).toarray()

    X, y = cross_validation.indexable(X, y)
    cv = cross_validation.check_cv(cv, X, y, classifier=cross_validation.is_classifier(estimator))

    preds_blocks = []

    for cross_val_step, (train, test) in enumerate(cv):
        fv_copy = copy.deepcopy(fv)
        # Dynamic feature vectors that belong to this fold.
        dynamic_vec = dynamic_X[cross_val_step]

        # Rebuild X in every step: extend the i-th feature dict with the
        # fold-specific value of each dynamic feature.
        for i, feature_dict in enumerate(fv_copy):
            for feature in dynamic_features:
                feature_dict.update(dynamic_vec[find_index_for_dynamic_feature(feature)][i])

        X = vec.fit_transform(fv_copy).toarray()

        preds_blocks.append(cross_validation._fit_and_predict(cross_validation.clone(estimator), X, y,
                                                              train, test, verbose,
                                                              fit_params))

    preds = [p for p, _ in preds_blocks]
    locs = np.concatenate([loc for _, loc in preds_blocks])
    if not cross_validation._check_is_partition(locs, cross_validation._num_samples(X)):
        raise ValueError('cross_val_predict only works for partitions')
    # Inverse permutation: maps fold order back to the original sample order.
    inv_locs = np.empty(len(locs), dtype=int)
    inv_locs[locs] = np.arange(len(locs))

    # Check for sparse predictions
    if sp.issparse(preds[0]):
        preds = sp.vstack(preds, format=preds[0].format)
    else:
        preds = np.concatenate(preds)
    return preds[inv_locs]
示例#4
0
def dynamic_cross_val_score(estimator, fv, esa_feature_list, unigram_feature_list, dynamic_X, y=None, scoring=None, cv=None,
                verbose=0, fit_params=None):
    """Cross-validated scoring with a feature matrix rebuilt for each fold.

    For every fold the feature dicts in ``fv`` are deep-copied, extended with
    the fold-specific "dynamic" features taken from ``dynamic_X``, and
    vectorized again before a fresh clone of ``estimator`` is fitted and
    scored on the fold.

    Parameters
    ----------
    estimator : estimator object
        Cloned for every fold.
    fv : sequence of dict
        One feature dict per sample; never mutated (a deep copy is made for
        each fold).
    esa_feature_list, unigram_feature_list : iterable
        Names of the dynamic features to merge in. Each name is mapped to a
        position in the fold's entry of ``dynamic_X`` through
        ``find_index_for_dynamic_feature``.
    dynamic_X : sequence
        Indexed as ``dynamic_X[fold][feature_index][sample]``; every such
        item is passed to ``dict.update`` on the sample's feature dict.
    y : array-like, optional
        Targets, forwarded to the CV splitter and to fitting.
    scoring : object or None
        Forwarded to ``cross_validation.check_scoring``.
    cv : object or None
        Forwarded to ``cross_validation.check_cv``.
    verbose : int
        Forwarded to ``cross_validation._fit_and_score``.
    fit_params : dict or None
        Forwarded to ``cross_validation._fit_and_score``.

    Returns
    -------
    numpy.ndarray
        First column of the per-fold results of ``_fit_and_score``.
    """
    dynamic_features = list(esa_feature_list) + list(unigram_feature_list)
    # `%` binds tighter than `+`: the former `"..." % a + b` tried to add a
    # list to the already formatted string. Format the combined list instead.
    print("dynamic cross val mit %s" % (dynamic_features,))
    vec = DictVectorizer()

    # The initial matrix is only needed to set up the CV splitter.
    X = vec.fit_transform(fv).toarray()

    X, y = cross_validation.indexable(X, y)

    cv = cross_validation.check_cv(cv, X, y, classifier=cross_validation.is_classifier(estimator))
    scorer = cross_validation.check_scoring(estimator, scoring=scoring)
    scores = []

    for cross_val_step, (train, test) in enumerate(cv):
        fv_copy = copy.deepcopy(fv)
        # Dynamic feature vectors that belong to this fold.
        dynamic_vec = dynamic_X[cross_val_step]

        # Rebuild X in every step: extend the i-th feature dict with the
        # fold-specific value of each dynamic feature.
        for i, feature_dict in enumerate(fv_copy):
            for feature in dynamic_features:
                feature_dict.update(dynamic_vec[find_index_for_dynamic_feature(feature)][i])

        X = vec.fit_transform(fv_copy).toarray()

        scores.append(cross_validation._fit_and_score(cross_validation.clone(estimator), X, y, scorer,
                                                      train, test, verbose, None, fit_params))

    return np.array(scores)[:, 0]
示例#5
0
def cross_val_score(estimator, X, y=None, score_func=None, cv=None, n_jobs=-1,
    verbose=0, as_dvalues=False):
  """Run cross-validation and collect the result of every fold.

  Drop-in substitute for :func:`sklearn.cross_validation.cross_val_score`
  whose extra ``as_dvalues`` flag is handed on to ``_cross_val_score`` so
  that decision values can be computed.

  A ``TypeError`` is raised when neither ``score_func`` nor an estimator
  ``score`` attribute is available. The per-fold results come back as a
  numpy array.
  """
  X, y = check_arrays(X, y, sparse_format='csr')
  classification = is_classifier(estimator)
  cv = check_cv(cv, X, y, classifier=classification)

  can_self_score = hasattr(estimator, 'score')
  if score_func is None and not can_self_score:
    raise TypeError(
        "If no score_func is specified, the estimator passed "
        "should have a 'score' method. The estimator %s "
        "does not." % estimator)

  runner = Parallel(n_jobs=n_jobs, verbose=verbose)
  # Every fold works on its own clone, so folds stay independent and the
  # estimator can be pickled for the workers.
  results = runner(
      delayed(_cross_val_score)(
          clone(estimator), X, y, score_func, fit_rows, eval_rows,
          verbose, as_dvalues)
      for fit_rows, eval_rows in cv)
  return np.array(results)
示例#6
0
def Scaled(algorithm):
    """Return a Pipeline that runs a StandardScaler before a clone of `algorithm`."""
    steps = [
        ('scaler', StandardScaler()),
        ('learner', clone(algorithm)),
    ]
    return Pipeline(steps)
示例#7
0
def Scaled(algorithm):
  """Build a two-step Pipeline: a StandardScaler, then a clone of `algorithm`."""
  scaling_step = ('scaler', StandardScaler())
  learning_step = ('learner', clone(algorithm))
  return Pipeline([scaling_step, learning_step])
示例#8
0
# Evaluate an extra-forest -> logit pipeline on bootstrap resamples of
# train_data, then predict test_data.
ratio = .2
estimators = 20
train_size = .7
#output = ratio*forest.predict(test_data) + (1-ratio)*logit.predict(test_data)
#output = extra_forest.predict(test_data)

# Column 0 of train_data is used as the target, the remaining columns as
# features. Sliced once here instead of once per fold.
X = train_data[0::,1::]
y = train_data[0::,0]

#Get bootstrapped data
bs = cross_validation.Bootstrap(train_data.shape[0], n_bootstraps=estimators, train_size=train_size, random_state=0)
cv = cross_validation.check_cv(bs, X, y, classifier=cross_validation.is_classifier(extra_forest))
# NOTE(review): `score` is appended to below but is not initialised in this
# part of the script; presumably it is an (empty) list created earlier --
# confirm.
for train, test in cv:
  #Create estimator: fresh clones so that every resample is fitted independently
  ef = cross_validation.clone(extra_forest)
  lgi = cross_validation.clone(logit)
  est = Pipeline([('ef', ef), ('logit', lgi)])
  est.fit(X[train], y[train])
  #print est.feature_importances_
  score.append(est.score(X[test], y[test]))

#Format output
score = np.array(score)

# NOTE(review): `est` is the pipeline fitted on the last resample only.
output = est.predict(test_data)

#Score
print(score)
print(score.mean())
print("EF+Logit Accuracy: %0.2f (+/- %0.2f)" % (score.mean(), score.std() / 2))