Example No. 1
def interactor(df):
    """ This function takes in a data frame and creates binary interaction
    terms from all numerical and categorical variables as well as the assessment
    questions, and outputs a data frame """

    my_data_complete = df.dropna()
    # interactions can only be done for non-missings
    colnames = list(my_data_complete.columns.values)
    # id and date columns
    id_cols_list = [
        x
        for x in colnames  # columns without a feature suffix
        if not (bool(re.search("_N$", x)) | bool(re.search("_C$", x)) | bool(re.search("_Q$", x)))
    ]
    # actual feature columns - to make interactions from
    new_cols_list = [
        x
        for x in colnames  # columns ending in _N, _C or _Q
        if (bool(re.search("_N$", x)) | bool(re.search("_C$", x)) | bool(re.search("_Q$", x)))
    ]
    othervars = my_data_complete[id_cols_list]
    little_df = my_data_complete[new_cols_list]
    # computing all binary interaction terms
    poly = PolynomialFeatures(degree=2, interaction_only=True)
    theints = pd.DataFrame(poly.fit_transform(little_df))
    theints = theints.drop(theints.columns[0], axis=1)  # drop the bias column added by PolynomialFeatures
    theints.columns = list(new_cols_list + list(itertools.combinations(new_cols_list, 2)))
    # concatenating the interaction terms to the original data frame
    df = pd.DataFrame(othervars.join(theints))
    new_features = theints.columns.values
    return df, new_features
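A minimal usage sketch with hypothetical column names following the _N/_C/_Q suffix convention; it assumes re, itertools, pandas and sklearn's PolynomialFeatures are already imported at module level, as the function requires:

import pandas as pd

toy = pd.DataFrame({
    "patient_id": [1, 2, 3, 4],         # id column, no feature suffix
    "age_N": [34.0, 51.0, 42.0, 29.0],  # numerical feature
    "smoker_C": [0, 1, 0, 1],           # categorical feature (already encoded)
    "q1_Q": [2, 5, 3, 4],               # assessment question
})
wide_df, interaction_names = interactor(toy)
print(interaction_names)  # original features plus pairwise interaction tuples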
Example No. 2
def learning_curve(classifier, X, y, cv, sample_sizes,
    degree=1, pickle_path=None, verbose=True):
    """ Learning curve
    """

    learning_curves = []
    for i, (train_index, test_index) in enumerate(cv):
        X_train = X[train_index]
        X_test = X[test_index]
        y_train = y[train_index]
        y_test = y[test_index]

        if degree > 1:
            poly = PolynomialFeatures(degree=degree, interaction_only=False, include_bias=True)
            X_train = poly.fit_transform(X_train)
            X_test = poly.transform(X_test)

        lc = []
        for sample in sample_sizes:
            classifier.fit(X_train[:sample], y_train[:sample])

            # apply classifier on test set
            y_pred = classifier.predict(X_test)
            confusion = metrics.confusion_matrix(y_test, y_pred)
            lc.append(balanced_accuracy_expected(confusion))

        learning_curves.append(lc)
        if verbose: print(i, end=' ')
    
    # pickle learning curve
    if pickle_path:
        with open(pickle_path, 'wb') as f:
            pickle.dump(learning_curves, f, protocol=4)
    if verbose: print()
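A hedged usage sketch: X and y are assumed NumPy arrays, cv must be an iterable of (train_index, test_index) pairs, and balanced_accuracy_expected, metrics and pickle are project-level names the function expects to find:

from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression

splits = list(KFold(n_splits=5, shuffle=True, random_state=0).split(X))
learning_curve(LogisticRegression(max_iter=1000), X, y,
               cv=splits, sample_sizes=[50, 100, 200, 400],
               degree=2, pickle_path='curves.pkl')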
Example No. 3
def load_data(datafile = DEFAULT_DATA_FILE):
    """
    Loads feature and classification data from the given file.
    Returns a tuple of (features, labels) where both are
    features is an NP array and labels is a list.
    """
    # Read data
    dataframe = pd.read_csv(datafile)
    views = [ float(x) for x in dataframe["views"].tolist()]
    favs = [ float(x) for x in dataframe["favorites"].tolist()]
    ratio = []
    for i in range(len(views)):
        ratio.append(favs[i] / views[i])
    ratio = cap_results(ratio)

    # Uncomment this and fix the return line if you want to do 
    # favorites prediction instead
    # favs = dataframe["favorites"].tolist()

    # 5 is the first column after 'favs'.
    # -1 means last column from the end, because I added an extra
    # column for the artist name, which we want to ignore.
    dataframe = dataframe.drop("artist", axis=1)
    dataframe = dataframe.iloc[:, 5:]
    features = dataframe.values

    # # interaction terms
    poly = PolynomialFeatures(interaction_only=True)
    features = poly.fit_transform(features)
    print "interaction"

    return features, ratio, dataframe.columns.values
Example No. 4
def main():
    weekfile = sys.argv[1]
    modelfile = sys.argv[2]
    polyorder = int(sys.argv[3])
    metadata = sys.argv[4]

    week_data = np.genfromtxt(weekfile, delimiter=',', skip_header=1)
    
    poly = PolynomialFeatures(degree=polyorder)
    Xpoly = poly.fit_transform(week_data)

    with open(modelfile, 'rb') as model, open(metadata) as md:
        lr = pickle.load(model)
        preds = lr.predict(Xpoly).astype(int)
        probs = lr.predict_proba(Xpoly)
        results = []

        for i, line in enumerate(md):
            home, away = line.strip().split(',')
            if preds[i] == 1:
                results.append((home+'*', "{0:.3f}".format(probs[i,1]), away, "{0:.3f}".format(probs[i,0])))
            else: 
                results.append((away, "{0:.3f}".format(probs[i,0]), home+'*', "{0:.3f}".format(probs[i,1])))

        results = sorted(results, key=getWinnerProb, reverse=True)
        for result in results:
            print('\t'.join(result))
Example No. 5
    def predict(self, x):
        ## the model was trained on polynomial features, so x must be transformed the same way
        poly = PolynomialFeatures(degree=self.degree)
        polynomial_features = poly.fit_transform(x)

        print(polynomial_features.shape)
        return self.model.predict(polynomial_features)
Example No. 6
def lassoRegression(X,y):

    print("\n### ~~~~~~~~~~~~~~~~~~~~ ###")
    print("Lasso Regression")

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    myDegree = 40
    polynomialFeatures = PolynomialFeatures(degree=myDegree, include_bias=False)
    Xp = polynomialFeatures.fit_transform(X)

    myScaler = StandardScaler()
    scaled_Xp = myScaler.fit_transform(Xp)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    lassoRegression = Lasso(alpha=1e-7)
    lassoRegression.fit(scaled_Xp,y)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    dummyX = np.arange(0,2,0.01)
    dummyX = dummyX.reshape((dummyX.shape[0],1))
    dummyXp = polynomialFeatures.fit_transform(dummyX)
    scaled_dummyXp = myScaler.transform(dummyXp)
    dummyY = lassoRegression.predict(scaled_dummyXp)

    outputFILE = 'plot-lassoRegression.png'
    fig, ax = plt.subplots()
    fig.set_size_inches(h = 6.0, w = 10.0)
    ax.axis([0,2,0,15])
    ax.scatter(X,y,color="black",s=10.0)
    ax.plot(dummyX, dummyY, color='red', linewidth=1.5)
    plt.savefig(outputFILE, bbox_inches='tight', pad_inches=0.2, dpi=600)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    return None
def computeZs(w, x_train):
    z = []
    for i in range(x_train.shape[0]):
        z.append(np.array([sigmoid(w[j], x_train[i]) for j in range(w.shape[0])]))
    poly = PolynomialFeatures(1)
    z = poly.fit_transform(z)
    return np.array(z)
def log_reg_kclass(x, y, nfolds=4, degree=1, limit=None):
    """Performs logistic regression experiments on Iris dataset for k class discrimination."""
    #print 'Training k Class classifier on Iris dataset'
    if limit is not None:
        print('Considering only', limit, 'datapoints')
        x = x[:limit]
        y = y[:limit]

    #x /= x.max(axis=0)

    poly = PolynomialFeatures(degree)
    x = poly.fit_transform(x)
    num_classes = len(set(y))
    avg_accuracy = 0.
    avg_precision = np.zeros(num_classes)
    avg_recall = np.zeros(num_classes)
    avg_fscore = np.zeros(num_classes)
    avg_conf_mat = np.zeros([num_classes, num_classes])
    kf = KFold(y.shape[0], n_folds=nfolds, shuffle=True)
    
    for train_ids, test_ids in kf:
        thetas = log_reg_trainer_kclass(x[train_ids], y[train_ids])
        y_pred = log_reg_pred_kclass(thetas, x[test_ids])
        acc = accuracy_score(y[test_ids], y_pred)
        avg_accuracy += acc
        precision1, recall1, fscore1, supp1 = precision_recall_fscore_support(y[test_ids], y_pred)
        conf_mat = confusion_matrix(y[test_ids], y_pred)
        avg_precision += precision1
        avg_recall += recall1
        avg_fscore += fscore1
        avg_conf_mat += conf_mat
    
    return avg_accuracy / nfolds, avg_precision / nfolds, avg_recall / nfolds, avg_fscore / nfolds, avg_conf_mat / nfolds
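A hedged usage sketch on the Iris data, assuming the project's own log_reg_trainer_kclass and log_reg_pred_kclass helpers are defined elsewhere and the older KFold(n, n_folds=...) API used in the snippet is available:

from sklearn.datasets import load_iris

iris = load_iris()
acc, precision, recall, fscore, conf_mat = log_reg_kclass(iris.data, iris.target,
                                                          nfolds=4, degree=2)
print('average accuracy:', acc)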
Example No. 9
def get_cl(tau, consider='EE', degree=5):
    if consider == 'EE':
        values = values_EE
    else:
        values = values_BB

    v = values#[:100]
    p = points#[:100]

    poly = PolynomialFeatures(degree=degree)
    # Vandermonde matrix of pre-computed parameter values.
    X_ = poly.fit_transform(p.reshape(-1,1))

    predict = np.array([tau]).reshape(1,-1)
    # Builds the design matrix for the parameter values you want to estimate
    # from the existing measurements. Computation time grows only slowly as
    # you ask for estimates of many parameter sets.
    predict_ = poly.fit_transform(predict)

    clf = LinearRegression()
    estimate = []
    for l in range(2, v.shape[1]):
        values_l = v[:,l]
        clf.fit(X_, values_l)
        estimate_l = clf.predict(predict_)
        estimate.append(estimate_l)
    estimate = np.array(estimate)

    ell = np.arange(2, l+1)
    Z = 2*np.pi/(ell*(ell+1))
    return ell, Z*estimate[:,0]
	def polynomial_expansion(self, rank=2):
		"""
		Expand the features with a polynomial of rank `rank`.
		"""
		pf = PolynomialFeatures(degree=rank)
		self.X_red = pf.fit_transform(self.X_red)
		self.X_white = pf.fit_transform(self.X_white)
def test_polynomial_fits(x, y, n_comps, model, k_folds=3):
  for i in range(1,6):
    poly = PolynomialFeatures(degree=i)
    poly_x = poly.fit_transform(x)
    r2_mean, r2_std, mse_mean, mse_std = run_conventional_linkage(poly_x, y, n_comps, model)
    print(r2_mean, r2_std, mse_mean, mse_std)
    print()
Example No. 12
def main():
    testfile = sys.argv[1]
    modelfile = sys.argv[2]
    polyorder = int(sys.argv[3])
    testweeks = sys.argv[4]

    test_data = np.genfromtxt(testfile, delimiter=',', skip_header=1)

    X = test_data[:,:-1]
    y = test_data[:,-1]

    poly = PolynomialFeatures(degree=polyorder)
    Xpoly = poly.fit_transform(X)

    with open(modelfile, 'rb') as model, open(testweeks) as weeks:
        lr = pickle.load(model)
        games_per_week = (int(line) for line in weeks)
        ranges = []
        pos = 0
        for week in games_per_week:
            newpos = pos + week
            ranges.append( (pos, newpos) )
            pos = newpos
        print('W\tL\tPoints')
        weekly_results = (evaluate_week(week, Xpoly, y, lr) for week in ranges)
        for result in weekly_results:
            print('\t'.join(str(piece) for piece in result))
Example No. 13
def newtonRaphson(inputFiles):
      pol = PolynomialFeatures(2)
      errors = []
      for File  in inputFiles:
            data = tools.readData(File)
            X = data[:, :-1]
            Y = data[:, -1]
            kf = KFold(len(Y), n_folds = 10)
            trainError = 0
            testError = 0
            for train, test in kf:
                  Z = pol.fit_transform(X[train])
                  row, col = Z.shape
                  theta = np.zeros(col, dtype='float')  # start Newton-Raphson from the zero vector
                  meanDiff = 1.0
                  i = 1
                  #print "Theta iteration %s: \n%s" % ('0', str(theta))
                  while abs(meanDiff) > 1.0e-15 :
                        theta_new = recalculateTheta(theta, Z, Y[train])
                        diff = np.subtract(theta_new, theta)
                        meanDiff = np.mean(diff)
                        #print "Theta iteration %s: \n%s" % (str(i), str(theta_new))
                        #print "Diff: %s" % str(meanDiff)
                        theta = theta_new
                        i += 1
                  Z_test = pol.fit_transform(X[test])
                  Y_hat_test = np.dot(Z_test, theta)
                  Y_hat = np.dot(Z, theta)
                  trainError += tools.findError(Y_hat, Y[train])
                  testError += tools.findError(Y_hat_test, Y[test])
            trainError = trainError/len(kf)
            testError = testError/len(kf)
            iterative_error = [trainError, testError]
            errors.append(iterative_error)
      return np.asarray(errors)
Example No. 14
def set_pdynome_degree(degree, lis):
    lis = [lis]
    poly = PolynomialFeatures(degree)
    result = poly.fit_transform(lis)
    result = result.tolist()
    result = result[0]
    return result
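For degree=2 and a two-element list [a, b], the returned expansion is [1, a, b, a^2, a*b, b^2]; for example:

print(set_pdynome_degree(2, [2, 3]))  # [1.0, 2.0, 3.0, 4.0, 6.0, 9.0]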
Example No. 15
def init_predict(mode):
    """ 整理为用于预测的 X

    i: features
    o: X
    """
    import scipy.io as sio
    import scipy as sp
    from sklearn.preprocessing import PolynomialFeatures

    uid_ave = sio.loadmat('predict_cut_uid_ave.mat')['X']
    poly = PolynomialFeatures(degree=2)
    poly_uid_ave = poly.fit_transform(uid_ave)
    combined_list = [sp.sparse.csc_matrix(poly_uid_ave)]

    if mode == 'f':
        X_words = sio.loadmat('predict_cut_Xf.mat')['X']
    elif mode == 'c':
        X_words = sio.loadmat('predict_cut_Xc.mat')['X']
    else:
        X_words = sio.loadmat('predict_cut_Xl.mat')['X']
    #transformer = TfidfTransformer()
    #X_tfidf = transformer.fit_transform(X_words)

    combined_list.append(X_words)

    X = sp.sparse.hstack(combined_list)

    print(X.shape)
    return X
Example No. 16
def polyRegressionKFold(inputFiles, deg=2):
      print "***************************"
      print "Degree: %s" % deg
      start_time = time.time()
      errors = []
      for File in inputFiles:
            print "___________________________"
            print "Data Set: %s" % File
            data = tools.readData(File)
            data = data[np.argsort(data[:,0])]
            X = data[:, :-1]
            Y = data[:, len(data[1,:]) - 1]
            kf = KFold(len(data), n_folds = 10, shuffle = True)
            TrainError = 0
            TestError = 0
            for train, test in kf:
                  pol = PolynomialFeatures(deg)
                  Z = pol.fit_transform(X[train]) 
                  Z_test = pol.fit_transform(X[test])     
                  theta = regress(Z, Y[train])
                  Y_hat = np.dot(Z, theta)
                  Y_hat_test = np.dot(Z_test, theta)
                  TrainError += mean_squared_error(Y[train], Y_hat)
                  TestError += mean_squared_error(Y[test], Y_hat_test)
            TestError /= len(kf)
            TrainError /= len(kf)
            errors.append([TestError, deg])
            print "---------------------------"
            print "Test Error: %s" % TestError
            print "Train Error: %s" % TrainError
      time_taken = start_time - time.time()
      print "Time Taken for primal: %s" % str(time_taken)
      return np.asarray(errors)
Example No. 17
    def predict(self, X, coefs):
        # first column of Z is time
        # we will replace the other columns with regressed data

        # clean-up from before
        Z = self.X.copy()
        print(type(Z), Z.head())
        print(type(coefs), coefs.head())

        poly = PolynomialFeatures(degree=self.n)
        for trial_index, (coefficients, x) in enumerate(zip(coefs, Z)):
            print(trial_index, coefficients.shape, x.shape)
            # fit_transform expects a 2-D array, hence the reshape
            t = poly.fit_transform((x[:,0]).reshape(-1,1))
            # only regress on data past reference time
            t = t[self.reference_time:]

            z = np.zeros(x.shape)
            # first column is time
            z[:,0] = x[:,0]
            # columns up to reference time are just 0 and were not regressed
            z[:self.reference_time, 1:] = 0
            # columns after reference_time were regressed with coefficients
            print(t.shape, z.shape, coefficients.shape)
            z[self.reference_time:, 1:] = np.dot(t, coefficients)
            Z.iloc[trial_index] = z
        return Z
Example No. 18
    def hidden_layer(self, X, w):
        # The dimension of matrix Z is m x (R + 1): the sigmoid activations plus
        # an extra constant column of ones for the bias term.
        Z = sigmoid(np.dot(X, w.T))
        p = PolynomialFeatures(degree=1)
        Z = p.fit_transform(Z)
        return Z
def main():
    # linear model (using polynomial features)
    model = linear_model.Ridge(alpha=0.1)

    # get data
    out = getData()
    X = out["x_train"]
    y = out["y_train"]
    perc = int(0.75 * len(X))
    X_train = X[0:perc]
    X_cv = X[perc:]
    y_train = y[0:perc]
    y_cv = y[perc:]
    X_test = out["x_test"]
    Id = out["test_id"]

    # add polynomial features (degree-3 terms plus a bias column)
    poly = PolynomialFeatures(degree=3)
    X_train = poly.fit_transform(X_train)

    # train model
    model.fit(X_train, y_train)

    # score
    X_cv = poly.transform(X_cv)
    print(model.score(X_cv, y_cv))

    # predict
    X_test = poly.transform(X_test)
    pred = model.predict(X_test)
    with open("submissions/PolyReg3.csv", "w") as f:
        f.write("Id,Hazard\n")
        for i in range(len(pred)):
            f.write(str(Id[i]) + "," + str(pred[i]) + "\n")
Example No. 20
def logistic_regression(x,y):
    """
        Ierative multifeatures regression.
        Find the best theta by changing it in each iteration
        Print the training and testing errors for each mapping
    """
    errors_training_fmeasure = []
    errors_training_accuracy = []
    errors_testing_fmeasure = []
    errors_testing_accuracy = []

    regr = LogisticRegressionKClasses()

    poly = PolynomialFeatures(degree=1)

    # Cross validation
    cv = KFold(len(x), n_folds=10)
    for train_idx, test_idx in cv:

        x_train = x[train_idx]
        x_test = x[test_idx]
        y_train = y[train_idx]
        y_test = y[test_idx]

        x_ = poly.fit_transform(x_train)
        x_2 = poly.fit_transform(x_test)

        regr.fit(x_,y_train)

        # Predict over the testing data and getting the error
        predicted_y = regr.predict(x_2)

        conf_matrix = confusion_matrix(y_test, predicted_y)
        precision, recall, f_measure, accuracy = get_measures(conf_matrix, len(set(y_train)))
        print('Precision:', precision, ' Recall:', recall, ' Accuracy:', accuracy, ' F-Measure:', f_measure)
Example No. 21
def mvr(data): 
    x = data[:, 0:len(data[0])-1]
    y = data[:, -1]
    
    minTestingError = np.inf
    for dim in range(1,3):
        if(dim > 1):
            print("Mapping into higher dimension of {} \n".format(dim))
        else:
            evaluateGradientDesc(data)
            print("Explicit solution\n")
        poly = PolynomialFeatures(dim)
        z = poly.fit_transform(x)
        
        theta = fitModel(z , y)
        
        print("Intercept     :   {} \nCoefficients : {}\n".format(theta[0], theta[1:]))
        testingError, sol = evaluateModel(z,y, False)
        
        if(dim == 1):
            print("Testing Error :", testingError)
        
        if (testingError < minTestingError):
            minTestingError = testingError
            optimalDimension = dim
            optSol = sol
         
    print "Optimal Dimension : {}, Testing Error : {} ".format(optimalDimension, minTestingError)
    return optSol
Example No. 22
def prepare(file, survived_info=True):
  df = pd.read_csv(file, header=0)

  df = pd.concat([df, pd.get_dummies(df['Embarked'], prefix='Embarked')], axis=1)
  df = pd.concat([df, pd.get_dummies(df['Sex'], prefix='Sex')], axis=1)
  df = pd.concat([df, pd.get_dummies(df['Pclass'], prefix='Pclass')], axis=1)

  df = df.fillna(value={'Age': df['Age'].dropna().median(), 'Fare': df['Fare'].dropna().median()})

  survived = None
  if survived_info:
    survived = df['Survived'].values
    df = df.drop(['Survived'], axis=1)

  ids = df['PassengerId'].values
  df = df.drop(['PassengerId', 'Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1)

  poly = PolynomialFeatures(interaction_only=True)
  polydata = poly.fit_transform(df)

  cols = np.hstack((['1s'], df.columns, [None]*(polydata.shape[1] - len(df.columns) -1)))
  polydf = pd.DataFrame.from_records(polydata, columns=cols)

  if survived_info: polydf['Survived'] = survived

  return (polydf, ids)
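A hedged usage sketch on the Titanic-style CSVs this helper expects (the file names are assumptions):

train_poly, train_ids = prepare("train.csv")
test_poly, test_ids = prepare("test.csv", survived_info=False)
print(train_poly.shape, test_poly.shape)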
def batterLife_chargeMoreThan4(chargeTime):    
    import numpy as np
    trainDataArr = np.genfromtxt("trainingdata_batteryLife.txt", delimiter = ",")
    trainDataArr = trainDataArr[trainDataArr[ :,0] > 4]
    trainData = trainDataArr[:, 0]
    trainData = trainData.reshape(-1,1)
    trainValue = trainDataArr[:,1]
    testData = np.array(chargeTime)
    testData = testData.reshape(-1,1)
    
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn import linear_model
    
    # Plot outputs
    import matplotlib.pyplot as plt
    plt.scatter(trainData, trainValue,  color='black')
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # Fit regression model
    poly = PolynomialFeatures(degree = 1)
    trainData_ = poly.fit_transform(trainData)
    testData_ = poly.fit_transform(testData)
    
    clf = linear_model.LinearRegression()
    clf.fit(trainData_, trainValue)
    return clf.predict(testData_)
Example No. 24
def linearRegreSin(url,degree):
    [a,b] = getData(url)
    trainA = a[0:139]
    trainB = b[0:139]
    testA = a[140:]
    testB = b[140:]

    poly = PolynomialFeatures(degree)
    trainA = np.float64(poly.fit_transform(trainA))
    testA = np.float64(poly.fit_transform(testA))
    theta = np.dot(np.dot(np.linalg.inv(np.dot(trainA.T,trainA)),trainA.T),trainB)
    plt.figure(1)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('data')
    plt.plot(trainA[:,1],trainB,"r*")
    y=np.dot(trainA, theta)
    print(np.sqrt(np.mean((y - trainB) ** 2)))  # print training RMSE

    y=np.dot(testA, theta)
    #plt.plot(testA[:,1], testB, "r.")
    plt.plot(testA[:,1],y,"k*")
    print(np.sqrt(np.mean((y - testB) ** 2)))  # print test RMSE
    plt.show()
    print(theta)
Example No. 25
    def __init__(self):
        self.theta = T.matrix()
        # define output for b
        combinations = PolynomialFeatures._combinations(2, 3, False, False)
        n_output_features_ = sum(1 for _ in combinations) + 1
        self.A_b = theano.shared(
            value=np.ones((n_output_features_,), dtype=theano.config.floatX),
            borrow=True, name='A_b')
        self.b_b = theano.shared(value=1.,
                                 borrow=True, name='b_b')

        combinations = PolynomialFeatures._combinations(2, 3, False, False)
        L = [(self.theta[:, 0] ** 0).reshape([-1, 1])]
        for i, c in enumerate(combinations):
            L.append(self.theta[:, c].prod(1).reshape([-1, 1]))
        self.XF3 = T.concatenate(L, axis=1)
        b = (T.dot(self.XF3, self.A_b) + self.b_b).reshape([-1, 1])

        # define output for k
        combinations = PolynomialFeatures._combinations(2, 2, False, False)
        n_output_features_ = sum(1 for _ in combinations) + 1
        self.rho_k = theano.shared(
            value=np.ones((n_output_features_,), dtype=theano.config.floatX),
            borrow=True, name='rho_k')

        combinations = PolynomialFeatures._combinations(2, 2, False, False)
        L = [(self.theta[:, 0] ** 0).reshape([-1, 1])]
        for i, c in enumerate(combinations):
            L.append(self.theta[:, c].prod(1).reshape([-1, 1]))
        self.XF2 = T.concatenate(L, axis=1)
        k = T.dot(self.XF2, self.rho_k).reshape([-1, 1])

        self.outputs = [T.concatenate([b, k], axis=1)]
        self.inputs = [self.theta]
        self.trainable_weights = [self.A_b, self.b_b, self.rho_k]
Example No. 26
def myTradingSystem(DATE, OPEN, HIGH, LOW, CLOSE, VOL, OI, P, R, RINFO, exposure, equity, settings):
    """ This system uses linear regression to allocate capital into the desired equities"""

    # Get parameters from setting
    nMarkets = len(settings['markets'])
    lookback = settings['lookback']
    dimension = settings['dimension']
    threshold = settings['threshold']

    pos = np.zeros(nMarkets, dtype=float)

    poly = PolynomialFeatures(degree=dimension)
    for market in range(nMarkets):
        reg = linear_model.LinearRegression()
        try:
            reg.fit(poly.fit_transform(np.arange(lookback).reshape(-1, 1)), CLOSE[:, market])
            trend = (reg.predict(poly.fit_transform(np.array([[lookback]]))) - CLOSE[-1, market]) / CLOSE[-1, market]

            if abs(trend[0]) < threshold:
                trend[0] = 0

            pos[market] = np.sign(trend)

        # for NaN data set position to 0
        except ValueError:
            pos[market] = .0

    return pos, settings
Example No. 27
def prob_max(x):
    poly=PolynomialFeatures(degree=2)
    x=poly.fit_transform(x)
    
    ####define best fit coefficient arrays

    theta_0=np.array([5.017034759466216798e+00,-4.953976374628412532e-02,-5.853604893727188709e-03,-1.732076056200582692e-01,4.646876717720006822e-02,-2.787195959859810248e-04,-1.222728739255723981e-07,6.120106921025333935e-02,4.604924515407455714e-06,-1.475861223279032741e-01,-4.060326310707941784e-09,1.177855732870812001e-02,3.113699082333943463e-02,-8.110887996756119586e-12,-1.113811480228766704e-05,-1.501651909640449069e-07,-2.190797370951344465e-06,-1.718990505473245339e-05,-1.199898098055512375e-13,-2.571924773608319866e-07,-2.147269697093823931e-12,-3.256296536440236682e-05,-2.581007347409745425e-05,1.392377894191479523e-03,-4.129157456238496948e-02,-1.811677361205055875e-02,-7.083807139833804416e-06,4.116671309652412958e-02,3.361594896247442773e-04,-8.223201336497298203e-03,-1.862209709284966395e-07,1.527880447451521184e-02,-3.672245027121902317e-02,-4.975817315933817863e-10,-6.237344094335352815e-04,-1.217106713769066128e-05,-1.489610233924158246e-04,-1.156461881655085214e-03,-5.159561821638818347e-12,-1.884192981459143558e-05,-1.825179242529750414e-10,-5.438522396156177874e-07,4.167833399722946711e-05,5.607144654864255374e-03,-3.093787958451529527e-02,-2.041422430639949412e-04,7.895983583095988675e-03,1.293062803926413491e-02,5.899640081165494730e-03,-1.021176015149306061e-05,8.486220614842233598e-03,5.822368958314040610e-03,-2.243937133831174112e-08,-8.464968966797879399e-03,-1.906386791427585779e-04,-1.795243901952780228e-03,-1.046895210502369993e-02,-3.330917120202175767e-10,-4.235251180738666644e-04,-5.694559236692822056e-09,-1.583929993116185621e-03,1.629024063907276165e-01,-6.967989967191325247e-03,-3.673107962032413740e-06,-2.280088579624509337e-01,1.726846693277796316e-04,1.013912471248917396e-01,-7.647706080406362405e-08,-3.240179256710575273e-01,1.214811767523774205e-01,-3.401281050759457049e-10,-1.670938331047893612e-07,-7.369899627351106136e-06,-9.856333774434332797e-05,-4.534506039623955074e-05,-9.599184784444142215e-12,-5.151527253102048208e-06,-1.030689454605035745e-10,4.646876717720006822e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-2.787195959859810248e-04,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.222728739255723981e-07,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,6.120106921025333935e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00
,0.000000000000000000e+00,4.604924515407455714e-06,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.475861223279032741e-01,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-4.060326310707941784e-09,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,1.177855732870812001e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,3.113699082333943463e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-8.110887996756119586e-12,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.113811480228766704e-05,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.501651909640449069e-07,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-2.190797370951344465e-06,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.718990505473245339e-05,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.199898098055512375e-13,0.000000000000000000e+00,0.000000000000000000e+00,-2.571924773608319866e-07,0.000000000000000000e+00,-2.147269697093823931e-12])

    theta_1=np.array([-9.861773710361221745e+00,1.930173314618176539e-01,6.178220889126870112e-03,9.590504030349773779e-02,-2.071552578564340164e-01,1.061401114719375260e-01,3.276754675639340086e-02,-1.761411784826558136e-02,-1.219468120911304441e-06,-9.174348236081142360e-02,2.132582599900693932e-02,-1.168137887722866912e-02,1.014151234082172059e-01,9.598987135568204463e-04,-4.542651588690940767e-02,-7.514592183950008714e-04,1.113651743862166532e-03,3.535033504077587929e-02,1.348878960300472899e-06,4.158088521609443200e-02,1.744377835470558925e-06,-8.830070079582454969e-04,-6.118986593694282407e-05,-4.784785490618059583e-04,6.231388236713353984e-02,1.984193321394733464e-02,3.807758267577563555e-02,-1.857758661936751918e-02,-8.902117282652563328e-05,1.684544497032977118e-03,-4.354918224284388961e-02,8.135671785350261087e-03,1.838040795364327554e-03,4.648089395429296639e-02,1.603282923510754299e-02,-5.706248765095311287e-02,6.474737189221846378e-02,-1.666585875194382532e-02,5.800179529291954185e-05,6.960244357250958136e-02,1.482721160150063508e-04,-5.299760763074679222e-07,-4.512899253144341872e-05,-9.330422825892547602e-04,-3.692049341246863322e-04,-7.641113350637301687e-04,-3.553288559473667197e-04,-3.424266483519060756e-03,4.323086081437536800e-04,-4.955185382381825611e-04,-5.468633412309427573e-03,3.023053081335558886e-04,2.032432933463332054e-03,-1.868881428527514009e-04,5.907286677952040300e-03,1.224575926635180362e-03,1.491552037995557810e-03,3.744487993794240379e-03,-1.585824627682363985e-03,4.626090019667926378e-03,2.914276434916693195e-04,-6.421237001048539506e-04,1.343912634023189216e-02,1.202887078507273999e-02,4.579648647433440592e-03,-4.573005453417482836e-05,-2.603037492365091118e-02,1.093608117200833424e-01,3.532167048002045617e-01,-1.790610728587208392e-02,-7.755213616683120925e-02,-5.213887650785711293e-03,-1.747560651202587356e-01,-4.635745132339050972e-02,-5.689835106400319142e-02,1.079103168240419384e-04,8.490464847112829186e-03,8.373013610258914587e-05,-2.071552578564340164e-01,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,1.061401114719375260e-01,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,3.276754675639340086e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.761411784826558136e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000
e+00,-1.219468120911304441e-06,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-9.174348236081142360e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,2.132582599900693932e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.168137887722866912e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,1.014151234082172059e-01,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,9.598987135568204463e-04,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-4.542651588690940767e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-7.514592183950008714e-04,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,1.113651743862166532e-03,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,3.535033504077587929e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,1.348878960300472899e-06,0.000000000000000000e+00,0.000000000000000000e+00,4.158088521609443200e-02,0.000000000000000000e+00,1.744377835470558925e-06])

    theta_2=np.array([-2.604982997969506187e+01,2.522175474048852784e-01,6.275718741926675920e-03,1.273176496282046599e-01,-1.716361908427019300e-01,-8.312891928874267811e-02,4.068642760504040390e-02,-2.445951924349220458e-02,-8.746331909292573688e-07,-1.542657353435612777e-02,-1.765684782956331370e-02,-3.195224775777173168e-04,2.484350665759416446e-02,4.813993958703906978e-03,1.759866699719525307e-01,5.747258345660388864e-04,-1.022129045161229450e-03,5.567310929387970370e-02,-9.063835339582872293e-07,-8.930479773136143495e-02,-9.138473645722535673e-07,-7.459379939882724523e-04,-4.125238423403655301e-05,-4.278814974555324602e-04,1.234252674940865789e-02,4.708747007997553247e-02,2.657070242802176546e-02,6.926664427951148562e-02,-6.384822293781164664e-05,4.964280033292678418e-02,9.853135356553717472e-02,-2.621681271491586862e-02,6.630289966406467672e-02,-2.208061355155441774e-01,4.922574438806641417e-02,4.310173077725486246e-02,-5.622794820973487512e-02,1.006576646572381883e-01,-3.897449196020566275e-05,-7.080593340274707326e-03,-7.767702598866720021e-05,-3.990070230109308789e-07,-1.651061082255117919e-05,1.537024690049966936e-03,8.005698436542285070e-04,8.994568249232704014e-04,5.470196351385650481e-04,-2.455970000128474082e-03,4.988277998095904915e-04,1.262763556509414152e-03,4.601679612131920685e-03,-1.194497842888761268e-04,2.882224654372331132e-03,5.875401491502118233e-04,-2.458015081252763658e-03,5.859965255224170106e-04,-2.547687917446368093e-04,-2.516120690268733393e-03,2.300462784971263851e-03,-2.423523210845587861e-03,-1.539288004294190964e-04,-1.260645266526524456e-02,-2.136594669075533859e-02,-1.240381092246360673e-02,1.775253607050698845e-02,-3.279874465984122252e-05,1.667948986384345557e-03,-1.177656364439296638e-01,-8.947706286380961412e-04,5.282554584883104691e-03,9.528953029071411673e-02,-1.953324553475337816e-03,1.692159896831275101e-01,6.332910268512657870e-02,-3.059270306265245501e-02,-7.251068271668771679e-05,-2.748819360572268139e-02,-4.386467349947201168e-05,-1.716361908427019300e-01,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-8.312891928874267811e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,4.068642760504040390e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-2.445951924349220458e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.00000000000
0000000e+00,-8.746331909292573688e-07,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.542657353435612777e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.765684782956331370e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-3.195224775777173168e-04,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,2.484350665759416446e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,4.813993958703906978e-03,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,1.759866699719525307e-01,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,5.747258345660388864e-04,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.022129045161229450e-03,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,5.567310929387970370e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-9.063835339582872293e-07,0.000000000000000000e+00,0.000000000000000000e+00,-8.930479773136143495e-02,0.000000000000000000e+00,-9.138473645722535673e-07])

    theta_3=np.array([-2.972725322795918140e-02,-2.504227156229747453e-01,-9.722118342779062158e-03,1.229149113213241912e-01,2.039923850853684467e-02,-1.805107341267933943e-02,7.334563069172345476e-03,-6.475321568310828764e-04,-8.474944289249388250e-08,9.714545617984883855e-05,-2.075035998257516458e-07,-1.221820060933139164e-05,-1.714475447964966190e-02,-2.129377838506303893e-03,-1.277321374533818017e-06,-2.380156363764723250e-06,-5.273025783548628525e-06,-1.984111789391731009e-03,-1.335426121119178434e-07,3.339996589013074558e-04,-2.141039464532945376e-07,2.576647414932395786e-03,2.945797215512836505e-06,-1.895000552612198606e-04,-1.462288947682845522e-02,-1.951654268095479733e-02,-1.630737487857820273e-02,-5.655678104343885015e-02,-6.186709331152055607e-06,-2.000570956968860184e-02,-1.460798045208372536e-05,-9.892777618470509349e-04,-6.134943418829629652e-02,5.204243735868804843e-02,-6.976997540713989398e-05,-1.924795146172466416e-04,-3.585644621246710678e-04,-8.751744876445026466e-02,-5.742332320812468485e-06,-2.493414480788682872e-02,-1.819883544853002577e-05,1.673188626427698019e-06,-2.199004526442708865e-05,3.929065891591175703e-03,3.411106034336343351e-03,4.689455918427083009e-03,-1.623583183295337906e-02,-2.379764356421232188e-04,4.563617516957610247e-03,-5.845377676580722996e-04,-5.550332977089329420e-03,5.817926665026248653e-03,3.489254807540806587e-03,-8.968364091790517831e-04,-2.770023159211470760e-03,-4.227833625220314175e-03,1.685174688793472349e-03,-3.707142912226834078e-04,-5.865829701672146956e-03,-5.678036659941369801e-04,8.344188249964974876e-05,-2.863247383273172242e-02,-6.482258485425367728e-03,-4.199374526758931081e-02,-1.256077522453134809e-02,-3.178104108468527999e-06,-3.440396173308768457e-02,-9.901849306080791411e-06,-3.180423753092536477e-05,-7.452030759889874401e-02,-6.907406950607837548e-02,5.971308793973397274e-07,-1.155260382492086013e-04,-2.332571299853613211e-04,1.410515664042338024e-01,-1.068340896895342663e-05,2.499449671921087357e-01,-1.027698942975813230e-05,2.039923850853684467e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.805107341267933943e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,7.334563069172345476e-03,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-6.475321568310828764e-04,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+0
0,0.000000000000000000e+00,-8.474944289249388250e-08,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,9.714545617984883855e-05,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-2.075035998257516458e-07,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.221820060933139164e-05,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.714475447964966190e-02,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-2.129377838506303893e-03,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.277321374533818017e-06,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-2.380156363764723250e-06,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-5.273025783548628525e-06,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.984111789391731009e-03,0.000000000000000000e+00,0.000000000000000000e+00,0.000000000000000000e+00,-1.335426121119178434e-07,0.000000000000000000e+00,0.000000000000000000e+00,3.339996589013074558e-04,0.000000000000000000e+00,-2.141039464532945376e-07,])

#####calculate probabilities
    z_0=np.sum(x*theta_0)
    z_1=np.sum(x*theta_1)
    z_2=np.sum(x*theta_2)
    z_3=np.sum(x*theta_3)
    
    p_0=1./(1.+np.exp(-z_0))
    p_1=1./(1.+np.exp(-z_1))
    p_2=1./(1.+np.exp(-z_2))
    p_3=1./(1.+np.exp(-z_3))

    prob_arra=np.array([p_0, p_1, p_2, p_3])
    
    return prob_arra.argmax()
Example No. 28
def poly_model(ins,outs,degrees):
	poly   = PolynomialFeatures(degree=degrees)
	X = poly.fit_transform(ins)

	regr = linear_model.LinearRegression()
	regr.fit(X, outs)
	print_model("poly-"+str(degrees), regr, X, outs)
Example No. 29
def evaluateGradientDesc(data): #Function to evaluate Gradient Descent Algorithm in terms of testing Error
     poly = PolynomialFeatures(1)
     x = data[:, 0:len(data[0])-1]
     y = data[:, -1]
     z = poly.fit_transform(x)
     testingError, sol = evaluateModel(z,y, True)
     
     print "Iterative solution\nTesting error : ", testingError , "\n"
from scipy.stats import boxcox

X_train_transformed = X_train.copy()
X_train_transformed['Fare'] = boxcox(X_train_transformed['Fare'] + 1)[0]
X_test_transformed = X_test.copy()
X_test_transformed['Fare'] = boxcox(X_test_transformed['Fare'] + 1)[0]
# Rescale data
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_transformed_scaled = scaler.fit_transform(X_train_transformed)
X_test_transformed_scaled = scaler.transform(X_test_transformed)
# Get polynomial features
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2).fit(X_train_transformed_scaled)
X_train_poly = poly.transform(X_train_transformed_scaled)
X_test_poly = poly.transform(X_test_transformed_scaled)
# Debug
print(poly.get_feature_names())
# Select features using chi-squared test
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

## Get score using original model
logreg = LogisticRegression(C=1)
logreg.fit(X_train, y_train)
scores = cross_val_score(logreg, X_train, y_train, cv=10)
print('CV accuracy (original): %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
highest_score = np.mean(scores)
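The SelectKBest/chi2 imports above are not exercised in the snippet; a minimal sketch of how they might be applied to the polynomial features (chi2 requires non-negative inputs, which the MinMax scaling above provides; k=20 is an arbitrary choice):

selector = SelectKBest(chi2, k=20)
X_train_selected = selector.fit_transform(X_train_poly, y_train)
X_test_selected = selector.transform(X_test_poly)

logreg_poly = LogisticRegression(C=1)
scores = cross_val_score(logreg_poly, X_train_selected, y_train, cv=10)
print('CV accuracy (poly + chi2): %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))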
Example No. 31
v.feature_names_

# Handle missing values
from sklearn.preprocessing import Imputer

imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
imp.fit_transform([[1, 2], [np.nan, 3], [7, 6]])
# Create derived (polynomial) features
# [1, a, b, a^2, ab, b^2]

from sklearn.preprocessing import PolynomialFeatures

X = np.arange(6).reshape(3, 2)
X

poly = PolynomialFeatures(2)
poly.fit_transform(X)
## [1, a, b, a^2, ab, b^2]

# Same effect as apply
from sklearn.preprocessing import FunctionTransformer


def all_but_first_column(X):
    return X[:, 1:]


X = np.arange(12).reshape(4, 3)
X

FunctionTransformer(all_but_first_column).fit_transform(X)
def Lasso_Regression(degree, alpha):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),  # build polynomial features
        ("std_scaler", StandardScaler()),  # standardize
        ("lasso_reg", Lasso(alpha=alpha))  # Lasso regression
    ])
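A hedged usage sketch on synthetic data (the degree and alpha values are arbitrary; Pipeline, PolynomialFeatures, StandardScaler and Lasso are assumed to be imported, as the function requires):

import numpy as np

rng = np.random.RandomState(0)
x = rng.uniform(-3, 3, size=(100, 1))
y = 0.5 * x[:, 0] ** 2 + x[:, 0] + rng.normal(0, 1, 100)

lasso_reg = Lasso_Regression(degree=20, alpha=0.01)
lasso_reg.fit(x, y)
print(lasso_reg.score(x, y))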
Example No. 33
# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

#Linear Regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X,y)

#Polynomial Linear Regression
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)

lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Visualising the Linear Regression results
plt.scatter(X, y, color = 'red')
plt.plot(X, lin_reg.predict(X), color = 'blue')
plt.title('Truth or Bluff (Linear Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Polynomial Regression results
plt.scatter(X, y, color = 'red')
-1.17165272, -0.89129801,  -0.85572252, -0.7736467,  -0.21234812,-0.12717219])
 
x_test = np.array([0.31273956 , 0.46122891, 0.4917774, 0.7039386, 0.84386983, 0.97020886])
 
y_test = np.array([0.909136, 0.38747724, -0.92084687, -1.03804487, 0.03453301, -0.1177253])
 
# create matrix versions of these arrays
X_train = x_train[:, np.newaxis]
X_test = x_test[:, np.newaxis]
 
colors = ['teal', 'yellow' ,'green', 'gold']
lw = 2
 
train_error = []
test_error = []
 
 
fig = plt.figure()
for degree in range(11):
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=0))
    model.fit(X_train, y_train)
    train_error.append(mean_squared_error(y_train, model.predict(X_train)))
    test_error.append(mean_squared_error(y_test, model.predict(X_test)))
plt.plot(np.arange(11), train_error, color='green', label='train')
plt.plot(np.arange(11), test_error, color='red', label='test')
plt.ylim((0.0, 1e0))
plt.ylabel('Mean Squared Error')
plt.xlabel('Degree')
plt.legend(loc='lower left')
fig.savefig('Testing_Answer4_1.png', bbox_inches='tight')
Example No. 35
y = y[:-forecast_out]

print('Dimension of X', X.shape)
print('Dimension of X_lately', X_lately.shape)
print('Dimension of y', y.shape)

# Separation of training and testing of model by cross validation train test split
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2)

# Linear regression
clfreg = LinearRegression(n_jobs=-1)
clfreg.fit(X_train, y_train)

# Quadratic Regression 2
clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
clfpoly2.fit(X_train, y_train)

# Quadratic Regression 3
clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
clfpoly3.fit(X_train, y_train)

# KNN Regression
clfknn = KNeighborsRegressor(n_neighbors=2)
clfknn.fit(X_train, y_train)

confidencereg = clfreg.score(X_test, y_test)
confidencepoly2 = clfpoly2.score(X_test, y_test)
confidencepoly3 = clfpoly3.score(X_test, y_test)
confidenceknn = clfknn.score(X_test, y_test)
Example No. 36
def predictStockPrices(df):
    dfreg = df.loc[:, ['Adj Close', 'Volume']]
    dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
    dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0
    print(dfreg.tail())

    # cleanup process
    # --------------------------------------------------------------------

    # replace missing values with a sentinel value
    dfreg.fillna(value=-99999, inplace=True)

    # we want to separate 1 percent of the data to forecast
    forecast_out = int(math.ceil(0.01 * len(dfreg)))
    # we want to predict the AdjClose
    forecast_col = 'Adj Close'
    dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
    X = np.array(dfreg.drop(['label'], 1))

    # scale the X so that everyone can have the same distribution for linear regression
    X = sk.preprocessing.scale(X)

    # find data series of late X and early X (train) for model generation and evaluation
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]

    # Separate label and identify it as y
    y = np.array(dfreg['label'])
    y = y[:-forecast_out]

    print('Dimension of X', X.shape)
    print('Dimension of y', y.shape)

    # Separation of training and testing of model by cross validation train test split
    X_train, X_test, y_train, y_test = sk.model_selection.train_test_split(
        X, y, test_size=0.2)

    # Linear Regression
    clfreg = LinearRegression(n_jobs=-1)
    clfreg.fit(X_train, y_train)

    # Lasso Regression
    clflasso = Lasso(selection='random')
    clflasso.fit(X_train, y_train)

    # Quadratic Regression 2
    clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
    clfpoly2.fit(X_train, y_train)

    # Quadratic Regression 3
    clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
    clfpoly3.fit(X_train, y_train)

    # KNN
    clfknn = KNeighborsRegressor(n_neighbors=2)
    clfknn.fit(X_train, y_train)

    # Test the models
    confidencereg = clfreg.score(X_test, y_test)
    confidencepoly2 = clfpoly2.score(X_test, y_test)
    confidencepoly3 = clfpoly3.score(X_test, y_test)
    confidenceknn = clfknn.score(X_test, y_test)
    confidencelasso = clflasso.score(X_test, y_test)

    print("The linear regression confidence is ", confidencereg)
    print("The quadratic regression 2 confidence is ", confidencepoly2)
    print("The quadratic regression 3 confidence is ", confidencepoly3)
    print("The knn regression confidence is ", confidenceknn)
    print("The knn lasso confidence is ", confidencelasso)

    # Predict
    predictAndPlot(clfreg, X_lately, dfreg.copy(), confidencereg, forecast_out)
    predictAndPlot(clfpoly2, X_lately, dfreg.copy(), confidencepoly2,
                   forecast_out)
    predictAndPlot(clfpoly3, X_lately, dfreg.copy(), confidencepoly3,
                   forecast_out)
    predictAndPlot(clfknn, X_lately, dfreg.copy(), confidenceknn, forecast_out)
    predictAndPlot(clflasso, X_lately, dfreg.copy(), confidencelasso,
                   forecast_out)
Example No. 37
#1. kutuphaneler
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt

# load the data
df = pd.read_csv('winequality-red.csv')

X = df[['quality']]
y = df[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# note: this snippet treats the chemical measurements (named y here) as inputs
# and the quality column (named X here) as the target
model = PolynomialFeatures(degree=4)
y_train_ = model.fit_transform(y_train)  # fit the expansion on the training split only
y_test_ = model.transform(y_test)
print(y_train_, y_test_)
lg = LinearRegression()
lg.fit(y_train_, X_train)
predicted_data = lg.predict(y_test_)
predicted_data = np.round(predicted_data)

print(mean_squared_error(X_test,predicted_data))

print(predicted_data)
Exemplo n.º 38
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=10)
X_poly = poly_reg.fit_transform(X)

lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Visualising the Linear Regression Results
plt.scatter(X, y, color='red')
plt.plot(X, lin_reg.predict(X), color='blue')
plt.title('Truth or Bluff (Linear Regression)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

# Visualising the Polynomial Regression Results
X_grid = np.arange(min(X), max(X), 0.1)
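# (the snippet is truncated here; a likely continuation, sketched below, reshapes the grid and plots the fit)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color='red')
plt.plot(X_grid, lin_reg_2.predict(poly_reg.transform(X_grid)), color='blue')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()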
Exemplo n.º 39
# Employ a quadratic regression to smooth win share vs age data

import operator

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

x = wsby['Age'].values.reshape(-1, 1)
y = wsby['WS']

polynomial_features= PolynomialFeatures(degree=2)
x_poly = polynomial_features.fit_transform(x)

model = LinearRegression()
model.fit(x_poly, y)
y_poly_pred = model.predict(x_poly)

rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
# R-squared is 0.8705593525409101, pretty good

plt.scatter(x, y, s=10)
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x,y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_zip)
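# (the snippet is truncated here; a typical continuation plots the fitted curve)
plt.plot(x, y_poly_pred, color='m')
plt.show()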
Exemplo n.º 40
0
 def _get_polynomials(self, x):
     poly = PolynomialFeatures(degree=self.degree)
     x_poly = poly.fit_transform(x.reshape(-1, 1))
     return x_poly
Exemplo n.º 41
def Polynomial_Regression(degree):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),  # 建立多项式
        ("std_scaler", StandardScaler()),  # 归一化处理
        ("lin_reg", LinearRegression())  # 回归方程
    ])
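# usage sketch (variable names are illustrative) for the pipeline factory above:
#   poly2_reg = Polynomial_Regression(degree=2)
#   poly2_reg.fit(X_train, y_train)
#   y_pred = poly2_reg.predict(X_test)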
Exemplo n.º 42
0
normalizer = preprocessing.Normalizer().fit(X)
print(normalizer.transform(X))

# Binarization
# Feature binarization
print("Feature binarization")
X = [[1., -1., 2.], [2., 0., 0.], [0., 1., -1.]]
binarizer = preprocessing.Binarizer().fit(X)
print(binarizer.transform(X))

# Imputation of missing values
print("Imputation of missing values")
from sklearn.impute import SimpleImputer  # Imputer was removed from sklearn.preprocessing

imp = SimpleImputer(missing_values=np.nan, strategy='mean')
imp.fit([[1, 2], [np.nan, 3], [7, 6]])
X = [[np.nan, 2], [6, np.nan], [7, 6]]
print(imp.transform(X))

# Generating polynomial features
print("Generating polynomial features")
from sklearn.preprocessing import PolynomialFeatures

X = np.arange(6).reshape(3, 2)
poly = PolynomialFeatures(2)
print(poly.fit_transform(X))
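# each row expands to: 1, x1, x2, x1^2, x1*x2, x2^2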

X = np.arange(9).reshape(3, 3)
poly = PolynomialFeatures(degree=3, interaction_only=True)
print(poly.fit_transform(X))
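# interaction_only=True keeps only products of distinct features: 1, x1, x2, x3, x1*x2, x1*x3, x2*x3, x1*x2*x3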
Exemplo n.º 43
0
lines = fr.readlines()
for line in lines:  # iterate over every line of the data file
    items = line.strip().split(',')
    datasets_X.append(int(items[0]))  # convert the parsed values to int and append to datasets_X and datasets_Y
    datasets_Y.append(int(items[1]))

length = len(datasets_X)
datasets_X = np.array(datasets_X).reshape([length, 1])
datasets_Y = np.array(datasets_Y)

minX = min(datasets_X)
maxX = max(datasets_X)
X = np.arange(minX, maxX).reshape([-1, 1])

# degree=2 builds the quadratic polynomial features X_poly from datasets_X
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(datasets_X)
lin_reg_2 = linear_model.LinearRegression()
lin_reg_2.fit(X_poly, datasets_Y)

print(poly_reg)
# inspect the regression coefficients
print('Coefficients:', lin_reg_2.coef_)
# inspect the regression intercept
print('intercept', lin_reg_2.intercept_)

# show the fit in a plot
plt.scatter(datasets_X, datasets_Y, color='red')
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)), color='blue')
plt.xlabel('Area')
plt.ylabel('Price')
Exemplo n.º 44
0
 def train_model_lassoLARS_style(predictors, predictants, alpha, deg):
     Vandermonde = PolynomialFeatures(degree=deg)
     Vandermonde = Vandermonde.fit_transform(predictors)
     LinModel = linear_model.LassoLars(alpha=alpha)
     LinModel = LinModel.fit(Vandermonde, predictants)
     return LinModel
# reading in the data file
A = np.loadtxt("steel_composition_train_2.csv", delimiter=",", skiprows=1)
numOfRows =  A.shape[0]
numOfCols = A.shape[1]

# deleting the first and last column
trainingData = np.delete(A, [0, numOfCols-1], 1)
# saving the last column as target vector
targetVector_train = A[:,-1]

# predict the values from test set
B = np.loadtxt("steel_composition_test.csv", delimiter=",", skiprows=1)
testData = np.delete(B, 0, 1)

# sklearn.cross_validation was removed; train_test_split now lives in sklearn.model_selection
X_train, X_test, y_train, y_test = model_selection.train_test_split(trainingData, targetVector_train, test_size=0.4, random_state=0)
poly = PolynomialFeatures(3)
features_train = poly.fit_transform(X_train)
features_test = poly.transform(X_test)  # reuse the transformer fitted on the training split

regr = linear_model.LinearRegression()
#trainFeatures_T = features_train.transpose()
trainFeatures_T = trainingData.transpose()
#phi_T_phi = np.dot(trainFeatures_T, features_train)
phi_T_phi = np.dot(trainFeatures_T, trainingData)
ident = np.eye(phi_T_phi.shape[0], phi_T_phi.shape[1])
#for k in range(-40, 41):
for k in range(18, 19):
    lamda = np.exp(k)
    lamdaEye = np.dot(lamda, ident)
    matrix_1 = np.add(lamdaEye, phi_T_phi)
    matrix_1_inv = inv(matrix_1)
import pylab as plt
import pandas as pd

#importing dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

#fitting linear regression
from sklearn.linear_model import LinearRegression
lin_regressor = LinearRegression()
lin_regressor.fit(X, y)

#fitting polynomial regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_regressor = PolynomialFeatures(degree=2)
X_poly = poly_regressor.fit_transform(X)
lin_regressor2 = LinearRegression()
lin_regressor2.fit(X_poly, y)

#visualization of linear regression results
plt.scatter(X, y, color='red')
plt.plot(X, lin_regressor.predict(X))
plt.title('Linear Regression (Truth)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

#visualization of polynomial linear regression
plt.scatter(X, y, color='red')
plt.plot(X, lin_regressor2.predict(poly_regressor.fit_transform(X)))
Exemplo n.º 47
0
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#Let's examine the distribution of the predicted values of the training data.
Title = 'Distribution Plot of Predicted Value Using Training Data vs Training Data Distribution'
DistributionPlot(y_train, yhat_train, "Actual Values (Train)", "Predicted Values (Train)", Title)

Title = 'Distribution Plot of Predicted Value Using Test Data vs Data Distribution of Test Data'
DistributionPlot(y_test, yhat_test, "Actual Values (Test)", "Predicted Values (Test)", Title)
from sklearn.preprocessing import PolynomialFeatures
#Overfitting
#Let's use 55 percent of the data for training and the rest for testing:
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.45, random_state=0)
#We will perform a degree 5 polynomial transformation on the feature 'horsepower'.

pr = PolynomialFeatures(degree=5)
x_train_pr = pr.fit_transform(x_train[['horsepower']])
x_test_pr = pr.transform(x_test[['horsepower']])
pr    
#Now let's create a linear regression model "poly" and train it.    
poly = LinearRegression()
poly.fit(x_train_pr, y_train)
#Get predictions from the model with the "predict" method and assign them to "yhat".
yhat = poly.predict(x_test_pr)
yhat[0:5]
#Let's take the first four predicted values and compare them to the actual targets.
print("Predicted values:", yhat[0:4])
print("True values:", y_test[0:4].values)

PollyPlot(x_train[['horsepower']], x_test[['horsepower']], y_train, y_test, poly,pr)
#R^2 of the training data:
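# (truncated in the original; the usual continuation evaluates R^2 on both splits)
print(poly.score(x_train_pr, y_train))
print(poly.score(x_test_pr, y_test))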
Exemplo n.º 48
0
model = SelectFromModel(clf, prefit=True)
X_new = model.transform(X)
print(X_new.shape)  # e.g. (150, 2)

#reg = RandomForestRegressor(n_estimators=500,random_state=0)
#reg.fit(X_train,y_train)

percent_diff_other = ((y_test - pred) / (y_test)) * 100
from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import ExtraTreesClassifier
dataset = datasets.load_iris()
model = ExtraTreesClassifier()
model.fit(dataset.data, dataset.target)
from sklearn.metrics import accuracy_score
feature_importance = reg.feature_importances_
np_arr = np.array(y_test, dtype=float)
score = accuracy_score(np_arr[0], predictions[0])
accuracy_score(y_test, pred)

from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
poly = PolynomialFeatures(degree=2)
X_train_ = poly.fit_transform(X_train)
y_train_ = poly.fit_transform(y_train)
X_test_ = poly.fit_transform(X_test)
#X_train, X_test, y_train, y_test = train_test_split(X_, y_, test_size=0.2,random_state=0)
preds = GenerateLinearRegressionModel(X_train, y_train, X_test)

from sklearn.model_selection import cross_val_score
scores = cross_val_score(estimator=reg, X=X_train, y=y_train, cv=10)
Exemplo n.º 49
0
    }
    return results


def runLR(train_X, train_y, test_X, test_y, test_X2, params):
    print('Train LR')
    model = RandomForestClassifier(**params)
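    # note: despite the 'LR' name, this trains a RandomForestClassifier with the given params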
    model.fit(train_X, train_y)
    print('Predict 1/2')
    pred_test_y = model.predict_proba(test_X)[:, 1]
    print('Predict 2/2')
    pred_test_y2 = model.predict_proba(test_X2)[:, 1]
    return pred_test_y, pred_test_y2


target = train['redemption_status'].values
poly = PolynomialFeatures(degree=2)
sc = StandardScaler()
lr_params = {'n_estimators': 1000}
results = run_cv_model(
    sc.fit_transform(poly.fit_transform(train[train_cols].fillna(0).values)),
    sc.fit_transform(poly.fit_transform(test[train_cols].fillna(0).values)),
    target, runLR, lr_params, auc, 'lr')
day = 2
sub = 3
name = f"day_{day}_sub_{sub}"
tmp = dict(zip(test.id.values, results['test']))
answer1 = pd.DataFrame()
answer1['id'] = test.id.values
answer1['redemption_status'] = answer1['id'].map(tmp)
answer1.to_csv(f'{name}.csv', index=None)
Exemplo n.º 50
0
# imputer is for handling missing values
from sklearn.impute import SimpleImputer  # Imputer was removed from sklearn.preprocessing
imputer = SimpleImputer(strategy='median')

poly_target = poly_features['TARGET']

poly_features = poly_features.drop(columns=['TARGET'])

# Need to impute missing values
poly_features = imputer.fit_transform(poly_features)
poly_features_test = imputer.transform(poly_features_test)

from sklearn.preprocessing import PolynomialFeatures

# Create the polynomial object with specified degree
poly_transformer = PolynomialFeatures(degree=3)
# Train the polynomial features
poly_transformer.fit(poly_features)

# Transform the features
poly_features = poly_transformer.transform(poly_features)
poly_features_test = poly_transformer.transform(poly_features_test)
#print('Polynomial Features shape: ', poly_features.shape)
#print(poly_transformer.get_feature_names(input_features = ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'DAYS_BIRTH'])[:34])

poly_features = pd.DataFrame(poly_features,
                             columns=poly_transformer.get_feature_names_out([
                                 'EXT_SOURCE_1', 'EXT_SOURCE_2',
                                 'EXT_SOURCE_3', 'DAYS_BIRTH'
                             ]))
Exemplo n.º 51
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values
#Fitting Linear Regression to the Dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

#Fitting Polynomial Regressor to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)

X_poly = poly_reg.fit_transform(X)
poly_reg.fit(X_poly, y)
lin_reg2 = LinearRegression()
lin_reg2.fit(X_poly, y)

#Visualize Linear Regression Result
plt.scatter(X, y, color="red")
plt.plot(X, lin_reg.predict(X), color="green")
plt.title("Linear Regression")
plt.xlabel("Position Level")
plt.ylabel("Salary")
plt.show()

#Visualize Polynomial Linear Regression Result
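# (truncated in the original; the usual continuation mirrors the linear plot above)
plt.scatter(X, y, color="red")
plt.plot(X, lin_reg2.predict(poly_reg.transform(X)), color="green")
plt.title("Polynomial Regression")
plt.xlabel("Position Level")
plt.ylabel("Salary")
plt.show()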
Exemplo n.º 52
0
            label = label.replace("|label ", "")

            f_r, f_g, f_b = feature.split(" ")
            predict.append([int(f_r), int(f_g), int(f_b)])

            l_r, l_g, l_b, sp = label.split(" ")
            ground_truth.append([
                int(l_r),
                int(l_g),
                int(l_b),
            ])
    return predict, ground_truth


input_X, vector = loadTrainData(train_path)
poly = PolynomialFeatures(degree=3)

X_ = poly.fit_transform(input_X)
X_ = numpy.delete(X_, (0), axis=1)

clf = linear_model.LinearRegression()
clf.fit(X_, vector)

predict, ground_truth = loadTestData(test_path)
predict_ = poly.fit_transform(predict)
predict_ = numpy.delete(predict_, (0), axis=1)
predicted = clf.predict(predict_)
predicted = [[int(round(x[0])),
              int(round(x[1])),
              int(round(x[2]))] for x in predicted]
Exemplo n.º 53
0
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from math import sqrt
from sklearn.preprocessing import PolynomialFeatures

house_data = pd.read_csv('housing.csv')

X = pd.DataFrame(house_data['RM'])
Y = house_data['MEDV']

X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.20,
                                                    random_state=5)

poly_reg = PolynomialFeatures(degree=2)

X_train_2_d = poly_reg.fit_transform(X_train)
X_test_2_d = poly_reg.transform(X_test)

lin_reg = LinearRegression()  # the 'normalize' argument was removed from scikit-learn; scale the inputs beforehand if needed
lin_reg.fit(X_train_2_d, Y_train)

test_pred = lin_reg.predict(X_test_2_d)
train_pred = lin_reg.predict(X_train_2_d)

rms = sqrt(mean_squared_error(Y_test, test_pred))
print(rms)

r2 = r2_score(Y_test, test_pred)
print(r2)
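# train_pred is computed above but unused; comparing train and test error is the natural next step
print(sqrt(mean_squared_error(Y_train, train_pred)))
print(r2_score(Y_train, train_pred))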
Exemplo n.º 54
0
    # data = np.loadtxt(path, dtype=float, delimiter=',',
    #                   converters={4: iris_type})

    data = pd.read_csv(path, header=None)
    data[4] = pd.Categorical(data[4]).codes
    # iris_types = data[4].unique()
    # print iris_types
    # for i, type in enumerate(iris_types):
    #     data.set_value(data[4] == type, 4, i)
    x, y = np.split(data.values, (4, ), axis=1)
    # print 'x = \n', x
    # print 'y = \n', y
    # use only the first two feature columns
    x = x[:, :2]
    lr = Pipeline([('sc', StandardScaler()),
                   ('poly', PolynomialFeatures(degree=3)),
                   ('clf', LogisticRegression())])
    lr.fit(x, y.ravel())
    y_hat = lr.predict(x)
    y_hat_prob = lr.predict_proba(x)
    np.set_printoptions(suppress=True)
    print('y_hat = \n', y_hat)
    print('y_hat_prob = \n', y_hat_prob)
    print('Accuracy: %.2f%%' % (100 * np.mean(y_hat == y.ravel())))
    # plot the decision regions
    N, M = 500, 500  # number of sampling points along each axis
    x1_min, x1_max = x[:, 0].min(), x[:, 0].max()  # range of column 0
    x2_min, x2_max = x[:, 1].min(), x[:, 1].max()  # range of column 1
    t1 = np.linspace(x1_min, x1_max, N)
    t2 = np.linspace(x2_min, x2_max, M)
    x1, x2 = np.meshgrid(t1, t2)  # generate the grid of sampling points
Exemplo n.º 55
0
def PolynomialLogisticRegression(degree):
    return Pipeline([('poly', PolynomialFeatures(degree=degree)),
                     ('std_scaler', StandardScaler()),
                     ('log_reg', LogisticRegression())])
Exemplo n.º 56
0
def true_fun(X):
    return np.sin(2 * np.pi * X)

np.random.seed(0)

n_samples = 30
degrees = [1, 4, 15]
X = np.sort(np.random.rand(n_samples))
y = true_fun(X) + np.random.randn(n_samples) * 0.1

plt.figure(figsize=(7, 2.5))
for i in range(len(degrees)):
    ax = plt.subplot(1, len(degrees), i + 1)
    plt.setp(ax, xticks=(), yticks=())
    polynomial_features = PolynomialFeatures(degree=degrees[i], include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([("polynomial_features", polynomial_features), ("linear_regression", linear_regression)])
    pipeline.fit(X[:, np.newaxis], y)

    # Evaluate the models using cross-validation
    scores = cross_val_score(pipeline, X[:, np.newaxis], y, scoring="neg_mean_squared_error", cv=10)

    X_test = np.linspace(0, 1, 100)
    plt.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), label="Model")
    plt.plot(X_test, true_fun(X_test), label="True function")
    plt.scatter(X, y, edgecolor='b', s=20, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
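    # (truncated; the original scikit-learn example closes each panel with a legend and title)
    plt.legend(loc="best")
    plt.title("Degree {}\nMSE = {:.2e}(+/- {:.2e})".format(degrees[i], -scores.mean(), scores.std()))
plt.show()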
Exemplo n.º 57
0
####################################################################################################################################################

# Linear Regression

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_pred_linear = lin_reg.predict(X_test)

result_score_linear_prediction = r2_score(y_test, y_pred_linear)

print('result score for linear regression is ', result_score_linear_prediction)

####################################################################################################################################################

# Polynomial Regression

from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X_train)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y_train)
y_pred_poly = lin_reg_2.predict(poly_reg.transform(X_test))

result_score_polynomial_prediction = r2_score(y_test, y_pred_poly)

print('result score for polynomial regression is ',
      result_score_polynomial_prediction)
Exemplo n.º 58
0
onehotEncr = OneHotEncoder(categorical_features=[0])
#onehotEncr = OneHotEncoder(categorical_features=[1])
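# note: 'categorical_features' was removed from OneHotEncoder in newer scikit-learn;
# a hypothetical modern equivalent selects the column with ColumnTransformer:
#   from sklearn.compose import ColumnTransformer
#   X = ColumnTransformer([('oh', OneHotEncoder(), [0])], remainder='passthrough').fit_transform(X)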

X = onehotEncr.fit_transform(X).toarray()

labEnc_Y = LabelEncoder()
Y = labEnc_Y.fit_transform(Y)

plt.scatter(X[:,2], Y, marker='o')


"""New"""

np.random.seed(0) 
poly_features = PolynomialFeatures(degree=2, include_bias=False) 
X = poly_features.fit_transform(X) 
model = SGDRegressor(max_iter=10000, eta0=0.001) 

model.fit(X,Y) 
print('Coeff R2 =', model.score(X, Y)) 

plt.scatter(X[:,4], Y, marker='o')
plt.scatter(X[:,0], model.predict(X), c='red', marker='+')

Exemplo n.º 59
# Polynomial Regression

import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('polynomial_regression.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)

from sklearn.linear_model import LinearRegression
poly_reg.fit(X_poly, y)
model = LinearRegression()
model.fit(X_poly, y)

# Visualising the Polynomial Regression results
plt.scatter(X, y, color='red')
plt.plot(X, model.predict(X_poly), color='blue')
plt.title('Polynomial Regression Example')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
Exemplo n.º 60
-1
def analysis_7(df_Coredata):
	""" 多次元多項式モデル """

	#https://www.jeremyjordan.me/polynomial-regression/

	X = df_Coredata[['d','e','f','g','i']]
	y = df_Coredata['j']

	# specify the plot style
	sns.set(style = 'whitegrid', context = 'notebook')
	# plot pairwise relationships between the variables
	#sns.pairplot(df_Coredata)
	#plt.show()


	#X_train, X_test, y_train, y_test  =  train_test_split(X,y,random_state = 0)
	#lr = linear_model.LinearRegression().fit(X_train, y_train)
	#print("Trainng set score: {:.2f}".format(lr.score(X_train, y_train)))
	#print("Test set score: {:.2f}".format(lr.score(X_test, y_test)))

	### scale the data
	# standardization
	std_Scaler = StandardScaler()
	data_std = std_Scaler.fit_transform(X)

	mmx_Scaler = MinMaxScaler()
	X_scaled = mmx_Scaler.fit_transform(X)
	#X_test_scaled = scaler.transform(X_test)

	#print(X_train_scaled)

	poly = PolynomialFeatures(degree = 2).fit(data_std)
	print(poly.get_feature_names_out())  # get_feature_names() was removed in newer scikit-learn
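	# (the snippet ends here; a sketch of a natural continuation, assuming the
	# same sklearn imports used above, expands the features and fits a linear model)
	X_poly = poly.transform(data_std)
	lin_model = linear_model.LinearRegression().fit(X_poly, y)
	print("R^2: {:.3f}".format(lin_model.score(X_poly, y)))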