Пример #1
def main():
    '''Compare the survival curves of two motion sickness experiments.

    The data in this example give the life table for motion sickness data
    from an experiment with vertical movement at a frequency of 0.167 Hz and
    acceleration 0.111 g, and of a second experiment with 0.333 Hz and
    acceleration of 0.222 g.

    Both datasets are loaded, their Kaplan-Meier curves are drawn into one
    figure, and the two datasets are passed to ``logrank``.

    Returns:
        The first element (``p``) of the tuple returned by
        ``logrank(data1, data2)``.
    '''

    # get the data
    # Raw strings keep the backslashes of the relative Windows path literal
    # instead of relying on the unrecognised escapes "\D" and "\d".
    data1 = getData('altman_13_2.txt', subDir=r'..\Data\data_altman')
    data2 = getData('altman_13_3.txt', subDir=r'..\Data\data_altman')

    # Determine the Kaplan-Meier curves; only the 3rd and 4th elements of the
    # returned tuples (t*, sp*) are needed for the plot.
    (_, _, t1, sp1, _) = kaplanmeier(data1)
    (_, _, t2, sp2, _) = kaplanmeier(data2)

    # Make a combined plot for both datasets
    plt.step(t1, sp1, where='post')
    plt.step(t2, sp2, 'r', where='post')

    plt.legend(['Data1', 'Data2'])
    plt.ylim(0, 1)
    plt.ylabel('Survival Probability')

    # Check the hypothesis that the two survival curves are the same
    # --- >>> START stats <<< ---
    (p, X2) = logrank(data1, data2)
    # --- >>> STOP stats <<< ---

    return p  # supposed to be 0.073326322306832212
Пример #56
from tradestats import tradestats
from plot import plot_net_value
from configue import M, T
import pandas as pd

# set the display parameters for pandas DataFrame
# (None removes the limit on how many columns / rows are shown)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# set some basic parameters
init_capital = 3000
quantity = 1
fee_rate = 0.0001

# do the backtest
data_1m = getData()
signaltrade_result = signaltrade(data_1m, 0.003, M, T, quantity, fee_rate)
# signaltrade_result is indexed positionally: [0] is used for the plot data
# below, [1] is the order book and [2] the trade dates.
orderbook = signaltrade_result[1]
tradedate = signaltrade_result[2]
# get detailed backtest performance
stats = tradestats(orderbook, init_capital, tradedate)

# save backtest performance into excel file
# The writer is used as a context manager so the workbook is saved and closed
# on exit; without that the sheets are never flushed to disk.
with pd.ExcelWriter('backtest_result.xlsx') as writer:
    stats.to_excel(writer, sheet_name='stats', index=False)
    orderbook.to_excel(writer, sheet_name='orderbook', index=False)


# plot the net value figure
start_time = signaltrade_result[0][tradedate[0]].loc[0, 'time']
Пример #57
    chi2 = (O1 - E1) ** 2 / V
    p = stats.chi2.sf(chi2, 1)

    print("X^2 = {0}".format(chi2))
    if p < 0.05:
        print("p={0}, the two survival curves are signifcantly different.".format(p))
        print("p={0}, the two survival curves are not signifcantly different.".format(p))

    return (p, chi2)

if __name__ == "__main__":
    # Load the two datasets that are compared against each other.
    data1 = getData("altman_13_2.txt")
    data2 = getData("altman_13_3.txt")

    # One Kaplan-Meier result tuple per dataset; the plot below only uses
    # the t* and sp* elements.
    p1, r1, t1, sp1, se1 = kaplanmeier(data1)
    p2, r2, t2, sp2, se2 = kaplanmeier(data2)

    # Draw both curves as post-step lines in the same axes, the second in red.
    plt.step(t1, sp1, where="post")
    plt.step(t2, sp2, "r", where="post")
    plt.legend(["Data1", "Data2"])

    # Axis cosmetics: label first, then clamp the y-range to [0, 1].
    plt.ylabel("Survival Probability")
    plt.ylim(0, 1)
Пример #58
def _read_line(prompt):
    """Show *prompt* and return the line typed by the user as a string.

    Uses ``raw_input`` when it exists (Python 2) and falls back to ``input``
    (Python 3), so the interactive flow runs on either interpreter.
    """
    try:
        reader = raw_input
    except NameError:
        reader = input
    return reader(prompt)


def _ask_yes_no(prompt):
    """Return True only when the user answers 'y' or 'Y' to *prompt*."""
    return _read_line(prompt) in ('y', 'Y')


def _forward(X, Theta1, Theta2):
    """Return the output-layer activations of the two-layer network for X.

    A column of ones is prepended to X and to the hidden activations before
    each weight matrix is applied; this matches the extra first row that
    main() adds to Theta1 and Theta2.
    """
    ones_col = np.ones((len(X), 1))
    a1 = np.concatenate((ones_col, X), axis=1)
    a2 = sigmoid(np.dot(a1, Theta1))
    a2 = np.concatenate((ones_col, a2), axis=1)
    return sigmoid(np.dot(a2, Theta2))


def _accuracy(predictions, targets):
    """Return the percentage of rows whose argmax equals the target's argmax."""
    correct = sum(
        1 for row, target in zip(predictions, targets)
        if np.argmax(row) == np.argmax(target)
    )
    return 100.0 * correct / len(predictions)


def main():
    """Main function for sf-crime machine learning.

    From training data try to predict the category of crime
    given the date and location.

    The function is interactive. Each outer round asks for the fraction of
    'train.csv' to train on and whether to shuffle, builds randomly
    initialised weights, and then, for every lambda the user enters,
    minimises ``costFunction`` with ``fmin_bfgs`` and prints the accuracy on
    the training data (and on the held-out data when there is any).
    Optionally the predictions for 'test.csv' are written to a CSV file
    chosen by the user. The round repeats while the user answers 'y'.
    """
    again = True

    while again:
        p = float(_read_line('Percent of data to train on: '))
        ran = _ask_yes_no('Shuffle data?(y/n) ')

        # setup matrices from train.csv file
        out = getData('train.csv', perc=p, rand=ran)
        X = np.array(out['X'])
        Y = out['Y']
        X_test = np.array(out['X_test'])
        Y_test = out['Y_test']
        crimes = out['crimes']

        # calculate mean and standard deviation (over all entries of X) and
        # apply the training statistics to both training and held-out data
        mu = np.mean(X)
        sigma = np.std(X)
        X = normalize(X, mu, sigma)
        X_test = normalize(X_test, mu, sigma)

        # get dimensions of matrices
        m = len(X)
        n = len(X[0])
        k = len(Y[0])
        k_h = (n + k) // 2  # hidden width: midpoint of input and output widths
        # Single-argument print() yields the same text on Python 2 and 3.
        print('Dimensions: m = %s n = %s k = %s k_h = %s' % (m, n, k, k_h))

        # randomly initialize Theta: uniform values in [-epsilon, epsilon)
        epsilon = 0.15
        Theta1 = np.random.rand(n, k_h) * 2 * epsilon - epsilon
        Theta2 = np.random.rand(k_h, k) * 2 * epsilon - epsilon
        # Prepend a row of ones to each matrix; these are the weights applied
        # to the constant-1 column that _forward() prepends.
        Theta1 = np.concatenate((np.ones((1, k_h)), Theta1), axis=0)
        Theta2 = np.concatenate((np.ones((1, k)), Theta2), axis=0)
        # The optimiser works on one flat vector: Theta1 first, then Theta2.
        Theta = np.append(Theta1.flatten(), Theta2.flatten())

        # minimize costFunction of Theta
        new_lam = True
        while new_lam:
            lam = float(_read_line('Enter lambda: '))
            # Every lambda starts from the same initial Theta.
            xopt = fmin_bfgs(costFunction, Theta,
                             fprime=gradient, args=(X, Y, lam))
            # Undo the flattening done above.
            split = (n + 1) * k_h
            Theta1 = np.reshape(xopt[:split], (n + 1, k_h))
            Theta2 = np.reshape(xopt[split:], (k_h + 1, k))

            # accuracy against training set
            train_acc = _accuracy(_forward(X, Theta1, Theta2), Y)
            print('Training set accuracy = %s' % train_acc)

            # if there is a test matrix test accuracy of Theta
            if len(X_test) > 0:
                test_acc = _accuracy(_forward(X_test, Theta1, Theta2), Y_test)
                print('Test set accuracy = %s' % test_acc)

            new_lam = _ask_yes_no('Different lambda?(y/n) ')

        if _ask_yes_no('Create submission file?(y/n) '):
            # create predictions for kaggle test data set, using the weights
            # from the last lambda tried and the training mu / sigma
            out = getData('test.csv', perc=1.0, test=True)
            X_test = normalize(out['X'], mu, sigma)
            ans = _forward(X_test, Theta1, Theta2)

            # write to submission csv file; the with-block guarantees the
            # file is flushed and closed even if a write fails
            sub_file = _read_line('Enter submission file name: ')
            with open(sub_file, 'w') as f:
                f.write(','.join(['Id'] + list(crimes)) + '\n')
                for i, row in enumerate(ans):
                    f.write(str(i) + ',' + ','.join(map(str, row)) + '\n')

        again = _ask_yes_no('Run again? (y/n) ')
Пример #59
 def test_getdata(self):
     """Check that getData('altman_93.txt')[0][0] equals 5260."""
     first_entry = getData('altman_93.txt')[0]
     self.assertEqual(first_entry[0], 5260)