Пример #1
0
def test_AnscomeQuartet():
    '''!
    RABE p25

    All four Anscombe data sets share (to the stated tolerance) the same
    correlation and the same fitted intercept, despite looking completely
    different when plotted.
    '''
    data = csv('../data/P025b.txt', sep='\t')

    # Column pairs (Y1, X1) ... (Y4, X4) as arrays, in order.
    pairs = [(array(data['Y%d' % k]), array(data['X%d' % k]))
             for k in (1, 2, 3, 4)]

    fits = [simpleLR(y, x) for y, x in pairs]
    cors = [cor(y, x) for y, x in pairs]

    # Every data set's correlation matches the first one's.
    for c in cors[1:]:
        assert abs(cors[0] - c) < 0.0005
    # Every fitted intercept matches the first one's.
    for fit in fits[1:]:
        assert abs(fits[0]['beta0Hat'] - fit['beta0Hat']) < 0.005
Пример #2
0
def test_Parabola():
    '''!
    RABE p25

    The parabola data has exactly zero linear correlation even though
    Y is a deterministic function of X.
    '''
    frame = csv('../data/P025a.txt', sep='\t')
    assert cor(frame['X'], frame['Y']) == 0
Пример #3
0
def load_titanic():
    """Load the Titanic dataset.

    Reads ``titanic.csv`` from the directory containing this module.

    Returns
    -------
    data : DataFrame
        DataFrame containing the Titanic dataset.
    """
    # NOTE(review): the original docstring said "ames housing dataset" —
    # copy/paste error; this function loads titanic.csv.
    module_path = dirname(__file__)
    # BUG FIX: pandas has no `pd.csv`; `read_csv` is the CSV reader.
    return pd.read_csv(join(module_path, 'titanic.csv'))
Пример #4
0
def test_ComputerRepairData():
    '''!
    RABE p27-42
    NOTE: the correlation test here doesn't match RABE. I believe this
    to be an error in RABE, not in my code.
    '''
    computerRepairData = csv('../data/P027.txt', sep='\t')
    Y = array(computerRepairData['Minutes'])
    X = array(computerRepairData['Units'])

    assert (abs(bar(Y) - 97.21) < 0.005)
    assert (abs(bar(X) - 6) == 0)
    assert (abs(cov(Y, X) - 136) < 0.5)
    assert (abs(cor(Y, X) - 0.9936) < 0.0005)

    # beta_1^0 == 12, so beta00 needs to be set while beta10 does not.
    # (Original comment was garbled; typo "nees" fixed.)
    lr = simpleLR(Y, X, criticalT=2.18)
    assert (abs(sum(X) - 84) < 0.5)
    assert (abs(sum(Y) - 1361) < 0.5)
    # BUG FIX: on the following four asserts the comparison operator was
    # inside abs(...), so they evaluated abs(bool) rather than checking
    # |value - target| < tolerance.
    assert (abs(sum(Y - bar(Y))) < 1e-12)
    assert (abs(sum(X - bar(X))) < 1e-12)
    assert (abs(lr['SST'] - 27768.36) < 0.005)
    assert (abs(sum((X - bar(X))**2) - 114) < 0.5)
    assert (abs(sum((X - bar(X)) * (Y - bar(Y))) - 1768) < 0.005)
    assert (abs(lr['beta0Hat'] - 4.162) < 0.0005)
    assert (abs(lr['beta1Hat'] - 15.509) < 0.0005)
    assert (abs(lr['seBeta0Hat'] - 3.355) < 0.0005)
    assert (abs(lr['seBeta1Hat'] - 0.505) < 0.0005)
    assert (abs(lr['t0'] - 1.24) < 0.005)
    assert (abs(lr['t1'] - 30.71) < 0.005)
    assert (abs(lr['beta0HatPM'] - 2.18 * 3.355) < 0.0005)
    assert (abs(lr['beta1HatPM'] - 2.18 * 0.505) < 0.0005)
    # R^2 identities: SSR/SST == 1 - SSE/SST == cor(Y, X)^2
    assert (abs(lr['SSR'] / lr['SST'] - (1 - lr['SSE'] / lr['SST'])) < 1e-14)
    assert (abs(lr['SSR'] / lr['SST'] - cor(Y, X)**2) < 1e-14)
    assert (abs(lr['SSR'] / lr['SST'] - .987) < 0.0005)
    lrEstimate = simpleLREstimate(lr, 4, 0)

    assert (abs(lrEstimate['seY0Hat'] - 5.67) < 0.005)
    assert (abs(lrEstimate['seMu0Hat'] - 1.76) < 0.005)
Пример #5
0
# Print every row fetched by the (previously executed) cursor.
for x in mycursor:
    print(x)

#######################################
# Using sqlalchemy
#######################################
# pip install SQLAlchemy
# conda install -c anaconda sqlalchemy
import sqlalchemy

# connection string for sqlalchemy
# mysql+mysqldb://<user>:<password>@<host>[:<port>]/<dbname>
# BUG FIX: the original wrote 'localhost[8889]' — the brackets in the
# template above mean the port is optional; the actual syntax is host:port.
engine = sqlalchemy.create_engine('mysql+mysqlconnector://root:root@localhost:8889/')

# read the csv
# BUG FIX: pandas has no `pd.csv`; `read_csv` is the CSV reader.
data = pd.read_csv('file.txt')  # column names must match the MySQL table
# insert into the table
data.to_sql('customers', con=engine)
#######################################


#######################################
# Load the table directly into MySQL
#######################################
## SQL
#LOAD DATA LOCAL INFILE '/file.csv'
#INTO TABLE customers
#FIELDS TERMINATED BY ','
#LINES TERMINATED BY '\n'
#IGNORE 1 ROWS # header
#(name,address)
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load training data set from CSV file
# BUG FIX: pandas has no `pd.csv`; `read_csv` is the CSV reader (the
# test-set load below already used the correct name).
training_data_df = pd.read_csv("sales_data_training.csv")

# Load testing data set from CSV file
test_data_df = pd.read_csv("sales_data_test.csv")

# Data needs to be scaled to a small range like 0 to 1 for the neural
# network to work well.
scaler = MinMaxScaler(feature_range=(0, 1))

# Scale both the training inputs and outputs
scaled_training = scaler.fit_transform(training_data_df)
# BUG FIX: the original called fit_transform(training_data_df) again here,
# re-fitting the scaler and scaling the wrong frame. The test set must be
# transformed with the scaling learned from the training set.
scaled_testing = scaler.transform(test_data_df)

# Print out the adjustment that the scaler applied to the total_earnings column of data
print(
    "Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}"
    .format(scaler.scale_[8], scaler.min_[8]))

# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training,
                                  columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(scaled_testing,
                                 columns=test_data_df.columns.values)

# Save scaled data dataframes to new CSV files
scaled_training_df.to_csv("sales_data_training_scaled.csv", index=False)
scaled_testing_df.to_csv("sales_data_test_scaled.csv", index=False)
Пример #7
0
import os
import string
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

os.chdir('f:/di____di/season_1/training_data/poi_data')
# NOTE(review): error_bad_lines is deprecated in modern pandas
# (replaced by on_bad_lines); kept for behavior parity with the original.
df = pd.read_table('poi_data_count', header=None, error_bad_lines=False)
df = df.fillna('nx')
# re[x][tp] accumulates counts per (row, poi-type); mx tracks the largest
# poi type seen (starting floor of 20).
re = np.zeros([66, 26])
mx = 20
for x in range(0, 66):
    for y in range(1, 24):
        # BUG FIX: .ix was removed from pandas; .iloc is the positional
        # equivalent here (header=None gives default integer labels).
        s = df.iloc[x].iloc[y]
        if s != 'nx':
            # Cells look like "<type>:<count>" — TODO confirm against data.
            tp = int(s.split(':')[0])
            if tp > mx:
                mx = tp
            re[x][tp] = int(s.split(':')[1])


# BUG FIX: pandas has no `pd.csv`; `read_csv` is the CSV reader.
df = pd.read_csv('ddf1.csv')
Пример #8
0
@author: Arsene Gasana
"""

import pandas as pd
import os
from datetime import datetime
version = '1_0'

main_dir = (r'C:\Users\user\Desktop\Investigation data generation')
os.chdir(main_dir)
time = datetime.now()
# Timestamp string YYYYMMDDhhmmss used to suffix output directory names.
datestring_for_file = '%04d%02d%02d%02d%02d%02d' % (
    time.year, time.month, time.day, time.hour, time.minute, time.second)

# BUG FIX (both reads): pandas has no `pd.csv`; `read_csv` is the CSV reader.
SCdata = pd.read_csv(
    r'C:\Users\user\Desktop\Investigation data generation\Season Clients Detailed_20200928-093633.csv'
)
VRdata = pd.read_csv(
    r'C:\Users\user\Desktop\Investigation data generation\Light_20200928-073312.csv'
)
# Build a "District_OAFID" unique key in each frame for joining.
SCdata.insert(0, 'UID',
              SCdata['DistrictName'] + '_' + SCdata['OAFID'].astype('str'))
VRdata.insert(0, 'UID',
              VRdata['District'] + '_' + VRdata['OAFID'].astype('str'))
ListOfSitesUID = [
    'Nyamagabe_Nkumbure B', 'Nyagatare_Mahoro', 'Gatsibo_Nyabisindu A'
]

for site in ListOfSitesUID:
    # NOTE(review): dir_name is computed but never used in the visible
    # code — presumably consumed further down the original file.
    dir_name = '%s-%s-%s' % (site, version, datestring_for_file)
    os.chdir(main_dir)
if __name__ == "__main__":
    """
    python scripts/05_blend_predictions.py
    """

    # load test and leak
    test = load_data("test_clean")
    leak = load_data("is_leak")
    target = leak["meter_reading"].values

    # load predictions: stack every .npy (and optionally .csv) prediction
    # file in OUTPUT_PATH into one column-per-model matrix
    preds_matrix = [np.load(x) for x in glob.glob(f"{OUTPUT_PATH}/*.npy")]
    if len(glob.glob(f"{OUTPUT_PATH}/*.csv")) > 0:
        preds_matrix += [
            # BUG FIX: pandas has no `pd.csv`; `read_csv` is the reader.
            pd.read_csv(x).meter_reading.values
            for x in glob.glob(f"{OUTPUT_PATH}/*.csv")
        ]
    preds_matrix = np.vstack(preds_matrix).T
    # meter readings cannot be negative; clip predictions at zero
    preds_matrix[preds_matrix < 0] = 0

    # initialize data: train only on rows where the leak target is known
    X_train = preds_matrix[~np.isnan(target)]
    y_train = target[~np.isnan(target)]

    # correct site 0
    # NOTE(review): 0.2931 presumably converts units for site 0 / meter 0 —
    # confirm against the upstream data pipeline.
    correction_indices = (test.site_id[~np.isnan(target)]
                          == 0) & (test.meter[~np.isnan(target)] == 0)
    X_train[correction_indices] *= 0.2931
    y_train[correction_indices] *= 0.2931
def importCSV(file):
    """Read a header-less CSV file into a transposed numpy array.

    Missing cells are replaced with 0, the table is transposed (each
    original column becomes a row), the resulting shape is printed, and
    the array is returned.
    """
    frame = csv(file, header=None)
    matrix = frame.fillna(value=0).transpose().to_numpy()
    print(matrix.shape)
    return matrix
Пример #11
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pandas as pd

# NOTE(review): pandas has no `csv` attribute — this line raises
# AttributeError at runtime. The intended call is presumably
# pd.read_csv(<path-to-file>); confirm the path and fix.
pd.csv()