Пример #1
0
    def dbReport(self, input_db, inputValues, taxon=9606, output="raw"):
        """Return the report produced by the remote ``dbReport`` call.

        :param input_db: value stored in the ``input`` field of the request
        :param inputValues: identifiers; normalised with
            ``self._interpret_input_db`` before being sent
        :param taxon: taxon identifier, sent to the service as a string
        :param output: ``"dataframe"`` parses the response as tab-separated
            text into a pandas DataFrame; any other value returns the
            response unchanged
        :raises ImportError: if ``output == "dataframe"`` and pandas is
            not installed

        ::

            s.dbReport("Ensembl Gene ID", ['ENSG00000121410', 'ENSG00000171428'])


        """
        inputValues = self._interpret_input_db(inputValues)

        params = self.suds.factory.create('dbReportParams')
        params.input = input_db
        params.taxonId = str(taxon)  # do we need the cast ?
        params.inputValues = inputValues

        res = self.serv.dbReport(params)
        if output != "dataframe":
            return res

        # Imported locally so the block does not depend on a module-level
        # ``import io`` that is not visible from here.
        import io
        try:
            import pandas as pd
        except ImportError:
            print(
                "Pandas library is not installed. dataframe are not  available"
            )
            # Re-raise: continuing would only fail later with a NameError.
            raise
        return pd.read_csv(io.StringIO(res.strip()), sep="\t")
Пример #2
0
    def dbReport(self, input_db, inputValues, taxon=9606, output="raw"):
        """Return the report produced by the remote ``dbReport`` call.

        :param input_db: value stored in the ``input`` field of the request
        :param inputValues: identifiers; normalised with
            ``self._interpret_input_db`` before being sent
        :param taxon: taxon identifier, sent to the service as a string
        :param output: ``"dataframe"`` parses the response as tab-separated
            text into a pandas DataFrame; any other value returns the
            response unchanged
        :raises ImportError: if ``output == "dataframe"`` and pandas is
            not installed

        ::

            s.dbReport("Ensembl Gene ID", ['ENSG00000121410', 'ENSG00000171428'])


        """
        inputValues = self._interpret_input_db(inputValues)

        params = self.suds.factory.create('dbReportParams')
        params.input = input_db
        params.taxonId = str(taxon) # do we need the cast ?
        params.inputValues = inputValues

        res = self.serv.dbReport(params)
        if output == "dataframe":
            # Local import: no module-level ``import io`` is visible here.
            import io
            try:
                import pandas as pd
            except ImportError:
                print("Pandas library is not installed. dataframe are not  available")
                # Propagate instead of failing below with a NameError on pd.
                raise
            df = pd.read_csv(io.StringIO(res.strip()), sep="\t")
            return df
        else:
            return res
Пример #3
0
def get_top_votos(cargo, indice):
    """Return the ``indice`` rows with the most votes for one ``cargo``.

    Reads ``dados/dados.csv`` (relative to the current working directory),
    keeps the rows whose ``CARGO`` column equals ``cargo``, sorts them by
    ``QTD_VOTOS`` in descending order and returns the first ``indice`` rows
    as a DataFrame.
    """
    import pandas as pd

    dados = pd.read_csv('dados/dados.csv')
    dados = dados.loc[dados['CARGO'] == cargo]
    dados = dados.sort_values('QTD_VOTOS', ascending=False).head(indice)

    return dados
def import_db():
    """Append the rows of a user-supplied CSV file to the ``Students`` table.

    Prompts for confirmation first; any answer other than ``y``/``Y`` aborts
    and returns ``None`` without touching ``studentdb.db``. Otherwise prompts
    for a CSV path, loads it with pandas and appends it to the ``Students``
    table of ``studentdb.db``.
    """
    safety = input("WARNING, before you do this ensure you have made a copy. Do you wish to proceed? y/n: ")
    if safety != "y" and safety != "Y":
        print ("Response recieved, aborting operation")
        return
    file_name = input("Please specify a file path with valid extension: ")
    db_vals_csv = pandas.read_csv(file_name)
    # Connect only after the CSV has loaded, so an abort or an unreadable
    # file never leaves an open connection behind.
    conn = sqlite3.connect("studentdb.db")
    try:
        db_vals_csv.to_sql("Students", conn, if_exists="append", index = False)
        conn.commit()
    finally:
        conn.close()
Пример #5
0
def paradigm_info(design_text_file):
    """Create the inputs for first- (or second-) level analyses based on a csv design file.

    The expected column titles for the csv are 'name' for the specific task
    (e.g., 'fp_run1'), 'conditions' (e.g., 'highCal'), 'onsets', and
    'durations'.

    For each distinct task name (in sorted order) the matching rows'
    'conditions', 'onsets' and 'durations' columns are collected, and the
    three collections are returned in a nipype ``Bunch``.

    Any error while reading or processing the file is reported with a
    printed message and then re-raised.
    """
    from nipype.interfaces.base import Bunch
    import pandas as pd
    try:
        df = pd.read_csv(design_text_file)
        conditions = []
        onsets = []
        durations = []

        for task in sorted(set(df['name'])):
            # Filter into a separate variable: overwriting ``df`` here would
            # leave later tasks with an already-filtered (empty) frame.
            task_rows = df.loc[df['name'] == task, :]
            conditions.append(task_rows['conditions'])
            onsets.append(task_rows['onsets'])
            durations.append(task_rows['durations'])
        # Bunch accepts a single mapping and/or keyword arguments; three
        # positional lists raise TypeError.
        # NOTE(review): the extra list wrapping is kept from the original
        # call - confirm the nesting the downstream interface expects.
        par_info = Bunch(conditions=[conditions],
                         onsets=[onsets],
                         durations=[durations])

    except Exception:
        # TODO test to see what types of errors may be common (lists in the durations, no column name, etc.)
        print('Error in paradigm info.')
        # Re-raise: ``par_info`` is unbound here, so returning would only
        # hide the real error behind an UnboundLocalError.
        raise

    return par_info
Пример #6
0
#!/usr/bin/env python
# coding: utf-8

# In[25]:


import numpy as np
import scipy as ss
from collections import Counter
import math
from scipy import stats
import pandas as pd
# NOTE(review): both paths are empty placeholders; fill them in before running.
data=pd.read_csv("")
#print(data.columns)

traindf=pd.read_csv('')
# Binary target: True where 'Overall' is at least 87.
y=traindf['Overall']>=87
# Feature matrix: every column except the target.
x=traindf.copy()
del x['Overall']
feature_name=list(x.columns)
num_feats=30
print(feature_name)
def cor_selector(x,y,num_feats):
    """Select the columns of ``x`` most correlated (in absolute value) with ``y``.

    Computes the Pearson correlation between each column of ``x`` and ``y``
    with ``np.corrcoef``, treating NaN correlations as 0, and keeps the
    ``num_feats`` columns with the largest absolute correlation.

    Returns a tuple ``(cor_support, cor_feature)``: ``cor_support`` is a list
    of booleans aligned with ``x.columns`` marking the selected columns, and
    ``cor_feature`` lists the selected column names in ascending order of
    absolute correlation. Both are empty when ``x`` has no columns.
    """
    feature_name=x.columns.tolist()
    if not feature_name:
        return [],[]
    cor_list=[np.corrcoef(x[name],y)[0,1] for name in feature_name]
    # A constant column has an undefined (NaN) correlation; rank it last.
    cor_list=[0 if np.isnan(cor) else cor for cor in cor_list]
    # Ranking is done once, after all correlations are known.
    cor_feature=x.iloc[:,np.argsort(np.abs(cor_list))[-num_feats:]].columns.tolist()
    selected=set(cor_feature)
    cor_support=[name in selected for name in feature_name]
    return cor_support,cor_feature
Пример #7
0
#importing important libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.model_selection as sl
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

#loading dataset and storing in a variable named dataset
dataset = pd.read_csv('Climate.csv')

#Creating primary array of temperature and rainfall
# Double brackets keep each selection a 2-D DataFrame.
Temperature = dataset[['MeanTemperatureMinimum']]
Rainfall = dataset[['MeanRainfallMM']]

#Splitting the dataset (80% train / 20% test, shuffled)
Temptrain, Temptest, Raintrain, Raintest = sl.train_test_split(Temperature,
                                                               Rainfall,
                                                               test_size=0.2,
                                                               shuffle=True)

#Creating regression object
# ``normalize`` is omitted: False was its default and the parameter has been
# removed from LinearRegression in recent scikit-learn releases.
regr = linear_model.LinearRegression(fit_intercept=True)

#Training the model
regr.fit(Temptrain, Raintrain)

#Using the model to predict values from Test Dataset
Rainpred = regr.predict(Temptest)

#Printing mean square error and r2 score
Пример #8
0
import pandas as pd
from pandas.tseries.resample import TimeGrouper
from pandas.tseries.offsets import DateOffset
# Load the tweets and index them by creation time.
# NOTE(review): the 'createdat' column name is kept as written; confirm it
# matches the CSV header.
flyers = pd.read_csv('/Users/ramintaghizada/TwitterMining/Flask/myproject/test.csv')
flyers['createdat'] = pd.to_datetime(pd.Series(flyers['createdat']))
flyers.set_index('createdat', drop=False, inplace=True)
# Interpret the naive timestamps as GMT, then convert them to EST.
flyers.index = flyers.index.tz_localize('GMT').tz_convert('EST')
# Shift every timestamp back by 12 hours.
flyers.index = flyers.index - DateOffset(hours = 12)
flyers.index
Пример #9
0
#importing important libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn.model_selection as sl
from sklearn.linear_model import LogisticRegression
import seaborn as sns
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

# Numeric codes for the EmploymentType column; a blank (' ') entry is coded
# the same as 'Self employed'.
employment = {'Salaried': 0, 'Self employed': 1, ' ': 1}

#importing dataset
dataset = pd.read_csv('CreditCardData.csv')
dataset = dataset.dropna()

#creating dummy variable for column EmploymentType
dataset.EmploymentType = [
    employment[number] for number in dataset.EmploymentType
]
# NOTE(review): the column names below are kept exactly as written; confirm
# they match the CSV header.
loanDefaulter = dataset[['loandefault']]
factors = dataset[[
    'disbursedamount', 'assetcost', 'EmploymentType', 'PRI.CURRENT.BALANCE',
    'PRI.SANCTIONED.AMOUNT', 'PRIMARY.INSTAL.AMT'
]]

#creating training and testing set and scaling the data
factorsTrain, factorsTest, loanTrain, loanTest = sl.train_test_split(
    factors, loanDefaulter, test_size=0.2, shuffle=True)
scaler = StandardScaler()
# Fit the scaler on the training split only.
factorsTrain = scaler.fit_transform(factorsTrain)
Пример #10
0
from sklearn.preprocessing import OneHotEncoder
# NOTE(review): ``categorical_features`` is not accepted by recent
# scikit-learn releases; porting it means wrapping the encoder in a
# ColumnTransformer. Left unchanged because ``X`` is defined outside this
# fragment.
one=OneHotEncoder(categorical_features=[0,4,5,6])
X=one.fit_transform(X)
X=X.toarray()

#Scaling now the feature matrix
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X=sc.fit_transform(X)

from sklearn.tree import DecisionTreeClassifier
dtf=DecisionTreeClassifier(max_depth=25)# increase precision
dtf.fit(X,y)
# Accuracy on the training data itself.
dtf.score(X,y)

test=pd.read_csv("test.csv")
X_test=test.iloc[:,[1,2,5,6,7,8,10]].values #feature matrix 
# NOTE(review): X_test is not passed through ``one`` / ``sc`` before
# predicting, unlike the training matrix - confirm this is intended.

y_test=dtf.predict(X_test)

from sklearn.metrics import precision_score,recall_score,f1_score
# NOTE(review): ``y_pred`` is never defined in this fragment and ``y_test``
# holds predictions, not ground truth; these calls need real labels.
precision_score(y_test,y_pred)
recall_score(y_test,y_pred)
f1_score(y_test,y_pred)






Пример #11
0
def Recognize(fname):
    """Parse the CSV file at ``fname`` with pandas.

    The parsed frame is not used or returned; the call only fails if the
    file cannot be read as CSV.
    """
    pd.read_csv(fname)
Пример #12
0
    in the shape of ["State","RegionName"].'''
           
    states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 
          'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 
          'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 
          'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 
          'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 
          'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 
          'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 
          'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 
          'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 
          'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 
          'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 
          'ND': 'North Dakota', 'VA': 'Virginia'}
           
    quarterly_data = (pd.readcsv('City_Zhvi_AllHomes.csv').drop(['RegionID','Metro','CountyName','SizeRank'],axis=1)
                        .sort_values(['State','RegionName'],ascending=[True,True]))
    quarterly_data['State'] = quarterly_data['State'].map(states)
    quarterly_data.set_index(['State','RegionName'],inplace=True)
    quarterly_data.drop(quarterly_data.columns[0:45],axis=1,inplace=True)                 
    quarterly_data = quarterly_data.groupby(pd.PeriodIndex(quarterly_data.columns,freq='Q'),axis=1).mean()
    print(quarterly_data)
    
convert_quarterly_data()
          
def run_ttest():
    '''First creates new data showing the decline or growth of housing prices
    between the recession start and the recession bottom. Then runs a ttest
    comparing the university town values to the non-university towns values, 
    return whether the alternative hypothesis (that the two groups are the same)
    is true or not as well as the p-value of the confidence. '''
        data.append(cur_feature)

    data = pd.concat(data, axis=0)

    return data


# In[ ]:

## test

if __name__ == '__main__':
    gt_path = './data/gt.csv'
    # NOTE(review): '.data/docs/' (no slash after the dot) differs from the
    # './data/' prefix used for gt_path - confirm which one is intended.
    doc_folder = '.data/docs/'

    table_gt = pd.read_csv(gt_path)
    pdfnames = table_gt['filename'].unique()
    # table_gt is the ground truth table
    # cols: filename, region_id, table_id, page_n, x1, y1, x2, y2, cor_x1, cor_y1, cor_x2, cor_y2

    # Build one feature table per distinct PDF file name.
    data = {}
    for pdfname in pdfnames:
        data[pdfname] = get_table_feature(table_gt, doc_folder, pdfname)
    # the format of data[pdfname] is a dataframe, with cols of:
    # [row,top,bottom,to_pre,to_next,row_ele,row_min,row_max,match_pre,match_next,top_to_line,bottom_to_line,spaceratio,label]

# In[ ]:
'''
## to build models on these feature, just do:
X = data[pdfname].iloc[:, 1:-1]
Пример #14
0
def load_csv():
    """Read the CSV file at ``CSV_FILE_LOCATION`` and return it as a DataFrame."""
    return pandas.read_csv(CSV_FILE_LOCATION)
import numpy as np
import scipy
import pandas as pd
import sklearn as sk

import matplotlib as plt

# Set some constant values that will never change

# Standard working hours per year: 40 hours a week for 52 weeks.
avg_work_hrs_yr = 40 * 52
#TODO set function for hours in order to account for non-standard work weeks


# set initial hours
# needs to be based on time
hours = pd.read_csv('work_hours.csv')
hr_per_week = pd.read_csv('work_week.csv')

# set hourly rate
rates = pd.read_csv('hourly_rate.csv')

deduction = pd.read_csv('deductions.csv')

# deductions block from other budget file
# DEDUCTION CALCULATION LEADING TO USEABLE INCOME
'''
def deduction_calc():
    if 0.00 < total_gross_income <= 9525.99:
        ded_taxes = (total_gross_income - deductions()) * 0.10
        ded_taxes = round(ded_taxes, 2)
        return ded_taxes
Пример #16
0
import pandas as pd

# Raw string: in a normal literal the '\n' of '\news_article' is a newline.
article = pd.read_csv(r'G:\news_article.csv')
Пример #17
0
import pandas as pd

dataframe = pd.read_csv("01110.csv")

# Preview the first rows (was the misspelt, undefined name ``datafram``).
dataframe.head()
def holi(to_mins, route):
    """Fit car and bus travel-time regressions for the slot containing ``to_mins``.

    The day between minute 480 and minute 1260 is split into thirteen
    60-minute slots; slot ``k`` (1-based) covers
    ``[480 + 60*(k-1), 480 + 60*k)`` and its data lives in ``sloth<k>.csv``.
    The file for the slot containing ``to_mins`` is loaded and its
    ``to_minutes`` column is used as the single regression feature.

    For ``route`` 1 or 2 the module-level estimator ``reg`` is fitted on a
    70/30 train/test split, first against the ``car1`` column and then
    against the ``bus1`` column, and asked to predict for ``to_mins`` each
    time. Any other ``route`` does nothing beyond loading the slot file.

    :raises ValueError: if ``route`` is 1 or 2 and ``to_mins`` falls outside
        every slot (previously an UnboundLocalError on ``traffic``).

    NOTE(review): the visible source never returns ``ans1`` / ``ans2``; the
    function returns ``None`` exactly as before. Confirm against the full
    original whether the predictions should be returned.
    """
    traffic = None
    X = None
    for slot in range(1, 14):
        slot_start = 480 + 60 * (slot - 1)
        # Same comparisons as the original hand-written chain.
        if (to_mins >= slot_start and to_mins < slot_start + 60):
            traffic = pd.read_csv('sloth%d.csv' % slot)
            X = traffic['to_minutes'].values
            X = X.reshape(-1, 1)
            break

    # The original had identical bodies for route 1 and route 2, followed by
    # a dozen further ``elif (route == 2)`` copies that could never run.
    if (route == 1 or route == 2):
        if traffic is None:
            raise ValueError(
                "to_mins=%r is outside the supported range [480, 1260)" %
                (to_mins, ))

        predictions = []
        for column in ('car1', 'bus1'):
            Y = traffic[column].values
            Y = Y.reshape(-1, 1)
            X_train, X_test, y_train, y_test = train_test_split(
                X, Y, test_size=0.3, random_state=42)
            reg.fit(X_train, y_train)
            # NOTE(review): ``to_mins`` is passed to predict() unchanged, as
            # in the original; scikit-learn estimators expect a 2-D input.
            predictions.append(reg.predict(to_mins))

        # Predicted time by car, then by bus.
        ans1, ans2 = predictions
Пример #19
0
def populateFileArray(nameFile):
    """Read the CSV file ``nameFile`` into a pandas DataFrame.

    NOTE(review): the visible source stops after loading ``df``; nothing is
    returned, so this block keeps that behaviour.
    """
    df = pandas.read_csv(nameFile)
Пример #20
0
import pandas

# Read the source CSV and write a copy of it.
df = pandas.read_csv('excsv.csv')
# The target file name must be a string; the bare ``excsv1.csv`` was an
# attribute lookup on an undefined name.
df.to_csv('excsv1.csv')
Пример #21
0
import pandas as pd

# One DataFrame per patient, keyed by patient number.
AllData = {}
for patient in range(0,10):
    # Same path that the original print statement built.
    path = "Data/Patient" + str(patient) + "/2019-01-15_TimeSeries_Muse-EEG_0.csv"
    data1 = pd.read_csv(path)
    print(path)
    AllData[patient] = data1
Пример #22
0
#plots
# Keyword x=/y= arguments work across seaborn versions; newer releases
# reject positional data vectors.
plt.figure()
sb.lineplot(x=Average.index, y=Average.Houses)

plt.figure()
sb.lineplot(x=Aheating.index.hour, y=Aheating)
# Overlay August only.
august = Aheating.index.month == 8
sb.lineplot(x=Aheating[august].index.hour, y=Aheating[august])

plt.figure()
sb.lineplot(x=Hheating.index.hour, y=Hheating)
# ``a == 6 and a == 7 and a == 8`` on an index raises ValueError (and could
# never be true for a single month); select June-August with isin().
# NOTE(review): the original used the June-August mask for x and an
# August-only mask for y; the same mask is used for both here so the lengths
# match - confirm the intended months.
summer = Hheating.index.month.isin([6, 7, 8])
sb.lineplot(x=Hheating[summer].index.hour, y=Hheating[summer])

#save dataframe to json
k = df.iloc[:50, :5].to_json(orient='index')
import json
# ``k`` is already JSON text; json.dump(k, f) would encode it a second time
# as one quoted string, which pd.read_json below cannot read back as a table.
with open('dataframe.json', 'w') as f:
    f.write(k)
#read json
with open('dataframe.json') as f:
    data = json.load(f)

data = pd.read_json('dataframe.json', orient='index')

# NOTE(review): 'Fle' looks like a placeholder path.
pd.read_csv('Fle')
Пример #23
0
import pandas as pd

#imports data file
# sep=r"\s+" is the documented equivalent of delim_whitespace=True.
mpg = pd.read_csv("jp-us-mpg.dat", sep=r"\s+")
mpg.head()
mpg.tail()


from numpy import mean

#Drops missing and finds mean
mean(mpg["Japan"].dropna())
mean(mpg["US"].dropna())

from numpy import var
us = mpg["US"].dropna()
jp = mpg["Japan"].dropna()
# Sample variance: population variance scaled by n / (n - 1), each sample
# using its own n.
jp_var = var(jp) * (len(jp) / float(len(jp) - 1))
us_var = var(us) * (len(us) / float(len(us) - 1))


# Notes (kept from the original, turned into comments):
# summation : (xsubi - mean)^2 / (n-1)
# (20.1 - 30.5) / (sqrt((41.1/149) + (37.3/79))) = 12/94
# NOTE(review): "12/94" is presumably 12.94, cf. the 12.946 used below.

# P-value
#Two sided ptest
from scipy.stats import t
2* (1.0  - t.cdf(abs(12.946), 136.8750))
Пример #24
0
import pandas

# Read the source CSV and write a copy of it.
df = pandas.read_csv('infor.csv')
# The target file name must be a string; the bare ``infor1.csv`` was an
# attribute lookup on an undefined name.
df.to_csv('infor1.csv')
Пример #25
0
    plt.show()
    return


def writePCADataToCSV(csvData):
    """Write two-column rows to ``PCAData.csv`` in the working directory.

    The file starts with the header line ``X-Values,Y-Values``; each item of
    ``csvData`` then contributes one line built from ``str(item[0])`` and
    ``str(item[1])`` joined by a comma. An existing file is overwritten.
    """
    with open('PCAData.csv', 'w') as out:
        out.write(",".join(("X-Values", "Y-Values")) + "\n")
        out.writelines(
            ",".join((str(row[0]), str(row[1]))) + "\n" for row in csvData)
    # The ``with`` block has already closed the file.


if __name__ == '__main__':
    import sys

    # The original called the reader with no path at all, which cannot
    # work; the input CSV is now taken from the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit("usage: python <script> <input.csv>")
    originalMatrix = pd.read_csv(sys.argv[1])
    # to calculate the eigen values and eigen vectors
    covar_matrix = PCA(n_components=2)

    # We perform data preprocessing using StandardScaler()
    originalMatrix = StandardScaler().fit_transform(originalMatrix)
    principalComponents = covar_matrix.fit_transform(originalMatrix)
    principalDf = pd.DataFrame(
        data=principalComponents,
        columns=['principal component 1', 'principal component 2'])

    # First and second principal component of every sample.
    x = principalComponents[:, 0]
    y = principalComponents[:, 1]

    # performing the visualization
    scatterPlotVisualizer(x, y)
Пример #26
0
import pandas as pd
import numpy as np
import matplotlib as plt

countries = pd.read_csv("countries.csv")
currencies = pd.read_csv("currencies.csv")


def print_lat_lan(filename, delay):
    """Print the first two columns of every row of a CSV file, with a pause.

    Reads ``filename`` with pandas and, for each row, sleeps ``delay``
    seconds and then prints ``Eid: (<first column>, <second column>)``.

    NOTE(review): the original looped over column labels, indexed whole
    columns and used a format string with too few arguments, so its intended
    output is an assumption - confirm the expected line format.
    """
    import time

    # Use the ``filename`` argument, not the literal string "filename".
    geocordinates = pd.read_csv(filename)
    for row in geocordinates.itertuples(index=False):
        time.sleep(delay)
        print("%s: %s" % ("Eid", (row[0], row[1])))