@author: Madhu """ # Import libraries necessary for this project import sklearn import numpy as np from matplotlib import pyplot as plt print "-----------------------------------------------------------------------" print('The scikit-learn version is {}.'.format(sklearn.__version__)) #get the working directory and filename path = r'C:\Users\pmspr\Documents\HS\MS\Sem 2\EECS 738\Lab\2\Work\Code\Data' #load data using load class and print describe of data from projectFunctions import loadData filename = "forestfires.csv" data = loadData(path, filename) ##explore the data from projectFunctions import exploreData exploreData(data) # Success - Display the first record if data is not None: display(data.head(n=1)) print data.describe(include='all') drop_col = ['X', 'Y', 'rain', 'area'] features_raw = data.drop(drop_col, axis=1) target_raw = data['area'] if features_raw is not None: display(features_raw.head(n=1))
# -*- coding: utf-8 -*-
"""
Created on Sun May 3 13:54:50 2020

@author: pmspr
"""

# Step 0: Import relevant packages
# All project helpers are imported once, up front, instead of being
# scattered between the processing steps.
from projectFunctions import cnn11D, loadData, padInput

# Step 1: Load imdb database
x_train, y_train, x_test, y_test = loadData()

# Step 2: Pad train and test data
x_train, x_test = padInput(x_train, x_test)

# Step 3: Create a 1D CNN for baseline
cnn11D(x_train, x_test, y_train, y_test)
#import networkx as nx import numpy as np import pandas as pd from pandas import compat compat.PY3 = True print( "-----------------------------------------------------------------------") print('The scikit-learn version is {}.'.format(sklearn.__version__)) #load functions from from projectFunctions import loadData, sentimentPolarity, exploreData, missingValues path = r'C:\Users\pmspr\Documents\HS\MS\Sem 3\EECS 731\Week 5\HW\Git\EECS-731-Project-3\Data' filename = "links.csv" data_l = loadData(path, filename) path = r'C:\Users\pmspr\Documents\HS\MS\Sem 3\EECS 731\Week 5\HW\Git\EECS-731-Project-3\Data' filename = "movies.csv" data_m = loadData(path, filename) genres = [ 'Action', 'Adventure', 'Animation', 'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western' ] d1 = pd.DataFrame(columns=['movieId', 'title', 'genre']) for ind, row in data_m.iterrows(): gstr = row['genres'] glst = gstr.split("|") cnt = 0
#for fn in uploaded.keys():
#  print('User uploaded file "{name}" with length {length} bytes'.format(
#      name=fn, length=len(uploaded[fn])))


# In[2]:


#!ls


# ### **Load data**
#
# The next cell loads the training and test files with *pF.loadData()*.
# According to the original notebook notes, that helper reads the data in
# the specified input format and sorts it by user and date.

# In[2]:


# Load datasets
df_train = pF.loadData("train.txt")
df_train.head()
df_test = pF.loadData("test.txt")


# In[3]:


df_train.shape


# In[4]:


# Number of distinct customers and distinct products in the training data
n_users = len(df_train['ID_Customer'].unique())
n_items = len(df_train['Cod_Prod'].unique())
print(str(n_users) + ' users')
print(str(n_items) + ' products')
import seaborn as sns
from matplotlib import pyplot as plt
from pandas import compat

# Apply seaborn's default plot styling
sns.set()
# Sets the PY3 attribute on the pandas.compat module.
# NOTE(review): the reason for this override is not visible here -- confirm.
compat.PY3 = True

print(
    "-----------------------------------------------------------------------")
# NOTE(review): `sklearn` and `pd` are not imported in the visible part of
# this file; presumably they are imported earlier -- confirm.
print('The scikit-learn version is {}.'.format(sklearn.__version__))

# Project helper functions
from projectFunctions import loadData, exploreData, missingValues, transformData

# Load the raw games table
path = r'C:\Users\pmspr\Documents\HS\MS\Sem 3\EECS 731\Week 6\HW\Git\EECS-731-Project-4\Data'
filename = "nfl_games.csv"
data_raw = loadData(path, filename)

# Working copy without the columns this script does not use
_unused_columns = ['neutral', 'playoff', 'date', 'season', 'result1']
data = data_raw.drop(_unused_columns, axis=1)
#data.rename(columns={'elo_prob1':'target'},inplace=True)

teams = data['team1'].unique()

# Per (team1, team2) pairing, count the rows; the count taken from the
# 'score1' column is kept under the name 'Count' and joined back onto
# every row of that pairing.
_pair_keys = ['team1', 'team2']
data_ct = data.groupby(_pair_keys, as_index=False).count()
data_ct = pd.DataFrame(data_ct, columns=_pair_keys + ['score1'])
data_ct = data_ct.rename(columns={'score1': 'Count'})
data = data.merge(data_ct, on=_pair_keys, how='inner')

# Check the missing values
misVal, mis_val_table_ren_columns = missingValues(data)
print(mis_val_table_ren_columns.head(20))

t1 = pd.DataFrame(data, columns=['team1', 'score1'])