예제 #1
0
import turicreate as tc
# Load the favourites table from CSV into an SFrame.
data = tc.SFrame.read_csv('mangabox/manga_fav.csv')
# Open the interactive SFrame explorer (presumably blocks/launches a GUI; confirm for headless runs).
data.explore()
# Split into train/test sets per user, with 'uid' as the user column and
# 'manga_id' as the item column (same columns the model is trained on below).
train, test = tc.recommender.util.random_split_by_user(data, 'uid', 'manga_id')

print("#### train #####")
# Let Turi Create pick a recommender model for the (uid, manga_id) pairs.
m = tc.recommender.create(train, user_id='uid', item_id='manga_id')

print("#### eval #####")
# Evaluate the trained model on the held-out interactions.
res = m.evaluate(test)

# Overall precision/recall table from the evaluation results.
pro = res['precision_recall_overall']
# Positional arguments are presumably (num_rows, num_columns) -- confirm against SFrame.print_rows.
pro.print_rows(18, 3)
# Plot recall (x) against precision (y); the two strings are passed as the axis labels.
tc.show(pro['recall'], pro['precision'], 'recall', 'precision')

# Persist the trained model to 'recommend.model'.
m.save('recommend.model')
import turicreate

# Import the data: Philadelphia-area crime-rate / house-price table (CSV).
sales = turicreate.SFrame(
    'https://courses.cs.washington.edu/courses/cse416/18sp/notebooks/Philadelphia_Crime_Rate_noNA.csv'
)

# Explore the data.
# The house price in a town is correlated with the crime rate of that town.
# Low crime towns tend to be associated with higher house prices and vice versa.
turicreate.show(sales['CrimeRate'], sales['HousePrice'])

# Fit the regression model using crime rate as the only feature.
# validation_set=None: no validation split is used, so the fit sees all rows.
crime_model = turicreate.linear_regression.create(sales,
                                                  target='HousePrice',
                                                  features=['CrimeRate'],
                                                  validation_set=None,
                                                  verbose=False)

# Let's see what our fit looks like: raw observations as dots, model
# predictions as a line.
# (The module is `matplotlib.pyplot`; the previous `pylplot` spelling raised
# an ImportError before anything was plotted.)
import matplotlib.pyplot as plt
plt.plot(sales['CrimeRate'], sales['HousePrice'], '.', sales['CrimeRate'],
         crime_model.predict(sales), '-')
plt.show()

# Remove Center City and redo the analysis.
# Center City is the one observation with an extremely high crime rate, yet
# house prices are not very low. This point does not follow the trend of the
# rest of the data very well. A question is how much including Center City is
# influencing our fit on the other datapoints. Let's remove this datapoint
# (the rows where MilesPhila is 0.0) and see what happens.
sales_noCC = sales[sales['MilesPhila'] != 0.0]
turicreate.show(sales_noCC['CrimeRate'], sales_noCC['HousePrice'])

#Fitting a regression model for sales_noCC
crime_model_noCC = turicreatelinear_regression.create(sales_noCC,
import turicreate

# Load the house-sales data from a saved SFrame.
sales = turicreate.SFrame('home_data.sframe')

# Display the imported data in graphical view.
sales.show()
# Display the data in tabular view.
print(sales[:])

# Plot sqft_living (x) vs price (y) for rows 1..4999.
turicreate.show(sales[1:5000]['sqft_living'], sales[1:5000]['price'])

## Simple linear regression model
# Split the data into training data (75%) and testing data; the fixed seed
# keeps the split reproducible.
training_data, testing_data = sales.random_split(0.75, seed=0)

# Define the linear regression model.
# `features` must be a list of column names, so the single feature is wrapped
# in a list rather than passed as a bare string.
sqft_model = turicreate.linear_regression.create(training_data,
                                                 target='price',
                                                 features=['sqft_living'])

# Evaluate the quality of the model: mean test price for scale, then the
# model's evaluation metrics on the held-out data.
print(testing_data['price'].mean())
print(sqft_model.evaluate(testing_data))
# A bare expression produces no output in a script, so print the fitted
# coefficients explicitly.
print(sqft_model.coefficients)

# Exploring a little further
import matplotlib.pyplot as plt
# Equivalent of the `%matplotlib inline` line magic, written as a regular call
# so the file parses as plain Python. `get_ipython` is only defined when
# running under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')
plt.plot(testing_data['sqft_living'],testing_data['price'],'.',
예제 #4
0
import matplotlib.pyplot as plt
# Enable inline plotting; get_ipython is only available under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')
# Plot observed 'Purchased' values against 'Age' as dots and the predictions of
# `model` as a line. `test_set` and `model` are defined outside this view.
plt.plot(test_set['Age'], test_set['Purchased'], '.', test_set['Age'],
         model.predict(test_set), '-')

# In[19]:

# Feature columns used for the second model below.
my_features = ['Gender', 'EstimatedSalary']

# In[20]:

# Visualise the selected feature columns (`data` is defined outside this view).
data[my_features].show()

# In[21]:

# `t` is presumably the turicreate module alias bound earlier -- confirm.
# Plot 'Gender' against 'Purchased'.
t.show(data['Gender'], data['Purchased'])

# In[22]:

# Plot 'EstimatedSalary' against 'Purchased'.
t.show(data['EstimatedSalary'], data['Purchased'])

# In[23]:

# Fit a linear regression on `training_set` (defined outside this view)
# predicting 'Purchased' from the columns in my_features.
my_features_model = t.linear_regression.create(training_set,
                                               target='Purchased',
                                               features=my_features)

# In[24]:

# NOTE(review): this prints the feature-name list, not my_features_model -- confirm intended.
print(my_features)