Пример #1
0
if __name__ == '__main__':
	# Load the apartment listings from the JSON dump.
	with open('data/MasterApartmentData.json') as f:
		my_dict = json.load(f)

	# DataFrame(my_dict) turns the top-level keys into columns; transpose so
	# that each top-level key becomes a row instead.
	dframe = DataFrame(my_dict).T

	# Keep only the columns used downstream. The explicit copy makes this an
	# independent frame, so the column assignment below does not write into
	# a view of the transposed frame.
	dframe = dframe[[
		'content', 'laundry', 'price', 'dog', 'bed',
		'bath', 'feet', 'long', 'parking', 'lat', 'smoking', 'getphotos',
		'cat', 'hasmap', 'wheelchair', 'housingtype',
	]].copy()

	# Save the number of data points that we started with.
	og = dframe.shape[0]

	# Replace shared/split bathrooms with "half" of a bathroom.
	dframe['bath'] = dframe['bath'].replace(['shared', 'split'], 0.5)

	# Only model listings inside a fixed lat/long bounding box (strict
	# inequalities on every side).
	# NOTE(review): the box presumably covers the Portland, OR area - confirm.
	# A single combined mask is used instead of chained [..][..] lookups:
	# chaining applies masks computed on the unfiltered frame to an already
	# filtered one, which forces pandas to reindex each mask.
	in_bounds = (
		(dframe['lat'] > 45.4) & (dframe['lat'] < 45.6)
		& (dframe['long'] < -122.0) & (dframe['long'] > -123.5)
	)
	dframe = dframe[in_bounds]

	aftermap = dframe.shape[0]

	# Report how many rows the bounding-box filter dropped.
	# The parenthesized form prints the same value on Python 2 and Python 3.
	print(og - aftermap)

	# One [lat, long] pair per remaining row, in row order.
	data = dframe[['lat', 'long']].values.tolist()

	# Cluster the coordinates; the 40 cluster centers are kept as
	# "neighborhoods".
	# NOTE(review): no random_state is passed, so the centers can differ
	# between runs.
	from sklearn.cluster import KMeans
	km = KMeans(n_clusters=40)
	km.fit(data)
	neighborhoods = km.cluster_centers_