Exemplo n.º 1
0
# Requires the pydataset package: pip install pydataset
from pydataset import data

# Fetch two sample datasets by name; the returned objects are not stored here.
data('iris')
data('mtcars')

# Load mtcars through statsmodels' Rdatasets interface. Dataset catalogue:
# https://vincentarelbundock.github.io/Rdatasets/datasets.html
import statsmodels.api as sm

mtcars = sm.datasets.get_rdataset('mtcars', package='datasets')
# Preview the first rows of the loaded table.
mtcars.data.head(5)


#%%
# Export the mtcars table to CSV and Excel files.
import pandas as pd  # pd.ExcelWriter is used below

# NOTE: this rebinds `data` (previously the pydataset loader) to the mtcars table.
data = mtcars.data
data.head(6)
type(data)
data.to_csv('mtcars.csv')
# sheet_name is passed by keyword: positional use is deprecated in pandas 2.x
# and keyword-only from pandas 3.0.
data.to_excel('mtcarsExcel.xlsx', sheet_name='sheet3', header=False)

# Writing to multiple sheets of one workbook.
# The context manager saves and closes the file on exit, replacing the
# ExcelWriter.save() call that was removed in pandas 2.0.
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter') as writer:
    # Write each table to a different worksheet; the sheet names are arbitrary strings.
    data.to_excel(writer, sheet_name='sheet1')
    data.to_excel(writer, sheet_name='sheet2')

#%%
# Show the documentation for data.to_excel. help() replaces the IPython-only
# `obj?` syntax so that this file also parses as plain Python.
help(data.to_excel)
#load from CSV and Excel
Exemplo n.º 2
0
# Two-cluster K-means on X2, followed by a plot of the points and the centres.
from sklearn.cluster import KMeans

# fit() returns the estimator itself, so construction and fitting are chained.
Kmean2 = KMeans(n_clusters=2).fit(X2)
centers2 = Kmean2.cluster_centers_  # fitted cluster centres
centers2
Kmean2.labels_  # cluster index assigned to each sample

# Samples coloured by assigned cluster, with the centres overlaid as red stars.
plt.scatter(x=X2[:, 0], y=X2[:, 1], s=50, c=Kmean2.labels_)
plt.scatter(x=centers2[:, 0], y=centers2[:, 1], s=100, marker='*', color=['red'])
plt.show()

# K-means on two iris measurements (sepal length and sepal width).
from pydataset import data

iris = data('iris')
# Work on a copy. NOTE: this rebinds `data`, hiding the pydataset loader imported above.
data = iris.copy()
data.head()
# How many groups are there? Count the rows for each species.
data['Species'].value_counts()
data.columns
# Feature table: only the two sepal columns.
X3 = data.loc[:, ['Sepal.Length', 'Sepal.Width']]
X3
# Species values; not used by the clustering below.
y3 = data['Species'].values
y3
X3.shape

plt.scatter(x=X3['Sepal.Length'], y=X3['Sepal.Width'], s=50)
# Group the samples into 3 clusters; fit() returns the estimator itself.
irisCluster = KMeans(n_clusters=3).fit(X3)
irisCenters = irisCluster.cluster_centers_
irisCenters