def mall_cluster():
    """Cluster the mall customers with k-means and show the result in 3-D.

    Loads every row of ``customers`` from the ``mall_customers`` database,
    recodes ``gender`` to 0 (``'Female'``) / 1 (anything else), fits a
    5-cluster k-means model on age, annual income and spending score, prints
    the cluster centres and opens a 3-D scatter plot of the customers
    coloured by cluster with the centres overlaid.

    Returns:
        None. The function only prints and plots.
    """
    data = db.get_db_url(comm='SELECT * FROM customers',
                         database='mall_customers')
    data['gender'] = (data['gender'] != 'Female').astype(int)
    print(data)

    # The plot below needs an ``age`` coordinate for every centre, so the
    # model has to be fitted on all three plotted features. Fitting on the
    # two income/spending columns only left ``centers.age`` undefined.
    features = ['age', 'annual_income', 'spending_score']
    x = data[features]
    kmeans = KMeans(n_clusters=5)
    kmeans.fit(x)

    # One-dimensional clustering on age alone, printed for comparison.
    # The keyword is ``n_clusters``; ``n_cluster`` raised a TypeError.
    print(KMeans(n_clusters=5).fit(data[['age']]).cluster_centers_)
    print(kmeans.cluster_centers_)

    fig = plt.figure(figsize=(12, 9))
    # ``Axes3D(fig)`` is not attached to the figure automatically on current
    # matplotlib releases; ``add_subplot`` creates and registers the axes.
    ax = fig.add_subplot(projection='3d')

    centers = pd.DataFrame(kmeans.cluster_centers_, columns=x.columns)
    ax.scatter(data.age, data.annual_income, data.spending_score,
               c=kmeans.labels_)
    ax.scatter(centers.age, centers.annual_income, centers.spending_score,
               c='pink', s=10000, alpha=.4)
    ax.set(xlabel='age', ylabel='annual_income', zlabel='spending_score')
    plt.show()
def wrangle_telco():
    """Fetch the telco customers with contract type 3 and clean their charges.

    Queries ``customer_id``, ``monthly_charges``, ``tenure`` and
    ``total_charges`` from the ``telco_churn`` database, strips surrounding
    whitespace from ``total_charges`` and converts it to ``float``. Values
    that do not start with a digit after stripping (for example blank
    strings) become ``0.0``.

    Returns:
        The query result returned by ``get_db_url`` with a cleaned
        ``total_charges`` column.
    """
    cust = get_db_url(
        comm="""SELECT customer_id, monthly_charges, tenure, total_charges FROM customers WHERE contract_type_id = 3 ORDER BY total_charges DESC;""",
        database='telco_churn')

    def _to_charge(raw):
        # Strip first so padded numbers such as ' 12.5' are still parsed.
        # Slicing with [:1] keeps an empty string from raising IndexError.
        text = raw.strip()
        return float(text) if text[:1].isdigit() else 0.0

    cust['total_charges'] = cust['total_charges'].apply(_to_charge)
    # The frame is named ``cust``; ``telco`` was never defined.
    return cust
# NOTE(review): this import is shadowed by the local ``get_db_url`` defined
# below; confirm whether it is still needed.
from dbtools import get_db_url
import env
import MySQLdb
import pandas.io.sql as psql


def get_db_url(comm, database):
    """Run the SQL statement *comm* against *database* and return the result.

    Args:
        comm: SQL text passed to ``psql.read_sql``.
        database: Name of the schema to connect to on the MySQL host.

    Returns:
        Whatever ``psql.read_sql`` returns for the query.
    """
    connection = MySQLdb.connect(host='157.230.209.171', user=env.user,
                                 passwd=env.password, db=database)
    try:
        return psql.read_sql(comm, con=connection)
    finally:
        # Release the connection even when the query raises.
        connection.close()


# Module-level query kept from the original: it runs once at import time.
cust = get_db_url(
    comm="""SELECT customer_id, monthly_charges, tenure, total_charges FROM customers WHERE contract_type_id = 3 ORDER BY total_charges DESC;""",
    database='telco_churn')


def wrangle_telco():
    """Fetch the telco customers with contract type 3 and clean their charges.

    Re-runs the query against ``telco_churn``, strips surrounding whitespace
    from ``total_charges`` and converts it to ``float``. Values that do not
    start with a digit after stripping (for example blank strings) become
    ``0.0``.

    Returns:
        The query result with a cleaned ``total_charges`` column.
    """
    cust = get_db_url(
        comm="""SELECT customer_id, monthly_charges, tenure, total_charges FROM customers WHERE contract_type_id = 3 ORDER BY total_charges DESC;""",
        database='telco_churn')

    def _to_charge(raw):
        # Strip first so padded numbers are still parsed. Slicing with [:1]
        # keeps an empty string from raising IndexError.
        text = raw.strip()
        return float(text) if text[:1].isdigit() else 0.0

    cust['total_charges'] = cust['total_charges'].apply(_to_charge)
    # The frame is named ``cust``; ``telco`` was never defined.
    return cust
def get_iris_data(command="""SELECT measurement_id, sepal_length, sepal_width ,petal_length, petal_width, species.species_name FROM measurements JOIN species USING(species_id);""", database='iris_db'):
    """Run *command* against *database* through ``db.get_db_url``.

    Args:
        command: SQL text to execute. The default selects the measurement
            columns joined with ``species.species_name``.
        database: Database name handed to ``db.get_db_url``; defaults to
            ``'iris_db'``.

    Returns:
        The value returned by ``db.get_db_url``.
    """
    query_result = db.get_db_url(database=database, comm=command)
    return query_result
def get_titanic_data(command='SELECT * FROM passengers', database='titanic_db'):
    """Run *command* against *database* through ``db.get_db_url``.

    Args:
        command: SQL text to execute; defaults to selecting every column of
            ``passengers``.
        database: Database name handed to ``db.get_db_url``; defaults to
            ``'titanic_db'``.

    Returns:
        The value returned by ``db.get_db_url``.
    """
    passengers = db.get_db_url(database=database, comm=command)
    return passengers
# NOTE(review): the line below is several statements collapsed onto one
# physical line; the original line breaks and indentation must be restored
# before it can run. In particular, the `#error_me = me_error` fragment
# would comment out everything after it while the text stays on one line.
#
# Contents, in order:
#   * a `return db.get_db_url(comm=command, database=database)` that is the
#     tail of a definition whose header is not on this line;
#   * `get_iris_data(command=..., database='iris_db')`, which forwards both
#     arguments to `db.get_db_url` and returns its result;
#   * a script section that waits on `input('<<<<<>>>>>')`, loads
#     `SELECT * FROM measurements` from `iris_db` into `df_iris`, and prints
#     its head(3), shape, columns, info() and describe();
#   * a loop over the int64/float64 columns of `df_iris` that prints each
#     column name and the difference between its max and min;
#   * a read of 'mytable_customer_details.xlsx' into `df_excel`, whose first
#     100 rows are kept as `df_excel_sample`.
#
# NOTE(review): whether `print('<<<<<>>>>>')` belongs inside the `for` loop
# cannot be determined from the collapsed text; confirm against the original.
return db.get_db_url(comm=command, database=database) def get_iris_data(command="""SELECT measurement_id, sepal_length, sepal_width ,petal_length, petal_width, species.species_name FROM measurements JOIN species USING(species_id);""", database='iris_db'): return db.get_db_url(comm=command, database=database) a = input('<<<<<>>>>>') #error_me = me_error df_iris = pd.DataFrame(db.get_db_url('SELECT * FROM measurements', \ database = 'iris_db')) print(df_iris.head(3)) print(df_iris.shape) print(df_iris.columns) print(df_iris.info()) print(df_iris.describe()) ints = df_iris.select_dtypes(include=['int64', 'float64']) for i in ints: print(i) print(str(df_iris[i].max() - df_iris[i].min())) print('<<<<<>>>>>') df_excel = pd.DataFrame(pd.read_excel('mytable_customer_details.xlsx')) df_excel_sample = df_excel.head(100)
# NOTE(review): the line below is several statements collapsed onto one
# physical line; the original line breaks and indentation must be restored
# before it can run.
#
# Contents, in order:
#   * the tail of a function whose header is not on this line. It stores
#     `data.isnull().sum(axis=1)` under 'num_cols_missing', the same sum
#     divided by `len(data)` under 'pct_cols_missing', the literal '!'
#     under 'num_rows', and returns `return_data`;
#   * a script section that queries `properties_2017` joined with the
#     latest-transaction rows of `predictions_2017` from the `zillow`
#     database (rows with non-null latitude/longitude, excluding the listed
#     propertylandusetypeid values, LIMIT 10000) into `zillow_data`, then
#     prints the frame, a banner, info(), describe() and shape.
#
# NOTE(review): assuming `data` is a DataFrame, `len(data)` is its row count
# while the numerator counts missing columns per row; dividing by the number
# of columns may have been intended. Confirm.
# NOTE(review): 'num_rows' is set to the placeholder string '!' rather than
# a computed value. Confirm whether this is unfinished.
return_data['num_cols_missing'] = data.isnull().sum(axis=1) return_data['pct_cols_missing'] = (data.isnull().sum(axis=1)) / len(data) return_data['num_rows'] = '!' return return_data zillow_data = db.get_db_url(comm="""Select * From properties_2017 Join (SELECT p_17.parcelid, logerror, transactiondate FROM predictions_2017 p_17 JOIN (SELECT parcelid, Max(transactiondate) as tdate FROM predictions_2017 Group By parcelid )as sq1 ON (sq1.parcelid=p_17.parcelid and sq1.tdate = p_17.transactiondate )) sq2 USING (parcelid) WHERE (latitude IS NOT NULL AND longitude IS NOT NULL) AND properties_2017.propertylandusetypeid NOT IN (31, 47,246, 247, 248, 267, 290, 291) LIMIT 10000;""", database='zillow') print(zillow_data) print('---------------|DATABASE_INFO|---------------') print(zillow_data.info()) print(zillow_data.describe()) print(zillow_data.shape)