def main(score_threshold=1500):
    """Run the MNIST pipeline: univariate feature selection, PCA
    visualization of train/test splits, then classification.

    Parameters
    ----------
    score_threshold : float, default 1500
        Minimum univariate score (``fs.scores_``) a pixel column must reach
        to be kept.  1500 keeps ~197 columns; 3000 keeps ~36.
    """
    X_train, y_train = load_dataset('mnist_train.csv')

    # Univariate feature selection on the raw training data.
    fs = select_features(X_train, y_train)
    # +1 because load_dataset_columns expects 1-based column indices.
    selected_features = [i + 1 for i, score in enumerate(fs.scores_)
                         if score > score_threshold]
    print("selected_features : size (" + str(len(selected_features)) + ")")

    # Visualize the per-feature scores.
    pyplot.bar([i for i in range(len(fs.scores_))], fs.scores_)
    pyplot.show()
    pyplot.close()

    # Reload both splits keeping only the selected columns.  (The original
    # code also loaded the full test set up front and threw it away.)
    X_imp3, y3 = load_dataset_columns('mnist_train.csv', columns=selected_features)
    X_test, y_test = load_dataset_columns('mnist_test.csv', columns=selected_features)

    ds_X = np.array(X_imp3)
    ds_y = np.array(y3)
    plot_eigens(ds_X)
    df = pd.DataFrame(ds_X)
    df["num_cat"] = ds_y
    # Fit PCA on the training split and persist the model (saveModel=True).
    finalDf = plot_with_pca(ds_X, df, True)
    finalDf["num_cat"] = ds_y

    ds_X_test = np.array(X_test)
    ds_y_test = np.array(y_test)
    plot_eigens(ds_X_test)
    df_test = pd.DataFrame(ds_X_test)
    df_test["num_cat"] = ds_y_test
    # Reuse the persisted PCA model for the test split (saveModel defaults False).
    finalDf_test = plot_with_pca(ds_X_test, df_test)
    finalDf_test["num_cat"] = ds_y_test

    classifier_with_pca(finalDf, finalDf_test)
def plot_with_pca(X, df, saveModel=False):
    """Project X onto 25 principal components, scatter-plot PC1 vs PC2
    colored by digit class, and return the component DataFrame.

    Parameters
    ----------
    X : ndarray
        Feature matrix, one row per sample.
    df : DataFrame
        Must contain a 'num_cat' column holding the digit labels for X.
    saveModel : bool, default False
        True  -> fit a fresh PCA on X and pickle it to disk.
        False -> load the previously pickled PCA and only transform X.

    Returns
    -------
    DataFrame with columns 'pc 1' .. 'pc 25' plus 'num_cat'.
    """
    num_components = 25
    column_labels = ["pc " + str(i + 1) for i in range(num_components)]
    print("PCA X Shape:")
    print(X.shape)

    filename = 'mnist_pca_capstone.pkl'
    if saveModel:
        # 25 components -> cumulative variance ~0.98 for 36 selected features (>3000)
        # 60 components -> cumulative variance ~0.98 for 197 selected features (>1500)
        # Empirically 25 components classify better than 35 or 60.
        pca = PCA(n_components=num_components)
        principalComponents = pca.fit_transform(X)
        # 'with' ensures the pickle file handle is closed (was leaked before).
        with open(filename, 'wb') as model_file:
            pickle.dump(pca, model_file)
        print("------------- pca.explained_variance_ratio_ -------------")
        print(pca.explained_variance_ratio_)
        print("------------- pca.components_ -------------")
        print(pca.components_)
    else:
        with open(filename, 'rb') as model_file:
            loaded_pca_model = pickle.load(model_file)
        principalComponents = loaded_pca_model.transform(X)

    principalDf = pd.DataFrame(data=principalComponents, columns=column_labels)
    finalDf = pd.concat([principalDf, df[['num_cat']]], axis=1)

    # 2-D scatter of the first two components, one color per digit.
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel('pc 1', fontsize=15)
    ax.set_ylabel('pc 2', fontsize=15)
    ax.set_title('2 component PCA', fontsize=20)
    targets = [i for i in range(10)]
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    for target, color in zip(targets, colors):
        indicesToKeep = finalDf['num_cat'] == target
        ax.scatter(finalDf.loc[indicesToKeep, 'pc 1'],
                   finalDf.loc[indicesToKeep, 'pc 2'],
                   c=color)
    ax.legend(targets)
    ax.grid()
    plt.show()
    return finalDf
def __init__(self, database, dialect='mysql', driver='pymysql',
             username='******', password='******', host='127.0.0.1',
             port="3306"):
    """Create a SQLAlchemy engine for *database* and an empty DataFrame.

    Defaults target a local MySQL server via the pymysql driver; the
    credentials here are placeholders.
    """
    url = f"{dialect}+{driver}://{username}:{password}@{host}:{port}/{database}"
    self.engine = create_engine(url)
    self.df = pd.DataFrame()
def plot(plots):
    """Plot the test-completeness curves for every mutant-selection strategy.

    Parameters:
        plots: list of per-strategy data series, one per strategy name below,
            in the same order; each becomes one line on the shared chart.

    Returns the ``matplotlib.pyplot`` module so the caller can show or save.
    """
    plot_names = ["Traditional Mutants -- Best Case (TB)",
                  "Traditional Mutants -- Random (TR)",
                  "Natural Mutants -- Best Case (NB)",
                  "Natural Mutants -- Random (NR)",
                  "Natural Mutants -- Naturalness (NN)",
                  "All Mutants -- Best Case (AB)",
                  "All Mutants -- Random (AR)",
                  "All Mutants -- Naturalness (AN)",
                  ]
    frames = [pd.DataFrame(data=series, columns=[label])
              for label, series in zip(plot_names, plots)]

    longest = max(len(frame) for frame in frames)
    # One x-tick roughly every 10% of the longest series, never zero.
    step = int(max(longest, 1) / 10) or 1

    # All frames share one axes object; the first call creates it.
    axes = None
    for frame in frames:
        axes = frame.plot(fontsize=4,
                          xticks=(range(0, longest, step)),
                          yticks=(range(0, 105, 25)),
                          legend=False,
                          ax=axes)
    plt.legend()
    axes.set_xlabel("Work")
    axes.set_ylabel("Test Completeness")
    return plt
def print_weel_state(self):
    """Render the current well (play field) contents as a float DataFrame.

    Rebuilds ``self.field`` from scratch: zero background, then every fallen
    point, then the paddle blocks stamped on top.
    """
    # Start from an empty well of the configured dimensions.
    self.field = np.zeros((self.high_w, self.width_w))

    # Stamp each point's color code at its (row, col) cell.
    for point in self.list_point:
        row, col, color = point.get_point_position()
        self.field[row, col] = color

    # Stamp the paddle blocks the same way.
    for row, col, color in self.get_list_position():
        self.field[row, col] = color

    return (pd.DataFrame(self.field).astype(float))
def supervise():
    """Flask view: list every project in the fixed workspace, then fetch the
    meta and info records for the project id posted in the request form, and
    render all three tables into "index_1.html".
    """
    # SECURITY NOTE(review): API tokens are hard-coded in source control;
    # they should live in configuration/environment variables and be rotated.
    auth_token = '4jIbUl4sRrL8GNN0merp1KESByCrZ5HmSHiwOUEtGWSk5aSZb6sfjM9fVvOQFzIFeAUUTAVi8WHvyFd9g0hBLUOEZumyBXFhH0mUyGVEgrXmCP6UYtCe8ixbJDhuDyOZ'
    header = {"x-api-key": auth_token, 'Content-Type': "application/json"}
    project_data = {"workspaceId": 32504}
    project_url = 'https://app.supervise.ly/public/api/v3/projects.list'
    response = requests.get(project_url, json=project_data, headers=header).json()

    # Build an Id/Name table of every project in the workspace.
    # (Removed: a dead loop that formatted each pair into an unused string,
    # and a local `id` that shadowed the builtin.)
    project_ids = [entity["id"] for entity in response["entities"]]
    project_names = [entity["name"] for entity in response["entities"]]
    df_3 = pd.DataFrame(project_ids, columns=['Id'])
    df_3['Name'] = project_names

    # Second token is used for the per-project endpoints.
    auth_token = 'ZowU7fVBiMajzKgwlG5ux6aEVMoL2aLHFhvOeru3uZuaWXvV6IJZNhV7ZRS80icaw16K8hUICZtNNGW6wQjuke3kkb6wtjIxf1DEbob7XIL9TLLJ13Wgc3CVOlaZ3sgv'
    header = {"x-api-key": auth_token, 'Content-Type': "application/json"}
    projectid = request.form.get('text')
    print(projectid)

    # Project meta (extended form).
    workspace_data = {"id": projectid, "extended": True}
    workspace_url = 'https://app.supervise.ly/public/api/v3/projects.meta'
    res = requests.get(workspace_url, json=workspace_data, headers=header).json()
    df = pd.DataFrame.from_dict(res, orient="index")
    print(df)

    # Project info.
    project_data = {"id": projectid}
    project_url = 'https://app.supervise.ly/public/api/v3/projects.info'
    res_2 = requests.get(project_url, json=project_data, headers=header).json()
    df_2 = pd.DataFrame.from_dict(res_2, orient="index")
    print(df_2)

    return render_template(
        "index_1.html",
        items=[df_3.to_html(classes='data', header="true")],
        tables=[df.to_html(classes='data', header="true")],
        titles=[df_2.to_html(classes='data', header="true")])
def saveDist(speedData, path):
    """Extract per-interval speed samples from a fitness-API style payload
    and persist them as both CSV and JSON.

    Parameters:
        speedData: dict with a "point" list; each point carries
            "startTimeNanos"/"endTimeNanos" and a "value" list whose first
            element may hold an "fpVal" float (the speed sample).
        path: base name used both as the CSV speed-column header and as the
            output file stem.

    Side effects: writes './data/<path> .csv' and './data/json/<path> .json'.
    NOTE(review): the space before each extension looks accidental, but is
    kept byte-for-byte because downstream readers may expect those names.
    """
    start_times, end_times, speeds = [], [], []
    stps = {}
    # Iterate points directly (was an index loop) and convert each timestamp
    # once instead of twice per point.
    for point in speedData["point"]:
        start = nanoseconds(int(point.get("startTimeNanos", 0)))
        end = nanoseconds(int(point.get("endTimeNanos", 0)))
        speed = point["value"][0].get("fpVal", None)
        start_times.append(start)
        end_times.append(end)
        speeds.append(speed)
        # NOTE(review): keying by the speed value means duplicate speeds
        # overwrite earlier intervals -- preserved as-is for compatibility.
        stps[speed] = [start, end]

    adf = pd.DataFrame({'Start Time': start_times, 'End Time': end_times, path: speeds})
    adf.to_csv('./data/' + path + ' .csv',
               columns=['Start Time', 'End Time', path],
               header=True,
               index=False)
    with open('./data/json/' + path + ' .json', 'w') as outfile:
        json.dump(stps, outfile)
def callback():
    """Flask view: fetch every project in the fixed workspace and render an
    Id/Name table into "index_1.html".
    """
    # SECURITY NOTE(review): API token is hard-coded in source control;
    # it should live in configuration/environment variables and be rotated.
    auth_token = '4jIbUl4sRrL8GNN0merp1KESByCrZ5HmSHiwOUEtGWSk5aSZb6sfjM9fVvOQFzIFeAUUTAVi8WHvyFd9g0hBLUOEZumyBXFhH0mUyGVEgrXmCP6UYtCe8ixbJDhuDyOZ'
    header = {"x-api-key": auth_token, 'Content-Type': "application/json"}
    project_data = {"workspaceId": 32504}
    project_url = 'https://app.supervise.ly/public/api/v3/projects.list'
    response = requests.get(project_url, json=project_data, headers=header).json()

    # Build an Id/Name table of every project in the workspace.
    # (Removed: a dead loop that formatted each pair into an unused string,
    # and a local `id` that shadowed the builtin.)
    project_ids = [entity["id"] for entity in response["entities"]]
    project_names = [entity["name"] for entity in response["entities"]]
    df = pd.DataFrame(project_ids, columns=['Id'])
    df['Name'] = project_names
    print(df)

    return render_template("index_1.html",
                           items=[df.to_html(classes='data', header="true")])
# Debug dump of the scraped corona-virus series (variables defined earlier
# in the file, outside this chunk).
print(deadCount)
print(provinceName)
print(hyperlink)
print(type(python_list))
# Sanity-check that the parallel lists have matching lengths before zipping.
print(len(countryname))
print(len(confirmedCount))
print(len(suspectedCount))
print(len(curedCount))
print(len(deadCount))
print(len(provinceName))
print(len(hyperlink))
data_list = list(zip(countryname,confirmedCount,suspectedCount,curedCount,deadCount))  # per-province confirmed / suspected / cured / dead counts
print(data_list)
# Column headers (Chinese): province, confirmed, suspected, cured, dead.
name = ['省份', '确诊人数', '疑似人数','治愈人数','死亡人数']
test = pd.DataFrame(columns=name, data=data_list)
test.to_csv('../../data/last_day_corona_virus_of_china.csv')
hyperlink_list = list(zip(provinceName,hyperlink))  # per-province detail links (dead/suspected counts since 2020-01-18)
print(hyperlink_list)
print(type(hyperlink_list))