예제 #1
0
def main():
    """Run the MNIST pipeline: feature selection, PCA projection, classifier.

    Features whose selection score is above 1500 are kept, both CSV files
    are re-read restricted to those columns, each split is passed through
    ``plot_eigens`` and ``plot_with_pca``, and the two resulting frames are
    handed to ``classifier_with_pca``.
    """
    score_threshold = 1500

    def project(features, labels, *pca_args):
        """Run ``plot_eigens`` and ``plot_with_pca`` on one labelled split."""
        matrix = np.array(features)
        targets = np.array(labels)
        plot_eigens(matrix)
        frame = pd.DataFrame(matrix)
        frame["num_cat"] = targets
        reduced = plot_with_pca(matrix, frame, *pca_args)
        # Re-assign the labels on the frame that comes back.
        reduced["num_cat"] = targets
        return reduced

    X_train, y_train = load_dataset('mnist_train.csv')
    # This full test load is superseded by the column-restricted load below.
    X_test, y_test = load_dataset('mnist_test.csv')

    # Feature selection: keep every feature scoring above the threshold.
    fs = select_features(X_train, y_train)
    # The stored column number is the score index shifted by one —
    # presumably because column 0 of the CSV holds the label; confirm
    # against load_dataset_columns.
    selected_features = [
        position + 1
        for position, score in enumerate(fs.scores_)
        if score > score_threshold
    ]

    print(f"selected_features : size ({len(selected_features)})")
    pyplot.bar(list(range(len(fs.scores_))), fs.scores_)
    pyplot.show()
    pyplot.close()

    # Reload both data sets with only the selected columns.
    X_imp3, y3 = load_dataset_columns('mnist_train.csv',
                                      columns=selected_features)
    X_test, y_test = load_dataset_columns('mnist_test.csv',
                                          columns=selected_features)

    # The training split passes True as the third argument of
    # plot_with_pca; the test split relies on that function's default.
    finalDf = project(X_imp3, y3, True)
    finalDf_test = project(X_test, y_test)

    classifier_with_pca(finalDf, finalDf_test)
예제 #2
0
def plot_with_pca(X, df, saveModel=False):
    """Project ``X`` onto principal components and plot the first two.

    Parameters
    ----------
    X : array-like exposing ``shape``
        Feature matrix to project.
    df : pandas.DataFrame
        Frame with a ``num_cat`` column; that column is concatenated
        column-wise (aligned on the index) with the component frame.
    saveModel : bool, default False
        When true, fit a new 25-component PCA on ``X`` and pickle it to
        ``mnist_pca_capstone.pkl``. Otherwise load that pickle and only
        transform ``X`` with it.

    Returns
    -------
    pandas.DataFrame
        Columns ``pc 1`` .. ``pc k`` followed by ``num_cat``.
    """
    num_components = 25
    filename = 'mnist_pca_capstone.pkl'

    print("PCA X Shape:")
    print(X.shape)

    if saveModel:
        # Author's notes on the component count:
        # 25 components: cumulative variance roughly 0.98 for 36 selected
        # features (score > 3000).
        # 60 components: cumulative variance roughly 0.98 for 197 selected
        # features (score > 1500).
        # 25 nevertheless seemed to give better results than 60 or 35.
        pca = PCA(n_components=num_components)
        principal_components = pca.fit_transform(X)
        # Context manager so the file handle is flushed and closed.
        with open(filename, 'wb') as model_file:
            pickle.dump(pca, model_file)
        print("-------------   pca.explained_variance_ratio_   -------------")
        print(pca.explained_variance_ratio_)
        print("-------------   pca.components_   -------------")
        print(pca.components_)
    else:
        # NOTE(review): unpickling can execute arbitrary code; only load a
        # model file that this program wrote itself.
        with open(filename, 'rb') as model_file:
            loaded_pca_model = pickle.load(model_file)
        principal_components = loaded_pca_model.transform(X)

    # Label columns from the actual projection width so that a stored
    # model with a different component count still yields a valid frame.
    column_labels = [
        "pc " + str(index + 1)
        for index in range(principal_components.shape[1])
    ]
    principalDf = pd.DataFrame(data=principal_components,
                               columns=column_labels)

    finalDf = pd.concat([principalDf, df[['num_cat']]], axis=1)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel('pc 1', fontsize=15)
    ax.set_ylabel('pc 2', fontsize=15)
    ax.set_title('2 component PCA', fontsize=20)
    targets = list(range(10))
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    # One scatter call per class value 0..9, each in its own colour.
    for target, color in zip(targets, colors):
        indicesToKeep = finalDf['num_cat'] == target
        ax.scatter(finalDf.loc[indicesToKeep, 'pc 1'],
                   finalDf.loc[indicesToKeep, 'pc 2'],
                   c=color)
    ax.legend(targets)
    ax.grid()
    plt.show()

    return finalDf
예제 #3
0
 def __init__(self,
              database,
              dialect='mysql',
              driver='pymysql',
              username='******',
              password='******',
              host='127.0.0.1',
              port="3306"):
     """Create the database engine and an empty result frame.

     The connection URL has the form
     ``dialect+driver://username:password@host:port/database`` and is
     passed to ``create_engine``; the engine is stored on ``self.engine``
     and an empty DataFrame on ``self.df``.

     ``port`` may be given as a string or an integer. All parts are
     inserted verbatim, so a username or password containing characters
     such as ``@`` or ``/`` must already be URL-escaped by the caller.
     """
     # str.format converts every part to text, so an integer port works
     # (plain ``+`` concatenation raised TypeError for it).
     connection_string = '{}+{}://{}:{}@{}:{}/{}'.format(
         dialect, driver, username, password, host, port, database)
     self.engine = create_engine(connection_string)
     self.df = pd.DataFrame()
예제 #4
0
def plot(plots):
    """Draw the test-completeness curves on one shared set of axes.

    Parameters:
         plots: sequence of data series
            Series number i is labelled with the i-th entry of the fixed
            label list below (pairing stops at the shorter of the two).
            The original documentation describes each series as a list
            of (int, int) plot points.

    Returns:
         The ``plt`` module object.
    """
    # NOTE(review): an older comment listed the expected inputs as
    # traditional_random, traditional_bestcase, natural_naturalness[0][0],
    # natural_bestcase, natural_random, all_bestcase, all_random,
    # all_naturalness — that order differs from the labels below; confirm
    # with the caller which one is right.
    labels = [
        "Traditional Mutants -- Best Case (TB)",
        "Traditional Mutants -- Random (TR)",
        "Natural Mutants -- Best Case (NB)",
        "Natural Mutants -- Random (NR)",
        "Natural Mutants -- Naturalness  (NN)",
        "All Mutants -- Best Case  (AB)",
        "All Mutants -- Random (AR)",
        "All Mutants -- Naturalness (AN)",
    ]

    frames = [
        pd.DataFrame(data=series, columns=[label])
        for label, series in zip(labels, plots)
    ]

    # About ten x ticks across the longest series, never a zero step.
    longest = max(len(frame) for frame in frames)
    step = max(longest // 10, 1)
    x_ticks = range(0, longest, step)
    y_ticks = range(0, 105, 25)

    # The first frame creates the axes; every later frame draws onto them.
    ax = None
    for frame in frames:
        drawn = frame.plot(fontsize=4,
                           xticks=x_ticks,
                           yticks=y_ticks,
                           legend=False,
                           ax=ax)
        if ax is None:
            ax = drawn

    plt.legend()
    ax.set_xlabel("Work")
    ax.set_ylabel("Test Completeness")

    return plt
예제 #5
0
    def print_weel_state(self):
        """Rebuild ``self.field`` and return it as a float DataFrame.

        The field is reset to zeros of shape ``(high_w, width_w)``. Each
        entry of ``self.list_point`` then writes its colour at its
        (row, column) position, followed by each (row, column, colour)
        triple from ``self.get_list_position()``; later writes overwrite
        earlier ones at the same cell.
        """
        # Start from an empty (all-zero) well.
        self.field = np.zeros((self.high_w, self.width_w))

        # Points: fetch each one's coordinates and colour, store them.
        for point in self.list_point:
            row, col, colour = point.get_point_position()
            self.field[row, col] = colour

        # Racket blocks: same treatment, written after the points.
        for row, col, colour in self.get_list_position():
            self.field[row, col] = colour

        return pd.DataFrame(self.field).astype(float)
예제 #6
0
def supervise():
    """Render ``index_1.html`` with the project list and one project's details.

    Three calls are made to the supervise.ly public API:

    * ``projects.list`` for workspace 32504 -> table of project id / name,
    * ``projects.meta`` for the project id taken from the ``text`` form field,
    * ``projects.info`` for that same id.

    Returns:
        The result of ``render_template`` with the three tables passed as
        the one-element lists ``items``, ``tables`` and ``titles``.
    """
    # NOTE(review): both API tokens are hard-coded in source; they should
    # be moved to configuration / environment and rotated.
    list_token = '4jIbUl4sRrL8GNN0merp1KESByCrZ5HmSHiwOUEtGWSk5aSZb6sfjM9fVvOQFzIFeAUUTAVi8WHvyFd9g0hBLUOEZumyBXFhH0mUyGVEgrXmCP6UYtCe8ixbJDhuDyOZ'
    list_headers = {"x-api-key": list_token,
                    'Content-Type': "application/json"}

    listing = requests.get(
        'https://app.supervise.ly/public/api/v3/projects.list',
        json={"workspaceId": 32504},
        headers=list_headers).json()

    # Build the id/name table in one pass, without shadowing ``id``.
    entities = listing["entities"]
    projects_frame = pd.DataFrame({
        'Id': [entity["id"] for entity in entities],
        'Name': [entity["name"] for entity in entities],
    })

    detail_token = 'ZowU7fVBiMajzKgwlG5ux6aEVMoL2aLHFhvOeru3uZuaWXvV6IJZNhV7ZRS80icaw16K8hUICZtNNGW6wQjuke3kkb6wtjIxf1DEbob7XIL9TLLJ13Wgc3CVOlaZ3sgv'
    detail_headers = {"x-api-key": detail_token,
                      'Content-Type': "application/json"}
    # Project id as submitted in the form field named 'text'.
    projectid = request.form.get('text')
    print(projectid)

    meta = requests.get(
        'https://app.supervise.ly/public/api/v3/projects.meta',
        json={"id": projectid, "extended": True},
        headers=detail_headers).json()
    meta_frame = pd.DataFrame.from_dict(meta, orient="index")
    print(meta_frame)

    info = requests.get(
        'https://app.supervise.ly/public/api/v3/projects.info',
        json={"id": projectid},
        headers=detail_headers).json()
    info_frame = pd.DataFrame.from_dict(info, orient="index")
    print(info_frame)

    return render_template(
        "index_1.html",
        items=[projects_frame.to_html(classes='data', header=True)],
        tables=[meta_frame.to_html(classes='data', header=True)],
        titles=[info_frame.to_html(classes='data', header=True)])
예제 #7
0
def saveDist(speedData, path):
    """Export the points of ``speedData`` to a CSV file and a JSON file.

    Parameters:
        speedData: mapping whose ``"point"`` entry is a sequence of dicts.
            Each dict may carry ``startTimeNanos`` / ``endTimeNanos``
            (default 0) and must carry ``value``, whose first element may
            hold ``fpVal`` (default None).
        path: name used both as the value column header and as the stem
            of the two output files.

    Side effects:
        Writes ``./data/<path> .csv`` with the columns Start Time,
        End Time and ``path``, and ``./data/json/<path> .json`` mapping
        each value to its ``[start, end]`` pair.
    """
    start_times, end_times, values = [], [], []
    # Keyed by the reading itself, so points with an equal value overwrite
    # each other and only the last time span survives.
    spans_by_value = {}

    for point in speedData["point"]:
        # Convert each timestamp once and reuse it for both outputs.
        started = nanoseconds(int(point.get("startTimeNanos", 0)))
        ended = nanoseconds(int(point.get("endTimeNanos", 0)))
        value = point["value"][0].get("fpVal", None)

        start_times.append(started)
        end_times.append(ended)
        values.append(value)
        spans_by_value[value] = [started, ended]

    frame = pd.DataFrame({
        'Start Time': start_times,
        'End Time': end_times,
        path: values,
    })
    # NOTE(review): the file names contain a space before the extension
    # (' .csv' / ' .json'); kept as-is because existing consumers may rely
    # on it — confirm whether it is intentional.
    frame.to_csv('./data/' + path + ' .csv',
                 columns=['Start Time', 'End Time', path],
                 header=True,
                 index=False)
    with open('./data/json/' + path + ' .json', 'w') as outfile:
        json.dump(spans_by_value, outfile)
예제 #8
0
def callback():
    """Render ``index_1.html`` with the id/name table of all projects.

    Queries ``projects.list`` of the supervise.ly public API for workspace
    32504, prints the resulting table and passes its HTML rendering to the
    template as the one-element list ``items``.
    """
    # NOTE(review): the API token is hard-coded in source; it should be
    # moved to configuration / environment and rotated.
    auth_token = '4jIbUl4sRrL8GNN0merp1KESByCrZ5HmSHiwOUEtGWSk5aSZb6sfjM9fVvOQFzIFeAUUTAVi8WHvyFd9g0hBLUOEZumyBXFhH0mUyGVEgrXmCP6UYtCe8ixbJDhuDyOZ'
    headers = {"x-api-key": auth_token, 'Content-Type': "application/json"}

    listing = requests.get(
        'https://app.supervise.ly/public/api/v3/projects.list',
        json={"workspaceId": 32504},
        headers=headers).json()

    # Build the table in one pass, without shadowing the builtin ``id``.
    entities = listing["entities"]
    df = pd.DataFrame({
        'Id': [entity["id"] for entity in entities],
        'Name': [entity["name"] for entity in entities],
    })
    print(df)

    return render_template("index_1.html",
                           items=[df.to_html(classes='data', header=True)])
예제 #9
0
# Debug output of the scraped columns.
print(deadCount)
print(provinceName)
print(hyperlink)

print(type(python_list))

# Print the length of every column list, one per line.
for _column in (countryname, confirmedCount, suspectedCount, curedCount,
                deadCount, provinceName, hyperlink):
    print(len(_column))

# One row per entry: name plus confirmed / suspected / cured / dead counts
# (original note: per-province, per-city figures).
data_list = list(
    zip(countryname, confirmedCount, suspectedCount, curedCount, deadCount))
print(data_list)
# CSV header: province, confirmed, suspected, cured, dead.
name = ['省份', '确诊人数', '疑似人数', '治愈人数', '死亡人数']
test = pd.DataFrame(data_list, columns=name)
test.to_csv('../../data/last_day_corona_virus_of_china.csv')

# Pairs of (province name, hyperlink). The original note describes these as
# per-province death/suspected counts since 2020-01-18 — presumably that is
# what each link points to; confirm.
hyperlink_list = list(zip(provinceName, hyperlink))
print(hyperlink_list)
print(type(hyperlink_list))