Example #1
 def week_time_series(fname, ttype, utype):
     """
     Count the usage of each one-hour interval over the 7 days of a week.
     The start date advances 7 days at a time; the first day, 2015-01-01, is a Thursday.
     Generates a new csv from the csv files computed by day_time_series.
     """
     header = ['Thu', 'Fri', 'Sat', 'Sun', 'Mon', 'Tue', 'Wed']
     idx = []
     pd_borrow_data, pd_return_data = (pd.DataFrame() for _ in range(2))
     for file, days in zip(fname, header):
         borrow_data, return_data = LoadData.load_station_count(
             file, ttype, utype)
         idx += [(days, ) + make_tuple(x) for x in borrow_data.index]
         pd_borrow_data = pd.concat((pd_borrow_data, borrow_data),
                                    axis=0,
                                    ignore_index=True)
         pd_return_data = pd.concat((pd_return_data, return_data),
                                    axis=0,
                                    ignore_index=True)
     pd_borrow_data.index = idx
     pd_return_data.index = idx
     # print('pd_borrow_data\n', pd_borrow_data.head())
     # print('pd_return_data\n', pd_return_data.head())
     name = fname[0][6:16] + '~' + fname[-1][6:16]
     Export.week_station_count(pd_borrow_data, name, utype[0])
     Export.week_station_count(pd_return_data, name, utype[1])
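For context, a minimal driver sketch for week_time_series, assuming LoadData.load_station_count_fname returns the per-day csv names in date order (the same convention Example #15 uses); this is an illustration, not code from the project:

# Hypothetical driver, mirroring the calling conventions used elsewhere in this listing.
bandwidths = ['Day']
used = ['BorrowStation', 'ReturnStation']
file_name = LoadData.load_station_count_fname(bandwidths[0], used[0])
# 2015-01-01 is a Thursday, so the first seven files cover Thu..Wed.
week_time_series(file_name[:7], bandwidths[0], used)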
Example #2
def generate_graph(fname, ttype, utype):
    """
    Make Daily Graph
    :param fname:
    :param ttype:
    :param utype:
    :return:
    """
    # Basic Setting
    sns.set(style='darkgrid')
    myfont = fm.FontProperties(fname='C:/Windows/Fonts/msyh.ttc')
    path = "../Data/Graph/Statistic_Graph/Day/" + fname[6:-2]
    if not os.path.isdir(path):
        os.mkdir(path)
    color_array = ['Blue', 'Red', 'Green']
    label_array = ['Borrow', 'Return', 'Total']

    # Load data and plot
    borrow_data, return_data = LoadData.load_station_count(fname, ttype, utype)

    # print('borrow_data\n', borrow_data.head())
    # print('return_data\n', return_data.head())
    # x=input()

    header = list(borrow_data.columns)
    print('x_data', np.array(borrow_data.index))

    x_data = np.arange(24)
    for col in header:
        fig, ax = plt.subplots(figsize=(8, 6), dpi=100)
        y1_data = borrow_data[col].values.T
        y2_data = return_data[col].values.T
        y3_data = y1_data + y2_data
        ax.plot(x_data, y1_data, color=color_array[0], label=label_array[0])
        ax.plot(x_data, y2_data, color=color_array[1], label=label_array[1])
        ax.plot(x_data, y3_data, color=color_array[2], label=label_array[2])

        plt.title(col, fontproperties=myfont)
        plt.xlabel('Time')
        plt.ylabel('Count')
        plt.xticks(np.arange(24), borrow_data.index, rotation=45)
        if np.sum(y3_data) == 0:
            plt.ylim(ymin=0)
        else:
            plt.ylim(ymin=0, ymax=np.max(y3_data))

        # Put a nicer background color on the legend.
        legend = ax.legend(loc='upper right')
        legend.get_frame().set_facecolor('C0')
        try:
            photo_path = path + '/' + col + '.png'
            plt.savefig(photo_path)
        except FileNotFoundError:
            col = col.replace('/', '-')
            photo_path = path + '/' + col + '.png'
            plt.savefig(photo_path)
        plt.ion()
        plt.pause(1)
        plt.close()
Example #3
def train_fnn(nn):
    accuracy = 0.0
    matrix = np.array([])
    fnn_copy = FNN()
    all_nn_accuracy = np.array([])

    org_data, org_label = LoadData.get_method2_fnn_train(nn)
    org_label = np.array([1 if label == nn else 0 for label in org_label])
    X_train, X_test, y_train, y_test = train_test_split(org_data,
                                                        org_label,
                                                        test_size=0.3)
    # print(X_train, X_train.shape)
    # print(y_train, y_train.shape)

    print('<---Train the FNN ' + nn + ' Start--->')
    for i in range(fnn_random_size):
        # Random Generate the mean, standard deviation
        mean = np.array(
            [np.random.uniform(-1, 1) for _ in range(fnn_membership_size)])
        stddev = np.array(
            [np.random.uniform(0, 1) for _ in range(fnn_membership_size)])
        weight = np.array(
            [np.random.uniform(-1, 1) for _ in range(fnn_rule_size)])

        fnn = FNN(fnn_input_size, fnn_membership_size, fnn_rule_size,
                  fnn_output_size, mean, stddev, weight, fnn_lr, 1)
        fnn.training_model(fnn_epoch, X_train, y_train)

        test_output = fnn.testing_model(X_test)
        label_pred = [
            1 if values >= fnn_threshold else 0 for values in test_output
        ]
        C_matrix = confusion_matrix(y_test, label_pred)
        C_accuracy = np.sum(C_matrix.diagonal()) / np.sum(C_matrix)
        all_nn_accuracy = np.append(all_nn_accuracy, C_accuracy)
        # print(C_matrix)
        # print(C_accuracy)
        if C_accuracy > accuracy:
            fnn_copy = copy.deepcopy(fnn)
            accuracy = copy.deepcopy(C_accuracy)
            matrix = copy.deepcopy(C_matrix)
            print('swap')
    print('<---Train the FNN ' + nn + ' Successfully--->')
    print('<----------------------------------------------->')

    # rel_path = 'Experiment/Graph/method2/Best_FNN_' + nn + '_error_trend.png'
    # abs_path = os.path.join(os.path.dirname(__file__), rel_path)
    # ErrorPlot.error_trend(
    #     'Best_FNN_' + str(nn) + '_error_trend', len(fnn_copy.error_list), fnn_copy.error_list, abs_path)
    #
    # rel_path = 'Experiment/Graph/method2/Accuracy vs FNN' + str(nn) + '.png'
    # abs_path = os.path.join(os.path.dirname(__file__), rel_path)
    # AccuracyPlot.build_accuracy_plot(
    #     'Accuracy vs FNN'+str(nn), np.array([i for i in range(1, len(all_nn_accuracy) + 1, 1)]),
    #     all_nn_accuracy, abs_path)

    return fnn_copy, accuracy, matrix
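train_fnn relies on module-level hyperparameters defined elsewhere; Example #16 below shows a compatible set, reproduced here so the function can be read standalone (fnn_threshold is the scalar variant that Example #20 uses):

# Module-level constants assumed by train_fnn (values copied from Example #16).
fnn_label_size = 6
fnn_input_size = 3
fnn_membership_size = fnn_input_size * fnn_label_size
fnn_rule_size = 6
fnn_output_size = 1
fnn_lr = 0.001
fnn_epoch = 1
fnn_random_size = 1
fnn_threshold = 0.0  # scalar, as in Example #20; Example #16 instead uses a per-model list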
Example #4
 def __init__(self, file):
     """
     self.station_type, the set of station (venue) names
     self.file_name, the names of the csv files in the data folder
     """
     desired_width = 320
     pd.set_option('display.width', desired_width)
     pd.set_option('display.max_columns', 20)
     self.station = LoadData.load_refactor_ubike_station()
     self.station_type = set(self.station['sna'])
     self.file_name = file
Example #5
def test_model(fnn_model):
    org_data, org_label = LoadData.get_method2_test()
    X_train, X_test, y_train, y_test = train_test_split(org_data,
                                                        org_label,
                                                        test_size=0.3)

    # Convert y_test (28 categories down to 6 categories)
    y_test = np.array([int(e[1:2]) for e in y_test])

    print('<---Test Model Start--->')
    output_list = np.array([])
    for model in fnn_model:
        fnn = FNN(fnn_input_size, fnn_membership_size, fnn_rule_size,
                  fnn_output_size, model.mean, model.stddev, model.weight,
                  fnn_lr, 1)
        output = fnn.testing_model(X_test)
        output_list = np.append(output_list, output)

    # y_label = label_convert(y_test, build_hash_table())
    output_list = output_list.reshape(-1, len(fnn_model))

    # Try it without normalization (normalization is currently still applied)
    output_list = Normalize.normalization(output_list)

    label_pred, count = label_encoding(output_list, build_hash_table())
    # cnt = 0
    # for x, y in zip(output_list[0:10], y_test[0:10]):
    #     print(x, ' ', y, ' ', label_pred[cnt])
    #     cnt += 1

    for x, y in zip(y_test, label_pred):
        print('correct', x, '<->', 'predict', y)

    cnf_matrix = confusion_matrix(y_test, label_pred)
    # Plot the confusion matrix
    # plt.figure()
    # ConfusionMatrix.plot_confusion_matrix(cnf_matrix, classes=list(set(y_test)),
    #                       title='Confusion matrix(Final FNN Model)')

    cnf_accuracy = np.sum(cnf_matrix.diagonal()) / np.sum(cnf_matrix)

    print('FinalModel_Accuracy: ', accuracy_score(y_test, label_pred))

    print('This is the confusion matrix(test_all_model)\n', cnf_matrix)
    # print(C_matrix)
    # print(C_accuracy)

    print('<---Test Model Successfully--->')
    print('<----------------------------------------------->')
    return cnf_accuracy, count
Example #6
    def day_time_series(self, use):
        """
        Count the usage of each one-hour interval over the 24 hours of a day,
        with a separate chart for each station.
        Save the result as a csv file first.

        Known exception case:
        Nangang Station has several entrances, e.g.
        南港車站(興華路) and 南港車站(忠孝東路);
        other files contain similar cases.
        """
        header = [(i, i + 1) for i in range(24)]
        for element in self.file_name:
            data = LoadData.load_refactor_ubike_timestamp(element)
            station_count = {}
            for station in self.station_type:
                station_count[station] = [0 for _ in range(24)]

            borrow_time, borrow_station = [], []
            if use == 'BorrowStation':
                borrow_time = [
                    datetime.datetime.strptime(x, '%Y/%m/%d %H:%M:%S')
                    for x in data['BorrowTime'].values
                ]
                borrow_station = data['BorrowStation'].values
            elif use == 'ReturnStation':
                borrow_time = [
                    datetime.datetime.strptime(x, '%Y/%m/%d %H:%M:%S')
                    for x in data['ReturnTime'].values
                ]
                borrow_station = data['ReturnStation'].values
            else:
                print('<---No such column--->')

            for b_time, b_station in zip(borrow_time, borrow_station):
                try:
                    station_count[b_station][b_time.hour] += 1
                except KeyError:
                    for field in self.station_type:
                        if field.find(b_station) != -1:
                            station_count[field][b_time.hour] += 1

            day = borrow_time[0].date()
            pd_data = pd.DataFrame(station_count,
                                   index=header,
                                   columns=station_count.keys())
            Export.day_station_count(pd_data, day, use)
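The KeyError fallback above maps a truncated station name onto every full variant that contains it, which is exactly the 南港車站 case the docstring describes. A self-contained illustration of that lookup, with a hypothetical station set:

# Hypothetical station set; 'bump' reproduces the try/except counting logic above.
station_type = {'南港車站(興華路)', '南港車站(忠孝東路)', '市政府'}
station_count = {s: [0] * 24 for s in station_type}

def bump(b_station, hour):
    try:
        station_count[b_station][hour] += 1
    except KeyError:
        # Credit every full name that contains the truncated key.
        for field in station_type:
            if field.find(b_station) != -1:
                station_count[field][hour] += 1

bump('南港車站', 8)  # increments both 南港車站 variants at hour 8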
Example #7
    def month_time_series(fname, ttype, utype):
        """
        Count the daily usage over the days of each month.
        The months covered run from 2015-01 through 2017-05.
        Generates a new csv from the csv files computed by day_time_series.
        """
        target_timestamp = np.array([])
        start_date = datetime.datetime(2015, 1, 1)
        end_date = datetime.datetime(2017, 5, 31)
        for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
            target_timestamp = np.append(target_timestamp,
                                         dt.strftime("%Y-%m"))
        # print('target_timestamp\n', target_timestamp)

        for timestamp in target_timestamp:
            element = [x for x in fname if x.find(timestamp) != -1]
            np_borrow, np_return = (np.array([]) for _ in range(2))
            idx = np.array([])
            columns = np.array([])

            for field in element:
                idx = np.append(idx, field[6:16])
                borrow_data, return_data = LoadData.load_station_count(
                    field, ttype, utype)
                if columns.size == 0:
                    columns = borrow_data.columns

                # np_data1-> sum of the borrow_data
                # np_data2-> sum of the return_data
                np_data1 = np.sum(borrow_data.values, axis=0).reshape(1, -1)
                np_data2 = np.sum(return_data.values, axis=0).reshape(1, -1)
                if np_borrow.size == 0:
                    np_borrow = np_data1
                else:
                    np_borrow = np.concatenate((np_borrow, np_data1), axis=0)

                if np_return.size == 0:
                    np_return = np_data2
                else:
                    np_return = np.concatenate((np_return, np_data2), axis=0)

            # print('np_borrow\n', np_borrow, np_borrow.shape)
            # print('np_return\n', np_return, np_return.shape)
            pd_borrow = pd.DataFrame(np_borrow, columns=columns, index=idx)
            pd_return = pd.DataFrame(np_return, columns=columns, index=idx)
            Export.month_station_count(pd_borrow, timestamp, utype[0])
            Export.month_station_count(pd_return, timestamp, utype[1])
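A minimal driver sketch for month_time_series, following the conventions of the other statistics scripts in this listing (and ignoring that the method may live on a class in the project):

# Hypothetical driver; load_station_count_fname is assumed to return all day files.
bandwidths = ['Day']
used = ['BorrowStation', 'ReturnStation']
file_name = LoadData.load_station_count_fname(bandwidths[0], used[0])
month_time_series(file_name, bandwidths[0], used)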
Example #8
def graph_by_hour(fname, ttype, utype):
    """
    Month Graph (one month; the x-axis walks 24 hours for each day)
    Ubike-DataMining\\Data\\NewUbike\\UbikeStatistic\\Day\\
    """
    # Basic Setting
    header = [x[14:16] for x in fname]
    sns.set(style='darkgrid')
    myfont = fm.FontProperties(fname='C:/Windows/Fonts/msyh.ttc')
    color_array = ['Blue', 'Red', 'Green']
    label_array = ['Borrow', 'Return', 'Total']
    line_size = 0.7

    # Open or Create a folder
    name = fname[0][6:13]
    path = "../Data/Graph/Statistic_Graph/Month(hour)/" + name
    if not os.path.isdir(path):
        os.mkdir(path)

    # Load data and plot
    # x_data = np.array([i for i in range(24 * len(header))])
    pd_borrow_data, pd_return_data = (pd.DataFrame() for _ in range(2))
    for file in fname:
        borrow_data, return_data = LoadData.load_station_count(
            file, ttype, utype)
        pd_borrow_data = pd.concat((pd_borrow_data, borrow_data), axis=0)
        pd_return_data = pd.concat((pd_return_data, return_data), axis=0)

    for col in pd_borrow_data.columns:
        y1_data = pd_borrow_data[col].values
        y2_data = pd_return_data[col].values
        y3_data = y1_data + y2_data

        f, axarr = plt.subplots(3, 1, sharex='all', figsize=(16, 8), dpi=100)
        axarr[0].plot(y1_data,
                      color=color_array[0],
                      label=label_array[0],
                      linewidth=line_size)
        axarr[1].plot(y2_data,
                      color=color_array[1],
                      label=label_array[1],
                      linewidth=line_size)
        axarr[2].plot(y3_data,
                      color=color_array[2],
                      label=label_array[2],
                      linewidth=line_size)

        # Graph Setting
        # plt.tight_layout()
        axarr[0].set_title(col + '(' + name + ')', fontproperties=myfont)
        plt.xticks(np.arange(0, 24 * len(header), 24),
                   header,
                   rotation=0,
                   fontsize=12)
        for sub_plot in axarr:
            sub_plot.legend(loc='upper right')
            if np.sum(y3_data) == 0:
                sub_plot.set_ylim(ymin=0)
            else:
                sub_plot.set_ylim(ymin=0, ymax=np.max(y3_data))
            sub_plot.set_ylabel('次數', fontproperties=myfont)  # '次數' = 'Count'
        try:
            photo_path = path + '/' + col + '.png'
            plt.savefig(photo_path)
        except FileNotFoundError:
            col = col.replace('/', '-')
            photo_path = path + '/' + col + '.png'
            plt.savefig(photo_path)
        # plt.ion()
        # plt.pause(1)
        plt.close()
Example #9
import pandas as pd

from Method.LoadData import LoadData


def ubike_station_clean(data):
    taipei_area = [
        '中正區', '大同區', '中山區', '松山區', '大安區', '萬華區', '信義區', '士林區', '北投區', '內湖區',
        '南港區', '文山區'
    ]

    new_taipei_area = [
        '板橋區', '中和區', '新莊區', '土城區', '汐止區', '鶯歌區', '淡水區', '五股區', '林口區', '深坑區',
        '坪林區', '石門區', '萬里區', '雙溪區', '烏來區', '三重區', '永和區', '新店區', '蘆洲區', '樹林區',
        '三峽區', '瑞芳區', '泰山區', '八里區', '石碇區', '三芝區', '金山區', '平溪區', '貢寮區'
    ]
    collection = set(taipei_area + new_taipei_area)
    new_data = data[data['sarea'].isin(collection)]
    return new_data


ubike_station = LoadData.load_ubike_station()
# print('ubike_station\n', ubike_station.head())

new_ubike_station = ubike_station_clean(ubike_station)
path = '../Data/NewUbike/youbikeStation.csv'
new_ubike_station.to_csv(path, index=False, encoding='utf_8_sig')

data = pd.read_csv(path)
print(data.head())
Example #10
            col = col.replace('/', '-')
            photo_path = path + '/' + col + '.png'
            plt.savefig(photo_path)
        # plt.ion()
        # plt.pause(1)
        plt.close()
        # plt.show()


print("<---0. Month (x-axis is one day)--->")
print("<---1. Month:Subplot (x-axis is one hour)--->")
instruction = input("<---Please choose and run program--->: ")

if int(instruction) == 0:
    # Generate the time series graph
    borrow_file_name = LoadData.load_month_borrow_fname()
    return_file_name = LoadData.load_month_return_fname()
    graph_by_day(borrow_file_name, return_file_name)

elif int(instruction) == 1:
    bandwidths = ['Day']
    used = ['BorrowStation', 'ReturnStation']
    file_name = LoadData.load_station_count_fname(bandwidths[0], used[0])
    # print(file_name)

    # First generate the month timestamps
    target_timestamp = np.array([])
    start_date = datetime.datetime(2015, 1, 1)
    end_date = datetime.datetime(2017, 5, 31)
    for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
        target_timestamp = np.append(target_timestamp, dt.strftime("%Y-%m"))
Example #11
def graph_by_day(borrow_file, return_file):
    """
    Month Graph (one month)
    To draw a one-month statistics chart,
    read the files directly and plot them.
    Ubike-DataMining\\Data\\NewUbike\\UbikeStatistic\\Month\\
    """

    sns.set(style='darkgrid')
    myfont = fm.FontProperties(fname='C:/Windows/Fonts/msyh.ttc')
    color_array = ['Blue', 'Red', 'Green']
    label_array = ['Borrow', 'Return', 'Total']

    for x, y in zip(borrow_file, return_file):

        # Open or Create a folder
        name = x[6:13]
        path = "../Data/Graph/Statistic_Graph/Month/" + name
        if not os.path.isdir(path):
            os.mkdir(path)

        borrow_data = LoadData.load_month_borrow_data(x)
        return_data = LoadData.load_month_return_data(y)
        # print('borrow_data\n', borrow_data.head())
        # print('return_data\n', return_data.head())
        columns = borrow_data.columns
        idx = borrow_data.index
        x_data = np.array([i for i in range(len(idx))])

        for col in columns:
            fig, ax = plt.subplots(figsize=(16, 8), dpi=100)
            y1_data = borrow_data[col].values
            y2_data = return_data[col].values
            y3_data = y1_data + y2_data
            ax.plot(x_data,
                    y1_data,
                    color=color_array[0],
                    label=label_array[0])
            ax.plot(x_data,
                    y2_data,
                    color=color_array[1],
                    label=label_array[1])
            ax.plot(x_data,
                    y3_data,
                    color=color_array[2],
                    label=label_array[2])

            plt.title(col + '(' + name + ')', fontproperties=myfont)
            plt.xlabel('時間軸', fontproperties=myfont)  # '時間軸' = 'time axis'
            plt.ylabel('租借次數', fontproperties=myfont)  # '租借次數' = 'rental count'
            plt.xticks(np.arange(len(idx)), idx, rotation=45, fontsize=12)
            if np.sum(y3_data) == 0:
                plt.ylim(ymin=0)
            else:
                plt.ylim(ymin=0, ymax=np.max(y3_data))

            # Put a nicer background color on the legend.
            legend = ax.legend(loc='upper right')
            legend.get_frame().set_facecolor('C0')
            try:
                photo_path = path + '/' + col + '.png'
                plt.savefig(photo_path)
            except FileNotFoundError:
                col = col.replace('/', '-')
                photo_path = path + '/' + col + '.png'
                plt.savefig(photo_path)
            # plt.ion()
            # plt.pause(1)
            plt.close()
Example #12
def graph_by_day(borrow_file, return_file, condition):
    """
    Year Graph (x-axis is one day)
    To draw a one-year statistics chart,
    first collect that year's monthly files, then read them and plot.
    Ubike-DataMining\\Data\\NewUbike\\UbikeStatistic\\Month\\
    condition selects whether to draw combined subplots
    """

    sns.set(style='darkgrid')
    myfont = fm.FontProperties(fname='C:/Windows/Fonts/msyh.ttc')
    color_array = ['Blue', 'Red', 'Green']
    label_array = ['Borrow', 'Return', 'Total']

    timestamp = build_timestamp()
    # print('target_timestamp', timestamp)
    for count in range(int(len(timestamp) / 12)):
        title_name = ""
        total_borrow_data = pd.DataFrame()
        total_return_data = pd.DataFrame()
        for x, y in zip(borrow_file[12 * count:12 * (count + 1)],
                        return_file[12 * count:12 * (count + 1)]):
            # Open or Create a folder
            title_name = x[6:10]
            if condition == 1:
                path = "../Data/Graph/Statistic_Graph/Year/" + title_name
            else:
                path = "../Data/Graph/Statistic_Graph/Year(Subplots)/" + title_name

            if not os.path.isdir(path):
                os.mkdir(path)

            borrow_data = LoadData.load_month_borrow_data(x)
            return_data = LoadData.load_month_return_data(y)
            # print('borrow_data\n', borrow_data.head())
            # print('return_data\n', return_data.head())

            total_borrow_data = pd.concat((total_borrow_data, borrow_data),
                                          axis=0)
            total_return_data = pd.concat((total_return_data, return_data),
                                          axis=0)
        # print('borrow_data\n', total_borrow_data.shape)
        # print('return_data\n', total_return_data.shape)

        columns = total_borrow_data.columns
        # idx = total_borrow_data.index
        x_data = np.array([i for i in range(len(total_borrow_data.index))])
        # xticks = timestamp[12*count: 12*(count+1)]

        for col in columns:
            if condition == 1:
                fig, ax = plt.subplots(figsize=(16, 8), dpi=100)
                y1_data = total_borrow_data[col].values
                y2_data = total_return_data[col].values
                # y3_data = y1_data + y2_data
                ax.plot(x_data,
                        y1_data,
                        color=color_array[0],
                        label=label_array[0])
                ax.plot(x_data,
                        y2_data,
                        color=color_array[1],
                        label=label_array[1])
                # ax.plot(x_data, y3_data, color=color_array[2], label=label_array[2])

                # year = mdates.YearLocator()
                # month = mdates.MonthLocator()
                # day = mdates.DayLocator()
                # date_format = mdates.DateFormatter("%Y-%m")
                # ax.xaxis.set_major_locator(year)
                # ax.xaxis.set_major_locator(month)
                # ax.xaxis.set_major_locator(day)
                # ax.xaxis.set_major_formatter(date_format)
                # fig.autofmt_xdate()

                plt.title(col + '(' + title_name + ')', fontproperties=myfont)
                plt.xlabel('時間軸', fontproperties=myfont)
                plt.ylabel('租借次數', fontproperties=myfont)
                # print('xticks', xticks)
                # plt.xticks(np.arange(len(xticks)), xticks, rotation=45, fontsize=12)

                # plt.margins(x=0, y=0)

                # if np.sum(y3_data) == 0:
                #     plt.ylim(ymin=0)
                # else:
                #     plt.ylim(ymin=0, ymax=np.max(y3_data))

                # Put a nicer background color on the legend.
                legend = ax.legend(loc='upper right')
                legend.get_frame().set_facecolor('C0')

                path = "../Data/Graph/Statistic_Graph/Year/" + title_name
                try:
                    photo_path = path + '/' + col + '.png'
                    plt.savefig(photo_path)
                except FileNotFoundError:
                    col = col.replace('/', '-')
                    photo_path = path + '/' + col + '.png'
                    plt.savefig(photo_path)
                # plt.ion()
                # plt.pause(1)
                plt.close()
                # plt.show()
            # Combined subplot figure
            else:
                fig, ax = plt.subplots(2,
                                       sharex='all',
                                       figsize=(16, 8),
                                       dpi=100)
                y1_data = total_borrow_data[col].values
                y2_data = total_return_data[col].values
                ax[0].plot(x_data,
                           y1_data,
                           color=color_array[0],
                           label=label_array[0])
                ax[1].plot(x_data,
                           y2_data,
                           color=color_array[1],
                           label=label_array[1])

                ax[0].set_title(col + '(' + title_name + ')',
                                fontproperties=myfont)
                ax[1].set_xlabel('時間軸', fontproperties=myfont)
                ax[1].set_ylabel('租借次數', fontproperties=myfont)

                ax[0].legend(loc='upper right')
                ax[1].legend(loc='upper right')

                path = "../Data/Graph/Statistic_Graph/Year(Subplots)/" + title_name
                try:
                    photo_path = path + '/' + col + '.png'
                    plt.savefig(photo_path)
                except FileNotFoundError:
                    col = col.replace('/', '-')
                    photo_path = path + '/' + col + '.png'
                    plt.savefig(photo_path)
                # plt.ion()
                # plt.pause(1)
                plt.close()
Example #13
all_label = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6']
# cluster_num = {'C1': 6, 'C2': 5, 'C3': 5, 'C4': 5, 'C5': 5, 'C6': 4}
cluster_num = {'C1': 2, 'C2': 2, 'C3': 2, 'C4': 2, 'C5': 2, 'C6': 2}
nn_category = np.array([])
for element in all_label:
    if cluster_num[element] == 0:
        nn_category = np.append(nn_category, element)
    else:
        for num in range(cluster_num[element]):
            nn_category = np.append(nn_category, element + '_' + str(num))
print('nn_category', nn_category)
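# With every cluster count set to 2, the loop above yields twelve names:
# C1_0, C1_1, C2_0, C2_1, ..., C6_0, C6_1.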

# Run the experiment from one dimension to five dimension
for nn in nn_category:
    # Read file LNN_Train_data.xlsx'
    org_data, org_label = LoadData.get_method2_fnn_train(nn)
    # print('org_data', org_data)
    # print('org_label', org_label)

    data1, data2 = (np.array([]) for _ in range(2))
    for element, stamp in zip(org_data, org_label):
        print(element, stamp)
        if stamp == nn:
            data1 = np.append(data1, element)
        else:
            data2 = np.append(data2, element)

    # Make graph
    fig = plt.figure(figsize=(8, 6), dpi=100)
    ax = Axes3D(fig)
Example #14
    tmp = np.array([])
    for i in range(0, len(data), 1):
        if data[i] > up_bound or data[i] < low_bound:
            print('smooth')
            if 1 < i < len(data) - 2:  # keep all four neighbours in range
                tmp = np.append(
                    tmp,
                    ((data[i - 2] + data[i - 1] + data[i + 1] + data[i + 2]) /
                     4))
        else:
            tmp = np.append(tmp, data[i])
    return tmp, up_bound, low_bound


feature = 0
org_data, org_label = LoadData.get_split_data()
# org_data = Normalize.normalization(org_data.T[0])

print(org_data.shape)
print(org_label.shape)

# title = 'Acc'
plt.figure(figsize=(8, 6), dpi=120)
# plt.title(title)
# plt.xlabel('Time')
# plt.ylabel('Values')

# plt.xlim(np.min(x_data) - 1, np.max(x_data) + 1)
plt.ylim(200, 1500)

# 0 -> Acc_X
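The smoothing fragment at the top of this example is missing its function header; a self-contained sketch of the same idea, with the bounds derived from mean and standard deviation as an assumption (the original may compute them differently):

import numpy as np

def smooth_outliers(data, k=2.0):
    # Hypothetical bounds: mean +/- k standard deviations.
    up_bound = np.mean(data) + k * np.std(data)
    low_bound = np.mean(data) - k * np.std(data)
    tmp = np.array([])
    for i in range(len(data)):
        if data[i] > up_bound or data[i] < low_bound:
            # Replace an interior outlier with the mean of its four neighbours;
            # edge outliers are simply dropped, as in the fragment above.
            if 1 < i < len(data) - 2:
                tmp = np.append(
                    tmp,
                    (data[i - 2] + data[i - 1] + data[i + 1] + data[i + 2]) / 4)
        else:
            tmp = np.append(tmp, data[i])
    return tmp, up_bound, low_bound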
Example #15
        plt.ylabel('Count')
        plt.xticks(np.arange(24), borrow_data.index, rotation=45)
        if np.sum(y3_data) == 0:
            plt.ylim(ymin=0)
        else:
            plt.ylim(ymin=0, ymax=np.max(y3_data))

        # Put a nicer background color on the legend.
        legend = ax.legend(loc='upper right')
        legend.get_frame().set_facecolor('C0')
        try:
            photo_path = path + '/' + col + '.png'
            plt.savefig(photo_path)
        except FileNotFoundError:
            col = col.replace('/', '-')
            photo_path = path + '/' + col + '.png'
            plt.savefig(photo_path)
        plt.ion()
        plt.pause(1)
        plt.close()
        # plt.show()


# Generate the time series graph
bandwidths = ['Day']
used = ['BorrowStation', 'ReturnStation']
file_name = LoadData.load_station_count_fname(bandwidths[0], used[0])
print(file_name)
for i in range(0, 10, 1):
    generate_graph(file_name[i], bandwidths[0], used)
Example #16
fnn_label_size = 6
fnn_input_size = 3
fnn_membership_size = fnn_input_size * fnn_label_size
fnn_rule_size = 6
fnn_output_size = 1
fnn_lr = 0.001
fnn_epoch = 1
fnn_random_size = 1

fnn_threshold = [
    0.2, 0.0, 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.0, 0.2, 0.1, 0.1, 0.0,
    0.2, 0.0, 0.1, 0.1, 0.2, 0.1, 0.0, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1
]

# Load the Test data
org_data, org_label = LoadData.get_method2_test()
# print('org_data.shape', org_data.shape)
# print('org_label.shape', org_label)

output_array = np.array([])

# Load the test data, forward, store
for nn in nn_category:
    print('nn -> ', nn)
    rel_path = '../Experiment/Method2/FNNModel/FNN/' + str(nn) + '.json'
    abs_path = os.path.join(os.path.dirname(__file__), rel_path)
    attribute = LoadData.load_fnn_weight(abs_path)
    # print(attribute)
    mean = np.asarray(attribute['Mean'])
    stddev = np.asarray(attribute['Stddev'])
    weight = np.asarray(attribute['Weight'])
Example #17
        #         fnn_accuracy.append(accuracy)
        #         fnn_matrix.append(matrix)

        #         print('<---Train the FNN' + name + ' Successfully--->')
        #         print('<----------------------------------------------->')

        # print('fnn_matrix', fnn_matrix)
        # print('fnn_accuracy', fnn_accuracy)
        # for i in range(len(fnn_matrix)):
        #     rel_path = './Experiment/method3/Graph/cnf' + str(i) + '.png'
        #     abs_path = os.path.join(os.path.dirname(__file__), rel_path)
        #     ConfusionMatrix.plot_confusion_matrix(
        #         fnn_matrix[i], abs_path, classes=[0, 1], title='C' + str(i) + ' Cnf')

        print('<---Part2, Keras Networks(LNN)--->')
        # org_data, org_label = LoadData.get_lnn_training_data()
        org_data, org_label = LoadData.get_method3_test()
        # reduced_data = ra.pca(org_data, fnn_input_size)
        # normalized_data = Normalize.normalization(reduced_data)
        # nn_category = [i for i in range(1, 7, 1)]

        nn_array = [
            'C1_0', 'C1_1', 'C2_0', 'C2_1', 'C3_0', 'C3_1', 'C4_0', 'C4_1',
            'C5_0', 'C5_1', 'C6_0', 'C6_1'
        ]
        train_keras_lnn(nn_array, org_data, org_label, algorithm)

        end = time.time()
        print('All cost time is (' + str(end - start) + ')')
Example #18
start = time.time()

# Number of wavelet decomposition levels
layer = 15

# Start year and end year
syear = 2015
eyear = 2016
timeseries = generate_target_timeseries(syear, eyear)

# Read each csv file and pull out its data
for series, target_timestamp in timeseries.items():
    total_borrow_data = pd.DataFrame()
    total_return_data = pd.DataFrame()
    for target in target_timestamp:
        borrow_data, return_data = LoadData.load_station_count(
            target, 'Day', ['BorrowStation', 'ReturnStation'])
        total_borrow_data = pd.concat((total_borrow_data, borrow_data), axis=0)
        total_return_data = pd.concat((total_return_data, return_data), axis=0)
    print(total_borrow_data.shape)
    print(total_return_data.shape)

    # Perform the wavelet decomposition (levels 0 ~ N)
    # and produce the corresponding visualizations of the decomposed series
    algorithm = pywt.Wavelet('db2')
    wavelet = WaveletTransform(total_borrow_data,
                               total_return_data,
                               int(layer),
                               algorithm,
                               interval=series[0])
    wavelet.fit_transform()
Example #19
"""
Extracted feature values (Feature)
When the window slides, it must overlap the previous sample by half (1/2).
The result is named Original_data.xlsx
"""

import os
import numpy as np
import pandas as pd

from Method.LoadData import LoadData
from Method.DataCombine import DataCombine
from Method.Export import Export

label_type = 'C'

load = LoadData()
load_data, org_label = load.get_original_excel(label_type, 'Split_data')

# Convert the original data to the data with 264 dimensions
org_data = DataCombine.combine(load_data)
print(org_data.shape)
print(org_label.shape)

header = ['Dim' + str(i) for i in range(1, 265, 1)]
pd_data = pd.DataFrame(org_data, columns=header)
pd_label = pd.DataFrame(org_label, columns=['Label'])

result = pd.concat([pd_data, pd_label], axis=1)

print(result.head())
Example #20
fnn_threshold = 0.0
threshold_internal = [(num / 10) for num in range(-6, 7, 1)]

print('<---1. For loop to run all--->')
print('<---2. For loop to run one in threshold internal--->')
print('<---3. For loop to run all with threshold internal--->')
method = input('<---Choose Method--->: ')

if method == "1":
    for nn in nn_category:
        print('nn ->', nn)
        # Load the json
        rel_path = '../Experiment/Method3/FNNModel/FNN/' + str(nn) + '.json'
        abs_path = os.path.join(os.path.dirname(__file__), rel_path)
        attribute = LoadData.load_fnn_weight(abs_path)
        # print(attribute)

        # Load the test data
        org_data, org_label = LoadData.get_method2_fnn_train(nn)
        # print('org_data.shape', org_data.shape)
        # print('org_label.shape', org_label)

        mean = np.asarray(attribute['Mean'])
        stddev = np.asarray(attribute['Stddev'])
        weight = np.asarray(attribute['Weight'])
        # Test the FNN
        fnn = FNN(fnn_input_size, fnn_membership_size, fnn_rule_size,
                  fnn_output_size, mean, stddev, weight, fnn_lr, 1)

        output = fnn.testing_model(org_data)
"""
Data Clean(Ubike Charging Date)
"""

import pandas as pd

from Method.LoadData import LoadData

desired_width = 320
pd.set_option('display.width', desired_width)
pd.set_option('display.max_columns', 20)

file_name = LoadData.load_timestamp_fname()
# print('file_name', file_name)

station = LoadData.load_refactor_ubike_station()
sna = station['sna'].values
# print('station', station)

name = 'youbike_20150413.csv'
data = LoadData.load_ubike_timestamp(name)
new_data = data[data['BorrowStation'].isin(sna)
                & data['ReturnStation'].isin(sna)]
# print('new_data\n', new_data)
path = '../Data/NewUbike/Ubike/' + name
new_data.to_csv(path, index=False, encoding='utf_8_sig')

# for name in file_name:
#     data = LoadData.load_ubike_timestamp(name)
#     new_data = data[data['BorrowStation'].isin(sna) & data['ReturnStation'].isin(sna)]
#     # print('new_data\n', new_data)
Example #22
from sklearn.preprocessing import MinMaxScaler

from Method.LoadData import LoadData

# plotly.tools.set_credentials_file(username='******', api_key='DNnX5s0iZwH24b2qb4Zo')

mapbox_access_token = pd.read_json('../Data/PlotlyToken.json', typ='series')
print(mapbox_access_token['token'])

desired_width = 320
pd.set_option('display.width', desired_width)
pd.set_option('display.max_columns', 20)

# Load ubike station
# ubike_station = LoadData.load_ubike_station()
ubike_station = LoadData.load_refactor_ubike_station()
# print('ubike_station\n', ubike_station.head())

ubike_slice = ubike_station.loc[:, ['sna', 'lat', 'lng']]
# print(ubike_slice.head())

header = ['sna', 'attr1', 'attr2', 'attr3', 'slope', 'color']
borrow_data = pd.DataFrame(columns=header)
return_data = pd.DataFrame(columns=header)
year = 2015
path = '../Data/' + str(year) + 'Final_Wavelet_Trend(Borrow).txt'
with open(path, 'r', encoding='utf-8') as file_handle:
    for line in file_handle.readlines():
        field = line.split('|')
        sname = list(make_tuple(field[0]))
        wdata = list(make_tuple(field[1]))
Example #23
"""
mds runs for a very long time
tSNE runs for a very long time

modified_lle goes wrong
hessian_lle goes wrong
"""
# reduced_algorithm = [
# 'lle', 'pca', 'KernelPCA', 'FactorAnalysis', 'Isomap'
# , 'ltsa_lle', 'sparse_pca', 'tSNE']

reduced_algorithm = ['Isomap']

# Run the experiment from one dimension to five dimension
for algorithm in reduced_algorithm:
    # Read file LNN_Train_data.xlsx'
    org_data, org_label = LoadData.get_lnn_training_data()

    # Normalize the data
    # normalized_data = preprocessing.normalize(org_data)
    min_max_scaler = preprocessing.MinMaxScaler()
    normalized_data = min_max_scaler.fit_transform(org_data)
    # print(normalized_data)

    # Call a different dimensionality-reduction algorithm per run
    # to reduce the data and extract feature values
    reduced_data = np.array([])
    if algorithm == 'lle':
        reduced_data = ra.lle(normalized_data, dim)

    elif algorithm == 'modified_lle':
        reduced_data = ra.modified_lle(normalized_data, dim)
Example #24
                        element,
                        c=color,
                        linewidth=line_size,
                        label='Len(Wavelet): ' + str(layer))

            # plt.legend(loc='upper right')
            ax.set_title(str(key))
            path = '../Data/Graph/WaveletTrend/' + str(year) + '/'
            if not os.path.exists(path):
                os.mkdir(path)
            photo = path + str(key) + '.png'
            plt.savefig(photo, dpi=100)
            # plt.show()
            plt.close()


years = [x for x in range(2015, 2017, 1)]
for year in years:
    wavelet_data = LoadData.load_wavelet_data(year)
    # print(len(wavelet_data.keys()))
    # print(len(wavelet_data.values()))

    # Check which weekday the first day of the year falls on
    today_week = datetime.datetime.strptime(
        str(year) + '-' + '01' + '-' + '01', '%Y-%m-%d').weekday()
    # start_day = datetime.datetime.strptime(str(year)+'-'+'01'+'-'+'01', '%Y-%m-%d') + \
    #             datetime.timedelta(days=(7 - today_week))
    # print(start_day)

    plot_trend(wavelet_data, 7 - today_week, year)
Example #25
def train_local_fnn(nn, algorithm):

    # Declare variables
    nn_mean, nn_stddev, nn_weight = (0.0 for _ in range(3))
    accuracy = 0.0
    matrix = np.array([])
    record_fnn = FNN()
    loss_list = np.array([])

    # This variable is used to store the all accuracy
    all_nn_accuracy = np.array([])

    # Load file FNN_Train_data_' + str(num) + '.xlsx
    org_data, org_label = LoadData.get_method1_fnn_train(nn)
    org_label = np.array([1 if element == nn else 0 for element in org_label])

    # Reduce dimension and generate train/test data
    reduced_data = reduce_dimension(org_data, org_label, algorithm)
    # normalized_data = preprocessing.normalize(reduced_data)
    # reduced_data = normalization(reduced_data)

    # Normalization option 1
    # min_max_scaler = preprocessing.MinMaxScaler()
    # normalized_data = min_max_scaler.fit_transform(reduced_data)

    # Normalization option 2
    # normalized_data = preprocessing.scale(reduced_data)

    # Normalization option 3
    normalized_data = Normalize.normalization(reduced_data)

    X_train, X_test, y_train, y_test = train_test_split(normalized_data,
                                                        org_label,
                                                        test_size=0.3)
    # print(X_train, X_train.shape)
    # print(y_train, y_train.shape)

    # Train the FNN
    print('<---Train the FNN' + str(nn) + ' Start--->')
    for i in range(fnn_random_size):

        # Random Generate the mean, standard deviation
        mean = np.array(
            [np.random.uniform(-1, 1) for _ in range(fnn_membership_size)])
        stddev = np.array(
            [np.random.uniform(0, 1) for _ in range(fnn_membership_size)])
        weight = np.array(
            [np.random.uniform(-1, 1) for _ in range(fnn_rule_size)])
        """
        # Generate FNN object to train
        # para1 -> fnn input layer size
        # para2 -> fnn membership layer size
        # para3 -> fnn rule layer size
        # para4 -> fnn output layer size
        # para5 -> random mean values
        # para6 -> random stddev values
        # para7 -> random weight values
        # para8 -> nn label type
        """
        fnn = FNN(fnn_input_size, fnn_membership_size, fnn_rule_size,
                  fnn_output_size, mean, stddev, weight, fnn_lr, 1)
        fnn.training_model(fnn_epoch, X_train, y_train)

        # Test the FNN model, save the one that has the best accuracy
        test_output = fnn.testing_model(X_test)

        label_pred = label_encode(nn, test_output)
        # print(y_test.shape)
        # print(label_pred.shape)
        # print(y_test)
        # print(label_pred)

        C_matrix = confusion_matrix(y_test, label_pred)
        C_accuracy = np.sum(C_matrix.diagonal()) / np.sum(C_matrix)
        all_nn_accuracy = np.append(all_nn_accuracy, C_accuracy)

        # print(C_matrix)
        # print(C_accuracy)
        if C_accuracy > accuracy:
            accuracy = copy.deepcopy(C_accuracy)
            nn_mean = copy.deepcopy(fnn.mean)
            nn_stddev = copy.deepcopy(fnn.stddev)
            nn_weight = copy.deepcopy(fnn.weight)
            matrix = copy.deepcopy(C_matrix)
            record_fnn = copy.deepcopy(fnn)
            loss_list = copy.deepcopy(fnn.loss_list)
        """
        Every error trend graph will output
        Output the Error Plot to observe trend
        """
        # rel_path = './Data/Graph/' + str(i) + '_FNN_' + str(nn) + '_error_trend.png'
        # abs_path = os.path.join(os.path.dirname(__file__), rel_path)
        # ErrorPlot.error_trend(
        #     str(i) + '_FNN_' + str(nn) + '_error_trend', len(fnn.error_list), fnn.error_list, abs_path)

    print('<---Train the FNN' + str(nn) + ' Successfully--->')
    print('<----------------------------------------------->')

    # print('1_cwd:', os.getcwd())

    # The first time through, create the output folder
    if nn == 1:
        org_path = './Data/Graph/'
        makedir(org_path, algorithm)
    # else:
    #     os.chdir('./Data/Graph/' + dimension_reduce_algorithm)
    # print('2_cwd:', os.getcwd())

    # Choose the best FNN to Plot error trend
    # rel_path = org_path + 'Best_FNN_' + str(nn) + '_error_trend.png'
    # abs_path = os.path.join(os.path.dirname(__file__), rel_path)
    abs_path = os.getcwd() + '\\Best_FNN_' + str(nn) + '_error_trend.png'
    # print('ErrorPlot', abs_path)
    ErrorPlot.error_trend('Best_FNN_' + str(nn) + '_error_trend',
                          len(record_fnn.error_list), record_fnn.error_list,
                          abs_path)

    abs_path = os.getcwd() + '\\Best_FNN_' + str(nn) + '_loss_trend.png'
    # Choose the best FNN to Plot loss on every epoch
    # ErrorPlot.loss_trend(
    #     'Best_FNN_' + str(nn) + '_loss_trend', len(loss_list), loss_list, abs_path)

    # Choose the best Accuracy to Plot
    # rel_path = org_path + 'Accuracy vs FNN' + str(nn) + '.png'
    # abs_path = os.path.join(os.path.dirname(__file__), rel_path)
    abs_path = os.getcwd() + '\\Accuracy vs FNN' + str(nn) + '.png'
    # print('AccuracyPlot', abs_path)
    AccuracyPlot.build_accuracy_plot(
        'Accuracy vs FNN' + str(nn),
        np.array([i for i in range(1,
                                   len(all_nn_accuracy) + 1, 1)]),
        all_nn_accuracy, abs_path)

    return nn_mean, nn_stddev, nn_weight, accuracy, matrix
Example #26
                ax[0].legend(loc='upper right')
                ax[1].legend(loc='upper right')

                path = "../Data/Graph/Statistic_Graph/Year(Subplots)/" + title_name
                try:
                    photo_path = path + '/' + col + '.png'
                    plt.savefig(photo_path)
                except FileNotFoundError:
                    col = col.replace('/', '-')
                    photo_path = path + '/' + col + '.png'
                    plt.savefig(photo_path)
                # plt.ion()
                # plt.pause(1)
                plt.close()
                # plt.show()


# Generate the time series graph
borrow_file_name = LoadData.load_month_borrow_fname()
return_file_name = LoadData.load_month_return_fname()

print('<---1. Single figure--->')
print('<---2. Combined subplots--->')
choose = input('<---Please input the condition--->: ')

if int(choose) == 1 or int(choose) == 2:
    graph_by_day(borrow_file_name, return_file_name, int(choose))
else:
    print('<---Error Condition--->')
Example #27
def train_keras_lnn(nn_array, org_data, org_label, algorithm):
    """Get the fnn output and input the lnn"""
    fnn_output = np.array([])
    for name in nn_array:
        print('<---nn -> ', name, '--->')
        rel_path = './Experiment/Method3/FNNModel/' + name + '.json'
        abs_path = os.path.join(os.path.dirname(__file__), rel_path)
        attribute = LoadData.load_fnn_weight(abs_path)
        mean = np.asarray(attribute['Mean'])
        stddev = np.asarray(attribute['Stddev'])
        weight = np.asarray(attribute['Weight'])
        # Test the FNN
        fnn = FNN(fnn_input_size, fnn_membership_size, fnn_rule_size,
                  fnn_output_size, mean, stddev, weight, fnn_lr, 1)
        result = fnn.testing_model(org_data)
        fnn_output = np.append(fnn_output, result)

    fnn_output = fnn_output.reshape(len(nn_array), -1).T

    # fnn_label = np.array([int(e[1:2])-1 for e in org_label])
    print('org_label', org_label)
    fnn_label = label_convert(org_label)
    X_train, X_test, y_train, y_test = train_test_split(fnn_output,
                                                        fnn_label,
                                                        test_size=0.3,
                                                        random_state=42)
    print('X_train.shape', X_train.shape)
    print('y_train.shape', y_train.shape)

    # Construct the lnn
    y_trainOneHot = np_utils.to_categorical(y_train)
    y_testOneHot = np_utils.to_categorical(y_test)

    model = Sequential()
    model.add(Dense(units=32, input_dim=12))
    model.add(Dense(32, activation='tanh'))
    model.add(
        Dense(units=12, kernel_initializer='normal', activation='softmax'))
    adam = optimizers.Adam(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=adam, metrics=['mse'])
    model.summary()

    train_history = model.fit(x=X_train,
                              y=y_trainOneHot,
                              validation_split=0.2,
                              epochs=30,
                              batch_size=200,
                              verbose=2)
    show_train_history(train_history, 'mean_squared_error',
                       'val_mean_squared_error', 'mean_squared_error.png')
    show_train_history(train_history, 'loss', 'val_loss', 'loss.png')

    scores = model.evaluate(X_test, y_testOneHot)
    print('scores', scores)

    prediction = model.predict(X_test)
    for x, y in zip(prediction[:10], y_testOneHot[:10]):
        print(x, ' ', y)

    prediction = model.predict_classes(X_test)
    y_pred = prediction_convert(prediction)
    yy = onehot_convert(y_testOneHot)

    print(set(y_pred))
    print(set(yy))

    cnf_matrix = confusion_matrix(yy, y_pred)
    print('accuracy_score', accuracy_score(yy, y_pred))
    print('cnf_matrix\n', cnf_matrix)
    rel_path = './Experiment/method3/Graph/cnf_lnn.png'
    abs_path = os.path.join(os.path.dirname(__file__), rel_path)
    plt.figure(figsize=(8, 6), dpi=200)
    ConfusionMatrix.plot_confusion_matrix(cnf_matrix,
                                          abs_path,
                                          classes=list(set(y_pred)),
                                          title='Final Model Confusion matrix')
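train_keras_lnn leans on three project helpers that are not shown here (label_convert, prediction_convert, onehot_convert); hypothetical sketches consistent with how they are called above, where twelve FNNs map to labels such as 'C1_0' .. 'C6_1' (the real project versions may differ):

import numpy as np

def label_convert(org_label):
    # Hypothetical: 'C3_1' -> (3 - 1) * 2 + 1 = 5, giving class indices 0..11.
    return np.array([(int(e[1]) - 1) * 2 + int(e[-1]) for e in org_label])

def onehot_convert(onehot):
    # One-hot rows back to integer class indices.
    return list(np.argmax(onehot, axis=1))

def prediction_convert(prediction):
    # predict_classes already yields integer indices; pass them through.
    return list(prediction)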
Example #28
def train_label_nn(fnn_attribute, algorithm):
    # Declare variables
    nn_weight1, nn_weight2, nn_bias = (0.0 for _ in range(3))
    accuracy = 0.0
    matrix = np.array([])
    record_lnn = LabelNN()

    # This variable is used to store the all accuracy
    all_nn_accuracy = np.array([])

    # Load file LNN_Train_data.xlsx
    org_data, org_label = LoadData.get_lnn_training_data()

    # Reduce dimension and generate train/test data
    reduced_data = reduce_dimension(org_data, org_label, algorithm)

    # normalized_data = preprocessing.normalize(reduced_data)

    # min_max_scaler = preprocessing.MinMaxScaler()
    # normalized_data = min_max_scaler.fit_transform(reduced_data)

    # normalized_data = preprocessing.scale(reduced_data)

    normalized_data = Normalize.normalization(reduced_data)

    # reduced_data = normalization(reduced_data)
    X_train, X_test, y_train, y_test = train_test_split(normalized_data,
                                                        org_label,
                                                        test_size=0.3)

    # print('X_train', X_train)
    # print('X_test', X_test)

    # Label Neural Networks Structure
    weight1_size = 36
    weight2_size = 36
    bias_size = 6

    # Save the one that has the best accuracy
    lnn_input_list = np.array([])
    for test_data, test_label in zip(X_test, y_test):
        lnn_input = get_fnn_output(test_data, fnn_attribute)
        lnn_input_list = np.append(lnn_input_list, lnn_input)
        # print('label_nn_test(Test)', lnn_input)
    lnn_input_list = lnn_input_list.reshape(-1, 6)
    # Normalize once more after producing the outputs
    # lnn_test_input_list = preprocessing.scale(lnn_input_list)

    lnn_test_input_list = Normalize.normalization(lnn_input_list)

    # Save the one that has the best accuracy
    lnn_input_list = np.array([])
    for train_data, train_label in zip(X_train, y_train):
        lnn_input = get_fnn_output(train_data, fnn_attribute)
        lnn_input_list = np.append(lnn_input_list, lnn_input)
        # print('label_nn_test(Test)', lnn_input)
    lnn_input_list = lnn_input_list.reshape(-1, 6)

    # Normalize once more after producing the outputs
    # lnn_train_input_list = preprocessing.scale(lnn_input_list)

    lnn_train_input_list = Normalize.normalization(lnn_input_list)

    for e in lnn_train_input_list:
        print(e)

    # Train the Label NN start
    print('<---Train the Label NN Start--->')
    for _ in range(lnn_random_size):

        weight1 = \
            np.array([np.random.uniform(-1, 1) for _ in range(weight1_size)]).reshape(-1, 6)
        weight2 = \
            np.array([np.random.uniform(-1, 1) for _ in range(weight2_size)]).reshape(-1, 6)
        bias = \
            np.array([np.random.uniform(-1, 1) for _ in range(bias_size)])

        lnn = LabelNN(lnn_input_size, lnn_hidden_size, lnn_output_size,
                      weight1, weight2, bias, lnn_lr)

        # for train_data, train_label in zip(X_train, y_train):
        #     # Calculate the input of the LNN
        #     # By getting the output the FNN1 ~ FNN6
        #     lnn_input = get_fnn_output(train_data, fnn_attribute)
        #
        #     # print('lnn_input', lnn_input)
        #
        #     # print('lnn_input(Train)', lnn_input)
        #     try:
        #         lnn.training_model(lnn_epoch, lnn_input, train_label)
        #
        #     except OverflowError:
        #         print("<---Main.py(Something error had happen in train lnn)--->")
        #         break
        #     except ZeroDivisionError:
        #         print("<---Main.py(Something error had happen in train lnn)--->")
        #         break

        # Test the FNN model,
        # Encoding the label NN
        # Make the confusion matrix
        try:
            lnn.training_model(fnn_epoch, lnn_train_input_list, y_train)
            test_output = lnn.testing_model(lnn_test_input_list)

        except OverflowError:
            print("<---Main.py (something went wrong while testing the lnn)--->")
            continue
        except ZeroDivisionError:
            print("<---Main.py (something went wrong while testing the lnn)--->")
            continue

        label_pred = LabelNN.label_encode(test_output)
        C_matrix = confusion_matrix(y_test, label_pred)
        C_accuracy = np.sum(C_matrix.diagonal()) / np.sum(C_matrix)

        # Record the single accuracy
        all_nn_accuracy = np.append(all_nn_accuracy, C_accuracy)
        # print(C_matrix)
        # print(C_accuracy)

        if C_accuracy > accuracy:
            accuracy = copy.deepcopy(C_accuracy)
            nn_weight1 = copy.deepcopy(lnn.weight1)
            nn_weight2 = copy.deepcopy(lnn.weight2)
            nn_bias = copy.deepcopy(lnn.bias)
            matrix = copy.deepcopy(C_matrix)
            record_lnn = copy.deepcopy(lnn)
        """
        Every error trend graph will output
        Output the Error Plot to observe trend
        """
        # rel_path = './Data/Graph/' + str(i) + '_LNN_error_trend.png'
        # abs_path = os.path.join(os.path.dirname(__file__), rel_path)
        # ErrorPlot.error_trend(
        #     str(i) + '_LNN_error_trend', len(lnn.error_list), lnn.error_list)

    print('<---Train the Label NN Successfully--->')
    print('<----------------------------------------------->')

    # # Create a folder
    # org_path = './Data/Graph/'
    # org_path = makedir(org_path, dimension_reduce_algorithm)

    # print('3_cwd', os.getcwd())

    abs_path = os.getcwd()
    # Choose the best LNN to Plot error trend
    ErrorPlot.mul_error_trend('Best_LNN_error_trend',
                              len(record_lnn.error_list),
                              record_lnn.error_list, abs_path)

    # Choose the best Accuracy to Plot
    # rel_path = org_path + 'Accuracy vs LNN.png'
    # abs_path = os.path.join(os.path.dirname(__file__), rel_path)

    abs_path = os.getcwd() + '\\Accuracy vs LNN.png'
    AccuracyPlot.build_accuracy_plot(
        'Accuracy vs LNN',
        np.array([i for i in range(1,
                                   len(all_nn_accuracy) + 1, 1)]),
        all_nn_accuracy, abs_path)

    return nn_weight1, nn_weight2, nn_bias, accuracy, matrix
Example #29
import pandas as pd

from Method.LoadData import LoadData

header = ['Dim' + str(i) for i in range(1, 265, 1)]
column = header + ['Label']  # list.append returns None; build a new list instead
org_data = LoadData.get_org_data()
print(type(org_data))
# pd_data = pd.DataFrame(org_data, columns=column)

for i in range(1, 7, 1):
    result = org_data.loc[org_data['Label'] == 'C' + str(i), header]
    # pd_array = pd.DataFrame(tmp_data, columns=column)
    result.to_excel('../Data/Labeling/C/method2/C' + str(i) +
                    '_Original_data.xlsx',
                    sheet_name='Data',
                    index=False)
    print('<---C' + str(i) + ' Successfully--->')

# print(pd_data)
Example #30
def test_all_model(fnn_attribute, lnn_attribute, algorithm):
    # Load file, Original_data.xlsx
    org_data, org_label = LoadData.get_method1_test()

    # Reduce dimension and generate train/test data
    reduced_data = reduce_dimension(org_data, org_label, algorithm)
    # normalized_data = preprocessing.normalize(reduced_data)
    # reduced_data = normalization(reduced_data)

    # min_max_scaler = preprocessing.MinMaxScaler()
    # normalized_data = min_max_scaler.fit_transform(reduced_data)

    # normalized_data = preprocessing.scale(reduced_data)

    normalized_data = Normalize.normalization(reduced_data)

    X_train, X_test, y_train, y_test = train_test_split(normalized_data,
                                                        org_label,
                                                        test_size=0.3)

    print('<---Test the Label NN Start--->')

    test_output_list = np.array([])
    # Direct voting, without the LNN
    for test_data, test_label in zip(X_test, y_test):
        lnn_input = get_fnn_output(test_data, fnn_attribute)
        test_output_list = np.append(test_output_list, lnn_input)

    # lnn_input_list = np.array([])
    # for test_data, test_label in zip(X_test, y_test):
    #     lnn_input = get_fnn_output(test_data, fnn_attribute)
    #     lnn_input_list = np.append(lnn_input_list, lnn_input)
    # lnn_input_list = lnn_input_list.reshape(-1, 6)
    # print('label_nn_test(Test)', lnn_input)
    # Normalize once more after producing the outputs
    # lnn_test_input_list = preprocessing.scale(lnn_input_list)

    # lnn_test_input_list = normalization(lnn_input_list)

    # test_output_list = np.array([])
    # for train_data, train_label in zip(X_train, y_train):
    #     lnn_input = get_fnn_output(train_data, fnn_attribute)
    #     # print('lnn_input(Test ALL)', lnn_input)
    #
    #     weight1 = lnn_attribute['Weight1']
    #     weight2 = lnn_attribute['Weight2']
    #     bias = lnn_attribute['Bias']
    #
    #     lnn = LabelNN(lnn_input_size, lnn_hidden_size, lnn_output_size, weight1, weight2, bias, lnn_lr)
    #     test_output = lnn.forward(lnn_input)
    #     test_output_list = np.append(test_output_list, test_output)
    #
    #
    #     # # Direct voting, without the LNN
    #     # lnn_input = get_fnn_output(train_data, fnn_attribute)
    #     # test_output_list = np.append(test_output_list, lnn_input)

    final_output_list = np.array([])

    # weight1 = lnn_attribute['Weight1']
    # weight2 = lnn_attribute['Weight2']
    # bias = lnn_attribute['Bias']

    # lnn = LabelNN(lnn_input_size, lnn_hidden_size, lnn_output_size, weight1, weight2, bias, lnn_lr)

    # lnn_test_input_list = lnn_input_list.reshape(-1, 6)
    # final_output_list = lnn.forward(lnn_test_input_list)
    # final_output_list = final_output_list.reshape(-1, 6)

    test_output_list = test_output_list.reshape(-1, 6)
    label_pred = LabelNN.label_encode(test_output_list)
    for x, y in zip(test_output_list, y_test):
        print(x, ' ', y)

    # normalized_output = min_max_scaler.fit_transform(test_output_list)
    # print('normalized_output', normalized_output)
    # label_pred = LabelNN.label_encode(normalized_output)

    C_matrix = confusion_matrix(y_test, label_pred)
    rel_path = './Experiment/Method1/cnf_matrix_fnn(1-6).png'
    abs_path = os.path.join(os.path.dirname(__file__), rel_path)
    ConfusionMatrix.plot_confusion_matrix(C_matrix,
                                          abs_path,
                                          classes=list(set(y_test)),
                                          title='Confusion matrix')
    C_accuracy = np.sum(C_matrix.diagonal()) / np.sum(C_matrix)

    print('This is the confusion matrix(test_all_model)\n', C_matrix)
    # print(C_matrix)
    # print(C_accuracy)

    print('<---Test the Label NN Successfully--->')
    print('<----------------------------------------------->')
    return C_accuracy
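The direct-voting path above reduces to an argmax over the six FNN outputs; a hypothetical stand-in for LabelNN.label_encode, consistent with how it is used here on rows of six values and string labels 'C1'..'C6' (the project's real implementation may differ):

import numpy as np

def label_encode(output_rows):
    # Pick the class whose FNN responded most strongly in each row.
    return ['C' + str(int(np.argmax(row)) + 1) for row in output_rows]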