Exemplo n.º 1
0
def load_data(dict):
    """Load and stack all click arrays for every class directory.

    Args:
        dict: Mapping of class key -> directory path. The parameter name
            shadows the builtin; it is kept unchanged so existing keyword
            callers keep working.

    Returns:
        A new dictionary with the same keys (in the same order). Each value
        is a 2-D array with 320 columns made of every non-empty array loaded
        from the ``.npy`` files that ``find_click.list_npy_files`` reports
        for that path, stacked row-wise. A class without any data maps to an
        empty ``(0, 320)`` array.
    """
    data_dict = {}

    for key in dict:
        path = dict[key]
        print(path)

        # Seed with an empty (0, 320) float array so the stacked result has
        # the same shape and dtype promotion as incremental stacking would,
        # even when no file contributes any rows.
        chunks = [np.empty((0, 320))]
        count = 0

        for npy in find_click.list_npy_files(path):
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                # Files without clicks carry nothing to stack.
                continue
            chunks.append(npy_data)
            count += npy_data.shape[0]
        print('loaded clicks:', count)

        # Stack once at the end: stacking inside the loop re-copies every
        # previously loaded row for each new file.
        data_dict[key] = np.vstack(chunks)

    return data_dict
Exemplo n.º 2
0
def load_data(dict):
    """Load click arrays per class and split them by file into train/test.

    For every class the ``.npy`` files reported by
    ``find_click.list_npy_files`` are shuffled with
    ``np.random.permutation``; the first quarter (``len(files) // 4``) of the
    shuffled files becomes the test set and the rest the training set.

    Args:
        dict: Mapping of class key -> directory path. The parameter name
            shadows the builtin; it is kept unchanged so existing keyword
            callers keep working. Keys are used only as dictionary keys.

    Returns:
        ``(train_dict, test_dict)``: two dictionaries with the same keys as
        the input. Each value is a 2-D array with 320 columns holding the
        row-wise stack of the non-empty arrays of the respective split (an
        empty ``(0, 320)`` array when the split has no data).
    """

    def _stack_files(files, echo=False):
        """Load ``files`` and return their non-empty arrays stacked once."""
        # Seed with an empty (0, 320) float array so an empty split still
        # yields a well-formed result.
        chunks = [np.empty((0, 320))]
        count = 0
        for npy in files:
            if echo:
                print(npy)
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            chunks.append(npy_data)
            count += npy_data.shape[0]
        print('loaded clicks:', count)
        # A single vstack avoids re-copying the accumulated rows per file.
        return np.vstack(chunks)

    train_dict = {}
    test_dict = {}

    for key in dict:
        path = dict[key]

        # Split by file: a random quarter of the files is held out for test.
        npy_files = find_click.list_npy_files(path)
        npy_num = len(npy_files)
        random_index = np.random.permutation(npy_num)
        split_point = npy_num // 4
        test_set = [npy_files[i] for i in random_index[:split_point]]
        train_set = [npy_files[i] for i in random_index[split_point:]]

        print('training set loading.......')
        train_dict[key] = _stack_files(train_set)

        print('testing set loading.......')
        # The held-out file names are echoed so the split can be inspected.
        test_dict[key] = _stack_files(test_set, echo=True)

    return train_dict, test_dict
Exemplo n.º 3
0
def test_cnn_batch_learn(data_path, label=3, n_class=8, batch_num=20):
    """Evaluate the saved batch-fusion CNN on the click files of one class.

    The TensorFlow 1.x graph (two convolution layers, a dense layer, an
    output layer and a learned fusion over ``batch_num`` clicks) is rebuilt,
    its parameters are restored from ``params/cnn_net_lwy.ckpt``, and every
    ``.npy`` file under ``data_path`` is processed in random order. Each file
    is cut into batches of ``batch_num`` click spectra; one fused prediction
    is made per batch and compared with ``label``. Per-file and overall
    accuracies are printed.

    The helpers ``weight_variable``, ``bias_variable``, ``conv2d``,
    ``max_pool_1x2`` and ``energy_normalize`` are defined outside this block.

    Args:
        data_path: Directory passed to ``find_click.list_npy_files``.
        label: Index of the true class of all clicks under ``data_path``.
            When it is >= 0 the spectral-peak filter below is applied.
        n_class: Number of network output classes.
        batch_num: Number of clicks fused into one prediction.

    Returns:
        None. Results are printed.
    """
    c = label
    npy_files = find_click.list_npy_files(data_path)
    random_index = np.random.permutation(len(npy_files))

    # One-hot reference label; indexing here also fails fast when ``label``
    # is out of range for ``n_class``.
    one_hot = np.zeros(n_class)
    one_hot[c] = 1
    true_class = int(np.argmax(one_hot))

    tf.reset_default_graph()
    x = tf.placeholder("float", [None, 96])

    # Input
    x_image = tf.reshape(x, [-1, 1, 96, 1])

    # First convolutional layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # Second convolutional layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # Densely connected layer
    W_fc1 = weight_variable([1 * 24 * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * 24 * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # Output layer
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    y = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    # Rearrange the per-click outputs into a (batch_num, n_class) matrix.
    # This requires the number of fed rows to be a multiple of batch_num.
    temp_y = tf.reshape(y, [-1, batch_num, n_class])
    temp_y_t = tf.transpose(temp_y, perm=[0, 2, 1])
    multi_y_t = tf.reshape(temp_y_t, [-1, batch_num])
    multi_y = tf.transpose(multi_y_t, perm=[1, 0])

    # Fusion layer (multi-mode fusion)
    fuse_mode = 9
    W_fuse = weight_variable([fuse_mode, batch_num])
    multi_fuse_out = tf.nn.relu(tf.matmul(W_fuse, multi_y))

    mode_weight = weight_variable([1, fuse_mode])
    mode_b = bias_variable([n_class])
    fuse_out = tf.nn.softmax(
        tf.reshape(tf.matmul(mode_weight, multi_fuse_out), [-1, n_class]) +
        mode_b)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    print('==============================================')
    total_correct = 0
    total = 0
    with tf.Session() as sess:
        sess.run(init)
        # Load the trained network parameters.
        saver.restore(sess, "params/cnn_net_lwy.ckpt")

        for file_idx in random_index:
            npy = npy_files[file_idx]
            print('loading %s' % npy)
            npy_data = np.load(npy)

            if npy_data.shape[0] == 0:
                continue
            # Stacking onto an empty (0, 320) array yields a float copy and
            # rejects inputs that do not have 320 columns.
            xs = np.vstack((np.empty((0, 320)), npy_data))
            print('loaded clicks:', npy_data.shape[0])

            click_batch = []
            total_batch = int(xs.shape[0] / batch_num)
            for batch_idx in range(total_batch):
                tmp_xs = np.empty((0, 96))
                j = batch_num * batch_idx
                if j > xs.shape[0]:
                    break
                while j < batch_num * (batch_idx + 1):
                    if xs.shape[0] == 0:
                        break
                    # Wrap around: xs shrinks whenever a click is filtered.
                    index = j % xs.shape[0]
                    temp_x = xs[index]
                    # Random 192-sample window starting in [64, 96).
                    beg_idx = np.random.randint(64, (64 + 32))
                    crop_x = temp_x[beg_idx:(beg_idx + 192)]
                    crop_x = np.reshape(crop_x, [1, 192])

                    # Magnitude spectrum; keep the first 96 bins.
                    crop_x = np.fft.fft(crop_x)
                    crop_x = np.sqrt(crop_x.real**2 + crop_x.imag**2)

                    crop_x = crop_x[0, :96]
                    crop_x = np.reshape(crop_x, [1, 96])

                    if c >= 0:
                        # Discard clicks whose spectral peak bin is below 20
                        # or above 70 (the original note calls these 20k and
                        # 70k -- presumably kHz; TODO confirm).
                        peak_index = np.argmax(crop_x)
                        if peak_index < 20 or peak_index > 70:
                            xs = np.delete(xs, index, 0)
                            continue

                    crop_x = energy_normalize(crop_x)
                    tmp_xs = np.vstack((tmp_xs, crop_x))
                    j += 1

                if tmp_xs.shape[0] < batch_num:
                    # The inner loop only stops early when every remaining
                    # click was filtered out. An incomplete batch cannot be
                    # reshaped to [-1, batch_num, n_class] by the graph, and
                    # no later batch can be filled either.
                    break
                click_batch.append(tmp_xs)

            print('the number of batch:', len(click_batch))
            if len(click_batch) == 0:
                continue
            total += len(click_batch)
            count = 0
            for batch in click_batch:
                out_y = sess.run(fuse_out,
                                 feed_dict={
                                     x: batch,
                                     keep_prob: 1.0
                                 })
                pre_y = np.argmax(out_y, 1)
                if int(pre_y[0]) == true_class:
                    count += 1
            total_correct += count
            print('correct:', count, 'total:', len(click_batch))
            print('cnn test accuracy (batch learn): ',
                  round(count / len(click_batch), 3))

        if total:
            print('total correct: %d, total: %d, batch learn acc: %f' %
                  (total_correct, total, total_correct / total))
        else:
            # No file produced a complete batch; avoid dividing by zero.
            print('total correct: 0, total: 0, batch learn acc: n/a '
                  '(no batches evaluated)')
Exemplo n.º 4
0
def test_cnn_data(data_path, label=3, n_class=8, batch_num=20):
    """Evaluate the saved CNN on one class using per-batch majority voting.

    The TensorFlow 1.x graph (two convolution layers, a dense layer and a
    softmax output) is rebuilt, its parameters are restored from
    ``params/cnn_net_lwy.ckpt``, and every ``.npy`` file under ``data_path``
    is processed in random order. Each file is cut into batches of up to
    ``batch_num`` click spectra; every click in a batch is classified, the
    most frequent class is the batch prediction, and it is compared with
    ``label``. Per-file and overall results are printed.

    The helpers ``weight_variable``, ``bias_variable``, ``conv2d``,
    ``max_pool_1x2`` and ``energy_normalize`` are defined outside this block.

    Args:
        data_path: Directory passed to ``find_click.list_npy_files``.
        label: Index of the true class of all clicks under ``data_path``.
            When it is >= 0 the spectral-peak filter below is applied.
        n_class: Number of network output classes.
        batch_num: Number of clicks that vote on one prediction.

    Returns:
        None. Results are printed.
    """
    c = label
    npy_files = find_click.list_npy_files(data_path)
    random_index = np.random.permutation(len(npy_files))

    # One-hot reference label; indexing here also fails fast when ``label``
    # is out of range for ``n_class``.
    one_hot = np.zeros(n_class)
    one_hot[c] = 1
    true_class = int(np.argmax(one_hot))

    tf.reset_default_graph()
    x = tf.placeholder("float", [None, 96])
    # Input
    x_image = tf.reshape(x, [-1, 1, 96, 1])

    # First convolutional layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # Second convolutional layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # Densely connected layer
    W_fc1 = weight_variable([1 * 24 * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * 24 * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # Output layer
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    print('==============================================')
    total_correct = 0
    total = 0
    with tf.Session() as sess:
        sess.run(init)
        # Load the trained network parameters.
        saver.restore(sess, "params/cnn_net_lwy.ckpt")

        for file_idx in random_index:
            npy = npy_files[file_idx]
            print('loading %s' % npy)
            npy_data = np.load(npy)

            if npy_data.shape[0] == 0:
                continue

            # Stacking onto an empty (0, 320) array yields a float copy and
            # rejects inputs that do not have 320 columns.
            xs = np.vstack((np.empty((0, 320)), npy_data))
            print('loaded clicks:', npy_data.shape[0])

            click_batch = []
            total_batch = int(xs.shape[0] / batch_num)
            for batch_idx in range(total_batch):
                tmp_xs = np.empty((0, 96))
                j = batch_num * batch_idx
                if j > xs.shape[0]:
                    break
                while j < batch_num * (batch_idx + 1):
                    if xs.shape[0] == 0:
                        break
                    # Wrap around: xs shrinks whenever a click is filtered.
                    index = j % xs.shape[0]
                    temp_x = xs[index]
                    # Random 192-sample window starting in [64, 96).
                    beg_idx = np.random.randint(64, (64 + 32))
                    crop_x = temp_x[beg_idx:(beg_idx + 192)]
                    crop_x = np.reshape(crop_x, [1, 192])

                    # Magnitude spectrum; keep the first 96 bins.
                    crop_x = np.fft.fft(crop_x)
                    crop_x = np.sqrt(crop_x.real**2 + crop_x.imag**2)

                    crop_x = crop_x[0, :96]
                    crop_x = np.reshape(crop_x, [1, 96])

                    if c >= 0:
                        # Discard clicks whose spectral peak bin is below 20
                        # or above 70 (the original note calls these 20k and
                        # 70k -- presumably kHz; TODO confirm).
                        peak_index = np.argmax(crop_x)
                        if peak_index < 20 or peak_index > 70:
                            xs = np.delete(xs, index, 0)
                            continue

                    crop_x = energy_normalize(crop_x)
                    tmp_xs = np.vstack((tmp_xs, crop_x))
                    j += 1

                if tmp_xs.shape[0] == 0:
                    # Every remaining click was filtered out. A batch with
                    # no votes would be scored as a prediction of class 0
                    # (argmax of all-zero counts), so it is not evaluated.
                    break
                click_batch.append(tmp_xs)

            print('the number of batch:', len(click_batch))
            if len(click_batch) == 0:
                continue
            total += len(click_batch)
            count = 0
            majority_mat = [0] * n_class
            for batch in click_batch:
                # Classify the whole batch in one run; with keep_prob=1.0
                # each row is scored independently of the others.
                out_y = sess.run(y, feed_dict={x: batch, keep_prob: 1.0})
                votes = np.bincount(np.argmax(out_y, 1), minlength=n_class)

                # argmax returns the lowest class index on a tie.
                predict = int(np.argmax(votes))
                majority_mat[predict] += 1
                if predict == true_class:
                    count += 1
            total_correct += count
            print('correct:', count, 'total:', len(click_batch))
            print('cnn test accuracy (majority voting): ',
                  round(count / len(click_batch), 3))
            print('result:', majority_mat)

    print('total correct:', total_correct, 'total batch:', total)
    if total:
        print('%s mean acc: %f' % (data_path, total_correct / total))
    else:
        # No file produced a batch; avoid dividing by zero.
        print('%s mean acc: n/a (no batches evaluated)' % data_path)
Exemplo n.º 5
0
def load_npy_data(batch_num=20, n_total=500):
    """Build train/test sets for three classes, holding out one folder each.

    For every class directory (hard-coded below) the sub-folders reported by
    ``find_click.list_files`` are shuffled; one randomly chosen folder
    becomes the test set and the remaining folders the training set. The
    clicks of each split are stacked and passed to ``random_crop`` (defined
    outside this block), and a one-hot label row is produced per returned
    sample.

    Args:
        batch_num: Forwarded to ``random_crop``.
        n_total: Forwarded to ``random_crop`` for the training split; the
            test split always uses ``n_total=0``.

    Returns:
        ``(train_xs, train_ys, test_xs, test_ys)`` where the ``*_xs`` are
        ``np.array`` conversions of the concatenated ``random_crop`` results
        and the ``*_ys`` are arrays with one one-hot row (3 columns) per
        sample.

    Raises:
        IndexError: If a class directory contains no sub-folders, because no
            test folder can be selected.
    """

    def _load_non_empty(folder):
        """Return the non-empty arrays stored in ``folder``'s .npy files."""
        arrays = []
        for npy in find_click.list_npy_files(folder):
            npy_data = np.load(npy)
            if npy_data.shape[0] != 0:
                arrays.append(npy_data)
        return arrays

    def _stack(arrays):
        """Stack ``arrays`` row-wise onto an empty (0, 320) float array."""
        # One vstack for the whole split avoids re-copying the accumulated
        # rows for every file.
        return np.vstack([np.empty((0, 320))] + arrays)

    class_paths = {
        "0": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Melon",
        "1": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Spinner",
        "2": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Tt",
    }

    n_class = len(class_paths)
    train_xs = []
    train_ys = np.empty((0, n_class))
    test_xs = []
    test_ys = np.empty((0, n_class))

    for key, path in class_paths.items():
        print(path)

        c = int(key)

        file_list = find_click.list_files(path)
        random_index = np.random.permutation(len(file_list))
        # First shuffled folder is held out for testing, the rest train.
        test_set = file_list[random_index[0]]
        train_set = [file_list[i] for i in random_index[1:]]

        label = np.zeros(n_class)
        label[c] = 1

        # training set
        print('training set loading.......')
        train_arrays = []
        for folder in train_set:
            train_arrays.extend(_load_non_empty(folder))
        xs = _stack(train_arrays)
        print('loaded clicks:', xs.shape[0])

        # test set
        print('test set loading.......')
        print('loading %s' % test_set[-6:])
        txs = _stack(_load_non_empty(test_set))
        print('loaded clicks:', txs.shape[0])

        print('training set crop...')
        temp_train_xs = random_crop(xs, batch_num, n_total, key)
        print('testing set crop...')
        temp_test_xs = random_crop(txs, batch_num, n_total=0, key=key)

        # One label row per cropped sample.
        temp_train_ys = np.tile(label, (len(temp_train_xs), 1))
        temp_test_ys = np.tile(label, (len(temp_test_xs), 1))
        train_xs += temp_train_xs
        train_ys = np.vstack((train_ys, temp_train_ys))
        test_xs += temp_test_xs
        test_ys = np.vstack((test_ys, temp_test_ys))

    train_xs = np.array(train_xs)
    test_xs = np.array(test_xs)

    return train_xs, train_ys, test_xs, test_ys
Exemplo n.º 6
0
    if not os.path.exists(root_save_path):
        os.makedirs(root_save_path)

    for key in dict:
        count = 0
        print(dict[key])
        path = dict[key]
        specie_name = path.split('/')[-1]
        file_list = find_click.list_files(path)
        save_specie_path = os.path.join(root_save_path, specie_name)
        for date_path in file_list:
            date = date_path.split('/')[-1]
            save_path = os.path.join(save_specie_path, date)
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            npy_list = find_click.list_npy_files(date_path)
            for npy in npy_list:
                npy_data = np.load(npy)
                num = npy_data.shape[0]
                xs = np.empty((0, 320))
                for index in range(num):
                    temp_x = npy_data[index]
                    beg_idx = np.random.randint(64, (64 + 32))
                    crop_x = temp_x[beg_idx:(beg_idx + 192)]
                    crop_x = np.reshape(crop_x, [1, 192])

                    crop_x = np.fft.fft(crop_x)
                    crop_x = np.sqrt(crop_x.real**2 + crop_x.imag**2)

                    crop_x = crop_x[0, :96]
                    crop_x = np.reshape(crop_x, [1, 96])
Exemplo n.º 7
0
def load_npy_data(batch_num=20, n_total=500, max_clicks=100000):
    """Build train/test sets for seven classes from hard-coded directories.

    For every class the ``.npy`` files reported by
    ``find_click.list_npy_files`` are loaded in listing order until at least
    ``max_clicks`` clicks have been collected. The stacked clicks are split
    with ``split_data`` and cropped with ``random_crop`` (both defined
    outside this block), and a one-hot label row is produced per returned
    sample.

    Args:
        batch_num: Forwarded to ``random_crop``.
        n_total: ``random_crop`` receives ``int(n_total * 4 / 5)`` for the
            training part and ``int(n_total / 5)`` for the test part.
        max_clicks: Loading of a class stops after the file that brings the
            click count to this value or above.

    Returns:
        ``(train_xs, train_ys, test_xs, test_ys)``: the ``*_xs`` arrays have
        96 columns and the ``*_ys`` arrays hold one one-hot row (7 columns)
        per sample.
    """
    class_paths = {
        "0": "/home/fish/ROBB/CNN_click/click/ClearData/Mesoplodon/Blainvilles_beaked_whale_(Mesoplodon_densirostris)",
        "1": "/home/fish/ROBB/CNN_click/click/ClearData/Gg/Rissos_(Grampus_grisieus)",
        "2": "/home/fish/ROBB/CNN_click/click/ClearData/Gm/Pilot_whale_(Globicephala_macrorhynchus)",
        "3": "/home/fish/ROBB/CNN_click/click/Data/Melon/palmyra2006",
        "4": "/home/fish/ROBB/CNN_click/click/Data/Dd/Dd",
        "5": "/home/fish/ROBB/CNN_click/click/Data/Spinner/palmyra2006",
        "6": "/home/fish/ROBB/CNN_click/click/Data/Tt/palmyra2006",
    }

    n_class = len(class_paths)
    train_xs = np.empty((0, 96))
    train_ys = np.empty((0, n_class))
    test_xs = np.empty((0, 96))
    test_ys = np.empty((0, n_class))

    for key, path in class_paths.items():
        c = int(key)
        npy_files = find_click.list_npy_files(path)

        # NOTE(review): a permutation is drawn here but the files are read
        # in listing order. The draw is kept so the random stream seen by
        # later code is unchanged -- confirm whether shuffling the files was
        # intended.
        np.random.permutation(len(npy_files))

        label = np.zeros(n_class)
        label[c] = 1

        # Seed with an empty (0, 320) float array so a class without data
        # still yields a well-formed result.
        chunks = [np.empty((0, 320))]
        count = 0
        for npy in npy_files:
            print('loading %s' % npy)
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            chunks.append(npy_data)
            count += npy_data.shape[0]
            if count >= max_clicks:
                break
        print('loaded clicks:', count)

        # One vstack per class avoids re-copying the accumulated rows for
        # every file.
        xs = np.vstack(chunks)

        xs0, xs1 = split_data(xs)
        print('crop training clicks...')
        temp_train_xs = random_crop(xs0, batch_num, int(n_total * 4 / 5), key)
        print('crop testing clicks...')
        temp_test_xs = random_crop(xs1, batch_num, int(n_total / 5), key)

        # One label row per cropped sample.
        temp_train_ys = np.tile(label, (temp_train_xs.shape[0], 1))
        temp_test_ys = np.tile(label, (temp_test_xs.shape[0], 1))

        train_xs = np.vstack((train_xs, temp_train_xs))
        train_ys = np.vstack((train_ys, temp_train_ys))
        test_xs = np.vstack((test_xs, temp_test_xs))
        test_ys = np.vstack((test_ys, temp_test_ys))

    return train_xs, train_ys, test_xs, test_ys
Exemplo n.º 8
0
def load_data(batch_num=20):
    """Build per-class training data and a joint labelled test set.

    For every class directory (hard-coded below) the sub-folders reported by
    ``find_click.list_files`` are shuffled; one randomly chosen folder
    becomes the test set and the remaining folders the training set. The
    clicks of each split are stacked and passed to ``random_crop`` (defined
    outside this block) with ``n_total=0``.

    Args:
        batch_num: Forwarded to ``random_crop``.

    Returns:
        ``(train_dict, test_xs, test_ys)`` where ``train_dict`` maps each
        class key to the ``np.array`` of its cropped training samples,
        ``test_xs`` is the ``np.array`` of all cropped test samples and
        ``test_ys`` holds one one-hot row (3 columns) per test sample.

    Raises:
        IndexError: If a class directory contains no sub-folders, because no
            test folder can be selected.
    """

    def _load_non_empty(folder):
        """Return the non-empty arrays stored in ``folder``'s .npy files."""
        arrays = []
        for npy in find_click.list_npy_files(folder):
            npy_data = np.load(npy)
            if npy_data.shape[0] != 0:
                arrays.append(npy_data)
        return arrays

    def _stack(arrays):
        """Stack ``arrays`` row-wise onto an empty (0, 320) float array."""
        # One vstack for the whole split avoids re-copying the accumulated
        # rows for every file.
        return np.vstack([np.empty((0, 320))] + arrays)

    class_paths = {
        "0": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Melon",
        "1": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Spinner",
        "2": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Tt",
    }

    n_class = len(class_paths)
    test_ys = np.empty((0, n_class))
    test_xs = []

    train_dict = {key: None for key in class_paths}

    for key, path in class_paths.items():
        print(path)

        c = int(key)

        file_list = find_click.list_files(path)
        random_index = np.random.permutation(len(file_list))
        # First shuffled folder is held out for testing, the rest train.
        test_set = file_list[random_index[0]]
        train_set = [file_list[i] for i in random_index[1:]]

        label = np.zeros(n_class)
        label[c] = 1

        # training set
        print('training set loading.......')
        train_arrays = []
        for folder in train_set:
            train_arrays.extend(_load_non_empty(folder))
        xs = _stack(train_arrays)
        print('loaded clicks:', xs.shape[0])

        # test set
        print('test set loading.......')
        print('loading %s' % test_set[-6:])
        txs = _stack(_load_non_empty(test_set))
        print('loaded clicks:', txs.shape[0])

        print('crop training clicks...')
        train_xs = np.array(random_crop(xs, batch_num, n_total=0))
        print('crop testing clicks...')
        temp_test_xs = random_crop(txs, batch_num, n_total=0)

        train_dict[key] = train_xs

        # One label row per cropped test sample.
        temp_test_ys = np.tile(label, (len(temp_test_xs), 1))
        test_xs += temp_test_xs
        test_ys = np.vstack((test_ys, temp_test_ys))

    test_xs = np.array(test_xs)
    return train_dict, test_xs, test_ys