Пример #1
0
def _resolve_root_dir(user):
    """Return the data root directory for a known user name.

    Raises:
        ValueError: if `user` is neither 'mlsnrs' nor 'shellhand'.
    """
    if user == 'mlsnrs':
        return '/home/mlsnrs/apks'
    if user == 'shellhand':
        return '/mnt'
    raise ValueError('unknown user: %r' % (user,))


def _reset_log(log_name):
    """Create the log directory if needed and delete a stale log file."""
    log_dir = os.path.dirname(log_name)
    if log_dir and not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    if os.path.exists(log_name):
        os.remove(log_name)


def _report(log_name, line):
    """Print `line` and append it (newline-terminated) to `log_name`."""
    print(line)
    with open(log_name, 'a') as f:
        f.write(line + '\n')


def _binary_metrics(y_true, y_score, threshold=0.5):
    """Return (TP, FP, TN, FN, F1) for scores binarised at `threshold`.

    `labels=[0, 1]` forces a 2x2 confusion matrix even when only one class
    occurs, so the unpacking below never runs out of cells. F1 is reported
    as 0.0 when TP, FP and FN are all zero instead of dividing by zero.
    """
    y_hat = np.int32(y_score >= threshold)
    cm = confusion_matrix(y_true, y_hat, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    denom = 2 * tp + fn + fp
    f1 = float(2 * tp) / denom if denom else 0.0
    return tp, fp, tn, fn, f1


def evaluation(method, dataset, user, device_source):
    """Train a CNN on the training split and log TP/FP/TN/FN/F1.

    Metrics are computed for the training data and then for each test
    split; every result line is printed and appended to
    ``log/cnn_<dataset>_<method>_evaluation.txt`` (recreated on each call).

    Args:
        method: feature method name; used in paths and passed to the loaders.
        dataset: dataset name; used in paths and passed to the loaders.
        user: 'mlsnrs' or 'shellhand'; selects the data root directory.
        device_source: path component placed directly under the data root.

    Raises:
        ValueError: if `user` is not one of the known names.
    """
    # Validate the user before touching the old log, so a typo does not
    # wipe earlier results.
    root_dir_prefix = _resolve_root_dir(user)
    log_name = 'log/cnn_%s_%s_evaluation.txt' % (dataset, method)
    _reset_log(log_name)

    save_feature_path = '%s/%s/mamadroid/%s/%s/%s_save_feature_list.csv' % (
        root_dir_prefix, device_source, dataset, method, method)
    save_feature_dict = get_save_feature_dict(save_feature_path)
    print('have read save_feature_dict: %d' % len(save_feature_dict))

    x_train, y_train = get_train_data(dataset, method, save_feature_dict,
                                      root_dir_prefix, device_source)
    print('x_train shape: %s y_train shape: %s' %
          (str(x_train.shape), str(y_train.shape)))

    start = time.time()
    print('start train')
    clf = CNN(layer_num=3, kernel_size=3, gpu_id=2)
    clf.fit(x_train, y_train, epoch=5, batch_size=500, lr=0.01)
    end = time.time()
    print('Training  model time used: %f s' % (end - start))

    print(x_train.shape)
    y_pred = clf.predict(x_train, batch_size=20)
    print(y_pred.shape)
    _report(log_name, 'train data TP FP TN FN F1: %d %d %d %d %.4f' %
            _binary_metrics(y_train, y_pred))

    # Drop the training arrays before the test data is loaded.
    del x_train, y_train

    # NOTE(review): only test split 0 is evaluated; the original carried a
    # trailing "#13)" hint, so the intended upper bound may be 13.
    for test_id in range(0, 1):
        x_test, y_test = get_test_data(dataset, test_id, method,
                                       save_feature_dict, root_dir_prefix,
                                       device_source)
        print('x_test shape: %s y_test shape: %s' %
              (str(x_test.shape), str(y_test.shape)))
        y_pred = clf.predict(x_test, batch_size=500)
        # The predictions are thresholded at 0.5 exactly like the training
        # ones; previously the raw predict() output went straight into
        # confusion_matrix here.
        _report(log_name, 'test_id %d TP FP TN FN F1: %d %d %d %d %.4f' %
                ((test_id,) + _binary_metrics(y_test, y_pred)))
Пример #2
0
def _resolve_root_dir(user):
    """Return the data root directory for a known user name.

    Raises:
        ValueError: if `user` is neither 'mlsnrs' nor 'shellhand'.
    """
    if user == 'mlsnrs':
        return '/home/mlsnrs/apks'
    if user == 'shellhand':
        return '/mnt'
    raise ValueError('unknown user: %r' % (user,))


def _reset_log(log_name):
    """Create the log directory if needed and delete a stale log file."""
    log_dir = os.path.dirname(log_name)
    if log_dir and not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    if os.path.exists(log_name):
        os.remove(log_name)


def _report(log_name, line):
    """Print `line` and append it (newline-terminated) to `log_name`."""
    print(line)
    with open(log_name, 'a') as f:
        f.write(line + '\n')


def _binary_metrics(y_true, y_score, threshold=0.5):
    """Return (TP, FP, TN, FN, F1) for scores binarised at `threshold`.

    `labels=[0, 1]` forces a 2x2 confusion matrix even when only one class
    occurs, so the unpacking below never runs out of cells. F1 is reported
    as 0.0 when TP, FP and FN are all zero instead of dividing by zero.
    """
    y_hat = np.int32(y_score >= threshold)
    cm = confusion_matrix(y_true, y_hat, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    denom = 2 * tp + fn + fp
    f1 = float(2 * tp) / denom if denom else 0.0
    return tp, fp, tn, fn, f1


def _predict_batch_size(n_samples):
    """Return 20, or 21 when 20 would leave a trailing batch of one sample.

    NOTE(review): presumably the model cannot process a single-sample batch
    -- confirm against CNN.predict. Sizes with n_samples % 420 == 1 still
    leave a remainder of one with 21.
    """
    return 21 if n_samples % 20 == 1 else 20


def evaluation(method, dataset, user, device_source):
    """Train one CNN per year (2012-2017) and test on every later month.

    For each training year a fresh log file
    ``log/cnn_<dataset>_<method>_<year>train_evaluation.txt`` is written
    with TP/FP/TN/FN/F1 on the training data, followed by one line per
    (test_year, test_month) for all years after the training year up to
    and including 2018.

    Args:
        method: feature method name; used in paths and passed to the loaders.
        dataset: dataset name; used in paths and passed to the loaders.
        user: 'mlsnrs' or 'shellhand'; selects the data root directory.
        device_source: accepted for interface compatibility; not used here
            (the path hard-codes the 'ssd_1T' component).

    Raises:
        ValueError: if `user` is not one of the known names.
    """
    root_dir_prefix = _resolve_root_dir(user)
    save_feature_path = '%s/ssd_1T/mamadroid/%s/%s/%s_save_feature_list.csv' % (
        root_dir_prefix, dataset, method, method)
    save_feature_dict = get_save_feature_dict(save_feature_path)
    print('have read save_feature_dict: %d' % len(save_feature_dict))

    for train_year in range(2012, 2018):
        log_name = 'log/cnn_%s_%s_%dtrain_evaluation.txt' % (dataset, method,
                                                             train_year)
        _reset_log(log_name)

        x_train, y_train = get_train_data(dataset, train_year, method,
                                          save_feature_dict, root_dir_prefix)
        print('x_train shape: %s y_train shape: %s' %
              (str(x_train.shape), str(y_train.shape)))

        start = time.time()
        print('start train')
        clf = CNN(layer_num=3, kernel_size=5, gpu_id=3)
        clf.fit(x_train, y_train, epoch=260, batch_size=350, lr=0.01)
        end = time.time()
        print('Training  model time used: %f s' % (end - start))

        print(x_train.shape)
        y_pred = clf.predict(
            x_train, batch_size=_predict_batch_size(x_train.shape[0]))
        print(y_pred.shape)
        _report(log_name, 'train %d data TP FP TN FN F1: %d %d %d %d %.4f' %
                ((train_year,) + _binary_metrics(y_train, y_pred)))

        # Drop the training arrays before the monthly test data is loaded.
        del x_train, y_train

        for test_year in range(train_year + 1, 2019):
            # test_month is zero-based for the loader, one-based in output.
            for test_month in range(0, 12):
                x_test, y_test = get_test_data(dataset, test_year, test_month,
                                               method, save_feature_dict,
                                               root_dir_prefix)
                print('%d-%02d x_test shape: %s y_test shape: %s' %
                      (test_year, test_month + 1, str(
                          x_test.shape), str(y_test.shape)))
                if x_test.shape[0] == 0:
                    # Nothing to score; skip rather than abort the sweep.
                    print('%d-%02d has no test samples, skipped' %
                          (test_year, test_month + 1))
                    continue
                y_pred = clf.predict(
                    x_test, batch_size=_predict_batch_size(x_test.shape[0]))
                _report(
                    log_name,
                    'test %d-%02d TP FP TN FN F1: %d %d %d %d %.4f' %
                    ((test_year, test_month + 1) +
                     _binary_metrics(y_test, y_pred)))
Пример #3
0
def _resolve_root_dir(user):
    """Return the data root directory for a known user name.

    Raises:
        ValueError: if `user` is neither 'mlsnrs' nor 'shellhand'.
    """
    if user == 'mlsnrs':
        return '/home/mlsnrs/apks'
    if user == 'shellhand':
        return '/mnt'
    raise ValueError('unknown user: %r' % (user,))


def _ensure_log_dir(log_name):
    """Create the directory that will hold `log_name` if it is missing."""
    log_dir = os.path.dirname(log_name)
    if log_dir and not os.path.isdir(log_dir):
        os.makedirs(log_dir)


def _report(log_name, line):
    """Print `line` and append it (newline-terminated) to `log_name`."""
    print(line)
    with open(log_name, 'a') as f:
        f.write(line + '\n')


def _binary_metrics(y_true, y_score, threshold=0.5):
    """Return (TP, FP, TN, FN, F1) for scores binarised at `threshold`.

    `labels=[0, 1]` forces a 2x2 confusion matrix even when only one class
    occurs, so the unpacking below never runs out of cells. F1 is reported
    as 0.0 when TP, FP and FN are all zero instead of dividing by zero.
    """
    y_hat = np.int32(y_score >= threshold)
    cm = confusion_matrix(y_true, y_hat, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    denom = 2 * tp + fn + fp
    f1 = float(2 * tp) / denom if denom else 0.0
    return tp, fp, tn, fn, f1


def optimize_para(method, dataset, user, device_source):
    """Grid-search CNN batch size and learning rate, logging metrics.

    Trains on the 2012 data and scores on the training data and on the
    (2013, month 0) test data after every 10 epochs, for each combination
    of batch size (50..500 step 50), kernel size (5) and learning rate
    (0.01, 0.1, 0.001). Result lines are printed and appended to
    ``log/optimize_cnn_<dataset>_<method>_evaluation_v2.txt``; the file is
    not cleared between runs.

    Args:
        method: feature method name; used in paths and passed to the loaders.
        dataset: dataset name; used in paths and passed to the loaders.
        user: 'mlsnrs' or 'shellhand'; selects the data root directory.
        device_source: path component placed directly under the data root.

    Raises:
        ValueError: if `user` is not one of the known names.
    """
    root_dir_prefix = _resolve_root_dir(user)
    log_name = 'log/optimize_cnn_%s_%s_evaluation_v2.txt' % (dataset, method)
    _ensure_log_dir(log_name)

    save_feature_path = '%s/%s/mamadroid/%s/%s/%s_save_feature_list.csv' % (
        root_dir_prefix, device_source, dataset, method, method)
    save_feature_dict = get_save_feature_dict(save_feature_path)
    print('have read save_feature_dict: %d' % len(save_feature_dict))

    x_train, y_train = get_train_data(dataset, 2012, method,
                                      save_feature_dict, root_dir_prefix)
    print('x_train shape: %s y_train shape: %s' %
          (str(x_train.shape), str(y_train.shape)))

    # The test data does not depend on any swept hyper-parameter, so it is
    # loaded once here instead of once per evaluation step.
    x_test, y_test = get_test_data(dataset, 2013, 0, method,
                                   save_feature_dict, root_dir_prefix)

    print('start train')
    step_size = 10
    for b in range(50, 501, 50):
        for k in [5]:
            for lr in [0.01, 0.1, 0.001]:
                clf = CNN(layer_num=3, kernel_size=k, gpu_id=2)
                # NOTE(review): `e` is reported as a cumulative epoch count,
                # which assumes each fit() call continues training the same
                # model rather than re-initialising it -- confirm.
                for e in range(10, 501, step_size):
                    clf.fit(x_train,
                            y_train,
                            epoch=step_size,
                            batch_size=b,
                            lr=lr)
                    # %g keeps lr=0.001 distinguishable (%.2f printed 0.00).
                    setting = 'batch_size=%d kernel_size=%d lr=%g epoch=%d' % (
                        b, k, lr, e)

                    y_pred = clf.predict(x_train, batch_size=20)
                    _report(
                        log_name,
                        'train data %s TP FP TN FN F1: %d %d %d %d %.4f' %
                        ((setting,) + _binary_metrics(y_train, y_pred)))

                    for test_id in range(0, 1):
                        y_pred = clf.predict(x_test, batch_size=20)
                        _report(
                            log_name,
                            'test_id %d %s TP FP TN FN F1: %d %d %d %d %.4f' %
                            ((test_id, setting) +
                             _binary_metrics(y_test, y_pred)))
Пример #4
0
# Training script: fit the CNN on mini-batches with Adam, report the
# accuracy on the evaluation set and plot the per-batch loss curve.
num_batches = 1000
batch_size = 50
learning_rate = 0.01

model = CNN()
data_loader = DataLoader()
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

losses = []
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    # Only the forward pass and the loss need to be recorded on the tape;
    # logging and bookkeeping happen after the context is closed.
    with tf.GradientTape() as tape:
        y_logit_pred = model(tf.convert_to_tensor(X))
        loss = tf.losses.sparse_softmax_cross_entropy(labels=y,
                                                      logits=y_logit_pred)
    # Convert the loss to a NumPy value once and reuse it.
    loss_value = loss.numpy()
    if batch_index % 100 == 0:
        print("batch %d: loss %f" % (batch_index, loss_value))
    losses.append(loss_value)
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

num_eval_samples = np.shape(data_loader.eval_labels)[0]
y_pred = model.predict(data_loader.eval_data).numpy()
# Count the matches in NumPy and divide by a float so the ratio is never
# truncated by integer division.
num_correct = np.sum(y_pred == data_loader.eval_labels)
print("Test accuracy: {}".format(num_correct / float(num_eval_samples)))

plt.plot(losses)
plt.show()