def main():
    """Train or run inference for the ClassifyappInst2Vec model.

    Reads configuration from absl-style FLAGS, downloads and prepares the
    classifyapp training data if needed, then either trains + evaluates
    (default) or runs test-set inference only (``--inference``).
    """
    ####################################################################################################################
    # Setup
    # Get flag values
    embeddings = task_utils.get_embeddings()
    folder_results = FLAGS.out
    # `assert` is stripped under `python -O`; raise explicitly.  The old
    # message also named a nonexistent --folder_results flag: the value
    # actually comes from --out.
    if not folder_results:
        raise ValueError(
            "Please specify a path to the results folder using --out")
    folder_data = FLAGS.input_data
    dense_layer_size = FLAGS.dense_layer
    print_summary = FLAGS.print_summary
    num_epochs = FLAGS.num_epochs
    batch_size = FLAGS.batch_size
    train_samples = FLAGS.train_samples

    # Acquire data: download the training set on first run only.
    if not os.path.exists(os.path.join(folder_data, 'ir_train')):
        task_utils.download_and_unzip(
            'https://polybox.ethz.ch/index.php/s/JOBjrfmAjOeWCyl/download',
            'classifyapp_training_data', folder_data)

    # Convert raw LLVM IR to the trainable sequence format.  Validation and
    # test folders must already exist (only the train set is downloadable).
    task_utils.llvm_ir_to_trainable(os.path.join(folder_data, 'ir_train'))
    for split in ('ir_val', 'ir_test'):
        split_dir = os.path.join(folder_data, split)
        if not os.path.exists(split_dir):
            raise FileNotFoundError("Folder not found: " + split_dir)
        task_utils.llvm_ir_to_trainable(split_dir)

    # Create result directories if they do not exist.
    os.makedirs(os.path.join(folder_results, "models"), exist_ok=True)
    os.makedirs(os.path.join(folder_results, "predictions"), exist_ok=True)

    ####################################################################################################################
    # Train model / evaluate Classifyapp
    print("\nEvaluating ClassifyappInst2Vec ...")
    if not FLAGS.inference:
        classifyapp_accuracy = evaluate(NCC_classifyapp(), embeddings,
                                        folder_data, train_samples,
                                        folder_results, dense_layer_size,
                                        print_summary, num_epochs, batch_size)

        # Report the mean test accuracy across folds as a percentage.
        print('\nTest accuracy:',
              sum(classifyapp_accuracy) * 100 / len(classifyapp_accuracy), '%')

    else:
        test_accuracy(NCC_classifyapp(), embeddings, folder_data,
                      train_samples, folder_results, dense_layer_size,
                      print_summary, num_epochs, batch_size)
    def load_embed(self):
        """Load pretrained embeddings into ``self.embed``.

        Fetches the embedding matrix via ``task_utils``, L2-normalizes each
        row, and installs it as the weight of an ``nn.Embedding`` that is
        wrapped in ``DataParallel``, moved to GPU, and set to eval mode.
        """
        print('Loading Embedding.')
        embeddings = task_utils.get_embeddings()
        vocab_size = embeddings.shape[0]
        # Convert the numpy matrix to a float32 tensor and L2-normalize rows.
        weights = torch.from_numpy(embeddings).type(torch.FloatTensor)
        weights = F.normalize(weights, p=2, dim=1)

        # NOTE(review): assumes embeddings.shape[1] == self.args.embedding_dim;
        # the Parameter assignment below silently overrides the layer's
        # declared shape otherwise — confirm upstream.
        self.embed = nn.Embedding(vocab_size, self.args.embedding_dim)
        self.embed.weight = torch.nn.Parameter(weights)
        # Wrap for multi-GPU use and move to CUDA; eval() disables
        # train-mode behavior (it does not freeze gradients).
        self.embed = torch.nn.DataParallel(self.embed).cuda()
        self.embed.eval()
# ==== Example 3 ====
def main(argv):
    """Train and evaluate the vulnerability-detection model (inst2vec).

    Reads configuration from absl-style FLAGS, validates that the
    ``<input_data>_{train,val,test}`` folders exist, converts them to the
    trainable format, and runs `evaluate` on `Main_Model`.
    """
    del argv  # unused

    ####################################################################################################################
    # Setup
    # Get flag values
    embeddings = task_utils.get_embeddings()
    folder_results = FLAGS.out
    # `assert` is stripped under `python -O`; raise explicitly.  The value
    # comes from --out, so name that flag in the message.
    if not folder_results:
        raise ValueError(
            "Please specify a path to the results folder using --out")
    folder_data = FLAGS.input_data
    dense_layer_size = FLAGS.dense_layer
    print_summary = FLAGS.print_summary
    num_epochs = FLAGS.num_epochs
    batch_size = FLAGS.batch_size
    train_samples = FLAGS.train_samples

    # Acquire data.  The original printed "Error" for a missing train folder
    # and then continued anyway; fail fast for every split instead.
    for suffix in ('_train', '_val', '_test'):
        split_dir = folder_data + suffix
        if not os.path.exists(split_dir):
            raise FileNotFoundError("Folder not found: " + split_dir)
        task_utils.llvm_ir_to_trainable(split_dir)

    print(folder_data)
    # Create result directories if they do not exist.
    os.makedirs(os.path.join(folder_results, "models"), exist_ok=True)
    os.makedirs(os.path.join(folder_results, "predictions"), exist_ok=True)

    ####################################################################################################################
    print("\nEvaluating Vulnerability using Inst2Vec ...")
    classifyapp_accuracy = evaluate(Main_Model(), embeddings, folder_data,
                                    train_samples, folder_results,
                                    dense_layer_size, print_summary,
                                    num_epochs, batch_size)
# ==== Example 4 ====
def main(argv):
    """Prepare the classifyapp data, train the model and report accuracy."""
    del argv    # unused

    ####################################################################################################################
    # Setup: read flag values
    embeddings = task_utils.get_embeddings()
    folder_results = FLAGS.out
    assert len(folder_results) > 0, "Please specify a path to the results folder using --folder_results"
    folder_data = FLAGS.input_data
    # Each split folder must exist; convert it to the trainable format.
    for suffix in ['_train', '_val', '_test']:
        split_folder = folder_data + suffix
        assert os.path.exists(split_folder), "Folder not found: " + split_folder
        task_utils.llvm_ir_to_trainable(split_folder)
    dense_layer_size = FLAGS.dense_layer
    print_summary = FLAGS.print_summary
    num_epochs = FLAGS.num_epochs
    batch_size = FLAGS.batch_size
    train_samples = FLAGS.train_samples

    # Build the results directory tree on demand.
    for subdir in ['', 'models', 'predictions']:
        target = os.path.join(folder_results, subdir)
        if not os.path.exists(target):
            os.makedirs(target)

    ####################################################################################################################
    # Train model and evaluate Classifyapp
    print("\nEvaluating ClassifyappInst2Vec ...")
    classifyapp_accuracy = evaluate(NCC_classifyapp(), embeddings, folder_data, train_samples, folder_results,
                                    dense_layer_size, print_summary, num_epochs, batch_size)

    ####################################################################################################################
    # Report the mean test accuracy as a percentage.
    print('\nTest accuracy:', sum(classifyapp_accuracy)*100/len(classifyapp_accuracy), '%')
# ==== Example 5 ====
def main(argv):
    """Train NCC_threadcoarsening and compare speedups to published models.

    Downloads the thread-coarsening kernels if absent, trains/evaluates the
    model for the device selected by --device, then prints per-platform
    speedups against Magni et al., DeepTune and DeepTuneTL baselines.
    """
    del argv  # unused

    ####################################################################################################################
    # Setup
    # Get flag values
    embeddings = task_utils.get_embeddings()
    input_data = FLAGS.input_data
    out = FLAGS.out
    os.makedirs(out, exist_ok=True)
    device = FLAGS.device
    assert device in ["all", "Cypress", "Tahiti", "Fermi", "Kepler"], \
        'Choose device among: all, Cypress, Tahiti, Fermi, Kepler'
    dense_layer_size = FLAGS.dense_layer
    print_summary = FLAGS.print_summary
    num_epochs = FLAGS.num_epochs
    batch_size = FLAGS.batch_size
    if not os.path.exists(os.path.join(input_data, 'kernels_ir')):
        # Download data on first run only.
        task_utils.download_and_unzip(
            'http://spclstorage.inf.ethz.ch/projects/ncc/tasks/threadcoarsening_data.zip',
            'threadcoarsening_training_data', input_data)

    task_utils.llvm_ir_to_trainable(os.path.join(input_data, 'kernels_ir'))

    ####################################################################################################################
    # Reference values
    # Values copied from papers and github
    magni_pl_sp_vals = [1.21, 1.01, 0.86, 0.94]
    magni_sp_mean = 1.005
    deeptune_pl_sp_vals = [1.10, 1.05, 1.10, 0.99]
    deeptune_sp_mean = 1.06
    deeptuneTL_pl_sp_vals = [1.17, 1.23, 1.14, 0.93]
    deeptuneTL_sp_mean = 1.1175

    ####################################################################################################################
    # Train model
    # Evaluate NCC_threadcoarsening
    print("\nEvaluating NCC_threadcoarsening ...")
    ncc_threadcoarsening = evaluate(NCC_threadcoarsening(), device, input_data,
                                    out, embeddings, dense_layer_size,
                                    print_summary, num_epochs, batch_size)

    ####################################################################################################################
    # Print results.  Select the numeric columns with a list: tuple
    # selection after groupby ( df.groupby(...)['a', 'b'] ) was removed in
    # pandas 1.0 and raises; 'Platform' is the group key and appears as the
    # index of the result anyway.
    print('\n',
          ncc_threadcoarsening.groupby('Platform')[['Speedup',
                                                    'Oracle']].mean())
    d = np.array([ncc_threadcoarsening[['Speedup', 'Oracle']].mean()]).T
    print(
        '\n',
        pd.DataFrame(d,
                     columns=["DeepTuneInst2Vec"],
                     index=["Speedup", "Oracle"]))

    # Model comparison: speedups (baselines + this model, one row per device)
    print('\nModel comparison: speedups')
    d = list()
    d.append(np.append(magni_pl_sp_vals, magni_sp_mean))
    d.append(np.append(deeptune_pl_sp_vals, deeptune_sp_mean))
    d.append(np.append(deeptuneTL_pl_sp_vals, deeptuneTL_sp_mean))
    d.append(
        np.append(
            ncc_threadcoarsening.groupby(['Platform'
                                          ])['Speedup'].mean().values,
            ncc_threadcoarsening['Speedup'].mean()))
    if FLAGS.device == 'all':
        d = np.array(d).T.reshape(5, 4)
        devs = [
            'AMD Radeon HD 5900', 'AMD Tahiti 7970', 'NVIDIA GTX 480',
            'NVIDIA Tesla K20c', 'Average'
        ]
    else:
        d = np.array(d).T.reshape(1, 4)
        devs = [_FLAG_TO_DEVICE_NAME[FLAGS.device]]
    print(
        '\n',
        pd.DataFrame(d,
                     columns=[
                         'Magni et al.', 'DeepTune', 'DeepTuneTL',
                         'DeepTuneInst2Vec'
                     ],
                     index=devs))
# ==== Example 6 ====
def main(argv):
    """Train NCC_devmap and compare it against published device-mapping models.

    Downloads the devmap kernels if absent, trains/evaluates the model for
    the platform selected by --device, then prints prediction accuracy and
    speedup tables against static-mapping, Grewe et al. and DeepTune.
    """
    del argv  # unused

    ####################################################################################################################
    # Setup
    # Get flag values
    embeddings = task_utils.get_embeddings()
    out = FLAGS.out
    os.makedirs(out, exist_ok=True)
    device = FLAGS.device
    assert device in ['all', 'amd', 'nvidia'], \
        'Choose device among: all, amd, nvidia'
    dense_layer_size = FLAGS.dense_layer
    print_summary = FLAGS.print_summary
    num_epochs = FLAGS.num_epochs
    batch_size = FLAGS.batch_size
    input_data = FLAGS.input_data
    if not os.path.exists(os.path.join(input_data, 'kernels_ir')):
        # Download data on first run only.
        task_utils.download_and_unzip(
            'https://polybox.ethz.ch/index.php/s/U08Z3xLhvbLk8io/download',
            'devmap_training_data', input_data)

    task_utils.llvm_ir_to_trainable(os.path.join(input_data, 'kernels_ir'))

    ####################################################################################################################
    # Reference values
    # Values copied from:
    # https://github.com/ChrisCummins/paper-end2end-dl/blob/master/code/Case%20Study%20A.ipynb
    static_pred_vals = [58.823529, 56.911765]
    static_pred_mean = 57.867647
    static_sp_vals = [1.0, 1.0]
    static_sp_mean = 1.0
    grewe_pred_vals = [73.382353, 72.941176]
    grewe_pred_mean = 73.161765
    grewe_sp_vals = [2.905822, 1.264801]
    grewe_sp_mean = 2.085312
    deeptune_pred_vals = [83.676471, 80.294118]
    deeptune_pred_mean = 81.985294
    deeptune_sp_vals = [3.335612, 1.412222]
    deeptune_sp_mean = 2.373917

    ####################################################################################################################
    # Train model
    print("Evaluating DeepTuneInst2Vec ...")
    ncc_devmap = evaluate(NCC_devmap(), device, input_data, out, embeddings,
                          dense_layer_size, print_summary, num_epochs,
                          batch_size)

    ####################################################################################################################
    # Print results.  Select the numeric columns with a list: tuple
    # selection after groupby ( df.groupby(...)['a', 'b'] ) was removed in
    # pandas 1.0 and raises; the group keys appear as the index anyway.
    print('\n--- Prediction results')
    print(
        ncc_devmap.groupby(['Platform',
                            'Benchmark Suite'])[['Correct?',
                                                 'Speedup']].mean())
    print('\n--- Prediction results (summarized)')
    print(ncc_devmap.groupby(['Platform'])[['Correct?', 'Speedup']].mean())

    # Model comparison: prediction accuracy
    print('\n--- Model comparison: prediction accuracy')
    d = list()
    d.append(np.append(static_pred_vals, static_pred_mean))
    d.append(np.append(grewe_pred_vals, grewe_pred_mean))
    d.append(np.append(deeptune_pred_vals, deeptune_pred_mean))
    d.append(
        np.append(
            ncc_devmap.groupby(['Platform'])['Correct?'].mean().values * 100,
            ncc_devmap['Correct?'].mean() * 100))
    d = np.array(d).T.reshape(3, 4)
    print(
        '\n',
        pd.DataFrame(d,
                     columns=[
                         'Static mapping', 'Grewe et al.', 'DeepTune',
                         'DeepTuneInst2Vec'
                     ],
                     index=['AMD Tahiti 7970', 'NVIDIA GTX 970', 'Average']))

    # Model comparison: speedups
    print('\n--- Model comparison: speedups')
    d = list()
    d.append(np.append(static_sp_vals, static_sp_mean))
    d.append(np.append(grewe_sp_vals, grewe_sp_mean))
    d.append(np.append(deeptune_sp_vals, deeptune_sp_mean))
    d.append(
        np.append(
            ncc_devmap.groupby(['Platform'])['Speedup'].mean().values,
            ncc_devmap['Speedup'].mean()))
    d = np.array(d).T.reshape(3, 4)
    print(
        '\n',
        pd.DataFrame(d,
                     columns=[
                         'Static mapping', 'Grewe et al.', 'DeepTune',
                         'DeepTuneInst2Vec'
                     ],
                     index=['AMD Tahiti 7970', 'NVIDIA GTX 970', 'Average']))
def predict_labels():
    """Predict top-k application classes for test IR and write them as JSON.

    Loads the pickled vocabulary to find the unknown-token index, encodes
    the '.rec' sequences under ./inference/seq_test, restores the trained
    NCC_classifyapp weights from FLAGS.out, and writes per-sample top-k
    class indices and probabilities to ./inference/<input_file>.json.
    """
    import json  # hoisted from mid-function; keep local to this entry point

    # Resolve the unknown-token index from the pickled vocabulary.
    folder_vocabulary = FLAGS.vocabulary_dir
    dictionary_pickle = os.path.join(folder_vocabulary, 'dic_pickle')
    with open(dictionary_pickle, 'rb') as f:
        dictionary = pickle.load(f)
    unk_index = dictionary[rgx.unknown_token]
    del dictionary  # free the large vocabulary mapping

    embeddings = task_utils.get_embeddings()
    embedding_matrix_normalized = tf.nn.l2_normalize(embeddings, axis=1)

    seed = 204
    num_classes = 104
    vocabulary_size, embedding_dimension = embedding_matrix_normalized.shape
    dense_layer_size = FLAGS.dense_layer

    path = './inference'
    task_utils.llvm_ir_to_trainable(os.path.join(path, 'ir_test'))

    # Collect the encoded sequence records produced above.
    seq_folder = os.path.join(path, 'seq_test')
    files = [
        os.path.join(seq_folder, f) for f in os.listdir(seq_folder)
        if f[-4:] == '.rec'
    ]

    X_test = files
    batch_size = len(X_test)
    X_seq_test, maxlen = encode_srcs(X_test, 'predict_sample', unk_index)
    print('Max. sequence length overall:', maxlen)
    if FLAGS.maxlen > 0:
        maxlen = FLAGS.maxlen
    print('Padding sequences to length', maxlen)
    X_seq_test = pad_src(X_seq_test, maxlen, unk_index)

    # Rebuild the model and restore the trained weights.
    model = NCC_classifyapp()
    model.init(seed=seed,
               maxlen=maxlen,
               embedding_dim=int(embedding_dimension),
               num_classes=num_classes,
               dense_layer_size=dense_layer_size,
               embedding_matrix=embedding_matrix_normalized)
    model.model.summary()
    model.load_weights(os.path.join(FLAGS.out, model.__name__ + '_weights.h5'))
    indices, probabilities = model.predict_topk(X_seq_test, batch_size,
                                                FLAGS.topk)

    print()
    # Write one JSON object keyed by sample name.  The file is managed by a
    # context manager (the original leaked the handle on error), and keys go
    # through json.dumps so names needing escaping still yield valid JSON
    # (the original hand-quoted them without escaping).
    json_path = os.path.join(path, FLAGS.input_file.split('/')[-1] + '.json')
    with open(json_path, 'w') as json_out:
        json_out.write('{\n')
        for i, fname in enumerate(files):
            key = fname[:-8].split('/')[-1]
            json_out.write(json.dumps(key) + ': ' +
                           json.dumps({
                               'classes': indices[i],
                               'Probabilities': probabilities[i]
                           }))
            if i < len(files) - 1:
                json_out.write(',')
            json_out.write('\n')
        json_out.write('}')