def test(net):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(test_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        test_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += float(targets.size(0))
        correct += predicted.eq(targets.data).cpu().sum().type(
            torch.FloatTensor)

        progress_bar(
            batch_idx, len(test_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (test_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))

    # Compute the final test accuracy.
    acc = 100. * correct / total
    return acc
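
Every snippet on this page drives some repo-local progress_bar helper, and its exact signature differs from example to example. As a rough, hypothetical reference only (not taken from any of these repositories), a minimal (current, total, message) variant could look like the following sketch:

import sys


def progress_bar(current, total, msg='', bar_len=30):
    """Minimal sketch of a console progress bar (hypothetical helper)."""
    frac = (current + 1) / float(total)
    filled = int(bar_len * frac)
    bar = '=' * filled + '.' * (bar_len - filled)
    # '\r' rewrites the same terminal line on every call.
    sys.stdout.write('\r[%s] %3d%% %s' % (bar, int(frac * 100), msg))
    if current + 1 >= total:
        sys.stdout.write('\n')
    sys.stdout.flush()
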
Example No. 2
def extract_xref(files_list):
    # total number of files to calculate completion percentage
    total_files = len(files_list)
    bad_files_idx = []
    bad_files_names = []
    # Extract all features related to DATA and CODE XREF
    xref_dict = xref_initialization()
    for idx, file_name in enumerate(files_list):
        asm_file = DATASET_DIR + 'train/' + file_name + '.asm.gz'
        try:
            get_xref_features(asm_file, xref_dict)
        except Exception as e:
            # log corrupted files for future correction
            log_exception(e, sys.argv[0], asm_file)
            bad_files_idx.append(idx)
            bad_files_names.append(file_name)
        progress_bar(idx+1, total_files, 50)

    xref_pd = pd.DataFrame.from_dict(xref_dict)
    # store xref features to avoid recalculation
    save_obj(xref_pd, 'xref_features')
    '''
    save_obj(bad_files_names, 'bad_asm_files')
    # drop corrupted files (if any) from the training set
    if len(bad_files_names) > 0:
        # log the number of corrupted files
        logging.info('XREF Feature Extraction completed: ' + 
                str(len(bad_files_names)) + ' file(s) are corrupted.')
        # store the corrupted files names in 'bad_asm_files.txt'
        with open('bad_asm_files.txt', 'w') as bfp:
            for name in bad_files_names:
                bfp.write(name + '.asm')
    '''
    # save xref features dataframe to csv file to keep results (optional)
    xref_pd.to_csv('features/xref_features.csv', index=False)
    return xref_pd
Example No. 3
def test(epoch):
    netG.eval()
    total_psnr = 0
    total_loss = 0
    for iteration, batch in enumerate(test_data_loader, 1):

        with torch.no_grad():
            input, target = Variable(batch[0]), Variable(batch[1])
            if opt.cuda:
                input = input.cuda()
                target = target.cuda()

            prediction = netG(input)
            mse = criterionMSE(prediction, target)
            total_loss += mse.item()
            psnr = 10 * log10(1 / mse.item())
            total_psnr += psnr
            # display per-batch loss and PSNR
            progress_bar(epoch, iteration, len(test_data_loader) + 1,
                         ': Loss: {:.4f}, PSNR: {:.4f} dB'.format(mse.item(), psnr))


    if epoch % 20 == 0 or epoch == opt.nEpochs:
        testDir = 'Test_Prediction/'
        os.makedirs(testDir, exist_ok=True)
        imgList = [ input[0], prediction[0], target[0] ]
        grid_img = make_grid(imgList)
        save_image(grid_img, testDir + 'epoch_{}.png'.format(epoch))

    avg_loss = total_loss / len(test_data_loader)
    avg_psnr = total_psnr / len(test_data_loader)
    print('Test Avg.Loss: {:.4f}, Avg.PSNR: {:.4f} dB'.format(avg_loss, avg_psnr))


    return avg_loss, avg_psnr
Example No. 4
def run_new(country, csv_path, save_path, save_result):
    # ############ get country list and save as js file###############
    with open('data/country.js', 'w') as f:
        f.write('window.country=' + json.dumps(country))
    # ################################################################

    js_data = make_json()
    preConfirmed = list()
    global SCALE
    step = 0
    distance = len(country)
    # get predict data
    for item in country:
        file = csv_path + item + '.csv'
        preConfirmed.append(pre_week(file)[0].astype(int))
        js_data["data"][item]["PreConfirmed"] = preConfirmed[-1].tolist()
        progress_bar(step, distance)
        step += 1

    # ####### save the js_data as a js file#######
    with open('data/data.js', 'w') as f:
        f.write('window.data=' + json.dumps(js_data))
    # ############################################

    result = {}
    for a, b in zip(country, preConfirmed):
        result[a] = b.tolist()
        print(a, b)

    # ########  here to save the prediction data  ##########
    with open(save_result, 'w') as f:
        f.write(json.dumps(result))
def train(net, epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()

        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += float(targets.size(0))
        correct += predicted.eq(targets.data).cpu().sum().type(
            torch.FloatTensor)

        progress_bar(
            batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))
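
For context, train/test pairs like the ones above are normally driven by an outer epoch loop. The sketch below shows one plausible driver; the epoch count, checkpoint path, and saving policy are assumptions, not part of any of the listed repositories:

import os

import torch

# Hypothetical driver for the train(net, epoch) / test(net) pair shown above.
num_epochs = 200          # assumed hyperparameter
best_acc = 0.0
os.makedirs('checkpoint', exist_ok=True)
for epoch in range(num_epochs):
    train(net, epoch)
    acc = test(net)
    if acc > best_acc:    # keep only the best-performing weights
        best_acc = acc
        torch.save(net.state_dict(), 'checkpoint/best.pth')
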
Example No. 6
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        channel0, channel1, channel2 = inputs.numpy(
        )[:, 0, :, :], inputs.numpy()[:, 1, :, :], inputs.numpy()[:, 2, :, :]
        channel0, channel1, channel2 = encoder.tempencoding(
            channel0), encoder.tempencoding(channel1), encoder.tempencoding(
                channel2)
        channel0, channel1, channel2 = torch.Tensor(channel0), torch.Tensor(
            channel1), torch.Tensor(channel2)
        if use_cuda:
            channel0, channel1, channel2, targets = channel0.cuda(
            ), channel1.cuda(), channel2.cuda(), targets.cuda()
        optimizer.zero_grad()
        channel0, channel1, channel2, targets = Variable(channel0), Variable(
            channel1), Variable(channel2), Variable(targets)
        outputs = net(channel0, channel1, channel2)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))
Example No. 7
def train(epoch):
    print('-- Current Training Epoch %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        Optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        Optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        util.progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.2f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))
Example No. 8
    def stream_from_source(self):
        self.cntr = 0
        # training folder path
        training_path = os.path.join(self.root, "TrainingSet")
        if self.segment is None:
            patient_folders = os.listdir(training_path)
        else:
            patient_folders = self.segment
        # iterate through the patients
        for patient_folder in patient_folders:
            self.cntr += 1
            patient_path = os.path.join(training_path, patient_folder)
            folders = os.listdir(patient_path)
            img_folder = [fd for fd in folders if fd.find('dicom') != -1][0]
            con_folder = [fd for fd in folders if fd.find('contours') != -1][0]
            img_path = os.path.join(patient_path, img_folder)
            con_path = os.path.join(patient_path, con_folder)
            # read the images and the contours
            dcm = dicom_reader.DCMreaderMC2(img_path)
            con = con_reader.CONreaderMC2(con_path)
            p = Patient(Origin.MC2)
            p.origin_path = patient_path
            p.patient_id = patient_folder
            p.gender = dcm.get_gender()
            p.height = dcm.get_height()
            p.weight = dcm.get_weight()
            p.pixel_spacing = dcm.get_pixelspacing()
            p.slice_thicknes = dcm.get_slicethickness()
            p.gap = dcm.get_gap()

            for k, v in dcm.get_imagepaths().items():
                image = Image(Origin.MC2)
                image.origin_path = v['path']
                image.patient_id = p.patient_id
                image.slice = v['slice']
                image.frame = v['frame']

                if k in con.get_contours().keys():
                    for part, cntr in con.get_contours()[k].items():
                        contour = Contour(Origin.MC2)
                        contour.origin_path = cntr['path']
                        contour.patient_id = p.patient_id
                        contour.slice = image.slice
                        contour.frame = image.frame
                        contour.length = cntr['contour'].shape[0]
                        contour.contour_mtx = cntr['contour']
                        contour.corresponding_image = image
                        if part == 'i':  # i indicates right-endo
                            contour.part = Region.RN
                        elif part == 'o':
                            contour.part = Region.RP
                        image.ground_truths.append(contour)
                        p.append_gt(contour)
                    image.has_gt = True
                p.append_image(image)
            pickle_path = os.path.join(self.cache, patient_folder + '.pickle')
            with open(pickle_path, 'wb') as f:
                pickle.dump(p, f)  # create cache
            util.progress_bar(self.cntr, len(patient_folders), 50)
            yield p
Example No. 9
def test(epoch, model, criterion, testloader):
    model.eval()  # do not apply dropout
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for iteration, batch in enumerate(testloader):
            inputs = batch[0]
            labels = batch[1]

            # if a GPU is available
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            # forward
            outputs = model(inputs)
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            total += labels.size(0)
            correct += preds.eq(labels).sum().item()

            printLoss = (test_loss / (iteration + 1))
            printAcc = 100. * correct / total
            progress_bar(
                epoch, iteration, len(testloader),
                ': Loss: {:.4f}, Acc: {:.4f} % ({}/{})'.format(
                    printLoss, printAcc, correct, total))
Example No. 10
def print_test_info(cfg, step, metrics):
    """Print validate information on the screen.
    Inputs:
        cfg: training options
        step: a list includes --> [per_step, total_step]
        metrics: the current batch testing metrics
    """
    report = info = message = ""
    if step[0] == 0:
        equal_left, equal_right = cal_equal(15)
        info += "\n" + "=" * equal_left + " Start Testing " + "=" * equal_right
        print(info)
        if cfg.opts.save_test_log:
            wrote_txt_file(os.path.join(cfg.CHECKPOINT_DIR, 'Test_log.txt'),
                           info,
                           mode='w',
                           show=False)
    progress_bar(step[0], step[1], display=False)
    if cfg.opts.test_label != 'None':
        if step[0] + 1 >= step[1]:
            report, message = prepare_metrics(report, message, metrics)
            if cfg.opts.save_test_log:
                wrote_txt_file(os.path.join(cfg.CHECKPOINT_DIR,
                                            'Test_log.txt'),
                               message + "\n" + report,
                               mode='a',
                               show=False)
Example No. 11
def print_val_info(val_flag, cfg, step, metrics):
    """Print validate information on the screen.
    Inputs:
        val_flag: whether print the information or not(bool)
        cfg: training options
        step: a list includes --> [per_step, total_step]
        metrics: the current batch validating metrics
    """
    report = info = message = ""
    if val_flag:
        if step[0] - 1 == 0:
            info += ">>> Validate on the val dataset ..."
            print(info)
            if cfg.opts.save_train_log:
                wrote_txt_file(os.path.join(cfg.CHECKPOINT_DIR,
                                            'Train_Log.txt'),
                               info,
                               mode='a',
                               show=False)
        progress_bar(step[0] - 1, step[1], display=False)
        if step[0] >= step[1]:
            report, message = prepare_metrics(report, message, metrics)
            if cfg.opts.save_train_log:
                wrote_txt_file(os.path.join(cfg.CHECKPOINT_DIR,
                                            'Train_Log.txt'),
                               message + "\n" + report,
                               mode='a',
                               show=False)
def extract_byte_ngram_features(files_list, n, addrlength=32):
    ngram_d_list = []
    for idx, file_name in enumerate(files_list):
        byte_file = conf['dataset_dir'] + file_name + '.bytes.gz'
        byte_seq = parse_bytes(byte_file, addrlength)
        # check for successful byte file parsing
        if not byte_seq:
            continue
        ngram_cnt_d = count_byte_ngrams(byte_seq, n)
        ngram_d_list.append(ngram_cnt_d)
        progress_bar(idx + 1, len(files_list), 50)

    # convert list of dictionaries to a byte ngram count numpy array
    vec = DictVectorizer()
    ngram_freq = vec.fit_transform(ngram_d_list).toarray()
    ngram_freq_df = pd.DataFrame(ngram_freq, columns=vec.get_feature_names())
    # store frequency of each byte ngram
    ngram_freq_df.to_csv(conf['featsets_dir'] + str(n) + 'gram_byte_freq2.csv',
                         index=False)

    # transform ngram frequency array to ngram tfidf array
    transformer = TfidfTransformer(smooth_idf=False)
    ngram_tfidf = transformer.fit_transform(ngram_freq)
    # store tfidf of each byte ngram in CSV file
    ngram_tfidf_df = pd.DataFrame(
        ngram_tfidf.todense(),
        columns=[x.decode("utf-8") + '_tf' for x in vec.get_feature_names()])
    ngram_tfidf_df.to_csv(conf['featsets_dir'] + str(n) +
                          'gram_byte_tfidf2.csv',
                          index=False)
    return ngram_freq_df, ngram_tfidf_df
def retrain(net, epoch):
    print('\nEpoch: %d' % epoch)
    global best_acc
    net.train()
    train_loss = 0
    total = 0
    correct = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()

        if args.fixed:
            net = util.quantize(net, args.pprec)

        optimizer.step()

        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += float(predicted.eq(targets.data).cpu().sum())

        progress_bar(
            batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))
        acc = 100. * correct / total
    if args.fixed:
        net = util.quantize(net, args.pprec)
Example No. 14
    def stream_from_source(self):
        self.cntr = 0
        # training folder path
        training_path = os.path.join(self.root, "CAP_challenge_training_set")
        if self.segment is None:
            patient_folders = os.listdir(training_path)
        else:
            patient_folders = self.segment
        patient_folders = [p for p in patient_folders if os.path.isdir(os.path.join(training_path, p))]
        # iterate through the patients
        for patient_folder in patient_folders:
            self.cntr += 1
            patient_path = os.path.join(training_path, patient_folder)
            # read the images and the contours
            dcm = dicom_reader.DCMreaderST(patient_path)
            con = con_reader.CONreaderST(patient_path)
            p = Patient(Origin.ST11)
            p.origin_path = patient_path
            p.patient_id = patient_folder
            p.gap = dcm.get_gap()
            p.pixel_spacing = dcm.get_pixelspacing()
            p.slice_thicknes = dcm.get_slicethickness()
            for slice in dcm.get_imagepaths():
                for frame in dcm.get_imagepaths()[slice]:
                    image = Image(Origin.ST11)
                    image.origin_path = dcm.get_imagepaths()[slice][frame]['path']
                    image.patient_id = p.patient_id
                    image.slice = slice
                    image.frame = frame

                    if slice in con.get_contours():
                        if frame in con.get_contours()[slice]:
                            for part in ['LN', 'LP']:
                                cntr = con.get_contours()[slice][frame]
                                contour = Contour(Origin.ST11)
                                contour.origin_path = cntr['path']
                                contour.patient_id = p.patient_id
                                contour.slice = image.slice
                                contour.frame = image.frame
                                contour.length = cntr['contours'][part].shape[0]
                                contour.contour_mtx = cntr['contours'][part]
                                contour.corresponding_image = image
                                if part == 'LN':  # i indicates left-endo
                                    contour.part = Region.LN
                                elif part == 'LP':
                                    contour.part = Region.LP
                                image.ground_truths.append(contour)
                                p.append_gt(contour)
                                image.has_gt = True
                    p.append_image(image)

            pickle_path = os.path.join(self.cache, patient_folder + '.pickle')
            with open(pickle_path, 'wb') as f:
                pickle.dump(p, f)  # create cache
            util.progress_bar(self.cntr, len(patient_folders), 50)
            yield p
Example No. 15
def train(epoch):
    netD.train()
    netG.train()
    total_D_loss = 0
    total_G_loss = 0
    for iteration, batch in enumerate(training_data_loader, 1):
        # forward
        real_a_cpu, real_b_cpu = batch[0], batch[1]
        real_a.data.resize_(real_a_cpu.size()).copy_(real_a_cpu)
        real_b.data.resize_(real_b_cpu.size()).copy_(real_b_cpu)

        fake_b = netG(real_a)
        ############################
        # (1) Update D network: maximize log(D(x,y)) + log(1 - D(x,G(x)))
        ###########################
        optimizerD.zero_grad()

        # train with fake
        fake_ab = torch.cat((real_a, fake_b), 1)
        pred_fake = netD.forward(fake_ab.detach())
        loss_d_fake = criterionGAN(pred_fake, False)

        # train with real
        real_ab = torch.cat((real_a, real_b), 1)
        pred_real = netD.forward(real_ab)
        loss_d_real = criterionGAN(pred_real, True)

        # Combined loss
        loss_d = (loss_d_fake + loss_d_real) * 0.5
        loss_d.backward()
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(x,G(x))) + L1(y,G(x))
        ##########################
        optimizerG.zero_grad()
        # First, G(A) should fake the discriminator
        fake_ab = torch.cat((real_a, fake_b), 1)
        pred_fake = netD.forward(fake_ab)
        loss_g_gan = criterionGAN(pred_fake, True)
        # Second, G(A) = B
        loss_g_l1 = criterionL1(fake_b, real_b) * opt.lamb
        loss_g = loss_g_gan + loss_g_l1
        loss_g.backward()
        optimizerG.step()

        total_D_loss += loss_d.item()
        total_G_loss += loss_g.item()

        # display
        progress_bar(epoch, iteration, len(training_data_loader) + 1,
                     ': Loss_D: {:.4f}, Loss_G: {:.4f}'.format(loss_d.item(), loss_g.item()))

    avg_D_loss = total_D_loss / len(training_data_loader)
    avg_G_loss = total_G_loss / len(training_data_loader)

    return avg_D_loss, avg_G_loss
Example No. 16
def test(epoch, net, criterion, testLoader, device):
    net.eval()
    test_loss = 0

    for batch_idx, (inputs, targets) in enumerate(testLoader):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()

        util.progress_bar(batch_idx, len(testLoader),
                          'Test Loss: %.3f' % (test_loss / (batch_idx + 1)))

    return test_loss / len(testLoader)
Example No. 17
    def stream_from_source(self):
        self.cntr = 0
        # training folder path
        training_path = os.path.join(self.root, "TrainingData_LVQuan19")
        if self.segment is None:
            patient_datas = os.listdir(training_path)
        else:
            patient_datas = self.segment
        patient_datas = [p for p in patient_datas if p.endswith('.mat')]
        # iterate through the patients
        for patient_file in patient_datas:
            self.cntr += 1
            patient_path = os.path.join(training_path, patient_file)
            # read the images and the contours
            mat = loadmat(patient_path)
            p = Patient(Origin.ST19)
            p.origin_path = patient_path
            p.patient_id = patient_file[:-4]
            p.pixel_spacing = mat['pix_spacing'][0, 0]
            for frame in range(20):
                image = Image(Origin.ST19)
                image.origin_path = p.origin_path
                image.patient_id = p.patient_id
                image.frame = frame
                image.image = mat['image'][:, :, frame]
                image.size = image.image.shape

                lvq = LVQuant(Origin.ST19)
                lvq.origin_path = p.origin_path
                lvq.patient_id = p.patient_id
                lvq.frame = image.frame
                lvq.epi_mask = mat['epi'][:, :, frame]
                lvq.endo_mask = mat['endo'][:, :, frame]
                lvq.area_cavity = mat['areas'][0, frame]
                lvq.area_myo = mat['areas'][1, frame]
                lvq.dims = mat['dims'][:, frame]
                lvq.rwt = mat['rwt'][:, frame]
                lvq.phase = mat['lv_phase']  # systole: 1, diastole: 0
                lvq.corresponding_image = image
                image.ground_truths.append(lvq)
                p.append_gt(lvq)
                image.has_gt = True
                p.append_image(image)

            pickle_path = os.path.join(self.cache, p.patient_id + '.pickle')
            with open(pickle_path, 'wb') as f:
                pickle.dump(p, f)  # create cache
            util.progress_bar(self.cntr, len(patient_datas), 10)
            yield p
Example No. 18
def main():
    train_labels = pd.read_csv(DATASET_DIR + 'trainLabels.csv')
    files_list = train_labels['Id'].tolist()
    
    # total number of files to calculate completion percentage
    total_files = len(files_list)

    # do not count corrupted files
    bad_files_idx = []
    bad_files_names = []
    # Extract all features related to DATA and CODE XREF
    xref_dict = xref_initialization()
    for idx, file_name in enumerate(files_list):
        asm_file = DATASET_DIR + 'train/' + file_name + '.asm.gz'
        try:
            get_xref_features(asm_file, xref_dict)
        except Exception as e:
            # log corrupted files for future correction
            log_exception(e, sys.argv[0], asm_file)
            bad_files_idx.append(idx)
            bad_files_names.append(file_name)

        progress_bar(idx+1, total_files, 50)

    xref_pd = pd.DataFrame.from_dict(xref_dict)
    
    # store xref features to avoid recalculation
    save_obj(xref_pd, 'xref_features')
    save_obj(bad_files_names, 'bad_files')

    # concat features with classes and IDs to create the dataset
    data = pd.concat([train_labels, xref_pd], axis=1, sort=False)

    # drop corrupted files (if any) from the training set
    if len(bad_files_idx) > 0:
        data.drop(data.index[bad_files_idx], inplace=True)
        data = data.reset_index(drop=True)
        # log the number of corrupted files
        logging.info('XREF Feature Extraction completed: ' + 
                str(len(bad_files_idx)) + ' file(s) are corrupted.')
        # store the corrupted files names in 'bad_asm_files.txt'
        with open('bad_asm_files.txt', 'w') as bfp:
            for name in bad_files_names:
                bfp.write(name + '.asm.gz')

    # save xref features dataframe to csv file to keep results (optional)
    data.to_csv('results/xref_features.csv')
    
Example No. 19
def extract_opcode_ngram(files_list, n):
    dicts_list = []
    total_files = len(files_list)
    for idx, file_name in enumerate(files_list):
        asm_file = conf['dataset_dir'] + file_name + '.asm.gz'
        clean_asm_code = clean_asm_lines(asm_file)
        opcode_sequence = [] 
        # this loop constructs a sequence of opcodes delimited by space character
        for line in clean_asm_code:
            # the command below works assuming that the preprocessing of the .asm
            # file has already occurred
            opcode_mnem = line.split(' ')[0].rstrip()
            # further condition to minimize the number of outliers (handle extreme cases)
            is_valid_opcode = bool(re.match('^[a-z]{2,7}$', opcode_mnem))
            if is_valid_opcode:
                opcode_sequence.append(opcode_mnem)

        ngram_dict = {} 
        for index, opcode in enumerate(opcode_sequence):
            if (n + index) > len(opcode_sequence):
                break
            opcode_ngram = ""
            for j in range(index, index + n):
                opcode_ngram += opcode_sequence[j] + '-'
            # remove trailing '-' char from opcode_ngram
            opcode_ngram = opcode_ngram[:-1]
            if opcode_ngram in ngram_dict:
                ngram_dict[opcode_ngram] += 1
            else:
                ngram_dict[opcode_ngram] = 1

        dicts_list.append(ngram_dict)
        # progress bars always save my sanity
        progress_bar(idx+1, total_files, 50)
    
    # convert list of dictionaries to an opcode ngram count numpy array
    vec = DictVectorizer()
    ngram_freq = vec.fit_transform(dicts_list).toarray()
    ngram_freq_df = pd.DataFrame(ngram_freq, columns=vec.get_feature_names())
    ngram_freq_df.to_csv('features/' + str(n) + 'gram_opcode_freq1.csv', index=False)
    save_obj(ngram_freq_df, str(n) + 'gram_opcode_freq')
    # transform ngram frequency array to ngram tfidf array
    transformer = TfidfTransformer(smooth_idf=False)
    ngram_tfidf = transformer.fit_transform(ngram_freq)
    # transform array to pandas dataframe
    freq_vec_df = pd.DataFrame(ngram_tfidf.todense(), columns=vec.get_feature_names())
    freq_vec_df.to_csv('features/' + str(n) + 'gram_opcode_tfidf1.csv', index=False)
    save_obj(freq_vec_df, str(n) + 'gram_opcode_tfidf')
    return freq_vec_df
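
The inner n-gram loop in extract_opcode_ngram above can be written more compactly with collections.Counter. The sketch below is an equivalent formulation of just the counting step, assuming the same opcode_sequence list built earlier in that function:

from collections import Counter


def count_opcode_ngrams(opcode_sequence, n):
    # Join each window of n consecutive opcodes with '-' and count occurrences.
    windows = ('-'.join(opcode_sequence[i:i + n])
               for i in range(len(opcode_sequence) - n + 1))
    return Counter(windows)
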
def extract_byte_entropy_features(files_list, addrlength=32):
    ent_d_list = []
    for idx, file_name in enumerate(files_list):
        bytes_file = conf['dataset_dir'] + file_name + '.bytes.gz'
        bytes_seq = parse_bytes(bytes_file, addrlength)
        ent_stats = extract_entropy_statistics(bytes_seq, window_size=10000)
        ent_d_list.append(ent_stats)
        progress_bar(idx+1, len(files_list), 50)
    
    # convert list of dictionaries to pandas DataFrame
    vec = DictVectorizer()
    ent_features = vec.fit_transform(ent_d_list).toarray()
    ent_features = pd.DataFrame(ent_features, columns=vec.get_feature_names())
    # store entropy feature set in CSV file
    ent_features.to_csv('features/entropy_features.csv', index=False)
    return ent_features
Example No. 21
def train(epoch, net, criterion, optimizer, trainLoader, device):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0

    for batch_idx, (inputs, targets) in enumerate(trainLoader):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        util.progress_bar(batch_idx, len(trainLoader),
                          'Train Loss: %.3f' % (train_loss / (batch_idx + 1)))
Example No. 22
    def stream_from_cache(self):
        # calculate the number of patients for checking progress
        cntr = 0
        if self.segment is None:
            patients = os.listdir(self.cache)
        else:
            patients = [p for p in self.segment]
        all_patient = len(patients)

        # stream the saved pickle files
        for p in patients:
            p_path = os.path.join(self.cache, p)
            cntr += 1
            with open(p_path, 'br') as f:
                patient = pickle.load(f)
            util.progress_bar(cntr, all_patient, 50)
            yield patient
Example No. 23
def main():

    while True:

        micro_config = util.read_config_file(micro_config_file_name)
        macro_config = util.read_config_file(macro_config_file_name)

        micro_util = monitor.get_microservices_utilization()
        #micro_util = monitor.get_container_utilization()
        macro_util = monitor.get_macroservices_utilization()

        compare_cpu_util(micro_config, micro_util, macro_config, macro_util)

        print("****** Completed monitoring! Wait for " +
              str(monitoring_interval) + " seconds. *****")

        util.progress_bar(monitoring_interval)
Example No. 24
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(testloader):
        channel0, channel1, channel2 = inputs.numpy(
        )[:, 0, :, :], inputs.numpy()[:, 1, :, :], inputs.numpy()[:, 2, :, :]
        channel0, channel1, channel2 = encoder.tempencoding(
            channel0), encoder.tempencoding(channel1), encoder.tempencoding(
                channel2)
        channel0, channel1, channel2 = torch.Tensor(channel0), torch.Tensor(
            channel1), torch.Tensor(channel2)
        if use_cuda:
            channel0, channel1, channel2, targets = channel0.cuda(
            ), channel1.cuda(), channel2.cuda(), targets.cuda()
        channel0, channel1, channel2, targets = Variable(channel0), Variable(
            channel1), Variable(channel2), Variable(targets)
        outputs = net(channel0, channel1, channel2)
        loss = criterion(outputs, targets)

        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        progress_bar(
            batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (test_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))

    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.module if use_cuda else net,
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.t7')
        best_acc = acc
Example No. 25
def advtrain(epoch):
    global attackstep
    global attacker
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        channel0, channel1, channel2 = attacker.attackthreechannel(
            inputs, targets)
        channel0, channel1, channel2 = torch.Tensor(channel0), torch.Tensor(
            channel1), torch.Tensor(channel2)
        if use_cuda:
            channel0, channel1, channel2, targets = channel0.cuda(
            ), channel1.cuda(), channel2.cuda(), targets.cuda()
        optimizer.zero_grad()
        channel0, channel1, channel2, targets = Variable(channel0), Variable(
            channel1), Variable(channel2), Variable(targets)
        outputs = net(channel0, channel1, channel2)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        if batch_idx == 0:
            advc0, advc1, advc2 = (
                channel[-3:].data.cpu().numpy()
                for channel in [channel0, channel1, channel2])
            advc0, advc1, advc2 = (encoder.temp2img(advc)
                                   for advc in [advc0, advc1, advc2])
            advc0, advc1, advc2 = (torch.Tensor(advc[:, np.newaxis, :, :])
                                   for advc in [advc0, advc1, advc2])
            advimg = torch.cat((advc0, advc1, advc2), dim=1)
            advimg = torchvision.utils.make_grid(advimg)
            writer.add_image('Image', advimg, epoch)

        progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))
Example No. 26
def extract_reg_counts(files_list):
    reg_cnt_d_list = []
    for idx, file_name in enumerate(files_list):
        asm_file = conf['dataset_dir'] + file_name + '.asm.gz'
        reg_cnt = register_count(asm_file)
        reg_cnt_d_list.append(reg_cnt)
        progress_bar(idx+1, len(files_list), 50)
    # convert list of dictionaries to an opcode count numpy array
    vec = DictVectorizer()
    reg_freq = vec.fit_transform(reg_cnt_d_list).toarray()
    reg_freq = pd.DataFrame(reg_freq, columns=vec.get_feature_names())
    csv_file = conf['featsets_dir'] + 'reg_freq.csv'
    reg_freq.to_csv(csv_file)
    # transform ngram frequency array to ngram tfidf array
    transformer = TfidfTransformer(smooth_idf=False)
    reg_tfidf = transformer.fit_transform(reg_freq)
    reg_tfidf = pd.DataFrame(reg_tfidf.todense(), columns=vec.get_feature_names())
    csv_file = conf['featsets_dir'] + 'reg_tfidf.csv'
    reg_tfidf.to_csv(csv_file) 
Example No. 27
def extract_opcode_ngrams(files_list, n):
    opcode_d_list = []
    for idx, file_name in enumerate(files_list):
        asm_file = conf['dataset_dir'] + file_name + '.asm.gz'
        opcode_count = opcode_ngram_count(asm_file, n)
        opcode_d_list.append(opcode_count)
        progress_bar(idx+1, len(files_list), 50)
    # convert list of dictionaries to an opcode count numpy array
    vec = DictVectorizer()
    opc_freq = vec.fit_transform(opcode_d_list).toarray()
    opc_freq = pd.DataFrame(opc_freq, columns=vec.get_feature_names())
    csv_file = conf['featsets_dir'] + str(n) + 'gram_opcode_freq.csv'
    opc_freq.to_csv(csv_file)
    # transform ngram frequency array to ngram tfidf array
    transformer = TfidfTransformer(smooth_idf=False)
    opc_tfidf = transformer.fit_transform(opc_freq)
    opc_tfidf = pd.DataFrame(opc_tfidf.todense(),
            columns=[x.decode("utf-8") + '_tf' for x in vec.get_feature_names()])
    csv_file = conf['featsets_dir'] + str(n) + 'gram_opcode_tfidf.csv'
    opc_tfidf.to_csv(csv_file) 
Example No. 28
    def evaluate(self, sess, pre_score, saver=None):
        """Evaluate the model on / predict labels for the @examples_raw data.
        """
        new_score = pre_score
        preds = []  # predictions for the whole dataset
        for j, batch in enumerate(
                minibatches(self.dev_set, self.batch_size, shuffle=False)):
            inputs_batch, mask_batch = batch[0], batch[2]
            feed = self.create_feed_dict(inputs_batch=inputs_batch,
                                         mask_batch=mask_batch)
            preds_ = sess.run(tf.argmax(self.preds, axis=2),
                              feed_dict=feed)  # predictions for one batch

            preds += list(preds_)

            total_batch = 1 + int(len(self.dev_set) / self.batch_size)
            print(progress_bar(j, total_batch, "batch"))

        all_original_labels = []  # ground-truth labels
        all_predicted_labels = []  # predicted labels

        for i, (sentence, labels) in enumerate(self.dev_vec):
            _, _, mask = self.dev_set[i]  # get the mask for each sentence
            labels_ = [l for l, m in zip(preds[i], mask)
                       if m]  # apply the mask (keep only predictions where mask is True)

            if len(labels_) == len(labels):  # last batch
                all_original_labels += labels
                all_predicted_labels += labels_

        cm = confusion_matrix(all_original_labels,
                              all_predicted_labels)  # confusion matrix
        acc_score = accuracy_score(all_original_labels, all_predicted_labels)
        f_score = f1_score(all_original_labels,
                           all_predicted_labels,
                           average="micro")
        report = classification_report(all_original_labels,
                                       all_predicted_labels,
                                       target_names=self.LBLS)

        print("Accuracy:", acc_score)
        print("F-score:", f_score)
        print("Confusion matrix:\n", cm)
        print("Classification report:\n", report)

        if f_score > pre_score:
            new_score = f_score
            if saver:  # save the best model during training; saver may be omitted when testing
                logger.info("New best score! Saving model in %s",
                            self.model_output)
                saver.save(sess, self.model_output)

        return all_predicted_labels, new_score
Example No. 29
    def load_data(self, mode, shuffle=False):
        """Load the train or val or test dataset"""
        if mode == 'Train':
            label_name, label_dir, data_dir = [
                self.opts.train_label, self.cfg.TRAIN_LABEL_DIR,
                self.cfg.TRAIN_DATA_DIR
            ]
        elif mode == 'Val':
            label_name, label_dir, data_dir = [
                self.opts.val_label, self.cfg.VAL_LABEL_DIR,
                self.cfg.VAL_DATA_DIR
            ]
        else:
            label_name, label_dir, data_dir = [
                self.opts.test_label, self.cfg.TEST_LABEL_DIR,
                self.cfg.TEST_DATA_DIR
            ]

        if label_name == 'None':
            label_data = []
            test_names = os.listdir(data_dir)
            for name in test_names:
                label_data.append([name, 0])
        else:
            label_data = self._open_data_file(label_name, label_dir)
        if shuffle:
            random.shuffle(label_data)
        for index, data_set in enumerate(label_data):
            if mode == 'Test':
                length = self.opts.num_test if self.opts.num_test < len(
                    label_data) else len(label_data)
                if index + 1 > self.opts.num_test:
                    break
            else:
                length = len(label_data)
            progress_bar(index, length, "Loading {} dataset".format(mode))
            self._add_to_database(index, data_set, data_dir)

        equal_left, equal_right = cal_equal(6)
        print('\n%s Done %s' % ('=' * equal_left, '=' * equal_right))
Example No. 30
def print_test_info(cfg, step, loss=100.0, metric=0.0):
    """Print validate information on the screen.
    Inputs:
        cfg: training options
        step: a list includes --> [per_step, total_step]
        loss: a float includes --> val loss value
        metric: the current batch testing metric
    """
    info = message = ""
    if step[0] == 0:
        equal_left, equal_right = cal_equal(15)
        info += "\n" + "=" * equal_left + " Start Testing " + "=" * equal_right
        print(info)
        if cfg.opts.save_test_log:
            wrote_txt_file(os.path.join(cfg.CHECKPOINT_DIR, 'Test_log.txt'), info, mode='w', show=False)
    progress_bar(step[0], step[1], display=False)
    if cfg.opts.test_label != 'None':
        if step[0] + 1 >= step[1]:
            message += "\n>>> Loss:{:.4f}  ACC:{:.3f}%  ".format(loss / step[1], metric / step[1] * 100)
            print(message)
            if cfg.opts.save_test_log:
                wrote_txt_file(os.path.join(cfg.CHECKPOINT_DIR, 'Test_log.txt'), message, mode='a', show=False)
Example No. 31
# original LabVIEW code.
print('\rBuilding a list of directories with raw data in them.')
PATH_STRING_1 = '/Volumes/science/projects/kinesin_and_microtubules'
PATH_STRING_2 = 'water_isotope_study/data'
BASE_PATH = os.path.join(PATH_STRING_1, PATH_STRING_2)
ASSAY_TYPE = 'heavy_hydrogen_buffer'
PATH = os.path.join(BASE_PATH, ASSAY_TYPE)
raw_data_paths = util.list_data_paths(PATH, 'Full')
LENGTH_RAW = len(raw_data_paths)

# Create a new directory that will contain data for the python scripts.
print('\rCreating python_data directories.')
for i, raw_data_file in enumerate(raw_data_paths):
    util.create_python_data_directory(raw_data_file)
    percent = float(i + 1)/LENGTH_RAW*100
    util.progress_bar(percent)

# Create a copy of the raw data generated by LabVIEW into the python_data
# directory.
print('\rCopying raw data files.')
for i, raw_data_file in enumerate(raw_data_paths):
    util.copy_raw_data(raw_data_file)
    percent = float(i + 1)/LENGTH_RAW*100
    util.progress_bar(percent)

# Smooth the raw x and y position data.
print('\rSmoothing data and saving it to the python_data directory.')
for i, raw_data_file in enumerate(raw_data_paths):
    util.smooth_raw_data(raw_data_file)
    percent = float(i + 1)/LENGTH_RAW*100
    util.progress_bar(percent)