Example #1
    def train_epoch(self, epoch):
        self.model.train()
        if not os.path.exists(self.csv_path):
            os.mkdir(self.csv_path)
        train_csv = os.path.join(self.csv_path, 'train.csv')
        pred_list, target_list, loss_list = [], [], []
        print('Epoch: ', epoch)
        for batch_idx, (data, target) in enumerate(self.train_loader):
            #             if self.cfig['use_rnn']:
            #                 sequence_length, input_size = 28, 28
            #                 data = data.reshape(-1, sequence_length, input_size)
            data, target = data.to(self.device), target.to(self.device)
            self.optim.zero_grad()
            if batch_idx == 0: print(data.shape)
            data = data.float()  # already moved to self.device; torch.cuda.FloatTensor would break CPU runs
            if self.cfig['model_name'][-3:] == 'rnn':
                data = data.permute([1, 0, 2, 3, 4])
            pred = self.model(data)  # here should be careful
            if batch_idx == 0:
                print('data.shape', data.shape)
                print('pred.shape', pred.shape)
                print('Epoch: ', epoch)

            #loss = LossPool(pred, target, self.cfig, loss_name=self.cfig['loss_name']).get_loss()
            loss = nn.CrossEntropyLoss()(pred, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5)
            self.optim.step()
            print_str = 'train epoch=%d, batch_idx=%d/%d, loss=%.4f\n' % (
                epoch, batch_idx, len(self.train_loader), loss.item())  # .item(), not .data[0], on PyTorch >= 0.4

            pred_cls = pred.data.max(1)[1]
            pred_list += pred_cls.data.cpu().numpy().tolist()
            target_list += target.data.cpu().numpy().tolist()
            loss_list.append(loss.data.cpu().numpy().tolist())

        accuracy = accuracy_score(target_list, pred_list)
        #-------------------------save to csv -----------------------#
        if not os.path.exists(train_csv):
            csv_info = ['epoch', 'loss', 'accuracy']
            init_csv = pd.DataFrame()
            for key in csv_info:
                init_csv[key] = []
            init_csv.to_csv(train_csv)
        df = pd.read_csv(train_csv)
        data = pd.DataFrame()
        tmp_epoch = df['epoch'].tolist()
        tmp_epoch.append(epoch)

        #print('------------------', tmp_epoch)
        print('train accuracy: ', accuracy)
        tmp_loss = df['loss'].tolist()
        tmp_loss.append(np.mean(loss_list))
        tmp_acc = df['accuracy'].tolist()
        tmp_acc.append(accuracy)

        data['epoch'], data['loss'], data['accuracy'] = tmp_epoch, tmp_loss, tmp_acc
        data.to_csv(train_csv)
        print('train acc: ', accuracy)
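The read-modify-rewrite logging above reloads the whole CSV every epoch, and because to_csv is called without index=False the file also carries a stray index column. A minimal append-mode sketch of the same idea (path and column names are illustrative, not from the original class):

    import os
    import pandas as pd

    def log_epoch(csv_path, epoch, loss, accuracy):
        # One row per epoch; write the header only when the file is new.
        row = pd.DataFrame([{'epoch': epoch, 'loss': loss, 'accuracy': accuracy}])
        row.to_csv(csv_path, mode='a', index=False,
                   header=not os.path.exists(csv_path))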
Example #2
 def test(self):
     model_root = osp.join(self.save_path, 'models')
     model_list = os.listdir(model_root)
     Acc, F1, Recl, Prcn = [], [], [], []
     for epoch in range(len(model_list)):
         print('epoch: ', epoch)
         model_pth = '%s/model_epoch_%04d.pth' % (model_root, epoch)
         accuracy, f1, recall, precision = self.test_epoch(model_pth)
         print(accuracy, f1, recall, precision)
         Acc.append(accuracy)
         F1.append(f1)
         Recl.append(recall)
         Prcn.append(precision)
     data = pd.DataFrame()
     data['accuracy'] = Acc
     data['f1'] = F1
     data['recall'] = Recl
     data['precision'] = Prcn
     print('Acc: ', Acc)
     print('f1: ', F1)
     print('Recl: ', Recl)
     print('Prcn: ', Prcn)
     if not os.path.exists(self.csv_path):
         os.mkdir(self.csv_path)
     test_csv = os.path.join(self.csv_path, 'test.csv')
     data.to_csv(test_csv)
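Indexing checkpoints with range(len(model_list)) assumes the models directory contains nothing but consecutively numbered .pth files. A more defensive variant of the same loop (a sketch under that assumption, not code from the original repository):

    import glob
    import os.path as osp

    # inside test(): glob the checkpoints explicitly and sort by name
    for model_pth in sorted(glob.glob(osp.join(model_root, 'model_epoch_*.pth'))):
        accuracy, f1, recall, precision = self.test_epoch(model_pth)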
Example #3
    def test_epoch(self, epoch):
        self.model.eval()
        if not os.path.exists(self.csv_path):
            os.mkdir(self.csv_path)
        eval_csv = os.path.join(self.csv_path, 'test.csv')
        pred_list, target_list, loss_list, pos_list = [], [], [], []
        for batch_idx, item in enumerate(self.test_loader):
            if self.cfig['model_name'] in ['disrnn', 'trnn']:
                data, target, dist = item
                data, target, dist = data.to(self.device), target.to(
                    self.device), dist.to(self.device)
            else:
                data, target, ID = item
                data, target = data.to(self.device), target.to(self.device)

            if self.cfig['model_name'][-3:] == 'rnn':
                data = data.permute([1, 0, 2, 3, 4])
            #data = pack_padded_sequence(data, [3] * self.cfig['batch_size'])   # if use cell, we don't need this
            self.optim.zero_grad()
            if self.cfig['model_name'] in ['disrnn', 'trnn']:
                pred = self.model(data, dist)
            else:
                pred = self.model(data)
            pred_prob = F.softmax(pred, dim=1)
            #loss = self.criterion(pred, target)
            loss = nn.CrossEntropyLoss()(pred, target)

            pred_cls = pred.data.max(1)[1]  # not tested yet
            pos_list += pred_prob[:, 1].data.cpu().numpy().tolist()
            pred_list += pred_cls.data.cpu().numpy().tolist()
            target_list += target.data.cpu().numpy().tolist()
            loss_list.append(loss.data.cpu().numpy().tolist())

        accuracy = accuracy_score(target_list, pred_list)
        print(confusion_matrix(target_list, pred_list))
        fpr, tpr, threshold = metrics.roc_curve(target_list, pos_list)
        roc_auc = metrics.auc(fpr, tpr)
        #-------------------------save to csv -----------------------#
        if not os.path.exists(eval_csv):
            csv_info = ['epoch', 'loss', 'auc', 'accuracy']
            init_csv = pd.DataFrame()
            for key in csv_info:
                init_csv[key] = []
            init_csv.to_csv(eval_csv)
        df = pd.read_csv(eval_csv)
        data = pd.DataFrame()
        tmp_epoch = df['epoch'].tolist()
        tmp_epoch.append(epoch)
        tmp_auc = df['auc'].tolist()
        tmp_auc.append(roc_auc)
        #print ('------------------', tmp_epoch)
        tmp_loss = df['loss'].tolist()
        tmp_loss.append(np.mean(loss_list))
        tmp_acc = df['accuracy'].tolist()
        tmp_acc.append(accuracy)

        data['epoch'], data['loss'], data['auc'], data['accuracy'] = tmp_epoch, tmp_loss, tmp_auc, tmp_acc
        data.to_csv(eval_csv)
        print('test accuracy: ', accuracy, 'test auc: ', roc_auc)
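Note that test_epoch still calls self.optim.zero_grad() and builds an autograd graph it never uses. The usual evaluation pattern wraps the loop in torch.no_grad(); a minimal sketch, with model, loader, and device standing in for the attributes above:

    import torch

    model.eval()
    with torch.no_grad():              # no gradient tracking during evaluation
        for data, target in loader:    # adapt the unpacking to the loader's items
            pred = model(data.to(device))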
Example #4
    def eval_epoch(self, epoch):

        self.model.eval()
        if not os.path.exists(self.csv_path):
            os.mkdir(self.csv_path)
        eval_csv = os.path.join(self.csv_path, 'eval.csv')
        pred_list, target_list, loss_list = [], [], []
        print()
        for batch_idx, (data, target) in enumerate(self.val_loader):

            data, target = data.to(self.device), target.to(self.device)
            self.optim.zero_grad()
            #print ('=================',data.shape)
            data = data.float()  # already moved to self.device; avoids hard-coding CUDA tensors
            if self.cfig['model_name'][-3:] == 'rnn':
                data = data.permute([1, 0, 2, 3, 4])
            pred = self.model(data)  # here should be careful
            #loss = self.criterion(pred, target)
            loss = nn.CrossEntropyLoss()(pred, target)
            pred_cls = pred.data.max(1)[1]  # not tested yet
            pred_list += pred_cls.data.cpu().numpy().tolist()
            target_list += target.data.cpu().numpy().tolist()
            loss_list.append(loss.data.cpu().numpy().tolist())

        accuracy = accuracy_score(target_list, pred_list)
        #-------------------------save to csv -----------------------#
        if not os.path.exists(eval_csv):
            csv_info = ['epoch', 'loss', 'accuracy']
            init_csv = pd.DataFrame()
            for key in csv_info:
                init_csv[key] = []
            init_csv.to_csv(eval_csv)
        df = pd.read_csv(eval_csv)
        data = pd.DataFrame()
        tmp_epoch = df['epoch'].tolist()
        tmp_epoch.append(epoch)

        #print ('------------------', tmp_epoch)
        print('val accuracy: ', accuracy)
        tmp_loss = df['loss'].tolist()
        tmp_loss.append(np.mean(loss_list))
        tmp_acc = df['accuracy'].tolist()
        tmp_acc.append(accuracy)

        data['epoch'], data['loss'], data['accuracy'] = tmp_epoch, tmp_loss, tmp_acc
        print('max val accuracy at: ', max(tmp_acc),
              tmp_acc.index(max(tmp_acc)))
        data.to_csv(eval_csv)
Example #5
def label_to_file(label, img, filepath):
    data_frame = []
    for j, _ in enumerate(label):
        label_part = [label[j]]
        img_part = img[j]
        label_part.extend(img_part)
        data = tuple(label_part)
        data_frame.append(data)
    data = pd.DataFrame(data_frame, index=range(len(label)))
    try:
        data.to_csv(filepath, index=False, header=False)
    except FileNotFoundError:
        print("Invalid file path")
    else:
        print("Parameters are saved to " + filepath)
Example #6
 def _initialize_data(self) -> pd.DataFrame:
     filename = self.metadata_processed_filename.format(
         timit_type=self.ds_type, rows_slice=self.row_count_in_features)
     filepath = os.path.join(self.ds_path, filename)
     if not os.path.isfile(filepath):
         metadata = pd.read_csv(os.path.join(self.ds_path,
                                             self.metadata_filename),
                                comment=';',
                                delimiter="  ",
                                index_col=self.metadata_id,
                                names=self.metadata_cols,
                                engine="python")
         # add sample location and populate data
         columns = metadata.columns.to_list()
         columns.extend([self.sample_loc_column, self.sample_split_column])
         data = pd.DataFrame(columns=columns)
         for root, subdirs, filenames in os.walk(self.ds_path):
             for filename in filenames:
                 if os.path.splitext(filename)[1] != ".scores":
                     continue
                 features_row_num = 1
                 file_path = os.path.join(root, filename)
                 with open(file_path, 'r') as f:
                     line = f.readline()
                     features_row_num = int(
                         line.split()[0]) // self.row_count_in_features
                 idx = os.path.split(root)[1][-4:].upper()
                 for i in range(features_row_num):
                     metadata_row: pd.Series = metadata.loc[idx].copy()
                     metadata_row[self.sample_loc_column] = file_path
                     metadata_row[self.sample_split_column] = i
                     metadata_row = metadata_row.rename(
                         f"{metadata_row.name}_{os.path.splitext(filename)[0]}_{i}"
                     )
                     data = pd.concat([data, metadata_row.to_frame().T])  # DataFrame.append was removed in pandas 2.0
         # alter DR mapping
         data = data[~data["DR"].isin(self.metadata_dr_exclude)]
         for k, v in self.metadata_dr_mapping.items():
             data.loc[data["DR"] == k, "DR"] = v
         if self.ds_type == config.Timit2Groups:
             data = data[data["DR"].isin([1, 2])]
         data[self.label_column] = pd.factorize(data[self.label_column])[0]
         # save file
         data.to_csv(filepath, index=False)
     data = pd.read_csv(filepath)
     return data
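pd.factorize in the final transformation maps arbitrary labels to dense integer codes, for example:

    import pandas as pd

    codes, uniques = pd.factorize(pd.Series(['DR1', 'DR3', 'DR1']))
    # codes   -> array([0, 1, 0])
    # uniques -> Index(['DR1', 'DR3'], dtype='object')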
Example #7
def dnn_classification(data, pcap_file_name):

    features = [
        'Src Port', 'Dst Port', 'Protocol', 'Fwd Pkt Len Max',
        'Fwd Pkt Len Std', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std',
        'Flow IAT Max', 'Fwd IAT Max', 'Bwd IAT Tot', 'Bwd IAT Std',
        'Bwd IAT Max', 'Bwd PSH Flags', 'Fwd Pkts/s', 'Bwd Pkts/s',
        'Pkt Len Mean', 'Pkt Len Std', 'FIN Flag Cnt', 'SYN Flag Cnt',
        'RST Flag Cnt', 'ACK Flag Cnt', 'Down/Up Ratio', 'Fwd Seg Size Avg',
        'Bwd Seg Size Avg', 'Init Bwd Win Byts', 'Idle Mean', 'Idle Max',
        'Idle Min'
    ]

    data_model = data[features]

    data_model_ndarray = data_model.apply(pd.to_numeric).values

    # Load model from pth file

    dnn_model = Deep_Neural_Network(D_in=len(features))

    dnn_model.load_state_dict(torch.load(
        '{}/Deep_Neural_Network.pth'.format(DIR_MODELS), map_location='cpu'),
                              strict=False)

    dnn_model.to(torch.device('cpu'))

    labels = dnn_predict(data_model_ndarray, dnn_model)

    data['Label'] = labels

    # Write out classified data to csv file

    labeled_flow_csv_path = '{}/{}_Flow_labeled.csv'.format(
        DIR_CLASSIFIED_FLOWS_DNN, pcap_file_name)

    print('Writing data classified by DNN model to {}...'.format(
        labeled_flow_csv_path))

    data.to_csv(labeled_flow_csv_path)
Example #8
def Detection(net, urlFilepath):
    try:
        #t = time.time()

        # CRAFT
        device = torch.device('cpu')
        # device = torch.device('cuda')
        cuda_stats = device.type != 'cpu'

        #"cuda":False, True를 False로 수정 
        args = {"trained_model":'/data/OCR_code/Pipeline/craft_mlt_25k.pth',
                "text_threshold":0.7,
                "low_text":0.4,
                "link_threshold":0.4,
                "cuda":cuda_stats, 
                "canvas_size":1280,
                "mag_ratio": 1.5,
                "poly":False,
                "show_time":False,
                "test_folder": "/data/OCR_dir/",
                "filepath": 'Data//FoodDetection/data/text_detection/RDProject/ocr_1000056.jpg',
                "refine" : False,
                 "refiner_model": 'weights/craft_refiner_CTW1500.pth'
        }

        #date = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))

        filename = urlFilepath.split("/")[-1]
        
        # check the saved image
        #filepath = "/Data/CRAFT_process/test_1/01_images/"+str(date)+filename.rstrip()
        
        filepath = urlFilepath

        if os.path.isfile(filepath):
            #print( "Yes. it is a file")

            ##if sys.argv[1] is null:
            # filepath = args["filepath"]

            # image_list = [args.filepath]
            image_list = [filepath]
            image_names = []
            image_paths = []

            # CUSTOMISE START
            ##start = '/Data/CRAFT_process/test_1/01_images'  
            start = filepath.split(filename)[0]    # determined flexibly from the file path

            for num in range(len(image_list)):
                image_names.append(os.path.relpath(image_list[num], start))


            crop_path = start+'%s_crop'%(filename.split('.')[0])
            
            if not os.path.isdir(crop_path):
                os.mkdir(crop_path)

            data = pd.DataFrame(columns=['image_name', 'word_bboxes', 'pred_words', 'align_text'])
            data['image_name'] = image_names
            
            box_idx = 0
            bbox_dict = {}

            # load data
            for k, image_path in enumerate(image_list):
#                 print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
                image = imgproc.loadImage(image_path)

                bboxes, polys, score_text, det_scores = test_.test_net(net, image, args["text_threshold"],
                                                                          args["link_threshold"],
                                                                          args["low_text"], args["cuda"], args["poly"],
                                                                          args)  # refinenet = None

                bbox_score = {}
                bbox_list = []

                for box_num in range(len(bboxes)):
                    if det_scores[box_num] < 0.85: # score filtering
                        continue
                    key = str(det_scores[box_num])
                    item = bboxes[box_num]
                    bbox_dict[box_idx] = item.tolist()
                    box_idx += 1
                    bbox_score[key] = item
                
                data.at[k, 'word_bboxes'] = bbox_score  # .at avoids chained-assignment pitfalls
                

            csv_file = start + '%s_data.csv' % filename.split('.')[0]  # <processed image name>_data.csv

            data.to_csv(csv_file, sep=',', na_rep='Unknown')
            del data

            data = pd.read_csv(csv_file)
            # Crop

            for image_num in range(data.shape[0]):
                image = cv2.imread(os.path.join(start, data['image_name'][image_num]))
                image_name = os.path.splitext(data['image_name'][image_num])[0]  # strip('.jpg') strips characters, not the suffix
                score_bbox = data['word_bboxes'][image_num].split('),')
                cropped_imgs = crop_words_.generate_words(image_name, score_bbox, image, crop_path, bbox_show=False)
            
            print("=========Text Detection and Crop Ends ============")
              
#         else:
#             raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filepath)


    except Exception as e:  # use Exception so any error message can be reported
        # print('An exception occurred.', e)
        traceback.print_exc()
        return str(e), 400
    return [bbox_dict, cropped_imgs], 200
Example #9
        softmax = torch.exp(output_test).detach().cpu()
        test_prob = list(softmax.numpy())
        test_predictions = np.argmax(test_prob, axis=1)
        tacc = accuracy_score(y_test.cpu(), test_predictions)
        test_acc.append(tacc)

    eval_list.append([
        round(np.mean(train_accs), 3),
        round(np.mean(train_losses), 3),
        round(np.mean(test_acc), 3),
        round(np.mean(test_loss), 3)
    ])

    test_check = round(np.mean(test_acc), 3)

    if test_check >= 0.65:
        PATH = './' + str(test_check) + 'ad_net.pth'
        torch.save(model.state_dict(), PATH)

    data = pd.DataFrame(eval_list,
                        columns=['Accuracy', "Loss", "Test Acc", "Test Loss"])
    data.to_csv("train_results.csv")

    # printing the validation loss
    print('Epoch : ', epoch + 1, '\t =>', 'Train loss :',
          round(np.mean(train_losses), 3), ', Train Acc :',
          round(np.mean(train_accs), 3), ', Test loss :',
          round(np.mean(test_loss), 3), ', Test Acc :',
          round(np.mean(test_acc), 3))
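The checkpoint saved above can be restored later with the matching load_state_dict call; a minimal sketch, where Model is a hypothetical stand-in for whatever architecture produced the state dict:

    import torch

    model = Model()  # hypothetical: must match the saved architecture
    model.load_state_dict(torch.load('./0.65ad_net.pth', map_location='cpu'))
    model.eval()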
Example #10
    def train_epoch(self, epoch):
        self.model.train()
        if not os.path.exists(self.csv_path):
            os.mkdir(self.csv_path)
        train_csv = os.path.join(self.csv_path, 'train.csv')
        pred_list, target_list, loss_list, pos_list = [], [], [], []
        print('epoch: ', epoch)

        for batch_idx, item in enumerate(self.train_loader):
            if self.cfig['model_name'] in ['disrnn']:
                data, target, dist = item
                data, target, dist = data.to(self.device), target.to(self.device), dist.to(self.device)
            else:
                data, target, ID = item
                data, target = data.to(self.device), target.to(self.device)
            

            if self.cfig['model_name'][-3:] == 'rnn':
                data = data.permute([1,0,2,3,4])
            
            self.optim.zero_grad()
            #print ('=================',data.shape)
            if self.cfig['model_name'] in ['disrnn']:
                pred = self.model(data, dist)
            else:
                pred = self.model(data)             # here should be careful
            pred_prob = F.softmax(pred, dim=1)

            if batch_idx == 0:
                print('data.shape', data.shape)
                print('pred.shape', pred.shape)
                print('Epoch: ', epoch)
            loss = nn.CrossEntropyLoss()(pred, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 4)
            self.optim.step()
            print_str = 'train epoch=%d, batch_idx=%d/%d, loss=%.4f\n' % (
                epoch, batch_idx, len(self.train_loader), loss.item())  # .item(), not .data[0], on PyTorch >= 0.4
            #print(print_str)
            pred_cls = pred.data.max(1)[1]
            pos_list += pred_prob[:, 1].data.cpu().numpy().tolist()
            pred_list += pred_cls.data.cpu().numpy().tolist()
            target_list += target.data.cpu().numpy().tolist()
            loss_list.append(loss.data.cpu().numpy().tolist())
        try:
            print(1000 * self.model.dislstmcell.a.grad, ' a grad')
            print(self.model.dislstmcell.a.data, self.model.dislstmcell.c.data)
            print(1000 * self.model.dislstmcell.c.grad, 'c grad')
        except AttributeError:  # the model may not expose dislstmcell a/c parameters
            print('a.grad none')
        print(confusion_matrix(target_list, pred_list))
        accuracy = accuracy_score(target_list, pred_list)
        fpr, tpr, threshold = metrics.roc_curve(target_list, pos_list)
        roc_auc = metrics.auc(fpr, tpr)
        #-------------------------save to csv -----------------------#
        if not os.path.exists(train_csv):
            csv_info = ['epoch', 'loss', 'auc', 'accuracy']
            init_csv = pd.DataFrame()
            for key in csv_info:
                init_csv[key] = []
            init_csv.to_csv(train_csv)
        df = pd.read_csv(train_csv)
        data = pd.DataFrame()
        tmp_epoch = df['epoch'].tolist()
        tmp_epoch.append(epoch)
        tmp_auc = df['auc'].tolist()
        tmp_auc.append(roc_auc)
        #print('------------------', tmp_epoch)
        tmp_loss = df['loss'].tolist()
        tmp_loss.append(np.mean(loss_list))
        
        tmp_acc = df['accuracy'].tolist()
        tmp_acc.append(accuracy)
        
        data['epoch'], data['loss'], data['auc'], data['accuracy'] = tmp_epoch, tmp_loss, tmp_auc, tmp_acc
        print('train accuracy: ', accuracy, 'train auc: ', roc_auc)
        data.to_csv(train_csv)
Example #11
def rfc_classification(data, pcap_file_name):
    """
    Args:
        data: pd.DataFrame
    """

    print('Binning data for Random Forest Classifier...')

    bins = 5

    # binning columns
    for feature in data.columns[7:]:
        data[feature] = pd.cut(data[feature], bins, labels=False)

    data_model = data[[
        'Src Port', 'Dst Port', 'Protocol', 'Flow Duration', 'Tot Fwd Pkts',
        'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts',
        'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean',
        'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min',
        'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s',
        'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min',
        'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max',
        'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std',
        'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags',
        'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len',
        'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max',
        'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt',
        'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt',
        'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio',
        'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg',
        'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg',
        'Bwd Byts/b Avg', 'Bwd Pkts/b Avg', 'Bwd Blk Rate Avg',
        'Subflow Fwd Pkts', 'Subflow Fwd Byts', 'Subflow Bwd Pkts',
        'Subflow Bwd Byts', 'Init Fwd Win Byts', 'Init Bwd Win Byts',
        'Fwd Act Data Pkts', 'Fwd Seg Size Min', 'Active Mean', 'Active Std',
        'Active Max', 'Active Min', 'Idle Mean', 'Idle Std', 'Idle Max',
        'Idle Min', 'Total Sum Bytes', 'Max / Avg', 'Total Packets'
    ]]

    data_model_ndarray = data_model.values

    # Unpickle rfc model and classify data

    with open('./{}/rfc_model.pkl'.format(DIR_MODELS), 'rb') as rfc_pkl:

        rfc_model = pickle.load(rfc_pkl)

    print('Classifying data using Random Forest model...')

    labels = rfc_model.predict(data_model_ndarray)

    data['Label'] = labels

    # Write out classified data to csv file

    labeled_flow_csv_path = '{}/{}_Flow_labeled.csv'.format(
        DIR_CLASSIFIED_FLOWS_RFC, pcap_file_name)

    print('Writing data classified by Random Forest model to {}...'.format(
        labeled_flow_csv_path))

    # print('Data: ', data)

    data.to_csv(labeled_flow_csv_path)
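pd.cut with labels=False, as used for the binning above, replaces each value with the index of its equal-width bin:

    import pandas as pd

    s = pd.Series([1.0, 4.0, 9.0, 10.0])
    print(pd.cut(s, 5, labels=False).tolist())  # [0, 1, 4, 4]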
Example #12
    def evaluate(self,
                 data_loader,
                 model,
                 model_cat,
                 criterion,
                 val_loss=None):
        logger.info("In evaluate")

        # switch to evaluate mode
        model.eval()

        self.on_start_epoch(False, model, criterion, data_loader)

        if self.state['use_pb']:
            data_loader = tqdm(data_loader, desc='Test')

        end = time.time()
        scores = []
        preds = []
        filenames = []
        catlist = []
        value_list = []
        with torch.no_grad():
            for i, input in enumerate(data_loader):  # note: `input` shadows the builtin, kept from the original
                if self.state['use_gpu']:
                    input[0] = input[0].to('cuda')

                # measure data loading time
                self.state['iteration'] = i
                self.state['data_time_batch'] = time.time() - end
                self.state['data_time'].add(self.state['data_time_batch'])

                self.state['input'] = input
                self.state['target'] = None
                cat = model_cat(input[0])
                testing = self.state['testing']

                self.on_start_batch(False, model, criterion, data_loader)

                self.on_forward(False, model, criterion, data_loader)

                cats, values = decode_outputs(self.state['threshold'],
                                              self.state['value_decoding'],
                                              self.state['category_decoding'],
                                              cat, self.state['output'])
                predictions = get_outputs(self.state['category_attrs_mapping'],
                                          self.state['attrs_value_mapping'],
                                          cats, values,
                                          self.state['value_decoding'])
                #                 print("+++")
                #                 print(attrs)
                #                 print("------")
                #                 print(predictions)
                #                 print("******")
                #                 exit()
                #                 print(cats[0])
                #                 exit()
                #                 print(predictions)
                #                 print(type(predictions[0]))
                #                 l=list()
                #                 l.append(cats[0])
                # predictions[0]['vertical']=cats
                n_items = len(predictions)

                for t in range(n_items):
                    predictions[t]['vertical'] = [cats[t]]
                preds.append(predictions)
                filenames.append(input[1])
                # measure elapsed time
                self.state['batch_time_current'] = time.time() - end
                self.state['batch_time'].add(self.state['batch_time_current'])

                end = time.time()
        import pandas as pd

        filenames = [f for sublist in filenames for f in sublist]
        preds = [f for sublist in preds for f in sublist]

        data = pd.DataFrame()
        data["filename"] = filenames
        data["predictions"] = preds
        file_names = self.state['output_file']
        val_loss = 0.000
        file_names = file_names.split('.csv')[0] + str(round(
            val_loss, 3)).replace('.', '_') + ".csv"
        data.to_csv(file_names, index=False)
        return 0