Exemplo n.º 1
 def test_get_bow_and_categories(self):
     """Check the training bag-of-words matrix shape and the label count.

     Loads the IMDB dataset from ``config.DATASETS_FOLDER`` and asserts that
     the training matrix is ``(25000, num_features)`` and that there are
     25000 training categories. The test split is ignored.
     """
     imdb = Imdb(config.DATASETS_FOLDER)
     num_features = 5000
     # Cap the vocabulary at num_features so the column count asserted below
     # is well defined.
     (train_x_bow, train_categories), (_, _) = imdb.get_bow_and_categories(
         max_features=num_features)
     self.assertEqual(train_x_bow.shape, (25000, num_features))
     self.assertEqual(len(train_categories), 25000)
Exemplo n.º 2
    def __init__(self, image_set, year, use_diff=False):
        """Set up a PASCAL VOC image database for one year and image set.

        Args:
            image_set: Name of the image set; it becomes part of the dataset
                name ``voc_<year>_<image_set>``.
            year: VOC year as a string; it is concatenated into the dataset
                name and into the ``VOC<year>`` data directory.
            use_diff: Stored under ``config['use_diff']``; when true the
                dataset name also gets a ``_diff`` suffix.

        Raises:
            AssertionError: If the devkit path or the data path does not
                exist on disk.
        """
        name = 'voc_' + year + '_' + image_set
        if use_diff:
            name += '_diff'
        Imdb.__init__(self, name)
        self._year = year
        self._image_set = image_set
        self._devkit_path = self._get_default_path()
        self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
        # NOTE(review): the entries after '__background__' were missing and
        # the tuple was left unclosed. The list below is the standard set of
        # 20 PASCAL VOC object categories -- confirm it against the upstream
        # version of this file before relying on the class indices.
        self._classes = (
            '__background__',  # always index 0
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor',
        )
        # Map each class name to its position in the class tuple.
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._image_ext = '.jpg'
        self._image_index = self._load_image_set_index()
        # Default to roidb handler
        self._roidb_handler = self.gt_roidb
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'

        # PASCAL specific config options
        self.config = {
            'cleanup': True,
            'use_salt': True,
            'use_diff': use_diff,
            'matlab_eval': False,
            'rpn_file': None,
        }

        assert os.path.exists(self._devkit_path), \
          'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
          'Path does not exist: {}'.format(self._data_path)
Exemplo n.º 3
    def rpn_roidb(self):
        """Return the roidb built from RPN proposals.

        For the 2007 data, or for any image set other than ``'test'``, the
        ground-truth roidb is loaded, passed to ``_load_rpn_roidb`` and merged
        with its result via ``Imdb.merge_roidbs``. Otherwise only the RPN
        roidb is loaded, with ``None`` passed in place of the ground truth.

        Returns:
            The merged roidb, or the RPN-only roidb for the remaining case.
        """
        if int(self._year) == 2007 or self._image_set != 'test':
            gt_roidb = self.gt_roidb()
            rpn_roidb = self._load_rpn_roidb(gt_roidb)
            roidb = Imdb.merge_roidbs(gt_roidb, rpn_roidb)
        else:
            # Without this branch the merged roidb above was always
            # overwritten by the RPN-only one.
            roidb = self._load_rpn_roidb(None)

        return roidb
Exemplo n.º 4
# Accuracy:
#     0.76628 (with 5k words)
#     0.80664 (with 50k words)
#     0.81732 (with 250k words)
#     0.81828 (with 500k words)
#     0.81892 (with entire vocab)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import config
from datasets.imdb import Imdb

# Fetch the fastText (wiki-news-300d-1M) features and labels for both splits.
print('Loading dataset...', flush=True)
dataset = Imdb(config.DATASETS_FOLDER)
(train_features, train_labels), (test_features, test_labels) = (
    dataset.get_bof_fasttext_wiki_news_300d_1M())

# Fit a logistic regression classifier on the training split.
print('Training model...', flush=True)
classifier = LogisticRegression()
classifier.fit(train_features, train_labels)

# Evaluate on the held-out test split and report the accuracy.
predictions = classifier.predict(test_features)
acc = accuracy_score(test_labels, predictions)
print(f'Accuracy: {acc}', flush=True)
Exemplo n.º 5

from torch import FloatTensor as T
from torch.autograd import Variable as V
from torch.nn import CrossEntropyLoss
from torch import nn
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

import config
from datasets.imdb import Imdb

# Load dataset
print('Loading dataset...')
imdb = Imdb(config.DATASETS_FOLDER)
# The unpack target had lost its opening "(train_x_bow," part; it is restored
# here so that train_x_bow, which is used below, is actually bound.
(train_x_bow, train_y), (test_x_bow, test_y) = imdb.get_bow_and_categories(
    max_features=5000)

# Pack dataset to torch Variables
# NOTE(review): .toarray() presumably densifies a sparse matrix -- confirm
# the type returned by get_bow_and_categories.
train_x_bow = V(T(train_x_bow.toarray()), requires_grad=False)
test_x_bow = V(T(test_x_bow.toarray()), requires_grad=False)
# Labels are built as float tensors and then converted to long.
train_y = V(T(train_y), requires_grad=False).long()
test_y = V(T(test_y), requires_grad=False).long()

# Compute train mean and std
# Both statistics are reduced over dimension 0 of the training matrix.
train_mean = train_x_bow.mean(0)
train_std = train_x_bow.std(0)

# Normalize train and test sets
Exemplo n.º 6
 def test_get_texts_and_categories(self):
     """Check that the training split has 25000 texts and 25000 categories.

     The dataset is loaded from ``config.DATASETS_FOLDER``; the test split
     is unpacked but ignored.
     """
     dataset = Imdb(config.DATASETS_FOLDER)
     (texts, labels), (_, _) = dataset.get_texts_and_categories()
     expected_size = 25000
     self.assertEqual(len(texts), expected_size)
     self.assertEqual(len(labels), expected_size)
Exemplo n.º 7
# Accuracy:
#   0.85144 (with 5k words)
#   0.87032 (with the entire vocab)


from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import config
from datasets.imdb import Imdb

# Fetch bag-of-words features (vocabulary capped at 5000) and labels.
print('Loading dataset...')
dataset = Imdb(config.DATASETS_FOLDER)
(train_features, train_labels), (test_features, test_labels) = (
    dataset.get_bow_and_categories(max_features=5000))

# Fit a logistic regression classifier on the training split.
print('Training model...')
classifier = LogisticRegression()
classifier.fit(train_features, train_labels)

# Evaluate on the held-out test split and report the accuracy.
predictions = classifier.predict(test_features)
acc = accuracy_score(test_labels, predictions)
print(f'Accuracy: {acc}')