def train(self, train_corp):
        if not self._debug:
            train_corp = get_nice_data(train_corp)
        train_corp = Solution._remove_differencies(train_corp)
        texts = train_corp[0]

        target = self._encode_opinions(train_corp[1])
        features_list = []

        token_list = []
        for text in texts:
            tokens = Solution._text_tokenize(text)
            token_list.append([])

            for token in tokens:
                token_list[-1].append(token)

                for ngram in Solution._get_ngrams(token):
                    self._ngr_add(ngram)

        for tokens in token_list:
            features_list.append(self._get_features_from_tokens(tokens))

        if self._debug:
            print 'Initial number of features:', len(features_list[0])

        features_list = self._feature_transformer.fit(features_list, target)

        if self._debug:
            print 'Reduced number of features:', len(features_list[0])

        self._clf.fit(features_list, target)
        all_ops.update(set(op))

    d = dict()
    for op in all_ops:
        d[op] = len(d)

    return d

def transform(ops, tr):
    """Map every item of every sequence in ``ops`` through the lookup ``tr``.

    ``ops`` is an iterable of iterables; ``tr`` is indexed with each inner
    item (``tr[item]``).  Returns a new list of lists with the looked-up
    values, preserving order.  A missing key propagates the lookup error
    raised by ``tr``.
    """
    return [[tr[item] for item in op] for op in ops]

if True:
    train_data = get_nice_data(get_data('reviews.json'))
    train_data = list(map(lambda x: np.array(x), train_data))

    scores = []
    for train_idx, test_idx in KFold(len(train_data[0]), n_folds=7, \
            shuffle=True):
        X_train = train_data[0][train_idx]
        Y_train = train_data[1][train_idx]

        X_test, Y_test = Solution._remove_differencies((train_data[0][test_idx],\
                train_data[1][test_idx]), True)

        sol = Solution(True)
        sol.train((X_train, Y_train))

        # sometimes it says "AttributeError: '_ConstantPredictor'