Пример #1
0
    parser.add_argument(
        '--model-dir',
        type=str,
        default='../models',
        metavar='str',
        help=f'path where fitted model will be saved. Dataset name is added')

    opt = parser.parse_args()

    if not opt.dataset:
        parser.error('Missing dataset name')

    dataset_name = opt.dataset

    dataset = Dataset.load(dataset_name=dataset_name,
                           pickle_path=os.path.join(opt.pickle_dir,
                                                    f'{dataset_name}.pickle'))

    dataset.show()

    singlelabel = dataset.classification_type == 'singlelabel'

    if singlelabel:
        ModelArgsClass = ClassificationArgs
        ModelClass = ClassificationModel
        confusion_function = confusion_matrix
    else:
        ModelArgsClass = MultiLabelClassificationArgs
        ModelClass = MultiLabelClassificationModel
        confusion_function = multilabel_confusion_matrix
Пример #2
0
from sklearn.svm import LinearSVC
from sklearn.metrics import f1_score, make_scorer


def classify_and_test(X, y, Xte, yte, average=1):
    """Train a default ``LinearSVC`` and print its F1 score on the test data.

    Args:
        X: Training feature matrix passed to ``LinearSVC.fit``.
        y: Training labels passed to ``LinearSVC.fit``.
        Xte: Test feature matrix passed to ``LinearSVC.predict``.
        yte: True test labels the predictions are scored against.
        average: When greater than 1, the flat prediction vector is reshaped
            into rows of ``average`` consecutive predictions, each row is
            averaged, and the final prediction is positive only when that
            mean is strictly above 0.5. With the default of 1 the raw
            predictions are scored directly.
            NOTE(review): presumably each test item is represented by
            ``average`` consecutive rows of ``Xte`` -- confirm with callers.

    Returns:
        None. The score is only printed, as ``f1=<value>`` with three decimals.
    """
    classifier = LinearSVC().fit(X, y)
    predictions = classifier.predict(Xte)

    if average > 1:
        # One row per group of `average` consecutive predictions; a group is
        # positive when more than half of its members are predicted positive.
        grouped = predictions.reshape(-1, average)
        predictions = grouped.mean(axis=1) > 0.5

    score = f1_score(yte, predictions)
    print(f'f1={score:.3f}')


# Load the 'reuters21578' dataset through the project's Dataset loader.
dataset = Dataset.load('reuters21578', pickle_path='./reuters.pickle')

# Raw documents and targets of the development and test splits.
Xtr = dataset.devel_raw
Xte = dataset.test_raw
ytr = dataset.devel_target
yte = dataset.test_target

# Convert the raw documents into per-document term-count matrices. The
# vocabulary is fitted on the development split only (min_df=5) and then
# reused unchanged for the test split.
counter = CountVectorizer(min_df=5)
Xtr = counter.fit_transform(Xtr)
Xte = counter.transform(Xte)

# Row sums of the count matrices, flattened to 1-D: the total number of
# counted term occurrences in each document.
train_nwords = Xtr.sum(axis=1).getA1()
test_nwords = Xte.sum(axis=1).getA1()

# Replace the counts with tf-idf weights; the transformer is fitted on the
# development split and applied as-is to the test split.
tfidf = TfidfTransformer()
Xtr = tfidf.fit_transform(Xtr)
Xte = tfidf.transform(Xte)