Пример #1
0
def run_classification_test(mat, true_labels, binarize=True,
                            percentage_train=0.8,
                            print_train_test_set_stat=True,
                            test_thresholds=False, random_seed=None,
                            d_args=None, d_triples=None):
    """Split ``mat``/``true_labels`` into train and test parts and classify.

    The steps run in this order:

    1. When ``binarize`` is truthy, the matrix is replaced by the result of
       ``tm.binarize_sparse_matrix(mat)``.
    2. Train/test indexes are obtained from
       ``get_stratified_train_test_indexes(true_labels, percentage_train,
       random_seed)``.
    3. ``split_matrix_to_train_and_test`` turns those indexes into the train
       and test matrices and their label sets.
    4. ``classify`` is called on the four pieces plus ``test_thresholds``.

    Args:
        mat: Feature matrix handed to the binarizer and the splitter.
        true_labels: Labels belonging to ``mat``; used for the index split
            and for the matrix split.
        binarize: Binarize the whole matrix before splitting when truthy.
        percentage_train: Forwarded to the stratified index helper
            (presumably the training share of the data -- confirm in the
            helper).
        print_train_test_set_stat: Forwarded to the splitter as
            ``print_stat``.
        test_thresholds: Forwarded unchanged to ``classify``.
        random_seed: Forwarded to the stratified index helper.
        d_args: Not used by the active code path; kept for the pre-split
            index strategy ``get_train_test_indexes_presplit(d_args)``,
            which is currently disabled.
        d_triples: Not used by the active code path; kept for the disabled
            triple-based strategy ``get_train_test_indices_from_triples``.

    Returns:
        Whatever ``classify`` returns.
    """
    # Optional binarization is applied to the full matrix, before any split.
    features = tm.binarize_sparse_matrix(mat) if binarize else mat

    logging.info('preparing train and test set.')

    # Active strategy: stratified split over the labels. The pre-split
    # (d_args) and triple-based (d_triples) strategies are disabled here.
    train_idx, test_idx = get_stratified_train_test_indexes(
        true_labels, percentage_train, random_seed)

    pieces = split_matrix_to_train_and_test(
        features, true_labels, train_idx, test_idx,
        print_stat=print_train_test_set_stat)
    train_part, test_part, train_targets, test_targets = pieces

    return classify(train_part, test_part, train_targets, test_targets,
                    test_thresholds)
Пример #2
0
def run_classification_test(
        mat,
        true_labels,
        binarize=True,
        percentage_train=0.8,
        print_train_test_set_stat=True,
        test_thresholds=False,
        random_seed=None,
        d_args=None,
        d_triples=None):
    """Run one classification experiment on a stratified train/test split.

    The matrix is optionally binarized with ``tm.binarize_sparse_matrix``,
    then split according to the indexes returned by
    ``get_stratified_train_test_indexes``, and the resulting train/test
    matrices and labels are passed to ``classify``.

    Args:
        mat: Feature matrix to (optionally) binarize and then split.
        true_labels: Labels for ``mat``; drive the stratified index split.
        binarize: When truthy, binarize ``mat`` before it is split.
        percentage_train: Passed through to the stratified index helper
            (looks like the training proportion -- verify in the helper).
        print_train_test_set_stat: Passed to
            ``split_matrix_to_train_and_test`` as ``print_stat``.
        test_thresholds: Passed through to ``classify``.
        random_seed: Passed through to the stratified index helper.
        d_args: Unused by the enabled code; only the disabled
            ``get_train_test_indexes_presplit`` strategy would read it.
        d_triples: Unused by the enabled code; only the disabled
            ``get_train_test_indices_from_triples`` strategy would read it.

    Returns:
        The value returned by ``classify``.
    """
    # Binarize the complete matrix up front so both parts share the same
    # representation.
    data = mat
    if binarize:
        data = tm.binarize_sparse_matrix(data)

    logging.info('preparing train and test set.')

    # Stratified split is the enabled strategy; the pre-split and
    # triple-based alternatives are not wired in.
    index_pair = get_stratified_train_test_indexes(
        true_labels, percentage_train, random_seed)
    rows_train, rows_test = index_pair

    x_train, x_test, y_train, y_test = split_matrix_to_train_and_test(
        data,
        true_labels,
        rows_train,
        rows_test,
        print_stat=print_train_test_set_stat,
    )

    result = classify(x_train, x_test, y_train, y_test, test_thresholds)
    return result
Пример #3
0
            _train_idxes = np.delete(np.arange(0,len(_d_triples)), w1w2_idxs);
            _test_idxes = w1w2_idxs;
            print('Testing Context - ArgL - ArgR triples: \n{}'.format('\n'.join(['{} - {}'.format(i, _d_triples.get_triple(i)) for i in w1w2_idxs])));
        else:
            if 's' in _w1:
                _train_idxes, _test_idxes, _zero_v_idxes =  tc.get_stratified_train_test_indexes_notzero(_mat,true_labels, percentage_train=0.8, random_seed=623519);
#                _test_idxes = np.hstack((_test_idxes, _zero_v_idxes));
            if 'd' in _w1:
                _train_idxes, _test_idxes, _zero_v_idxes =  tc.get_fully_delex_train_test_indices_from_triples_notzero(_d_triples, _mat, true_labels, percentage_train_vocabulary=0.5, random_seed=623519);
#                _test_idxes = np.hstack((_test_idxes, _zero_v_idxes));

        _in = raw_input('Binarize feature matrix? ([{}]es, [n]o, type q! to quit): '.format('\033[4m\033[1my\033[0m'));
        if _in == 'q!':
            raise KeyboardInterrupt();
        if not _in.strip() or _in.strip().lower() == 'y':
            _mat = tm.binarize_sparse_matrix(_mat);

        _mat_train = _mat[_train_idxes,:];
        _train_labels = true_labels[_train_idxes];
        _mat_test = _mat[_test_idxes,:];
        _test_labels = true_labels[_test_idxes];

        predicted_test_labels, model = tc.clazzify(_mat_train, _mat_test, _train_labels);
        sorted_idxs = np.argsort(np.abs(model.coef_[0]))[::-1]; # sort and reverse indices, model.coef_ is just a (1 x n) matrix
        print('Coefficients:\n\t{}\n\t{}'.format(model.intercept_[0], '\n\t'.join(['{:+.3f} {:6d} {}'.format(model.coef_[0][i], i, _colheader[i]) for i in sorted_idxs[:20]])));

        _in = raw_input('Enter y to predict {} zero-vector(s) with default class (0) (press <Enter> or n to not classify zero-vectors, type q! to quit): '.format(len(_zero_v_idxes)));
        if _in == 'q!':
            raise KeyboardInterrupt();
        if _in.strip().lower() == 'y' and len(_zero_v_idxes) > 0:
            _test_idxes = np.hstack((_test_idxes, _zero_v_idxes));