Пример #1
0
from multiprocessing import Pool

from pyartm_datasets import main_cases
from pyartm import regularizers
from pyartm.optimizations import obd
from pyartm.optimizations import default

import manager

ITERS_COUNT = 100
SAMPLES = 100

if __name__ == '__main__':
    # Load a train/test split over eight 20 Newsgroups categories. Only the
    # training matrix is passed to the jobs below, so the second element of
    # the slice is discarded.
    train_n_dw_matrix, _ = main_cases.get_20newsgroups(
        [
            'rec.autos', 'rec.motorcycles', 'rec.sport.baseball',
            'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med',
            'sci.space'
        ],
        train_proportion=0.8)[:2]

    # One argument tuple per topic count T: (matrix, T, pickle path).
    # NOTE(review): the path is presumably where perform_plots stores its
    # result -- confirm against manager.perform_plots.
    args_list = [
        (train_n_dw_matrix, T,
         '20news_experiment/20news_{}t_plots.pkl'.format(T))
        for T in [10, 25]
    ]

    # Shut the pool down explicitly. A Pool that is neither terminated nor
    # joined leaks its worker processes and may hang interpreter
    # finalization. map() blocks until every job has finished, so
    # terminate() here never interrupts pending work.
    pool = Pool(processes=5)
    try:
        pool.map(manager.perform_plots, args_list)
    finally:
        pool.terminate()
        pool.join()
Пример #2
0
from pyartm import regularizers
from pyartm_datasets import main_cases
from pyartm.optimizations import timed_default

import manager

if __name__ == '__main__':
    categories = [
        'rec.autos', 'rec.motorcycles', 'rec.sport.baseball',
        'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med',
        'sci.space'
    ]
    # Only the first element of the loader's result is used here.
    n_dw_matrix = main_cases.get_20newsgroups(categories)[0]

    # 100 references to a single Additive(0., 0.) instance (list repetition,
    # not 100 separate objects).
    regularization_list = [regularizers.Additive(0., 0.)] * 100
    optimizer = timed_default.Optimizer(
        regularization_list=regularization_list,
        return_counters=True)

    manager.perform_experiment(n_dw_matrix, optimizer, 10, 100)
Пример #3
0
from pyartm_datasets import main_cases
from pyartm import regularizers
from pyartm.optimizations import default, thetaless
from pyartm.common import experiments
from pyartm.calculations import metrics


def print_matrix(arr):
    """Print every row of ``arr`` on its own line.

    Each item of a row is converted with ``str`` and the items are joined
    by single spaces. Nothing is printed for an empty ``arr``.
    """
    for values in arr:
        print(' '.join(str(value) for value in values))


if __name__ == '__main__':
    # Load two 20 Newsgroups categories. The loader's result is unpacked
    # into four values; the second one is ignored here.
    _n_dw_matrix, _, num_2_token, doc_targets = main_cases.get_20newsgroups([
        'rec.sport.hockey',
        'talk.politics.guns',
    ])
    # Partition document positions by their target value (0 or 1); any
    # other target value is left out of both lists.
    topic_0_indices, topic_1_indices = [], []
    for index, target in enumerate(doc_targets):
        if target == 0:
            topic_0_indices.append(index)
        elif target == 1:
            topic_1_indices.append(index)

    # Accumulators for per-balance results.
    # NOTE(review): nothing in the visible part of this script fills them;
    # presumably the rest of the loop body does -- confirm.
    thetaless_rels = []
    lda_rels = []
    for balance in range(10, 201, 10):
        print(balance)
        # Take each target-0 row once and each target-1 row `balance` times
        # (list repetition), i.e. an increasingly imbalanced row selection.
        # Assumes _n_dw_matrix supports row indexing by a list of ints.
        n_dw_matrix = _n_dw_matrix[topic_0_indices +
                                   topic_1_indices * balance, :]
        # 100 references to one Additive(-0.1, 0.) instance.
        regularization_list = [regularizers.Additive(-0.1, 0.)] * 100
Пример #4
0
        perplexities.append(calc_perplexity(phi, theta))
    if not os.path.exists(os.path.dirname(output_path)):
        os.makedirs(os.path.dirname(output_path))
    with open(output_path, 'w') as output_file:
        pickle.dump({
            'init_phi': init_phi,
            'init_theta': init_theta,
            'perplexities': perplexities,
            'phis': phis
        }, output_file)


if __name__ == '__main__':
    n_dw_matrix, _, _, _ = main_cases.get_20newsgroups([
        'sci.crypt',
        'sci.electronics',
        'sci.med',
        'sci.space'
    ])

    print('Original PLSA')
    perform_lda(
        n_dw_matrix, optimizer=get_optimizer(0., 100), T=10,
        samples=300, output_path='stability_exp/plsa.pkl'
    )

    print('Full initialized PLSA')
    phi, theta = experiments.default_sample(
        n_dw_matrix, T=10, seed=42, optimizer=get_optimizer(-0.1, 100)
    )
    init_phi, init_theta = experiments.default_sample(
        n_dw_matrix, T=10, seed=42, optimizer=get_optimizer(0., 100),
# coding: utf-8
from multiprocessing import Pool

from pyartm_datasets import main_cases
from pyartm import regularizers
from pyartm.optimizations import default

import manager

if __name__ == '__main__':
    # Train/test split over four 20 Newsgroups science categories; the
    # first two elements of the loader's result are used.
    train_n_dw_matrix, test_n_dw_matrix = main_cases.get_20newsgroups(
        ['sci.crypt', 'sci.electronics', 'sci.med', 'sci.space'],
        train_proportion=0.8)[:2]

    # phi_alpha is held fixed; T and theta_alpha are swept.
    phi_alpha = -0.1
    args_list = []
    for T in range(3, 16):
        for theta_alpha in [-0.1, 0., 0.1]:
            # 100 references to a single Additive instance for this
            # (phi_alpha, theta_alpha) pair.
            regularization_list = [
                regularizers.Additive(phi_alpha, theta_alpha)
            ] * 100
            # One job per (T, theta_alpha); the file name encodes T and
            # both alpha values.
            args_list.append((train_n_dw_matrix, test_n_dw_matrix,
                              default.Optimizer(regularization_list), T, 10,
                              'iter_exp/20news_{}t_{}_{}.pkl'.format(
                                  T, phi_alpha, theta_alpha)))

    # Shut the pool down explicitly. A Pool that is neither terminated nor
    # joined leaks its worker processes and may hang interpreter
    # finalization. map() blocks until every job has finished, so
    # terminate() here never interrupts pending work.
    pool = Pool(processes=5)
    try:
        pool.map(manager.perform_iteration_dependency_experiment,
                 args_list)
    finally:
        pool.terminate()
        pool.join()
Пример #6
0
from pyartm.optimizations import default, balanced
from pyartm.common import experiments
from pyartm.calculations import metrics

from pyartm_experiments.balanced import balanced_ptdw


def print_matrix(arr):
    """Write ``arr`` to stdout, one row per line.

    Items within a row are stringified with ``str`` and separated by a
    single space.
    """
    separator = ' '
    for row in arr:
        print(separator.join(map(str, row)))


if __name__ == '__main__':
    _n_dw_matrix, _, num_2_token, doc_targets = main_cases.get_20newsgroups([
        'comp.sys.mac.hardware',
        'talk.politics.guns',
    ])
    topic_0_indices, topic_1_indices = [], []
    for index, target in enumerate(doc_targets):
        if target == 0:
            topic_0_indices.append(index)
        elif target == 1:
            topic_1_indices.append(index)

    for balance in [1, 2, 5, 10, 20, 50, 100, 200, 300, 500]:
        n_dw_matrix = _n_dw_matrix[topic_0_indices +
                                   topic_1_indices * balance, :]
        regularization_list = [regularizers.Additive(-0.1, 0.)] * 100
        lda_phi, lda_theta = experiments.default_sample(
            n_dw_matrix,
            T=2,