def get_jsonable_from_parameters(self):
    """
    Gets artm model params.

    Returns
    -------
    dict
        artm model parameters

    """
    parameters = transform_complex_entity_to_dict(self._model)

    regularizers = {}
    for name, regularizer in iteritems(self._model._regularizers.data):
        tau = None
        gamma = None
        try:
            tau = regularizer.tau
            gamma = regularizer.gamma
        except KeyError:
            pass
        regularizers[name] = [str(regularizer.config), tau, gamma]
    for name, regularizer in iteritems(self.custom_regularizers):
        tau = getattr(regularizer, 'tau', None)
        gamma = getattr(regularizer, 'gamma', None)
        config = str(getattr(regularizer, 'config', ''))
        regularizers[name] = [config, tau, gamma]

    parameters['regularizers'] = regularizers
    parameters['version'] = artm.version()

    return parameters

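# Usage sketch (hypothetical caller; `model` stands for an instance of the class
# defining get_jsonable_from_parameters). The returned dict is intended to be
# JSON-serializable, so it can be dumped straight to a file for experiment logs.
import json

print(json.dumps(model.get_jsonable_from_parameters(), indent=2))
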
def main():
    print(artm.version())
    config = ConfigPaths('config.cfg')
    plot_maker = PlotMaker()
    printer = PrintHelper()
    print(config.models_file_name)

    batch_vectorizer = artm.BatchVectorizer(
        data_path=config.output_batches_path, data_format='batches')
    dictionary = artm.Dictionary()
    dictionary.load(dictionary_path=config.dictionary_path + '.dict')

    models_file = open(config.models_file_name, 'a')
    # model = process_one_model(config, batch_vectorizer, models_file, printer, plot_maker,
    #                           dictionary, _n_topics=50, _n_doc_passes=5, _seed_value=100,
    #                           _n_top_tokens=10, _p_mass_threshold=0.25,
    #                           _n_iterations=20, _model_name='model1')

    exp = Experiment(
        Pool(topics_filter=OptimizationTopicsFilter(eps=10 ** (-2.5), verbose=False),
             save_topics=True))
    for i in range(3):
        model_artm = process_one_model(config, batch_vectorizer, models_file, printer,
                                       plot_maker, dictionary, _n_topics=50,
                                       _n_doc_passes=5, _seed_value=100, _n_top_tokens=10,
                                       _p_mass_threshold=0.25, _n_iterations=20,
                                       _model_name='model_{}'.format(i))
        # display_points(model_artm.get_phi())
        exp.collect_topics(model_artm.get_phi(), model_artm.get_theta())
        vals, bins = exp.topics_pool.topics_filter.plot_hist()
        save_hist(vals, bins, "data_iter_{}.csv".format(i))
        print(exp.topics_pool.get_basic_topics_count())

    models_file.close()

def run():
    print('BigARTM version ', artm.version(), '\n\n\n')
    preprocessing_for_artm(True)
    topics = 10
    batch_vectorizer = artm.BatchVectorizer(
        data_path="/home/goncharoff/PythonLab/labs/labs/lab5/result/result.txt",
        data_format="vowpal_wabbit",
        target_folder="batch_vectorizer_target_folder",
        batch_size=10)
    topic_names = ["topic#1" + str(i) for i in range(topics - 1)] + ["bcg"]
    dictionary = artm.Dictionary("dictionary")
    dictionary.gather(batch_vectorizer.data_path)
    artm_plsa(batch_vectorizer, topics, topic_names, dictionary)
    artm_lda(batch_vectorizer, topics, dictionary)

def run():
    print('BigARTM version ', artm.version(), '\n\n\n')
    preprocessing_for_artm(True)
    topics = 10
    batch_vectorizer = artm.BatchVectorizer(
        data_path="../data/lenta.txt",
        data_format="vowpal_wabbit",
        target_folder="batch_vectorizer_target_folder",
        batch_size=10)
    topic_names = ["topic#1" + str(i) for i in range(topics - 1)] + ["bcg"]
    dictionary = artm.Dictionary("dictionary")
    dictionary.gather(batch_vectorizer.data_path)
    artm_plsa(batch_vectorizer, topics, topic_names, dictionary)
    artm_lda(batch_vectorizer, topics, dictionary)
    subprocess.call(['./clear.sh'])

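# `artm_plsa` and `artm_lda` are project helpers defined elsewhere. As an
# illustration only (an assumption, not the original implementation), artm_lda
# could be built on BigARTM's stock LDA wrapper roughly like this:
def artm_lda_sketch(batch_vectorizer, topics, dictionary):
    lda = artm.LDA(num_topics=topics, alpha=0.01, beta=0.01, cache_theta=True,
                   num_document_passes=5, dictionary=dictionary)
    lda.fit_offline(batch_vectorizer=batch_vectorizer, num_collection_passes=10)
    print('perplexity:', lda.perplexity_last_value)
    for i, tokens in enumerate(lda.get_top_tokens(num_tokens=10)):
        print('topic_{}: {}'.format(i, tokens))
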
import artm

print(artm.version())
print(artm.ARTM(num_topics=10).info)

import pytest
import warnings
import shutil

import artm

from ..cooking_machine.models.dummy_topic_model import DummyTopicModel
from ..cooking_machine.models.topic_model import TopicModel
from ..cooking_machine.experiment import Experiment
from ..cooking_machine.dataset import Dataset, W_DIFF_BATCHES_1
from ..cooking_machine.models.example_score import ScoreExample
from ..cooking_machine.models.blei_lafferty_score import BleiLaffertyScore

ARTM_NINE = artm.version().split(".")[1] == "9"

MAIN_MODALITY = "@text"
NGRAM_MODALITY = "@ngramms"
EXTRA_MODALITY = "@str"


# to run all tests
@pytest.fixture(scope="function")
def experiment_enviroment(request):
    """ """
    with warnings.catch_warnings():
        warnings.filterwarnings(action="ignore", message=W_DIFF_BATCHES_1)
        dataset = Dataset('tests/test_data/test_dataset.csv')
        dictionary = dataset.get_dictionary()

    model_artm = artm.ARTM(
        num_topics=5,
        class_ids={MAIN_MODALITY: 1.0, NGRAM_MODALITY: 1.0, EXTRA_MODALITY: 1.0},
        num_document_passes=1,
        dictionary=dictionary,
        scores=[artm.PerplexityScore(name='PerplexityScore')],

def __init__(self, model):
    self.model = model
    self.phi = model.get_phi()
    if '10' in artm.version():
        # BigARTM 0.10 returns phi indexed by plain tuples;
        # rebuild it as a proper pandas MultiIndex
        self.phi = self.phi.set_index(
            pd.MultiIndex.from_tuples(self.phi.index))

import artm

print('artm.version()', artm.version())


def create_and_learn_PLSA(name="", topic_number=750, num_collection_passes=1):
    batch_vectorizer_train = artm.BatchVectorizer(data_path='./' + name,
                                                  data_format='vowpal_wabbit',
                                                  target_folder='folder' + name)
    dictionary = artm.Dictionary()
    dictionary.gather(data_path=batch_vectorizer_train.data_path)
    topic_names = ['topic_{}'.format(i) for i in range(topic_number)]

    model_plsa = artm.ARTM(topic_names=topic_names,
                           class_ids={
                               '@text': 1.0,
                               '@first': 1.0,
                               '@second': 1.0,
                               '@third': 1.0
                           },
                           cache_theta=True,
                           theta_columns_naming='title',
                           scores=[
                               artm.PerplexityScore(name='PerplexityScore',
                                                    dictionary=dictionary)
                           ])

    model_plsa.initialize(dictionary=dictionary)
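    # Likely continuation (a sketch; the original fragment ends above): training
    # is what would consume the otherwise-unused num_collection_passes argument.
    model_plsa.fit_offline(batch_vectorizer=batch_vectorizer_train,
                           num_collection_passes=num_collection_passes)
    print(model_plsa.score_tracker['PerplexityScore'].last_value)
    return model_plsa
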
from numbers import Number

import artm

from artm.wrapper.exceptions import ArtmException
from six import iteritems
from copy import deepcopy
from inspect import signature

# change log style: raise the minimum log level to silence
# routine logging from the native library
lc = artm.messages.ConfigureLoggingArgs()
lc.minloglevel = 3
lib = artm.wrapper.LibArtm(logging_config=lc)

LIBRARY_VERSION = artm.version()
ARTM_NINE = LIBRARY_VERSION.split(".")[1] == "9"

SUPPORTED_SCORES_WITHOUT_VALUE_PROPERTY = (
    artm.score_tracker.TopTokensScoreTracker,
    artm.score_tracker.ThetaSnippetScoreTracker,
    artm.score_tracker.TopicKernelScoreTracker,
)


class TopicModel(BaseModel):
    """
    Topic Model contains an ARTM model and all the necessary information:
    scores, training pipeline, etc.

    """
    def __init__(self, artm_model=None, model_id=None,