def create_session_configs(configs_location=None, delete_old_configs=1):
        if configs_location is None:
            configs_location = ConfigReader.get_configs_location()
        if delete_old_configs:
            Storage.delete_location(configs_location)

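        # Build the list of session configs from the config template.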
        configs = [SessionConfigReader.get_config_template()]

        configs = SessionConfigBuilderCustom1.add_all_config_info(configs)

        n_configs = len(configs)

        SessionLogger.log('Constructed ' + str(n_configs) +
                          ' new session configs from template: \'' +
                          ConfigReader.get_config_template_id() + '\'.')

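        # Store each config under a numbered id and collect the ids.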
        config_ids = list()
        for idx, conf in enumerate(configs, start=1):
            config_id = configs_location + '/' + \
                SessionConfigBuilderCustom1.config_name + str(idx)
            SessionConfigReader.set_config(conf, config_id)
            config_ids.append(config_id)

        SessionLogger.log('Stored ' + str(n_configs) +
                          ' session configs in \'' + configs_location + '\'.')

        return config_ids
    def interpret_output(data_frame,
                         col_name=col_name_class_out,
                         new_col_name=col_name_result,
                         storage_level=0,
                         storage_name='',
                         log=1):
        df = data_frame.copy()
        category_list = CategoryListHandler.read_categories()
        category_vectors = Vectorizer.get_word_vectors(category_list)
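        # Map each classifier output to the category whose vector is most similar.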
        df[new_col_name] = df.apply(
            lambda x: [ClassificationInterpreterCustom1.get_highest_similarity(
                x[col_name], category_list, category_vectors)],
            axis=1)

        log_text = 'Categories have been determined (' + str(len(df.index)) + ' entries).'
        if storage_level >= 1 and storage_name != '':
            storage_name = storage_name + ClassificationInterpreterCustom1.ext_categorized
            Storage.store_pd_frame(df, storage_name)
            log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + new_col_name + '\').'
        if log:
            SessionLogger.log(log_text)

        return df
Example #3
 def add_stopwords(stopwords):
     db_type = ConfigReader.get_db_type()
     session_id = ConfigReader.get_session_id()
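      # Only the file-system-backed stop word store is handled here.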
     if db_type == StopwordHandler.db_type_fs:
         DiskStorageStopwordHandler.add_stopwords(session_id, stopwords)
         SessionLogger.log(
             str(len(stopwords)) + ' stop words have been added.')
 def create_out_vectors(data_frame,
                        col_name=col_name_categories,
                        new_col_name=new_col_name_cat_vec,
                        storage_level=0,
                        storage_name=''):
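      # Dispatch to the ClassificationInterpreter configured for this session.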
     classification_interpreter = SessionConfigReader.read_value(
         ClassificationInterpreter.classification_interpreter_key)
     if classification_interpreter == ClassificationInterpreter.classification_interpreter_custom1:
         return ClassificationInterpreterCustom1.create_out_vectors(
             data_frame,
             col_name=col_name,
             new_col_name=new_col_name,
             storage_level=storage_level,
             storage_name=storage_name)
     elif classification_interpreter == ClassificationInterpreter.classification_interpreter_custom2:
         return ClassificationInterpreterCustom2.create_out_vectors(
             data_frame,
             col_name=col_name,
             new_col_name=new_col_name,
             storage_level=storage_level,
             storage_name=storage_name)
     else:
         SessionLogger.log(
             'Tried to create category vectors. Specified ClassificationInterpreter is not supported.',
             log_type='error')
         return pd.DataFrame()
Example #5
 def import_docs(csv_path=None):
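      # Assumes module-level imports: os, csv and pandas as pd.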
     if csv_path is None:
         session_folder = os.path.join(TenKGnadImporter.sessions_folder,
                                       SessionConfigReader.get_session_id())
         corpus_id = SessionConfigReader.read_value(
             TenKGnadImporter.corpus_id_key)
         corpus_id = DiskStorageMisc.get_identifier_path(corpus_id)
         csv_path = os.path.join(session_folder,
                                 corpus_id + TenKGnadImporter.csv_ext)
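      # The corpus CSV is semicolon-separated: category first, then the text.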
     df = pd.read_csv(
         csv_path,
         sep=';',
         quotechar='\'',
         quoting=csv.QUOTE_MINIMAL,
         header=None,
         names=[TenKGnadImporter.category_name, TenKGnadImporter.text_name])
     category_list = df[TenKGnadImporter.category_name].tolist()
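      # Wrap each category in a list so the column holds lists of categories.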
     df[TenKGnadImporter.category_name] = df.apply(
         lambda x: [x[TenKGnadImporter.category_name]], axis=1)
     head, f_name = os.path.split(csv_path)
     identifier = f_name.split('.')[0]
     Storage.store_pd_frame(df, identifier)
     SessionLogger.log('TenKGnad Corpus (' + str(len(df.index)) +
                       ' entries) has been imported into \'' + identifier +
                       '\' (columns: \'' + TenKGnadImporter.category_name +
                       '\', \'' + TenKGnadImporter.text_name + '\').')
     category_set = set(category_list)
     category_list = list(category_set)
     CategoryListHandler.set_categories(category_list)
     return identifier
 def remove_stopwords(data_frame,
                      custom_stop_words=None,
                      download_live_stopwords=0,
                      col_name=col_name,
                      storage_level=0,
                      storage_name='',
                      log=1):
     df = data_frame.copy()
     stop_words = StopwordHandler.read_stopwords()
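      # Optionally merge in a freshly downloaded NLTK stop word list.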
     if download_live_stopwords:
         stop_words = stop_words.union(
             StopwordDownloaderNLTK.get_stopwords(store=0))
     stop_words = StopWordRemoverCustom.capitalize_words(stop_words)
     if custom_stop_words is not None:
         stop_words = stop_words.union(custom_stop_words)
     df[StopWordRemoverCustom.new_col_name] = df.apply(
         lambda x: StopWordRemoverCustom.process_text(
             x[col_name], stop_words),
         axis=1)
      log_text = 'Removed stop words from documents (' + str(len(df.index)) + ' entries).'
     if storage_level >= 1 and storage_name != '':
         Storage.store_pd_frame(df, storage_name)
         log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + StopWordRemoverCustom.new_col_name + '\').'
     if log:
         SessionLogger.log(log_text)
     return df
    def create_out_vectors(data_frame,
                           col_name=col_name_categories,
                           new_col_name=new_col_name_cat_vec,
                           storage_level=0,
                           storage_name=''):
        df = data_frame.copy()
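        # Reduce each category list to a single word, then vectorize that word.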
        df[ClassificationInterpreterCustom1.one_word_cat] = df.apply(
            lambda x: ClassificationInterpreterCustom1.extract_one_word_cat(
                x[col_name]),
            axis=1)
        vectorized_df = Vectorizer.vectorize(
            df,
            col_name=ClassificationInterpreterCustom1.one_word_cat,
            new_col_name=new_col_name,
            storage_level=0,
            log=0)
        vectorized_df = vectorized_df.drop(
            columns=[ClassificationInterpreterCustom1.one_word_cat])
        vectorized_df[new_col_name] = vectorized_df.apply(
            lambda x: (x[new_col_name] + 1) / 2,
            axis=1)  # adjust to softmax codomain

        log_text = 'Category vectors for classifier training have been created (' + str(len(data_frame.index)) + ' entries).'
        if storage_level >= 1 and storage_name != '':
            storage_name = storage_name + ClassificationInterpreterCustom1.ext_out_vecs
            Storage.store_pd_frame(vectorized_df, storage_name)
            log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + new_col_name + '\').'
        SessionLogger.log(log_text)

        return vectorized_df
Example #8
 def remove_stopwords(data_frame,
                      custom_stop_words=None,
                      download_live_stopwords=0,
                      col_name=col_name,
                      storage_level=0,
                      storage_name='',
                      log=1):
     stopwordremover = SessionConfigReader.read_value(
         StopWordRemover.stopwordremover_key)
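      # Merge the stop words from the session config into the custom list.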
     if custom_stop_words is None:
         custom_stop_words = list()
     custom_stop_words.extend(
         SessionConfigReader.read_value(StopWordRemover.custom_sw_key))
     if stopwordremover == StopWordRemover.stopwordremover_custom:
         return StopWordRemoverCustom.remove_stopwords(
             data_frame,
             custom_stop_words=custom_stop_words,
             download_live_stopwords=download_live_stopwords,
             col_name=col_name,
             storage_level=storage_level,
             storage_name=storage_name,
             log=log)
     else:
         if log:
             SessionLogger.log(
                 'Tried to remove stopwords from documents. Specified Stopword Remover not supported.',
                 log_type='error')
         return pd.DataFrame()
 def split_train_test(identifier=None, data_frame=None):
     if data_frame is None:
         data_frame = Storage.load_pd_frame(identifier)
     split_ratio = SessionConfigReader.read_value(
         TrainTestSplitterCustom1.split_ratio_key)
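      # Clamp the ratio so pandas' sample(frac=...) stays within [0, 1].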
     if split_ratio > 1:
         split_ratio = 1
     random_state = SessionConfigReader.read_value(
         TrainTestSplitterCustom1.random_state_key)
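      # An integer random state from the config makes the split reproducible.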
     if isinstance(random_state, int):
         train = data_frame.sample(frac=split_ratio,
                                   random_state=random_state)
     else:
         train = data_frame.sample(frac=split_ratio)
     test = data_frame.drop(train.index)
     if identifier is None:
         identifier = SessionConfigReader.read_value(
             TrainTestSplitterCustom1.corpus_identifier_key)
     train_name = identifier + TrainTestSplitterCustom1.ext_train
     test_name = identifier + TrainTestSplitterCustom1.ext_test
     Storage.store_pd_frame(train, train_name)
     Storage.store_pd_frame(test, test_name)
     SessionLogger.log('Split \'' + identifier + '\' (' +
                       str(len(data_frame.index)) + ' entries) into \'' +
                       train_name + '\' (' + str(len(train.index)) +
                       ' entries) and \'' + test_name + '\' (' +
                       str(len(test.index)) + ' entries).')
 def interpret_output(data_frame,
                      col_name=col_name_class_out,
                      new_col_name=col_name_result,
                      storage_level=0,
                      storage_name='',
                      log=1):
     classification_interpreter = SessionConfigReader.read_value(
         ClassificationInterpreter.classification_interpreter_key)
     if classification_interpreter == ClassificationInterpreter.classification_interpreter_custom1:
         return ClassificationInterpreterCustom1.interpret_output(
             data_frame,
             col_name=col_name,
             new_col_name=new_col_name,
             storage_level=storage_level,
             storage_name=storage_name,
             log=log)
     elif classification_interpreter == ClassificationInterpreter.classification_interpreter_custom2:
         return ClassificationInterpreterCustom2.interpret_output(
             data_frame,
             col_name=col_name,
             new_col_name=new_col_name,
             storage_level=storage_level,
             storage_name=storage_name,
             log=log)
     else:
         SessionLogger.log(
             'Tried to interpret output vectors. Specified ClassificationInterpreter is not supported.',
             log_type='error')
         return pd.DataFrame()
Example #11
 def preprocess_texts(data_frame, col_name=col_name, new_col_name=new_col_name, storage_level=0, storage_name='', log=1):
     preprocessor_type = SessionConfigReader.read_value(TextPreprocessor.preprocessor_key)
     if preprocessor_type == TextPreprocessor.preprocessor_custom:
         return TextPreprocessorCustom.preprocess_texts(data_frame, col_name=col_name, new_col_name=new_col_name, storage_level=storage_level, storage_name=storage_name, log=log)
     else:
         if log:
             SessionLogger.log('Tried to preprocess texts. Specified Preprocessor is not supported.', log_type='error')
         return pd.DataFrame()
Example #12
 def normalize(data_frame, col_name=col_name, storage_level=0, storage_name='', log=1):
     lemmatizer = SessionConfigReader.read_value(Lemmatizer.lemmatizer_key)
     if lemmatizer == Lemmatizer.lemmatizer_spacy_german:
         return LemmatizerSpacyGerman.normalize(data_frame, col_name=col_name, storage_level=storage_level, storage_name=storage_name, log=log)
     else:
         if log:
             SessionLogger.log('Tried to lemmatize documents. Specified Lemmatizer not supported.', log_type='error')
         return pd.DataFrame()
 def get_stopwords(language=german, store=1, log=1):
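      # Assumes: import nltk; from nltk.corpus import stopwords.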
     nltk.download('stopwords')
     sw = set(stopwords.words(language))
     log_text = str(len(sw)) + ' stop words downloaded from NLTK.'
     if log:
         SessionLogger.log(log_text)
     if store:
         StopwordHandler.set_stopwords(sw)
     return sw
Example #14
 def get_model_id():
     classifier_type = SessionConfigReader.read_value(
         Classifier.classifier_key)
     if classifier_type == Classifier.classifier_keras_nn:
         return ClassifierKerasNN.get_model_id()
     else:
         SessionLogger.log(
             'Tried to get classifier model id. Specified Classifier is not supported.',
             log_type='error')
         return ''
Example #15
 def remove_noise(data_frame, col_name=col_name, storage_level=0, storage_name='', log=1):
     df = data_frame.copy()
     df[NoiseRemoverCustom.new_col_name] = df.apply(lambda x: NoiseRemoverCustom.process_text(x[col_name]), axis=1)
     log_text = 'Removed noise from documents (' + str(len(df.index)) + ' entries).'
     if storage_level >= 1 and storage_name != '':
         Storage.store_pd_frame(df, storage_name)
         log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + NoiseRemoverCustom.new_col_name + '\').'
     if log:
         SessionLogger.log(log_text)
     return df
Example #16
 def import_docs():
     importer_type = SessionConfigReader.read_value(
         CorpusImporter.corpus_importer_key)
     if importer_type == CorpusImporter.tenkgnad_importer:
         return TenKGnadImporter.import_docs()
     else:
         SessionLogger.log(
             'Tried to import corpus. Specified Corpus Importer is not supported.',
             log_type='error')
         return ''
    def preprocess_texts(data_frame,
                         col_name=col_name,
                         new_col_name=col_name_preprocessed,
                         storage_level=0,
                         storage_name='',
                         log=1):

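        # Pipeline: noise removal -> stop word removal -> lemmatization.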
        storage_name_ext = storage_name
        if storage_name != '':
            storage_name_ext = storage_name + TextPreprocessorCustom.ext_noise_removed
        noise_removed_df = NoiseRemover.remove_noise(
            data_frame,
            col_name=col_name,
            storage_level=storage_level - 1,
            storage_name=storage_name_ext,
            log=log)
        if storage_name != '':
            storage_name_ext = storage_name + TextPreprocessorCustom.ext_stops_removed
        stops_removed_df = StopWordRemover.remove_stopwords(
            noise_removed_df,
            col_name=TextPreprocessorCustom.col_name_noise_removed,
            storage_level=storage_level - 1,
            storage_name=storage_name_ext,
            log=log)
        if storage_name != '':
            storage_name_ext = storage_name + TextPreprocessorCustom.ext_lemmatized
        processed_texts_df = Lemmatizer.normalize(
            stops_removed_df,
            col_name=TextPreprocessorCustom.col_name_stops_removed,
            storage_level=storage_level - 1,
            storage_name=storage_name_ext,
            log=log)

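        # Drop the intermediate columns unless a higher storage level keeps them.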
        if storage_level <= 1:
            processed_texts_df = processed_texts_df.drop(
                columns=[TextPreprocessorCustom.col_name_noise_removed])
            processed_texts_df = processed_texts_df.drop(
                columns=[TextPreprocessorCustom.col_name_stops_removed])

        processed_texts_df = processed_texts_df.rename(
            columns={TextPreprocessorCustom.col_name_lemmatized: new_col_name})

        log_text = 'Documents have been preprocessed (' + str(len(data_frame.index)) + ' entries).'

        if storage_level >= 1 and storage_name != '':
            Storage.store_pd_frame(
                processed_texts_df,
                storage_name + TextPreprocessorCustom.ext_preprocessed)
            log_text = log_text + ' Stored in \'' + storage_name + TextPreprocessorCustom.ext_preprocessed + '\' (column: \'' + new_col_name + '\').'

        if log:
            SessionLogger.log(log_text)

        return processed_texts_df
Example #18
 def create_model(data_frame, new_model_id=None, col_name=col_name):
     vectorizer_type = SessionConfigReader.read_value(
         Vectorizer.vectorizer_key)
     if vectorizer_type == Vectorizer.vectorizer_gensim_w2v:
         return VectorizerGensimWord2Vec.create_model(
             data_frame, new_model_id=new_model_id, col_name=col_name)
     else:
         SessionLogger.log(
             'Tried to create vector model. Specified Vectorizer is not supported.',
             log_type='error')
         return ''
Example #19
 def split_train_test(identifier=None, data_frame=None):
     tt_splitter_type = SessionConfigReader.read_value(
         TrainTestSplitter.tt_splitter_key)
     if tt_splitter_type == TrainTestSplitter.tt_splitter_custom1:
         TrainTestSplitterCustom1.split_train_test(identifier=identifier,
                                                   data_frame=data_frame)
     else:
          SessionLogger.log(
              'Tried to split \'' + str(identifier) +
              '\' into train and test set. Specified TrainTestSplitter is not supported.',
              log_type='error')
Example #20
 def delete_from_folder(path):
     if os.path.exists(path):
         for filename in os.listdir(path):
             file_path = os.path.join(path, filename)
             try:
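                  # Unlink files and symlinks; remove directories recursively.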
                 if os.path.isfile(file_path) or os.path.islink(file_path):
                     os.unlink(file_path)
                 elif os.path.isdir(file_path):
                     shutil.rmtree(file_path)
             except Exception as e:
                 SessionLogger.log('Failed to delete %s. Reason: %s' %
                                   (file_path, e))
Example #21
 def resume_config_tests_at_idx(idx, run_import=0, run_preprocessing=0, run_vectorization=0):
     SessionLogger.log('Resuming config tests at config # ' + str(idx) + ' ...')
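      # The given config number is 1-based; convert it to a 0-based index.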
     if idx > 0:
         idx = idx - 1
     configs_location = SessionConfigBuilder.get_configs_location()
     config_ids = Storage.list_ids(configs_location)
     config_ids = SetupRunner.sort_config_list(config_ids)
     config_ids_with_dir = list()
     for c_id in config_ids:
         config_ids_with_dir.append(configs_location + '/' + c_id)
     SessionLogger.log('Config ID list has been restored.')
     return SetupRunner.run_config_tests(run_import=run_import, run_preprocessing=run_preprocessing, run_vectorization=run_vectorization, config_ids=config_ids_with_dir, resume_at_idx=idx)
 def normalize(data_frame,
               col_name=col_name,
               storage_level=0,
               storage_name='',
               log=1):
     df = data_frame.copy()
     df[LemmatizerSpacyGerman.new_col_name] = df.apply(
         lambda x: LemmatizerSpacyGerman.process_text(x[col_name]), axis=1)
      log_text = 'Documents lemmatized with spacy (' + str(len(df.index)) + ' entries).'
     if storage_level >= 1 and storage_name != '':
         Storage.store_pd_frame(df, storage_name)
         log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + LemmatizerSpacyGerman.new_col_name + '\').'
     if log:
         SessionLogger.log(log_text)
     return df
Example #23
 def train_model(data_frame,
                 model_id=None,
                 fv_col_name=fv_col_name,
                 cat_v_col_name=cat_v_col_name):
     classifier_type = SessionConfigReader.read_value(
         Classifier.classifier_key)
     if classifier_type == Classifier.classifier_keras_nn:
         return ClassifierKerasNN.train_model(data_frame,
                                              model_id=model_id,
                                              fv_col_name=fv_col_name,
                                              cat_v_col_name=cat_v_col_name)
     else:
         SessionLogger.log(
             'Tried to train classifier model. Specified Classifier is not supported.',
             log_type='error')
         return ''
Example #24
 def remove_noise(data_frame,
                  col_name=col_name,
                  storage_level=0,
                  storage_name='',
                  log=1):
     noiseremover_type = SessionConfigReader.read_value(
         NoiseRemover.noiseremover_key)
     if noiseremover_type == NoiseRemover.noiseremover_custom:
         return NoiseRemoverCustom.remove_noise(data_frame,
                                                col_name=col_name,
                                                storage_level=storage_level,
                                                storage_name=storage_name,
                                                log=log)
     else:
         if log:
             SessionLogger.log(
                 'Tried to remove noise from documents. Specified Noise Remover not supported.',
                 log_type='error')
         return pd.DataFrame()
    def create_out_vectors(data_frame,
                           col_name=col_name_categories,
                           new_col_name=new_col_name_cat_vec,
                           storage_level=0,
                           storage_name=''):
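        # Note: this variant modifies the passed data frame in place (no copy).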
        data_frame[new_col_name] = data_frame.apply(
            lambda x: ClassificationInterpreterCustom2.get_cat_vec(x[col_name]),
            axis=1)

        log_text = 'Category vectors for classifier training have been created (' + str(len(data_frame.index)) + ' entries).'
        if storage_level >= 1 and storage_name != '':
            storage_name = storage_name + ClassificationInterpreterCustom2.ext_out_vecs
            Storage.store_pd_frame(data_frame, storage_name)
            log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + new_col_name + '\').'
        SessionLogger.log(log_text)

        return data_frame
 def evaluate_output(data_frame,
                     col_name_categories=col_name_categories,
                     col_name_outputs=col_name_result):
     classification_interpreter = SessionConfigReader.read_value(
         ClassificationInterpreter.classification_interpreter_key)
     if classification_interpreter == ClassificationInterpreter.classification_interpreter_custom1:
         return ClassificationInterpreterCustom1.evaluate_output(
             data_frame,
             col_name_categories=col_name_categories,
             col_name_outputs=col_name_outputs)
     elif classification_interpreter == ClassificationInterpreter.classification_interpreter_custom2:
         return ClassificationInterpreterCustom2.evaluate_output(
             data_frame,
             col_name_categories=col_name_categories,
             col_name_outputs=col_name_outputs)
     else:
         SessionLogger.log(
             'Tried to evaluate classification. Specified ClassificationInterpreter is not supported.',
             log_type='error')
         return 0
Example #27
    def run_config_tests(run_import=0, run_preprocessing=0, run_vectorization=0, config_ids=None, resume_at_idx=0):
        if config_ids is None:
            config_ids = SessionConfigBuilder.create_session_configs()
        n_configs = len(config_ids)
        idx = resume_at_idx
        while idx < len(config_ids):
            config_id = config_ids[idx]
            ConfigReader.set_session_config_id(config_id)

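            # Run the pipeline for this config, then score its classification output.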
            SetupRunner.run_setup(run_import=run_import, run_preprocessing=run_preprocessing, run_vectorization=run_vectorization, run_classification=0)

            res = SetupRunner.run_classification_test()

            score = ClassificationInterpreter.evaluate_output(res)

            idx = idx + 1
            SessionLogger.log('Evaluated config # ' + str(idx) + ' / ' + str(n_configs) + '. Score: ' + str(score))
        EvaluationHandler.sort()
        evaluations = EvaluationHandler.load_evaluations()
        return evaluations
Example #28
 def classify(data_frame,
              model_id=None,
              col_name=fv_col_name,
              new_col_name=class_out_col_name,
              storage_level=0,
              storage_name='',
              log=1):
     df = data_frame.copy()
     if model_id is None:
         model_id = ClassifierKerasNN.get_model_id()
     model = Storage.load_h5_model(model_id)
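      # model.predict returns a batch; take the first output vector for each row.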
     df[new_col_name] = df.apply(
         lambda x: model.predict(np.asarray([x[col_name]]))[0], axis=1)
     log_text = 'Classified documents (' + str(len(df.index)) + ' entries).'
     if storage_level >= 1 and storage_name != '':
         storage_name = storage_name + ClassifierKerasNN.ext_classified
         Storage.store_pd_frame(df, storage_name)
         log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + new_col_name + '\').'
     if log:
         SessionLogger.log(log_text)
     return df
Example #29
 def classify(data_frame,
              model_id=None,
              col_name=fv_col_name,
              new_col_name=class_out_col_name,
              storage_level=0,
              storage_name='',
              log=1):
     classifier_type = SessionConfigReader.read_value(
         Classifier.classifier_key)
     if classifier_type == Classifier.classifier_keras_nn:
         return ClassifierKerasNN.classify(data_frame,
                                           model_id=model_id,
                                           col_name=col_name,
                                           new_col_name=new_col_name,
                                           storage_level=storage_level,
                                           storage_name=storage_name,
                                           log=log)
     else:
         if log:
             SessionLogger.log(
                 'Tried to classify data. Specified Classifier is not supported.',
                 log_type='error')
         return pd.DataFrame()
Example #30
    def train_model(data_frame,
                    model_id=None,
                    fv_col_name=fv_col_name,
                    cat_v_col_name=cat_v_col_name):

        # read config params
        config_keys = list()
        config_keys.append(ClassifierKerasNN.model_id_key)
        config_keys.append(ClassifierKerasNN.epochs_key)
        config_keys.append(ClassifierKerasNN.batch_size_key)
        config = SessionConfigReader.read_values(config_keys)
        if model_id is None:
            model_id = config[0]
        epochs = config[1]
        batch_size = config[2]

        # extract vector lists from data frame
        doc_vectors = data_frame[fv_col_name].tolist()
        cat_vectors = data_frame[cat_v_col_name].tolist()

        # load the model
        model = Storage.load_h5_model(model_id)
        # train the model
        model.fit(np.asarray(doc_vectors),
                  np.asarray(cat_vectors),
                  epochs=epochs,
                  batch_size=batch_size)
        # store the model
        Storage.store_h5_model(model, model_id)

        # make log entry
        SessionLogger.log('Trained keras neural network \'' + model_id +
                          '\' with ' + str(len(data_frame.index)) +
                          ' new entries.')

        return model_id
Example #32
class SessionAdmin:
    def __init__(self, server):
        self.sessionLogger = SessionLogger()
        self.server = server
        self.openSessions = {}
    
    def closeAll(self, error):
        self.sessionLogger.closeAllSessions(error)

    def newPostProcessSession(self, tutorId):
        session = PostProcessSession(self.server, self.sessionLogger, tutorId, POST_PROCESS)
        self.openSessions[tutorId] = session
        session.newSession()

    def newOfflineSession(self, tutorId, pupilId):
        session = OfflineQuestionSession(self.server, self.sessionLogger, tutorId, OFFLINE_QUESTION)
        self.openSessions[tutorId] = session
        session.newSession(OFFLINE_QUESTION, pupilId)
    
    def newSession(self, roomId, kind, status, pupilId="NULL"):
        if kind == EXTRA_IACLASS:
            session = ExtraSession(self.server, self.sessionLogger, roomId, kind)
        else:
            session = IAPASession(self.server, self.sessionLogger, roomId, kind)
        self.openSessions[roomId] = session
        session.newSession(status, pupilId)
        
    def closeSession(self, roomId):
        session = self.openSessions[roomId]
        session.closeSession()
        del(self.openSessions[roomId])

    def hasOpenSession(self, roomId):
        return (roomId in self.openSessions.keys())

    def tutorDecide(self, roomId, status=DECIDING, pupilId="NULL"):
        session = self.openSessions[roomId]
        session.tutorDecide(pupilId)

    def tutorEnter(self, roomId, status, pupilId="NULL"):
        session = self.openSessions[roomId]
        session.tutorEnter(status, pupilId)

    def tutorReject(self, roomId, status, pupilId="NULL"):
        session = self.openSessions[roomId]
        session.tutorReject(status, pupilId)

    def tutorEnd(self, roomId, status, pupilId="NULL"):
        session = self.openSessions[roomId]
        if session.tutorEnd(status, pupilId):
            del(self.openSessions[roomId])

    def tutorQuit(self, roomId):
        session = self.openSessions[roomId]
        if session.tutorQuit():
            del(self.openSessions[roomId])

    def pupilEnter(self, roomId, pupilId):
        session = self.openSessions[roomId]
        session.pupilEnter(pupilId)

    def pupilEnd(self, roomId, status, pupilId="NULL"):
        session = self.openSessions[roomId]
        if session.pupilEnd(status, pupilId):
            del(self.openSessions[roomId])

    def changeSessionKind(self, roomId, kind, pupilId=None):
        self.closeSession(roomId)
        if pupilId is not None:
            self.newSession(roomId, kind, IACLASS, pupilId)
        else:
            self.newSession(roomId, kind, WAITING)

    def sessionKey(self, roomId):
        print(self.openSessions)
        try:
            session = self.openSessions[roomId]
            return session.sessionKey()
        except KeyError:
            return None