Example #1
def load_best_lstm_mult_label_multi_class(exp, name_model, we_file_name):
		exp.pp_data.vocabulary_size = 5000
		
		exp.pp_data.embedding_size = 300
		exp.pp_data.max_posts = 1750
		exp.pp_data.max_terms_by_post = 300
		exp.pp_data.binary_classifier = True
		exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
		exp.pp_data.remove_stopwords = False
		exp.pp_data.delete_low_tfid = False
		exp.pp_data.min_df = 0
		exp.pp_data.min_tf = 0
		exp.pp_data.random_posts = False
		exp.pp_data.random_users = False
		exp.pp_data.tokenizing_type = 'WE'
		exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL
		
		exp.use_custom_metrics = False
		exp.use_valid_set_for_train = True
		exp.valid_split_from_train_set = 0.0
		exp.imbalanced_classes = False
		
		lstm = ModelClass(1)
		lstm.loss_function = 'binary_crossentropy'
		lstm.optmizer_function = 'adam'
		lstm.epochs = 15
		lstm.batch_size = 32
		lstm.patience_train = 10
		lstm.use_embedding_pre_train = exp.pp_data.use_embedding
		# Embeddings are trainable only for randomly initialised (RAND) or NON_STATIC pre-trained vectors
		lstm.embed_trainable = (lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))
		
		neuronios_by_layer = [16]
		epochs = [32]
		batch_sizes = [40]
		dropouts = [0.2]
		
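		# Load the held-out multi-label test set (control, anxiety, depression); each saved model below is evaluated on it.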
		exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
		np.random.seed(dn.SEED)
		time_ini_rep = datetime.datetime.now()
		x_test, y_test = exp.pp_data.load_dataset_generic(
				"/home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_multi_label_test_test_528.df",
				['control', 'anxiety', 'depression'])
		exp.set_period_time_end(time_ini_rep, 'Load data')
		
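		# Grid over the hyperparameter lists above; each combination names a previously trained .h5 model that is reloaded and scored with predict_samples.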
		for neuronios in neuronios_by_layer:
				for batch_size in batch_sizes:
						for epoch in epochs:
								for dropout in dropouts:
										lstm.epochs = epoch
										lstm.batch_size = batch_size
										lstm.patience_train = epoch / 2
										exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
																					str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + \
																					we_file_name
										
										lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
										exp.save_geral_configs()
										exp.save_summary_model(lstm.model)
										exp.predict_samples(lstm, x_test, y_test)
Example #2
		exp.pp_data.max_posts = 1750
		exp.pp_data.max_terms_by_post = 300
		exp.pp_data.binary_classifier = True
		exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
		exp.pp_data.remove_stopwords = False
		exp.pp_data.delete_low_tfid = False
		exp.pp_data.min_df = 0
		exp.pp_data.min_tf = 0
		exp.pp_data.random_posts = False
		exp.pp_data.random_users = False
		exp.pp_data.tokenizing_type = 'WE'
		exp.pp_data.use_embedding = dn.UseEmbedding.RAND
		exp.pp_data.embedding_type = dn.EmbeddingType.NONE
		
		lstm = ModelClass(1)
		lstm.loss_function = 'binary_crossentropy'
		lstm.optmizer_function = 'adam'
		lstm.epochs = 10
		lstm.batch_size = 32
		lstm.patience_train = 4
		lstm.use_embedding_pre_train = exp.pp_data.use_embedding
		lstm.embed_trainable = True
		
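		# Two stacked LSTM layers over a randomly initialised, trainable embedding; a single sigmoid unit gives the binary prediction.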
		lstm.model = Sequential()
		lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size, trainable=lstm.embed_trainable))
		lstm.model.add(LSTM(64, activation='tanh', dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
		lstm.model.add(LSTM(32, activation='tanh', dropout=0.2, recurrent_dropout=0.2))
		lstm.model.add(Dense(1, activation='sigmoid'))
		
		time_ini_exp = datetime.datetime.now()
		# exp.k_fold_cross_validation(lstm)
Example #3
def generate_model(exp, name_model, kernel_name, set_params, function):
    exp.pp_data.vocabulary_size = 5000

    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    cnn_lstm = ModelClass(1)
    cnn_lstm.loss_function = 'binary_crossentropy'
    cnn_lstm.optmizer_function = 'adadelta'
    cnn_lstm.epochs = 15
    cnn_lstm.batch_size = 32
    cnn_lstm.patience_train = 10

    filters_by_layer = set_params['filters_by_layer']
    neuronios_by_lstm_layer = set_params['neuronios_by_lstm_layer']
    dropouts = set_params['dropouts']
    dropouts_lstm = set_params['dropouts_lstm']
    kernels_size = set_params['kernels_size']
    epochs = set_params['epochs']
    batch_sizes = set_params['batch_sizes']

    np.random.seed(dn.SEED)

    time_ini_rep = datetime.datetime.now()

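    # Training phase: for each (embedding type, custom embedding file, usage mode) combination, load the training data and grid-search the CNN-LSTM hyperparameters.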
    for embedding_type in set_params['embedding_types']:
        for embedding_custom_file in set_params['embedding_custom_files']:
            for use_embedding in set_params['use_embeddings']:
                exp.pp_data.embedding_type = embedding_type
                exp.pp_data.word_embedding_custom_file = embedding_custom_file
                exp.pp_data.use_embedding = use_embedding
                exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

                exp.set_period_time_end(time_ini_rep, 'Load data')
                x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()

                cnn_lstm.use_embedding_pre_train = exp.pp_data.use_embedding
                cnn_lstm.embed_trainable = (cnn_lstm.use_embedding_pre_train in
                                            (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))

                emb_name = function

                if embedding_custom_file != '':
                    emb_name = exp.pp_data.word_embedding_custom_file.split('.')[0]

                we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + \
                        str(exp.pp_data.use_embedding.value) + '_EF_' + emb_name + kernel_name

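                # Nested grid over filter counts, kernel sizes, batch sizes, epochs, dropouts and LSTM widths; the experiment name encodes every value.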
                for filter in filters_by_layer:
                    for kernel_size in kernels_size:
                        for batch_size in batch_sizes:
                            for epoch in epochs:
                                for dropout in dropouts:
                                    for dropout_lstm in dropouts_lstm:
                                        for neuronios in neuronios_by_lstm_layer:
                                            cnn_lstm.epochs = epoch
                                            cnn_lstm.batch_size = batch_size
                                            cnn_lstm.patience_train = epoch / 2
                                            exp.experiment_name = name_model + '_cnn_lstm' + '_F' + str(filter) + '_K' +\
                                                       str(kernel_size) + '_P' + 'None' + '_B' + str(batch_size) +\
                                                       '_E' + str(epoch) + '_D' + str(dropout) + '_HLN' + str(filter) +\
                                                       '_LSTM_N' + str(neuronios) + '_D' + str(dropout_lstm) + '_' + we_file_name

                                            cnn_lstm.model = Sequential()
                                            cnn_lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
                                                                         trainable=cnn_lstm.embed_trainable, name='emb_' + name_model))
                                            cnn_lstm.model.add(Dropout(dropout, name='dropout_1_' + name_model))
                                            cnn_lstm.model.add(Conv1D(filters=filter, kernel_size=kernel_size,
                                                                      kernel_initializer='glorot_uniform',
                                                                      # kernel_regularizer=regularizers.l2(0.03),
                                                                      padding='valid', activation='relu',
                                                                      name='conv_1_' + name_model))
                                            cnn_lstm.model.add(MaxPooling1D(name='max_pool_1_' + name_model))
                                            cnn_lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout_lstm,
                                                                    recurrent_dropout=dropout_lstm, return_sequences=True,
                                                                    name='lstm_1_' + name_model))
                                            cnn_lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout_lstm,
                                                                    recurrent_dropout=dropout_lstm, return_sequences=True,
                                                                    name='lstm_2_' + name_model))
                                            cnn_lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout_lstm,
                                                                    recurrent_dropout=dropout_lstm,
                                                                    name='lstm_3_' + name_model))
                                            cnn_lstm.model.add(Dense(3, activation='sigmoid', name='dense_1_' + name_model))

                                            time_ini_exp = datetime.datetime.now()
                                            exp.generate_model_hypeparams(cnn_lstm, x_train, y_train,
                                                                          x_valid, y_valid, embedding_matrix)
                                            exp.set_period_time_end(time_ini_exp, 'Total experiment')

                del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()

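    # Evaluation phase: rebuild each experiment name, reload the matching .h5 model and predict on the test split.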
    for embedding_type in set_params['embedding_types']:
        for embedding_custom_file in set_params['embedding_custom_files']:
            for use_embedding in set_params['use_embeddings']:
                exp.pp_data.embedding_type = embedding_type
                exp.pp_data.word_embedding_custom_file = embedding_custom_file
                exp.pp_data.use_embedding = use_embedding
                exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL

                exp.set_period_time_end(time_ini_rep, 'Load data')
                x_test, y_test = exp.pp_data.load_data()

                cnn_lstm.use_embedding_pre_train = exp.pp_data.use_embedding
                cnn_lstm.embed_trainable = (cnn_lstm.use_embedding_pre_train in
                                            (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))

                emb_name = function

                if embedding_custom_file != '':
                    emb_name = exp.pp_data.word_embedding_custom_file.split('.')[0]

                we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + \
                        str(exp.pp_data.use_embedding.value) + '_EF_' + emb_name + kernel_name

                for filter in filters_by_layer:
                    for kernel_size in kernels_size:
                        for batch_size in batch_sizes:
                            for epoch in epochs:
                                for dropout in dropouts:
                                    for dropout_lstm in dropouts_lstm:
                                        for neuronios in neuronios_by_lstm_layer:
                                            cnn_lstm.epochs = epoch
                                            cnn_lstm.batch_size = batch_size
                                            cnn_lstm.patience_train = epoch
                                            exp.experiment_name = name_model + '_cnn_lstm' + '_F' + str(filter) + '_K' +\
                                                       str(kernel_size) + '_P' + 'None' + '_B' + str(batch_size) +\
                                                       '_E' + str(epoch) + '_D' + str(dropout) + '_HLN' + str(filter) +\
                                                       '_LSTM_N' + str(neuronios) + '_D' + str(dropout_lstm) + '_' + we_file_name

                                            cnn_lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                                            exp.save_geral_configs('Experiment Specific Configuration: ' + exp.experiment_name)
                                            exp.save_summary_model(cnn_lstm.model)
                                            exp.predict_samples(cnn_lstm, x_test, y_test)

                del x_test, y_test

    del cnn_lstm, exp
def generate_model(exp, name_model, kernel_function, set_params, function):
    exp.pp_data.vocabulary_size = 5000

    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'

    optimizer_functions = set_params['optimizer_function']
    neuronios_by_layer = set_params['neuronios_by_layer']
    epochs = set_params['epochs']
    batch_sizes = set_params['batch_sizes']
    hidden_layers = set_params['hidden_layers']
    dropouts = [0.2]

    np.random.seed(dn.SEED)

    time_ini_rep = datetime.datetime.now()

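    # Training phase: grid over embedding configurations, optimizer functions, hidden-layer counts and the remaining hyperparameters for the stacked LSTM.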
    for embedding_type in set_params['embedding_types']:
        for embedding_custom_file in set_params['embedding_custom_files']:
            for use_embedding in set_params['use_embeddings']:
                exp.pp_data.embedding_type = embedding_type
                exp.pp_data.word_embedding_custom_file = embedding_custom_file
                exp.pp_data.use_embedding = use_embedding
                exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

                exp.set_period_time_end(time_ini_rep, 'Load data')
                x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()

                lstm.use_embedding_pre_train = exp.pp_data.use_embedding
                lstm.embed_trainable = (lstm.use_embedding_pre_train in
                                        (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))

                emb_name = function
                if embedding_custom_file != '':
                    emb_name = exp.pp_data.word_embedding_custom_file.split('.')[0]

                we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + \
                        str(exp.pp_data.use_embedding.value) + '_EF_' + emb_name + kernel_function

                for neuronios in neuronios_by_layer:
                    for batch_size in batch_sizes:
                        for epoch in epochs:
                            for dropout in dropouts:
                                for optmizer_function in optimizer_functions:
                                    for hidden_layer in hidden_layers:
                                        lstm.optmizer_function = dn.OPTIMIZER_FUNCTIONS[optmizer_function]
                                        lstm.epochs = epoch
                                        lstm.batch_size = batch_size
                                        lstm.patience_train = epoch / 2
                                        exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                                   str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_OF' + \
                                                   lstm.optmizer_function + '_HL' + str(hidden_layer) + '_' + we_file_name

                                        lstm.model = Sequential()
                                        lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
                                                                 trainable=lstm.embed_trainable, name='emb_' + name_model))

                                        for id_hl in range(hidden_layer):
                                            lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout,
                                                                recurrent_dropout=dropout, return_sequences=True,
                                                                name='dense_' + str(id_hl) + '_' + name_model))

                                        lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout,
                                                            recurrent_dropout=dropout,
                                                            name='dense_' + str(id_hl + 1) + '_' + name_model))
                                        lstm.model.add(Dense(3, activation='sigmoid',
                                                             name='dense_' + str(id_hl + 2) + '_' + name_model))

                                        time_ini_exp = datetime.datetime.now()
                                        exp.generate_model_hypeparams(lstm, x_train, y_train, x_valid,
                                                                      y_valid, embedding_matrix)
                                        exp.set_period_time_end(time_ini_exp, 'Total experiment')

                del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()

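    # Evaluation phase: reload each trained LSTM by its experiment name and score it on the test data.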
    for embedding_type in set_params['embedding_types']:
        for embedding_custom_file in set_params['embedding_custom_files']:
            for use_embedding in set_params['use_embeddings']:
                exp.pp_data.embedding_type = embedding_type
                exp.pp_data.word_embedding_custom_file = embedding_custom_file
                exp.pp_data.use_embedding = use_embedding
                exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL

                exp.set_period_time_end(time_ini_rep, 'Load data')
                x_test, y_test = exp.pp_data.load_data()

                lstm.use_embedding_pre_train = exp.pp_data.use_embedding
                lstm.embed_trainable = (lstm.use_embedding_pre_train in
                                        (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))

                emb_name = function
                if embedding_custom_file != '':
                    emb_name = exp.pp_data.word_embedding_custom_file.split('.')[0]

                we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + \
                        str(exp.pp_data.use_embedding.value) + '_EF_' + emb_name + kernel_function

                for neuronios in neuronios_by_layer:
                    for batch_size in batch_sizes:
                        for epoch in epochs:
                            for dropout in dropouts:
                                for optmizer_function in optimizer_functions:
                                    for hidden_layer in hidden_layers:
                                        lstm.optmizer_function = dn.OPTIMIZER_FUNCTIONS[optmizer_function]
                                        lstm.epochs = epoch
                                        lstm.batch_size = batch_size
                                        lstm.patience_train = epoch / 2
                                        exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                                   str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_OF' + \
                                                   lstm.optmizer_function + '_HL' + str(hidden_layer) + '_' + we_file_name

                                        lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                                        exp.save_geral_configs('Experiment Specific Configuration: ' + exp.experiment_name)
                                        exp.save_summary_model(lstm.model)
                                        exp.predict_samples(lstm, x_test, y_test)

                del x_test, y_test

    del lstm, exp
Example #5
def generate_model_5(exp, name_model):
    exp.pp_data.vocabulary_size = 5000

    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.word_embedding_custom_file = ''
    exp.pp_data.embedding_type = dn.EmbeddingType.WORD2VEC_CUSTOM
    exp.pp_data.use_embedding = dn.UseEmbedding.NON_STATIC
    exp.pp_data.word_embedding_custom_file = 'SMHD-CBOW-AllUsers-300.bin'
    exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

    we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) + \
            '_EF_' + exp.pp_data.word_embedding_custom_file

    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'
    lstm.optmizer_function = 'adam'
    lstm.epochs = 15
    lstm.batch_size = 32
    lstm.patience_train = 10
    lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    lstm.embed_trainable = (lstm.use_embedding_pre_train in
                            (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))

    neuronios_by_layer = [16]
    epochs = [32]
    batch_sizes = [40]
    dropouts = [0.2]

    np.random.seed(dn.SEED)

    time_ini_rep = datetime.datetime.now()
    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

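    # Train a three-layer LSTM for each hyperparameter combination (a single combination given the lists above).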
    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch / 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(
                        neuronios) + '_B' + str(batch_size) + '_E' + str(
                            epoch) + '_D' + str(dropout) + '_' + we_file_name

                    lstm.model = Sequential()
                    lstm.model.add(
                        Embedding(exp.pp_data.vocabulary_size,
                                  exp.pp_data.embedding_size,
                                  trainable=lstm.embed_trainable,
                                  name='emb_' + name_model))
                    lstm.model.add(
                        LSTM(neuronios,
                             activation='tanh',
                             dropout=dropout,
                             recurrent_dropout=dropout,
                             return_sequences=True,
                             name='dense_1_' + name_model))
                    lstm.model.add(
                        LSTM(neuronios,
                             activation='tanh',
                             dropout=dropout,
                             recurrent_dropout=dropout,
                             return_sequences=True,
                             name='dense_2_' + name_model))
                    lstm.model.add(
                        LSTM(neuronios,
                             activation='tanh',
                             dropout=dropout,
                             recurrent_dropout=dropout,
                             name='dense_3_' + name_model))
                    lstm.model.add(
                        Dense(1,
                              activation='sigmoid',
                              name='dense_4_' + name_model))

                    time_ini_exp = datetime.datetime.now()
                    exp.generate_model_hypeparams(lstm, x_train, y_train,
                                                  x_valid, y_valid,
                                                  embedding_matrix)
                    exp.set_period_time_end(time_ini_exp, 'Total experiment')

    del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_test, y_test = exp.pp_data.load_data()
    exp.save_data_format_train(x_test, name_model + '_x')
    exp.save_data_format_train(y_test, name_model + '_y')
    exp.set_period_time_end(time_ini_rep, 'Load data')

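    # Reload each saved model and evaluate it on the persisted test split.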
    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch / 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(
                        neuronios) + '_B' + str(batch_size) + '_E' + str(
                            epoch) + '_D' + str(dropout) + '_' + we_file_name

                    lstm.model = exp.load_model(dn.PATH_PROJECT +
                                                exp.experiment_name + '.h5')
                    exp.save_geral_configs()
                    exp.save_summary_model(lstm.model)
                    exp.predict_samples(lstm, x_test, y_test)

    del x_test, y_test, lstm, exp
def generate_model_ml_le(exp, name_model, set_params):
		exp.pp_data.vocabulary_size = 5000
		
		exp.pp_data.embedding_size = 300
		exp.pp_data.max_posts = 1750
		exp.pp_data.max_terms_by_post = 300
		exp.pp_data.binary_classifier = True
		exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
		exp.pp_data.remove_stopwords = False
		exp.pp_data.delete_low_tfid = False
		exp.pp_data.min_df = 0
		exp.pp_data.min_tf = 0
		exp.pp_data.random_posts = False  # False = chronological order
		exp.pp_data.random_users = False
		exp.pp_data.tokenizing_type = 'WE'
		exp.pp_data.word_embedding_custom_file = ''
		exp.pp_data.embedding_type = dn.EmbeddingType.GLOVE_6B
		exp.pp_data.use_embedding = dn.UseEmbedding.STATIC
		exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL
		exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL
		
		exp.use_custom_metrics = False
		exp.use_valid_set_for_train = True
		exp.valid_split_from_train_set = 0.0
		exp.imbalanced_classes = False
		
		we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) + \
									 '_EF_' + 'glove6B300d'
		
		## Set up the model according to the configuration
		lstm = ModelClass(1)
		lstm.loss_function = 'binary_crossentropy'
		lstm.optmizer_function = 'adam'
		lstm.epochs = 15
		lstm.batch_size = 32
		lstm.patience_train = 10
		lstm.use_embedding_pre_train = exp.pp_data.use_embedding
		lstm.embed_trainable = (lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))
		
		# neuronios_by_layer = [16]
		# epochs = [32]
		# batch_sizes = [40]
		# dropouts = [0.2]
		
		neuronios_by_layer = set_params['neuronios_by_layer']
		epochs = set_params['epochs']
		batch_sizes = set_params['batch_sizes']
		dropouts = set_params['dropouts']

		np.random.seed(dn.SEED)
		
		time_ini_rep = datetime.datetime.now()
		x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
		exp.set_period_time_end(time_ini_rep, 'Load data')
		
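		# Train a three-layer LSTM (lecun_uniform kernel and orthogonal recurrent initialisers) for every combination in set_params.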
		for neuronios in neuronios_by_layer:
				for batch_size in batch_sizes:
						for epoch in epochs:
								for dropout in dropouts:
										lstm.epochs = epoch
										lstm.batch_size = batch_size
										lstm.patience_train = epoch / 2
										exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + str(
												batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name
										
										lstm.model = Sequential()
										lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
																						 trainable=lstm.embed_trainable, name='emb_' + name_model))
										lstm.model.add(LSTM(neuronios,
																				kernel_initializer='lecun_uniform', recurrent_initializer='orthogonal',
																				activation='tanh', dropout=dropout, recurrent_dropout=dropout,
																				return_sequences=True, name='dense_1_' + name_model))
										lstm.model.add(LSTM(neuronios,
																				kernel_initializer='lecun_uniform', recurrent_initializer='orthogonal',
																				activation='tanh', dropout=dropout, recurrent_dropout=dropout,
																				return_sequences=True, name='dense_2_' + name_model))
										lstm.model.add(LSTM(neuronios,
																				kernel_initializer='lecun_uniform', recurrent_initializer='orthogonal',
																				activation='tanh', dropout=dropout, recurrent_dropout=dropout,
																				name='dense_3_' + name_model))
										lstm.model.add(Dense(3,
																				 kernel_initializer='lecun_uniform',
																				 activation='sigmoid',
																				 name='dense_4_' + name_model))
										
										time_ini_exp = datetime.datetime.now()
										exp.generate_model_hypeparams(lstm, x_train, y_train, x_valid, y_valid, embedding_matrix)
										exp.set_period_time_end(time_ini_exp, 'Total experiment')
		
		del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix
		
		# Test
		exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
		np.random.seed(dn.SEED)
		time_ini_rep = datetime.datetime.now()
		x_test, y_test = exp.pp_data.load_data()
		exp.set_period_time_end(time_ini_rep, 'Load data')
		
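		# Reload each trained model and evaluate it on the test split.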
		for neuronios in neuronios_by_layer:
				for batch_size in batch_sizes:
						for epoch in epochs:
								for dropout in dropouts:
										lstm.epochs = epoch
										lstm.batch_size = batch_size
										lstm.patience_train = epoch / 2
										exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + str(
												batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name
										
										lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
										exp.save_geral_configs()
										exp.save_summary_model(lstm.model)
										exp.predict_samples(lstm, x_test, y_test)
		
		del x_test, y_test, lstm, exp
def load_submodel_anx(exp, name_model, kernel_name, set_params):
		exp.pp_data.vocabulary_size = 5000
		
		exp.pp_data.embedding_size = 300
		exp.pp_data.max_posts = 1750
		exp.pp_data.max_terms_by_post = 300
		exp.pp_data.binary_classifier = True
		exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
		exp.pp_data.remove_stopwords = False
		exp.pp_data.delete_low_tfid = False
		exp.pp_data.min_df = 0
		exp.pp_data.min_tf = 0
		exp.pp_data.random_posts = False
		exp.pp_data.random_users = False
		exp.pp_data.tokenizing_type = 'WE'
		exp.pp_data.embedding_type = dn.EmbeddingType.GLOVE_CUSTOM
		exp.pp_data.use_embedding = dn.UseEmbedding.STATIC
		exp.pp_data.word_embedding_custom_file = 'SMHD-glove-A-D-ADUsers-300.pkl'
		exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL
		exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

		we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) + \
									 '_EF_' + exp.pp_data.word_embedding_custom_file.split('.')[0] + kernel_name
		
		lstm = ModelClass(1)
		lstm.loss_function = 'binary_crossentropy'
		lstm.optmizer_function = 'adam'
		lstm.epochs = 15
		lstm.batch_size = 32
		lstm.patience_train = 10
		lstm.use_embedding_pre_train = exp.pp_data.use_embedding
		lstm.embed_trainable = (lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))
		
		# neuronios_by_layer = [16]
		# epochs = [96]
		# batch_sizes = [20]
		# dropouts = [0.1]

		neuronios_by_layer = set_params['neuronios_by_layer']
		epochs = set_params['epochs']
		batch_sizes = set_params['batch_sizes']
		dropouts = set_params['dropouts']

		np.random.seed(dn.SEED)
		
		time_ini_rep = datetime.datetime.now()
		x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
		exp.set_period_time_end(time_ini_rep, 'Load data')
		
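		# Train the submodel over the configured hyperparameter grid, using the custom GloVe embeddings set above.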
		for neuronios in neuronios_by_layer:
				for batch_size in batch_sizes:
						for epoch in epochs:
								for dropout in dropouts:
										lstm.epochs = epoch
										lstm.batch_size = batch_size
										lstm.patience_train = epoch / 2
										exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + str(
												batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name
										
										lstm.model = Sequential()
										# Embedding(vocabulary_size, embedding_size, input_length=max_len), where max_len
										# is the cut-off applied to each text after max_len tokens.
										lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
																						 trainable=lstm.embed_trainable, name='emb_' + name_model))
										lstm.model.add(LSTM(neuronios,
																				activation='tanh', dropout=dropout, recurrent_dropout=dropout,
																				return_sequences=True, name='dense_1_' + name_model))
										lstm.model.add(LSTM(neuronios,
																				activation='tanh', dropout=dropout, recurrent_dropout=dropout,
																				return_sequences=True, name='dense_2_' + name_model))
										lstm.model.add(LSTM(neuronios,
																				activation='tanh', dropout=dropout, recurrent_dropout=dropout,
																				name='dense_3_' + name_model))
										lstm.model.add(Dense(3,
																				 activation='sigmoid',
																				 name='dense_4_' + name_model))
										
										time_ini_exp = datetime.datetime.now()
										exp.generate_model_hypeparams(lstm, x_train, y_train, x_valid, y_valid, embedding_matrix)
										exp.set_period_time_end(time_ini_exp, 'Total experiment')
		
		del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix
		
		# Test
		exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
		np.random.seed(dn.SEED)
		time_ini_rep = datetime.datetime.now()
		x_test, y_test = exp.pp_data.load_data()
		exp.set_period_time_end(time_ini_rep, 'Load data')
		
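		# Reload each saved submodel and evaluate it on the test set.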
		for neuronios in neuronios_by_layer:
				for batch_size in batch_sizes:
						for epoch in epochs:
								for dropout in dropouts:
										lstm.epochs = epoch
										lstm.batch_size = batch_size
										lstm.patience_train = epoch / 2
										exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + str(
												batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name
										
										lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
										exp.save_geral_configs()
										exp.save_summary_model(lstm.model)
										exp.predict_samples(lstm, x_test, y_test)
		
		del x_test, y_test, lstm, exp
Example #8
def generate_model(exp, name_model, kernel_function, set_params):
		# Configure the data pre-processing used for import
		exp.pp_data.vocabulary_size = 5000
		
		exp.pp_data.embedding_size = 300  # 300 is required when using the word2vec Google neg300 word embeddings
		exp.pp_data.max_posts = 1750
		exp.pp_data.max_terms_by_post = 300
		exp.pp_data.binary_classifier = True
		exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
		exp.pp_data.remove_stopwords = False
		exp.pp_data.delete_low_tfid = False
		exp.pp_data.min_df = 0
		exp.pp_data.min_tf = 0
		exp.pp_data.random_posts = False  # False = chronological order
		exp.pp_data.random_users = False  # Not used; samples are always shuffled in k-fold validation
		exp.pp_data.tokenizing_type = 'WE'
		exp.pp_data.word_embedding_custom_file = ''
		exp.pp_data.embedding_type = dn.EmbeddingType.GLOVE_6B
		exp.pp_data.use_embedding = dn.UseEmbedding.STATIC
		exp.pp_data.word_embedding_custom_file = ''
		exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL
		
		exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL
		
		exp.use_custom_metrics = False
		exp.use_valid_set_for_train = True
		exp.valid_split_from_train_set = 0.0
		exp.imbalanced_classes = False
		
		we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) + \
									 '_EF_' + 'glove6B300d' + kernel_function
		
		## Set up the model according to the configuration
		cnn_lstm = ModelClass(1)
		cnn_lstm.loss_function = 'binary_crossentropy'
		cnn_lstm.optmizer_function = 'adadelta'
		cnn_lstm.epochs = 15
		cnn_lstm.batch_size = 32
		cnn_lstm.patience_train = 10
		cnn_lstm.use_embedding_pre_train = exp.pp_data.use_embedding
		cnn_lstm.embed_trainable = (cnn_lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC))
		
		# set_params is empty
		if not bool(set_params):
				filters_by_layer = [32, 64, 128]
				neuronios_by_lstm_layer = [64, 128, 256]
				dropouts = [0.2, 0.5]
				dropouts_lstm = [0.2, 0.5]
		else:
				filters_by_layer = set_params['filters_by_layer']
				neuronios_by_lstm_layer = set_params['neuronios_by_lstm_layer']
				dropouts = set_params['dropouts']
				dropouts_lstm = set_params['dropouts_lstm']
		
		kernels_size = [5]
		epochs = [10]
		batch_sizes = [20]
		# Expected input batch shape: (batch_size, timesteps, data_dim)
		# Note that we have to provide the full batch_input_shape since the network is stateful.
		# the sample of index i in batch k is the follow-up for the sample i in batch k-1.
		np.random.seed(dn.SEED)
		
		time_ini_rep = datetime.datetime.now()
		x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
		exp.set_period_time_end(time_ini_rep, 'Load data')
		
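		# Grid search over the CNN-LSTM hyperparameters; each configured model is passed to exp.generate_model_hypeparams together with the train/validation data and embedding matrix.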
		for filter in filters_by_layer:
				for kernel_size in kernels_size:
						for batch_size in batch_sizes:
								for epoch in epochs:
										for dropout in dropouts:
												for dropout_lstm in dropouts_lstm:
														for neuronios in neuronios_by_lstm_layer:
																cnn_lstm.epochs = epoch
																cnn_lstm.batch_size = batch_size
																cnn_lstm.patience_train = epoch
																exp.experiment_name = name_model + '_cnn_lstm' + '_F' + str(filter) + '_K' + \
																											str(kernel_size) + '_P' + 'None' + '_B' + str(batch_size) + \
																											'_E' + str(epoch) + '_D' + str(dropout) + '_HLN' + \
																											str(filter) + '_LSTM_N' + str(neuronios) + \
																											'_D'+ str(dropout_lstm) +	'_' + we_file_name
																
																cnn_lstm.model = Sequential()
																cnn_lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
																												trainable=cnn_lstm.embed_trainable, name='emb_' + name_model))
																cnn_lstm.model.add(Dropout(dropout, name='dropout_1_' + name_model))
																cnn_lstm.model.add(Conv1D(filters=filter, kernel_size=kernel_size,
																										 kernel_initializer='glorot_uniform',
																										 # kernel_regularizer=regularizers.l2(0.03),
																										 padding='valid', activation='relu',
																										 name='conv_1_' + name_model))
																cnn_lstm.model.add(MaxPooling1D(name='max_pool_1_' + name_model))
																cnn_lstm.model.add(LSTM(neuronios,
																												activation='tanh', dropout=dropout_lstm,
																												recurrent_dropout=dropout_lstm,
																												return_sequences=True, name='lstm_1_' + name_model))
																cnn_lstm.model.add(LSTM(neuronios,
																												activation='tanh', dropout=dropout_lstm,
																												recurrent_dropout=dropout_lstm,
																												return_sequences=True, name='lstm_2_' + name_model))
																cnn_lstm.model.add(LSTM(neuronios,
																												activation='tanh', dropout=dropout_lstm,
																												recurrent_dropout=dropout_lstm,
																												name='lstm_3_' + name_model))
																cnn_lstm.model.add(Dense(3, activation='sigmoid', name='dense_1_' + name_model))
										
																time_ini_exp = datetime.datetime.now()
																exp.generate_model_hypeparams(cnn_lstm, x_train, y_train, x_valid, y_valid, embedding_matrix)
																exp.set_period_time_end(time_ini_exp, 'Total experiment')
		
		del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix
		
		# Test
		exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
		np.random.seed(dn.SEED)
		time_ini_rep = datetime.datetime.now()
		x_test, y_test = exp.pp_data.load_data()
		exp.set_period_time_end(time_ini_rep, 'Load data')
		
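		# Rebuild each experiment name, reload its .h5 weights and run prediction on the test data.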
		for filter in filters_by_layer:
				for kernel_size in kernels_size:
						for batch_size in batch_sizes:
								for epoch in epochs:
										for dropout in dropouts:
												for dropout_lstm in dropouts_lstm:
														for neuronios in neuronios_by_lstm_layer:
																cnn_lstm.epochs = epoch
																cnn_lstm.batch_size = batch_size
																cnn_lstm.patience_train = epoch
																exp.experiment_name = name_model + '_cnn_lstm' + '_F' + str(filter) + '_K' + \
																											str(kernel_size) + '_P' + 'None' + '_B' + str(batch_size) + \
																											'_E' + str(epoch) + '_D' + str(dropout) + '_HLN' + \
																											str(filter) + '_LSTM_N' + str(neuronios) + \
																											'_D'+ str(dropout_lstm) +	'_' + we_file_name
										
																cnn_lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
																exp.save_geral_configs('Experiment Specific Configuration: ' + exp.experiment_name)
																exp.save_summary_model(cnn_lstm.model)
																exp.predict_samples(cnn_lstm, x_test, y_test)
		
		del x_test, y_test, cnn_lstm, exp