Example #1
def train():
    settings = Settings()

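    # Read the training hyperparameters from the shared Settings store.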
    batch_size = settings.get_training_parameters('batch_size')
    epochs = settings.get_training_parameters('epochs')

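    # Model 4 predicts is_top_submission from the submission's category alone.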
    model_builder = Model4Builder()

    model = model_builder()

    preprocessor = Preprocessor(model)

    preprocessor.load_data(['category', 'is_top_submission'])

    training_input = [preprocessor.training_data['category']]
    validation_input = [preprocessor.validation_data['category']]
    training_output = [preprocessor.training_data['is_top_submission']]
    validation_output = [preprocessor.validation_data['is_top_submission']]

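    # Compensate for class imbalance in the is_top_submission labels; the
    # weights are keyed by the model's output layer names.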
    class_weights = calculate_class_weights(preprocessor.training_data['is_top_submission'],
                                            [ol.name for ol in model.output_layers])

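    # Assemble the callback chain: CSV metric logging, plotting, config dumps
    # and model checkpointing.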
    callbacks = CallbackBuilder(model, [CsvLogger, CsvPlotter, ConfigLogger, ModelSaver])()

    model.fit(training_input, training_output, batch_size=batch_size, epochs=epochs,
              callbacks=callbacks, validation_data=(validation_input, validation_output), class_weight=class_weights)
Example #2
def train():
    settings = Settings()

    batch_size = settings.get_training_parameters('batch_size')
    epochs = settings.get_training_parameters('epochs')
    dictionary_size = settings.get_training_parameters('dictionary_size')
    max_headline_length = settings.get_training_parameters('max_headline_length')

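    # Load the pre-trained GloVe embedding, capped at dictionary_size entries.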
    glove = Glove(dictionary_size)
    glove.load_embedding()

    model_builder = Model1Builder() \
        .set_input('glove', glove) \
        .set_parameter('max_headline_length', max_headline_length)

    model = model_builder()

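    # The preprocessor encodes headlines with the same GloVe vocabulary and
    # length limit as the model.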
    preprocessor = Preprocessor(model)
    preprocessor.set_encoder('glove', glove)
    preprocessor.set_parameter('max_headline_length', max_headline_length)

    preprocessor.load_data(['headline', 'is_top_submission'])

    training_input = [preprocessor.training_data['headline']]
    validation_input = [preprocessor.validation_data['headline']]
    training_output = [preprocessor.training_data['is_top_submission']]
    validation_output = [preprocessor.validation_data['is_top_submission']]

    class_weights = calculate_class_weights(preprocessor.training_data['is_top_submission'],
                                            [ol.name for ol in model.output_layers])

    callbacks = CallbackBuilder(model, [CsvLogger, CsvPlotter, ConfigLogger, ModelSaver])()

    model.fit(training_input, training_output, batch_size=batch_size, epochs=epochs,
              callbacks=callbacks, validation_data=(validation_input, validation_output), class_weight=class_weights)
Example #3
def train():
    settings = Settings()

    batch_size = settings.get_training_parameters('batch_size')
    epochs = settings.get_training_parameters('epochs')
    max_headline_length = settings.get_training_parameters(
        'max_headline_length')
    max_article_length = settings.get_training_parameters('max_article_length')

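    # The NumericLog encoders produce the *_log_representation features
    # requested from the preprocessor below.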
    headline_numeric_log = NumericLog(max_headline_length)
    article_numeric_log = NumericLog(max_article_length)

    model_builder = Model6Builder() \
        .set_input('headline_numeric_log', headline_numeric_log) \
        .set_input('article_numeric_log', article_numeric_log)

    model = model_builder()

    preprocessor = Preprocessor(model)
    preprocessor.set_encoder('headline_numeric_log', headline_numeric_log)
    preprocessor.set_encoder('article_numeric_log', article_numeric_log)

    preprocessor.load_data([
        'headline_log_representation', 'article_log_representation',
        'is_top_submission'
    ])
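    # Model 6 combines the headline and article log representations into a
    # single is_top_submission prediction.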
    training_input = [
        preprocessor.training_data['headline_log_representation'],
        preprocessor.training_data['article_log_representation']
    ]
    validation_input = [
        preprocessor.validation_data['headline_log_representation'],
        preprocessor.validation_data['article_log_representation']
    ]
    training_output = [preprocessor.training_data['is_top_submission']]
    validation_output = [preprocessor.validation_data['is_top_submission']]

    class_weights = calculate_class_weights(
        preprocessor.training_data['is_top_submission'],
        [ol.name for ol in model.output_layers])

    callbacks = CallbackBuilder(
        model, [CsvLogger, CsvPlotter, ConfigLogger, ModelSaver])()

    model.fit(training_input,
              training_output,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=callbacks,
              validation_data=(validation_input, validation_output),
              class_weight=class_weights)
Example #4
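# Excerpt: only the constructor is shown. Judging by the config.txt target and
# the captured model summary, this is likely the ConfigLogger callback used in
# the training examples above.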
    def __init__(self, model, log_path):
        super().__init__()
        settings = Settings()

        self.training_parameters = settings.get_training_parameters()
        self.network_parameters = settings.get_network_parameters()

        self.log_path = log_path
        self.filename = '{}/{}'.format(log_path, 'config.txt')

        model.summary(print_fn=self._handle_summary_print)
        self.model = model
Example #5
# Third-party imports this example relies on; project-local helpers such as
# Settings, Glove, NumericLog, Preprocessor and the precision/recall/f1
# metrics are imported from elsewhere in the source repository.
from argparse import ArgumentParser

import numpy as np
from keras.models import load_model


def calculate_correlations():
    arg_parse = ArgumentParser()
    arg_parse.add_argument('--model_1', type=str)
    arg_parse.add_argument('--model_2', type=str)
    arg_parse.add_argument('--model_3', type=str)
    arg_parse.add_argument('--model_4', type=str)
    arg_parse.add_argument('--model_5', type=str)
    arg_parse.add_argument('--model_6', type=str)
    arg_parse.add_argument('--model_7', type=str)
    arguments = arg_parse.parse_args()

    settings = Settings()
    default_parameters = settings.get_training_parameters()

    glove = Glove(default_parameters['dictionary_size'])
    glove.load_embedding()

    headline_numeric_log = NumericLog(
        default_parameters['max_headline_length'])
    article_numeric_log = NumericLog(default_parameters['max_article_length'])

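    # No model is attached (Preprocessor(None)); the preprocessor only has to
    # materialise the feature columns the seven models consume.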
    print('load data...')
    preprocessor = Preprocessor(None)
    preprocessor.set_encoder('glove', glove)
    preprocessor.set_encoder('headline_numeric_log', headline_numeric_log)
    preprocessor.set_encoder('article_numeric_log', article_numeric_log)
    preprocessor.set_parameter('max_headline_length',
                               default_parameters['max_headline_length'])
    preprocessor.set_parameter('body_begin_length',
                               default_parameters['body_begin_length'])

    preprocessor.load_data([
        'headline', 'body_begin', 'category', 'minute', 'hour', 'day_of_week',
        'day_of_year', 'headline_log_representation',
        'article_log_representation', 'competitive_score'
    ])

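    # Register the custom metrics so load_model can deserialize models that
    # were compiled with them.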
    custom_objects = {
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

    print('load models...')
    model_inputs = {}
    model_inputs['model_1'] = [preprocessor.test_data['headline']]
    model_inputs['model_2'] = [preprocessor.test_data['headline']]
    model_inputs['model_3'] = [preprocessor.test_data['body_begin']]
    model_inputs['model_4'] = [preprocessor.test_data['category']]
    model_inputs['model_5'] = [
        preprocessor.test_data[key]
        for key in ['minute', 'hour', 'day_of_week', 'day_of_year']
    ]
    model_inputs['model_6'] = [
        preprocessor.test_data[key] for key in
        ['headline_log_representation', 'article_log_representation']
    ]
    model_inputs['model_7'] = [preprocessor.test_data['competitive_score']]

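    # For every model path supplied on the command line, load the model and
    # round its predictions to hard 0/1 labels.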
    print('predict...')
    predictions = {}
    for model_name in model_inputs:
        if hasattr(arguments, model_name) and getattr(arguments, model_name):
            model = load_model(getattr(arguments, model_name),
                               custom_objects=custom_objects)
            predictions[model_name] = np.round(
                model.predict(model_inputs[model_name]))

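    # np.corrcoef returns the 2x2 correlation matrix of the two prediction
    # columns, so [0, 1] selects the Pearson coefficient. Each unordered model
    # pair is printed twice, since the measure is symmetric.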
    print('calculate correlation...')
    for model_name_1 in predictions:
        for model_name_2 in predictions:
            if model_name_1 != model_name_2:
                correlation = np.corrcoef(predictions[model_name_1][:, -1],
                                          predictions[model_name_2][:, -1])[0, 1]
                print(model_name_1, model_name_2, correlation)