예제 #1
0
        'BERT_VERSION': 1,
        'BATCH_SIZE': 16,
        'EPOCHS': 3,
        'OPTIMIZER': 'adam',
        'LEARNING_RATE': 2e-6,
        'PREPROCESSING_ALGORITHM_UUID': algorithm_id,
        'PREPROCESSING_ALGORITHM': preprocessing_algorithm,
        'KFOLD_HISTORY': []
    }

    # Cross-validation splitter: KFOLD splits, shuffled, seeded with SEED so
    # the fold assignment is repeatable between runs.
    kfold = StratifiedKFold(n_splits=KFOLD, shuffle=True, random_state=SEED)

    # Fill the 'preprocessed' column of both frames (train first, then test).
    # NOTE: algorithm_id is compared against the *string* 'None', not the
    # None object; in that case the raw text is used unchanged.
    for frame in (train_data, test_data):
        frame['preprocessed'] = (
            tweets_preprocessor.preprocess(
                frame.text,
                preprocessing_algorithm,
                keywords=frame.keyword,
                locations=frame.location)
            if algorithm_id != 'None'
            else frame.text
        )

    # Model inputs are the (possibly preprocessed) texts; targets are the
    # 'target' column of the training frame.
    inputs, targets = train_data['preprocessed'], train_data['target']
예제 #2
0
    # Optional GloVe embeddings: record the embedding size on the model
    # description, derive the embeddings file name from it and load the file.
    if USE_GLOVE:
        MODEL['GLOVE'] = {'SIZE': 200}
        GLOVE = f'glove.twitter.27B.{MODEL["GLOVE"]["SIZE"]}d.txt'
        GLOVE_FILE_PATH = f'./data/glove/{GLOVE}'
        GLOVE_EMBEDDINGS = get_glove_embeddings(GLOVE_FILE_PATH)

    # Run metadata: a fresh UUID, the preprocessing algorithm and its id, and
    # the directory the model is saved under.
    # NOTE(review): the directory is hard-coded to 'glove-false' even when
    # USE_GLOVE is set above -- confirm this is intended.
    MODEL.update({
        'UUID': str(uuid.uuid4()),
        'PREPROCESSING_ALGORITHM': PREPROCESSING_ALGORITHM,
        'PREPROCESSING_ALGORITHM_UUID': PREPROCESSING_ALGORITHM_ID,
        'DIR': f'./data-saved-models/glove-false/{NETWORK_KEY}/',
    })
    ensure_path_exists(MODEL['DIR'])
    MODEL['PREFIX'] = f'{NETWORK_KEY}-{PREPROCESSING_ALGORITHM_ID}-SEED-{SEED}'

    # Fill the 'preprocessed' column of both frames (train first, then test)
    # using the selected preprocessing algorithm.
    for frame in (train_data, test_data):
        frame['preprocessed'] = tweets_preprocessor.preprocess(
            frame.text,
            PREPROCESSING_ALGORITHM,
            keywords=frame.keyword,
            locations=frame.location)

    # Split the preprocessed training texts and their targets into training
    # and validation parts (test_size=0.3, seeded with SEED).
    train_inputs, val_inputs, train_targets, val_targets = train_test_split(
        train_data['preprocessed'],
        train_data['target'],
        test_size=0.3,
        random_state=SEED,
    )