'BERT_VERSION': 1, 'BATCH_SIZE': 16, 'EPOCHS': 3, 'OPTIMIZER': 'adam', 'LEARNING_RATE': 2e-6, 'PREPROCESSING_ALGORITHM_UUID': algorithm_id, 'PREPROCESSING_ALGORITHM': preprocessing_algorithm, 'KFOLD_HISTORY': [] } kfold = StratifiedKFold(n_splits=KFOLD, shuffle=True, random_state=SEED) if algorithm_id != 'None': train_data['preprocessed'] = tweets_preprocessor.preprocess( train_data.text, preprocessing_algorithm, keywords=train_data.keyword, locations=train_data.location) test_data['preprocessed'] = tweets_preprocessor.preprocess( test_data.text, preprocessing_algorithm, keywords=test_data.keyword, locations=test_data.location) else: train_data['preprocessed'] = train_data.text test_data['preprocessed'] = test_data.text inputs = train_data['preprocessed'] targets = train_data['target']
# Optional GloVe setup: record the embedding size on MODEL, derive the vector
# file name and path from it, and load the embeddings.
# NOTE(review): the original indentation was lost; the USE_GLOVE branch is
# assumed to cover only these four GloVe statements -- confirm.
if USE_GLOVE:
    MODEL['GLOVE'] = {'SIZE': 200}
    GLOVE = f'glove.twitter.27B.{MODEL["GLOVE"]["SIZE"]}d.txt'
    GLOVE_FILE_PATH = f'./data/glove/{GLOVE}'
    GLOVE_EMBEDDINGS = get_glove_embeddings(GLOVE_FILE_PATH)

# Run metadata: a fresh random identifier, the preprocessing algorithm and its
# id, and the output directory for saved models.
# NOTE(review): the directory is hard-coded to 'glove-false' regardless of
# USE_GLOVE -- confirm this is intended.
MODEL.update({
    'UUID': str(uuid.uuid4()),
    'PREPROCESSING_ALGORITHM': PREPROCESSING_ALGORITHM,
    'PREPROCESSING_ALGORITHM_UUID': PREPROCESSING_ALGORITHM_ID,
    'DIR': f'./data-saved-models/glove-false/{NETWORK_KEY}/',
})
ensure_path_exists(MODEL['DIR'])
MODEL['PREFIX'] = f'{NETWORK_KEY}-{PREPROCESSING_ALGORITHM_ID}-SEED-{SEED}'

# Apply the same preprocessing call to the training and test frames, storing
# the result in each frame's 'preprocessed' column.
for _frame in (train_data, test_data):
    _frame['preprocessed'] = tweets_preprocessor.preprocess(
        _frame.text,
        PREPROCESSING_ALGORITHM,
        keywords=_frame.keyword,
        locations=_frame.location,
    )

# Seeded split of the training data with test_size=0.3 for validation.
(
    train_inputs,
    val_inputs,
    train_targets,
    val_targets,
) = train_test_split(
    train_data['preprocessed'],
    train_data['target'],
    test_size=0.3,
    random_state=SEED,
)