def experiment(config: DictConfig):
    """Run the MedSTS experiment: prepare the data, train the model,
    score sentence-pair similarity, and log/plot Pearson correlations.

    Args:
        config: Hydra configuration with ``data`` paths and ``model`` settings.
    """
    # Hydra changes the working directory; resolve all configured paths first.
    to_abs = hydra.utils.to_absolute_path
    raw_train = to_abs(config.data.raw_train_path)
    raw_test = to_abs(config.data.raw_test_path)
    raw_test_scores = to_abs(config.data.raw_test_score_path)
    train_out = to_abs(config.data.train_path)
    test_out = to_abs(config.data.test_path)

    train_df = prepare_train(raw_train, train_out)
    test_df = prepare_test(raw_test, raw_test_scores, test_out)

    model = train_model(config)
    train_df['similarity'] = sentence_similarity(model, train_df)
    test_df['similarity'] = sentence_similarity(model, test_df)

    train_correlation = pearson_correlation(train_df.similarity, train_df.score)
    test_correlation = pearson_correlation(test_df.similarity, test_df.score)

    # Persist the scatter charts next to the run's outputs.
    plot_scatter(train_df, train_correlation).save('train_chart.json')
    plot_scatter(test_df, test_correlation).save('test_chart.json')

    logger.info('Encoder %s', config.model.encoder)
    logger.info('Pooling %s', config.model.pooling)
    logger.info('Normalizer %s', config.model.normalizer)
    logger.info('MedSTS Train correlation %s', train_correlation)
    logger.info('MedSTS Test correlation %s', test_correlation)
def experiment(config: DictConfig):
    """Run the BIOSSES experiment with 10-fold stratified cross-validation.

    Reads the raw dataset, stratifies folds on the integer-binned score,
    trains one model per fold, and logs per-fold and mean Pearson
    correlations for train and test splits.

    Args:
        config: Hydra configuration with ``data`` paths/random_state and
            ``model`` settings.
    """
    df = pd.read_csv(hydra.utils.to_absolute_path(config.data.raw_path))
    df.columns = ['pair_id', 'sentence_1', 'sentence_2', 'a_1', 'a2', 'a3', 'a4', 'a5', 'score']
    # Bin the continuous score into integers so folds can be stratified by score level.
    df['score_bin'] = df.score.map(lambda x: int(x))
    results = []
    # BUG FIX: random_state has no effect unless shuffle=True, and recent
    # scikit-learn raises ValueError for random_state with shuffle=False.
    cross_validation = StratifiedKFold(
        n_splits=10, shuffle=True, random_state=config.data.random_state)
    for i, (train_index, test_index) in enumerate(cross_validation.split(df, df.score_bin.values)):
        # .copy() so adding the 'similarity' column below does not mutate a
        # view of df (avoids SettingWithCopyWarning / silent no-ops).
        train_df = df.iloc[train_index].copy()
        test_df = df.iloc[test_index].copy()
        to_file(train_df, hydra.utils.to_absolute_path(config.data.train_path))
        to_file(test_df, hydra.utils.to_absolute_path(config.data.test_path))
        model = train_model(config)
        train_df['similarity'] = sentence_similarity(model, train_df)
        test_df['similarity'] = sentence_similarity(model, test_df)
        train_correlation = pearson_correlation(train_df.similarity, train_df.score)
        test_correlation = pearson_correlation(test_df.similarity, test_df.score)
        logger.info('Cross Validation Split %s', i)
        logger.info('Train correlation %s', train_correlation)
        logger.info('Test correlation %s', test_correlation)
        results.append({'train': train_correlation, 'test': test_correlation})
    result_df = pd.DataFrame(results)
    logger.info('Encoder %s', config.model.encoder)
    logger.info('Pooling %s', config.model.pooling)
    logger.info('Normalizer %s', config.model.normalizer)
    logger.info('BIOSSES Train correlation %s', result_df.train.mean())
    logger.info('BIOSSES Test correlation %s', result_df.test.mean())
def experiment(config):
    """Train one model on every official training/devel file, then fit a
    binary classifier per label.

    Args:
        config: Hydra configuration with ``data`` paths and model settings.
    """
    base_dir = hydra.utils.to_absolute_path(config.data.raw_path)
    train_path = hydra.utils.to_absolute_path(config.data.train_path)
    test_path = hydra.utils.to_absolute_path(config.data.test_path)
    # One subdirectory per label; sort for a deterministic ordering.
    labels = sorted(os.listdir(base_dir))

    # Collect all files and train model on all official training files
    # (train + devel splits feed training; test splits are held out).
    train_frames = [get_dataset(base_dir, label, split)
                    for label in labels
                    for split in ('train', 'devel')]
    test_frames = [get_dataset(base_dir, label, 'test') for label in labels]
    to_txt(pd.concat(train_frames), train_path)
    to_txt(pd.concat(test_frames), test_path)
    model = train_model(config)

    # Train binary classifier for each label
    for label in labels:
        logger.info('Classifying: %s', label)
        classify(model,
                 get_dataset(base_dir, label, 'train'),
                 get_dataset(base_dir, label, 'devel'),
                 get_dataset(base_dir, label, 'test'),
                 label)
def experiment(config):
    """Run the test-set ablation study twice: once over all tokens and once
    restricted to unique tokens, logging mean-average-precision metrics.

    Args:
        config: Hydra configuration with ``ablation`` settings
            (``num_samples``, ``distance_map``, ``attention_map``).
    """
    prepare_datasets(config)
    model = train_model(config)

    logger.info('Test set ablation study')
    _run_ablation(model, config, unique_tokens=False, csv_name='ablation.csv')

    logger.info('Test set ablation study (Unique Words)')
    _run_ablation(model, config, unique_tokens=True, csv_name='ablation_unique.csv')


def _run_ablation(model, config, unique_tokens, csv_name):
    """Sample documents, run one ablation pass, write the CSV, and log metrics."""
    logger.info('Sampling %s documents per graph category for ablation study',
                config.ablation.num_samples)
    df = sample_documents(config, model.test_df, config.ablation.num_samples)
    # Only pass unique_tokens when requested, mirroring the original call sites.
    if unique_tokens:
        ablation_df = ablation_study(model, config, df, unique_tokens=True)
    else:
        ablation_df = ablation_study(model, config, df)
    ablation_df.to_csv(csv_name)
    logger.info(ablation_df.groupby('ablation_category').mean().to_string())
    if config.ablation.distance_map:
        logger.info('Distance: Mean Average Precision: %s',
                    ablation_df['distance_map'].mean())
        logger.info('Distance: Median Mean Average Precision: %s',
                    ablation_df['distance_map'].median())
    if config.ablation.attention_map:
        # BUG FIX: the original logged .median() for the mean value here.
        logger.info('Attention: Mean Average Precision: %s',
                    ablation_df['attention_map'].mean())
        logger.info('Attention: Median Mean Average Precision: %s',
                    ablation_df['attention_map'].median())
def experiment(config):
    """End-to-end run: build the datasets, train a model, and classify.

    Args:
        config: Hydra configuration driving dataset preparation and training.
    """
    train_data, test_data = prepare_datasets(config)
    model = train_model(config)
    classify(model, train_data, test_data)