Exemplo n.º 1
0
import evaluations

# Experiment configuration: a min-max scaled Lasso regression evaluated
# through a grid evaluator. The block defines four module-level names:
# dataloader, pipeline, evaluator and result_handlers.

# Loads data from the hits / non-hits CSV files and the features directory.
dataloader = MsdBbLoader(
    hits_file_path='/storage/nas3/datasets/music/billboard/msd_bb_matches.csv',
    non_hits_file_path=(
        '/storage/nas3/datasets/music/billboard/msd_bb_non_matches.csv'),
    features_path='/storage/nas3/datasets/music/billboard',
    non_hits_per_hit=1,
    # Feature selection comes entirely from common.hl_list().
    features=list(common.hl_list()),
    label='peak',
    # NOTE(review): presumably the value substituted for a missing label;
    # confirm against MsdBbLoader.
    nan_value=150,
    random_state=42,
)

pipeline = Pipeline([
    ('scale', MinMaxScaler()),
    # `normalize` is intentionally not passed: False was always its default,
    # and the parameter was deprecated in scikit-learn 1.0 and removed in 1.2,
    # where passing it raises a TypeError.
    ('linreg', Lasso(alpha=1.0)),
])

evaluator = GridEvaluator(
    parameters={},  # empty parameter grid
    grid_parameters=evaluations.grid_parameters(),
)

# The right-hand side is evaluated before the assignment, so it still refers
# to the previously bound `result_handlers` name (presumably an imported
# module); the assignment then rebinds that name to this list.
result_handlers = [result_handlers.print_gridsearch_results]
Exemplo n.º 2
0
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from nlp4musa2020.dataloaders import vectorizer
from nlp4musa2020.dataloaders.alf200k import ALF200KLoader
from nlp4musa2020.dataloaders.alf200k import genre_target_labels
import nlp4musa2020.evaluators as evaluators

# Experiment configuration: standard-scaled random forest evaluated through a
# grid evaluator. Defines dataloader, pipeline, evaluator and result_handlers.

dataloader = ALF200KLoader(
    path='data/processed/dataset-lfm-genres.pickle',
    load_feature_groups=[],  # no feature groups requested
    text_vectorizers=vectorizer.lda(),
    target=genre_target_labels(),
)

pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', RandomForestClassifier(n_jobs=-1)),
])

evaluator = GridEvaluator(
    # Keys use the `<step>__<param>` form to address the 'model' step.
    parameters={'model__n_estimators': [10, 100, 300]},
    grid_parameters=evaluators.grid_parameters_genres(),
)

# The right-hand side is evaluated first, so it still sees the previously
# bound `result_handlers` name before that name is rebound to this list.
result_handlers = [result_handlers.print_gridsearch_results]
Exemplo n.º 3
0
from nlp4musa2020.dataloaders.vectorizer import tfidf
import nlp4musa2020.evaluators as evaluators
from nlp4musa2020.models.simplenn_genre import SimpleGenreNN

# Experiment configuration: standard-scaled SimpleGenreNN (50 epochs)
# evaluated through a grid evaluator. Defines dataloader, pipeline, evaluator
# and result_handlers.

dataloader = ALF200KLoader(
    'data/processed/dataset-lfm-genres.pickle',
    load_feature_groups=[],  # no feature groups requested
    text_vectorizers=tfidf(),
    target=genre_target_labels(),
)

pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', SimpleGenreNN(epochs=50)),
])

evaluator = GridEvaluator(
    # Keys use the `<step>__<param>` form to address the 'model' step.
    parameters={
        'model__dense_sizes': [(32, 32), (64, 64)],
        'model__dropout_rate': [0.1],
    },
    grid_parameters=evaluators.grid_parameters_genres(),
)

# The right-hand side is evaluated first, so it still sees the previously
# bound `result_handlers` name before that name is rebound to this list.
result_handlers = [result_handlers.print_gridsearch_results]
    hits_file_path='/storage/nas3/datasets/music/billboard/msd_bb_matches.csv',
    non_hits_file_path=
    '/storage/nas3/datasets/music/billboard/msd_bb_non_matches.csv',
    features_path='/storage/nas3/datasets/music/billboard',
    non_hits_per_hit=1,
    features=[
        *common.all_list(),
    ],
    label='peak',
    nan_value=150,
    random_state=42,
)

# Min-max scaling followed by the WideAndDeep model, which receives the
# feature indices exposed by the dataloader defined earlier.
pipeline = Pipeline([
    ('scale', MinMaxScaler()),
    ('wide_and_deep', WideAndDeep(features=dataloader.feature_indices)),
])

evaluator = GridEvaluator(
    # 4 epoch settings x 2 batch-normalization settings x 3 dropout settings
    # give 24 parameter combinations for the 'wide_and_deep' step.
    parameters={
        'wide_and_deep__epochs': [10, 50, 100, 200],
        'wide_and_deep__batch_normalization': [False, True],
        'wide_and_deep__dropout_rate': [None, 0.25, 0.5],
    },
    grid_parameters=evaluations.grid_parameters(),
)

# The right-hand side is evaluated first, so it still sees the previously
# bound `result_handlers` name before that name is rebound to this list.
result_handlers = [result_handlers.print_gridsearch_results]
Exemplo n.º 5
0
# Experiment configuration: standard-scaled k-nearest-neighbours classifier
# on the 'explicitness' feature group, evaluated through a grid evaluator.
# Defines dataloader, pipeline, evaluator and result_handlers.

dataloader = ALF200KLoader(
    path='data/processed/dataset-lfm-genres.pickle',
    load_feature_groups=['explicitness'],
    text_vectorizers=None,
    # Sixteen target labels, spelled out explicitly.
    target=[
        'alternative',
        'blues',
        'country',
        'dance',
        'electronic',
        'funk',
        'hip hop',
        'indie',
        'jazz',
        'metal',
        'pop',
        'punk',
        'rap',
        'rnb',
        'rock',
        'soul',
    ],
)

pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', KNeighborsClassifier(n_jobs=-1, algorithm='ball_tree')),
])

evaluator = GridEvaluator(
    # Keys use the `<step>__<param>` form to address the 'model' step.
    parameters={
        'model__n_neighbors': [3, 4, 5, 10],
        'model__weights': ['distance'],
        'model__p': [1, 2],
    },
    grid_parameters=evaluators.grid_parameters_genres(),
)

# The right-hand side is evaluated first, so it still sees the previously
# bound `result_handlers` name before that name is rebound to this list.
result_handlers = [result_handlers.print_gridsearch_results]
Exemplo n.º 6
0
# Experiment configuration: standard-scaled LinearSVC, wrapped in a
# MultiOutputClassifier, on the 'rhymes' feature group, evaluated through a
# grid evaluator. Defines dataloader, pipeline, evaluator and result_handlers.

dataloader = ALF200KLoader(
    'data/processed/dataset-lfm-genres.pickle',
    load_feature_groups=['rhymes'],
    text_vectorizers=None,
    target=genre_target_labels(),
)

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', MultiOutputClassifier(LinearSVC())),
])

evaluator = GridEvaluator(
    # `model__estimator__<param>` reaches through the 'model' step and the
    # MultiOutputClassifier wrapper to the wrapped LinearSVC.
    parameters={
        'model__estimator__C': [0.1, 0.5, 1.0, 2.0, 5.0],
        # Was 'epsilon_insensitive', which is a LinearSVR (regression) loss.
        # LinearSVC documents only 'hinge' and 'squared_hinge', and
        # scikit-learn releases that validate parameters reject other values.
        # 'hinge' is used as the closest classification counterpart.
        # NOTE(review): switch to 'squared_hinge' if the LinearSVC default
        # loss was the actual intent.
        'model__estimator__loss': ['hinge'],
    },
    grid_parameters=evaluators.grid_parameters_genres(),
)

# The right-hand side is evaluated first, so it still sees the previously
# bound `result_handlers` name before that name is rebound to this list.
result_handlers = [result_handlers.print_gridsearch_results]
Exemplo n.º 7
0
# Experiment configuration: min-max scaled logistic regression evaluated
# through a grid evaluator. Defines dataloader, pipeline, evaluator and
# result_handlers.

# Loads data from the hits / non-hits CSV files and the features directory.
dataloader = MsdBbLoader(
    hits_file_path='/storage/nas3/datasets/music/billboard/msd_bb_matches.csv',
    non_hits_file_path=(
        '/storage/nas3/datasets/music/billboard/msd_bb_non_matches.csv'),
    features_path='/storage/nas3/datasets/music/billboard',
    non_hits_per_hit=1,
    # Entries of common.hl_list() followed by those of common.ll_list().
    features=list(common.hl_list()) + list(common.ll_list()),
    label='peak',
    # NOTE(review): presumably the value substituted for a missing label;
    # confirm against MsdBbLoader.
    nan_value=150,
    random_state=42,
)

pipeline = Pipeline([
    ('scale', MinMaxScaler()),
    ('logreg', LogisticRegression(multi_class='auto', solver='lbfgs')),
])

evaluator = GridEvaluator(
    # Single-value grid for the 'logreg' step.
    parameters={'logreg__C': [1.0]},
    grid_parameters=evaluations.grid_parameters(),
)

# The right-hand side is evaluated first, so it still sees the previously
# bound `result_handlers` name before that name is rebound to this list.
result_handlers = [result_handlers.print_gridsearch_results]
from nlp4musa2020.dataloaders.alf200k import ALF200KLoader, genre_target_labels
import nlp4musa2020.evaluators as evaluators
from nlp4musa2020.models.simplenn_genre import SimpleGenreNN

# Experiment configuration: constant-prediction DummyClassifier on the
# 'audio' feature group, evaluated through a grid evaluator. Defines
# dataloader, pipeline, evaluator and result_handlers.

dataloader = ALF200KLoader(
    'data/processed/dataset-lfm-genres.pickle',
    load_feature_groups=['audio'],
    text_vectorizers=None,
    target=genre_target_labels(),
)

# Constant prediction vector: 16 entries, all 0 except a 1 at index 14
# (marked "rock" by the original author).
# NOTE(review): this assumes genre_target_labels() puts 'rock' at index 14;
# confirm against that helper.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', DummyClassifier(strategy='constant',
                              constant=[0] * 14 + [1, 0])),
])

evaluator = GridEvaluator(
    # Single-value grid for the 'model' step.
    parameters={'model__random_state': [42]},
    grid_parameters=evaluators.grid_parameters_genres(),
)

# The right-hand side is evaluated first, so it still sees the previously
# bound `result_handlers` name before that name is rebound to this list.
result_handlers = [result_handlers.print_gridsearch_results]