def grid_search(path_to_texts, param_grid, word_to_check):
    """Grid-search fastText skip-gram hyper-parameters on a text corpus.

    For every combination in ``param_grid`` (keys: 'epoch', 'ws', 'minCount'),
    trains an unsupervised 300-dim skip-gram model on ``path_to_texts`` and
    records the cosine-similarity neighbours of ``word_to_check``.

    Parameters
    ----------
    path_to_texts : str
        Path to the training corpus passed to ``fasttext.train_unsupervised``.
    param_grid : dict
        Grid definition consumed by ``sklearn.model_selection.ParameterGrid``;
        must provide 'epoch', 'ws' and 'minCount' values.
    word_to_check : str
        Word whose nearest neighbours are computed per trained model.

    Returns
    -------
    collections.defaultdict(list)
        Keys 'ws', 'epoch', 'minCount' (the tried values, aligned by index)
        and 'cos_sim' (the per-model result of ``get_cos_sim_from_model``).
    """
    params = ParameterGrid(param_grid)
    n_combos = len(params)  # hoisted: loop-invariant
    results = defaultdict(list)
    # ParameterGrid supports direct iteration; no need for range/__getitem__.
    for ind, curr_params in enumerate(params):
        print('-'*100)
        print('Done {}/{}'.format(ind, n_combos))
        print('\tCurrent set of params : {}'.format(curr_params))
        model = fasttext.train_unsupervised(
            path_to_texts,
            dim=300,
            model='skipgram',
            epoch=curr_params['epoch'],
            ws=curr_params['ws'],
            minCount=curr_params['minCount'],
            thread=4,
            bucket=100000,
            verbose=0,
        )
        results['ws'].append(curr_params['ws'])
        results['epoch'].append(curr_params['epoch'])
        results['minCount'].append(curr_params['minCount'])
        z = get_cos_sim_from_model(word_to_check, model)

        results['cos_sim'].append(z)
        print('\tNumber of words in vocab : {}'.format(len(model.words)))
        print('\tMost similar to "{}" : '.format(word_to_check))
        for key, value in z.items():
            print('\t', key, ' : ', value)
        # Release the (large) trained model before the next training run.
        del model

    return results
# --- snippet boundary (scrape artifact "Exemplo n.º 2"): the fragment below
# --- belongs to a different example and its enclosing definition is missing.
            if minimum == maximum:
                alle[feature] = [minimum] * 25
            else:
                diff = (maximum - minimum) / 25
                alle[feature] = np.arange(minimum, maximum, diff)
    else:
        alle[feature] = X[feature].unique()
from sklearn.model_selection import ParameterGrid
# Build the full hyper-parameter grid from the candidate values gathered above.
grid = ParameterGrid(alle)

# Evaluate the Grid: draw random parameter combinations and score each one
# with the pre-trained random-forest surrogate (`full_emp_randomForest`).
grid_rows = []
n_samples = 50000
iteration = 0
while iteration < n_samples:
    # random.randint is inclusive on BOTH bounds, so the original
    # randint(0, len(grid)) could raise IndexError on the top draw.
    # NOTE(review): if `randint` is actually numpy.random.randint (exclusive
    # high bound), the upper bound should stay len(grid) — confirm the import.
    params = grid[randint(0, len(grid) - 1)]
    params = check_conditions(params)
    test = np.array(pd.Series(params)).reshape(1, -1)
    params['prediction'] = full_emp_randomForest.predict(test)[0]
    grid_rows.append(params)
    iteration += 1

# DataFrame.append was deprecated and removed in pandas 2.0, and calling it
# per iteration is O(n^2); build the frame once from the collected rows.
grid_results = pd.DataFrame(grid_rows)

# Train Gaussian Process with results
hyperparameter = grid_results[choice]
prediction = grid_results['prediction']
standardScaler = StandardScaler()
# GP input must be 2-D: reshape the single hyper-parameter column to (n, 1).
hyperparameter = standardScaler.fit_transform(
    hyperparameter.values.reshape(-1, 1))
rbf = 1.0**2 * RBF(length_scale=5.0, length_scale_bounds=(1e-1, 20.0))
gaussianProcessRegressor = GaussianProcessRegressor(kernel=rbf)
batch_offset = 0