def grid_search(path_to_texts, param_grid, word_to_check):
    """Train one fastText skip-gram model per combination in *param_grid*
    and record the cosine-similarity neighbours of *word_to_check* for each.

    Parameters
    ----------
    path_to_texts : str
        Path to the training corpus handed to ``fasttext.train_unsupervised``.
    param_grid : dict
        Grid definition understood by sklearn's ``ParameterGrid``; must
        supply the keys ``'epoch'``, ``'ws'`` and ``'minCount'``.
    word_to_check : str
        Word whose nearest neighbours are evaluated for every trained model.

    Returns
    -------
    collections.defaultdict
        Lists keyed by ``'ws'``, ``'epoch'``, ``'minCount'`` and
        ``'cos_sim'`` — one appended entry per grid point, in grid order.
    """
    params = ParameterGrid(param_grid)
    n_combos = len(params)  # hoisted: the grid size is loop-invariant
    results = defaultdict(list)
    # Iterate the grid directly instead of range(len(...)) + __getitem__.
    for ind, curr_params in enumerate(params):
        print('-' * 100)
        print(f'Done {ind}/{n_combos}')
        print(f'\tCurrent set of params : {curr_params}')
        model = fasttext.train_unsupervised(
            path_to_texts,
            dim=300,
            model='skipgram',
            epoch=curr_params['epoch'],
            ws=curr_params['ws'],
            minCount=curr_params['minCount'],
            thread=4,
            bucket=100000,
            verbose=0,
        )
        results['ws'].append(curr_params['ws'])
        results['epoch'].append(curr_params['epoch'])
        results['minCount'].append(curr_params['minCount'])
        z = get_cos_sim_from_model(word_to_check, model)
        results['cos_sim'].append(z)
        print(f'\tNumber of words in vocab : {len(model.words)}')
        print(f'\tMost similar to "{word_to_check}" : ')
        for key, value in z.items():
            print('\t', key, ' : ', value)
        # Drop the (large) model before training the next grid point to
        # keep peak memory down.
        del model
    return results
# NOTE(review): this chunk begins mid-statement. The leading
# `if minimum == maximum:` and the second `else:` below belong to an
# enclosing conditional (presumably "is this feature numeric?") inside a
# loop over features that is outside this view — the indentation here is
# reconstructed and must be confirmed against the full file.
        if minimum == maximum:
            # Degenerate range: a constant feature gets its single value
            # repeated 25 times so the grid axis still has 25 entries.
            alle[feature] = [minimum] * 25
        else:
            # 25 evenly spaced candidate values; np.arange excludes
            # `maximum` itself from the axis.
            diff = (maximum - minimum) / 25
            alle[feature] = np.arange(minimum, maximum, diff)
    else:
        # Non-numeric feature: candidates are its observed unique values.
        alle[feature] = X[feature].unique()

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid(alle)

# Evaluate the Grid: score n_samples random grid points with the
# pre-trained random forest to build training data for the GP below.
grid_results = pd.DataFrame()
n_samples = 50000
iteration = 0
while iteration < n_samples:
    # NOTE(review): if `randint` is stdlib random.randint, the upper bound
    # is INCLUSIVE, so randint(0, len(grid)) can return len(grid) and raise
    # IndexError; numpy.random.randint would be exclusive. Confirm which is
    # imported and, if stdlib, use randint(0, len(grid) - 1).
    params = grid[randint(0, grid.__len__())]
    params = check_conditions(params)
    # Row vector of the parameter values for a single prediction.
    test = np.array(pd.Series(params)).reshape(1, -1)
    params['prediction'] = full_emp_randomForest.predict(test)[0]
    # NOTE(review): DataFrame.append was removed in pandas 2.0 — this
    # requires pandas < 2.0; a list of dicts + one pd.DataFrame(...) call
    # at the end would also be far faster than 50k incremental appends.
    grid_results = grid_results.append(params, ignore_index=True)
    iteration += 1

# Train Gaussian Process with results.
# NOTE(review): `choice` is defined outside this view — presumably the
# name of the single hyperparameter column being modelled; confirm.
hyperparameter = grid_results[choice]
prediction = grid_results['prediction']
standardScaler = StandardScaler()
# Standardise the single hyperparameter column (reshape to (n, 1) as
# required by scikit-learn transformers).
hyperparameter = standardScaler.fit_transform(
    hyperparameter.values.reshape(-1, 1))
# RBF kernel with a constant variance factor of 1.0**2; length scale is
# bounded to [0.1, 20.0] for the GP's hyperparameter optimisation.
rbf = 1.0**2 * RBF(length_scale=5.0, length_scale_bounds=(1e-1, 20.0))
gaussianProcessRegressor = GaussianProcessRegressor(kernel=rbf)
batch_offset = 0