def search(new_grid, Cs): grid = { 'segment': ['gender_female'], 'files_n': [5], 'lower_pop_bound': [3], 'ngram_ns': [[1, 2]], 'min_word_length': [2], 'word_type': ['alphanumeric'], 'use_urls': [True], 'min_url_depth': [1] } for k, v in new_grid.iteritems(): grid[k] = v res = [] for p in params(grid): X, y, X_test, y_test, features_map = \ constr(segment=p['segment'], files_n=p['files_n'], lower_pop_bound=p['lower_pop_bound'], ngram_ns=p['ngram_ns'], min_word_length=p['min_word_length'], word_type=p['word_type'], use_urls=p['use_urls'], min_url_depth=p['min_url_depth']) sc = set() for C in Cs: sc.add((C, C_score(C, X, y, X_test, y_test))) res.append((p, X.shape, sc)) print res return res
def search(new_grid, Cs): grid = {'segment' : ['gender_female'], 'files_n' : [5], 'lower_pop_bound' : [3], 'ngram_ns' : [[1, 2]], 'min_word_length' : [2], 'word_type' : ['alphanumeric'], 'use_urls' : [True], 'min_url_depth' : [1]} for k, v in new_grid.iteritems(): grid[k] = v res = [] for p in params(grid): X, y, X_test, y_test, features_map = \ constr(segment=p['segment'], files_n=p['files_n'], lower_pop_bound=p['lower_pop_bound'], ngram_ns=p['ngram_ns'], min_word_length=p['min_word_length'], word_type=p['word_type'], use_urls=p['use_urls'], min_url_depth=p['min_url_depth']) sc = set() for C in Cs: sc.add((C, C_score(C, X, y, X_test, y_test))) res.append((p, X.shape, sc)) print res return res
import cPickle as pickle

from constr import constr

# Build the "allegedly best" gender_female feature matrices once and
# cache them to disk so later experiments can skip the expensive
# construction step.
X, y, X_test, y_test, features_map = constr(
    segment='gender_female',
    files_n=47,
    lower_pop_bound=2,
    ngram_ns=[1, 2],
    min_word_length=1,
    word_type='alphanumeric',
    use_urls=True,
    min_url_depth=1)

# Fix: the original passed a bare open(...) to pickle.dump and never
# closed it, leaking the file handle; the with-block guarantees the
# file is flushed and closed even if dump() raises. Protocol -1 selects
# the highest protocol available.
with open('prepared/gender_female_allegedly_best.pickle', 'wb') as f:
    pickle.dump((X, y, X_test, y_test, features_map), f, -1)