def init_data(self, param):
    """Initialize ``param.num`` and ``param.data`` for a non-numeric feature parameter.

    Returns immediately, leaving ``param`` untouched, when ``param.data`` is
    already set or ``param`` is a ``NumericFeatureParameters`` instance.

    :param param: feature parameters object, updated in place:
                  ``num`` is set from the feature extractor; ``data`` becomes
                  a ``defaultdict`` of random vectors when ``dim`` is a number,
                  otherwise an ``UnknownDict`` built from the word vectors that
                  ``load_word2vec(param.dim)`` returns (``dim`` is then
                  replaced by the loaded vector size)
    """
    if param.data is not None or isinstance(param, NumericFeatureParameters):
        return
    param.num = self.feature_extractor.num_features_non_numeric(param.suffix)
    if isinstance(param.dim, Number):
        # Dimensions given as a number, not as a file to load.
        # The dimension is bound as a lambda default so that later changes to
        # param.dim do not affect the size of newly created vectors.
        param.data = defaultdict(lambda d=param.dim: Config().random.normal(size=d))
        # Bare lookup on purpose: it makes the defaultdict create the entry
        # for the unknown value right away.
        param.data[UnknownDict.UNKNOWN]
    else:
        # Otherwise, not a number but a string with path to word vectors file
        w2v = load_word2vec(param.dim)
        unk = Config().random.normal(size=w2v.vector_size)
        param.dim = w2v.vector_size
        param.data = UnknownDict({x: w2v[x] for x in w2v.vocab}, unk)
def main():
    """Run a random hyperparameter search, appending each result to a CSV file.

    Environment variables read:

    * ``PARAMS_FILE`` -- path of the CSV results file (default ``params.csv``)
    * ``PARAMS_NUM`` -- number of parameter combinations to sample (default 30)
    * ``W2V_FILE*`` -- the value of every variable whose name starts with
      ``W2V_FILE`` is passed to ``load_word2vec``; the results are added to
      the candidate values for ``wordvectors``

    The CSV header is written first; after each ``Params.run()`` the file is
    reopened in append mode and one row is added, so finished runs are on disk
    before the next one starts. Finally the combination maximizing
    ``Params.score`` is printed.
    """
    Config().args.nowrite = True
    out_file = os.environ.get("PARAMS_FILE", "params.csv")
    w2v_files = [os.environ[f] for f in os.environ if f.startswith("W2V_FILE")]
    num = int(os.environ.get("PARAMS_NUM", 30))
    np.random.seed()  # no argument: NumPy reseeds itself, so every run samples differently

    # (name, candidates) pairs. np.random.choice draws uniformly over the
    # given elements, so repeating a candidate (e.g. 100 * [...]) makes it
    # proportionally more likely. A bare integer means "draw from range(n)".
    domains = (
        ("seed", 2147483647),
        ("classifier", 100 * [config.FEEDFORWARD_NN] + list(config.CLASSIFIERS)),
        ("wordvectors", [50, 100, 200, 300] + [load_word2vec(f) for f in w2v_files]),
        ("tagdim", (5, 10, 20)),
        ("labeldim", (5, 10, 20)),
        ("punctdim", (1, 2, 3)),
        ("gapdim", (1, 2, 3)),
        ("actiondim", (3, 5, 10)),
        ("layerdim", (50, 100, 200, 300, 500, 1000)),
        ("layers", [1] + 5 * [2]),
        ("activation", config.ACTIVATIONS),
        ("init", 5 * [config.INITIALIZATIONS[0]] + list(config.INITIALIZATIONS)),
        ("batchsize", (10, 30, 50, 100, 200, 500)),
        ("minibatchsize", (50, 100, 200, 300, 500, 1000)),
        ("nbepochs", range(10, 51)),
        ("optimizer", 10 * [config.OPTIMIZERS[0]] + list(config.OPTIMIZERS)),
        ("loss", 20 * [config.OBJECTIVES[0]] + list(config.OBJECTIVES)),
        ("importance", (1, 2)),
        ("earlyupdate", 6 * [False] + [True]),
        ("iterations", range(1, 21)),
        ("worddropout", (0, .1, .2, .25, .3)),
        ("normalize", (False, True)),
        ("regularizer", [None] + 3 * [config.REGULARIZERS[-1]] + list(config.REGULARIZERS)),
        ("regularization", (1e-7, 1e-6, 1e-5, 1e-4)),
        ("dropout", (0, .1, .2, .3, .4, .5)),
    )

    # One column of `num` independent draws per hyperparameter, in domain order.
    columns = [[(name, value) for value in np.random.choice(values, num)]
               for name, values in domains]
    # Transpose: the i-th draw of every hyperparameter forms the i-th combination.
    params = [Params(OrderedDict(row)) for row in zip(*columns)]

    print("All parameter combinations to try:")
    print("\n".join(map(str, params)))
    print("Saving results to '%s'" % out_file)
    with open(out_file, "w") as f:
        csv.writer(f).writerow(params[0].get_field_titles())
    for param in params:
        param.run()
        # Reopen per run so each finished result is persisted immediately.
        with open(out_file, "a") as f:
            csv.writer(f).writerow(param.get_fields())
    best = max(params, key=Params.score)
    print("Best parameters: %s" % best)
def init_data(self, param):
    """Initialize ``param.num`` and ``param.data`` for a non-numeric feature parameter.

    Returns immediately, leaving ``param`` untouched, when ``param.data`` is
    already set or ``param`` is a ``NumericFeatureParameters`` instance.

    :param param: feature parameters object, updated in place:
                  ``num`` is set from the feature extractor and ``data``
                  becomes a ``DropoutDict``. When ``dim`` is not a number it
                  is passed to ``load_word2vec``; ``dim``, ``init`` and
                  possibly ``size`` are then derived from the loaded vectors
    """
    if param.data is not None or isinstance(param, NumericFeatureParameters):
        return
    param.num = self.feature_extractor.num_features_non_numeric(param.suffix)
    if isinstance(param.dim, Number):
        # Dimension given as a number: start from an empty dictionary.
        param.data = DropoutDict(max_size=param.size, dropout=param.dropout)
    else:
        # Otherwise param.dim is handed to load_word2vec to get word vectors.
        w2v = load_word2vec(param.dim)
        vocab = w2v.vocab
        if param.size is None or param.size == 0:
            # No size limit given: whole vocabulary plus one extra row
            # (the `unknown` vector stacked on top below).
            param.size = len(w2v.vocab) + 1
        else:
            # Keep the first size - 1 entries, leaving one row for `unknown`.
            vocab = list(vocab)[:param.size - 1]
        param.dim = w2v.vector_size
        weights = np.array([w2v[x] for x in vocab])
        unknown = weights.mean(axis=0)  # mean of the kept vectors
        # 1-tuple holding the matrix whose first row is `unknown`, followed by
        # one row per kept vocabulary entry.
        param.init = (np.vstack((unknown, weights)),)
        param.data = DropoutDict(max_size=param.size, keys=vocab, dropout=param.dropout)