# Build the shared vocabulary as the union of all datasets' vocabularies
all_vocab = set()
for ind, dataset in enumerate(datasets):
    all_vocab.update(data[ind].vocabulary)

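# Load (normalized) pre-trained word vectors, restricted to the shared
# vocabulary and capped at max_vocab_size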
w2v = NormEmbeddingFeature.from_file(config.wordvecs,
                                     max_rank=config.max_vocab_size,
                                     vocabulary=all_vocab,
                                     name='words')
features = [w2v]
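# Optionally add a SENNA-style capitalization feature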
if config.word_features:
    features.append(SennaCapsFeature(name='caps'))

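# Attach the features to each dataset and build fixed-size context-window inputs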
for ind, dataset in enumerate(datasets):
    data[ind].tokens.add_features(features)
    data[ind].tokens.add_inputs(windowed_inputs(config.window_size, features))

# Log word vector feature stat summary
info('{}: {}'.format(config.wordvecs, w2v.summary()))

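# Build Keras input layers and embedding lookups for each feature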
inputs, embeddings = inputs_and_embeddings(features, config)

# Concatenate feature embeddings and reshape to (1, window_size,
# total_feature_dim) for 2D convolution
seq = concat(embeddings)
cshape = (config.window_size, sum(f.output_dim for f in features))
seq = Reshape((1,) + cshape)(seq)
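# Note (assuming Keras 1.x with channels-first 'th' dim ordering): the reshape
# yields a single-channel (1, window_size, feature_dim) "image" per example,
# so each Convolution2D(filter_num, filter_size, cshape[1]) branch below spans
# the full feature dimension and slides over the token window only, giving
# outputs of shape (filter_num, window_size - filter_size + 1, 1).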

# Convolutions over the token window, one branch per filter size
conv_outputs = []
for filter_size, filter_num in zip(config.filter_sizes, config.filter_nums):
    conv = Convolution2D(filter_num, filter_size, cshape[1],
                         activation='relu')(seq)
    conv_outputs.append(conv)
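
# Feedforward (MLP) tagger: read settings, load CoNLL-format data, and build a
# dense feedforward model over the flattened window of embeddings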
config = cli_settings(['datadir', 'wordvecs'], Defaults)

data = conlldata.load_dir(config.datadir, config)

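# Viterbi prediction mapper built from the training sentences, used to map
# per-token predictions to tag sequences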
vmapper = viterbi.get_prediction_mapper(data.train.sentences, config)

w2v = NormEmbeddingFeature.from_file(config.wordvecs,
                                     max_rank=config.max_vocab_size,
                                     vocabulary=data.vocabulary,
                                     name='words')
features = [w2v]
if config.word_features:
    features.append(SennaCapsFeature(name='caps'))

data.tokens.add_features(features)
data.tokens.add_inputs(windowed_inputs(config.window_size, features))

# Log word vector feature stat summary
info('{}: {}'.format(config.wordvecs, w2v.summary()))

inputs, embeddings = inputs_and_embeddings(features, config)

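# Concatenate the embeddings, flatten the context window, stack dense hidden
# layers with dropout, and predict tags with a softmax output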
seq = concat(embeddings)
seq = Flatten()(seq)
for size in config.hidden_sizes:
    seq = Dense(size, activation=config.hidden_activation)(seq)
seq = Dropout(config.output_drop_prob)(seq)
out = Dense(data.tokens.target_dim, activation='softmax')(seq)
model = Model(input=inputs, output=out)

optimizer = get_optimizer(config)