def model_two_layer_small_embedding(alphabet):
    """Build a two-convolution-layer CSM over a 32-dim character embedding.

    The network stacks: dictionary encoding -> embedding -> wide convolution
    -> sum folding -> k-max pooling -> bias -> tanh, then a second
    convolution/pooling/bias/tanh stage, and a 2-class softmax head.
    NOTE(review): layer dimension arguments (e.g. n_input_dims=16,
    n_input_dimensions=320) are taken on trust from the original; they are
    not verifiable from this file alone.
    """
    stack = [
        DictionaryEncoding(vocabulary=alphabet),
        WordEmbedding(
            dimension=32,
            vocabulary_size=len(alphabet)),
        # First convolutional stage.
        SentenceConvolution(
            n_feature_maps=5,
            kernel_width=10,
            n_channels=1,
            n_input_dimensions=32),
        SumFolding(),
        KMaxPooling(k=7),
        Bias(
            n_input_dims=16,
            n_feature_maps=5),
        Tanh(),
        # Second convolutional stage.
        SentenceConvolution(
            n_feature_maps=5,
            kernel_width=5,
            n_channels=5,
            n_input_dimensions=16),
        KMaxPooling(k=4),
        Bias(
            n_input_dims=16,
            n_feature_maps=5),
        Tanh(),
        # Binary classification head.
        Softmax(
            n_classes=2,
            n_input_dimensions=320),
    ]
    return CSM(layers=stack)
def model_one_layer_variant_2(alphabet):
    """Build a single-convolution-layer CSM with a 42-dim character embedding.

    Pipeline: dictionary encoding -> embedding -> convolution -> sum folding
    -> k-max pooling -> bias -> tanh -> 2-class softmax.
    NOTE(review): the dimension constants (21, 420) are preserved verbatim
    from the original; their consistency cannot be checked from this file.
    """
    stack = [
        DictionaryEncoding(vocabulary=alphabet),
        WordEmbedding(
            dimension=42,
            vocabulary_size=len(alphabet)),
        SentenceConvolution(
            n_feature_maps=5,
            kernel_width=6,
            n_channels=1,
            n_input_dimensions=42),
        SumFolding(),
        KMaxPooling(k=4),
        Bias(
            n_input_dims=21,
            n_feature_maps=5),
        Tanh(),
        # Binary classification head.
        Softmax(
            n_classes=2,
            n_input_dimensions=420),
    ]
    return CSM(layers=stack)
# Hold out the last 500 examples as a single validation minibatch.
# NOTE(review): assumes X and Y each have at least 500 examples — confirm
# against the loading code, which is not visible in this chunk.
validation_data_provider = LabelledSequenceMinibatchProvider(
    X=X[-500:],
    Y=Y[-500:],
    batch_size=500)

# Character-level sub-network that produces a word representation from the
# characters of each word.
word_embedding_model = CSM(layers=[
    WordEmbedding(  # really a character embedding
        dimension=16,
        vocabulary_size=len(alphabet)),
    SentenceConvolution(
        n_feature_maps=10,
        kernel_width=5,
        n_channels=1,
        n_input_dimensions=16),
    SumFolding(),
    KMaxPooling(k=2),
    MaxFolding(),
    Tanh(),
])

# Wrap the character-level model so it can be used as the word-embedding
# layer of the tweet-level model below.
word_embedding = WordFromCharacterEmbedding(
    embedding_model=word_embedding_model,
    alphabet_encoding=alphabet)

# print word_embedding.fprop(X, meta)

# Tweet-level model built on top of the character-derived word embeddings.
# NOTE(review): this layer list is truncated in the visible chunk — the
# closing of the list and any further layers continue beyond this view.
tweet_model = CSM(layers=[
    word_embedding,
    SentenceConvolution(
        n_feature_maps=5,
        kernel_width=10,
        n_channels=1,
        n_input_dimensions=80),
    SumFolding(),
    KMaxPooling(k=7),
# Single-convolution scoring model: embedding -> convolution -> sum folding
# -> k-max pooling -> two-layer perceptron emitting one scalar score.
model = CSM(layers=[
    WordEmbedding(
        dimension=embedding_dimension,
        vocabulary_size=len(alphabet)),
    SentenceConvolution(
        n_feature_maps=n_feature_maps,
        kernel_width=kernel_width,
        n_channels=1,
        n_input_dimensions=embedding_dimension),
    SumFolding(),
    KMaxPooling(k=pooling_size),
    # Flattened pooled feature-map size; presumably SumFolding halves the
    # embedding dimension (hence the division by 2) — confirm against the
    # SumFolding implementation.  Floor division (//) keeps n_input an int
    # under Python 3's true division while behaving identically on Python 2.
    Linear(
        n_input=n_feature_maps * pooling_size * embedding_dimension // 2,
        n_output=64),
    Tanh(),
    Linear(n_output=1, n_input=64),
])

# Parenthesized print is valid and equivalent on both Python 2 and 3.
print(model)

# Noise-contrastive objective: real sequences must out-score randomly
# corrupted ones by a margin of 0.1.
cost_function = LargeMarginCost(0.1)
noise_model = RandomAlphabetCorruption(alphabet)
objective = NoiseContrastiveObjective(
    cost=cost_function,
    data_provider=train_data_provider,
    noise_model=noise_model)

# AdaGrad per-parameter learning rates with base step gamma=0.1.
update_rule = AdaGrad(gamma=0.1, model_template=model)
optimizer = SGD(model=model, objective=objective, update_rule=update_rule)