import json
import os

import numpy as np

# CSM, WordEmbedding, SentenceConvolution and PaddedSequenceMinibatchProvider
# are assumed to be importable from the surrounding project; their module
# paths are not shown in the original.


class ModelEvaluator(object):
    def __init__(self, alphabet_encoding):
        self.alphabet_encoding = alphabet_encoding

        #self.test_words = ["cat", "feline", "car", "truck", "tuck"]
        #self.test_words = ['cat', 'CAT', 'egg', 'eggplant', 'brontosaurus']
        self.test_words = ['pokemon', 'bigger', 'better', 'faster', 'stronger']

        _encoded_test_words = [self._encode(word) for word in self.test_words]
        self.data_provider = PaddedSequenceMinibatchProvider(
            X=_encoded_test_words,
            padding=self.alphabet_encoding['PADDING'],
            batch_size=len(_encoded_test_words),
            shuffle=False)

    def _encode(self, word):
        encoded_word = [self.alphabet_encoding[c] for c in word]
        encoded_word = [self.alphabet_encoding['START']] + encoded_word + [self.alphabet_encoding['END']]
        return encoded_word
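
    # For example, with a hypothetical encoding such as
    # {'START': 0, 'END': 1, 'PADDING': 2, 'c': 3, 'a': 4, 't': 5},
    # _encode('cat') would return [0, 3, 4, 5, 1].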

    def evaluate(self, model):
        X, meta = self.data_provider.next_batch()

        Y_hat, meta, _ = model.fprop(X, meta=meta, num_layers=-1, return_state=True)
        Y_hat, _ = meta['space_above'].transform(Y_hat, ['b', 'dwf'])

        # print(Y_hat)

        # compute cosine similarity between the rows of Y_hat
        Y_hat_norms = np.sqrt(np.sum(Y_hat**2, axis=1, keepdims=True))

        similarities = np.dot(Y_hat, Y_hat.T) / (Y_hat_norms * Y_hat_norms.T)

        print(self.test_words)
        print(similarities)
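

# A minimal standalone sketch of the row-wise cosine similarity computed in
# ModelEvaluator.evaluate() above, applied to a toy matrix. The function name
# and the example values are illustrative only, not part of the original code.
def _cosine_similarity_demo():
    Y = np.array([[1.0, 0.0],
                  [1.0, 1.0],
                  [0.0, 2.0]])
    norms = np.sqrt(np.sum(Y ** 2, axis=1, keepdims=True))  # per-row L2 norms, shape (3, 1)
    similarities = np.dot(Y, Y.T) / (norms * norms.T)        # (3, 3) cosine similarity matrix
    return similarities                                      # diagonal entries are 1.0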

def load_json(file_name):
    with open(file_name) as f:
        return json.loads(f.read())


if __name__ == "__main__":
    np.set_printoptions(linewidth=100)
    data = load_json(
        os.path.join(os.environ['DATA'], "words", "words.encoded.json"))
    alphabet = load_json(
        os.path.join(os.environ['DATA'], "words",
                     "words.alphabet.encoding.json"))

    train_data_provider = PaddedSequenceMinibatchProvider(
        X=data, padding=alphabet['PADDING'], batch_size=100)

    embedding_dimension = 8
    vocabulary_size = len(alphabet)
    n_feature_maps = 8
    kernel_width = 5
    pooling_size = 2

    n_epochs = 1

    model = CSM(layers=[
        WordEmbedding(dimension=embedding_dimension,
                      vocabulary_size=vocabulary_size),
        SentenceConvolution(n_feature_maps=n_feature_maps,
                            kernel_width=kernel_width,
                            n_channels=1,