def _read_csv_file(self, delimiter="\t"):
    print("Reading CSV file")
    begin_time = time.time()
    # for Micky's big file
    # wikitree_sf = SFrame.read_csv(self._input_directory_path + self._target_file_name, delimiter="\t")
    wikitree_sf = SFrame.read_csv(self._input_directory_path +
                                  self._target_file_name,
                                  delimiter=delimiter)
    end_time = time.time()
    run_time = end_time - begin_time
    print(run_time)
    return wikitree_sf
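
A minimal usage sketch (illustrative, not part of the original snippet; it assumes an instance exposing _input_directory_path and _target_file_name that point at a tab-separated file):

# 'importer' is a hypothetical instance of the class defining _read_csv_file
sf = importer._read_csv_file(delimiter="\t")
print(sf.num_rows())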
Example No. 2
from sframe import SFrame
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

products = SFrame('review.csv')

from string import punctuation


def remove_punctuation(text):
    # str.translate expects a translation table; build one that deletes punctuation
    return text.translate(str.maketrans('', '', punctuation))


products['review_clean'] = products['text'].apply(remove_punctuation)

products = products[products['stars'] != 3]

products['sentiment'] = products['stars'].apply(lambda r: +1 if r > 3 else -1)

train_data, test_data = products.random_split(.8, seed=1)

vectorizer = CountVectorizer(token_pattern=r'\b\w+\b')

train_matrix = vectorizer.fit_transform(train_data['review_clean'])
test_matrix = vectorizer.transform(test_data['review_clean'])
print(test_matrix[0])

model = LogisticRegression()
model.fit(train_matrix, train_data['sentiment'])

sample_test_matrix = vectorizer.transform(['ammazing wow wow'])
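
A minimal continuation sketch (illustrative, not in the original snippet): scoring the vectorized sample with the classifier trained above.

print(model.predict(sample_test_matrix))            # predicted sentiment (+1 / -1)
print(model.decision_function(sample_test_matrix))  # signed distance from the decision boundary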
Example No. 4
class SRNN(object):
    def __init__(self,
                 batch_size=BATCH_SIZE,
                 seq_len=SUB_SEQ_LEN,
                 slow_fs=SLOW_FS,
                 slow_dim=SLOW_DIM,
                 dim=DIM,
                 mid_fs=MID_FS,
                 q_levels=Q_LEVELS,
                 mlp_activation='relu'):
        self.weight_norm = True
        self.stateful = True
        self.slow_fs = slow_fs
        self.mid_fs = mid_fs
        self.q_levels = q_levels
        self.dim = dim
        self.slow_dim = slow_dim
        self.batch_size = batch_size
        slow_seq_len = max(1, seq_len // slow_fs)
        mid_seq_len = max(1, seq_len // mid_fs)
        prev_sample_seq_len = seq_len + 1

        ################################################################################
        ################## Model to train
        ################################################################################

        self.slow_tier_model_input = Input(
            batch_shape=(batch_size, slow_seq_len * slow_fs, 1))
        self.slow_tier_model = Lambda(
            lambda x: scale_samples_for_rnn(x, q_levels=q_levels),
            name='slow_scale')(self.slow_tier_model_input)
        self.slow_tier_model = Reshape(
            (slow_seq_len, self.slow_fs),
            name='slow_reshape4rnn')(self.slow_tier_model)

        self.slow_rnn_h = K.variable(
            np.zeros((1, self.slow_dim)), dtype=K.floatx(), name='slow_h0')
        self.slow_rnn_h0 = K.tile(self.slow_rnn_h, (batch_size, 1))
        self.mid_rnn_h = K.variable(
            np.zeros((1, self.dim)), dtype=K.floatx(), name='mid_h0')
        self.mid_rnn_h0 = K.tile(self.mid_rnn_h, (batch_size, 1))

        self.state_selector = K.zeros(
            (), dtype=K.floatx(), name='slow_state_mask')
        self.slow_rnn = GruWithWeightNorm(
            slow_dim,
            use_bias=True,
            name='slow_rnn',
            recurrent_activation='sigmoid',
            return_sequences=True,
            stateful=self.stateful,
            state_selector=self.state_selector,
            weight_norm=self.weight_norm)
        self.slow_rnn._trainable_weights.append(self.slow_rnn_h)
        self.slow_tier_model = self.slow_rnn(
            self.slow_tier_model, initial_state=self.slow_rnn_h0)

        # upscale slow rnn output to mid tier ticking freq
        self.slow_tier_model = TimeDistributed(
            DenseWithWeightNorm(dim * slow_fs // mid_fs,  # '//' keeps the unit count an int
                                weight_norm=self.weight_norm,
                                ), name='slow_project2mid') \
            (self.slow_tier_model)
        self.slow_tier_model = Reshape(
            (mid_seq_len, dim), name='slow_reshape4mid')(self.slow_tier_model)

        self.mid_tier_model_input = Input(
            batch_shape=(batch_size, mid_seq_len * mid_fs, 1))
        self.mid_tier_model = Lambda(
            lambda x: scale_samples_for_rnn(x, q_levels=q_levels),
            name='mid_scale')(self.mid_tier_model_input)
        self.mid_tier_model = Reshape(
            (mid_seq_len, self.mid_fs),
            name='mid_reshape2rnn')(self.mid_tier_model)
        mid_proj = DenseWithWeightNorm(
            dim, name='mid_project2rnn', weight_norm=self.weight_norm)
        self.mid_tier_model = TimeDistributed(
            mid_proj, name='mid_project2rnn')(self.mid_tier_model)
        self.mid_tier_model = layers.add(
            [self.mid_tier_model, self.slow_tier_model])
        self.mid_rnn = GruWithWeightNorm(
            dim,
            name='mid_rnn',
            return_sequences=True,
            recurrent_activation='sigmoid',
            stateful=self.stateful,
            state_selector=self.state_selector)

        self.mid_rnn._trainable_weights.append(self.mid_rnn_h)
        self.mid_tier_model = self.mid_rnn(
            self.mid_tier_model, initial_state=self.mid_rnn_h0)
        self.mid_adapter = DenseWithWeightNorm(
            dim * mid_fs, name='mid_project2top', weight_norm=self.weight_norm)
        self.mid_tier_model = TimeDistributed(
            self.mid_adapter, name='mid_project2top')(self.mid_tier_model)
        self.mid_tier_model = Reshape(
            (mid_seq_len * mid_fs, dim),
            name='mid_reshape4top')(self.mid_tier_model)
        self.embed_size = 256
        self.sframe = SFrame()
        self.top_tier_model_input = self.sframe.build_sframe_model(
            (batch_size, prev_sample_seq_len, 1),
            frame_size=self.mid_fs,
            q_levels=self.q_levels,
            embed_size=self.embed_size)
        self.top_adapter = DenseWithWeightNorm(
            dim,
            use_bias=False,
            name='top_project2mlp',
            kernel_initializer='lecun_uniform',
            weight_norm=self.weight_norm)
        self.top_tier_model = TimeDistributed(
            self.top_adapter,
            name='top_project2mlp')(self.top_tier_model_input.output)

        self.top_tier_model_input_from_mid_tier = Input(
            batch_shape=(batch_size, 1, dim))
        self.top_tier_model_input_predictor = Input(
            batch_shape=(batch_size, mid_fs, 1))
        self.top_tier_model = layers.add(
            [self.mid_tier_model, self.top_tier_model])

        self.top_tier_mlp_l1 = DenseWithWeightNorm(
            dim,
            activation=mlp_activation,
            name='mlp_1',
            weight_norm=self.weight_norm)
        self.top_tier_mlp_l2 = DenseWithWeightNorm(
            dim,
            activation=mlp_activation,
            name='mlp_2',
            weight_norm=self.weight_norm)
        self.top_tier_mlp_l3 = DenseWithWeightNorm(
            q_levels,
            kernel_initializer='lecun_uniform',
            name='mlp_3',
            weight_norm=self.weight_norm)

        self.top_tier_model = TimeDistributed(
            self.top_tier_mlp_l1, name='mlp_1')(self.top_tier_model)
        self.top_tier_model = TimeDistributed(
            self.top_tier_mlp_l2, name='mlp_2')(self.top_tier_model)
        self.top_tier_model = TimeDistributed(
            self.top_tier_mlp_l3, name='mlp_3')(self.top_tier_model)

        self.mid_tier_model_input_from_slow_tier = Input(
            batch_shape=(batch_size, 1, dim))
        self.mid_tier_model_input_predictor = Input(
            batch_shape=(batch_size, mid_fs, 1))

        self.srnn = Model([
            self.slow_tier_model_input, self.mid_tier_model_input,
            self.top_tier_model_input.input
        ], self.top_tier_model)

        ################################################################################
        ################## Model to sample from (predictor)
        ################################################################################

        ################################################################################
        ################## Slow tier predictor
        ################################################################################
        self.slow_tier_model_predictor = Model(
            inputs=self.slow_tier_model_input, outputs=self.slow_tier_model)

        ################################################################################
        ################## Mid tier predictor
        ################################################################################

        self.mid_tier_model_predictor = Lambda(
            lambda x: scale_samples_for_rnn(x, q_levels=q_levels))(
                self.mid_tier_model_input_predictor)
        self.mid_tier_model_predictor = Reshape(
            (1, self.mid_fs))(self.mid_tier_model_predictor)
        self.mid_tier_model_predictor = TimeDistributed(mid_proj)(
            self.mid_tier_model_predictor)
        self.mid_tier_model_predictor = layers.add([
            self.mid_tier_model_predictor,
            self.mid_tier_model_input_from_slow_tier
        ])
        """ Creating new layer instead of sharing it with the model to train
        due to https://github.com/keras-team/keras/issues/6939
        Sharing statefull layers gives a crosstalk
        """
        self.predictor_mid_rnn = GruWithWeightNorm(
            self.dim,
            name='mid_rnn_pred',
            return_sequences=True,
            recurrent_activation='sigmoid',
            stateful=self.stateful,
            state_selector=self.state_selector)
        self.predictor_mid_rnn._trainable_weights.append(self.mid_rnn_h)
        self.mid_tier_model_predictor = self.predictor_mid_rnn(
            self.mid_tier_model_predictor, initial_state=self.mid_rnn_h0)
        self.predictor_mid_rnn.set_weights(self.mid_rnn.get_weights())
        self.mid_tier_model_predictor = TimeDistributed(self.mid_adapter)(
            self.mid_tier_model_predictor)
        self.mid_tier_model_predictor = Reshape(
            (mid_fs, dim))(self.mid_tier_model_predictor)
        self.mid_tier_model_predictor = Model([
            self.mid_tier_model_input_predictor,
            self.mid_tier_model_input_from_slow_tier
        ], self.mid_tier_model_predictor)

        ################################################################################
        ################## Top tier predictor
        ################################################################################

        self.top_predictor_embedding = self.sframe.get_embedding()
        self.top_tier_model_predictor = self.top_predictor_embedding(
            self.top_tier_model_input_predictor)
        self.top_tier_model_predictor = Reshape(
            (1, mid_fs * self.embed_size))(self.top_tier_model_predictor)
        self.top_tier_model_predictor = TimeDistributed(self.top_adapter)(
            self.top_tier_model_predictor)
        self.top_tier_model_predictor = layers.add([
            self.top_tier_model_predictor,
            self.top_tier_model_input_from_mid_tier
        ])

        self.top_tier_model_predictor = TimeDistributed(self.top_tier_mlp_l1)(
            self.top_tier_model_predictor)
        self.top_tier_model_predictor = TimeDistributed(self.top_tier_mlp_l2)(
            self.top_tier_model_predictor)
        self.top_tier_model_predictor = TimeDistributed(self.top_tier_mlp_l3)(
            self.top_tier_model_predictor)

        self.top_tier_model_predictor = Model([
            self.top_tier_model_input_predictor,
            self.top_tier_model_input_from_mid_tier
        ], self.top_tier_model_predictor)

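        # Custom loss: a numerically stable log-softmax cross-entropy over the
        # q_levels output bins; the log2(e) factor converts nats to bits, so
        # the reported loss is bits per sample.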
        def categorical_crossentropy(target, output):
            new_target_shape = [
                K.shape(output)[i] for i in range(K.ndim(output) - 1)
            ]
            output = K.reshape(output, (-1, self.q_levels))
            xdev = output - K.max(output, axis=1, keepdims=True)
            lsm = xdev - K.log(K.sum(K.exp(xdev), axis=1, keepdims=True))
            cost = -K.sum(lsm * K.reshape(target, (-1, self.q_levels)), axis=1)
            log2e = K.variable(np.float32(np.log2(np.e)))
            return K.reshape(cost, new_target_shape) * log2e

        self.srnn.compile(
            loss=categorical_crossentropy,
            optimizer=keras.optimizers.Adam(clipvalue=1.),
            sample_weight_mode='temporal')

    def set_h0_selector(self, use_learned_h0):
        if use_learned_h0:
            self.srnn.reset_states()
            self.slow_rnn.reset_states()
            self.mid_rnn.reset_states()
            self.slow_tier_model_predictor.reset_states()
            self.mid_tier_model_predictor.reset_states()
            K.set_value(self.state_selector, np.ones(()))
        else:
            K.set_value(self.state_selector, np.zeros(()))

    def save_weights(self, file_name):
        self.srnn.save_weights(file_name)

    def load_weights(self, file_name):
        self.srnn.load_weights(file_name)
        self.predictor_mid_rnn.set_weights(self.mid_rnn.get_weights())

    def numpy_one_hot(self, labels_dense, n_classes):
        """Convert class labels from scalars to one-hot vectors."""
        labels_shape = labels_dense.shape[:-1]
        labels_dtype = labels_dense.dtype
        labels_dense = labels_dense.ravel().astype("int32")
        n_labels = labels_dense.shape[0]
        labels_one_hot = np.zeros((n_labels, n_classes))
        labels_one_hot[np.arange(n_labels).astype("int32"),
                       labels_dense.ravel()] = 1
        labels_one_hot = labels_one_hot.reshape(labels_shape + (n_classes, ))
        return labels_one_hot.astype(labels_dtype)

    def _prep_batch(self, x, mask):
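        # x packs slow_fs warm-up samples followed by the training window.
        # Each tier sees the same waveform shifted to its own context:
        # x_slow drops the last slow_fs samples, x_mid and x_prev start
        # mid_fs samples before the first target, and target is x shifted
        # left by slow_fs, one-hot encoded over q_levels.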
        x_slow = x[:, :-self.slow_fs]
        x_mid = x[:, self.slow_fs - self.mid_fs:-self.mid_fs]
        x_prev = x[:, self.slow_fs - self.mid_fs:-1]
        target = x[:, self.slow_fs:]
        target = self.numpy_one_hot(target, self.q_levels)
        if mask is None:
            mask = np.ones((x.shape[0], x.shape[1]))
        target_mask = mask[:, self.slow_fs:]
        return x_slow, x_mid, x_prev, target, target_mask

    def train_on_batch(self, x, mask=None):
        x_slow, x_mid, x_prev, target, target_mask = self._prep_batch(x, mask)

        return self.model().train_on_batch(
            [x_slow, x_mid, x_prev], target, sample_weight=target_mask)

    def predict_on_batch(self, x, mask=None):
        x_slow, x_mid, x_prev, target, target_mask = self._prep_batch(x, mask)
        return self.model().predict_on_batch([x_slow, x_mid, x_prev])

    def test_on_batch(self, x, mask=None):
        x_slow, x_mid, x_prev, target, target_mask = self._prep_batch(x, mask)
        return self.model().test_on_batch(
            [x_slow, x_mid, x_prev], target, sample_weight=target_mask)

    def model(self):
        return self.srnn

    def numpy_sample_softmax2d(self, coeff, random_state, debug=False):
        if coeff.ndim > 2:
            raise ValueError("Unsupported dim")
        if debug:
            idx = coeff.argmax(axis=1)
        else:
            # renormalize to avoid numpy errors about summation...
            coeff = coeff / (coeff.sum(axis=1, keepdims=True) + 1E-6)
            idxs = [
                np.argmax(random_state.multinomial(1, pvals=coeff[i]))
                for i in range(len(coeff))
            ]
            idx = np.array(idxs)
        return idx.astype(K.floatx())

    def numpy_sample_softmax(self, logits, random_state, debug=False):
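        # Flatten all leading dimensions into rows, draw one class index per
        # row, then restore the original shape with the class axis collapsed
        # to size 1.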
        old_shape = logits.shape
        flattened_logits = logits.reshape((-1, logits.shape[logits.ndim - 1]))
        new_shape = list(old_shape)
        new_shape[-1] = 1
        samples = self.numpy_sample_softmax2d(flattened_logits, random_state,
                                              debug).reshape(new_shape)
        return samples

    def numpy_softmax(self, X, temperature=1.):
        # should work for both 2D and 3D
        dim = X.ndim
        X = X / temperature
        e_X = np.exp((X - X.max(axis=dim - 1, keepdims=True)))
        out = e_X / e_X.sum(axis=dim - 1, keepdims=True)
        return out

    def sample(self, ts, random_state, debug):
        samples = np.zeros((1, ts, 1), dtype='int32')
        Q_ZERO = self.q_levels // 2
        samples[:, :self.slow_fs] = Q_ZERO
        big_frame_level_outputs = None
        frame_level_outputs = None
        self.set_h0_selector(False)

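        # Three-tier autoregressive loop: the slow tier updates every slow_fs
        # samples, the mid tier every mid_fs samples, and the top tier emits
        # one quantized sample per step from the current mid-tier frame output.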
        for t in range(self.slow_fs, ts):
            if t % self.slow_fs == 0:
                big_frame_level_outputs = self.slow_tier_model_predictor. \
                    predict_on_batch([samples[:, t-self.slow_fs:t,:]])

            if t % self.mid_fs == 0:
                frame_level_outputs = self.mid_tier_model_predictor. \
                    predict_on_batch([samples[:, t-self.mid_fs:t],
                                      big_frame_level_outputs[:, (t // self.mid_fs) % (self.slow_fs // self.mid_fs)][:, np.newaxis, :]])

            sample_prob = self.top_tier_model_predictor. \
                predict_on_batch([samples[:, t-self.mid_fs:t],
                                  frame_level_outputs[:, t % self.mid_fs][:,np.newaxis,:]])
            sample_prob = self.numpy_softmax(sample_prob)
            samples[:, t] = self.numpy_sample_softmax(
                sample_prob, random_state, debug=debug > 0)
        return samples[0].astype('float32')
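
A minimal training sketch (illustrative, not part of the original snippet). It assumes the module-level constants BATCH_SIZE, SUB_SEQ_LEN, SLOW_FS and Q_LEVELS used by the class are in scope, and that MID_FS == 2, which the slicing in _prep_batch implies (the top tier expects seq_len + 1 previous samples):

import numpy as np

srnn = SRNN()
# Each training window carries SLOW_FS warm-up samples followed by
# SUB_SEQ_LEN quantized samples; _prep_batch slices out the per-tier views.
x = np.random.randint(0, Q_LEVELS, size=(BATCH_SIZE, SUB_SEQ_LEN + SLOW_FS, 1))
loss = srnn.train_on_batch(x)
# sample() drives the *_predictor models, which share the constructor's
# batch_size, so generation is typically done with a model built with
# batch_size=1 after load_weights().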
Example No. 5
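        # Configure Firefox to save downloads to a custom directory
        # (folderList=2) without the save dialog for the listed MIME types.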
        profile = webdriver.FirefoxProfile()
        profile.set_preference('browser.download.folderList', 2)
        profile.set_preference('browser.download.dir', symlink_path)
        profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                               'text/html,application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
        profile.set_preference('pdfjs.disabled', True)

        browser = webdriver.Firefox(profile)
        #  load classifier
        lr_clf = joblib.load('./model/pfpj_classifier.pkl')

        totalprocessos = 0
        totalerros = 0

        seeds = SFrame.read_csv('seedSP.csv', verbose=False, column_type_hints=[str, str, int])
        del seeds['Seed']

        if hasattr(args, 'a') and args.a:
            with open(args.a, 'r') as fh:
                for busca in fh.readlines():
                    numprocessos, numerro = buscaprocesso(busca)
                    totalprocessos += numprocessos
                    totalerros += numerro
        else:
            buscas = args.q
            totalprocessos, totalerros = buscaprocesso(buscas)
            totalbuscas = 1

        print("Parsing has been done")
        print('Total errors / cases: %d / %d' % (totalerros, totalprocessos))
Example No. 6

def derivative_sigmoid(z):  # def line reconstructed; the snippet starts mid-docstring
    """
        Compute the derivative of the sigmoid function
        ** input : **
            *z : pre-activation function
        ** return (Float value) **
    """
    return sigmoid(z) * (1 - sigmoid(z))

def sigmoid(z):
    """
        Compute the sigmoid function
        ** input : **
            *z : pre-activation function
        ** return (Float value) from 0 to 1 **
    """
    return 1 / (1 + np.exp(-z))
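
# Illustrative sanity check (not in the original snippet): the analytic
# derivative above should match a central finite difference. _z0 and _eps
# are hypothetical values chosen for the check; derivative_sigmoid is the
# name reconstructed above.
_z0, _eps = 0.5, 1e-6
assert abs(derivative_sigmoid(_z0)
           - (sigmoid(_z0 + _eps) - sigmoid(_z0 - _eps)) / (2 * _eps)) < 1e-8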

if __name__ == '__main__':
    dataset = SFrame.read_csv("adult.csv")

    CATEGORY_KEYS = ["workclass", "education", "marital-status", "occupation", "relationship", "race", "gender", "native-country"]
    CONTINU_KEYS = ["capital-gain", "fnlwgt", "hours-per-week", "age", "capital-loss", "educational-num"]

    # Process nonlinear columns
    dataset = columns_to_category(dataset, CATEGORY_KEYS)
    # Process linear columns
    dataset = columns_to_normalize(dataset, CONTINU_KEYS)
    # Convert the output from string to binary
    dataset["income"] = dataset["income"].apply(lambda x : 1. if x == ">50K" else 0.)

    keys = CATEGORY_KEYS + CONTINU_KEYS + ["income"]
    features = []
    # Create the features matrix
    for line in dataset:
Example No. 8
    """
        Shuffle the two lists keeping the order
        ** input : **
            *features : numpy array of features
            *targets : numpy vector of targets
        ** return (numpy array of features, numpy vector of targets) **
    """
    c = list(zip(features.tolist(), targets.tolist()))
    random.shuffle(c)
    features[:], targets[:] = zip(*c)
    return np.array(features), np.array(targets)


if __name__ == '__main__':
    # Load both csv with sframe
    train_data = SFrame.read_csv("train.csv")
    test_data = SFrame.read_csv("test.csv")

    test_data["Survived"] = -1
    # We add a new column to each csv to be able to differentiate them later
    train_data["type"] = "train"
    test_data["type"] = "test"
    # We now can merge the two csv together
    data = train_data.append(test_data)

    # We extract features and targets from the csv
    train_features, train_targets, test_features = process_csv(data)

    # We initialize all variables. The weight is a one dimensional vector (one weight per feature)
    weights = np.random.randn(train_features.shape[1])
    # The bias
Example No. 9
)  # run at the start of every ipython notebook to use plotly.offline
# this injects the plotly.js source files into the notebook
#--------------------------------------------------
# %matplotlib inline
# import matplotlib.pyplot as plt
# import seaborn as sns
#--------------------------------------------------

# ---
# # Read data into SFrames

# In[4]:

usersSF = SFrame.read_csv("%s/users.dat" % DATADIR,
                          delimiter='::',
                          header=False,
                          verbose=False,
                          column_type_hints=[int, str, int, int, str])
usersSF = usersSF.rename({
    'X1': 'UserID',
    'X2': 'Gender',
    'X3': 'Age',
    'X4': 'Occupation',
    'X5': 'ZipCode',
})
usersDescSF = dict(zip(usersSF.column_names(), usersSF.column_types()))
print(usersDescSF)

# In[5]:

ratingsSF = SFrame.read_csv("%s/ratings.dat" % DATADIR,