Example No. 1
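# Fine-tune the customized VGG model with Keras generators: steps_per_epoch =
# 6400 // batch_size, so each epoch draws roughly 6400 training (and validation) images.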
custom_vgg_model.fit_generator(
        train_generator,
        steps_per_epoch=6400 // batch_size,
        epochs=10,
        validation_data=validation_generator,
        validation_steps=6400 // batch_size)


# In[11]:


json_string = custom_vgg_model.to_json()  
open('custom_vgg_model.json','w').write(json_string)  
custom_vgg_model.save_weights('custom_vgg_model_weights.h5') 
# Note: in the Keras 2 generator API the second argument to evaluate_generator is
# `steps` (number of batches); len(test_generator.filenames) // batch_size may be the intent.
a = custom_vgg_model.evaluate_generator(test_generator, len(test_generator.filenames))


# In[12]:


a  # display the evaluation result returned by evaluate_generator


# In[13]:


custom_vgg_model.save('fcnn_model.h5') 
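
# A minimal sketch (not part of the original notebook) of how the artifacts saved
# above could be restored later; the file names simply mirror the ones written above.
from keras.models import model_from_json, load_model

# rebuild the architecture from JSON and attach the separately saved weights
with open('custom_vgg_model.json') as f:
    restored = model_from_json(f.read())
restored.load_weights('custom_vgg_model_weights.h5')

# or load the full model (architecture + weights + training config) in one call
restored_full = load_model('fcnn_model.h5')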


Example No. 2
# Other options: rotation_range, height_shift_range, featurewise_center, vertical_flip, featurewise_std_normalization...
# You can also pass a function (preprocessing_function) to apply to every image
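
# train_datagen is used below but not defined in this excerpt; a minimal sketch,
# with assumed (not original) values for some of the augmentation options listed above.
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(preprocessing_function=preprocess_img,  # same per-image function as the test generator
                                   rotation_range=15,        # assumed value
                                   height_shift_range=0.1,   # assumed value
                                   horizontal_flip=True)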


# this is the augmentation configuration we will use for testing:
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_img)



# Generator of images from the data folder
train_generator = train_datagen.flow_from_directory(train_data_dir, target_size=(224, 224),
                                                    batch_size=batch_size, class_mode='categorical', shuffle=True)

validation_generator = test_datagen.flow_from_directory(validation_data_dir, target_size=(224, 224),
                                                        batch_size=batch_size // 2, class_mode='categorical',
                                                        shuffle=True)

# train the model on the new data for a few epochs


model.fit_generator(train_generator, steps_per_epoch=nb_train_samples // batch_size, epochs=epochs,
                    validation_data=validation_generator, validation_steps=nb_validation_samples // batch_size)


score = model.evaluate_generator(validation_generator, nb_validation_samples)
model.save('weights2.h5')
# model.save('train_255.h5')
# test the first one normal and the second one normal and with the mean removed
print('Test loss:', score[0])
print('Test accuracy:', score[1])
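
# A minimal sketch (not from the original script) of the comparison suggested in the
# comment above: evaluate once on plain images and once with the per-channel mean
# subtracted. mean_rgb is an assumed placeholder, not a value from the original.
import numpy as np

mean_rgb = np.array([123.68, 116.779, 103.939])  # assumed per-channel mean

for name, preproc in [('plain', None), ('mean subtracted', lambda x: x - mean_rgb)]:
    datagen = ImageDataGenerator(preprocessing_function=preproc)
    gen = datagen.flow_from_directory(validation_data_dir, target_size=(224, 224),
                                      batch_size=batch_size, class_mode='categorical',
                                      shuffle=False)
    loss, acc = model.evaluate_generator(gen, nb_validation_samples // batch_size)
    print('{}: loss={:.4f} acc={:.4f}'.format(name, loss, acc))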
Example No. 3
class LanguageModel(object):
    def __init__(self, igor):
        now = datetime.now()
        self.run_name = "fergusr_{}mo_{}day_{}hr_{}min".format(now.month, now.day, 
                                                                now.hour, now.minute)
        log_location = join(igor.log_dir, self.run_name+".log")
        self.logger = igor.logger = make_logger(igor, log_location)
        self.igor = igor
        
    @classmethod
    def from_config(cls, config):
        igor = Igor(config)
        igor.prep()
        model = cls(igor)
        model.make()
        return model


    def make(self):

        B = self.igor.batch_size
        R = self.igor.rnn_size
        S = self.igor.max_sequence_len
        V = self.igor.vocab_size
        E = self.igor.embedding_size
        emb_W = self.igor.embeddings.astype(K.floatx())
        
        ## dropout parameters
        p_emb = self.igor.p_emb_dropout
        p_W = self.igor.p_W_dropout
        p_U = self.igor.p_U_dropout
        p_dense = self.igor.p_dense_dropout
        w_decay = self.igor.weight_decay

        def embedding_parameters():
            return {"W_regularizer": l2(w_decay),
                    "weights": [emb_W],
                    "mask_zero": True,
                    "dropout": p_emb}
                    
        def sequence_parameters():
            return {"return_sequences": True,
                    "dropout_W": p_W,
                    "dropout_U": p_U,
                    "U_regularizer": l2(w_decay),
                    "W_regularizer": l2(w_decay)}
        def predict_parameters():
            return {"activation": 'softmax',
                    "W_regularizer": l2(w_decay),
                    "b_regularizer": l2(w_decay)}
                    
        F_embed = Embedding(V, E, **embedding_parameters())
        F_seq1 = LSTM(R, **sequence_parameters())
        F_seq2 = LSTM(R*int(1/p_dense), **sequence_parameters())
        F_drop = Dropout(p_dense)
        F_predict = Distribute(Dense(V, **predict_parameters()))
        

        words_in = Input(batch_shape=(B,S), dtype='int32')
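        # build the stack: embedding -> LSTM -> dropout -> wider LSTM -> dropout -> per-timestep softmax
        # (compose applies right-to-left, so F_embed runs first on words_in)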
        predictions = compose(F_predict,
                              F_drop,
                              F_seq2,
                              F_drop,
                              F_seq1,
                              F_embed)(words_in)
        
        #self.F_p = K.Function([words_in, K.learning_phase()], predictions)

        optimizer = Adam(self.igor.LR, clipnorm=self.igor.max_grad_norm, 
                                       clipvalue=self.igor.max_grad_value)
        self.model = Model(input=[words_in], 
                           output=[predictions])
        self.model.compile(loss='categorical_crossentropy', 
                           optimizer=optimizer, 
                           metrics=['accuracy', 'perplexity'])

        if self.igor.from_checkpoint:
            self.load_checkpoint_weights()
            
    def load_checkpoint_weights(self):
        weight_file = join(self.igor.model_location, 
                           self.igor.saving_prefix,
                           self.igor.checkpoint_weights)
        if exists(weight_file):
            self.logger.info("+ Loading checkpoint weights")
            self.model.load_weights(weight_file, by_name=True)
        else:
            self.logger.warning("- Checkpoint weights do not exist; {}".format(weight_file))

    def train(self):
        train_data = self.igor.train_gen(forever=True)
        dev_data = self.igor.dev_gen(forever=True)
        N = self.igor.num_train_samples 
        E = self.igor.num_epochs
        # generator, samples per epoch, number of epochs
        callbacks = [ProgbarV2(3, 10)]
        checkpoint_fp = join(self.igor.model_location,
                             self.igor.saving_prefix,
                             self.igor.checkpoint_weights)
        self.logger.info("+ Model Checkpoint: {}".format(checkpoint_fp))
        callbacks += [ModelCheckpoint(filepath=checkpoint_fp, verbose=1, save_best_only=True)]
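        # decay the learning rate by 5% per epoch, resetting to the base rate every 15 epochs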
        callbacks += [LearningRateScheduler(lambda epoch: self.igor.LR * 0.95 ** (epoch % 15))]
        self.model.fit_generator(generator=train_data, samples_per_epoch=N, nb_epoch=E,
                                 callbacks=callbacks, verbose=1,
                                 validation_data=dev_data,
                                 nb_val_samples=self.igor.num_dev_samples)

    def test(self, num_samples=None):
        num_samples = num_samples or 100
        test_data = self.igor.test_gen()
        out = self.model.evaluate_generator(test_data, num_samples)
        try: 
            for label, value in zip(self.model.metrics_names, out):
                print("{}: {}".format(label, value))
        except Exception as e:
            print("some sort of error.. {}".format(e))
            import pdb
            pdb.set_trace()

    def format_sentence(self, sentence):
        ''' turn into indices here '''
        if not isinstance(sentence, list):
            sentence = sentence.split(" ")
        sentence = [self.igor.vocabs.words[w] for w in sentence]

        in_X = np.zeros(self.igor.max_sequence_len)
        out_Y = np.zeros(self.igor.max_sequence_len, dtype=np.int32)
        bigram_data = zip(sentence[0:-1], sentence[1:])
        for datum_j,(datum_in, datum_out) in enumerate(bigram_data):
            in_X[datum_j] = datum_in
            out_Y[datum_j] = datum_out
        return in_X, out_Y
    
    def eval_sentence(self, sentence):
        X, y = self.format_sentence(sentence)
        # NOTE: relies on self.F_p, the backend function that is commented out in make()
        yout = self.F_p([X[None,:]]+[0.])
        yout = yout[0]
        return X, y, yout

    def sample(self):
        L = self.igor.train_vocab.lookup
        for dev_datum in self.igor.dev_gen():
            X, y = dev_datum # X.shape = (b,s); y.shape = (b,s,V)
            Px = self.model.predict_proba(X) # Px.shape = (b,s,V)
            for i in range(X.shape[0]): 
                w_in = []
                w_true = []
                w_tprob = []
                w_pprob = []
                w_pred = []
                for j in range(X.shape[1]):
                    if L(X[i][j]) == "<MASK>": continue
                    w_in.append(L(X[i][j]))
                    w_true.append(L(y[i][j].argmax()))
                    w_pred.append(L(Px[i][j].argmax()))
                    w_tprob.append(Px[i][j][y[i][j].argmax()])
                    w_pprob.append(Px[i][j].max())

                n = max([len(w) for w in w_true+w_pred]) + 6

                for wt,wi,pwt,wp,pwp in zip(w_true, w_in, w_tprob, w_pred,w_pprob):
                    s = "|\t{:0.6f}\t|{:>%d} => {:<%d}|{:^%d}|\t{:0.6f}\t|" % (n,n,n)
                    print(s.format(pwt, wi,wt, wp, pwp))
            
                perp = 2**(-sum([log(p,2) for p in w_tprob]) / (len(w_tprob)-1))
                print("Per word perplexity of sentence: {:0.3f}".format(perp))

                prompt = input("<enter to continue, y to enter pdb, exit to exit>")
                if prompt == "y":
                    import pdb
                    pdb.set_trace()
                elif prompt == "exit":
                    import sys
                    sys.exit(0)

    def examine(self):
        L = self.igor.train_vocab.lookup
        sent_ppls = []
        sent_lls = []
        count = 0
        for dev_datum in self.igor.dev_gen(False):
            X, y = dev_datum # X.shape = (b,s); y.shape = (b,s,V)
            Px = self.model.predict_proba(X) # Px.shape = (b,s,V)
            for i in range(X.shape[0]): 
                word_probs = []
                for j in range(X.shape[1]):
                    if L(X[i][j]) == "<MASK>": continue    
                    word_probs.append(Px[i][j][y[i][j].argmax()])            
                perp = 2**(-sum([log(p,2) for p in word_probs]) / (len(word_probs)-1))
                sent_lls.append(-sum([log(p,2) for p in word_probs]))
                count += len(word_probs)
                sent_ppls.append(perp)
        with open("ppls.pkl", "wb") as fp:
            pickle.dump(sent_ppls, fp)

        print("PERPLEXITIES")
        print("Mean: {}".format(np.mean(sent_ppls)))
        print("Median: {}".format(np.median(sent_ppls)))


        
        ent = sum(sent_lls) / (count-1.0)
        print("from sent lls and then calculated after: {:0.5f}".format(2**ent))

        
        plot = plt.hist(sent_ppls, bins=20)
        plt.show()
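
# 'perplexity' passed to model.compile() above is not a stock Keras metric, so the
# project presumably supplies its own. A minimal sketch of one possible definition
# (exp of the mean per-token negative log-likelihood; masking is ignored here), which
# would be passed as metrics=['accuracy', perplexity] rather than by name:
from keras import backend as K

def perplexity(y_true, y_pred):
    # clip predictions to avoid log(0), then average the NLL of the true class
    y_pred = K.clip(y_pred, K.epsilon(), 1.0)
    nll = -K.sum(y_true * K.log(y_pred), axis=-1)
    return K.exp(K.mean(nll))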
Example No. 4
    ############################################################################
    # COMPILE, FINE-TUNE/TRAIN AND EVALUATE THE MODEL
    ############################################################################
    model.compile(loss=CLASS_MODE + "_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
    K.set_value(model.optimizer.lr, LEARNING_RATE)
    model.fit_generator(train_it,
                        shuffle=True,
                        epochs=NUM_EPOCHS,
                        steps_per_epoch=int(TOT_TRAINING_IMAGES / BATCH_SIZE),
                        validation_data=val_it,
                        validation_steps=int(TOT_VALIDATION_IMAGES /
                                             BATCH_SIZE))

    # evaluate_generator returns [loss, accuracy] here (matching the compile metrics);
    # steps=24 draws 24 batches from the test iterator
    scores = model.evaluate_generator(test_it, steps=24)

    print("Accuracy: %.2f%%" % (scores[1] * 100))

    ############################################################################
    # SAVE THE CURRENT SESSION AND THE FINAL MODEL
    ############################################################################
    saver = tf.train.Saver()
    sess = K.get_session()
    saver.save(sess, SAVE_DIR + timestamp + "/session.ckpt")

    if WEIGHTS == "vggface":
        model.save(SAVE_DIR + timestamp +
                   "/vgg_face_2_side_by_side_resnet50_" +
                   str(round(scores[1] * 100, 2)).replace(".", ",") + ".h5")
    else: