Example #1
import pandas as pd
from sklearn.model_selection import train_test_split

import helper  # project-local module; root_dir() is assumed to return the repo root


def split_data():
    print("Data splitting... ", end="")
    data = pd.read_table(helper.root_dir() + '/data/data.txt', sep="*")

    train_data, test_data = train_test_split(data, test_size=0.1, shuffle=True)

    train_data.to_csv(helper.root_dir() + '/data/train.txt',
                      sep="*",
                      index=False)
    test_data.to_csv(helper.root_dir() + '/data/test.txt', sep="*", index=False)
    print("Done. Data saved to data folder. Train: {} rows. Test: {} rows.".
          format(train_data.shape[0], test_data.shape[0]))
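A quick way to sanity-check the split this produces (a hedged usage sketch; it assumes data/data.txt already exists under the root returned by helper.root_dir()):

# Usage sketch: run the split, then verify the 90/10 proportions.
import pandas as pd
import helper  # project-local module assumed throughout these examples

split_data()
train = pd.read_table(helper.root_dir() + '/data/train.txt', sep='*')
test = pd.read_table(helper.root_dir() + '/data/test.txt', sep='*')
assert abs(len(test) / (len(train) + len(test)) - 0.1) < 0.02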
Example #2
import os
from datetime import datetime

from keras.callbacks import ModelCheckpoint, TensorBoard

# Project-local modules; these import paths are assumed.
import helper
import model
from generator import Generator


def run(epochs=50, batch_size=256):
    data_path = helper.root_dir() + '/data/'

    generator = Generator(batch_size=batch_size)
    num_training_samples = generator.training_dataset.shape[0]
    num_validation_samples = generator.validation_dataset.shape[0]

    print('Number of training samples:', num_training_samples)
    print('Number of validation samples:', num_validation_samples)

    nn = model.generate(max_token_length=generator.MAX_TOKEN_LENGTH,
                        vocabulary_size=generator.VOCABULARY_SIZE)
    nn.compile(loss='categorical_crossentropy', optimizer='adam')

    # data_path already ends with '/', so avoid doubling the separator.
    models_path = data_path + 'weights/' + datetime.now().strftime(
        '%Y-%m-%d-%H-%M')
    os.makedirs(models_path)
    model_names = models_path + '/nn_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
    model_checkpoint = ModelCheckpoint(model_names,
                                       monitor='loss',
                                       verbose=1,
                                       save_best_only=True,
                                       mode='min')
    tboard = TensorBoard(log_dir=data_path + 'tboard/' +
                         datetime.now().strftime('%Y-%m-%d-%H-%M'))

    callbacks = [model_checkpoint, tboard]

    nn.fit_generator(generator=generator.flow(train=True),
                     epochs=epochs,
                     steps_per_epoch=num_training_samples // batch_size,
                     validation_data=generator.flow(train=False),
                     validation_steps=num_validation_samples // batch_size,
                     callbacks=callbacks,
                     verbose=1)
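A minimal usage sketch for run(), assuming the data/ folder already contains everything Generator expects; the TensorBoard command points at the log directory created above:

if __name__ == '__main__':
    run(epochs=10, batch_size=128)
    # Afterwards, inspect the training curves with:
    #   tensorboard --logdir data/tboard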
Example #3
    def __init__(self):
        # Assumed imports: InceptionV3 from keras.applications.inception_v3,
        # plus the project-local helper, model, and Generator modules.
        self.image_model = InceptionV3(weights='imagenet')
        self.generator = Generator(batch_size=32)
        self.nn = model.generate(
            max_token_length=self.generator.MAX_TOKEN_LENGTH,
            vocabulary_size=self.generator.VOCABULARY_SIZE)
        data_path = helper.root_dir() + '/data/'
        # data_path already ends with '/'; point at a fixed checkpoint
        # saved by an earlier training run.
        models_path = data_path + 'weights/2018-03-25-14-18'
        model_names = models_path + '/nn_weights.25-2.94.hdf5'
        self.nn.load_weights(model_names)
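For reference, here is a sketch of how the 1000-dimensional feature vector that image_model produces is typically obtained for one image; extract_features and the preprocessing shown are illustrative, not part of the original class:

import numpy as np
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing import image


def extract_features(image_model, img_path):
    # InceptionV3 expects 299x299 RGB input scaled to [-1, 1].
    img = image.load_img(img_path, target_size=(299, 299))
    x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
    return image_model.predict(x)  # shape (1, 1000): ImageNet class scores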
Example #4
    def __init__(self, run_inception=False, word_threshold=-1):
        self.root_path = helper.root_dir()
        self.word_threshold = word_threshold
        self.max_caption_length = 0
        self.run_inception = run_inception
        self.IMG_FEATURES = 1000  # length of the InceptionV3 output vector
        self.BOS = '<S>'  # Beginning Of Sentence token
        self.EOS = '<E>'  # End Of Sentence token
        self.PAD = '<P>'  # Padding token
        self.word_frequencies = None
        self.captions = None
        self.image_files = None
        self.image_features = None
        self.word_to_id = None
        self.id_to_word = None
        self.extracted_features = None
        self.features_file_names = None
        self.image_feature_files = None
        self.vocabulary_size = 0
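The three special tokens are typically used to frame and pad each caption to a fixed length, which is also why Example #5 adds 2 to max_caption_length. A hedged illustration (pad_caption is hypothetical, not a method of this class):

def pad_caption(caption, max_length, bos='<S>', eos='<E>', pad='<P>'):
    # Wrap the caption in BOS/EOS, then pad to max_length + 2 tokens.
    tokens = [bos] + caption.split() + [eos]
    tokens += [pad] * (max_length + 2 - len(tokens))
    return tokens

print(pad_caption('a dog runs', max_length=5))
# ['<S>', 'a', 'dog', 'runs', '<E>', '<P>', '<P>']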
Example #5
    def __init__(self, batch_size=1):
        self.data_path = helper.root_dir() + '/data/'
        self.dictionary = None
        self.training_dataset = None
        self.validation_dataset = None
        self.image_names_to_features = None

        # data_parameters.log holds space-delimited key/value pairs; the
        # trailing ':' stays part of each key (see the lookups below).
        data_logs = np.genfromtxt(self.data_path + 'data_parameters.log',
                                  delimiter=' ', dtype='str')
        data_logs = dict(zip(data_logs[:, 0], data_logs[:, 1]))

        self.MAX_TOKEN_LENGTH = int(data_logs['max_caption_length:']) + 2  # +2 for BOS and EOS
        self.IMG_FEATS = int(data_logs['IMG_FEATS:'])
        self.BOS = str(data_logs['BOS:'])
        self.EOS = str(data_logs['EOS:'])
        self.PAD = str(data_logs['PAD:'])
        self.VOCABULARY_SIZE = None
        self.word_to_id = None
        self.id_to_word = None
        self.BATCH_SIZE = batch_size

        self._load_dataset()
        self._load_vocabulary()
        self._load_image_features()
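The genfromtxt call above assumes a space-delimited key/value log. A self-contained sketch of the layout it expects (the values here are illustrative):

import numpy as np

# Hypothetical contents of data/data_parameters.log.
sample_log = """max_caption_length: 37
IMG_FEATS: 1000
BOS: <S>
EOS: <E>
PAD: <P>"""

rows = np.genfromtxt(sample_log.splitlines(), delimiter=' ', dtype='str')
params = dict(zip(rows[:, 0], rows[:, 1]))
print(params['max_caption_length:'])  # -> '37'; note the ':' stays in the key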