# Imports assumed by these snippets; the original module header is not shown.
# The code predates TF2 (fit_generator, ModelCheckpoint(period=...), and a
# global `graph`), so standalone Keras import paths are used here.
import os
import pathlib
import pickle
from datetime import datetime

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Sequential
from keras.preprocessing.image import load_img
from keras.regularizers import l2
from keras.utils import to_categorical

# Module-level configuration (IMAGE_DIR, DATA_DIR, MODELS_DIR, WIDHT, HEIGHT,
# WH, LABEL_SIZE, BATCH_SIZE, EPOCHS, TRAIN_X, TRAIN_Y, augs_gen, model_name,
# graph) is assumed to be defined elsewhere in the original module.


def read_dataset(image_dir: str = IMAGE_DIR, dump: bool = True, **kwargs):
    """Train a transfer-learning classifier on images streamed from disk.

    Builds a frozen InceptionV3 base with a small dense head, fits it with
    flow_from_directory generators, and pickles the class-index mapping for
    later inference.
    """
    logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=logdir)

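    # Use ImageNet-pretrained InceptionV3 as a frozen feature extractor:
    # only the newly added head below is trained.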
    base_model = InceptionV3(include_top=False,
                             weights='imagenet',
                             input_shape=(WIDHT, HEIGHT, 3))
    for layer in base_model.layers:
        layer.trainable = False

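    # Classification head: pool the conv features, one hidden dense layer,
    # then a softmax over the LABEL_SIZE classes.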
    model = Sequential()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(LABEL_SIZE, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['accuracy'],
    )

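    #  Legacy in-memory loading pipeline, superseded by the generator flow
    #  below (see the third example for the active version of this code).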
    #  def define_label(parent_name):
    #      return "-".join(parent_name.split('-')[1:])

    #  count = 0
    #  for subdir, dirs, files in os.walk(image_dir):
    #      print(f'PATH: {subdir} is processing')
    #      count += 1
    #      for file in files:
    #          path = pathlib.Path(subdir).absolute() / file
    #          image = load_img(str(path), target_size=WH)
    #          TRAIN_X.append(np.array(image))

    #          image_label = define_label(path.parent.name)
    #          TRAIN_Y.append(image_label)

    #  label_encoder = LabelEncoder()
    #  TRAIN_Y = label_encoder.fit_transform(TRAIN_Y)
    #  TRAIN_Y = np.array(to_categorical(TRAIN_Y, num_classes=LABEL_SIZE))

    #  x_train, x_test, y_train, y_test = train_test_split(
    #      np.array(TRAIN_X),
    #      TRAIN_Y,
    #      test_size=0.2,
    #      random_state=69,
    #  )

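    # Stream batches straight from the class subdirectories. The
    # training/validation subsets assume augs_gen is an ImageDataGenerator
    # constructed with a validation_split.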
    train_generator = augs_gen.flow_from_directory(
        directory=image_dir,
        target_size=WH,
        batch_size=BATCH_SIZE,
        seed=1,
        shuffle=True,
        subset='training',
    )
    test_generator = augs_gen.flow_from_directory(
        directory=image_dir,
        target_size=WH,
        batch_size=BATCH_SIZE,
        seed=1,
        shuffle=True,
        subset='validation',
    )

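    # Invert class_indices (label -> index) to index -> label and persist
    # it so predictions can be decoded back to class names at inference.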
    labels = {index: name for name, index in train_generator.class_indices.items()}

    with open(DATA_DIR / 'generator_labels.dump', 'wb') as file:
        pickle.dump(labels, file)

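    # fit_generator is the Keras 2 / TF1-era API; under TF2, model.fit
    # accepts generators directly.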
    model.fit_generator(
        train_generator,
        validation_data=test_generator,
        steps_per_epoch=train_generator.samples // BATCH_SIZE,
        validation_steps=test_generator.samples // BATCH_SIZE,
        epochs=EPOCHS,
        verbose=1,
        callbacks=[tensorboard_callback],
    )
    print('Preparing to write data to disk')

    model.save(f'{model_name}_without_iter.dump')


# --- Example 2: the same training pipeline with checkpointing and LR scheduling ---
def fit(image_dir: str = IMAGE_DIR, dump: bool = True, **kwargs):
    """Train the frozen-InceptionV3 classifier with extra callbacks.

    Same generator pipeline as read_dataset above, plus per-epoch
    checkpointing, ReduceLROnPlateau, and L2 regularization on the head.
    """
    logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=logdir)

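    # Checkpoint every epoch (with save_best_only=False the `monitor` value
    # is unused) and cut the learning rate 10x after 3 stagnant epochs.
    # `period=` is the Keras 2 argument name; TF2 renamed it to save_freq.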
    model_checkpoint = ModelCheckpoint(
        str(MODELS_DIR /
            'weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5'),
        monitor='val_loss',
        verbose=0,
        save_best_only=False,
        save_weights_only=False,
        mode='auto',
        period=1)

    reduce_lron = ReduceLROnPlateau(monitor='val_loss',
                                    factor=0.1,
                                    patience=3,
                                    verbose=1,
                                    mode='auto')

    base_model = InceptionV3(include_top=False,
                             weights='imagenet',
                             input_shape=(WIDHT, HEIGHT, 3))
    for layer in base_model.layers:
        layer.trainable = False

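    # Same frozen-base architecture as read_dataset above, with L2 weight
    # decay added on the hidden layer to curb overfitting.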
    model = Sequential()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(1024, activation='relu', kernel_regularizer=l2(0.0001)))
    model.add(Dense(LABEL_SIZE, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['accuracy'],
    )

    train_generator = augs_gen.flow_from_directory(
        directory=image_dir,
        target_size=WH,
        batch_size=BATCH_SIZE,
        seed=1,
        shuffle=True,
        subset='training',
    )
    test_generator = augs_gen.flow_from_directory(
        directory=image_dir,
        target_size=WH,
        batch_size=BATCH_SIZE,
        seed=1,
        shuffle=True,
        subset='validation',
    )
    labels = {index: name for name, index in train_generator.class_indices.items()}

    with open(DATA_DIR / 'generator_labels.dump', 'wb') as file:
        pickle.dump(labels, file)

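    # `graph` is assumed to be a module-level tf.Graph captured when the
    # model/session was created, a common TF1-era pattern when training is
    # triggered from another thread (e.g. a web worker).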
    with graph.as_default():
        model.fit_generator(
            train_generator,
            validation_data=test_generator,
            steps_per_epoch=train_generator.samples // BATCH_SIZE,
            validation_steps=test_generator.samples // BATCH_SIZE,
            epochs=EPOCHS,
            verbose=1,
            callbacks=[tensorboard_callback, model_checkpoint, reduce_lron],
        )
    print('Preparing to write data to disk')

    model.save(f'{model_name}.dump')


# --- Example 3: a read_dataset variant that loads images into memory and trains in chunks ---
def read_dataset(image_dir: str = IMAGE_DIR, dump: bool = True, **kwargs):
    """Read and resize all images into memory and train the model in chunks.

    Fills the module-level TRAIN_X (pixel arrays) and TRAIN_Y (one-hot
    labels), then trains and checkpoints every 40 directories to bound
    memory use.
    """
    global TRAIN_X, TRAIN_Y
    logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=logdir)

    base_model = InceptionV3(include_top=False,
                             weights='imagenet',
                             input_shape=(WIDHT, HEIGHT, 3))
    for layer in base_model.layers:
        layer.trainable = False

    model = Sequential()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    #  model.add(Dense(512, activation='relu'))
    model.add(Dense(LABEL_SIZE, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

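    # Directory names are assumed to follow the Stanford Dogs convention,
    # e.g. 'n02085620-Chihuahua': define_label drops the WordNet ID prefix
    # and re-joins the rest so multi-part breed names keep their hyphens.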
    def define_label(parent_name):
        return "-".join(parent_name.split('-')[1:])

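    # First pass: collect a label for every image up front so TRAIN_Y can
    # be encoded over the full dataset before any pixels are loaded.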
    for subdir, dirs, files in os.walk(image_dir):
        for file in files:
            path = pathlib.Path(subdir).absolute() / file
            image_label = define_label(path.parent.name)
            TRAIN_Y.append(image_label)

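    # Encode the string labels as integers, then one-hot them to match the
    # categorical_crossentropy loss.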
    label_encoder = LabelEncoder()
    TRAIN_Y = label_encoder.fit_transform(TRAIN_Y)
    TRAIN_Y = np.array(to_categorical(TRAIN_Y, num_classes=LABEL_SIZE))

    count = 0
    current_length_train_x = 0

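    # Second pass: load images directory-by-directory and train on a chunk
    # every 40 directories, so the full dataset never has to fit in memory.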
    for subdir, dirs, files in os.walk(image_dir):
        print(f'PATH: {subdir} is processing')
        count += 1
        for file in files:
            path = pathlib.Path(subdir).absolute() / file
            image = load_img(str(path), target_size=WH)
            TRAIN_X.append(np.array(image))

        if count % 40 == 0:
            # TRAIN_Y was one-hot encoded for the full dataset up front, so
            # slice out the rows corresponding to the images currently
            # buffered in TRAIN_X.
            slice_left = current_length_train_x
            slice_right = slice_left + len(TRAIN_X)
            current_length_train_x = slice_right

            # TODO: make active on resume iterations
            #  if count == 40:
            #      # make empty
            #      TRAIN_X = []
            #      model = load_model(f'{model_name}_iter_40.dump')
            #      continue

            x_train, x_test, y_train, y_test = train_test_split(
                np.array(TRAIN_X),
                TRAIN_Y[slice_left:slice_right],
                test_size=0.2,
                random_state=69,
            )

            # make empty
            TRAIN_X = []

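            # Fit the augmentation statistics (needed only for options like
            # featurewise normalization) on this chunk, then train on
            # augmented batches drawn from memory.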
            augs_gen.fit(x_train)
            model.fit_generator(
                augs_gen.flow(x_train, y_train, batch_size=25),
                validation_data=(x_test, y_test),
                # validation_steps applies only to generator validation
                # data, so it is omitted for the in-memory tuple above;
                # steps_per_epoch=1000 recycles the chunk within each epoch.
                steps_per_epoch=1000,
                epochs=20,
                verbose=1,
                callbacks=[tensorboard_callback],
            )
            del x_train, x_test, y_train, y_test
            model.save(f'{model_name}_iter_{count}.dump')

        print(f'Executed {count} / 121')  # 120 class directories plus the walk root

    # Note: images gathered after the last multiple-of-40 chunk stay in
    # TRAIN_X and are never trained on; a final flush would be needed to
    # cover them.
    print('Preparing to write data to disk')
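

# Minimal usage sketch (hypothetical entry point, not part of the original
# snippets): `fit` streams images from IMAGE_DIR with augmentation and
# callbacks; the chunked read_dataset variant above instead loads images
# into memory forty directories at a time.
if __name__ == '__main__':
    fit(image_dir=IMAGE_DIR, dump=True)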