Example #1
def review_model(test_dataflow, test_imgreview, model, history, model_id,
                 imgproc, image_data_pipeline):
    """ Model diagnostics written to disk; performs prediction """

    model_name = '{}_{}'.format(model_id, imgproc)
    datetime_now = datetime.now().strftime("%Y%m%d-%H%M%S")

    logger.info("STARTED model diagnostics for '{}'".format(model_name))

    # Managing review directory

    review_dir = os.path.join(os.getcwd(), 'review')
    if not os.path.isdir(review_dir):
        os.makedirs(review_dir)

    # Report train/val loss vs. epochs

    if history is not None:
        logger.info("Reporting train/val loss vs. epochs plot")
        model_history_name = 'history_{}_{}.png'.format(
            model_name, datetime_now)
        mh_filepath = os.path.join(review_dir, model_history_name)
        plot_loss(mh_filepath, history)

    # Evaluate using test set

    logger.info("Evaluating test set and generating report")
    evaluate = model.evaluate_generator(test_dataflow)
    test_eval = str(model.metrics_names) + str(evaluate)
    test_eval_name = 'test_eval_{}_{}.txt'.format(model_name, datetime_now)
    te_filepath = os.path.join(review_dir, test_eval_name)
    with open(te_filepath, "w") as outfile:
        outfile.write(test_eval)

    # Prediction and reconstruction of N images

    logger.info("Predicting test image and generating review images")

    for file_path in test_imgreview:

        y_test_set = [file_path]
        test_imgreview_dataflow = RaiseDataGenerator(y_test_set,
                                                     image_data_pipeline)

        Y_pred, Y_true = model_predict(test_imgreview_dataflow,
                                       model,
                                       image_data_pipeline,
                                       Y_true_fpath=file_path)

        # Include the source file name so each review image gets a unique
        # path; otherwise every loop iteration overwrites the same file
        img_base = os.path.splitext(os.path.basename(file_path))[0]
        img_pred_name = 'img_pred_{}_{}_{}.png'.format(model_name, img_base,
                                                       datetime_now)
        img_filepath = os.path.join(review_dir, img_pred_name)

        plot_imgpair(Y_pred, Y_true, img_filepath)
        logger.info("Wrote out review image: {}".format(img_pred_name))

    logger.info("FINISHED model diagnostics for '{}'".format(model_name))
Example #2
def fit_model(X_train, Y_train, model, checkpoint_dir, imgtup):

    imgname, imgfunc = imgtup
    
    chk = os.listdir(checkpoint_dir)
    if len(chk) > 1:
        # Checkpoints already exist; resuming from them is currently disabled.
        # latest = tf.train.latest_checkpoint(checkpoint_dir)
        # model.load_weights(latest)
        pass

    else:
        datagen = ImageDataGenerator(
            preprocessing_function=imgfunc)

        # Compute the generator's data-dependent statistics from the
        # training images (fit() does not transform anything itself)
        datagen.fit(X_train)

        # Compile model

        learning_rate = 1e-3
        opt = AdamOptimizer(learning_rate=learning_rate)

        model.compile(optimizer=opt,
                      loss=mean_absolute_error,
                      metrics=['accuracy'])

        model.summary()

        # Fit model

        # fit_generator is deprecated in TF >= 2.1; Model.fit also accepts
        # generators. steps_per_epoch must be an integer.
        history = model.fit_generator(datagen.flow(X_train, Y_train,
                                                   batch_size=32),
                                      steps_per_epoch=X_train.shape[0] // 32,
                                      epochs=100)
        plot_loss('review/train_val_loss_021_{}.png'.format(imgname), history)

    return model
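
The commented-out branch above sketches resuming from TensorFlow checkpoints. Re-enabling it could look like the following, assuming the files in checkpoint_dir were written in tf.train format (e.g. by a ModelCheckpoint callback with save_weights_only=True):

import tensorflow as tf

latest = tf.train.latest_checkpoint(checkpoint_dir)  # None if nothing saved
if latest is not None:
    model.load_weights(latest)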
Example #3
model = simple_sony()
#model = full_sony()
#opt = Adam(lr=1e-4)
learning_rate = 1e-3  # assumed: the listing never defines it (Example #2 uses 1e-3)
opt = AdamOptimizer(learning_rate=learning_rate)

model.compile(optimizer=opt, loss=mean_absolute_error, metrics=['accuracy'])

# Fitting the model

history = model.fit(X_train,
                    Y_train,
                    validation_split=0.25,
                    epochs=100,
                    batch_size=32,
                    callbacks=[cp_callback])
plot_loss('review/train_val_loss.png', history)

# Predicting with the model

model.summary()

output = model.predict(X_test)
logger.debug("prediction output shape: {}".format(output.shape))

# Review image output

every = 10000
for i in range(output.shape[0]):

    base = "review/"
    if i % every == 0:
        pass  # the snippet is truncated here in the source listing
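
The loop body is cut off in the source. A plausible completion (hypothetical, not the original code), assuming output holds float images scaled roughly to [0, 1], would save every `every`-th prediction into the review directory:

import os
import numpy as np
import matplotlib.pyplot as plt

for i in range(0, output.shape[0], every):
    img = np.clip(output[i], 0.0, 1.0)  # guard against out-of-range floats
    plt.imsave(os.path.join("review", "img_pred_{}.png".format(i)), img)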
Example #4
embedding_dim = vecs.shape[1]
MAX_SENT_LENGTH = 100
MAX_SENTS = 15
model = HAN(MAX_SENT_LENGTH, MAX_SENTS, max_features, embedding_dim, vecs)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=[fbeta_score, 'acc'])

print("model fitting - Bidirectional LSTM")
model.summary()
plot_model(model, SAVEPATH + '/model.png', show_shapes=True)
x_train, y_train, x_val, y_val = train_val_split(train_data, train_label,
                                                 2017, 0.2)
loss_his = LossHistory()
result_his = ResultHistory(test_data, SAVEPATH, False)
result_dev_his = ResultHistory(x_val, SAVEPATH, False)
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          epochs=20, batch_size=128,
          callbacks=[loss_his, result_his, result_dev_his])
plot_loss(SAVEPATH, loss_his)
for i, result in enumerate(result_his.result, start=1):
    np.savetxt(SAVEPATH + '/result_' + str(i) + '.txt', result,
               fmt="%.4f", delimiter=" ")
for i, result in enumerate(result_dev_his.result, start=1):
    np.savetxt(SAVEPATH + '/dev_result_' + str(i) + '.txt', result,
               fmt="%.4f", delimiter=" ")
np.savetxt(SAVEPATH + '/dev_label.txt', y_val, fmt="%i", delimiter=' ')

if __name__ == '__main__':
    pass
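
LossHistory and ResultHistory are custom callbacks not included in the listing. A minimal sketch of a LossHistory that would satisfy the plot_loss call above, assuming it only needs to record per-epoch losses (ResultHistory's behavior cannot be recovered from this snippet):

from tensorflow.keras.callbacks import Callback


class LossHistory(Callback):
    """Record per-epoch training and validation loss for later plotting."""

    def on_train_begin(self, logs=None):
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))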
Example #5
def run_training():
    image_files = glob.glob(os.path.join(config.DATA_DIR, "*.png"))
    image_files = image_files[:10]  # debug subset; remove for a full run
    print(f"Number of Images Found: {len(image_files)}")
    # "../xywz.png" -> "xywz"
    targets_orig = [x.split("/")[-1].split(".")[0] for x in image_files]
    # separate the targets on character level
    targets = [[char for char in x] for x in targets_orig]
    targets_flat = [c for clist in targets for c in clist]

    lbl_encoder = preprocessing.LabelEncoder()
    lbl_encoder.fit(targets_flat)
    targets_enc = [lbl_encoder.transform(x) for x in targets]
    # LabelEncoder numbers classes from 0; shift by 1 so 0 is reserved
    # for the unknown/blank class used during decoding
    targets_enc = np.array(targets_enc) + 1

    print(f"Number of Unique Classes: {len(lbl_encoder.classes_)}")

    train_imgs, test_imgs, train_targets, test_targets, train_orig_targets, test_orig_targets = \
        model_selection.train_test_split(image_files, targets_enc, targets_orig, test_size=0.1, random_state=42)

    train_dataset = dataset.ClassificationDataset(image_paths=train_imgs,
                                                  targets=train_targets,
                                                  resize=(config.IMAGE_HEIGHT,
                                                          config.IMAGE_WIDTH))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=config.BATCH_SIZE,
                                               num_workers=config.NUM_WORKERS,
                                               shuffle=True)

    test_dataset = dataset.ClassificationDataset(image_paths=test_imgs,
                                                 targets=test_targets,
                                                 resize=(config.IMAGE_HEIGHT,
                                                         config.IMAGE_WIDTH))
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=config.BATCH_SIZE,
                                              num_workers=config.NUM_WORKERS,
                                              shuffle=False)

    model = CaptchaModel(num_chars=len(lbl_encoder.classes_))
    model.to(config.DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.8,
                                                           patience=5,
                                                           verbose=True)

    train_loss_data = []
    test_loss_data = []
    for epoch in range(config.EPOCHS):
        train_loss = engine.train_fn(model,
                                     train_loader,
                                     optimizer,
                                     save_model=True)
        eval_preds, test_loss = engine.eval_fn(model, test_loader)

        eval_captcha_preds = []
        for vp in eval_preds:
            current_preds = decode_predictions(vp, lbl_encoder)
            eval_captcha_preds.extend(current_preds)

        combined = list(zip(test_orig_targets, eval_captcha_preds))

        pprint(combined[:10])
        # Greedy decoding collapses repeated characters, so de-duplicate the
        # true targets the same way for a fair accuracy comparison
        test_dup_rem = [remove_duplicates(c) for c in test_orig_targets]
        accuracy = metrics.accuracy_score(test_dup_rem, eval_captcha_preds)
        print(
            f"Epoch={epoch}, Train Loss={train_loss}, Test Loss={test_loss}, Accuracy={accuracy}"
        )
        scheduler.step(test_loss)
        train_loss_data.append(train_loss)
        test_loss_data.append(test_loss)

    # print(train_dataset[0])
    plot_loss(train_loss_data, test_loss_data, plot_path=config.PLOT_PATH)
    print("done")