def train_binary_model(base_model, model_name, already_trained_model=None):
    x_train, y_train, x_test, y_test = prepare_data()
    if not already_trained_model:
        model = StandardModel(base_model, (512, 512, 3), classes=2, use_softmax=True)
        model = model.build_model()
        model.compile(Adamax(), loss='binary_crossentropy', metrics=['acc'])
        model.fit_generator(DataGenerator(x_train, labels=y_train, n_classes=2, batch_size=8), epochs=1)
        model.save(model_name)
    else:
        if os.path.exists(already_trained_model):
            model = keras.models.load_model(already_trained_model)
            model.compile(Adamax(), loss='binary_crossentropy', metrics=['acc'])
            model.fit_generator(DataGenerator(x_train, labels=y_train, n_classes=2, batch_size=8), epochs=1)
            model.save(model_name)
        else:
            print_error("Provided model file doesn't exist! Exiting...")
            sys.exit(1)
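# Every snippet in this section relies on a project-specific DataGenerator that
# is not defined here. For orientation, here is a minimal sketch of what such a
# class typically looks like when built on keras.utils.Sequence; the names
# x_set / labels and the one-hot encoding are assumptions, not taken from any
# of the projects shown.
import numpy as np
from keras.utils import Sequence, to_categorical

class DataGenerator(Sequence):
    """Minimal batch-generator sketch built on keras.utils.Sequence."""

    def __init__(self, x_set, labels=None, n_classes=2, batch_size=8, shuffle=True):
        self.x, self.labels = x_set, labels
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.x))

    def __len__(self):
        # Number of batches per epoch.
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        # Slice out one batch and one-hot encode its labels.
        batch_idx = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = np.asarray([self.x[i] for i in batch_idx])
        batch_y = to_categorical([self.labels[i] for i in batch_idx],
                                 num_classes=self.n_classes)
        return batch_x, batch_y

    def on_epoch_end(self):
        # Reshuffle between epochs so batches differ each pass.
        if self.shuffle:
            np.random.shuffle(self.indices)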
def train_model(model, partitions, train_labels, train_path, parameter, preprocessor):
    target_names = train_labels.drop(["Target", "Id"], axis=1).columns
    predictions = []
    histories = []
    for i, partition in enumerate(partitions):
        print("training in partition ", i + 1)
        training_generator = DataGenerator(partition['train'], train_labels, parameter, preprocessor)
        validation_generator = DataGenerator(partition['validation'], train_labels, parameter, preprocessor)
        predict_generator = PredictGenerator(partition['validation'], preprocessor, train_path)
        model.set_generators(training_generator, validation_generator)
        histories.append(model.learn())
        proba_predictions = model.predict(predict_generator)
        proba_predictions = pd.DataFrame(index=partition['validation'],
                                         data=proba_predictions,
                                         columns=target_names)
        predictions.append(proba_predictions)
    return predictions, histories
def train_model(use_multiprocessing=False):
    date = datetime.datetime.now()
    model_filename = "{0}-{1}-{2}_lstm_model".format(date.month, date.day, date.year)

    train_generator = DataGenerator(IMDB_DATA_PATH, IMDB_DATA_PATH, 100)
    val_generator = DataGenerator(IMDB_DATA_PATH, IMDB_DATA_PATH, "val", 100)
    test_generator = DataGenerator(IMDB_DATA_PATH, IMDB_DATA_PATH, "test", 100)
    model = get_model(INPUT_SHAPE)

    # Creates a directory to save TensorBoard callback logs.
    tensorboard_path_dir = BASE_PATH / "logs" / model_filename
    if not tensorboard_path_dir.exists():
        tensorboard_path_dir.mkdir(exist_ok=True, parents=True)
    tbCallBack = keras.callbacks.TensorBoard(
        log_dir=str(tensorboard_path_dir),
        histogram_freq=10,
        batch_size=32,
        write_graph=True,
        write_grads=False,
        write_images=False,
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None,
    )

    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=[f1, metrics.binary_accuracy])
    model.fit_generator(
        generator=train_generator,
        epochs=100,
        callbacks=[tbCallBack],
        validation_data=val_generator,
        validation_freq=10,
        workers=2,
        use_multiprocessing=True,
        shuffle=True,
    )

    score = model.evaluate_generator(
        test_generator, use_multiprocessing=use_multiprocessing, verbose=0)
    score_train = model.evaluate_generator(
        train_generator, use_multiprocessing=use_multiprocessing, verbose=0)
    score_cv = model.evaluate_generator(
        val_generator, use_multiprocessing=use_multiprocessing, verbose=0)

    # Needs to be fixed: violates abstraction barriers.
    model_dict = load_model_dict()
    if len(model_dict) == 0:
        vn = 1
    else:
        # dict.keys() is not subscriptable in Python 3; take the highest
        # existing version number instead.
        vn = max(model_dict.keys()) + 1
    save_model(model, model_filename, vn)
def __init__(self, n_jobs=-1, n_epochs=5, batch_size=1, chunk_size=32,
             learning_rate=.001, generator=None, **kwargs):
    super(HogWildRegressor, self).__init__(**kwargs)
    if self.loss not in self.losses:
        raise Exception("Loss '%s' not supported" % self.loss)
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.gradient = self.losses.get(self.loss)
    self.n_jobs = n_jobs
    self.n_epochs = n_epochs
    self.chunk_size = chunk_size
    self.shared_weights = SharedWeights
    if not generator:
        # Fall back to a default generator when none is supplied.
        self.generator = DataGenerator(shuffle=self.shuffle,
                                       chunk_size=self.chunk_size,
                                       verbose=self.verbose)
    else:
        self.generator = generator
n_epochs = 100
sgd = optimizers.SGD(lr=LearningRate, decay=LearningRate / n_epochs,
                     momentum=0.9, nesterov=True)
top_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

trainable_params = int(np.sum([K.count_params(p) for p in set(top_model.trainable_weights)]))
non_trainable_params = int(np.sum([K.count_params(p) for p in set(top_model.non_trainable_weights)]))
print("Model stats")
print("=" * 30)
print("Total Parameters: {:,}".format(trainable_params + non_trainable_params))
print("Non-Trainable Parameters: {:,}".format(non_trainable_params))
print("Trainable Parameters: {:,}\n".format(trainable_params))

train_folders = '/cptjack/totem/yanyiting/Eight_classification/gray/8_focus/train'
validation_folders = '/cptjack/totem/yanyiting/Eight_classification/gray/8_focus/val/'
img_width, img_height = 224, 224
batch_size_for_generators = 32

train_datagen = DataGenerator(rescale=1. / 255, rotation_range=178,
                              horizontal_flip=True, vertical_flip=True,
                              shear_range=0.6, fill_mode='nearest',
                              stain_transformation=True)
train_generator = train_datagen.flow_from_directory(train_folders,
                                                    target_size=(img_width, img_height),
                                                    batch_size=batch_size_for_generators,
                                                    class_mode='categorical')
validation_datagen = DataGenerator(rescale=1. / 255)
validation_generator = validation_datagen.flow_from_directory(validation_folders,
                                                              target_size=(img_width, img_height),
                                                              batch_size=batch_size_for_generators,
                                                              class_mode='categorical')
nb_train_samples = sum([len(files) for root, dirs, files in os.walk(train_folders)])
nb_validation_samples = sum([len(files) for root, dirs, files in os.walk(validation_folders)])

class Mycbk(ModelCheckpoint):
    def __init__(self, model, filepath, monitor='val_loss', mode='min', save_best_only=True):
        self.single_model = model
        # Pass arguments by keyword: ModelCheckpoint's third positional
        # parameter is `verbose`, so the original positional call silently
        # mapped save_best_only/mode onto the wrong parameters.
        super(Mycbk, self).__init__(filepath, monitor=monitor,
                                    save_best_only=save_best_only, mode=mode)
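# The Mycbk subclass above stores `single_model`, but the snippet ends before
# showing why. A common reason for this pattern (an assumption here, not shown
# in the source) is checkpointing the single-GPU template model while training
# a multi_gpu_model() wrapper, done by overriding set_model(). A minimal,
# self-contained sketch of that idea, under a hypothetical class name:
from keras.callbacks import ModelCheckpoint

class CheckpointSingleModel(ModelCheckpoint):
    def __init__(self, model, filepath, monitor='val_loss', mode='min',
                 save_best_only=True):
        self.single_model = model
        super(CheckpointSingleModel, self).__init__(
            filepath, monitor=monitor, save_best_only=save_best_only, mode=mode)

    def set_model(self, model):
        # Keras hands the (possibly multi-GPU) training model to callbacks;
        # checkpoint the stored template model instead, so the saved weights
        # load cleanly on a single device.
        super(CheckpointSingleModel, self).set_model(self.single_model)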
def train(nb_batch=32, nb_epochs=100, l_rate=1e-4, augmented=False, multi_gpu=0):
    # Load the data
    h5f = h5py.File(os.path.join(data_dir, "data.h5"), 'r')
    train_x, train_y = h5f['train_x'][:], h5f['train_y'][:]
    valid_x, valid_y = h5f['valid_x'][:], h5f['valid_y'][:]
    test_x, test_y = h5f['test_x'][:], h5f['test_y'][:]
    h5f.close()
    print("Data shapes: ", train_x.shape, valid_x.shape, test_x.shape)

    # Training parameters
    if multi_gpu:
        nb_batch = multi_gpu * nb_batch  # assign the same per-GPU batch size to every GPU

    # Build the model
    model_input = Input(shape=(24, 24, 24, 16))
    model = Model(inputs=model_input, outputs=Squeeze_model(model_input))
    if multi_gpu:
        model = multi_gpu_model(model, gpus=multi_gpu)

    # Compile the model
    model.compile(optimizer=optimizers.Adam(lr=l_rate, beta_1=0.99, beta_2=0.999),
                  loss='mean_squared_error')

    # Checkpoint
    outputFolder = "weights"
    if not os.path.isdir(outputFolder):
        os.makedirs(outputFolder)
    weights_filepath = os.path.join(outputFolder, "weights.h5")
    callbacks_list = [
        ModelCheckpoint(weights_filepath, monitor='val_loss', verbose=1,
                        save_best_only=True, save_weights_only=True,
                        mode='auto', period=1)
    ]

    # Train without generators
    # history = model.fit(x=train_x, y=train_y,
    #                     batch_size=nb_batch,
    #                     epochs=nb_epochs,
    #                     callbacks=callbacks_list,
    #                     validation_data=(valid_x, valid_y),
    #                     verbose=True)

    # Generators
    if augmented:
        print("TRAINING ON AUGMENTED DATA")
        data_gen = AugmentedDataGenerator(x=train_x, y=train_y, batch_size=nb_batch)
        val_gen = AugmentedDataGenerator(x=valid_x, y=valid_y, batch_size=nb_batch)
    else:
        data_gen = DataGenerator(x=train_x, y=train_y, batch_size=nb_batch)
        val_gen = DataGenerator(x=valid_x, y=valid_y, batch_size=nb_batch)

    # Train
    history = model.fit_generator(generator=data_gen,
                                  validation_data=val_gen,
                                  use_multiprocessing=False,
                                  epochs=nb_epochs,
                                  max_queue_size=10,
                                  workers=56,
                                  verbose=1,
                                  callbacks=callbacks_list)

    # Plot training history
    # plt.figure()
    # plt.plot(history.history['loss'])
    # plt.plot(history.history['val_loss'])
    # plt.xlabel("Epochs")
    # plt.ylabel("Loss (MSE)")
    # plt.legend(['Train Loss', 'Validation Loss'])
    # plt.savefig('training_history.png', format='png', dpi=1000)
    # plt.show()

    # Load the best weights
    model.load_weights(weights_filepath)

    # Evaluate the model's performance
    train_r2 = r2_score(y_true=train_y, y_pred=model.predict(train_x))
    print("Train r2: ", train_r2)
    test_r2 = r2_score(y_true=test_y, y_pred=model.predict(test_x))
    print("Test r2: ", test_r2)
train_params = {
    'input_dir': train_folder,
    'samples': train_n,
    'label_dict': data,
    'image_shape': input_shape,
    'batch_size': batch_size,
    'augment': True,
    'shuffle': True
}
dev_params = {
    'input_dir': train_folder,
    'samples': dev_n,
    'label_dict': data,
    'image_shape': input_shape,
    'batch_size': batch_size,
    'augment': False,
    'shuffle': True
}

# create generators
train_generator = DataGenerator(**train_params)
validation_generator = DataGenerator(**dev_params)

# do the training
train_model(model,
            train_generator,
            validation_generator,
            epochs=num_epochs,
            use_multiprocessing=True,
            workers=4)
np.seterr(all='raise')
# plt.close("all")

if True:
    # Load data
    print('Loading data...')
    data = load_data.data()
    cfg = data.cfg
    labels = data.labels
    class_mapping = data.class_mapping

    # Create batch generators
    genTrain = DataGenerator(imagesMeta=data.trainMeta, GTMeta=data.trainGTMeta, cfg=cfg, data_type='train')
    genVal = DataGenerator(imagesMeta=data.valMeta, GTMeta=data.trainGTMeta, cfg=cfg, data_type='val')
    genTest = DataGenerator(imagesMeta=data.testMeta, GTMeta=data.testGTMeta, cfg=cfg, data_type='test')

    stats, count = utils.getLabelStats(data.trainMeta, data.labels)

if True:
    # Save config
    utils.saveConfig(cfg)
valid_labels = np.array(valid_labels).astype(np.int32)
Y_valid = to_categorical(valid_labels, num_classes=np.unique(valid_labels).shape[0])

print("\nBootstrapping to Balance - Training set size: %d (%d X %d)"
      % (train_labels.shape[0], MAX, np.unique(train_labels).shape[0]))
print("=" * 30, "\n")

# n_epochs = 70
batch_size_for_generators = 32
train_datagen = DataGenerator(rotation_range=178,
                              horizontal_flip=True,
                              vertical_flip=True,
                              shear_range=0.6,
                              stain_transformation=True)
train_gen = train_datagen.flow(train_images, Y_train, batch_size=batch_size_for_generators)

### VALIDATION ###
valid_datagen = DataGenerator()
valid_gen = valid_datagen.flow(valid_images, Y_valid, batch_size=batch_size_for_generators)

start = time.time()
early_stopping = EarlyStopping(monitor='val_acc', min_delta=0.001, patience=9)
checkpointer = ModelCheckpoint(filepath='./models/model_{}'.format(1),
                               verbose=True,
                               save_best_only=True)
# tb = TensorBoard(log_dir='./tf_logs/{}'.format(time.time()),
#                  batch_size=BATCH_SIZE,
#                  write_grads=True,
#                  write_graph=True,
#                  histogram_freq=1)

DG = DataGenerator(data=train['data'],
                   batch_size=BATCH_SIZE,
                   tensor_maker=TM,
                   shuffle=True,
                   sentences=True,
                   characters=False,
                   word_features=False,
                   tags=True)
VG = DataGenerator(data=valid['data'],
                   batch_size=BATCH_SIZE,
                   tensor_maker=TM,
                   shuffle=True,
                   sentences=True,
                   characters=False,
                   word_features=False,
                   tags=True)

model.fit_generator(generator=DG,
                    validation_data=VG,
                    validation_steps=len(VG),
import numpy as np
# Note: sklearn.cross_validation was removed in modern scikit-learn;
# model_selection is its replacement.
from sklearn import cross_validation, metrics

# test_folders = "./NCT/validation/"
test_folders = '/cptjack/totem/yanyiting/gray_focus_unfocus/gray/data/micro_png_224'
batch_size_for_generators = 32
nb_test_samples = sum([len(files) for root, dirs, files in os.walk(test_folders)])
test_steps = nb_test_samples // batch_size_for_generators

print("\nImages for Testing")
print("=" * 30)

img_width, img_height = 224, 224
test_datagen = DataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_directory(test_folders,
                                                  target_size=(img_width, img_height),
                                                  batch_size=batch_size_for_generators,
                                                  class_mode='categorical',
                                                  shuffle=False)

test_loss, test_accuracy = top_model.evaluate_generator(test_generator, steps=test_steps, verbose=1)
predictions = top_model.predict_generator(test_generator, steps=test_steps, verbose=1)
prediction_list = np.argmax(predictions, axis=1)
                         '_log.csv', separator=',', append=True)
    return [es, msave, reduce_lr, tb_log, log_cv]

folders = [
    '/cptjack/totem/yanyiting/gray_focus_unfocus/gray/data/2_focus/focus/',
    '/cptjack/totem/yanyiting/gray_focus_unfocus/gray/data/2_focus/unfocus/'
]
img_width, img_height = 224, 224
batch_size_for_generators = 32

train_datagen = DataGenerator(rescale=1. / 255,
                              rotation_range=178,
                              horizontal_flip=True,
                              vertical_flip=True,
                              shear_range=0.6,
                              fill_mode='nearest',
                              stain_transformation=True)
valid_datagen = DataGenerator(rescale=1. / 255)

all_images = []
all_labels = []
for index, folder in enumerate(folders):
    files = glob.glob(folder + "*.png")
    images = io.imread_collection(files)
    images = [image for image in images]  ### Reshape to (299, 299, 3) ###
    labels = [index] * len(images)
    all_images = all_images + images
    all_labels = all_labels + labels
if classes_test.shape != classes_train.shape or not (
        numpy.array(classes_test) == numpy.array(classes_train)).all():
    print("Labels in train and test dataset are different")
    intersection = classes_test[numpy.in1d(classes_test, classes_train)]
    print(f"There are {len(intersection)} common labels; the rest are ignored.")
    train_df = train_df[train_df.label.isin(intersection)]
    test_df = test_df[test_df.label.isin(intersection)]

NUM_CLASSES = len(train_df.label.unique())

train_base_generator = DataGenerator(train_df,
                                     'image_path',
                                     'label',
                                     reduction=0.6,
                                     image_size=int(args.size),
                                     aug_sequence=get_augmenting_sequence(),
                                     batch_size=int(args.batch))

from PIL import Image
import random

Path("train_renders").mkdir(exist_ok=True)
counter = 0
for data in train_base_generator:
    for image, label in zip(data[0], data[1]):
        Image.fromarray(image).save(
            "train_renders/"
            + f"{train_base_generator.one_hot_to_label(label)}_"
            + str(random.randint(0, 100)) + ".jpg")
        counter += 1
def __init__(self, generator: DataGenerator = None, num_log_batches=1):
    super().__init__()
    self.generator = generator
    self.num_batches = num_log_batches
    # Store the full names of the classes. Note that despite the None default,
    # generator must be provided: extract_labels() is called immediately.
    self.flat_class_names = generator.extract_labels()
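# Only the constructor of this logging callback appears in the source; how
# num_log_batches and flat_class_names are used is not shown. One plausible
# completion, sketched under the assumptions that the generator supports
# Sequence-style indexing and that extract_labels() returns class names; the
# class name PredictionLogger is hypothetical.
from keras.callbacks import Callback

class PredictionLogger(Callback):
    """Hypothetical completion: print predictions on a few batches per epoch."""

    def __init__(self, generator=None, num_log_batches=1):
        super().__init__()
        self.generator = generator
        self.num_batches = num_log_batches
        self.flat_class_names = generator.extract_labels()  # project-specific API

    def on_epoch_end(self, epoch, logs=None):
        # Pull a few batches from the generator and report predicted classes.
        for i in range(self.num_batches):
            batch_x, batch_y = self.generator[i]
            preds = self.model.predict(batch_x)
            for true, pred in zip(batch_y.argmax(axis=1), preds.argmax(axis=1)):
                print("epoch %d: true=%s pred=%s"
                      % (epoch, self.flat_class_names[true], self.flat_class_names[pred]))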
def main(settings):
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # load settings
    settings_path = Path("settings/" + settings)
    with open(str(settings_path), "r") as file:
        settings_dict = json.load(file)

    logger.info("Training seq2seq model on math module << {} >> and difficulty level << {} >>"
                .format(settings_dict["math_module"], settings_dict["train_level"]))
    logger.info("Using TensorFlow version: {}".format(tf.__version__))
    logger.info("GPU Available: {}".format(tf.test.is_gpu_available()))
    cpu_count = multiprocessing.cpu_count()
    logger.info("Number of CPUs: {}".format(cpu_count))

    data_gen_pars, input_texts, target_texts = get_sequence_data(settings_dict)
    training_generator = DataGenerator(input_texts=input_texts["train"],
                                       target_texts=target_texts["train"],
                                       **data_gen_pars)
    validation_generator = DataGenerator(input_texts=input_texts["valid"],
                                         target_texts=target_texts["valid"],
                                         **data_gen_pars)
    interpolate_generator = DataGenerator(input_texts=input_texts["interpolate"],
                                          target_texts=target_texts["interpolate"],
                                          **data_gen_pars)
    extrapolate_generator = DataGenerator(input_texts=input_texts["extrapolate"],
                                          target_texts=target_texts["extrapolate"],
                                          **data_gen_pars)

    valid_dict = {
        "validation": validation_generator,
        "interpolation": interpolate_generator,
        "extrapolation": extrapolate_generator,
    }
    history = NValidationSetsCallback(valid_dict)
    gradient = GradientLogger(live_metrics=["loss", "exact_match_metric"], live_gaps=10)

    lstm = Seq2SeqLSTM(data_gen_pars["num_encoder_tokens"],
                       data_gen_pars["num_decoder_tokens"],
                       settings_dict["embedding_dim"])
    model = lstm.get_model()
    # summary() prints directly and returns None, so no print() wrapper.
    model.summary()

    adam = Adam(lr=6e-4, beta_1=0.9, beta_2=0.995, epsilon=1e-9,
                decay=0.0, amsgrad=False, clipnorm=0.1)
    model.compile(optimizer=adam, loss="categorical_crossentropy",
                  metrics=[exact_match_metric])

    # directory where the checkpoints will be saved
    checkpoint_dir = settings_dict["save_path"] + "training_checkpoints"
    # name of the checkpoint files
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix,
                                                             save_weights_only=True)

    logger.info("Start training ...")
    # workers = cpu_count / 2 and no multiprocessing?
    train_hist = model.fit_generator(
        training_generator,
        epochs=settings_dict["epochs"],
        use_multiprocessing=False,
        workers=cpu_count // 2,
        callbacks=[history, gradient, checkpoint_callback],
        verbose=0,
    )

    # save callbacks data
    with open(settings_dict["save_path"] + "callbacks.pkl", "wb") as file:
        pickle.dump(train_hist.history, file)
    # save model
    model.save(settings_dict["save_path"] + "model.h5")
    # save settings
    with open(settings_dict["save_path"] + "settings.json", "w") as file:
        json.dump(settings_dict, file)
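# NValidationSetsCallback is project-specific and not defined in the snippet.
# A minimal sketch of how a multi-validation-set callback is often written;
# the evaluate() call and the logs key naming are assumptions.
import tensorflow as tf

class NValidationSetsCallback(tf.keras.callbacks.Callback):
    """Sketch: evaluate several named validation generators every epoch."""

    def __init__(self, valid_dict):
        super().__init__()
        self.valid_dict = valid_dict  # e.g. {"interpolation": generator, ...}

    def on_epoch_end(self, epoch, logs=None):
        logs = logs if logs is not None else {}
        for name, generator in self.valid_dict.items():
            # evaluate() on a compiled model returns [loss, metric1, ...].
            results = self.model.evaluate(generator, verbose=0)
            for metric_name, value in zip(self.model.metrics_names, results):
                logs["{}_{}".format(name, metric_name)] = value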
print("Class: %s. Size: %d" % (folder.split("/")[-2], len(images)))

valid_images = np.stack(valid_images)
valid_images = (valid_images / 255).astype(np.float32)  ### Standardise
valid_labels = np.array(valid_labels).astype(np.int32)
Y_valid = to_categorical(valid_labels, num_classes=np.unique(valid_labels).shape[0])

print("\nBootstrapping to Balance - Training set size: %d (%d X %d)"
      % (train_labels.shape[0], MAX, np.unique(train_labels).shape[0]))
print("=" * 40, "\n")

batch_size_for_generators = 64
train_datagen = DataGenerator(rotation_range=180,
                              horizontal_flip=True,
                              vertical_flip=True,
                              shear_range=0.6,
                              stain_transformation=True)
# train_gen = train_datagen.flow(train_images, Y_train, batch_size=batch_size_for_generators)

# VALIDATION
valid_datagen = DataGenerator()
valid_gen = valid_datagen.flow(valid_images, Y_valid, batch_size=batch_size_for_generators)

start = time.time()
sys.path.append('../../classification/models/')
sys.path.append('../../shared/')

import utils
from model_trainer import model_trainer
from load_data import data
from generators import DataGenerator
import numpy as np
import metrics
import image

if False:
    # Load data
    print('Loading data...')
    data = data(method='normal')
    cfg = data.cfg
    genTest = DataGenerator(imagesMeta=data.testMeta, GTMeta=data.testGTMeta, cfg=cfg, data_type='test')
    gt_label, _, _ = image.getYData(genTest.dataID, genTest.imagesMeta, genTest.GTMeta, genTest.cfg)

    path = 'C:\\Users\\aag14/Documents/Skole/Speciale/results/HICO/hoi80/yhat1'
    yhat = utils.load_obj(path)
    evalHOI = metrics.EvalResults(None, genTest, yhat=yhat, y=gt_label[0])
    print(evalHOI.mAP, evalHOI.F1)