def get_data(self, task, nb_unlabelled, nb_labelled):
    """Sample an unlabelled/labelled split whose labelled targets are binary.

    The split is re-drawn until the labelled targets contain exactly two
    distinct values, for at most ten draws.

    :param task: task name. If it is in ``self.basic_task_names`` the data
        comes from ``data_utils.get_data_from_TH`` and the task's column of
        ``y_test`` is selected. If it is in ``self.bongard_task_names`` the
        second "_"-separated field of the name is passed to
        ``data_utils.get_data`` as the problem identifier.
    :param nb_unlabelled: forwarded as ``nb_train``.
    :param nb_labelled: forwarded as ``nb_test``.
    :return: ``(X_train, X_test, y_test)``, or ``None`` when ``task`` is in
        neither task list.
    :raises AssertionError: if none of the ten draws yields two distinct
        labels.
    """
    max_tries = 10

    if task in self.basic_task_names:
        task_index = self.basic_task_names.index(task)

        def draw_split():
            X_train, _, _, _, X_test, y_test = data_utils.get_data_from_TH(
                self.tile_handler,
                nb_train=nb_unlabelled,
                nb_val=0,
                nb_test=nb_labelled)
            # Keep only the target column belonging to the requested task.
            return X_train, X_test, y_test[:, task_index]

    elif task in self.bongard_task_names:
        bp_num = task.split("_")[1]

        def draw_split():
            X_train, _, _, _, X_test, y_test = data_utils.get_data(
                self.BPG,
                bp_num,
                nb_train=nb_unlabelled,
                nb_val=0,
                nb_test=nb_labelled)
            return X_train, X_test, y_test

    else:
        # Unknown task names have always produced None; kept for callers.
        return None

    for _ in range(max_tries):
        X_train, X_test, y_test = draw_split()
        if len(set(y_test)) == 2:
            return X_train, X_test, y_test

    # Raised explicitly (rather than via ``assert``) so the retry bound still
    # holds when Python runs with -O; the exception type and message are the
    # ones callers have always seen.
    raise AssertionError(
        "ERROR: too many attempts required for good data split")
def visualize_predictions(self):
    """Predict on every test sample and save colored comparison images.

    Steps, in order:
      1. Create ``self.save_path``; if it already exists, ask on stdin whether
         to continue and exit the process on anything other than "y".
      2. Build ``self.test_loader`` from ``get_data`` when it is not set.
      3. Build a ``FuseNet`` and load weights from ``self.model_path`` when
         ``self.model`` is not set.
      4. For each batch, take the arg-max of the segmentation output (and of
         the classification output when ``self.opt.use_class`` is set) and
         hand the stacked label/prediction image to ``self.paint_and_save``.

    :return: None
    """
    print('[INFO] Visualization of the results starts')

    # Output directory: create it, or get confirmation before reusing it.
    if not os.path.exists(self.save_path):
        os.makedirs(self.save_path)
    else:
        answer = input(
            '[INFO] Taget directory already exists. You might lose previously saved images. Continue:Abort (y:n): '
        )
        if answer.lower() != 'y':
            print(
                '[ABORT] Script stopped running. Images have not been saved'
            )
            sys.exit()

    # Lazily create the test loader.
    if self.test_loader is None:
        _, test_set = get_data(self.opt, use_train=False, use_test=True)
        print("[INFO] %s dataset has been retrieved" % self.dset_name)
        self.test_loader = torch.utils.data.DataLoader(
            test_set, batch_size=1, shuffle=False, num_workers=1)
        print("[INFO] Test loader for %s dataset has been created" %
              self.dset_name)

    # The first value stored in dset_info is used as the number of
    # segmentation classes.
    _, seg_class_num = next(iter(self.dset_info.items()))

    # Lazily create the model and load the pretrained weights into it.
    if self.model is None:
        self.model = FuseNet(seg_class_num, self.opt.gpu_id,
                             self.opt.use_class)
        state = torch.load(self.model_path)
        self.model.load_state_dict(state['state_dict'])
        print(
            "[INFO] Weights from pretrained FuseNet model has been loaded. Checkpoint: %s"
            % self.model_path)

    self.model.eval()

    def first_item_as_numpy(tensor):
        # Move to CPU and keep only the first element along the batch axis.
        return tensor.data.cpu().numpy()[0]

    # These stay None when scene classification is disabled.
    test_class_labels = None
    test_class_preds = None

    print(
        "[INFO] Prediction starts. Resulting comparision images will be saved under: %s"
        % self.save_path)

    for num, batch in enumerate(self.test_loader):
        rgb = Variable(batch[0].cuda(self.gpu_device))
        depth = Variable(batch[1].cuda(self.gpu_device))
        seg_labels = Variable(batch[2].cuda(self.gpu_device))

        if not self.opt.use_class:
            seg_outputs = self.model(rgb, depth)
        else:
            test_class_labels = Variable(batch[3].cuda(self.gpu_device))
            # The model returns both pixel-wise and scene-level outputs.
            seg_outputs, class_outputs = self.model(rgb, depth)
            _, test_class_preds = torch.max(class_outputs, 1)
            test_class_labels = first_item_as_numpy(test_class_labels)
            test_class_preds = first_item_as_numpy(test_class_preds)

        # Arg-max over dimension 1 gives the predicted label per pixel.
        _, seg_preds = torch.max(seg_outputs, 1)
        seg_preds = first_item_as_numpy(seg_preds)
        seg_labels = first_item_as_numpy(seg_labels)

        # Ground truth on the left, prediction on the right. Predictions are
        # shifted by one -- presumably labels are 1-based; TODO confirm.
        side_by_side = np.hstack(
            (np.uint8(seg_labels), np.uint8(seg_preds + 1)))

        # Channel-first -> channel-last, then reverse the channel order.
        rgb_image = first_item_as_numpy(rgb).transpose(1, 2, 0)[:, :, ::-1]

        self.paint_and_save(side_by_side, np.uint8(rgb_image),
                            test_class_labels, test_class_preds, num)

    print('[INFO] All %i images have been saved' % len(self.test_loader))
    print(
        '[COMPLETED] Boring prediction images are now nice and colorful!')
# Save the data to disk.
save_data(
    configs['all_data_path'],
    configs['train_data_path'],
    configs['test_data_path'],
    configs['val_data_path'],
)

with codecs.open(configs['all_data_path']) as f:
    content = f.readlines()

# Lengths of all non-empty (after stripping) lines.
length_list = []
for raw_line in content:
    stripped = raw_line.strip()
    if stripped:
        length_list.append(len(stripped))

# Use the 95th percentile of the line lengths as the maximum length.
max_length = int(np.percentile(length_list, 95))
logger.info(f"max length: {max_length}")

json2text(configs['test_data_path'], configs['ptest_x_path'], key='text')

train_data = get_data(configs['train_data_path'])
val_data = get_data(configs['val_data_path'])
train_collections = []
val_collections = []

# Turn each training item into (text, per-character tag list).
for item in train_data:
    tags = ['O'] * len(item['text'])
    for tag, info in item['label'].items():
        # Only the first span of the first entry under this tag is used.
        span = list(info.values())[0][0]
        from_index = span[0]
        to_index = span[1]
        # to_index is inclusive; clamp it to the text length.
        stop = min(to_index + 1, len(tags))
        for position in range(from_index, stop):
            tags[position] = f"I_{tag}"
        # The first position of the span gets the "begin" marker.
        tags[from_index] = f"B_{tag}"
    train_collections.append((item['text'], tags))
from sklearn import svm
from sklearn.metrics import mean_absolute_error
from da_models import model_process
from utils.data_utils import get_data, train_test_split
from utils.metrics import rmse_score
import logging
import time


def base_model():
    """Return a new, unfitted support-vector regressor with an RBF kernel."""
    return svm.SVR(kernel='rbf')


# load datasets
# Each call returns three values that are bound to the *_ns, *_ds and rul_*
# names below. The meaning of the integer argument is not visible here --
# presumably a per-bearing sample count or index; TODO confirm against
# utils.data_utils.get_data.
folder = '../data/'
feature_ns1, feature_ds1, rul_ds1 = get_data(folder + 'xBearing1_1.xlsx', 1490)
feature_ns2, feature_ds2, rul_ds2 = get_data(folder + 'xBearing1_2.xlsx', 827)
feature_ns3, feature_ds3, rul_ds3 = get_data(folder + 'xBearing1_3.xlsx', 1684)
feature_ns4, feature_ds4, rul_ds4 = get_data(folder + 'xBearing1_4.xlsx', 1083)
feature_ns5, feature_ds5, rul_ds5 = get_data(folder + 'xBearing1_5.xlsx', 680)
feature_ns6, feature_ds6, rul_ds6 = get_data(folder + 'xBearing1_6.xlsx', 649)
feature_ns7, feature_ds7, rul_ds7 = get_data(folder + 'xBearing1_7.xlsx', 1026)
print('Data loaded')

# train\test split
# Collect the per-bearing results into parallel lists.
feature_ns = [
    feature_ns1, feature_ns2, feature_ns3, feature_ns4, feature_ns5,
    feature_ns6, feature_ns7
]
feature_ds = [
    feature_ds1, feature_ds2, feature_ds3, feature_ds4, feature_ds5,
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from utils.data_utils import get_data
# NOTE(review): train_test_split is already imported above; this second import
# is redundant.
from sklearn.model_selection import train_test_split
from keras import backend as K
from keras.layers import Activation
from keras.layers import Input, Lambda, Dense, Dropout, Convolution2D, MaxPooling2D, Flatten
from keras.models import Sequential, Model
from keras.optimizers import RMSprop

# NOTE(review): `size` is not used in the visible code; its purpose cannot be
# determined from here.
size = 2
total_sample_size = 10000

# Load the samples and hold out 25% of them for testing.
X, Y = get_data(total_sample_size)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=.25)


def build_base_network(input_shape):
    """Build a Sequential network for inputs of shape ``input_shape``.

    The visible part adds one stage: convolution, ReLU, 2x2 max-pooling and
    25% dropout.
    """
    seq = Sequential()
    # Filter counts; only the first entry is used by the stage below.
    nb_filter = [6, 12]
    kernel_size = 3
    # convolutional layer 1
    # NOTE(review): positional kernel rows/cols plus `border_mode` and
    # `dim_ordering` look like the Keras 1.x Convolution2D signature --
    # confirm the installed Keras version accepts them.
    seq.add(Convolution2D(nb_filter[0], kernel_size, kernel_size, input_shape=input_shape, border_mode='valid', dim_ordering='th'))
    seq.add(Activation('relu'))
    seq.add(MaxPooling2D(pool_size=(2, 2)))
    seq.add(Dropout(.25))
def _fitness(learning_rate):
    """
    Train one model with the given learning rate and return its negated score.

    Objective function for a minimizing hyper-parameter search. It reads its
    configuration from ``self`` of the enclosing scope (model name, data
    helpers, embedding matrix, epochs, batch size, ...), and updates
    ``self.best_f1`` and the file at ``self.path_best_model`` whenever the new
    score beats the best one seen so far.

    Hyper-parameters:
    learning_rate: Learning-rate for the optimizer.

    All other hyper-parameters (e.g. ``hidden_dim=1024``) are fixed in the
    ``HParams`` call below.

    Returns:
    The negative of the last-epoch validation F1, or of the last-epoch
    ``val_acc`` when the model is 'cap2img'.
    """
    # Print the hyper-parameters.
    print('learning rate: {0:.1e}'.format(learning_rate))
    print()

    # Dir-name for the TensorBoard log-files.
    log_dir = _log_dir_name(learning_rate, self.model)

    # Create a callback-function for Keras which will be
    # run after each epoch has ended during training.
    # This saves the log-files for TensorBoard.
    # Note that there are complications when histogram_freq=1.
    # It might give strange errors and it also does not properly
    # support Keras data-generators for the validation-set.
    callback_log = TensorBoard(log_dir=log_dir,
                               histogram_freq=0,
                               batch_size=32,
                               write_graph=True,
                               write_grads=False,
                               write_images=False)

    model = None
    history = None
    # Never reassigned: the toy model validates via validation_split only.
    validation_data = None

    # Create the neural network with these hyper-parameters.
    if self.model == 'toy':
        # Random integer sequences; this branch only smoke-tests the pipeline.
        X = np.random.randint(0, 6, size=(3000, 50))
        Y = np.random.randint(0, 6, size=(3000, 50, 1))
        model = Sequential()
        model.add(Embedding(6, 50, input_length=50))
        model.add(Dense(300, activation='relu'))
        model.add(Dense(6, activation='softmax'))
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        history = model.fit(X,
                            Y,
                            epochs=1,
                            batch_size=1024,
                            validation_split=0.2,
                            validation_data=validation_data,
                            verbose=1,
                            callbacks=[callback_log] + self.custom_metrics)
    else:
        # NOTE(review): the extent of this `if` body was reconstructed from
        # flattened source -- confirm against the original file.
        if self.model[:4] == "cap2" or self.model[:4] == "vae2":
            # NOTE(review): the next five names are never read below.
            inputs, outputs = None, None
            datagen, valgen = None, None
            cap2 = None
            callbacks = [callback_log]
            hparams = HParams(
                learning_rate=learning_rate,
                hidden_dim=1024,
                optimizer='adam',
                dropout=0.5,
                max_seq_length=self.data_helper.max_caption_len,
                embed_dim=self.embedding_matrix.shape[-1],
                num_embeddings=self.embedding_matrix.shape[0],
                activation='relu',
                latent_dim=1000)

            # self.gen selects which datasets are requested with gen=True:
            # 'train' -> training data only, 'all' -> training and validation
            # data, anything else -> neither.
            if self.gen == 'train' or self.gen == 'all':
                data = get_data(self.model, self.data_helper, gen=True)
                if self.gen == 'all':
                    val_data = get_data(self.model, self.val_helper, gen=True)
                else:
                    val_data = get_data(self.model, self.val_helper)
            else:
                data = get_data(self.model, self.data_helper)
                val_data = get_data(self.model, self.val_helper)

            # The custom metric callbacks are attached for every model except
            # 'cap2img'; the first one is given the validation data directly.
            if self.model != 'cap2img':
                self.custom_metrics[0].validation_data = val_data
                callbacks += self.custom_metrics

            ModelClass = get_model(self.model)
            model = ModelClass(hparams, embeddings=self.embedding_matrix)
            if self.path_load_model is not None:
                print("Loading model " + self.path_load_model + " ...")
                model.load_model(self.path_load_model)
            model.compile(num_gpu=self.gpu)

            # Train through the multi-GPU wrapper when one was created.
            if model.gpu_model is None:
                model_to_run = model.model
            else:
                model_to_run = model.gpu_model

            # A keras Sequence is trained with fit_generator, an (x, y) tuple
            # with fit. Any other type leaves `history` as None.
            if isinstance(data, keras.utils.Sequence):
                history = model_to_run.fit_generator(
                    data,
                    epochs=self.epochs,
                    validation_data=val_data,
                    validation_steps=len(val_data),
                    callbacks=callbacks,
                    workers=4,
                    use_multiprocessing=True)
            elif isinstance(data, tuple):
                history = model_to_run.fit(x=data[0],
                                           y=data[1],
                                           epochs=self.epochs,
                                           validation_data=val_data,
                                           callbacks=callbacks,
                                           batch_size=self.batch_size)

    # Get the score on the validation-set after the last training-epoch:
    # the F1 recorded by the first custom metric, or Keras' val_acc for
    # 'cap2img' (which trains without the custom metrics).
    if self.model != 'cap2img':
        f1 = self.custom_metrics[0].val_f1s[-1]
        print()
        print("Val F1: {0:.2%}".format(f1))
        print()
    else:
        f1 = history.history['val_acc'][-1]
        print()
        print("Val Acc: {0:.2%}".format(f1))
        print()

    # Save the model if it improves on the best-found performance.
    # The best score is kept on self.best_f1 so it survives between calls.
    print(self.best_f1)
    if f1 > self.best_f1:
        print("saving model at {0}".format(self.path_best_model))
        # Save the new model to harddisk.
        model.model.save(self.path_best_model)
        # Update the best score.
        self.best_f1 = f1

    # Delete the Keras model with these hyper-parameters from memory.
    del model

    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    K.clear_session()

    # NOTE: Scikit-optimize does minimization so it tries to
    # find a set of hyper-parameters with the LOWEST fitness-value.
    # Because we are interested in the HIGHEST score, we need to
    # negate this number so it can be minimized.
    return -f1
from utils.utils import print_time_info

if __name__ == '__main__':
    opt = TrainOptions().parse()

    # The dataset is identified by a substring of the data root's base name.
    # Membership is tested with `in`; the previous `find(...) is not -1`
    # compared object identity with an int literal, which only works by
    # accident of CPython's small-integer cache and triggers a SyntaxWarning
    # on Python 3.8+.
    dset_name = os.path.basename(opt.dataroot)
    if 'nyu' in dset_name.lower():
        dset_info = {'NYU': 40}
    elif 'sun' in dset_name.lower():
        dset_info = {'SUN': 37}
    else:
        raise NameError('Name of the dataset file should accordingly contain either nyu or sun in it')

    print('[INFO] %s dataset is being processed' % list(dset_info.keys())[0])
    train_data, test_data = get_data(opt, use_train=True, use_test=True)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers)
    # The test loader uses one sample per batch, in a fixed order.
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=opt.num_workers)
    print("[INFO] Data loaders for %s dataset have been created" % list(dset_info.keys())[0])

    if opt.use_class:
        # Grid search for lambda values
        # Lambda is the coefficient of the classification loss
        # i.e.: total_loss = segmentation_loss + lambda * classification_loss
        start, end, steps = opt.lambda_class_range
        lambdas = torch.linspace(start, end, steps=int(steps)).cuda(opt.gpu_id)
        for i, lam in enumerate(lambdas):
            start_date_time = datetime.datetime.now().replace(microsecond=0)
            print('[INFO] Training session: [%i of %i]' % (i+1, steps))
def word_embed(args):
    """Run the model's word encoder over the captions and export the result.

    Reads ``args.data``, ``args.max_samples``, ``args.model``,
    ``args.learning_rate``, ``args.gen`` and ``args.load``. The embeddings
    predicted by ``model.get_word_encoder()`` are passed, together with the
    ordered ``(caption, image_id)`` pairs and the word vectors, to
    ``embeddings_to_txt``.
    """
    caption_data = CocoCaptions(args.data, args.max_samples)
    word_vectors = FilteredGloveVectors()
    caption_data.initialize_WV(word_vectors)
    embedding_matrix = word_vectors.get_embedding_matrix()

    # NOTE(review): the extent of this `if` body was reconstructed from
    # flattened source -- confirm against the original file.
    if args.model[:4] in ("cap2", "vae2"):
        hparams = HParams(learning_rate=args.learning_rate,
                          hidden_dim=1024,
                          optimizer='adam',
                          dropout=0.5,
                          max_seq_length=caption_data.max_caption_len,
                          embed_dim=embedding_matrix.shape[-1],
                          num_embeddings=embedding_matrix.shape[0],
                          activation='relu',
                          latent_dim=1000)

        # 'train' and 'all' both request the data with gen=True.
        if args.gen in ('train', 'all'):
            data = get_data(args.model, caption_data, gen=True)
        else:
            data = get_data(args.model, caption_data)

        model = get_model(args.model)(hparams, embeddings=embedding_matrix)

        if args.load is not None:
            print("Loading model " + args.load + " ...")
            # The VAE variant needs its custom layer to deserialize.
            extra = {}
            if args.model == "vae2all":
                extra = {"custom_objects": {"KLDivergenceLayer": KLDivergenceLayer}}
            model.load_model(args.load, **extra)

        model.compile()
        word_encoder = model.get_word_encoder()

        # A keras Sequence goes through predict_generator, an (x, y) tuple
        # through predict on its first element.
        if isinstance(data, keras.utils.Sequence):
            embeddings = word_encoder.predict_generator(data, verbose=1)
        elif isinstance(data, tuple):
            embeddings = word_encoder.predict(x=data[0], verbose=1)

    ordered_ids = caption_data.ordered_IDs
    print("ordered_X1", len(ordered_ids), " ")

    # One (caption, image_id) pair per caption convolution, in image order.
    pairs = []
    for image_id in ordered_ids:
        image_captions = caption_data.get_captions(image_id)
        x_group, y_group = caption_data.get_caption_convolutions(image_captions)
        pairs.extend((c, image_id) for c, _ in zip(x_group, y_group))

    print("ordered_X2", len(pairs), " ")
    print("Predicted ", embeddings.shape, " preds")
    embeddings_to_txt(pairs, embeddings, word_vectors)
def train(args):
    """Train the model selected by ``args.model`` and save it to ``args.path``.

    Reads ``args.data``, ``args.max_samples``, ``args.learning_rate``,
    ``args.model``, ``args.gen``, ``args.load``, ``args.epochs`` and
    ``args.path``.

    The captions are split into a training and a validation part. The 'toy'
    model is a small smoke-test network trained on random data; every other
    model is built via ``get_model`` and trained with ``fit_generator`` (keras
    Sequence) or ``fit`` (tuple). After training, the last-epoch validation
    F1 (or ``val_acc`` for 'cap2img') is printed and the model is saved.
    """
    Captions = CocoCaptions(args.data, args.max_samples)
    WV = FilteredGloveVectors()
    Captions.initialize_WV(WV)
    Captions, ValCaptions = Captions.split_train_val()
    embedding_matrix = WV.get_embedding_matrix()
    metrics = Metrics()

    # Print the hyper-parameters.
    print('learning rate: {0:.1e}'.format(args.learning_rate))
    print()

    # Dir-name for the TensorBoard log-files.
    log_dir = log_dir_name(args.learning_rate, args.model)

    # Create a callback-function for Keras which will be
    # run after each epoch has ended during training.
    # This saves the log-files for TensorBoard.
    # Note that there are complications when histogram_freq=1.
    # It might give strange errors and it also does not properly
    # support Keras data-generators for the validation-set.
    callback_log = TensorBoard(log_dir=log_dir,
                               histogram_freq=0,
                               batch_size=32,
                               write_graph=True,
                               write_grads=False,
                               write_images=False)

    model = None
    history = None

    if args.model == 'toy':
        # Random integer sequences; this branch only smoke-tests the pipeline.
        X = np.random.randint(0, 6, size=(3000, 50))
        Y = np.random.randint(0, 6, size=(3000, 50, 1))
        model = Sequential()
        model.add(Embedding(6, 50, input_length=50))
        model.add(Dense(300, activation='relu'))
        model.add(Dense(6, activation='softmax'))
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        history = model.fit(X,
                            Y,
                            epochs=1,
                            batch_size=1024,
                            validation_split=0.2,
                            validation_data=None,
                            callbacks=[callback_log, metrics])
    else:
        # NOTE(review): the extent of this `if` body was reconstructed from
        # flattened source -- confirm against the original file.
        if args.model[:4] == "cap2" or args.model[:4] == "vae2":
            callbacks = [callback_log]
            hparams = HParams(learning_rate=args.learning_rate,
                              hidden_dim=1024,
                              optimizer='adam',
                              dropout=0.5,
                              max_seq_length=Captions.max_caption_len,
                              embed_dim=embedding_matrix.shape[-1],
                              num_embeddings=embedding_matrix.shape[0],
                              activation='relu',
                              latent_dim=1000)

            # args.gen selects which datasets are requested with gen=True:
            # 'train' -> training data only, 'all' -> training and validation
            # data, anything else -> neither.
            if args.gen == 'train' or args.gen == 'all':
                data = get_data(args.model, Captions, gen=True)
                if args.gen == 'all':
                    val_data = get_data(args.model, ValCaptions, gen=True)
                else:
                    val_data = get_data(args.model, ValCaptions)
            else:
                data = get_data(args.model, Captions)
                val_data = get_data(args.model, ValCaptions)

            # Compare by value: `is not` on a string literal tests object
            # identity, so 'cap2img' passed on the command line would not
            # have been recognized.
            if args.model != 'cap2img':
                metrics.validation_data = val_data
                callbacks += [metrics]

            ModelClass = get_model(args.model)
            model = ModelClass(hparams, embeddings=embedding_matrix)
            if args.load is not None:
                print("Loading model " + args.load + " ...")
                model.load_model(args.load)
            model.compile()

            # A keras Sequence is trained with fit_generator, an (x, y) tuple
            # with fit. Any other type leaves `history` as None.
            if isinstance(data, keras.utils.Sequence):
                history = model.model.fit_generator(
                    data,
                    epochs=args.epochs,
                    validation_data=val_data,
                    callbacks=callbacks,
                )
            elif isinstance(data, tuple):
                history = model.model.fit(
                    x=data[0],
                    y=data[1],
                    epochs=args.epochs,
                    validation_data=val_data,
                    callbacks=callbacks,
                )

    # Get the score on the validation-set after the last training-epoch.
    if args.model != 'cap2img':
        # `metrics` is a single callback object here, not a list of them, so
        # its F1 history is read directly.
        f1 = metrics.val_f1s[-1]
        print()
        print("Val F1: {0:.2%}".format(f1))
        print()
    else:
        f1 = history.history['val_acc'][-1]
        print()
        print("Val Acc: {0:.2%}".format(f1))
        print()

    # The model is saved unconditionally after training.
    # NOTE(review): for non-toy models this calls save() on the wrapper
    # returned by get_model, not on model.model -- confirm the wrapper
    # provides save().
    print("saving model at {0}".format(args.path))
    model.save(args.path)

    # Delete the Keras model from memory.
    del model

    # Clear the Keras session so that later models are not added to the
    # same TensorFlow graph.
    K.clear_session()
def encode(args):
    """Encode every caption with the model's encoder and pickle the result.

    Reads ``args.data``, ``args.max_samples``, ``args.model``,
    ``args.learning_rate``, ``args.gen`` and ``args.load``.

    The output file ``<base_fp>/skip-thoughts/our_model_encodings.pkl``
    holds a list of ``(sentence, resnet_output, encoding)`` tuples written
    with pickle protocol 2.
    """
    Captions = CocoCaptions(args.data, args.max_samples)
    WV = FilteredGloveVectors()
    Captions.initialize_WV(WV)
    embedding_matrix = WV.get_embedding_matrix()

    # NOTE(review): the extent of this `if` body was reconstructed from
    # flattened source -- confirm against the original file.
    if args.model[:4] == "cap2" or args.model[:4] == "vae2":
        hparams = HParams(learning_rate=args.learning_rate,
                          hidden_dim=1024,
                          optimizer='adam',
                          dropout=0.5,
                          max_seq_length=Captions.max_caption_len,
                          embed_dim=embedding_matrix.shape[-1],
                          num_embeddings=embedding_matrix.shape[0],
                          activation='relu',
                          latent_dim=1000)

        # 'train' and 'all' both request the data with gen=True.
        if args.gen == 'train' or args.gen == 'all':
            data = get_data(args.model, Captions, gen=True)
        else:
            data = get_data(args.model, Captions)

        ModelClass = get_model(args.model)
        model = ModelClass(hparams, embeddings=embedding_matrix)
        if args.load is not None:
            print("Loading model " + args.load + " ...")
            if args.model == "vae2all":
                # The VAE variant needs its custom layer to deserialize.
                model.load_model(
                    args.load,
                    custom_objects={"KLDivergenceLayer": KLDivergenceLayer})
            else:
                model.load_model(args.load)
        model.compile()
        encoder = model.get_encoder()

        # The 'vae2all' encoder yields three outputs; only the first is kept.
        if isinstance(data, keras.utils.Sequence):
            if args.model == "vae2all":
                preds, _, _ = encoder.predict_generator(data, verbose=1)
            else:
                preds = encoder.predict_generator(data, verbose=1)
        elif isinstance(data, tuple):
            if args.model == "vae2all":
                preds, _, _ = encoder.predict(x=data[0], verbose=1)
            else:
                preds = encoder.predict(x=data[0], verbose=1)

    X = Captions.ordered_IDs
    print("ordered_X1", len(X), " ")

    # One (caption, image_id) pair per caption convolution, in image order.
    new_X = []
    for image_id in X:
        captions = Captions.get_captions(image_id)
        X_group, Y_group = Captions.get_caption_convolutions(captions)
        new_X.extend((c, image_id) for c, _ in zip(X_group, Y_group))

    print("ordered_X2", len(new_X), " ")
    print("Predicted ", len(preds), " preds")

    output = []
    for (c, image_id), encoding in zip(new_X, preds):
        words = Captions.WV.indices_to_words(c)
        # Drop the first and last token -- presumably start/end markers;
        # TODO confirm.
        sentence = " ".join(words[1:-1])
        resnet = Captions.get_resnet_output(image_id)
        output.append((sentence, resnet, encoding))
    print("U ", len(output), " outputs")

    save_loc = base_fp + "/skip-thoughts/our_model_encodings.pkl"
    # Context manager so the file is flushed and closed even if dump fails.
    with open(save_loc, "wb+") as out_file:
        pkl.dump(output, out_file, 2)
    print("Output saved")