def validate(data_type, model, seq_length=40, saved_model=None,
             class_limit=None, image_shape=None):
    test_data_num = 1084
    batch_size = 32

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    test_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    # rm = ResearchModels(len(data.classes), model, seq_length, saved_model)
    model = load_model(saved_model)

    # Evaluate!
    # results = rm.model.evaluate_generator(
    #     generator=val_generator,
    #     val_samples=3200)
    results = model.evaluate_generator(generator=test_generator,
                                       steps=test_data_num // batch_size)
    print(results)
    print(model.metrics)
    print(model.metrics_names)
def validate(data_type, model, seq_length=40, saved_model=None,
             class_limit=None, image_shape=None):
    batch_size = 8

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Evaluate!
    results = rm.model.evaluate_generator(generator=val_generator,
                                          val_samples=3200)
    print(results)
    print(rm.model.metrics_names)
def model_experiments(model, data_set, result_file):
    static_feature = data_set.static_feature
    dynamic_feature = data_set.dynamic_feature
    labels = data_set.labels

    # StratifiedShuffleSplit(n_splits, test_size, train_size)
    split = StratifiedShuffleSplit(ExperimentSetup.kfold,
                                   ExperimentSetup.test_size,
                                   ExperimentSetup.train_size) \
        .split(static_feature, labels)

    n_output = labels.shape[1]  # number of classes
    tol_pred = np.zeros(shape=(0, n_output))  # empty array of shape (0, n_output)
    tol_label = np.zeros(shape=(0, n_output), dtype=np.int32)

    for train_idx, test_idx in split:
        train_static = static_feature[train_idx]
        train_dynamic = dynamic_feature[train_idx]
        train_y = labels[train_idx]
        # DataSet holds several data tables; a table can be retrieved with dataset["table name"].
        train_set = DataSet(train_static, train_dynamic, train_y)
        model.fit(train_set)

        test_static = static_feature[test_idx]
        test_dynamic = dynamic_feature[test_idx]
        test_y = labels[test_idx]
        test_set = DataSet(test_static, test_dynamic, test_y)
        y_score = model.predict(test_set)

        # Stack the arrays vertically (row-wise).
        tol_pred = np.vstack((tol_pred, y_score))
        tol_label = np.vstack((tol_label, test_y))

    return evaluate(tol_label, tol_pred, result_file)
def __init__(self, train=True, common_params=None, solver_params=None,
             net_params=None, dataset_params=None):
    if common_params:
        self.device_id = int(common_params['gpus'])
        self.image_size = int(common_params['image_size'])
        self.height = self.image_size
        self.width = self.image_size
        self.batch_size = int(common_params['batch_size'])
        self.num_gpus = 1
        self.d_repeat = int(common_params['d_repeat'])
        self.g_repeat = int(common_params['g_repeat'])
        self.ckpt = common_params['ckpt'] if 'ckpt' in common_params else None
        self.init_ckpt = common_params['init_ckpt'] if 'init_ckpt' in common_params else None
        self.restore_opt = common_params['restore_opt'] == '1'
        self.gan = common_params['gan'] == '1'
        self.prior_boost = common_params['prior_boost'] == '1'
        self.corr = common_params['correspondence'] == '1'
        if self.corr:
            print('Discriminator has correspondence.')
        else:
            print('Discriminator has no correspondence.')
        if self.gan:
            print('Using GAN.')
        else:
            print('Not using GAN.')
        if self.prior_boost:
            print('Using prior boost.')
        else:
            print('Not using prior boost.')
    if solver_params:
        self.learning_rate = float(solver_params['learning_rate'])
        self.D_learning_rate = float(solver_params['d_learning_rate'])
        print("Learning rate G: {0} D: {1}".format(self.learning_rate,
                                                   self.D_learning_rate))
        # self.moment = float(solver_params['moment'])
        self.max_steps = int(solver_params['max_iterators'])
        self.train_dir = str(solver_params['train_dir'])
        self.lr_decay = float(solver_params['lr_decay'])
        self.decay_steps = int(solver_params['decay_steps'])
        self.moment = float(solver_params['moment'])
    self.train = train
    self.net = Net(train=train, common_params=common_params, net_params=net_params)
    self.dataset = DataSet(common_params=common_params, dataset_params=dataset_params)
    self.val_dataset = DataSet(common_params=common_params,
                               dataset_params=dataset_params, training=False)
    print("Solver initialization done.")
def validate(data_type, model, seq_length=40, saved_model=None,
             class_limit=None, image_shape=None):
    batch_size = 463

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # # Evaluate!
    # results = rm.model.evaluate_generator(
    #     generator=val_generator,
    #     steps=10)
    # print(results)
    # print(rm.model.metrics_names)

    print('Classification Metric for testing phase \n')
    metric_calculation(val_generator, rm.model, 0)
def validate(data_type, model, seq_length=125, saved_model=None, concat=False,
             class_limit=None, image_shape=None):
    batch_size = 1

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    val_generator = data.frame_generator(batch_size, 'test', data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Evaluate!
    prediction = rm.model.predict_generator(
        generator=val_generator,
        val_samples=4)  # set this to the number of test files
    prediction = prediction.tolist()
    print(prediction)
    print("===========================")
    pd.DataFrame(prediction).to_csv('prediction.csv')
def RotationDataLoader(image_dir, is_validation=False, batch_size=256,
                       crop_size=224, num_workers=4, shuffle=True):
    normalize = tfs.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
    transforms = tfs.Compose([
        tfs.RandomResizedCrop(crop_size),
        tfs.RandomGrayscale(p=0.2),
        tfs.ColorJitter(0.4, 0.4, 0.4, 0.4),
        tfs.RandomHorizontalFlip(),
        # Stack the four rotated copies (0, 90, 180, 270 degrees) of each image.
        tfs.Lambda(lambda img: torch.stack([
            normalize(tfs.ToTensor()(tfs.functional.rotate(img, angle)))
            for angle in [0, 90, 180, 270]
        ]))
    ])
    if is_validation:
        dataset = DataSet(torchvision.datasets.ImageFolder(image_dir + '/val', transforms))
    else:
        dataset = DataSet(torchvision.datasets.ImageFolder(image_dir + '/train', transforms))
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )
    return loader
def predict(data_type, seq_length, saved_model, image_shape, video_name,
            class_limit, config):
    model = load_model(saved_model)
    feature_file_path = config.featureFileName
    work_dir = config.workDir
    classlist = config.classes

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       feature_file_path=feature_file_path,
                       repo_dir=config.repoDir, work_dir=work_dir,
                       classlist=classlist)
    else:
        data = DataSet(seq_length=seq_length, image_shape=image_shape,
                       class_limit=class_limit,
                       feature_file_path=feature_file_path,
                       repo_dir=config.repoDir, work_dir=work_dir,
                       classlist=classlist)

    # Extract the sample from the data.
    sample = data.get_frames_by_filename(video_name, data_type)

    # Predict!
    prediction = model.predict(np.expand_dims(sample, axis=0))
    print(prediction)
    data.print_class_from_prediction(np.squeeze(prediction, axis=0))
def do_experiments(self):
    n_output = 1
    dynamic_features = self._data_set.dynamic_features
    labels = self._data_set.labels

    # tol_test_index = np.zeros(shape=0, dtype=np.int32)
    tol_pred = np.zeros(shape=(0, dynamic_features.shape[1], n_output))
    tol_label = np.zeros(shape=(0, dynamic_features.shape[1], n_output),
                         dtype=np.int32)

    train_dynamic_features, test_dynamic_features, train_labels, test_labels = \
        split_data_set(dynamic_features, labels)

    for i in range(5):
        train_dynamic_res, train_labels_res = imbalance_preprocess(
            train_dynamic_features[i], train_labels[i], 'lstm')
        train_set = DataSet(train_dynamic_res, train_labels_res)
        # train_set = DataSet(train_dynamic_features[i], train_labels[i])
        test_set = DataSet(test_dynamic_features[i], test_labels[i])
        self._model.fit(train_set, test_set)
        y_score = self._model.predict(test_set)
        tol_pred = np.vstack((tol_pred, y_score))
        tol_label = np.vstack((tol_label, test_labels[i]))
        print("Cross validation: {} of {}".format(i, 5),
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    tol_test_index = np.arange(labels.shape[0] * labels.shape[1])
    evaluate(tol_test_index, tol_label, tol_pred, self._filename)
    self._model.close()
def train_module():
    # train_data_set = DataSet('train.txt')
    train_data_set = DataSet('smalltrain.txt')
    # dev_data_set = DataSet('val.txt')
    dev_data_set = DataSet('smallval.txt')
    model = Seq2Seq()
    model.to(cfg.device)
    trainIters(train_data_set, dev_data_set, model)
def train(data_type, seq_length, model, saved_model=None, class_limit=None,
          image_shape=None, load_to_memory=False, batch_size=32, nb_epoch=100):
    # Helper: save the best model checkpoint.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints',
                              model + '-' + data_type + '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: log results to CSV.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs',
                                        model + '-' + 'training-' + str(timestamp) + '.log'))

    # NOTE: `tb` and `early_stopper` are referenced in the callbacks below but were not
    # defined in this snippet; these definitions are assumptions (standard Keras
    # TensorBoard and EarlyStopping helpers).
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))
    early_stopper = EarlyStopping(patience=5)

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    # Steps per epoch (assumes roughly 70% of data.data is in the training set).
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get the data in memory.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        rm.model.fit(X, y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
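# A minimal usage sketch (an assumption, not from the original source): invoking the
# train() above for an LSTM on pre-extracted features. The argument values are
# illustrative placeholders only.
if __name__ == '__main__':
    train('features', 40, 'lstm', saved_model=None, class_limit=None,
          image_shape=None, load_to_memory=False, batch_size=32, nb_epoch=100)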
def __init__(self, attribute, subset, split_at=None, is_leaf=False):
    self.attribute = attribute
    self.subset = subset
    self.split_at = split_at
    self.is_leaf = is_leaf
    self.left_child = None
    self.right_child = None
    self.gamma = None
    if not self.is_leaf:
        self.left_subset = DataSet(dataset=subset, indexes=[])
        self.right_subset = DataSet(dataset=subset, indexes=[])
        self.compute_left_right_subset()
def __init__(self, name=None, description=None, **kwargs):
    # Algorithmic description
    self.name = name
    self.description = description
    self.parameters = DataSet(name='Parameter set')  # List of parameters (of type Parameter)
    self.measures = DataSet(name='Measure set')      # List of measures (the observations of the algorithm)
    self.constraints = []

    # Computational description
    self.parameter_file = self.name + '.param'
    self.sessions = {}  # dictionary mapping session id to parameters
def main():
    if os.path.exists(FLAGS.data_set_path):
        ans = input('A file at {} already exists, do you wish to remove it?'
                    ' y / n: '.format(FLAGS.data_set_path)).lower()[0]
        if ans == 'y':
            logging.info('removing file at {}'.format(FLAGS.data_set_path))
            os.remove(FLAGS.data_set_path)

    data = DataSet(FLAGS.data_path, FLAGS.n_days_per_datapoint,
                   FLAGS.input_n_datapoints, FLAGS.test_data_percentage,
                   FLAGS.over_percentage, FLAGS.under_percentage)

    logging.info('amount of training data: {}'.format(len(data.train_X)))
    logging.info('amount of test data: {}'.format(len(data.test_X)))

    n_over = 0
    n_under = 0
    n_same = 0
    for v in data.train_y:
        if v[0] == 1:
            n_over += 1
        elif v[1] == 1:
            n_under += 1
        else:
            n_same += 1

    s = n_over + n_under + n_same
    print('proportion of over: {}'.format(n_over / s))
    print('proportion of under: {}'.format(n_under / s))
    print('proportion of same: {}'.format(n_same / s))

    data.save(FLAGS.data_set_path)
def __init__(self, train=True, common_params=None, solver_params=None,
             net_params=None, dataset_params=None):
    if common_params:
        self.device = common_params['device']
        self.image_size = int(common_params['image_size'])
        self.height = self.image_size
        self.width = self.image_size
        self.batch_size = int(common_params['batch_size'])
        self.num_gpus = 1
        # end_to_end: whether to use the end-to-end attention model or Richard Zhang's model.
        self.end_to_end = common_params['end_to_end'] != 'False'
        # use_attention_in_cost: whether to use attention to weight the loss in the cost function.
        self.use_attention_in_cost = common_params['use_attention_in_cost'] != 'False'
    if solver_params:
        self.learning_rate = float(solver_params['learning_rate'])
        self.moment = float(solver_params['moment'])
        self.max_steps = int(solver_params['max_iterators'])
        self.train_dir = str(solver_params['train_dir'])
        self.lr_decay = float(solver_params['lr_decay'])
        self.decay_steps = int(solver_params['decay_steps'])
    self.common_params = common_params
    self.net_params = net_params
    self.train = train
    self.dataset = DataSet(common_params=common_params, dataset_params=dataset_params)
def main():
    """Store all sequence histograms of all datasets found in data."""
    callfiles = get_callfiles(None)
    data = [DataSet(c) for c in callfiles]
    for d in data:
        if d.analysed or len(d) < 2:
            continue
        print('analyzing', d.path)
        delayed_loaded_files = delayed_load(d.sequences)
        delayed_pairs = [dask.delayed(to_pairs)(sequences)
                         for sequences in delayed_loaded_files]
        delayed_hists = [dask.delayed(histogram)(pairs)
                         for pairs in delayed_pairs]

        from dask.diagnostics import ProgressBar
        with ProgressBar():
            hist = dask.delayed(sum_hists)(delayed_hists).compute()

        hist /= np.sum(hist[:, 0])
        name = str(d.callfile).split('.')[0] + '.dat'
        output_path = results_path.joinpath(name)
        store_histograms(output_path, hist)
def predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit):
    model = load_model(saved_model)

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, image_shape=image_shape,
                       class_limit=class_limit)

    # Extract the sample from the data.
    sample = data.get_frames_by_filename(video_name, data_type)

    # Predict!
    prediction = model.predict(np.expand_dims(sample, axis=0))
    print(prediction)
    data.print_class_from_prediction(np.squeeze(prediction, axis=0))
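# A minimal usage sketch (an assumption, not from the original source): running the
# predict() above on a single video. The checkpoint path and video name are
# placeholders and must match artifacts produced during training.
if __name__ == '__main__':
    predict(data_type='features', seq_length=40,
            saved_model='data/checkpoints/lstm-features.hdf5',  # placeholder checkpoint
            image_shape=None,
            video_name='v_Archery_g01_c01',  # placeholder video name
            class_limit=None)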
def main_news(checkpoint_filename=None, dataset_params_filename=None, initial_epoch=1):
    """Main"""
    dataset = DataSet(DATASET_FILENAME)

    if not os.path.exists(MODEL_CHECKPOINT_DIRECTORYNAME):
        os.makedirs(MODEL_CHECKPOINT_DIRECTORYNAME)

    if dataset_params_filename is not None:
        with open(dataset_params_filename, 'rb') as f:
            dataset_params = pickle.load(f)
        assert dataset_params['chars'] == dataset.chars
        assert dataset_params['y_max_length'] == dataset.y_max_length
    else:
        save_dataset_params(dataset)

    model = generate_model(dataset.y_max_length, dataset.chars)
    if checkpoint_filename is not None:
        model.load_weights(checkpoint_filename)

    iterate_training(model, dataset, initial_epoch)
def extract_features(seq_length=40, class_limit=2, image_shape=(299, 299, 3)):
    # Get the dataset.
    data = DataSet(seq_length=seq_length, class_limit=class_limit,
                   image_shape=image_shape)

    # Get the model.
    model = Extractor(image_shape=image_shape)

    # Loop through the data.
    pbar = tqdm(total=len(data.data))
    for video in data.data:
        # Get the path to the sequence for this video.
        path = os.path.join('/content', 'Geriatrics_Data', 'Video', 'sequences',
                            video[2] + '-' + str(seq_length) + '-features')  # numpy will auto-append .npy

        # Check if we already have it.
        if os.path.isfile(path + '.npy'):
            pbar.update(1)
            continue

        # Get the frames for this video.
        frames = data.get_frames_for_sample(video)

        # Now downsample to just the ones we need.
        frames = data.rescale_list(frames, seq_length)

        # Now loop through and extract features to build the sequence.
        sequence = []
        for image in frames:
            features = model.extract(image)
            sequence.append(features)

        # Save the sequence.
        np.save(path, sequence)
        pbar.update(1)

    pbar.close()
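# A minimal usage sketch (an assumption, not from the original source): calling the
# extract_features() above with its own defaults, which extracts and caches one
# feature sequence per video under the sequences directory.
if __name__ == '__main__':
    extract_features(seq_length=40, class_limit=2, image_shape=(299, 299, 3))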
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_file',
        help='Model file name with path. Should be under the data/checkpoints/ dir.',
        type=str,
        default=os.path.join(
            os.path.dirname(__file__),
            'data/checkpoints/mlp-features.316-0.459-0.88.tflite'))
    parser.add_argument(
        '--extractor_model',
        help='Extractor model file name with path. Should be under the data/checkpoints/ dir.',
        type=str)
    parser.add_argument(
        '--video_name',
        help='Video to run inference on, as listed in data/data_file.csv. Do not include the extension.',
        type=str,
        default='restRoom_001')
    args = parser.parse_args()

    cf = get_config()

    # Sequence length must match the length used during training.
    seq_length = cf.getint('sequence', 'seq_length')

    # Limit must match that used during training.
    class_limit = cf.get('sequence', 'class_limit')
    class_limit = int(class_limit) if class_limit != 'None' else None

    # Get the dataset.
    data = DataSet(seq_length=seq_length, class_limit=class_limit)

    sequence = extract(data, seq_length, args.extractor_model, args.video_name)
    predict(data, sequence, args.model_file)
def __init__(self, train=True, common_params=None, solver_params=None,
             net_params=None, dataset_params=None):
    if common_params:
        self.device_id = int(common_params['gpus'])
        self.image_size = int(common_params['image_size'])
        self.height = self.image_size
        self.width = self.image_size
        self.batch_size = int(common_params['batch_size'])
        self.num_gpus = 1
    if solver_params:
        self.learning_rate = float(solver_params['learning_rate'])
        self.moment = float(solver_params['moment'])
        self.max_steps = int(solver_params['max_iterators'])
        self.train_dir = str(solver_params['train_dir'])
        self.lr_decay = float(solver_params['lr_decay'])
        self.decay_steps = int(solver_params['decay_steps'])
    self.train = train
    self.net = Net(train=train, common_params=common_params, net_params=net_params)
    self.dataset = DataSet(common_params=common_params, dataset_params=dataset_params)
def validate(model, saved_model, npoints=80, datafile='rect_same_period',
             pad=True, resized=False, **kargs):
    now = datetime.now()
    date = now.strftime("%d:%m:%Y-%H:%M")

    data = DataSet(npoints=npoints, datafile=datafile, **kargs)
    rm = ResearchModels(model, npoints=npoints, saved_model=saved_model)

    indices, X, y = data.get_all_sequences_in_memory('test', with_indices=True,
                                                     pad=pad, resized=resized)
    results = rm.model.evaluate(X, y)
    pred = rm.model.predict(X)
    print(results)

    np.save('.tmp/indices-%s-%s-%s' % (model, datafile, os.path.basename(saved_model)),
            indices)
    np.save('.tmp/prediction-%s-%s-%s' % (model, datafile, os.path.basename(saved_model)),
            pred)
    np.save('.tmp/true-%s-%s-%s' % (model, datafile, os.path.basename(saved_model)),
            y)
def main():
    parser = argparse.ArgumentParser(description="Training DCGAN on CelebA dataset")
    parser.add_argument("--checkpoint_dir", type=str, default="./model/checkpoint",
                        help="Path to write checkpoints")
    parser.add_argument("--progress_dir", type=str, default="./data/face_gan",
                        help="Path to write training progress images")
    parser.add_argument("--dataset_dir", type=str, required=True,
                        help="Path to the dataset")
    parser.add_argument("--latent_dim", type=int, default=100,
                        help="Latent space dimension")
    parser.add_argument("--test_size", type=int, default=4,
                        help="Square root of the number of test images used to monitor training progress")
    parser.add_argument("--batch_size", type=int, default=100,
                        help="Batch size for training")
    parser.add_argument("--lr", type=float, default=0.0002,
                        help="Learning rate")
    parser.add_argument("--epochs", type=int, default=20,
                        help="Number of epochs for training")
    args = vars(parser.parse_args())

    validate_path(args["checkpoint_dir"])
    validate_path(args["progress_dir"])

    datagen = DataSet(args["dataset_dir"])
    dataset, total_steps = datagen.build(batch_size=args["batch_size"])

    DCGAN = Trainer(progress_dir=args["progress_dir"],
                    checkpoint_dir=args["checkpoint_dir"],
                    z_dim=args["latent_dim"],
                    test_size=args["test_size"],
                    batch_size=args["batch_size"],
                    learning_rate=args["lr"])
    DCGAN.train_loop(dataset=dataset, epochs=args["epochs"], total_steps=total_steps)
def main(nb_images=10):
    """Spot-check `nb_images` images."""
    data = DataSet()
    model = load_model('/data/d14122793/ucf101_full/checkpoints/inception.011-1.47.hdf5')

    # Get all our test images.
    images = glob.glob(os.path.join('/data/d14122793/ucf101_full', 'test', '**', '*.jpg'))

    for _ in range(nb_images):
        print('-' * 80)
        # Get a random row.
        sample = random.randint(0, len(images) - 1)
        image = images[sample]

        # Turn the image into an array.
        print(image)
        image_arr = process_image(image, (299, 299, 3))
        image_arr = np.expand_dims(image_arr, axis=0)

        # Predict.
        predictions = model.predict(image_arr)

        # Show how much we think it's each one.
        label_predictions = {}
        for i, label in enumerate(data.classes):
            label_predictions[label] = predictions[0][i]

        sorted_lps = sorted(label_predictions.items(),
                            key=operator.itemgetter(1), reverse=True)
        for i, class_prediction in enumerate(sorted_lps):
            # Just get the top five.
            if i > 4:
                break
            print("%s: %.2f" % (class_prediction[0], class_prediction[1]))
def train_and_eval_network(config, args):
    test_data = TestSet(config["test_data"], Api(API_ENDPOINT))
    full_train_data = get_full_train_data(config)

    if not config["is_existing"]:
        with DataSet(config["train_data"], Api(API_ENDPOINT)) as train_data:
            train(config, train_data, full_train_data, test_data)
            models.save_model(config)

    # Find the best threshold for the training data.
    train_data_config = copy.deepcopy(config["test_data"])
    train_data_config["patient_filter"] = config["train_data"]["patient_filter"]
    train_data = TestSet(train_data_config, Api(API_ENDPOINT))
    config["alert_threshold"], config["alert_threshold_low"] = \
        find_best_threshold(config, train_data)
    save_config(config)

    result = test(config, test_data)
    result["default"] = result["alarm_fscore"]
    result["threshold"] = config["alert_threshold"]
    result["threshold_low"] = config["alert_threshold_low"]
    result["tag"] = args["tag"]

    if args["results_path"] is not None:
        with open(args["results_path"], "a") as file:
            json.dump(result, file)
            file.write("\n")

    return result
def __init__(self, train=True, common_params=None, solver_params=None,
             net_params=None, dataset_params=None):
    if common_params:
        self.gpus = [int(device) for device in str(common_params['gpus']).split(',')]
        self.image_size = int(common_params['image_size'])
        self.height = self.image_size
        self.width = self.image_size
        # Per-GPU batch size; integer division keeps batch_size an int under Python 3.
        self.batch_size = int(common_params['batch_size']) // len(self.gpus)
    if solver_params:
        self.learning_rate = float(solver_params['learning_rate'])
        self.moment = float(solver_params['moment'])
        self.max_steps = int(solver_params['max_iterators'])
        self.train_dir = str(solver_params['train_dir'])
        self.lr_decay = float(solver_params['lr_decay'])
        self.decay_steps = int(solver_params['decay_steps'])

    self.tower_name = 'Tower'
    self.num_gpus = len(self.gpus)
    self.train = train
    self.net = Net(train=train, common_params=common_params, net_params=net_params)
    self.dataset = DataSet(common_params=common_params, dataset_params=dataset_params)
    self.placeholders = []
def evaluate_on_validation(arousal_model_path, valence_model_path, output_file, istrain=True):
    arousal_model = load_custom_model(arousal_model_path)
    valence_model = load_custom_model(valence_model_path)

    model = 'trimodal_model'
    dataset = DataSet(istrain=istrain, model=model)
    x_valid, y_valid, valid_name_list = dataset.get_all_sequences_in_memory('Validation')

    arousal_pred = arousal_model.predict(x_valid)
    arousal_pred = np.squeeze(arousal_pred)
    valence_pred = valence_model.predict(x_valid)
    valence_pred = np.squeeze(valence_pred)

    print_out_csv(arousal_pred, valence_pred, valid_name_list,
                  '../omg_ValidationVideos.csv', output_file)

    cmd = 'python ../calculateEvaluationCCC.py ../omg_ValidationVideos_pred.csv ../new_omg_ValidationVideos.csv'
    process = subprocess.Popen(cmd.split(), stderr=subprocess.STDOUT,
                               universal_newlines=True)
    process.communicate()
def generate_df(filename, affect, standardize=True):
    # Read the data from the CSV file.
    df = pd.read_csv('data/{}'.format(filename), index_col='TrdDt')
    # Use the trade dates as the index.
    df.index = list(map(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d'), df.index))
    # Select the column to work with ('ClPr').
    df_all = df['ClPr'].copy()
    df_input = pd.DataFrame()

    if standardize:
        # Standardize the series.
        mean = df_all.mean()
        std = df_all.std()
        df_standardized = (df_all - mean) / std
        # Rebuild the DataFrame: `affect` lagged columns as inputs, the next value as target.
        for i in range(affect):
            df_input[i] = list(df_standardized)[i:-(affect - i)]
        df_input['y'] = list(df_standardized)[affect:]
        df_input.index = df.index[affect:]
        df_index = list(df_input.index)
        return {
            'dataset': DataSet(torch.tensor(np.array(df_input)).cuda()),  # data wrapped as a DataSet
            'real_data': df_all,  # the full, unstandardized series
            'mean': mean,         # mean of the series
            'std': std,           # standard deviation of the series
            'index': df_index,
        }
    else:
        for i in range(affect):
            df_input[i] = list(df_all)[i:-(affect - i)]
        df_input['y'] = list(df_all)[affect:]
        df_input.index = df.index[affect:]
        df_index = list(df_input.index)
        return {
            'dataset': DataSet(torch.tensor(np.array(df_input)).cuda()),  # data wrapped as a DataSet
            'real_data': df_all,  # the full series
            'index': df_index,
        }
def main(video):
    data = DataSet()
    model = load_model('data/savedmodels/inception.model.hdf5')

    file = video[2:-8]
    print(file)
    images = glob.glob(os.path.join('data', 'frames', 'test', file, video, '*.jpg'))
    path = os.path.join('data', 'frames', 'test', file, video)
    print(path)
    nb_images = len(images)
    print(nb_images)

    class_label_predictions = {}
    for i, label in enumerate(data.classes):
        class_label_predictions[i] = 0

    for sample in range(nb_images):
        print('-' * 80)
        image = images[sample]
        print(image)
        image_arr = process_image(image, (299, 299, 3))
        image_arr = np.expand_dims(image_arr, axis=0)
        predictions = model.predict(image_arr)

        label_predictions = {}
        for i, label in enumerate(data.classes):
            label_predictions[label] = predictions[0][i]
            class_label_predictions[i] += (predictions[0][i] / nb_images)

        sorted_lps = sorted(label_predictions.items(),
                            key=operator.itemgetter(1), reverse=True)
        for i, class_prediction in enumerate(sorted_lps):
            if i > 4:
                break
            print(i)
            print("%s: %.2f" % (class_prediction[0], class_prediction[1]))
        print('-' * 80)

    print('-' * 80)
    class_sorted_lps = sorted(class_label_predictions.items(),
                              key=operator.itemgetter(1), reverse=True)
    for i, class_prediction in enumerate(class_sorted_lps):
        if i > 4:
            break
        print(i)
        print("%s: %.2f" % (data.classes[class_prediction[0]], class_prediction[1]))

    return data.classes[class_sorted_lps[0][0]]
def testModelMain():
    print('loading data.....')
    images_all, labels_all, inc_angle = read_clean(path, 'train_clean_size.json')
    train_dataset = DataSet(images_all, labels_all, inc_angle, train=False)
    batch_size = 1
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               num_workers=2)

    # model = models.IceVGG(path)
    model = models.smallNet(path)
    # model = models.resModel(path)
    # model = models.lateralNet(path)
    # model = models.convNet(path)
    model.load('0_test_crop.pth')
    model.eval()
    # criterion = utils.CrossEntropy()

    predict_all = []
    for idx, (x, labels, incs) in enumerate(train_loader):
        # x = Variable(torch.randn(3, 2, 75, 75))
        if use_cuda:
            model.cuda()
            x = x.cuda()
            labels = labels.cuda()
            incs = incs.cuda()
        x = Variable(x, volatile=True)
        labels = Variable(labels, volatile=True)
        incs = Variable(incs, volatile=True)

        # testModel(model, x, angles)
        # teststn(model, x)
        # showstn(batch_size)
        # analyseResult(model, x, incs, labels)
        out = model(x, incs)
        # loss = criterion(out, labels)

        # Manual loss-checking debug code (kept commented out):
        # print(out.data, labels.data)
        # print('loss', loss.data[0])
        # labels = labels.float()  # [batch_size, 1]
        # input = out.squeeze()
        # target = labels
        # max_val = (-input).clamp(min=0)
        # loss = input - input * target + max_val + ((-max_val).exp() + (-input - max_val).exp()).log()
        # print('function loss', loss.mean().data[0])
        # out = out.squeeze().sigmoid()  # [batch_size, 1]
        # loss = -((out.log() * labels) + (1 - out).log() * (1 - labels))
        # print('compute loss ', loss.mean().data[0])

        out = out.squeeze().sigmoid()
        out = out.data.cpu().numpy()
        predict_all.extend(out)

    data = pd.read_json(path + 'train_clean_size.json')
    data['predict'] = predict_all
    data.to_json(path + 'train_clean_predict_small.json')