def read_partitioned_json(file_path: str, filter_function=lambda _: True) -> pd.DataFrame:
    data_source = Dataset(base_path=file_path,
                          extension="json",
                          filter_function=filter_function,
                          loader_function=_json_loader_function,
                          ignore_partitions=False)
    return data_source.to_pandas()
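# Usage sketch for read_partitioned_json. The directory path is hypothetical, and the
# crude string-based predicate only illustrates passing a filter; what Dataset actually
# hands to filter_function depends on its implementation.
events_df = read_partitioned_json(
    "data/events",  # hypothetical base path holding partitioned .json files
    filter_function=lambda partition: "2021" in str(partition),
)
print(events_df.shape)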
def dl_maker(x, y, conf):
    """Split (x, y) into train/validation sets and wrap them in DataLoaders."""
    batch_size = conf.batch_size
    xt, xv, yt, yv = split(x, y)
    xt, xv, yt, yv = map(torch.tensor, (xt, xv, yt, yv))
    # Add a channel dimension to the inputs and cast everything to float32
    train_ds = Dataset(xt.unsqueeze(1).float(), yt.float())
    valid_ds = Dataset(xv.unsqueeze(1).float(), yv.float())
    if conf.one_batch:
        # Put each split into a single full-size batch
        train_dl = DataLoader(train_ds, len(train_ds))
        valid_dl = DataLoader(valid_ds, len(valid_ds))
    else:
        train_dl = DataLoader(train_ds, batch_size)
        valid_dl = DataLoader(valid_ds, batch_size)
    return train_dl, valid_dl
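# Usage sketch for dl_maker. `conf` only needs `batch_size` and `one_batch` attributes,
# so SimpleNamespace stands in here; the random arrays are placeholders for real data
# and `split` is assumed to be the same helper used inside dl_maker.
from types import SimpleNamespace
import numpy as np

x_demo = np.random.rand(256, 28, 28).astype('float32')
y_demo = np.random.randint(0, 10, size=256).astype('float32')
demo_conf = SimpleNamespace(batch_size=64, one_batch=False)
train_dl, valid_dl = dl_maker(x_demo, y_demo, demo_conf)
xb, yb = next(iter(train_dl))  # xb has shape (batch, 1, 28, 28) after unsqueeze(1)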
def __init__(self, args):
    # Set the folder to save the records and checkpoints
    log_base_dir = './logs/'
    if not osp.exists(log_base_dir):
        os.mkdir(log_base_dir)
    meta_base_dir = osp.join(log_base_dir, 'meta')
    if not osp.exists(meta_base_dir):
        os.mkdir(meta_base_dir)
    save_path1 = '_'.join([args.dataset, args.model_type, 'maml'])
    save_path2 = ('shot' + str(args.shot) + '_way' + str(args.way) +
                  '_query' + str(args.train_query) + '_lr' + str(args.meta_lr) +
                  '_batch' + str(args.num_batch) + '_maxepoch' + str(args.max_epoch) +
                  '_baselr' + str(args.base_lr) + '_updatestep' + str(args.update_step) +
                  '_' + args.meta_label)
    args.save_path = meta_base_dir + '/' + save_path1 + '_' + save_path2
    ensure_path(args.save_path)
    self.args = args

    # Build the meta-train set and its episodic sampler
    self.trainset = Dataset('train', self.args, train_aug=True)
    self.train_sampler = CategoriesSampler(self.trainset.label, self.args.num_batch,
                                           self.args.way, self.args.shot + self.args.train_query)
    #self.train_loader = DataLoader(dataset=self.trainset, batch_sampler=self.train_sampler, num_workers=8, pin_memory=True)
    self.train_loader = None

    # Build the meta-validation set and its episodic sampler
    self.valset = Dataset('val', self.args)
    self.val_sampler = CategoriesSampler(self.valset.label, self.args.val_batch,
                                         self.args.way, self.args.shot + self.args.val_query)
    #self.val_loader = DataLoader(dataset=self.valset, batch_sampler=self.val_sampler, num_workers=8, pin_memory=True)
    self.val_loader = None

    # Build the meta-learner and its optimizer
    self.model = MetaLearner(self.args).to(self.args.device)
    ##self.model.encoder.load_state_dict(torch.load(self.args.pre_load_path))
    self.model = torch.nn.DataParallel(self.model)
    print(self.model)
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.args.meta_lr,
                               momentum=0.9, weight_decay=args.weight_decay)  # or adam
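# A minimal sketch of the `args` object this constructor expects; the attribute names
# come from the reads above, the values are placeholders, and `MAMLTrainer` is a
# stand-in for whatever class actually defines this __init__.
from types import SimpleNamespace

demo_args = SimpleNamespace(
    dataset='mini', model_type='ResNet', shot=1, way=5, train_query=15,
    meta_lr=1e-3, num_batch=100, max_epoch=100, base_lr=1e-2, update_step=5,
    meta_label='exp0', val_batch=200, val_query=15, weight_decay=5e-4,
    device='cuda',
)
trainer = MAMLTrainer(demo_args)  # hypothetical class name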
'''
Load the training dataset:
 - Reads annotations from a csv file
 - Sends each training video to the Visual Attention Module
 - Appends each output stshi sequence to the training inputs
 - Appends class and context labels as one-hot encoded vectors to the training outputs
'''
# Define phase - training/testing
phase = "training"
# Define size of the temporal window
duration = 40
# Define length of the stshi sequence
maxlen = 35

print("SETTING PHASE TO : ", phase, "\n\n")
ds = Dataset(phase, duration, maxlen)
print("\nDataset loaded.")

'''
Inputs to the Memory Module:
 X - Array of stshi sequences
 Y - Array of (one-hot context vector, one-hot encoded class vector)
'''
# Training the memory module
hidden_dim = 64
X_all_stshiseq = ds.X_all_stshiseq
Y_all = ds.Y_all
input_dimensions = np.shape(X_all_stshiseq)
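# Sketch of how one training target could be assembled from the description above,
# assuming `num_contexts` and `num_classes` counts; the names and the concatenation
# order are illustrative only; the actual encoding is produced inside Dataset.
import numpy as np

def make_target(context_id, class_id, num_contexts, num_classes):
    context_vec = np.eye(num_contexts)[context_id]  # one-hot context vector
    class_vec = np.eye(num_classes)[class_id]       # one-hot class vector
    return np.concatenate([context_vec, class_vec])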
output_rtn = []
if not no_rpn_flag:
    rpn_network = RPN(x_static_segmentation_rpn, x_objects[2])
    output_rpn = rpn_network.make_graph()
    resampled_output_rpn = tf_resample_hyps(output_rpn,
                                            float(RPN_RESOLUTION[0] / FLN_RESOLUTION[0]),
                                            float(RPN_RESOLUTION[1] / FLN_RESOLUTION[1]))
    rtn_network = RTN(x_static_segmentation, x_objects[2], x_imgs[2], x_egos, resampled_output_rpn)
    output_rtn = rtn_network.make_graph()
fln_network = FLN(x_imgs, x_semantics, x_egos, x_objects, output_rtn)
output_fln = fln_network.make_graph()

# Load the model snapshot
optimistic_restore(session, model_path)

# Load the input dataset
dataset = Dataset(data_path, dataset_name)

nll_sum = 0
fde_sum = 0
iou_sum = 0
counter = 0

# Run the test for each sequence of each scene
for scene_index in range(len(dataset.scenes)):
    scene = dataset.scenes[scene_index]
    scene_name = scene.scene_path.split('/')[-1]
    print('---------------- Scene %s ---------------------' % scene_name)
    if write_output_flag:
        result_scene_path = os.path.join(output_folder, dataset_name, scene_name)
        os.makedirs(result_scene_path, exist_ok=True)
    for i in range(len(scene.sequences)):
        testing_sequence = scene.sequences[i]
def eval(self):
    """The function for the meta-eval phase."""
    # Load the logs (trlog.json was written with json.dump, so read it back with json)
    with open(osp.join(self.args.save_path, 'trlog.json'), 'r') as f:
        trlog = json.load(f)

    # Load meta-test set
    test_set = Dataset('test', self.args, train_aug=False)
    sampler = CategoriesSampler(test_set.label, self.args.test_batch,
                                self.args.way, self.args.shot + self.args.val_query)
    loader = DataLoader(test_set, batch_sampler=sampler,
                        num_workers=self.args.num_work, pin_memory=True)
    test_data = self.inf_get(loader)

    # Load model for meta-test phase
    if self.args.eval_weights is not None:
        self.model.load_state_dict(torch.load(self.args.eval_weights)['params'])
    else:
        self.model.load_state_dict(
            torch.load(osp.join(self.args.save_path, 'max_acc' + '.pth'))['params'])

    # Set model to eval mode
    # self.model.eval()  # ?????????

    # Set accuracy averager
    ave_acc = Averager()
    acc_log = []

    # Generate labels
    label_shot = torch.arange(self.args.way).repeat(self.args.shot).to(
        self.args.device).type(torch.long)
    label_query = torch.arange(self.args.way).repeat(
        self.args.train_query).to(self.args.device).type(torch.long)

    for i in tqdm.tqdm(range(self.args.test_batch // self.args.meta_batch)):
        data_list = []
        label_shot_list = []
        for _ in range(self.args.meta_batch):
            data_list.append(next(test_data).to(self.args.device))
            label_shot_list.append(label_shot)
        data_list = torch.stack(data_list, dim=0)
        label_shot_list = torch.stack(label_shot_list, dim=0)
        out = self.model(data_list, label_shot_list).detach()
        for inner_id in range(self.args.meta_batch):
            cur_acc = count_acc(out[inner_id], label_query)
            acc_log.append(cur_acc)
            ave_acc.add(cur_acc)

    acc_np = np.array(acc_log, dtype=float)
    m, pm = compute_confidence_interval(acc_np)
    trlog['test_acc'] = [m, pm]
    cur_test_save_name = 'trlog_test_' + str(self.args.index) + '.json'
    with open(osp.join(self.args.save_path, cur_test_save_name), 'w') as f:
        json.dump(trlog, f)
    print('Val Best Epoch {}, Acc {:.4f}, Test Acc {:.4f}'.format(
        trlog['max_acc_epoch'], trlog['max_acc'], ave_acc.item()))
    print('Test Acc {:.4f} + {:.4f}'.format(m, pm))
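# For reference, a common definition of the (mean, margin) pair returned by
# compute_confidence_interval: a 95% interval as mean +/- 1.96 * std / sqrt(n).
# The actual helper used above may be defined differently.
import numpy as np

def confidence_interval_sketch(acc_np):
    m = float(np.mean(acc_np))
    pm = 1.96 * float(np.std(acc_np)) / np.sqrt(len(acc_np))
    return m, pm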
data_path = DATASET_PATH[dataset_name]
session = create_session()
x_objects = tf.placeholder(tf.float32, shape=(3, 1, 1, 5, 1))
x_imgs = tf.placeholder(tf.float32, shape=(3, 1, 3, height, width))

# Build the network graph
network = EWTA_MDF(x_imgs, x_objects)
output = network.make_graph()

# Load the model snapshot
optimistic_restore(session, model_path)

# Load the input dataset
dataset = Dataset(data_path)

nll_sum = 0
semd_sum = 0
counter = 0

# Run the test for each sequence of each scene
for scene_index in range(len(dataset.scenes)):
    scene = dataset.scenes[scene_index]
    scene_name = scene.scene_path.split('/')[-1]
    print('---------------- Scene %s ---------------------' % scene_name)
    if write_output_flag:
        result_scene_path = os.path.join(output_folder, dataset_name, scene_name)
        os.makedirs(result_scene_path, exist_ok=True)
    for i in range(len(scene.sequences)):
        testing_sequence = scene.sequences[i]
from dataset_loader import Dataset
from document_preprocessor import DocumentPreprocessor
from document_vectors import StatsKeeper

'''
Simple showcase app demonstrating the difference between searches that use
"matching score" and searches that use "cosine similarity".
'''

dataset = Dataset()
print("DATASET LOADED !")

documentPreprocessor = DocumentPreprocessor(remove_apostrophes=True,
                                            remove_punctuation=True,
                                            remove_single_characters=True,
                                            remove_stop_words=True,
                                            stemming=True,
                                            number_converting=True,
                                            lower_case=True)
statsKeeper = StatsKeeper()

for path, (title, text) in dataset.texts.items():
    preprocessed = documentPreprocessor.preprocess_document(path=path, text=text, title=title)
    statsKeeper.load_document(title, preprocessed.title, preprocessed.text)
print("DATASET PREPARED FOR COMPILATION !")

statsKeeper.compile()
print("DATASET COMPILED !")

while True:
    print("\nType \"__exit__\" if you want to leave.")
    query = input("What are you searching for ? : ")
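# Hedged sketch of the two ranking schemes this showcase compares. It does not use the
# StatsKeeper API (its query methods are not shown above); it only illustrates the
# scoring on plain term-weight dictionaries.
import math

def matching_score(query_terms, doc_weights):
    # Sum of the document's weights for the query terms it contains
    return sum(doc_weights.get(term, 0.0) for term in query_terms)

def cosine_similarity(query_weights, doc_weights):
    # Dot product of the weight vectors divided by the product of their norms
    dot = sum(w * doc_weights.get(term, 0.0) for term, w in query_weights.items())
    q_norm = math.sqrt(sum(w * w for w in query_weights.values()))
    d_norm = math.sqrt(sum(w * w for w in doc_weights.values()))
    return dot / (q_norm * d_norm) if q_norm and d_norm else 0.0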
rpn_network = RPN(x_static_segmentation_rpn, x_object)
output_rpn = rpn_network.make_graph()
resampled_output_rpn = tf_resample_hyps(
    output_rpn,
    float(RPN_RESOLUTION[0] / EPN_RESOLUTION[0]),
    float(RPN_RESOLUTION[1] / EPN_RESOLUTION[1]))
rtn_network = RTN(x_static_segmentation, x_object, x_img, x_egos, resampled_output_rpn)
output_rtn = rtn_network.make_graph()
epn_network = EPN(x_img, x_semantic, x_egos, x_object, output_rtn)
output_epn = epn_network.make_graph()

# Load the model snapshot
optimistic_restore(session, model_path)

# Load the input dataset
dataset = Dataset(data_path, dataset_name, type='EPN')

nll_sum = 0
fde_sum = 0
iou_sum = 0
counter = 0

# Run the test for each sequence of each scene
for scene_index in range(len(dataset.scenes)):
    scene = dataset.scenes[scene_index]
    scene_name = scene.scene_path.split('/')[-1]
    print('---------------- Scene %s ---------------------' % scene_name)
    if write_output_flag:
        result_scene_path = os.path.join(output_folder, dataset_name, scene_name)
        os.makedirs(result_scene_path, exist_ok=True)
    for i in range(len(scene.sequences)):