def __call__(self, data):
    if isinstance(data, list):
        data = [self._process(d) for d in tq(data)]
        data = list(itertools.chain(*data))  # the 2D list needs to be flattened
    else:
        data = self._process(data)
    return data
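# A minimal standalone sketch of the list-or-single dispatch pattern used in __call__
# above, assuming only tqdm and itertools; `square_pairs` is a hypothetical stand-in for
# self._process, which returns a list per input item.
import itertools
from tqdm.auto import tqdm as tq

def square_pairs(d):
    return [d, d * d]

def flatten_process(data):
    if isinstance(data, list):
        processed = [square_pairs(d) for d in tq(data)]
        return list(itertools.chain(*processed))  # flatten the 2D list
    return square_pairs(data)

# flatten_process([1, 2, 3]) == [1, 1, 2, 4, 3, 9]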
def download_file(url: str, path: str, verbose: bool = False) -> None:
    """
    Download file with progressbar

    Usage:
        download_file('http://web4host.net/5MB.zip')
    """
    if not os.path.exists(path):
        os.makedirs(path)
    local_filename = os.path.join(path, url.split('/')[-1])

    r = requests.get(url, stream=True)
    file_size = int(r.headers['Content-Length']) if 'Content-Length' in r.headers else 0
    chunk_size = 1024
    num_bars = int(file_size / chunk_size)
    if verbose:
        print(dict(file_size=file_size))
        print(dict(num_bars=num_bars))

    if not os.path.exists(local_filename):
        with open(local_filename, 'wb') as fp:
            for chunk in tq(
                r.iter_content(chunk_size=chunk_size),
                total=num_bars,
                unit='KB',
                desc=local_filename,
                leave=True  # progressbar stays
            ):
                fp.write(chunk)  # type: ignore

    if '.zip' in local_filename:
        if os.path.exists(local_filename):
            with zipfile.ZipFile(local_filename, 'r') as zip_ref:
                zip_ref.extractall(path)
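# Imports assumed by download_file above but not shown in the snippet (standard library
# plus requests, and tqdm aliased as tq, as used throughout this section).
import os
import zipfile

import requests
from tqdm.auto import tqdm as tq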
def _process_filenames(self, filenames):
    data_raw_list = []
    data_list = []
    categories_ids = [self.category_ids[cat] for cat in self.categories]
    cat_idx = {categories_ids[i]: i for i in range(len(categories_ids))}

    has_pre_transform = self.pre_transform is not None

    id_scan = -1
    for name in tq(filenames):
        cat = name.split(osp.sep)[0]
        if cat not in categories_ids:
            continue
        id_scan += 1
        data = read_txt_array(osp.join(self.raw_dir, name))
        pos = data[:, :3]
        x = data[:, 3:6]
        y = data[:, -1].type(torch.long)
        category = torch.ones(x.shape[0], dtype=torch.long) * cat_idx[cat]
        id_scan_tensor = torch.from_numpy(np.asarray([id_scan])).clone()
        data = Data(pos=pos, x=x, y=y, category=category, id_scan=id_scan_tensor)
        data = SaveOriginalPosId()(data)
        if self.pre_filter is not None and not self.pre_filter(data):
            continue
        data_raw_list.append(data.clone() if has_pre_transform else data)
        if has_pre_transform:
            data = self.pre_transform(data)
            data_list.append(data)
    if not has_pre_transform:
        return [], data_raw_list
    return data_raw_list, data_list
def generate(size, **kwargs):
    generator = DataGenerator.from_robbins_dataset(
        diamlims=kwargs["diamlims"],
        ellipse_limit=kwargs["ellipse_limit"],
        arc_lims=kwargs["arc_lims"],
        axis_threshold=kwargs["axis_threshold"],
        fov=kwargs["fov"],
        resolution=kwargs["resolution"],
        filled=kwargs["filled"],
        mask_thickness=kwargs["mask_thickness"],
        instancing=kwargs["instancing"])

    date_dataset = np.empty((size, 3), int)
    images_dataset = np.empty((size, 1, *generator.resolution), np.float32)
    if kwargs["instancing"]:
        masks_dataset = np.empty((size, 1, *generator.resolution), np.int16)
    else:
        masks_dataset = np.empty((size, 1, *generator.resolution), np.bool_)
    position_dataset = np.empty((size, 3, 1), np.float64)
    attitude_dataset = np.empty((size, 3, 3), np.float64)
    sol_incidence_dataset = np.empty((size, 1), np.float16)

    A_craters = []

    for i in tq(range(size), desc="Creating dataset"):
        date = dt.date(2021, np.random.randint(1, 12), 1)
        generator.set_random_position()
        generator.scene_time = date
        date_dataset[i] = np.array((date.year, date.month, date.day))

        while not (kwargs["min_sol_incidence"] <= generator.solar_incidence_angle <= kwargs["max_sol_incidence"]):
            generator.set_random_position()  # Generate random position

        position_dataset[i] = generator.position
        sol_incidence_dataset[i] = generator.solar_incidence_angle

        generator.point_nadir()
        if kwargs["randomized_orientation"]:
            # Rotations are incremental (order matters)
            generator.rotate('roll', np.random.randint(0, 360))
            generator.rotate('pitch', np.random.randint(-30, 30))
            generator.rotate('yaw', np.random.randint(-30, 30))

        attitude_dataset[i] = generator.attitude

        image, mask = generator.image_mask_pair()
        masks_dataset[i] = mask[None, None, ...]
        images_dataset[i] = image[None, None, ...]

        if kwargs["save_craters"]:
            A_craters.append(generator.craters_in_image())

    return images_dataset, masks_dataset, position_dataset, attitude_dataset, date_dataset, sol_incidence_dataset, A_craters
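# Standalone sketch of the rejection loop in generate() above: redraw until the sampled
# value falls inside the configured solar-incidence window. The bounds and the uniform
# draw are illustrative stand-ins for generator.set_random_position() and
# generator.solar_incidence_angle.
import numpy as np

min_sol_incidence, max_sol_incidence = 10.0, 80.0
angle = np.random.uniform(0.0, 90.0)
while not (min_sol_incidence <= angle <= max_sol_incidence):
    angle = np.random.uniform(0.0, 90.0)  # redraw until the constraint holds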
def sample_chains(susceptible, initial_infected, model, daily_ri_values, num_chains=1000,
                  n_workers=None, pool=None, deterministic=False, show_progress=False):
    if n_workers is not None and n_workers > 1 and pool is None:
        pool = initialize_pool(n_workers, np.random.SeedSequence())

    pbar = None
    if show_progress:
        try:
            from tqdm.auto import tqdm as tq
            pbar = tq(total=num_chains)
        except ImportError:
            warnings.warn(
                "Could not load tqdm to show progress, please install it to use the option show_progress"
            )

    simulations = np.zeros(shape=(num_chains, len(daily_ri_values), len(STATE_NAMES)))

    if pool is None:
        it = (simulation(susceptible, initial_infected, model, daily_ri_values, deterministic=deterministic)
              for _ in range(num_chains))
    else:
        it = pool.imap_unordered(
            _fn_simulation,
            [(susceptible, initial_infected, model.parameters, daily_ri_values,
              model.alphas, model.betas, deterministic) for _ in range(num_chains)])

    for i, (st, _) in enumerate(it):
        simulations[i, :, :] = st
        if pbar is not None:
            pbar.update()

    if pbar is not None:
        pbar.close()
    if pool is not None:
        pool.terminate()

    return simulations
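# The manual tqdm pattern used in sample_chains above, in isolation: create a bar with a
# known total, tick it once per completed item, and close it when done.
from tqdm.auto import tqdm as tq

pbar = tq(total=100)
for _ in range(100):
    pbar.update()  # one finished chain/simulation
pbar.close()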
def download_data(url: str, path: str = "data/", verbose: bool = False) -> None:
    """
    Download file with progressbar

    # Code taken from: https://gist.github.com/ruxi/5d6803c116ec1130d484a4ab8c00c603
    # __author__ = "github.com/ruxi"
    # __license__ = "MIT"

    Usage:
        download_file('http://web4host.net/5MB.zip')
    """
    if url == "NEED_TO_BE_CREATED":
        raise NotImplementedError

    if not os.path.exists(path):
        os.makedirs(path)
    local_filename = os.path.join(path, url.split('/')[-1])
    r = requests.get(url, stream=True, verify=False)
    file_size = int(r.headers['Content-Length']) if 'Content-Length' in r.headers else 0
    chunk_size = 1024
    num_bars = int(file_size / chunk_size)
    if verbose:
        print(dict(file_size=file_size))
        print(dict(num_bars=num_bars))

    if not os.path.exists(local_filename):
        with open(local_filename, 'wb') as fp:
            for chunk in tq(
                r.iter_content(chunk_size=chunk_size),
                total=num_bars,
                unit='KB',
                desc=local_filename,
                leave=True  # progressbar stays
            ):
                fp.write(chunk)  # type: ignore

    if '.zip' in local_filename:
        if os.path.exists(local_filename):
            with zipfile.ZipFile(local_filename, 'r') as zip_ref:
                zip_ref.extractall(path)
def process_filenames(self, filenames):
    data_list = []
    categories_ids = [self.category_ids[cat] for cat in self.categories]
    cat_idx = {categories_ids[i]: i for i in range(len(categories_ids))}

    for name in tq(filenames):
        cat = name.split(osp.sep)[0]
        if cat not in categories_ids:
            continue

        data = read_txt_array(osp.join(self.raw_dir, name))
        pos = data[:, :3]
        x = data[:, 3:6]
        y = data[:, -1].type(torch.long)
        category = torch.ones(x.shape[0], dtype=torch.long) * cat_idx[cat]
        data = Data(pos=pos, x=x, y=y, category=category)
        if self.pre_filter is not None and not self.pre_filter(data):
            continue
        if self.pre_transform is not None:
            data = self.pre_transform(data)
        data_list.append(data)
    return data_list
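# Column layout assumed by the slicing in process_filenames above: the first three
# columns are xyz positions, the next three are per-point features, and the last column
# is the integer part label. The random tensor here is just a stand-in for the output of
# read_txt_array.
import torch

rows = torch.rand(4, 7)
pos = rows[:, :3]                  # xyz
x = rows[:, 3:6]                   # per-point features
y = rows[:, -1].type(torch.long)   # part label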
def download_file(url: str, path: str, verbose: bool = False) -> None:
    """
    Download file with progressbar

    Usage:
        download_file('http://web4host.net/5MB.zip')
    """
    if not os.path.exists(path):
        os.makedirs(path)
    local_filename = os.path.join(path, url.split('/')[-1])

    if not os.path.exists(local_filename):
        r = requests.get(url, stream=True)
        file_size = int(r.headers.get('Content-Length', 0))
        chunk_size = 1024
        num_bars = int(file_size / chunk_size)
        if verbose:
            logging.info(f'file size: {file_size}\n# bars: {num_bars}')
        with open(local_filename, 'wb') as fp:
            for chunk in tq(
                r.iter_content(chunk_size=chunk_size),
                total=num_bars,
                unit='KB',
                desc=local_filename,
                leave=True  # progressbar stays
            ):
                fp.write(chunk)  # type: ignore

    if '.zip' in local_filename:
        if os.path.exists(local_filename):
            with zipfile.ZipFile(local_filename, 'r') as zip_ref:
                zip_ref.extractall(path)
    elif '.tar.gz' in local_filename:
        if os.path.exists(local_filename):
            with tarfile.open(local_filename, 'r') as tar_ref:
                tar_ref.extractall(path)
def process(self):
    if not os.path.exists(self.pre_processed_path):
        train_areas = [f for f in self.folders if str(self.test_area) not in f]
        test_areas = [f for f in self.folders if str(self.test_area) in f]

        train_files = [
            (f, room_name, osp.join(self.raw_dir, f, room_name))
            for f in train_areas
            for room_name in os.listdir(osp.join(self.raw_dir, f))
            if os.path.isdir(osp.join(self.raw_dir, f, room_name))
        ]
        test_files = [
            (f, room_name, osp.join(self.raw_dir, f, room_name))
            for f in test_areas
            for room_name in os.listdir(osp.join(self.raw_dir, f))
            if os.path.isdir(osp.join(self.raw_dir, f, room_name))
        ]

        # Gather data per area
        data_list = [[] for _ in range(6)]
        for (area, room_name, file_path) in tq(train_files + test_files):
            area_num = int(area[-1]) - 1
            if self.debug:
                read_s3dis_format(file_path, room_name, label_out=True, verbose=self.verbose, debug=self.debug)
                continue
            else:
                xyz, rgb, room_labels, room_object_indices = read_s3dis_format(
                    file_path, room_name, label_out=True, verbose=self.verbose, debug=self.debug
                )

                rgb_norm = rgb.float() / 255.0
                data = Data(pos=xyz, y=room_labels, rgb=rgb_norm)
                if room_name in VALIDATION_ROOMS:
                    data.validation_set = True
                else:
                    data.validation_set = False

                if self.keep_instance:
                    data.room_object_indices = room_object_indices

                if self.pre_filter is not None and not self.pre_filter(data):
                    continue

                data_list[area_num].append(data)

        raw_areas = cT.PointCloudFusion()(data_list)
        for i, area in enumerate(raw_areas):
            torch.save(area, self.raw_areas_paths[i])

        for area_datas in data_list:
            # Apply pre_transform
            if self.pre_transform is not None:
                for data in area_datas:
                    data = self.pre_transform(data)

        torch.save(data_list, self.pre_processed_path)
    else:
        data_list = torch.load(self.pre_processed_path)

    if self.debug:
        return

    train_data_list = {}
    val_data_list = {}
    trainval_data_list = {}
    for i in range(6):
        if i != self.test_area - 1:
            train_data_list[i] = []
            val_data_list[i] = []
            for data in data_list[i]:
                validation_set = data.validation_set
                del data.validation_set
                if validation_set:
                    val_data_list[i].append(data)
                else:
                    train_data_list[i].append(data)
            trainval_data_list[i] = val_data_list[i] + train_data_list[i]
    train_data_list = list(train_data_list.values())
    val_data_list = list(val_data_list.values())
    trainval_data_list = list(trainval_data_list.values())
    test_data_list = data_list[self.test_area - 1]

    if self.pre_collate_transform:
        log.info("pre_collate_transform ...")
        log.info(self.pre_collate_transform)
        train_data_list = self.pre_collate_transform(train_data_list)
        val_data_list = self.pre_collate_transform(val_data_list)
        test_data_list = self.pre_collate_transform(test_data_list)
        trainval_data_list = self.pre_collate_transform(trainval_data_list)

    self._save_data(train_data_list, val_data_list, test_data_list, trainval_data_list)
                         num_workers=2)

# confusion matrix
conf_mat = np.zeros((len(LABELS), len(LABELS)))
outputs = []
y_trues = []

# SVM, RF, NB and HGB
y_trues_sota = []
y_preds_svm = []
y_preds_rf = []
y_preds_nb = []
y_preds_hgb = []

for idx, (image, y_true, _) in tq(enumerate(test_loader), total=len(test_loader)):
    if sota:
        # apply SVM, RF, NB and HGB methods
        features = s2_to_ndvifdi(image.squeeze(0).numpy())
        y_trues_sota.append(y_true.squeeze(0))

        # svm
        y_pred_svm = clf_svm.predict(features.reshape(2, -1).T).reshape(128, 128)
        y_preds_svm.append(y_pred_svm)

        # rf
        y_pred_rf = clf_rf.predict(features.reshape(2, -1).T).reshape(128, 128)
        y_preds_rf.append(y_pred_rf)
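# tqdm cannot infer a length from enumerate(), which is why the loop above passes
# total=len(test_loader) explicitly. Enumerating the wrapped iterable gives the same bar
# without the extra argument (plain list as a stand-in for the DataLoader).
from tqdm.auto import tqdm as tq

test_items = list(range(8))
for idx, item in enumerate(tq(test_items)):
    pass  # per-batch work goes here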
def download_data(url: str, path: str = "data/", verbose: bool = False) -> None:
    """Download file with progressbar.

    # Code adapted from: https://gist.github.com/ruxi/5d6803c116ec1130d484a4ab8c00c603
    # __author__ = "github.com/ruxi"
    # __license__ = "MIT"

    Examples
    ________

    .. doctest::

        >>> import os
        >>> from flash.core.data.utils import download_data
        >>> download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "./data")
        >>> os.listdir("./data")  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
        [...]
    """
    # Disable warning about making an insecure request
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    if not os.path.exists(path):
        os.makedirs(path)
    local_filename = os.path.join(path, url.split("/")[-1])
    r = requests.get(url, stream=True, verify=False)
    file_size = int(r.headers["Content-Length"]) if "Content-Length" in r.headers else 0
    chunk_size = 1024
    num_bars = int(file_size / chunk_size)
    if verbose:
        print(dict(file_size=file_size))
        print(dict(num_bars=num_bars))
    if not os.path.exists(local_filename):
        with open(local_filename, "wb") as fp:
            for chunk in tq(
                r.iter_content(chunk_size=chunk_size),
                total=num_bars,
                unit="KB",
                desc=local_filename,
                leave=True,  # progressbar stays
            ):
                fp.write(chunk)  # type: ignore

    def extract_tarfile(file_path: str, extract_path: str, mode: str):
        if os.path.exists(file_path):
            with tarfile.open(file_path, mode=mode) as tar_ref:
                for member in tar_ref.getmembers():
                    try:
                        tar_ref.extract(member, path=extract_path, set_attrs=False)
                    except PermissionError:
                        raise PermissionError(f"Could not extract tar file {file_path}")

    if ".zip" in local_filename:
        if os.path.exists(local_filename):
            with zipfile.ZipFile(local_filename, "r") as zip_ref:
                zip_ref.extractall(path)
    elif local_filename.endswith(".tar.gz") or local_filename.endswith(".tgz"):
        extract_tarfile(local_filename, path, "r:gz")
    elif local_filename.endswith(".tar.bz2") or local_filename.endswith(".tbz"):
        extract_tarfile(local_filename, path, "r:bz2")
train_loss_list = []
valid_loss_list = []
dice_score_list = []
lr_rate_list = []
valid_loss_min = np.Inf  # track change in validation loss

for epoch in range(1, n_epochs + 1):
    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    dice_score = 0.0

    ###################
    # train the model #
    ###################
    model.train()
    bar = tq(train_loader, postfix={"train_loss": 0.0})
    for data, target in bar:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # print(loss)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
def performance_metrics(self, iou_threshold=0.5, confidence_thresholds=None, distance_threshold=None):
    if confidence_thresholds is None:
        confidence_thresholds = torch.arange(start=0.05, end=0.99, step=0.05).to(self.device)

    loader = DataLoader(self.ds, batch_size=32, shuffle=True, num_workers=0, collate_fn=collate_fn)

    bar = tq(loader, desc="Testing", postfix={
        "IoU": 0.,
        "GA_distance": 0.,
        "precision": 0.,
        "recall": 0.,
        "f1_score": 0.
    })

    precision = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)
    recall = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)
    f1 = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)
    iou = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)
    dist = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)

    for batch, (images, targets_all) in enumerate(bar):
        images = list(image.to(self.device) for image in images)
        targets_all = [{k: v.to(self.device) for k, v in t.items()} for t in targets_all]

        pred_all = self._model(images)

        for i, (pred, targets) in enumerate(zip(pred_all, targets_all)):
            for j, confidence_threshold in enumerate(confidence_thresholds):
                precision[batch, i, j], recall[batch, i, j], f1[batch, i, j], \
                    iou[batch, i, j], dist[batch, i, j] = detection_metrics(
                        pred, targets,
                        iou_threshold=iou_threshold,
                        confidence_threshold=confidence_threshold,
                        distance_threshold=distance_threshold)

        postfix = dict(
            IoU=iou[batch].mean().item(),
            GA_distance=dist[batch].mean().item(),
            precision=precision[batch].mean().item(),
            recall=recall[batch].mean().item(),
            f1_score=f1[batch].mean().item()
        )
        bar.set_postfix(ordered_dict=postfix)

        del images, targets_all

    precision_out = torch.zeros(len(confidence_thresholds))
    recall_out = torch.zeros(len(confidence_thresholds))
    f1_out = torch.zeros(len(confidence_thresholds))
    iou_out = torch.zeros(len(confidence_thresholds))
    dist_out = torch.zeros(len(confidence_thresholds))

    for i in range(len(confidence_thresholds)):
        precision_out[i], recall_out[i], f1_out[i], iou_out[i], dist_out[i] = map(
            lambda x: x[..., i][x[..., i] > 0.].mean(),
            (precision, recall, f1, iou, dist)
        )

    precision, recall, f1, iou, dist = map(lambda x: x.mean((0, 1)), (precision, recall, f1, iou, dist))

    return precision, recall, f1, iou, dist, confidence_thresholds
def train(self, train_loader, valid_loader, optimizer, scheduler, valid_score_fn, n_epochs,
          train_on_gpu=False, verbose=False, rst_path=None):
    """
    :param train_loader: DataLoader yielding (data, target) batches for training.
    :param valid_loader: DataLoader yielding (data, target) batches for validation.
    :param optimizer: torch optimizer updating the model parameters.
    :param scheduler: learning-rate scheduler stepped with the validation loss.
    :param valid_score_fn: callable taking (output, target) and returning the validation score.
    :param n_epochs: number of training epochs.
    :param train_on_gpu: a bool. Move the model and batches to the GPU.
    :param verbose: a bool. Verbosity flag.
    :param rst_path: a string. Path to the folder where the error and the best model should be stored.
    :return: lists of training losses, validation losses, dice scores and learning rates, one entry per epoch.
    """
    if train_on_gpu:
        self.model.cuda()

    train_loss_list, valid_loss_list, dice_score_list = [], [], []
    lr_rate_list = []
    valid_loss_min = np.Inf

    for epoch in range(1, n_epochs + 1):
        # keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        dice_score = 0.0

        ###################
        # train the model #
        ###################
        self.model.train()
        bar = tq(train_loader, postfix={"train_loss": 0.0})
        for data, target in bar:
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = self.model(data)
            # calculate the batch loss
            loss = self.criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            train_loss += loss.item() * data.size(0)
            # print("Loss item: {}, data_size:{}".format(loss.item(), data.size(0)))
            bar.set_postfix(ordered_dict={"train_loss": loss.item()})

        ######################
        # validate the model #
        ######################
        self.model.eval()
        del data, target
        with torch.no_grad():
            bar = tq(valid_loader, postfix={"valid_loss": 0.0, "dice_score": 0.0})
            for data, target in bar:
                # move tensors to GPU if CUDA is available
                if train_on_gpu:
                    data, target = data.cuda(), target.cuda()
                output = self.model(data)
                loss = self.criterion(output, target)
                # update average validation loss
                valid_loss += loss.item() * data.size(0)
                dice_cof = valid_score_fn(output.cpu(), target.cpu()).item()
                dice_score += dice_cof * data.size(0)
                bar.set_postfix(ordered_dict={"valid_loss": loss.item(), "dice_score": dice_cof})

        # calculate average losses
        train_loss = train_loss / len(train_loader.dataset)
        valid_loss = valid_loss / len(valid_loader.dataset)
        dice_score = dice_score / len(valid_loader.dataset)
        train_loss_list.append(train_loss)
        valid_loss_list.append(valid_loss)
        dice_score_list.append(dice_score)
        lr_rate_list.append([param_group['lr'] for param_group in optimizer.param_groups])

        # print training/validation statistics
        print('Epoch: {} Training Loss: {:.6f} Validation Loss: {:.6f} Dice Score: {:.6f}'.format(
            epoch, train_loss, valid_loss, dice_score))

        if rst_path is not None:
            with open(join(rst_path, 'training_rst.txt'), 'w') as frst:
                frst.write(str(train_loss_list) + '\n')
                frst.write(str(valid_loss_list) + '\n')
                frst.write(str(dice_score_list) + '\n')

            # save model if validation loss has decreased
            if valid_loss <= valid_loss_min:
                print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
                    valid_loss_min, valid_loss))
                torch.save(self.model.state_dict(), join(rst_path, 'model_cifar.pt'))
                valid_loss_min = valid_loss

        scheduler.step(valid_loss)

    return train_loss_list, valid_loss_list, dice_score_list, lr_rate_list
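# Hedged usage sketch for the train() method above. The trainer construction is
# hypothetical (the wrapping class is not shown here); the scheduler choice follows from
# scheduler.step(valid_loss), which matches torch.optim.lr_scheduler.ReduceLROnPlateau.
#
#   trainer = SegmentationTrainer(model, criterion)            # hypothetical constructor
#   optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#   scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)
#   history = trainer.train(train_loader, valid_loader, optimizer, scheduler,
#                           valid_score_fn=dice_fn, n_epochs=20,
#                           train_on_gpu=torch.cuda.is_available(), rst_path="results/")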