def train_single_model(self, train_dir, train_csv, val_dir, val_csv, epochs): train_part = pd.read_csv(train_csv).values # array type val_part = pd.read_csv(val_csv).values train_dataset = utils.DYDataSet( train_dir, train_part, utils.get_transforms( mode='train', input_size=self.input_size, resize_size=self.input_size+42) ) train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=4, pin_memory=True, sampler=None) val_dataset = utils.DYDataSet( val_dir, val_part, utils.get_transforms(mode='valid', input_size=self.input_size, resize_size=self.input_size+42)) val_loader = torch.utils.data.DataLoader( val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=4, pin_memory=True) print('[+] trainning with total %d images' % len(train_dataset)) self.model = get_model(self.model_name, pretrained=True) criterion = torch.nn.CrossEntropyLoss().cuda() utils.train(self.model, train_loader, val_loader, criterion, checkpoint_file=self.checkpoint_file, epochs=epochs)
def main(args):
    """Classify every image in ``args.input_image_folder``, writing:

    * a txt file listing paths of correctly classified images, and
    * a csv with one (label, predicted) row per image,

    both under ``./classification_output_files``, then print overall accuracy.

    Fixes vs. original: ``args.inputImageFolder`` -> ``args.input_image_folder``
    (the attribute actually used above; the camelCase spelling would raise
    AttributeError), deprecated ``DataFrame.append`` replaced by building the
    frame once, the tqdm postfix now shows an accuracy ratio rather than a raw
    count, and the label file is managed with ``with``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = load_model(base_model_path=args.base_model_path,
                       model_checkpoint=args.model_checkpoint,
                       device=device)
    dataset = CustomDataset(root=args.input_image_folder,
                            transform=get_transforms(),
                            return_paths=True)
    dataloader = DataLoader(dataset=dataset, shuffle=False, batch_size=32,
                            pin_memory=True, num_workers=8)
    output_folder = './classification_output_files'
    os.makedirs(output_folder, exist_ok=True)
    label_file_path = os.path.join(
        output_folder,
        'correctly_classified_images_' + args.output_filename + '.txt')
    results_file_path = os.path.join(
        output_folder,
        'classified_images_' + args.output_filename + '.csv')
    tot = 0
    correct_ = 0
    rows = []  # accumulated (label, predicted) records; framed once at the end
    progress_bar = tqdm(dataloader, total=len(dataloader),
                        desc='Classifying: {}'.format(
                            args.input_image_folder.split('/')[-1]))
    with open(label_file_path, 'w') as label_file:
        for x, l, p in progress_bar:
            tot += x.shape[0]
            x = x.to(device)
            # model(x, True) returns logits; take the argmax class per sample
            y_hat = model(x, True).max(-1)[1].cpu()
            correct = torch.where(y_hat == l)[0]
            correct_ += y_hat.eq(l).sum().item()
            progress_bar.set_postfix(accuracy=correct_ / tot)
            for idx in correct:
                label_file.write(p[idx] + '\n')
            rows.extend(dict(label=l[i].item(), predicted=y_hat[i].item())
                        for i in range(len(l)))
    results = pd.DataFrame(rows)
    results.to_csv(results_file_path, index=False)
    print("Accuracy: {:.2f}%".format(correct_ / tot * 100))
def __init__(self, csv_path, img_path, kfold_path, fold, phase, transform_type, img_size, resize, mean, std):
    """Dataset for cloud segmentation train/validation folds.

    Args:
        csv_path: csv with 'Image_Label' ("<image>_<class>") and 'EncodedPixels'.
        img_path: directory with the image files.
        kfold_path: pickle holding {'kfold': [(train_idx, val_idx), ...], 'id2name': ...}.
        fold: which k-fold split to use.
        phase: 'train' or 'validate'.
        transform_type: forwarded to utils.get_transforms.
        img_size, resize: target sizes (resize forwarded to the transformer).
        mean, std: channel statistics for normalization.
    """
    super(CloudTrainDataset, self).__init__()
    assert phase == 'train' or phase == 'validate'
    self.img_path = img_path
    self.phase = phase
    self.img_size = img_size
    with open(kfold_path, 'rb') as f:
        kfold_info = pickle.load(f)
    # each fold entry is a (train_indices, val_indices) pair
    self.indexs = kfold_info['kfold'][fold][
        0] if phase == 'train' else kfold_info['kfold'][fold][1]
    self.id2name = kfold_info['id2name']
    self.train_df = pd.read_csv(csv_path)
    # 'Image_Label' is "<image>_<class>"; split into separate columns
    self.train_df['ImageId'] = self.train_df['Image_Label'].apply(
        lambda x: x.split('_')[0])
    self.train_df['ClassId'] = self.train_df['Image_Label'].apply(
        lambda x: x.split('_')[1])
    self.train_df['hasMask'] = ~self.train_df['EncodedPixels'].isna()
    # reshape to one row per image, one column per class, values are RLE masks
    self.train_df = self.train_df.pivot(index='ImageId',
                                        columns='ClassId',
                                        values='EncodedPixels')
    self.transformer = utils.get_transforms(phase=phase,
                                            transform_type=transform_type,
                                            resize=resize)
    self.normalize = transforms.Normalize(mean, std)
def main(in_file, transforms_file, out_dir):
    """Apply every transform listed in ``transforms_file`` to the input image
    and write each result to ``out_dir`` as ``<index>_<transform>.png``.
    """
    source = cv2.imread(in_file, 1)
    transforms, _ = get_transforms(transforms_file)
    transformed = apply_all_transforms(source, transforms)
    for idx, (result, name) in enumerate(zip(transformed, transforms)):
        filename = str(idx) + '_' + name.replace(' ', '_') + ".png"
        cv2.imwrite(os.path.join(out_dir, filename), result)
def train(self, parameters):
    """Train ``self.model`` on the ISIC training set for ``self.setting.epoch``
    epochs, recording per-epoch loss/accuracy into ``self.one_search_data``.

    Early-abandons the run (sets ``self.is_abandoned``) if the loss is not
    descending fast enough in the first epochs.
    """
    self.one_search_data.clear()
    self.one_search_data['parameters'] = vars(parameters)
    image_path = self.configer['trainingImagePath']
    label_path = self.configer['trainingLabelPath']
    training_csv = utils.get_csv_by_path_name(label_path)
    transforms = utils.get_transforms(parameters)
    isic_dataset = ISICDataset(image_path, training_csv[0], transforms)
    isic_dataset.__assert_equality__()
    trainingdata_loader = DataLoader(isic_dataset, batch_size=parameters.batchsize, shuffle=True, drop_last=True)
    self.model = Model(parameters)  # build the model from the search parameters
    optimizer = self.model.optimizer
    criteria = self.model.loss_function
    epoch_statics_list = []  # store epoch loss and training accuracy
    self.model.train()
    self.is_abandoned = 0
    for EPOCH in range(self.setting.epoch):
        if EPOCH > 1:
            # ratio of last epoch's avg loss to the one before; >= threshold
            # means loss is barely dropping, so abandon this parameter set early
            loss_descend_rate = epoch_statics_list[-1]['AVG LOSS']/epoch_statics_list[-2]['AVG LOSS']
            if loss_descend_rate >= self.setting.lossDescendThreshold and EPOCH < 10:
                print('current loss descend rate is %f ,larger than threshold %f, abandon this SPD' % (loss_descend_rate, self.setting.lossDescendThreshold))
                self.is_abandoned = 1
                break
        epoch_statics_dict = {}  # record per-epoch training statistics
        loss_all_samples_per_epoch = 0  # sum of batch losses over this epoch
        train_accuracy = 0  # number of correct predictions this epoch
        for idx, (x, y) in tqdm(enumerate(trainingdata_loader)):
            batch_statics_dict = {}
            x = x.to(self.device)
            y = torch.argmax(y, dim=1)  # labels arrive one-hot; recover indices
            y_hat = self.model.network(x.float())
            train_accuracy += (y.to(self.device) == torch.argmax(y_hat, dim=1)).sum().item()
            loss = criteria(y_hat, y.long().to(self.device))
            loss_all_samples_per_epoch += loss.item()  # loss.item() is the batch-mean loss
            # batch record: epoch number plus {loss-function-name: value}
            batch_statics_dict['EPOCH'] = EPOCH
            batch_statics_dict[parameters.lossfunction] = loss.item()
            # visualizer.get_data_report(batch_statics_dict)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # mean per-batch loss for the epoch; idx is 0-based, hence +1
        loss_avg_per_epoch = loss_all_samples_per_epoch / (idx + 1)
        train_accuracy_epoch = train_accuracy / len(isic_dataset)  # correct / sample count
        epoch_statics_dict['EPOCH'] = EPOCH
        epoch_statics_dict['AVG LOSS'] = loss_avg_per_epoch
        epoch_statics_dict['TRAINING ACCURACY'] = train_accuracy_epoch
        pkl_name = self.model.save_model(self.logger.date_string, self.logger.start_time_string)  # save the nn every epoch
        epoch_statics_dict['saved_model'] = pkl_name
        epoch_statics_list.append(epoch_statics_dict)  # record epoch loss for drawing
        print('epoch %s finished ' % EPOCH)
        self.visualizer.get_data_report(epoch_statics_dict)
    self.one_search_data['training_statics'] = epoch_statics_list
    self.logger.set_training_data(self.one_search_data)
def main():
    # CHANGE LOSS FUNCTION TO CORRECT ONE
    """Train SegNet on the cloud dataset, logging runs and the final model to
    Weights & Biases."""
    wandb.init(project="cloud_segmentation")
    # Setup device selection
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    if torch.cuda.is_available():
        print("Running on gpu")
    else:
        print("Running on cpu")
    # define hyper-paremeters
    batch_size = 2
    learning_rate = 0.001
    n_epochs = 2
    wandb.config.update({
        "epochs": n_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate
    })
    # Setup image transforms and data augmentation
    transforms = utils.get_transforms(False)
    # split train test set
    x_train, y_train, x_val, y_val = get_train_val_set(utils.TRAIN_LABELS)
    shape = (1400, 2100, 3)  # raw image shape (H, W, C) of the dataset
    train_dataset = ImageDataset(utils.TRAIN_IMAGES, x_train, y_train, transforms, shape)
    val_dataset = ImageDataset(utils.TRAIN_IMAGES, x_val, y_val, transforms, shape)
    data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    data_loader_val = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    # Define and train model
    model = SegNet()
    wandb.watch(model, log="all")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), learning_rate)
    for epoch in range(n_epochs):
        print("Epoch:", epoch)
        train_step(model, data_loader, optimizer, device)
        eval_step(model, data_loader_val, device)
    # save model to W&B
    torch.save(model.state_dict(), wandb.run.dir + "/model.pt")
def main(transform_file, in_dir, out_dir):
    """Load per-transform metrics from ``in_dir`` and render the invariance /
    equivariance comparison plots into ``out_dir``."""
    safe_mkdir(out_dir)
    transforms, _ = get_transforms(transform_file)
    transforms = reorder_transforms(transforms)
    # load_metrics may drop transforms without saved metrics, hence the rebind
    all_metrics, transforms = load_metrics(transforms, in_dir)
    label_names, title_prefix = format_labels(transforms)
    invariance_sequences = format_invariances(all_metrics)
    equivariance_sequences = format_equivariances(all_metrics)
    plot_invariances(invariance_sequences, out_dir, label_names, title_prefix)
    plot_equivariances(equivariance_sequences, invariance_sequences, out_dir, label_names, title_prefix)
    plot_reductions(equivariance_sequences, invariance_sequences, out_dir, label_names, title_prefix)
    #plot_loss_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
    #plot_model_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
    plot_split_equivariance_compare(equivariance_sequences, out_dir, label_names, title_prefix)
def __init__(self, data_path, transform_type, preprocessing):
    """Test-time dataset over every file found directly in ``data_path``.

    Args:
        data_path: directory whose entries are the test images.
        transform_type: forwarded to ``utils.get_transforms``.
        preprocessing: preprocessing callable/config stored for later use.
    """
    super(CloudTestDataset, self).__init__()
    self.data_path = data_path
    self.transform_type = transform_type
    self.preprocessing = preprocessing
    # test phase: no resizing requested from the transform factory
    self.transformer = utils.get_transforms(phase='test',
                                            transform_type=transform_type,
                                            resize=None)
    self.img_names = os.listdir(data_path)
def get_loaders(cfg):
    """Getting dataloaders for train, validation (and test, if needed).

    Returns (trainloader, valloader, testloader) when cfg.test_size != 0,
    otherwise (trainloader, valloader). Validation and test splits both use
    the non-augmenting ``testforms`` transforms.
    """
    trainforms, testforms = get_transforms(cfg)
    # If test size is equal zero, we create the loaders only for train and validation parts,
    # otherwise we create the loaders for train, validation and test parts.
    if cfg.test_size != 0.0:
        trainimgs, traintargets, valimgs, valtargets, testimgs, testtargets = datagenerator(cfg)
        traindataset = CassavaDataset(cfg, trainimgs, traintargets, trainforms)
        valdataset = CassavaDataset(cfg, valimgs, valtargets, testforms)
        testdataset = CassavaDataset(cfg, testimgs, testtargets, testforms)
        trainloader = torch.utils.data.DataLoader(traindataset, shuffle=cfg.train_shuffle, batch_size=cfg.train_batchsize,
                                                  pin_memory=False, num_workers=cfg.num_workers, persistent_workers=True)
        valloader = torch.utils.data.DataLoader(valdataset, shuffle=cfg.val_shuffle, batch_size=cfg.val_batchsize,
                                                pin_memory=False, num_workers=cfg.num_workers, persistent_workers=True)
        testloader = torch.utils.data.DataLoader(testdataset, shuffle=cfg.test_shuffle, batch_size=cfg.test_batchsize,
                                                 pin_memory=False, num_workers=cfg.num_workers, persistent_workers=True)
        return trainloader, valloader, testloader
    else:
        trainimgs, traintargets, valimgs, valtargets = datagenerator(cfg)
        traindataset = CassavaDataset(cfg, trainimgs, traintargets, trainforms)
        valdataset = CassavaDataset(cfg, valimgs, valtargets, testforms)
        trainloader = torch.utils.data.DataLoader(traindataset, shuffle=cfg.train_shuffle, batch_size=cfg.train_batchsize,
                                                  pin_memory=False, num_workers=cfg.num_workers, persistent_workers=True)
        valloader = torch.utils.data.DataLoader(valdataset, shuffle=cfg.val_shuffle, batch_size=cfg.val_batchsize,
                                                pin_memory=False, num_workers=cfg.num_workers, persistent_workers=True)
        return trainloader, valloader
def main(args):
    """Extract a feature vector per image with a ResNet-50 and pickle the
    {filename-stem: feature} dict as '<mode>_image_features_0.pkl'."""
    # NOTE(review): `resnet50` must be a module-level model instance — its
    # definition is not visible in this chunk; confirm.
    model = resnet50
    model = model.cuda()
    # NOTE(review): the optimizer is never used below (no training loop) — confirm it's intentional
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    if args.mode == 'train':
        print('load train data')
        # NOTE(review): listing './train/...' but reading via DATASET_PATH below — verify the two roots agree
        file_list = os.listdir('./train/train_data/train_image')
        file_list_a = glob.glob(f'{DATASET_PATH}/*')
        print('file_list: ', file_list_a)
    else:
        print('load test data')
        file_list = os.listdir('./test/test_data/test_image')
    start_time = datetime.datetime.now()  # NOTE(review): recorded but never reported
    print('start extracting...!')
    y_pred_dict = {}
    for fname in file_list:
        img_name = os.path.join(DATASET_PATH, args.mode, args.mode + '_data', args.mode + '_image', fname)
        image = utils.default_loader(img_name)
        # get_transforms parses the transform pipeline from its string form
        data_transforms = utils.get_transforms(
            '[transforms.Resize((456, 232))]', verbose=False)
        # NOTE(review): the 'train' transform set is applied for both modes — confirm intended
        image = data_transforms['train'](image)
        image = image.unsqueeze(0)  # add the batch dimension
        image = image.cuda()
        # forward
        logits = model(image)
        # key is the filename without its 4-char extension (e.g. '.jpg')
        y_pred_dict[fname[:-4]] = logits.cpu().squeeze().numpy()
        if len(y_pred_dict) % 100 == 0:
            print('current stack size : ', len(y_pred_dict),
                  round(len(y_pred_dict) / len(file_list), 2) * 100, '%')
    print('extraction is done')
    dict_save_name = args.mode + '_image_features_0.pkl'
    with open(dict_save_name, 'wb') as handle:
        pickle.dump(y_pred_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print('done')
def resnet_feature_extractor(phase):
    """Extract frozen ResNet-50 pooled features for every image in the given
    split ('train' or anything else -> 'test') and pickle the result as
    '<phase>_image_features_50.pkl'."""
    resnet50 = models.resnet50(pretrained=True)
    # drop the final fc layer so the network outputs pooled features
    modules = list(resnet50.children())[:-1]
    resnet50 = nn.Sequential(*modules)
    model = resnet50
    model = model.cuda()
    #optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # inference only: freeze every weight
    for p in resnet50.parameters():
        p.requires_grad = False
    if phase == 'train':
        print('load train data')
        file_list = os.listdir(DATASET_PATH + '/train/train_data/train_image')
    else:
        print('load test data')
        file_list = os.listdir(DATASET_PATH + '/test/test_data/test_image')
    print('start extracting...!')
    y_pred_dict = {}
    for fname in file_list:
        img_name = os.path.join(DATASET_PATH, phase, phase + '_data', phase + '_image', fname)
        image = utils.default_loader(img_name)
        data_transforms = utils.get_transforms(
            '[transforms.Resize((456, 232))]', verbose=False)
        # NOTE(review): the 'train' transform set is applied for both phases — confirm intended
        image = data_transforms['train'](image)
        image = image.unsqueeze(0)  # add the batch dimension
        image = image.cuda()
        # forward
        logits = model(image)
        # key is the filename without its 4-char extension (e.g. '.jpg')
        y_pred_dict[fname[:-4]] = logits.cpu().squeeze().numpy()
        if len(y_pred_dict) % 100 == 0:
            print('current stack size : ', len(y_pred_dict),
                  round(len(y_pred_dict) / len(file_list), 2) * 100, '%')
    print('extraction is done')
    dict_save_name = phase + '_image_features_50.pkl'
    with open(dict_save_name, 'wb') as handle:
        pickle.dump(y_pred_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print('done')
def test():
    """Run wheat-head detection over the test csv and visualize boxes whose
    score clears the detection threshold."""
    model = models.construct_models()
    WEIGHTS_FILE = '/checkpoints/bestmodel_may28.pt'
    weights = torch.load(WEIGHTS_FILE)
    model.load_state_dict(weights['state_dict'])
    model.to(device)
    test_df = pd.read_csv(test_csv)
    transform = utils.get_transforms('test')
    test_set = utils.WheatTestDataset(test_df, test_dir, transform)

    def collate_fn(batch):
        # detection samples have variable-size targets; keep per-field tuples
        return tuple(zip(*batch))
    test_loader = DataLoader(test_set, batch_size=4, shuffle=False,
                             num_workers=4, drop_last=False,
                             collate_fn=collate_fn)
    detection_threshold = 0.5
    model.eval()
    for images, image_ids in test_loader:
        images = list(image.to(device) for image in images)
        outputs = model(images)
        for i, image in enumerate(images):
            boxes = outputs[i]['boxes'].data.cpu().numpy()
            scores = outputs[i]['scores'].data.cpu().numpy()
            # keep only confident detections
            boxes = boxes[scores >= detection_threshold].astype(np.int32)
            scores = scores[scores >= detection_threshold]
            image_id = image_ids[i]
            # convert corners (x1, y1, x2, y2) to (x, y, w, h)
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
            visualizeHelper.vis_boxes(image, boxes, scores)
def extract_features(args):
    """Extract intermediate-layer features for every subfolder of
    ``args.dataset_path`` and store them per-subfolder in
    ``args.features_folder/<name>.h5`` (one HDF5 dataset per output key).

    Fixes vs. original: ``del dataset`` referenced a name that was never
    defined (NameError) and was removed; ``h5py.File`` is now opened
    explicitly in append mode ('a'), since h5py >= 3 defaults to read-only
    while this code writes datasets; ``n_processed`` is advanced once per
    batch (after the per-key loop) — incrementing it inside the key loop
    would corrupt write offsets whenever ``out`` has several keys.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = load_model(base_model_path=args.base_model_path,
                       model_checkpoint=args.model_checkpoint,
                       device=device)
    out_folder = args.features_folder
    os.makedirs(out_folder, exist_ok=True)
    subfolders = os.listdir(args.dataset_path)
    for n_ in tqdm(subfolders, total=len(subfolders), desc='Extracting features'):
        dataloader = DataLoader(
            dataset=TinyDataset(root=os.path.join(args.dataset_path, n_),
                                transform=get_transforms()),
            batch_size=args.batch_size,
            pin_memory=torch.cuda.is_available(),
            num_workers=8)
        nb_images = len(dataloader.dataset)
        n_processed = 0  # row offset of the next batch within each h5 dataset
        with torch.no_grad():
            ff = h5py.File(os.path.join(out_folder, n_ + '.h5'), 'a')
            for x in dataloader:
                out = model(x.to(device))
                for k in out.keys():
                    extracted = out[k]
                    # spatial feature maps are pooled to a flat vector;
                    # avg_pool/classifier outputs are already 2-D
                    if k != 'avg_pool' and k != 'classifier' and extracted.ndimension() > 2:
                        extracted = F.avg_pool2d(out[k], out[k].shape[-2:]).squeeze(3).squeeze(2)
                    batch_size, feature_dims = extracted.shape
                    if batch_size != out[k].shape[0]:
                        print('error', batch_size, out[k].shape)
                    dset = ff.require_dataset(k, (nb_images, feature_dims),
                                              dtype='float32',
                                              chunks=(50, feature_dims))
                    dset[n_processed:n_processed + batch_size, :] = extracted.to('cpu')
                # advance the offset once per batch, not once per key
                n_processed += batch_size
            ff.close()
        del dataloader
def test(self, args):
    """Evaluate ``self.model`` on the ISIC test set, compute per-class
    metrics, and record the correctly / incorrectly classified image names
    into ``self.one_search_data['test_data']``."""
    image_path = self.configer['testImagePath']
    label_path = self.configer['testLabelPath']
    test_csv = utils.get_csv_by_path_name(label_path)
    transforms = utils.get_transforms(args)
    isictest = ISICDataset(image_path, test_csv[0], transforms)
    isictest.__assert_equality__()
    testdata_loader = DataLoader(isictest, batch_size=1)
    self.model.eval()  # eval mode: disable dropout etc.
    y_list = []
    y_hat_list = []
    error_classified_num_list = []
    right_classified_num_list = []
    right_classified_image_list = []
    for idx, (x, y) in enumerate(testdata_loader):
        x = x.to(self.device)
        y_scalar = torch.argmax(y, dim=1)  # labels are one-hot; recover the class index
        y_hat = self.model.network(x)
        y_hat_scalar = torch.argmax(y_hat, dim=1)
        # if y_scalar.item() == y_hat_scalar.item():
        #     if not 'tp' + '_' + str(y_scalar.item()) in metrics.keys():
        #         metrics['tp' + '_' + str(y_scalar.item())] = 0
        #     metrics['tp' + '_' + str(y_scalar.item())] += 1
        # else:
        #     if not 'fn' + '_' + str(y_scalar.item()) in metrics.keys():
        #         metrics['fn' + '_' + str(y_scalar.item())] = 0
        #     metrics['fn' + '_' + str(y_scalar.item())] += 1
        y_list.append(y_scalar.item())
        y_hat_list.append(y_hat_scalar.item())
        # remember dataset indices of wrong/right predictions for reporting
        if y_scalar.item() != y_hat_scalar.item():
            error_classified_num_list.append(idx)
        else:
            right_classified_num_list.append(idx)
    class_number = y.size(1)  # number of classes, taken from the one-hot width
    metrics_dict = utils.calculate_test_metrics(y_list, y_hat_list, class_number)
    error_classified_image_list = utils.get_image_name_by_number(label_path, error_classified_num_list)
    right_classified_image_list = utils.get_image_name_by_number(label_path, right_classified_num_list)
    metrics_dict['ERROR LIST'] = error_classified_image_list
    metrics_dict['RIGHT LIST'] = right_classified_image_list
    self.visualizer.get_data_report(metrics_dict)
    self.one_search_data['test_data'] = metrics_dict
def set_transform_weights(args):
    """Tune a per-transform weight vector by voting over the tuning lmdbs.

    Returns a column vector (len(transforms), 1) of normalized vote weights,
    or None when no tuning lmdbs are given or the transform set is not fixed.

    Fixes vs. original: Python-2 ``print`` statements (syntax errors in this
    otherwise Python-3 file) converted to function calls, and ``tune_dbs`` is
    initialized before the ``try`` so the ``finally`` clause cannot hit a
    NameError when ``init_caffe``/``open_dbs`` raises.
    """
    # check if transform weights need to be done
    if args.tune_lmdbs == "":
        # no lmdb is provided for tuning the weights
        return None
    transforms, fixed_transforms = get_transforms(args.transform_file)
    if not fixed_transforms:
        # number of transforms varies by image, so no fixed set of weights
        return None
    tune_dbs = []
    try:
        caffenet = init_caffe(args)
        tune_dbs = open_dbs(args.tune_lmdbs.split(args.delimiter))
        weights = np.zeros(shape=(len(transforms),))
        num_total = 0
        done = False
        while not done:
            if num_total % args.print_count == 0:
                print("Tuned %d images" % num_total)
            num_total += 1
            # get the per-transform vote for the correct label
            ims, label = prepare_images(tune_dbs, transforms, args)
            votes = get_vote_for_label(ims, caffenet, label, args)
            weights += votes
            # check stopping criteria
            done = (num_total == args.max_images)
            for env, txn, cursor in tune_dbs:
                has_next = cursor.next()
                done |= (not has_next)  # set done if there are no more elements
        normalized = (weights / num_total)[:, np.newaxis]
        return normalized
    except Exception as e:
        traceback.print_exc()
        print(e)
        raise
    finally:
        if tune_dbs:
            close_dbs(tune_dbs)
def get_dataset(algo: str, meta_split='train'):
    """Retrieves dataset corresponding to parameters defined in config module
    for the meta learning algorithm used.

    Args:
        algo (str): Meta learning algorithm from [maml, fomaml,\
            meta-sgd, reptile]
        meta_split (str, optional): 'train' or 'test' split of the data.\
            Defaults to 'train'.

    Returns:
        Dataset: An instance of the Dataset class.
    """
    algo_cfg = cfg[algo]
    return FSSDataset(
        cfg['data_root'],
        algo_cfg['n_ways'],
        algo_cfg['train_shots'],
        algo_cfg['test_shots'],
        meta_split,
        get_transforms(),
    )
def create_train_dataloader(root='../data', batch_size=64):
    """Build a shuffled DataLoader over the FER2013 training split."""
    train_set = FER2013(root, mode='train', transform=get_transforms())
    return DataLoader(train_set, batch_size, shuffle=True)
def get_data_loader(root, phase, batch_size=16, verbose=True):
    """Create an AIRUSH2 DataLoader for one phase.

    Args:
        root: path to the split's csv file.
        phase: 'train', 'test' or 'infer'.
        batch_size: loader batch size.
        verbose: forwarded to get_transforms.

    Returns:
        (dataloader, dataset_size) tuple.

    Raises:
        ValueError: for any other phase value. (Fix vs. original:
        ``raise 'mode error'`` raises a str, which is itself a TypeError in
        Python 3 — replaced with a proper exception.)
    """
    csv_path = root
    data_transforms = get_transforms('[transforms.Resize((456, 232))]',
                                     verbose=verbose)
    if phase == 'train':
        print('[debug] data local loader ', phase)
        built_in_args = {'mode': 'train',
                         'use_sex': True,
                         'use_age': True,
                         'use_exposed_time': True,
                         'use_read_history': True,  # False
                         'num_workers': 2,
                         }
        image_datasets = AIRUSH2dataset(
            csv_path,
            os.path.join(DATASET_PATH, 'train', 'train_data', 'train_image'),
            args=built_in_args,
            transform=data_transforms,
            mode='train'
        )
        dataset_sizes = len(image_datasets)
        # NOTE(review): computed but not returned/used here — confirm needed
        max_length = image_datasets.get_max_length()
        dataloaders = torch.utils.data.DataLoader(
            image_datasets,
            batch_size=batch_size,
            shuffle=(built_in_args['mode'] == 'train'),
            pin_memory=False,
            num_workers=built_in_args['num_workers'])
        return dataloaders, dataset_sizes
    elif phase == 'test':
        print('[debug] data local loader ', phase)
        built_in_args = {'mode': 'test',
                         'use_sex': True,
                         'use_age': True,
                         'use_exposed_time': True,
                         'use_read_history': True,  # False,
                         'num_workers': 3,
                         }
        image_datasets = AIRUSH2dataset(
            csv_path,
            os.path.join(DATASET_PATH, 'test', 'test_data', 'test_image'),
            args=built_in_args,
            transform=data_transforms,
            mode='test'
        )
        dataset_sizes = len(image_datasets)
        dataloaders = torch.utils.data.DataLoader(
            image_datasets,
            batch_size=batch_size,
            shuffle=False,
            pin_memory=False,
            num_workers=built_in_args['num_workers'])
        return dataloaders, dataset_sizes
    elif phase == 'infer':
        print('[debug] data local loader ', phase)
        # infer reads the test images but skips read-history features
        built_in_args = {'mode': 'infer',
                         'use_sex': True,
                         'use_age': True,
                         'use_exposed_time': True,
                         'use_read_history': False,
                         'num_workers': 8,
                         }
        image_datasets = AIRUSH2dataset(
            csv_path,
            os.path.join(DATASET_PATH, 'test', 'test_data', 'test_image'),
            args=built_in_args,
            transform=data_transforms,
            mode='test'
        )
        dataset_sizes = len(image_datasets)
        dataloaders = torch.utils.data.DataLoader(
            image_datasets,
            batch_size=batch_size,
            shuffle=False,
            pin_memory=False,
            num_workers=built_in_args['num_workers'])
        return dataloaders, dataset_sizes
    else:
        raise ValueError('mode error')
# Top-level evaluation script: reload a previously trained model and run it
# over the ISIC test set.
options = TestOptions()
logger = DataRecorder()
configer = Configer().get_configer()
args = options.get_args()
model = Model(args)
# load model being trained previously
model.load_model(args.date, args.time)
image_path = configer['testImagePath']
label_path = configer['testLabelPath']
test_csv = utils.get_csv_by_path_name(label_path)
dataprober = DataProber(image_path, test_csv[0])
# dataprober.get_size_profile()
# dataprober.get_type_profile()
# dataprober.get_data_difference()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
transforms = utils.get_transforms(args)
visualizer = Visualizer()
isic = ISICDataset(image_path, test_csv[0], transforms)
testdata_loader = DataLoader(isic, batch_size=args.batchsize)
model.network.eval()  # eval mode: disable dropout etc.
y_list = []
y_hat_list = []
# error_classified_num_list = []
# error_classified_image_list = []
# right_classified_num_list = []
# right_classified_image_list = []
for idx, (x, y) in enumerate(testdata_loader):
    x = x.to(device)
    y_scalar = torch.argmax(y, dim=1)  # labels are one-hot; recover the index
    # NOTE(review): loop body appears truncated in this chunk — nothing is
    # appended to y_list / y_hat_list here; the remainder likely follows.
def main():
    """Train a timm image classifier on the leaf dataset with catalyst's
    SupervisedRunner, using either weighted or plain random sampling.

    Fixes vs. original: a redundant ``compute_dataset_weights`` call before
    the sampling branch was removed (its result was unused / recomputed), and
    an unknown ``cfg['sampling']`` value now raises ``ValueError`` instead of
    leaving ``train_loader`` unbound (UnboundLocalError later).
    """
    # setup config
    cfg = config()
    cfg['device'] = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    # encode the experiment settings into the log directory name
    timestr = time.strftime("%Y%m%d-%H%M%S")
    cfg['logdir'] += f"{cfg['arch']}_"
    cfg['logdir'] += f"{cfg['exp_idx']}_"
    cfg['logdir'] += f"{cfg['input_size']}_"
    cfg['logdir'] += f"{cfg['criterion']}_"
    cfg['logdir'] += f"{cfg['optimizer']}_"
    cfg['logdir'] += f"split{cfg['data_split']}_"
    cfg['logdir'] += timestr
    set_global_seed(cfg['random_state'])
    pprint(cfg)
    # load data
    train_df = pd.read_csv(cfg['train_csv_path'])
    test_df = pd.read_csv(cfg['test_csv_path'])
    print(len(train_df), len(test_df))
    train_transforms, test_transforms = get_transforms(cfg['input_size'])
    train_dataset = LeafDataset(
        img_root=cfg['img_root'],
        df=train_df,
        img_transforms=train_transforms,
        is_train=True,
    )
    test_dataset = LeafDataset(
        img_root=cfg['img_root'],
        df=test_df,
        img_transforms=test_transforms,
        is_train=False,
    )
    print(
        f"Training set size:{len(train_dataset)}, Test set size:{len(test_dataset)}")
    # prepare train and test loader
    if cfg['sampling'] == 'weighted':
        # image weight based on statistics
        train_img_weights = compute_dataset_weights(train_df)
        # weighted sampler
        weighted_sampler = WeightedRandomSampler(
            weights=train_img_weights,
            num_samples=len(train_img_weights),
            replacement=False)
        # batch sampler from weigted sampler
        batch_sampler = BatchSampler(
            weighted_sampler, batch_size=cfg['batch_size'], drop_last=True)
        # train loader
        train_loader = DataLoader(
            train_dataset, batch_sampler=batch_sampler, num_workers=4)
    elif cfg['sampling'] == 'normal':
        train_loader = DataLoader(
            train_dataset, cfg['batch_size'], shuffle=True, num_workers=2)
    else:
        raise ValueError(f"unknown sampling mode: {cfg['sampling']}")
    test_loader = DataLoader(
        test_dataset, cfg['test_batch_size'], shuffle=False,
        num_workers=1, drop_last=True)
    loaders = {
        'train': train_loader,
        'valid': test_loader
    }
    # model setup
    model = timm.create_model(model_name=cfg['arch'],
                              num_classes=len(cfg['class_names']),
                              drop_rate=0.5,
                              pretrained=True)
    model.train()
    # loss
    if cfg['criterion'] == 'label_smooth':
        criterion = LabelSmoothingCrossEntropy()
    elif cfg['criterion'] == 'cross_entropy':
        criterion = nn.CrossEntropyLoss()
    # optimizer
    if cfg['optimizer'] == 'adam':
        optimizer = torch.optim.Adam(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])
    elif cfg['optimizer'] == 'adamw':
        optimizer = AdamW(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])
    elif cfg['optimizer'] == 'radam':
        optimizer = RAdam(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])
    # learning schedule
    if cfg['lr_schedule'] == 'reduce_plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.5, patience=4)
    # trainer
    runner = SupervisedRunner(device=cfg['device'])
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(
                num_classes=len(cfg['class_names']),
                threshold=0.5,
                activation="Softmax"
            ),
        ],
        logdir=cfg['logdir'],
        num_epochs=cfg['num_epochs'],
        verbose=cfg['verbose'],
        # set this true to run for 3 epochs only
        check=cfg['check'],
    )
def test_single_model(self, checkpoint_file, test_dir, test_csv, prediction_file_path='test_prediction.npy', ten_crop=False, prob=False):
    """Run one checkpointed model over the test set and save predictions.

    Args:
        checkpoint_file: weights file to load into a fresh model.
        test_dir: directory containing the test images.
        test_csv: csv describing the test set (ids in the first column).
        prediction_file_path: .npy output; each row is [id, prediction...].
        ten_crop: use ten-crop test-time augmentation (crops averaged).
        prob: save raw model outputs instead of the top-1 class index.
    """
    print('[+] checkpoint file:{0:s}'.format(checkpoint_file))
    transform = utils.get_transforms(mode='valid',
                                     input_size=self.input_size,
                                     resize_size=self.input_size + self.add_size)
    if (ten_crop):
        print('[+] Using Ten-Crop Testting strategy')
        transform = utils.get_transforms(mode='test',
                                         input_size=self.input_size,
                                         resize_size=self.input_size + self.add_size)
    # get the value of pd.DataFrame object
    test_array = pd.read_csv(test_csv).values
    test_dataset = utils.DYDataSet(test_dir, test_array, transform)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=self.batch_size,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)
    self.model = get_model(self.model_name, pretrained=False)
    load_model_multiGPU(self.model, checkpoint_file)
    # load_model(self.model, checkpoint_file)
    self.model = torch.nn.DataParallel(self.model).cuda()
    self.model.eval()
    all_idxs = []
    all_labels = []
    with torch.no_grad():
        print('testting total %d images' % len(test_dataset))
        for i, (input, labels) in enumerate(test_loader):
            # tensor type
            print('testting batch: %d/%d' % (i, len(test_dataset) / self.batch_size))
            input = input.cuda()
            if (ten_crop):
                # fold the ten crops into the batch dimension, then average
                # the per-crop outputs back to one prediction per image
                bs, ncrops, c, h, w = input.size()
                input = input.view(-1, c, h, w)
                output = self.model(input).view(bs, ncrops, -1).mean(1).view(bs, -1)
            else:
                output = self.model(input)
            # 2-D tensor
            if (not prob):
                pred = output.topk(1)[-1]  # pytorch tensor type
            else:
                pred = output
            all_idxs.append(labels)
            all_labels.append(pred.data.cpu())
    all_labels = torch.cat(all_labels, dim=0).numpy()
    all_idxs = torch.cat(all_idxs, dim=0).numpy().reshape(-1, 1)
    # first column is the sample id, remaining columns the prediction(s)
    res = np.concatenate((all_idxs, all_labels), axis=1)
    print('writing pred file %s ...' % prediction_file_path)
    np.save(prediction_file_path, res)
    print('done.')
def main():
    """Train an APTOS diabetic-retinopathy classifier with catalyst's
    SupervisedRunner, after rebalancing the training csv."""
    cfg = config()
    cfg['device'] = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    timestr = time.strftime("%Y%m%d-%H%M%S")
    cfg['logdir'] += timestr
    pprint(cfg)
    train_df, test_df = balance_data(cfg['train_csv_path'])
    # class distribution report (diagnosis codes 0..4)
    print("Train Stats:")
    print("No DR:", len(train_df[train_df['diagnosis'] == 0]))
    print("Mild:", len(train_df[train_df['diagnosis'] == 1]))
    print("Moderate:", len(train_df[train_df['diagnosis'] == 2]))
    print("Severe:", len(train_df[train_df['diagnosis'] == 3]))
    print("Proliferative DR:", len(train_df[train_df['diagnosis'] == 4]))
    print("\nTest Stats:")
    print("No DR:", len(test_df[test_df['diagnosis'] == 0]))
    print("Mild:", len(test_df[test_df['diagnosis'] == 1]))
    print("Moderate:", len(test_df[test_df['diagnosis'] == 2]))
    print("Severe:", len(test_df[test_df['diagnosis'] == 3]))
    print("Proliferative DR:", len(test_df[test_df['diagnosis'] == 4]))
    train_transforms, test_transforms = get_transforms()
    train_dataset = AptosDataset(
        img_root=cfg['img_root'],
        df=train_df,
        img_transforms=train_transforms,
        is_train=True,
    )
    test_dataset = AptosDataset(
        img_root=cfg['img_root'],
        df=test_df,
        img_transforms=test_transforms,
        is_train=False,
    )
    print(
        f"Training set size:{len(train_dataset)}, Test set size:{len(test_dataset)}"
    )
    train_loader = DataLoader(train_dataset, cfg['batch_size'],
                              shuffle=True, num_workers=1)
    test_loader = DataLoader(test_dataset, cfg['test_batch_size'],
                             shuffle=False, num_workers=1)
    loaders = {'train': train_loader, 'valid': test_loader}
    model = AptosModel(arch=cfg['arch'], freeze=cfg['freeze'])
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg['lr'])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.5,
                                                           patience=2)
    runner = SupervisedRunner(device=cfg['device'])
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(num_classes=cfg['num_classes'],
                             threshold=0.5,
                             activation="Sigmoid"),
            # PrecisionRecallF1ScoreCallback(
            #     class_names=cfg['class_names'],
            #     num_classes=cfg['num_classes']
            # )
        ],
        logdir=cfg['logdir'],
        num_epochs=cfg['num_epochs'],
        verbose=cfg['verbose'],
        # set this true to run for 3 epochs only
        check=cfg['check'])
# Inference script: load a trained SegNet checkpoint and run it over the test
# images to build RLE-style submission rows.
from image_dataset import ImageDataset
import utils
import os
from PIL import Image
import torch
import csv
import matplotlib.pyplot as plt

device = torch.device('cuda')
# NOTE(review): SegNet is not imported in this chunk — its import must live elsewhere; confirm.
model = SegNet()
model_dir = "wandb/run-20191017_073956-zukd8wh5/model.pt"
model.load_state_dict(torch.load(model_dir))
model.eval()
model = model.to(device)
transforms = utils.get_transforms(False)
shape = (1400, 2100, 3)  # raw image shape (H, W, C)
test_dataset = ImageDataset(utils.TEST_IMAGES, os.listdir(utils.TEST_IMAGES), None, transforms, shape, True)
batch_size = 1
# NOTE(review): shuffle=True on a test loader is unusual — confirm row order doesn't matter downstream.
data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
encodes = [["Image_Label", "EncodedPixels"]]  # csv header row for the submission
for i, data in enumerate(data_loader):
    image, path = data
    image = image.to(device)
    # model expects 350x525 inputs; reshape the batch accordingly
    out = model(image.view(-1, 3, 350, 525))
    out = out.cpu().detach().numpy()
    # NOTE(review): loop body appears truncated in this chunk — `encodes` is
    # never appended to here; the remainder likely follows.
def create_test_dataloader(root='../data', batch_size=1):
    """Build a sequential (unshuffled) DataLoader over the FER2013 test split."""
    test_set = FER2013(root, mode='test', transform=get_transforms())
    return DataLoader(test_set, batch_size, shuffle=False)
def main(args):
    """Measure invariance and equivariance metrics of a trained network
    under a list of input transforms, writing per-transform results to
    args.out_dir.  Transforms already present in out_dir are skipped.
    """
    log(args, str(args))
    safe_mkdir(args.out_dir)
    all_transforms, _ = get_transforms(args.transform_file)

    # don't redo work that we have already done
    all_transforms, do_first = filter_existing(all_transforms, args.out_dir)
    # Index 0 is the baseline transform; with only it remaining there is
    # nothing left to measure.
    if len(all_transforms) <= 1:
        log(args, "No transforms to do. Exiting...")
        exit()
    log(args, "Loaded Transforms. %d transforms" % len(all_transforms))

    model = init_model(args.network_file, args.weight_file, gpu=args.gpu)
    train_lmdbs = args.train_lmdbs.split(args.delimiter)
    test_lmdbs = args.test_lmdbs.split(args.delimiter)

    # Baseline activations are computed once up front and merged into every
    # partition's results below.
    base_transform = all_transforms[0]
    log(args, "Starting on Baseline Transform: %r\n" % base_transform)
    base_train_features, base_train_output_probs, base_train_classifications, _ = get_activations(
        model, [base_transform], train_lmdbs, args)
    base_test_features, base_test_output_probs, base_test_classifications, _ = get_activations(
        model, [base_transform], test_lmdbs, args)

    # Process transforms in chunks of args.num_transforms (memory bound).
    transform_partitions = partition_transforms(all_transforms, args.num_transforms)
    log(args, "Transform Partitions: %r" % transform_partitions)
    for transforms in transform_partitions:
        log(args, "Starting on Transforms: %r\n" % transforms)
        # transforms[0] is the baseline in every partition; skip recomputing
        # it by slicing with [1:] and merging the cached baseline dicts.
        train_features, train_output_probs, train_classifications, train_labels = get_activations(
            model, transforms[1:], train_lmdbs, args)
        train_features.update(base_train_features)
        train_output_probs.update(base_train_output_probs)
        train_classifications.update(base_train_classifications)

        test_features, test_output_probs, test_classifications, test_labels = get_activations(
            model, transforms[1:], test_lmdbs, args)
        test_features.update(base_test_features)
        test_output_probs.update(base_test_output_probs)
        test_classifications.update(base_test_classifications)

        log(args, "Measuring invariances...")
        train_invariance_metrics = measure_invariances(
            train_features, train_output_probs, train_classifications,
            train_labels, transforms, do_first, args)
        test_invariance_metrics = measure_invariances(
            test_features, test_output_probs, test_classifications,
            test_labels, transforms, do_first, args)
        log(args, "Done...")

        setup_scratch_space(args)
        log(args, "Measuring equivariances...")
        train_equivariance_metrics, test_equivariance_metrics = measure_equivariances(
            train_features, train_labels, train_classifications, train_output_probs,
            test_features, test_labels, test_classifications, test_output_probs,
            transforms, model, do_first, args)

        # The baseline (index 0) is only written on the first pass; later
        # partitions start at index 1.
        for transform in transforms[(0 if do_first else 1):]:
            write_output(args.out_dir, transform,
                         train_invariance_metrics[transform],
                         test_invariance_metrics[transform],
                         train_equivariance_metrics[transform],
                         test_equivariance_metrics[transform])
        do_first = False
        log(args, "Done Measure Equivariances")
        cleanup_scratch_space(args)

    log(args, "Exiting...")
    if args.log_file:
        args.log.close()
momentum=args.momentum, weight_decay=args.weight_decay) lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, scheduler_step, args.min_lr) train_idx, valid_idx, _, _ = train_test_split(train_df.index, train_df['split_label'], test_size=0.2, random_state=43) train_data = SteelDataset(train_df.iloc[train_idx], mode='train', fine_size=args.fine_size, pad_left=args.pad_left, pad_right=args.pad_right, transforms=get_transforms()) train_loader = DataLoader( train_data, shuffle=RandomSampler(train_data), batch_size=args.batch_size, num_workers=0, #cpu_count(), pin_memory=True) val_data = SteelDataset(train_df.iloc[valid_idx], mode='valid', fine_size=args.fine_size, pad_left=args.pad_left, pad_right=args.pad_right, transforms=get_transforms()) val_loader = DataLoader( val_data,
from torch.utils.data import Dataset, DataLoader from torchvision.datasets import CIFAR10 import torch.nn.functional as F from resnet import * from arguments import parse_args from utils import get_optimizer, get_transforms args = parse_args() device = f'cuda:{args.gpu_id[0]}' args.device = torch.device(device) print(args) print(torch.cuda.get_device_name(0)) transform_train, transform_test = get_transforms() trainset = CIFAR10(root=args.data_dir, train=True, download=True, transform=transform_train) trainloader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) testset = CIFAR10(root=args.data_dir, train=False, download=True, transform=transform_test) testloader = DataLoader(testset,
def prepare_image(img): img_dict = {'image': img, 'label': 0} _, val_tf = get_transforms() img_dict = val_tf(img_dict) img_dict['image'] = img_dict['image'].unsqueeze(0) return img_dict
def main(): global model # setup config cfg = config() cfg['device'] = torch.device( "cuda" if torch.cuda.is_available() else "cpu") # cfg['logdir'] += timestr set_global_seed(cfg['random_state']) pprint(cfg) # load data train_df = pd.read_csv(cfg['train_csv_path']) test_df = pd.read_csv(cfg['test_csv_path']) print(len(train_df), len(test_df)) # train_img_weights = compute_dataset_weights(train_df) _, test_transforms = get_transforms(cfg['input_size']) # model setup if model is None: checkpoint_location1 = "./logs2/tf_efficientnet_b5_ns_exp10_456_labelsmooth_adamw_split2_20200511-045005" model = load_timm_model( cfg['arch'], len(cfg['class_names']), checkpoint_location1+"/checkpoints/best.pth", location='cpu') model.to(cfg['device']) model.eval() print("Run on test set ...") test_true, test_pred, test_probs, misses = run_evaluation( './processed/test2.csv', cfg['img_root'], test_transforms, cfg['device']) misses_df = pd.DataFrame.from_dict( misses, orient='index', columns=['y_true', 'y_pred']) misses_df.index.name = 'image_id' misses_df = misses_df.reset_index() print("Number of miss:", len(misses_df)) print(classification_report(test_true, test_pred, target_names=cfg['class_names'])) print( f"ROC AUC Score:{roc_auc_score(test_true, np.array(test_probs), multi_class='ovr')}") print("Creating CM ...") cm = confusion_matrix(test_true, test_pred) fig, ax = plt.subplots(figsize=(12, 12)) sns.heatmap(cm, annot=True,) plt.title('Confustion Matrix for prediction') plt.savefig(f"./result/{cfg['arch']}_exp{cfg['exp_idx']}_cm.pdf") print("Plotting Misses ....") plot_misses(misses_df, cfg['img_root'], f"./result/{cfg['arch']}_exp{cfg['exp_idx']}_misses.pdf") print("Run on eval set ...") submission_dict = run_on_held_out( cfg['eval_csv_path'], cfg['test_img_root'], test_transforms, cfg['device']) print("Writing submissions...") submission_df = pd.DataFrame.from_dict( submission_dict, orient='index', columns=cfg['class_names']) submission_df.index.name = 'image_id' 
submission_df.to_csv(f"./result/submission_{cfg['arch']}_{cfg['exp_idx']}.csv")
import utils import glob from PIL import Image import os.path print("Loading models...") models = [] for model_path in glob.glob("models/*.pth"): print(f"Loading '{model_path}'...", end=' ') model = cycleganime.CycleGANime(n_blocks=15,ngf=128) model.load_weights(model_path) models.append(model) print("done") print("Loading transforms...", end=' ') transform = utils.get_transforms() print("done") print("Starting inference") start_time = time.time() im_paths = glob.glob("test_images/*.jpg") for im_path in im_paths: print(im_path) im = Image.open(im_path).convert('RGB') im = im.resize((256,256), Image.BICUBIC) im = Image.fromarray(utils.set_im_mean(np.array(im), mean=185)) im = transform(im) basename = os.path.basename(im_path) for idx, model in enumerate(models): print(f"model {idx} starting...") start_time = time.time()
def run_inference(save_output_path, data_path, device): experiment_path = "best_model/" with open(os.path.join(experiment_path, "config.yaml")) as config_file: config = yaml.full_load(config_file) config["experiment_path"] = experiment_path config["model"]["weights_path"] = "last_model.h5" config["save_outs"] = True config["use_tta"] = True config["save_output_path"] = save_output_path if not os.path.exists(save_output_path): os.makedirs(save_output_path) test_transform = get_transforms(config["val"]["transform"]) test_ds = ImageNetSegmentationTest(data_path, transform=test_transform) test_dl = torch.utils.data.DataLoader(test_ds, batch_size=1, shuffle=True, num_workers=12, drop_last=True) model_path = os.path.join(config["experiment_path"], config["model"]["weights_path"]) model = get_network(config["model"]) state_dict = torch.load(model_path, map_location=device) model.load_state_dict(state_dict) model = model.to(device) model.eval() with torch.no_grad(): for X, name, orig_im in tqdm.tqdm(test_dl): list_saved_names = os.listdir(config["save_output_path"]) if name[0] + ".png" not in list_saved_names: X = X.to(device) if config["use_tta"]: y_pred = tta_model_predict(X, model) else: y_pred = model(X)["out"] y_pred = F.interpolate(y_pred, orig_im.size()[1:-1], mode="nearest") if config["save_outs"]: img_pred = np.argmax(y_pred.cpu().numpy().squeeze(), axis=0) cv2.imwrite( os.path.join(config["save_output_path"], name[0] + ".png"), img_pred.astype(np.uint8), ) del X del y_pred del orig_im torch.cuda.empty_cache() print("Inference completed!")
def main(args):
    """Evaluate a Caffe network over test LMDBs under a set of input
    transforms, reporting a confusion matrix, per-transform accuracy, and
    overall accuracy.
    """
    log(args, str(sys.argv))

    # load transforms from file
    log(args, "Loading transforms")
    transforms, fixed_transforms = get_transforms(args.transform_file)
    log(args, "Fixed Transforms: %s" % str(fixed_transforms))

    # get per-transform weights. Can be none if transforms produce variable
    # numbers of images, or no lmdb is provided to tune the weights
    log(args, "Setting the transform weights...")
    weights = set_transform_weights(args)
    weight_str = np.array_str(weights, max_line_width=80, precision=4) if weights is not None else str(weights)
    log(args, "Weights: %s" % weight_str)

    log(args, "Initializing network for testing")
    caffenet = init_caffe(args)
    log(args, "Opening test lmdbs")
    test_dbs = open_dbs(args.test_lmdbs.split(args.delimiter))

    try:
        # set up the class confusion matrix
        num_output = caffenet.blobs["prob"].data.shape[1]
        # FIX: np.int was removed in NumPy 1.20; the builtin int is the
        # exact equivalent here.
        conf_mat = np.zeros(shape=(num_output, num_output), dtype=int)

        num_total = 0
        num_correct = 0
        all_num_correct = np.zeros(shape=(len(transforms),))
        done = False
        while not done:
            if num_total % args.print_count == 0:
                # FIX: was a Python-2 print statement, a syntax error in
                # this otherwise Python-3 file.
                print("Processed %d images" % num_total)
            num_total += 1
            ims, label = prepare_images(test_dbs, transforms, args)
            predicted_label, all_predictions = predict(ims, caffenet, args, weights)

            # keep track of correct predictions
            if predicted_label == label:
                num_correct += 1
            conf_mat[label, predicted_label] += 1

            # compute per-transformation accuracy
            if all_predictions.shape[0] == all_num_correct.shape[0]:
                all_num_correct[all_predictions == label] += 1

            # check stopping criteria
            done = (num_total == args.max_images)
            for env, txn, cursor in test_dbs:
                has_next = cursor.next()
                done |= (not has_next)  # set done if there are no more elements

        overall_acc = float(num_correct) / num_total
        transform_accs = all_num_correct / num_total

        log(args, "Done")
        log(args, "Conf Mat:\n %r" % conf_mat)
        log(args, "\nTransform Accuracy:\n %r" % transform_accs)
        log(args, "\nOverall Accuracy: %f" % overall_acc)
    except Exception as e:
        traceback.print_exc()
        # FIX: Python-2 `print e` -> function call.
        print(e)
        raise
    finally:
        # Always release the LMDB handles and the log file.
        close_dbs(test_dbs)
        if args.log_file:
            args.log.close()
def setUp(self): self.env = gym.envs.make("Breakout-v0") self.tfs = utils.get_transforms() self.num_obs_in_state = 4
def extract_features(feature_extractor, data_dir, data_csv, prediction_file_path): print('[+] Using Ten-Crop Extracting strategy') transform = utils.get_transforms( mode='test', input_size=args.input_size, resize_size=args.input_size+args.add_size) data_array = pd.read_csv(data_csv).values dataset = utils.DYDataSet( data_dir, data_array, transform ) data_loader = torch.utils.data.DataLoader( dataset, batch_size=args.batch_size, shuffle=False, num_workers=4, pin_memory=True) feature_extractor = torch.nn.DataParallel(feature_extractor).cuda() feature_extractor.eval() all_labels = [] all_fts = [] with torch.no_grad(): print('extracting total %d images' % len(dataset)) for i, (input, labels) in enumerate(data_loader): # tensor type print('extracting batch: %d/%d' % (i, len(dataset)/args.batch_size)) bs, ncrops, c, h, w = input.size() input = input.view(-1, c, h, w).cuda() output = feature_extractor(input) output = output.view( bs, ncrops, -1).mean(1).view(bs, -1) # view to 2-D tensor all_labels.append(labels) all_fts.append(output.data.cpu()) if((i+1) % 800 == 0): all_labels = torch.cat( all_labels, dim=0).numpy().reshape(-1, 1) all_fts = torch.cat(all_fts, dim=0).numpy() print(f'[+] features shape: {all_fts.shape}') res = np.concatenate((all_fts, all_labels), axis=1) print(f'[+] save npy shape: {res.shape}') part = (i+1)/800 fts_file_name = prediction_file_path+'.' + str(part) print('[+] writing fts file: %s, part %d ...' % (fts_file_name, part)) np.save(fts_file_name, res) all_labels = [] all_fts = [] all_labels = torch.cat( all_labels, dim=0).numpy().reshape(-1, 1) all_fts = torch.cat(all_fts, dim=0).numpy() print(f'[+] features shape: {all_fts.shape}') res = np.concatenate((all_fts, all_labels), axis=1) print(f'[+] save npy shape: {res.shape}') part = (int(len(dataset)/args.batch_size))/800+1 fts_file_name = prediction_file_path+'.' + str(part) print('[+] writing fts file: %s, part %d ...' % (fts_file_name, part)) np.save(fts_file_name, res)
def main():
    """Train a DQN agent on an Atari environment (default Breakout) with an
    epsilon-greedy policy, replay memory, and a periodically-synced target
    network."""
    parser = argparse.ArgumentParser(description='DQN Breakout Script')
    parser.add_argument('--use-cuda', action='store_true', default=False,
                        help='whether to use CUDA (default: False)')
    parser.add_argument('--batch-size', type=int, default=128, metavar='M',
                        help='batch size (default: 128)')
    parser.add_argument('--gamma', type=float, default=0.999, metavar='M',
                        help='gamma (default: 0.999)')
    parser.add_argument('--eps-start', type=float, default=0.9, metavar='M',
                        help='eps start (default: 0.9)')
    parser.add_argument('--eps-end', type=float, default=0.05, metavar='M',
                        help='eps end (default: 0.05)')
    parser.add_argument('--eps-decay', type=int, default=200, metavar='M',
                        help='eps decay (default: 200)')
    parser.add_argument('--num-obs-in-state', type=int, default=4, metavar='M',
                        help='num observations in state (default: 4)')
    parser.add_argument('--replay-memory-capacity', type=int, default=10000,
                        metavar='M',
                        help='replay memory capacity (default: 10000)')
    parser.add_argument('--num-episodes', type=int, default=10, metavar='M',
                        help='num of episodes (default: 10)')
    parser.add_argument('--reset-period', type=int, default=5, metavar='M',
                        help='period to reset target network (default: 5)')
    parser.add_argument('--atari-env', type=str, default='Breakout-v0',
                        metavar='M',
                        help='Atari environment to use (default: Breakout-v0)')
    args = parser.parse_args()

    env = gym.envs.make(args.atari_env)
    # BUG FIX: Atari environments have a Discrete action space whose .shape
    # is the empty tuple, so action_space.shape[0] raised IndexError.  The
    # action count lives in action_space.n.
    num_actions = env.action_space.n
    model = DQN(args.num_obs_in_state, (84, 84), num_actions)
    model_target = DQN(args.num_obs_in_state, (84, 84), num_actions)
    if args.use_cuda:
        model.cuda()
        model_target.cuda()
    optimizer = optim.RMSprop(model.parameters())
    memory = ReplayMemory(args.replay_memory_capacity)
    # Linear epsilon schedule from eps_start down to eps_end.
    epsilons = np.linspace(args.eps_start, args.eps_end, args.eps_decay)

    step_idx = 1
    reset_idx = 1
    tfs = get_transforms()
    episode_reward = 0.
    episode_length = 0
    for i_episode in range(args.num_episodes):
        # Initialize the environment and state
        obs = env.reset()
        state_processor = StateProcessor(args.num_obs_in_state, tfs, obs)
        state = state_processor.get_state()
        while True:
            episode_length += 1
            if step_idx < args.eps_decay:
                eps = epsilons[step_idx]
            else:
                eps = args.eps_end
            action = select_action(model, state, num_actions, eps,
                                   args.use_cuda)
            # print('%d %d' % (episode_length, action[0,0]))
            next_obs, reward, done, info = env.step(action[0, 0])
            episode_reward += reward
            reward = torch.Tensor([reward])
            if args.use_cuda:
                reward = reward.cuda()
            if not done:
                state_processor.push_obs(next_obs)
                next_state = state_processor.get_state()
            else:
                next_state = None  # None next_state marks done
            memory.push(state, action, next_state, reward)
            # BUG FIX: the original never advanced `state`, so actions were
            # always selected from the episode's initial state.
            state = next_state

            # optimize
            optimize_model(optimizer, memory, model, model_target,
                           args.batch_size, args.gamma, args.use_cuda)
            step_idx += 1
            reset_idx += 1
            # Periodically sync the target network with the online network.
            if reset_idx == args.reset_period:
                reset_idx = 1
                model_target.load_state_dict(model.state_dict())
            if done:
                break
        print(episode_reward)
        print(episode_length)
        episode_reward = 0.
        episode_length = 0