def main(arguments):
    """Run the workflow selected by ``arguments.operation``.

    ``'train'`` loads and normalizes the training set, holds out 30% of it
    for validation, reshapes the features to 3-D, then builds and trains the
    LSTM model.  ``'test'`` loads and normalizes the test set and calls
    ``lstm_class.predict``.  Any other value is silently ignored.

    Parameters
    ----------
    arguments : namespace
        Must provide ``operation``.  The train branch reads ``train_dataset``,
        ``checkpoint_path``, ``save_model`` and ``result_path``; the test
        branch reads ``test_dataset``, ``load_model`` and ``result_path``.
    """
    if arguments.operation == 'train':
        # fix random seed for reproducibility
        seed = 7
        print(seed)
        np.random.seed(seed)

        # get the train data
        train_features, train_labels = data.load_data(
            dataset=arguments.train_dataset)

        # numerizing/normalizing the train dataset/labels on scale [0, 1];
        # returns numpy arrays
        train_features, train_labels = data.normalize(
            train_features, train_labels)

        # split into 70% for train and 30% for validation
        (train_features, validation_features,
         train_labels, validation_labels) = train_test_split(
            train_features, train_labels, test_size=0.30, random_state=seed)

        # reshaping to 3d so the data fit into the lstm model
        # if you are using an embedding layer as first layer then you can
        # comment out the next two statements
        train_features = np.reshape(
            train_features,
            (train_features.shape[0], 1, train_features.shape[1]))
        validation_features = np.reshape(
            validation_features,
            (validation_features.shape[0], 1, validation_features.shape[1]))

        print("Prining Training Features Shape:")
        print(train_features.shape)
        print("Labels")
        print(train_labels.shape)
        print("Printing Validation Features Shape:")
        print(validation_features.shape)
        print("Labels")
        print(validation_labels.shape)

        # Build the wrapper once and reuse it for both model creation and
        # training instead of constructing two identical instances.
        network = lstm_class(alpha=LEARNING_RATE, batch_size=BATCH_SIZE,
                             cell_size=CELL_SIZE, dropout=DROPOUT,
                             sequence_length=SEQUENCE_LENGTH)

        # create model
        model = lstm_class.create_model(network)

        # train model
        lstm_class.train(network,
                         checkpoint_path=arguments.checkpoint_path,
                         batch_size=BATCH_SIZE,
                         model=model,
                         model_path=arguments.save_model,
                         epochs=HM_EPOCHS,
                         X_train=train_features,
                         y_train=train_labels,
                         X_val=validation_features,
                         y_val=validation_labels,
                         result_path=arguments.result_path)
    elif arguments.operation == 'test':
        # get the test data
        print("Loading Test Data...")
        test_features, test_labels = data.load_data(
            dataset=arguments.test_dataset)

        # numerizing/normalizing the test dataset/labels on scale [0, 1];
        # returns numpy arrays
        print("Normallizing Data...")
        test_features, test_labels = data.normalize(
            test_features, test_labels)

        # reshaping to 3d so the data match the trained shape of our model
        # if you trained a model starting with an embedding layer then keep
        # the next line commented out
        # test_features = np.reshape(test_features, (test_features.shape[0], 1, test_features.shape[1]))
        lstm_class.predict(batch_size=BATCH_SIZE_TESTING,
                           X_test=test_features,
                           y_test=test_labels,
                           model_path=arguments.load_model,
                           result_path=arguments.result_path)
def __getitem__(self, batch_index):
    """Load one batch of images from disk together with one-hot labels.

    Images are read via ``self.read_img``, resized, and cropped (center crop
    in ``"valid"`` mode, random crop otherwise).  Outside ``"valid"`` mode the
    per-image augmentation selected by ``self.augment`` is applied, followed
    by optional batch-level cutmix/mixup.  When ``self.args.pretrain`` is
    truthy each image is passed through ``normalize`` ('caffe' mode for model
    names starting with "Res", 'tf' mode otherwise).

    Returns
    -------
    (batch_imgs, one_hot_batch_labels)
    """
    first = batch_index * self.batch_size
    last = (batch_index + 1) * self.batch_size
    sample_ids = self.data_index[first:last]
    batch_img_paths = self.img_path_list[sample_ids]
    batch_labels = self.label_list[sample_ids]

    one_hot_batch_labels = np.zeros([len(batch_img_paths), self.num_class])
    validating = self.mode == "valid"

    def pretrain_normalize(image):
        # Only pretrained backbones need their matching input normalization.
        if not self.args.pretrain:
            return image
        if self.args.model[0:3] == "Res":
            return normalize(image, mode='caffe')
        return normalize(image, mode='tf')

    collected = []
    for row, (path, label) in enumerate(zip(batch_img_paths, batch_labels)):
        if validating:
            img = self.read_img(path)
            img = self.resize(img, self.resize_size)
            img = self.center_crop_transform(image=img)['image']
        else:
            img = self.read_img(path).astype(np.uint8)
            img = self.resize(img, self.resize_size)
            img = self.random_crop_transform(image=img)['image']
            if self.augment == 'auto_augment':
                img = self.auto_augment.distort(tf.constant(img)).numpy()
            elif self.augment == 'rand_augment':
                img = self.rand_augment.distort(tf.constant(img)).numpy()
            elif self.augment == 'custom_augment':
                img = train_augment(img)
        collected.append(pretrain_normalize(img))
        one_hot_batch_labels[row, label] = 1

    batch_imgs = np.array(collected)

    if not validating:
        # Batch-level mixing augmentations are training-only.
        if self.augment == 'cutmix':
            batch_imgs, one_hot_batch_labels = cutmix(
                batch_imgs, one_hot_batch_labels, 3)
        elif self.augment == 'mixup':
            batch_imgs, one_hot_batch_labels = mixup(
                batch_imgs, one_hot_batch_labels, 1)

    return batch_imgs, one_hot_batch_labels
def __getitem__(self, batch_index):
    """Return one batch of in-memory images with one-hot labels.

    In ``"valid"`` mode images are used as stored.  In any other mode each
    image is zero-padded by 4 pixels on both spatial axes and passed through
    ``self.train_transform``.  When ``self.args.pretrain`` is truthy each
    image is additionally passed through ``normalize`` ('caffe' mode for
    model names starting with "Res", 'tf' mode otherwise).

    Batch-level cutmix/mixup is not applied by this loader.

    Returns
    -------
    (batch_imgs, one_hot_batch_labels)
    """
    lo = batch_index * self.batch_size
    hi = (batch_index + 1) * self.batch_size
    picked = self.data_index[lo:hi]
    cur_batch_imgs = self.imgs[picked]
    cur_batch_labels = self.labels[picked]

    one_hot_batch_labels = np.zeros([len(cur_batch_imgs), self.num_class])
    training_mode = self.mode != "valid"

    prepared = []
    for row, (img, label) in enumerate(zip(cur_batch_imgs, cur_batch_labels)):
        if training_mode:
            # Pad height and width only; the channel axis is left untouched.
            img = np.pad(img, ((4, 4), (4, 4), (0, 0)))
            img = self.train_transform(image=img)['image']
        if self.args.pretrain:
            norm_mode = 'caffe' if self.args.model[0:3] == "Res" else 'tf'
            img = normalize(img, mode=norm_mode)
        prepared.append(img)
        one_hot_batch_labels[row, label] = 1

    batch_imgs = np.array(prepared)
    return batch_imgs, one_hot_batch_labels
def __getitem__(self, item):
    """Build one detection batch: images plus labels (or raw boxes).

    For every file index of batch ``item`` the image is read and resized with
    ``self.resize_fun``; its boxes are scaled and shifted by half the padding
    that ``resize_fun`` reports.  The augmentation selected by
    ``self.args.augment`` ('mosaic', 'only_flip_left_right' or
    'ssd_random_crop') is then applied, images are normalized and the first
    four box columns are divided by the image width/height.

    Returns
    -------
    ``(batch_img, batch_boxes, groundtruth_valids)`` when ``self.mode`` is
    ``'pred'``; otherwise ``(batch_img, y_true)`` where ``y_true`` comes from
    ``self.get_labels_fun.get_labels``.

    Notes
    -----
    The final batch of an epoch may hold fewer than ``args.batch_size``
    samples; all outputs are trimmed to the number actually loaded.
    """
    with tf.device("/cpu:0"):
        # `np.int` was removed in NumPy 1.24; it was an alias of the builtin
        # `int`, so this keeps the exact same dtype.
        groundtruth_valids = np.zeros([self.args.batch_size], int)
        # random_img_size = np.random.choice(self.args.multi_scale)
        random_img_size = self.img_size
        self.gluoncv_aug = aug_gluoncv.YOLO3DefaultTrainTransform(
            random_img_size, random_img_size)
        batch_img = np.zeros(
            [self.args.batch_size, random_img_size, random_img_size, 3])
        # Rows are overwritten per sample below; unused rows are trimmed off.
        batch_boxes = np.empty(
            [self.args.batch_size, self.args.max_box_num_per_image, 5])
        batch_boxes_list = []

        batch_file_indices = self.data_index[
            item * self.args.batch_size:(item + 1) * self.args.batch_size]
        for batch_index, file_index in enumerate(batch_file_indices):
            # get image from file
            img_path = self.img_path_list[file_index]
            img = self.read_img(img_path)
            img, scale, pad = self.resize_fun(
                img, (random_img_size, random_img_size))
            batch_img[batch_index, 0:img.shape[0], 0:img.shape[1], :] = img

            # Work on a copy so the stored annotations are never modified.
            boxes = copy.deepcopy(self.boxes_and_labels[file_index])
            boxes[:, 0:4] *= scale
            half_pad = pad // 2
            boxes[:, 0:4] += np.tile(half_pad, 2)
            batch_boxes_list.append(boxes)
            groundtruth_valids[batch_index] = boxes.shape[0]
            boxes = np.pad(
                boxes,
                [(0, self.args.max_box_num_per_image - boxes.shape[0]),
                 (0, 0)],
                mode='constant')
            batch_boxes[batch_index] = boxes

        # Number of samples actually loaded (smaller for the last batch).
        tail_batch_size = len(batch_boxes_list)

        # augment
        if self.args.augment == 'mosaic':
            # Every 4 source images are merged into one output image.
            # NOTE(review): a short final batch still feeds partially empty
            # groups to load_mosaic — confirm it tolerates that.
            new_batch_size = self.args.batch_size // 4
            for bi in range(new_batch_size):
                four_img, four_boxes, one_img, one_boxes = data_augment.load_mosaic(
                    batch_img[bi * 4:(bi + 1) * 4],
                    batch_boxes_list[bi * 4:(bi + 1) * 4])
                data_augment.random_hsv(one_img)
                data_augment.random_left_right_flip(one_img, one_boxes)
                groundtruth_valids[bi] = one_boxes.shape[0]
                one_boxes = np.pad(
                    one_boxes,
                    [(0, self.args.max_box_num_per_image - one_boxes.shape[0]),
                     (0, 0)],
                    mode='constant')
                batch_img[bi] = one_img
                batch_boxes[bi] = one_boxes
            batch_img = batch_img[0:new_batch_size]
            batch_boxes = batch_boxes[0:new_batch_size]
            # Drop the stale per-source-image counts so all three outputs
            # describe the same number of samples.
            groundtruth_valids = groundtruth_valids[0:new_batch_size]
        elif self.args.augment == 'only_flip_left_right':
            # Only touch rows that hold real samples.
            for bi in range(tail_batch_size):
                data_augment.random_left_right_flip(
                    batch_img[bi], batch_boxes[bi])
        elif self.args.augment == 'ssd_random_crop':
            batch_img = batch_img.astype(np.uint8)
            # Iterating over the full batch size would index past the end of
            # batch_boxes_list on a short final batch.
            for di in range(tail_batch_size):
                batch_img[di], batch_boxes_list[di] = self.gluoncv_aug(
                    batch_img[di], batch_boxes_list[di])
                batch_boxes[di] = np.pad(
                    batch_boxes_list[di],
                    [(0, self.args.max_box_num_per_image
                      - batch_boxes_list[di].shape[0]),
                     (0, 0)])
                groundtruth_valids[di] = batch_boxes_list[di].shape[0]

        batch_img = batch_img[0:tail_batch_size]
        batch_boxes = batch_boxes[0:tail_batch_size]
        groundtruth_valids = groundtruth_valids[0:tail_batch_size]

        batch_img = preprocess.normalize(batch_img)
        # Divide the four coordinates by (width, height, width, height).
        batch_boxes[..., 0:4] /= np.tile(batch_img.shape[1:3][::-1], [2])
        batch_img = batch_img.astype(np.float32)
        batch_boxes = batch_boxes.astype(np.float32)

        if self.mode == 'pred':
            return batch_img, batch_boxes, groundtruth_valids

        y_true = self.get_labels_fun.get_labels(
            random_img_size, batch_boxes, groundtruth_valids)
        return batch_img, y_true
def train(x_tr, y_tr, x_va, y_va, config):
    """Training function.

    Parameters
    ----------
    x_tr : ndarray
        Training data.

    y_tr : ndarray
        Training labels.

    x_va : ndarray
        Validation data.

    y_va : ndarray
        Validation labels.

    config : namespace
        Arguments and configurations parsed by `argparse`.  This function
        reads ``batch_size``, ``num_epoch`` and ``learning_rate`` and passes
        the namespace on to ``predict``, ``compute_loss`` and
        ``compute_grad``.

    Returns
    -------
    train_res : dictionary
        Training results stored in a dictionary file. It contains W and b
        from the epoch with the best validation accuracy (``W_best``,
        ``b_best``), that accuracy (``best_acc``), the average loss and
        accuracies per epoch (``loss_epoch``, ``tr_acc_epoch``,
        ``va_acc_epoch``) and the normalization statistics of the training
        data (``x_tr_mean``, ``x_tr_range``).

    Raises
    ------
    ValueError
        If the training set holds fewer samples than ``config.batch_size``.
    """

    # ----------------------------------------
    # Preprocess data

    # Report data statistic
    print("Training data before: mean {}, std {}, min {}, max {}".format(
        x_tr.mean(), x_tr.std(), x_tr.min(), x_tr.max()))

    # Normalize data using the normalize function. Note that we are
    # remembering the mean and the range of training data and applying that
    # to the validation/test data later on.
    x_tr_n, x_tr_mean, x_tr_range = normalize(x_tr)
    x_va_n, _, _ = normalize(x_va, x_tr_mean, x_tr_range)
    # Always a good idea to print some debug messages
    print("Training data after: mean {}, std {}, min {}, max {}".format(
        x_tr_n.mean(), x_tr_n.std(), x_tr_n.min(), x_tr_n.max()))

    # ----------------------------------------
    # Initialize parameters of the classifier
    print("Initializing...")
    num_class = 10

    # Initialize W to very small random values.
    W = (np.random.rand(np.prod(x_tr_n.shape[1:]), num_class) - 0.5) * 0.002

    # Initialize b to zeros
    b = np.zeros(num_class)

    print("Testing...")

    def get_accuracy(p, t):
        return np.sum(p == t) / len(t)

    # Test on validation data
    prediction = predict(W, b, x_va_n, config)
    acc = get_accuracy(prediction, y_va)
    # `acc` is a fraction, so scale it before printing it as a percentage.
    print("Initial Validation Accuracy: {:.2f}%".format(acc * 100))

    batch_size = config.batch_size
    num_epoch = config.num_epoch
    num_batch = len(x_tr_n) // batch_size
    if num_batch == 0:
        raise ValueError(
            "batch_size ({}) is larger than the number of training samples "
            "({})".format(batch_size, len(x_tr_n)))

    # np.split needs an exact division, so leftover samples that do not fill
    # a whole batch are dropped.  The batches are views into the normalized
    # data and never change, so they are built once rather than per epoch.
    num_used = num_batch * batch_size
    x_batches = np.split(x_tr_n[:num_used], num_batch)
    y_batches = np.split(y_tr[:num_used], num_batch)

    loss_epoch = []
    tr_acc_epoch = []
    va_acc_epoch = []
    W_best = None
    b_best = None
    best_acc = 0

    # For each epoch
    for idx_epoch in range(num_epoch):
        # Create a random order to go through the batches
        order = np.arange(num_batch)
        np.random.shuffle(order)

        losses = np.zeros(num_batch)
        accs = np.zeros(num_batch)

        for idx_batch in range(num_batch):
            # Construct batch
            idx = order[idx_batch]
            y_b = np.copy(y_batches[idx])
            x_b = np.copy(x_batches[idx])

            # Get loss with compute_loss
            loss_cur, loss_c, pred_b = compute_loss(W, b, x_b, y_b, config)

            # Get gradient with compute_grad
            dW, db = compute_grad(W, x_b, y_b, loss_c, config)

            # Update parameters (in place)
            W -= (dW * config.learning_rate)
            b -= (db * config.learning_rate)

            # Record this batch's result
            losses[idx_batch] = loss_cur
            accs[idx_batch] = get_accuracy(pred_b, y_b)

        # Report average results within this epoch
        print("Epoch {} -- Train Loss: {}".format(idx_epoch, np.mean(losses)))
        print("Epoch {} -- Train Accuracy: {:.2f}%".format(
            idx_epoch, np.mean(accs) * 100))

        # Test on validation data and report results
        prediction = predict(W, b, x_va_n, config)
        acc = get_accuracy(prediction, y_va)
        print("Epoch {} -- Validation Accuracy: {:.2f}%".format(
            idx_epoch, acc * 100))

        # If best validation accuracy, update W_best, b_best, and best
        # accuracy. We will only return the best W and b.  W and b are
        # updated in place above, so snapshots must be copies; otherwise
        # the "best" parameters would silently track the latest ones.
        if acc > best_acc:
            W_best = W.copy()
            b_best = b.copy()
            best_acc = acc

        # Record per epoch statistics
        loss_epoch += [losses.mean()]
        tr_acc_epoch += [accs.mean()]
        va_acc_epoch += [acc]

    # Pack results. Remember to pack pre-processing related things here as
    # well
    train_res = {
        'W_best': W_best,
        'b_best': b_best,
        'best_acc': best_acc,
        'loss_epoch': loss_epoch,
        'tr_acc_epoch': tr_acc_epoch,
        'va_acc_epoch': va_acc_epoch,
        'x_tr_mean': x_tr_mean,
        'x_tr_range': x_tr_range
    }

    return train_res
def main(config):
    """The main function.

    Loads the CIFAR-10 train/test splits, extracts the features selected by
    ``config.feature_type`` ("hog", "h_histogram" or "rgb"), runs 5-fold
    cross validation (or only the first fold when ``config.cross_validate``
    is false), prints the average validation results and finally reports the
    test accuracy of the fold model with the best validation accuracy.

    Raises
    ------
    ValueError
        If ``config.feature_type`` is not one of the supported values.
    """

    # ----------------------------------------
    # Load cifar10 train data
    print("Reading training data...")
    data_trva, y_trva = load_data(config.data_dir, "train")

    # ----------------------------------------
    # Load cifar10 test data
    print("Reading test data...")
    data_te, y_te = load_data(config.data_dir, "test")

    # ----------------------------------------
    # Extract features
    print("Extracting Features...")
    if config.feature_type == "hog":
        # HOG features
        from utils.features import extract_hog
        x_trva = extract_hog(data_trva)
        x_te = extract_hog(data_te)
    elif config.feature_type == "h_histogram":
        # Hue Histogram features
        from utils.features import extract_h_histogram
        x_trva = extract_h_histogram(data_trva)
        x_te = extract_h_histogram(data_te)
    elif config.feature_type == "rgb":
        # raw RGB features
        x_trva = data_trva.astype(float).reshape(len(data_trva), -1)
        x_te = data_te.astype(float).reshape(len(data_te), -1)
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError(
            "Unknown feature_type: {!r}".format(config.feature_type))

    # ----------------------------------------
    # Create folds
    num_fold = 5

    # Randomly shuffle data and labels.
    x_trva, y_trva = _shuffle(x_trva, y_trva)

    # Reshape the data into 5x(N/5)xD, so that the first dimension is the
    # fold.  Samples that do not fill a whole fold are dropped so that the
    # reshape is always well defined.
    fold_size = len(x_trva) // num_fold
    x_trva = np.reshape(x_trva[:fold_size * num_fold],
                        (num_fold, fold_size, -1))
    y_trva = np.reshape(y_trva[:fold_size * num_fold],
                        (num_fold, fold_size))

    # Cross validation setup. If you set cross_validate as False, it will not
    # do all 5 folds, but report results only for the first fold. This is
    # useful when you want to debug.
    if config.cross_validate:
        va_fold_to_test = np.arange(num_fold)
    else:
        va_fold_to_test = np.arange(1)

    # ----------------------------------------
    # Cross validation loop
    train_res = []
    for idx_va_fold in va_fold_to_test:
        # Select train and validation. Notice that `idx_va_fold` will be
        # the fold that you use as validation set for this experiment
        x_tr = np.delete(x_trva, idx_va_fold, 0)
        x_tr = x_tr.reshape(-1, x_tr.shape[-1])
        y_tr = np.delete(y_trva, idx_va_fold, 0).reshape(-1)
        # Copies keep the folds independent of whatever train() does.
        x_va = x_trva[idx_va_fold].copy()
        y_va = y_trva[idx_va_fold].copy()

        # ----------------------------------------
        # Train
        print("Training for fold {}...".format(idx_va_fold))
        # Run training
        cur_train_res = train(x_tr, y_tr, x_va, y_va, config)

        # Save results
        train_res += [cur_train_res]

    # Average results to see the average performance for this set of
    # hyper parameters on the validation set. This will be used to see how
    # good the design was. However, this should all be done *after* you are
    # sure your implementation is working. Do check how the training is going
    # on by looking at `loss_epoch` `tr_acc_epoch` and `va_acc_epoch`
    losses = np.array([tr['loss_epoch'] for tr in train_res])
    # max() over the list itself also works when only one epoch was run
    # (unpacking a single value into max() raises TypeError).
    accs = np.array([max(tr['va_acc_epoch']) for tr in train_res])
    avg_loss = losses.mean()
    avg_acc = accs.mean()
    print('Average Loss: {}\nAverage Accuracy: {:.2f}%'.format(
        avg_loss, avg_acc * 100))

    # Find model with best validation accuracy and test it. Remember you
    # don't want to use this result to make **any** decisions. This is purely
    # the number that you show other people for them to evaluate your model's
    # performance.  max() returns the first of several equally good folds.
    best = max(train_res, key=lambda tr: tr['best_acc'])
    best_model_W = best['W_best']
    best_model_b = best['b_best']
    x_tr_mean = best['x_tr_mean']
    x_tr_range = best['x_tr_range']

    x_te_n, _, _ = normalize(x_te, x_tr_mean, x_tr_range)
    pred = predict(best_model_W, best_model_b, x_te_n, config)
    correct_pred_count = np.sum(pred == y_te)
    acc = (correct_pred_count / float(y_te.shape[0]))
    print("Testing Results -- Accuracy: {:.2f}%".format(acc * 100))
def preprocess(config, model_dir, train_features, train_targets, test_features, dae_features):
    """Run the tabular feature pipeline on the train and test frames.

    Steps, in order: attach DAE features (replace or merge, depending on
    ``config.dae_strategy``), keep only ``cp_type == 'trt_cp'`` training rows,
    encode categorical features, normalize, add per-group statistics and
    squared c-features, optionally apply PCA, and finally apply variance
    reduction.  The variance-reduction object is fitted and pickled under
    ``model_dir`` on first use and loaded from that pickle afterwards.

    Returns
    -------
    (train_features, train_targets, test_features)
    """
    n_original_features = 872
    column_names = train_features.columns
    gene_columns = [name for name in column_names if name.startswith('g-')]
    cell_columns = [name for name in column_names if name.startswith('c-')]

    # Assign DAE features
    if config.dae_strategy == 'replace':
        train_features, test_features = assign_dae_features(
            train_features, test_features, dae_features, n_original_features)
    else:
        train_features, test_features, _ = merge_dae_features(
            train_features, test_features, dae_features,
            len(gene_columns), len(cell_columns))

    # Drop ctl_vehicle rows; targets are filtered with the mask taken from
    # the still-unfiltered features so both stay aligned.
    is_treated = train_features['cp_type'] == 'trt_cp'
    train_targets = train_targets.loc[is_treated].reset_index(drop=True)
    train_features = train_features.loc[is_treated].reset_index(drop=True)

    # Categorical encoding
    encoded = encode_categorical_features(train_features, test_features)
    train_features, test_features, onehot_columns = encoded

    # Normalize
    columns_to_normalize = gene_columns + cell_columns + onehot_columns
    train_features, test_features = normalize(
        train_features,
        test_features,
        columns_to_normalize,
        norm_fun=config.norm_fun,
        concat_mode=config.norm_concat_mode,
        n_quantiles=config.gauss_n_quantiles,
    )

    # Grouping features
    feature_groups = [gene_columns, cell_columns]

    # Add stats as features
    train_features, test_features, _ = add_stats(
        train_features, test_features, feature_groups,
        concat_mode=config.stat_concat_mode)
    train_features, test_features, _ = c_squared(
        train_features, test_features, cell_columns,
        square_nums=config.square_nums,
        concat_mode=config.sqrt_concat_mode)

    # PCA
    # NOTE(review): all three "(PCA)" prints are assumed to belong to this
    # branch — confirm against the original indentation.
    feature_names_pca = []
    if config.skip_pca is False:
        train_features, test_features, feature_names_pca = apply_pca(
            train_features,
            test_features,
            feature_groups=feature_groups,
            n_comp_ratio=config.pca_n_comp_ratio,
            concat_mode=config.pca_concat_mode,
        )
        pca_summary = (
            f'(PCA) Adding {len(feature_names_pca)} features '
            f'and having a total of {len(train_features.columns)} features.'
        )
        print(pca_summary, flush=True)
        print('(PCA) train:', train_features.shape, flush=True)
        print('(PCA) test:', test_features.shape, flush=True)

    # Variance encoding: every column from the fifth one onwards is a target.
    variance_target_features = list(train_features.columns[4:])
    pickle_path = f'{model_dir}/variance_encoder.pkl'
    if not os.path.exists(pickle_path):
        fitted = variance_reduction_fit(
            train_features, variance_target_features, config.variance_threshold)
        save_pickle(fitted, pickle_path)
    vt = load_pickle(pickle_path)

    train_features = variance_reduction_transform(
        vt, train_features, variance_target_features)
    test_features = variance_reduction_transform(
        vt, test_features, variance_target_features)
    print('(variance_reduction) Number of features after applying:',
          len(train_features.columns), flush=True)

    return train_features, train_targets, test_features
def _apply_log_scaler(scaler, features, all_columns, numeric_columns, apply_all):
    """Transform ``features`` with a fitted scaler exactly as training does.

    With ``apply_all`` the whole matrix is transformed.  Otherwise only the
    columns named in ``numeric_columns`` are transformed and the result is
    laid out as ``[untouched columns, transformed columns]``.
    """
    if apply_all:
        return scaler.transform(features)
    numeric = set(numeric_columns)
    target_idx = [i for i, c in enumerate(all_columns) if c in numeric]
    other_idx = [i for i, c in enumerate(all_columns) if c not in numeric]
    transformed = scaler.transform(features[:, target_idx])
    return np.concatenate([features[:, other_idx], transformed], axis=1)


def run(try_num, config):
    """Train the DeepInsight/EfficientNet models per fold, then run inference.

    Reads the MoA CSV files, optionally attaches DAE features, prepares the
    feature columns, and for every fold of a multilabel-stratified K-fold
    split fits a ``DeepInsightTransformer`` and trains a ``MoAEfficientNet``
    with early stopping, saving the scaler, transformer and best weights
    under ``deepinsight-{try_num}``.  Afterwards every fold model produces
    out-of-fold and test predictions; ``oof_preds.csv``, ``log.csv`` and
    ``submission.csv`` are written to the same directory.

    Parameters
    ----------
    try_num
        Identifier used in the output directory name.
    config
        Experiment configuration; must support attribute access,
        ``to_dict()`` and ``update()``.

    Returns
    -------
    None.  When ``args.return_first_fold`` is set the function stops after
    training the first fold and only writes the log.
    """
    args = get_args()
    print('args', args, flush=True)
    print('config:', config.to_dict(), flush=True)

    set_seed(config.rand_seed)

    pretrained_model = "tf_efficientnet_b3_ns"
    model_dir = f'deepinsight-{try_num}'

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(model_dir, exist_ok=True)

    train_features = pd.read_csv("../input/lish-moa/train_features.csv")
    train_targets = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
    test_features = pd.read_csv("../input/lish-moa/test_features.csv")

    if config.dae_path:
        dae_features = pd.read_csv(config.dae_path)

    if args.debug:
        # Small subset and short schedule for quick smoke runs.
        train_features = train_features.iloc[:500]
        train_targets = train_targets.iloc[:500]
        if config.dae_path:
            # presumably the last 3982 rows are the test-set rows of the DAE
            # file — TODO confirm
            dae_features = pd.concat(
                [dae_features.iloc[:500], dae_features.iloc[-3982:]]
            ).reset_index(drop=True)

        config.update(dict(
            kfolds=3,
            n_epoch=3
        ))

    train_features = train_features.sort_values(
        by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)
    train_targets = train_targets.sort_values(
        by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)

    cat_features_columns = ["cp_dose", 'cp_time']
    num_feature_columns = [
        c for c in train_features.columns
        if c != "sig_id" and c not in cat_features_columns + ['cp_type']
    ]
    all_features_columns = cat_features_columns + num_feature_columns
    target_columns = [c for c in train_targets.columns if c != "sig_id"]
    g_feature_columns = [c for c in num_feature_columns if c.startswith("g-")]
    c_feature_columns = [c for c in num_feature_columns if c.startswith("c-")]

    if config.dae_path:
        if config.dae_strategy == 'replace':
            train_features, test_features = assign_dae_features(
                train_features, test_features, dae_features,
                len(num_feature_columns))
        else:
            train_features, test_features, dae_feature_columns = merge_dae_features(
                train_features, test_features, dae_features,
                len(g_feature_columns), len(c_feature_columns))
            all_features_columns += dae_feature_columns

    # Keep treated samples only; filter targets first so the mask still comes
    # from the unfiltered features.
    train_targets = train_targets.loc[
        train_features['cp_type'] == 'trt_cp'].reset_index(drop=True)
    train_features = train_features.loc[
        train_features['cp_type'] == 'trt_cp'].reset_index(drop=True)

    if config.normalizer == 'rank':
        train_features, test_features = normalize(
            train_features, test_features, num_feature_columns)

    for df in [train_features, test_features]:
        df['cp_type'] = df['cp_type'].map({'ctl_vehicle': 0, 'trt_cp': 1})
        df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
        df['cp_time'] = df['cp_time'].map({24: 0, 48: 0.5, 72: 1})

    if config.variance_target_type == 1:
        pickle_path = f'{model_dir}/variance_reduction.pkl'

        # Copy the list: extending it in place would also extend
        # num_feature_columns, which is consulted again for log scaling.
        variance_target_features = list(num_feature_columns)
        if config.dae_path and config.dae_strategy != 'replace':
            variance_target_features += dae_feature_columns

        if not os.path.exists(pickle_path):
            vt = variance_reduction_fit(
                train_features, variance_target_features,
                config.variance_threshold)
            save_pickle(vt, pickle_path)

        vt = load_pickle(pickle_path)
        train_features = variance_reduction_transform(
            vt, train_features, variance_target_features)
        test_features = variance_reduction_transform(
            vt, test_features, variance_target_features)
        print('(variance_reduction) Number of features after applying:',
              len(train_features.columns), flush=True)

        # NOTE(review): assumed to belong to the variance-reduction branch
        # (refresh the column list after columns were removed) — confirm
        # against the original indentation.
        all_features_columns = list(train_features.columns[1:])

    skf = MultilabelStratifiedKFold(
        n_splits=config.kfolds, shuffle=True, random_state=config.rand_seed)
    y_labels = np.sum(
        train_targets.drop("sig_id", axis=1), axis=0).index.tolist()
    logger = Logger()

    for fold_index, (train_index, val_index) in enumerate(
            skf.split(train_features, train_targets[y_labels])):
        if args.only_pred:
            print('Skip training', flush=True)
            break

        print(f'Fold: {fold_index}', train_index.shape, val_index.shape,
              flush=True)

        X_train = train_features.loc[train_index, all_features_columns].copy().values
        y_train = train_targets.iloc[train_index, 1:].copy().values
        X_valid = train_features.loc[val_index, all_features_columns].copy().values
        y_valid = train_targets.iloc[val_index, 1:].copy().values

        if config.normalizer == 'log':
            scaler = LogScaler()
            if config.norm_apply_all:
                scaler.fit(X_train)
            else:
                numeric = set(num_feature_columns)
                target_features = [
                    i for i, c in enumerate(all_features_columns)
                    if c in numeric
                ]
                scaler.fit(X_train[:, target_features])
            X_train = _apply_log_scaler(
                scaler, X_train, all_features_columns, num_feature_columns,
                config.norm_apply_all)
            X_valid = _apply_log_scaler(
                scaler, X_valid, all_features_columns, num_feature_columns,
                config.norm_apply_all)
            save_pickle(scaler, f'{model_dir}/scaler-{fold_index}.pkl')

        transformer = DeepInsightTransformer(
            feature_extractor=config.extractor,
            pixels=config.resolution,
            perplexity=config.perplexity,
            random_state=config.rand_seed,
            n_jobs=-1
        ).fit(X_train)

        save_pickle(transformer, f'{model_dir}/transformer-{fold_index}.pkl')

        model = MoAEfficientNet(
            pretrained_model_name=pretrained_model,
            fc_size=config.fc_size,
            drop_rate=config.drop_rate,
            drop_connect_rate=config.drop_connect_rate,
            weight_init='goog',
        ).to(DEVICE)

        if config.smoothing is not None:
            if config.weighted_loss_weights is not None:
                indices = get_minority_target_index(
                    train_targets, threshold=config.weighted_loss_threshold)
                # 1 for targets that are NOT in the returned index set.
                indices = [int(i not in indices)
                           for i, c in enumerate(target_columns)]
                train_loss_function = SmoothBCEwLogits(
                    smoothing=config.smoothing,
                    weight=config.weighted_loss_weights,
                    weight_targets=indices,
                    n_labels=len(target_columns))
            else:
                train_loss_function = SmoothBCEwLogits(
                    smoothing=config.smoothing)
        else:
            train_loss_function = bce_loss

        eval_loss_function = bce_loss

        optimizer = optim.Adam(model.parameters(),
                               weight_decay=config.weight_decay,
                               lr=config.learning_rate)

        # ReduceLROnPlateau is also the fallback for unknown scheduler types
        # and must always be stepped with a metric.
        steps_on_metric = False
        if config.scheduler_type == 'ca':
            scheduler = optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=config.t_max, eta_min=0, last_epoch=-1)
        elif config.scheduler_type == 'ms':
            scheduler = optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=config.ms_scheduler_milestones,
                gamma=0.1)
        else:
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.1,
                patience=config.rp_patience, eps=1e-4, verbose=True)
            steps_on_metric = True

        early_stopping = EarlyStopping(patience=7)
        best_score = np.inf
        start_time = time.time()

        for epoch in range(config.n_epoch):
            if config.swap_enable:
                dataset = MoAImageSwapDataset(
                    X_train, y_train, transformer,
                    image_size=config.image_size,
                    swap_prob=config.swap_prob,
                    swap_portion=config.swap_portion)
            else:
                dataset = MoAImageDataset(
                    X_train, y_train, transformer,
                    image_size=config.image_size)

            dataloader = DataLoader(
                dataset,
                batch_size=config.batch_size,
                shuffle=True,
                num_workers=8,
                pin_memory=True,
                drop_last=False)
            loss = loop_train(model, train_loss_function, dataloader, optimizer)

            if steps_on_metric:
                scheduler.step(loss)
            else:
                scheduler.step()
            for param_group in optimizer.param_groups:
                print('current learning rate:', param_group['lr'])

            del dataset, dataloader

            dataset = MoAImageDataset(
                X_valid, y_valid, transformer, image_size=config.image_size)
            dataloader = DataLoader(
                dataset,
                batch_size=config.infer_batch_size,
                shuffle=False,
                num_workers=8,
                pin_memory=True,
                drop_last=False)
            valid_loss, valid_preds = loop_valid(
                model, eval_loss_function, dataloader)

            del dataset, dataloader

            logger.update({'fold': fold_index, 'epoch': epoch + 1,
                           'train_loss': loss, 'val_loss': valid_loss})
            print(f'epoch {epoch + 1}/{config.n_epoch}  -  train_loss: {loss:.5f}  -  ' +
                  f'valid_loss: {valid_loss:.5f}  -  elapsed: {time_format(time.time() - start_time)}',
                  flush=True)

            # Checkpoint only when the validation loss improves.
            if valid_loss < best_score:
                best_score = valid_loss
                torch.save(model.state_dict(),
                           f'./{model_dir}/deepinsight-{fold_index}.pt')

            if early_stopping.should_stop(valid_loss):
                print('Early stopping', flush=True)
                break

        print(f'Done -> Fold {fold_index}/{config.kfolds}  -  best_valid_loss: {best_score:.5f}  -  ' +
              f'elapsed: {time_format(time.time() - start_time)}', flush=True)

        torch.cuda.empty_cache()
        gc.collect()

        if args.return_first_fold:
            logger.save(f'{model_dir}/log.csv')
            return

    test_preds = np.zeros((test_features.shape[0], len(target_columns)))
    start_time = time.time()
    print('Start infarence', flush=True)

    oof_preds = np.zeros((len(train_features), len(target_columns)))
    eval_loss_function = bce_loss

    # The split is seeded, so it reproduces the training folds.
    for fold_index, (train_index, val_index) in enumerate(
            skf.split(train_features, train_targets[y_labels])):
        print(f'Infarence Fold: {fold_index}', train_index.shape,
              val_index.shape, flush=True)
        X_valid = train_features.loc[val_index, all_features_columns].copy().values
        y_valid = train_targets.iloc[val_index, 1:].copy().values
        X_test = test_features[all_features_columns].values

        if config.normalizer == 'log':
            scaler = load_pickle(f'{model_dir}/scaler-{fold_index}.pkl')
            # Must mirror the training-time column handling; transforming the
            # full matrix would not match what the model was trained on when
            # norm_apply_all is off.
            X_valid = _apply_log_scaler(
                scaler, X_valid, all_features_columns, num_feature_columns,
                config.norm_apply_all)
            X_test = _apply_log_scaler(
                scaler, X_test, all_features_columns, num_feature_columns,
                config.norm_apply_all)

        transformer = load_pickle(f'{model_dir}/transformer-{fold_index}.pkl')
        model = MoAEfficientNet(
            pretrained_model_name=pretrained_model,
            fc_size=config.fc_size,
            drop_rate=config.drop_rate,
            drop_connect_rate=config.drop_connect_rate,
            weight_init='goog',
        ).to(DEVICE)
        model.load_state_dict(
            torch.load(f'./{model_dir}/deepinsight-{fold_index}.pt'))

        dataset = MoAImageDataset(
            X_valid, y_valid, transformer, image_size=config.image_size)
        dataloader = DataLoader(
            dataset,
            batch_size=config.infer_batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False)
        valid_loss, valid_preds = loop_valid(
            model, eval_loss_function, dataloader)
        print(f'Fold {fold_index}/{config.kfolds}  -  fold_valid_loss: {valid_loss:.5f}', flush=True)
        logger.update({'fold': fold_index, 'val_loss': valid_loss})

        oof_preds[val_index, :] = valid_preds

        dataset = TestDataset(
            X_test, None, transformer, image_size=config.image_size)
        dataloader = DataLoader(
            dataset,
            batch_size=config.infer_batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False)

        preds = loop_preds(model, dataloader)
        # Average the fold models' test predictions.
        test_preds += preds / config.kfolds

    oof_preds_df = train_targets.copy()
    oof_preds_df.loc[:, target_columns] = oof_preds.clip(0, 1)
    oof_preds_df.to_csv(f'{model_dir}/oof_preds.csv', index=False)
    oof_loss = mean_log_loss(
        train_targets.loc[:, target_columns].values, oof_preds)

    print(f'OOF Validation Loss: {oof_loss:.6f}', flush=True)
    print(f'Done infarence  Elapsed {time_format(time.time() - start_time)}', flush=True)
    logger.update({'fold': 'oof', 'val_loss': oof_loss})
    logger.save(f'{model_dir}/log.csv')

    submission = pd.DataFrame(
        data=test_features['sig_id'].values, columns=['sig_id'])
    submission = submission.reindex(columns=['sig_id'] + target_columns)
    submission.loc[:, target_columns] = test_preds.clip(0, 1)
    # cp_type was mapped to 0 for 'ctl_vehicle' above; those rows get
    # all-zero predictions.
    submission.loc[test_features['cp_type'] == 0, submission.columns[1:]] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
def get_model(args, training=True):
    """Build the EfficientDet Keras model for training or for inference.

    Parameters
    ----------
    args : namespace
        Parsed options. This function reads ``use_pretrain``, ``model_type``
        and ``nms`` directly; everything else is consumed through
        ``efficientdet_config.get_struct_args``.
    training : bool
        When True, return a model whose outputs are the per-level class and
        box tensors (wrapped in identity ``Lambda`` layers named
        ``level_<i>_cls`` / ``level_<i>_box``). When False, return an
        end-to-end model that takes ``uint8`` images of arbitrary size and
        outputs ``[boxes, scores, classes, num_valid]`` after NMS.

    Returns
    -------
    tf.keras.Model

    Raises
    ------
    ValueError
        If the pretrained weight file cannot be loaded, or if ``args.nms``
        is not a supported NMS type (inference only).
    """
    model_args = efficientdet_config.get_struct_args(args)

    if training:
        # Build a 90-class copy of the network to receive the pretrained
        # checkpoint (presumably the COCO class count -- confirm), then
        # restore the caller's class count for the real model.
        cur_num_classes = model_args.num_classes
        model_args.num_classes = 90
        pretrain_net = EfficientDetNet(model_args)
        model_inputs_pretrain = tf.keras.layers.Input(
            shape=(model_args.image_size, model_args.image_size, 3))
        model_outputs_pretrain = pretrain_net(model_inputs_pretrain,
                                              training=True)
        model_pretrain = tf.keras.Model(inputs=model_inputs_pretrain,
                                        outputs=model_outputs_pretrain)
        model_args.num_classes = cur_num_classes

        if args.use_pretrain:
            model_pretrained_weights = "./pretrain/efficientdet-{}/model".format(
                args.model_type)
            try:
                model_pretrain.load_weights(
                    model_pretrained_weights).expect_partial()
            except Exception as err:
                # Keep the original cause attached instead of hiding it.
                raise ValueError('weight file {} is invalid!'.format(
                    model_pretrained_weights)) from err

        detection_net = EfficientDetNet(model_args)
        model_inputs = tf.keras.layers.Input(
            shape=(model_args.image_size, model_args.image_size, 3))
        model_outputs = detection_net(model_inputs)

        num_level = model_args.max_level - model_args.min_level + 1
        # Identity layers only give every pyramid level a stable output name.
        level_cls_outputs = [
            tf.keras.layers.Lambda(
                lambda x: x, name='level_{}_cls'.format(level))(
                    model_outputs[0][level])
            for level in range(num_level)
        ]
        level_box_outputs = [
            tf.keras.layers.Lambda(
                lambda x: x, name='level_{}_box'.format(level))(
                    model_outputs[1][level])
            for level in range(num_level)
        ]
        model = tf.keras.Model(inputs=model_inputs,
                               outputs=(level_cls_outputs, level_box_outputs))

        # Copy every sub-layer except the class head, whose shape depends on
        # the number of classes. The networks are addressed directly rather
        # than through a positional index into ``model.layers``, which only
        # pointed at the detection network for exactly five pyramid levels.
        for layer in pretrain_net.layers:
            if layer.name != 'class_net':
                detection_net.get_layer(layer.name).set_weights(
                    pretrain_net.get_layer(layer.name).get_weights())
        return model

    if args.nms != 'hard_nms_tf':
        raise ValueError('Unsupported nms type {}'.format(args.nms))

    detection_net = EfficientDetNet(model_args)
    image_size = model_args.image_size
    model_inputs = tf.keras.layers.Input(shape=(None, None, 3),
                                         dtype=tf.dtypes.uint8)
    # NOTE(review): resize_img_tf's result is indexed as [0] (fed to the
    # network), [1] (divisor for the boxes) and [2] (offset subtracted from
    # the boxes) -- presumably image, scale and padding; confirm.
    resized_inputs = tf.keras.layers.Lambda(
        lambda x: preprocess.resize_img_tf(x, (image_size, image_size)))(
            model_inputs)
    preprocessed_inputs = tf.keras.layers.Lambda(
        lambda x: tf.cast(x, tf.dtypes.float32))(resized_inputs[0])
    preprocessed_inputs = tf.keras.layers.Lambda(
        lambda x: preprocess.normalize(x))(preprocessed_inputs)

    cls_out_list, box_out_list = detection_net(preprocessed_inputs,
                                               training=False)
    # Re-key the per-level outputs by absolute pyramid level.
    cls_outputs, box_outputs = {}, {}
    for i in range(model_args.min_level, model_args.max_level + 1):
        cls_outputs[i] = cls_out_list[i - model_args.min_level]
        box_outputs[i] = box_out_list[i - model_args.min_level]

    nms_boxes, nms_scores, nms_classes, nms_num_valid = postprocess.postprocess(
        args, cls_outputs, box_outputs,
        tf.cast([image_size, image_size], tf.dtypes.float32))
    # Map boxes from the resized frame back to the input image frame.
    nms_boxes = (nms_boxes - tf.cast(tf.tile(resized_inputs[2], [2]),
                                     tf.dtypes.float32)) / resized_inputs[1]

    return tf.keras.Model(
        inputs=model_inputs,
        outputs=[nms_boxes, nms_scores, nms_classes, nms_num_valid])
def train(self, x_tr, y_tr, x_va, y_va):
    """Training function.

    Runs the whole optimisation inside a single ``tf.Session``: initialises
    the graph, stores the training-set normalisation statistics in the graph,
    then for every epoch trains on shuffled mini-batches, evaluates on the
    validation set, saves a resumable checkpoint and, when validation
    accuracy improves, saves the best model.

    Parameters
    ----------
    x_tr : ndarray
        Training data.

    y_tr : ndarray
        Training labels.

    x_va : ndarray
        Validation data.

    y_va : ndarray
        Validation labels.
    """

    # ----------------------------------------
    # Preprocess data

    # Report data statistic
    print("Training data before: mean {}, std {}, min {}, max {}".format(
        x_tr.mean(), x_tr.std(), x_tr.min(), x_tr.max()))

    # Only the mean and range are kept; they are assigned to graph variables
    # below so that normalisation happens "inside" the computation graph.
    _, x_tr_mean, x_tr_range = normalize(x_tr)

    # ----------------------------------------
    # Run TensorFlow Session
    with tf.Session() as sess:
        print("Initializing...")
        sess.run(tf.global_variables_initializer())

        # Store the training statistics in the graph.
        sess.run(
            fetches={
                "self.n_assign_op": self.n_assign_op,
            },
            feed_dict={
                self.n_mean_in: x_tr_mean,
                self.n_range_in: x_tr_range,
            },
        )

        # Record the initial validation performance.
        print("Testing...")
        val_feed = {
            self.x_in: x_va,
            self.y_in: y_va,
        }
        resu = sess.run(
            fetches={
                "self.summary_op": self.summary_op,
                "self.global_step": self.global_step,
            },
            feed_dict=val_feed,
        )
        # The fetched global step positions the summary on the step axis.
        self.summary_va.add_summary(resu["self.summary_op"],
                                    global_step=resu["self.global_step"])

        print("Training...")
        batch_size = config.batch_size
        num_epoch = config.num_epoch
        # Samples that do not fill a whole batch are skipped each epoch.
        num_batch = len(x_tr) // batch_size
        best_acc = 0
        for idx_epoch in trange(num_epoch):
            # Create a random order to go through the data
            ind_data = np.random.permutation(len(x_tr))
            for idx_batch in range(num_batch):
                ind_cur = ind_data[batch_size * idx_batch:
                                   batch_size * (idx_batch + 1)]
                # Index sample by sample so that x_tr / y_tr may be lazily
                # loaded containers (e.g. an HDF5 dataset), not just arrays.
                x_b = np.array([x_tr[_i] for _i in ind_cur])
                y_b = np.array([y_tr[_i] for _i in ind_cur])

                # NOTE(review): the op fetched under "self.optim" is
                # `self.updates`; confirm that is the optimiser op's name.
                resu = sess.run(
                    fetches={
                        "self.optim": self.updates,
                        "self.summary_op": self.summary_op,
                        "self.global_step": self.global_step,
                    },
                    feed_dict={
                        self.x_in: x_b,
                        self.y_in: y_b,
                    },
                )
                # Use the fetched global step, not the epoch index, so each
                # batch gets its own point on the step axis.
                self.summary_tr.add_summary(
                    resu["self.summary_op"],
                    global_step=resu["self.global_step"])

            # Flush once per epoch; otherwise the writer decides when to
            # write on its own.
            self.summary_tr.flush()

            # Validate, additionally fetching accuracy for model selection.
            resu = sess.run(
                fetches={
                    "self.summary_op": self.summary_op,
                    "self.global_step": self.global_step,
                    "acc": self.acc,
                },
                feed_dict=val_feed,
            )
            self.summary_va.add_summary(resu["self.summary_op"],
                                        global_step=resu["self.global_step"])
            self.summary_va.flush()

            # Resumable checkpoint. The global-step tensor itself (not the
            # fetched value) is passed to the saver, and the meta graph is
            # skipped to keep the file small.
            self.saver_cur.save(sess, self.save_file_cur,
                                write_meta_graph=False,
                                global_step=self.global_step)

            if resu["acc"] > best_acc:
                best_acc = resu["acc"]
                # Single best-model file: no global step, no meta graph.
                self.saver_best.save(sess, self.save_file_best,
                                     write_meta_graph=False)
def train(self, x_tr, y_tr, x_va, y_va):
    """Train the network and checkpoint it after every epoch.

    The normalisation statistics of ``x_tr`` are assigned to the graph, the
    validation set is evaluated once up front, and then each epoch runs the
    optimiser over shuffled mini-batches, logs training and validation
    summaries, saves the current model and, when validation accuracy beats
    the best seen so far, saves the best model as well.

    Parameters
    ----------
    x_tr : training data
    y_tr : training labels
    x_va : validation data
    y_va : validation labels
    """
    stats_template = "Training data before: mean {}, std {}, min {}, max {}"
    print(stats_template.format(
        x_tr.mean(), x_tr.std(), x_tr.min(), x_tr.max()))

    # Only the statistics are needed; the normalised array is discarded.
    _normalised, x_tr_mean, x_tr_range = normalize(x_tr)

    def log_summary(writer, fetched):
        # Write one fetched summary at the global step fetched with it.
        writer.add_summary(fetched["summary"],
                           global_step=fetched["global_step"])

    # ----------------------------------------
    # Run TensorFlow Session
    with tf.Session() as session:
        print("Initializing...")
        session.run(tf.global_variables_initializer())
        stats_feed = {
            self.n_mean_in: x_tr_mean,
            self.n_range_in: x_tr_range,
        }
        session.run(self.n_assign_op, feed_dict=stats_feed)

        print("Testing...")
        validation_feed = {self.x_in: x_va, self.y_in: y_va}
        initial = session.run(
            fetches={
                "summary": self.summary_op,
                "global_step": self.global_step,
            },
            feed_dict=validation_feed,
        )
        log_summary(self.summary_va, initial)

        print("Training...")
        batch_size = config.batch_size
        num_epoch = config.num_epoch
        num_batch = len(x_tr) // batch_size
        best_acc = 0

        # Positions of each full batch inside the per-epoch permutation.
        batch_slices = [
            slice(batch_size * k, batch_size * (k + 1))
            for k in range(num_batch)
        ]

        for _epoch in trange(num_epoch):
            order = np.random.permutation(len(x_tr))

            for window in batch_slices:
                picked = order[window]
                batch_x = np.array([x_tr[j] for j in picked])
                batch_y = np.array([y_tr[j] for j in picked])
                step_out = session.run(
                    fetches={
                        "accuracy": self.acc,
                        "optim": self.optim,
                        "summary": self.summary_op,
                        "global_step": self.global_step,
                    },
                    feed_dict={self.x_in: batch_x, self.y_in: batch_y},
                )
                log_summary(self.summary_tr, step_out)
            self.summary_tr.flush()

            epoch_out = session.run(
                fetches={
                    "accuracy": self.acc,
                    "summary": self.summary_op,
                    "global_step": self.global_step,
                },
                feed_dict=validation_feed,
            )
            log_summary(self.summary_va, epoch_out)
            self.summary_va.flush()

            # Resumable checkpoint, tagged with the global-step tensor.
            self.saver_cur.save(
                session,
                self.save_file_cur,
                global_step=self.global_step,
                write_meta_graph=False,
            )

            epoch_acc = epoch_out["accuracy"]
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                self.saver_best.save(
                    session, self.save_file_best, write_meta_graph=False)
def train(x_tr, y_tr, x_va, y_va, config):
    """Training function.

    Trains a linear classifier (weights ``W``, bias ``b``) with mini-batch
    gradient descent and keeps the parameters that achieved the best
    validation accuracy.

    Parameters
    ----------
    x_tr : ndarray
        Training data.

    y_tr : ndarray
        Training labels.

    x_va : ndarray
        Validation data.

    y_va : ndarray
        Validation labels.

    config : namespace
        Arguments and configurations parsed by `argparse`. This function
        reads ``batch_size``, ``num_epoch`` and ``learning_rate``; the whole
        object is also forwarded to ``predict``, ``compute_loss`` and
        ``compute_grad``.

    Returns
    -------
    train_res : dictionary
        ``W_best`` / ``b_best`` / ``best_acc`` for the epoch with the best
        validation accuracy, the per-epoch lists ``loss_epoch``,
        ``tr_acc_epoch`` and ``va_acc_epoch``, and the normalisation
        statistics ``x_tr_mean`` / ``x_tr_range`` needed to preprocess new
        data the same way.

    Raises
    ------
    ValueError
        If there are fewer training samples than ``config.batch_size``.
    """

    # ----------------------------------------
    # Preprocess data

    # Report data statistic
    print("Training data before: mean {}, std {}, min {}, max {}".format(
        x_tr.mean(), x_tr.std(), x_tr.min(), x_tr.max()
    ))

    # Normalise with the training statistics and reuse those same statistics
    # for the validation data.
    x_tr_n, x_tr_mean, x_tr_range = normalize(x_tr)
    x_va_n, _, _ = normalize(x_va, x_tr_mean, x_tr_range)

    # Always a good idea to print some debug messages
    print("Training data after: mean {}, std {}, min {}, max {}".format(
        x_tr_n.mean(), x_tr_n.std(), x_tr_n.min(), x_tr_n.max()
    ))

    # ----------------------------------------
    # Initialize parameters of the classifier
    print("Initializing...")
    num_class = 10
    # Very small random weights in [-0.001, 0.001); zero bias.
    W = np.random.uniform(-0.001, 0.001, (x_tr[0].shape[0], num_class))
    b = np.zeros(num_class)

    print("Testing...")
    # Evaluate on the *normalised* validation data: the classifier is
    # trained on normalised inputs, so raw inputs would be on another scale.
    acc = (predict(W, b, x_va_n, config) == y_va).mean()
    print("Initial Validation Accuracy: {}%".format(acc * 100))

    batch_size = config.batch_size
    num_epoch = config.num_epoch
    num_batch = len(x_tr_n) // batch_size
    if num_batch < 1:
        raise ValueError(
            "need at least batch_size={} training samples, got {}".format(
                batch_size, len(x_tr_n)))
    # Samples beyond the last full batch are dropped each epoch, so the
    # reshape below works for any dataset size.
    num_used = num_batch * batch_size

    loss_epoch = []
    tr_acc_epoch = []
    va_acc_epoch = []
    W_best = None
    b_best = None
    best_acc = 0

    # For each epoch
    for idx_epoch in range(num_epoch):
        # Shuffle data and labels with the same random order.
        randomizer = np.arange(len(x_tr_n))
        np.random.shuffle(randomizer)
        x_tr_n = x_tr_n[randomizer]
        y_tr = y_tr[randomizer]

        losses = np.zeros(num_batch)
        accs = np.zeros(num_batch)
        x_tr_b = np.reshape(x_tr_n[:num_used], (num_batch, batch_size, -1))
        y_tr_b = np.reshape(y_tr[:num_used], (num_batch, batch_size))

        for idx_batch in range(num_batch):
            x_b = x_tr_b[idx_batch]
            y_b = y_tr_b[idx_batch]

            # Get loss with compute_loss
            loss_cur, loss_c, _ = compute_loss(W, b, x_b, y_b, config)
            # Get gradient with compute_grad
            dW, db = compute_grad(W, x_b, y_b, loss_c, config)

            # Plain gradient-descent step. New arrays are created, so W_best
            # and b_best never alias the live parameters.
            W = W - (config.learning_rate * dW)
            b = b - (config.learning_rate * db)

            # Record this batch's result (accuracy is measured after the
            # update).
            losses[idx_batch] = loss_cur
            accs[idx_batch] = (predict(W, b, x_b, config) == y_b).mean()

        # Report average results within this epoch
        print("Epoch {} -- Train Loss: {}".format(
            idx_epoch, np.mean(losses)))
        print("Epoch {} -- Train Accuracy: {:.2f}%".format(
            idx_epoch, np.mean(accs) * 100))

        # Test on validation data and report results
        acc = (predict(W, b, x_va_n, config) == y_va).mean()
        print("Epoch {} -- Validation Accuracy: {:.2f}%".format(
            idx_epoch, acc * 100))

        # Keep the parameters of the best epoch so far.
        if acc > best_acc:
            W_best = W
            b_best = b
            best_acc = acc

        # Record per-epoch statistics. The lists are appended to (`+=` with
        # a scalar raises TypeError), and the epoch's own validation
        # accuracy is stored rather than the running best.
        loss_epoch.append(np.mean(losses))
        tr_acc_epoch.append(np.mean(accs))
        va_acc_epoch.append(acc)

    # Pack results, including the pre-processing statistics.
    train_res = {
        'best_acc': best_acc,
        'W_best': W_best,
        'b_best': b_best,
        'loss_epoch': loss_epoch,
        'tr_acc_epoch': tr_acc_epoch,
        'va_acc_epoch': va_acc_epoch,
        'x_tr_mean': x_tr_mean,
        'x_tr_range': x_tr_range,
    }

    return train_res