def validate_classifier(val_loader, model, loss_function, labels_names):
    """Validate the model on all validation batches from one epoch.

    The same validation function is used to validate the base and TCN
    classifiers, and to validate unimodal and multimodal models.

    Args:
        val_loader: PyTorch validation data loader
        model: PyTorch model
        loss_function: PyTorch loss function
        labels_names (array/list): Names of labels/targets

    Returns:
        val_metrics (dict of dict of lists): Dictionary that maps
            class/label/target name and metric name to a corresponding
            validation metric value. Besides the class/label/target names
            provided in labels_names, it also contains a key 'overall'
            referring to the average over all targets/labels.
    """
    with torch.no_grad():
        # Aggregates from all validation batches
        all_predictions = []
        all_labels = []
        all_losses = []

        # Apply loss weights based on validation dataset class weights
        loss_function.pos_weight = val_loader.dataset.pos_class_weights.cuda()

        for i, val_data in enumerate(tqdm(val_loader, desc=' valid')):
            features, labels = val_data
            features = [f.cuda() for f in features]
            labels = labels.cuda()

            logits, predictions = model.forward(*features)
            # Get (already weighted) loss matrix (batch_size x class_num)
            batch_losses = loss_function(logits, labels)

            all_predictions.extend(predictions.tolist())
            all_labels.extend(labels.tolist())
            all_losses.extend(batch_losses.tolist())

        # Calculate validation metrics
        val_metrics = calculate_metrics(
            all_predictions, all_labels, labels_names,
            losses=all_losses,
            loss_function_pos_weights=loss_function.pos_weight.tolist())
    return val_metrics
def test_model(model, data, labels, mc_steps=None):
    '''Test a model and return probabilities, predicted labels, entropy, and metrics.'''
    start_time = time()
    if mc_steps is None:
        pred, pred_labels, entropy = model.predict(np.expand_dims(data, axis=-1))
    else:
        pred, pred_labels, entropy = model.predict(np.expand_dims(data, axis=-1),
                                                   mc_steps=mc_steps)
    elapsed_time = time() - start_time

    metrics = calculate_metrics(pred_labels, labels) + [elapsed_time]
    print('{} ~ Accuracy: {:.4f} ~ Time: {:.2f} seconds'.format(
        model.model.name, metrics[0], metrics[4]))
    return pred, pred_labels, entropy, metrics
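
# A minimal sketch of the calculate_metrics helper assumed by test_model
# above: it must return a 4-element list so metrics[0] is accuracy and the
# appended elapsed time lands at metrics[4]. The exact metric choice is an
# assumption, not the original implementation.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def calculate_metrics(pred_labels, labels):
    """Return [accuracy, precision, recall, f1] for the given predictions."""
    return [accuracy_score(labels, pred_labels),
            precision_score(labels, pred_labels, average='macro'),
            recall_score(labels, pred_labels, average='macro'),
            f1_score(labels, pred_labels, average='macro')]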
def __predict(self, return_df_metrics=True):
    start_time = time.time()
    model_path = self.output_folder + '/best_model.hdf5'
    model = keras.models.load_model(model_path, custom_objects={'TCN': TCN})
    y_pred = model.predict(self.X_test, batch_size=self.batch_size)
    if return_df_metrics:
        y_pred = np.argmax(y_pred, axis=1)
        metrics = calculate_metrics(self.y_true, y_pred, 0.0, self.output_folder)
        return metrics
    else:
        test_duration = time.time() - start_time
        save_test_duration(self.output_folder + '/test_duration.csv',
                           test_duration)
        return y_pred
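
# A plausible sketch of the calculate_metrics variant called by __predict: a
# one-row DataFrame of precision/accuracy/recall plus the test duration,
# optionally written to the output folder. The column names and CSV filename
# are assumptions.
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score

def calculate_metrics(y_true, y_pred, duration, output_folder=None):
    res = pd.DataFrame(data=np.zeros((1, 4), dtype=float),
                       columns=['precision', 'accuracy', 'recall', 'duration'])
    res['precision'] = precision_score(y_true, y_pred, average='macro')
    res['accuracy'] = accuracy_score(y_true, y_pred)
    res['recall'] = recall_score(y_true, y_pred, average='macro')
    res['duration'] = duration
    if output_folder is not None:
        res.to_csv(output_folder + '/df_metrics.csv', index=False)
    return res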
def train():
    channel_name = 'training'
    training_path = os.path.join(input_path, channel_name)
    train_df = pd.read_csv(f"{training_path}/train.csv")
    y = train_df.pop('narrowing-diagnosis')
    X = train_df

    params_file = "params.json"
    with open(params_file) as f:
        params = json.load(f)

    X_train, X_test, y_train, y_test = train_test_split(X, y)
    train_data = Pool(data=X_train,
                      cat_features=params['data_params']['cat_features'],
                      label=y_train)
    test_data = Pool(data=X_test,
                     cat_features=params['data_params']['cat_features'],
                     label=y_test)

    model = CatBoostClassifier(**params['model_params'])
    model.fit(train_data, eval_set=test_data, verbose=False, plot=False)
    model.save_model(f"{model_path}/heart.cbm")
    print('model has been trained successfully')

    predict_probas = model.predict_proba(test_data)
    model_metrics = calculate_metrics(params['data_params']['metrics'],
                                      y_test, predict_probas)
    print('Metrics for trained model:')
    for k, v in model_metrics.items():
        print(f'{k}={v}')

    with open(f"{model_path}/metrics.json", 'w') as fp:
        fp.write(json.dumps(model_metrics))
def on_step_end(self, episode_step, logs):
    """Calculate metrics every `interval` steps; save target_model if conditions are met."""
    self.step += 1
    self.loss.append(logs.get("metrics")[0])
    if not self.step % self.interval:
        y_pred = make_predictions(self.model.target_model, self.X_val)
        stats = calculate_metrics(self.y_val, y_pred)

        if np.isnan(self.loss).all():
            # All entries can be NaN before the agent starts training
            stats["loss"] = 0
        else:
            stats["loss"] = np.nanmean(self.loss)
        self.loss = []  # Reset loss every `self.interval` steps

        for k, v in stats.items():
            summary = Summary(value=[Summary.Value(tag=k, simple_value=v)])
            self.writer.add_summary(summary, global_step=self.step)

        if (stats.get("FN") <= self.FN_bound and stats.get("FP") <= self.FP_bound
                and self.step >= self.save_after):
            print(f"Model saved! FN: {stats.get('FN')}; FP: {stats.get('FP')}")
            self.model.target_model.save(
                f"./models/{datetime.now().strftime('%Y%m%d')}"
                f"_FN{stats.get('FN')}_FP{stats.get('FP')}.h5")
def test_classifier(test_loader, model, labels_names):
    """Test the model on all test batches.

    The same test function is used to test the base and TCN classifiers,
    and to test unimodal and multimodal models.

    Args:
        test_loader: PyTorch test data loader
        model: PyTorch model
        labels_names (array/list): Names of labels/targets

    Returns:
        test_metrics (dict of dict of lists): Dictionary that maps
            class/label/target name and metric name to a corresponding test
            metric value. Besides the class/label/target names provided in
            labels_names, it also contains a key 'overall' referring to the
            average over all targets/labels.
    """
    with torch.no_grad():
        # Aggregates from all test batches
        all_predictions = []
        all_labels = []

        for i, test_data in enumerate(tqdm(test_loader, desc=' test')):
            features, labels = test_data
            features = [f.cuda() for f in features]
            labels = labels.cuda()

            _, predictions = model.forward(*features)

            all_predictions.extend(predictions.tolist())
            all_labels.extend(labels.tolist())

        # Calculate test metrics
        test_metrics = calculate_metrics(all_predictions, all_labels,
                                         labels_names)
    return test_metrics
roc = 0.0
ap = 0.0
folds = data[experiment_type]
for fold_idx, (X_train, _, X_test, y_test) in enumerate(folds):
    for iter_idx in range(cfg["num_iterations"]):
        np.random.seed(iter_idx)
        params.update({"get_top": get_top})
        model = model_cls(**params)
        model.fit(X_train)
        y_test_pred = model.predict_proba(X_test)[:, 1]
        iter_roc, iter_ap = calculate_metrics(y_test, y_test_pred)
        roc += iter_roc
        ap += iter_ap

# Average over all fold/iteration runs
roc = np.round(roc / num_experiments, decimals=4)
ap = np.round(ap / num_experiments, decimals=4)
res += f"{model_name},{data_name},{get_top},{roc},{ap}\n"

with open(fname, "w+") as res_f:
    res_f.write(res)
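
# A minimal sketch, assuming calculate_metrics here wraps scikit-learn's
# ranking metrics and returns (roc_auc, average_precision) for the
# positive-class scores unpacked above.
from sklearn.metrics import roc_auc_score, average_precision_score

def calculate_metrics(y_true, y_score):
    return (roc_auc_score(y_true, y_score),
            average_precision_score(y_true, y_score))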
def _train(self):
    FLAGS = self.FLAGS
    for _ in range(FLAGS.valid_step_period):
        threshold = (1.0 - 1.0 /
                     (1.0 + self.FLAGS.decay_rate *
                      np.floor(self.step / self.FLAGS.decay_steps)))
        loss_and_info = {
            "loss": {},
            "info": {
                "threshold": threshold
            },
            "metrics": {},
            "csv_fieldnames": self.csv_fieldnames
        }
        outputs = {}
        if self.rng.rand() > threshold:
            inputs = self.data_generator.get_unsup_data()
            struc_loss_and_info, struc_outputs = self.train_one_step_struc(inputs)
            outputs.update(struc_outputs)
            loss_and_info["loss"]["struc"] = struc_loss_and_info["loss"]
            loss_and_info["info"]["struc"] = struc_loss_and_info["info"]
            loss_and_info["info"]["struc_lr"] = (
                self.optimizer_struc.lr(self.optimizer_struc.iterations - 1).numpy()
                if callable(self.optimizer_struc.lr)
                else self.optimizer_struc.lr.numpy())
        else:
            inputs = self.data_generator.get_data()
            meta_loss_and_info, meta_outputs = self.train_one_step_meta(inputs)
            outputs.update(meta_outputs)
            loss_and_info["loss"]["meta"] = meta_loss_and_info["loss"]
            loss_and_info["info"]["meta"] = meta_loss_and_info["info"]
            loss_and_info["info"]["meta_lr"] = (
                self.optimizer_meta.lr(self.optimizer_meta.iterations - 1).numpy()
                if callable(self.optimizer_meta.lr)
                else self.optimizer_meta.lr.numpy())
        self._iteration += 1
        self._iterations_since_restore += 1
    # Undo one increment, presumably because the framework advances these
    # counters once per _train() call on its own
    self._iteration -= 1
    self._iterations_since_restore -= 1

    if "meta" in outputs:
        metrics = calculate_metrics(inputs["meta"]["test_labels"].numpy(),
                                    outputs["meta"]["logits"].numpy())
        loss_and_info["metrics"]["metatrain"] = metrics

    if (self.step + 1) % FLAGS.valid_step_period == 0:
        valid_metrics = self._test("valid")
        test_metrics = self._test("test")
        loss_and_info["metrics"]["metaval"] = valid_metrics
        loss_and_info["metrics"]["metatest"] = test_metrics

    def _func(x):
        try:
            y = float(x)
        except (TypeError, ValueError):
            y = x
        return y

    result = nest.map_structure(_func, loss_and_info)

    # Track the best validation metric (roc_auc + f1) and the average
    # deviation from it since it was last improved
    ahb_metric = (loss_and_info["metrics"]["metaval"]["roc_auc"] +
                  loss_and_info["metrics"]["metaval"]["f1"])
    if not hasattr(self, "_best_ahb_metric"):
        self._best_ahb_metric = ahb_metric
        self._metric_after_best = 0.0
        self._step_after_best = 0
    if ahb_metric > self._best_ahb_metric:
        self._best_ahb_metric = ahb_metric
        self._metric_after_best = 0.0
        self._step_after_best = 0
        deviation_after_best = 0.0
    else:
        self._metric_after_best += ahb_metric
        self._step_after_best += 1
        deviation_after_best = (self._best_ahb_metric -
                                self._metric_after_best / self._step_after_best)
    result["best_ahb_metric"] = self._best_ahb_metric
    result["deviation_after_best"] = deviation_after_best
    result["hpo_metric"] = self._best_ahb_metric - deviation_after_best
    return result
def train_model(model, data_loaders, criterion, optimizer, args):
    # create stats df and csv file
    stats_df = pd.DataFrame(columns=[
        'epoch', 'train_loss', 'train_acc', 'train_f1', 'val_loss', 'val_acc',
        'val_f1'
    ])
    sub_dump_dir = get_sub_dump_dir(args)
    stats_path = os.path.join(sub_dump_dir, 'stats.csv')
    stats_df.to_csv(stats_path, sep=',', index=False)  # write header row
    fprint('\nCreated stats file\t-> {}'.format(stats_path), args)

    fprint('\nTRAINING {} EPOCHS...\n'.format(args.epochs), args)
    since = time.time()

    # initialize best values
    best_model_state_dict = copy.deepcopy(model.state_dict())
    best_opt_state_dict = copy.deepcopy(optimizer.state_dict())
    best_loss = 999999.9
    best_acc = 0.0
    best_epoch = 0

    for epoch in range(args.epochs):
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            phase_loss = 0.0
            phase_corrects = 0
            phase_preds = torch.LongTensor()
            phase_category_ids = torch.LongTensor()

            # Iterate over data
            for inputs, category_ids in data_loaders[phase]:
                inputs = inputs.to(torch.device(args.device))
                category_ids = category_ids.to(torch.device(args.device))

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = criterion(outputs, category_ids)
                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # stats
                batch_loss = loss.item() * inputs.size(0)
                batch_corrects = torch.sum(preds == category_ids.data)
                phase_loss += batch_loss
                phase_corrects += batch_corrects
                phase_preds = torch.cat((phase_preds, preds), 0)
                phase_category_ids = torch.cat(
                    (phase_category_ids, category_ids), 0)

            epoch_loss = phase_loss / len(data_loaders[phase].dataset)
            epoch_acc, epoch_f1 = calculate_metrics(phase_preds,
                                                    phase_category_ids)

            stats_df.at[0, 'epoch'] = epoch
            stats_df.at[0, phase + '_loss'] = round(epoch_loss, 6)
            stats_df.at[0, phase + '_acc'] = round(epoch_acc, 6)
            stats_df.at[0, phase + '_f1'] = round(epoch_f1, 6)

            # define the new bests
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_state_dict = copy.deepcopy(model.state_dict())
                best_opt_state_dict = copy.deepcopy(optimizer.state_dict())
                best_loss = copy.deepcopy(epoch_loss)
                best_epoch = epoch

        # append epoch stats to file
        fprint(
            stats_df.to_string(index=False,
                               header=(epoch == 0),
                               col_space=10,
                               justify='right'), args)
        stats_df.to_csv(stats_path, mode='a', header=False, index=False)

    time_elapsed = time.time() - since
    fprint(
        '\nTraining completed in {:.0f}m {:.0f}s\n'.format(
            time_elapsed // 60, time_elapsed % 60), args)

    # reload best model weights and best optimizer variables
    model.load_state_dict(best_model_state_dict)
    optimizer.load_state_dict(best_opt_state_dict)

    # save best checkpoint
    if not os.path.exists(cfg.MODEL_DIR):
        os.makedirs(cfg.MODEL_DIR)
    cp_path = os.path.join(
        cfg.MODEL_DIR,
        '{}_{}_{:.6f}.pth'.format('pt' if args.pretrained else 'fs',
                                  args.t_start, best_acc))
    if args.save:
        torch.save(
            {
                'epoch': best_epoch,
                'model_state_dict': best_model_state_dict,
                'optimizer_state_dict': best_opt_state_dict,
                'loss': best_loss,
                'acc': best_acc
            }, cp_path)
        fprint('Saved best checkpoint\t-> {}'.format(cp_path), args)

    return model, optimizer
model.save_model(f"{project_dir}/model/model.cbm") builtin_metrics = model.eval_metrics(train_data, metrics=['Logloss', 'AUC', 'F1', 'PRAUC']) # write results hold_out_score = model.get_best_score() # write_eval_summary_file(cv_scores, hold_out_score) predict_probas = model.predict_proba(test_data) test_data_metrics = calculate_metrics(build_spec['standard_metrics'], build_spec['custom_metrics'], y_test, predict_probas[:, 1]) write_eval_summary_file(cv_scores, test_data_metrics) predict_probas = pd.DataFrame(predict_probas, columns=['False', 'True']) predict_probas.to_csv(f"{project_dir}/model/test_data_prediction.csv") shap_values = model.get_feature_importance(data=test_data, type=EFstrType.ShapValues, shap_calc_type='Exact') shap_values_df = pd.DataFrame(shap_values, columns=features + ['expected_value']) shap_values_df.to_csv(f"{project_dir}/model/test_data_shap_values.csv")
# for localization, 80, 96, 96 => 84, 108, 108
# for segmentation, 64, 160, 160 => 76, 196, 196
pred_sitk_obj = generate_sitk_obj_from_npy_array(
    image_sitk_obj, label_prediction, True,
    os.path.join(dir_name, "{}_{}_prediction.nrrd".format(dataset, patient_id)))

# get arrays from data
image_arr_org = sitk.GetArrayFromImage(image_sitk_obj)
label_arr_org = sitk.GetArrayFromImage(label_sitk_obj)
# get array from prediction
pred_arr_org = sitk.GetArrayFromImage(pred_sitk_obj)

# metrics
result, dice, bbox_metrics = calculate_metrics(
    patient_id, spacing, label_arr_org, pred_arr_org, HAUSDORFF_PERCENT,
    OVERLAP_TOLERANCE, SURFACE_DICE_TOLERANCE)

# append
results.append(result)

# plot 5x3 views
plot_images(dataset, patient_id, image_arr_org, label_arr_org, pred_arr_org,
            dir_name, True, bbox_metrics, dice)
print("{} done. dice :: {}".format(patient_id, result["dice"]))

# extract ROI from image_interpolated_resized
if SAVE_CANDIDATES:
    # create folder
    dir = "{}/{}/{}".format(MASTER_FOLDER, dataset,
                            IMAGE_INTERPOLATED_ROI_PR_FOLDER)
    if not os.path.exists(dir):
"Recall", "Specifity", ] metrics = [ "g_mean", "precision", "recall", "specifity", ] experiment_name = "final" calculate_metrics(method_names, streams, metrics, experiment_name, recount=True) plt = Ploting() plt.plot_streams_matplotlib(method_names, streams, metrics, experiment_name, gauss=3, methods_alias=methods_alias, metrics_alias=metrics_alias) rnk = Ranking() rnk.pairs_metrics(method_names, streams,
def train_classifier(train_loader, model, optimizer, loss_function,
                     labels_names):
    """Train the model on all training batches from one epoch.

    The same train function is used to train the base and TCN classifiers,
    and to train unimodal and multimodal models.

    Args:
        train_loader: PyTorch train data loader
        model: PyTorch model
        optimizer: PyTorch optimizer
        loss_function: PyTorch loss function
        labels_names (array/list): Names of labels/targets

    Returns:
        train_metrics (dict of dict of lists): Dictionary that maps
            class/label/target name and metric name to a corresponding
            training metric value. Besides the class/label/target names
            provided in labels_names, it also contains a key 'overall'
            referring to the average over all targets/labels.
    """
    # Aggregates from all training batches from one epoch
    all_predictions = []
    all_labels = []
    all_losses = []

    # Apply loss weights based on the train dataset class weights
    loss_function.pos_weight = train_loader.dataset.pos_class_weights.cuda()

    for i, train_data in enumerate(tqdm(train_loader, desc=' train')):
        features, labels = train_data

        optimizer.zero_grad()

        features = [f.cuda() for f in features]
        labels = labels.cuda()

        logits, predictions = model.forward(*features)
        # Get (already weighted) loss matrix (batch_size x class_num)
        batch_losses = loss_function(logits, labels)

        # Reduce/average loss for each class. With reduction='none', the
        # loss needs to be normalized manually.
        pos_labels_cnt = labels.sum(dim=0)
        neg_labels_cnt = len(labels) - pos_labels_cnt
        class_losses = batch_losses.sum(dim=0) / (
            loss_function.pos_weight * pos_labels_cnt + 1. * neg_labels_cnt)
        loss = class_losses.mean()

        loss.backward()
        optimizer.step()

        all_predictions.extend(predictions.tolist())
        all_labels.extend(labels.tolist())
        all_losses.extend(batch_losses.tolist())

    # Calculate training metrics
    train_metrics = calculate_metrics(
        all_predictions, all_labels, labels_names,
        losses=all_losses,
        loss_function_pos_weights=loss_function.pos_weight.tolist())
    return train_metrics
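
# A condensed sketch of the per-class metrics dictionary that the
# train/validate/test functions above expect: calculate_metrics is assumed to
# return {class_name: {metric_name: value}} plus an 'overall' average. The
# metric set shown (accuracy/F1 and, when losses are given, a normalized
# loss) is illustrative, not the original implementation.
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def calculate_metrics(predictions, labels, labels_names,
                      losses=None, loss_function_pos_weights=None):
    predictions = np.asarray(predictions)  # (n_samples, n_classes)
    labels = np.asarray(labels)
    metrics = {}
    for c, name in enumerate(labels_names):
        metrics[name] = {
            'accuracy': accuracy_score(labels[:, c], predictions[:, c]),
            'f1': f1_score(labels[:, c], predictions[:, c], zero_division=0),
        }
        if losses is not None:
            # Normalize the summed per-class loss the same way the training
            # loop does: positive samples are weighted by pos_weight
            losses_arr = np.asarray(losses)
            pos_cnt = labels[:, c].sum()
            neg_cnt = len(labels) - pos_cnt
            w = loss_function_pos_weights[c]
            metrics[name]['loss'] = losses_arr[:, c].sum() / (w * pos_cnt + neg_cnt)
    # 'overall' holds the average of every metric across all classes
    metrics['overall'] = {
        m: float(np.mean([metrics[n][m] for n in labels_names]))
        for m in next(iter(metrics.values()))
    }
    return metrics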
def train(self, X, label, X_val=None, label_val=None, X_test=None,
          label_test=None, param=None):
    """SGD training of a linear SVM-style classifier (hinge loss with weight shrinkage)."""
    eta1 = param.eta_1
    C = param.C
    maxium_epoch = param.maxium_epoch
    decay = param.decay
    eta_origin = eta1
    np.random.seed(2)

    epoch_train_acc = []
    best_val = 0.0

    W = np.random.normal(0, 0.001, X.shape[1])
    b = np.random.normal(0, 0.001, (1))
    W_best = W
    b_best = b
    d = 1

    for j in range(maxium_epoch):
        lr = eta_origin
        if decay == True:
            d = 1 / (1 + j)

        # Visit the training samples in a random order
        ind = np.arange(X.shape[0])
        np.random.shuffle(ind)
        for hh in range(X.shape[0]):
            k = ind[hh]
            x = X[k, :]
            l = label[k]

            lr = eta1
            if hasattr(param, "pos_weight") and l == 1:
                lr = eta1 * param.pos_weight

            if l * (x @ W + b) <= 1:
                # Sample violates the margin: shrinkage + hinge update
                W = (1 - lr) * W + x * l * lr * d * C
                b = (1 - lr) * b + l * lr * d * C
            else:
                # Shrinkage-only (regularization) update
                W = (1 - lr) * W
                b = (1 - lr) * b

        train_acc = calculate_metrics(X, label, W, b, param)
        epoch_train_acc.append(train_acc)

        if j % param.valid_each == 0:
            val_acc = calculate_metrics(X_val, label_val, W, b, param)
            if val_acc > best_val:
                best_val = val_acc
                W_best = W
                b_best = b

    results = {
        "metrics": best_val,
        "W": W_best,
        "b": b_best,
        "param": [W_best, b_best]
    }
    return results
label_high = np.ones([feat_high.shape[0], 1])
x = np.vstack((feat_low, feat_high))
y = np.vstack((label_low, label_high))

logo = LeaveOneGroupOut()
logo.get_n_splits(x, y, y_subs)

acc_list = []
for train_index, test_index in logo.split(x, y, y_subs):
    model = RandomForestClassifier(n_estimators=20)
    x_train, x_source_test, y_train, y_source_test = train_test_split(
        x[train_index, :], y[train_index, :].ravel(), test_size=0.33,
        stratify=y_subs[train_index])
    acc_test, acc_train = utils.calculate_metrics(
        model, x_train, y_train, x[test_index, :], y[test_index, :].ravel(),
        x_source_test, y_source_test)
    acc_test = max(acc_test, 1 - acc_test)
    results_dict_test[norm].append(np.asarray(acc_test))
    acc_train = max(acc_train, 1 - acc_train)
    results_dict_train[norm].append(np.asarray(acc_train))

pd.DataFrame.from_dict(results_dict_train).to_csv(
    './Results/all_normalizations_classification_train.csv', index=False)
pd.DataFrame.from_dict(results_dict_test).to_csv(
    './Results/all_normalizations_classification_test.csv', index=False)
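
# A minimal sketch of utils.calculate_metrics as used above: fit the model on
# the training split and return accuracies on the held-out group and on the
# within-source test split. The signature is taken from the call site; the
# body is an assumption.
def calculate_metrics(model, x_train, y_train, x_test, y_test,
                      x_source_test, y_source_test):
    model.fit(x_train, y_train)
    acc_test = model.score(x_test, y_test)  # left-out group
    acc_train = model.score(x_source_test, y_source_test)  # source hold-out
    return acc_test, acc_train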
def val_full_video(dataloader, dataset, model, device, classify_thresh,
                   pred_size, contacts_out_path=None, viz_out_path=None,
                   fps=30):
    '''
    Evaluates each batch as if it contains the windows for every frame of a
    full video. Visualizes the result if desired.
    '''
    model.eval()

    loss_sum = 0.0
    loss_count = 0
    confusion_count = np.zeros((pred_size, 4), dtype=int)
    merged_confusion = np.zeros((4), dtype=int)
    batch_size = -1
    target_idx = -1
    for batch_idx, batch_data in enumerate(dataloader):
        # prepare the data for this batch
        input_data = batch_data['joint2d']
        if batch_size == -1:
            batch_size = input_data.size()[0]
        input_data = input_data.to(device)
        have_contacts = False
        if 'contacts' in batch_data.keys():
            label_data = batch_data['contacts'].to(device)
            if target_idx == -1:
                target_idx = label_data.size()[1] // 2
            have_contacts = True

        # run model
        output_data = model(input_data)  # B x contact_size x 4

        if have_contacts:
            loss = model.loss(output_data, label_data)
            for target_frame_idx in range(pred_size):
                n_tp, n_fp, n_fn, n_tn = model.accuracy(
                    output_data, label_data, thresh=classify_thresh,
                    tgt_frame=target_frame_idx)
                confusion_count[target_frame_idx] += np.array(
                    [n_tp, n_fp, n_fn, n_tn], dtype=int)
            # save loss
            loss_sum += torch.sum(loss).to('cpu').item()
            loss_count += loss.size()[0] * loss.size()[1] * loss.size()[2]

        # merge together to get full video labels
        model_predictions, model_probs = model.prediction(output_data)
        model_predictions = model_predictions.to('cpu').numpy()  # B x contact_size x 4

        # sliding window through entire video to aggregate votes
        window_size = input_data.size()[1]
        vote_aggregation = np.zeros((batch_size + 2 * (pred_size // 2), 4))
        # only collect for frames we directly predict
        for window_start_idx in range(model_predictions.shape[0]):
            window_end_idx = window_start_idx + pred_size
            vote_aggregation[window_start_idx:window_end_idx] += \
                model_predictions[window_start_idx]

        # define threshold for considering in contact
        # on edges there are fewer possible votes b/c those frames were never
        # a target frame; must account for this
        # don't need a majority to be considered in contact (this pushes
        # towards more false positives than negatives)
        vote_thresh = np.ones((vote_aggregation.shape[0])) * ((pred_size + 1) / 2)
        for edge_offset in range(pred_size - 1):
            vote_thresh[edge_offset] = (edge_offset // 2) + 1
            vote_thresh[(-1 - edge_offset)] = (edge_offset // 2) + 1
        vote_predictions = vote_aggregation >= vote_thresh.reshape((-1, 1))
        contact_preds = vote_predictions.astype(int)

        # NOTE: uncomment this to turn off vote merging (majority voting)
        # # want contact predictions
        # contact_preds = model_predictions[:, target_idx, :].copy()  # B x 4
        # # still need edges; take as much as we can from predictions
        # leading_preds = model_predictions[0, :target_idx, :].reshape((-1, 4))
        # tailing_preds = model_predictions[batch_size-1, target_idx+1:, :].reshape((-1, 4))
        # contact_preds = np.concatenate([leading_preds, contact_preds, tailing_preds], axis=0)

        # fill in the rest with copies
        contact_offset = (window_size - pred_size) // 2
        leading_pad = np.repeat(contact_preds[0].reshape((1, 4)),
                                contact_offset, axis=0)
        tailing_pad = np.repeat(contact_preds[-1].reshape((1, 4)),
                                contact_offset, axis=0)
        contact_preds = np.concatenate(
            [leading_pad, contact_preds, tailing_pad], axis=0)  # F x 4

        if have_contacts:
            # same thing for labels
            contact_label_data = label_data.to('cpu').numpy()  # B x contact_size x 4
            contact_labels = contact_label_data[:, target_idx, :].copy()  # B x 4
            leading_labels = contact_label_data[0, :target_idx, :].reshape((-1, 4))
            tailing_labels = contact_label_data[batch_size - 1,
                                                target_idx + 1:, :].reshape((-1, 4))
            contact_labels = np.concatenate(
                [leading_labels, contact_labels, tailing_labels], axis=0)
            # fill in the rest with copies
            leading_pad = np.repeat(contact_labels[0].reshape((1, 4)),
                                    contact_offset, axis=0)
            tailing_pad = np.repeat(contact_labels[-1].reshape((1, 4)),
                                    contact_offset, axis=0)
            contact_labels = np.concatenate(
                [leading_pad, contact_labels, tailing_pad], axis=0)  # F x 4

            # evaluate accuracy after merging
            n_tp, n_fp, n_fn, n_tn = model.accuracy(
                torch.from_numpy(contact_preds.reshape((-1, 1, 4))).to(torch.float),
                torch.from_numpy(contact_labels.reshape((-1, 1, 4))).to(torch.float),
                thresh=0.5, tgt_frame=0)
            merged_confusion += np.array([n_tp, n_fp, n_fn, n_tn], dtype=int)

        # save predictions
        if contacts_out_path:
            video_name = batch_data['name'][0]
            contact_dir_out = os.path.join(contacts_out_path, video_name)
            if not os.path.exists(contact_dir_out):
                os.makedirs(contact_dir_out, exist_ok=True)
            contact_path_out = os.path.join(contact_dir_out, 'foot_contacts')
            true_seq_len = batch_data['seq_len'][0]
            # trim to actual seq_len
            save_contact_preds = contact_preds.astype(int)[:true_seq_len]
            np.save(contact_path_out, save_contact_preds)

        # visualization
        if have_contacts and viz_out_path:
            video_name = batch_data['name'][0]
            result_vid_path = os.path.join(
                viz_out_path, video_name.replace('/', '-') + '.mp4')
            print('Saving video to %s...' % (result_vid_path))
            frame_data = batch_data['frames'].to('cpu').numpy()  # B x H x W x 3
            _, H, W, _ = frame_data.shape
            # frame data is only target frames; need leading and trailing
            # frames for the whole first and last window
            leading_frames = []
            trailing_frames = []
            for frame_idx in range(model.window_size // 2):
                cur_frame_paths = None
                if isinstance(dataset, RealVideoDataset):
                    cur_frame_paths = dataset.frame_paths[batch_idx]
                else:
                    # synthetic dataset
                    cur_frame_paths = get_frame_paths(dataset.view_dirs[batch_idx])
                cur_lead_frame = io.imread(
                    cur_frame_paths[frame_idx])[:, :, :3]  # remove alpha
                cur_trail_frame = io.imread(
                    cur_frame_paths[-(frame_idx + 1)])[:, :, :3]
                if cur_lead_frame.shape[0] != H or cur_lead_frame.shape[1] != W:
                    cur_lead_frame = transform.resize(
                        cur_lead_frame, (TRAIN_DIM[1], TRAIN_DIM[0]))
                    cur_trail_frame = transform.resize(
                        cur_trail_frame, (TRAIN_DIM[1], TRAIN_DIM[0]))
                leading_frames.append(cur_lead_frame)
                trailing_frames.append(cur_trail_frame)
            leading_frames = np.stack(leading_frames, axis=0)
            trailing_frames = np.stack(trailing_frames[::-1], axis=0)
            frame_seq = np.concatenate(
                [leading_frames, frame_data, trailing_frames], axis=0)

            # need 2d joint sequence
            joint_data = input_data.to('cpu').numpy()  # B x window_size x J x 3
            window_tgt = joint_data.shape[1] // 2
            joint2d_seq = joint_data[:, window_tgt, :, :2].copy()  # B x J x 2 (x, y)
            # unnormalize
            root_idx = openpose_dataset.OP_LOWER_JOINTS_MAP['MidHip']
            joint_trans_normalization = joint2d_seq[:, root_idx, :].copy()
            # zero it out so it's correct when added back in
            joint2d_seq[:, root_idx, :] -= joint_trans_normalization
            joint_trans_normalization = joint_trans_normalization.reshape(
                (batch_size, 1, 2))
            joint2d_seq += joint_trans_normalization

            # need all frames
            num_joints = input_data.size()[2]
            leading_joint2d = joint_data[0, :window_tgt, :, :2].reshape(
                (-1, num_joints, 2))
            leading_joint2d += joint_data[0, window_tgt, root_idx, :2].reshape(
                (1, 1, 2))  # unnormalize
            tailing_joint2d = joint_data[batch_size - 1,
                                         window_tgt + 1:, :, :2].reshape(
                                             (-1, num_joints, 2))
            tailing_joint2d += joint_data[batch_size - 1, window_tgt,
                                          root_idx, :2].reshape((1, 1, 2))  # unnormalize
            joint2d_seq = np.concatenate(
                [leading_joint2d, joint2d_seq, tailing_joint2d], axis=0)
            joint2d_seq *= dataset.get_joint_scaling()

            # now visualize
            viz_full_video_simple(frame_seq, joint2d_seq, contact_preds,
                                  contact_labels, show=False,
                                  save_path=result_vid_path, fps=fps)
            # viz_full_video_simple(frame_seq, joint2d_seq, contact_preds,
            #                       contact_labels, show=True, save_path=None, fps=fps)

            frame_data = None
            frame_seq = None
            gc.collect()

    mean_loss = 0.0
    metrics = []
    merged_metrics = None
    if have_contacts:
        mean_loss = loss_sum / loss_count
        metrics = []
        for target_frame_idx in range(pred_size):
            metrics.append(calculate_metrics(confusion_count[target_frame_idx]))
        merged_metrics = calculate_metrics(merged_confusion)
    return mean_loss, metrics, merged_metrics
    # epoch_loss_list2.append(valid_loss)
    print('Epoch', epoch + 1, 'completed out of', epochs, 'loss:', epoch_loss,
          ' validation loss: ', valid_loss)
    # utils.plot_epoch_loss(epoch_index_list, epoch_loss_list, epoch_loss_list2)

print("Optimization Finished!")

# testing
net_output = tf.round(tf.nn.sigmoid(logit))
correct_preds = tf.equal(net_output, yplaceholder)
accuracy = tf.reduce_mean(tf.cast(correct_preds, "float"))

test_inputs = test_inputs.reshape((-1, input_length, number_of_sequences))
t_labels = test_labels
test_labels = test_labels.reshape((-1, n_classes))

print("Tf-Accuracy: ",
      accuracy.eval({xplaceholder: test_inputs, yplaceholder: test_labels}))

predictions = correct_preds.eval({xplaceholder: test_inputs,
                                  yplaceholder: test_labels})
utils.figure_faults_timeseries(predictions, test_asset)

preds = sess.run([net_output], feed_dict={xplaceholder: test_inputs})
preds = np.array(preds)[0]
utils.calculate_metrics(preds, t_labels)
def train(flags):
    data_root = flags.data
    window_size = flags.window_size
    pred_size = flags.pred_size
    batch_size = flags.batch_size
    out_dir = flags.out
    num_epochs = flags.epochs
    val_every = flags.val_every
    classify_thresh = flags.classify_thresh
    # optim args
    lr = flags.lr
    betas = (flags.beta1, flags.beta2)
    eps = flags.eps
    weight_decay = flags.decay
    use_confidence = flags.use_confidence
    joint_set = flags.joint_set

    if not os.path.exists(data_root):
        print('Could not find training data at ' + data_root)
        return
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    weights_out_path = os.path.join(out_dir, 'op_only_weights.pth')
    best_weights_out_path = os.path.join(out_dir, 'op_only_weights_BEST.pth')

    # load training and validation data
    train_dataset = OpenPoseDataset(data_root, split='train',
                                    window_size=window_size,
                                    contact_size=pred_size,
                                    use_confidence=use_confidence,
                                    joint_set=joint_set)
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=2)
    val_dataset = OpenPoseDataset(data_root, split='val',
                                  window_size=window_size,
                                  contact_size=pred_size,
                                  use_confidence=use_confidence,
                                  joint_set=joint_set)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False, num_workers=2)

    num_joints = len(openpose_dataset.OP_JOINT_SUBSETS[train_dataset.joint_set])

    # create the model and optimizer
    device_str = 'cpu' if flags.cpu else None
    device = get_device(device_str)
    op_model = create_model(window_size, num_joints, pred_size, device,
                            use_confidence=use_confidence)
    op_optim = optim.Adam(op_model.parameters(), lr=lr, betas=betas,
                          eps=eps, weight_decay=weight_decay)

    model_parameters = filter(lambda p: p.requires_grad, op_model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Num model params: ' + str(params))

    # viz stats
    train_steps = []
    train_losses = []
    train_accs = []
    val_steps = []
    val_losses = []
    val_accs = []

    # train
    loss_sum = 0.0
    loss_count = 0
    best_val_f1 = -float('inf')
    confusion_count = np.zeros((4), dtype=int)
    for epoch_idx in range(num_epochs):
        for batch_idx, batch_data in enumerate(train_loader):
            # prepare the data for this batch
            input_data = batch_data['joint2d'].to(device)
            label_data = batch_data['contacts'].to(device)

            # zero the gradients
            op_optim.zero_grad()

            # forward + backward + optimize
            output_data = op_model(input_data)
            loss = op_model.loss(output_data, label_data)
            n_tp, n_fp, n_fn, n_tn = op_model.accuracy(output_data, label_data,
                                                       thresh=classify_thresh)
            loss = torch.mean(loss)
            loss.backward()
            op_optim.step()

            loss_sum += loss.to('cpu').item()
            loss_count += 1
            confusion_count += np.array([n_tp, n_fp, n_fn, n_tn], dtype=int)

        if epoch_idx % 5 == 0:
            print('=================== TRAIN (' + str(epoch_idx + 1) +
                  ' epochs) ================================================')
            mean_loss = loss_sum / loss_count
            print('Mean loss: %0.3f' % (mean_loss))
            loss_sum = 0.0
            loss_count = 0
            metrics = calculate_metrics(confusion_count)
            cur_acc, _, _, _, _ = metrics
            print_metrics(metrics)
            confusion_count = np.zeros((4), dtype=int)
            print('======================================================================================')

            train_steps.append(epoch_idx * len(train_loader) + batch_idx)
            train_losses.append(mean_loss)
            train_accs.append(cur_acc)
            # save plot
            plot_train_stats((train_steps, train_losses, train_accs),
                             (val_steps, val_losses, val_accs),
                             out_dir, accuracy_metrics=metrics)

        if epoch_idx % val_every == 0:
            # run on the validation data
            print('==================== VALIDATION (' + str(epoch_idx + 1) +
                  ' epochs) ===========================================')
            val_loss, val_metrics = val_epoch(val_loader, op_model, device,
                                              classify_thresh, pred_size)
            print('Mean Loss: %0.3f' % (val_loss))
            for tgt_frame_idx in range(pred_size):
                print('----- Pred Frame ' + str(tgt_frame_idx) + ' ------')
                print_metrics(val_metrics[tgt_frame_idx])
            # only want accuracy for the middle target frame
            val_acc, _, _, _, _ = val_metrics[pred_size // 2]
            print('======================================================================================')
            op_model.train()

            val_steps.append(epoch_idx * len(train_loader) + batch_idx)
            val_losses.append(val_loss)
            val_accs.append(val_acc)

            # save confusion matrix
            for tgt_frame_idx in range(pred_size):
                accuracy, precision, recall, f1, cm = val_metrics[tgt_frame_idx]
                plot_confusion_mat(
                    cm,
                    os.path.join(out_dir,
                                 'val_confusion_matrix_%d.png' % (tgt_frame_idx)))

            # also save model weights
            print('Saving checkpoint...')
            torch.save(op_model.state_dict(), weights_out_path)
            # check if this is the best so far (in terms of f1 score) and save;
            # note f1 here comes from the last target frame of the loop above
            if f1 > best_val_f1:
                best_val_f1 = f1
                print('Saving best model so far...')
                torch.save(op_model.state_dict(), best_weights_out_path)

    # save final model
    print('Saving final checkpoint...')
    torch.save(op_model.state_dict(),
               os.path.join(out_dir, 'op_only_weights_FINAL.pth'))
    # save plot
    metrics = calculate_metrics(confusion_count)
    plot_train_stats((train_steps, train_losses, train_accs),
                     (val_steps, val_losses, val_accs),
                     out_dir, accuracy_metrics=metrics)
    print('FINISHED Training!')
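
# A minimal sketch of the calculate_metrics helper used by the contact
# training/validation code above: it takes the aggregated (tp, fp, fn, tn)
# counts and returns the 5-tuple (accuracy, precision, recall, f1,
# confusion_matrix) that the callers unpack. The division guards are
# assumptions.
import numpy as np

def calculate_metrics(confusion_count):
    n_tp, n_fp, n_fn, n_tn = [float(c) for c in confusion_count]
    total = n_tp + n_fp + n_fn + n_tn
    accuracy = (n_tp + n_tn) / total if total > 0 else 0.0
    precision = n_tp / (n_tp + n_fp) if (n_tp + n_fp) > 0 else 0.0
    recall = n_tp / (n_tp + n_fn) if (n_tp + n_fn) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    cm = np.array([[n_tp, n_fp], [n_fn, n_tn]])
    return accuracy, precision, recall, f1, cm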
steps_in_epoch += 1
epoch_loss += total_loss
epoch_slot_loss += slot_loss
epoch_intent_loss += intent_loss

# if epoch is finished
if data_processor.end == 1:
    epochs += 1

    # clean up data_processor
    data_processor.close()
    data_processor = None

    # calculate train metrics
    f1, precision, recall, accuracy, semantic_acc = calculate_metrics(
        pred_intents, correct_intents, slot_outputs_pred, correct_slots)
    log_in_tensorboard(tb_log_writer, epochs, "train",
                       epoch_loss / steps_in_epoch,
                       epoch_slot_loss / steps_in_epoch,
                       epoch_intent_loss / steps_in_epoch, f1, accuracy,
                       semantic_acc)

    # reset steps and epoch loss
    steps_in_epoch = 0
    epoch_loss = 0.0
    epoch_slot_loss = 0.0
    epoch_intent_loss = 0.0

    # clean up epoch variables
    pred_intents = []
    correct_intents = []
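
# A hedged sketch of the joint-NLU metric helper assumed above: token-level
# slot F1/precision/recall, intent accuracy, and semantic accuracy (an
# utterance counts only if intent and every slot tag are correct). Real
# implementations often use chunk-level (CoNLL-style) slot F1 instead.
from sklearn.metrics import f1_score, precision_score, recall_score

def calculate_metrics(pred_intents, correct_intents, pred_slots, correct_slots):
    # Flatten per-utterance slot tag sequences to token level
    flat_pred = [t for sent in pred_slots for t in sent]
    flat_true = [t for sent in correct_slots for t in sent]
    f1 = f1_score(flat_true, flat_pred, average='micro')
    precision = precision_score(flat_true, flat_pred, average='micro')
    recall = recall_score(flat_true, flat_pred, average='micro')
    accuracy = sum(p == c for p, c in
                   zip(pred_intents, correct_intents)) / len(correct_intents)
    semantic_acc = sum(
        p == c and ps == cs
        for p, c, ps, cs in zip(pred_intents, correct_intents,
                                pred_slots, correct_slots)
    ) / len(correct_intents)
    return f1, precision, recall, accuracy, semantic_acc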
            close = divide_crypto(cryptos[key], ["BTC-USD", "ETH-USD"],
                                  close, 100)
            # Update wallet, ledger & portfolio
            wallet, ledger, portfolio = update_db(wallet, ledger, portfolio,
                                                  d, cryptos[key],
                                                  c_d_df["order_type"], close)
        except KeyError as e:
            print(f"KeyError -- {cryptos[key]} -- {e}")

# ROI
portfolio = undivide_crypto(["BTC-USD", "ETH-USD"], portfolio, 100)
portfolio_value, overall_value, roi = calculate_metrics(
    portfolio, df_list, cryptos, wallet, 5000)
display_results(cryptos, 5000, wallet, portfolio, portfolio_value,
                overall_value, roi)
generate_log(ledger)

ledger_df = pd.DataFrame(ledger)
ledger_df.columns = [
    "date", "symbol", "order_type", "close", "qty", "order_value"
]

signals_df = {}
for crypto in cryptos:
    signals_df[crypto] = {}
    buy_df = ledger_df[(ledger_df["symbol"] == crypto)
                       & (ledger_df["order_type"] == "buy")].copy()
                              nb_steps=EPS_STEPS)
dqn = DQNAgent(model=model, policy=policy, nb_actions=2, memory=memory,
               processor=processor, nb_steps_warmup=WARMUP_STEPS, gamma=GAMMA,
               target_model_update=TARGET_MODEL_UPDATE, train_interval=4,
               delta_clip=1, batch_size=BATCH_SIZE,
               enable_double_dqn=DOUBLE_DQN)
dqn.compile(Adam(lr=LR))

metrics = Metrics(X_val, y_val)
dqn.fit(env, nb_steps=TRAINING_STEPS, log_interval=LOG_INTERVAL,
        callbacks=[metrics], verbose=0)

y_pred = make_predictions(dqn.target_model, X_test)
stats = calculate_metrics(y_test, y_pred)  # Get stats as dictionary

writer.writerow(stats)  # Write dictionary as row
f.flush()  # Save results to file between iterations so nothing is lost

if not i % LOG_EVERY:
    print(f"{i}: FN: {stats.get('FN')}, FP: {stats.get('FP')}")
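
# A minimal sketch of the calculate_metrics used by the DQN code above: it
# must return a flat dict so stats.get('FN')/stats.get('FP') work and every
# entry can be written to TensorBoard or CSV. The metric selection beyond the
# confusion counts is an assumption.
from sklearn.metrics import confusion_matrix, f1_score

def calculate_metrics(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return {
        "TP": int(tp), "FP": int(fp), "FN": int(fn), "TN": int(tn),
        "F1": f1_score(y_true, y_pred),
    }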
def test(weights_path: Optional[str], only_show: bool) -> None:
    """
    Test the classifier and visualize predictions.

    :param weights_path: path to saved weights or None.
    :param only_show: if True, only show predictions and labels. If False,
        calculate test metrics before showing.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    test_loader = get_data_loader(is_train=False)
    class_names = test_loader.dataset.classes
    class_names = [CLASS_NAMES[name] for name in class_names]
    testing_samples_num = len(test_loader.dataset)

    model = get_model(weights_path)
    model = model.to(device)
    loss_func = torch.nn.CrossEntropyLoss()

    # Set model to evaluating mode.
    model.eval()

    running_loss, running_corrects = 0.0, 0
    if not only_show:
        all_labels, all_predictions = [], []
        for inputs, labels in tqdm(test_loader, desc='Testing. Batch'):
            all_labels.append(labels.numpy())
            inputs = inputs.to(device)
            labels = labels.to(device)

            with torch.set_grad_enabled(False):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = loss_func(outputs, labels)
            all_predictions.append(preds.cpu().numpy())

            # Get loss and corrects.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / testing_samples_num
        epoch_acc = running_corrects.double() / testing_samples_num
        print('Testing: loss = {:.4f}, accuracy = {:.4f}.\n'.format(
            epoch_loss, epoch_acc))

        report = calculate_metrics(all_labels, all_predictions, class_names)
        print(report)

    test_loader = get_data_loader(is_train=False, batch_size=1)
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            outputs_softmax = torch.nn.Softmax(-1).forward(outputs)

        image = inputs.cpu().numpy()[0, :, :, :].transpose((1, 2, 0))
        image = np.clip(image * NORMALIZE_STD + NORMALIZE_MEAN, 0, 1)
        label_class = class_names[labels.numpy()[0]]
        pred_class_index = preds.cpu().numpy()[0]
        pred_class = class_names[pred_class_index]
        pred_value = outputs_softmax.cpu().numpy()[0, pred_class_index]

        plt.imshow(image)
        plt.title('Prediction: {}, {:.02f}%.\nLabel: {}.'.format(
            pred_class, pred_value * 100, label_class))
        if plt.waitforbuttonpress(0):
            plt.close('all')
            return
if test_mode == 0:
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Tf-Accuracy:",
          accuracy.eval({X: test_inputs, Y: test_labels, keep_prob: 1.0}))

    y_p = tf.argmax(logits, 1)
    preds = sess.run([y_p], feed_dict={X: test_inputs, keep_prob: 1.0})
    preds = np.array(preds)[0]
    utils.calculate_metrics(preds, np.argmax(test_labels, 1))

    predictions = correct_prediction.eval({
        X: test_inputs,
        Y: test_labels,
        keep_prob: 1.0
    })
    utils.figure_faults_timeseries(predictions, test_asset)
else:
    Accuracy = 0
    pred_list = []
    for i in range(test_iters):
        next_input = utils.next_batch(test_inputs, i, batch_size)
def train(self, X, label, X_val=None, label_val=None, param=None):
    """Perceptron training with an averaged-weight (running mean) variant for validation."""
    eta1 = param.eta_1
    maxium_epoch = param.maxium_epoch
    decay = param.decay
    eta_origin = eta1
    np.random.seed(2)

    epoch_train_acc = []
    best_val = 0.0
    time_step = 0

    W = np.random.uniform(-0.01, 0.01, X.shape[1])
    b = np.random.uniform(-0.01, 0.01, (1))
    W_best = W
    b_best = b
    d = 1
    # Exponential moving averages of the weights, used for validation
    running_W = W
    running_b = b

    for j in range(maxium_epoch + 1):
        lr = eta_origin
        if decay == True:
            d = 1 / (1 + j)

        # Visit the training samples in a random order
        ind = np.arange(X.shape[0])
        np.random.shuffle(ind)
        for hh in range(X.shape[0]):
            k = ind[hh]
            x = X[k, :]
            l = label[k]

            running_W = 0.99 * running_W + W * 0.01
            running_b = b * 0.01 + 0.99 * running_b

            # Perceptron update on misclassified samples
            if l * (x @ W + b) <= 0:
                lr = eta1
                if hasattr(param, "pos_weight") and l == 1:
                    lr = eta1 * param.pos_weight
                time_step = time_step + 1
                W = W + x * l * lr * d
                b = b + l * lr * d

        train_acc = calculate_metrics(X, label, W, b, param)
        if param.verbose == 1:
            print("epoch: #", str(j),
                  " current training " + param.metrics + " is", str(train_acc))
        epoch_train_acc.append(train_acc)

        if j % param.valid_each == 0:
            val_acc = calculate_metrics(X_val, label_val, running_W,
                                        running_b, param)
            if param.verbose == 1:
                print("epoch: #", str(j),
                      " current validation " + param.metrics + " is",
                      str(val_acc))
            if val_acc > best_val:
                best_val = val_acc
                W_best = running_W
                b_best = running_b

    results = {
        "W": W_best,
        "b": b_best,
        "metrics": best_val,
        "param": [W_best, b_best]
    }
    return results
                    m.dropout_keep_prob: 1.
                })
        else:
            cost, b_id = sess.run(
                [m.classify_loss, m.baseline_sample_id],
                feed_dict={
                    m.encoder_inputs: X,
                    m.encoder_inputs_length: X_len,
                    m.decoder_inputs: Y,
                    m.decoder_inputs_length: Y_len,
                    m.decoder_targets: Y_t,
                    m.dropout_keep_prob: 1.
                })
            baseline = calculate_metrics(b_id, np.transpose(Y_t), Y_len,
                                         metric="bleu")
            valid_score += np.mean(baseline)

        cost = cost * len(X[0])
        valid_cost += cost
        valid_step += 1
        valid_len += len_

    avg_valid_score = valid_score / valid_step
    avg_cost = valid_cost / valid_step
    valid_perplexity = np.exp(valid_cost / valid_len)

    if valid_perplexity < best_valid_perplexity:
        best_valid_perplexity = valid_perplexity
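
# A hedged sketch of the sequence-level metric helper assumed above: given
# decoded ids (batch x time), reference ids, and reference lengths, return a
# per-sample BLEU array. NLTK's sentence_bleu with smoothing stands in for
# whatever the original used.
import numpy as np
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_metrics(sample_ids, target_ids, target_lens, metric="bleu"):
    assert metric == "bleu"
    smooth = SmoothingFunction().method1
    scores = []
    for hyp, ref, length in zip(sample_ids, target_ids, target_lens):
        ref_tokens = [str(t) for t in ref[:length]]
        hyp_tokens = [str(t) for t in hyp[:length]]
        scores.append(sentence_bleu([ref_tokens], hyp_tokens,
                                    smoothing_function=smooth))
    return np.array(scores)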
if not os.path.exists('../results/'):
    os.makedirs('../results/')

# save result to prediction file
prediction_file = os.path.join(
    '../results/',
    'predictions_{}.txt'.format(args.model_name + '_for_test_set_' +
                                str(args.ini_epochs)))

with open(prediction_file, "w") as f:
    for pred, gt in zip(predicts, labels):
        f.write("Y {}\nP {}\n\n".format(gt, pred))

# calculate metrics to assess the model
evaluate = utils.calculate_metrics(predicts=predicts, ground_truth=labels)

e_corpus = "\n".join([
    "Total test audios: {}".format(len(labels)),
    "Total time: {}\n".format(total_time),
    "Metrics:",
    "Character Error Rate: {}".format(evaluate[0]),
    "Word Error Rate: {}".format(evaluate[1]),
    "Sequence Error Rate: {}".format(evaluate[2]),
])

evaluate_file = os.path.join(
    '../results/',
    "evaluate_{}.txt".format(args.model_name + '_for_test_set_' +
                             str(args.ini_epochs)))

with open(evaluate_file, "w") as ev_f:
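
# A minimal sketch of utils.calculate_metrics as indexed above
# (evaluate[0..2] = character/word/sequence error rate), built on Levenshtein
# distance; the editdistance dependency is an assumption.
import editdistance

def calculate_metrics(predicts, ground_truth):
    cer_num = cer_den = wer_num = wer_den = ser_num = 0
    for pd_text, gt_text in zip(predicts, ground_truth):
        cer_num += editdistance.eval(list(pd_text), list(gt_text))
        cer_den += len(gt_text)
        wer_num += editdistance.eval(pd_text.split(), gt_text.split())
        wer_den += len(gt_text.split())
        ser_num += int(pd_text != gt_text)
    return (cer_num / max(cer_den, 1),
            wer_num / max(wer_den, 1),
            ser_num / max(len(ground_truth), 1))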
def main():
    parser = argparse.ArgumentParser()
    # frame number 0 means "evaluate the whole split"
    parser.add_argument('frame_number', type=int, nargs='?', default=0)
    parser.add_argument('-m', '--mode', help='train|test', default='test')
    parser.add_argument('--export-image',
                        help='export predicted image, gt and intensity',
                        action='store_true')
    args = parser.parse_args()
    item_no = args.frame_number

    squeeze_seg = SqueezeSeg()

    file_names = []
    mean_iou = {'unknown': [], 'car': [], 'pedestrian': [], 'cyclist': []}
    mean_precision = {'unknown': [], 'car': [], 'pedestrian': [], 'cyclist': []}
    mean_recall = {'unknown': [], 'car': [], 'pedestrian': [], 'cyclist': []}

    if item_no == 0:
        data_set_paths = glob(
            os.path.join('..', 'dataset', 'lidar_2d', args.mode, '*.npy'))
        for path in data_set_paths:
            file_names.append(os.path.splitext(path)[0].split('/')[-1])
    else:
        file_names.append(item_no)

    classifier = tf.estimator.Estimator(
        model_fn=squeeze_seg.squeeze_seg_fn,
        model_dir='./model/',
    )

    idx = 0
    for item_no in file_names:
        print('{}/{}'.format(idx, len(file_names)))
        image, labels = get_item(item_no, mode=args.mode)

        eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": image},
                                                           num_epochs=1,
                                                           shuffle=False)
        before = time()
        predictions = classifier.predict(input_fn=eval_input_fn)
        after = time()
        pred = next(predictions)['classes']

        ious, precisions, recalls = calculate_metrics(labels, pred, 4)
        print('IOUS: {}'.format(ious))
        print('PRECISIONS: {}'.format(precisions))
        print('RECALL: {}'.format(recalls))
        print('Time elapsed: {}'.format((after - before) * 1000))

        for cls, iou in ious.items():
            mean_iou[cls].append(iou)
        for cls, precision in precisions.items():
            mean_precision[cls].append(precision)
        for cls, recall in recalls.items():
            mean_recall[cls].append(recall)

        if args.export_image:
            export_image('intensity-{}-pred.png'.format(item_no),
                         image[:, :, 0])
            export_image('gt-{}-pred.png'.format(item_no), labels)
            pred[pred == 1] = 5e6
            pred[pred == 2] = 10e6
            pred[pred == 3] = 15e6
            pred[pred == 0] = 0
            export_image('predict-{}-pred.png'.format(item_no), pred)
        idx += 1

    mean_iou = {cls: mean(iou) for cls, iou in mean_iou.items()}
    mean_precision = {cls: mean(precision)
                      for cls, precision in mean_precision.items()}
    mean_recall = {cls: mean(recall) for cls, recall in mean_recall.items()}
    print('IoU:', mean_iou)
    print('PRECISION:', mean_precision)
    print('RECALL:', mean_recall)
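
# A hedged sketch of the segmentation metric helper assumed by main(): it
# must return per-class IoU, precision, and recall dictionaries keyed by the
# class names the caller aggregates. The id->name mapping is an assumption.
import numpy as np

CLASS_NAMES_BY_ID = {0: 'unknown', 1: 'car', 2: 'pedestrian', 3: 'cyclist'}

def calculate_metrics(labels, pred, num_classes):
    ious, precisions, recalls = {}, {}, {}
    for cls_id in range(num_classes):
        name = CLASS_NAMES_BY_ID[cls_id]
        tp = np.sum((labels == cls_id) & (pred == cls_id))
        fp = np.sum((labels != cls_id) & (pred == cls_id))
        fn = np.sum((labels == cls_id) & (pred != cls_id))
        ious[name] = tp / max(tp + fp + fn, 1)
        precisions[name] = tp / max(tp + fp, 1)
        recalls[name] = tp / max(tp + fn, 1)
    return ious, precisions, recalls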
elif args.pretrain == 'txt':
    model.pretrain('txt')
elif args.pretrain == 'load_all':
    model.load_cpt(args.dm2c_cptpath)
elif args.pretrain == 'load_ae':
    model.load_pretrain_cpt(args.img_cptpath, 'img', only_weight=True)
    model.load_pretrain_cpt(args.txt_cptpath, 'txt', only_weight=True)

for epoch in range(args.n_epochs):
    model.train(epoch)

    train_embedding, train_target, train_modality = model.embedding(
        model.train_loader_ordered, unify_modal='img')
    test_embedding, test_target, test_modality = model.embedding(
        model.test_loader, unify_modal='img')

    kmeans = KMeans(config['n_clusters'], max_iter=1000, tol=5e-5,
                    n_init=20).fit(train_embedding)
    train_metrics = calculate_metrics(train_target, kmeans.labels_)
    y_pred = kmeans.predict(test_embedding)
    test_metrics = calculate_metrics(test_target, y_pred)
    print('>Train', METRIC_PRINT.format(*train_metrics))
    print('>Test ', METRIC_PRINT.format(*test_metrics))

    # sio.savemat('result/result_{}.mat'.format(epoch),
    #             {'X_embed_train': train_embedding,
    #              'y_pred_train': kmeans.predict(train_embedding),
    #              'y_true_train': train_target,
    #              'modal_train': train_modality,
    #              'X_embed_test': test_embedding,
    #              'y_pred_test': y_pred,
    #              'y_true_test': test_target,
    #              'modal_test': test_modality})
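
# A minimal sketch of the clustering calculate_metrics assumed above:
# clustering accuracy via the Hungarian assignment plus NMI and ARI, returned
# as a tuple for METRIC_PRINT. The exact metric set is an assumption.
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

def calculate_metrics(y_true, y_pred):
    y_true = np.asarray(y_true).astype(int)
    y_pred = np.asarray(y_pred).astype(int)
    n = max(y_true.max(), y_pred.max()) + 1
    # Contingency matrix: counts of (predicted cluster, true class) pairs
    cost = np.zeros((n, n), dtype=int)
    for t, p in zip(y_true, y_pred):
        cost[p, t] += 1
    row, col = linear_sum_assignment(-cost)  # maximize matched counts
    acc = cost[row, col].sum() / len(y_true)
    return (acc,
            normalized_mutual_info_score(y_true, y_pred),
            adjusted_rand_score(y_true, y_pred))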