# Iris データセットの読み込み x, t = load_iris(return_X_y=True) x = x.astype('float32') t = t.astype('int32') dataset = TupleDataset(x, t) train_val, test = split_dataset_random(dataset, int(len(dataset) * 0.7), seed=0) train, valid = split_dataset_random(train_val, int(len(train_val) * 0.7), seed=0) train_iter = SerialIterator(train, batch_size=4, repeat=True, shuffle=True) optimizer = optimizers.MomentumSGD(lr=0.001, momentum=0.9) optimizer.setup(net) for param in net.params(): if param.name != 'b': # バイアス以外だったら param.update_rule.add_hook(WeightDecay(0.0001)) # 重み減衰を適用 n_batch = 64 # バッチサイズ n_epoch = 50 # エポック数 # ログ results_train, results_valid = {}, {} results_train['loss'], results_train['accuracy'] = [], [] results_valid['loss'], results_valid['accuracy'] = [], []
def setUp(self):
    """Build random fixtures for an apply-to-iterator style test.

    Populates, depending on the class flags ``multi_in_values``,
    ``multi_out_values``, ``with_rest_values`` and ``with_hook``:

    * ``self.in_values_expect`` — tuple of per-input lists of 5 random
      uint images of shape (3, H, W) with H, W in [8, 16).
    * ``self.func`` — stub prediction function returning random arrays
      (3 outputs when ``multi_out_values`` else 1).
    * ``self.rest_values_expect`` / ``self.dataset`` / ``self.iterator``.
    * ``self.hook`` — optional callback asserting value counts.

    NOTE(review): exact consumer is not visible from this chunk —
    presumably chainercv's ``apply_to_iterator``; confirm against the
    rest of the file.
    """
    # Number of input streams per example.
    if self.multi_in_values:
        self.n_input = 2
    else:
        self.n_input = 1

    # Five random CHW images per input stream.
    in_values_expect = []
    for _ in range(self.n_input):
        in_value = []
        for _ in range(5):
            H, W = np.random.randint(8, 16, size=2)
            in_value.append(np.random.randint(0, 256, size=(3, H, W)))
        in_values_expect.append(in_value)
    self.in_values_expect = tuple(in_values_expect)

    # Stub "model": returns one random array per sample per output.
    if self.multi_out_values:
        def func(*in_values):
            n_sample = len(in_values[0])
            return (
                [np.random.uniform(size=(10, 4)) for _ in range(n_sample)],
                [np.random.uniform(size=10) for _ in range(n_sample)],
                [np.random.uniform(size=10) for _ in range(n_sample)])
        self.n_output = 3
    else:
        def func(*in_values):
            n_sample = len(in_values[0])
            return [np.random.uniform(size=(48, 64))
                    for _ in range(n_sample)]
        self.n_output = 1
    self.func = func

    # Optional extra (non-input) per-example values carried through.
    if self.with_rest_values:
        strs = ['a', 'bc', 'def', 'ghij', 'klmno']
        nums = [0, 1, 2, 3, 4]
        arrays = [np.random.uniform(size=10) for _ in range(5)]
        self.rest_values_expect = (strs, nums, arrays)
        self.n_rest = 3
        self.dataset = chainer.datasets.TupleDataset(
            *(self.in_values_expect + self.rest_values_expect))
    else:
        self.rest_values_expect = ()
        self.n_rest = 0
        # Inputs only: zip streams into per-example tuples.
        self.dataset = list(zip(*self.in_values_expect))

    self.iterator = SerialIterator(
        self.dataset, 2, repeat=False, shuffle=False)

    # Optional hook verifying that every value group has one entry per
    # sample and the expected number of groups.
    if self.with_hook:
        def hook(in_values, out_values, rest_values):
            n_sample = len(in_values[0])
            self.assertEqual(len(in_values), self.n_input)
            for in_vals in in_values:
                self.assertEqual(len(in_vals), n_sample)

            self.assertEqual(len(out_values), self.n_output)
            for out_vals in out_values:
                self.assertEqual(len(out_vals), n_sample)

            self.assertEqual(len(rest_values), self.n_rest)
            for rest_vals in rest_values:
                self.assertEqual(len(rest_vals), n_sample)
        self.hook = hook
    else:
        self.hook = None
y_val = y_val.astype(xp.float32) # change shape #print(X_train) #print(X_val) #print(y_train) #print(y_val) X_train = X_train[xp.newaxis, :, :] X_val = X_val[xp.newaxis, :, :] y_train = y_train[xp.newaxis, :, :] y_val = y_val[xp.newaxis, :, :] ds_train = list(zip(X_train, y_train)) ds_val = list(zip(X_val, y_val)) print(len(ds_train[0][0])) print(len(ds_val[0][0])) # iterator itr_train = SerialIterator(ds_train, batch_size=10, shuffle=False) itr_val = SerialIterator(ds_val, batch_size=10, shuffle=False, repeat=False) # updater updater = UpdaterLSTM(itr_train, optimizer, device=gpu_device) # trainer trainer = training.Trainer(updater, (100000, 'epoch'), out='results') # evaluation eval_model = model.copy() eval_rnn = eval_model.predictor trainer.extend( extensions.Evaluator(itr_val, eval_model, device=gpu_device, eval_hook=lambda _: eval_rnn.reset_state()))
def main(): args = parse_arguments() # Set up some useful variables that will be used later on. dataset_name = args.dataset method = args.method num_data = args.num_data if args.label: labels = args.label cache_dir = os.path.join( 'input', '{}_{}_{}'.format(dataset_name, method, labels)) else: labels = None cache_dir = os.path.join('input', '{}_{}_all'.format(dataset_name, method)) # Load the cached dataset. filename = dataset_part_filename('test', num_data) path = os.path.join(cache_dir, filename) if os.path.exists(path): print('Loading cached dataset from {}.'.format(path)) test = NumpyTupleDataset.load(path) else: _, _, test = download_entire_dataset(dataset_name, num_data, labels, method, cache_dir) # # Load the standard scaler parameters, if necessary. # if args.scale == 'standardize': # scaler_path = os.path.join(args.in_dir, 'scaler.pkl') # print('Loading scaler parameters from {}.'.format(scaler_path)) # with open(scaler_path, mode='rb') as f: # scaler = pickle.load(f) # else: # print('No standard scaling was selected.') # scaler = None # Model-related data is stored this directory. 
model_dir = os.path.join(args.in_dir, os.path.basename(cache_dir)) model_filename = { 'classification': 'classifier.pkl', 'regression': 'regressor.pkl' } task_type = molnet_default_config[dataset_name]['task_type'] model_path = os.path.join(model_dir, model_filename[task_type]) print("model_path=" + model_path) print('Loading model weights from {}...'.format(model_path)) if task_type == 'classification': model = Classifier.load_pickle(model_path, device=args.gpu) elif task_type == 'regression': model = Regressor.load_pickle(model_path, device=args.gpu) else: raise ValueError('Invalid task type ({}) encountered when processing ' 'dataset ({}).'.format(task_type, dataset_name)) # Proposed by Ishiguro # ToDo: consider go/no-go with following modification # Re-load the best-validation score snapshot serializers.load_npz( os.path.join(model_dir, "best_val_" + model_filename[task_type]), model) # # Replace the default predictor with one that scales the output labels. # scaled_predictor = ScaledGraphConvPredictor(model.predictor) # scaled_predictor.scaler = scaler # model.predictor = scaled_predictor # Run an evaluator on the test dataset. print('Evaluating...') test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False) eval_result = Evaluator(test_iterator, model, converter=concat_mols, device=args.gpu)() print('Evaluation result: ', eval_result) # Proposed by Ishiguro: add more stats # ToDo: considre go/no-go with the following modification if task_type == 'regression': #loss = cuda.to_cpu(numpy.array(eval_result['main/loss'])) #eval_result['main/loss'] = loss # convert to native values.. 
for k, v in eval_result.items(): eval_result[k] = float(v) with open(os.path.join(args.in_dir, 'eval_result.json'), 'w') as f: json.dump(eval_result, f) # end-with elif task_type == "classification": # For Classifier, we do not equip the model with ROC-AUC evalation function # use a seperate ROC-AUC Evaluator here rocauc_result = ROCAUCEvaluator(test_iterator, model, converter=concat_mols, device=args.gpu, eval_func=model.predictor, name='test', ignore_labels=-1)() print('ROCAUC Evaluation result: ', rocauc_result) with open(os.path.join(args.in_dir, 'eval_result.json'), 'w') as f: json.dump(rocauc_result, f) else: pass # Save the evaluation results. with open(os.path.join(model_dir, 'eval_result.json'), 'w') as f: json.dump(eval_result, f)
# (fragment — continues a loop begun before this chunk)
score_name = os.path.basename(npy_path)
test_score_names.append(score_name)

# Prepare for inference: switch Chainer to test mode and load the
# snapshot chosen in the config.
chainer.global_config.train = False
model = Estimator()
model.to_gpu()
test_model_name = "snapshot_epoch_" + str(config["test_model_epoch"])
test_model_path = os.path.join(config["result_dir"], config["test_dir"],
                               MODEL_DIR, test_model_name)
# The snapshot stores the whole trainer; extract just the model weights.
load_npz(test_model_path, model, path="updater/model:main/")

test_dataset = TupleDataset(test_scores, test_score_names)
test_iterator = SerialIterator(test_dataset, int(config["batch_size"]),
                               repeat=False, shuffle=False)

test_log_name = "test_epoch_" + str(config["test_model_epoch"]) + ".txt"
test_log_path = os.path.join(config["result_dir"], config["test_dir"],
                             test_log_name)
unit_list = []
est_lv_list = []

# Inference: write a TSV log, one row per chart.
with open(test_log_path, "w") as log_f:
    log_f.write("\t".join(["score_name", "lv", "likelihoods"]) + "\n")
    remaining = len(test_dataset)
    while remaining > 0:
        # Last batch may be smaller than the iterator's batch size.
        batch_size = min(test_iterator.batch_size, remaining)
        # NOTE(review): loop body continues past this chunk; nesting of
        # the `with`/`while` above is inferred — confirm against the file.
        scores, names = concat_batch(test_iterator.next())
def main():
    """Train a graph-convolution regressor from GUI-selected settings.

    Reads the CSV/SMILES dataset chosen in the Tk widgets, optionally warm
    starts from a previously pickled regressor (transfer learning), trains,
    pickles the result, and reports scatter plot + regression metrics.

    Relies on module-level GUI state (``theme_name``, ``t_smiles``,
    ``canvas`` etc.) defined elsewhere in this file.
    """
    # Parse the arguments.
    args = parse_arguments()
    args.model_folder_name = os.path.join(theme_name, 'chainer')

    # Epoch budget scales with task complexity, floored at 5.
    base_epoch = complexity_degree[high_low]
    args.epoch = int(base_epoch * 60 / method_complexity[method_name])
    args.epoch = max(args.epoch, 5)
    #args.epoch = int(float(t_epochs.get()))

    args.out = parent_path / 'models' / theme_name / method_name / high_low
    args.method = method_name

    if t_model_path != "":
        args.source_transferlearning = Path(t_model_path.get())

    print(theme_name)

    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        return numpy.asarray(label_list, dtype=numpy.float32)

    # Apply a preprocessor to the dataset.
    print('Preprocessing dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    smiles_col_name = t_smiles.get()
    parser = CSVFileParser(preprocessor, postprocess_label=postprocess_label,
                           labels=labels, smiles_col=smiles_col_name)
    args.datafile = t_csv_filepath.get()
    dataset = parser.parse(args.datafile)['dataset']

    # Scale the label values, if necessary.
    if args.scale == 'standardize':
        scaler = StandardScaler()
        scaler.fit(dataset.get_datasets()[-1])
    else:
        scaler = None

    # Split the dataset into training and validation.
    train_data_size = int(len(dataset) * args.train_data_ratio)
    trainset, testset = split_dataset_random(dataset, train_data_size,
                                             args.seed)

    print((args.source_transferlearning / method_name / high_low /
           'regressor.pickle'))
    print((args.source_transferlearning / method_name / high_low /
           'regressor.pickle').exists())

    # Set up the predictor: reuse a pickled graph-conv backbone when
    # transfer learning is requested and a source model exists.
    if Booleanvar_transfer_learning.get() == True \
            and (args.source_transferlearning / method_name / high_low /
                 'regressor.pickle').exists() == True:
        # refer https://github.com/pfnet-research/chainer-chemistry/issues/407
        # NOTE(review): cloudpickle.loads executes arbitrary code — only
        # load model files from trusted sources.
        with open(args.source_transferlearning / method_name / high_low /
                  'regressor.pickle', 'rb') as f:
            regressor = cloudpickle.loads(f.read())
        pre_predictor = regressor.predictor
        predictor = GraphConvPredictor(pre_predictor.graph_conv,
                                       MLP(out_dim=1, hidden_dim=16))
    else:
        predictor = set_up_predictor(args.method, args.unit_num,
                                     args.conv_layers, class_num,
                                     label_scaler=scaler)

    # Set up the regressor.
    device = chainer.get_device(args.device)
    metrics_fun = {'mae': functions.mean_absolute_error, 'rmse': rmse}
    regressor = Regressor(predictor, lossfun=functions.mean_squared_error,
                          metrics_fun=metrics_fun, device=device)

    print('Training... : ', method_name)
    run_train(regressor, trainset, valid=None,
              batch_size=args.batchsize, epoch=args.epoch, out=args.out,
              extensions_list=None, device=device, converter=concat_mols,
              resume_path=None)

    # Save the regressor's parameters.
    args.model_foldername = t_theme_name.get()
    model_path = os.path.join(args.out, args.model_foldername,
                              args.model_filename)
    print('Saving the trained model to {}...'.format(model_path))

    # TODO(nakago): ChainerX array cannot be sent to numpy array when internal
    # state has gradients.
    if hasattr(regressor.predictor.graph_conv, 'reset_state'):
        regressor.predictor.graph_conv.reset_state()

    with open(parent_path / 'models' / theme_name / method_name / high_low /
              ('regressor.pickle'), 'wb') as f:
        cloudpickle.dump(regressor, f)
    #with open(parent_path / 'models' / theme_name / method_name / high_low /
    #          ('predictor.pickle'), 'wb') as f:
    #    cloudpickle.dump(predictor, f)

    print('Evaluating... : ', method_name)
    test_iterator = SerialIterator(testset, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator, regressor, converter=concat_mols,
                            device=device)()
    print('Evaluation result: : ', method_name)
    print(eval_result)

    # Converter that drops the label column so only inputs are fed in.
    @chainer.dataset.converter()
    def extract_inputs(batch, device=None):
        return concat_mols(batch, device=device)[:-1]

    pred_train = regressor.predict(trainset, converter=extract_inputs)
    pred_train = [i[0] for i in pred_train]
    pred_test = regressor.predict(testset, converter=extract_inputs)
    pred_test = [i[0] for i in pred_test]
    y_train = [i[2][0] for i in trainset]
    y_test = [i[2][0] for i in testset]

    title = args.label
    save_path = (parent_path / 'results' / theme_name / method_name /
                 high_low / 'scatter.png')
    save_scatter(y_train, pred_train, y_test, pred_test, title, save_path)

    # Keep a module-level reference so Tk does not garbage-collect the image.
    global image_score
    image_score_open = Image.open(parent_path / 'results' / theme_name /
                                  method_name / high_low / 'scatter.png')
    image_score = ImageTk.PhotoImage(image_score_open, master=frame1)
    canvas.create_image(200, 200, image=image_score)

    from sklearn.metrics import mean_squared_error, mean_absolute_error
    from sklearn.metrics import r2_score

    train_mse = mean_squared_error(y_train, pred_train)
    test_mse = mean_squared_error(y_test, pred_test)
    train_rmse = np.sqrt(train_mse)
    test_rmse = np.sqrt(test_mse)
    train_mae = mean_absolute_error(y_train, pred_train)
    test_mae = mean_absolute_error(y_test, pred_test)
    train_r2score = r2_score(y_train, pred_train)
    test_r2score = r2_score(y_test, pred_test)

    print('train_mse : ', train_mse)
    print('test_mse : ', test_mse)
    print('train_rmse : ', train_rmse)
    print('test_rmse : ', test_rmse)
    print('train_mae : ', train_mae)
    # BUG FIX: originally printed train_mae under the 'test_mae' label.
    print('test_mae : ', test_mae)
    print('train_r2score : ', train_r2score)
    print('test_r2score : ', test_r2score)
def setup():
    """Build and return a chainer ``Trainer`` from the YAML config file.

    Loads training/validation chart data listed in the config, constructs
    the Estimator model, optimizer, iterators, updater and extensions, and
    (when ``restart_dir`` is set) resumes from the newest snapshot.

    Returns:
        chainer.training.Trainer: a fully configured trainer.
    """
    # Read the configuration file.
    with open(CONFIG_FILE, "r") as f:
        # BUG FIX / security: yaml.load() without an explicit Loader is
        # deprecated and can construct arbitrary Python objects;
        # safe_load() builds plain data types only.
        config = yaml.safe_load(f)
    xp = np if not config["use_gpu"] else cuda.cupy

    # Decide the output directory: reuse the restart dir, or timestamp a
    # fresh one.
    restart = config["restart_dir"] is not None
    if restart:
        result_children_dir = config["restart_dir"]
    else:
        result_children_dir = ("result_" +
                               datetime.datetime.now()
                               .strftime("%Y%m%d-%H%M%S"))
    result_dir = os.path.join(config["result_dir"], result_children_dir)
    result_dir_train = os.path.join(result_dir, MODEL_DIR)
    result_dir_val = os.path.join(result_dir, VALIDATE_DIR)

    # Load training data. Each list row is "<path>\t<level>".
    train_scores = []
    with open(os.path.join(config["score_dir"], config["train_list"]),
              "r") as tr_f:
        train_info = [line.split("\n")[0] for line in tr_f.readlines()]
    train_paths = [os.path.join(config["score_dir"], entry.split("\t")[0])
                   for entry in train_info]
    # Levels are 1-based in the list files; convert to 0-based labels.
    train_score_lvs = [int(entry.split("\t")[1]) - 1 for entry in train_info]
    for idx, npy_path in enumerate(train_paths):
        score = xp.load(npy_path)
        score[:, 8] /= 100.0  # normalize column 8 (assumed BPM-like; TODO confirm)
        # Split the chart into per-bar chunks.
        score = score.reshape((-1, 1728))
        train_scores.append(score)
        sys.stdout.write("\rtrain score loaded: {0:4d}/{1}".format(
            idx + 1, len(train_paths)))
    sys.stdout.write("\n")

    # Load validation data (same format; also keep file names for reports).
    val_scores = []
    val_score_names = []
    with open(os.path.join(config["score_dir"], config["validate_list"]),
              "r") as val_f:
        val_info = [line.split("\n")[0] for line in val_f.readlines()]
    val_paths = [os.path.join(config["score_dir"], entry.split("\t")[0])
                 for entry in val_info]
    val_score_lvs = [int(entry.split("\t")[1]) - 1 for entry in val_info]
    for idx, npy_path in enumerate(val_paths):
        score = xp.load(npy_path)
        score[:, 8] /= 100.0
        # Split the chart into per-bar chunks.
        score = score.reshape((-1, 1728))
        val_scores.append(score)
        score_name = os.path.basename(npy_path)
        val_score_names.append(score_name)
        sys.stdout.write("\rvalidate score loaded: {0:4d}/{1}".format(
            idx + 1, len(val_paths)))
    sys.stdout.write("\n")

    # Model and optimizer.
    model = Estimator()
    if xp is not np:
        model.to_device("@cupy:0")
    optimizer = Adam(float(config["lr"]))
    optimizer.setup(model)

    # Iterator, updater, trainer, extensions.
    train_dataset = TupleDataset(train_scores, train_score_lvs)
    train_iterator = SerialIterator(train_dataset, int(config["batch_size"]))
    val_dataset = TupleDataset(val_scores, val_score_lvs, val_score_names)
    val_iterator = SerialIterator(val_dataset, int(config["batch_size"]),
                                  repeat=False, shuffle=False)
    updater = EstimatorUpdater(iterator=train_iterator, optimizer=optimizer)
    trainer = Trainer(updater, stop_trigger=(config["epochs"], "epoch"),
                      out=result_dir_train)
    trainer.extend(Validator(val_iterator, result_dir_val),
                   trigger=(1, "epoch"))
    trainer.extend(
        extensions.snapshot(filename="snapshot_epoch_{.updater.epoch}"))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")),
                   trigger=(1, "epoch"))
    trainer.extend(extensions.PrintReport(
        ["epoch", "train/loss", "train/acc",
         "val/loss", "val/acc", "val/rough_acc"]))
    trainer.extend(extensions.ProgressBar(update_interval=5))

    if restart:
        # Locate the newest snapshot to resume training from.
        snapshot_path_format = os.path.join(result_dir_train,
                                            "snapshot_epoch_*")
        snapshots = [os.path.basename(fname)
                     for fname in glob.glob(snapshot_path_format)]
        if len(snapshots) == 0:
            print("There does not exist a model to restart training.")
            exit()
        else:
            pattern = re.compile("snapshot_epoch_([0-9]+)")
            snapshot_epochs = [int(pattern.search(name).group(1))
                               for name in snapshots]
            prev_snapshot_idx = snapshot_epochs.index(max(snapshot_epochs))
            prev_snapshot = snapshots[prev_snapshot_idx]
            load_npz(os.path.join(result_dir_train, prev_snapshot), trainer)

    # Archive the config alongside the results for reproducibility.
    shutil.copy2(CONFIG_FILE, result_dir)
    return trainer
def main():
    """Predict and evaluate a trained QM9 regressor on the test split.

    Loads (or preprocesses and caches) the QM9 dataset, unpickles the
    trained Regressor, rescales labels with the model's ``label_scaler``,
    prints sample predictions, runs an Evaluator, and writes JSON results
    plus per-label mean absolute errors.
    """
    # Parse the arguments.
    args = parse_arguments()
    device = args.gpu

    # Set up some useful variables that will be used later on.
    method = args.method
    if args.label != 'all':
        label = args.label
        cache_dir = os.path.join('input', '{}_{}'.format(method, label))
        labels = [label]
    else:
        labels = D.get_qm9_label_names()
        cache_dir = os.path.join('input', '{}_all'.format(method))

    # Get the filename corresponding to the cached dataset, based on the
    # amount of data samples that need to be parsed from the original
    # dataset.
    num_data = args.num_data
    if num_data >= 0:
        dataset_filename = 'data_{}.npz'.format(num_data)
    else:
        dataset_filename = 'data.npz'

    # Load the cached dataset.
    dataset_cache_path = os.path.join(cache_dir, dataset_filename)
    dataset = None
    if os.path.exists(dataset_cache_path):
        print('Loading cached data from {}.'.format(dataset_cache_path))
        dataset = NumpyTupleDataset.load(dataset_cache_path)
    if dataset is None:
        print('Preprocessing dataset...')
        preprocessor = preprocess_method_dict[method]()
        dataset = D.get_qm9(preprocessor, labels=labels)

        # Cache the newly preprocessed dataset.
        # BUG FIX: os.mkdir fails when the parent directory ('input') does
        # not exist yet; makedirs with exist_ok=True also avoids a race
        # with concurrent runs.
        os.makedirs(cache_dir, exist_ok=True)
        NumpyTupleDataset.save(dataset_cache_path, dataset)

    # Use a predictor with scaled output labels.
    model_path = os.path.join(args.in_dir, args.model_filename)
    regressor = Regressor.load_pickle(model_path, device=device)
    label_scaler = regressor.predictor.label_scaler

    if label_scaler is not None:
        original_t = dataset.get_datasets()[-1]
        if args.gpu >= 0:
            scaled_t = cuda.to_cpu(label_scaler.transform(
                cuda.to_gpu(original_t)))
        else:
            scaled_t = label_scaler.transform(original_t)
        dataset = NumpyTupleDataset(*(dataset.get_datasets()[:-1]
                                      + (scaled_t,)))

    # Split the dataset into training and testing (only test is used here).
    train_data_size = int(len(dataset) * args.train_data_ratio)
    _, test = split_dataset_random(dataset, train_data_size, args.seed)

    # This callback function extracts only the inputs and discards the
    # labels.
    def extract_inputs(batch, device=None):
        return concat_mols(batch, device=device)[:-1]

    # Map scaled predictions back to the original label space.
    def postprocess_fn(x):
        if label_scaler is not None:
            scaled_x = label_scaler.inverse_transform(x)
            return scaled_x
        else:
            return x

    # Predict the output labels.
    print('Predicting...')
    y_pred = regressor.predict(
        test, converter=extract_inputs, postprocess_fn=postprocess_fn)

    # Extract the ground-truth labels.
    # NOTE(review): this line assumes label_scaler is not None — it would
    # raise AttributeError for an unscaled model; confirm that such models
    # never reach this script.
    t = concat_mols(test, device=device)[-1]
    original_t = cuda.to_cpu(label_scaler.inverse_transform(t))

    # Construct dataframe.
    df_dict = {}
    for i, l in enumerate(labels):
        df_dict.update({'y_pred_{}'.format(l): y_pred[:, i],
                        't_{}'.format(l): original_t[:, i], })
    df = pandas.DataFrame(df_dict)

    # Show a prediction/ground truth table with 5 random examples.
    print(df.sample(5))

    n_eval = 10
    for target_label in range(y_pred.shape[1]):
        label_name = labels[target_label]
        diff = (y_pred[:n_eval, target_label]
                - original_t[:n_eval, target_label])
        print('label_name = {}, y_pred = {}, t = {}, diff = {}'
              .format(label_name, y_pred[:n_eval, target_label],
                      original_t[:n_eval, target_label], diff))

    # Run an evaluator on the test dataset.
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator, regressor, converter=concat_mols,
                            device=device)()
    print('Evaluation result: ', eval_result)

    # Save the evaluation results.
    save_json(os.path.join(args.in_dir, 'eval_result.json'), eval_result)

    # Calculate mean abs error for each label.
    mae = numpy.mean(numpy.abs(y_pred - original_t), axis=0)
    eval_result = {}
    for i, l in enumerate(labels):
        eval_result.update({l: mae[i]})
    save_json(os.path.join(args.in_dir, 'eval_result_mae.json'), eval_result)
def main():
    """Evaluate a trained AU-RCNN model on the test split of its fold.

    Parses CLI options, reconstructs the network described by the model
    file name, loads its weights, builds the test dataset/iterator, runs
    ActionUnitEvaluator, and writes the observation to a JSON file next to
    the model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)'
                        )  # open_crf layer only works for CPU mode
    # which contains pretrained target
    parser.add_argument("--model", "-m",
                        help="pretrained model file path")
    parser.add_argument("--pretrained_model", "-pre", default="resnet101")
    parser.add_argument("--memcached_host", default="127.0.0.1")
    parser.add_argument('--mean_rgb',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--mean_flow',
                        default=config.ROOT_PATH + "BP4D/idx/mean_flow.npy",
                        help='image mean .npy file')
    parser.add_argument('--proc_num', type=int, default=10,
                        help="multiprocess fetch data process number")
    parser.add_argument('--batch', '-b', type=int, default=10,
                        help='mini batch size')
    args = parser.parse_args()

    # Only final snapshots named "...model.npz" are evaluated.
    if not args.model.endswith("model.npz"):
        return

    # The training configuration is encoded in the model file name.
    model_info = extract_mode(args.model)
    database = model_info["database"]
    fold = model_info["fold"]
    split_idx = model_info["split_idx"]
    backbone = model_info["backbone"]
    use_paper_num_label = model_info["use_paper_num_label"]
    use_roi_align = model_info["use_roi_align"]
    two_stream_mode = model_info['two_stream_mode']
    T = model_info["T"]

    adaptive_AU_database(database)
    paper_report_label, class_num = squeeze_label_num_report(
        database, use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    # Fall back to the full AU label set when no paper subset is requested.
    if not paper_report_label_idx:
        paper_report_label_idx = None
        class_num = len(config.AU_SQUEEZE)
    else:
        class_num = len(paper_report_label_idx)

    # Pretty-print the decoded model configuration.
    model_print_dict = OrderedDict()
    for key, value in model_info.items():
        model_print_dict[key] = str(value)
    print("""
    {0}
    ======================================
    INFO:
    {1}
    ======================================
    """.format(args.model,
               json.dumps(model_print_dict, sort_keys=True, indent=8)))

    # Build the feature-extraction chain(s); rgb_flow mode needs one chain
    # per stream.
    au_rcnn_train_chain_list = []
    if backbone == 'resnet101':
        if two_stream_mode != TwoStreamMode.rgb_flow:
            pretrained_model = backbone
            au_rcnn = AU_RCNN_Resnet101(
                pretrained_model=pretrained_model,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=use_roi_align,
                use_optical_flow_input=(
                    two_stream_mode == TwoStreamMode.optical_flow),
                temporal_length=T)
            au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain)
        else:  # rgb_flow mode
            au_rcnn_rgb = AU_RCNN_Resnet101(
                pretrained_model=backbone,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=use_roi_align,
                use_optical_flow_input=False, temporal_length=T)
            au_rcnn_optical_flow = AU_RCNN_Resnet101(
                pretrained_model=backbone,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=use_roi_align,
                use_optical_flow_input=True, temporal_length=T)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
            au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
                au_rcnn_optical_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_optical_flow)
            au_rcnn = au_rcnn_rgb

    # NOTE(review): both wrapper GPUs are args.gpu — presumably single-GPU
    # evaluation of a two-GPU training setup; confirm.
    model = Wrapper(au_rcnn_train_chain_list, class_num, database, T,
                    two_stream_mode=two_stream_mode,
                    gpus=[args.gpu, args.gpu])
    chainer.serializers.load_npz(args.model, model)
    print("loading {}".format(args.model))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    # Dataset: raw frames from memcached, then mean-subtraction transform.
    mc_manager = PyLibmcManager(args.memcached_host)
    img_dataset = AUDataset(database=database, L=T, fold=fold,
                            split_name='test', split_index=split_idx,
                            mc_manager=mc_manager, train_all_data=False,
                            two_stream_mode=two_stream_mode,
                            paper_report_label_idx=paper_report_label_idx)
    video_dataset = TransformDataset(
        img_dataset, Transform(L=T, mean_rgb_path=args.mean_rgb,
                               mean_flow_path=args.mean_flow))

    # Multiprocess prefetching unless explicitly disabled (proc_num == 1).
    if args.proc_num == 1:
        test_iter = SerialIterator(video_dataset, batch_size=args.batch,
                                   repeat=False, shuffle=False)
    else:
        test_iter = MultiprocessIterator(video_dataset,
                                         batch_size=args.batch,
                                         n_processes=args.proc_num,
                                         repeat=False, shuffle=False,
                                         n_prefetch=10,
                                         shared_mem=10000000)

    # Deterministic, gradient-free test-mode evaluation.
    with chainer.no_backprop_mode(), chainer.using_config(
            'cudnn_deterministic', True), chainer.using_config('train',
                                                               False):
        # Predictions are dumped next to the model file.
        predict_data_path = os.path.dirname(
            args.model) + os.path.sep + "pred_" + os.path.basename(
            args.model)[:os.path.basename(args.model).rindex("_")] + ".npz"
        print("npz_path: {}".format(predict_data_path))
        au_evaluator = ActionUnitEvaluator(
            test_iter, model, args.gpu, database=database,
            paper_report_label=paper_report_label,
            converter=lambda batch, device: concat_examples_not_labels(
                batch, device, padding=0),
            T=T, output_path=predict_data_path)
        observation = au_evaluator.evaluate()
        # Write metrics as pretty-printed JSON next to the model file.
        with open(os.path.dirname(args.model) + os.path.sep
                  + "evaluation_result_{0}.json".format(
                      os.path.basename(args.model)
                      [:os.path.basename(args.model).rindex("_")]),
                  "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
def run_train(model, train, valid=None,
              batch_size=16, epoch=10, optimizer=None, out='result',
              extensions_list=None, device=-1,
              converter=convert.concat_examples,
              use_default_extensions=True, resume_path=None):
    """Util function to train chainer's model with StandardUpdater.

    Typical Regression/Classification tasks suffices to use this method
    to train chainer model.

    Args:
        model (chainer.Chain): model to train
        train (dataset or Iterator): training dataset or train iterator
        valid (dataset or Iterator): validation dataset or valid iterator
        batch_size (int): batch size for training
        epoch (int): epoch for training
        optimizer (Optimizer): optimizer to use; defaults to ``Adam``.
        out (str): path for `trainer`'s out directory
        extensions_list (None or list): list of extensions to add to
            `trainer`
        device (Device): chainer Device
        converter (callable): batch converter passed to the updater and
            the evaluator
        use_default_extensions (bool): If `True`, default extensions
            (Evaluator, LogReport, AutoPrintReport, ProgressBar) are
            added to `trainer`.
        resume_path (None or str): If specified, `trainer` is resumed
            with this serialized file.

    Raises:
        ValueError: if ``optimizer`` is neither ``None`` nor an
            ``Optimizer`` instance.
    """
    if optimizer is None:
        # Use Adam optimizer as default
        optimizer = optimizers.Adam()
    elif not isinstance(optimizer, Optimizer):
        # BUG FIX: the message previously formatted type(Optimizer) — the
        # metaclass of the Optimizer class itself — instead of the type of
        # the offending argument.
        raise ValueError("[ERROR] optimizer must be instance of Optimizer, "
                         "but passed {}".format(type(optimizer)))
    optimizer.setup(model)

    if isinstance(train, Iterator):
        train_iter = train
    else:
        # Assume `train` as training dataset, Use SerialIterator as default.
        train_iter = SerialIterator(train, batch_size=batch_size)

    updater = training.StandardUpdater(train_iter, optimizer, device=device,
                                       converter=converter)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    if use_default_extensions:
        if valid is not None:
            if isinstance(valid, Iterator):
                valid_iter = valid
            else:
                # Assume `valid` as validation dataset,
                # Use SerialIterator as default.
                valid_iter = SerialIterator(valid, batch_size=batch_size,
                                            shuffle=False, repeat=False)
            trainer.extend(
                extensions.Evaluator(valid_iter, model, device=device,
                                     converter=converter))
        trainer.extend(extensions.LogReport())
        trainer.extend(AutoPrintReport())
        trainer.extend(extensions.ProgressBar(update_interval=10))
        # TODO: consider to include snapshot as default extension.
        # trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    if extensions_list is not None:
        for e in extensions_list:
            trainer.extend(e)

    if resume_path:
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()
    return
def _forward(self, data, batchsize=16,
             converter=concat_examples,
             retain_inputs=False, preprocess_fn=None,
             postprocess_fn=None, noise_sampler=None):
    """Forward data by iterating with batch

    Args:
        data: "train_x array" or "chainer dataset"
        batchsize (int): batch size
        converter (Callable): convert from `data` to `inputs`
        retain_inputs (bool): If True, this instance keeps inputs in
            `self.inputs` or not.
        preprocess_fn (Callable): Its input is numpy.ndarray or
            cupy.ndarray, it can return either Variable, cupy.ndarray or
            numpy.ndarray
        postprocess_fn (Callable): Its input argument is Variable,
            but this method may return either Variable, cupy.ndarray or
            numpy.ndarray.
        noise_sampler: when given, SmoothGrad-style noise is added to the
            target variable before each saliency computation; when None,
            plain VanillaGrad is computed.

    Returns (tuple or numpy.ndarray): forward result
    """
    input_list = None
    output_list = None
    it = SerialIterator(data, batch_size=batchsize, repeat=False,
                        shuffle=False)

    # Register extractor hooks for the duration of the loop; they are
    # removed again below.
    if isinstance(self.target_extractor, LinkHook):
        add_linkhook(self.target_extractor, prefix='/saliency/target/',
                     logger=self.logger)
    if isinstance(self.output_extractor, LinkHook):
        add_linkhook(self.output_extractor, prefix='/saliency/output/',
                     logger=self.logger)

    for batch in it:
        inputs = converter(batch, self._device)
        inputs = _to_tuple(inputs)

        if preprocess_fn:
            inputs = preprocess_fn(*inputs)
            inputs = _to_tuple(inputs)

        inputs = [_to_variable(x) for x in inputs]

        # --- Main saliency computation ----
        if noise_sampler is None:
            # VanillaGrad computation
            outputs = self._compute_core(*inputs)
        else:
            # SmoothGrad computation
            if self.target_extractor is None:
                # inputs[0] is considered as "target_var"
                noise = noise_sampler.sample(inputs[0].array)
                inputs[0].array += noise
                outputs = self._compute_core(*inputs)
            else:
                # Add process to LinkHook: inject noise into the extracted
                # target variable, then remove the process immediately so
                # it only affects this batch.
                def add_noise(hook, args, target_var):
                    noise = noise_sampler.sample(target_var.array)
                    target_var.array += noise

                self.target_extractor.add_process('/saliency/add_noise',
                                                  add_noise)
                outputs = self._compute_core(*inputs)
                self.target_extractor.delete_process('/saliency/add_noise')
        # --- Main saliency computation end ---

        # Init per-batch accumulators lazily (sizes depend on the first
        # batch's input/output arity).
        if retain_inputs:
            if input_list is None:
                input_list = [[] for _ in range(len(inputs))]
            for j, input in enumerate(inputs):
                input_list[j].append(cuda.to_cpu(input))
        if output_list is None:
            output_list = [[] for _ in range(len(outputs))]

        if postprocess_fn:
            outputs = postprocess_fn(*outputs)
            outputs = _to_tuple(outputs)
        for j, output in enumerate(outputs):
            output_list[j].append(_extract_numpy(output))

    # Unregister the extractor hooks added above.
    if isinstance(self.target_extractor, LinkHook):
        delete_linkhook(self.target_extractor, prefix='/saliency/target/',
                        logger=self.logger)
    if isinstance(self.output_extractor, LinkHook):
        delete_linkhook(self.output_extractor, prefix='/saliency/output/',
                        logger=self.logger)

    if retain_inputs:
        self.inputs = [
            numpy.concatenate(in_array) for in_array in input_list
        ]

    result = [_concat(output) for output in output_list]
    if len(result) == 1:
        return result[0]
    else:
        self.logger.error('return multiple result handling is not '
                          'implemented yet and not supported.')
    return result
def main():
    """Load a pickled tox21 classifier and evaluate it on the test split.

    Reads the training configuration from ``<in-dir>/config.json``, runs
    ``predict``/``predict_proba`` over the test set, prints per-task
    accuracy (ignoring unlabeled entries), saves predictions, and finally
    runs ``Evaluator`` and ``ROCAUCEvaluator``.
    """
    parser = argparse.ArgumentParser(
        description='Predict with a trained model.')
    parser.add_argument('--in-dir', '-i', type=str, default='result',
                        help='Path to the result directory of the training '
                             'script.')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='batch size')
    parser.add_argument(
        '--device', type=str, default='-1',
        help='Device specifier. Either ChainerX device specifier or an '
             'integer. If non-negative integer, CuPy arrays with specified '
             'device id are used. If negative integer, NumPy arrays are used')
    parser.add_argument('--model-filename', type=str, default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--num-data', type=int, default=-1,
                        help='Number of data to be parsed from parser.'
                             '-1 indicates to parse all data.')
    args = parser.parse_args()

    # The training script stored the method name and target labels here.
    with open(os.path.join(args.in_dir, 'config.json'), 'r') as i:
        config = json.loads(i.read())

    method = config['method']
    labels = config['labels']

    _, test, _ = data.load_dataset(method, labels, num_data=args.num_data)
    y_test = test.get_datasets()[-1]

    device = chainer.get_device(args.device)

    # Load pretrained model
    clf = Classifier.load_pickle(
        os.path.join(args.in_dir, args.model_filename),
        device=device)  # type: Classifier

    # ---- predict ---
    print('Predicting...')

    # We need to feed only input features `x` to `predict`/`predict_proba`.
    # This converter extracts only inputs (x1, x2, ...) from the features
    # which consist of input `x` and label `t` (x1, x2, ..., t).
    def extract_inputs(batch, device=None):
        return concat_mols(batch, device=device)[:-1]

    def postprocess_pred(x):
        # The predictor emits logits; a positive logit is classified toxic.
        x_array = cuda.to_cpu(x.data)
        return numpy.where(x_array > 0, 1, 0)

    y_pred = clf.predict(test, converter=extract_inputs,
                         postprocess_fn=postprocess_pred)
    y_proba = clf.predict_proba(test, converter=extract_inputs,
                                postprocess_fn=F.sigmoid)

    # `predict` method returns the prediction label (0: non-toxic, 1: toxic)
    # (bug fix: this message previously read 'y_pread.shape')
    print('y_pred.shape = {}, y_pred[:5, 0] = {}'.format(
        y_pred.shape, y_pred[:5, 0]))
    # `predict_proba` method returns the probability to be toxic
    print('y_proba.shape = {}, y_proba[:5, 0] = {}'.format(
        y_proba.shape, y_proba[:5, 0]))
    # --- predict end ---

    if y_pred.ndim == 1:
        y_pred = y_pred[:, None]

    if y_pred.shape != y_test.shape:
        raise RuntimeError('The shape of the prediction result array and '
                           'that of the ground truth array do not match. '
                           'Contents of the input directory may be corrupted '
                           'or modified.')

    # Per-task accuracy; label -1 marks "no measurement" and is skipped.
    statistics = []
    for t, p in six.moves.zip(y_test.T, y_pred.T):
        idx = t != -1
        n_correct = (t[idx] == p[idx]).sum()
        n_total = len(t[idx])
        # Guard against a task whose labels are all -1 (bug fix: the
        # original raised ZeroDivisionError in that case).
        accuracy = float(n_correct) / n_total if n_total > 0 else 0.0
        statistics.append([n_correct, n_total, accuracy])

    print('{:>6} {:>8} {:>8} {:>8}'.format('TaskID', 'Correct', 'Total',
                                           'Accuracy'))
    for idx, (n_correct, n_total, accuracy) in enumerate(statistics):
        print('task{:>2} {:>8} {:>8} {:>8.4f}'.format(idx, n_correct,
                                                      n_total, accuracy))

    prediction_result_file = 'prediction.npz'
    print('Save prediction result to {}'.format(prediction_result_file))
    numpy.savez_compressed(prediction_result_file, y_pred)

    # --- evaluate ---
    # To calc loss/accuracy, we can use `Evaluator`, `ROCAUCEvaluator`
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator, clf, converter=concat_mols,
                            device=device)()
    print('Evaluation result: ', eval_result)

    rocauc_result = ROCAUCEvaluator(test_iterator, clf, converter=concat_mols,
                                    device=device, eval_func=clf.predictor,
                                    name='test', ignore_labels=-1)()
    print('ROCAUC Evaluation result: ', rocauc_result)
    with open(os.path.join(args.in_dir, 'eval_result.json'), 'w') as f:
        json.dump(rocauc_result, f)
class BCIterator(SerialIterator):
    """Dataset iterator that serially reads the examples and mix sub-examples.

    This is a simple implementation of :class:`~chainer.dataset.Iterator`
    that just visits each example in either the order of indexes or a
    shuffled order and mix sub-examples.

    To avoid unintentional performance degradation, the ``shuffle`` option
    is set to ``True`` by default. For validation, it is better to set it to
    ``False`` when the underlying dataset supports fast slicing. If the
    order of examples has an important meaning and the updater depends on
    the original order, this option should be set to ``False``.

    This iterator saves ``-1`` instead of ``None`` in snapshots since some
    serializers do not support ``None``.

    Args:
        dataset: Dataset to iterate.
        batch_size (int): Number of examples within each batch.
        repeat (bool): If ``True``, it infinitely loops over the dataset.
            Otherwise, it stops iteration at the end of the first epoch.
        shuffle (bool): If ``True``, the order of examples is shuffled at the
            beginning of each epoch. Otherwise, examples are extracted in the
            order of indexes. If ``None`` and no ``order_sampler`` is given,
            the behavior is the same as the case with ``shuffle=True``.
        order_sampler (callable): A callable that generates the order of the
            indices to sample in the next epoch when a epoch finishes. This
            function should take two arguments: the current order and the
            current position of the iterator. This should return the next
            order. The size of the order should remain constant. This option
            cannot be used when ``shuffle`` is not ``None``.
        mixer_image (callable): A callable that mix two images. This function
            should take three arguments: base-image, sub-image and ratio of
            mixing. This should return the mixed image.
        mixer_label (callable): A callable that mix two labels. This function
            should take four arguments: base-label, sub-label, ratio of
            mixing and number of classes. This should return the mixed label.
            The format of mixed label will not (int) and should match the
            format of args of ``lossfun`` and ``accfun`` of Classifier.
        force_2class (bool): If ``True``, sub-examples are extracted by
            iterative random-choice until sub-label and base-label are
            different. If ``False``, sub-examples are extracted from the
            ``SerialIterator``.
        _range (float): The max ratio of mixing sub-examples. If ``_range=0``,
            ``BCIterator`` can be used for non-mix iterator which return same
            format with mix iterator.
        classes (int): Number of classes. It is necessary for mixer_label.
    """

    def __init__(self, dataset, batch_size, repeat=True, shuffle=None,
                 order_sampler=None, mixer_image=mixers.mix_plus,
                 mixer_label=mixers.mix_labels, force_2class=False,
                 _range=0.5, classes=1):
        super(BCIterator, self).__init__(
            dataset, batch_size, repeat, shuffle, order_sampler)
        self.mixer_image = mixer_image
        # `classes` is bound once here so mix_sample only passes 4 args.
        self.mixer_label = partial(mixer_label, classes=classes)
        self._range = _range
        self.force_2class = force_2class
        if not force_2class:
            # A second independent iterator supplies the sub-examples; when
            # force_2class is set, get_sub() samples them instead.
            self.sub_iter = SerialIterator(
                dataset, batch_size, repeat, shuffle, order_sampler)

    def get_sub(self, label):
        # Rejection-sample a random example until its label differs from
        # `label` (used only in force_2class mode).
        while True:
            x, t = self.dataset[numpy.random.randint(self._epoch_size)]
            if t != label:
                return x, t

    def mix_sample(self, base, sub=None, r=0):
        # Mix one (image, label) pair; if no sub-example is supplied, draw
        # one with a different label via get_sub().
        xb, tb = base
        xs, ts = sub or self.get_sub(tb)
        return self.mixer_image(xb, xs, r), self.mixer_label(tb, ts, r)

    def __next__(self):
        # Base batch comes from the parent SerialIterator; one mixing ratio
        # in [0, _range) is drawn per example.
        bases = super(BCIterator, self).__next__()
        rands = numpy.random.uniform(0, self._range, self.batch_size)
        if self.force_2class:
            return [self.mix_sample(b, None, r) for b, r in zip(bases, rands)]
        subs = self.sub_iter.__next__()
        return [self.mix_sample(b, s, r)
                for b, s, r in zip(bases, subs, rands)]

    # Python 2 compatibility alias.
    next = __next__
def main():
    """Train a binary DDI (drug pair) classifier on paired-SMILES CSV data.

    End-to-end command-line script: parses train/test CSV files of SMILES
    pairs, optionally augments the training set, builds the predictor via
    ``set_up_predictor``, then trains with early stopping plus accuracy /
    ROC-AUC / PRC-AUC / F1 evaluators on both the train and test splits.
    """
    # Parse the arguments.
    args = parse_arguments()
    # These flags arrive as strings; anything but the literal 'False'
    # counts as enabled.
    augment = False if args.augment == 'False' else True
    multi_gpu = False if args.multi_gpu == 'False' else True
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # Binary interaction labels are stored as int32.
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and test dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args.train_datafile)['dataset']
    test = parser.parse(args.test_datafile)['dataset']
    if augment:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_test = test.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_test))

    # Comma-separated hidden-layer sizes for the head network, e.g. '32,16'.
    if len(args.net_hidden_dims):
        net_hidden_dims = tuple([
            int(net_hidden_dim)
            for net_hidden_dim in args.net_hidden_dims.split(',')
        ])
    else:
        net_hidden_dims = ()

    # Normalize the remaining string/flag options to Python values.
    fp_attention = True if args.fp_attention else False
    update_attention = True if args.update_attention else False
    weight_tying = False if args.weight_tying == 'False' else True
    attention_tying = False if args.attention_tying == 'False' else True
    fp_batch_normalization = True if args.fp_bn == 'True' else False
    layer_aggregator = None if args.layer_aggregator == '' else args.layer_aggregator
    context = False if args.context == 'False' else True
    output_activation = functions.relu if args.output_activation == 'relu' else None

    # NOTE(review): `weight_typing=` below looks like a typo for
    # `weight_tying`, but it must match set_up_predictor's actual keyword —
    # confirm against its signature before renaming.
    predictor = set_up_predictor(
        method=args.method,
        fp_hidden_dim=args.fp_hidden_dim,
        fp_out_dim=args.fp_out_dim,
        conv_layers=args.conv_layers,
        concat_hidden=args.concat_hidden,
        layer_aggregator=layer_aggregator,
        fp_dropout_rate=args.fp_dropout_rate,
        fp_batch_normalization=fp_batch_normalization,
        net_hidden_dims=net_hidden_dims,
        class_num=class_num,
        sim_method=args.sim_method,
        fp_attention=fp_attention,
        weight_typing=weight_tying,
        attention_tying=attention_tying,
        update_attention=update_attention,
        fp_max_degree=args.fp_max_degree,
        context=context,
        context_layers=args.context_layers,
        context_dropout=args.context_dropout,
        message_function=args.message_function,
        readout_function=args.readout_function,
        num_timesteps=args.num_timesteps,
        num_output_hidden_layers=args.num_output_hidden_layers,
        output_hidden_dim=args.output_hidden_dim,
        output_activation=output_activation,
        symmetric=args.symmetric)

    train_iter = SerialIterator(train, args.batchsize)
    test_iter = SerialIterator(test, args.batchsize,
                               repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor, lossfun=loss_func,
                            metrics_fun=metrics_fun, device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate,
                                weight_decay_rate=args.weight_decay_rate)
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)
    # add regularization
    if args.max_norm > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args.max_norm))
    if args.l2_rate > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.l2_rate))
    if args.l1_rate > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args.l1_rate))

    # Set up the updater.
    if multi_gpu:
        logging.info('Using multiple GPUs')
        updater = training.ParallelUpdater(train_iter, optimizer,
                                           devices={
                                               'main': 0,
                                               'second': 1
                                           },
                                           converter=concat_mols)
    else:
        logging.info('Using single GPU')
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu,
                                           converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # add stop_trigger parameter: stop once validation loss stops improving
    # (patience 10), or after 500 epochs at the latest.
    early_stop = triggers.EarlyStoppingTrigger(monitor='validation/main/loss',
                                               patients=10,
                                               max_trigger=(500, 'epoch'))
    out = 'output' + '/' + args.out
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)
    # trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        E.Evaluator(test_iter, classifier, device=args.gpu,
                    converter=concat_mols))
    # Metrics on the training split use a non-repeating iterator so every
    # evaluation pass sees each example exactly once.
    train_eval_iter = SerialIterator(train, args.batchsize,
                                     repeat=False, shuffle=False)
    trainer.extend(
        AccuracyEvaluator(train_eval_iter, classifier, eval_func=predictor,
                          device=args.gpu, converter=concat_mols,
                          name='train_acc', pos_labels=1, ignore_labels=-1,
                          raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        AccuracyEvaluator(test_iter, classifier, eval_func=predictor,
                          device=args.gpu, converter=concat_mols,
                          name='val_acc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        ROCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='train_roc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        ROCAUCEvaluator(test_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='val_roc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        PRCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='train_prc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        PRCAUCEvaluator(test_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='val_prc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        F1Evaluator(train_eval_iter, classifier, eval_func=predictor,
                    device=args.gpu, converter=concat_mols,
                    name='train_f', pos_labels=1, ignore_labels=-1,
                    raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        F1Evaluator(test_iter, classifier, eval_func=predictor,
                    device=args.gpu, converter=concat_mols,
                    name='val_f', pos_labels=1, ignore_labels=-1))

    # apply shift strategy to learning rate every 10 epochs
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    if args.exp_shift_strategy == 1:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args.exp_shift_strategy == 2:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args.exp_shift_strategy == 3:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')
    # # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc',
        'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy',
        'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr',
        'elapsed_time'
    ]
    trainer.extend(E.PrintReport(entries=entries))
    # change from 10 to 2 on Mar. 1 2019
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(
        E.PlotReport(['main/loss', 'validation/main/loss'], 'epoch',
                     file_name='loss.png'))
    trainer.extend(
        E.PlotReport(['train_acc/main/accuracy', 'val_acc/main/accuracy'],
                     'epoch', file_name='accuracy.png'))

    if args.resume:
        resume_path = os.path.join(out, args.resume)
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(out, args.model_filename)
    logging.info('Saving the trained model to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
def main():
    """Compare a pre-GAIN and post-GAIN FCN8s model on the validation split.

    Loads both models, streams validation images through their
    classification branches, and accumulates per-class TP/TN/FP/FN counts
    from the post-GAIN classification scores (positive score => predicted
    present).
    """
    def str2bool(value):
        # Bug fix: argparse's `type=bool` treats ANY non-empty string
        # (including 'False') as True; parse common spellings explicitly.
        return str(value).lower() in ('true', '1', 'yes')

    # Bug fix: the parser was constructed twice; build it once.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--pretrained', type=str,
        help='path to model that has trained classifier but has not been '
             'trained through GAIN routine',
        default='classifier_padding_1_model_594832')
    parser.add_argument(
        '--trained', type=str,
        help='path to model trained through GAIN',
        default='result/MYGAIN_5_to_1_padding_1_all_update_model_20000')
    parser.add_argument('--device', type=int, default=0, help='gpu id')
    parser.add_argument('--shuffle', type=str2bool, default=False,
                        help='whether to shuffle dataset')
    parser.add_argument('--whole', type=str2bool, default=False,
                        help='whether to test for the whole validation '
                             'dataset')
    parser.add_argument('--no', type=int, default=5,
                        help='if not whole, then no of images to visualize')
    parser.add_argument('--name', type=str, default='viz1',
                        help='name of the subfolder or experiment under '
                             'which to save')
    args = parser.parse_args()

    pretrained_file = args.pretrained
    trained_file = args.trained
    # Bug fix: the original later hard-coded `device = 0`, silently
    # overriding --device; honor the CLI value instead.
    device = args.device
    shuffle = args.shuffle
    whole = args.whole
    name = args.name
    N = args.no

    dataset = MyTrainingDataset(split='val')
    iterator = SerialIterator(dataset, 1, shuffle=shuffle, repeat=False)
    converter = chainer.dataset.concat_examples
    os.makedirs('viz/' + name, exist_ok=True)
    no_of_classes = 21

    pretrained = FCN8s_hand()
    trained = FCN8s_hand()
    load_npz(pretrained_file, pretrained)
    load_npz(trained_file, trained)
    if device >= 0:
        pretrained.to_gpu()
        trained.to_gpu()

    i = 0
    true_positive = [0] * no_of_classes
    true_negative = [0] * no_of_classes
    false_positive = [0] * no_of_classes
    false_negative = [0] * no_of_classes

    while not iterator.is_new_epoch:
        if not whole and i >= N:
            break
        image, labels, metadata = converter(iterator.next())
        # Keep a HWC uint8 copy of the input for (optional) visualization.
        np_input_img = image
        np_input_img = np.uint8(np_input_img[0])
        np_input_img = np.transpose(np_input_img, (1, 2, 0))

        image = Variable(image)
        if device >= 0:
            image.to_gpu()
        xp = get_array_module(image.data)

        # Build the multi-hot ground-truth vector: ignore labels -1 and 0,
        # shift the remaining class ids down by one.
        to_substract = np.array((-1, 0))
        noise_classes = np.unique(labels[0]).astype(np.int32)
        target = xp.asarray([[0] * (no_of_classes)])
        gt_labels = np.setdiff1d(noise_classes, to_substract) - 1
        target[0][gt_labels] = 1

        gcam1, cl_scores1, class_id1 = pretrained.stream_cl(image)
        gcam2, cl_scores2, class_id2 = trained.stream_cl(image)

        target = cp.asnumpy(target)
        cl_scores2 = cp.asnumpy(cl_scores2.data)
        # Threshold the (logit) scores at 0 to get binary predictions.
        for j in range(0, len(target[0])):
            if target[0][j] == 1:
                if cl_scores2[0][j] >= 0:
                    true_positive[j] += 1
                else:
                    false_negative[j] += 1
            else:
                if cl_scores2[0][j] <= 0:
                    true_negative[j] += 1
                else:
                    false_positive[j] += 1

        # Bug fix: the original had `if device > -0: class_id =
        # cp.asnumpy(class_id)`. Since `-0 == 0`, the guard really meant
        # `device > 0`, and `class_id` was never defined (NameError if
        # reached). The ids were only needed by commented-out plotting
        # code, so the broken line (and the dead plotting block) is removed.
        print(i)
        i += 1

    # Bug fix: message previously read "true postive".
    print("true positive {}".format(true_positive))
    print("true negative {}".format(true_negative))
    print("false positive {}".format(false_positive))
    print("false negative {}".format(false_negative))
def main():
    """Train MobileYOLO on Pascal VOC, resuming from any earlier snapshot."""
    net_cls = MobileYOLO
    train_x, train_y, val_x, val_y = load_pascal_voc_dataset(DATASET_ROOT)
    train_dataset = YoloDataset(train_x, train_y,
                                target_size=net_cls.img_size,
                                n_grid=net_cls.n_grid, augment=True)
    test_dataset = YoloDataset(val_x, val_y,
                               target_size=net_cls.img_size,
                               n_grid=net_cls.n_grid, augment=False)

    # All classes weigh 1.0 except class 0, which is down-weighted.
    class_weights = [1.0] * train_dataset.n_classes
    class_weights[0] = 0.1
    model = net_cls(n_classes=train_dataset.n_classes, n_base_units=3,
                    class_weights=class_weights)

    # Prefer the most recent full snapshot; fall back to the best-loss one.
    last_model_path = RESULT_DIR + '/model_last.npz'
    best_loss_path = RESULT_DIR + '/best_loss.npz'
    if os.path.exists(last_model_path):
        print('continue from previous result')
        chainer.serializers.load_npz(last_model_path, model)
    elif os.path.exists(best_loss_path):
        print('continue from previous result')
        chainer.serializers.load_npz(best_loss_path, model)

    optimizer = Adam()
    optimizer.setup(model)

    train_iter = SerialIterator(train_dataset, batch_size=BATCH_SIZE)
    test_iter = SerialIterator(test_dataset, batch_size=BATCH_SIZE,
                               shuffle=False, repeat=False)
    updater = StandardUpdater(train_iter, optimizer, device=DEVICE)

    trainer = Trainer(updater, (N_EPOCHS, 'epoch'), out=RESULT_DIR)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.Evaluator(test_iter, model, device=DEVICE))
    trainer.extend(extensions.PrintReport([
        'main/loss', 'validation/main/loss',
        'main/cl_loss', 'validation/main/cl_loss',
        'main/cl_acc', 'validation/main/cl_acc',
        'main/pos_loss', 'validation/main/pos_loss',
    ]))
    # Keep one "best so far" snapshot per monitored metric.
    best_snapshots = (
        ('best_loss.npz', triggers.MinValueTrigger('validation/main/loss')),
        ('best_classification.npz',
         triggers.MaxValueTrigger('validation/main/cl_acc')),
        ('best_position.npz',
         triggers.MinValueTrigger('validation/main/pos_loss')),
    )
    for snapshot_name, snapshot_trigger in best_snapshots:
        trainer.extend(extensions.snapshot_object(model, snapshot_name),
                       trigger=snapshot_trigger)

    trainer.run()
    chainer.serializers.save_npz(RESULT_DIR + '/model_last.npz',
                                 model.to_cpu())
def main():
    """Speed-benchmark an AU R-CNN model on the test split.

    Builds the backbone selected by ``--feature_model``, wraps the test
    dataset, and runs ``SpeedEvaluator`` for ``--trail_times`` trials of
    ``--each_trail_iteration`` iterations each; the resulting observation
    is written to ``<out>/evaluation_speed_test.json``.
    """
    print("chainer cudnn enabled: {}".format(chainer.cuda.cudnn_enabled))
    parser = argparse.ArgumentParser(
        description='Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/AU_R_CNN/')
    parser.add_argument('--gpu', '-g', default="0",
                        help='GPU ID, multiple GPU split by comma, '
                             'Note that BPTT updater do not support '
                             'multi-GPU')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--database', default='BP4D',
                        help='Output directory: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=20)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--need_validate', action='store_true',
                        help='do or not validate during training')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--feature_model', default="resnet101",
                        help="vgg16/vgg19/resnet101 for train")
    parser.add_argument('--extract_len', type=int, default=1000)
    parser.add_argument('--optimizer', default='RMSprop',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model', default='resnet101',
                        help='imagenet/vggface/resnet101/*.npz')
    parser.add_argument('--pretrained_model_args', nargs='+', type=float,
                        help='you can pass in "1.0 224" or "0.75 224"')
    parser.add_argument('--use_memcached', action='store_true',
                        help='whether use memcached to boost speed of fetch '
                             'crop&mask')
    # Bug fix: this option was commented out while `args.memcached_host` is
    # still read below, so `--use_memcached` crashed with AttributeError.
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--snap_individual", action="store_true",
                        help="whether to snapshot each individual "
                             "epoch/iteration")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument("--use_sigmoid_cross_entropy", "-sigmoid",
                        action="store_true",
                        help="whether to use sigmoid cross entropy or "
                             "softmax cross entropy")
    parser.add_argument("--is_pretrained", action="store_true",
                        help="whether is to pretrain BP4D later will for "
                             "DISFA dataset or not")
    parser.add_argument("--pretrained_target", '-pt', default="",
                        help="whether pretrain label set will use DISFA "
                             "or not")
    parser.add_argument("--fix", '-fix', action="store_true",
                        help="whether to fix first few conv layers or not")
    parser.add_argument('--occlude', default='',
                        help='whether to use occlude face of '
                             'upper/left/right/lower/none to test')
    parser.add_argument("--prefix", '-prefix', default="",
                        help="_beta, for example 3_fold_beta")
    parser.add_argument('--eval_mode', action='store_true',
                        help='Use test datasets for evaluation metric')
    parser.add_argument("--img_resolution", type=int, default=512)
    parser.add_argument("--FERA", action='store_true',
                        help='whether to use FERA data split train and '
                             'validate')
    parser.add_argument('--FPN', action="store_true",
                        help="whether to use feature pyramid network for "
                             "training and prediction")
    parser.add_argument('--fake_box', action="store_true",
                        help="whether to use fake average box coordinate to "
                             "predict")
    parser.add_argument('--roi_align', action="store_true",
                        help="whether to use roi_align or roi_pooling")
    parser.add_argument("--train_test", default="trainval", type=str)
    parser.add_argument("--trail_times", default=20, type=int)
    parser.add_argument("--each_trail_iteration", default=1000, type=int)
    args = parser.parse_args()

    if not os.path.exists(args.pid):
        os.makedirs(args.pid)
    # pid file path kept for the (currently disabled) pid-file write below.
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    # with open(pid_file_path, "w") as file_obj:
    #     file_obj.write(pid)
    #     file_obj.flush()

    config.IMG_SIZE = (args.img_resolution, args.img_resolution)

    print('GPU: {}'.format(args.gpu))
    if args.is_pretrained:
        adaptive_AU_database(args.pretrained_target)
    else:
        adaptive_AU_database(args.database)
    np.random.seed(args.seed)
    # A list txt file (e.g. id_trainval_0.txt) must be built beforehand;
    # each line is subject + "/" + emotion_seq + "/" + frame.
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    if args.feature_model == 'vgg19':
        faster_rcnn = ROI_NetsVGG19(pretrained_model=args.pretrained_model,
                                    mean_file=args.mean,
                                    min_size=args.img_resolution,
                                    max_size=args.img_resolution)
    elif args.feature_model == 'resnet101':
        # pretrained_model may also point at a snapshot .npz file.
        faster_rcnn = FasterRCNNResnet101(
            n_fg_class=len(config.AU_SQUEEZE),
            pretrained_model=args.pretrained_model,
            mean_file=args.mean,
            min_size=args.img_resolution, max_size=args.img_resolution,
            extract_len=args.extract_len)
    else:
        # Bug fix: the help text advertises vgg16 but no branch handled it;
        # fail fast instead of a NameError on `faster_rcnn` below.
        raise ValueError(
            'unsupported --feature_model: {}'.format(args.feature_model))

    batch_size = args.batch_size
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        test_data = AUDataset(database=args.database, fold=args.fold,
                              img_resolution=args.img_resolution,
                              split_name=args.train_test,
                              split_index=args.split_idx,
                              mc_manager=mc_manager, train_all_data=False,
                              prefix=args.prefix,
                              pretrained_target=args.pretrained_target,
                              is_FERA=args.FERA)
        test_data = TransformDataset(test_data,
                                     Transform(faster_rcnn, mirror=False))
        if args.proc_num == 1:
            test_iter = SerialIterator(test_data, args.batch_size,
                                       repeat=False, shuffle=True)
        else:
            test_iter = MultiprocessIterator(test_data,
                                             batch_size=args.batch_size,
                                             n_processes=args.proc_num,
                                             repeat=False, shuffle=True,
                                             n_prefetch=10,
                                             shared_mem=10000000)

        # When several GPU ids are passed, benchmark on the first one only.
        gpu = int(args.gpu) if "," not in args.gpu else int(
            args.gpu[:args.gpu.index(",")])
        chainer.cuda.get_device_from_id(gpu).use()
        faster_rcnn.to_gpu(gpu)
        evaluator = SpeedEvaluator(
            test_iter, faster_rcnn,
            lambda batch, device: concat_examples_not_none(
                batch, device, padding=-99),
            device=gpu, trail_times=args.trail_times,
            each_trail_iteration=args.each_trail_iteration,
            database=args.database)
        observation = evaluator.evaluate()
        # Robustness: ensure the output directory exists before writing.
        if not os.path.exists(args.out):
            os.makedirs(args.out)
        with open(args.out + os.path.sep + "evaluation_speed_test.json",
                  "w") as file_obj:
            file_obj.write(json.dumps(observation, indent=4,
                                      separators=(',', ': ')))
            file_obj.flush()
def main():
    """Train a molecular-property regressor from a SMILES CSV file.

    Parses the CSV with the method-specific preprocessor, optionally
    standardizes the labels, trains for ``--epoch`` epochs, and pickles the
    resulting regressor (plus the scaler, when one was fitted).
    """
    args = parse_arguments()

    # Guard clause: a target label is mandatory.
    if not args.label:
        raise ValueError('No target label was specified.')
    labels = args.label
    class_num = len(labels) if isinstance(labels, list) else 1

    def postprocess_label(label_list):
        # Regression targets become float32 arrays.
        return numpy.asarray(label_list, dtype=numpy.float32)

    print('Preprocessing dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    csv_parser = CSVFileParser(preprocessor,
                               postprocess_label=postprocess_label,
                               labels=labels, smiles_col='SMILES')
    dataset = csv_parser.parse(args.datafile)['dataset']

    # Optionally standardize the labels; the fitted scaler is reused for
    # metric rescaling and persisted next to the model.
    scaler = None
    if args.scale == 'standardize':
        scaler = StandardScaler()
        scaled_labels = scaler.fit_transform(dataset.get_datasets()[-1])
        dataset = NumpyTupleDataset(
            *(dataset.get_datasets()[:-1] + (scaled_labels, )))

    # Training/validation split by random sampling.
    train_size = int(len(dataset) * args.train_data_ratio)
    train, _ = split_dataset_random(dataset, train_size, args.seed)

    predictor = set_up_predictor(args.method, args.unit_num,
                                 args.conv_layers, class_num)
    train_iter = SerialIterator(train, args.batchsize)

    regressor = Regressor(
        predictor,
        lossfun=F.mean_squared_error,
        metrics_fun={
            'mean_abs_error': MeanAbsError(scaler=scaler),
            'root_mean_sqr_error': RootMeanSqrError(scaler=scaler),
        },
        device=args.gpu)

    optimizer = optimizers.Adam()
    optimizer.setup(regressor)

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu,
                                       converter=concat_mols)

    print('Training...')
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.PrintReport([
        'epoch', 'main/loss', 'main/mean_abs_error',
        'main/root_mean_sqr_error', 'elapsed_time'
    ]))
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Persist the trained regressor.
    model_path = os.path.join(args.out, args.model_filename)
    print('Saving the trained model to {}...'.format(model_path))
    regressor.save_pickle(model_path, protocol=args.protocol)

    # Persist the scaler so predictions can be un-standardized later.
    if scaler is not None:
        with open(os.path.join(args.out, 'scaler.pkl'), mode='wb') as f:
            pickle.dump(scaler, f, protocol=args.protocol)
def main(args):
    """Cross-validated training/evaluation of an ENE multi-label classifier.

    Runs ``args.cv``-fold cross validation, pools the held-out predictions
    of all folds, prints entity-based and class-based precision/recall/F1,
    and writes per-entity predictions to ``classification_result.json``.
    """
    # Fix all RNG seeds for reproducibility.
    random.seed(0)
    np.random.seed(0)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        cuda.cupy.random.seed(0)

    dataset, id2ene = load_dataset(args.dataset, args.features, args.redirects)
    print(f'# of examples in dataset: {len(dataset)}')

    def batch2tensors(batch, device):
        # Converter: builds a multi-hot feature block `xf`, a dense
        # embedding block `xe`, and a multi-hot target matrix `t`, then
        # concatenates the two input blocks along the feature axis.
        xp = cuda.cupy if device >= 0 else np
        xf = xp.zeros((len(batch), args.n_feature), dtype='f')
        xe = xp.zeros((len(batch), args.embed_size), dtype='f')
        t = xp.zeros((len(batch), len(id2ene)), dtype='i')
        for i, item in enumerate(batch):
            for feature_id in item['feature_ids']:
                # Feature ids beyond the vocabulary cutoff are dropped.
                if feature_id < args.n_feature:
                    xf[i, feature_id] = 1.0
            if item['embedding']:
                xe[i] = xp.array(item['embedding'], dtype='f')
            for ene_id in item['ene_ids']:
                t[i, ene_id] = 1
        x = xp.concatenate((xf, xe), axis=1)
        return x, t

    cv_datasets = get_cross_validation_datasets(dataset, args.cv)

    ys = []
    ts = []
    for split_idx, cv_dataset in enumerate(cv_datasets):
        print(f'cross validation ({split_idx + 1}/{len(cv_datasets)})')
        train, test = cv_dataset
        train_iter = SerialIterator(train, batch_size=args.batch)
        test_iter = SerialIterator(test, batch_size=args.batch,
                                   repeat=False, shuffle=False)

        # A fresh model and optimizer are trained for every fold.
        model = ENEClassifier(in_size=args.n_feature + args.embed_size,
                              hidden_size=args.hidden_size,
                              out_size=len(id2ene))
        if args.gpu >= 0:
            model.to_gpu(args.gpu)

        optimizer = optimizers.Adam()
        optimizer.setup(model)

        updater = StandardUpdater(train_iter, optimizer,
                                  converter=batch2tensors, device=args.gpu)
        trainer = Trainer(updater, (args.epoch, 'epoch'), out=args.out_dir)
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.snapshot_object(
                model, filename='epoch_{.updater.epoch}.model'))
        trainer.extend(
            extensions.Evaluator(test_iter, model, converter=batch2tensors,
                                 device=args.gpu))
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'main/loss', 'validation/main/loss',
                 'elapsed_time']))
        trainer.extend(extensions.ProgressBar(update_interval=1))
        trainer.run()

        # Collect held-out predictions of this fold for pooled evaluation.
        test_iter.reset()
        for batch in test_iter:
            x, t = batch2tensors(batch, device=args.gpu)
            with chainer.using_config('train', False):
                y = model.predict(x)
            ys.append(y)
            ts.append(t)

    y_all = F.concat(ys, axis=0)
    t_all = F.concat(ts, axis=0)

    # Binarize: a class counts as predicted when its score reaches 0.5.
    prediction_matrix = (y_all.data >= 0.5).astype('f')
    reference_matrix = (t_all.data == 1).astype('f')
    # Element-wise product: 1.0 exactly where a prediction is correct.
    accuracy_matrix = prediction_matrix * reference_matrix

    eb_pred = prediction_matrix.sum(
        axis=1)  # entity-based num. of predicted classes
    eb_ref = reference_matrix.sum(
        axis=1)  # entity-based num. of reference classes
    eb_acc = accuracy_matrix.sum(
        axis=1)  # entity-based num. of accurate classes
    eb_nopred = (eb_pred == 0.).astype('f')  # for avoiding zero-division

    eb_precision = (eb_acc / (eb_pred + eb_nopred)).mean()
    eb_recall = (eb_acc / eb_ref).mean()
    eb_f1 = (2 * eb_acc / (eb_pred + eb_ref)).mean()

    cb_pred = prediction_matrix.sum(
        axis=0)  # class-based num. of predicted examples
    cb_ref = reference_matrix.sum(
        axis=0)  # class-based num. of reference examples
    cb_acc = accuracy_matrix.sum(
        axis=0)  # class-based num. of accurate examples
    cb_nopred = (cb_pred == 0.).astype('f')  # for avoiding zero-division

    cb_macro_precision = (cb_acc / (cb_pred + cb_nopred)).mean()
    cb_macro_recall = (cb_acc / cb_ref).mean()
    cb_macro_f1 = (2 * cb_acc / (cb_pred + cb_ref)).mean()
    cb_micro_precision = cb_acc.sum() / cb_pred.sum()
    cb_micro_recall = cb_acc.sum() / cb_ref.sum()
    cb_micro_f1 = (2 * cb_acc.sum()) / (cb_pred.sum() + cb_ref.sum())

    print(f'Entity-based Precision: {float(eb_precision):.2%}')
    print(f'Entity-based Recall: {float(eb_recall):.2%}')
    print(f'Entity-based F1 score: {float(eb_f1):.2%}')
    print(f'Class-based macro Precision: {float(cb_macro_precision):.2%}')
    print(f'Class-based macro Recall: {float(cb_macro_recall):.2%}')
    print(f'Class-based macro F1 score: {float(cb_macro_f1):.2%}')
    print(f'Class-based micro Precision: {float(cb_micro_precision):.2%}')
    print(f'Class-based micro Recall: {float(cb_micro_recall):.2%}')
    print(f'Class-based micro F1 score: {float(cb_micro_f1):.2%}')

    # NOTE(review): this assumes the pooled row order of prediction_matrix
    # matches the original `dataset` order across CV folds — confirm in
    # get_cross_validation_datasets before trusting the per-entity dump.
    print(f'writing out classification results')
    with open(Path(args.out_dir) / 'classification_result.json', 'w') as fo:
        for i, item in tqdm(enumerate(dataset)):
            title = item['title']
            predicted_classes = [
                id2ene[j] for j, v in enumerate(prediction_matrix[i])
                if v == 1.0
            ]
            reference_classes = [
                id2ene[j] for j, v in enumerate(reference_matrix[i])
                if v == 1.0
            ]
            out = {
                'title': title,
                'prediction': predicted_classes,
                'reference': reference_classes
            }
            print(json.dumps(out, ensure_ascii=False), file=fo)
def main():
    """Train and/or evaluate a graph-convolution property model.

    Command-line entry point: loads (or preprocesses and caches) the
    training and test datasets, builds a ``Classifier`` or ``Regressor``
    for ``args.method``, optionally trains it, then predicts over the
    test set, writes ``result.csv`` and prints evaluation metrics.

    Raises:
        ValueError: if no target label is specified via ``args.label``.
    """
    # Parse the arguments.
    args = parse_arguments()
    args.out = os.path.join(args.out, args.method)
    save_args(args, args.out)

    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label_float(label_list):
        return numpy.asarray(label_list, dtype=numpy.float32)

    def postprocess_label_int(label_list):
        return numpy.asarray(label_list, dtype=numpy.int64)

    # BUGFIX: `scaler` used to be assigned only inside the `if args.train:`
    # branch, so running with a pre-trained model (no --train) raised
    # NameError at the regression `set_up_predictor(..., label_scaler=scaler)`
    # call below. Default it here.
    scaler = None

    # Apply a preprocessor to the dataset.
    if args.train:
        # Training data: load a cached .npz directly, otherwise parse the
        # CSV and cache the parsed dataset under the output directory.
        fn, ext = os.path.splitext(args.train)
        if ext == ".npz":
            print('Loading training dataset...')
            train = NumpyTupleDataset.load(args.train)
        else:
            print('Preprocessing training dataset...')
            preprocessor = preprocess_method_dict[args.method]()
            if args.classification:
                parser = CSVFileParser(
                    preprocessor, postprocess_label=postprocess_label_int,
                    labels=labels, smiles_col='SMILES')
            else:
                parser = CSVFileParser(
                    preprocessor, postprocess_label=postprocess_label_float,
                    labels=labels, smiles_col='SMILES')
            train = parser.parse(args.train)['dataset']
            NumpyTupleDataset.save(
                os.path.join(args.out, os.path.split(fn)[1]), train)

        # Scale the label values, if necessary (labels are the last dataset).
        if args.scale == 'standardize':
            scaler = StandardScaler()
            scaler.fit(train.get_datasets()[-1])

    # Test data: same load-or-parse logic as the training data.
    fn, ext = os.path.splitext(args.val)
    if ext == ".npz":
        print('Loading test dataset...')
        test = NumpyTupleDataset.load(args.val)
    else:
        print('Preprocessing test dataset...')
        preprocessor = preprocess_method_dict[args.method]()
        if args.classification:
            parser = CSVFileParser(
                preprocessor, postprocess_label=postprocess_label_int,
                labels=labels, smiles_col='SMILES')
        else:
            parser = CSVFileParser(
                preprocessor, postprocess_label=postprocess_label_float,
                labels=labels, smiles_col='SMILES')
        test = parser.parse(args.val)['dataset']
        NumpyTupleDataset.save(
            os.path.join(args.out, os.path.split(fn)[1]), test)

    # Set up the model.
    device = chainer.get_device(args.device)
    converter = converter_method_dict[args.method]
    metrics_fun = {'mae': F.mean_absolute_error, 'rmse': rmse}
    if args.classification:
        if args.load_model:
            model = Classifier.load_pickle(args.load_model, device=device)
            print("model file loaded: ", args.load_model)
        else:
            predictor = set_up_predictor(args.method, args.unit_num,
                                         args.conv_layers, class_num)
            model = Classifier(predictor,
                               lossfun=F.sigmoid_cross_entropy,
                               metrics_fun=F.binary_accuracy,
                               device=device)
    else:
        if args.load_model:
            model = Regressor.load_pickle(args.load_model, device=device)
            print("model file loaded: ", args.load_model)
        else:
            predictor = set_up_predictor(
                args.method + args.method_suffix, args.unit_num,
                args.conv_layers, class_num, label_scaler=scaler)
            model = Regressor(predictor, lossfun=F.mean_squared_error,
                              metrics_fun=metrics_fun, device=device)

    if args.train:
        if args.balanced_iter:
            # Oversample so each label value is seen equally often per epoch.
            train = BalancedSerialIterator(train, args.batchsize,
                                           train.features[:, -1],
                                           ignore_labels=-1)
            train.show_label_stats()

        print('Training...')
        log_keys = ['main/mae', 'main/rmse', 'validation/main/mae',
                    'validation/main/rmse', 'validation/main/roc_auc']
        extensions_list = [extensions.PlotReport(
            log_keys, 'iteration', trigger=(100, 'iteration'),
            file_name='loss.png')]
        if args.eval_roc and args.classification:
            # BUGFIX: use `model.predictor` instead of the local `predictor`,
            # which is unbound when the model was loaded via --load_model.
            extensions_list.append(ROCAUCEvaluator(
                test, model, eval_func=model.predictor,
                device=device, converter=converter, name='validation',
                pos_labels=1, ignore_labels=-1, raise_value_error=False))

        save_json(os.path.join(args.out, 'args.json'), vars(args))
        run_train(model, train, valid=test,
                  batch_size=args.batchsize, epoch=args.epoch,
                  out=args.out, extensions_list=extensions_list,
                  device=device, converter=converter)  # , resume_path=args.resume)

        # Save the model's parameters.
        model_path = os.path.join(args.out, args.model_filename)
        print('Saving the trained model to {}...'.format(model_path))
        # NOTE(review): presumably stateful graph convolutions must be reset
        # so device-resident state is not baked into the pickle — confirm.
        if hasattr(model.predictor.graph_conv, 'reset_state'):
            model.predictor.graph_conv.reset_state()
        model.save_pickle(model_path, protocol=args.protocol)

    # Prediction over the test set.
    it = SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    result = []
    for batch in it:
        in_arrays = convert._call_converter(converter, batch, device)
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            if isinstance(in_arrays, tuple):
                res = model(*in_arrays)
            elif isinstance(in_arrays, dict):
                res = model(**in_arrays)
            else:
                res = model(in_arrays)
        # NOTE(review): `.get()` assumes a cupy (GPU) array; this path would
        # fail with AttributeError on CPU — confirm intended device usage.
        result.extend(model.y.array.get())
    numpy.savetxt(os.path.join(args.out, "result.csv"), numpy.array(result))

    eval_result = Evaluator(it, model, converter=converter, device=device)()
    print('Evaluation result: ', eval_result)
def _forward(self, data, fn, batchsize=16, converter=concat_examples,
             retain_inputs=False, preprocess_fn=None, postprocess_fn=None):
    """Apply ``fn`` over ``data`` one minibatch at a time and concatenate.

    Args:
        data: "train_x array" or "chainer dataset" to iterate over.
        fn (Callable): main forward function; takes Variable,
            cupy.ndarray or numpy.ndarray arguments and returns Variable.
        batchsize (int): minibatch size for the internal iterator.
        converter (Callable): converts a raw batch from ``data`` into the
            inputs passed to ``fn`` (moved to ``self._dev_id``).
        retain_inputs (bool): when True, the concatenated (CPU) inputs are
            kept on ``self.inputs``.
        preprocess_fn (Callable): optional transform applied to the
            converted inputs before ``fn``.
        postprocess_fn (Callable): optional transform applied to the
            outputs of ``fn`` before accumulation.

    Returns (tuple or numpy.ndarray): a single concatenated array when
        ``fn`` produces one output, otherwise a list of them.
    """
    retained = None   # per-input accumulators, used only with retain_inputs
    collected = None  # per-output accumulators
    iterator = SerialIterator(data, batch_size=batchsize,
                              repeat=False, shuffle=False)
    for minibatch in iterator:
        ins = _to_tuple(converter(minibatch, self._dev_id))
        if preprocess_fn:
            ins = _to_tuple(preprocess_fn(*ins))
        outs = _to_tuple(fn(*ins))

        if retain_inputs:
            if retained is None:
                # Lazily size the buckets from the first batch.
                retained = [[] for _ in ins]
            for bucket, arr in zip(retained, ins):
                bucket.append(cuda.to_cpu(arr))

        # The output buckets are sized from the raw outputs, BEFORE any
        # postprocess_fn is applied — same ordering as the original code.
        if collected is None:
            collected = [[] for _ in outs]
        if postprocess_fn:
            outs = _to_tuple(postprocess_fn(*outs))
        for j, out in enumerate(outs):
            collected[j].append(_extract_numpy(out))

    if retain_inputs:
        self.inputs = [numpy.concatenate(chunks) for chunks in retained]

    merged = [numpy.concatenate(chunks) for chunks in collected]
    return merged[0] if len(merged) == 1 else merged
def main():
    """Predict properties for a virtual library and annotate its images.

    Reads the GUI-selected theme/method, parses the library CSV, loads a
    pickled regressor, evaluates and predicts over the dataset, writes the
    predictions (plus SA scores) back to the CSV, and stamps each rendered
    molecule PNG with its predicted value and SA score.

    Raises:
        ValueError: if no target label is specified via ``args.label``.
    """
    # Parse the arguments.
    args = parse_arguments()
    theme_name = t_theme_name.get()

    args.model_folder_name = os.path.join(theme_name, 'chainer')
    # args.epoch = int(float(t_epochs.get()))
    args.out = parent_path / 'models' / theme_name / method_name
    args.method = method_name

    if args.label:
        labels = args.label
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation.
    def postprocess_label(label_list):
        return numpy.asarray(label_list, dtype=numpy.float32)

    print('Preprocessing dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParser(preprocessor, postprocess_label=postprocess_label,
                           labels=labels, smiles_col=t_smiles.get())
    # args.datafile=parent_path / 'results' / theme_name / method_name / high_low /'brics_virtual' / 'virtual.csv'
    args.datafile = csv_path
    dataset = parser.parse(args.datafile)['dataset']

    @chainer.dataset.converter()
    def extract_inputs(batch, device=None):
        # Drop the last (label) column; feed only inputs to the predictor.
        return concat_mols(batch, device=device)[:-1]

    print('Predicting the virtual library')
    # Set up the regressor.
    device = chainer.get_device(args.device)
    # BUGFIX: removed the unused `model_path` assignment that read
    # `args.model_foldername` — that attribute is never set (the script sets
    # `args.model_folder_name` above), so the line raised AttributeError and
    # its value was never used.
    # SECURITY NOTE: unpickling executes arbitrary code — only load
    # regressor pickles from trusted locations.
    with open(parent_path / 'models' / theme_name / method_name / high_low /
              ('regressor.pickle'), 'rb') as f:
        regressor = cloudpickle.loads(f.read())

    # Perform the prediction.
    print('Evaluating...')
    converter = converter_method_dict[args.method]
    data_iterator = SerialIterator(dataset, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(data_iterator, regressor,
                            converter=converter, device=device)()
    print('Evaluation result: ', eval_result)

    predict_ = regressor.predict(dataset, converter=extract_inputs)
    predict_ = [i[0] for i in predict_]

    df_data = pd.read_csv(csv_path)
    df_predict = df_data
    df_predict[t_task.get()] = predict_
    df_predict = df_predict.dropna()
    PandasTools.AddMoleculeColumnToFrame(frame=df_predict,
                                         smilesCol=t_smiles.get())
    df_predict['sascore'] = df_predict.ROMol.map(sascorer.calculateScore)
    df_predict.to_csv(csv_path)

    png_generator = (parent_path / 'results' / theme_name / method_name /
                     high_low / data_name /
                     'molecular-structure').glob('*.png')
    # png_generator.sort()
    for png_path in png_generator:
        # The molecule index is embedded in the file name at chars [4:10]
        # (the enumerate index the original code created was immediately
        # overwritten by this value, so it has been dropped).
        idx = int(png_path.name[4:10])
        if idx < len(df_predict[t_task.get()]):
            img = Image.open(png_path)
            draw = ImageDraw.Draw(img)
            # NOTE(review): 'arial.ttf' is Windows-specific — confirm the
            # deployment platform ships this font.
            font = ImageFont.truetype('arial.ttf', 26)
            draw.text((0, 0),
                      t_task.get() + ' : ' +
                      str(round(df_predict[t_task.get()][idx], 2)),
                      (0, 0, 0), font=font)
            draw.text((0, 30),
                      'sascore : ' + str(round(df_predict['sascore'][idx], 2)),
                      (0, 0, 0), font=font)
            img.save(png_path)

    save_json(os.path.join(args.out, 'eval_result.json'), eval_result)
def main():
    """Visualize GAIN class-activation maps and pointed-object boxes.

    Loads a GAIN-trained FCN8s_hand model, iterates the validation split,
    computes per-class Grad-CAMs and derived bounding boxes, and shows the
    annotated frames one by one with OpenCV (press a key to advance).
    """
    # BUGFIX: the parser was constructed twice with identical settings; the
    # first instance was discarded. Build it once.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--pretrained', type=str,
                        help='path to model that has trained classifier but '
                             'has not been trained through GAIN routine',
                        default='classifier_padding_1_model_594832')
    parser.add_argument('--trained', type=str,
                        help='path to model trained through GAIN',
                        default='result/MYGAIN_5_to_1_padding_1_all_update_model_20000')
    parser.add_argument('--device', type=int, default=0, help='gpu id')
    # NOTE(review): argparse `type=bool` is a known pitfall — any non-empty
    # string (including "False") parses as True. Kept as-is to preserve the
    # CLI; consider `action='store_true'` in a follow-up.
    parser.add_argument('--shuffle', type=bool, default=False,
                        help='whether to shuffle dataset')
    parser.add_argument('--whole', type=bool, default=False,
                        help='whether to test for the whole validation dataset')
    parser.add_argument('--no', type=int, default=50,
                        help='if not whole, then no of images to visualize')
    parser.add_argument('--name', type=str, default='viz1',
                        help='name of the subfolder or experiment under which to save')
    args = parser.parse_args()

    # pretrained_file = args.pretrained
    trained_file = args.trained
    device = args.device
    shuffle = args.shuffle
    whole = args.whole
    name = args.name
    N = args.no

    dataset = MyTrainingDataset(split='val')
    iterator = SerialIterator(dataset, 1, shuffle=shuffle, repeat=False)
    converter = chainer.dataset.concat_examples
    os.makedirs('viz/' + name, exist_ok=True)
    no_of_classes = 20
    # NOTE(review): this clobbers the --device argument; kept to preserve
    # existing behavior, but it likely should be removed.
    device = 0

    pretrained = FCN8s_hand()
    trained = FCN8s_hand()
    # load_npz(pretrained_file, pretrained)
    load_npz(trained_file, trained)
    if device >= 0:
        pretrained.to_gpu()
        trained.to_gpu()

    i = 0
    while not iterator.is_new_epoch:
        if not whole and i >= N:
            break
        image, labels, metadata = converter(iterator.next())
        np_input_img = image
        np_input_img = np.uint8(np_input_img[0])
        np_input_img = np.transpose(np_input_img, (1, 2, 0))  # CHW -> HWC
        image = Variable(image)
        if device >= 0:
            image.to_gpu()
        xp = get_array_module(image.data)
        to_substract = np.array((-1, 0))
        noise_classes = np.unique(labels[0]).astype(np.int32)
        target = xp.asarray([[0] * (no_of_classes)])
        gt_labels = np.setdiff1d(noise_classes, to_substract) - 1
        # gcam1, cl_scores1, class_id1 = pretrained.stream_cl(image)
        # gcam2, cl_scores2, class_id2 = trained.stream_cl(image)
        # gcams1, cl_scores1, class_ids1 = pretrained.stream_cl_multi(image)
        gcams2, cl_scores2, class_ids2 = trained.stream_cl_multi(image)
        print(np_input_img.shape)
        bboxes_per_class, pointed_bbox = gcams_to_bboxes(
            gcams2, class_ids2, input_image=np_input_img)
        # for bboxes in bboxes_per_class:
        #     for bbox in bboxes:
        #         cv2.rectangle(np_input_img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), [255,255,255], 2)
        display_img = cv2.cvtColor(np_input_img.copy(), cv2.COLOR_RGB2BGR)

        # If there's a hand (class id 20, last) and a pointed object, draw
        # the pointed box and redraw the hand boxes in a different color.
        if len(class_ids2) >= 2 and class_ids2[-1] == 20:
            cv2.rectangle(display_img,
                          (int(pointed_bbox[0]), int(pointed_bbox[1])),
                          (int(pointed_bbox[2]), int(pointed_bbox[3])),
                          [255, 255, 255], 2)
            for bbox in bboxes_per_class[-1]:
                cv2.rectangle(display_img,
                              (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              [0, 255, 0], 2)
        cv2.imshow('input img', display_img)
        cv2.waitKey(0)

        # BUGFIX: the original `if device > -0: class_id = cp.asnumpy(class_id)`
        # referenced `class_id`, which is only defined in the commented-out
        # single-class code above, and would raise NameError if it ever ran
        # (`device > -0` is `device > 0`; unreachable with device = 0).
        # Disabled along with the rest of the dead single-class path.
        # if device > 0:
        #     class_id = cp.asnumpy(class_id)
        # fig1 = plt.figure(figsize=(20, 10))
        # ax1 = plt.subplot2grid((3, 9), (0, 0), colspan=3, rowspan=3)
        # ax1.axis('off')
        # ax1.imshow(cp.asnumpy(F.transpose(F.squeeze(image, 0), (1, 2, 0)).data) / 255.)
        #
        # ax2 = plt.subplot2grid((3, 9), (0, 3), colspan=3, rowspan=3)
        # ax2.axis('off')
        # ax2.imshow(cp.asnumpy(F.transpose(F.squeeze(image, 0), (1, 2, 0)).data) / 255.)
        # ax2.imshow(cp.asnumpy(F.squeeze(gcam1[0], 0).data), cmap='jet', alpha=.5)
        # ax2.set_title("Before GAIN for class - " + str(dataset.class_names[cp.asnumpy(class_id1)+1]),
        #               color='teal')
        #
        # ax3 = plt.subplot2grid((3, 9), (0, 6), colspan=3, rowspan=3)
        # ax3.axis('off')
        # ax3.imshow(cp.asnumpy(F.transpose(F.squeeze(image, 0), (1, 2, 0)).data) / 255.)
        # ax3.imshow(cp.asnumpy(F.squeeze(gcam2[0], 0).data), cmap='jet', alpha=.5)
        # ax3.set_title("After GAIN for class - " + str(dataset.class_names[cp.asnumpy(class_id2)+1]),
        #               color='teal')
        # fig1.savefig('viz/' + name + '/' + str(i) + '.png')
        # plt.close()
        print(i)
        i += 1