def prepare_multi_modal_data(files_path, task: Task, images_size=(128, 128), with_split=True):
    """Build numeric, image and text datasets that share one binary target.

    :param files_path: location of the data, joined onto the value returned
        by ``fedot_project_root()``
    :param task: task object forwarded to every ``InputData`` constructor
    :param images_size: forwarded to ``InputData.from_image`` as ``target_size``
    :param with_split: when true, every dataset is passed through
        ``train_test_data_setup`` (no shuffling, ratio 0.5); otherwise the
        same full dataset is returned as both the train and the test part
    :return: ``(train_num, test_num, train_img, test_img, train_text, test_text)``
    """
    split_ratio = 0.5

    json_dir = os.path.join(str(fedot_project_root()), files_path)
    unpack_archived_data(json_dir)

    # Numeric view: the 'votes' and 'year' fields, labelled by 'rating'.
    numeric_data = InputData.from_json_files(
        json_dir,
        fields_to_use=['votes', 'year'],
        label='rating',
        task=task,
    )

    # Binarise the target: values <= 7 become class 0, everything else class 1.
    binary_target = np.asarray([0 if rating <= 7 else 1 for rating in numeric_data.target])
    numeric_data.target = binary_target

    # Image view: a glob pattern over the .jpeg files in the same location.
    jpeg_pattern = os.path.join(str(fedot_project_root()), f'{files_path}/*.jpeg')
    image_data = InputData.from_image(
        images=jpeg_pattern,
        labels=binary_target,
        task=task,
        target_size=images_size,
    )

    # Text view: the 'plot' field, re-labelled with the shared binary target.
    text_data = InputData.from_json_files(
        json_dir,
        fields_to_use=['plot'],
        label='rating',
        task=task,
        data_type=DataTypesEnum.text,
    )
    text_data.target = binary_target

    modalities = (numeric_data, image_data, text_data)
    if with_split:
        parts = [
            train_test_data_setup(dataset, shuffle_flag=False, split_ratio=split_ratio)
            for dataset in modalities
        ]
    else:
        # No split requested: each dataset serves as both train and test.
        parts = [(dataset, dataset) for dataset in modalities]

    (train_num, test_num), (train_img, test_img), (train_text, test_text) = parts
    return train_num, test_num, train_img, test_img, train_text, test_text
def run_image_classification_problem(train_dataset: tuple, test_dataset: tuple, composite_flag: bool = True):
    """Fit a chain on image data and evaluate it on a validation set.

    :param train_dataset: indexable whose element 0 holds the training images
        and element 1 the training labels
    :param test_dataset: indexable whose element 0 holds the validation images
        and element 1 the validation labels
    :param composite_flag: forwarded unchanged to ``get_composite_chain``
    :return: ``(metric value, training InputData, validation InputData)``;
        the metric value is also printed with the prefix ``ROCAUC:``
    """
    classification_task = Task(TaskTypesEnum.classification)

    # Pull features and labels out of both containers before building anything.
    train_images, train_labels = train_dataset[0], train_dataset[1]
    valid_images, valid_labels = test_dataset[0], test_dataset[1]

    dataset_to_train = InputData.from_image(
        images=train_images, labels=train_labels, task=classification_task
    )
    dataset_to_validate = InputData.from_image(
        images=valid_images, labels=valid_labels, task=classification_task
    )

    model = get_composite_chain(composite_flag)
    model.fit(input_data=dataset_to_train)
    predicted = model.predict(dataset_to_validate)

    roc_auc_on_valid = calculate_validation_metric(predicted, dataset_to_validate)
    print(f'ROCAUC: {roc_auc_on_valid}')

    return roc_auc_on_valid, dataset_to_train, dataset_to_validate