def process(self, dataset_obj: CRAFTDataset):
    self.__dataset_obj = dataset_obj

    # Collect the original questions of every video instance into a single list.
    original_questions = []
    instance_ids = sorted(list(dataset_obj.video_index_to_questions_map.keys()))
    for i, instance_id in enumerate(instance_ids):
        sid = int(dataset_obj.video_index_to_questions_map[instance_id][0]["simulation_id"])
        original_qa_json = FileIO.read_json(
            self.__dataset_obj.get_original_questions_path(sid, instance_id))
        for qa in original_qa_json["questions"]:
            original_questions.append(dataset_obj.get_question_from_question_obj(qa, sid))
        logger.info(f"Processed: {i + 1}/{len(instance_ids)}")

    os.makedirs(self.output_folder_path, exist_ok=True)
    FileIO.write_json(original_questions,
                      f"{self.output_folder_path}/dataset_minimal.json")

    self.__dataset_obj = CRAFTDataset(self.output_folder_path, self.__dataset_obj.metadata)
def process(self, dataset_obj: CRAFTDataset): logger.info("Initiating dataset balancing stage...") dataset_obj.generate_statistics( output_folder=f"{dataset_obj.dataset_folder_path}/stats/imbalanced" ) logger.info( f"Performing various under-sampling operations on dataset...") balanced_dataset_output_path = f"{dataset_obj.dataset_folder_path}/balanced_dataset.json" DatasetUnderSampler(dataset_obj, balanced_dataset_output_path) \ .balance_answers_within_each_template_and_simulation_ids(self.purge) \ .dump() logger.info(f"Copying imbalanced dataset to its file") FileIO.copy( f"{dataset_obj.dataset_folder_path}/dataset_minimal.json", f"{dataset_obj.dataset_folder_path}/imbalanced_dataset.json") logger.info(f"Copying balanced dataset to original file") FileIO.copy(f"{dataset_obj.dataset_folder_path}/balanced_dataset.json", f"{dataset_obj.dataset_folder_path}/dataset_minimal.json") self.balanced_dataset = CRAFTDataset(dataset_obj.dataset_folder_path, dataset_obj.metadata) self.balanced_dataset.generate_statistics( output_folder=f"{dataset_obj.dataset_folder_path}/stats/balanced")
def process(self, dataset_obj: CRAFTDataset):
    self.__dataset_obj = dataset_obj

    # Determine which of the 10000 video indices ended up with no questions attached.
    videos_with_no_questions = []
    ground = list(range(0, 10000))
    for idx in ground:
        if idx not in dataset_obj.video_index_to_questions_map:
            videos_with_no_questions.append(idx)

    with open(f"{dataset_obj.dataset_folder_path}/videos_with_no_questions.json", "w") as vwnq_file:
        json.dump(videos_with_no_questions, vwnq_file)

    os.makedirs(f"{dataset_obj.dataset_folder_path}/videos_with_no_questions", exist_ok=True)

    # Move videos without questions to a separate folder.
    for idx in videos_with_no_questions:
        vid_path = glob.glob(
            f"{dataset_obj.dataset_folder_path}/videos/**/{idx:06d}.mpg",
            recursive=True)[0]
        dest = vid_path.replace("videos", "videos_with_no_questions")
        os.makedirs(Path(dest).parent.as_posix(), exist_ok=True)
        FileIO.move(vid_path, dest)
def object_segmentation(video_index: int):
    old_snapshots_folder = str(Path(f"./{video_index}_snapshots/").absolute().as_posix())
    new_snapshots_folder = str(Path(f"./{video_index}_new_snapshots/").absolute().as_posix())
    new_controllers_folder = str(Path(f"./{video_index}_new_controllers/").absolute().as_posix())
    new_screenshots_folder = str(Path(f"./{video_index}_screenshots/").absolute().as_posix())
    new_screenshots_folder_dynamic = str(Path(f"./{video_index}_screenshots/dynamics_ss").absolute().as_posix())

    os.makedirs(old_snapshots_folder, exist_ok=True)
    os.makedirs(new_snapshots_folder, exist_ok=True)
    os.makedirs(new_controllers_folder, exist_ok=True)
    os.makedirs(new_screenshots_folder, exist_ok=True)
    os.makedirs(new_screenshots_folder_dynamic, exist_ok=True)

    input_scene_path = str(Path(f"./run/{video_index:06}.json").absolute().as_posix())

    exec_path = Path(
        "../../../simulation/2d/SVQA-Box2D/Build/bin/x86_64/Release/Testbed"
    ).absolute().as_posix()
    working_directory = Path(
        "../../../simulation/2d/SVQA-Box2D/Testbed").absolute().as_posix()

    input_scene_json = FileIO.read_json(input_scene_path)
    video_fn = (input_scene_json["original_video_output"]["video_filename"]
                if "original_video_output" in input_scene_json
                else input_scene_json["video_filename"])
    simulation_id = int(video_fn.split("/")[-2].split("_")[-1])

    cj = json.loads(f"""{{
        "simulationID": {simulation_id},
        "offline": false,
        "outputVideoPath": "output.mpg",
        "outputJSONPath": "output.json",
        "width": 256,
        "height": 256,
        "inputScenePath": "{input_scene_path}",
        "snapshotOutputFolder": "{old_snapshots_folder}/",
        "stepCount": 600
    }}""")

    os.makedirs(Path("./run/controllers/").absolute().as_posix(), exist_ok=True)
    controller_path = Path(f"./run/controllers/{video_index}_controller.json").absolute().as_posix()
    FileIO.write_json(cj, controller_path)

    run_simulation(exec_path, controller_path, working_dir=working_directory)

    produce_snapshots_and_controllers(old_snapshots_folder, new_snapshots_folder,
                                      new_controllers_folder, exec_path,
                                      new_screenshots_folder)

    combine(new_screenshots_folder)
def generate_random_parts(nparts: int):
    split_info_random = FileIO.read_json(f"{dataset.dataset_folder_path}/split_info_random.json")
    split_info_hard = FileIO.read_json(f"{dataset.dataset_folder_path}/split_info_hard.json")

    test_questions = []
    split_setting = {}

    # Collect the test questions listed in the "random" split.
    for pair in split_info_random["test"]:
        video_index = pair["video_index"]
        question_index = pair["question_index"]
        for question in dataset.video_index_to_questions_map[video_index]:
            if question["question_index"] == question_index:
                test_questions.append(question)
                split_setting[f"{video_index}-{question_index}"] = "random"
                break

    # Collect the test questions listed in the "hard" split.
    for pair in split_info_hard["test"]:
        video_index = pair["video_index"]
        question_index = pair["question_index"]
        for question in dataset.video_index_to_questions_map[video_index]:
            if question["question_index"] == question_index:
                test_questions.append(question)
                split_setting[f"{video_index}-{question_index}"] = "hard"
                break

    human_test_dataset = CRAFTDataset(dataset_folder_path, metadata, load_immediately=False)
    human_test_dataset.questions = test_questions
    human_test_dataset.prepare_auxiliaries()
    human_test_dataset.build_sid_vi_q_map()

    # Partition the selected videos into nparts disjoint parts.
    visited = set()
    parts = []
    for i in range(nparts):
        vis = random_simulation_select(i + 3123, human_test_dataset.sid_vi_q_map, visited)
        parts.append(vis)

    chosen_qs = []
    for i in range(len(parts)):
        part = parts[i]
        for vi in part:
            qlist = human_test_dataset.video_index_to_questions_map[vi]
            for q in qlist:
                q["part"] = i + 1
            chosen_qs.extend(qlist)

    return parts, chosen_qs, split_setting, test_questions
def get_q(path):
    """Flatten a JSON file that maps keys (e.g. part names) to lists of questions into one list."""
    a = FileIO.read_json(path)
    q = []
    for i in a:
        for j in a[i]:
            q.append(j)
    return q
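# Hedged usage sketch (the helper below is hypothetical and not part of the original
# scripts): get_q flattens a file shaped like {"<key>": [<question>, ...], ...} into a
# single flat list of question objects. The path mirrors the part files read elsewhere
# in the human-evaluation scripts.
def _example_flatten_part_file():
    flattened = get_q("data/json_part_1_questions.json")
    return len(flattened)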
def process(self, config: DatasetGenerationConfig):
    logger.info("Initiating dataset generation process...")

    dataset_generator = DatasetGenerator(config)
    dataset_generator.execute()

    dataset_folder_path = dataset_generator.config.output_folder_path
    self.__dataset = CRAFTDataset(
        dataset_folder_path,
        FileIO.read_json(config.dataset_metadata_file_path))
def main(): dataset_folder_path = "../human_eval/data" metadata = FileIO.read_json("../../svqa/metadata.json") dataset = CRAFTDataset(dataset_folder_path, metadata) #get_videos_by_number_of_question(dataset, "Descriptive", 10) #get_videos_by_number_of_question(dataset, "Prevent", 3) #get_videos_by_number_of_question(dataset, "Counterfactual", 6) #get_videos_by_number_of_question(dataset, "Enable", 3) get_videos_by_number_of_question(dataset, "Cause", 3)
def load_dataset(self): logger.info("Loading dataset...") self.path = self.le_dataset_folder.text() with open(".state", "w") as state_file: state_file.write(self.path) global g_dataset g_dataset = CRAFTDataset(self.path, FileIO.read_json("../svqa/metadata.json")) logger.info(f"Dataset at {self.path} loaded...") self.populate_lists()
def run_simulation_instance(output_json_path, controller_file_path, scene_id: int, id: int):
    logger.info(f"Running a perturbation of {id:06d}")

    new_output_json_path = Path(f"./perturbed_outputs/{id:06d}.json").absolute().as_posix()
    new_output_video_path = Path(f"./perturbed_outputs/{id:06d}.mpg").absolute().as_posix()
    new_controller_file_path = Path(
        f"./perturbed_controllers/controller_{scene_id}_{id:06d}.json").absolute().as_posix()
    new_variations_file_path = Path(
        f"./perturbed_outputs/variations_{scene_id}_{id:06d}.json").absolute().as_posix()
    new_questions_file_path = Path(
        f"./perturbed_outputs/questions_{scene_id}_{id:06d}.json").absolute().as_posix()
    new_debug_file_path = Path(
        f"./perturbed_outputs/debug_{scene_id}_{id:06d}.txt").absolute().as_posix()

    controller = FileIO.read_json(controller_file_path)

    with open(new_controller_file_path, 'w') as controller_file:
        json.dump(json.loads(f"""{{
            "simulationID": {scene_id},
            "offline": true,
            "outputVideoPath": "{new_output_video_path}",
            "outputJSONPath": "{new_output_json_path}",
            "width": {controller["width"]},
            "height": {controller["height"]},
            "inputScenePath": "{output_json_path}",
            "stepCount": {controller["stepCount"]},
            "noiseAmount": {NOISE_AMOUNT}
        }}"""), controller_file, indent=2)

    # Executable of the submodule at the branch new-dataset-with-noise must be built before running.
    exec_path = Path(
        "../../../simulation/2d/SVQA-Box2D/Build/bin/x86_64/Release/Testbed"
    ).absolute().as_posix()
    working_dir = Path("../../../simulation/2d/SVQA-Box2D/Testbed").absolute().as_posix()

    runner = SimulationRunner(exec_path, working_directory=working_dir)

    instance = SimulationInstance(id, new_controller_file_path, new_variations_file_path,
                                  new_questions_file_path, runner)

    instance.run_simulation(debug_output_path=new_debug_file_path)
    instance.run_variations()
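# Hedged alternative sketch (an assumption, not the project's code; the helper name is
# hypothetical): the perturbation controller written above could equivalently be built
# as a plain dict and serialized with json.dump, which avoids escaping braces inside
# the f-string template. The keys mirror the controller fields used above.
def _build_perturbation_controller(scene_id, controller, input_scene_path,
                                   output_video_path, output_json_path):
    return {
        "simulationID": scene_id,
        "offline": True,
        "outputVideoPath": output_video_path,
        "outputJSONPath": output_json_path,
        "width": controller["width"],
        "height": controller["height"],
        "inputScenePath": input_scene_path,
        "stepCount": controller["stepCount"],
        "noiseAmount": NOISE_AMOUNT,
    }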
def run_variations(self, controller_json_path: str, variations_output_path: str,
                   debug_output_path: str):
    final_output_json = {}

    controller_json = FileIO.read_json(controller_json_path)
    original_output_path: str = controller_json["outputJSONPath"]
    original_output: dict = FileIO.read_json(original_output_path)
    final_output_json["original_video_output"] = original_output

    variation_outputs = {}
    controller_paths = self.__create_variations(original_output_path, controller_json,
                                                original_output)
    for c in controller_paths:
        self.__runner.run_simulation(c[1], debug_output_path)
        variation_outputs[str(c[0])] = self.__get_variation_output(c[1])

    final_output_json["variations_outputs"] = variation_outputs

    self.__write_enables_prevents(final_output_json)

    with open(variations_output_path, "w") as f:
        json.dump(final_output_json, f)
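    # Rough shape of the variations output written above, inferred from how it is read
    # back elsewhere in this codebase (e.g. by regenerate_answers):
    #   {
    #     "original_video_output": {..., "scene_states": [...], "causal_graph": {...}},
    #     "variations_outputs": {"<variation id>": {...}, ...},
    #     ...  # enable/prevent relations added by __write_enables_prevents
    #   }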
def analyse(filename, perturbation_results_path):
    data = FileIO.read_json(filename)

    by_template_id = by(data, "template_id")
    df = pd.DataFrame(by_template_id)
    df.to_csv(f"{perturbation_results_path}/perturbation_by_template_id.csv")

    by_sid = by(data, "simulation_id")
    df = pd.DataFrame(by_sid)
    df.to_csv(f"{perturbation_results_path}/perturbation_by_sid.csv")

    by_video_index = by(data, "video_index")
    df = pd.DataFrame(by_video_index)
    df.to_csv(f"{perturbation_results_path}/perturbation_by_video_index.csv")
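# Hedged usage sketch (hypothetical helper; the file names are illustrative, not fixed
# by the scripts): analyse expects the analysis JSON dumped at the end of
# start_experiment and a folder in which to write the per-group CSV files.
def _example_run_analysis():
    analyse("analysis_data_07102021_0541.json", "./perturbation_results")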
def regenerate_answers(original_variations_output_file_path, perturbed_variations_output_path,
                       original_questions_path, new_perturbed_qa_file_path,
                       simulation_id, video_index):
    logger.info(f"Regenerating answers for {video_index:06d}")

    variations_output = FileIO.read_json(perturbed_variations_output_path)
    metadata = FileIO.read_json("../../svqa/metadata.json")
    original_questions = FileIO.read_json(original_questions_path)
    original_variations_output = FileIO.read_json(original_variations_output_file_path)

    new_answers = {"questions": []}

    for qa in original_questions["questions"]:
        program = qa["program"]

        scene_structs = original_variations_output["original_video_output"]["scene_states"]
        causal_graph = CausalGraph(
            original_variations_output["original_video_output"]["causal_graph"])

        start_scene_struct = [scene['scene'] for scene in scene_structs if scene['step'] == 0][0]
        end_scene_struct = [scene['scene'] for scene in scene_structs if scene['step'] != 0][0]
        scene_structs_array = [start_scene_struct, end_scene_struct]

        answer = None
        try:
            answer = QuestionGeneratorScript.answer_question_offline(
                variations_output, scene_structs_array, causal_graph, program, metadata)
        except Exception as e:
            logger.error(f"Answer could not be generated: {str(e)}")

        new_qa = copy.deepcopy(qa)
        new_qa["answer"] = answer
        new_answers["questions"].append(new_qa)

    # Because this runs in parallel worker processes, results are written to a file
    # instead of shared in-memory state, which keeps the code free of process-safety concerns.
    FileIO.write_json(new_answers, new_perturbed_qa_file_path)
import json
from collections import defaultdict
from copy import deepcopy

import pandas as pd
import spacy
from spellchecker import SpellChecker

from framework.utils import FileIO

spell = SpellChecker()
nlp = spacy.load('en_core_web_md')

questions = FileIO.read_json("../human_eval/questions.json")


def getQuestionInfo(questionText: str, video_index: int):
    for q in questions:
        if q["question"].strip().lower().replace(" ", "") == \
                questionText.strip().lower().replace(" ", "") \
                and int(q["video_index"]) == video_index:
            return q


class Question:

    def __init__(self, q_number, q_text):
        self.q_number = q_number
        self.q_text = q_text
        self.u_answers = []
        self.actual_answer = ""
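# A small hedged helper sketch (an assumption, not part of the original file): the
# matching in getQuestionInfo compares question texts after stripping, lowercasing,
# and removing all spaces. Factoring that normalization out makes the intent explicit:
#
#   def _normalize_question_text(text: str) -> str:
#       return text.strip().lower().replace(" ", "")
#
#   # getQuestionInfo would then match on
#   # _normalize_question_text(q["question"]) == _normalize_question_text(questionText)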
# For logging: os.makedirs("./out", exist_ok=True) logger.add( f"out/dataset_generation_{datetime.now().strftime('%m%d%Y_%H%M')}.log", enqueue=True) craft_dataset_generation_pipeline = Pipeline([ DatasetGenerationStage(), PreBalancingPostProcessStage(), BalancingStage(purge_single_answers=True), DatasetSplitStage("hard"), DatasetSplitStage("random"), FullDatasetWriteStage( "dataset.json" ), # Full dataset includes "program"s for each question AnnotationsFileCollector("annotations.json"), CleanupStage(), ]) logger.info("Dataset generation pipeline object initiated") dataset_generation_config_file_path = sys.argv[1] config = DatasetGenerationConfig( FileIO.read_json(dataset_generation_config_file_path)) logger.info("Dataset generation configuration file loaded") craft_dataset_generation_pipeline.feed_first_stage(config) logger.info("Starting execution...") craft_dataset_generation_pipeline.execute_all()
import json
import math

import pandas as pd

from framework.utils import FileIO

user_responses_csv = "data/May '21 ML Human Evaluation_July 10, 2021_05.41.csv"

questions = FileIO.read_json("data/dataset_minimal.json")


def get_q(path):
    a = FileIO.read_json(path)
    q = []
    for i in a:
        for j in a[i]:
            q.append(j)
    return q


parts = [
    get_q("data/json_part_1_questions.json"),
    get_q("data/json_part_2_questions.json"),
    get_q("data/json_part_3_questions.json"),
    get_q("data/json_part_4_questions.json"),
    get_q("data/json_part_5_questions.json")
]

# {'Descriptive', 'Prevent', 'Counterfactual', 'Enable', 'Cause'}


def get_question_info(question_text: str, video_index: int):
def process(self, dataset_obj: CRAFTDataset): logger.info("Initiating dataset splitting stage...") rnd = Random(self.seed) self.__dataset_obj = dataset_obj splits = defaultdict(list) vi_qi_to_split = {} if self.config == "hard": split_sizes = {"train": 12, "validation": 4, "test": 4} # Similar scene types counterparts = {1: 18, 3: 16, 4: 17} sids = list(range(1, 21)) chosen = {"test": [], "validation": [], "train": []} # Bogo method. The best. I've spend a lot of time until I reached this ultimate conclusion. while True: rnd.shuffle(sids) chosen["train"] = sids[:split_sizes["train"]] chosen["validation"] = sids[ split_sizes["train"]:split_sizes["train"] + split_sizes["validation"]] chosen["test"] = sids[split_sizes["train"] + split_sizes["validation"]:sum(split_sizes .values())] ok = True for split, ss in chosen.items(): for s in ss: if s in counterparts and counterparts[s] in ss: ok = False break if not ok: break if ok: break counts = defaultdict(int) for question in dataset_obj.questions: if int(question["simulation_id"]) in chosen["train"]: counts["train"] += 1 if int(question["simulation_id"]) in chosen["validation"]: counts["validation"] += 1 if int(question["simulation_id"]) in chosen["test"]: counts["test"] += 1 logger.info(f"Splits: {json.dumps(chosen)}") logger.info( f"Number of questions for each split: {json.dumps(dict(counts))}" ) sid_to_split = {} for split, sids in chosen.items(): for sid in sids: sid_to_split[sid] = split for question in dataset_obj.questions: sid = int(question["simulation_id"]) splits[sid_to_split[sid]].append({ "video_index": question["video_index"], "question_index": question["question_index"] }) vi_qi_to_split[( question["video_index"], question["question_index"])] = sid_to_split[sid] elif self.config == "random": video_indices = sorted( self.__dataset_obj.video_index_to_questions_map.keys()) N = len(video_indices) test_count = int(N * 0.2) val_count = int(N * 0.2) train_count = int(N * 0.6) train_count += N - test_count - val_count - train_count rnd.shuffle(video_indices) train = video_indices[:train_count] val = video_indices[train_count:train_count + val_count] test = video_indices[train_count + val_count:N] assert len(train) + len(val) + len(test) == N for video_index in train: questions = self.__dataset_obj.video_index_to_questions_map[ video_index] for question in questions: splits["train"].append({ "video_index": question["video_index"], "question_index": question["question_index"] }) for video_index in val: questions = self.__dataset_obj.video_index_to_questions_map[ video_index] for question in questions: splits["validation"].append({ "video_index": question["video_index"], "question_index": question["question_index"] }) for video_index in test: questions = self.__dataset_obj.video_index_to_questions_map[ video_index] for question in questions: splits["test"].append({ "video_index": question["video_index"], "question_index": question["question_index"] }) split_to_vid = defaultdict(set) for s in splits: for pair in splits[s]: split_to_vid[s].add(pair["video_index"]) print(f"Stats for {self.config}:") for s in split_to_vid: print(s, len(split_to_vid[s])) logger.info( f"Number of questions for each split: {json.dumps({k: len(v) for k, v in splits.items()})}" ) FileIO.write_json( dict(splits), f"{dataset_obj.dataset_folder_path}/split_info_{self.config}.json")
def start_experiment(dataset: CRAFTDataset):
    logger.info(f"Starting experiment with noise amount %{NOISE_AMOUNT * 100}")

    os.makedirs("./perturbed_outputs", exist_ok=True)
    os.makedirs("./perturbed_controllers", exist_ok=True)

    # Collect the (video_index, simulation_id) pairs present in the dataset.
    video_sid_set = set()
    for question in dataset.questions:
        video_index = question["video_index"]
        simulation_id = question["simulation_id"]
        video_sid_set.add((video_index, simulation_id))

    simulation_jobs = []
    simulation_args = []

    video_sid_set = list(video_sid_set)
    video_sid_set.sort(key=lambda x: x[0])

    # Perturbation of videos
    original_questions = []
    outputs = []
    for video_sid in video_sid_set:
        video_index = video_sid[0]
        simulation_id = video_sid[1]

        original_variations_output_file_path = (
            f"{dataset.intermediates_folder_path}/sid_{simulation_id}/{video_index:06d}.json")
        original_questions_file_path = (
            f"{dataset.intermediates_folder_path}/sid_{simulation_id}/qa_{video_index:06d}.json")
        old_controller_file_path = (
            f"{dataset.intermediates_folder_path}/sid_{simulation_id}/debug/controller_{video_index:06d}.json")

        simulation_jobs.append(run_simulation_instance)
        simulation_args.append([
            original_variations_output_file_path, old_controller_file_path,
            simulation_id, video_index
        ])

        new_variations_output_file_path = (
            f"./perturbed_outputs/variations_{simulation_id}_{video_index:06d}.json")

        outputs.append((video_index, simulation_id, new_variations_output_file_path,
                        original_questions_file_path, original_variations_output_file_path))

        original_questions.extend(dataset.get_questions_for_video(video_index))

    logger.info(f"{len(simulation_jobs)} simulations will be perturbed")

    parallel_worker = ParallelWorker(simulation_jobs, simulation_args, 4)
    parallel_worker.execute_all()

    question_ask_jobs = []
    question_ask_args = []

    # Regenerate answers for perturbed simulations
    qa_outputs = []
    for output in outputs:
        video_index = output[0]
        simulation_id = output[1]
        new_variations_output_file_path = output[2]
        original_questions_file_path = output[3]
        original_variations_output_file_path = output[4]

        new_perturbed_qa_file_path = f"./perturbed_outputs/qa_{video_index:06d}.json"

        question_ask_jobs.append(regenerate_answers)
        question_ask_args.append([
            original_variations_output_file_path, new_variations_output_file_path,
            original_questions_file_path, new_perturbed_qa_file_path,
            simulation_id, video_index
        ])

        qa_outputs.append((video_index, simulation_id, new_perturbed_qa_file_path))

    logger.info("Asking questions for perturbed simulations")

    parallel_worker = ParallelWorker(question_ask_jobs, question_ask_args, 8)
    parallel_worker.execute_all()

    questions_perturbed = []
    for qa in qa_outputs:
        video_index = qa[0]
        simulation_id = qa[1]
        qa_file_path = qa[2]
        qa_file = FileIO.read_json(qa_file_path)
        questions_perturbed.extend(qa_file["questions"])

    logger.info("Measuring similarity, this might take a while...")
    data, orig_size, found, ratio = measure_similarity(original_questions, questions_perturbed)

    logger.info(f"Number of questions from original simulations: {orig_size}")
    logger.info(f"Number of questions from perturbed simulations: {len(questions_perturbed)}")
    logger.info(f"Number of perturbed counterparts: {found}")
    logger.info(f"Match ratio: {found / orig_size}")
    logger.info(f"Correctness: {ratio}")

    logger.info("Dumping analysis data...")
    FileIO.write_json(data, f"analysis_data_{datetime.now().strftime('%m%d%Y_%H%M')}.json")
print(s, len(split_to_vid[s])) print("Questions") question_count_per_split = defaultdict(int) for s in split_to_vid: for vi in split_to_vid[s]: question_count_per_split[s] += len( dataset.video_index_to_questions_map[vi]) for s, c in question_count_per_split.items(): print(s, c) def proof_read(): with open("./dataset_minimal.json", "r") as dataset_file: questions = json.load(dataset_file) for q in questions: if not os.path.isfile(q["video_file_path"]): print(q["video_file_path"], False) if __name__ == '__main__': dataset_folder_path = "../../framework/out/CRAFT_10K" metadata = FileIO.read_json("../../svqa/metadata.json") dataset = CRAFTDataset(dataset_folder_path, metadata) print("Number of videos:", len(dataset.video_index_to_questions_map.keys())) split_info(dataset, "random") split_info(dataset, "hard")
def process(self, dataset_obj: CRAFTDataset): logger.info("Initiating post process stage before balancing...") self.__dataset_obj = dataset_obj for i, instance_id in enumerate( sorted(dataset_obj.video_index_to_questions_map.keys())): question_list = dataset_obj.video_index_to_questions_map[ instance_id] sid = int(question_list[0]["simulation_id"]) annotations = FileIO.read_json( dataset_obj.get_simulation_with_variations_output_path( sid, instance_id)) objects_in_scene = annotations["original_video_output"][ "scene_states"][0]["scene"]["objects"] dynamic_objects = [ object for object in objects_in_scene if object["bodyType"] == 2 ] new_questions_list = [] for question in question_list: # Postprocess Before Balancing 1: Do not ask shape if only one shape is present in the scene. answer_type = dataset_obj.get_answer_type_for_answer( question["answer"]) if answer_type == "Shape": if len( set([ f"{object['shape']}" for object in dynamic_objects ])) <= 1: # Remove the question that asks shape even though there's only one shape present logger.info( f"Question asks shape even though there's only 1 " f"shape present in the scene. Removing {question['video_index']}/{question['question_index']}" ) continue if "hexagon" in question["question"]: logger.info( f"Question asks about hexagons, which are not present in any of the videos. " f"Removing {question['video_index']}/{question['question_index']}" ) continue # Postprocess Before Balancing 2: Remove questions regarding collisions with the basket # to avoid ambiguity. Note that these are not yet removed from the question template # files in svqa/SVQA_1.0_templates. Following can be removed from post processing once # they are removed from the question template files and if the dataset is generated # according to the updated question templates. if question["template_id"] in [ "cause_2", "cause_5", "counterfactual_2", "counterfactual_5", "counterfactual_8", "descriptive_12", "descriptive_13", "descriptive_14", "descriptive_15", "descriptive_20", "descriptive_21", "descriptive_30", "descriptive_31", "descriptive_36", "descriptive_37", "enable_2", "enable_5", "prevent_2", "prevent_5", ]: continue # Postprocess Before Balancing 3: Correct typos in the question templates. # These are also corrected in the question template files in svqa/SVQA_1.0_templates, # so the following can be deleted. if question["template_id"] == "counterfactual_2": question_text: str = question["question"] if question_text.startswith("Will"): question_text = question_text.replace( "the basket the", "the basket if the") question_text = question_text.replace( "the container the", "the container if the") question_text = question_text.replace( "the bucket the", "the bucket if the") question["question"] = question_text if question["template_id"] in [ "prevent_0", "prevent_1", "prevent_2" ]: question_text: str = question["question"] if question_text.startswith("Is"): question_text = question_text.replace( "is prevented by", "prevented by") question_text = question_text.replace( "is kept by", "kept by") question_text = question_text.replace( "is held by", "held by") question_text = question_text.replace( "is blocked by", "blocked by") question["question"] = question_text new_questions_list.append(question) question_list[:] = new_questions_list logger.info( f"Processed: {i}/{len(dataset_obj.video_index_to_questions_map.keys())}" ) # Continue postprocessing before balancing here self.__rewrite_dataset()
    total_correct = 0
    total_wrong = 0
    total_nan = 0
    for p in ret:
        total_correct += p["correct_count"]
        total_wrong += p["wrong_count"]
        total_nan += p["nan_count"]

    overall = {
        "num_people_valid": len(ret),
        "total_correct": total_correct,
        "total_wrong": total_wrong,
        "total_nan": total_nan,
        "total_correct_percent": (total_correct / (total_correct + total_wrong)) * 100,
        "all": ret,
    }

    return overall


if __name__ == '__main__':
    participants = FileIO.read_default_json("participants.json")

    analysis = calculate(
        participants,
        lambda participant: float(participant["qualtrics_progress"]) >= 75,
        lambda participant: True,
        "No")

    FileIO.write_default_json(analysis, "analysis_p75.json")
def __save_state(self, state_obj: dict):
    FileIO.write_json(state_obj, self.get_balancing_state_file_path())
        [1])
    N = len(min_present_qcat[1])

    for qcat in qcat_qs_map:
        qs = list(qcat_qs_map[qcat])
        rnd.shuffle(qs)
        undersampled = qs[:N]
        chosen_qs_qcat_balanced.extend(undersampled)

    return chosen_qs_qcat_balanced


if __name__ == '__main__':
    output_folder_path = "./human_eval_CRAFT_10K_balanced"
    dataset_folder_path = "../../framework/out/CRAFT_10K"
    metadata = FileIO.read_json("../../svqa/metadata.json")
    dataset = CRAFTDataset(dataset_folder_path, metadata)

    os.makedirs(f"{output_folder_path}/", exist_ok=True)

    parts, chosen_qs, split_setting, test_questions = generate_random_parts(5)

    chosen_qs_qcat_balanced = undersample(chosen_qs)

    undersampled_human_test_dataset = CRAFTDataset(dataset_folder_path, metadata,
                                                   load_immediately=False)
    undersampled_human_test_dataset.questions = chosen_qs_qcat_balanced
    undersampled_human_test_dataset.prepare_auxiliaries()
    undersampled_human_test_dataset.build_sid_vi_q_map()
def get_state(self):
    if os.path.exists(self.get_balancing_state_file_path()):
        return FileIO.read_json(self.get_balancing_state_file_path())
    else:
        return None
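    # The state file read here is the one written by __save_state during balancing and
    # has the shape {"answers_needed": ..., "current_video_index": ...}; a None return
    # signals that balancing should start from scratch rather than resume.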
def evaluate():
    part1_data = extract_user_responses("./part1.csv")
    part2_data = extract_user_responses("./part2.csv")
    part3_data = extract_user_responses("./part3.csv")
    part4_data = extract_user_responses("./part4.csv")
    part5_data = extract_user_responses("./part5.csv")

    all_questions = []
    all_questions.extend(part1_data)
    all_questions.extend(part2_data)
    all_questions.extend(part3_data)
    all_questions.extend(part4_data)
    all_questions.extend(part5_data)

    evaluation = {
        "total": {
            "question_count": 0,
            "answered": 0,
            "true": 0,
            "false": 0,
            "empty": {
                "did_not_understand": 0,
                "was_too_hard_to_answer": 0,
                "no_reason": 0
            }
        },
        "per_question": {}
    }

    # Each question row is followed by a row holding the rationale for empty answers,
    # hence the step of 2.
    flattened = []
    for i in range(0, len(all_questions), 2):
        d = all_questions[i]
        if d.info is None:
            continue
        for ii in range(len(d.u_answers)):
            ans = d.u_answers[ii]
            q_empty_rationale = all_questions[i + 1]
            r = q_empty_rationale.u_answers[ii]

            human_answer_rationale = "answered" if ans != "nan" else "no_reason"
            if ans == "nan" and r == "true":
                human_answer_rationale = "did_not_understand"
            elif ans == "nan" and r == "false":
                human_answer_rationale = "was_too_hard_to_answer"

            qinfo = deepcopy(d.info)
            if ans == "grey":
                ans = "gray"
            qinfo["human_answer"] = ans
            qinfo["human_answer_rationale"] = human_answer_rationale
            flattened.append(qinfo)

    hards = defaultdict(list)
    for a in flattened:
        evaluation["total"]["question_count"] += 1
        if a["human_answer_rationale"] == "answered":
            ground_truth = a["answer"].strip().lower()
            ans = a["human_answer"].strip().lower()
            evaluation["total"]["answered"] += 1
            if ground_truth == ans:
                evaluation["total"]["true"] += 1
            else:
                def denote_false():
                    evaluation["total"]["false"] += 1
                    hards["false"].append(a)

                if ans.isnumeric():
                    denote_false()
                    continue
                if (ans == "true" and ground_truth == "false") or \
                        (ans == "false" and ground_truth == "true"):
                    denote_false()
                    continue

                print()
                similarity = get_similarity([ground_truth, ans])
                if similarity > 0.85:
                    print(f"Question: {a['question']}")
                    print(f"Automatically accepted: Truth: '{ground_truth}', Answer: '{ans}'.")
                    evaluation["total"]["true"] += 1
                    continue
                else:
                    print(f"Question: {a['question']}")
                    det = str(input(f"Truth: '{ground_truth}', Answer: '{ans}'. Is this correct? y/n "))
                    if det == "y":
                        evaluation["total"]["true"] += 1
                        continue
                    denote_false()
        else:
            evaluation["total"]["empty"][a["human_answer_rationale"]] += 1
            if a["human_answer_rationale"] != "no_reason":
                hards[a["human_answer_rationale"]].append(a)

    FileIO.write_json(hards, "hard_not_understandable_questions.json")

    print(
        f"Performance among answered questions: "
        f"{(evaluation['total']['true'] / (evaluation['total']['answered'])) * 100}")
    print(
        f"Performance among questions including 'too hard to answer':"
        f" {(evaluation['total']['true'] / (evaluation['total']['answered'] + evaluation['total']['empty']['was_too_hard_to_answer'])) * 100}")

    print(json.dumps(evaluation, indent=2))
def start_balancing(self, video_generation_max_try: int = 30):
    logger.info("Starting to balance the dataset by appending new items...")

    self.__start_time = time.time()

    answers_needed = self.__answers_needed

    # Resume from a previously saved balancing state if one exists.
    state_json = self.get_state()
    if state_json is not None:
        logger.info("Previous unfinished balancing state found. Continuing...")
        answers_needed = state_json["answers_needed"]
        self.__video_index = state_json["current_video_index"]

    answers_needed = json.loads(json.dumps(answers_needed, sort_keys=True))

    total_qas_needed = DatasetBalancer.total_number_of_qas_needed(answers_needed)
    total_number_of_qas_generated = 0

    logger.info(
        f"There are {total_qas_needed} questions with specific answers needed for total balance")

    prev_answers_needed = copy.deepcopy(answers_needed)

    for sid in answers_needed:
        number_of_video_tries = 0
        sid = int(sid)
        while True:
            t1 = time.time()  # To measure remaining time.

            logger.info(
                f"For the scene type with SID {sid}, there are currently "
                f"{DatasetBalancer.number_of_qa_needed_for_sid(str(sid), prev_answers_needed)} Q-As"
                f" with TIDs {DatasetBalancer.list_of_tids_needed(str(sid), answers_needed)} needed")

            instance_id: int = self.__video_index
            simulation_config = self.__sid_to_simulation_configs[sid]

            # Create controller file for current simulation instance.
            controller_file_path = self.dump_controller_file(instance_id, simulation_config)

            variations_output_path = self.get_temp_simulation_with_variations_output_path(sid, instance_id)
            questions_file_path = self.get_temp_questions_output_path(sid, instance_id)

            simulation = SimulationInstance(instance_id, controller_file_path,
                                            variations_output_path, questions_file_path,
                                            self.__runner)

            # Run simulation.
            logger.info(f"Running simulation with SID: {sid}, index: {instance_id}")
            simulation.run_simulation(self.get_temp_debug_output_path(sid, instance_id))

            # Run its variations.
            logger.info("Running its variations")
            simulation.run_variations(self.get_temp_varations_debug_output_path(sid, instance_id))

            # Generate questions.
            logger.info("Generating questions")
            simulation.generate_questions(simulation_config)

            with open(questions_file_path) as qa_json_file:
                qa_json = json.load(qa_json_file)

            logger.info(f"{len(qa_json['questions'])} Q-As generated")

            logger.info("Filtering questions according to needs...")
            output = self.filter_answers_according_to_needs(
                qa_json, str(sid), copy.deepcopy(prev_answers_needed))
            new_answers_needed = output["new_needed_answers"]
            filtered_questions = output["filtered_questions"]
            FileIO.write_json(filtered_questions, questions_file_path)

            self.__save_state({
                "answers_needed": new_answers_needed,
                "current_video_index": self.__video_index
            })

            prev_number_of_answers_needed = DatasetBalancer.number_of_qa_needed_for_sid(
                str(sid), prev_answers_needed)
            curr_number_of_answers_needed = DatasetBalancer.number_of_qa_needed_for_sid(
                str(sid), new_answers_needed)
            obtained_number_of_questions = prev_number_of_answers_needed - curr_number_of_answers_needed

            logger.info(
                f"{obtained_number_of_questions} questions with needed answers have been obtained")

            total_number_of_qas_generated += obtained_number_of_questions
            progress = total_number_of_qas_generated / total_qas_needed
            logger.info(
                f"Current progress: {total_number_of_qas_generated}/{total_qas_needed} "
                f"({round(progress * 100, 3)})")

            if obtained_number_of_questions != 0:
                # Seconds spent per newly obtained question in this iteration, used to
                # estimate the remaining time.
                self.__update_clock((time.time() - t1) / obtained_number_of_questions,
                                    total_qas_needed,
                                    total_number_of_qas_generated)

            diff: DeepDiff = DeepDiff(new_answers_needed, prev_answers_needed, ignore_order=True)

            if prev_answers_needed == new_answers_needed:
                number_of_video_tries += 1
                logger.info(
                    f"Cannot reduce number of Q-As needed, retrying "
                    f"{video_generation_max_try - number_of_video_tries} times before giving up...")
            else:
                logger.info(f"Was able to reduce the answer imbalance for SID: {sid}")
                self.__increment_video_index()
                number_of_video_tries = 0

            if curr_number_of_answers_needed == 0:
                logger.info(
                    f"We have got total balance for scene type with SID {sid}! "
                    f"It's a miracle, no, science!")
                break

            prev_answers_needed = copy.deepcopy(new_answers_needed)

            if number_of_video_tries >= video_generation_max_try:
                logger.info(
                    f"Maximum number of video generation attempts reached for this SID: {sid}")
                logger.info(
                    f"There are still {DatasetBalancer.number_of_qa_needed_for_sid(str(sid), new_answers_needed)} "
                    f"Q-As with TIDs {DatasetBalancer.list_of_tids_needed(str(sid), new_answers_needed)} "
                    f"needed for SID: {sid}")
                FileIO.write_json(new_answers_needed,
                                  self.get_left_out_needed_answers_path(sid))
                break

    logger.info(
        f"Dataset balancing process is complete. "
        f"Process took {round((time.time() - self.__start_time) / 60, 2)} minutes.")
        'video': {
            'codec': 'h264',
            'width': 720,
            'height': 720,
            'fps': 60
        }
    })

    # Drain the conversion generator so the conversion runs to completion.
    for timecode in convert:
        pass

    logger.info(f"Converted: {video_index} --> mp4")


if __name__ == '__main__':
    vq = FileIO.read_json("./human_eval/questions_per_video.json")

    i = 0
    video_index_question_tuples = []
    for video_index, questions in vq.items():
        video_index_question_tuples.append((video_index, questions))

    concurrent_process_count = 8

    for i in range(0, len(video_index_question_tuples), concurrent_process_count):
        jobs = []
        args = []
        start = i
        for j in range(start, start + concurrent_process_count):
            if j >= len(video_index_question_tuples):
                continue