def build_bbox_tensors(infos, max_length):
    """Pack bounding-box annotations into zero-padded tensors on a ``Sample``.

    Args:
        infos: Sequence of dicts, each holding a ``"bounding_box"`` dict with
            the keys ``"top_left_x"``, ``"top_left_y"``, ``"width"`` and
            ``"height"``. Only the first ``max_length`` entries are used.
        max_length: Number of rows in every returned tensor.

    Returns:
        A ``Sample`` carrying:
            * ``coordinates`` -- ``(max_length, 4)`` float tensor holding
              ``x, y, x + width, y + height`` per box,
            * ``width`` / ``height`` -- ``(max_length,)`` float tensors,
            * ``bbox_types`` -- list with ``max_length`` copies of ``"xyxy"``.
        Rows past the number of boxes actually used remain zero.
    """
    kept = infos[: min(max_length, len(infos))]

    # Allocate at full length up front so unused rows act as zero padding.
    boxes = torch.zeros((max_length, 4), dtype=torch.float)
    widths = torch.zeros(max_length, dtype=torch.float)
    heights = torch.zeros(max_length, dtype=torch.float)

    sample = Sample()
    for row, entry in enumerate(kept):
        box = entry["bounding_box"]
        left, top = box["top_left_x"], box["top_left_y"]
        box_w, box_h = box["width"], box["height"]

        # Convert (top-left, size) into corner format.
        boxes[row, 0] = left
        boxes[row, 1] = top
        boxes[row, 2] = left + box_w
        boxes[row, 3] = top + box_h
        widths[row] = box_w
        heights[row] = box_h

    sample.coordinates = boxes
    sample.width = widths
    sample.height = heights
    sample.bbox_types = ["xyxy"] * max_length
    return sample
def build_bbox_tensors(infos, max_length, feats, img_id, obj_bbox):
    """Build OCR box coordinates and attach relation-edge data to a ``Sample``.

    The OCR boxes in ``infos`` are converted to a zero-padded
    ``(max_length, 4)`` float tensor of ``x, y, x + width, y + height``.
    Four relation types -- object-object (``oo``), object-text (``ot``),
    text-text (``tt``) and text-object (``to``) -- are then loaded from an
    on-disk cache keyed by ``img_id``. If any cached file cannot be loaded,
    all four are recomputed with ``knn_k = 5`` and written back to the cache.

    Args:
        infos: Sequence of dicts, each with a ``"bounding_box"`` dict that has
            ``"width"``/``"height"`` and either ``"top_left_x"``/``"top_left_y"``
            or ``"topLeftX"``/``"topLeftY"``. Only the first ``max_length``
            entries are used.
        max_length: Number of rows in the OCR coordinate tensor.
        feats: Unused by this function; kept for interface compatibility.
        img_id: String used to build the cache file names.
        obj_bbox: Object boxes passed through to the k-NN / edge-feature
            helpers (expected layout is defined by those helpers).

    Returns:
        A ``Sample`` with ``coordinates``, ``ocr_mask`` (the number of OCR
        boxes actually used, i.e. a count rather than a mask tensor) and the
        attributes ``edge_oo``, ``edge_oofeats``, ``edge_ot``,
        ``edge_otfeats``, ``edge_tt``, ``edge_ttfeats``, ``edge_to`` and
        ``edge_tofeats``.

    Raises:
        KeyError: If a bounding box lacks the expected keys.
        Exception: Whatever the edge helpers or ``torch.save`` raise while
            regenerating the cache is propagated.
    """
    # Number of OCR boxes that fit into the fixed-size tensor.
    num_bbox = min(max_length, len(infos))

    # Rows past ``num_bbox`` stay zero and act as padding.
    coord_tensor = torch.zeros((max_length, 4), dtype=torch.float)
    sample = Sample()
    for idx, info in enumerate(infos[:num_bbox]):
        bbox = info["bounding_box"]
        # Annotations come in two spellings of the top-left corner keys.
        if "top_left_x" in bbox:
            x, y = bbox["top_left_x"], bbox["top_left_y"]
        else:
            x, y = bbox["topLeftX"], bbox["topLeftY"]
        width = bbox["width"]
        height = bbox["height"]
        coord_tensor[idx, 0] = x
        coord_tensor[idx, 1] = y
        coord_tensor[idx, 2] = x + width
        coord_tensor[idx, 3] = y + height
    sample.coordinates = coord_tensor
    sample.ocr_mask = num_bbox

    # Cache root for the edge files. Other roots used previously:
    # './data/open_images/GT_OBJ_FRCN/', './data/open_images/visual_genome/'.
    image_path_org = './data/open_images/textvqa_gcy/'

    # NOTE(review): the result of search_file was never used here; the call
    # is kept only in case it has side effects -- confirm and drop if not.
    search_file(image_path_org, img_id)

    knn_k = 5

    # NOTE(review): the cache is keyed by img_id only, so cached edges are
    # reused even if ``infos``/``max_length`` differ from when they were
    # written -- confirm this is intended.
    try:
        edges = _load_cached_edges(image_path_org, img_id)
    except Exception:
        # Deliberate best-effort: a missing or unreadable cache file means
        # "regenerate everything". ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        edges = _generate_and_cache_edges(
            image_path_org, img_id, obj_bbox, coord_tensor, knn_k
        )

    # Attach in a fixed order: oo, oofeats, ot, otfeats, tt, ttfeats, to, tofeats.
    for attr_name, value in edges.items():
        setattr(sample, attr_name, value)
    return sample


def _load_cached_edges(cache_root, img_id):
    """Load all eight cached edge files for ``img_id``; raise if any fails."""
    edges = {}
    for tag in ("oo", "ot", "tt", "to"):
        prefix = cache_root + "edge_" + tag + "/" + img_id + "_" + tag
        # NOTE: torch.load unpickles data; these files are expected to be
        # ones written by _generate_and_cache_edges, not untrusted input.
        edges["edge_" + tag] = torch.load(prefix + ".pdh")
        edges["edge_" + tag + "feats"] = torch.load(prefix + "feats.pdh")
    return edges


def _generate_and_cache_edges(cache_root, img_id, obj_bbox, ocr_bbox, knn_k):
    """Compute k-NN edges and edge features for all four relations and save them."""
    import os  # local import: only needed to create missing cache directories

    # (tag, neighbour finder, edge-feature builder, box arguments)
    builders = (
        ("oo", finde_k_nearest_node, gen_oo_edge_feature, (obj_bbox,)),
        ("ot", dc_finde_k_nearest_node, gen_ot_edge_feature, (obj_bbox, ocr_bbox)),
        ("tt", finde_k_nearest_node, gen_tt_edge_feature, (ocr_bbox,)),
        ("to", dc_finde_k_nearest_node, gen_to_edge_feature, (ocr_bbox, obj_bbox)),
    )

    edges = {}
    for tag, find_neighbours, build_features, boxes in builders:
        edge_dir = cache_root + "edge_" + tag + "/"
        # torch.save does not create parent directories.
        os.makedirs(edge_dir, exist_ok=True)
        prefix = edge_dir + img_id + "_" + tag

        node_matrix = find_neighbours(*boxes, knn_k)
        torch.save(node_matrix, prefix + ".pdh")

        edge_feats = build_features(*boxes, node_matrix, knn_k=knn_k)
        torch.save(edge_feats, prefix + "feats.pdh")

        edges["edge_" + tag] = node_matrix
        edges["edge_" + tag + "feats"] = edge_feats
    return edges