def create_dataset(source="", dest="main", num_houses=-1, batch_size=1000):
    """
    Create a pickled version of the dataset from the json files

    Houses are loaded one at a time with ``House(i)``; those that have at
    least one room are collected, and every ``batch_size`` collected houses
    are pickled to ``<data root>/<dest>/<batch index>.pkl``.

    Parameters
    ----------
    source (string, optional): only the empty string is supported; any other
        value prints a message and does nothing else
    dest (string, optional): directory to save to
    num_houses (int, optional): If -1, then all the houses will be loaded,
        otherwise the first num_houses houses will be loaded
    batch_size (int, optional): number of houses in one .pkl file
    """
    data_dir = utils.get_data_root_dir()
    dest_dir = f"{data_dir}/{dest}"
    os.makedirs(dest_dir, exist_ok=True)

    def pkl_name(index):
        # Use the argument, not the enclosing loop counter.
        return f"{dest_dir}/{index}.pkl"

    if source != "":
        print("Currently only supports loading nothing")
        return

    house_dir = f"{data_dir}/data/house"
    house_ids = dict(enumerate(os.listdir(house_dir)))
    print(f"There are {len(house_ids)} houses in the dataset.")
    if num_houses == -1:
        num_houses = len(house_ids)
    else:
        # Never index past the houses that actually exist on disk.
        num_houses = min(num_houses, len(house_ids))

    # Resume support: skip every batch file that already exists.
    # NOTE(review): the restart position assumes each existing batch consumed
    # exactly batch_size house indices; houses without rooms are not saved, so
    # this is only approximate -- confirm this is acceptable.
    cur_index = 0
    while os.path.exists(pkl_name(cur_index)):
        print(f'Batch file {pkl_name(cur_index)} exists, skipping batch')
        cur_index += 1
    start_house_i = cur_index * batch_size

    to_save = []
    for i in range(start_house_i, num_houses):
        print(f"Now loading house id={house_ids[i]} {i+1}/{num_houses}...", end="\r")
        house = House(i)
        if house.rooms:
            to_save.append(house)
            if len(to_save) == batch_size:
                with open(pkl_name(cur_index), "wb") as f:
                    pickle.dump(to_save, f, pickle.HIGHEST_PROTOCOL)
                to_save = []
                cur_index += 1
    print()

    # Flush the final partial batch; skip it when empty so re-runs do not keep
    # appending empty batch files.
    if to_save:
        with open(pkl_name(cur_index), "wb") as f:
            pickle.dump(to_save, f, pickle.HIGHEST_PROTOCOL)
def __init__(self, details=False, model_details=False, save_freq=True, save_dest=""):
    """
    Set up empty frequency counters and remember the reporting options

    Parameters
    ----------
    details (bool, optional): If true, then frequency information will be
        shown on screen
    model_details (bool, optional): since there are so many model ids, this
        additional bool controls if those should be printed
    save_freq (bool, optional): if true, then the category frequency
        information will be saved
    save_dest (string, optional): directory (relative to the data root) to
        which frequency information is saved
    """
    # Running totals
    self.room_count = 0
    self.object_count = 0

    # Per-key occurrence counters, all starting empty
    self.room_types_count = {}
    self.fine_categories_count = {}
    self.coarse_categories_count = {}
    self.final_categories_count = {}
    self.models_count = {}

    self.object_category = ObjectCategories()
    self.floor_node_only = False

    # Reporting options
    self.details = details
    self.model_details = model_details
    self.save_freq = save_freq

    root_dir = utils.get_data_root_dir()
    self.save_dest = f"{root_dir}/{save_dest}"
def build_cat2scene(self):
    """
    Build (or load) the map from category index to the scenes containing it

    The map is cached as a pickle named ``cat_index2scenes`` inside the data
    folder. If the cache exists it is loaded; otherwise every numbered
    ``.pkl`` scene in the folder is opened with RenderedScene, the map is
    built from the scene's object nodes and then written to the cache.
    The result is stored in ``self.cat_index2scenes``.
    """
    self.cat_index2scenes = defaultdict(list)
    data_root_dir = self.data_root_dir or utils.get_data_root_dir()
    data_dir = f'{data_root_dir}/{self.data_folder}'
    filename = f'{data_dir}/cat_index2scenes'

    # Load an existing cached map file from disk
    if os.path.exists(filename):
        # NOTE: pickle is only safe on trusted files; the cache is expected to
        # have been written by this method.
        with open(filename, 'rb') as f:
            self.cat_index2scenes = pickle.load(f)
        return

    # Create new cached map file
    print('Building map of category to scenes containing an instance...')
    pklnames = [
        os.path.splitext(path)[0] for path in os.listdir(data_dir)
        if path.endswith('.pkl')
    ]
    # Only get the .pkl files which are numbered scenes
    indices = [int(pklname) for pklname in pklnames if pklname.isdigit()]
    for i, idx in enumerate(indices, start=1):
        sys.stdout.write(f' {i}/{len(indices)}\r')
        sys.stdout.flush()
        scene = RenderedScene(idx, self.data_folder, self.data_root_dir)
        for node in scene.object_nodes:
            self.cat_index2scenes[node['category']].append(idx)
    with open(filename, 'wb') as f:
        pickle.dump(self.cat_index2scenes, f)
    print('')
def __init__(self, dest, trim=False):
    """
    Parameters
    ----------
    dest (string): directory, relative to the data root, that is created if
        it does not exist yet
    trim (bool, optional): stored as self.trim
    """
    self.trim = trim
    self.cur_index = 0
    root_dir = utils.get_data_root_dir()
    self.dest_dir = f"{root_dir}/{dest}"
    destination_missing = not os.path.exists(self.dest_dir)
    if destination_missing:
        os.makedirs(self.dest_dir)
def learn(self, data_folder="bedroom_final", data_root_dir=None):
    """
    Count, for every non-door/window category, what its objects rest on

    Reads ``final_categories_frequency`` and the numbered scene pickles in the
    data folder, fills ``self.support_count`` (one row per category, one
    column per possible parent category plus a last column for the floor) and
    derives ``self.possible_supports``: for each category, the parent indices
    whose relative frequency exceeds 0.01, most frequent first.

    Parameters
    ----------
    data_folder (string, optional): folder, relative to the data root
    data_root_dir (string or None, optional): if not given, the default data
        root from utils is used
    """
    root_dir = data_root_dir if data_root_dir else utils.get_data_root_dir()
    data_dir = f"{root_dir}/{data_folder}"
    self.data_dir = data_dir
    self.category_map = ObjectCategories()

    scene_files = [
        name for name in os.listdir(data_dir)
        if ".pkl" in name and "domain" not in name and "_" not in name
    ]

    arch_names = ["window", "door"]
    with open(f"{data_dir}/final_categories_frequency", "r") as f:
        rows = [line.split() for line in f.readlines()]
    self.category_count = [int(row[1]) for row in rows if row[0] not in arch_names]
    self.categories = [row[0] for row in rows if row[0] not in arch_names]
    self.cat_to_index = {name: idx for idx, name in enumerate(self.categories)}
    self.num_categories = len(self.categories)
    # "floor" is appended after counting, so it is addressed by index -1 / N
    self.categories.append("floor")
    N = self.num_categories

    self.support_count = [[0] * (N + 1) for _ in range(N)]

    # Scenes are assumed to be named 0.pkl .. (len - 1).pkl
    for index in range(len(scene_files)):
        print(index)
        with open(f"{data_dir}/{index}.pkl", "rb") as f:
            (_, _, nodes), _ = pickle.load(f)

        object_nodes = []
        id_to_cat = {}
        for node in nodes:
            category = self.category_map.get_final_category(node["modelId"])
            if category in ["door", "window"]:
                continue
            object_nodes.append(node)
            cat_idx = self.cat_to_index[category]
            id_to_cat[node["id"]] = cat_idx
            node["category"] = cat_idx

        for node in object_nodes:
            parent = node["parent"]
            row = self.support_count[node["category"]]
            if parent == "Floor" or parent is None:
                row[-1] += 1
            else:
                row[id_to_cat[parent]] += 1

    self.possible_supports = {}
    for i in range(self.num_categories):
        print(f"Support for {self.categories[i]}:")
        ratios = [
            (c, self.support_count[i][c] / self.category_count[i])
            for c in range(N + 1)
        ]
        ratios.sort(key=lambda pair: -pair[1])
        kept = [pair for pair in ratios if pair[1] > 0.01]
        for parent_idx, ratio in kept:
            print(f" {self.categories[parent_idx]}:{ratio:4f}")
        self.possible_supports[i] = [parent_idx for parent_idx, _ in kept]

    print(self.possible_supports)
    self.N = N
def __init__(self, dest):
    """
    Parameters
    ----------
    dest (string): data folder, relative to the data root; a "graph"
        sub-directory is created inside it if missing
    """
    self.room_count = 0
    self.step_idx = -1
    self.data_root_dir = utils.get_data_root_dir()
    self.data_dir = dest
    graph_dir = f"{self.data_root_dir}/{self.data_dir}/graph"
    graph_dir_missing = not os.path.exists(graph_dir)
    if graph_dir_missing:
        os.makedirs(graph_dir)
def __init__(self, scene_indices=(0, 4000), data_folder="bedroom_fin_256", data_root_dir=None, seed=None, do_rotation_augmentation=False, cat_only=False, use_same_category_batches=False, importance_order=False):
    """
    Parameters
    ----------
    scene_indices (tuple, optional): stored as self.scene_indices
    data_folder (string, optional): folder, relative to the data root, that
        holds final_categories_frequency
    data_root_dir (string or None, optional): if None, the default data root
        from utils is used
    seed (int or None, optional): stored as self.seed
    do_rotation_augmentation (bool, optional): stored on the instance
    cat_only (bool, optional): stored on the instance
    use_same_category_batches (bool, optional): if true,
        self.same_category_batch_indices starts as an empty list, else None
    importance_order (bool, optional): stored on the instance
    """
    super(LatentDataset, self).__init__()
    self.category_map = ObjectCategories()
    self.seed = seed
    self.data_folder = data_folder
    self.data_root_dir = data_root_dir
    self.scene_indices = scene_indices
    self.do_rotation_augmentation = do_rotation_augmentation
    self.cat_only = cat_only
    self.cat_name2index = None
    self.cat_index2scenes = None

    if self.data_root_dir is None:
        self.data_root_dir = utils.get_data_root_dir()

    frequency_path = f"{self.data_root_dir}/{self.data_folder}/final_categories_frequency"
    with open(frequency_path, "r") as f:
        lines = f.readlines()

    # Category names, with the architectural door/window entries removed
    object_names = []
    for line in lines:
        first_token = line.split()[0]
        if first_token != 'door' and first_token != 'window':
            object_names.append(first_token)
    self.catnames = object_names
    self.cat_name2index = {name: idx for idx, name in enumerate(object_names)}
    self.n_categories = len(object_names)

    # Raw frequencies keep every line of the file, doors and windows included
    self.cat2freq = {}
    for line in lines:
        cat, freq = line.split(' ')
        self.cat2freq[cat] = int(freq)
    maxfreq = max(self.cat2freq.values())
    self.cat2freq_normalized = {}
    for cat, freq in self.cat2freq.items():
        self.cat2freq_normalized[cat] = freq / maxfreq

    self.build_cat2scene()
    self.build_cats_in_scene_indices()
    self.compute_cat_sizes()

    # See 'prepare_same_category_batches' below for info
    self.use_same_category_batches = use_same_category_batches
    self.same_category_batch_indices = [] if use_same_category_batches else None

    self.importance_order = importance_order
def __init__(self, dest, size=256, room_size=6.05):
    """
    Parameters
    ----------
    dest (string): output directory, relative to the data root; created if
        missing
    size (int, optional): passed to TopDownView as ``size``
    room_size (float, optional): passed to TopDownView as ``length_cap``
    """
    self.dest = dest
    self.size = size
    self.count = 0
    self.renderer = TopDownView(size=self.size, length_cap=room_size)

    root_dir = utils.get_data_root_dir()
    self.dest_dir = f"{root_dir}/{dest}"
    destination_missing = not os.path.exists(self.dest_dir)
    if destination_missing:
        os.makedirs(self.dest_dir)
def __init__(self, index, data_dir, data_root_dir=None, shuffle=True, load_objects=True, seed=None):
    """
    Load a rendered scene from file

    Parameters
    ----------
    index (int): room number
    data_dir (string): location of the pre-rendered rooms
    data_root_dir (string or None, optional): if specified,
        use this as the root directory
    shuffle (bool, optional): If true, randomly order the objects
        in the room. Otherwise use the default order as written
        in the original dataset
    load_objects (bool, optional): If false, only load the doors
        and windows. Otherwise load all objects in the room
    seed (int or None, optional): if set, use a fixed random seed
        so we can replicate a particular experiment. Note that this seeds
        the module-level ``random`` generator.
    """
    # Compare against None so that seed=0 is honoured as a real seed.
    if seed is not None:
        random.seed(seed)

    if not data_root_dir:
        data_root_dir = utils.get_data_root_dir()
    scene_dir = f"{data_root_dir}/{data_dir}"

    # The category tables are cached on the class and only re-read when the
    # data folder changes.
    if RenderedScene.categories is None or RenderedScene.current_data_dir != data_dir:
        with open(f"{scene_dir}/final_categories_frequency", "r") as f:
            cats = [line.split()[0] for line in f.readlines()]
        RenderedScene.categories = [
            cat for cat in cats if cat not in ('window', 'door')
        ]
        RenderedScene.cat_to_index = {
            cat: i for i, cat in enumerate(RenderedScene.categories)
        }
        RenderedScene.current_data_dir = data_dir

    with open(f"{scene_dir}/{index}.pkl", "rb") as f:
        (self.floor, self.wall, nodes), self.room = pickle.load(f)

    self.object_nodes = []
    self.door_window_nodes = []
    for node in nodes:
        category = RenderedScene.category_map.get_final_category(node["modelId"])
        if category in ["door", "window"]:
            # Doors/windows keep the category name itself
            node["category"] = category
            self.door_window_nodes.append(node)
        elif load_objects:
            # Regular objects get the integer category index
            node["category"] = RenderedScene.cat_to_index[category]
            self.object_nodes.append(node)

    if shuffle:
        random.shuffle(self.object_nodes)
def save(self):
    """
    Pickle the vertices and faces to <data root>/object/<modelId>/

    The directory is created if needed; vertices.pkl is written first, then
    faces.pkl.
    """
    dest_dir = f"{utils.get_data_root_dir()}/object/{self.modelId}"
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    for attribute in ("vertices", "faces"):
        with open(f"{dest_dir}/{attribute}.pkl", "wb") as f:
            pickle.dump(getattr(self, attribute), f, pickle.HIGHEST_PROTOCOL)
def room_criteria(room, house):
    """
    Return True if the room passes the size, geometry-file and node checks

    A room is rejected when any dimension exceeds its cap (height_cap,
    length_cap, width_cap come from the enclosing scope), when its floor
    ("f") or wall ("w") .obj file is missing, or when it contains a node of
    type "Box". ``house`` is not used.
    """
    data_dir = utils.get_data_root_dir()

    too_large = (room.height > height_cap
                 or room.length > length_cap
                 or room.width > width_cap)
    if too_large:
        return False

    for suffix in ("f", "w"):
        obj_path = f"{data_dir}/suncg_data/room/{room.house_id}/{room.modelId}{suffix}.obj"
        if not os.path.isfile(obj_path):
            return False

    return not any(node.type == "Box" for node in room.nodes)
def __init__(self, scene_indices=(0, 4000), data_folder="bedroom_fin_256", data_root_dir=None, seed=None, do_rotation_augmentation=False, cat_only=False, use_same_category_batches=False, importance_order=False, epoch_size=None):
    """
    Parameters
    ----------
    scene_indices (tuple, optional): stored as self.scene_indices
    data_folder (string, optional): folder, relative to the data root
    data_root_dir (string or None, optional): if None, the default data root
        from utils is used
    seed (int or None, optional): stored as self.seed
    do_rotation_augmentation (bool, optional): stored on the instance
    cat_only (bool, optional): stored on the instance
    use_same_category_batches (bool, optional): if true, epoch_size must be
        given and self.same_category_batch_indices starts as an empty list
    importance_order (bool, optional): stored on the instance
    epoch_size (int or None, optional): only stored (as self.epoch_size)
        when use_same_category_batches is true
    """
    super(LatentDataset, self).__init__()
    self.category_map = ObjectCategories()
    self.seed = seed
    self.data_folder = data_folder
    self.data_root_dir = data_root_dir
    self.scene_indices = scene_indices
    self.do_rotation_augmentation = do_rotation_augmentation
    self.cat_only = cat_only
    self.cat_name2index = None
    self.cat_index2scenes = None

    if self.data_root_dir is None:
        self.data_root_dir = utils.get_data_root_dir()

    self.catnames = self.category_map.all_non_arch_categories(
        self.data_root_dir, data_folder)
    self.cat_name2index = {
        name: position for position, name in enumerate(self.catnames)
    }
    self.n_categories = len(self.catnames)

    self.build_cat2scene()
    self.build_cats_in_scene_indices()
    self.cat_importances = self.category_map.all_non_arch_category_importances(
        self.data_root_dir, data_folder)

    # See 'prepare_same_category_batches' below for info
    self.use_same_category_batches = use_same_category_batches
    if not use_same_category_batches:
        self.same_category_batch_indices = None
    else:
        self.same_category_batch_indices = []
        assert (epoch_size is not None)
        self.epoch_size = epoch_size

    self.importance_order = importance_order
def parse_objects():
    """
    parse .obj objects and save them to pickle files

    Every entry of <data root>/suncg_data/object/ except "mgcube" and
    ".DS_Store" is loaded from source twice, once as-is and once mirrored,
    and each version is saved with ``Obj.save()``.
    """
    data_dir = utils.get_data_root_dir()
    obj_dir = data_dir + "/suncg_data/object/"
    print("Parsing SUNCG object files...")

    # List the directory once so the reported total and the iteration agree.
    model_ids = os.listdir(obj_dir)
    total = len(model_ids)
    skipped = ("mgcube", ".DS_Store")
    for i, modelId in enumerate(model_ids):
        print(f"{i+1} of {total}...", end="\r")
        if modelId in skipped:
            continue
        for mirror in (False, True):
            Obj(modelId, from_source=True, mirror=mirror).save()
    print()
def __init__(self, dest, batch_size=1000, trim=False):
    """
    Parameters
    ----------
    dest (string): directory to save to (relative to the data root); it is
        created if it does not exist
    batch_size (int, optional): number of houses in one .pkl file
    trim (bool, optional): If true, removes certain unused attributes
        to reduce file size
    """
    self.batch_size = batch_size
    self.trim = trim
    self.to_save = []
    self.cur_index = 0

    root_dir = utils.get_data_root_dir()
    self.dest_dir = f"{root_dir}/{dest}"
    destination_missing = not os.path.exists(self.dest_dir)
    if destination_missing:
        os.makedirs(self.dest_dir)
def __init__(self, scene_indices=(0, 4000), data_folder="bedroom", data_root_dir=None, seed=None):
    """
    Parameters
    ----------
    scene_indices (tuple, optional): stored as self.scene_indices
    data_folder (string, optional): folder, relative to the data root, that
        holds final_categories_frequency
    data_root_dir (string or None, optional): if specified, use this as the
        root directory; otherwise the default data root from utils is used.
        self.data_root_dir keeps the value exactly as passed in.
    seed (int or None, optional): stored as self.seed
    """
    super(LocDataset, self).__init__()
    self.category_map = ObjectCategories()
    self.seed = seed
    self.data_folder = data_folder
    self.data_root_dir = data_root_dir
    self.scene_indices = scene_indices

    # Only fall back to the default root when the caller did not supply one;
    # previously a supplied root was silently ignored here.
    if data_root_dir is None:
        data_root_dir = utils.get_data_root_dir()
    with open(f"{data_root_dir}/{data_folder}/final_categories_frequency", "r") as f:
        lines = f.readlines()
    self.n_categories = len(lines) - 2  # -2 for 'window' and 'door'
def __init__(self, actions, source="main", num_batches=0):
    """
    Parameters
    ----------
    actions (list[DatasetAction]): a list of methods that are applied to each
        loaded house sequentially; a single action is wrapped in a list
    source (string): location of pickled dataset to be loaded
    num_batches (int): if not 0, only the first num_batches batches of houses
        will be loaded
    """
    root_dir = utils.get_data_root_dir()
    source_dir = f"{root_dir}/{source}"

    self.actions = actions if isinstance(actions, list) else [actions]

    # Batch files are ordered by the integer before the first "."
    batch_names = [name for name in os.listdir(source_dir) if "pkl" in name]
    batch_names.sort(key=lambda name: int(name.split(".")[0]))
    if num_batches > 0:
        batch_names = batch_names[:num_batches]
    self.files = [f"{source_dir}/{name}" for name in batch_names]
def __init__(self, mode='direct', bullet_server_binary=None, data_dir_base=None, verbose=False):
    """
    Store the configuration and connect

    Parameters
    ----------
    mode (string, optional): stored as self._mode
    bullet_server_binary (string or None, optional): path of the server
        binary; defaults to ``bullet_shared_memory_server`` in the parent of
        this module's directory
    data_dir_base (string or None, optional): base data directory; defaults
        to ``suncg_data`` under the data root
    verbose (bool, optional): stored as self._verbose
    """
    self._mode = mode
    self._verbose = verbose

    module_dir = os.path.dirname(os.path.abspath(__file__))
    data_root_dir = utils.get_data_root_dir()
    self._data_dir_base = data_dir_base or os.path.join(data_root_dir, 'suncg_data')
    self._bullet_server_binary = bullet_server_binary or os.path.join(
        module_dir, '..', 'bullet_shared_memory_server')

    # Bookkeeping, all empty until connected
    self._obj_id_to_body = {}
    self._bid_to_body = {}
    self._pid = None
    self._bullet_server = None

    self.connect()
logfile.write(msg + '\n') logfile.flush() LOG('Building model...') model = resnet101(num_classes=2, num_input_channels=num_input_channels) loss = nn.CrossEntropyLoss() softmax = nn.Softmax(dim=1) LOG('Converting to CUDA...') model.cuda() loss.cuda() softmax.cuda() LOG('Building dataset...') train_dataset = RotationDataset(data_root_dir=utils.get_data_root_dir(), data_dir=args.data_dir, scene_indices=(0, args.train_size), ablation=args.ablation) #Size of validation set is 160 by default validation_dataset = RotationDataset(data_root_dir=utils.get_data_root_dir(), data_dir=args.data_dir, scene_indices=(args.train_size, args.train_size + 160), seed=42, ablation=args.ablation) LOG('Building data loader...') train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=args.num_workers,
utils.ensuredir(save_dir)
# The log file is kept open for the rest of the script; LOG flushes after
# every message.
logfile = open(f"{save_dir}/log_count.txt", 'w')


def LOG(msg):
    """Print msg and append it to the log file."""
    print(msg)
    logfile.write(msg + '\n')
    logfile.flush()


start_epoch = 0
num_epochs = 52
learning_rate = args.lr
batch_size = 128

LOG('Building dataset...')
train_dataset = CategoryCountsDataset(
    data_root_dir = utils.get_data_root_dir(),  # comma was missing (SyntaxError)
    data_dir = args.data_folder,
    scene_indices = (0, args.train_size),
)

LOG('Building data loader...')
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size = batch_size,
    num_workers = args.num_workers,
    shuffle = True
)

LOG('Building model...')
data_size = train_dataset.data_size
data_domain_sizes = train_dataset.data_domain_sizes
def bedroom_filter(version, source):
    """
    Build the DatasetFilter used to select bedrooms

    Parameters
    ----------
    version (string): only "final" is implemented; anything else raises
        NotImplementedError
    source (string): data folder, relative to the data root, that contains
        final_categories_frequency

    Returns
    -------
    DatasetFilter with one room filter and one node filter

    The coarse category frequency file is no longer read here: its contents
    were never used by this filter.
    """
    data_dir = utils.get_data_root_dir()
    category_map = ObjectCategories()

    def read_frequency(path):
        # One "<name> <count>" pair per line. split() copes with a final line
        # that has no trailing newline (slicing off the last character would
        # drop a digit there); blank lines are skipped.
        with open(path, "r") as f:
            return {
                name: int(count)
                for name, count in (line.split() for line in f if line.strip())
            }

    if version != "final":
        raise NotImplementedError

    filtered, rejected, door_window = GlobalCategoryFilter.get_filter()
    frequency = read_frequency(f"{data_dir}/{source}/final_categories_frequency")

    def node_criteria(node, room):
        # Keep every node whose final category is not in the filtered list
        category = category_map.get_final_category(node.modelId)
        return category not in filtered

    def room_criteria(room, house):
        node_count = 0
        bed_count = 0  #Must have one bed
        for node in room.nodes:
            category = category_map.get_final_category(node.modelId)
            if category in rejected:
                return False
            if category not in door_window:
                node_count += 1

                # Scale factors read off the transposed 4x4 transform
                t = np.asarray(node.transform).reshape((4, 4)).transpose()
                a = t[0][0]
                b = t[0][2]
                c = t[2][0]
                d = t[2][2]
                xscale = (a**2 + c**2)**0.5
                yscale = (b**2 + d**2)**0.5
                zscale = t[1][1]

                #Reject rooms where any object is scaled by too much
                if not 0.8 < xscale < 1.2:
                    return False
                if not 0.8 < yscale < 1.2:
                    return False
                if not 0.8 < zscale < 1.2:
                    return False

            if "bed" in category:
                bed_count += 1

            # Reject rooms containing a rare category
            if frequency[category] < 500:
                return False

        if node_count < 5 or node_count > 20:
            return False
        if bed_count < 1:
            return False

        return True

    dataset_f = DatasetFilter(room_filters=[room_criteria], node_filters=[node_criteria])
    return dataset_f
def learn(self, data_folder="bedroom_final", data_root_dir=None):
    """
    Count how often each pair of (non-architectural) models shares a scene

    Reads ``model_frequency`` and the numbered scene pickles in the data
    folder and fills ``self.count``, a symmetric N x N table (N = number of
    non-architectural models) where entry [a][b] is incremented once for
    every pair of objects with those models found in the same scene.

    Parameters
    ----------
    data_folder (string, optional): folder, relative to the data root
    data_root_dir (string or None, optional): if not given, the default data
        root from utils is used
    """
    if not data_root_dir:
        data_root_dir = utils.get_data_root_dir()
    data_dir = f"{data_root_dir}/{data_folder}"
    self.data_dir = data_dir
    self.category_map = ObjectCategories()

    files = os.listdir(data_dir)
    files = [
        f for f in files
        if ".pkl" in f and "domain" not in f and "_" not in f
    ]

    self.categories = self.category_map.all_non_arch_categories(
        data_root_dir, data_folder)
    self.cat_to_index = {cat: i for i, cat in enumerate(self.categories)}

    with open(f"{data_dir}/model_frequency", "r") as f:
        rows = [line.split() for line in f.readlines()]
    models = [row[0] for row in rows]
    # split() already discards the newline; slicing off the last character
    # first would drop a digit when the file has no trailing newline.
    # NOTE(review): model_freq covers every line of the file while
    # self.models below excludes architectural models, so the two lists are
    # not index-aligned -- confirm this is intended.
    self.model_freq = [int(row[1]) for row in rows]

    self.models = [
        model for model in models
        if not self.category_map.is_arch(
            self.category_map.get_final_category(model))
    ]
    self.model_to_index = {model: i for i, model in enumerate(self.models)}

    N = len(self.models)
    self.num_categories = len(self.categories)

    self.model_index_to_cat = [
        self.cat_to_index[self.category_map.get_final_category(model)]
        for model in self.models
    ]

    self.count = [[0] * N for _ in range(N)]

    # Scenes are assumed to be named 0.pkl .. (len(files) - 1).pkl
    for index in range(len(files)):
        with open(f"{data_dir}/{index}.pkl", "rb") as f:
            (_, _, nodes), _ = pickle.load(f)

        object_nodes = [
            node for node in nodes
            if not self.category_map.is_arch(
                self.category_map.get_final_category(node["modelId"]))
        ]
        model_indices = [
            self.model_to_index[node["modelId"]] for node in object_nodes
        ]

        # Every unordered pair of objects in the scene counts once, both ways
        for i, a in enumerate(model_indices):
            for b in model_indices[i + 1:]:
                self.count[a][b] += 1
                self.count[b][a] += 1
        print(index)

    self.N = N
def toilet_filter(version, room_size, second_tier, source):
    """
    Build the DatasetFilter used to select toilet rooms

    Parameters
    ----------
    version (string): one of "set1", "final" or "latent"; anything else
        raises NotImplementedError
    room_size (float): maximum room length and width, used by "latent" only
    second_tier (bool): used by "latent" only; if true, nodes that are not on
        the floor may be kept when their category is in the second tier list
        and their parent chain ends at the floor
    source (string): data folder, relative to the data root, containing the
        category frequency files

    Returns
    -------
    DatasetFilter with one room filter and one node filter

    All files are resolved against the data root returned by
    utils.get_data_root_dir(); the "set1" and "final" versions previously
    used hard-coded "./data" paths.
    """
    data_dir = utils.get_data_root_dir()

    def read_frequency(path):
        # One "<name> <count>" pair per line. split() copes with a final line
        # that has no trailing newline; blank lines are skipped.
        with open(path, "r") as f:
            return {
                name: int(count)
                for name, count in (line.split() for line in f if line.strip())
            }

    def axis_scales(node):
        # Transposed 4x4 transform plus the scale factors derived from it
        t = np.asarray(node.transform).reshape((4, 4)).transpose()
        a = t[0][0]
        b = t[0][2]
        c = t[2][0]
        d = t[2][2]
        xscale = (a**2 + c**2)**0.5
        yscale = (b**2 + d**2)**0.5
        zscale = t[1][1]
        return t, xscale, yscale, zscale

    coarse_categories_frequency = read_frequency(
        f"{data_dir}/{source}/coarse_categories_frequency")
    category_map = ObjectCategories()

    if version == "set1":
        rejected_object_categories = ["bathroom_stuff", "partition", "kitchenware",
                                      "sofa", "kitchen_appliance", "column", "bed",
                                      "table", "hanger"]
        filtered_object_categories = ["person", "pet", "rug", "curtain", "trash_can",
                                      "toy", "mirror", "picture_frame"]
        no_count_object_categories = category_map.all_arch_categories()
        freq_threshold = 1000

        def node_criteria(node, room):
            coarse_category = category_map.get_coarse_category(node.modelId)
            #Recursively check if all children are filtered out or not
            #This is done because if there is a node that we do not want
            #But it has a child that isn't filtered out
            #Then we have to reject the room completely, see room_criteria
            if node.child:
                if any(node_criteria(c, room) for c in node.child):
                    return True
            #Still nodes without parent, so can't just check if floor parent
            if node.parent and node.parent != "Floor" and \
               coarse_category not in no_count_object_categories:
                return False
            if coarse_category in rejected_object_categories:
                return True
            if coarse_categories_frequency[coarse_category] < freq_threshold:
                return False
            if coarse_category in filtered_object_categories:
                return False
            if node.length * node.width < 0.05:
                return False
            return True

        def room_criteria(room, house):
            if room.height > 4:
                return False
            if room.length > 6:
                return False
            if room.width > 6:
                return False
            true_node_count = 0
            for node in room.nodes:
                coarse_category = category_map.get_coarse_category(node.modelId)
                if coarse_categories_frequency[coarse_category] < freq_threshold:
                    return False
                if coarse_category in rejected_object_categories:
                    return False
                if coarse_category in filtered_object_categories:
                    return False
                if coarse_category not in no_count_object_categories:
                    # Counted objects must sit on the floor
                    if node.zmin - room.zmin > 0.1:
                        return False
                    true_node_count += 1
            if true_node_count < 4 or true_node_count > 20:
                return False
            # Both the floor ("f") and wall ("w") meshes must exist
            room_obj = f"{data_dir}/data/room/{room.house_id}/{room.modelId}"
            if not os.path.isfile(f"{room_obj}f.obj"):
                return False
            if not os.path.isfile(f"{room_obj}w.obj"):
                return False
            return True

    elif version == "final":
        filtered, rejected, door_window = GlobalCategoryFilter.get_filter()
        # Build a new list instead of extending the one returned by the
        # global filter in place.
        rejected = list(rejected) + ["short_kitchen_cabinet", "coffee_table",
                                     "straight_chair"]
        frequency = read_frequency(f"{data_dir}/{source}/final_categories_frequency")

        def node_criteria(node, room):
            category = category_map.get_final_category(node.modelId)
            return category not in filtered

        def room_criteria(room, house):
            node_count = 0
            toilet_count = 0
            bathtub_count = 0
            for node in room.nodes:
                category = category_map.get_final_category(node.modelId)
                if category in rejected:
                    return False
                if category not in door_window:
                    node_count += 1
                    _, xscale, yscale, zscale = axis_scales(node)
                    if not 0.8 < xscale < 1.2:
                        return False
                    if not 0.8 < yscale < 1.2:
                        return False
                    if not 0.8 < zscale < 1.2:
                        return False
                if category == "toilet":
                    toilet_count += 1
                if category == "bathtub":
                    bathtub_count += 1
                if frequency[category] < 200:
                    return False
            if node_count < 3 or node_count > 10:
                return False
            if toilet_count > 2:
                return False
            if bathtub_count > 3:
                return False
            return True

    elif version == "latent":
        filtered, rejected, door_window, second_tier_include = \
            GlobalCategoryFilter.get_filter_latent()
        # Sets are built from copies, so the global filter lists stay untouched
        filtered = set(filtered)
        rejected = set(rejected) | {"desk", "television", "towel_rack", "sofa"}
        door_window = set(door_window)
        second_tier_include = set(second_tier_include)
        frequency = read_frequency(f"{data_dir}/{source}/final_categories_frequency")

        def node_criteria(node, room):
            category = category_map.get_final_category(node.modelId)
            if category in door_window:
                return True
            if category in filtered:
                return False

            if second_tier:
                if node.zmin - room.zmin > 0.1 and \
                   (category not in second_tier_include or node.parent is None):
                    return False
                if node.parent:
                    if isinstance(node.parent, Node) and node.zmin < node.parent.zmax - 0.1:
                        return False
                    # Walk up the parent chain; it has to end at the floor
                    node_now = node
                    while isinstance(node_now, Node) and node_now.parent:
                        node_now = node_now.parent
                    if node_now != "Floor":
                        return False
            else:
                if node.zmin - room.zmin > 0.1:
                    return False
            return True

        def room_criteria(room, house):
            if not room.closed_wall:
                return False
            if room.height > 4:
                return False
            if room.length > room_size:
                return False
            if room.width > room_size:
                return False
            floor_node_count = 0
            node_count = 0
            #dirty fix!
            # Repeatedly drop nodes whose parent node is no longer in the room
            for _ in range(5):
                room.nodes = [node for node in room.nodes if not
                              (node.parent and isinstance(node.parent, Node) and
                               node.parent not in room.nodes)]
            for node in room.nodes:
                category = category_map.get_final_category(node.modelId)
                if category in rejected:
                    return False
                if category not in door_window:
                    node_count += 1
                    if node.zmin - room.zmin < 0.1:
                        floor_node_count += 1

                    t, xscale, yscale, zscale = axis_scales(node)
                    #Reject rooms where any object is scaled by too much
                    if not 0.9 < xscale < 1.1:
                        return False
                    if not 0.9 < yscale < 1.1:
                        return False
                    if not 0.9 < zscale < 1.1:
                        return False

                    # Remove the (small) remaining scale from the transform
                    t[0][0] /= xscale
                    t[0][2] /= yscale
                    t[2][0] /= xscale
                    t[2][2] /= yscale
                    t[1][1] /= zscale
                    node.transform = list(t.transpose().flatten())

                if frequency[category] < 500:
                    return False

            if floor_node_count < 4 or node_count > 20:
                return False
            return True

    else:
        raise NotImplementedError

    dataset_f = DatasetFilter(room_filters=[room_criteria], node_filters=[node_criteria])
    return dataset_f
def learn(self, data_folder, data_root_dir=None):
    """
    Count how often each pair of (non door/window) models shares a scene

    Reads ``final_categories_frequency``, ``model_frequency`` and the
    numbered scene pickles in the data folder and fills ``self.count``, a
    symmetric N x N table (N = number of models that are not doors or
    windows) where entry [a][b] is incremented once for every pair of objects
    with those models found in the same scene.

    Parameters
    ----------
    data_folder (string): folder, relative to the data root
    data_root_dir (string or None, optional): if None, the default data root
        from utils is used
    """
    if data_root_dir is None:
        data_root_dir = utils.get_data_root_dir()
    data_dir = f"{data_root_dir}/{data_folder}"
    self.data_dir = data_dir
    self.category_map = ObjectCategories()

    files = os.listdir(data_dir)
    files = [f for f in files if ".pkl" in f and "domain" not in f]

    arch_names = ('window', 'door')
    with open(f"{data_dir}/final_categories_frequency", "r") as f:
        cats = [line.split()[0] for line in f.readlines()]
    self.categories = [cat for cat in cats if cat not in arch_names]
    self.cat_to_index = {cat: i for i, cat in enumerate(self.categories)}

    with open(f"{data_dir}/model_frequency", "r") as f:
        rows = [line.split() for line in f.readlines()]
    models = [row[0] for row in rows]
    # split() already discards the newline; slicing off the last character
    # first would drop a digit when the file has no trailing newline.
    # NOTE(review): model_freq covers every line of the file while
    # self.models below excludes doors/windows, so the two lists are not
    # index-aligned -- confirm this is intended.
    self.model_freq = [int(row[1]) for row in rows]

    self.models = [
        model for model in models
        if self.category_map.get_final_category(model) not in arch_names
    ]
    self.model_to_index = {model: i for i, model in enumerate(self.models)}

    N = len(self.models)
    self.num_categories = len(self.categories)

    self.model_index_to_cat = [
        self.cat_to_index[self.category_map.get_final_category(model)]
        for model in self.models
    ]

    self.count = [[0] * N for _ in range(N)]

    # Scenes are assumed to be named 0.pkl .. (len(files) - 1).pkl
    for index in range(len(files)):
        with open(f"{data_dir}/{index}.pkl", "rb") as f:
            (_, _, nodes), _ = pickle.load(f)

        object_nodes = [
            node for node in nodes
            if self.category_map.get_final_category(node["modelId"])
            not in ["door", "window"]
        ]
        model_indices = [
            self.model_to_index[node["modelId"]] for node in object_nodes
        ]

        # Every unordered pair of objects in the scene counts once, both ways
        for i, a in enumerate(model_indices):
            for b in model_indices[i + 1:]:
                self.count[a][b] += 1
                self.count[b][a] += 1
        print(index, end="\r")

    self.N = N
def __init__(self, modelId, houseId=None, from_source=False, is_room=False, mirror=False):
    """
    Parameters
    ----------
    modelId (string): name of the object to be loaded
    houseId (string, optional): If loading a room, specify which house does
        the room belong to
    from_source (bool, optional): If false, loads the pickled version of the
        object; need to call object.py once to create the pickled version.
        Does not apply for rooms, which are always loaded from source
    is_room (bool, optional): If true, the .obj is read from the room
        directory of houseId and no alignment matrix is applied
    mirror (bool, optional): If true, loads the mirrored version and
        "_mirror" is appended to self.modelId
    """
    if is_room:
        from_source = True  #Don't want to save rooms...
    data_dir = utils.get_data_root_dir()
    self.vertices = []
    self.faces = []

    if from_source:
        if is_room:
            path = f"{data_dir}/suncg_data/room/{houseId}/{modelId}.obj"
        else:
            path = f"{data_dir}/suncg_data/object/{modelId}/{modelId}.obj"

        with open(path, "r") as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    continue
                if tokens[0] == "v":
                    # Homogeneous coordinates: x, y, z, 1
                    v = np.asarray([float(x) for x in tokens[1:4]] + [1])
                    self.vertices.append(v)
                elif tokens[0] == "f":
                    # Only the vertex index (before the first "/") is used;
                    # .obj indices are 1-based
                    face = [int(ref.split("/")[0]) - 1 for ref in tokens[1:]]
                    if len(face) == 4:
                        # Split a quad into two triangles
                        self.faces.append([face[0], face[1], face[2]])
                        self.faces.append([face[0], face[2], face[3]])
                    elif len(face) == 3:
                        self.faces.append([face[0], face[1], face[2]])
                    else:
                        # Faces that are neither triangles nor quads are dropped
                        print(f"Found a face with {len(face)} edges!!!")

        self.vertices = np.asarray(self.vertices)
        if not is_room:
            # Look the alignment matrix up once and apply it if there is one
            alignment = ObjectData().get_alignment_matrix(modelId)
            if alignment is not None:
                self.transform(alignment)
    else:
        with open(f"{data_dir}/object/{modelId}/vertices.pkl", "rb") as f:
            self.vertices = pickle.load(f)
        with open(f"{data_dir}/object/{modelId}/faces.pkl", "rb") as f:
            self.faces = pickle.load(f)

    if mirror:
        # Flip the x axis
        t = np.asarray([[-1, 0, 0, 0],
                        [0, 1, 0, 0],
                        [0, 0, 1, 0],
                        [0, 0, 0, 1]])
        self.transform(t)
        self.modelId = modelId + "_mirror"
    else:
        self.modelId = modelId
if args.use_count: fc = FullyConnected(2048 + num_categories, num_categories + 3) else: fc = FullyConnected(2048, num_categories + 3) cross_entropy = nn.CrossEntropyLoss() softmax = nn.Softmax(dim=1) LOG('Converting to CUDA...') model.cuda() fc.cuda() cross_entropy.cuda() softmax.cuda() LOG('Building dataset...') train_dataset = LocationDataset(data_root_dir=utils.get_data_root_dir(), data_dir=args.data_dir, scene_indices=(0, args.train_size), p_auxiliary=args.p_auxiliary, ablation=args.ablation) #Size of validation set is 160 by default validation_dataset = LocationDataset( data_root_dir=utils.get_data_root_dir(), data_dir=args.data_dir, scene_indices=(args.train_size, args.train_size + 160), seed=42, p_auxiliary=0, #Only tests positive examples in validation ablation=args.ablation) LOG('Building data loader...') train_loader = torch.utils.data.DataLoader(train_dataset,
def __init__(self,
             index=0,
             id_=None,
             house_json=None,
             file_dir=None,
             include_support_information=True,
             include_arch_information=True):
    """
    Load a house and build its levels, rooms and nodes, optionally
    followed by support relations and wall information.

    Parameters
    ----------
    index (int): position of the house in the os.listdir() listing of
        data/house (the listing is not explicitly sorted);
        default way of loading a house
    id_ (string, optional): If set, then the house with the specified
        directory name is chosen
    house_json (json, optional): If set, then the specified json object
        is used directly to initiate the house
    file_dir (string, optional): If set, then the json pointed to by
        file_dir will be loaded
    include_support_information (bool): If true (the default), then support
        information is loaded from data/house_relations
    include_arch_information (bool): If true (the default), then arch
        information is loaded from data/wall, walls are attached to
        their rooms and room.closed_wall is computed
    """

    def read_json(path):
        # Context manager so the file handle is closed deterministically
        with open(path, 'r') as json_file:
            return json.load(json_file)

    data_dir = utils.get_data_root_dir()

    # Source priority: explicit json object > explicit file > id_ > index
    if house_json is None:
        if file_dir is None:
            house_dir = data_dir + "/data/house/"
            if id_ is None:
                houses = dict(enumerate(os.listdir(house_dir)))
                self.__dict__ = read_json(house_dir + houses[index] + "/house.json")
            else:
                self.__dict__ = read_json(house_dir + id_ + "/house.json")
        else:
            self.__dict__ = read_json(file_dir)
    else:
        self.__dict__ = house_json

    self.filters = []
    # self.levels comes from the json and is replaced by Level objects
    self.levels = [Level(l, self) for l in self.levels]
    self.rooms = [r for l in self.levels for r in l.rooms]
    self.nodes = [n for l in self.levels for n in l.nodes]
    self.node_dict = {
        id_: n
        for l in self.levels for id_, n in l.node_dict.items()
    }

    if include_support_information:
        house_stats_dir = data_dir + "/data/house_relations/"
        stats = read_json(house_stats_dir + self.id + "/" + self.id + ".stats.json")
        supports = [(s["parent"], s["child"])
                    for s in stats["relations"]["support"]]
        for parent, child in supports:
            if child not in self.node_dict:
                print(
                    f'Warning: support relation {supports} involves not present {child} node'
                )
                continue
            # Parent ids containing "f" / "c", or with more than two
            # "_"-separated parts, are mapped to the architectural
            # parents "Floor" / "Ceiling" / "Wall"
            if "f" in parent:
                self.get_node(child).parent = "Floor"
            elif "c" in parent:
                self.get_node(child).parent = "Ceiling"
            elif len(parent.split("_")) > 2:
                self.get_node(child).parent = "Wall"
            else:
                if parent not in self.node_dict:
                    print(
                        f'Warning: support relation {supports} involves not present {parent} node'
                    )
                    continue
                self.get_node(parent).child.append(self.get_node(child))
                self.get_node(child).parent = self.get_node(parent)

    if include_arch_information:
        house_arch_dir = data_dir + '/data/wall/'
        arch = read_json(house_arch_dir + self.id + '/' + self.id + '.arch.json')
        default_depth = arch["defaults"]["Wall"]["depth"]
        extra_height = arch["defaults"]["Wall"]["extraHeight"]
        self.walls = [w for w in arch['elements'] if w['type'] == 'Wall']

        r_dict = {r.original_id: r for r in self.rooms}
        for wall in self.walls:
            if "depth" not in wall:
                wall["depth"] = default_depth
            wall["height"] += extra_height
            rid = wall['roomId']
            if rid in r_dict:
                # Second argument is the wall's index within its room
                r_dict[rid].walls.append(Wall(wall, len(r_dict[rid].walls)))

        def wall_adjacent(wa, wb):
            # Two walls are adjacent when any pair of their end points
            # (points[0] / points[1]) lies within 1e-3 of each other
            def same_point(pa, pb):
                return np.linalg.norm(np.asarray(pa) - np.asarray(pb)) < 1e-3

            return same_point(wa.points[0], wb.points[0]) \
                or same_point(wa.points[0], wb.points[1]) \
                or same_point(wa.points[1], wb.points[0]) \
                or same_point(wa.points[1], wb.points[1])

        for r in self.rooms:
            N = len(r.walls)
            for i in range(N):
                for j in range(i + 1, N):
                    if wall_adjacent(r.walls[i], r.walls[j]):
                        r.walls[i].adjacent.append(j)
                        r.walls[j].adjacent.append(i)

            # A closed room needs more than two walls ...
            r.closed_wall = len(r.walls) > 2

            # ... each of which touches exactly two others ...
            if r.closed_wall:
                for wall in r.walls:
                    if len(wall.adjacent) != 2:
                        r.closed_wall = False

            # ... and following the adjacency links from wall 0 must
            # reach every wall (i.e. the walls form a single loop)
            if r.closed_wall:
                visited = [0]
                cur = 0
                while True:
                    if r.walls[cur].adjacent[0] not in visited:
                        cur = r.walls[cur].adjacent[0]
                        visited.append(cur)
                    elif r.walls[cur].adjacent[1] not in visited:
                        cur = r.walls[cur].adjacent[1]
                        visited.append(cur)
                    else:
                        break
                if len(visited) < len(r.walls):
                    r.closed_wall = False
""" Creates all the necessary dataset from raw SUNCG Make sure to read README IMPORTANT: make sure you don't have a directory named `temp` under SCENESYNTH_ROOT_PATH, since that will be removed relentlessly """ #Didn't implement any checkpoints, comment out parts as you wish... import shutil from utils import get_data_root_dir from data.object import parse_objects root_dir = get_data_root_dir() parse_objects() from data.dataset import create_dataset print("Creating initial dataset...") create_dataset() from scene_filter import get_filter, run_filter from model_prior import ModelPrior print("Extracting houses with acceptable quality...") filter_description = [("good_house",)] run_filter(filter_description, "main", "good", 1, 1, 0, 1, 0) print() print("Creating bedroom dataset...") filter_description = [("room_type", ["Bedroom"]), ("floor_node",), ("renderable",)] run_filter(filter_description, "good", "temp", 1, 1, 0, 1, 0) filter_description = [("bedroom", "final"), ("collision",)] run_filter(filter_description, "temp", "bedroom", 1, 1, 1, 0, 1) print()
def __init__(self, dest):
    """
    Set up the output directory <data root>/<dest>/json and reset the counter.

    Parameters
    ----------
    dest (string): directory name, relative to the data root directory,
        under which the "json" directory is created
    """
    data_dir = utils.get_data_root_dir()
    self.dest_dir = f"{data_dir}/{dest}/json"
    self.count = 0
    # exist_ok avoids the race between a separate existence check and the
    # creation; it still raises if the path exists but is not a directory
    os.makedirs(self.dest_dir, exist_ok=True)
def __init__(self,
             index=0,
             id_=None,
             house_json=None,
             file_dir=None,
             include_support_information=True,
             include_arch_information=False):
    """
    Load a house and build its levels, rooms and nodes, optionally
    followed by support relations and the list of wall elements.

    Parameters
    ----------
    index (int): position of the house in the os.listdir() listing of
        suncg_data/house (the listing is not explicitly sorted);
        default way of loading a house
    id_ (string, optional): If set, then the house with the specified
        directory name is chosen
    house_json (json, optional): If set, then the specified json object
        is used directly to initiate the house
    file_dir (string, optional): If set, then the json pointed to by
        file_dir will be loaded
    include_support_information (bool): If true (the default), then support
        information is loaded from suncg_data/house_relations
    include_arch_information (bool): If true, then arch information is
        loaded from suncg_data/wall and stored in self.walls;
        might not be available, so defaults to False
    """

    def read_json(path):
        # Context manager so the file handle is closed deterministically
        with open(path, 'r') as json_file:
            return json.load(json_file)

    data_dir = utils.get_data_root_dir()

    # Source priority: explicit json object > explicit file > id_ > index
    if house_json is None:
        if file_dir is None:
            house_dir = data_dir + "/suncg_data/house/"
            if id_ is None:
                houses = dict(enumerate(os.listdir(house_dir)))
                self.__dict__ = read_json(house_dir + houses[index] + "/house.json")
            else:
                self.__dict__ = read_json(house_dir + id_ + "/house.json")
        else:
            self.__dict__ = read_json(file_dir)
    else:
        self.__dict__ = house_json

    self.filters = []
    # self.levels comes from the json and is replaced by Level objects
    self.levels = [Level(l, self) for l in self.levels]
    self.rooms = [r for l in self.levels for r in l.rooms]
    self.nodes = [n for l in self.levels for n in l.nodes]
    self.node_dict = {
        id_: n
        for l in self.levels for id_, n in l.node_dict.items()
    }

    if include_support_information:
        house_stats_dir = data_dir + "/suncg_data/house_relations/"
        stats = read_json(house_stats_dir + self.id + "/" + self.id + ".stats.json")
        supports = [(s["parent"], s["child"])
                    for s in stats["relations"]["support"]]
        for parent, child in supports:
            if child not in self.node_dict:
                print(
                    f'Warning: support relation {supports} involves not present {child} node'
                )
                continue
            # Parent ids containing "f" / "c", or with more than two
            # "_"-separated parts, are mapped to the architectural
            # parents "Floor" / "Ceiling" / "Wall"
            if "f" in parent:
                self.get_node(child).parent = "Floor"
            elif "c" in parent:
                self.get_node(child).parent = "Ceiling"
            elif len(parent.split("_")) > 2:
                self.get_node(child).parent = "Wall"
            else:
                if parent not in self.node_dict:
                    print(
                        f'Warning: support relation {supports} involves not present {parent} node'
                    )
                    continue
                self.get_node(parent).child.append(self.get_node(child))
                self.get_node(child).parent = self.get_node(parent)

    if include_arch_information:
        house_arch_dir = data_dir + '/suncg_data/wall/'
        arch = read_json(house_arch_dir + self.id + '/' + self.id + '.arch.json')
        self.walls = [w for w in arch['elements'] if w['type'] == 'Wall']
def _read_category_frequency(path):
    """Parse a file of "<category> <count>" lines into a {category: count} dict."""
    table = {}
    with open(path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line)
                continue
            # split() drops the line terminator, so the last count is read
            # correctly even when the file does not end with a newline
            category, count = fields
            table[category] = int(count)
    return table


def _axis_scales(t):
    """Return (xscale, yscale, zscale) read from the 4x4 matrix t.

    xscale / yscale are the norms of the (rows 0, 2) entries of columns
    0 and 2 respectively; zscale is the [1][1] entry.
    """
    xscale = (t[0][0]**2 + t[2][0]**2)**0.5
    yscale = (t[0][2]**2 + t[2][2]**2)**0.5
    zscale = t[1][1]
    return xscale, yscale, zscale


def office_filter(version, room_size, second_tier, source):
    """
    Build the DatasetFilter that selects office rooms and their nodes.

    Parameters
    ----------
    version (string): "final" or "latent"; selects which set of
        room/node criteria is used
    room_size (number): maximum room.length and room.width accepted,
        only used by the "latent" version
    second_tier (bool): only used by the "latent" version; if true, nodes
        above the floor are kept when their category is in the second tier
        list and their chain of parents ends at "Floor"; otherwise only
        nodes near the floor are kept
    source (string): directory (under the data root directory) that
        contains the final_categories_frequency file

    Returns
    -------
    DatasetFilter built from one room filter and one node filter

    Raises
    ------
    NotImplementedError: if version is neither "final" nor "latent"
    """
    data_dir = utils.get_data_root_dir()
    # The coarse_categories_frequency file is not read here: its contents
    # were never used by any of the criteria below.
    category_map = ObjectCategories()
    frequency_path = f"{data_dir}/{source}/final_categories_frequency"

    if version == "final":
        filtered, rejected, door_window = GlobalCategoryFilter.get_filter()
        # Build a new list instead of extending the returned one in place
        rejected = list(rejected) + ["gym_equipment"]
        frequency = _read_category_frequency(frequency_path)

        def node_criteria(node, room):
            category = category_map.get_final_category(node.modelId)
            return category not in filtered

        def room_criteria(room, house):
            node_count = 0
            for node in room.nodes:
                category = category_map.get_final_category(node.modelId)
                if category in rejected:
                    return False
                if category in door_window:
                    continue
                node_count += 1

                t = np.asarray(node.transform).reshape((4, 4)).transpose()
                # Reject rooms where any object is scaled by too much
                if not all(0.8 < s < 1.2 for s in _axis_scales(t)):
                    return False
                # Reject rooms containing a rare category
                if frequency[category] < 200:
                    return False

            return 4 <= node_count <= 20

    elif version == "latent":
        filtered, rejected, door_window, second_tier_include = \
            GlobalCategoryFilter.get_filter_latent()
        filtered = set(filtered)
        rejected = set(rejected) | {
            "gym_equipment", "small_refrigerator", "glass", "bottle",
            "computer", "trash_can", "chair", "dining_table",
        }
        door_window = set(door_window)
        second_tier_include = set(second_tier_include)
        frequency = _read_category_frequency(frequency_path)

        def node_criteria(node, room):
            category = category_map.get_final_category(node.modelId)
            if category in door_window:
                return True
            if category in filtered:
                return False

            if second_tier:
                # Nodes above the floor need an allowed category and a parent
                if node.zmin - room.zmin > 0.1 and \
                        (category not in second_tier_include or node.parent is None):
                    return False
                if node.parent:
                    # The node must not start below the top of its parent
                    if isinstance(node.parent, Node) and \
                            node.zmin < node.parent.zmax - 0.1:
                        return False
                    # Follow the parents up; the chain has to end at "Floor"
                    node_now = node
                    while isinstance(node_now, Node) and node_now.parent:
                        node_now = node_now.parent
                    if node_now != "Floor":
                        return False
            else:
                if node.zmin - room.zmin > 0.1:
                    return False
                if category in ["vase", "console", "book"]:
                    return False

            return True

        def room_criteria(room, house):
            if not room.closed_wall:
                return False
            if room.height > 4:
                return False
            if room.length > room_size:
                return False
            if room.width > room_size:
                return False

            floor_node_count = 0
            node_count = 0

            # Dirty fix: drop nodes whose parent node is no longer in the
            # room; repeated so that chains of such nodes are removed too
            for _ in range(5):
                room.nodes = [
                    node for node in room.nodes
                    if not (node.parent and isinstance(node.parent, Node)
                            and node.parent not in room.nodes)
                ]

            for node in room.nodes:
                category = category_map.get_final_category(node.modelId)
                if category in rejected:
                    return False
                if category in door_window:
                    continue
                node_count += 1
                if node.zmin - room.zmin < 0.1:
                    floor_node_count += 1

                t = np.asarray(node.transform).reshape((4, 4)).transpose()
                xscale, yscale, zscale = _axis_scales(t)
                # Reject rooms where any object is scaled by too much
                if not all(0.9 < s < 1.1 for s in (xscale, yscale, zscale)):
                    return False

                # Divide the accepted (small) scale back out and store the
                # result on the node
                t[0][0] /= xscale
                t[0][2] /= yscale
                t[2][0] /= xscale
                t[2][2] /= yscale
                t[1][1] /= zscale
                node.transform = list(t.transpose().flatten())

                # Reject rooms containing a rare category
                if frequency[category] < 200:
                    return False

            return floor_node_count >= 4 and node_count <= 20

    else:
        raise NotImplementedError

    dataset_f = DatasetFilter(room_filters=[room_criteria],
                              node_filters=[node_criteria])
    return dataset_f