def add_entries(self, entries: [DatasetEntryVO]):
    """Insert the given dataset entries in batches of 100 rows.

    Each value object is converted with ``to_array()`` and the results are
    written with ``DatasetEntryEntity.insert_many``.

    Args:
        entries: Value objects describing the entries to insert.

    Raises:
        Exception: If an insert fails with ``IntegrityError``. The original
            error is attached as ``__cause__``.
    """
    rows = [vo.to_array() for vo in entries]
    try:
        for batch in chunked(rows, 100):
            DatasetEntryEntity.insert_many(batch).execute()
    except IntegrityError as ex:
        # Chain the database error so the violated constraint stays visible
        # in the traceback.
        raise Exception(
            "one or more files have already been loaded into this dataset"
        ) from ex
def get_label(self, entity_id: int):
    """Return the label name attached to the dataset entry ``entity_id``.

    The entry is joined with its label; the ``name`` of the first matching
    row is returned, or ``None`` when the query yields no rows.
    """
    entry = DatasetEntryEntity.alias()
    label = LabelEntity.alias()
    joined = entry.select(label.name).join(label, on=(entry.label == label.id))
    rows = list(joined.where(entry.id == entity_id).dicts().execute())
    if not rows:
        return None
    return rows[0]["name"]
def fetch_entries_for_classification(self, ds_id):
    """Return the labelled entries of one dataset as a list of dicts.

    Each row holds the entry's ``file_path`` and the joined label's ``name``.

    Args:
        ds_id: Value compared against the entry's ``dataset`` field.

    Returns:
        A list of the row dicts produced by the query.
    """
    en: DatasetEntryEntity = DatasetEntryEntity.alias("en")
    lbl: LabelEntity = LabelEntity.alias("lbl")
    # The conditions must be combined with ``&``. Python's ``and`` cannot be
    # overloaded, so it evaluates to just one of its operands instead of
    # building a SQL AND, and the dataset filter would be lost.
    condition = (en.dataset == ds_id) & (en.label.is_null(False))
    query = (
        en.select(en.file_path, lbl.name)
        .join(lbl, JOIN.INNER, on=(en.label == lbl.id))
        .where(condition)
    )
    return list(query.dicts().execute())
def fetch_labels(self, dataset_id: int = None):
    """Return image path / label name pairs for the labelled entries of a dataset.

    Each row is a dict with the keys ``image`` (the entry's ``file_path``)
    and ``label`` (the joined label's ``name``). Only entries whose
    ``dataset_id`` equals ``dataset_id`` and whose label is not null are
    selected.
    """
    entry = DatasetEntryEntity.alias("e")
    label = LabelEntity.alias("l")
    columns = (entry.file_path.alias("image"), label.name.alias("label"))
    in_dataset = entry.dataset_id == dataset_id
    has_label = entry.label.is_null(False)
    query = (
        entry.select(*columns)
        .join(label, on=(entry.label == label.id))
        .where(in_dataset & has_label)
    )
    return list(query.dicts().execute())
def find_by_path(self, ds_id, image_path):
    """Look up a dataset entry by the file name part of ``image_path``.

    Selects entries of dataset ``ds_id`` whose ``file_path`` ends with the
    last path component of ``image_path``. The first matching row is copied
    attribute by attribute onto a ``DatasetEntryVO``.

    Returns:
        The populated ``DatasetEntryVO``, or ``None`` when nothing matches.
    """
    file_name = os.path.split(image_path)[1]
    same_dataset = DatasetEntryEntity.dataset == ds_id
    same_file = DatasetEntryEntity.file_path.endswith(file_name)
    query = DatasetEntryEntity.select().where(same_dataset & same_file)
    rows = list(query.dicts().execute())
    vo = DatasetEntryVO()
    if not rows:
        return None
    for attr, value in rows[0].items():
        setattr(vo, attr, value)
    return vo
def fetch_all_with_size(self):
    """Return every dataset together with its total file size and entry count.

    Datasets are LEFT OUTER joined with their entries and grouped by dataset
    id. Each result row is copied onto a ``DatasetVO`` with the attributes
    ``id``, ``name``, ``data_type``, ``size`` and ``count``.

    Returns:
        A list of ``DatasetVO`` objects, one per row returned by the query.
    """
    ds = DatasetEntity.alias("ds")
    m = DatasetEntryEntity.alias("m")
    query = (
        ds.select(
            ds.id,
            ds.name,
            ds.data_type,
            fn.SUM(m.file_size).alias("size"),
            # Count the joined entry id, not the dataset id: with a LEFT OUTER
            # join a dataset without entries still yields one row, so
            # COUNT(ds.id) would report 1 instead of 0.
            fn.COUNT(m.id).alias("count"),
        )
        .join(m, JOIN.LEFT_OUTER, on=(ds.id == m.dataset_id))
        .group_by(ds.id)
    )
    result = []
    for row in query.dicts().execute():
        vo = DatasetVO()
        for k, v in row.items():
            setattr(vo, k, v)
        result.append(vo)
    return result
def fetch_all_by_dataset(self, dataset_id: int = None):
    """Return the annotations of all entries belonging to a dataset.

    Annotations are joined with their dataset entry and LEFT OUTER joined
    with their label. Each row is a dict with the keys ``image``,
    ``annot_kind``, ``annot_points``, ``label_name`` and ``label_color``.
    """
    annotation = AnnotationEntity.alias("a")
    entry = DatasetEntryEntity.alias("i")
    label = LabelEntity.alias("l")
    columns = (
        entry.file_path.alias("image"),
        annotation.kind.alias("annot_kind"),
        annotation.points.alias("annot_points"),
        label.name.alias("label_name"),
        label.color.alias("label_color"),
    )
    with_entry = annotation.select(*columns).join(
        entry, on=(annotation.entry == entry.id)
    )
    with_label = with_entry.join(
        label, on=(annotation.label == label.id), join_type=JOIN.LEFT_OUTER
    )
    query = with_label.where(entry.dataset == dataset_id)
    return list(query.dicts().execute())
def delete_entry(self, id):
    """Delete the dataset entry with the given id.

    Returns:
        Whatever ``DatasetEntryEntity.delete_by_id`` returns.
    """
    outcome = DatasetEntryEntity.delete_by_id(id)
    return outcome
def tag_entries(self, entries: [DatasetEntryVO], label: LabelVO):
    """Assign ``label`` to every given dataset entry.

    The entry ids are updated in chunks of 100 so that each ``IN`` clause
    stays bounded.

    Args:
        entries: Value objects whose ``id`` identifies the rows to update.
        label: Label whose ``id`` is written to the entries' ``label`` field.
    """
    ids = [vo.id for vo in entries]
    for chunk in MiscUtilities.chunk(ids, 100):
        (
            DatasetEntryEntity.update(label=label.id)
            .where(DatasetEntryEntity.id.in_(list(chunk)))
            .execute()
        )
def delete(self, id: int):
    """Delete a label and detach it from every dataset entry that uses it.

    Args:
        id: Primary key of the label to delete.

    Returns:
        Whatever ``LabelEntity.delete_by_id`` returns.
    """
    # Clear the references before removing the label row. Deleting first
    # would fail on databases that enforce the foreign key, while entries
    # still point at the label.
    (
        DatasetEntryEntity.update(label=None)
        .where(DatasetEntryEntity.label == id)
        .execute()
    )
    return LabelEntity.delete_by_id(id)