def __init__(self, data_dir, json_file, transform=None):
    """
    Build the dataset from the entries stored in a json file.

    :param data_dir: the base directory where the data is located
    :type data_dir: str
    :param json_file: the name of the json file containing the data; it is combined with
        ``data_dir`` through ``os.path.join``, so an absolute path is used as given
    :type json_file: str
    :param transform: a transform object (can be the result of a composition of transforms),
        stored as ``self.transform``
    :type transform: callable

    .. code-block:: python

        from eisen.datasets import JsonDataset

        dset = JsonDataset(
            data_dir='/abs/path/to/data',
            json_file='/abs/path/to/file.json',
            transform=transform
        )

    <json>
    [
        {"name": "json_file", "type": "string", "value": ""}
    ]
    </json>
    """
    # The parsed json content is kept on the instance exactly as the reader returns it.
    self.json_dataset = read_json_from_file(os.path.join(data_dir, json_file))

    self.transform = transform
def __init__(self, data_dir, json_file, phase, transform=None):
    """
    Build the dataset from one phase of an MSD json file.

    The entries found under ``phase`` become ``self.json_dataset``. Everything else in the json
    file, with the "training" and "test" entries removed, is kept in ``self.attributes``.

    :param data_dir: the base directory where the data is located (dataset location after unzipping)
    :type data_dir: str
    :param json_file: the name of the json file describing the MSD dataset
    :type json_file: str
    :param phase: training or test phase as per MSD dataset convention (look at MSD json file)
    :type phase: string
    :param transform: a transform object (can be the result of a composition of transforms)
    :type transform: callable

    .. code-block:: python

        from eisen.datasets import MSDDataset

        dataset = MSDDataset(
            data_dir='/abs/path/to/data',
            json_file='/path/to/dataset.json',
            phase='training',
            transform=transform,
        )

    <json>
    [
        {"name": "json_file", "type": "string", "value": ""},
        {"name": "phase", "type": "string", "value": ["training", "test"]}
    ]
    </json>
    """
    metadata = read_json_from_file(os.path.join(data_dir, json_file))

    entries = metadata[phase]

    # Once both splits are dropped, what is left is kept as dataset attributes.
    for split in ("training", "test"):
        metadata.pop(split, None)

    if phase == "test":
        # Test images are stored as a list of filenames instead of dictionaries; wrap each one.
        entries = [{"image": filename} for filename in entries]

    self.json_dataset = entries
    self.attributes = metadata
    self.transform = transform
def __init__(self, data_dir, json_file, aws_id=None, aws_secret=None, transform=None):
    """
    Build the dataset from a json file retrieved through an S3 client.

    A ``boto3`` S3 client is created and stored as ``self.s3_client``. The json file is obtained
    with ``get_file_from_s3`` using a freshly created temporary directory (``self.tempdir``) and
    then parsed into ``self.json_dataset``. The temporary directory is not removed by this
    constructor.

    :param data_dir: the base location of the data, joined with ``json_file`` using "/"
    :type data_dir: str
    :param json_file: the name of the json file containing the data
    :type json_file: str
    :param aws_id: value passed to ``boto3.client`` as ``aws_access_key_id`` (default None)
    :type aws_id: str
    :param aws_secret: value passed to ``boto3.client`` as ``aws_secret_access_key`` (default None)
    :type aws_secret: str
    :param transform: a transform object (can be the result of a composition of transforms)
    :type transform: callable
    """
    import posixpath

    self.s3_client = boto3.client("s3", aws_access_key_id=aws_id, aws_secret_access_key=aws_secret)

    self.tempdir = tempfile.mkdtemp()

    # The joined location is handed to get_file_from_s3 rather than opened locally, so it is
    # built with "/" on every platform; os.path.join would insert "\" on Windows.
    # NOTE(review): assumes get_file_from_s3 expects a "/"-separated S3 location - confirm.
    remote_json = posixpath.join(data_dir, json_file)

    fetched_json = get_file_from_s3(self.s3_client, remote_json, self.tempdir)

    self.json_dataset = read_json_from_file(fetched_json)
    self.transform = transform
def __init__(self, data_dir, json_file, phase, aws_id=None, aws_secret=None, transform=None):
    """
    Build the dataset from one phase of an MSD json file retrieved through an S3 client.

    A ``boto3`` S3 client is created and stored as ``self.s3_client``. The json file is obtained
    with ``get_file_from_s3`` using a freshly created temporary directory (``self.tempdir``),
    which is not removed by this constructor. The entries found under ``phase`` become
    ``self.json_dataset``. Everything else in the json file, with the "training" and "test"
    entries removed, is kept in ``self.attributes``.

    :param data_dir: the base location of the data, joined with ``json_file`` using "/"
    :type data_dir: str
    :param json_file: the name of the json file describing the MSD dataset
    :type json_file: str
    :param phase: key of the json file to read entries from; "test" entries are wrapped (see below)
    :type phase: string
    :param aws_id: value passed to ``boto3.client`` as ``aws_access_key_id`` (default None)
    :type aws_id: str
    :param aws_secret: value passed to ``boto3.client`` as ``aws_secret_access_key`` (default None)
    :type aws_secret: str
    :param transform: a transform object (can be the result of a composition of transforms)
    :type transform: callable
    """
    import posixpath

    self.s3_client = boto3.client("s3", aws_access_key_id=aws_id, aws_secret_access_key=aws_secret)

    self.tempdir = tempfile.mkdtemp()

    # The joined location is handed to get_file_from_s3 rather than opened locally, so it is
    # built with "/" on every platform; os.path.join would insert "\" on Windows.
    # NOTE(review): assumes get_file_from_s3 expects a "/"-separated S3 location - confirm.
    remote_json = posixpath.join(data_dir, json_file)

    fetched_json = get_file_from_s3(self.s3_client, remote_json, self.tempdir)

    msd_dataset = read_json_from_file(fetched_json)

    entries = msd_dataset[phase]

    # Once both splits are dropped, what is left is kept as dataset attributes.
    msd_dataset.pop("training", None)
    msd_dataset.pop("test", None)

    if phase == "test":
        # Test images are stored as a list of filenames instead of dictionaries; wrap each one.
        entries = [{"image": filename} for filename in entries]

    self.json_dataset = entries
    self.attributes = msd_dataset
    self.transform = transform