def get_data(self, file_format=MediaType.AVRO.value):
    # type: (str) -> Iterator[Resource]
    """Yield the processed data resources of this delivery.

    Args:
        file_format (str): File format of the delivery; forwarded to the
            API as the ``delivery_resource_format`` request parameter.

    Yields:
        crux.models.Resource: One refreshed File object per entry in the
            ``resources`` list of the response.
    """
    query = {"delivery_resource_format": file_format}
    response = self.connection.api_call(
        "GET",
        ["deliveries", self.dataset_id, self.id, "data"],
        params=query,
    )

    # A falsy "resources" value (empty list, None) produces no items.
    for entry in response.json()["resources"] or ():
        file_obj = File(raw_model={"resourceId": entry["resource_id"]})
        file_obj.connection = self.connection
        # Only the id is known at this point; refresh() is called so the
        # object is updated before it is handed to the caller.
        file_obj.refresh()
        yield file_obj
def get_raw(self, use_cache=None):
    # type: (bool) -> Iterator[Resource]
    """Yield the raw data resources of this delivery.

    Args:
        use_cache (bool): Cache preference. When not None it is forwarded
            to the API as the ``useCache`` request parameter; when None the
            parameter is omitted entirely.

    Yields:
        crux.models.Resource: One refreshed File object per id in the
            ``resource_ids`` list of the response.
    """
    query = {} if use_cache is None else {"useCache": use_cache}
    response = self.connection.api_call(
        "GET",
        ["deliveries", self.dataset_id, self.id, "raw"],
        params=query,
    )

    resource_ids = response.json()["resource_ids"]
    if not resource_ids:
        # Nothing delivered: finish the generator without yielding.
        return

    for resource_id in resource_ids:
        raw_file = File(
            raw_model={"resourceId": resource_id}, connection=self.connection
        )
        raw_file.refresh()
        yield raw_file
def download_files(self, folder, local_path, only_use_crux_domains=None):
    # type: (str, str, bool) -> List[str]
    """Downloads the resources of a folder recursively.

    Sub-folders are mirrored as local directories below ``local_path`` and
    every file resource is downloaded into the matching local location.

    Args:
        folder (str): Crux Dataset Folder from where the file resources
            should be recursively downloaded.
        local_path (str): Local OS Path where the file resources should be
            downloaded. Must be an existing directory.
        only_use_crux_domains (bool): True if content is required to be
            downloaded from Crux domains else False.

    Returns:
        list (:obj:`str`): List of location of download files.

    Raises:
        ValueError: If Folder or local_path is None.
        OSError: If local_path is an invalid directory location (it does
            not exist, or it exists but is not a directory).
    """
    if folder is None:
        raise ValueError("Folder value shouldn't be empty")

    if local_path is None:
        raise ValueError("Local Path value shouldn't be empty")

    # isdir() is False both for missing paths and for existing
    # non-directories, so a regular file is rejected here instead of
    # failing later in the middle of the download.
    if not os.path.isdir(local_path):
        raise OSError("local_path is an invalid directory location")

    local_file_list = []  # type: List[str]

    resources = self._list_resources(
        sort=None,
        folder=folder,
        offset=0,
        limit=None,
        include_folders=True,
        model=Resource,
    )

    for resource in resources:
        # Remote paths are always POSIX-style; local paths follow the OS.
        resource_path = posixpath.join(folder, resource.name)
        resource_local_path = os.path.join(local_path, resource.name)

        if resource.type == "folder":
            # Reuse a directory left over from an earlier run rather than
            # aborting the whole download on "file exists".
            if not os.path.isdir(resource_local_path):
                os.mkdir(resource_local_path)
                log.debug("Created local directory %s", resource_local_path)
            local_file_list += self.download_files(
                folder=resource_path,
                local_path=resource_local_path,
                only_use_crux_domains=only_use_crux_domains,
            )
        elif resource.type == "file":
            file_resource = File.from_dict(
                resource.to_dict(), connection=self.connection
            )
            file_resource.download(
                resource_local_path, only_use_crux_domains=only_use_crux_domains
            )
            local_file_list.append(resource_local_path)
            log.debug("Downloaded file at %s", resource_local_path)

    return local_file_list
def get_raw(self):
    # type: () -> Iterator[Resource]
    """Yield the raw data resources of this delivery.

    Yields:
        crux.models.Resource: One refreshed File object per id in the
            ``resource_ids`` list of the response.
    """
    endpoint = ["deliveries", self.dataset_id, self.id, "raw"]
    payload = self.connection.api_call("GET", endpoint).json()

    # A falsy "resource_ids" value (empty list, None) produces no items.
    for resource_id in payload["resource_ids"] or ():
        raw_file = File(
            raw_model={"resourceId": resource_id}, connection=self.connection
        )
        raw_file.refresh()
        yield raw_file
def create_file(self, path, tags=None, description=None):
    # type: (str, List[str], str) -> File
    """Create a File resource in this Dataset.

    The path is split into a file name and its parent folder; a File model
    is built from them and posted to the dataset's ``resources`` endpoint.

    Args:
        path (str): Path of the file resource.
        tags (:obj:`list` of :obj:`str`): Tags of the file resource.
            Defaults to None, which is sent as an empty list.
        description (str): Description of the file resource.
            Defaults to None.

    Returns:
        crux.models.File: File Object.
    """
    file_name, folder = split_posixpath_filename_dirpath(path)

    new_file = File(
        name=file_name,
        type="file",
        tags=tags or [],
        description=description,
    )
    new_file.folder = folder

    request_headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    endpoint = ["datasets", self.id, "resources"]
    return self.connection.api_call(
        "POST",
        endpoint,
        params=new_file.to_dict(),
        model=File,
        headers=request_headers,
    )
def create_file(self, path, tags=None, description=None):
    # type: (str, List[str], str) -> File
    """Create a File resource in this Dataset.

    The path is split into a file name and its parent folder; these are
    placed in a raw model together with the tags and description, and the
    model is posted as JSON to the dataset's ``resources`` endpoint.

    Args:
        path (str): Path of the file resource.
        tags (:obj:`list` of :obj:`str`): Tags of the file resource.
            Defaults to None, which is sent as an empty list.
        description (str): Description of the file resource.
            Defaults to None.

    Returns:
        crux.models.File: File Object.
    """
    request_headers = Headers(
        {"content-type": "application/json", "accept": "application/json"}
    )

    file_name, folder = split_posixpath_filename_dirpath(path)
    new_file = File(
        raw_model={
            "name": file_name,
            "type": "file",
            "tags": tags or [],
            "description": description,
            "folder": folder,
        }
    )

    endpoint = ["datasets", self.id, "resources"]
    return self.connection.api_call(
        "POST",
        endpoint,
        json=new_file.raw_model,
        model=File,
        headers=request_headers,
    )
def get_resource_object(resource_type, data, connection=None):
    # type: (str, Dict[str, Any], CruxClient) -> Union[File, Folder]
    """Build the resource object that matches the given resource type.

    Args:
        resource_type (str): Type of resource to create; ``"file"`` and
            ``"folder"`` are the recognised values.
        data (dict): Dictionary which contains serialized resource data.
        connection (CruxClient): Connection Object. Defaults to None.

    Returns:
        crux.models.Resource: A File for ``"file"``, a Folder for
            ``"folder"``.

    Raises:
        TypeError: If resource_type is neither ``"file"`` nor ``"folder"``.
    """
    if resource_type == "file":
        return File.from_dict(data, connection=connection)

    if resource_type == "folder":
        return Folder.from_dict(data, connection=connection)

    raise TypeError("Invalid Resource Type")