Exemplo n.º 1
0
    def get_data(self, file_format=MediaType.AVRO.value):
        # type: (str) -> Iterator[Resource]
        """Lazily fetch the processed resources of this delivery.

        Issues a GET against the delivery's ``data`` endpoint and, for every
        entry in the ``resources`` list of the JSON response, builds a File
        from its ``resource_id``, attaches this object's connection, calls
        ``refresh()`` on it and yields it.

        Args:
            file_format (str): Value sent as the ``delivery_resource_format``
                query parameter. Defaults to ``MediaType.AVRO.value``.

        Yields:
            File: One refreshed File object per listed resource.
        """
        query = {"delivery_resource_format": file_format}

        response = self.connection.api_call(
            "GET",
            ["deliveries", self.dataset_id, self.id, "data"],
            params=query,
        )

        # A falsy listing (empty or null) simply yields nothing.
        for entry in response.json()["resources"] or []:
            delivered = File(raw_model={"resourceId": entry["resource_id"]})
            delivered.connection = self.connection
            delivered.refresh()
            yield delivered
Exemplo n.º 2
0
    def get_raw(self, use_cache=None):
        # type: (bool) -> Iterator[Resource]
        """Lazily fetch the raw resources of this delivery.

        Issues a GET against the delivery's ``raw`` endpoint and yields one
        refreshed File per id found under ``resource_ids`` in the JSON
        response.

        Args:
            use_cache (bool): When not None, forwarded to the API as the
                ``useCache`` query parameter. When None (the default) the
                parameter is omitted from the request.

        Yields:
            File: One refreshed File object per returned resource id.
        """
        # Only send the flag when the caller expressed a preference.
        query = {} if use_cache is None else {"useCache": use_cache}

        response = self.connection.api_call(
            "GET",
            ["deliveries", self.dataset_id, self.id, "raw"],
            params=query,
        )

        # A falsy id list (empty or null) simply yields nothing.
        for resource_id in response.json()["resource_ids"] or []:
            raw_file = File(
                raw_model={"resourceId": resource_id},
                connection=self.connection,
            )
            raw_file.refresh()
            yield raw_file
    def download_files(self, folder, local_path, only_use_crux_domains=None):
        # type: (str, str, bool) -> List[str]
        """Downloads the resources recursively.

        Lists the resources under ``folder`` (folders included), mirrors each
        folder as a local sub-directory of ``local_path`` and downloads each
        file into the matching local location. Sub-folders are handled by
        calling this method recursively.

        Args:
            folder (str): Crux Dataset Folder from where the
                file resources should be recursively downloaded.
            local_path (str): Local OS Path where the file resources should be
                downloaded. Must be an existing directory.
            only_use_crux_domains (bool): True if content is required to be downloaded
                from Crux domains else False.

        Returns:
            list (:obj:`str`): List of location of download files.

        Raises:
            ValueError: If Folder or local_path is None.
            OSError: If local_path is not an existing directory, or if a
                local sub-directory cannot be created.
        """
        if folder is None:
            raise ValueError("Folder value shouldn't be empty")

        if local_path is None:
            raise ValueError("Local Path value shouldn't be empty")

        # isdir() is False both for a missing path and for an existing
        # non-directory (e.g. a regular file), so this single check rejects
        # every invalid download target up front.
        if not os.path.isdir(local_path):
            raise OSError("local_path is an invalid directory location")

        local_file_list = []  # type: List[str]

        resources = self._list_resources(
            sort=None,
            folder=folder,
            offset=0,
            limit=None,
            include_folders=True,
            model=Resource,
        )

        for resource in resources:
            # Remote paths are always POSIX style; local paths follow the OS.
            resource_path = posixpath.join(folder, resource.name)
            resource_local_path = os.path.join(local_path, resource.name)
            if resource.type == "folder":
                # Reuse a directory left over from a previous run instead of
                # failing halfway through; a non-directory in the way still
                # makes os.mkdir raise.
                if not os.path.isdir(resource_local_path):
                    os.mkdir(resource_local_path)
                    log.debug("Created local directory %s", resource_local_path)
                local_file_list.extend(
                    self.download_files(
                        folder=resource_path,
                        local_path=resource_local_path,
                        only_use_crux_domains=only_use_crux_domains,
                    )
                )
            elif resource.type == "file":
                file_resource = File.from_dict(resource.to_dict(),
                                               connection=self.connection)
                file_resource.download(
                    resource_local_path,
                    only_use_crux_domains=only_use_crux_domains)
                local_file_list.append(resource_local_path)
                log.debug("Downloaded file at %s", resource_local_path)

        return local_file_list
Exemplo n.º 4
0
    def get_raw(self):
        # type: () -> Iterator[Resource]
        """Lazily fetch the raw resources of this delivery.

        Issues a GET against the delivery's ``raw`` endpoint and yields one
        refreshed File per id found under ``resource_ids`` in the JSON
        response.

        Yields:
            File: One refreshed File object per returned resource id.
        """
        raw_endpoint = ["deliveries", self.dataset_id, self.id, "raw"]
        payload = self.connection.api_call("GET", raw_endpoint).json()

        identifiers = payload["resource_ids"]
        if not identifiers:
            # Nothing delivered (empty or null list): produce no items.
            return

        for identifier in identifiers:
            raw_file = File(
                raw_model={"resourceId": identifier},
                connection=self.connection,
            )
            raw_file.refresh()
            yield raw_file
Exemplo n.º 5
0
    def create_file(self, path, tags=None, description=None):
        # type: (str, List[str], str) -> File
        """Create a file resource in this dataset.

        Splits ``path`` into a file name and its containing folder, builds a
        File model from them and POSTs its dictionary form to the dataset's
        ``resources`` endpoint.

        Args:
            path (str): Path of the file resource.
            tags (:obj:`list` of :obj:`str`): Tags of the file resource.
                Defaults to None, which is sent as an empty list.
            description (str): Description of the file resource.
                Defaults to None.

        Returns:
            crux.models.File: File Object.
        """
        file_name, folder = split_posixpath_filename_dirpath(path)

        new_file = File(
            name=file_name,
            type="file",
            tags=tags or [],  # any falsy value becomes an empty tag list
            description=description,
        )
        new_file.folder = folder

        json_headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        return self.connection.api_call(
            "POST",
            ["datasets", self.id, "resources"],
            params=new_file.to_dict(),
            model=File,
            headers=json_headers,
        )
    def create_file(self, path, tags=None, description=None):
        # type: (str, List[str], str) -> File
        """Create a file resource in this dataset.

        Splits ``path`` into a file name and its containing folder, wraps
        them in a File raw model and POSTs that raw model as the JSON body
        to the dataset's ``resources`` endpoint.

        Args:
            path (str): Path of the file resource.
            tags (:obj:`list` of :obj:`str`): Tags of the file resource.
                Defaults to None, which is sent as an empty list.
            description (str): Description of the file resource.
                Defaults to None.

        Returns:
            crux.models.File: File Object.
        """
        name, parent_folder = split_posixpath_filename_dirpath(path)

        new_file = File(
            raw_model={
                "name": name,
                "type": "file",
                "tags": tags or [],  # any falsy value becomes an empty list
                "description": description,
                "folder": parent_folder,
            }
        )

        json_headers = Headers({
            "content-type": "application/json",
            "accept": "application/json",
        })

        return self.connection.api_call(
            "POST",
            ["datasets", self.id, "resources"],
            json=new_file.raw_model,
            model=File,
            headers=json_headers,
        )
Exemplo n.º 7
0
def get_resource_object(resource_type, data, connection=None):
    # type: (str, Dict[str, Any], CruxClient) -> Union[File, Folder]
    """Build the resource object that matches ``resource_type``.

    Args:
        resource_type (str): Kind of resource to build; ``"file"`` and
            ``"folder"`` are the recognised values.
        data (dict): Serialized resource data handed to ``from_dict``.
        connection (CruxClient): Connection object passed through to the
            created resource. Defaults to None.

    Returns:
        File or Folder: Object created by the matching class's ``from_dict``.

    Raises:
        TypeError: If ``resource_type`` is neither ``"file"`` nor ``"folder"``.
    """
    # Pick the model class first so the construction call is written once.
    if resource_type == "file":
        resource_cls = File
    elif resource_type == "folder":
        resource_cls = Folder
    else:
        raise TypeError("Invalid Resource Type")

    return resource_cls.from_dict(data, connection=connection)