Exemplo n.º 1
0
    def __init__(
        self,
        type_name: str,
        uuid: Hashable,
        save_dir: str,
        subdir_split: Optional[int] = None,
        pickle_protocol: int = -1
    ):
        """
        Initialize a file-backed classification element.

        Computes and stores ``self.filepath``, the location of the pickle
        file associated with this element.

        :param type_name: Name of the classification type. Forwarded to the
            parent constructor; the file name is built from
            ``self.type_name``.
        :param uuid: UUID of this classification. Forwarded to the parent
            constructor; its string form (dashes removed) supplies the
            sub-directory names when splitting is enabled.
        :param save_dir: Base directory for this element's file. A leading
            ``~`` is expanded and the path is converted to an absolute one.
        :param subdir_split: When truthy and positive as an integer, the
            dash-stripped UUID string is partitioned with
            ``partition_string`` and at most the first ``subdir_split - 1``
            segments become nested directories under ``save_dir``. A value
            of 1 therefore adds no sub-directory. Any other value keeps the
            file directly under ``save_dir``.
        :param pickle_protocol: Pickle protocol value, stored unchanged on
            the instance. Defaults to -1.

        """
        super(FileClassificationElement, self).__init__(type_name, uuid)

        # TODO: Remove absolute path conversion (allow relative)
        self.save_dir = osp.abspath(osp.expanduser(save_dir))
        self.pickle_protocol = pickle_protocol

        # Saving components
        self.subdir_split = subdir_split
        # Only attempt integer conversion for truthy values (None stays off).
        n_splits = int(subdir_split) if subdir_split else 0

        # Default location: directly under the base save directory.
        container_dir = self.save_dir
        if n_splits > 0:
            # Normalize the stored value to a real integer.
            self.subdir_split = n_splits
            uuid_chars = str(uuid).replace('-', '')
            segments = partition_string(uuid_chars, n_splits)
            # Drop the last split segment so that we don't end up with many
            # directories that each hold only a single element.
            container_dir = osp.join(self.save_dir, *segments[:n_splits - 1])

        filename = ("%s.%s.classification.pickle"
                    % (self.type_name, str(self.uuid)))
        self.filepath = osp.join(container_dir, filename)
Exemplo n.º 2
0
    def __init__(self,
                 type_name,
                 uuid,
                 save_dir,
                 subdir_split=None,
                 pickle_protocol=-1):
        """
        Initialize a file-based classification element.

        :param type_name: Type of classification. This is usually the name of
            the classifier that generated this result.
        :type type_name: str

        :param uuid: UUID for this classification.
        :type uuid: collections.abc.Hashable

        :param save_dir: Directory under which this element's pickle file is
            located. A leading ``~`` is expanded and relative paths are made
            absolute (i.e. relative to the current working directory).
        :type save_dir: str | unicode

        :param subdir_split: If a positive integer, the string form of the
            UUID, with any dashes removed (as would appear for a
            ``uuid.UUID`` instance), is partitioned using this value and the
            leading segments, excluding the last, are used as nested
            sub-directories under ``save_dir``. Because the last segment is
            dropped, a value of 1 results in no sub-directory.
        :type subdir_split: None | int

        :param pickle_protocol: Pickling protocol to use. Defaults to -1
            (the highest protocol available, per the ``pickle`` module).
        :type pickle_protocol: int

        """
        super(FileClassificationElement, self).__init__(type_name, uuid)

        # TODO: Remove absolute path conversion (allow relative)
        self.save_dir = osp.abspath(osp.expanduser(save_dir))
        self.pickle_protocol = pickle_protocol

        # Saving components
        self.subdir_split = subdir_split
        # Falsy values (None, 0) disable splitting without conversion.
        split_count = int(subdir_split) if subdir_split else 0

        if split_count > 0:
            # Keep the integer-normalized value on the instance.
            self.subdir_split = split_count
            dashless_uuid = str(uuid).replace('-', '')
            # All but the last split segment are used, which avoids making
            # a large number of directories with one element apiece.
            leading = partition_string(dashless_uuid,
                                       split_count)[:split_count - 1]
            target_dir = osp.join(self.save_dir, *leading)
        else:
            target_dir = self.save_dir

        pickle_name = "%s.%s.classification.pickle" % (self.type_name,
                                                       str(self.uuid))
        self.filepath = osp.join(target_dir, pickle_name)
Exemplo n.º 3
0
    def _containing_dir(self, uuid: Hashable) -> str:
        """
        Determine the directory that holds the item with the given UUID.

        If ``self._uuid_chunk`` is falsy, no sub-directory storage is
        configured and the root directory is returned as-is. Otherwise the
        stringified UUID is split with ``partition_string`` using
        ``self._uuid_chunk`` and every segment except the last becomes a
        nested directory under the root directory.

        :param uuid: UUID value to locate the containing directory for.
        :return: Path of the containing directory.
        """
        chunk_count = self._uuid_chunk
        if chunk_count:
            # TODO(paul.tunison): Modify uuid string if too short for the set
            #     UUID chunk. e.g. if uuid is the integer 1 and chunk size is
            #     10, we should convert the stringified result to be at least
            #     length 10? Do this in _fp_for_uuid method?
            segments = partition_string(str(uuid), chunk_count)
            # The final segment is left out of the directory structure.
            return osp.join(self._root_dir, *segments[:-1])

        # No sub-directory storage configured
        return self._root_dir
Exemplo n.º 4
0
    def __init__(self,
                 type_str: str,
                 uuid: Hashable,
                 save_dir: str,
                 subdir_split: Optional[int] = None):
        """
        Initialize a file-based descriptor element.

        Computes and stores the path of the ``.npy`` vector file for this
        element.

        :param type_str: Type of descriptor. This is usually the name of the
            content descriptor that generated this vector.
        :param uuid: UUID for this descriptor.
        :param save_dir: Directory under which this element's vector file is
            located. A leading ``~`` is expanded and relative paths are made
            absolute (i.e. relative to the current working directory).
        :param subdir_split: If an integer greater than 1, the vector file is
            placed in nested sub-directories under ``save_dir`` derived from
            our ``uuid``: the UUID string, with dashes stripped (as would
            appear for a ``uuid.UUID`` instance), is partitioned using this
            value and every segment except the last becomes a directory
            level. Dropping the last segment is why the value must exceed 1
            to have any effect.

        """
        super(DescriptorFileElement, self).__init__(type_str, uuid)
        self._save_dir = osp.abspath(osp.expanduser(save_dir))
        self._subdir_split = subdir_split

        # Generate filepath from parameters
        # Falsy values (None, 0) disable splitting without conversion.
        n_splits = int(self._subdir_split) if self._subdir_split else 0

        # Default location: directly under the base save directory.
        vec_dir = self._save_dir
        if n_splits > 1:
            # TODO: If uuid is an integer, create string with left-padded 0's
            #       to expand out the "length" before partitioning.
            uuid_chars = str(self.uuid()).replace('-', '')
            leading = partition_string(uuid_chars, n_splits)[:-1]
            vec_dir = osp.join(self._save_dir, *leading)

        vec_filename = "%s.%s.vector.npy" % (self.type(), str(self.uuid()))
        self._vec_filepath = osp.join(vec_dir, vec_filename)