def choice(self, size=None, indexes=None):
        """
        Return a random sample based on this attribute's probability.

        If ``indexes`` is given it is used as-is and ``size`` is ignored.

        Parameters
        ----------
        size : int, optional
            Size of the random sample. Defaults to ``len(self)``. Only used
            when ``indexes`` is None.

        indexes : array-like, optional
            Array of indexes in bins. When None, indexes are drawn with
            ``np.random.choice`` from ``range(len(self.prs))`` weighted by
            ``self.prs``.

        Returns
        -------
        Series
            One value per index, obtained from ``self._sampling_bins`` and
            then post-processed according to ``self.atype``.
        """
        if indexes is None:
            if size is None:
                size = len(self)
            indexes = Series(
                np.random.choice(len(self.prs), size=size, p=self.prs))
        elif not hasattr(indexes, 'map'):
            # Plain lists / ndarrays have no ``.map``; wrap them so the
            # documented array-like input really is accepted.
            indexes = Series(indexes)
        column = indexes.map(self._sampling_bins)
        if self.atype == 'datetime':
            if not self.categorical:
                column = column.map(self._date_formatter)
        elif self.atype == 'integer':
            column = column.round().astype(int)
        elif self.atype == 'string':
            if not self.categorical:
                # Sampled values are lengths here; turn each one into a
                # random string via utils.randomize_string.
                column = column.map(lambda x: utils.randomize_string(int(x)))
        return column
    def random(self, size=None):
        """
        Return a shuffled Series of evenly spaced values taken from
        ``[self._min, self._max)`` (usually used for non-categorical
        attributes).

        Parameters
        ----------
        size : int, optional
            Number of values to generate. Defaults to ``len(self)``.

        Returns
        -------
        Series
            ``size`` values. For ``'integer'`` attributes each value is
            passed through ``int``; for ``'datetime'`` through
            ``self._date_formatter``; for ``'string'`` the values are
            replaced by strings from ``utils.randomize_string``.
        """
        if size is None:
            size = len(self)
        if self._min == self._max:
            rands = np.ones(size) * self._min
        else:
            # linspace yields exactly ``size`` points. np.arange with a
            # fractional step can return one element too many because the
            # length is computed as ceil((stop - start) / step) in floats.
            rands = np.linspace(self._min, self._max, num=size,
                                endpoint=False)

        np.random.shuffle(rands)
        if self.atype == 'string':
            if self._min == self._max:
                length = self._min
            else:
                length = np.random.randint(self._min, self._max)
            # A single length is drawn once and shared by every string; the
            # numeric values only determine how many strings are produced.
            rands = [utils.randomize_string(length) for _ in rands]
        elif self.atype == 'integer':
            rands = list(map(int, rands))
        elif self.atype == 'datetime':
            rands = list(map(self._date_formatter, rands))
        return Series(rands)
    def choice(self, size=None, indexes=None):
        """
        Return a random sample based on this attribute's probability and
        distribution bins (default value is base random distribution bins based
        on its probability).

        If ``indexes`` is given it is used as-is and ``size`` is ignored.

        Parameters
        ----------
        size : int, optional
            Size of the random sample. Defaults to ``self.size``. An explicit
            ``0`` yields an empty sample.

        indexes : array-like, optional
            Array of indexes in distribution bins. When None, indexes are
            drawn with ``np.random.choice`` from ``range(len(self.prs))``
            weighted by ``self.prs``.

        Returns
        -------
        Series
            One value per index, obtained from ``self._random_sample_at`` and
            then post-processed according to ``self.type``.
        """
        if indexes is None:
            # ``is None`` rather than ``size or ...`` so that an explicit
            # size of 0 is honoured instead of silently becoming self.size.
            if size is None:
                size = self.size
            indexes = Series(
                np.random.choice(len(self.prs), size=size, p=self.prs))
        elif not hasattr(indexes, 'map'):
            # Plain lists / ndarrays have no ``.map``; wrap them so the
            # documented array-like input really is accepted.
            indexes = Series(indexes)
        column = indexes.map(self._random_sample_at)
        if self.type == 'datetime':
            if not self.categorical:
                column = column.map(self._date_formatter)
        elif self.type == 'float':
            column = column.round(self._decimals)
        elif self.type == 'integer':
            column = column.round().astype(int)
        elif self.type == 'string':
            if not self.categorical:
                # Sampled values are lengths here; turn each one into a
                # random string via utils.randomize_string.
                column = column.map(lambda x: utils.randomize_string(int(x)))
        return column
    def random(self, size=None):
        """
        Return a shuffled Series of evenly spaced values taken from
        ``[self.min_, self.max_)`` (usually used for non-categorical
        attributes).

        Parameters
        ----------
        size : int, optional
            Number of values to generate. Defaults to ``self.size``. An
            explicit ``0`` yields an empty Series.

        Returns
        -------
        Series
            ``size`` values. For ``'integer'`` attributes each value is
            passed through ``int``; for ``'datetime'`` through
            ``self._date_formatter``; for ``'string'`` the values are
            replaced by strings from ``utils.randomize_string``.
        """
        # ``is None`` rather than ``size or ...`` so that an explicit size
        # of 0 is honoured instead of silently becoming self.size.
        if size is None:
            size = self.size
        if self.min_ == self.max_:
            rands = np.ones(size) * self.min_
        else:
            # linspace yields exactly ``size`` points. np.arange with a
            # fractional step can return one element too many because the
            # length is computed as ceil((stop - start) / step) in floats.
            rands = np.linspace(self.min_, self.max_, num=size,
                                endpoint=False)

        np.random.shuffle(rands)
        if self.type == 'string':
            if self.min_ == self.max_:
                length = self.min_
            else:
                length = np.random.randint(self.min_, self.max_)
            # A single length is drawn once and shared by every string; the
            # numeric values only determine how many strings are produced.
            rands = [utils.randomize_string(length) for _ in rands]
        elif self.type == 'integer':
            rands = list(map(int, rands))
        elif self.type == 'datetime':
            rands = list(map(self._date_formatter, rands))
        return Series(rands)
def test_set_domain_for_string_attribute():
    """Assigning a four-item domain adds four entries to ``attr.bins``."""
    values = [randomize_string(5) for _ in range(size)]
    column = Series(values, name='String')
    attr = Attribute(column, categorical=True)
    bins_before = attr.bins
    attr.domain = ['a', 'b', 'China', 'USA']
    bins_after = attr.bins
    assert len(bins_before) + 4 == len(bins_after)
def test_string_attribute():
    """Check atype, _min and categorical on a categorical string attribute."""
    values = [randomize_string(5) for _ in range(size)]
    column = Series(values, name='String')
    attr = Attribute(column, categorical=True)
    assert attr.atype == 'string'
    assert attr._min == 5
    assert attr.categorical
def test_random_strings():
    """``Attribute.random()`` returns as many values as the input column."""
    values = [randomize_string(5) for _ in range(size)]
    column = Series(values, name='String')
    attr = Attribute(column)
    generated = attr.random()
    assert len(generated) == size
def test_choice_strings():
    """``Attribute.choice()`` returns as many values as the input column."""
    values = [randomize_string(5) for _ in range(size)]
    column = Series(values, name='String')
    attr = Attribute(column)
    sampled = attr.choice()
    assert len(sampled) == size