Exemplo n.º 1
0
def str_lower(x):
    """Return a lower-cased copy of every string sample.

    :returns: an expression containing the lower-cased strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.lower()
    Expression = str_lower(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    something
    1  very pretty
    2    is coming
    3          our
    4         way.
    """
    sequence = _to_string_sequence(x)
    lowered = sequence.lower()
    # Wrap the resulting buffers in a string column, keeping a reference to
    # the sequence object itself.
    return column.ColumnStringArrow(
        lowered.bytes,
        lowered.indices,
        lowered.length,
        lowered.offset,
        string_sequence=lowered,
    )
Exemplo n.º 2
0
def str_lstrip(x, to_strip=None):
    """Remove leading characters from a string sample.

    :param str to_strip: The leading characters to be removed. If None, an
                         empty string is handed to the C++ implementation,
                         which treats it as "use the default" (presumably
                         whitespace -- TODO confirm). If an empty string is
                         given, nothing is removed and `x` is returned as is.
    :returns: an expression containing the modified string column.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.lstrip(to_strip='very ')
    Expression = str_lstrip(text, to_strip='very ')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  Something
    1     pretty
    2  is coming
    3        our
    4       way.
    """
    # In c++ an empty string has the same meaning as None, so an explicit ''
    # (strip nothing) must never reach it. The input is handed back untouched
    # instead of being treated as if it were a string sequence.
    if to_strip == '':
        return x
    chars = '' if to_strip is None else to_strip
    sl = _to_string_sequence(x).lstrip(chars)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
Exemplo n.º 3
0
def str_center(x, width, fillchar=' '):
    """Pad both sides of each string sample with `fillchar` up to `width` characters.

    :param int width: The total number of characters of the resulting string sample.
    :param str fillchar: The character used for filling.
    :returns: an expression containing the filled strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.center(width=11, fillchar='!')
    Expression = str_center(text, width=11, fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  !Something!
    1  very pretty
    2  !is coming!
    3  !!!!our!!!!
    4  !!!!way.!!!
    """
    sequence = _to_string_sequence(x)
    # The two trailing flags enable padding on the left and on the right.
    centered = sequence.pad(width, fillchar, True, True)
    return column.ColumnStringArrow(
        centered.bytes,
        centered.indices,
        centered.length,
        centered.offset,
        string_sequence=centered,
    )
Exemplo n.º 4
0
def str_ljust(x, width, fillchar=' '):
    """Fill the right side of string samples with a character, so that the strings are left-justified.

    :param int width: The minimal width of the strings.
    :param str fillchar: The character used for filling.
    :returns: an expression containing the filled strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.ljust(width=10, fillchar='!')
    Expression = str_ljust(text, width=10, fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   Something!
    1  very pretty
    2   is coming!
    3   our!!!!!!!
    4   way.!!!!!!
    """
    sequence = _to_string_sequence(x)
    # Left padding off, right padding on: the text stays at the left edge.
    justified = sequence.pad(width, fillchar, False, True)
    return column.ColumnStringArrow(
        justified.bytes,
        justified.indices,
        justified.length,
        justified.offset,
        string_sequence=justified,
    )
Exemplo n.º 5
0
def str_zfill(x, width):
    """Pad strings in a column by prepending "0" characters.

    :param int width: The minimum length of the resulting string. Strings shorter than `width` will be prepended with zeros.
    :returns: an expression containing the modified strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.zfill(width=12)
    Expression = str_zfill(text, width=12)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  000Something
    1  0very pretty
    2  000is coming
    3  000000000our
    4  00000000way.
    """
    sequence = _to_string_sequence(x)
    # Left padding on, right padding off, with '0' as the fill character.
    zero_filled = sequence.pad(width, '0', True, False)
    return column.ColumnStringArrow(
        zero_filled.bytes,
        zero_filled.indices,
        zero_filled.length,
        zero_filled.offset,
        string_sequence=zero_filled,
    )
Exemplo n.º 6
0
def str_capitalize(x):
    """Capitalize the first letter of each string sample.

    :returns: an expression containing the capitalized strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.capitalize()
    Expression = str_capitalize(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    Something
    1  Very pretty
    2    Is coming
    3          Our
    4         Way.
    """
    sequence = _to_string_sequence(x)
    capitalized = sequence.capitalize()
    return column.ColumnStringArrow(
        capitalized.bytes,
        capitalized.indices,
        capitalized.length,
        capitalized.offset,
        string_sequence=capitalized,
    )
Exemplo n.º 7
0
def str_upper(x):
    """Return an upper-cased copy of every string in a column.

    :returns: an expression containing the upper-cased strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.upper()
    Expression = str_upper(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    SOMETHING
    1  VERY PRETTY
    2    IS COMING
    3          OUR
    4         WAY.
    """
    sequence = _to_string_sequence(x)
    uppered = sequence.upper()
    return column.ColumnStringArrow(
        uppered.bytes,
        uppered.indices,
        uppered.length,
        uppered.offset,
        string_sequence=uppered,
    )
Exemplo n.º 8
0
def str_replace(x, pat, repl, n=-1, flags=0, regex=False):
    """Replace occurrences of a pattern/regex in a column with some other string.

    :param str pat: string or a regex pattern
    :param str repl: a replacement string
    :param int n: number of replacements to be made from the start. If -1 make all replacements.
    :param int flags: forwarded unchanged to the underlying replace call
                      (presumably regex flags -- TODO confirm).
    :param bool regex: forwarded unchanged to the underlying replace call
                       (presumably selects whether `pat` is treated as a regex -- TODO confirm).
    :returns: an expression containing the string replacements.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.replace(pat='et', repl='__')
    Expression = str_replace(text, pat='et', repl='__')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    Som__hing
    1  very pr__ty
    2    is coming
    3          our
    4         way.
    """
    sequence = _to_string_sequence(x)
    replaced = sequence.replace(pat, repl, n, flags, regex)
    return column.ColumnStringArrow(
        replaced.bytes,
        replaced.indices,
        replaced.length,
        replaced.offset,
        string_sequence=replaced,
    )
Exemplo n.º 9
0
def str_repeat(x, repeats):
    """Duplicate each string in a column.

    :param int repeats: number of times each string sample is to be duplicated.
    :returns: an expression containing the duplicated strings

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.repeat(3)
    Expression = str_repeat(text, 3)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0        SomethingSomethingSomething
    1  very prettyvery prettyvery pretty
    2        is comingis comingis coming
    3                          ourourour
    4                       way.way.way.
    """
    sequence = _to_string_sequence(x)
    repeated = sequence.repeat(repeats)
    return column.ColumnStringArrow(
        repeated.bytes,
        repeated.indices,
        repeated.length,
        repeated.offset,
        string_sequence=repeated,
    )
Exemplo n.º 10
0
def str_pad(x, width, side='left', fillchar=' '):
    """Pad strings in a given column.

    :param int width: The total width of the string
    :param str side: If 'left' then pad on the left, if 'right' then pad on the right side of the string,
                     if 'both' then pad on both sides.
    :param str fillchar: The character used for padding.
    :returns: an expression containing the padded strings.
    :raises ValueError: if `side` is not one of 'left', 'right' or 'both'.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.pad(width=10, side='left', fillchar='!')
    Expression = str_pad(text, width=10, side='left', fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   !Something
    1  very pretty
    2   !is coming
    3   !!!!!!!our
    4   !!!!!!way.
    """
    # An unrecognised side used to switch off padding on both sides without
    # any warning; reject it up front instead.
    if side not in ('left', 'right', 'both'):
        raise ValueError("side must be one of 'left', 'right' or 'both', got %r" % (side,))
    pad_left = side in ('left', 'both')
    pad_right = side in ('right', 'both')
    sl = _to_string_sequence(x).pad(width, fillchar, pad_left, pad_right)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
Exemplo n.º 11
0
def str_strip(x, to_strip=None):
    """Removes leading and trailing characters.

    Strips whitespaces (including new lines), or a set of specified
    characters from each string sample in a column, both from the left
    and right sides.

    :param str to_strip: The characters to be removed. All combinations of the characters will be removed.
                         If None, it removes whitespaces. If an empty string is given, nothing is
                         removed and `x` is returned as is.
    :returns: an expression containing the modified string samples.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.strip(to_strip='very')
    Expression = str_strip(text, to_strip='very')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  Something
    1      prett
    2  is coming
    3         ou
    4       way.
    """
    # In c++ an empty string has the same meaning as None, so an explicit ''
    # (strip nothing) must never reach it. The input is handed back untouched
    # instead of being treated as if it were a string sequence.
    if to_strip == '':
        return x
    chars = '' if to_strip is None else to_strip
    sl = _to_string_sequence(x).strip(chars)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
Exemplo n.º 12
0
def str_get(x, i):
    """Extract a character from each sample at the specified position from a string column.

    Note that if the specified position is out of bound of the string sample, this method returns '', while pandas returns nan.

    :param int i: The index location, at which to extract the character.
    :returns: an expression containing the extracted characters.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.get(5)
    Expression = str_get(text, 5)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    h
    1    p
    2    m
    3
    4
    """
    sequence = _to_string_sequence(x)
    if i != -1:
        # One-character slice covering [i, i + 1).
        picked = sequence.slice_string(i, i + 1)
    else:
        # -1 is special-cased, presumably because the range (-1, 0) would
        # not select the last character -- TODO confirm.
        picked = sequence.slice_string_end(-1)
    return column.ColumnStringArrow(
        picked.bytes,
        picked.indices,
        picked.length,
        picked.offset,
        string_sequence=picked,
    )
Exemplo n.º 13
0
def str_join(x, sep):
    """Join the string lists in `x` using `sep`.

    `x` is converted to a string-list sequence, whose `join` method is called
    with `sep`; presumably each sample is a list of strings that is joined
    into a single string -- TODO confirm.

    :param sep: The separator handed to the underlying `join` call.
    :returns: a string column wrapping the joined strings.
    """
    list_sequence = _to_string_list_sequence(x)
    joined = list_sequence.join(sep)
    return column.ColumnStringArrow(
        joined.bytes,
        joined.indices,
        joined.length,
        joined.offset,
        string_sequence=joined,
    )
Exemplo n.º 14
0
def format(x, format):
    """Format the values of `x` as strings.

    Uses http://www.cplusplus.com/reference/string/to_string/ for formatting.

    :param x: The values to format.
    :param format: The format specification handed to `vaex.strings.format`.
    :returns: a string column wrapping the formatted values.
    """
    # NOTE: both the function and its second parameter shadow the builtin
    # `format`; the names are part of the public interface and are kept.
    # Don't change the dtype, otherwise for each block the dtype may be
    # different (string length).
    formatted = vaex.strings.format(x, format)
    return column.ColumnStringArrow(
        formatted.bytes,
        formatted.indices,
        formatted.length,
        formatted.offset,
        string_sequence=formatted,
    )
Exemplo n.º 15
0
def to_string(x):
    """Convert the values of `x` to strings using `vaex.strings.to_string`.

    :param x: The values to convert.
    :returns: a string column wrapping the converted values.
    """
    # Don't change the dtype, otherwise for each block the dtype may be
    # different (string length).
    converted = vaex.strings.to_string(x)
    return column.ColumnStringArrow(
        converted.bytes,
        converted.indices,
        converted.length,
        converted.offset,
        string_sequence=converted,
    )