Exemplo n.º 1
0
def _normalize_matrix(df):
    """
    Rescales every row of the matrix df so that it sums to one and returns
    the result as a probability matrix.

    The input is first passed through validate_matrix(). All entries must be
    non-negative and no row total may be numerically zero; violations are
    reported through check(). The rescaled copy is passed through
    validate_matrix(..., matrix_type='probability') before being returned.
    """

    # Run the generic matrix validation first
    df = validate_matrix(df)

    # Negative entries are not allowed
    flat_values = df.values.ravel()
    check(all(flat_values >= 0), 'Some data frame entries are negative.')

    # One total per row (axis=1 sums across the columns of each row)
    row_totals = df.sum(axis=1).values

    # Refuse to divide by a total that is numerically zero
    has_vanishing_total = any(np.isclose(row_totals, 0.0))
    check(not has_vanishing_total,
          'Some columns in df sum to nearly zero.')

    # Fill a copy of df with each row divided by its own total
    normalized_df = df.copy()
    normalized_df.loc[:, :] = df.values / row_totals[:, np.newaxis]

    # Validate as a probability matrix and hand back
    return validate_matrix(normalized_df, matrix_type='probability')
Exemplo n.º 2
0
    def highlight_position(self, p, **kwargs):
        """
        Highlights one position by drawing a rectangular box around it.

        parameters
        ----------
        p: (int)
            The single position to highlight.

        **kwargs:
            Forwarded unchanged to highlight_position_range()

        returns
        -------
        None
        """

        # p has to be an int
        p_is_int = isinstance(p, int)
        check(p_is_int, 'type(p) = %s must be of type int' % type(p))

        # NOTE: p is not checked against the positions of the logo here, so
        # a position outside of the logo is passed along as-is.

        # a single position is the range [p, p]; delegate the drawing
        self.highlight_position_range(pmin=p, pmax=p, **kwargs)
Exemplo n.º 3
0
    def fade_glyphs_in_probability_logo(self, v_alpha0=0.0, v_alpha1=1.0):
        """
        Sets the transparency of every glyph in a probability logo from the
        matrix value that the glyph represents.

        parameters
        ----------

        v_alpha0, v_alpha1: (number in [0,1])
            Matrix values at or below v_alpha0 are rendered with alpha=0 and
            values at or above v_alpha1 with alpha=1; values in between are
            mapped linearly onto (0, 1). Must satisfy v_alpha0 < v_alpha1.

        returns
        -------
        None
        """

        # v_alpha0 must be a number ...
        lower_is_number = isinstance(v_alpha0, (float, int))
        check(lower_is_number,
              'type(v_alpha0) = %s must be a number' % type(v_alpha0))

        # ... that lies in [0, 1]
        lower_in_range = 0.0 <= v_alpha0 <= 1.0
        check(lower_in_range,
              'v_alpha0 must be between 0 and 1; value is %f.' % v_alpha0)

        # v_alpha1 must be a number ...
        upper_is_number = isinstance(v_alpha1, (float, int))
        check(upper_is_number,
              'type(v_alpha1) = %s must be a number' % type(v_alpha1))

        # ... that lies in [0, 1]
        upper_in_range = 0.0 <= v_alpha1 <= 1.0
        check(upper_in_range,
              'v_alpha1 must be between 0 and 1; value is %f' % v_alpha1)

        # the two thresholds must be strictly ordered
        thresholds_ordered = v_alpha0 < v_alpha1
        ordering_msg = ('must have v_alpha0 < v_alpha1;'
                        'here, v_alpha0 = %f and v_alpha1 = %f'
                        % (v_alpha0, v_alpha1))
        check(thresholds_ordered, ordering_msg)

        # the logo's matrix has to validate as a probability matrix
        self.df = validate_matrix(self.df, matrix_type='probability')

        # width of the interval over which alpha ramps from 0 to 1
        ramp_width = v_alpha1 - v_alpha0

        # visit every (position, character) pair
        for pos in self.ps:
            for char in self.cs:

                # matrix value and the glyph drawn for it
                value = self.df.loc[pos, char]
                glyph = self.glyph_df.loc[pos, char]

                # clamp below/above the thresholds, interpolate in between
                if value <= v_alpha0:
                    new_alpha = 0
                elif value >= v_alpha1:
                    new_alpha = 1
                else:
                    new_alpha = (value - v_alpha0) / ramp_width

                # apply the transparency to the glyph
                glyph.set_attributes(alpha=new_alpha)
Exemplo n.º 4
0
def open_example_datafile(name=None, print_description=True):
    """
    Returns a file handle to an example dataset

    parameters
    ----------

    name: (None or str)
        Name of example datafile. Must be one of the names returned by
        list_example_datafiles().

    print_description: (bool)
        If true, the description of the datafile (its lines that start
        with '#') is printed.

    returns
    -------

    f: (file handle)
        An open handle to the requested file; closing it is left to the
        caller. Files whose name ends in '.gz' are opened with
        gzip.open(file_name, 'r'), all others with open(file_name, 'r').
    """

    # get list of valid data files
    valid_datafiles = list_example_datafiles()

    # check that specified datafile is valid
    check(
        name in valid_datafiles,
        'Matrix "%s" not recognized. Please choose from: \n%s' %
        (name, '\n'.join([repr(x) for x in valid_datafiles])))

    # check that print_description is boolean
    check(
        isinstance(print_description, bool),
        'type(print_description) = %s; must be of type bool ' %
        type(print_description))

    # set datafile file name
    file_name = '%s/%s' % (data_dir, name)

    # internal sanity check: a listed datafile is expected to exist on disk
    assert os.path.isfile(file_name), 'File %s does not exist!' % file_name

    # gzipped files need gzip.open(), both for the description and the handle
    is_gzipped = file_name.endswith('.gz')

    # if user wants a description of the datafile, provide it
    if print_description:
        print('Description of example matrix "%s":' % name)

        # Read the header as text. A gzipped file has to be decompressed
        # first; reading it with the builtin open() would yield compressed
        # bytes instead of the '#' comment lines.
        opener = gzip.open if is_gzipped else open
        with opener(file_name, 'rt') as f:
            description = "".join(
                line for line in f if line.startswith('#'))
        print(description)

    # return file handle to user (same modes as before for compatibility)
    if is_gzipped:
        return gzip.open(file_name, 'r')
    return open(file_name, 'r')
Exemplo n.º 5
0
def get_example_matrix(name=None, print_description=True):
    """
    Loads one of the example matrices from which a logo can be made.

    parameters
    ----------

    name: (None or str)
        Name of example matrix. Must be one of the names returned by
        list_example_matrices().

    print_description: (bool)
        If true, the lines of the matrix file that start with '#' are
        printed as a description of the example matrix.

    returns
    -------

    df: (data frame)
        The example matrix, read from a tab-separated text file with the
        first column used as the index and '#' lines treated as comments.
    """

    # names the user may choose from
    valid_matrices = list_example_matrices()

    # the requested name must be one of them
    name_is_known = name in valid_matrices
    choices = '\n'.join(repr(x) for x in valid_matrices)
    check(
        name_is_known,
        'Matrix "%s" not recognized. Please choose from: \n%s' %
        (name, choices))

    # print_description must be a genuine bool
    flag_is_bool = isinstance(print_description, bool)
    check(
        flag_is_bool,
        'type(print_description) = %s; must be of type bool ' %
        type(print_description))

    # locate the matrix file
    file_name = '%s/%s.txt' % (matrix_dir, name)
    assert os.path.isfile(file_name), 'File %s does not exist!' % file_name

    # print the '#' header lines when a description was requested
    if print_description:
        print('Description of example matrix "%s":' % name)
        with open(file_name, 'r') as f:
            header_lines = [row for row in f if row.startswith('#')]
        print("".join(header_lines))

    # parse the file into a data frame and return it
    return pd.read_csv(file_name, sep='\t', index_col=0, comment='#')
Exemplo n.º 6
0
    def style_single_glyph(self, p, c, **kwargs):
        """
        Changes the attributes of the glyph for one character at one
        position of the Logo.

        parameters
        ----------

        p: (int)
            Position of the glyph to modify. Must be in the index of
            self.glyph_df.

        c: (str of length 1)
            Character of the glyph to modify. Must be one of the columns of
            self.glyph_df.

        **kwargs:
            Keyword arguments to pass to Glyph.set_attributes()

        returns
        -------
        None
        """

        # p must be an int (plain or numpy.int64)
        p_is_integer = isinstance(p, (int, np.int64))
        check(p_is_integer,
              'type(p) = %s must be of type int or numpy.int64' % type(p))

        # p must name a row of glyph_df
        p_is_known = p in self.glyph_df.index
        check(p_is_known,
              'p=%s is not a valid position' % p)

        # c must be a str
        c_is_str = isinstance(c, str)
        check(c_is_str,
              'type(c) = %s must be of type str' % type(c))

        # c must be exactly one character long
        c_is_single = len(c) == 1
        check(c_is_single,
              'c = %s; must have length 1.' % repr(c))

        # c must name a column of glyph_df
        c_is_known = c in self.glyph_df.columns
        check(c_is_known,
              'c=%s is not a valid character' % c)

        # look up the glyph and apply the requested attributes
        self.glyph_df.loc[p, c].set_attributes(**kwargs)
Exemplo n.º 7
0
    def draw(self, clear=False):
        """
        Draws characters in Logo.

        Every glyph in self.glyph_list is drawn, followed by the baseline.
        The x- and y-limits of the Axes are then set to the extent of the
        glyphs, and the spines are styled if self.show_spines is not None.

        parameters
        ----------

        clear: (bool)
            If True, Axes will be cleared before logo is drawn.

        returns
        -------
        None
        """

        # validate clear
        check(isinstance(clear, bool),
              'type(clear) = %s; must be of type bool ' %
              type(clear))

        # clear previous content from ax if requested
        if clear:
            self.ax.clear()

        # draw each glyph; this must happen whether or not ax was cleared,
        # otherwise draw() with the default clear=False renders no glyphs
        for g in self.glyph_list:
            g.draw()

        # draw baseline
        self.draw_baseline(linewidth=self.baseline_width)

        # set xlims so that the full width of every glyph is visible
        xmin = min(g.p - .5*g.width for g in self.glyph_list)
        xmax = max(g.p + .5*g.width for g in self.glyph_list)
        self.ax.set_xlim([xmin, xmax])

        # set ylims to the lowest floor and the highest ceiling of the glyphs
        ymin = min(g.floor for g in self.glyph_list)
        ymax = max(g.ceiling for g in self.glyph_list)
        self.ax.set_ylim([ymin, ymax])

        # style spines if requested
        if self.show_spines is not None:
            self.style_spines(visible=self.show_spines)
Exemplo n.º 8
0
def demo(name='fig1b'):

    """
    Performs a demonstration of the Logomaker software.

    The 'examples' directory next to this module is searched for scripts
    named 'demo_<name>.py'. The source of the selected script is printed
    and then executed.

    parameters
    -----------

    name: (str)
        Must be one of {'fig1b', 'fig1c', 'fig1d', 'fig1e', 'fig1f', 'logo'}.

    returns
    -------
    fig: (matplotlib Figure)
        The current matplotlib Figure, as returned by plt.gcf() after the
        demo script has been executed.

    """

    # build list of demo names and corresponding file names
    example_dir = '%s/examples' % os.path.dirname(__file__)
    all_base_file_names = os.listdir(example_dir)

    # raw string avoids an invalid '\.' escape; '$' keeps out files such as
    # 'demo_x.pyc' that merely start like a demo script
    example_file_names = ['%s/%s' % (example_dir, temp_name)
                          for temp_name in all_base_file_names
                          if re.match(r'demo_.*\.py$', temp_name)]

    # the key is the text between the last '_' and the '.py' extension
    examples_dict = {}
    for file_name in example_file_names:
        key = file_name.split('_')[-1][:-3]
        examples_dict[key] = file_name

    # check that name is valid
    check(name in examples_dict,
          'name = %s is not valid. Must be one of %s'
          % (repr(name), examples_dict.keys()))

    # read the example file once; the handle is closed by the with-block
    file_name = examples_dict[name]
    with open(file_name, 'r') as f:
        content = f.read()

    # show the user what is about to be run
    line = '-------------------------------------------------------------'
    print('Running %s:\n%s\n%s\n%s' % (file_name, line, content, line))

    # NOTE: this executes the selected script from the examples directory
    # inside this function; reuse the text read above rather than opening
    # the file a second time without closing it.
    exec(content)

    # return the current matplotlib Figure object
    return plt.gcf()
Exemplo n.º 9
0
def _counts_mat_to_probability_mat(counts_df, pseudocount=1.0):
    """
    Turns a counts matrix into a probability matrix.

    pseudocount is added to every entry of counts_df, and each row is then
    divided by its own total. The result is additionally passed through
    _normalize_matrix() and validated as a probability matrix.
    """

    # The input must validate as a matrix
    counts_df = validate_matrix(counts_df)

    # A negative pseudocount is not allowed
    check(pseudocount >= 0, "pseudocount must be >= 0.")

    # Add the pseudocount and total up each row
    smoothed_counts = counts_df.values + pseudocount
    row_totals = smoothed_counts.sum(axis=1)

    # Write the row-wise ratios into a copy of the counts matrix
    result_df = counts_df.copy()
    result_df.loc[:, :] = smoothed_counts / row_totals[:, np.newaxis]
    result_df = _normalize_matrix(result_df)

    # Validate as a probability matrix and hand back
    return validate_matrix(result_df, matrix_type='probability')
Exemplo n.º 10
0
def validate_probability_mat(df):
    """
    Checks that the dataframe df represents a probability matrix.

    If the rows of df do not already sum to (approximately) one, a message
    is printed and every row is divided by its total. Negative entries, or
    a row whose total is numerically zero, are reported through check().

    parameters
    ----------

    df: (dataframe)
        A pandas dataframe where each row represents an (integer) position
        and each column represents a (single) character.

    returns
    -------
    prob_df: (dataframe)
        The matrix returned by validate_matrix(df, allow_nan=False),
        renormalized row by row if that was necessary.
    """

    # Generic matrix validation; NaN values are not allowed here
    prob_df = validate_matrix(df, allow_nan=False)

    # Every entry has to be non-negative
    flat_values = prob_df.values.ravel()
    check(all(flat_values >= 0), 'not all values in df are >=0.')

    # One total per row
    row_totals = prob_df.sum(axis=1).values

    # A row whose total is numerically zero cannot be normalized
    has_vanishing_total = any(np.isclose(row_totals, 0.0))
    check(not has_vanishing_total,
          'some columns in prob_df sum to nearly zero.')

    # Nothing more to do when every row already sums to one
    if all(np.isclose(row_totals, 1.0)):
        return prob_df

    # Otherwise tell the user and rescale all rows
    print('in validate_probability_mat(): '
          'Row sums in df are not close to 1. '
          'Reormalizing rows...')
    prob_df.loc[:, :] = prob_df.values / row_totals[:, np.newaxis]

    # Return validated probability matrix to user
    return prob_df
Exemplo n.º 11
0
    def style_glyphs_in_sequence(self,
                                 sequence,
                                 **kwargs):
        """
        Applies a style to the glyphs that spell out a given sequence.

        parameters
        ----------
        sequence: (str)
            A string the same length as the logo. Its i-th character selects
            which glyph to restyle at the i-th position of the logo.
            Characters that are not in self.cs are ignored.

        **kwargs:
            Keyword arguments to pass to Glyph.set_attributes()

        returns
        -------
        None
        """

        # sequence must be a str
        is_string = isinstance(sequence, str)
        check(is_string,
              'type(sequence) = %s must be of type str' % type(sequence))

        # sequence must cover the logo exactly
        lengths_match = len(sequence) == self.L
        check(lengths_match,
              'sequence to restyle (length %d) ' % len(sequence) +
              'must have same length as logo (length %d).' % self.L)

        # walk the logo positions alongside the sequence
        positions = self.glyph_df.index
        for idx, pos in enumerate(positions):

            # character of the sequence at this position
            char = sequence[idx]

            # characters the logo does not know are skipped
            if char not in self.cs:
                continue

            # restyle the glyph for this character at this position
            self.style_single_glyph(pos, char, **kwargs)
Exemplo n.º 12
0
def get_rgb(color_spec):
    """
    Safely returns an RGB np.ndarray given a valid color specification

    parameters
    ----------

    color_spec: (str, or list/tuple/np.ndarray of length 3)
        Either a color string understood by to_rgb(), or three numbers,
        each between 0 and 1 inclusive, giving the RGB values directly.

    returns
    -------

    rgb: (np.ndarray)
        The RGB values of the color. Invalid specifications are reported
        through check().
    """

    # initialize rgb so that it is always bound at the return statement
    rgb = None

    # If color specification is a string
    if isinstance(color_spec, str):
        try:
            rgb = np.array(to_rgb(color_spec))

        # to_rgb (presumably matplotlib.colors.to_rgb) signals an
        # unrecognized color string with ValueError. Catch only that, so
        # that KeyboardInterrupt, SystemExit and unrelated errors are not
        # swallowed the way a bare except would swallow them.
        except ValueError:
            check(False, 'invalid choice: color_spec=%s' % color_spec)

    # Otherwise, if color_specification is array-like, it should
    # specify RGB values; convert to np.ndarray
    elif isinstance(color_spec, (list, tuple, np.ndarray)):

        # color_spec must have length 3 to be RGB
        check(
            len(color_spec) == 3,
            'color_scheme, if array, must be of length 3.')

        # color_spec must only contain numbers between 0 and 1
        check(all(0 <= x <= 1 for x in color_spec),
              'Values of color_spec must be between 0 and 1 inclusive.')

        # Cast color_spec as RGB
        rgb = np.array(color_spec)

    # Otherwise, throw error
    else:
        check(False, 'type(color_spec) = %s is invalid.' % type(color_spec))

    # Return RGB as an np.ndarray
    return rgb
Exemplo n.º 13
0
    def draw_baseline(self,
                      zorder=-1,
                      color='black',
                      linewidth=0.5,
                      **kwargs):
        """
        Adds a horizontal baseline to the Axes via ax.axhline().

        parameters
        ----------

        zorder: (number)
            Controls which other objects drawn on ax appear in front of or
            behind the baseline. Logo characters are, by default, drawn in
            front of the baseline.

        color: (matplotlib color)
            Color of the baseline. Can be a named matplotlib color or an
            RGB array; it is converted with get_rgb().

        linewidth: (number >= 0)
            Width of the baseline.

        **kwargs:
            Additional keyword arguments to be passed to ax.axhline()

        returns
        -------
        None
        """

        # zorder must be a number
        zorder_is_number = isinstance(zorder, (float, int))
        check(zorder_is_number,
              'type(zorder) = %s; must a float or int.' % type(zorder))

        # convert (and thereby validate) the color
        rgb = get_rgb(color)

        # linewidth must be a number ...
        linewidth_is_number = isinstance(linewidth, (float, int))
        check(linewidth_is_number,
              'type(linewidth) = %s; must be a number ' % type(linewidth))

        # ... and must not be negative
        check(linewidth >= 0, 'linewidth must be >= 0')

        # collect the line properties and draw the baseline
        line_props = dict(kwargs,
                          zorder=zorder,
                          color=rgb,
                          linewidth=linewidth)
        self.ax.axhline(**line_props)
Exemplo n.º 14
0
def _get_background_mat(df, background):
    """
    Builds a background probability matrix from a background specification.
    Three kinds of specification are handled:

    1. background is None => every entry is set to 1 / (number of columns
        of df), i.e. a uniform background.
    2. background is a vector (np.ndarray, list, or tuple) => the vector is
        used for every row and the result is normalized. Its length must
        equal the number of columns of df.
    3. background is a dataframe => it is validated, required to have the
        same index and columns as df, and then normalized.

    The result is validated as a probability matrix before it is returned.
    """

    # Only the number of columns of df is needed below
    _, n_chars = df.shape

    # Start from a copy of df so that index and columns carry over
    background_df = df.copy()

    # Case 1: no background given; use a uniform one
    if background is None:
        background_df.loc[:, :] = 1 / n_chars

    # Case 2: a vector with one entry per column
    elif isinstance(background, (np.ndarray, list, tuple)):
        background = np.array(background)
        lengths_match = len(background) == n_chars
        check(
            lengths_match,
            'df and background have mismatched dimensions.')

        # broadcast the vector over all rows, then normalize
        background_df.loc[:, :] = background[np.newaxis, :]
        background_df = _normalize_matrix(background_df)

    # Case 3: a full dataframe
    elif isinstance(background, pd.DataFrame):
        background_df = validate_matrix(background)
        same_index = all(df.index == background_df.index)
        check(same_index,
              'Error: df and bg_mat have different indexes.')
        same_columns = all(df.columns == background_df.columns)
        check(same_columns,
              'Error: df and bg_mat have different columns.')
        background_df = _normalize_matrix(background_df)

    # Validate as a probability matrix and hand back
    return validate_matrix(background_df, matrix_type='probability')
Exemplo n.º 15
0
def transform_matrix(df,
                     center_values=False,
                     normalize_values=False,
                     from_type=None,
                     to_type=None,
                     background=None,
                     pseudocount=1):
    """
    Performs transformations on a matrix. There are three types of
    transformations that can be performed:

    1. Center values:
        Subtracts the mean from each row in df. This is common for weight
        matrices or energy matrices. To do this, set center_values=True.

    2. Normalize values:
        Divides each row by the sum of the row. This is needed for probability
        matrices. To do this, set normalize_values=True.

    3. From/To transformations:
        Transforms from one type of matrix (e.g. 'counts') to another type
        of matrix (e.g. 'information'). To do this, set from_type and to_type
        arguments.

    Only one of these is performed per call: center_values takes precedence
    over normalize_values, which takes precedence over from_type/to_type.
    When center_values or normalize_values is True, both from_type and
    to_type must be None.

    Here are the mathematical formulas invoked by From/To transformations:

        from_type='counts' ->  to_type='probability':
            P_ic = (N_ic + l)/(N_i + C*l), N_i = sum_c(N_ic)

        from_type='probability' -> to_type='weight':
            W_ic = log_2(P_ic / Q_ic)

        from_type='weight' -> to_type='probability':
            P_ic = Q_ic * 2^(W_ic)

        from_type='probability' -> to_type='information':
            I_ic = P_ic * sum_d(P_id * log2(P_id / Q_id))

        from_type='information' -> to_type='probability':
            P_ic = I_ic / sum_d(I_id)

        notation:
            i = position
            c, d = character
            l = pseudocount
            C = number of characters
            N_ic = counts matrix element
            P_ic = probability matrix element
            Q_ic = background probability matrix element
            W_ic = weight matrix element
            I_ic = information matrix element

    Using these five 1-step transformations, 2-step transformations
    are also enabled, e.g., from_type='counts' -> to_type='information'.
    A 2-step transformation always goes through a probability matrix.

    parameters
    ----------

    df: (dataframe)
        The matrix to be transformed.

    center_values: (bool)
        Whether to center matrix values, i.e., subtract the mean from each
        row.

    normalize_values: (bool)
        Whether to normalize each row, i.e., divide each row by
        the sum of that row.

    from_type: (str)
        Type of input matrix. Must be one of 'counts', 'probability',
        'weight', or 'information'.

    to_type: (str)
        Type of output matrix. Must be one of 'probability', 'weight', or
        'information'. Can be 'counts' ONLY if from_type is 'counts' too.

    background: (array, or df)
        Specification of background probabilities. If array, should be the
        same length as df.columns and correspond to the probability of each
        column's character. If df, should be a probability matrix the same
        shape as df.

    pseudocount: (number >= 0)
        Pseudocount to use when transforming from a counts matrix to a
        probability matrix.

    returns
    -------
    out_df: (dataframe)
        Transformed matrix
    """

    # validate matrix dataframe
    df = validate_matrix(df)

    # validate center_values
    check(
        isinstance(center_values, bool),
        'type(center_values) = %s must be of type bool' % type(center_values))

    # validate normalize_values
    check(
        isinstance(normalize_values, bool),
        'type(normalize_values) = %s must be of type bool' %
        type(normalize_values))

    # validate from_type
    check((from_type in MATRIX_TYPES) or (from_type is None),
          'from_type = %s must be None or in %s' % (from_type, MATRIX_TYPES))

    # validate to_type
    check((to_type in MATRIX_TYPES) or (to_type is None),
          'to_type = %s must be None or in %s' % (to_type, MATRIX_TYPES))

    # validate background
    check(
        isinstance(background, (list, np.ndarray, pd.DataFrame))
        or (background is None),
        'type(background) = %s must be None or array-like or a dataframe.' %
        type(background))

    # validate pseudocount
    check(isinstance(pseudocount, (int, float)),
          'type(pseudocount) = %s must be a number' % type(pseudocount))
    check(pseudocount >= 0, 'pseudocount=%s must be >= 0' % pseudocount)

    # If centering values, do that
    if center_values is True:
        # NOTE: the trailing space inside the first literal matters; the
        # two literals are concatenated into one message.
        check((from_type is None) and (to_type is None),
              "If center_values is True, both from_type and to_type "
              "must be None. Here, from_type=%s, to_type=%s" %
              (from_type, to_type))

        # Do centering
        out_df = _center_matrix(df)

    # Otherwise, if normalizing values, do that
    elif normalize_values is True:
        check((from_type is None) and (to_type is None),
              "If normalize_values is True, both from_type and to_type "
              "must be None. Here, from_type=%s, to_type=%s" %
              (from_type, to_type))

        # Do normalizing
        out_df = _normalize_matrix(df)

    # otherwise, if to_type == from_type, just return matrix
    # Note, this is the only way that to_type='counts' is valid
    # (this branch is also taken when both types are None)
    elif from_type == to_type:
        out_df = df.copy()

    # Otherwise, we're converting from one type of matrix to another. Do this.
    else:
        # Check that from_type and to_type are not None
        check((from_type is not None) and (to_type is not None),
              'Unless center_values is True or normalize_values is True, '
              'neither from_type (=%s) nor to_type (=%s) can be None.' %
              (from_type, to_type))

        # Check that to_type != 'counts'
        check(
            to_type != 'counts', "Can only have to_type='counts' if "
            "from_type='counts'. Here, however, "
            "from_type='%s'" % from_type)

        # If converting from a probability matrix
        if from_type == 'probability':

            # ... to a weight matrix
            if to_type == 'weight':
                out_df = _probability_mat_to_weight_mat(df, background)

            # ... to an information matrix
            elif to_type == 'information':
                out_df = _probability_mat_to_information_mat(df, background)

            # to_type == 'probability' and 'counts' were handled above
            else:
                assert False, 'THIS SHOULD NEVER EXECUTE'

        # Otherwise, convert to probability matrix, then call function again
        else:

            # If converting from a counts matrix,
            # convert to probability matrix first
            if from_type == 'counts':
                prob_df = _counts_mat_to_probability_mat(df, pseudocount)

            # If converting from a weight matrix,
            # convert to probability matrix first
            elif from_type == 'weight':
                prob_df = _weight_mat_to_probability_mat(df, background)

            # If converting from an information matrix,
            # convert to probability matrix first
            elif from_type == 'information':
                prob_df = _information_mat_to_probability_mat(df, background)

            # from_type was validated against MATRIX_TYPES above
            else:
                assert False, 'THIS SHOULD NEVER EXECUTE'

            # Now that we have the probability matrix,
            # convert to user-specified to_type. If to_type is
            # 'probability', the recursive call just returns a copy.
            out_df = transform_matrix(prob_df,
                                      from_type='probability',
                                      to_type=to_type,
                                      background=background)

    # Validate and return
    out_df = validate_matrix(out_df)
    return out_df
Exemplo n.º 16
0
    def style_spines(self,
                     spines=('top', 'bottom', 'left', 'right'),
                     visible=True,
                     color='black',
                     linewidth=1.0,
                     bounds=None):
        """
        Sets the appearance of the spines of the Axes object in which the
        logo is drawn. Note: "spines" refers to the edges of the Axes
        bounding box.

        Calling this method also resets self.show_spines to None.

        parameters
        ----------

        spines: (tuple, list, or set of str)
            The spines to modify; any of 'top', 'bottom', 'left', 'right'.
            The default value lists all four spines.

        visible: (bool)
            Whether the spines listed in the parameter spines are shown.

        color: (matplotlib color)
            Color of the spines. Can be a named matplotlib color or an
            RGB array; it is converted with get_rgb().

        linewidth: (float >= 0)
            Width of lines used to draw the spines.

        bounds: (None or [float, float])
            If not None, the two values between which a spine (or spines)
            will be drawn. Must satisfy bounds[0] < bounds[1].

        returns
        -------
        None
        """

        # an explicit call overrides whatever show_spines was set to
        self.show_spines = None

        # spines must be a tuple, list, or set; work with a set from here on
        spines_type_ok = isinstance(spines, (tuple, list, set))
        check(
            spines_type_ok,
            'type(spines) = %s; must be of type (tuple, list, set) ' %
            type(spines))
        spines = set(spines)

        # every requested spine must be one of the four valid names
        valid_spines = {'top', 'bottom', 'left', 'right'}
        all_names_valid = spines <= valid_spines
        check(
            all_names_valid,
            'spines has invalid entry; valid entries are: %s' %
            repr(valid_spines))

        # visible must be a bool
        visible_is_bool = isinstance(visible, bool)
        check(visible_is_bool,
              'type(visible) = %s; must be of type bool ' % type(visible))

        # linewidth must be a number ...
        linewidth_is_number = isinstance(linewidth, (float, int))
        check(linewidth_is_number,
              'type(linewidth) = %s; must be a number ' % type(linewidth))

        # ... and must not be negative
        check(linewidth >= 0, 'linewidth must be >= 0')

        # convert (and thereby validate) the color
        color = get_rgb(color)

        # bounds is optional; validate it only when given
        if bounds is not None:

            # container type
            bounds_types = (list, tuple, np.ndarray)
            bounds_type_ok = isinstance(bounds, bounds_types)
            check(
                bounds_type_ok,
                'type(bounds) = %s; must be one of %s' %
                (type(bounds), bounds_types))

            # exactly two entries
            check(
                len(bounds) == 2,
                'len(bounds) = %d; must be %d' % (len(bounds), 2))

            # both entries must be numbers
            entries_are_numbers = all(
                isinstance(bound, (float, int)) for bound in bounds)
            check(entries_are_numbers,
                  'bounds = %s; all entries must be numbers' % repr(bounds))

            # lower bound strictly below upper bound
            check(
                bounds[0] < bounds[1],
                'bounds = %s; must have bounds[0] < bounds[1]' % repr(bounds))

        # go through the spines of the Axes
        for name, spine in self.ax.spines.items():

            # leave spines that were not requested untouched
            if name not in spines:
                continue

            # apply the requested style
            spine.set_visible(visible)
            spine.set_color(color)
            spine.set_linewidth(linewidth)

            # restrict the extent of the spine if bounds were given
            if bounds is not None:
                spine.set_bounds(bounds[0], bounds[1])
Exemplo n.º 17
0
    def style_xticks(self,
                     anchor=0,
                     spacing=1,
                     fmt='%d',
                     rotation=0.0,
                     **kwargs):
        """
        Places and labels the tick marks along the x-axis.

        Ticks are put at every integer between min(self.ps) and
        max(self.ps) (inclusive) that differs from anchor by a multiple of
        spacing.

        parameters
        ----------

        anchor: (int)
            Anchors tick marks at a specific number. Even if this number
            is not within the x-axis limits, it fixes the register for
            tick marks.

        spacing: (int > 0)
            The spacing between adjacent tick marks

        fmt: (str)
            Format string applied to each tick position (fmt % position)
            to produce its label.

        rotation: (number)
            Angle, in degrees, with which to draw tick mark labels.

        **kwargs:
            Additional keyword arguments to be passed to ax.set_xticklabels()

        returns
        -------
        None
        """

        # anchor must be an int
        anchor_is_int = isinstance(anchor, int)
        check(anchor_is_int,
              'type(anchor) = %s must be of type int' % type(anchor))

        # spacing must be a positive int
        spacing_ok = isinstance(spacing, int) and spacing > 0
        check(
            spacing_ok,
            'spacing = %s must be an int > 0' % repr(spacing))

        # fmt must be a str
        fmt_is_str = isinstance(fmt, str)
        check(fmt_is_str,
              'type(fmt) = %s must be of type str' % type(fmt))

        # rotation must be a number
        rotation_is_number = isinstance(rotation, (float, int))
        check(
            rotation_is_number,
            'type(rotation) = %s; must be of type float or int ' %
            type(rotation))

        # every integer from the smallest to the largest position
        candidates = np.arange(min(self.ps), max(self.ps) + 1)

        # keep those that are a whole number of spacings away from anchor
        on_grid = (candidates - anchor) % spacing == 0
        tick_positions = candidates[on_grid]
        self.ax.set_xticks(tick_positions)

        # format one label per tick and apply them
        tick_labels = [fmt % pos for pos in tick_positions]
        self.ax.set_xticklabels(tick_labels, rotation=rotation, **kwargs)
Exemplo n.º 18
0
    def highlight_position_range(self,
                                 pmin,
                                 pmax,
                                 padding=0.0,
                                 color='yellow',
                                 edgecolor=None,
                                 floor=None,
                                 ceiling=None,
                                 zorder=-2,
                                 **kwargs):
        """
        Adds a rectangular highlight covering positions pmin through pmax
        to the Axes of the Logo.

        parameters
        ----------
        pmin: (number)
            Lowest position to highlight.

        pmax: (number)
            Highest position to highlight. Must satisfy pmin <= pmax.

        padding: (number >= -0.5)
            Extra width added to both the left and the right side of the
            highlight.

        color: (None or matplotlib color)
            Face color of the highlight box. Unless None, it is converted
            with get_rgb().

        edgecolor: (None or matplotlib color)
            Edge color of the highlight box. Unless None, it is converted
            with get_rgb().

        floor: (None or number)
            Lower y-extent of the highlight box. None means the current
            lower y-limit of the Axes.

        ceiling: (None or number)
            Upper y-extent of the highlight box. None means the current
            upper y-limit of the Axes.

        zorder: (number)
            Drawing order of the highlight box relative to other artists
            on the Axes.

        **kwargs:
            Additional keyword arguments handed to Rectangle().

        returns
        -------
        None
        """

        # current y-limits of the Axes; used as defaults for floor/ceiling
        axis_bottom, axis_top = self.ax.get_ylim()

        # both range end points have to be numbers ...
        number_types = (float, int)
        check(isinstance(pmin, number_types),
              'type(pmin) = %s must be a number' % type(pmin))
        check(isinstance(pmax, number_types),
              'type(pmax) = %s must be a number' % type(pmax))

        # ... and have to be ordered
        check(pmin <= pmax, 'pmin <= pmax not satisfied.')

        # padding has to be a number no smaller than -0.5
        padding_is_valid = \
            isinstance(padding, number_types) and padding >= -0.5
        check(padding_is_valid,
              'padding = %s must be a number >= -0.5' % repr(padding))

        # convert colors to RGB unless they were left unspecified
        color = None if color is None else get_rgb(color)
        edgecolor = None if edgecolor is None else get_rgb(edgecolor)

        # floor: validate a user-supplied value, otherwise use the Axes limit
        if floor is not None:
            check(isinstance(floor, number_types),
                  'type(floor) = %s must be a number' % type(floor))
        else:
            floor = axis_bottom

        # ceiling: validate a user-supplied value, otherwise use the Axes limit
        if ceiling is not None:
            check(isinstance(ceiling, number_types),
                  'type(ceiling) = %s must be a number' % type(ceiling))
        else:
            ceiling = axis_top

        # the box must not be upside down
        check(
            floor <= ceiling,
            'must have floor <= ceiling; as is, floor = %f, ceiling = %s' %
            (floor, ceiling))

        # zorder has to be a number
        check(isinstance(zorder, number_types),
              'type(zorder) = %s; must a float or int.' % type(zorder))

        # each position occupies one unit of x centered on its integer
        # value, so the box starts half a unit (plus padding) left of pmin
        lower_left = (pmin - .5 - padding, floor)
        box_width = pmax - pmin + 1 + 2 * padding
        box_height = ceiling - floor

        # build the rectangle and attach it to the Axes
        highlight = Rectangle(xy=lower_left,
                              width=box_width,
                              height=box_height,
                              facecolor=color,
                              edgecolor=edgecolor,
                              zorder=zorder,
                              **kwargs)
        self.ax.add_patch(highlight)
Exemplo n.º 19
0
    def style_glyphs_below(self,
                           color=None,
                           alpha=None,
                           shade=0.0,
                           fade=0.0,
                           flip=None,
                           **kwargs):
        """
        Modifies the properties of all characters drawn below the x-axis,
        i.e., of every glyph whose value in self.df is negative.

        parameters
        ----------

        color: (color specification)
            Color to use before shade is applied. If None, each glyph keeps
            its own color.

        alpha: (number in [0,1])
            Opacity to use when rendering characters, before fade is applied.
            If None, each glyph keeps its own alpha.

        shade: (number in [0,1])
            The amount to shade characters below the x-axis. The glyph color
            is multiplied by (1 - shade).

        fade: (number in [0,1])
            The amount to fade characters below the x-axis. The glyph alpha
            is multiplied by (1 - fade).

        flip: (bool)
            If True, characters below the x-axis will be flipped upside down.

        **kwargs:
            Keyword arguments to pass to Glyph.set_attributes(), but only
            for characters below the x-axis.

        returns
        -------
        None
        """

        # validate color and transform to RGB
        if color is not None:
            color = get_rgb(color)

        # validate alpha
        if alpha is not None:
            # check alpha is a number
            check(isinstance(alpha, (float, int)),
                  'type(alpha) = %s must be a float or int' % type(alpha))

            # check 0 <= alpha <= 1.0
            check(0 <= alpha <= 1.0,
                  'alpha must be between 0.0 and 1.0 (inclusive)')

            # Store alpha on the object only once it is fully validated, so
            # that a rejected value cannot leave a bad self.alpha behind.
            # NOTE(review): overwriting self.alpha here looks like a leftover
            # from the constructor checks -- confirm it is intended.
            self.alpha = float(alpha)

        # validate shade
        check(isinstance(shade, (float, int)),
              'type(shade) = %s must be a number' % type(shade))

        # ensure that shade is between 0 and 1
        check(0.0 <= shade <= 1.0,
              'shade must be between 0 and 1; value is %f.' % shade)

        # validate fade
        check(isinstance(fade, (float, int)),
              'type(fade) = %s must be a number' % type(fade))

        # ensure that fade is between 0 and 1
        check(0.0 <= fade <= 1.0,
              'fade must be between 0 and 1; value is %f' % fade)

        # check that flip is a boolean
        if flip is not None:
            check(isinstance(flip, (bool)),
                  'type(flip) = %s; must be of type bool ' % type(flip))

        # scaling factors are the same for every glyph; compute them once
        color_scale = 1.0 - shade
        alpha_scale = 1.0 - fade

        # iterate over all positions and characters
        for p in self.ps:
            for c in self.cs:

                # only glyphs with a negative matrix value are restyled
                if not self.df.loc[p, c] < 0:
                    continue

                # get glyph
                g = self.glyph_df.loc[p, c]

                # fall back on the glyph's own color/alpha when the caller
                # did not specify one
                this_color = get_rgb(g.color) if color is None else color
                this_alpha = g.alpha if alpha is None else alpha

                # set glyph attributes
                g.set_attributes(color=this_color * color_scale,
                                 alpha=this_alpha * alpha_scale,
                                 flip=flip,
                                 **kwargs)
Exemplo n.º 20
0
def alignment_to_matrix(sequences,
                        counts=None,
                        to_type='counts',
                        background=None,
                        characters_to_ignore='.-',
                        center_weights=False,
                        pseudocount=1.0):
    """
    Generates matrix from a sequence alignment

    parameters
    ----------
    sequences: (list of strings)
        A list of sequences, all of which must be the same length

    counts: (None or list of numbers)
        If not None, must be a list, tuple, np.ndarray, or pd.Series of
        numbers the same length as sequences, containing the (nonnegative)
        number of times that each sequence was observed. If None, every
        sequence is counted once.

    to_type: (str)
        The type of matrix to output. Must be one of the entries of
        MATRIX_TYPES.

    background: (array, or df)
        Specification of background probabilities; passed unchanged to
        transform_matrix().

    characters_to_ignore: (str)
        Characters to ignore within sequences. This is often needed when
        creating matrices from gapped alignments.

    center_weights: (bool)
        Whether to subtract the mean of each row, but only if to_type=='weight'.

    pseudocount: (number >= 0.0)
        Pseudocount to use when converting from counts to probabilities;
        passed unchanged to transform_matrix().

    returns
    -------
    out_df: (dataframe)
        A matrix of the requested type.
    """

    # validate inputs

    # Make sure sequences is list-like
    check(isinstance(sequences, (list, tuple, np.ndarray, pd.Series)),
          'sequences must be a list, tuple, np.ndarray, or pd.Series.')
    sequences = list(sequences)

    # Make sure sequences has at least 1 element
    check(len(sequences) > 0, 'sequences must have length > 0.')

    # Make sure all elements are sequences
    check(all(isinstance(seq, str) for seq in sequences),
          'sequences must all be of type string')

    # validate characters_to_ignore
    check(isinstance(characters_to_ignore, str),
          'type(characters_to_ignore) = %s must be of type str' %
          type(characters_to_ignore))

    # validate center_weights
    check(isinstance(center_weights, bool),
          'type(center_weights) = %s; must be bool.' % type(center_weights))

    # Get sequence length
    L = len(sequences[0])

    # Make sure all sequences are the same length
    check(all(len(s) == L for s in sequences),
          'all elements of sequences must have the same length.')

    # validate counts as list-like
    check(
        isinstance(counts,
                   (list, tuple, np.ndarray, pd.Series)) or (counts is None),
        'counts must be None or a list, tuple, np.ndarray, or pd.Series.')

    # make sure counts has the same length as sequences
    if counts is None:
        counts = np.ones(len(sequences))
    else:
        check(
            len(counts) == len(sequences),
            'counts must be the same length as sequences;'
            'len(counts) = %d; len(sequences) = %d' %
            (len(counts), len(sequences)))

        # Lists, tuples and Series pass validation but do not support the
        # [:, np.newaxis] indexing used below, so convert to an ndarray.
        counts = np.asarray(counts, dtype=float)

    # validate background
    check(
        isinstance(background, (type([]), np.ndarray, pd.DataFrame))
        or (background is None),
        'type(background) = %s must be None or array-like or a dataframe.' %
        type(background))

    # Define valid types
    valid_types = MATRIX_TYPES.copy()

    # Check that to_type is valid
    check(to_type in valid_types,
          'to_type=%s; must be in %s' % (to_type, valid_types))

    # Create a 2D array of characters (one row per sequence)
    char_array = np.array([np.array(list(seq)) for seq in sequences])

    # Get list of unique characters
    unique_characters = np.unique(char_array.ravel())
    unique_characters.sort()

    # Remove characters to ignore
    columns = [c for c in unique_characters if c not in characters_to_ignore]
    index = list(range(L))

    # Initialize with floats: the (possibly fractional) weighted counts
    # assigned below are floats, and writing them into integer columns is
    # not reliable across pandas versions.
    counts_df = pd.DataFrame(data=0.0, columns=columns, index=index)

    # Sum of the number of occurrences of each character at each position,
    # weighting each sequence by its count
    weights = counts[:, np.newaxis]
    for c in columns:
        tmp_mat = (char_array == c).astype(float) * weights
        counts_df.loc[:, c] = tmp_mat.sum(axis=0)

    # Convert counts matrix to matrix of requested type
    out_df = transform_matrix(counts_df,
                              from_type='counts',
                              to_type=to_type,
                              pseudocount=pseudocount,
                              background=background)

    # Center values only if center_weights is True and to_type is 'weight'
    if center_weights and to_type == 'weight':
        out_df = transform_matrix(out_df, center_values=True)

    return out_df
Exemplo n.º 21
0
def sequence_to_matrix(seq,
                       cols=None,
                       alphabet=None,
                       is_iupac=False,
                       to_type='probability',
                       center_weights=False):
    """
    Generates a matrix from a sequence. With default keyword arguments,
    this is a one-hot-encoded version of the sequence provided. Alternatively,
    is_iupac=True allows users to get matrix models based in IUPAC motifs.

    parameters
    ----------

    seq: (str)
        Sequence from which to construct matrix.

    cols: (str or array-like or None)
        The characters to use for the matrix columns. If None, cols is
        constructed from the unique characters in seq. A str is split into
        its characters and a set is sorted. Overridden by alphabet and
        is_iupac.

    alphabet: (str or None)
        The alphabet used to determine the columns of the matrix. Must be a
        key of ALPHABET_DICT. Ignored if None. Overrides cols.

    is_iupac: (bool)
        If True, it is assumed that the sequence represents an IUPAC DNA
        string. In this case, cols is overridden, and alphabet must be None.

    to_type: (str)
        The type of matrix to output. Any entry of MATRIX_TYPES other than
        'counts'.

    center_weights: (bool)
        Whether to subtract the mean of each row, but only if to_type='weight'.

    returns
    -------
    seq_df: (dataframe)
        the matrix returned to the user.
    """

    # Define valid types
    valid_types = MATRIX_TYPES.copy()
    valid_types.remove('counts')

    # validate seq
    check(isinstance(seq, str),
          'type(seq) = %s must be of type str' % type(seq))

    # validate center_weights
    check(isinstance(center_weights, bool),
          'type(center_weights) = %s; must be bool.' % type(center_weights))

    # If cols is None, set to list of unique characters in sequence
    if cols is None:
        cols = sorted(set(seq))

    # Otherwise, validate cols
    else:
        cols_types = (str, list, set, np.ndarray)
        check(isinstance(cols, cols_types),
              'cols = %s must be None or a string, set, list, or np.ndarray'
              % repr(cols))

        # Normalize to a list: pandas refuses a bare str as column labels,
        # and a set has no defined order.
        cols = sorted(cols) if isinstance(cols, set) else list(cols)

    # If alphabet is specified, override cols
    if alphabet is not None:

        # Validate alphabet
        valid_alphabets = list(ALPHABET_DICT.keys())
        check(alphabet in valid_alphabets,
              'alphabet = %s; must be in %s.' % (alphabet, valid_alphabets))

        # Set cols
        cols = list(ALPHABET_DICT[alphabet])

    # validate to_type
    check(to_type in valid_types,
          'invalid to_type=%s; to_type must be in %s' % (to_type, valid_types))

    # validate is_iupac
    check(isinstance(is_iupac, bool),
          'type(is_iupac) = %s; must be bool.' % type(is_iupac))

    # If is_iupac, override alphabet and cols
    if is_iupac:

        # Check that alphabet has not been specified
        check(alphabet is None, 'must have alphabet=None if is_iupac=True')
        cols = list(ALPHABET_DICT['dna'])

    # Initialize counts dataframe
    L = len(seq)
    index = list(range(L))
    counts_df = pd.DataFrame(data=0.0, columns=cols, index=index)

    # If is_iupac, fill counts_df:
    if is_iupac:

        # Get list of valid IUPAC characters
        iupac_characters = list(IUPAC_DICT.keys())

        # Iterate over sequence positions
        for i, c in enumerate(seq):

            # Check that c is in the set of valid IUPAC characters
            check(
                c in iupac_characters,
                'character %s at position %d is not a valid IUPAC character;'
                'must be one of %s' % (c, i, iupac_characters))

            # Fill in a count for each base the IUPAC code stands for
            for b in IUPAC_DICT[c]:
                counts_df.loc[i, b] = 1.0

    # Otherwise, fill counts the normal way
    else:

        # Iterate over sequence positions
        for i, c in enumerate(seq):

            # Check that c is in columns
            check(
                c in cols,
                'character %s at position %d is not in cols=%s' % (c, i, cols))

            # Mark the observed character at this position
            counts_df.loc[i, c] = 1.0

    # Convert to requested type
    out_df = transform_matrix(counts_df,
                              pseudocount=0,
                              from_type='counts',
                              to_type=to_type)

    # Center values only if center_weights is True and to_type is 'weight'
    if center_weights and to_type == 'weight':
        out_df = transform_matrix(out_df, center_values=True)

    return out_df
Exemplo n.º 22
0
def saliency_to_matrix(seq, values, cols=None, alphabet=None):
    """
    Takes a sequence string and an array of values and outputs a values
    dataframe. The returned dataframe is a L by C matrix where C is the
    number of characters and L is sequence length. If matrix is denoted as
    S, i indexes positions and c indexes characters, then S_ic will be
    non-zero (equal to the value in the values array at position i) only if
    character c occurs at position i in sequence. All other elements of S
    are zero.

    example usage:

    saliency_mat = logomaker.saliency_to_matrix(sequence,values)
    logomaker.Logo(saliency_mat)

    parameters
    ----------

    seq: (str or array-like list of single characters)
        sequence for which values matrix is constructed

    values: (array-like list of numbers)
        array of values, one for each character in sequence

    cols: (str or array-like or None)
        The characters to use for the matrix columns. If None, cols is
        constructed from the unique characters in seq. If given, its set of
        characters must equal the set of characters in seq. Overridden by
        alphabet.

    alphabet: (str or None)
        The alphabet used to determine the columns of the matrix. Must be a
        key of ALPHABET_DICT. Ignored if None. Overrides cols.

    returns
    -------
    saliency_df: (dataframe)
        values matrix in the form of a dataframe

    """

    # try to convert seq to str; report through check() on failure
    if isinstance(seq, (list, np.ndarray, pd.Series)):
        try:
            seq = ''.join([str(x) for x in seq])
        except Exception:
            check(False, 'could not convert %s to type str' % repr(seq))
    else:
        try:
            seq = str(seq)
        except Exception:
            check(False, 'could not convert %s to type str' % repr(seq))

    # validate seq
    check(isinstance(seq, str),
          'type(seq) = %s must be of type str' % type(seq))

    # validate values: check that it is a list or array
    check(isinstance(values, (type([]), np.ndarray, pd.Series)),
          'type(values) = %s must be of type list' % type(values))

    # cast values as a list just to be sure what we're working with
    values = list(values)

    # check length of seq and values are equal
    check(
        len(seq) == len(values),
        'length of seq and values list must be equal.')

    # If cols is None, set to list of unique characters in sequence
    if cols is None:
        cols = sorted(set(seq))

    # Otherwise, validate cols
    else:
        cols_types = (str, list, set, np.ndarray)
        check(isinstance(cols, cols_types),
              'cols = %s must be None or a string, set, list, or np.ndarray'
              % repr(cols))

        # Normalize to a list: a bare str cannot be used as column labels
        # when the one-hot matrix is built, and a set has no defined order.
        cols = sorted(cols) if isinstance(cols, set) else list(cols)

        # perform additional checks to validate cols
        check(
            len(set(cols)) == len(set(seq)),
            'length of set of unique characters must be equal for "cols " and "seq"'
        )
        check(
            set(cols) == set(seq),
            'unique characters for "cols" and "seq" must be equal.')

    # If alphabet is specified, override cols
    if alphabet is not None:

        # Validate alphabet
        valid_alphabets = list(ALPHABET_DICT.keys())
        check(alphabet in valid_alphabets,
              'alphabet = %s; must be in %s.' % (alphabet, valid_alphabets))

        # Set cols
        cols = list(ALPHABET_DICT[alphabet])

    # turn seq into binary one-hot encoded matrix.
    ohe_sequence = sequence_to_matrix(seq, cols=cols)

    # scale each row of the one-hot matrix by the value at that position
    saliency_df = ohe_sequence.copy()
    saliency_df.loc[:, :] = ohe_sequence.values * \
                            np.array(values)[:, np.newaxis]

    return saliency_df
Exemplo n.º 23
0
def validate_matrix(df, matrix_type=None, allow_nan=False):
    """
    Checks to make sure that the input dataframe, df, represents a valid
    matrix, i.e., an object that can be displayed as a logo. The input
    dataframe itself is never modified; all clean-up is done on a copy.

    parameters
    ----------

    df: (dataframe)
        A pandas dataframe where each row represents an (integer) position
        and each column represents to a (single) character.

    matrix_type: (None or str)
        If 'probability', validates df as a probability matrix, i.e., all
        elements are >= 0 and no row sums to (nearly) zero; rows that do
        not sum to one are renormalized. If 'information', validates df as
        an information matrix, i.e., all elements >= 0.

    allow_nan: (bool)
        Whether to allow NaN entries in the matrix.

    returns
    -------
    out_df: (dataframe)
        A cleaned-up version of df (if possible): columns sorted
        alphabetically, index named 'pos', and, for probability matrices,
        rows normalized to sum to one.
    """

    # check that df is a dataframe
    check(
        isinstance(df, pd.DataFrame),
        'out_df needs to be a valid pandas out_df, '
        'out_df entered: %s' % type(df))

    # create copy of df so we don't overwrite the user's data
    out_df = df.copy()

    # check that type is valid
    check(
        matrix_type in {None, 'probability', 'information'},
        'matrix_type = %s; must be None, "probability", or "information"' %
        matrix_type)

    # check that allow_nan is boolean
    check(isinstance(allow_nan, bool),
          'allow_nan must be of type bool; is type %s.' % type(allow_nan))

    if not allow_nan:
        # make sure all entries are finite numbers
        check(
            np.isfinite(out_df.values).all(),
            'some matrix elements are not finite. '
            'Set allow_nan=True to allow this.')

    # make sure the matrix has at least one row and one column
    check(out_df.shape[0] >= 1, 'df has zero rows. Needs multiple rows.')
    check(out_df.shape[1] >= 1, 'df has zero columns. Needs multiple columns.')

    # check that all column names, once converted to str, have length 1
    for i, col in enumerate(out_df.columns):

        # convert from unicode to string for python 2
        col = str(col)
        check(isinstance(col, str),
              'column number %d is of type %s; must be a str' % (i, col))
        check(
            len(col) == 1, 'column %d is %s and has length %d; ' %
            (i, repr(col), len(col)) + 'must have length 1.')

    # sort columns alphabetically
    out_df = out_df[sorted(out_df.columns)]

    # name out_df.index as 'pos'
    out_df.index.name = 'pos'

    # try casting df.index as type int; non-numeric labels raise ValueError
    # rather than TypeError, so both are reported through check()
    try:
        int_index = out_df.index.astype(int)
    except (TypeError, ValueError):
        int_index = None
    check(
        int_index is not None, 'could not convert df.index to type int. Check that '
        'all positions have integer numerical values.')

    # make sure that df.index values have not changed
    check(
        all(int_index == out_df.index),
        'could not convert df.index values to int without changing'
        'some values. Make sure that df.index values are integers.')

    # check that all index values are unique
    check(
        len(set(out_df.index)) == len(out_df.index),
        'not all values of df.index are unique. Make sure all are unique.')

    # if type is 'information', make sure elements are nonnegative
    # (compare with ==; identity comparison of strings is unreliable)
    if matrix_type == 'information':

        # make sure all elements are nonnegative
        check(all(out_df.values.ravel() >= 0), 'not all values in df are >=0.')

    # if type is 'probability', make sure elements are valid probabilities
    elif matrix_type == 'probability':

        # make sure all values are non-negative
        check(all(out_df.values.ravel() >= 0), 'not all values in df are >=0.')

        # compute the sum of every row
        row_sums = out_df.sum(axis=1)

        # if any sums are close to zero, abort
        check(not any(np.isclose(row_sums.values, 0.0)),
              'some columns in df sum to nearly zero.')

        # if any sums are not close to one, renormalize all rows. This is
        # done on out_df so that the caller's df stays untouched and the
        # column order / index name set above are kept.
        if not all(np.isclose(row_sums.values, 1.0)):
            print('in validate_matrix(): Row sums in df are not close to 1. '
                  'Reormalizing rows...')
            out_df = out_df.div(row_sums, axis=0)

    # nothing more to check if matrix_type is None

    # return cleaned-up out_df
    return out_df
Exemplo n.º 24
0
    def _input_checks(self):
        """
        Validates the attributes that were set from the constructor
        arguments and normalizes several of them in place (floor, ceiling,
        edgewidth and alpha become floats; flip and mirror become bools;
        color and edgecolor are passed through get_rgb(); figsize becomes
        a tuple). Problems are reported through check().
        """

        # validate p: anything that int() accepts is allowed. A value that
        # int() rejects is reported through check() like every other
        # problem instead of surfacing as a raw conversion error.
        try:
            int(self.p)
            p_is_numeric = True
        except (TypeError, ValueError, OverflowError):
            p_is_numeric = False
        check(p_is_numeric,
              'type(p) = %s must be a number' % type(self.p))

        # check c is of type str
        check(isinstance(self.c, str),
              'type(c) = %s; must be of type str ' % type(self.c))

        # validate floor
        check(isinstance(self.floor, (float, int)),
              'type(floor) = %s must be a number' % type(self.floor))
        self.floor = float(self.floor)

        # validate ceiling
        check(isinstance(self.ceiling, (float, int)),
              'type(ceiling) = %s must be a number' % type(self.ceiling))
        self.ceiling = float(self.ceiling)

        # check floor <= ceiling
        check(
            self.floor <= self.ceiling,
            'must have floor <= ceiling. Currently, '
            'floor=%f, ceiling=%f' % (self.floor, self.ceiling))

        # check ax
        check((self.ax is None) or isinstance(self.ax, Axes),
              'ax must be either a matplotlib Axes object or None.')

        # validate width. The message uses repr() rather than %d: %d
        # truncates fractional values and raises on inf/nan while the
        # message is being built, i.e. before check() even runs.
        check(
            isinstance(self.width, (float, int)),
            'type(width) = %s; must be of type float or int ' %
            type(self.width))
        check(self.width > 0,
              "width = %s must be > 0 " % repr(self.width))

        # validate vpad (same remark about the message format as for width)
        check(
            isinstance(self.vpad, (float, int)),
            'type(vpad) = %s; must be of type float or int ' % type(self.vpad))
        check(0 <= self.vpad < 1,
              "vpad = %s must be >= 0 and < 1 " % repr(self.vpad))

        # validate font_name
        check(
            isinstance(self.font_name, str),
            'type(font_name) = %s must be of type str' % type(self.font_name))

        # check font_weight
        check(
            isinstance(self.font_weight, (str, int)),
            'type(font_weight) = %s should either be a string or an int' %
            (type(self.font_weight)))
        if isinstance(self.font_weight, str):
            check(self.font_weight in VALID_FONT_WEIGHT_STRINGS,
                  'font_weight must be one of %s' % VALID_FONT_WEIGHT_STRINGS)
        elif isinstance(self.font_weight, int):
            check(0 <= self.font_weight <= 1000,
                  'font_weight must be in range [0,1000]')

        # check color safely
        self.color = get_rgb(self.color)

        # validate edgecolor safely
        self.edgecolor = get_rgb(self.edgecolor)

        # Check that edgewidth is a number
        check(isinstance(self.edgewidth, (float, int)),
              'type(edgewidth) = %s must be a number' % type(self.edgewidth))
        self.edgewidth = float(self.edgewidth)

        # Check that edgewidth is nonnegative
        check(self.edgewidth >= 0,
              ' edgewidth must be >= 0; is %f' % self.edgewidth)

        # check dont_stretch_more_than is of type str
        check(
            isinstance(self.dont_stretch_more_than, str),
            'type(dont_stretch_more_than) = %s; must be of type str ' %
            type(self.dont_stretch_more_than))

        # check that dont_stretch_more_than is a single character
        check(
            len(self.dont_stretch_more_than) == 1,
            'dont_stretch_more_than must have length 1; '
            'currently len(dont_stretch_more_than)=%d' %
            len(self.dont_stretch_more_than))

        # check that flip is a boolean
        check(isinstance(self.flip, (bool, np.bool_)),
              'type(flip) = %s; must be of type bool ' % type(self.flip))
        self.flip = bool(self.flip)

        # check that mirror is a boolean
        check(isinstance(self.mirror, (bool, np.bool_)),
              'type(mirror) = %s; must be of type bool ' % type(self.mirror))
        self.mirror = bool(self.mirror)

        # validate zorder
        if self.zorder is not None:
            check(
                isinstance(self.zorder, (float, int)),
                'type(zorder) = %s; must be of type float or int ' %
                type(self.zorder))

        # Check alpha is a number
        check(isinstance(self.alpha, (float, int)),
              'type(alpha) = %s must be a float or int' % type(self.alpha))
        self.alpha = float(self.alpha)

        # Check 0 <= alpha <= 1.0
        check(0 <= self.alpha <= 1.0,
              'alpha must be between 0.0 and 1.0 (inclusive)')

        # validate that figsize is array-like
        check(
            isinstance(self.figsize, (tuple, list, np.ndarray)),
            'type(figsize) = %s; figsize must be array-like.' %
            type(self.figsize))
        self.figsize = tuple(self.figsize)  # Just to pin down variable type.

        # validate length of figsize
        check(len(self.figsize) == 2, 'figsize must have length two.')

        # validate that each element of figsize is a number
        check(
            all(isinstance(n, (int, float)) and n > 0 for n in self.figsize),
            'all elements of figsize array must be numbers > 0.')
Exemplo n.º 25
0
def get_color_dict(color_scheme, chars):
    """
    Return a color_dict constructed from a user-specified color_scheme and
    a list of characters

    parameters
    ----------
    color_scheme: (None, dict, str, or array-like)
        Specification of the colors to use:
        - None: a default scheme is looked up in CHARS_TO_COLORS_DICT using
          the sorted tuple of chars, falling back to 'gray'.
        - dict: keys must be of type str; the dict is expanded with
          _expand_color_dict() and every value is converted with to_rgb().
        - str: either a key of COLOR_SCHEME_DICT, or any color that
          to_rgb() recognizes (applied to every character in chars).
        - list, tuple, or np.ndarray: a length-3 RGB value applied to
          every character in chars.

    chars: (str or array-like)
        Characters for which colors are needed. Must have length >= 1 and
        each entry must be a single character once converted to str.

    returns
    -------
    color_dict: (dict)
        Dictionary mapping characters to colors. Every character in chars
        is guaranteed to be a key; characters not covered by color_scheme
        are assigned black, and a warning is issued for each.

    Validation failures are reported through check(), which is defined
    elsewhere in this file (presumably it raises an error -- confirm).
    """

    # Check that chars is a str or array-like
    check(isinstance(chars, (str, list, tuple, np.ndarray)),
          "chars must be a str or be array-like")

    # Check that chars has length of at least 1
    check(len(chars) >= 1, 'chars must have length >= 1')

    # Sort characters; the sorted tuple is also the lookup key used for
    # default color schemes below
    chars = sorted(chars)

    # Check that all entries in chars are strings of length 1
    for i, c in enumerate(chars):
        c = str(c)  # convert from unicode to string to work with python 2
        check(
            len(c) == 1,
            'entry number %d in chars is %s; ' % (i, repr(c)) +
            'must instead be a single character')

    # if color_scheme is None, choose default based on chars
    if color_scheme is None:
        color_scheme = CHARS_TO_COLORS_DICT.get(tuple(chars), 'gray')
        color_dict = get_color_dict(color_scheme, chars)

    # otherwise, if color_scheme is a dictionary
    elif isinstance(color_scheme, dict):

        # make sure all the keys are strings
        for key in color_scheme:
            check(
                isinstance(key, str),
                'color_scheme dict contains a key (%s) ' % repr(key) +
                'that is not of type str.')

        # expand the dictionary
        color_dict = _expand_color_dict(color_scheme)

        # set all values to rgb
        for key in color_dict:
            color_dict[key] = to_rgb(color_dict[key])

    # otherwise, if color_scheme is a string, it must either be a valid key in
    # COLOR_SCHEME_DICT or a named matplotlib color
    elif isinstance(color_scheme, str):

        # If a valid key, get the color scheme dict and expand
        if color_scheme in COLOR_SCHEME_DICT:
            color_dict = _expand_color_dict(COLOR_SCHEME_DICT[color_scheme])

            # Force each color to rgb
            for key in color_dict:
                color_dict[key] = np.array(to_rgb(color_dict[key]))

        # Otherwise, try to convert color_scheme to RGB value, then create
        # color_dict using keys from chars and setting all values to RGB value.
        else:
            try:
                rgb = to_rgb(color_scheme)

            # to_rgb raises ValueError when it does not recognize
            # color_scheme. Only that error is translated into a user-facing
            # message; anything else (e.g. KeyboardInterrupt) propagates.
            except ValueError:
                check(False, 'invalid choice: color_scheme=%s' % color_scheme)

            else:
                color_dict = {c: rgb for c in chars}

    # Otherwise, if color_scheme is array-like, it should be an RGB value
    elif isinstance(color_scheme, (list, tuple, np.ndarray)):

        # color_scheme must have length 3 to be RGB
        check(
            len(color_scheme) == 3,
            'color_scheme, if array, must be of length 3.')

        # Cast color_scheme as an RGB array. np.array() copies the values;
        # np.ndarray() would instead interpret them as an array *shape*.
        rgb = np.array(color_scheme)

        # Construct color_dict with rgb as value for every character in chars
        color_dict = {c: rgb for c in chars}

    # Otherwise, raise error
    else:
        check(False,
              'Error: color_scheme has invalid type %s' % type(color_scheme))

    # Any character in chars that is missing from color_dict is added with
    # color 'black', warning the user about each one
    for c in chars:
        if c not in color_dict:
            warnings.warn(
                " Warning: Character '%s' is not in color_dict. " % c +
                "Using black.")
            color_dict[c] = to_rgb('black')

    return color_dict
Exemplo n.º 26
0
    def _input_checks(self):
        """
        Validate parameters passed to the Logo constructor EXCEPT for
        color_scheme; that is validated in the Logo constructor

        Each attribute is tested with check(), which is defined elsewhere
        in this file (presumably it raises an error when the condition is
        False -- confirm). Besides validating, this method normalizes two
        attributes in place:

        - self.df is replaced by the output of validate_matrix(self.df)
        - self.figsize is converted to a tuple

        returns
        -------
        None
        """

        # validate dataframe
        self.df = validate_matrix(self.df)

        # CANNOT validate color_scheme here; this is done in Logo constructor.

        # validate that font_name is a str
        check(
            isinstance(self.font_name, str),
            'type(font_name) = %s must be of type str' % type(self.font_name))

        # validate stack_order
        valid_stack_orders = {'big_on_top', 'small_on_top', 'fixed'}
        check(
            self.stack_order in valid_stack_orders,
            'stack_order = %s; must be in %s.' %
            (self.stack_order, valid_stack_orders))

        # check that center_values is a boolean
        check(
            isinstance(self.center_values, bool),
            'type(center_values) = %s; must be of type bool.' %
            type(self.center_values))

        # check baseline_width is a number
        check(
            isinstance(self.baseline_width, (int, float)),
            'type(baseline_width) = %s must be of type number' %
            (type(self.baseline_width)))

        # check baseline_width >= 0.0
        check(self.baseline_width >= 0.0,
              'baseline_width = %s must be >= 0.0' % self.baseline_width)

        # check that flip_below is boolean
        check(
            isinstance(self.flip_below, bool),
            'type(flip_below) = %s; must be of type bool ' %
            type(self.flip_below))

        # validate that shade_below is a number
        check(
            isinstance(self.shade_below, (float, int)),
            'type(shade_below) = %s must be of type float' %
            type(self.shade_below))

        # validate that shade_below is between 0 and 1
        check(0.0 <= self.shade_below <= 1.0,
              'shade_below must be between 0 and 1')

        # validate that fade_below is a number
        check(
            isinstance(self.fade_below, (float, int)),
            'type(fade_below) = %s must be of type float' %
            type(self.fade_below))

        # validate that fade_below is between 0 and 1
        check(0.0 <= self.fade_below <= 1.0,
              'fade_below must be between 0 and 1')

        # validate that fade_probabilities is boolean
        check(
            isinstance(self.fade_probabilities, bool),
            'type(fade_probabilities) = %s; must be of type bool ' %
            type(self.fade_probabilities))

        # validate that vpad is a number
        check(isinstance(self.vpad, (float, int)),
              'type(vpad) = %s must be of type float' % type(self.vpad))

        # validate that vpad is between 0 and 1
        check(0.0 <= self.vpad <= 1.0, 'vpad must be between 0 and 1')

        # validate that vsep is a number
        check(
            isinstance(self.vsep, (float, int)),
            'type(vsep) = %s; must be of type float or int ' % type(self.vsep))

        # validate that vsep is >= 0. The value is formatted with %s rather
        # than %d so that a fractional vsep (e.g. -0.5) is reported exactly
        # instead of being truncated to an integer in the message.
        check(self.vsep >= 0, 'vsep = %s must be >= 0' % self.vsep)

        # validate that alpha is a number
        check(isinstance(self.alpha, (float, int)),
              'type(alpha) = %s must be of type float' % type(self.alpha))

        # validate that alpha is between 0 and 1
        check(0.0 <= self.alpha <= 1.0, 'alpha must be between 0 and 1')

        # validate show_spines is None or boolean
        check(
            isinstance(self.show_spines, bool) or (self.show_spines is None),
            'show_spines = %s; show_spines must be None or boolean.' %
            repr(self.show_spines))

        # validate ax
        check(
            isinstance(self.ax, Axes) or (self.ax is None),
            'ax = %s; ax must be None or a matplotlib.Axes object.' %
            repr(self.ax))

        # validate zorder
        check(
            isinstance(self.zorder, (float, int)),
            'type(zorder) = %s; zorder must be a number.' % type(self.zorder))

        # validate that figsize is array-like
        check(
            isinstance(self.figsize, (tuple, list, np.ndarray)),
            'type(figsize) = %s; figsize must be array-like.' %
            type(self.figsize))
        self.figsize = tuple(self.figsize)  # Just to pin down variable type.

        # validate length of figsize
        check(len(self.figsize) == 2, 'figsize must have length two.')

        # validate that each element of figsize is a number
        check(
            all([isinstance(n, (int, float)) and n > 0 for n in self.figsize]),
            'all elements of figsize array must be numbers > 0.')