Пример #1
0
def chromosome_collections(df, y_positions, height, **kwargs):
    """
    Yield two BrokenBarHCollection objects per sample, ready to be added to
    an Axes object.

    Rows are grouped by the 'sample' column. For every sample the first
    collection is anchored at ``y_positions[sample]`` and colored from
    'colors1'; the second is anchored 0.4 below that and colored from
    'colors2'. The sample name is printed as each group is processed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must at least have columns ['sample', 'start', 'end', 'colors1',
        'colors2']. If there is no column 'width', it is calculated from
        start/end for the duration of the iteration and removed afterwards.
    y_positions : dict
        Keys are sample names, values are the y-value at which to anchor the
        first BrokenBarHCollection of that sample.
    height : float
        Height of each BrokenBarHCollection
    Additional kwargs are passed to BrokenBarHCollection
    """
    del_width = 'width' not in df.columns
    if del_width:
        df['width'] = df['end'] - df['start']
    try:
        for s, group in df.groupby('sample'):
            print(s)
            xranges = group[['start', 'width']].values
            yield BrokenBarHCollection(xranges,
                                       (y_positions[s], height),
                                       facecolors=group['colors1'],
                                       **kwargs)
            yield BrokenBarHCollection(xranges,
                                       (y_positions[s] - 0.4, height),
                                       facecolors=group['colors2'],
                                       **kwargs)
    finally:
        # Also runs when the consumer stops iterating early or a lookup
        # raises, so the caller's frame never keeps the temporary column.
        if del_width:
            del df['width']
Пример #2
0
def regions_to_hbar(region_list_chr):
    """Build Matplotlib BrokenBarHCollection pairs from UPD sites data.

    Every entry of ``region_list_chr`` is a mapping with the keys
    "xranges", "upper", "lower" and "chr". Two collections are created per
    entry: an upper one with yrange (0.52, 1) colored from "upper" and a
    lower one with yrange (0, 0.48) colored from "lower"; both are labelled
    with "chr". Isodisomy is meant to show as one block with one color,
    heterodisomy as two adjacent bars with two colors, and the gap left
    between the two bars gives a thin middle line for aesthetics.

    Returns a list of ``[upper_collection, lower_collection]`` pairs in
    input order.
    """
    def _bar(region, yrange, color_key):
        # One horizontal bar collection for the given half of the region.
        return BrokenBarHCollection(region["xranges"], yrange,
                                    facecolors=region[color_key],
                                    label=region["chr"])

    return [
        [_bar(region, (0.52, 1), "upper"), _bar(region, (0, 0.48), "lower")]
        for region in region_list_chr
    ]
Пример #3
0
 def plot(self, ax, chrom_region, region_start, region_end):
     """Draw the bands that overlap a genomic region as colored bars on *ax*.

     The region is wrapped in a ``GenomeRange``; if its chromosome name is
     not a key of ``self.interval_tree`` the range is asked to change its
     chromosome naming before the lookup. Each interval found contributes
     one bar whose color comes from ``self.lookup_band_color``. Band names
     are drawn with ``self.plot_text`` unless the 'show_band_name' property
     is 'no' or the region is 80 Mb or longer.

     Side effects: stores *ax* on ``self.ax``, adds the collection to *ax*,
     sets its x/y limits and finally calls ``self.plot_label()``.
     """
     self.ax = ax
     grange = GenomeRange(chrom_region, region_start, region_end)
     if grange.chrom not in self.interval_tree:
         grange.change_chrom_names()
     bands_in_region = sorted(
         self.interval_tree[grange.chrom][grange.start:grange.end])
     band_height = self.properties['height']
     xranges, colors = [], []
     for itv in bands_in_region:
         start, end = itv.begin, itv.end
         band_name, band_type = itv.data[:2]
         band_color = self.lookup_band_color(band_type)
         # BrokenBarHCollection takes (xmin, xwidth) pairs, not (start, end);
         # passing the end coordinate as width made every bar far too wide.
         xranges.append((start, end - start))
         colors.append(band_color)
         if (self.properties['show_band_name'] != 'no'
                 and grange.length < 80_000_000):
             self.plot_text(band_name, start, end, band_color)
     coll = BrokenBarHCollection(xranges, (0, band_height),
                                 facecolors=colors,
                                 linewidths=self.properties['border_width'],
                                 edgecolors=self.properties['border_color'])
     ax.add_collection(coll)
     # Leave a small margin above and below the bars.
     ax.set_ylim(-0.1, band_height + 0.1)
     ax.set_xlim(region_start, region_end)
     self.plot_label()
Пример #4
0
def _chromosome_collections(df, y_positions, height, **kwargs):
    """

    Yields BrokenBarHCollection of features that can be added to an Axes
    object.

    Parameters
    ----------

    df : pandas.DataFrame
        Must at least have columns ['chrom', 'start', 'end', 'colors']. If no
        column 'width', it will be calculated from start/end and removed
        again once iteration ends.

    y_positions : dict
        Values are the y-value at which to anchor the BrokenBarHCollection.
        The key looked up for each group is ``"chr" + chrom``, where
        ``chrom`` is the group's value in the 'chrom' column.

    height : float
        Height of each BrokenBarHCollection

    Additional kwargs are passed to BrokenBarHCollection
    """
    del_width = "width" not in df.columns
    if del_width:
        df["width"] = df["end"] - df["start"]
    try:
        for chrom, group in df.groupby("chrom"):
            yrange = (y_positions["chr" + chrom], height)
            xranges = group[["start", "width"]].values
            yield BrokenBarHCollection(xranges,
                                       yrange,
                                       facecolors=group["colors"],
                                       **kwargs)
    finally:
        # Also runs when the consumer stops iterating early or a lookup
        # raises, so the caller's frame never keeps the temporary column.
        if del_width:
            del df["width"]
Пример #5
0
    def plot_bars(self, ax):
        '''
        Add one bar collection per chromosome and dataset row to *ax*.

        Rows are stacked upwards from y = 0, each ``self.height`` tall and
        separated by ``self.spacing``. Returns ``(yticks, yticklabels)``:
        the vertical center of every row and its label, which is the
        chromosome name for the last row of a chromosome and '' otherwise.
        '''

        def chrom_order(name):
            # Zero-pad numeric names so that 1, 2, .. 10 sort numerically;
            # non-numeric names (e.g. M) are compared as-is.
            bare = name.replace('chr', '')
            return bare.rjust(2, '0') if bare.isdigit() else bare

        keys = sorted(self.xranges, key=chrom_order)
        colors = self.get_colors(len(self.datasets))

        yticks, yticklabels = [], []
        ystart = 0

        for chrom in keys:
            for i, xranges in enumerate(self.xranges[chrom]):
                yrange = (ystart, self.height)
                ax.add_collection(
                    BrokenBarHCollection(xranges, yrange, color=colors[i]))

                # Tick at the vertical middle of this row.
                yticks.append(yrange[0] + yrange[1] / 2.)

                # Only the topmost row of a chromosome carries its name.
                is_top_row = i == (len(self.xranges[chrom]) - 1)
                yticklabels.append(chrom if is_top_row else '')

                # Move up for the next row.
                ystart += self.height + self.spacing

        return yticks, yticklabels
Пример #6
0
def chromosome_collections(df: pandas.DataFrame, y_positions: dict, height: float,
                           to_log: bool=False, **kwargs):
    """
    Yields BrokenBarHCollection of features that can be added to an Axes object

    One collection is produced per distinct value of the 'chrom' column.

    :param bool to_log: whether to print each chromosome name as it is processed
    :param DataFrame df: must at least have columns ['chrom', 'chromStart', 'chromEnd', 'colors']. If no column 'width',
    it will be calculated from chromStart/chromEnd and removed again once iteration ends
    :param dict y_positions: keys are chromosomes, value are y-value at which to anchor the BrokenBarHCollection
    :param float height: height of each BrokenBarHCollection
    :param kwargs: are passed to BrokenBarHCollection
    :return: BrokenBarHCollection
    """
    del_width = 'width' not in df.columns
    if del_width:
        df['width'] = df['chromEnd'] - df['chromStart']
    try:
        for chrom, group in df.groupby('chrom'):
            if to_log:
                print(chrom)
            yrange = (y_positions[chrom], height)
            xranges = group[['chromStart', 'width']].values
            yield BrokenBarHCollection(
                xranges, yrange, facecolors=group['colors'], **kwargs)
    finally:
        # Also runs when the consumer stops iterating early or a lookup
        # raises, so the caller's frame never keeps the temporary column.
        if del_width:
            del df['width']
Пример #7
0
 def plot_thresholded_envelope(self, ax_main: Axes, ax_dist: Axes):
     """Plot the envelope ``self.e_t`` with thresholds and segment bands.

     On *ax_main*: the envelope signal, horizontal lines at the high and
     low ripple thresholds of ``self.reference_maker``, a scalebar, a title,
     bars for the reference segments that intersect the current x-limits
     (spanning y = 0 up to the low threshold), and a dot at every sample
     where the envelope crosses the low threshold.
     On *ax_dist*: the envelope distribution, with the y-limits copied from
     *ax_main* so both axes line up.
     """
     logger.info("Plotting thresholded envelope..")
     self.plot_signal(self.e_t,
                      ax=ax_main,
                      color=envelope_color,
                      lw=thicc_lw)
     ref_maker = self.reference_maker
     ax_main.hlines(ref_maker.ripple_threshold_high,
                    *self.time_range,
                    lw=thin_lw)
     ax_main.hlines(ref_maker.ripple_threshold_low,
                    *self.time_range,
                    lw=thin_lw)
     add_scalebar(ax_main)
     add_title(ax_main, "Thresholds $T$", threshold_color, y=0.58)

     # Segment bands, restricted to what is currently visible.
     all_segs = self.reference_segs_test
     shown_segs = all_segs.intersection(ax_main.get_xlim())
     segment_spans = list(zip(shown_segs.start, shown_segs.duration))
     ax_main.add_collection(
         BrokenBarHCollection(
             xranges=segment_spans,
             yrange=(0, ref_maker.ripple_threshold_low),
             facecolors=segment_color,
             alpha=segment_alpha,
         ))

     # Find and plot crossings of lower threshold
     above_low = self.e_t > ref_maker.ripple_threshold_low
     crossing_samples = nonzero(diff(above_low))[0]
     crossing_times = crossing_samples / self.fs
     crossing_levels = [ref_maker.ripple_threshold_low] * len(crossing_times)
     ax_main.plot(crossing_times, crossing_levels, ".", c="black")
     logger.info("Done")

     logger.info("Plotting envelope density..")
     self.plot_envelope_dist(ax_dist)
     logger.info("Done")
     ax_dist.set_ylim(ax_main.get_ylim())
Пример #8
0
    def render(self, ax, chrom, pos=None):
        """Draw the banded bar of one chromosome, with its centromere, on *ax*.

        Parameters
        ----------
        ax : Axes
            Target axes; receives the bar collection, two centromere
            polygons, the limits and a single y tick labelled with *chrom*.
        chrom : hashable
            Key into ``self.xranges``, ``self.colors`` and
            ``self.centromeres``. If it is missing from any of them an error
            message is printed and nothing is drawn.
        pos : number, optional
            x position of a red vertical marker line. Any falsy value
            (``None`` as well as ``0``) draws no marker.
        """
        try:
            xranges = self.xranges[chrom]
            colors = self.colors[chrom]
            centromeres = self.centromeres[chrom]
        except KeyError:
            # Only an unknown chromosome is reported here; unrelated errors
            # are no longer swallowed under a misleading message.
            print("Error: No chromosome named: {}".format(chrom))
            return

        yranges = (0, 0.5)

        coll = BrokenBarHCollection(xranges,
                                    yranges,
                                    facecolors=colors,
                                    edgecolors='black',
                                    linewidths=0.5)
        ax.add_collection(coll)

        if pos:
            ax.axvline(pos, color='red', lw=4)

        # Right edge of the last bar; pad the x-axis by 5% on both sides.
        w = xranges[-1][0] + xranges[-1][1]
        pad = w * 0.05
        ax.set_xlim(0 - pad, w + pad)
        ax.xaxis.set_visible(False)

        center = yranges[0] + yranges[1] / 2.

        cent_style = dict(closed=True,
                          fc=color_lookup['acen'],
                          ec='black',
                          linewidth=0.5)

        # First centromere entry: triangle with its base on the left edge and
        # its tip at mid-height on the right.
        x0, y0 = centromeres[0][0], yranges[0]
        x1, y1 = centromeres[0][0], yranges[1]
        x2, y2 = centromeres[0][0] + centromeres[0][1], center
        ax.add_patch(
            Polygon(np.array([[x0, y0], [x1, y1], [x2, y2]]), **cent_style))

        # Second centromere entry: mirrored triangle, tip on the left.
        x0, y0 = centromeres[1][0], center
        x1, y1 = centromeres[1][0] + centromeres[1][1], yranges[1]
        x2, y2 = centromeres[1][0] + centromeres[1][1], yranges[0]
        ax.add_patch(
            Polygon(np.array([[x0, y0], [x1, y1], [x2, y2]]), **cent_style))

        ax.set_yticks([center])
        ax.set_yticklabels([chrom])
        ax.set_ylim(-0.2, 0.7)

        # Hide the frame around the plot.
        for loc in ['top', 'left', 'right', 'bottom']:
            ax.spines[loc].set_color('none')
Пример #9
0
 def plot_sample_chrom(i, sample):
     """Draw the given coordinates and colors as a horizontal series.

     *sample* provides ``start``, ``end`` and ``"color"``; one bar is drawn
     per start/end pair in row *i*, i.e. from y = i to y = i + 1, on the
     enclosing scope's ``axis``.
     """
     xranges = [(start, end - start)
                for start, end in zip(sample.start, sample.end)]
     # yrange is (ymin, height), not (ymin, ymax): a height of 1 keeps row i
     # within [i, i + 1] instead of letting it bleed into the rows above.
     bars = BrokenBarHCollection(xranges, (i, 1),
                                 edgecolors="none",
                                 facecolors=sample["color"])
     axis.add_collection(bars)
Пример #10
0
def _chromosome_collections(df,
                            y_positions,
                            height,
                            print_names=False,
                            **kwargs):
    """

    Yields BrokenBarHCollection of features that can be added to an Axes
    object.

    Parameters
    ----------

    df : pandas.DataFrame
        Must at least have columns ['chrom', 'start', 'end', 'colors'], plus
        'name' when ``print_names`` is set. If no column 'width', it will be
        calculated from start/end and removed again once iteration ends.

    y_positions : dict
        Keys are chromosomes, values are y-value at which to anchor the
        BrokenBarHCollection

    height : float
        Height of each BrokenBarHCollection

    print_names : bool
        If true, the names of all features whose color is not '#ffffff' are
        written on the current axes (``plt.gca()``), side by side starting
        at x = 0 and 2 units below the chromosome's y position, each in its
        own feature color.

    Additional kwargs are passed to BrokenBarHCollection
    """
    del_width = 'width' not in df.columns
    if del_width:
        df['width'] = df['end'] - df['start']
    try:
        for chrom, group in df.groupby('chrom'):
            yrange = (y_positions[chrom], height)
            xranges = group[['start', 'width']].values
            if print_names:
                names = [
                    row for row in group[['name', 'colors']].values
                    if row[1] != '#ffffff'
                ]
                ax = plt.gca()
                t = ax.transData
                canvas = ax.figure.canvas
                # Plot names with different colors and spaced
                for name, color in names:
                    text = ax.text(0,
                                   y_positions[chrom] - 2,
                                   f'{name}',
                                   color=color,
                                   transform=t,
                                   fontsize=10)
                    # The text must be drawn before its extent is known; the
                    # next name is then shifted right by that rendered width.
                    text.draw(canvas.get_renderer())
                    ex = text.get_window_extent()
                    t = transforms.offset_copy(text.get_transform(),
                                               x=ex.width,
                                               units='dots')
            yield BrokenBarHCollection(xranges,
                                       yrange,
                                       facecolors=group['colors'],
                                       **kwargs)
    finally:
        # Also runs when the consumer stops iterating early or a lookup
        # raises, so the caller's frame never keeps the temporary column.
        if del_width:
            del df['width']
Пример #11
0
def contiguity_plot():
    """Yield one BrokenBarHCollection per chromosome of the contig regions.

    Reads the enclosing scope's ``chrs_contigs_regions`` frame (columns
    'chrom', 'chrom_start', 'width', 'colors') and ``chrom_centers``, which
    gives the y anchor of each chromosome. Every bar is 0.5 high.
    """
    bar_height = 0.5

    for chrom_name, rows in chrs_contigs_regions.groupby('chrom'):
        bar_yrange = (chrom_centers[chrom_name], bar_height)
        # Element-wise pass-through of the widths, kept as in the original.
        rows['width'] = rows['width'].apply(lambda value: value)
        spans = rows[['chrom_start', 'width']].values
        yield BrokenBarHCollection(spans, bar_yrange,
                                   facecolors=rows['colors'])
Пример #12
0
    def plot_enso_background(self, ensoindices=None, lag=0, **optinfo):
        """
        Plots colored stripes in the background of plot to represent ENSO phases.

        Parameters
        ----------
        ensoindices : {array-like}, optional
            Array of ENSO indices (``+1`` for El Niño, ``0`` for Neutral and
            ``-1`` for La Niña episodes). It must provide a ``filled`` method.
            If None, the ENSO indices of the underlying series are used instead.
        lag : {number}, optional
            Offset added to the starting position of every stripe.
        optinfo : optional
            Accepted but not used by this method.

        Raises
        ------
        ValueError
            If no ENSO indices are available, or if the dates of the current
            plot (``self.xdata``) are undefined.
        """
        if ensoindices is None:
            ensoindices = getattr(self._series, 'ensoindices', None)
            if ensoindices is None:
                raise ValueError("Undefined ENSO indices!")
        dates = self.xdata
        if dates is None:
            errmsg = "Unable to retrieve the dates of the current plot!"
            raise ValueError(errmsg)
        #
        clust_indices = Cluster(ensoindices.filled(0), 0)
        # One extra slot is appended to the dates before they are indexed
        # with the cluster limits.
        # NOTE(review): the extra slot repeats dates[-2], not dates[-1] --
        # confirm this is intended.
        _dates = np.empty(len(dates) + 1, int)
        _dates[:-1] = dates
        _dates[-1] = dates[-2]
        episodes = {k: list(zip(_dates[v[:, 0]], _dates[v[:, 1]]))
                    for (k, v) in clust_indices.grouped_limits().items()}
        #
        # Alias the phase colors under the numeric ENSO index as well, so the
        # episode keys can be used for lookup below. This writes into the
        # shared ENSOcolors['polygons'] mapping.
        colors = ENSOcolors['polygons']
        for (key, idx) in {'C': -1, 'N': 0, 'W': +1}.items():
            colors[idx] = colors[key]
        #
        # x in data coordinates, y in axes coordinates: each stripe spans the
        # full height of the plot.
        trans = blended_transform_factory(self.transData, self.transAxes)
        for (k, lim) in episodes.items():
            _bbc = BrokenBarHCollection([(x + lag, y - x) for (x, y) in lim],
                                        (0, 1),
                                        facecolors=colors[k],
                                        edgecolors=colors[k],)
            _bbc.set_alpha(0.2)
            _bbc.set_transform(trans)
            self.add_collection(_bbc)
Пример #13
0
def bed_collections_generator(df, y_positions, height):
    """Iterate over a BED dataframe, one chromosome at a time.

    ``df`` needs the columns 'chrom', 'start', 'width' and 'colors'.
    ``y_positions`` is accepted but not used: every collection is anchored
    at y = 0.

    Yields:
        BrokenBarHCollection -- one per chromosome, labelled with its name
    """
    bar_yrange = (0, height)
    for chrom_name, chrom_rows in df.groupby('chrom'):
        yield BrokenBarHCollection(
            chrom_rows[['start', 'width']].values,
            bar_yrange,
            facecolors=chrom_rows['colors'],
            label=chrom_name,
        )
Пример #14
0
def bed_collections_generator(dataframe, height):
    """Iterate over a BED dataframe, one chromosome at a time.

    ``dataframe`` needs the columns "chrom", "start", "width" and "colors".
    Every collection is anchored at y = 0 and is ``height`` tall.

    Yields:
        BrokenBarHCollection -- one per chromosome, labelled with its name
    """
    for name, rows in dataframe.groupby("chrom"):
        spans = rows[["start", "width"]].values
        bar_options = {"facecolors": rows["colors"], "label": name}
        yield BrokenBarHCollection(spans, (0, height), **bar_options)
Пример #15
0
 def make_collection(self, df, chrom, ypos=0.25, height=0.5, **kwargs):
     """Build the bar collection of one chromosome.

     If *df* has no 'width' column, one is added to it in place (end minus
     start) and stays on the caller's frame. The rows whose 'chrom' equals
     *chrom* supply the bars ('start', 'width') and their face colors
     ('colors'); the bars are anchored at *ypos* and are *height* tall.
     Additional kwargs are passed to BrokenBarHCollection.

     Returns ``(collection, xmin, xmax)`` where xmin is the smallest start
     and xmax the largest end among the selected rows.
     """
     has_width = 'width' in df.columns
     if not has_width:
         df['width'] = df.end - df.start

     chrom_rows = df[df['chrom'] == chrom]
     spans = chrom_rows[['start', 'width']].values
     collection = BrokenBarHCollection(spans,
                                       (ypos, height),
                                       facecolors=chrom_rows['colors'],
                                       **kwargs)
     return collection, chrom_rows.start.min(), chrom_rows.end.max()
Пример #16
0
 def chromosome_collections(df, y_positions, height, **kwargs):
     """Yield one black-edged BrokenBarHCollection per chromosome.

     Parameters
     ----------
     df : pandas.DataFrame
         Must at least have columns ['chrom', 'start', 'end', 'colors']. If
         no column 'width', it is calculated from start/end and removed
         again once iteration ends.
     y_positions : dict
         Keys are chromosomes, values are the y-value at which to anchor
         the BrokenBarHCollection
     height : float
         Height of each BrokenBarHCollection
     Additional kwargs are passed to BrokenBarHCollection
     """
     del_width = 'width' not in df.columns
     if del_width:
         df['width'] = df['end'] - df['start']
     try:
         for chrom, group in df.groupby('chrom'):
             yrange = (y_positions[chrom], height)
             xranges = group[['start', 'width']].values
             yield BrokenBarHCollection(xranges,
                                        yrange,
                                        edgecolors=("black", ),
                                        facecolors=group['colors'],
                                        **kwargs)
     finally:
         # Also runs when the consumer stops iterating early or a lookup
         # raises, so the caller's frame never keeps the temporary column.
         if del_width:
             del df['width']
Пример #17
0
def bed_collections_generator_combine(dataframe, y_positions, height):
    """Iterate over a BED dataframe, one chromosome at a time.

        The name of each chromosome is printed as it is processed.

        Args:
            dataframe(pandas dataframe): needs the columns "chrom", "start",
                "width" and "colors"
            y_positions(mapping): y anchor of the bars, keyed by chromosome
            height: height of the bars
        Yields:
            BrokenBarHCollection: one per chromosome, labelled with its name
    """
    for chrom_name, chrom_rows in dataframe.groupby("chrom"):
        print("chrom: {}".format(chrom_name))
        anchor = y_positions[chrom_name]
        spans = chrom_rows[["start", "width"]].values
        yield BrokenBarHCollection(
            spans,
            (anchor, height),
            facecolors=chrom_rows["colors"],
            label=chrom_name,
        )
Пример #18
0
def coverage_generator_combine(dataframe, height):
    """Iterate over a dataframe and yield one bar collection per chromosome.

    Args:
        dataframe -- needs the columns "chrom", "start", "width", "colors"
        height --- height of the bars; all bars are anchored at y = 0

    Yields:
        BrokenBarHCollection, labelled with the chromosome name
    """
    per_chromosome = dataframe.groupby("chrom")
    for label, rows in per_chromosome:
        bar_yrange = (0, height)
        bar_xranges = rows[["start", "width"]].values
        face = rows["colors"]
        yield BrokenBarHCollection(bar_xranges, bar_yrange,
                                   facecolors=face, label=label)
Пример #19
0
def bed_collections_generatorCombine(df, y_positions, height, **kwargs):
    """Iterate over a BED dataframe, one chromosome at a time.

        The name of each chromosome is printed as it is processed. Extra
        keyword arguments are accepted but not used.

        Args:
            df(pandas dataframe): needs the columns 'chrom', 'start',
                'width' and 'colors'
            y_positions(mapping): y anchor of the bars, keyed by chromosome
            height: height of the bars
        Yields:
            BrokenBarHCollection: one per chromosome, labelled with its name
    """
    for name, rows in df.groupby('chrom'):
        print("chrom: {}".format(name))
        bar_yrange = (y_positions[name], height)
        bar_xranges = rows[['start', 'width']].values
        collection = BrokenBarHCollection(bar_xranges,
                                          bar_yrange,
                                          facecolors=rows['colors'],
                                          label=name)
        yield collection
Пример #20
0
def coverage_generatorCombine(df, y_positions, height):
    """Iterate over a dataframe and yield one bar collection per chromosome.

    Args:
        df -- needs the columns 'chrom', 'start', 'width' and 'colors'
        y_positions -- accepted but not used; all bars are anchored at y = 0
        height --- height of the bars

    Yields:
        BrokenBarHCollection, labelled with the chromosome name
    """
    bar_yrange = (0, height)
    for chrom_label, chrom_rows in df.groupby('chrom'):
        spans = chrom_rows[['start', 'width']].values
        yield BrokenBarHCollection(
            spans,
            bar_yrange,
            facecolors=chrom_rows['colors'],
            label=chrom_label,
        )
Пример #21
0
 def plot(self, ax, gr: GenomeRange, **kwargs):
     """Draw the bands of genomic range *gr* as colored bars on *ax*.

     ``self.fetch_data(gr)`` supplies one row per band with the columns
     'start', 'end', 'band_name' and 'band_type'. Each band contributes one
     bar whose color comes from ``self.lookup_band_color``. Band names are
     drawn with ``self.plot_text`` unless the 'show_band_name' property is
     'no' or the range is 80 Mb or longer. Extra keyword arguments are
     accepted but not used.

     Side effects: stores *ax* on ``self.ax``, adds the collection to *ax*,
     sets its x/y limits and finally calls ``self.plot_label()``.
     """
     self.ax = ax
     df = self.fetch_data(gr)
     xranges, colors = [], []
     band_height = self.properties['height']
     for _, row in df.iterrows():
         start, end = row['start'], row['end']
         band_name, band_type = row['band_name'], row['band_type']
         band_color = self.lookup_band_color(band_type)
         # BrokenBarHCollection takes (xmin, xwidth) pairs, not (start, end);
         # passing the end coordinate as width made every bar far too wide.
         xranges.append((start, end - start))
         colors.append(band_color)
         if (self.properties['show_band_name'] != 'no'
                 and gr.length < 80_000_000):
             self.plot_text(band_name, start, end, band_color)
     coll = BrokenBarHCollection(xranges, (0, band_height), facecolors=colors,
                                 linewidths=self.properties['border_width'],
                                 edgecolors=self.properties['border_color'])
     ax.add_collection(coll)
     # Leave a small margin above and below the bars.
     ax.set_ylim(-0.1, band_height + 0.1)
     ax.set_xlim(gr.start, gr.end)
     self.plot_label()
Пример #22
0
    def plot_enso_background(self, ensoindices=None, lag=0, **optinfo):
        """
        Plots colored stripes in the background of plot to represent ENSO phases.

        Parameters
        ----------
        ensoindices : {array-like}, optional
            Array of ENSO indices (``+1`` for El Niño, ``0`` for Neutral and
            ``-1`` for La Niña episodes). It must provide a ``filled`` method.
            If None, the ENSO indices of the underlying series are used instead.
        lag : {number}, optional
            Offset added to the starting position of every stripe.
        optinfo : optional
            Accepted but not used by this method.

        Raises
        ------
        ValueError
            If no ENSO indices are available, or if the dates of the current
            plot (``self.xdata``) are undefined.
        """
        if ensoindices is None:
            ensoindices = getattr(self._series, 'ensoindices', None)
            if ensoindices is None:
                raise ValueError("Undefined ENSO indices!")
        dates = self.xdata
        if dates is None:
            errmsg = "Unable to retrieve the dates of the current plot!"
            raise ValueError(errmsg)
        #
        clust_indices = Cluster(ensoindices.filled(0), 0)
        # One extra slot is appended to the dates before they are indexed
        # with the cluster limits.
        # NOTE(review): the extra slot repeats dates[-2], not dates[-1] --
        # confirm this is intended.
        _dates = np.empty(len(dates) + 1, int)
        _dates[:-1] = dates
        _dates[-1] = dates[-2]
        episodes = {k: list(zip(_dates[v[:, 0]], _dates[v[:, 1]]))
                    for (k, v) in clust_indices.grouped_limits().items()}
        #
        # Alias the phase colors under the numeric ENSO index as well, so the
        # episode keys can be used for lookup below. This writes into the
        # shared ENSOcolors['polygons'] mapping.
        colors = ENSOcolors['polygons']
        for (key, idx) in {'C': -1, 'N': 0, 'W': +1}.items():
            colors[idx] = colors[key]
        #
        # x in data coordinates, y in axes coordinates: each stripe spans the
        # full height of the plot.
        trans = blended_transform_factory(self.transData, self.transAxes)
        for (k, lim) in episodes.items():
            _bbc = BrokenBarHCollection(
                [(x + lag, y - x) for (x, y) in lim],
                (0, 1),
                facecolors=colors[k],
                edgecolors=colors[k],
            )
            _bbc.set_alpha(0.2)
            _bbc.set_transform(trans)
            self.add_collection(_bbc)
Пример #23
0
    def get_steps(self, sigs, weights, cutoff_hz=1.0, order=2, window=4.0,
                  x_threshold=2.0, figname=None, min_step_window=2.0):
        """
        Parameters
        ----------

        sigs : list(ndarray(n), ndarray(n))
            List of signals (1D-arrays)
        """

        x = sigs[0]
        y = sigs[1]
        xf, xf_ds, xf_ds_dt, x_regress = self.conditioning(x, cutoff_hz=cutoff_hz,
                                                    order=order, window=window)
        xf_ds_regress = x_regress[:,0]
        yf, yf_ds, yf_ds_dt, y_regress = self.conditioning(y, cutoff_hz=cutoff_hz,
                                                    order=order, window=window)
        yf_ds_regress = y_regress[:,0]

        # select on the product of yf_ds_regress * xf_ds_regress,
        # both need to be steady!
        xw = weights[0]
        yw = weights[1]
        xy_ds = np.abs(xf_ds_regress*xw) + np.abs(yf_ds_regress*yw)

        xf_sel_mask, xf_sel_arg = self.select(xy_ds, x_threshold)

        step_ds_mask, steps_ds = self.steady_steps(xf_sel_mask,
                                                   step_lenght=min_step_window)
        # save steps in high-res sampling of the original signal
        steps = np.round(steps_ds * self.sps / self.freq_ds, 0).astype(np.int)
        np.savetxt(figname.replace('.png', '_steps.txt'), steps)
#        steps_ds_times = self.t_ds[steps_ds.flatten()]
#        steps = np.ndarray(steps_ds.shape) * np.nan
#        for k in range(steps.shape[0]):
#            t0 = self.t_ds[steps[k,0]]
#            t1 = self.t_ds[steps[k,1]]
#            steps[k,0] = np.abs(self.time - t0).argmin()
#            steps[k,0] = np.abs(self.time - t1).argmin()

        if figname is not None:
            print('start plotting...')
            fig, axes = plotting.subplots(nrows=3, ncols=1, figsize=(8,9),
                                          dpi=120)
            ax = axes[0,0]
            ax.set_title('original and filtered signals')
            ax.plot(self.time, x, 'r-', alpha=0.3)
            ax.plot(self.time, xf, 'r-')
            ax.grid()
            axr = ax.twinx()
            axr.plot(self.time, y, 'g-', alpha=0.3)
            axr.plot(self.time, yf, 'g-')

            ax = axes[1,0]
            ax.set_title('lin regr window: %1.02f sec' % window)
            t_mask = self.t_ds.copy()
            t_mask[~xf_sel_mask] = np.nan
            x_mask = xf_ds.copy()
            x_mask[~xf_sel_mask] = np.nan
            ax.plot(self.t_ds, xf_ds, 'r-', alpha=1.0, label='xf ds')
            ax.plot(self.t_ds, x_mask, 'k-+', alpha=0.7, label='xf select')
            ax.grid()
            axr = ax.twinx()
            axr.plot(self.t_ds, yf_ds, 'g-', alpha=0.8, label='yx ds')
            y_mask = yf_ds.copy()
            y_mask[~xf_sel_mask] = np.nan
            axr.plot(self.t_ds, y_mask, 'k-+', alpha=0.7, label='yf select')
            xmin = axr.get_ylim()[0]
            xmax = axr.get_ylim()[1]
            collection = region.span_where(self.t_ds, ymin=xmin, ymax=xmax,
                                           where=xf_sel_mask, facecolor='grey',
                                           alpha=0.4)
            axr.add_collection(collection)
            leg = plotting.one_legend(ax, axr, loc='best')
            leg.get_frame().set_alpha(0.5)

            ax = axes[2,0]
            rpl = (x_threshold, min_step_window)
            ax.set_title('threshold: %1.02f, min step window: %1.2f sec' % rpl)
            ax.plot(self.t_ds, np.abs(xf_ds_regress), 'r-',
                    label='xf lin regress', alpha=0.9)
            ax.plot(self.t_ds, np.abs(yf_ds_regress), 'g-',
                     label='yf lin regress', alpha=0.9)
            ax.plot(self.t_ds, np.abs(xy_ds), 'k-', label='xy*w', alpha=0.7)
            ax.axhline(y=x_threshold, linewidth=1, color='k', linestyle='--',
                       aa=False)
            ax.set_ylim([0,5])
            xmin = ax.get_ylim()[0]
            xmax = ax.get_ylim()[1]
            collection = region.span_where(self.t_ds, ymin=xmin, ymax=xmax,
                                           where=step_ds_mask, facecolor='grey',
                                           alpha=0.4)
            ax.add_collection(collection)
#            axr = ax.twinx()
#            axr.plot(self.t_ds, np.abs(yf_ds_regress), 'g-',
#                     label='yf lin regress', alpha=0.9)
#            ax, axr = plotting.match_yticks(ax, axr)
#            axr.set_ylim([0,5])
#            leg = plotting.one_legend(ax, axr, loc='best')
#            leg.get_frame().set_alpha(0.5)

            ax.grid()
            leg = ax.legend(loc='best')
            leg.get_frame().set_alpha(0.5)

            fig.tight_layout()
            fig.savefig(figname)
            print(figname)

        return steps
Пример #24
0
    def draw(self, ax):
        """Draw this gene model on *ax*.

        Reads ``self.attr`` (keys 'st', 'estarts', 'esizes', 'strand',
        'tst', 'ted', 'name'), the x-limits ``self.st`` / ``self.ed``,
        ``self.color`` and ``self.kwargs`` (extra collection options).

        Drawn elements: a grey base line across the x-range, direction
        arrows inside introns that are at least 15% of the drawn width,
        arrows at both ends when the gene starts right of the left limit,
        half-height bars for the exon parts outside the 'tst'..'ted'
        interval, full-height bars for the parts inside it, and the gene
        name with the drawn width in kb. Ticks and frame are removed.
        """
        a = self.attr
        st = a['st']
        xmin = self.st  #- width*self.margin
        xmax = self.ed  #+ width*self.margin
        width = xmax - xmin  #ed-st
        xmid = (xmin + xmax) / 2.
        ymax = 0.8
        h = 0.4
        ymid = ymax - h / 2.
        # draw genome line
        ax.plot([xmin, xmax], [ymid, ymid], 'grey')  # base
        # draw arrows in introns
        # The (start, size) pairs are sliced and iterated several times, so
        # they must be a real list rather than a lazy zip object.
        estsi = list(zip(a['estarts'], a['esizes']))
        ists = [st + est + esi for est, esi in estsi[:-1]]
        ieds = [st + est for est, esi in estsi[1:]]
        if a['strand'] == '+':
            dx, shape, hw = 1, 'right', 0.25
        else:
            dx, shape, hw = -1, 'right', 0.4
        hl = 0.05 * width
        arrowargs = dict(y=ymid,
                         dx=dx,
                         dy=0,
                         shape=shape,
                         fc='grey',
                         linewidth=0,
                         head_width=hw,
                         head_length=hl)
        for ist, ied in zip(ists, ieds):
            iwidth = ied - ist
            imid = (ist + ied) / 2.
            # Skip introns too short to hold an arrow.
            if iwidth < 0.15 * width:
                continue
            ax.arrow(imid - dx * (hl + 1) / 2., **arrowargs)
        # put arrow at xmin and xmax
        if st > xmin:
            if dx < 0:
                axmin = xmin + hl - dx
                axmax = xmax - dx
            else:
                axmin = xmin - dx
                axmax = xmax - hl - dx
            ax.arrow(axmin, **arrowargs)
            ax.arrow(axmax, **arrowargs)
        # draw exons => BrokenBarHCollection
        # draw st=>TSS & TSE=>ed
        tss = a['tst'] - st  # exon coords are st based
        tse = a['ted'] - st
        if tss == tse:
            # Empty tst..ted interval: every exon is drawn at full height.
            estsi1 = estsi3 = []
            estsi2 = estsi
        else:
            # Split the exons into the part before tss (1), the part between
            # tss and tse (2) and the part after tse (3); an exon that
            # straddles a boundary appears in both lists and is trimmed.
            estsi1 = [(x, y) for x, y in estsi if x < tss]
            estsi2 = [(x, y) for x, y in estsi
                      if (x + y) >= tss and x <= tse]
            estsi3 = [(x, y) for x, y in estsi if (x + y) > tse]
            if estsi1:
                x0, y0 = estsi1[-1]  # last size needs fixing
                if (x0 + y0) > tss:
                    estsi1[-1] = (x0, tss - x0)
            if estsi2:
                x0, y0 = estsi2[0]
                if x0 < tss:  # first start and size need fixing
                    estsi2[0] = (tss, y0 - (tss - x0))
                x0, y0 = estsi2[-1]
                if (x0 + y0) > tse:
                    estsi2[-1] = (x0, tse - x0)
            if estsi3:
                x0, y0 = estsi3[0]
                if x0 < tse:
                    estsi3[0] = (tse, y0 - (tse - x0))
        c = self.color
        cargs = dict(facecolor=c, edgecolor=c)  #, alpha=0.8)
        cargs.update(self.kwargs)
        # draw UTR
        if (len(estsi1) + len(estsi3)) > 0:
            yrange = (ymid - h / 4., h / 2.)
            xranges = [(st + x, y) for x, y in estsi1] + [(st + x, y)
                                                          for x, y in estsi3]
            bbhc = BrokenBarHCollection(xranges, yrange, **cargs)
            ax.add_collection(bbhc)
        # draw coding
        yrange = (ymid - h / 2., h)
        xranges = [(st + x, y) for x, y in estsi2]
        bbhc = BrokenBarHCollection(xranges, yrange, **cargs)
        ax.add_collection(bbhc)

        # draw gene name
        # Float division so the kb figure keeps its decimal on Python 2 too.
        txt = '%s (%.1fkb)' % (a['name'], width / 1000.)
        ax.text(xmid, 0, txt, ha='center', va='bottom', fontsize=FONTSIZE)
        PP.setp(ax,
                xticks=[],
                yticks=[],
                frame_on=False,
                xlim=(xmin, xmax),
                ylim=(0, 1))
Пример #25
0
    # Chromosome labels; each one has a matching "chr<label>" directory that
    # is visited in this order.
    chrRng = ['R','7','6','5','4','3','2','1'] 
    # Rows of [chromosome name, start, end, 0, value] handed to ideograms().
    mydata = []
    for ichr in chrRng:
        # Step into the chromosome's directory to read its compress.out
        # (first line skipped), and step back out once the rows are collected.
        os.chdir("chr%s"%ichr)
        tcom = np.genfromtxt("compress.out",skip_header=1)
        for itemcom in tcom:
            # Column 0 is used as an index i, column 2 as the value c.
            i = itemcom[0]
            c = itemcom[2]
            # Interval from 12500 + i*50000 to 12500 + (i+1)*50000
            # (presumably 50 kb bins offset by 12.5 kb -- TODO confirm).
            mydata.append(["chr%s"%ichr,12500+i*50000,12500+(i+1)*50000,0,c])
        os.chdir("..")    
    #mydata = [["chr1",0,50000,0,1],
    #["chr1",50000,100000,0,2],
    #["chr1",100000,150000,0,1],["chr2",150000,200000,0,3],["chr2",200000,250000,0,4]]
    
    # One bar collection per item yielded by ideograms(); the tick of each
    # row sits at its vertical center and carries the yielded label.
    for xranges, yrange, colors, label in ideograms(mydata):
        coll = BrokenBarHCollection(xranges, yrange, facecolors=colors)
        ax.add_collection(coll)
        center = yrange[0] + yrange[1]/2.
        yticks.append(center)
        yticklabels.append(label)
        # Keep the x-ranges per label for later use.
        d[label] = xranges
    
    # Fit the view to the drawn collections, label the rows, hide x ticks,
    # then save the figure to compress.pdf and show it.
    ax.axis('tight')
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)
    ax.set_xticks([])
    plt.savefig("compress.pdf")
    plt.show()
    
### plot compressibilities out as curve        
if False:
Пример #26
0
def details(file):
    """Draw one horizontal broken bar per chromosome listed in *file*.

    The file is whitespace separated with one header line followed by rows
    of ``chrom start stop specie population stain``.  Consecutive rows
    sharing the same ``chrom`` are drawn on the same track; each segment is
    coloured with ``colors[stain]`` (``colors`` is a module-level mapping)
    and annotated with its width in bp.

    Parameters
    ----------
    file : str
        Path of the table to read.
    """
    height = 0.9
    spacing = 0.9

    def ideograms(fn):
        """Yield (xranges, yrange, colors, chrom, specie, population) per track."""
        last_chrom = None
        last_specie = None
        last_population = None
        xranges, color = [], []
        ymin = 0

        # The context manager closes the file once the generator finishes.
        with open(fn) as fin:
            fin.readline()  # skip the header line
            for line in fin:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                chrom, start, stop, specie, population, stain = fields
                start = int(start)
                stop = int(stop)

                if last_chrom is not None and chrom != last_chrom:
                    # Chromosome changed: emit the finished track first.
                    ymin += height + spacing
                    yield (xranges, (ymin, height), color, last_chrom,
                           last_specie, last_population)
                    xranges, color = [], []

                xranges.append((start, stop - start))
                color.append(colors[stain])
                last_chrom = chrom
                last_specie = specie
                last_population = population

        # Emit the last track, unless the file had no data rows at all.
        if xranges:
            ymin += height + spacing
            yield (xranges, (ymin, height), color, last_chrom, last_specie,
                   last_population)

    fig = plt.figure(figsize=(15, 15))
    ax = fig.add_subplot(111)
    d = {}
    yticks = []
    yticklabels = []

    for xranges, yrange, color, chms, species, populations in ideograms(file):
        ax.add_collection(
            BrokenBarHCollection(xranges, yrange, facecolors=color))
        # yrange is (ymin, height): put the tick at the bar's vertical middle.
        yticks.append(yrange[0] + yrange[1] / 2.)
        yticklabels.append('%s %s' % (species, populations))
        d[chms] = xranges

        # Segment boundaries: every segment start plus the end of the last
        # segment, so consecutive pairs bracket one label each.
        starts = [start for start, _ in xranges]
        widths = [width for _, width in xranges]
        bounds = starts + [starts[-1] + widths[-1]]
        top = yrange[0] + yrange[1]

        for left, right, width in zip(bounds, bounds[1:], widths):
            # Float division keeps the midpoint exact on Python 2 as well.
            middle = (left + right) / 2.
            ax.annotate('%d bp' % width, xy=(middle, top),
                        xytext=(middle, top))

    ax.set_title('Chromosomes')
    ax.axis('tight')
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)
    ax.set_xticks([])
    plt.show(block=False)
Пример #27
0
def main(args, parser):
    """Count reads per window along each contig and plot them per haplotype.

    Parameters
    ----------
    args : argparse.Namespace-like
        Must provide ``fai`` (contig name / length table), ``binsize``
        (window size in bp), ``bam`` (alignment file read with pysam),
        ``human`` (whitespace separated table with one header line; column 3
        is the contig, column 4 a position, column 5 the haplotype and
        column 7 a count) and ``output`` (prefix of the ``.wins`` and
        ``.pdf`` files that are written).
    parser
        Unused; kept for interface compatibility.
    """
    # Contig lengths, converted to int so they can drive range() and sums.
    ctglens = dict()
    with open(args.fai, 'r') as fai:
        for l in fai:
            s = l.rstrip().split()
            if len(s) < 2:
                # Skip blank or malformed lines.
                continue
            ctglens[s[0]] = int(s[1])

    # Create windows
    winlist = defaultdict(list)
    # offset bp to add for stitching contigs together in one line
    ctgoffset = dict()
    lastbp = 0
    for c, length in ctglens.items():
        ctgoffset[c] = lastbp + 100
        for i in range(0, length, args.binsize):
            winlist[c].append(window(c, i, i + args.binsize))
        lastbp += length

    # read each sam region and count the primary reads
    with pysam.AlignmentFile(args.bam, 'rb') as bamfile:
        for c, w in winlist.items():
            for i, win in enumerate(w):
                count = sum(1 for read in
                            bamfile.fetch(c, win.start, win.end)
                            if not read.is_secondary)
                winlist = updateWin(winlist, c, i, count)

    # Now, read in the human readable text file and process that
    hapset = set()
    with open(args.human, 'r') as human:
        human.readline()
        for l in human:
            s = l.rstrip().split()
            if len(s) < 7:
                continue
            position = int(s[3])
            # .get() avoids silently inserting contigs that are absent from
            # the .fai file, which would break the offset lookups below.
            for i, win in enumerate(winlist.get(s[2], [])):
                if win.start <= position < win.end:
                    winlist = updateWin(winlist, s[2], i, int(s[6]), s[4])
                    print(f'Updating window: {s[2]} {win.start} {win.end} to {s[6]} for Hap {s[4]}')
                    hapset.add(s[4])

    # OK, data is in! Let's try plotting
    raw = defaultdict(list)
    bars = list()
    haplotypes = sorted(hapset)  # fixed order for reproducible output
    for c, w in winlist.items():
        bars.append([ctgoffset[c], ctglens[c]])
        # Iterate this contig's windows (not the keys of winlist).
        for win in w:
            for h in haplotypes:
                raw["contig"].append(c)
                raw["start"].append(win.start + ctgoffset[c])
                raw["end"].append(win.end + ctgoffset[c])
                raw["hap"].append(h)
                raw["count"].append(win.getCount(h))

    df = pandas.DataFrame(raw)
    df.to_csv(args.output + '.wins', sep='\t', header=True)

    # Hand the explicitly sized axes to pandas; otherwise it opens a figure
    # of its own and the requested size is lost.
    fig, ax = plt.subplots(figsize=(6, 8))
    df[['start', 'hap', 'count']].plot.area(x='start', y='count',
                                            colormap='viridis', ax=ax)

    # One colour per contig bar, cycling through the tab20 palette.
    cmap = plt.get_cmap('tab20')
    bar_colors = [cmap(i % cmap.N) for i in range(len(bars))]
    ax.add_collection(BrokenBarHCollection(bars, [-1, 1],
                                           facecolors=bar_colors))
    ax.axis('tight')
    fig.savefig(args.output + '.pdf')
Пример #28
0
def labeled_intervals(intervals,
                      labels,
                      label_set=None,
                      base=None,
                      height=None,
                      extend_labels=True,
                      ax=None,
                      tick=True,
                      **kwargs):
    '''Draw labeled intervals, one horizontal row per distinct label.

    Parameters
    ----------
    intervals : np.ndarray, shape=(n, 2)
        Segment boundaries, as produced by
        :func:`mir_eval.io.load_intervals` or
        :func:`mir_eval.io.load_labeled_intervals`.

    labels : list, shape=(n,)
        Label of each segment, as produced by
        :func:`mir_eval.io.load_labeled_intervals`.

    label_set : list
        Ordered labels fixing the row order.
        When omitted, the text of ``ax.get_yticklabels()`` is used; if
        every one of those strings is empty, the sorted unique values of
        ``labels`` are used instead.

    base : np.ndarray, shape=(n,), optional
        Vertical position of each row.
        Defaults to consecutive integers starting at 0, one per row.

    height : scalar or np.ndarray, shape=(n,), optional
        Height of each row; a scalar applies to all rows.
        Defaults to ``1``.

    extend_labels : bool
        If ``False``, only labels present in ``label_set`` are drawn.

        If ``True``, labels missing from ``label_set`` get rows of their
        own above the existing ones, and a horizontal line marks the
        boundary between the two groups.

    ax : matplotlib.pyplot.axes
        Axes to draw on; new axes are created when omitted.

    tick : bool
        If ``True``, y-axis tick positions and labels are set.

    kwargs
        Extra keyword arguments forwarded to
        `matplotlib.collection.BrokenBarHCollection`.

    Returns
    -------
    ax : matplotlib.pyplot.axes._subplots.AxesSubplot
        The axes that were drawn on (possibly newly created)
    '''

    ax, _ = __get_axes(ax=ax)

    # Guarantee a 2-d array even for a single interval
    intervals = np.atleast_2d(intervals)

    if label_set is not None:
        label_set = list(label_set)
    else:
        # Fall back to whatever tick labels the axes already carry
        label_set = [tick_label.get_text()
                     for tick_label in ax.get_yticklabels()]
        # All-blank tick labels count as having no labels at all
        if not any(label_set):
            label_set = []

    # Decide which labels get a row, and in which order
    if extend_labels:
        unseen = sorted(set(labels) - set(label_set))
        ticks = label_set + unseen
    elif label_set:
        ticks = label_set
    else:
        ticks = sorted(set(labels))

    rect_style = {'linewidth': 1}
    rect_style.update(next(ax._get_patches_for_fill.prop_cycler))
    # Rename color -> facecolor so the rectangles keep their edgecolor
    rect_style['facecolor'] = rect_style.pop('color')
    rect_style.update(kwargs)

    if base is None:
        base = np.arange(len(ticks))

    if height is None:
        height = 1

    if np.isscalar(height):
        height = height * np.ones_like(base)

    # label -> (bottom, height) of its row
    seg_y = {lab: (bottom, tall)
             for bottom, tall, lab in zip(base, height, ticks)}

    # label -> list of (start, duration) pairs
    xvals = defaultdict(list)
    for ival, lab in zip(intervals, labels):
        if lab in seg_y:
            xvals[lab].append((ival[0], ival[1] - ival[0]))

    for lab, row in seg_y.items():
        bars = BrokenBarHCollection(xvals[lab], row, **rect_style)
        ax.add_collection(bars)
        # Only the first collection keeps the label, which yields a single
        # legend entry
        rect_style.pop('label', None)

    # Mark the boundary between pre-existing and newly added labels
    if label_set != ticks:
        ax.axhline(len(label_set), color='k', alpha=0.5)

    if tick:
        ax.grid(True, axis='y')
        ax.set_yticks([])
        ax.set_yticks(base)
        ax.set_yticklabels(ticks, va='bottom')
        ax.yaxis.set_major_formatter(IntervalFormatter(base, ticks))

    if base.size:
        y_extent = [base.min(), (base + height).max()]
        __expand_limits(ax, y_extent, which='y')
    if intervals.size:
        x_extent = [intervals.min(), intervals.max()]
        __expand_limits(ax, x_extent, which='x')

    return ax
Пример #29
0
def _sequence_length(metadata, library, column):
    """Return len() of *column* for *library*'s first metadata row (0 if null)."""
    value = metadata.loc[metadata['Library'] == library][column].values[0]
    return 0 if pd.isnull(value) else len(value)


def _count_junction_distances(path, offset):
    """Map each junction distance found in the TLX file *path* to its count.

    The first line is a header.  For every other line the distance is
    tab-separated field 10 minus field 9, minus *offset*.
    """
    counts = {}
    with open(path, 'r') as f_tlx:
        f_tlx.readline()
        for line in f_tlx:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            fields = line.split("\t")
            distance = int(fields[9]) - int(fields[8]) - offset
            counts[distance] = counts.get(distance, 0) + 1
    return counts


def _add_distance_maxima(counts):
    """Store the largest distance and largest count in *counts*.

    They are written under the keys ``"max_distance"`` and
    ``"max_distance_count"`` (both 0 when *counts* is empty).  Returns the
    largest distance.
    """
    max_distance = max(counts) if counts else 0
    max_distance_count = max(counts.values()) if counts else 0
    counts["max_distance"] = max_distance
    counts["max_distance_count"] = max_distance_count
    return max_distance


def main(argv):
    """Plot, per library, where junctions fall relative to the bait primer.

    Command line options (read from ``sys.argv``):

    ``-m/--file_metadata``  tab separated metadata table; the columns
                            ``Assembly``, ``Library``, ``MID`` and
                            ``Primer`` are used
    ``-g/--genome``         assembly name used to filter the metadata
    ``-p/--dir_post``       directory holding one sub-directory per library
    ``-o/--output_mark``    suffix of the PNG file that is written
    ``-e/--method``         0 or 1, forwarded to ``manageArray``
    ``-i/--input_mark``     optional comma separated marks selecting the
                            ``.tlx`` input files
    ``-w/--libraries``      ``all`` or a comma separated list of libraries
    ``-t/--type_junction``  ``all``, ``legitimate`` or ``illegitimate``
    ``-h/--help``           print the usage and exit

    For each library the ``<library>_Legitimate<ext>`` and
    ``<library>_Illegitimate<ext>`` files are read, junction distances are
    counted, and one bar track per junction type is drawn with the value
    returned by ``manageArray`` as the alpha channel.  The figure is saved
    as ``<dir_post><genome><ext>_<output_mark>.png``.

    Any invalid option or missing input prints the usage and exits with
    status 2.

    Parameters
    ----------
    argv
        Unused; options are parsed from ``sys.argv[1:]``.
    """
    file_metadata = ""
    dir_post = ""
    output_mark = ""
    # Must default to "": it is tested below even when -i is not given.
    input_mark = ""
    genome = ""
    method = -1
    title_label = ""
    libraries = 'all'
    type_junction = 'all'

    #pd.options.mode.chained_assignment = None

    try:
        # 'h' is listed so that -h reaches the help branch below.
        opts, _ = getopt.getopt(sys.argv[1:], 'hm:g:p:o:e:i:w:t:', [
            'file_metadata=', 'genome=', 'dir_post=', 'output_mark=',
            'method=', 'input_mark=', 'libraries=', 'type_junction=', 'help'
        ])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

##############################OPTIONS##############################

    print("\n")
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(2)
        elif opt in ('-m', '--file_metadata'):
            file_metadata = arg
        elif opt in ('-g', '--genome'):
            genome = arg
        elif opt in ('-p', '--dir_post'):
            dir_post = arg
        elif opt in ('-o', '--output_mark'):
            output_mark = arg
        elif opt in ('-e', '--method'):
            method = arg
        elif opt in ('-i', '--input_mark'):
            input_mark = arg
        elif opt in ('-w', '--libraries'):
            libraries = arg
        elif opt in ('-t', '--type_junction'):
            type_junction = arg
        else:
            print("Error : Bad option -> " + opt)
            usage()
            sys.exit(2)

##############################CHECK UP/SET UP##############################

    # CHECK METADATA FILE
    if file_metadata == "" or not os.path.exists(file_metadata):
        print("Error : You have to set a metadata file !\n")
        usage()
        sys.exit(2)
    metadata = pd.read_table(file_metadata, sep='\t')

    # FILTER METADATA FILE IF GENOME INPUT
    if genome == "":
        print("Error : You have to set a genome name !\n")
        usage()
        sys.exit(2)
    metadata = metadata.loc[metadata['Assembly'] == genome]
    if metadata.empty:
        print(
            "Error : This assembly does not exist in the metadata file !\n"
        )
        usage()
        sys.exit(2)

    # CHECK POSTPROCESS DIRECTORY
    if not os.path.exists(dir_post):
        print("Error : You have to set a postprocess directory !\n")
        usage()
        sys.exit(2)
    if dir_post[-1] != "/":
        dir_post += "/"

    # CHECK METHOD
    # Only the conversion is guarded: a bare except around the whole check
    # would also swallow the SystemExit raised for an out-of-range value.
    try:
        method = int(method)
    except (TypeError, ValueError):
        print(
            "Error : You have to set an integer (0 or 1) to method option !\n")
        usage()
        sys.exit(2)
    if method == 0:
        title_label = "Number of double stranded breaks after the primer at each position divided by the maximum of breaks by library"
    elif method == 1:
        title_label = "Log2 of number of double stranded breaks after the primer at each position divided by log2 of the maximum of breaks by library"
    else:
        print("Error : Method option needs to be 0 or 1 !\n")
        usage()
        sys.exit(2)

    # CHECK INPUT MARKS HISTORY
    if input_mark == "":
        print("Warning : You will process the raw file !\n")

    # CHECK OUTPUT MARK
    if output_mark == "":
        print("Error : You have to set an output mark !\n")
        usage()
        sys.exit(2)

    # SELECT INPUT FILES
    if input_mark == "":
        file_input_extension = ".tlx"
    else:
        file_input_extension = "_" + "_".join(input_mark.split(",")) + ".tlx"

    def legitimate_path(library):
        return (dir_post + library + "/" + library + "_Legitimate" +
                file_input_extension)

    def illegitimate_path(library):
        return (dir_post + library + "/" + library + "_Illegitimate" +
                file_input_extension)

    known_libraries = metadata['Library'].tolist()

    # TEST IF INPUT EXIST IN AT LEAST ON LIBRARY
    if not any(
            os.path.exists(legitimate_path(library))
            or os.path.exists(illegitimate_path(library))
            for library in known_libraries):
        print(
            "Error : Your input marks can not localize a good legitimate or illegitimate file !\n"
        )
        usage()
        sys.exit(2)

    # CHECK LIBRARIES
    array_libraries = []
    if libraries == 'all':
        array_libraries = known_libraries
    else:
        for library in libraries.split(","):
            if library in known_libraries:
                array_libraries.append(library)
            else:
                print("Warning : {" + library +
                      "} does not exist in metadata file with this genome !")
                print("Warning : {" + library + "} will not be used !")

    if type_junction not in ('all', 'legitimate', 'illegitimate'):
        print(
            "Error : Type junction only stand for all, legitimate or illegitimate value !\n"
        )
        usage()
        sys.exit(2)

    # SELECT OUTPUT FILES
    # The input extension always ends with ".tlx": swap it for the mark.
    file_output_extension = file_input_extension[:-4] + "_" + output_mark + ".png"

##############################PRINTS##############################

    print('\n-----------------------------------------')
    print('Metadata file : ' + file_metadata)
    print('Genome : ' + genome)
    print('Postprocess directory : ' + dir_post)
    print('Method : ' + str(method))
    print('Libraries : ' + libraries)
    print('Type junction : ' + type_junction)
    print('Input file extension: ' + file_input_extension)
    print('Output file extension : ' + file_output_extension)
    print('-----------------------------------------\n')

    ##############################PROGRAMS##############################

    fig = plt.figure()
    ax = fig.add_subplot(111)
    yticks = []
    yticklabels = []
    yrange = []

    # CHECK EVERY LIBRARY HAS ITS DIRECTORY AND BOTH INPUT FILES
    usable_libraries = []
    for library in array_libraries:
        if not os.path.exists(dir_post + library):
            # As the warning says, the library is left out rather than
            # crashing later when its files are opened.
            print("Warning : " + dir_post + " does not contains {" + library +
                  "}")
            print("Warning :  {" + library + "} will not be filtered")
            continue
        if not os.path.exists(legitimate_path(library)):
            print("Error : The Legitimate file for " + library +
                  " is missing !\n")
            usage()
            sys.exit(2)
        if not os.path.exists(illegitimate_path(library)):
            print("Error : The Illegitimate file for " + library +
                  " is missing !\n")
            usage()
            sys.exit(2)
        usable_libraries.append(library)

    # Libraries in the order requested, one entry per matching metadata row.
    metadata = metadata[metadata['Library'].isin(usable_libraries)]
    metadata_libraries = metadata['Library'].tolist()
    library_order = [
        library for library in usable_libraries
        for name in metadata_libraries if name == library
    ]

    # DICTIONNARY WITH ILLEGITIMATES AND LEGITIMATES JUNCTIONS
    distance_dict = {"illegitimates": {}, "legitimates": {}}
    max_distance_all = 0
    coff_leg_ille_graph = 0

    # OPEN TLX FILES ONE BY ONE
    for track_number, label in enumerate(library_order):
        # YRANGE USED AT THE END TO HORIZONTAL SPACE BAR
        yrange.append(((track_number * 1.8) + (track_number * 1.8) + 1.8 +
                       coff_leg_ille_graph, 1))
        if type_junction == 'all':
            yrange.append(((track_number * 1.8) + (track_number * 1.8) + 3.6 +
                           coff_leg_ille_graph, 1))
            coff_leg_ille_graph += 1

        # The MID and primer lengths are constant for a library, so they
        # are looked up once instead of once per input line.
        offset = (_sequence_length(metadata, label, 'MID') +
                  _sequence_length(metadata, label, 'Primer'))

        legitimates = _count_junction_distances(legitimate_path(label),
                                                offset)
        illegitimates = _count_junction_distances(illegitimate_path(label),
                                                  offset)

        # ADD MAX_DISTANCE AND MAX_DISTANCE_COUNT TO BOTH DICTIONNARIES
        max_distance_all = max(max_distance_all,
                               _add_distance_maxima(illegitimates),
                               _add_distance_maxima(legitimates))
        distance_dict["illegitimates"][label] = illegitimates
        distance_dict["legitimates"][label] = legitimates

    show_legitimate = type_junction in ('all', 'legitimate')
    show_illegitimate = type_junction in ('all', 'illegitimate')
    yrange_count = 0

    def add_track(xranges, colors, tick_label, row):
        """Draw one bar track on row *row* and register its y tick."""
        ax.add_collection(
            BrokenBarHCollection(xranges,
                                 yrange[row],
                                 facecolors=colors,
                                 edgecolors=colors))
        yticks.append(yrange[row][0] + yrange[row][1] / 2.0)
        yticklabels.append(tick_label)

    for label in library_order:
        # Both junction types of a library are scaled by the same maximum.
        max_label_leg_ille = max(
            distance_dict["legitimates"][label]["max_distance_count"],
            distance_dict["illegitimates"][label]["max_distance_count"])
        distance_dict["illegitimates"][label] = manageArray(
            distance_dict["illegitimates"][label], max_label_leg_ille, method)
        distance_dict["legitimates"][label] = manageArray(
            distance_dict["legitimates"][label], max_label_leg_ille, method)

        distance_dict["illegitimates"][label] = collections.OrderedDict(
            sorted(distance_dict["illegitimates"][label].items()))
        distance_dict["legitimates"][label] = collections.OrderedDict(
            sorted(distance_dict["legitimates"][label].items()))

        # One unit-wide bar per distance; the value is the alpha channel.
        xranges_leg = []
        colors_leg = []
        for key, value in distance_dict["legitimates"][label].items():
            xranges_leg.append((key, 1))
            colors_leg.append((0.0, 0.0, 0.0, value))

        xranges_illeg = []
        colors_illeg = []
        for key, value in distance_dict["illegitimates"][label].items():
            xranges_illeg.append((key, 1))
            colors_illeg.append((1.0, 0.0, 0.0, value))

        if show_legitimate:
            # LEGITIMATE DISPLAY
            add_track(xranges_leg, colors_leg, label + "_legi", yrange_count)
            yrange_count += 1
        if show_illegitimate:
            # ILLEGITIMATE DISPLAY
            add_track(xranges_illeg, colors_illeg, label + "_illegi",
                      yrange_count)
            yrange_count += 1

    # SET UP DISPLAY BACKGROUND
    ax.axis('tight')
    ax.set_xlim([0, max_distance_all + 10])
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)

    # Integer arithmetic throughout: range() rejects a float step.
    step_length = ((max_distance_all // 100) * 100) // 20
    if step_length == 0:
        step_length = 5

    ax.set_xticks(list(range(0, max_distance_all + 10, step_length)))
    ax.get_xaxis().get_major_formatter().set_scientific(False)
    ax.set_xlabel("Position from bait primer")
    # Use the axes drawn on: plt.axes() would create a new, empty one.
    ax.xaxis.set_minor_locator(MultipleLocator(1))

    # SET UP TITLE
    ax.set_title(title_label, fontdict=None, loc='center')
    fig.set_size_inches(22, 10)

    # SAVE DISPLAY
    fig.savefig(dir_post + genome + file_output_extension,
                format='png',
                bbox_inches='tight')