Exemplo n.º 1
0
def split(parameters):
    """Build a comparison plot of two Hi-C matrices from CLI-style arguments.

    :param parameters: Argument list passed to the parser returned by
                       ``parsers.split_parser()``
    :return: tuple ``(plot, args)`` with the
             ``kplt.HicComparisonPlot2D`` instance and the parsed arguments
    """
    args = parsers.split_parser().parse_args(parameters)

    # An explicit colormap always wins; otherwise 'bwr' is used when the
    # ``oe`` flag is set and the configured Hi-C colormap when it is not.
    colormap = args.colormap
    if colormap is None:
        colormap = 'bwr' if args.oe else config.colormap_hic

    bottom_matrix = fanc.load(os.path.expanduser(args.hic_bottom), mode='r')
    top_matrix = fanc.load(os.path.expanduser(args.hic_top), mode='r')

    # With ``oe`` set and no explicit symmetry, the colorbar is centred on 0.
    symmetry = args.colorbar_symmetry
    if args.oe and symmetry is None:
        symmetry = 0

    comparison_plot = kplt.HicComparisonPlot2D(
        top_matrix,
        bottom_matrix,
        colormap=colormap,
        norm="log" if args.log else "lin",
        vmin=args.vmin,
        vmax=args.vmax,
        adjust_range=args.adjust_range,
        scale_matrices=args.scaling,
        oe=args.oe,
        # the plot's ``log`` option is tied to the ``oe`` flag
        log=args.oe,
        colorbar_symmetry=symmetry,
        show_colorbar=args.show_colorbar)
    return comparison_plot, args
Exemplo n.º 2
0
    def test_bed(self):
        """A BED file loads as ``gr.Bed``; ``foo.txt`` must raise ValueError."""
        base = os.path.dirname(os.path.realpath(__file__))
        bed_path = '{}/test_load/test.bed'.format(base)
        unknown_path = '{}/test_load/foo.txt'.format(base)

        with fanc.load(bed_path) as loaded:
            assert isinstance(loaded, gr.Bed)

        with pytest.raises(ValueError):
            fanc.load(unknown_path)
Exemplo n.º 3
0
    def test_sambam(self):
        """SAM and BAM test files both load as ``pysam.AlignmentFile``."""
        base = os.path.dirname(os.path.realpath(__file__))

        # SAM first, then BAM
        for name in ('test.sam', 'test.bam'):
            path = base + '/test_load/' + name
            with fanc.load(path, mode='r') as alignment:
                assert isinstance(alignment, pysam.AlignmentFile)
Exemplo n.º 4
0
def mirror(parameters):
    """Build a vertical split plot of two triangular Hi-C plots.

    Each half (upper/lower) has its own colormap, normalisation, value range
    and ``oe`` setting; the maximum distance and the colorbar visibility are
    shared by both halves.

    :param parameters: Argument list passed to the parser returned by
                       ``parsers.mirror_parser()``
    :return: tuple ``(plot, args)`` with the ``kplt.VerticalSplitPlot``
             instance and the parsed arguments
    """
    parser = parsers.mirror_parser()

    args = parser.parse_args(parameters)

    # An explicit colormap wins; otherwise 'bwr' is used when the half's
    # ``oe`` flag is set and the configured Hi-C colormap when it is not.
    if args.colormap_lower is None:
        colormap_lower = config.colormap_hic if not args.oe_lower else 'bwr'
    else:
        colormap_lower = args.colormap_lower

    if args.colormap_upper is None:
        colormap_upper = config.colormap_hic if not args.oe_upper else 'bwr'
    else:
        colormap_upper = args.colormap_upper

    matrix_upper = fanc.load(os.path.expanduser(args.hic_upper), mode='r')
    matrix_lower = fanc.load(os.path.expanduser(args.hic_lower), mode='r')

    from fanc.tools.general import str_to_int

    # Only convert the distance when one was given, mirroring the guard in
    # triangular(); an unset option is passed on as None.
    max_dist = str_to_int(args.max_dist) if args.max_dist is not None else None

    # With ``oe`` set and no explicit symmetry, centre the colorbar on 0.
    symmetry_upper = args.colorbar_symmetry_upper
    if args.oe_upper and symmetry_upper is None:
        symmetry_upper = 0
    symmetry_lower = args.colorbar_symmetry_lower
    if args.oe_lower and symmetry_lower is None:
        symmetry_lower = 0

    norm_upper = "lin" if not args.log_upper else "log"
    upper_plot = kplt.HicPlot(matrix_upper,
                              colormap=colormap_upper,
                              max_dist=max_dist,
                              norm=norm_upper,
                              vmin=args.vmin_upper,
                              vmax=args.vmax_upper,
                              oe=args.oe_upper,
                              log=args.oe_upper,
                              colorbar_symmetry=symmetry_upper,
                              show_colorbar=args.show_colorbar,
                              adjust_range=False)
    norm_lower = "lin" if not args.log_lower else "log"
    lower_plot = kplt.HicPlot(matrix_lower,
                              colormap=colormap_lower,
                              max_dist=max_dist,
                              norm=norm_lower,
                              vmin=args.vmin_lower,
                              vmax=args.vmax_lower,
                              oe=args.oe_lower,
                              log=args.oe_lower,
                              colorbar_symmetry=symmetry_lower,
                              show_colorbar=args.show_colorbar,
                              adjust_range=False)
    vsp = kplt.VerticalSplitPlot(upper_plot, lower_plot)
    return vsp, args
Exemplo n.º 5
0
def bar(parameters):
    """Build a bar plot from one or more region-based datasets.

    :param parameters: Argument list passed to the parser returned by
                       ``parsers.bar_parser()``
    :return: tuple ``(plot, args)`` with the ``kplt.BarPlot`` instance and
             the parsed arguments
    """
    parser = parsers.bar_parser()
    args = parser.parse_args(parameters)

    datasets = []
    for file_name in args.regions:
        datasets.append(fanc.load(file_name))

    # When labels are given there must be exactly one per dataset.
    labels = args.labels
    if labels is not None:
        if len(labels) != len(datasets):
            parser.error("Number of labels ({}) must be the same as number "
                         "of datasets ({})".format(len(labels), len(datasets)))

    bar_plot = kplt.BarPlot(
        datasets,
        attribute=args.attribute,
        labels=labels,
        ylim=args.ylim,
        plot_kwargs={'alpha': args.alpha},
        colors=args.colors,
        legend_location=args.legend_location)
    return bar_plot, args
Exemplo n.º 6
0
def square(parameters):
    """Build a square (2D) Hi-C matrix plot from CLI-style arguments.

    :param parameters: Argument list passed to the parser returned by
                       ``parsers.square_parser()``
    :return: tuple ``(plot, args)`` with the ``kplt.HicPlot2D`` instance and
             the parsed arguments
    """
    args = parsers.square_parser().parse_args(parameters)

    # Explicit colormap first, then 'bwr' when ``oe`` is set, then the
    # configured Hi-C default.
    if args.colormap is not None:
        colormap = args.colormap
    elif args.oe:
        colormap = 'bwr'
    else:
        colormap = config.colormap_hic

    matrix = fanc.load(os.path.expanduser(args.hic), mode='r')

    # With ``oe`` set and no explicit symmetry, centre the colorbar on 0.
    symmetry = args.colorbar_symmetry
    if args.oe and symmetry is None:
        symmetry = 0

    square_plot = kplt.HicPlot2D(
        matrix,
        colormap=colormap,
        norm="log" if args.log else "lin",
        vmin=args.vmin,
        vmax=args.vmax,
        show_colorbar=args.show_colorbar,
        adjust_range=args.adjust_range,
        oe=args.oe,
        # the plot's ``log`` option is tied to the ``oe`` flag
        log=args.oe,
        colorbar_symmetry=symmetry,
        flip=args.flip,
        matrix_norm=args.norm,
        weight_field=args.weight_field)
    return square_plot, args
Exemplo n.º 7
0
def scores(parameters):
    """Build a genomic vector array plot from a scores file.

    The plotted parameters are chosen in this order: those inside
    ``args.range`` (inclusive on both ends), else ``args.parameters``, else
    every parameter stored on the loaded array.

    :param parameters: Argument list passed to the parser returned by
                       ``parsers.scores_parser()``
    :return: tuple ``(plot, args)`` with the
             ``kplt.GenomicVectorArrayPlot`` instance and the parsed arguments
    """
    args = parsers.scores_parser().parse_args(parameters)

    array = fanc.load(os.path.expanduser(args.scores), mode='r')

    if args.range is not None:
        selected = [value for value in array._parameters
                    if args.range[0] <= value <= args.range[1]]
    elif args.parameters is not None:
        selected = args.parameters
    else:
        selected = array._parameters

    y_scale = "log" if args.log else "linear"
    symmetry = 0 if args.symmetry else None

    vector_plot = kplt.GenomicVectorArrayPlot(
        array,
        parameters=selected,
        y_scale=y_scale,
        colormap=args.colormap,
        colorbar_symmetry=symmetry,
        vmin=args.vmin,
        vmax=args.vmax,
        show_colorbar=args.show_colorbar,
        replacement_color=args.replacement_color,
        genomic_format=args.genomic_format)
    return vector_plot, args
Exemplo n.º 8
0
    def test_conversion(self, tmpdir):
        """A Hi-C file without '/meta_information' still loads as ``fanc.Hic``."""
        path = '{}/x.hic'.format(str(tmpdir))
        with dummy.sample_hic(file_name=path) as sample:
            # simulate old-style object
            sample.file.remove_node('/meta_information', recursive=True)

        # first load through the generic loader
        via_load = fanc.load(path, mode='r')
        assert isinstance(via_load, fanc.Hic)
        via_load.close()

        # open and close once directly through the class
        fanc.Hic(path).close()

        # the generic loader must still identify the file afterwards
        reloaded = fanc.load(path, mode='r')
        reloaded.close()
        assert isinstance(reloaded, fanc.Hic)
Exemplo n.º 9
0
def load_score_data(data):
    """Return ``data`` unchanged if it is ``RegionBased``, else load it.

    :param data: a ``RegionBased`` instance or anything ``fanc.load`` accepts
    :return: the original object, or the result of ``fanc.load(data)``
    """
    if not isinstance(data, RegionBased):
        # Not a RegionBased object yet: leave it to fanc.load to interpret.
        return fanc.load(data)
    return data
Exemplo n.º 10
0
def write_insulation(hic_file):
    """Compute insulation scores for a Hi-C file and export them as bigWig.

    The resolution is parsed from the file name, which must contain a
    ``<number>kb`` token. Scores are calculated for windows of 4, 6, 8 and 10
    times ``<number> * 1000`` and stored under ``data/boundaries``: one
    ``<prefix>_micro-c.ii`` file plus one ``<prefix>_micro-c_<multiplier>.bw``
    file per window size.

    :param hic_file: Path to the Hi-C file; passed to ``fanc.load``
    :raises IndexError: if the file name contains no ``<number>kb`` token
    """
    logging.info("working on %s", hic_file)
    hic = fanc.load(hic_file, mode='r')
    try:
        base_name = os.path.basename(hic_file)
        prefix = base_name.replace(".hic", "")

        res = int(re.findall('([0-9]+)kb', base_name)[0])
        logging.info("resolution detected as %s", str(res))

        # Window sizes are fixed multiples of the detected resolution.
        multipliers = (4, 6, 8, 10)
        window_sizes = [res * 1000 * m for m in multipliers]
        output_dir = os.path.join("data", "boundaries")

        # calculate insulation index
        with InsulationScores.from_hic(hic,
                                       normalise=True,
                                       log=True,
                                       window_sizes=window_sizes,
                                       file_name=os.path.join(
                                           output_dir,
                                           prefix + "_micro-c.ii")) as ii:

            # The multiplier names the output file, so it is used directly
            # instead of being recovered from the window size by division.
            for multiplier, window_size in zip(multipliers, window_sizes):
                logging.info("Writing insulation index for window size %i",
                             window_size)
                output_file = os.path.join(
                    output_dir,
                    prefix + '_micro-c_{}.bw'.format(multiplier))
                ii.to_bigwig(output_file, window_size)
    finally:
        # Release the input file even if parsing or the export fails.
        # NOTE(review): guarded because it is not visible here that every
        # object returned by fanc.load exposes close() - confirm.
        close = getattr(hic, "close", None)
        if callable(close):
            close()
Exemplo n.º 11
0
def triangular(parameters):
    """Build a triangular Hi-C plot from CLI-style arguments.

    :param parameters: Argument list passed to the parser returned by
                       ``parsers.triangular_parser()``
    :return: tuple ``(plot, args)`` with the ``kplt.HicPlot`` instance and
             the parsed arguments
    """
    args = parsers.triangular_parser().parse_args(parameters)

    # Explicit colormap first, then 'bwr' when ``oe`` is set, then the
    # configured Hi-C default.
    if args.colormap is not None:
        colormap = args.colormap
    elif args.oe:
        colormap = 'bwr'
    else:
        colormap = config.colormap_hic

    matrix = fanc.load(os.path.expanduser(args.hic), mode='r')
    from fanc.tools.general import str_to_int

    # The maximum distance is optional; convert it only when provided.
    max_dist = None
    if args.max_dist is not None:
        max_dist = str_to_int(args.max_dist)

    # With ``oe`` set and no explicit symmetry, centre the colorbar on 0.
    symmetry = args.colorbar_symmetry
    if args.oe and symmetry is None:
        symmetry = 0

    triangular_plot = kplt.HicPlot(
        matrix,
        colormap=colormap,
        max_dist=max_dist,
        norm="log" if args.log else "lin",
        vmin=args.vmin,
        vmax=args.vmax,
        show_colorbar=args.show_colorbar,
        adjust_range=args.adjust_range,
        oe=args.oe,
        # the plot's ``log`` option is tied to the ``oe`` flag
        log=args.oe,
        colorbar_symmetry=symmetry,
        ylabel=args.ylabel,
        weight_field=args.weight_field,
        default_value=args.default_value,
        matrix_norm=args.norm)
    return triangular_plot, args
Exemplo n.º 12
0
def line(parameters):
    """Build a line plot from one or more region-based datasets.

    :param parameters: Argument list passed to the parser returned by
                       ``parsers.line_parser()``
    :return: tuple ``(plot, args)`` with the ``kplt.LinePlot`` instance and
             the parsed arguments
    """
    parser = parsers.line_parser()
    args = parser.parse_args(parameters)

    datasets = []
    for file_name in args.regions:
        datasets.append(fanc.load(file_name))

    # When labels are given there must be exactly one per dataset.
    labels = args.labels
    if labels is not None:
        if len(labels) != len(datasets):
            parser.error("Number of labels ({}) must be the same as number "
                         "of datasets ({})".format(len(labels), len(datasets)))

    line_plot = kplt.LinePlot(
        datasets,
        bin_size=args.bin_size,
        fill=args.fill,
        attribute=args.attribute,
        labels=labels,
        style=args.line_style,
        ylim=args.ylim,
        colors=args.colors,
        legend_location=args.legend_location,
        plot_kwargs={'alpha': args.alpha})
    return line_plot, args
def export_marginals(hic_file, output_file):
    """Write the marginals of a Hi-C matrix as per-region BED scores.

    Marginals are requested with ``masked=False, norm=False`` and stored in
    the ``score`` attribute of each region, in region order.

    :param hic_file: Path to the Hi-C file; passed to ``fanc.load``
    :param output_file: Destination path handed to ``write_bed``
    """
    hic = fanc.load(hic_file)
    try:
        marginals = hic.marginals(masked=False, norm=False)
        regions = list(hic.regions())

        for pos, r in enumerate(regions):
            r.set_attribute("score", marginals[pos])

        write_bed(output_file, regions)
    finally:
        # Release the input file even if the export fails.
        # NOTE(review): guarded because it is not visible here that every
        # object returned by fanc.load exposes close() - confirm.
        close = getattr(hic, "close", None)
        if callable(close):
            close()
Exemplo n.º 14
0
    def test_auto_identification(self, tmpdir):
        """Files written by each listed class are re-identified by ``fanc.load``."""
        class_names = ('Hic', 'AccessOptimisedHic',
                       'FragmentMappedReadPairs', 'Reads', 'GenomicTrack',
                       'RaoPeakInfo', 'AccessOptimisedReadPairs')
        for class_name in class_names:
            path = '{}/{}.h5'.format(str(tmpdir), class_name)
            expected_cls = class_name_dict[class_name]

            # create a fresh file of this type and close it again
            expected_cls(file_name=path, mode='w').close()

            reloaded = fanc.load(path, mode='r')
            assert isinstance(reloaded, expected_cls)
            reloaded.close()
Exemplo n.º 15
0
    def test_old_style_index(self, tmpdir):
        """Check that ``fanc.load`` identifies files of every listed class.

        Three groups are covered: classes built from one Hi-C object, classes
        built from two Hi-C objects, and stand-alone classes whose
        '/meta_information' node is removed before reloading.
        """
        def assert_reloads_as(file_name, cls_):
            # Re-open read-only and verify the detected class.
            reloaded = fanc.load(file_name, mode='r')
            assert isinstance(reloaded, cls_)
            reloaded.close()

        with dummy.sample_hic() as hic:
            # Each class is listed exactly once; a repeated entry would only
            # overwrite the same file and run the same check again.
            for class_name in ('ABDomains', 'ABDomainMatrix',
                               'ExpectedContacts', 'ObservedExpectedRatio',
                               'PossibleContacts', 'RegionContactAverage',
                               'InsulationIndex', 'DirectionalityIndex'):
                file_name = str(tmpdir) + '/{}.h5'.format(class_name)
                cls_ = class_name_dict[class_name]
                # NOTE(review): unlike the last loop, no meta-information is
                # removed for these Hi-C derived classes - confirm intent.
                x = cls_(hic, file_name=file_name, mode='w')
                x.close()

                assert_reloads_as(file_name, cls_)

            for class_name in ('FoldChangeMatrix', ):
                file_name = str(tmpdir) + '/{}.h5'.format(class_name)
                cls_ = class_name_dict[class_name]
                # NOTE(review): no meta-information is removed here either.
                x = cls_(hic, hic, file_name=file_name, mode='w')
                x.close()

                assert_reloads_as(file_name, cls_)

        for class_name in ('Hic', 'AccessOptimisedHic',
                           'FragmentMappedReadPairs', 'Reads', 'GenomicTrack'):
            file_name = str(tmpdir) + '/{}.h5'.format(class_name)
            cls_ = class_name_dict[class_name]
            x = cls_(file_name=file_name, mode='w')
            # simulate missing meta-information
            x.file.remove_node('/meta_information', recursive=True)
            x.close()

            assert_reloads_as(file_name, cls_)
Exemplo n.º 16
0
def calc_and_write(hic_file):
    """Write the first two eigenvectors of a Hi-C file as BED files.

    Output names are derived by replacing ``.hic`` in ``hic_file`` with
    ``_eig1.bed`` / ``_eig2.bed``. Nothing is done when the first output
    already exists. A ``.cor`` file next to the input is loaded if present,
    otherwise it is created from the Hi-C file.

    :param hic_file: Path to the Hi-C file
    """
    logging.info("working on %s", hic_file)

    first_out = hic_file.replace(".hic", "_eig1.bed")
    second_out = hic_file.replace(".hic", "_eig2.bed")

    # Only the first output is used to decide whether the work is done.
    if os.path.exists(first_out):
        logging.info("output file exists; skipping!")
        return

    correlation_path = hic_file.replace(".hic", ".cor")
    if not os.path.exists(correlation_path):
        logging.info("Calculating correlation matrix and saving to %s",
                     correlation_path)
        source = fanc.load(hic_file)
        compartments = fanc.ABCompartmentMatrix.from_hic(
            source, file_name=correlation_path)
    else:
        logging.info("Correlation matrix %s exists, loading it",
                     correlation_path)
        compartments = fanc.load(correlation_path)

    first_ev, second_ev = calc_2_eigenvectors(compartments)
    write_bed_eig(compartments, first_ev, file_name=first_out)
    write_bed_eig(compartments, second_ev, file_name=second_out)
def write_stats(input_dir, output_file):
    """Append statistics for every ``*pairs`` file in a directory to a CSV.

    For each matching file one ``total`` row is written, followed by one row
    per entry of the statistics mapping returned by ``stats``.

    :param input_dir: Directory scanned for file names ending in ``pairs``
    :param output_file: CSV file that rows are appended to
    """
    candidates = os.listdir(input_dir)
    for file_name in candidates:
        if not file_name.endswith("pairs"):
            continue

        logging.info("Working on %s", file_name)
        pairs = fanc.load(os.path.join(input_dir, file_name))
        statistics, total = stats(pairs, pairs._edges)

        # The output is re-opened in append mode for every input file.
        with open(output_file, "a+") as handle:
            writer = csv.writer(handle)
            writer.writerow([file_name, "total", total])
            writer.writerows([file_name, key, val]
                             for key, val in statistics.items())
Exemplo n.º 18
0
    def test_auto_identification(self, tmpdir):
        """Files written by each listed class are re-identified by ``fanc.load``."""
        class_names = ('Hic', 'LegacyHic', 'ReadPairs', 'AggregateMatrix',
                       'ComparisonMatrix', 'FoldChangeMatrix',
                       'DifferenceMatrix', 'ComparisonRegions',
                       'FoldChangeRegions', 'DifferenceRegions',
                       'DirectionalityIndex')
        for class_name in class_names:
            path = '{}/{}.h5'.format(str(tmpdir), class_name)
            expected_cls = class_name_dict[class_name]

            # create a fresh file of this type and close it again
            expected_cls(file_name=path, mode='w').close()

            reloaded = fanc.load(path, mode='r')
            assert isinstance(reloaded, expected_cls)
            reloaded.close()
Exemplo n.º 19
0
    def test_hic_based_auto_identification(self, tmpdir):
        """Hi-C derived classes written to disk are re-identified on load."""
        with dummy.sample_hic() as hic:
            # (class name, positional constructor arguments); the last entry
            # is the only class built from two Hi-C objects.
            cases = [(name, (hic, )) for name in (
                'ABDomains', 'ABDomainMatrix',
                'ExpectedContacts', 'ObservedExpectedRatio',
                'PossibleContacts', 'RegionContactAverage',
                'InsulationIndex', 'DirectionalityIndex')]
            cases.append(('FoldChangeMatrix', (hic, hic)))

            for class_name, positional in cases:
                path = '{}/{}.h5'.format(str(tmpdir), class_name)
                expected_cls = class_name_dict[class_name]
                expected_cls(*positional, file_name=path, mode='w').close()

                reloaded = fanc.load(path, mode='r')
                assert isinstance(reloaded, expected_cls)
                reloaded.close()
Exemplo n.º 20
0
def load_oe_contacts(matrix_file, regions_file=None):
    """Load contacts either through fanc or from a sparse matrix file.

    ``fanc.load`` is tried first. If that path raises ``ValueError``, the
    input is read as a sparse matrix instead, which requires a regions file.

    :param matrix_file: Path to the matrix file
    :param regions_file: Path to the regions file; required only for the
                         sparse fallback
    :return: tuple ``(edges, region_trees, regions)``
    :raises ValueError: if the fanc path fails and no regions file was
                        given, or if the sparse loaders raise an
                        ``AssertionError``
    """
    import fanc
    try:
        # try loading via fanc
        reference_loaded = fanc.load(matrix_file)
        edges = edges_dict_from_fanc(reference_loaded)
        regions = reference_loaded.regions
        region_trees = region_interval_trees(regions)
    except ValueError:
        # Validate explicitly: an ``assert`` would be stripped under
        # ``python -O`` and would produce an error without any message.
        if regions_file is None:
            raise ValueError("Regions file needs to be specified "
                             "for sparse input.")
        try:
            regions, ix_converter, _ = load_regions(regions_file)
            region_trees = region_interval_trees(regions)
            edges = edges_dict_from_sparse(
                edges_from_sparse_matrix(matrix_file, ix_converter))
        except AssertionError as error:
            # Assertion failures from the sparse loaders keep surfacing as
            # ValueError, now carrying the original message.
            raise ValueError(error)
    return edges, region_trees, regions
Exemplo n.º 21
0
 def __init__(self, hic_data, adjust_range=False, buffering_strategy="relative",
              buffering_arg=1, weight_field=None, default_value=None, smooth_sigma=None,
              matrix_norm=True, oe=False, log=False, **kwargs):
     """
     Set up the Hi-C data source and its buffer for plotting.

     :param hic_data: Path to Hi-C data on disk or
                     :class:`~fanc.data.genomic.Hic` or :class:`~fanc.data.genomic.RegionMatrix`
     :param adjust_range: Draw a slider to adjust vmin/vmax interactively. Default: False
     :param buffering_strategy: A valid buffering strategy for :class:`~BufferedMatrix`
     :param buffering_arg: Adjust range of buffering for :class:`~BufferedMatrix`
     :param weight_field: Forwarded to ``prepare_hic_buffer``
     :param default_value: Forwarded to ``prepare_hic_buffer``
     :param smooth_sigma: Forwarded to ``prepare_hic_buffer``
     :param matrix_norm: Forwarded to ``prepare_hic_buffer`` as ``norm``
     :param oe: Forwarded to ``prepare_hic_buffer``
     :param log: Forwarded to ``prepare_hic_buffer``
     :param kwargs: Passed on to the parent class constructor
     """
     super(BasePlotterHic, self).__init__(**kwargs)

     # A string is treated as a path and opened read-only.
     self.hic_data = (fanc.load(hic_data, mode="r")
                      if isinstance(hic_data, string_types) else hic_data)

     buffer_options = dict(buffering_strategy=buffering_strategy,
                           buffering_arg=buffering_arg,
                           weight_field=weight_field,
                           default_value=default_value,
                           smooth_sigma=smooth_sigma,
                           norm=matrix_norm,
                           oe=oe,
                           log=log)
     self.hic_buffer = prepare_hic_buffer(self.hic_data, **buffer_options)

     # Slider attributes start out unset.
     self.slider = None
     self.adjust_range = adjust_range
     self.vmax_slider = None
Exemplo n.º 22
0
def load_contacts(matrix_file, regions_file=None):
    """Load contacts either through fanc or from a sparse matrix file.

    ``fanc.load`` is tried first. If that path raises ``ValueError``, the
    error is printed and the input is processed with
    ``chess.oe.observed_expected`` instead, which requires a regions file.

    :param matrix_file: Path to the matrix file
    :param regions_file: Path to the regions file; required only for the
                         sparse fallback
    :return: tuple ``(edges, region_trees, regions)``
    :raises ValueError: if the fanc path fails and no regions file was
                        given, or if the sparse loaders raise an
                        ``AssertionError``
    """
    import fanc
    from chess.oe import observed_expected
    try:
        # try loading via fanc
        reference_loaded = fanc.load(matrix_file)
        edges = oe_edges_dict_from_fanc(reference_loaded)
        regions = reference_loaded.regions
        region_trees = region_interval_trees(regions)
    except ValueError as initial_error:
        print(initial_error)
        # Validate explicitly: an ``assert`` would be stripped under
        # ``python -O``. The message now has its missing space.
        if regions_file is None:
            raise ValueError("Regions file needs to be "
                             "specified for sparse input.")
        try:
            regions, _ix_converter, _ = load_regions(regions_file)
            region_trees = region_interval_trees(regions)
            _, reference_oe = observed_expected(regions_file, matrix_file)
            edges = edges_dict_from_sparse(reference_oe)
        except AssertionError as error:
            # Assertion failures from the sparse loaders keep surfacing as
            # ValueError with their original message.
            raise ValueError(error)
    return edges, region_trees, regions
Exemplo n.º 23
0
def write_expected(input_file, output_prefix):
    """Write expected contact values of a Hi-C file to two text files.

    ``<output_prefix>_expected_values_all.txt`` holds the intra-chromosomal
    expected value per distance; ``<output_prefix>_expected_values_per_chrom.txt``
    holds the same per chromosome. Distances are the value's index multiplied
    by the bin size. Both files are tab-separated with a header line.

    :param input_file: Path to the Hi-C file; opened with ``mode="a"``
    :param output_prefix: Prefix of both output file names
    """
    hic = fanc.load(input_file, mode="a")
    try:
        # The inter-chromosomal value is returned but not written anywhere.
        intra_expected, expected_by_chromosome, _inter_expected = \
            hic.expected_values()
        bin_size = hic.bin_size

        output_file = output_prefix + "_expected_values_all.txt"
        with open(output_file, "w") as out:
            out.write("distance\texpected\n")
            for i, value in enumerate(intra_expected):
                out.write(str(i * bin_size) + "\t" + str(value) + "\n")

        output_file = output_prefix + "_expected_values_per_chrom.txt"
        with open(output_file, "w") as out:
            out.write("chrom\tdistance\texpected\n")
            for chrom, expected in expected_by_chromosome.items():
                # The distance comes from the index, so a per-chromosome
                # vector is not limited to the intra-chromosomal length.
                for i, value in enumerate(expected):
                    out.write(chrom + "\t" + str(i * bin_size) + "\t" +
                              str(value) + "\n")
    finally:
        # Release the Hi-C file even if writing fails.
        # NOTE(review): guarded because it is not visible here that every
        # object returned by fanc.load exposes close() - confirm.
        close = getattr(hic, "close", None)
        if callable(close):
            close()
Exemplo n.º 24
0
    def test_call_peaks(self):
        """Peak calling on the chr11 test matrix yields the expected counts."""
        base = os.path.dirname(os.path.realpath(__file__))
        hic_10kb = fanc.load(
            base + "/test_peaks/rao2014.chr11_77400000_78600000.hic",
            mode='r')

        peaks = RaoPeakCaller().call_peaks(hic_10kb)

        assert len(peaks.edges) == 6525

        passing = []
        found_43_57 = False
        for peak in peaks.edges:
            # a peak counts as valid when all four FDR values are below 0.1
            fdrs = (peak.fdr_ll, peak.fdr_v, peak.fdr_h, peak.fdr_d)
            if all(fdr < 0.1 for fdr in fdrs):
                passing.append(peak)
            if (peak.source, peak.sink) == (43, 57):
                found_43_57 = True

        assert len(passing) == 134
        assert found_43_57
        hic_10kb.close()
        peaks.close()
Exemplo n.º 25
0
def auto(argv, **kwargs):
    from fanc.tools.general import which

    parser = auto_parser()
    args = parser.parse_args(argv[2:])

    verbosity = kwargs.get("verbosity", 2)
    if verbosity > 0:
        verbosity_flag = '-' + 'v' * verbosity
        fanc_base_command = ['fanc', verbosity_flag]
    else:
        fanc_base_command = ['fanc']

    log_file = kwargs.get("log_file", None)
    if log_file is not None:
        fanc_base_command += ['-l', log_file]

    bin_sizes = args.bin_sizes
    split_ligation_junction = args.split_ligation_junction
    restriction_enzyme = args.restriction_enzyme
    threads = args.threads
    genome = args.genome
    genome_index = args.genome_index
    basename = args.basename
    quality_cutoff = args.quality_cutoff
    iterative_quality_cutoff = args.iterative_quality_cutoff
    tmp = args.tmp
    mapper_parallel = args.mapper_parallel
    split_fastq = args.split_fastq
    memory_map = args.memory_map
    iterative = args.iterative
    step_size = args.step_size
    sam_sort = args.sam_sort
    filter_pairs = args.filter_pairs
    inward_cutoff = args.inward_cutoff
    outward_cutoff = args.outward_cutoff
    auto_le_cutoff = args.auto_le_cutoff
    process_hic = args.process_hic
    ice = args.ice
    norm_method = args.norm_method
    restore_coverage = args.restore_coverage
    run_with = args.run_with
    job_prefix = args.job_prefix
    grid_startup_commands = os.path.expanduser(args.grid_startup_commands) \
        if args.grid_startup_commands is not None else None
    grid_cleanup_commands = os.path.expanduser(args.grid_cleanup_commands) \
        if args.grid_cleanup_commands is not None else None
    force_overwrite = args.force_overwrite
    output_folder = os.path.expanduser(args.output_folder)

    file_names = [os.path.expanduser(file_name) for file_name in args.input]
    file_types = [file_type(file_name) for file_name in file_names]
    file_basenames = [file_basename(file_name) for file_name in file_names]

    if ice:
        warnings.warn("The --ice option is deprecated. Please use '--norm-method ice' instead!")
        norm_method = 'ice'

    for file_name in file_names:
        if not os.path.exists(file_name):
            parser.error("File '{}' does not exist!".format(file_name))

    runner = None
    if run_with == 'parallel':
        runner = ParallelTaskRunner(threads)
    elif run_with == 'sge':
        from fanc.config import config
        if which(config.sge_qsub_path) is None:
            parser.error("Using SGE not possible: "
                         "Cannot find 'qsub' at path '{}'. You can change "
                         "this path using fanc config files and the "
                         "'sge_qsub_path' parameter".format(config.sge_qsub_path))
        from fanc.tools.files import mkdir
        sge_log_dir = mkdir(output_folder, 'sge_logs')
        runner = SgeTaskRunner(log_dir=sge_log_dir, task_prefix=job_prefix,
                               startup_commands_file=grid_startup_commands,
                               cleanup_commands_file=grid_cleanup_commands)
    elif run_with == 'slurm':
        from fanc.config import config
        if which(config.slurm_sbatch_path) is None:
            parser.error("Using Slurm not possible: "
                         "Cannot find 'sbatch' at path '{}'. You can change "
                         "this path using fanc config files and the "
                         "'slurm_sbatch_path' parameter".format(config.slurm_sbatch_path))
        from fanc.tools.files import mkdir
        slurm_log_dir = mkdir(output_folder, 'slurm_logs')
        runner = SlurmTaskRunner(log_dir=slurm_log_dir, task_prefix=job_prefix,
                               startup_commands_file=grid_startup_commands,
                               cleanup_commands_file=grid_cleanup_commands)
    elif run_with == 'test':
        runner = ParallelTaskRunner(threads, test=True)
    else:
        parser.error("Runner '{}' is not valid. See --run-with "
                     "parameter for options".format(run_with))

    for i in range(len(file_types)):
        if file_types[i] not in ('fastq', 'sam', 'pairs_txt', 'pairs', 'hic'):
            import fanc
            try:
                ds = fanc.load(file_names[i], mode='r')
                if isinstance(ds, fanc.Hic):
                    file_types[i] = 'hic'
                elif isinstance(ds, fanc.ReadPairs):
                    file_types[i] = 'pairs'
                else:
                    raise ValueError("Could not detect file type using fanc load.")
            except ValueError:
                parser.error("Not a valid input file type: {}".format(file_types[i]))

    if basename is None:
        if len(file_basenames) == 1:
            basename = file_basenames[0]
        else:
            basename = []
            for pair in zip(*file_basenames):
                if pair[0] == pair[1]:
                    basename.append(pair[0])
                else:
                    break
            if len(basename) == 0:
                basename = file_basenames[0]
            else:
                if basename[-1] in ['.', '_']:
                    basename = "".join(basename[:-1])
                else:
                    basename = "".join(basename)

    if not output_folder[-1] == '/':
        output_folder += '/'

    # 0. Do some sanity checks on required flags
    is_bwa = False
    is_bowtie2 = False
    if 'fastq' in file_types:
        if args.genome_index is None:
            parser.error("Must provide genome index (-i) when mapping FASTQ files!")
        else:
            check_path = os.path.expanduser(genome_index)
            if check_path.endswith('.'):
                check_path = check_path[:-1]

            is_bowtie2 = True
            for i in range(1, 5):
                if not os.path.exists(check_path + '.{}.bt2'.format(i)):
                    is_bowtie2 = False
            for i in range(1, 3):
                if not os.path.exists(check_path + '.rev.{}.bt2'.format(i)):
                    is_bowtie2 = False

            is_bwa = True
            for ending in ('amb', 'ann', 'bwt', 'pac', 'sa'):
                if not os.path.exists(check_path + '.{}'.format(ending)):
                    is_bwa = False

            if not is_bowtie2 and not is_bwa:
                parser.error("Cannot detect Bowtie2 or BWA index.")

            if is_bowtie2 and not which('bowtie2'):
                parser.error("bowtie2 must be in PATH for mapping!")

            if is_bwa and not which('bwa'):
                parser.error("bwa must be in PATH for mapping!")

    if 'fastq' in file_types or 'sam' in file_types:
        if genome is None:
            parser.error("Must provide genome (-g) to process read pair files!")

        if restriction_enzyme is None:
            from fanc.regions import genome_regions
            try:
                genome_regions(genome)
            except ValueError:
                parser.error("Must provide restriction enzyme (-r) to process read pair files!")
        else:
            res = restriction_enzyme.split(",")
            from Bio import Restriction
            for r in res:
                try:
                    getattr(Restriction, r)
                except AttributeError:
                    parser.error("Restriction enzyme string '{}' is not recognized".format(restriction_enzyme))

    logger.info("Output folder: {}".format(output_folder))
    logger.info("Input files: {}".format(", ".join(file_names)))
    logger.info("Input file types: {}".format(", ".join(file_types)))

    logger.info("Final basename: %s (you can change this with the -n option!)" % basename)

    # 1. create default folders in root directory
    if run_with != 'test':
        logger.info("Creating output folders...")
        from ..tools.general import mkdir
        mkdir(output_folder, 'fastq')
        mkdir(output_folder, 'sam')
        mkdir(output_folder, 'pairs/')
        mkdir(output_folder, 'hic/binned')
        mkdir(output_folder, 'plots/stats')

    # 2. If input files are (gzipped) FASTQ, map them iteratively first
    fastq_files = []
    for i in range(len(file_names)):
        if file_types[i] != 'fastq':
            continue
        fastq_files.append(i)

    sam_created = [False] * len(file_names)
    mapping_tasks = []
    if len(fastq_files) > 0:
        if genome_index.endswith('.'):
            genome_index = genome_index[:-1]

        bam_files = []
        for i, ix in enumerate(fastq_files):
            bam_file = output_folder + 'sam/' + file_basenames[ix] + '.bam'
            if not force_overwrite and os.path.exists(bam_file):
                parser.error("File exists ({}), use -f to force overwriting it.".format(bam_file))
            bam_files.append(bam_file)

            mapping_command = fanc_base_command + ['map', '-m', '25',
                                                   '-s', str(step_size),
                                                   '-t', str(threads)]

            if iterative_quality_cutoff is not None:
                mapping_command += ['-q', str(iterative_quality_cutoff)]
            if tmp:
                mapping_command.append('-tmp')
            if not mapper_parallel:
                mapping_command.append('--fanc-parallel')
            if split_fastq:
                mapping_command.append('--split-fastq')
            if memory_map:
                mapping_command.append('--memory-map')
            if not iterative:
                mapping_command.append('--no-iterative')
            if split_ligation_junction:
                mapping_command.append('--restriction-enzyme')
                mapping_command.append(restriction_enzyme)

            mapping_command += [file_names[ix], genome_index, bam_file]

            mapping_task = CommandTask(mapping_command)
            runner.add_task(mapping_task, threads=threads)
            mapping_tasks.append(mapping_task)

        for ix, i in enumerate(fastq_files):
            file_names[i] = bam_files[ix]
            file_types[i] = 'sam'
            sam_created[i] = True

    if sam_sort:
        sort_threads = min(4, threads)

        sam_sort_tasks = []
        # sort SAM files
        sam_files = []
        in_place = []
        for i in range(len(file_names)):
            if file_types[i] != 'sam':
                continue
            sam_files.append(i)
            in_place.append(sam_created[i])

        if len(sam_files) > 0:
            sorted_sam_files = []
            for i, ix in enumerate(sam_files):
                sort_command = fanc_base_command + ['sort_sam', '-t', str(sort_threads),
                                                    file_names[ix]]
                if in_place[i]:
                    sorted_sam_files.append(file_names[ix])
                else:
                    sam_path, sam_extension = os.path.splitext(file_names[ix])
                    sam_basename = os.path.basename(sam_path)
                    sorted_sam_file = os.path.join(output_folder, 'sam', sam_basename + '_sort' + sam_extension)
                    if not force_overwrite and os.path.exists(sorted_sam_file):
                        parser.error("File exists ({}), use -f to force overwriting it.".format(sorted_sam_file))

                    sorted_sam_files.append(sorted_sam_file)
                    sort_command.append(sorted_sam_file)

                if tmp:
                    sort_command.append('-tmp')

                sam_sort_task = CommandTask(sort_command)
                runner.add_task(sam_sort_task, wait_for=mapping_tasks, threads=1)
                sam_sort_tasks.append(sam_sort_task)

            for ix, i in enumerate(sam_files):
                file_names[i] = sorted_sam_files[ix]
    else:
        sam_sort_tasks = mapping_tasks

    total_pairs = 0
    pairs_txt_tasks = []
    # sort SAM files
    pairs_txt_files = []
    for i in range(len(file_names)):
        if file_types[i] != 'pairs_txt':
            continue
        pairs_txt_files.append(i)

    if len(pairs_txt_files) > 0:
        load_threads = max(int(threads / len(pairs_txt_files)), 1)

        pairs_files = []
        for ix in pairs_txt_files:
            pairs_txt_file = file_names[ix]
            pairs_file = os.path.join(output_folder, 'pairs', '{}_{}.pairs'.format(basename, total_pairs))
            total_pairs += 1
            if not force_overwrite and os.path.exists(pairs_file):
                parser.error("File exists ({}), use -f to force overwriting it.".format(pairs_file))

            pairs_files.append(pairs_file)

            pairs_command = fanc_base_command + ['pairs', '-f',
                                                 # loading
                                                 '-g', genome,
                                                 '-t', str(load_threads)]

            if restriction_enzyme is not None:
                pairs_command += ['-r', restriction_enzyme]
            if is_bwa:
                pairs_command.append('--bwa')
            if tmp:
                pairs_command.append('-tmp')
            if sam_sort:
                pairs_command.append('-S')

            pairs_command += [pairs_txt_file, pairs_file]

            pairs_task = CommandTask(pairs_command)
            runner.add_task(pairs_task, wait_for=[], threads=load_threads)
            pairs_txt_tasks.append(pairs_task)

            pairs_files.append(pairs_file)

        for ix, i in enumerate(pairs_txt_files):
            file_names[i] = pairs_files[ix]
            file_types[i] = 'pairs'

    # load pairs directly from SAM
    sam_file_pairs = []
    i = 0
    while i < len(file_names):
        if file_types[i] == 'sam':
            if not file_types[i + 1] == 'sam':
                parser.error("Cannot create SAM pairs, because {} "
                             "is missing a partner file".format(file_names[i]))
            sam_file_pairs.append((i, i + 1))
            i += 1
        i += 1

    if len(sam_file_pairs) > 0:
        sam_to_pairs_tasks = pairs_txt_tasks
        load_threads = max(int(threads/len(sam_file_pairs)), 1)

        pairs_files = []
        for i, j in sam_file_pairs:
            if len(sam_file_pairs) > 1 or total_pairs > 0:
                pairs_file = os.path.join(output_folder, 'pairs', '{}_{}.pairs'.format(basename, total_pairs))
                total_pairs += 1
            else:
                pairs_file = output_folder + 'pairs/' + basename + '.pairs'

            if not force_overwrite and os.path.exists(pairs_file):
                parser.error("File exists ({}), use -f to force overwriting it.".format(pairs_file))

            pairs_command = fanc_base_command + ['pairs', '-f',
                                                 # loading
                                                 '-g', genome,
                                                 '-t', str(load_threads),
                                                 # filtering
                                                 '-us']
            if restriction_enzyme is not None:
                pairs_command += ['-r', restriction_enzyme]
            if quality_cutoff is not None:
                pairs_command += ['-q', str(quality_cutoff)]
            if is_bwa:
                pairs_command.append('--bwa')
            if tmp:
                pairs_command.append('-tmp')
            if sam_sort:
                pairs_command.append('-S')

            pairs_command += [file_names[i], file_names[j], pairs_file]

            pairs_task = CommandTask(pairs_command)
            runner.add_task(pairs_task, wait_for=sam_sort_tasks, threads=load_threads)
            sam_to_pairs_tasks.append(pairs_task)

            pairs_files.append(pairs_file)

        for ix, sam_pair in enumerate(reversed(sam_file_pairs)):
            file_names[sam_pair[0]] = pairs_files[ix]
            del file_names[sam_pair[1]]
            file_types[sam_pair[0]] = 'pairs'
            del file_types[sam_pair[1]]
    else:
        sam_to_pairs_tasks = pairs_txt_tasks + sam_sort_tasks

    # 7. Pairs stats and filtering
    pairs_files = []
    for i in range(len(file_names)):
        if file_types[i] != 'pairs':
            continue
        pairs_files.append(i)

    if len(pairs_files) > 0 and filter_pairs:
        pairs_tasks = []
        for ix in pairs_files:
            pair_basename = os.path.basename(os.path.splitext(file_names[ix])[0])

            pairs_stats_file = output_folder + 'plots/stats/' + pair_basename + '.pairs.stats.pdf'
            ligation_error_file = output_folder + 'plots/stats/' + pair_basename + '.pairs.ligation_error.pdf'
            re_dist_file = output_folder + 'plots/stats/' + pair_basename + '.pairs.re_dist.pdf'

            pairs_command = fanc_base_command + ['pairs',
                                                 # filtering
                                                 '-d', '10000',
                                                 '-l', '-p', '2']

            if tmp:
                pairs_command.append('-tmp')

            if inward_cutoff is not None:
                pairs_command += ['-i', str(inward_cutoff)]
                if outward_cutoff is None:
                    pairs_command += ['-o', '0']
            if outward_cutoff is not None:
                pairs_command += ['-o', str(outward_cutoff)]
                if inward_cutoff is None:
                    pairs_command += ['-i', '0']
            if inward_cutoff is None and outward_cutoff is None and auto_le_cutoff:
                pairs_command += ['--filter-ligation-auto']

            pairs_command += ['--statistics-plot', pairs_stats_file, file_names[ix]]
            pairs_task = CommandTask(pairs_command)
            runner.add_task(pairs_task, wait_for=sam_to_pairs_tasks, threads=1)
            pairs_tasks.append(pairs_task)

            ligation_error_command = fanc_base_command + ['pairs', '--ligation-error-plot',
                                                          ligation_error_file, file_names[ix]]
            ligation_error_task = CommandTask(ligation_error_command)
            runner.add_task(ligation_error_task, wait_for=pairs_task, threads=1)

            re_dist_command = fanc_base_command + ['pairs', '--re-dist-plot',
                                                   re_dist_file, file_names[ix]]
            re_dist_task = CommandTask(re_dist_command)
            runner.add_task(re_dist_task, wait_for=pairs_task, threads=1)
    else:
        pairs_tasks = sam_to_pairs_tasks

    # 8. Pairs to Hic
    pairs_files = []
    for i in range(len(file_names)):
        if file_types[i] != 'pairs':
            continue
        pairs_files.append(i)

    if len(pairs_files) > 0 and process_hic:
        hic_tasks = []
        hic_files = []
        for ix in pairs_files:
            hic_basename = os.path.basename(os.path.splitext(file_names[ix])[0])
            if hic_basename.endswith('_filtered'):
                hic_basename = hic_basename[:-9]
            hic_file = output_folder + 'hic/' + hic_basename + '.hic'

            if not force_overwrite and os.path.exists(hic_file):
                parser.error("File exists ({}), use -f to force overwriting it.".format(hic_file))

            hic_command = fanc_base_command + ['hic', '-f']
            if tmp:
                hic_command.append('-tmp')
            hic_command += [file_names[ix], hic_file]
            hic_task = CommandTask(hic_command)
            runner.add_task(hic_task, wait_for=pairs_tasks, threads=1)
            hic_tasks.append(hic_task)
            hic_files.append(hic_file)

        for ix, i in enumerate(pairs_files):
            file_names[i] = hic_files[ix]
            file_types[i] = 'hic'
    else:
        hic_tasks = pairs_tasks

    # 9. Merge Hic
    hic_files = []
    for i in range(len(file_names)):
        if file_types[i] != 'hic':
            continue
        hic_files.append(i)

    if len(hic_files) > 1:
        merge_hic_tasks = []
        output_hic = output_folder + 'hic/' + basename + '.hic'

        if not force_overwrite and os.path.exists(output_hic):
            parser.error("File exists ({}), use -f to force overwriting it.".format(output_hic))

        merge_hic_command = fanc_base_command + ['hic', '-f']
        if tmp:
            merge_hic_command.append('-tmp')

        hics = [file_names[i] for i in hic_files]
        merge_hic_command += hics + [output_hic]
        merge_hic_task = CommandTask(merge_hic_command)
        runner.add_task(merge_hic_task, wait_for=hic_tasks, threads=1)
        merge_hic_tasks.append(merge_hic_task)

        file_names[hic_files[0]] = output_hic
        hic_files.pop(0)
        for ix, i in enumerate(reversed(hic_files)):
            del file_names[i]
            del file_types[i]
    else:
        merge_hic_tasks = hic_tasks

    from fanc.tools.general import human_format, str_to_int

    hic_files = []
    for i in range(len(file_names)):
        if file_types[i] != 'hic':
            continue
        hic_files.append(i)

    if len(hic_files) > 0:
        for ix in hic_files:
            hic_file = file_names[ix]
            binned_hic_file_base = output_folder + 'hic/binned/' + basename + '_'
            bin_threads = min(4, threads)

            for bin_size in bin_sizes:
                bin_size = str_to_int(str(bin_size))
                bin_size_str = human_format(bin_size, 0).lower() + 'b'

                binned_hic_file = binned_hic_file_base + bin_size_str + '.hic'

                if not force_overwrite and os.path.exists(binned_hic_file):
                    parser.error("File exists ({}), use -f to force overwriting it.".format(binned_hic_file))

                hic_basename = os.path.basename(os.path.splitext(binned_hic_file)[0])
                hic_stats_file = output_folder + 'plots/stats/' + \
                                 hic_basename + '.stats.pdf'

                hic_command = fanc_base_command + ['hic', '-f', '-b', str(bin_size),
                                                   '-r', '0.1', '-t', str(bin_threads),
                                                   '--statistics-plot', hic_stats_file,
                                                   '-n', '--norm-method', norm_method]
                if tmp:
                    hic_command.append('-tmp')
                if restore_coverage:
                    hic_command.append('-c')

                hic_command += [hic_file, binned_hic_file]

                hic_task = CommandTask(hic_command)
                runner.add_task(hic_task, wait_for=merge_hic_tasks, threads=bin_threads)

    runner.run()
    return 0
Exemplo n.º 26
0
# chr18: 899140-(899308[-1])-899476 -- chr18: 1509911-(1510021[1])-1510076
# end snippet pairs filter masked

# start snippet pairs filter exclude
for pair in pairs.pairs(excluded_filters=['self-ligation']):
    print(pair)
# end snippet pairs filter exclude

# start snippet hic convert
hic_folder = mkdir(os.path.join(output_folder, 'hic'))
hic_file = os.path.join(hic_folder, 'example.hic')
hic = pairs.to_hic(file_name=hic_file)
# end snippet hic convert

# Not part of any snippet: close the object returned by to_hic() and load
# hic_file again through fanc.load(); the snippets below use this new handle.
hic.close()
hic = fanc.load(hic_file)

# start snippet hic bin
binned_hic = hic.bin(1000000,
                     file_name=os.path.join(hic_folder, 'binned_1mb.hic'),
                     threads=4)
# end snippet hic bin

# start snippet hic filter
from fanc.hic import LowCoverageFilter
lc_filter = LowCoverageFilter(binned_hic, rel_cutoff=0.2)
binned_hic.filter(lc_filter)
binned_hic.run_queued_filters()
# end snippet hic filter

# start snippet hic balance
Exemplo n.º 27
0
#                                mode="r")
# Tollrm910_nc14_hic_plot = fancplot.HicPlot(Tollrm910_nc14_hic, vmin=1e-03,
#                                            vmax=1e-01, norm="log",
#                        draw_minor_ticks=False, title="Tollrm910")

# Toll10B_nc14_hic = fanc.load(os.path.join("data", "hic", "merged",
#                                           "Toll10B-nc14", "hic",
#                                           "Toll10B-nc14_1kb.hic"), mode="r")
# Toll10B_nc14_hic_plot = fancplot.HicPlot(Toll10B_nc14_hic, vmin=1e-03,
#                                          vmax=1e-01, norm="log",
#                                          draw_minor_ticks=False,
#                                          title="Toll10B")


# Display settings shared by both Micro-C heatmaps below.
microc_plot_kwargs = dict(vmin=1e-03, vmax=1e-01, norm="log",
                          draw_minor_ticks=False)

# gd7 Micro-C matrix; `res` is defined outside this excerpt
# (presumably a resolution label -- confirm against the caller).
gd7_microc_file = os.path.join("data", "micro-c", "merged", "gd7", "hic",
                               "gd7_" + res + ".hic")
gd7_microc = fanc.load(gd7_microc_file, mode="r")
gd7_microc_plot = fancplot.HicPlot(gd7_microc, title="gd7",
                                   **microc_plot_kwargs)

# Control Micro-C matrix, loaded and plotted with the same settings.
control_microc_file = os.path.join("data", "micro-c", "merged", "control",
                                   "hic", "control_" + res + ".hic")
control_microc = fanc.load(control_microc_file, mode="r")
control_microc_plot = fancplot.HicPlot(control_microc, title="control",
                                       **microc_plot_kwargs)


genes = "external_data/flybase/dmel-all-r6.30.gtf.gz"
Exemplo n.º 28
0
# start snippet ab setup
import fanc
import fanc.plotting as fancplot
import matplotlib.pyplot as plt

hic_1mb = fanc.load("output/hic/binned/fanc_example_1mb.hic")
# end snippet ab setup

# NOTE: the two "alternative" snippets below rebind hic_1mb, so when this file
# is run top to bottom the later snippets operate on the object loaded last
# (the ".juicer.hic@1mb" path), not on the FAN-C file loaded above.
# start snippet alternative cooler
hic_1mb = fanc.load("architecture/other-hic/fanc_example.mcool@1mb")
# end snippet alternative cooler

# start snippet alternative juicer
hic_1mb = fanc.load("architecture/other-hic/fanc_example.juicer.hic@1mb")
# end snippet alternative juicer

# start snippet ab matrix
ab = fanc.ABCompartmentMatrix.from_hic(hic_1mb)
# end snippet ab matrix

# start snippet ab subset
ab_chr18 = ab.matrix(('chr18', 'chr18'))
# end snippet ab subset

# start snippet ab fancplot-correlation
fig, ax = plt.subplots()
mp = fancplot.SquareMatrixPlot(ab, ax=ax,
                           norm='lin', colormap='RdBu_r',
                           vmin=-1, vmax=1,
                           draw_minor_ticks=False)
mp.plot('chr18')
Exemplo n.º 29
0
def plot_regions(regions):
    """Write one PDF page of Hi-C and signal tracks for every region.

    The track stack is fixed and built in this order: the 3-4h Hi-C matrix,
    genes, the NC14-late PolII track, RNA-seq for gd7 / Tollrm910 / Toll10B,
    H3K27ac for the same three genotypes, and the candidate enhancers of
    each genotype. Every region is plotted with that stack and saved as one
    page of the PDF at ``output_file``.

    ``output_file`` is not defined in this function; it is read from the
    enclosing (module) scope, as are ``fanc``, ``fancplot``, ``PdfPages``,
    ``os`` and ``logging``.

    :param regions: iterable of ``(name, region, rnaseq_ymax)`` tuples.
                    ``region`` is passed unchanged to
                    ``GenomicFigure.plot``; ``rnaseq_ymax`` is the upper
                    y-limit applied to the three RNA-seq tracks on that
                    page (the lower limit is always 0). ``name`` is
                    unpacked but not used.
    :return: None
    """
    # One colour per genotype, reused for its RNA-seq, H3K27ac and
    # enhancer track.
    gd7_color = "#648fff"
    tollrm910_color = "#dc267f"
    toll10b_color = "#ffb000"

    def signal_track(bigwig, color, **extra):
        # Settings common to every signal track. ``extra`` carries the
        # optional ``ylim`` so tracks created without one keep LinePlot's
        # own default instead of an explicit value.
        return fancplot.LinePlot(bigwig,
                                 fill=True,
                                 plot_kwargs={'color': color},
                                 draw_minor_ticks=False,
                                 aspect=0.05,
                                 n_yticks=2,
                                 **extra)

    def enhancer_track(bed, color):
        # Thin feature track for one genotype's candidate enhancers.
        return fancplot.GenomicFeaturePlot(bed,
                                           aspect=0.02,
                                           color=color,
                                           draw_minor_ticks=False)

    h = fanc.load(os.path.join("data", "hic", "merged", "3-4h", "hic",
                               "3-4h_2kb.hic"),
                  mode="r")
    try:
        h_plot = fancplot.HicPlot(h,
                                  vmin=1e-03,
                                  vmax=1e-01,
                                  norm="log",
                                  draw_minor_ticks=False)

        genes = "external_data/flybase/dmel-all-r6.30.gtf.gz"
        genes_plot = fancplot.GenePlot(genes,
                                       squash=True,
                                       group_by="gene_symbol",
                                       aspect=0.15,
                                       label_field="gene_symbol",
                                       show_labels=False,
                                       draw_minor_ticks=False)

        rnaseq_dict = {
            name: os.path.join(
                "external_data", "koenecke_2016_2017", "rnaseq_aligned",
                name + "_sorted_filtered_merged_canonical_chrs_rnaseq.bw")
            for name in ["gd7", "tlrm910", "tl10b"]
        }
        h3k27ac_dict = {
            name: os.path.join(
                "external_data", "koenecke_2016_2017", "chipseq_aligned",
                "H3K27ac_" + name
                + "_sorted_filtered_merged_canonical_chrs.bw")
            for name in ["gd7", "tl10b"]
        }
        # The Tollrm910 H3K27ac file lives in a different directory and is
        # keyed differently from its RNA-seq counterpart ('tlrm910').
        h3k27ac_dict["Tollrm910"] = os.path.join(
            "external_data", "extra_chip-seq", "chipseq_aligned",
            "H3K27ac_Tollrm910_sorted_filtered_merged_canonical_chrs.bw")

        # Limit groups are handed to several tracks at once; presumably this
        # keeps those tracks on a common y-scale -- confirm in fancplot.
        h3k27ac_ylim = fancplot.helpers.LimitGroup()
        polii_ylim = fancplot.helpers.LimitGroup()

        # Only the late NC14 PolII track is plotted.
        polii_chip_late = os.path.join(
            "external_data", "blythe_2015", "aligned",
            "PolII-pSer5_NC14-late_sorted_filtered_merged_canonical_chrs.bw")
        polii_late_plot = signal_track(polii_chip_late, "black",
                                       ylim=polii_ylim)

        # RNA-seq tracks get no ylim here: their y-range is set per region
        # inside the plotting loop below.
        rnaseq_plots = [
            signal_track(rnaseq_dict['gd7'], gd7_color),
            signal_track(rnaseq_dict['tlrm910'], tollrm910_color),
            signal_track(rnaseq_dict['tl10b'], toll10b_color),
        ]
        h3k27ac_plots = [
            signal_track(h3k27ac_dict['gd7'], gd7_color,
                         ylim=h3k27ac_ylim),
            signal_track(h3k27ac_dict['Tollrm910'], tollrm910_color,
                         ylim=h3k27ac_ylim),
            signal_track(h3k27ac_dict['tl10b'], toll10b_color,
                         ylim=h3k27ac_ylim),
        ]
        enhancer_plots = [
            enhancer_track(
                "data/supplementary_tables/gd7_candidate_enhancers.bed",
                gd7_color),
            enhancer_track(
                "data/supplementary_tables/"
                "Tollrm910_candidate_enhancers.bed",
                tollrm910_color),
            enhancer_track(
                "data/supplementary_tables/Toll10B_candidate_enhancers.bed",
                toll10b_color),
        ]

        leading_plots = [h_plot, genes_plot, polii_late_plot]
        plots = leading_plots + rnaseq_plots + h3k27ac_plots + enhancer_plots

        # Positions of the RNA-seq tracks in ``plots``; derived from the
        # list so they stay correct if tracks are added in front of them.
        first_rnaseq = len(leading_plots)
        rnaseq_axis_indices = range(first_rnaseq,
                                    first_rnaseq + len(rnaseq_plots))

        with PdfPages(output_file) as pdf:
            with fancplot.GenomicFigure(plots, ticks_last=True) as gfig:
                for _name, region, rnaseq_ymax in regions:
                    logging.info(region)
                    _fig, axes = gfig.plot(region)
                    for axis_index in rnaseq_axis_indices:
                        axes[axis_index].set_ylim([0, rnaseq_ymax])
                    pdf.savefig()
    finally:
        # Release the Hi-C file handle even if plotting fails.
        h.close()
Exemplo n.º 30
0
# start snippet oe setup
import fanc
import matplotlib.pyplot as plt
import fanc.plotting as fancplot

hic_500kb = fanc.load("output/hic/binned/fanc_example_500kb.hic")
# end snippet oe setup

# Not part of any snippet: close the handle opened above before the same file
# is loaded again with mode='a' in the next snippet.
hic_500kb.close()
# start snippet oe append
hic_500kb = fanc.load("output/hic/binned/fanc_example_500kb.hic", mode='a')
# end snippet oe append

# NOTE: the two "alternative" snippets below rebind hic_500kb, so when this
# file is run top to bottom the later snippets operate on the object loaded
# last (the ".juicer.hic@500kb" path).
# start snippet alternative cooler
hic_500kb = fanc.load("architecture/other-hic/fanc_example.mcool@500kb")
# end snippet alternative cooler

# start snippet alternative juicer
hic_500kb = fanc.load("architecture/other-hic/fanc_example.juicer.hic@500kb")
# end snippet alternative juicer

# start snippet oe basic
intra_expected, intra_expected_chromosome, inter_expected = hic_500kb.expected_values(
)
# end snippet oe basic

# start snippet oe ddplot
# obtain bin distances
bin_size = hic_500kb.bin_size
distance = list(
    range(0, bin_size * len(intra_expected_chromosome['chr19']), bin_size))