Exemplos de GenomicRegionSet.sort em Python, exemplos de rgt.GenomicRegionSet.GenomicRegionSet.sort em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_Enrichment.py Projeto: CostaLab/reg-gen

    def test_subtract_exact(self):
        reference = GenomicRegionSet("reference")
        reference.read(os.path.join(os.path.dirname(__file__), "test_result.bed"))
        background = GenomicRegionSet("background")
        background.read(os.path.join(os.path.dirname(__file__), "test_background.bed"))
        target = GenomicRegionSet("target")
        target.read(os.path.join(os.path.dirname(__file__), "test_target.bed"))

        background_tmp = background.subtract(target, exact=True)

        reference.sort()

        self.assertEqual(len(background_tmp.sequences), len(reference.sequences))

        for region, region_ref in zip(background_tmp.sequences, reference.sequences):
            self.assertEqual(region.__cmp__(region_ref), 0)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: postprocessing.py Projeto: eggduzao/reg-gen

def merge_delete(ext_size, merge, peak_list, pvalue_list):
#     peaks_gain = read_diffpeaks(path)
    
    regions_plus = GenomicRegionSet('regions') #pot. mergeable
    regions_minus = GenomicRegionSet('regions') #pot. mergeable
    regions_unmergable = GenomicRegionSet('regions')
    last_orientation = ""
    
    for i, t in enumerate(peak_list):
        chrom, start, end, c1, c2, strand, ratio = t[0], t[1], t[2], t[3], t[4], t[5], t[6]
        r = GenomicRegion(chrom = chrom, initial = start, final = end, name = '', \
                          orientation = strand, data = str((c1, c2, pvalue_list[i], ratio)))
        if end - start > ext_size:
            if strand == '+':
                if last_orientation == '+':
                    region_plus.add(r)
                else:
                    regions_unmergable.add(r)
            elif strand == '-':
                if last_orientation == '-':
                    region_mins.add(r)
                else:
                    regions_unmergable.add(r)
                    
                    
    if merge:
        regions_plus.extend(ext_size/2, ext_size/2)
        regions_plus.merge()
        regions_plus.extend(-ext_size/2, -ext_size/2)
        merge_data(regions_plus)
        
        regions_minus.extend(ext_size/2, ext_size/2)
        regions_minus.merge()
        regions_minus.extend(-ext_size/2, -ext_size/2)
        merge_data(regions_minus)
    
    results = GenomicRegionSet('regions')
    for el in regions_plus:
        results.add(el)
    for el in regions_minus:
        results.add(el)
    for el in regions_unmergable:
        results.add(el)
    results.sort()
    
    return results

Exemplo n.º 3

0

Exibir arquivo

Arquivo: postprocessing.py Projeto: rafalcode/reg-gen

def merge_delete(ext_size, merge, peak_list, pvalue_list):
    #     peaks_gain = read_diffpeaks(path)

    regions_plus = GenomicRegionSet('regions')  #pot. mergeable
    regions_minus = GenomicRegionSet('regions')  #pot. mergeable
    regions_unmergable = GenomicRegionSet('regions')
    last_orientation = ""

    for i, t in enumerate(peak_list):
        chrom, start, end, c1, c2, strand, ratio = t[0], t[1], t[2], t[3], t[
            4], t[5], t[6]
        r = GenomicRegion(chrom = chrom, initial = start, final = end, name = '', \
                          orientation = strand, data = str((c1, c2, pvalue_list[i], ratio)))
        if end - start > ext_size:
            if strand == '+':
                if last_orientation == '+':
                    region_plus.add(r)
                else:
                    regions_unmergable.add(r)
            elif strand == '-':
                if last_orientation == '-':
                    region_mins.add(r)
                else:
                    regions_unmergable.add(r)

    if merge:
        regions_plus.extend(ext_size / 2, ext_size / 2)
        regions_plus.merge()
        regions_plus.extend(-ext_size / 2, -ext_size / 2)
        merge_data(regions_plus)

        regions_minus.extend(ext_size / 2, ext_size / 2)
        regions_minus.merge()
        regions_minus.extend(-ext_size / 2, -ext_size / 2)
        merge_data(regions_minus)

    results = GenomicRegionSet('regions')
    for el in regions_plus:
        results.add(el)
    for el in regions_minus:
        results.add(el)
    for el in regions_unmergable:
        results.add(el)
    results.sort()

    return results

Exemplo n.º 4

0

Exibir arquivo

    def test_subtract_exact(self):
        reference = GenomicRegionSet("reference")
        reference.read(
            os.path.join(os.path.dirname(__file__), "test_result.bed"))
        background = GenomicRegionSet("background")
        background.read(
            os.path.join(os.path.dirname(__file__), "test_background.bed"))
        target = GenomicRegionSet("target")
        target.read(os.path.join(os.path.dirname(__file__), "test_target.bed"))

        background_tmp = background.subtract(target, exact=True)

        reference.sort()

        self.assertEqual(len(background_tmp.sequences),
                         len(reference.sequences))

        for region, region_ref in zip(background_tmp.sequences,
                                      reference.sequences):
            self.assertEqual(region, region_ref)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: evidence.py Projeto: eggduzao/reg-gen

    def create_file(self):
        # Expanding summits
        tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
        tfbs_summit_regions.read_bed(self.tfbs_summit_fname)

        for region in iter(tfbs_summit_regions):
            summit = int(region.data.split()[-1]) + region.initial
            region.initial = max(summit - (self.peak_ext / 2), 0)
            region.final = summit + (self.peak_ext / 2)

        # Calculating intersections
        mpbs_regions = GenomicRegionSet("MPBS Regions")
        mpbs_regions.read_bed(self.mpbs_fname)

        tfbs_summit_regions.sort()
        mpbs_regions.sort()

        with_overlap_regions = mpbs_regions.intersect(tfbs_summit_regions, mode=OverlapType.ORIGINAL)
        without_overlap_regions = mpbs_regions.subtract(tfbs_summit_regions, whole_region=True)
        tfbs_regions = GenomicRegionSet("TFBS Regions")

        for region in iter(with_overlap_regions):
            region.name = region.name.split(":")[0] + ":Y"
            tfbs_regions.add(region)

        for region in iter(without_overlap_regions):
            region.name = region.name.split(":")[0] + ":N"
            tfbs_regions.add(region)

        tfbs_regions.sort()

        tfbs_fname = os.path.join(self.output_location, "{}.bed".format(self.mpbs_name))
        tfbs_regions.write_bed(tfbs_fname)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: Evidence.py Projeto: michael-kotliar/reg-gen

def evidence_run(args):
    # Expanding summits
    chip_summit_regions = GenomicRegionSet("TFBS Summit Regions")
    chip_summit_regions.read(args.chip_file)

    for region in iter(chip_summit_regions):
        summit = int(region.data.split()[-1]) + region.initial
        region.initial = max(summit - (args.peak_ext / 2), 0)
        region.final = summit + (args.peak_ext / 2)

    # Calculating intersections
    mpbs_regions = GenomicRegionSet("MPBS Regions")
    mpbs_regions.read(args.mpbs_file)

    chip_summit_regions.sort()
    mpbs_regions.sort()

    tfbs_regions = GenomicRegionSet("TFBS Regions")

    for mpbs_region in mpbs_regions:
        if chip_summit_regions.include(mpbs_region):
            mpbs_region.name = mpbs_region.name.split(":")[0] + ":Y"
        else:
            mpbs_region.name = mpbs_region.name.split(":")[0] + ":N"
        tfbs_regions.add(mpbs_region)

    tfbs_regions.sort()

    tfbs_fname = os.path.join(args.output_location,
                              "{}.bed".format(args.output_prefix))
    tfbs_regions.write(tfbs_fname)

Exemplo n.º 7

0

Exibir arquivo

    def create_file(self):
        # Expanding summits
        tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
        tfbs_summit_regions.read_bed(self.tfbs_summit_fname)

        for region in iter(tfbs_summit_regions):
            summit = int(region.data.split()[-1]) + region.initial
            region.initial = max(summit - (self.peak_ext / 2), 0)
            region.final = summit + (self.peak_ext / 2)

        # Calculating intersections
        mpbs_regions = GenomicRegionSet("MPBS Regions")
        mpbs_regions.read_bed(self.mpbs_fname)

        tfbs_summit_regions.sort()
        mpbs_regions.sort()

        with_overlap_regions = mpbs_regions.intersect(
            tfbs_summit_regions, mode=OverlapType.ORIGINAL)
        without_overlap_regions = mpbs_regions.subtract(tfbs_summit_regions,
                                                        whole_region=True)
        tfbs_regions = GenomicRegionSet("TFBS Regions")

        for region in iter(with_overlap_regions):
            region.name = region.name.split(":")[0] + ":Y"
            tfbs_regions.add(region)

        for region in iter(without_overlap_regions):
            region.name = region.name.split(":")[0] + ":N"
            tfbs_regions.add(region)

        tfbs_regions.sort()

        tfbs_fname = os.path.join(self.output_location,
                                  "{}.bed".format(self.mpbs_name))
        tfbs_regions.write_bed(tfbs_fname)

Exemplo n.º 8

0

Exibir arquivo

def diff_analysis_run(args):
    # Initializing Error Handler
    err = ErrorHandler()

    output_location = os.path.join(args.output_location, "Lineplots")
    try:
        if not os.path.isdir(output_location):
            os.makedirs(output_location)
    except Exception:
        err.throw_error("MM_OUT_FOLDER_CREATION")

    # check if they have same length
    mpbs_files = args.mpbs_files.strip().split(",")
    reads_files = args.reads_files.strip().split(",")
    conditions = args.conditions.strip().split(",")

    if args.colors is not None:
        colors = args.colors.strip().split(",")
    else:
        colors = [
            "#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33",
            "#a65628", "#f781bf", "#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3",
            "#a6d854", "#ffd92f", "#e5c494", "#b3b3b3", "#8dd3c7", "#ffffb3",
            "#bebada", "#fb8072", "#80b1d3", "#fdb462", "#b3de69", "#fccde5",
            "#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e", "#e6ab02",
            "#a6761d", "#666666", "#7fc97f", "#beaed4", "#fdc086", "#ffff99",
            "#386cb0", "#f0027f", "#bf5b17", "#666666"
        ]

    assert len(mpbs_files) == len(reads_files) == len(conditions), \
        "Number of motif, read and condition names are not same: {}, {}, {}".format(len(mpbs_files), len(reads_files),
                                                                                    len(conditions))

    # Check if the index file exists
    for reads_file in reads_files:
        base_name = "{}.bai".format(reads_file)
        if not os.path.exists(base_name):
            pysam.index(reads_file)

    mpbs = GenomicRegionSet("Motif Predicted Binding Sites of All Conditions")
    for i, mpbs_file in enumerate(mpbs_files):
        mpbs.read(mpbs_file)

    mpbs.sort()
    mpbs.remove_duplicates()
    mpbs_name_list = list(set(mpbs.get_names()))

    signals = np.zeros(shape=(len(conditions), len(mpbs_name_list),
                              args.window_size),
                       dtype=np.float32)
    motif_len = list()
    motif_num = list()
    motif_pwm = list()

    print((" {} cpus are detected and {} of them will be used...\n".format(
        cpu_count(), args.nc)))

    genome_data = GenomeData(args.organism)
    fasta = Fastafile(genome_data.get_genome())

    print("generating signal for each motif and condition...\n")
    # differential analysis using bias corrected signal
    if args.bc:
        hmm_data = HmmData()
        table_forward = hmm_data.get_default_bias_table_F_ATAC()
        table_reverse = hmm_data.get_default_bias_table_R_ATAC()
        bias_table = BiasTable().load_table(table_file_name_F=table_forward,
                                            table_file_name_R=table_reverse)

        # do not use multi-processing
        if args.nc == 1:
            for i, condition in enumerate(conditions):
                for j, mpbs_name in enumerate(mpbs_name_list):
                    mpbs_regions = mpbs.by_names([mpbs_name])
                    arguments = (mpbs_regions, reads_files[i], args.organism,
                                 args.window_size, args.forward_shift,
                                 args.reverse_shift, bias_table)
                    try:
                        signals[i, j, :] = get_bc_signal(arguments)
                    except Exception:
                        logging.exception("get bias corrected signal failed")

                    # get motif length, number and pwm matrix
                    motif_len.append(mpbs_regions[0].final -
                                     mpbs_regions[0].initial)
                    motif_num.append(len(mpbs_regions))
                    motif_pwm.append(
                        get_pwm(fasta, mpbs_regions, args.window_size))

        # use multi-processing
        else:
            for i, condition in enumerate(conditions):
                print((
                    "generating signal for condition {} \n".format(condition)))
                with Pool(processes=args.nc) as pool:
                    arguments_list = list()
                    for mpbs_name in mpbs_name_list:
                        mpbs_regions = mpbs.by_names([mpbs_name])
                        arguments = (mpbs_regions, reads_files[i],
                                     args.organism, args.window_size,
                                     args.forward_shift, args.reverse_shift,
                                     bias_table)
                        arguments_list.append(arguments)

                        # get motif length, number and pwm matrix
                        motif_len.append(mpbs_regions[0].final -
                                         mpbs_regions[0].initial)
                        motif_num.append(len(mpbs_regions))
                        motif_pwm.append(
                            get_pwm(fasta, mpbs_regions, args.window_size))

                    res = pool.map(get_bc_signal, arguments_list)
                    signals[i] = np.array(res)

    # differential analysis using raw signal
    else:
        # do not use multi-processing
        if args.nc == 1:
            for i, condition in enumerate(conditions):
                for j, mpbs_name in enumerate(mpbs_name_list):
                    mpbs_regions = mpbs.by_names([mpbs_name])
                    arguments = (mpbs_regions, reads_files[i], args.organism,
                                 args.window_size, args.forward_shift,
                                 args.reverse_shift)
                    signals[i, j, :] = get_raw_signal(arguments)

                    # get motif length, number and pwm matrix
                    motif_len.append(mpbs_regions[0].final -
                                     mpbs_regions[0].initial)
                    motif_num.append(len(mpbs_regions))
                    motif_pwm.append(
                        get_pwm(fasta, mpbs_regions, args.window_size))

        # use multi-processing
        else:
            for i, condition in enumerate(conditions):
                print((
                    "generating signal for condition {} \n".format(condition)))
                with Pool(processes=args.nc) as pool:
                    arguments_list = list()
                    for mpbs_name in mpbs_name_list:
                        mpbs_regions = mpbs.by_names([mpbs_name])
                        arguments = (mpbs_regions, reads_files[i],
                                     args.organism, args.window_size,
                                     args.forward_shift, args.reverse_shift)
                        arguments_list.append(arguments)

                        # get motif length, number and pwm matrix
                        motif_len.append(mpbs_regions[0].final -
                                         mpbs_regions[0].initial)
                        motif_num.append(len(mpbs_regions))
                        motif_pwm.append(
                            get_pwm(fasta, mpbs_regions, args.window_size))

                    res = pool.map(get_raw_signal, arguments_list)
                    signals[i] = np.array(res)

    print("signal generation is done!\n")

    # compute normalization facotr for each condition
    factors = compute_factors(signals)
    output_factor(args, factors, conditions)

    # normalize signals by factor and number of motifs
    for i in range(len(conditions)):
        for j in range(len(mpbs_name_list)):
            signals[i, j, :] = signals[i, j, :] / (factors[i] * motif_num[j])

    if args.output_profiles:
        output_profiles(mpbs_name_list, signals, conditions,
                        args.output_location)

    print("generating line plot for each motif...\n")
    if args.nc == 1:
        for i, mpbs_name in enumerate(mpbs_name_list):
            output_line_plot(
                (mpbs_name, motif_num[i], signals[:, i, :], conditions,
                 motif_pwm[i], output_location, args.window_size, colors))
    else:
        with Pool(processes=args.nc) as pool:
            arguments_list = list()
            for i, mpbs_name in enumerate(mpbs_name_list):
                arguments_list.append(
                    (mpbs_name, motif_num[i], signals[:, i, :], conditions,
                     motif_pwm[i], output_location, args.window_size, colors))
            pool.map(output_line_plot, arguments_list)

    ps_tc_results = list()
    for i, mpbs_name in enumerate(mpbs_name_list):
        ps_tc_results.append(
            get_ps_tc_results(signals[:, i, :], motif_len[i],
                              args.window_size))

    # find the significant motifs and generate a scatter plot if two conditions are given
    if len(conditions) == 2:
        ps_tc_results = scatter_plot(args, ps_tc_results, mpbs_name_list,
                                     conditions)

    output_stat_results(ps_tc_results, conditions, mpbs_name_list, motif_num,
                        args)

Exemplo n.º 9

0

Exibir arquivo

def chip_evaluate(args):
    # Evaluate Statistics
    fpr = dict()
    tpr = dict()
    roc_auc_1 = dict()
    roc_auc_10 = dict()
    roc_auc_50 = dict()
    roc_auc_100 = dict()
    recall = dict()
    precision = dict()
    prc_auc_1 = dict()
    prc_auc_10 = dict()
    prc_auc_50 = dict()
    prc_auc_100 = dict()

    footprint_file = args.footprint_file.split(",")
    footprint_name = args.footprint_name.split(",")
    footprint_type = args.footprint_type.split(",")

    max_score = 0
    if "SEG" in footprint_type:
        mpbs_regions = GenomicRegionSet("TFBS")
        mpbs_regions.read(args.tfbs_file)

        # Verifying the maximum score of the MPBS file
        for region in iter(mpbs_regions):
            score = int(region.data.split("\t")[0])
            if score > max_score:
                max_score = score
        max_score += 1

    for i in range(len(footprint_file)):
        footprints_regions = GenomicRegionSet("Footprints Prediction")
        footprints_regions.read(footprint_file[i])
        footprints_regions.sort()

        if footprint_type[i] == "SEG":
            # Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
            increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
            intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
            for region in iter(intersect_regions):
                region.data = str(int(region.data.split("\t")[0]) + max_score)
                increased_score_mpbs_regions.add(region)

            # Keep the score of remained MPBS entry unchanged
            without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
            for region in iter(without_intersect_regions):
                increased_score_mpbs_regions.add(region)

            increased_score_mpbs_regions.sort_score()

            fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
                roc_curve(increased_score_mpbs_regions)
            recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
                precision_recall_curve(increased_score_mpbs_regions)
        elif footprint_type[i] == "SC":
            footprints_regions.sort_score()
            fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
                roc_curve(footprints_regions)
            recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
                precision_recall_curve(footprints_regions)

    # Output the statistics results into text
    stats_fname = os.path.join(args.output_location, "{}_stats.txt".format(args.output_prefix))
    stats_header = ["METHOD", "AUC_100", "AUC_50", "AUC_10", "AUC_1", "AUPR_100", "AUPR_50", "AUPR_10", "AUPR_1"]
    with open(stats_fname, "w") as stats_file:
        stats_file.write("\t".join(stats_header) + "\n")
        for i in range(len(footprint_name)):
            stats_file.write(footprint_name[i] + "\t" +
                             str(roc_auc_100[i]) + "\t" + str(roc_auc_50[i]) + "\t" + str(roc_auc_10[i]) + "\t" +
                             str(roc_auc_1[i]) + "\t" + str(prc_auc_100[i]) + "\t" + str(prc_auc_50[i]) + "\t" +
                             str(prc_auc_10[i]) + "\t" + str(prc_auc_1[i]) + "\n")

    # Output the curves
    if args.print_roc_curve:
        label_x = "False Positive Rate"
        label_y = "True Positive Rate"
        curve_name = "ROC"
        plot_curve(footprint_name, args.output_location, fpr, tpr, roc_auc_100, label_x, label_y, args.output_prefix,
                   curve_name)
    if args.print_pr_curve:
        label_x = "Recall"
        label_y = "Precision"
        curve_name = "PRC"
        plot_curve(footprint_name, args.output_location, recall, precision, prc_auc_100, label_x, label_y,
                   args.output_prefix, curve_name)

    output_points(footprint_name, args.output_location, args.output_prefix, fpr, tpr, recall, precision)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: tdf_regiontest.py Projeto: rafalcode/reg-gen

class RandomTest:
    def __init__(self,
                 rna_fasta,
                 rna_name,
                 dna_region,
                 organism,
                 showdbs=False):
        self.organism = organism
        genome = GenomeData(organism)
        self.genome_path = genome.get_genome()
        # RNA: Path to the FASTA file
        self.rna_fasta = rna_fasta
        self.showdbs = showdbs

        rnas = SequenceSet(name="rna", seq_type=SequenceType.RNA)
        rnas.read_fasta(self.rna_fasta)
        if rna_name:
            self.rna_name = rna_name
        else:
            self.rna_name = rnas[0].name

        # DNA: GenomicRegionSet
        self.dna_region = GenomicRegionSet(name="target")
        self.dna_region.read_bed(dna_region)
        self.dna_region = self.dna_region.gene_association(
            organism=self.organism, show_dis=True)

        self.topDBD = []
        self.stat = OrderedDict(name=rna_name, genome=organism)
        self.stat["target_regions"] = str(len(self.dna_region))

    def get_rna_region_str(self, rna):
        """Getting the rna region from the information header with the pattern:
                REGION_chr3_51978050_51983935_-_"""
        self.rna_regions = get_rna_region_str(rna)
        if self.rna_regions and len(self.rna_regions[0]) == 5:
            self.rna_expression = float(self.rna_regions[0][-1])
        else:
            self.rna_expression = "n.a."

    def connect_rna(self, rna, temp):
        d = connect_rna(rna, temp, self.rna_name)
        self.stat["exons"] = str(d[0])
        self.stat["seq_length"] = str(d[1])
        self.rna_len = d[1]

    def target_dna(self,
                   temp,
                   remove_temp,
                   cutoff,
                   l,
                   e,
                   c,
                   fr,
                   fm,
                   of,
                   mf,
                   par,
                   obed=False):
        """Calculate the true counts of triplexes on the given dna regions"""
        self.triplexator_p = [l, e, c, fr, fm, of, mf]

        txp = find_triplex(rna_fasta=os.path.join(temp, "rna_temp.fa"),
                           dna_region=self.dna_region,
                           temp=temp,
                           organism=self.organism,
                           remove_temp=remove_temp,
                           l=l,
                           e=e,
                           c=c,
                           fr=fr,
                           fm=fm,
                           of=of,
                           mf=mf,
                           par=par,
                           genome_path=self.genome_path,
                           prefix="targeted_region",
                           dna_fine_posi=False)
        txp.merge_rbs(rm_duplicate=True,
                      region_set=self.dna_region,
                      asgene_organism=self.organism,
                      cutoff=cutoff)
        self.txp = txp
        self.stat["DBSs_target_all"] = str(len(self.txp))
        txp.remove_duplicates()
        self.rbss = txp.merged_dict.keys()
        # if len(self.rbss) == 0:
        #     print("ERROR: No potential binding event. Please change the parameters.")
        #     sys.exit(1)

        txpf = find_triplex(rna_fasta=os.path.join(temp, "rna_temp.fa"),
                            dna_region=self.dna_region,
                            temp=temp,
                            organism=self.organism,
                            remove_temp=remove_temp,
                            l=l,
                            e=e,
                            c=c,
                            fr=fr,
                            fm=fm,
                            of=of,
                            mf=mf,
                            par=par,
                            genome_path=self.genome_path,
                            prefix="dbs",
                            dna_fine_posi=True)
        txpf.remove_duplicates()
        txpf.merge_rbs(rbss=self.rbss,
                       rm_duplicate=True,
                       asgene_organism=self.organism)
        self.txpf = txpf

        self.stat["DBSs_target_all"] = str(len(self.txpf))

        self.counts_tr = OrderedDict()
        self.counts_dbs = OrderedDict()

        for rbs in self.rbss:
            tr = len(self.txp.merged_dict[rbs])
            self.counts_tr[rbs] = [tr, len(self.dna_region) - tr]
            self.counts_dbs[rbs] = len(self.txpf.merged_dict[rbs])

        self.region_dbd = self.txpf.sort_rbs_by_regions(self.dna_region)

        self.region_dbs = self.txpf.sort_rd_by_regions(
            regionset=self.dna_region)
        self.region_dbsm = {}
        self.region_coverage = {}

        for region in self.dna_region:
            self.region_dbsm[region.toString()] = self.region_dbs[
                region.toString()].get_dbs().merge(w_return=True)
            self.region_coverage[region.toString()] = float(self.region_dbsm[region.toString()].total_coverage()) / len \
                (region)
        self.stat["target_regions"] = str(len(self.dna_region))

        if obed:
            # btr = self.txp.get_dbs()
            # btr = btr.gene_association(organism=self.organism, show_dis=True)
            # btr.write_bed(os.path.join(temp, obed + "_target_region_dbs.bed"))
            # dbss = txpf.get_dbs()
            # dbss.write_bed(os.path.join(temp, obed + "_dbss.bed"))

            # output = self.dna_region.gene_association(organism=self.organism, show_dis=True)

            self.txp.write_bed(filename=os.path.join(
                temp, obed + "_target_region_dbs.bed"),
                               dbd_tag=False,
                               remove_duplicates=False,
                               associated=self.organism)
            self.txpf.write_bed(filename=os.path.join(temp,
                                                      obed + "_dbss.bed"),
                                remove_duplicates=False)

    def random_test(self, repeats, temp, remove_temp, l, e, c, fr, fm, of, mf,
                    rm, par, filter_bed, alpha):
        """Perform randomization for the given times"""
        self.repeats = repeats
        marks = numpy.round(numpy.linspace(0, repeats - 1, num=41)).tolist()
        print("random_test")
        print(par)
        # Prepare the input lists for multiprocessing
        mp_input = []
        for i in range(repeats):
            mp_input.append([
                str(i),
                os.path.join(temp, "rna_temp.fa"), self.dna_region, temp,
                self.organism, self.rbss,
                str(marks.count(i)),
                str(l),
                str(e),
                str(c),
                str(fr),
                str(fm),
                str(of),
                str(mf),
                str(rm), filter_bed, self.genome_path, par
            ])
        # Multiprocessing
        print("\t\t|0%                  |                100%|")
        print("\t\t[", end="")
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count() - 2)
        mp_output = pool.map(random_each, mp_input)
        # print(mp_output)
        pool.close()
        pool.join()
        print("]")

        # Processing the result
        self.region_matrix = []
        self.dbss_matrix = []
        self.data = {
            "region": {
                "ave": [],
                "sd": [],
                "p": [],
                "sig_region": [],
                "sig_boolean": []
            },
            "dbs": {
                "ave": [],
                "sd": [],
                "p": [],
                "sig_region": [],
                "sig_boolean": []
            }
        }

        region_counts = [v[0] for v in mp_output]
        dbss_counts = [v[1] for v in mp_output]

        for i, rbs in enumerate(self.rbss):

            counts_regions = [v[i] for v in region_counts]

            self.data["region"]["ave"].append(numpy.mean(counts_regions))
            self.data["region"]["sd"].append(numpy.std(counts_regions))
            num_sig = len(
                [h for h in counts_regions if h > self.counts_tr[rbs][0]])
            p_region = float(num_sig) / repeats
            self.data["region"]["p"].append(p_region)
            self.region_matrix.append(counts_regions)

            if p_region < alpha:
                self.data["region"]["sig_region"].append(rbs)
                self.data["region"]["sig_boolean"].append(True)
            else:
                self.data["region"]["sig_boolean"].append(False)

            try:
                if p_region < self.topDBD[1]:
                    self.topDBD = [rbs.str_rna(pa=False), p_region]
            except:
                self.topDBD = [rbs.str_rna(pa=False), p_region]

            # Analysis based on DBSs
            if self.showdbs:
                counts_dbss = [v[i] for v in dbss_counts]

                self.data["dbs"]["ave"].append(numpy.mean(counts_dbss))
                self.data["dbs"]["sd"].append(numpy.std(counts_dbss))
                num_sig = len(
                    [h for h in counts_dbss if h > self.counts_dbs[rbs]])
                p_dbs = float(num_sig) / repeats
                self.data["dbs"]["p"].append(p_dbs)
                self.dbss_matrix.append(counts_dbss)
                if p_dbs < alpha:
                    self.data["dbs"]["sig_region"].append(rbs)
                    self.data["dbs"]["sig_boolean"].append(True)
                else:
                    self.data["dbs"]["sig_boolean"].append(False)
            try:
                self.stat["p_value"] = str(min(self.data["region"]["p"]))
            except:
                self.stat["p_value"] = "1"

        self.region_matrix = numpy.array(self.region_matrix)

        if self.showdbs: self.dbss_matrix = numpy.array(self.dbss_matrix)

        counts_dbss = [v[i] for v in dbss_counts]
        self.stat["DBSs_random_ave"] = numpy.mean(counts_dbss)
        try:
            self.stat["p_value"] = str(min(self.data["region"]["p"]))
        except:
            self.stat["p_value"] = "1"

    def dbd_regions(self, sig_region, output):
        """Generate the BED file of significant DBD regions and FASTA file of the sequences"""
        dbd_regions(exons=self.rna_regions,
                    sig_region=sig_region,
                    rna_name=self.rna_name,
                    output=output)

        self.stat["DBD_all"] = str(len(self.rbss))
        self.stat["DBD_sig"] = str(len(self.data["region"]["sig_region"]))

        sigDBD = GenomicRegionSet("DBD_sig")
        sigDBD.sequences = self.data["region"]["sig_region"]
        rbss = self.txp.get_rbs()
        overlaps = rbss.intersect(y=sigDBD, mode=OverlapType.ORIGINAL)
        self.stat["DBSs_target_DBD_sig"] = str(len(overlaps))

    def lineplot(self, txp, dirp, ac, cut_off, log, ylabel, linelabel, showpa,
                 sig_region, filename):
        """Generate lineplot for RNA"""

        lineplot(txp=txp,
                 rnalen=self.rna_len,
                 rnaname=self.rna_name,
                 dirp=dirp,
                 sig_region=sig_region,
                 cut_off=cut_off,
                 log=log,
                 ylabel=ylabel,
                 linelabel=linelabel,
                 filename=filename,
                 ac=ac,
                 showpa=showpa)

    def boxplot(self, dir, matrix, sig_region, truecounts, sig_boolean, ylabel,
                filename):
        """Generate the visualized plot"""
        tick_size = 8
        label_size = 9

        f, ax = plt.subplots(1, 1, dpi=300, figsize=(6, 4))
        max_y = int(max([matrix.max()] + truecounts) * 1.1) + 1
        min_y = max(int(matrix.min() * 0.9) - 1, 0)

        # Significant DBD
        rect = patches.Rectangle(xy=(1, 0),
                                 width=0.8,
                                 height=max_y,
                                 facecolor=sig_color,
                                 edgecolor="none",
                                 alpha=0.5,
                                 lw=None,
                                 label="Significant DBD")
        for i, r in enumerate(sig_boolean):
            if r:
                rect = patches.Rectangle(xy=(i + 0.6, min_y),
                                         width=0.8,
                                         height=max_y,
                                         facecolor=sig_color,
                                         edgecolor="none",
                                         alpha=0.5,
                                         lw=None,
                                         label="Significant DBD")
                ax.add_patch(rect)

        # Plotting

        bp = ax.boxplot(matrix.transpose(),
                        notch=False,
                        sym='o',
                        vert=True,
                        whis=1.5,
                        positions=None,
                        widths=None,
                        patch_artist=True,
                        bootstrap=None)
        z = 10
        plt.setp(bp['boxes'], color=nontarget_color, alpha=1, edgecolor="none")
        plt.setp(bp['whiskers'],
                 color='black',
                 linestyle='-',
                 linewidth=1,
                 zorder=z,
                 alpha=1)
        plt.setp(bp['fliers'],
                 markerfacecolor='gray',
                 color='white',
                 alpha=0.3,
                 markersize=1.8,
                 zorder=z)
        plt.setp(bp['caps'], color='white', zorder=-1)
        plt.setp(bp['medians'], color='black', linewidth=1.5, zorder=z + 1)

        # Plot target regions
        plt.plot(range(1,
                       len(self.rbss) + 1),
                 truecounts,
                 markerfacecolor=target_color,
                 marker='o',
                 markersize=5,
                 linestyle='None',
                 markeredgecolor="white",
                 zorder=z + 5)

        ax.set_xlabel(self.rna_name + " DNA Binding Domains",
                      fontsize=label_size)
        ax.set_ylabel(ylabel, fontsize=label_size, rotation=90)

        ax.set_ylim([min_y, max_y])
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))

        ax.set_xticklabels([dbd.str_rna(pa=False) for dbd in self.rbss],
                           rotation=35,
                           ha="right",
                           fontsize=tick_size)
        for tick in ax.yaxis.get_major_ticks():
            tick.label.set_fontsize(tick_size)

        for spine in ['top', 'right']:
            ax.spines[spine].set_visible(False)
        ax.tick_params(axis='x',
                       which='both',
                       bottom='off',
                       top='off',
                       labelbottom='on')
        ax.tick_params(axis='y',
                       which='both',
                       left='on',
                       right='off',
                       labelbottom='off')

        # Legend
        dot_legend, = plt.plot([1, 1],
                               color=target_color,
                               marker='o',
                               markersize=5,
                               markeredgecolor="white",
                               linestyle='None')
        bp_legend, = plt.plot([1, 1],
                              color=nontarget_color,
                              linewidth=6,
                              alpha=1)

        ax.legend([dot_legend, bp_legend, rect],
                  ["Target Regions", "Non-target regions", "Significant DBD"],
                  bbox_to_anchor=(0., 1.02, 1., .102),
                  loc=2,
                  mode="expand",
                  borderaxespad=0.,
                  prop={'size': 9},
                  ncol=3,
                  numpoints=1)
        bp_legend.set_visible(False)
        dot_legend.set_visible(False)

        # f.tight_layout(pad=1.08, h_pad=None, w_pad=None)
        f.savefig(os.path.join(dir, filename + ".png"),
                  facecolor='w',
                  edgecolor='w',
                  bbox_extra_artists=(plt.gci()),
                  bbox_inches='tight',
                  dpi=300)
        # PDF
        for tick in ax.xaxis.get_major_ticks():
            tick.label.set_fontsize(12)
        for tick in ax.yaxis.get_major_ticks():
            tick.label.set_fontsize(12)
        ax.xaxis.label.set_size(14)
        ax.yaxis.label.set_size(14)

        pp = PdfPages(os.path.join(dir, filename + '.pdf'))
        pp.savefig(f, bbox_extra_artists=(plt.gci()), bbox_inches='tight')
        pp.close()

    def gen_html(self,
                 directory,
                 parameters,
                 obed,
                 align=50,
                 alpha=0.05,
                 score=False):
        """Generate the HTML file"""
        dir_name = os.path.basename(directory)
        html_header = "Genomic Region Test: " + dir_name
        link_ds = OrderedDict()
        link_ds["RNA"] = "index.html"
        link_ds["Sig Target Regions"] = "starget_regions.html"
        link_ds["Target Regions"] = "target_regions.html"
        link_ds["Parameters"] = "parameters.html"

        ##################################################
        # index.html

        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")
        # Plots
        html.add_figure("lineplot_region.png",
                        align="left",
                        width="45%",
                        more_images=["boxplot_regions.png"])
        if self.showdbs:
            html.add_figure("lineplot_dbs.png",
                            align="left",
                            width="45%",
                            more_images=["boxplot_dbs.png"])

        if self.showdbs:
            header_list = [[
                "#", "DBD", "Target Regions", None, "Non-target Regions", None,
                "Statistics", "Target Regions", "Non-target Regions", None,
                "Statistics"
            ],
                           [
                               "", "", "with DBS", "without DBS",
                               "with DBS (average)", "s.d.", "<i>p</i>-value",
                               "NO. DBSs", "NO. DBSs (average)", "s.d.",
                               "<i>p</i>-value"
                           ]]
            header_titles = [
                [
                    "Rank", "DNA Binding Domain",
                    "Given target regions on DNA", None,
                    "Regions from randomization", None,
                    "Statistics based on target regions",
                    "Given target regions on DNA",
                    "Regions from randomization", None,
                    "Statistics based on DNA Binding Sites"
                ],
                [
                    "", "", "Number of target regions with DBS binding",
                    "Number of target regions without DBS binding",
                    "Average number of regions from randomization with DBS binding",
                    "Standard deviation", "P value",
                    "Number of related DNA Binding Sites binding to target regions",
                    "Average number of DNA Binding Sites binding to random regions",
                    "Standard deviation", "P-value"
                ]
            ]
            border_list = [
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:2pt solid gray\"",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:1pt solid gray\""
            ]
        else:
            header_list = [[
                "#", "DBD", "Target Regions", None, "Non-target Regions", None,
                "Statistics", None
            ],
                           [
                               "", "", "with DBS", "without DBS",
                               "with DBS (average)", "s.d.", "<i>p</i>-value",
                               "z-score"
                           ]]
            header_titles = [
                [
                    "Rank", "DNA Binding Domain",
                    "Given target regions on DNA", None,
                    "Regions from randomization", None,
                    "Statistics based on target regions", None
                ],
                [
                    "", "", "Number of target regions with DBS binding",
                    "Number of target regions without DBS binding",
                    "Average number of regions from randomization with DBS binding",
                    "Standard deviation", "P value", "Z-score"
                ]
            ]
            border_list = [
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:1pt solid gray\"", ""
            ]

        type_list = 'ssssssssssssssss'
        col_size_list = [
            50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50
        ]
        data_table = []

        for i, rbs in enumerate(self.rbss):
            if self.data["region"]["p"][i] < alpha:
                p_region = "<font color=\"red\">" + value2str(
                    self.data["region"]["p"][i]) + "</font>"

            else:
                p_region = value2str(self.data["region"]["p"][i])
            zs = (self.counts_tr[rbs][0] -
                  self.data["region"]["ave"][i]) / self.data["region"]["sd"][i]
            new_line = [
                str(i + 1),
                rbs.str_rna(pa=False), '<a href="dbd_region.html#' +
                rbs.str_rna() + '" style="text-align:left">' +
                str(self.counts_tr[rbs][0]) + '</a>',
                str(self.counts_tr[rbs][1]),
                value2str(self.data["region"]["ave"][i]),
                value2str(self.data["region"]["sd"][i]), p_region,
                value2str(zs)
            ]
            if self.showdbs:
                if self.data["dbs"]["p"][i] < alpha:
                    p_dbs = "<font color=\"red\">" + value2str(
                        self.data["dbs"]["p"][i]) + "</font>"
                else:
                    p_dbs = value2str(self.data["dbs"]["p"][i])

                new_line += [
                    str(self.counts_dbs[rbs]),
                    value2str(self.data["dbs"]["ave"][i]),
                    value2str(self.data["dbs"]["sd"][i]), p_dbs
                ]
            data_table.append(new_line)

        data_table = natsort.natsorted(data_table, key=lambda x: x[6])
        html.add_zebra_table(header_list,
                             col_size_list,
                             type_list,
                             data_table,
                             align=align,
                             cell_align="left",
                             auto_width=True,
                             header_titles=header_titles,
                             border_list=border_list,
                             sortable=True)

        html.add_heading("Notes")
        html.add_list([
            "RNA name: " + self.rna_name,
            "Randomization is performed for " + str(self.repeats) + " times.",
            "DBD stands for DNA Binding Domain on RNA.",
            "DBS stands for DNA Binding Site on DNA."
        ])
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "index.html"))

        #############################################################
        # RNA subpage: Profile of targeted regions for each merged DNA Binding Domain
        #############################################################

        header_list = [
            "#", "Target Region", "Associated Gene", "No. of DBSs",
            "DBS coverage"
        ]
        header_titles = [
            "Rank", "Given target regions from BED files",
            "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
            "Number of DNA Binding Sites locate within the region",
            "The proportion of the region covered by DBS binding"
        ]

        #########################################################
        # dbd_region.html
        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")

        for rbsm in self.rbss:
            html.add_heading("DNA Binding Domain: " + rbsm.str_rna(),
                             idtag=rbsm.str_rna())
            data_table = []
            for i, region in enumerate(self.txp.merged_dict[rbsm]):
                # Add information
                data_table.append([
                    str(i + 1),
                    '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' +
                    self.organism + "&position=" + region.chrom + "%3A" +
                    str(region.initial) + "-" + str(region.final) +
                    '" style="text-align:left">' +
                    region.toString(space=True) + '</a>',
                    split_gene_name(gene_name=region.name, org=self.organism),
                    str(len(self.region_dbs[region.toString()])),
                    value2str(self.region_coverage[region.toString()])
                ])

            html.add_zebra_table(header_list,
                                 col_size_list,
                                 type_list,
                                 data_table,
                                 align=align,
                                 cell_align="left",
                                 auto_width=True,
                                 header_titles=header_titles,
                                 sortable=True)
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "dbd_region.html"))

        #############################################################
        # Targeted regions centered
        #############################################################

        ##############################################################################################
        # target_regions.html
        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")

        if score:
            header_list = [
                "#", "Target region", "Associated Gene", "DBSs Count",
                "DBS coverage", "Score", "Sum of ranks"
            ]
            header_titles = [
                "Rank", "Target regions loaded from the given BED file",
                "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                "Number of DNA Binding Sites within the region",
                "The proportion of the region covered by DBS binding",
                "Scores from BED file", "Sum of all the left-hand-side ranks"
            ]
        else:
            header_list = [
                "#", "Target region", "Associated Gene", "DBSs Count",
                "DBS coverage", "Sum of ranks"
            ]
            header_titles = [
                "Rank", "Target regions loaded from the given BED file",
                "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                "Number of DNA Binding Sites within the region",
                "The proportion of the region covered by DBS binding",
                "Sum of all the left-hand-side ranks"
            ]
        html.add_heading("Target Regions")
        data_table = []

        if not self.dna_region.sorted: self.dna_region.sort()

        # Calculate the ranking
        rank_count = len(self.dna_region) - rank_array(
            [len(self.region_dbs[p.toString()]) for p in self.dna_region])
        rank_coverage = len(self.dna_region) - rank_array(
            [self.region_coverage[p.toString()] for p in self.dna_region])

        if score:
            try:
                score_list = [
                    float(p.data.split("\t")[0]) for p in self.dna_region
                ]
                rank_score = len(self.dna_region) - rank_array(
                    [abs(s) for s in score_list])
                rank_sum = [
                    x + y + z
                    for x, y, z in zip(rank_count, rank_coverage, rank_score)
                ]
                # sum_rank = rank_array(rank_sum)  # method='min'
            except ImportError:
                print(
                    "There is no score in BED file, please don't use '-score' argument."
                )
        else:
            rank_sum = [x + y for x, y in zip(rank_count, rank_coverage)]
            sum_rank = rank_array(rank_sum)

        for i, region in enumerate(self.dna_region):
            dbs_counts = str(len(self.region_dbs[region.toString()]))
            dbs_cover = value2str(self.region_coverage[region.toString()])

            newline = [
                str(i + 1),
                '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' +
                self.organism + "&position=" + region.chrom + "%3A" +
                str(region.initial) + "-" + str(region.final) +
                '" style="text-align:left">' + region.toString(space=True) +
                '</a>',
                split_gene_name(gene_name=region.name, org=self.organism),
                '<a href="region_dbs.html#' + region.toString() +
                '" style="text-align:left">' + dbs_counts + '</a>', dbs_cover
            ]

            if score:
                dbs_score = value2str(score_list[i])
                region.data = "\t".join(
                    [dbs_counts, dbs_cover, dbs_score,
                     str(rank_sum[i])])
                newline.append(dbs_score)
                newline.append(str(rank_sum[i]))
            else:
                region.data = "\t".join(
                    [dbs_counts, dbs_cover,
                     str(rank_sum[i])])
                newline.append(str(rank_sum[i]))
            data_table.append(newline)

        data_table = natsort.natsorted(data_table, key=lambda x: x[-1])
        # data_table = sorted(data_table, key=lambda x: x[-1])
        html.add_zebra_table(header_list,
                             col_size_list,
                             type_list,
                             data_table,
                             align=align,
                             cell_align="left",
                             auto_width=True,
                             header_titles=header_titles,
                             sortable=True)
        html.add_heading("Notes")
        html.add_list(["All target regions without any bindings are ignored."])
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "target_regions.html"))

        self.dna_region.sort_score()
        self.dna_region.write_bed(
            os.path.join(directory, obed + "_target_regions.bed"))

        ##############################################################################################
        # starget_regions.html    for significant target regions

        stargets = GenomicRegionSet("sig_targets")
        sig_dbs = {}
        sig_dbs_coverage = {}
        for i, r in enumerate(self.dna_region):
            sig_bindings = self.region_dbs[r.toString()].overlap_rbss(
                rbss=self.data["region"]["sig_region"])
            dbs = sig_bindings.get_dbs()
            if len(dbs) > 0:
                stargets.add(r)
                m_dbs = dbs.merge(w_return=True)
                sig_dbs[r] = len(dbs)
                # self.promoter["de"]["merged_dbs"][promoter.toString()] = len(m_dbs)
                sig_dbs_coverage[r] = float(m_dbs.total_coverage()) / len(r)

        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")

        # Select promoters in sig DBD
        if len(self.data["region"]["sig_region"]) == 0:
            html.add_heading("There is no significant DBD.")
        else:
            html.add_heading("Target regions bound by significant DBD")
            data_table = []
            # Calculate the ranking
            rank_count = len(stargets) - rank_array(
                [sig_dbs[p] for p in stargets])
            rank_coverage = len(stargets) - rank_array(
                [sig_dbs_coverage[p] for p in stargets])
            if score:
                score_list = [float(p.data.split("\t")[0]) for p in stargets]
                rank_score = len(stargets) - rank_array(
                    [abs(s) for s in score_list])
                rank_sum = [
                    x + y + z
                    for x, y, z in zip(rank_count, rank_coverage, rank_score)
                ]
                sum_rank = rank_array(rank_sum)  # method='min'
            else:
                rank_sum = [x + y for x, y in zip(rank_count, rank_coverage)]
                sum_rank = rank_array(rank_sum)

            for i, region in enumerate(stargets):
                dbssount = '<a href="region_dbs.html#' + region.toString() + \
                           '" style="text-align:left">' + str(sig_dbs[region]) + '</a>'

                region_link = region_link_internet(self.organism, region)

                newline = [
                    str(i + 1), region_link,
                    split_gene_name(gene_name=region.name, org=self.organism),
                    dbssount,
                    value2str(sig_dbs_coverage[region])
                ]
                if score:
                    dbs_score = value2str(score_list[i])
                    # region.data = "\t".join([dbs_counts, dbs_cover, dbs_score, str(sum_rank[i])])
                    newline.append(dbs_score)
                    newline.append(str(rank_sum[i]))
                    # print([dbs_score, str(sum_rank[i])])
                else:
                    # region.data = "\t".join([dbs_counts, dbs_cover, str(sum_rank[i])])
                    newline.append(str(rank_sum[i]))

                # newline += ["<i>" + str(rank_sum[i]) + "</i>"]
                # print(newline)
                data_table.append(newline)

            # print(data_table)
            # data_table = sorted(data_table, key=lambda x: x[-1])
            data_table = natsort.natsorted(data_table, key=lambda x: x[-1])
            html.add_zebra_table(header_list,
                                 col_size_list,
                                 type_list,
                                 data_table,
                                 align=align,
                                 cell_align="left",
                                 header_titles=header_titles,
                                 border_list=None,
                                 sortable=True)
            html.add_heading("Notes")
            html.add_list([
                "DBS stands for DNA Binding Site on DNA.",
                "DBS coverage is the proportion of the region where has potential to form triple helices with the given RNA."
            ])
            html.add_fixed_rank_sortable()
            html.write(os.path.join(directory, "starget_regions.html"))

        ############################
        # Subpages for targeted region centered page
        # region_dbs.html
        header_list = ["RBS", "DBS", "Strand", "Score", "Motif", "Orientation"]

        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")

        for i, region in enumerate(self.dna_region):
            if len(self.region_dbs[region.toString()]) == 0:
                continue
            else:
                html.add_heading(
                    "Associated gene: " +
                    split_gene_name(gene_name=region.name, org=self.organism),
                    idtag=region.toString())
                html.add_free_content([
                    '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' +
                    self.organism + "&position=" + region.chrom + "%3A" +
                    str(region.initial) + "-" + str(region.final) +
                    '" style="margin-left:50">' + region.toString(space=True) +
                    '</a>'
                ])
                data_table = []
                for rd in self.region_dbs[region.toString()]:
                    rbs = rd.rna.str_rna(pa=False)
                    for rbsm in self.data["region"]["sig_region"]:
                        # rbsm = rbsm.partition(":")[2].split("-")
                        if rd.rna.overlap(rbsm):
                            rbs = "<font color=\"red\">" + rbs + "</font>"
                    data_table.append([
                        rbs,
                        '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db='
                        + self.organism + "&position=" + rd.dna.chrom + "%3A" +
                        str(rd.dna.initial) + "-" + str(rd.dna.final) +
                        '" style="text-align:left">' +
                        rd.dna.toString(space=True) + '</a>',
                        rd.dna.orientation, rd.score, rd.motif, rd.orient
                    ])
                html.add_zebra_table(header_list,
                                     col_size_list,
                                     type_list,
                                     data_table,
                                     align=align,
                                     cell_align="left",
                                     auto_width=True)
        html.write(os.path.join(directory, "region_dbs.html"))

        ###############################################################################33
        ################ Parameters.html

        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")
        html.add_heading("Parameters")
        header_list = ["Description", "Arguments", "Value"]

        data_table = [
            ["RNA sequence name", "-rn", parameters.rn],
            ["Input RNA sequence file", "-r",
             os.path.basename(parameters.r)],
            ["Input BED file", "-bed",
             os.path.basename(parameters.bed)],
            ["Output directory", "-o",
             os.path.basename(parameters.o)],
            ["Organism", "-organism", parameters.organism],
            ["Number of repitetion of andomization", "-n",
             str(parameters.n)],
            ["Alpha level for rejection p value", "-a",
             str(parameters.a)],
            [
                "Cut off value for filtering out the low counts of DBSs",
                "-ccf",
                str(parameters.ccf)
            ], ["Remove temporary files", "-rt",
                str(parameters.rt)],
            [
                "Input BED file for masking in randomization", "-f",
                str(parameters.f)
            ], ["Input file for RNA accecibility", "-ac",
                str(parameters.ac)],
            [
                "Cut off value for RNA accecibility", "-accf",
                str(parameters.accf)
            ],
            [
                "Output the BED files for DNA binding sites.", "-obed",
                str(parameters.obed)
            ],
            [
                "Show parallel and antiparallel bindings in the plot separately.",
                "-showpa",
                str(parameters.showpa)
            ], ["Minimum length", "-l",
                str(self.triplexator_p[0])],
            ["Maximum error rate", "-e",
             str(self.triplexator_p[1])],
            [
                "Tolerated number of consecutive errors", "-c",
                str(self.triplexator_p[2])
            ], ["Filtering repeats", "-fr",
                str(self.triplexator_p[3])],
            ["Filtering mode", "-fm",
             str(self.triplexator_p[4])],
            ["Output format", "-of",
             str(self.triplexator_p[5])],
            ["Merge features", "-mf",
             str(self.triplexator_p[6])]
        ]
        html.add_zebra_table(header_list,
                             col_size_list,
                             type_list,
                             data_table,
                             align=align,
                             cell_align="left",
                             auto_width=True)
        html.add_free_content(
            ['<a href="summary.txt" style="margin-left:100">See details</a>'])
        html.write(os.path.join(directory, "parameters.html"))

Exemplo n.º 11

0

Exibir arquivo

Arquivo: evaluation.py Projeto: eggduzao/reg-gen

    def chip_evaluate(self):
        """
        This evaluation methodology uses motif-predicted binding sites (MPBSs) together with TF ChIP-seq data
        to evaluate the footprint predictions.

        return:
        """

        # Evaluate Statistics
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        roc_auc_1 = dict()
        roc_auc_2 = dict()
        recall = dict()
        precision = dict()
        prc_auc = dict()

        if "SEG" in self.footprint_type:
            mpbs_regions = GenomicRegionSet("TFBS")
            mpbs_regions.read_bed(self.tfbs_file)
            mpbs_regions.sort()

            # Verifying the maximum score of the MPBS file
            max_score = -99999999
            for region in iter(mpbs_regions):
                score = int(region.data)
                if score > max_score:
                    max_score = score
            max_score += 1

        for i in range(len(self.footprint_file)):
            footprints_regions = GenomicRegionSet("Footprints Prediction")
            footprints_regions.read_bed(self.footprint_file[i])

            # Sort footprint prediction bed files
            footprints_regions.sort()

            if self.footprint_type[i] == "SEG":
                # Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
                increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
                intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
                for region in iter(intersect_regions):
                    region.data = str(int(region.data) + max_score)
                    increased_score_mpbs_regions.add(region)


                # Keep the score of remained MPBS entry unchanged
                without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
                for region in iter(without_intersect_regions):
                    increased_score_mpbs_regions.add(region)

                increased_score_mpbs_regions.sort_score()

                fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(increased_score_mpbs_regions)
                recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(increased_score_mpbs_regions)
            elif self.footprint_type[i] == "SC":
                footprints_regions.sort_score()
                fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[i] = self.roc_curve(footprints_regions)
                recall[i], precision[i], prc_auc[i] = self.precision_recall_curve(footprints_regions)

        # Output the statistics results into text
        stats_fname = self.output_location + self.tf_name + "_stats.txt"
        stats_header = ["METHOD", "AUC_100", "AUC_10", "AUC_1", "AUPR"]
        with open(stats_fname, "w") as stats_file:
            stats_file.write("\t".join(stats_header) + "\n")
            for i in range(len(self.footprint_name)):
                stats_file.write(self.footprint_name[i] + "\t" + str(roc_auc[i]) + "\t" + str(roc_auc_1[i]) + "\t"
                                 + str(roc_auc_2[i]) + "\t" + str(prc_auc[i]) + "\n")

        # Output the curves
        if self.print_roc_curve:
            label_x = "False Positive Rate"
            label_y = "True Positive Rate"
            curve_name = "ROC"
            self.plot_curve(fpr, tpr, roc_auc, label_x, label_y, self.tf_name, curve_name)
        if self.print_pr_curve:
            label_x = "Recall"
            label_y = "Precision"
            curve_name = "PRC"
            self.plot_curve(recall, precision, prc_auc, label_x, label_y, self.tf_name, curve_name)

        self.output_points(self.tf_name, fpr, tpr, recall, precision)

Exemplo n.º 12

0

Exibir arquivo

Arquivo: Evaluation.py Projeto: CostaLab/reg-gen

def chip_evaluate(args):
    # Evaluate Statistics
    fpr = dict()
    tpr = dict()
    roc_auc_1 = dict()
    roc_auc_10 = dict()
    roc_auc_50 = dict()
    roc_auc_100 = dict()
    recall = dict()
    precision = dict()
    prc_auc_1 = dict()
    prc_auc_10 = dict()
    prc_auc_50 = dict()
    prc_auc_100 = dict()

    footprint_file = args.footprint_file.split(",")
    footprint_name = args.footprint_name.split(",")
    footprint_type = args.footprint_type.split(",")

    max_score = 0
    if "SEG" in footprint_type:
        mpbs_regions = GenomicRegionSet("TFBS")
        mpbs_regions.read(args.tfbs_file)

        # Verifying the maximum score of the MPBS file
        for region in iter(mpbs_regions):
            score = int(region.data.split("\t")[0])
            if score > max_score:
                max_score = score
        max_score += 1

    max_points = []
    for i in range(len(footprint_file)):
        footprints_regions = GenomicRegionSet("Footprints Prediction")
        footprints_regions.read(footprint_file[i])
        footprints_regions.sort()

        if footprint_type[i] == "SEG":
            # Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
            increased_score_mpbs_regions = GenomicRegionSet("Increased Regions")
            intersect_regions = mpbs_regions.intersect(footprints_regions, mode=OverlapType.ORIGINAL)
            for region in iter(intersect_regions):
                region.data = str(int(region.data.split("\t")[0]) + max_score)
                increased_score_mpbs_regions.add(region)

            # Keep the score of remained MPBS entry unchanged
            without_intersect_regions = mpbs_regions.subtract(footprints_regions, whole_region=True)
            for region in iter(without_intersect_regions):
                increased_score_mpbs_regions.add(region)

            increased_score_mpbs_regions.sort_score()

            fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
                roc_curve(increased_score_mpbs_regions)
            recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
                precision_recall_curve(increased_score_mpbs_regions)

            max_points.append(len(intersect_regions))

        elif footprint_type[i] == "SC":
            footprints_regions.sort_score()
            fpr[i], tpr[i], roc_auc_1[i], roc_auc_10[i], roc_auc_50[i], roc_auc_100[i] = \
                roc_curve(footprints_regions)
            recall[i], precision[i], prc_auc_1[i], prc_auc_10[i], prc_auc_50[i], prc_auc_100[i] = \
                precision_recall_curve(footprints_regions)

            max_points.append(len(footprints_regions))

    # Output the statistics results into text
    stats_fname = os.path.join(args.output_location, "{}_stats.txt".format(args.output_prefix))
    stats_header = ["METHOD", "AUC_100", "AUC_50", "AUC_10", "AUC_1", "AUPR_100", "AUPR_50", "AUPR_10", "AUPR_1"]
    with open(stats_fname, "w") as stats_file:
        stats_file.write("\t".join(stats_header) + "\n")
        for i in range(len(footprint_name)):
            stats_file.write(footprint_name[i] + "\t" +
                             str(roc_auc_100[i]) + "\t" + str(roc_auc_50[i]) + "\t" + str(roc_auc_10[i]) + "\t" +
                             str(roc_auc_1[i]) + "\t" + str(prc_auc_100[i]) + "\t" + str(prc_auc_50[i]) + "\t" +
                             str(prc_auc_10[i]) + "\t" + str(prc_auc_1[i]) + "\n")

    # Output the curves
    if args.print_roc_curve:
        label_x = "False Positive Rate"
        label_y = "True Positive Rate"
        curve_name = "ROC"
        plot_curve(footprint_name, args.output_location, fpr, tpr, roc_auc_100, label_x, label_y, args.output_prefix,
                   curve_name, max_points=max_points)
    if args.print_pr_curve:
        label_x = "Recall"
        label_y = "Precision"
        curve_name = "PRC"
        plot_curve(footprint_name, args.output_location, recall, precision, prc_auc_100, label_x, label_y,
                   args.output_prefix, curve_name, max_points=max_points)

    output_points(footprint_name, args.output_location, args.output_prefix, fpr, tpr, recall, precision)

Exemplo n.º 13

0

Exibir arquivo

def match_single(motif,
                 sequence,
                 genomic_region,
                 unique_threshold=None,
                 normalize_bitscore=True,
                 sort=False):
    """
    Performs motif matching given sequence and the motif.pssm passed as parameter.
    The genomic_region is needed to evaluate the correct binding position.
    Please note that the arguments should be passed as a list, to allow for parallelization
    mapping function.

    Keyword arguments:
    motif -- TODO.
    sequence -- A DNA sequence (string).
    genomic_region -- A GenomicRegion.
    output_file -- TODO.  
    unique_threshold -- If this argument is provided, the motif search will be made using a threshold of 0 and
                        then accepting only the motif matches with bitscore/motif_length >= unique_threshold.
        
    Return:
    Print MPBSs to output_file.
    """

    # Establishing threshold
    if unique_threshold:
        current_threshold = 0.0
        eval_threshold = unique_threshold
        motif_max = motif.max / motif.len
    else:
        current_threshold = motif.threshold
        eval_threshold = motif.threshold
        motif_max = motif.max

    # Performing motif matching
    try:
        # old MOODS version
        results = MOODS.search(sequence, [motif.pssm_list],
                               current_threshold,
                               absolute_threshold=True,
                               both_strands=True)
    except:
        # TODO: we can expand this to use bg from sequence, for example,
        # or from organism.
        bg = MOODS.tools.flat_bg(4)
        results = MOODS.scan.scan_dna(sequence, [motif.pssm_list], bg,
                                      [current_threshold], 7)

    grs = GenomicRegionSet("mpbs")

    for search_result in results:
        for r in search_result:
            try:
                position = r.pos
                score = r.score
            except:
                (position, score) = r

            # Verifying unique threshold acceptance
            if unique_threshold and score / motif.len < unique_threshold:
                continue

            # If match forward strand
            if position >= 0:
                p1 = genomic_region.initial + position
                strand = "+"
            # If match reverse strand
            elif not motif.is_palindrome:
                p1 = genomic_region.initial - position
                strand = "-"
            else:
                continue

            # Evaluating p2
            p2 = p1 + motif.len

            # Evaluating score (integer between 0 and 1000 -- needed for bigbed transformation)
            if normalize_bitscore:
                # Normalized bitscore = standardize to integer between 0 and 1000 (needed for bigbed transformation)
                if motif_max > eval_threshold:
                    norm_score = int(((score - eval_threshold) * 1000.0) /
                                     (motif_max - eval_threshold))
                else:
                    norm_score = 1000
            else:
                # Keep the original bitscore
                if unique_threshold:
                    norm_score = score / motif.len
                else:
                    norm_score = score

            grs.add(
                GenomicRegion(genomic_region.chrom,
                              int(p1),
                              int(p2),
                              name=motif.name,
                              orientation=strand,
                              data=str(norm_score)))

    if sort:
        grs.sort()

    return grs

Exemplo n.º 14

0

Exibir arquivo

    def chip_evaluate(self):
        """
        This evaluation methodology uses motif-predicted binding sites (MPBSs) together with TF ChIP-seq data
        to evaluate the footprint predictions.

        return:
        """

        # Evaluate Statistics
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        roc_auc_1 = dict()
        roc_auc_2 = dict()
        recall = dict()
        precision = dict()
        prc_auc = dict()

        if "SEG" in self.footprint_type:
            mpbs_regions = GenomicRegionSet("TFBS")
            mpbs_regions.read_bed(self.tfbs_file)
            mpbs_regions.sort()

            # Verifying the maximum score of the MPBS file
            max_score = -99999999
            for region in iter(mpbs_regions):
                score = int(region.data)
                if score > max_score:
                    max_score = score
            max_score += 1

        for i in range(len(self.footprint_file)):
            footprints_regions = GenomicRegionSet("Footprints Prediction")
            footprints_regions.read_bed(self.footprint_file[i])

            # Sort footprint prediction bed files
            footprints_regions.sort()

            if self.footprint_type[i] == "SEG":
                # Increasing the score of MPBS entry once if any overlaps found in the predicted footprints.
                increased_score_mpbs_regions = GenomicRegionSet(
                    "Increased Regions")
                intersect_regions = mpbs_regions.intersect(
                    footprints_regions, mode=OverlapType.ORIGINAL)
                for region in iter(intersect_regions):
                    region.data = str(int(region.data) + max_score)
                    increased_score_mpbs_regions.add(region)

                # Keep the score of remained MPBS entry unchanged
                without_intersect_regions = mpbs_regions.subtract(
                    footprints_regions, whole_region=True)
                for region in iter(without_intersect_regions):
                    increased_score_mpbs_regions.add(region)

                increased_score_mpbs_regions.sort_score()

                fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[
                    i] = self.roc_curve(increased_score_mpbs_regions)
                recall[i], precision[i], prc_auc[
                    i] = self.precision_recall_curve(
                        increased_score_mpbs_regions)
            elif self.footprint_type[i] == "SC":
                footprints_regions.sort_score()
                fpr[i], tpr[i], roc_auc[i], roc_auc_1[i], roc_auc_2[
                    i] = self.roc_curve(footprints_regions)
                recall[i], precision[i], prc_auc[
                    i] = self.precision_recall_curve(footprints_regions)

        # Output the statistics results into text
        stats_fname = self.output_location + self.tf_name + "_stats.txt"
        stats_header = ["METHOD", "AUC_100", "AUC_10", "AUC_1", "AUPR"]
        with open(stats_fname, "w") as stats_file:
            stats_file.write("\t".join(stats_header) + "\n")
            for i in range(len(self.footprint_name)):
                stats_file.write(self.footprint_name[i] + "\t" +
                                 str(roc_auc[i]) + "\t" + str(roc_auc_1[i]) +
                                 "\t" + str(roc_auc_2[i]) + "\t" +
                                 str(prc_auc[i]) + "\n")

        # Output the curves
        if self.print_roc_curve:
            label_x = "False Positive Rate"
            label_y = "True Positive Rate"
            curve_name = "ROC"
            self.plot_curve(fpr, tpr, roc_auc, label_x, label_y, self.tf_name,
                            curve_name)
        if self.print_pr_curve:
            label_x = "Recall"
            label_y = "Precision"
            curve_name = "PRC"
            self.plot_curve(recall, precision, prc_auc, label_x, label_y,
                            self.tf_name, curve_name)

        self.output_points(self.tf_name, fpr, tpr, recall, precision)

Exemplo n.º 15

0

Exibir arquivo

Arquivo: tdf_regiontest.py Projeto: eggduzao/reg-gen

class RandomTest:
    def __init__(self, rna_fasta, rna_name, dna_region, organism, showdbs=False):
        self.organism = organism
        genome = GenomeData(organism)
        self.genome_path = genome.get_genome()
        # RNA: Path to the FASTA file
        self.rna_fasta = rna_fasta
        self.showdbs = showdbs

        rnas = SequenceSet(name="rna", seq_type=SequenceType.RNA)
        rnas.read_fasta(self.rna_fasta)
        if rna_name:
            self.rna_name = rna_name
        else:
            self.rna_name = rnas[0].name

        # DNA: GenomicRegionSet
        self.dna_region = GenomicRegionSet(name="target")
        self.dna_region.read_bed(dna_region)
        self.dna_region = self.dna_region.gene_association(organism=self.organism, show_dis=True)

        self.topDBD = []
        self.stat = OrderedDict(name=rna_name, genome=organism)
        self.stat["target_regions"] = str(len(self.dna_region))


    def get_rna_region_str(self, rna):
        """Getting the rna region from the information header with the pattern:
                REGION_chr3_51978050_51983935_-_"""
        self.rna_regions = get_rna_region_str(rna)
        if self.rna_regions and len(self.rna_regions[0]) == 5:
            self.rna_expression = float(self.rna_regions[0][-1])
        else:
            self.rna_expression = "n.a."

    def connect_rna(self, rna, temp):
        d = connect_rna(rna, temp, self.rna_name)
        self.stat["exons"] = str(d[0])
        self.stat["seq_length"] = str(d[1])
        self.rna_len = d[1]

    def target_dna(self, temp, remove_temp, cutoff, l, e, c, fr, fm, of, mf, par, obed=False):
        """Calculate the true counts of triplexes on the given dna regions"""
        self.triplexator_p = [ l, e, c, fr, fm, of, mf ]

        txp = find_triplex(rna_fasta=os.path.join(temp, "rna_temp.fa"), dna_region=self.dna_region,
                           temp=temp, organism=self.organism, remove_temp=remove_temp,
                           l=l, e=e, c=c, fr=fr, fm=fm, of=of, mf=mf, par=par, genome_path=self.genome_path,
                           prefix="targeted_region", dna_fine_posi=False)
        txp.merge_rbs(rm_duplicate=True, region_set=self.dna_region, asgene_organism=self.organism, cutoff=cutoff)
        self.txp = txp
        self.stat["DBSs_target_all"] = str(len(self.txp))
        txp.remove_duplicates()
        self.rbss = txp.merged_dict.keys()
        # if len(self.rbss) == 0:
        #     print("ERROR: No potential binding event. Please change the parameters.")
        #     sys.exit(1)

        txpf = find_triplex(rna_fasta=os.path.join(temp, "rna_temp.fa"), dna_region=self.dna_region,
                            temp=temp, organism=self.organism, remove_temp=remove_temp,
                            l=l, e=e, c=c, fr=fr, fm=fm, of=of, mf=mf, par=par, genome_path=self.genome_path,
                            prefix="dbs", dna_fine_posi=True)
        txpf.remove_duplicates()
        txpf.merge_rbs(rbss=self.rbss, rm_duplicate=True, asgene_organism=self.organism)
        self.txpf = txpf

        self.stat["DBSs_target_all"] = str(len(self.txpf))

        self.counts_tr = OrderedDict()
        self.counts_dbs = OrderedDict()

        for rbs in self.rbss:
            tr = len(self.txp.merged_dict[rbs])
            self.counts_tr[rbs] = [tr, len(self.dna_region) - tr]
            self.counts_dbs[rbs] = len(self.txpf.merged_dict[rbs])

        self.region_dbd = self.txpf.sort_rbs_by_regions(self.dna_region)

        self.region_dbs = self.txpf.sort_rd_by_regions(regionset=self.dna_region)
        self.region_dbsm = {}
        self.region_coverage = {}

        for region in self.dna_region:
            self.region_dbsm[region.toString()] = self.region_dbs[region.toString()].get_dbs().merge(w_return=True)
            self.region_coverage[region.toString()] = float(self.region_dbsm[region.toString()].total_coverage()) / len \
                (region)
        self.stat["target_regions"] = str(len(self.dna_region))

        if obed:
            # btr = self.txp.get_dbs()
            # btr = btr.gene_association(organism=self.organism, show_dis=True)
            # btr.write_bed(os.path.join(temp, obed + "_target_region_dbs.bed"))
            # dbss = txpf.get_dbs()
            # dbss.write_bed(os.path.join(temp, obed + "_dbss.bed"))

            # output = self.dna_region.gene_association(organism=self.organism, show_dis=True)

            self.txp.write_bed(filename=os.path.join(temp, obed + "_target_region_dbs.bed"),
                               dbd_tag=False,
                               remove_duplicates=False, associated=self.organism)
            self.txpf.write_bed(filename=os.path.join(temp, obed + "_dbss.bed"),
                                remove_duplicates=False)


    def random_test(self, repeats, temp, remove_temp, l, e, c, fr, fm, of, mf, rm, par, filter_bed, alpha):
        """Perform randomization for the given times"""
        self.repeats = repeats
        marks = numpy.round(numpy.linspace(0, repeats - 1, num=41)).tolist()
        print("random_test")
        print(par)
        # Prepare the input lists for multiprocessing
        mp_input = []
        for i in range(repeats):
            mp_input.append([str(i), os.path.join(temp, "rna_temp.fa"), self.dna_region,
                             temp, self.organism, self.rbss, str(marks.count(i)),
                             str(l), str(e), str(c), str(fr), str(fm), str(of), str(mf), str(rm),
                             filter_bed, self.genome_path, par])
        # Multiprocessing
        print("\t\t|0%                  |                100%|")
        print("\t\t[", end="")
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()-2)
        mp_output = pool.map(random_each, mp_input)
        # print(mp_output)
        pool.close()
        pool.join()
        print("]")

        # Processing the result
        self.region_matrix = []
        self.dbss_matrix = []
        self.data = {"region": {"ave": [],
                                "sd": [],
                                "p": [],
                                "sig_region": [],
                                "sig_boolean": []},
                     "dbs": {"ave": [],
                             "sd": [],
                             "p": [],
                             "sig_region": [],
                             "sig_boolean": []}}

        region_counts = [v[0] for v in mp_output]
        dbss_counts = [v[1] for v in mp_output]

        for i, rbs in enumerate(self.rbss):

            counts_regions = [v[i] for v in region_counts]

            self.data["region"]["ave"].append(numpy.mean(counts_regions))
            self.data["region"]["sd"].append(numpy.std(counts_regions))
            num_sig = len([h for h in counts_regions if h > self.counts_tr[rbs][0]])
            p_region = float(num_sig) / repeats
            self.data["region"]["p"].append(p_region)
            self.region_matrix.append(counts_regions)

            if p_region < alpha:
                self.data["region"]["sig_region"].append(rbs)
                self.data["region"]["sig_boolean"].append(True)
            else:
                self.data["region"]["sig_boolean"].append(False)

            try:
                if p_region < self.topDBD[1]: self.topDBD = [rbs.str_rna(pa=False), p_region]
            except:
                self.topDBD = [rbs.str_rna(pa=False), p_region]

            # Analysis based on DBSs
            if self.showdbs:
                counts_dbss = [v[i] for v in dbss_counts]

                self.data["dbs"]["ave"].append(numpy.mean(counts_dbss))
                self.data["dbs"]["sd"].append(numpy.std(counts_dbss))
                num_sig = len([h for h in counts_dbss if h > self.counts_dbs[rbs]])
                p_dbs = float(num_sig) / repeats
                self.data["dbs"]["p"].append(p_dbs)
                self.dbss_matrix.append(counts_dbss)
                if p_dbs < alpha:
                    self.data["dbs"]["sig_region"].append(rbs)
                    self.data["dbs"]["sig_boolean"].append(True)
                else:
                    self.data["dbs"]["sig_boolean"].append(False)
            try:
                self.stat["p_value"] = str(min(self.data["region"]["p"]))
            except:
                self.stat["p_value"] = "1"

        self.region_matrix = numpy.array(self.region_matrix)

        if self.showdbs: self.dbss_matrix = numpy.array(self.dbss_matrix)

        counts_dbss = [v[i] for v in dbss_counts]
        self.stat["DBSs_random_ave"] = numpy.mean(counts_dbss)
        try: self.stat["p_value"] = str(min(self.data["region"]["p"]))
        except: self.stat["p_value"] = "1"

    def dbd_regions(self, sig_region, output):
        """Generate the BED file of significant DBD regions and FASTA file of the sequences"""
        dbd_regions(exons=self.rna_regions, sig_region=sig_region, rna_name=self.rna_name, output=output)

        self.stat["DBD_all"] = str(len(self.rbss))
        self.stat["DBD_sig"] = str(len(self.data["region"]["sig_region"]))

        sigDBD = GenomicRegionSet("DBD_sig")
        sigDBD.sequences = self.data["region"]["sig_region"]
        rbss = self.txp.get_rbs()
        overlaps = rbss.intersect(y=sigDBD, mode=OverlapType.ORIGINAL)
        self.stat["DBSs_target_DBD_sig"] = str(len(overlaps))


    def lineplot(self, txp, dirp, ac, cut_off, log, ylabel, linelabel, showpa, sig_region, filename):
        """Generate lineplot for RNA"""

        lineplot(txp=txp, rnalen=self.rna_len, rnaname=self.rna_name, dirp=dirp, sig_region=sig_region,
                 cut_off=cut_off, log=log, ylabel=ylabel, linelabel=linelabel,
                 filename=filename, ac=ac, showpa=showpa)

    def boxplot(self, dir, matrix, sig_region, truecounts, sig_boolean, ylabel, filename):
        """Generate the visualized plot"""
        tick_size = 8
        label_size = 9

        f, ax = plt.subplots(1, 1, dpi=300, figsize=(6, 4))
        max_y = int(max([matrix.max()] + truecounts) * 1.1) + 1
        min_y = max(int(matrix.min() * 0.9) - 1, 0)

        # Significant DBD
        rect = patches.Rectangle(xy=(1, 0), width=0.8, height=max_y, facecolor=sig_color,
                                 edgecolor="none", alpha=0.5, lw=None, label="Significant DBD")
        for i, r in enumerate(sig_boolean):
            if r:
                rect = patches.Rectangle(xy=(i + 0.6, min_y), width=0.8, height=max_y, facecolor=sig_color,
                                         edgecolor="none", alpha=0.5, lw=None, label="Significant DBD")
                ax.add_patch(rect)

        # Plotting

        bp = ax.boxplot(matrix.transpose(), notch=False, sym='o', vert=True,
                        whis=1.5, positions=None, widths=None,
                        patch_artist=True, bootstrap=None)
        z = 10
        plt.setp(bp['boxes'], color=nontarget_color, alpha=1, edgecolor="none")
        plt.setp(bp['whiskers'], color='black', linestyle='-', linewidth=1, zorder=z, alpha=1)
        plt.setp(bp['fliers'], markerfacecolor='gray', color='white', alpha=0.3, markersize=1.8, zorder=z)
        plt.setp(bp['caps'], color='white', zorder=-1)
        plt.setp(bp['medians'], color='black', linewidth=1.5, zorder=z + 1)

        # Plot target regions
        plt.plot(range(1, len(self.rbss) + 1), truecounts, markerfacecolor=target_color,
                 marker='o', markersize=5, linestyle='None', markeredgecolor="white", zorder=z + 5)

        ax.set_xlabel(self.rna_name + " DNA Binding Domains", fontsize=label_size)
        ax.set_ylabel(ylabel, fontsize=label_size, rotation=90)

        ax.set_ylim([min_y, max_y])
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))

        ax.set_xticklabels([dbd.str_rna(pa=False) for dbd in self.rbss], rotation=35,
                           ha="right", fontsize=tick_size)
        for tick in ax.yaxis.get_major_ticks(): tick.label.set_fontsize(tick_size)

        for spine in ['top', 'right']:
            ax.spines[spine].set_visible(False)
        ax.tick_params(axis='x', which='both', bottom='off', top='off', labelbottom='on')
        ax.tick_params(axis='y', which='both', left='on', right='off', labelbottom='off')

        # Legend
        dot_legend, = plt.plot([1, 1], color=target_color, marker='o', markersize=5, markeredgecolor="white",
                               linestyle='None')
        bp_legend, = plt.plot([1, 1], color=nontarget_color, linewidth=6, alpha=1)

        ax.legend([dot_legend, bp_legend, rect], ["Target Regions", "Non-target regions", "Significant DBD"],
                  bbox_to_anchor=(0., 1.02, 1., .102), loc=2, mode="expand", borderaxespad=0.,
                  prop={'size': 9}, ncol=3, numpoints=1)
        bp_legend.set_visible(False)
        dot_legend.set_visible(False)

        # f.tight_layout(pad=1.08, h_pad=None, w_pad=None)
        f.savefig(os.path.join(dir, filename + ".png"), facecolor='w', edgecolor='w',
                  bbox_extra_artists=(plt.gci()), bbox_inches='tight', dpi=300)
        # PDF
        for tick in ax.xaxis.get_major_ticks():
            tick.label.set_fontsize(12)
        for tick in ax.yaxis.get_major_ticks():
            tick.label.set_fontsize(12)
        ax.xaxis.label.set_size(14)
        ax.yaxis.label.set_size(14)

        pp = PdfPages(os.path.join(dir, filename + '.pdf'))
        pp.savefig(f, bbox_extra_artists=(plt.gci()), bbox_inches='tight')
        pp.close()

    def gen_html(self, directory, parameters, obed, align=50, alpha=0.05, score=False):
        """Generate the HTML file"""
        dir_name = os.path.basename(directory)
        html_header = "Genomic Region Test: " + dir_name
        link_ds = OrderedDict()
        link_ds["RNA"] = "index.html"
        link_ds["Sig Target Regions"] = "starget_regions.html"
        link_ds["Target Regions"] = "target_regions.html"
        link_ds["Parameters"] = "parameters.html"

        ##################################################
        # index.html

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")
        # Plots
        html.add_figure("lineplot_region.png", align="left", width="45%", more_images=["boxplot_regions.png"])
        if self.showdbs:
            html.add_figure("lineplot_dbs.png", align="left", width="45%", more_images=["boxplot_dbs.png"])

        if self.showdbs:
            header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics",
                            "Target Regions", "Non-target Regions", None, "Statistics"],
                           ["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "<i>p</i>-value",
                            "NO. DBSs", "NO. DBSs (average)", "s.d.", "<i>p</i>-value"]]
            header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None,
                              "Regions from randomization", None, "Statistics based on target regions",
                              "Given target regions on DNA", "Regions from randomization", None,
                              "Statistics based on DNA Binding Sites"],
                             ["", "",
                              "Number of target regions with DBS binding",
                              "Number of target regions without DBS binding",
                              "Average number of regions from randomization with DBS binding",
                              "Standard deviation", "P value",
                              "Number of related DNA Binding Sites binding to target regions",
                              "Average number of DNA Binding Sites binding to random regions",
                              "Standard deviation", "P-value"]]
            border_list = [" style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:2pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\""]
        else:
            header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics", None],
                           ["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "<i>p</i>-value",
                            "z-score"]]
            header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None,
                              "Regions from randomization", None, "Statistics based on target regions", None],
                             ["", "",
                              "Number of target regions with DBS binding",
                              "Number of target regions without DBS binding",
                              "Average number of regions from randomization with DBS binding",
                              "Standard deviation", "P value", "Z-score"]]
            border_list = [" style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", ""]

        type_list = 'ssssssssssssssss'
        col_size_list = [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
        data_table = []

        for i, rbs in enumerate(self.rbss):
            if self.data["region"]["p"][i] < alpha:
                p_region = "<font color=\"red\">" + value2str(self.data["region"]["p"][i]) + "</font>"

            else:
                p_region = value2str(self.data["region"]["p"][i])
            zs = (self.counts_tr[rbs][0] - self.data["region"]["ave"][i]) / self.data["region"]["sd"][i]
            new_line = [str(i + 1),
                        rbs.str_rna(pa=False),
                        '<a href="dbd_region.html#' + rbs.str_rna() +
                        '" style="text-align:left">' + str(self.counts_tr[rbs][0]) + '</a>',
                        str(self.counts_tr[rbs][1]),
                        value2str(self.data["region"]["ave"][i]),
                        value2str(self.data["region"]["sd"][i]),
                        p_region,
                        value2str(zs)]
            if self.showdbs:
                if self.data["dbs"]["p"][i] < alpha:
                    p_dbs = "<font color=\"red\">" + value2str(self.data["dbs"]["p"][i]) + "</font>"
                else:
                    p_dbs = value2str(self.data["dbs"]["p"][i])

                new_line += [str(self.counts_dbs[rbs]),
                             value2str(self.data["dbs"]["ave"][i]),
                             value2str(self.data["dbs"]["sd"][i]),
                             p_dbs]
            data_table.append(new_line)

        data_table = natsort.natsorted(data_table, key=lambda x: x[6])
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                             auto_width=True, header_titles=header_titles, border_list=border_list, sortable=True)

        html.add_heading("Notes")
        html.add_list(["RNA name: " + self.rna_name,
                       "Randomization is performed for " + str(self.repeats) + " times.",
                       "DBD stands for DNA Binding Domain on RNA.",
                       "DBS stands for DNA Binding Site on DNA."])
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "index.html"))

        #############################################################
        # RNA subpage: Profile of targeted regions for each merged DNA Binding Domain
        #############################################################

        header_list = ["#", "Target Region",
                       "Associated Gene",
                       "No. of DBSs",
                       "DBS coverage"]
        header_titles = ["Rank", "Given target regions from BED files",
                         "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                         "Number of DNA Binding Sites locate within the region",
                         "The proportion of the region covered by DBS binding"]

        #########################################################
        # dbd_region.html
        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")

        for rbsm in self.rbss:
            html.add_heading("DNA Binding Domain: " + rbsm.str_rna(),
                             idtag=rbsm.str_rna())
            data_table = []
            for i, region in enumerate(self.txp.merged_dict[rbsm]):
                # Add information
                data_table.append([str(i + 1),
                                   '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' + self.organism +
                                   "&position=" + region.chrom + "%3A" + str(region.initial) + "-" + str(region.final) +
                                   '" style="text-align:left">' + region.toString(space=True) + '</a>',
                                   split_gene_name(gene_name=region.name, org=self.organism),
                                   str(len(self.region_dbs[region.toString()])),
                                   value2str(self.region_coverage[region.toString()])
                                   ])

            html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                                 auto_width=True, header_titles=header_titles, sortable=True)
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "dbd_region.html"))

        #############################################################
        # Targeted regions centered
        #############################################################

        ##############################################################################################
        # target_regions.html
        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")

        if score:
            header_list = ["#", "Target region", "Associated Gene", "DBSs Count",
                           "DBS coverage", "Score", "Sum of ranks"]
            header_titles = ["Rank",
                             "Target regions loaded from the given BED file",
                             "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                             "Number of DNA Binding Sites within the region",
                             "The proportion of the region covered by DBS binding",
                             "Scores from BED file",
                             "Sum of all the left-hand-side ranks"]
        else:
            header_list = ["#", "Target region", "Associated Gene", "DBSs Count",
                           "DBS coverage", "Sum of ranks"]
            header_titles = ["Rank",
                             "Target regions loaded from the given BED file",
                             "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                             "Number of DNA Binding Sites within the region",
                             "The proportion of the region covered by DBS binding",
                             "Sum of all the left-hand-side ranks"]
        html.add_heading("Target Regions")
        data_table = []

        if not self.dna_region.sorted: self.dna_region.sort()

        # Calculate the ranking
        rank_count = len(self.dna_region) - rank_array([len(self.region_dbs[p.toString()]) for p in self.dna_region])
        rank_coverage = len(self.dna_region) - rank_array([self.region_coverage[p.toString()] for p in self.dna_region])

        if score:
            try:
                score_list = [float(p.data.split("\t")[0]) for p in self.dna_region]
                rank_score = len(self.dna_region) - rank_array([abs(s) for s in score_list])
                rank_sum = [x + y + z for x, y, z in zip(rank_count, rank_coverage, rank_score)]
                # sum_rank = rank_array(rank_sum)  # method='min'
            except ImportError:
                print("There is no score in BED file, please don't use '-score' argument.")
        else:
            rank_sum = [x + y for x, y in zip(rank_count, rank_coverage)]
            sum_rank = rank_array(rank_sum)

        for i, region in enumerate(self.dna_region):
            dbs_counts = str(len(self.region_dbs[region.toString()]))
            dbs_cover = value2str(self.region_coverage[region.toString()])

            newline = [str(i + 1),
                       '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' + self.organism +
                       "&position=" + region.chrom + "%3A" + str(region.initial) + "-" + str(region.final) +
                       '" style="text-align:left">' + region.toString(space=True) + '</a>',
                       split_gene_name(gene_name=region.name, org=self.organism),
                       '<a href="region_dbs.html#' + region.toString() +
                       '" style="text-align:left">' + dbs_counts + '</a>',
                       dbs_cover]

            if score:
                dbs_score = value2str(score_list[i])
                region.data = "\t".join([dbs_counts, dbs_cover, dbs_score, str(rank_sum[i])])
                newline.append(dbs_score)
                newline.append(str(rank_sum[i]))
            else:
                region.data = "\t".join([dbs_counts, dbs_cover, str(rank_sum[i])])
                newline.append(str(rank_sum[i]))
            data_table.append(newline)

        data_table = natsort.natsorted(data_table, key=lambda x: x[-1])
        # data_table = sorted(data_table, key=lambda x: x[-1])
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                             auto_width=True, header_titles=header_titles, sortable=True)
        html.add_heading("Notes")
        html.add_list(["All target regions without any bindings are ignored."])
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "target_regions.html"))

        self.dna_region.sort_score()
        self.dna_region.write_bed(os.path.join(directory, obed + "_target_regions.bed"))



        ##############################################################################################
        # starget_regions.html    for significant target regions

        stargets = GenomicRegionSet("sig_targets")
        sig_dbs = {}
        sig_dbs_coverage = {}
        for i, r in enumerate(self.dna_region):
            sig_bindings = self.region_dbs[r.toString()].overlap_rbss(rbss=self.data["region"]["sig_region"])
            dbs = sig_bindings.get_dbs()
            if len(dbs) > 0:
                stargets.add(r)
                m_dbs = dbs.merge(w_return=True)
                sig_dbs[r] = len(dbs)
                # self.promoter["de"]["merged_dbs"][promoter.toString()] = len(m_dbs)
                sig_dbs_coverage[r] = float(m_dbs.total_coverage()) / len(r)

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")

        # Select promoters in sig DBD
        if len(self.data["region"]["sig_region"]) == 0:
            html.add_heading("There is no significant DBD.")
        else:
            html.add_heading("Target regions bound by significant DBD")
            data_table = []
            # Calculate the ranking
            rank_count = len(stargets) - rank_array([sig_dbs[p] for p in stargets])
            rank_coverage = len(stargets) - rank_array([sig_dbs_coverage[p] for p in stargets])
            if score:
                score_list = [float(p.data.split("\t")[0]) for p in stargets]
                rank_score = len(stargets) - rank_array([abs(s) for s in score_list])
                rank_sum = [x + y + z for x, y, z in zip(rank_count, rank_coverage, rank_score)]
                sum_rank = rank_array(rank_sum)  # method='min'
            else:
                rank_sum = [x + y for x, y in zip(rank_count, rank_coverage)]
                sum_rank = rank_array(rank_sum)

            for i, region in enumerate(stargets):
                dbssount = '<a href="region_dbs.html#' + region.toString() + \
                           '" style="text-align:left">' + str(sig_dbs[region]) + '</a>'

                region_link = region_link_internet(self.organism, region)

                newline = [str(i + 1), region_link,
                           split_gene_name(gene_name=region.name, org=self.organism),
                           dbssount, value2str(sig_dbs_coverage[region]) ]
                if score:
                    dbs_score = value2str(score_list[i])
                    # region.data = "\t".join([dbs_counts, dbs_cover, dbs_score, str(sum_rank[i])])
                    newline.append(dbs_score)
                    newline.append(str(rank_sum[i]))
                    # print([dbs_score, str(sum_rank[i])])
                else:
                    # region.data = "\t".join([dbs_counts, dbs_cover, str(sum_rank[i])])
                    newline.append(str(rank_sum[i]))

                # newline += ["<i>" + str(rank_sum[i]) + "</i>"]
                # print(newline)
                data_table.append(newline)

            # print(data_table)
            # data_table = sorted(data_table, key=lambda x: x[-1])
            data_table = natsort.natsorted(data_table, key=lambda x: x[-1])
            html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                                 header_titles=header_titles, border_list=None, sortable=True)
            html.add_heading("Notes")
            html.add_list(["DBS stands for DNA Binding Site on DNA.",
                           "DBS coverage is the proportion of the region where has potential to form triple helices with the given RNA."])
            html.add_fixed_rank_sortable()
            html.write(os.path.join(directory, "starget_regions.html"))

        ############################
        # Subpages for targeted region centered page
        # region_dbs.html
        header_list = ["RBS", "DBS", "Strand", "Score", "Motif", "Orientation"]

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")

        for i, region in enumerate(self.dna_region):
            if len(self.region_dbs[region.toString()]) == 0:
                continue
            else:
                html.add_heading("Associated gene: " + split_gene_name(gene_name=region.name, org=self.organism),
                                 idtag=region.toString())
                html.add_free_content(['<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' + self.organism +
                                       "&position=" + region.chrom + "%3A" + str(region.initial) +
                                       "-" + str(region.final) + '" style="margin-left:50">' +
                                       region.toString(space=True) + '</a>'])
                data_table = []
                for rd in self.region_dbs[region.toString()]:
                    rbs = rd.rna.str_rna(pa=False)
                    for rbsm in self.data["region"]["sig_region"]:
                        # rbsm = rbsm.partition(":")[2].split("-")
                        if rd.rna.overlap(rbsm):
                            rbs = "<font color=\"red\">" + rbs + "</font>"
                    data_table.append([rbs,
                                       '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' + self.organism +
                                       "&position=" + rd.dna.chrom + "%3A" + str(rd.dna.initial) + "-" + str(
                                           rd.dna.final) +
                                       '" style="text-align:left">' + rd.dna.toString(space=True) + '</a>',
                                       rd.dna.orientation, rd.score, rd.motif, rd.orient])
                html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                                     auto_width=True)
        html.write(os.path.join(directory, "region_dbs.html"))

        ###############################################################################33
        ################ Parameters.html

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")
        html.add_heading("Parameters")
        header_list = ["Description", "Arguments", "Value"]

        data_table = [["RNA sequence name", "-rn", parameters.rn],
                      ["Input RNA sequence file", "-r", os.path.basename(parameters.r)],
                      ["Input BED file", "-bed", os.path.basename(parameters.bed)],
                      ["Output directory", "-o", os.path.basename(parameters.o)],
                      ["Organism", "-organism", parameters.organism],
                      ["Number of repitetion of andomization", "-n", str(parameters.n)],
                      ["Alpha level for rejection p value", "-a", str(parameters.a)],
                      ["Cut off value for filtering out the low counts of DBSs", "-ccf", str(parameters.ccf)],
                      ["Remove temporary files", "-rt", str(parameters.rt)],
                      ["Input BED file for masking in randomization", "-f", str(parameters.f)],
                      ["Input file for RNA accecibility", "-ac", str(parameters.ac)],
                      ["Cut off value for RNA accecibility", "-accf", str(parameters.accf)],
                      ["Output the BED files for DNA binding sites.", "-obed", str(parameters.obed)],
                      ["Show parallel and antiparallel bindings in the plot separately.", "-showpa",
                       str(parameters.showpa)],
                      ["Minimum length", "-l", str(self.triplexator_p[0])],
                      ["Maximum error rate", "-e", str(self.triplexator_p[1])],
                      ["Tolerated number of consecutive errors", "-c", str(self.triplexator_p[2])],
                      ["Filtering repeats", "-fr", str(self.triplexator_p[3])],
                      ["Filtering mode", "-fm", str(self.triplexator_p[4])],
                      ["Output format", "-of", str(self.triplexator_p[5])],
                      ["Merge features", "-mf", str(self.triplexator_p[6])]]
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                             auto_width=True)
        html.add_free_content(['<a href="summary.txt" style="margin-left:100">See details</a>'])
        html.write(os.path.join(directory, "parameters.html"))