Пример #1
0
def _xtick_positions(len_amplicon):
    """Return integer x-tick positions for an amplicon of ``len_amplicon`` bases.

    The step is one sixth of the amplicon length rounded down to a multiple
    of five, and never smaller than three.
    """
    sixth = len_amplicon / 6
    return np.arange(0, len_amplicon, max(3, sixth - sixth % 5)).astype(int)


def _draw_cut_sites_and_guides(ax, cut_points, sgRNA_intervals, cut_y_range,
                               sgRNA_y):
    """Overlay predicted cleavage positions and sgRNA spans on ``ax``.

    Each cut point becomes a dashed vertical line spanning ``cut_y_range``
    and each sgRNA interval a thick horizontal bar at height ``sgRNA_y``.
    Only the first line of each kind carries a legend label; the others are
    labelled ``'_nolegend_'``. Nothing at all is drawn (sgRNA bars included)
    when ``cut_points`` is empty.

    Returns:
        The labelled line artists (first cut line, then first sgRNA bar),
        ready to be appended to a list of legend handles.
    """
    labelled = []
    if not cut_points:
        return labelled

    for idx, cut_point in enumerate(cut_points):
        lines = ax.plot(
            [cut_point, cut_point],
            [cut_y_range[0], cut_y_range[1]],
            '--k',
            lw=2,
            label='Predicted cleavage position' if idx == 0 else '_nolegend_')
        if idx == 0:
            labelled.extend(lines)

    for idx, sgRNA_int in enumerate(sgRNA_intervals):
        lines = ax.plot([sgRNA_int[0], sgRNA_int[1]], [sgRNA_y, sgRNA_y],
                        lw=10,
                        c=(0, 0, 0, 0.15),
                        label='sgRNA' if idx == 0 else '_nolegend_')
        if idx == 0:
            labelled.extend(lines)

    return labelled


def main():
    """Command-line entry point of CRISPRessoCompare.

    Parses the command line, validates the two CRISPResso output folders
    and, for every amplicon present in both analyses, writes into the
    output directory:

    * ``1.<amplicon>.Comparison_Efficiency`` - bar charts of the
      unmodified/modified read percentages and of their difference;
    * ``2.<amplicon>.Comparison_Combined_Insertion_Deletion_Substitution_Locations``
      - per-position mutation profiles and their difference;
    * ``<amplicon>.<mod>_quantification.txt`` and
      ``2.<amplicon>.<mod>.quantification`` - per-position counts, Fisher
      exact test results and the matching plot for each modification type;
    * ``<allele file>.txt`` and ``3.<allele file>_top`` / ``_bottom`` -
      merged allele tables and allele plots sorted by log2 fold change.

    The function never returns normally: it calls ``sys.exit(0)`` on success
    and ``sys.exit(-1)`` after reporting any exception (the traceback is
    printed only when ``--debug`` was given).
    """
    try:
        description = [
            '~~~CRISPRessoCompare~~~',
            '-Comparison of two CRISPResso analyses-'
        ]
        compare_header = r'''
 ___________________________
| __ __      __      __  __ |
|/  /  \|\/||__) /\ |__)|_  |
|\__\__/|  ||   /--\| \ |__ |
|___________________________|
        '''
        compare_header = CRISPRessoShared.get_crispresso_header(
            description, compare_header)
        print(compare_header)

        parser = argparse.ArgumentParser(
            description='CRISPRessoCompare Parameters',
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser.add_argument(
            'crispresso_output_folder_1',
            type=str,
            help='First output folder with CRISPResso analysis')
        parser.add_argument(
            'crispresso_output_folder_2',
            type=str,
            help='Second output folder with CRISPResso analysis')

        #OPTIONALS
        parser.add_argument('-n', '--name', help='Output name', default='')
        parser.add_argument('-n1',
                            '--sample_1_name',
                            help='Sample 1 name',
                            default='Sample_1')
        parser.add_argument('-n2',
                            '--sample_2_name',
                            help='Sample 2 name',
                            default='Sample_2')
        parser.add_argument('-o', '--output_folder', help='', default='')
        parser.add_argument(
            '--min_frequency_alleles_around_cut_to_plot',
            type=float,
            help=
            'Minimum %% reads required to report an allele in the alleles table plot.',
            default=0.2)
        parser.add_argument(
            '--max_rows_alleles_around_cut_to_plot',
            type=int,
            help='Maximum number of rows to report in the alleles table plot. ',
            default=50)
        parser.add_argument(
            '--save_also_png',
            help='Save also .png images additionally to .pdf files',
            action='store_true')
        parser.add_argument('--debug',
                            help='Show debug messages',
                            action='store_true')

        args = parser.parse_args()

        #check that the CRISPResso output is present and fill amplicon_info
        _, amplicon_names_1, amplicon_info_1 = CRISPRessoShared.check_output_folder(
            args.crispresso_output_folder_1)
        _, amplicon_names_2, amplicon_info_2 = CRISPRessoShared.check_output_folder(
            args.crispresso_output_folder_2)

        def get_name_from_folder(folder):
            """Return the folder's base name without the 'CRISPResso_on_' tag."""
            return os.path.basename(os.path.abspath(folder)).replace(
                'CRISPResso_on_', '')

        if args.name:
            database_id = args.name
        else:
            database_id = '%s_VS_%s' % (
                get_name_from_folder(args.crispresso_output_folder_1),
                get_name_from_folder(args.crispresso_output_folder_2))

        OUTPUT_DIRECTORY = 'CRISPRessoCompare_on_%s' % database_id

        if args.output_folder:
            OUTPUT_DIRECTORY = os.path.join(
                os.path.abspath(args.output_folder), OUTPUT_DIRECTORY)

        def _jp(filename):
            """Return the path of ``filename`` inside the output directory."""
            return os.path.join(OUTPUT_DIRECTORY, filename)

        info('Creating Folder %s' % OUTPUT_DIRECTORY)
        try:
            os.makedirs(OUTPUT_DIRECTORY)
        except OSError:
            # Only an already-existing directory is harmless; anything else
            # (permissions, a file in the way, ...) must surface as an error
            # instead of being reported as "already exists".
            if not os.path.isdir(OUTPUT_DIRECTORY):
                raise
            warn('Folder %s already exists.' % OUTPUT_DIRECTORY)
        else:
            info('Done!')

        # Start the log with the command line (truncating any previous log)
        # before the logging handler starts appending to the same file.
        log_filename = _jp('CRISPRessoCompare_RUNNING_LOG.txt')
        with open(log_filename, 'w') as outfile:
            outfile.write(
                '[Command used]:\nCRISPRessoCompare %s\n\n[Execution log]:\n' %
                ' '.join(sys.argv))
        logging.getLogger().addHandler(logging.FileHandler(log_filename))

        #LOAD DATA
        amplicon_names_in_both = [
            amplicon_name for amplicon_name in amplicon_names_1
            if amplicon_name in amplicon_names_2
        ]
        n_refs = len(amplicon_names_in_both)

        def get_plot_title_with_ref_name(plotTitle, refName):
            """Append the amplicon name to the title when several are compared."""
            if n_refs > 1:
                return (plotTitle + ": " + refName)
            return plotTitle

        for amplicon_name in amplicon_names_in_both:
            profile_1 = parse_profile(
                amplicon_info_1[amplicon_name]['quantification_file'])
            profile_2 = parse_profile(
                amplicon_info_2[amplicon_name]['quantification_file'])

            # Explicit check rather than an assert, which is stripped when
            # Python runs with -O.
            if (profile_1.shape != profile_2.shape
                    or not np.all(profile_1[:, 0] == profile_2[:, 0])):
                raise DifferentAmpliconLengthException(
                    'Different amplicon lengths for the two amplicons.')
            len_amplicon = profile_1.shape[0]
            effect_vector_any_1 = profile_1[:, 1]
            effect_vector_any_2 = profile_2[:, 1]
            # Cut points and guides are read from the first analysis only.
            cut_points, sgRNA_intervals = load_cut_points_sgRNA_intervals(
                args.crispresso_output_folder_1, amplicon_name)
            xticks = _xtick_positions(len_amplicon)

            #Quantification comparison barchart
            fig = plt.figure(figsize=(30, 15))
            n_groups = 2

            N_TOTAL_1 = float(amplicon_info_1[amplicon_name]['Total'])
            N_UNMODIFIED_1 = float(
                amplicon_info_1[amplicon_name]['Unmodified'])
            N_MODIFIED_1 = float(amplicon_info_1[amplicon_name]['Modified'])

            N_TOTAL_2 = float(amplicon_info_2[amplicon_name]['Total'])
            N_UNMODIFIED_2 = float(
                amplicon_info_2[amplicon_name]['Unmodified'])
            N_MODIFIED_2 = float(amplicon_info_2[amplicon_name]['Modified'])

            means_sample_1 = np.array([N_UNMODIFIED_1, N_MODIFIED_1
                                       ]) / N_TOTAL_1 * 100
            means_sample_2 = np.array([N_UNMODIFIED_2, N_MODIFIED_2
                                       ]) / N_TOTAL_2 * 100

            ax1 = fig.add_subplot(1, 2, 1)

            index = np.arange(n_groups)
            bar_width = 0.35
            opacity = 0.4

            ax1.bar(index,
                    means_sample_1,
                    bar_width,
                    alpha=opacity,
                    color=(0, 0, 1, 0.4),
                    label=args.sample_1_name)

            ax1.bar(index + bar_width,
                    means_sample_2,
                    bar_width,
                    alpha=opacity,
                    color=(1, 0, 0, 0.4),
                    label=args.sample_2_name)

            plt.ylabel('% Sequences')
            plt.title(
                get_plot_title_with_ref_name(
                    '%s VS %s' % (args.sample_1_name, args.sample_2_name),
                    amplicon_name))
            plt.xticks(index + bar_width / 2.0, ('Unmodified', 'Modified'))
            plt.legend()
            plt.tight_layout()

            ax2 = fig.add_subplot(1, 2, 2)
            ax2.bar(index,
                    means_sample_1 - means_sample_2,
                    bar_width + 0.35,
                    alpha=opacity,
                    color=(0, 1, 1, 0.4),
                    label='')

            plt.ylabel('% Sequences Difference')
            plt.title(
                get_plot_title_with_ref_name(
                    '%s - %s' % (args.sample_1_name, args.sample_2_name),
                    amplicon_name))
            plt.xticks(index, ['Unmodified', 'Modified'])

            plt.tight_layout()
            efficiency_root = '1.' + amplicon_name + '.Comparison_Efficiency'
            plt.savefig(_jp(efficiency_root + '.pdf'), bbox_inches='tight')
            if args.save_also_png:
                plt.savefig(_jp(efficiency_root + '.png'),
                            bbox_inches='tight')
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

            #profile comparison
            fig = plt.figure(figsize=(20, 10))

            ax1 = fig.add_subplot(1, 2, 1)
            plt.title(
                get_plot_title_with_ref_name('Mutation position distribution',
                                             amplicon_name))
            y_max = max(effect_vector_any_1.max(),
                        effect_vector_any_2.max()) * 1.2

            plt.plot(effect_vector_any_1,
                     color=(0, 0, 1, 0.3),
                     lw=4,
                     label='%s combined mutations' % args.sample_1_name)
            plt.plot(effect_vector_any_2,
                     color=(1, 0, 0, 0.3),
                     lw=4,
                     label='%s combined mutations' % args.sample_2_name)

            _draw_cut_sites_and_guides(ax1, cut_points, sgRNA_intervals,
                                       (0, y_max), 0)

            lgd = plt.legend(loc='center',
                             bbox_to_anchor=(0.5, -0.3),
                             ncol=1,
                             fancybox=True,
                             shadow=False)

            plt.xticks(xticks)
            plt.xlabel('Reference amplicon position (bp)')
            plt.ylabel('Sequences %')
            plt.ylim(0, max(1, y_max))
            plt.xlim(xmax=len_amplicon - 1)

            ax2 = fig.add_subplot(1, 2, 2)

            effect_vector_any_diff = effect_vector_any_1 - effect_vector_any_2

            # Keep at least the [-1, 1] range visible on the difference axis.
            y_top = max(1, effect_vector_any_diff.max() * 1.2)
            y_bottom = min(-1, effect_vector_any_diff.min() * 1.2)

            plt.title(
                get_plot_title_with_ref_name(
                    '%s - %s' % (args.sample_1_name, args.sample_2_name),
                    amplicon_name))
            plt.plot(effect_vector_any_diff,
                     color=(0, 1, 0, 0.4),
                     lw=3,
                     label='Difference')

            _draw_cut_sites_and_guides(ax2, cut_points, sgRNA_intervals,
                                       (y_bottom, y_top), y_bottom)

            lgd2 = plt.legend(loc='center',
                              bbox_to_anchor=(0.5, -0.2),
                              ncol=1,
                              fancybox=True,
                              shadow=False)
            plt.xticks(xticks)
            plt.xlabel('Reference amplicon position (bp)')
            plt.ylabel('Sequences Difference %')
            plt.xlim(xmax=len_amplicon - 1)

            plt.ylim(y_bottom, y_top)

            # The PDF and PNG share one file stem (the PNG used to lack
            # 'Combined_'), and both legends are passed as extra artists so
            # neither is cropped by the tight bounding box.
            locations_root = (
                '2.' + amplicon_name +
                '.Comparison_Combined_Insertion_Deletion_Substitution_Locations'
            )
            plt.savefig(_jp(locations_root + '.pdf'),
                        bbox_extra_artists=(lgd, lgd2),
                        bbox_inches='tight')
            if args.save_also_png:
                plt.savefig(_jp(locations_root + '.png'),
                            bbox_extra_artists=(lgd, lgd2),
                            bbox_inches='tight')
            plt.close(fig)

            mod_file_1 = amplicon_info_1[amplicon_name][
                'modification_count_file']
            amp_seq_1, mod_freqs_1 = CRISPRessoShared.parse_count_file(
                mod_file_1)
            mod_file_2 = amplicon_info_2[amplicon_name][
                'modification_count_file']
            amp_seq_2, mod_freqs_2 = CRISPRessoShared.parse_count_file(
                mod_file_2)
            consensus_sequence = amp_seq_1
            if amp_seq_2 != consensus_sequence:
                raise DifferentAmpliconLengthException(
                    'Different amplicon lengths for the two amplicons.')

            for mod in [
                    'Insertions', 'Deletions', 'Substitutions',
                    'All_modifications'
            ]:

                mod_counts_1 = np.array(mod_freqs_1[mod], dtype=float)
                tot_counts_1 = np.array(mod_freqs_1['Total'], dtype=float)
                unmod_counts_1 = tot_counts_1 - mod_counts_1

                mod_counts_2 = np.array(mod_freqs_2[mod], dtype=float)
                tot_counts_2 = np.array(mod_freqs_2['Total'], dtype=float)
                unmod_counts_2 = tot_counts_2 - mod_counts_2

                # One Fisher exact test per position; positions without any
                # reads get a neutral result (odds ratio NaN, p-value 1).
                fisher_results = [
                    stats.fisher_exact([[z[0], z[1]], [z[2], z[3]]])
                    if max(z) > 0 else [np.nan, 1.0]
                    for z in zip(mod_counts_1, unmod_counts_1, mod_counts_2,
                                 unmod_counts_2)
                ]
                oddsratios = [a for a, b in fisher_results]
                pvalues = [b for a, b in fisher_results]

                table_rows = [
                    [args.sample_1_name + '_' + mod] + list(mod_counts_1),
                    [args.sample_1_name + '_total'] + list(tot_counts_1),
                    [args.sample_2_name + '_' + mod] + list(mod_counts_2),
                    [args.sample_2_name + '_total'] + list(tot_counts_2),
                    ['odds_ratios'] + list(oddsratios),
                    ['pvalues'] + list(pvalues),
                ]
                colnames = ['Reference'] + list(consensus_sequence)
                mod_df = pd.DataFrame(table_rows, columns=colnames)
                #write to file
                mod_df.to_csv(_jp(amplicon_name + '.' + mod +
                                  '_quantification.txt'),
                              sep='\t',
                              index=None)

                #plot
                fig = plt.figure(figsize=(20, 10))
                ax1 = fig.add_subplot(2, 1, 1)

                # NOTE(review): this is a difference of fractions although
                # the axis label below says '%' - confirm the intended unit.
                diff = np.divide(mod_counts_1, tot_counts_1) - np.divide(
                    mod_counts_2, tot_counts_2)
                diff_plot = ax1.plot(diff,
                                     color=(0, 1, 0, 0.4),
                                     lw=3,
                                     label='Difference')
                ax1.set_title(
                    get_plot_title_with_ref_name(
                        '%s: %s - %s' %
                        (mod, args.sample_1_name, args.sample_2_name),
                        amplicon_name))
                ax1.set_xticks(xticks)
                ax1.set_ylabel('Sequences Difference %')
                ax1.set_xlim(xmin=0, xmax=len_amplicon - 1)

                # Replace exact zeros by the smallest positive p-value so
                # that -log10 stays finite; skipped when there is none
                # (np.min of an empty selection would raise).
                pvalues = np.array(pvalues)
                positive_pvalues = pvalues[pvalues > 0]
                if positive_pvalues.size:
                    pvalues[pvalues == 0] = positive_pvalues.min()

                ax2 = fig.add_subplot(2, 1, 2)
                pval_plot = ax2.plot(-1 * np.log10(pvalues),
                                     color=(1, 0, 0, 0.4),
                                     lw=2,
                                     label='-log10 P-value')
                ax2.set_ylabel('-log10 P-value')
                ax2.set_xlim(xmin=0, xmax=len_amplicon - 1)
                ax2.set_xticks(xticks)
                ax2.set_xlabel('Reference amplicon position (bp)')

                #bonferroni correction
                corrected_p = -1 * np.log10(
                    0.01 / float(len(consensus_sequence)))
                cutoff_plot = ax2.plot([0, len(consensus_sequence)],
                                       [corrected_p, corrected_p],
                                       color='k',
                                       dashes=(5, 10),
                                       label='Bonferronni corrected cutoff')

                plots = diff_plot + pval_plot + cutoff_plot

                # Read both axis ranges before drawing any overlay so the
                # guides span the data range of their own axis. (Later cut
                # lines on the p-value axis used to be drawn with the
                # difference-axis limits.)
                diff_y_min, diff_y_max = ax1.get_ylim()
                p_y_min, p_y_max = ax2.get_ylim()
                plots = plots + _draw_cut_sites_and_guides(
                    ax1, cut_points, sgRNA_intervals,
                    (diff_y_min, diff_y_max), diff_y_min)
                _draw_cut_sites_and_guides(ax2, cut_points, sgRNA_intervals,
                                           (p_y_min, p_y_max), p_y_min)

                labs = [p.get_label() for p in plots]
                lgd = plt.legend(plots,
                                 labs,
                                 loc='upper center',
                                 bbox_to_anchor=(0.5, -0.2),
                                 ncol=1,
                                 fancybox=True,
                                 shadow=False)

                quantification_root = ('2.' + amplicon_name + '.' + mod +
                                       '.quantification')
                plt.savefig(_jp(quantification_root + '.pdf'),
                            bbox_extra_artists=(lgd, ),
                            bbox_inches='tight')
                if args.save_also_png:
                    plt.savefig(_jp(quantification_root + '.png'),
                                bbox_extra_artists=(lgd, ),
                                bbox_inches='tight')
                plt.close(fig)

            #create merged heatmaps for each cut site
            allele_files_1 = amplicon_info_1[amplicon_name]['allele_files']
            allele_files_2 = amplicon_info_2[amplicon_name]['allele_files']
            for allele_file_1 in allele_files_1:
                allele_file_1_name = os.path.split(allele_file_1)[
                    1]  #get file part of path
                for allele_file_2 in allele_files_2:
                    allele_file_2_name = os.path.split(allele_file_2)[
                        1]  #get file part of path
                    #only files with the same name (same amplicon, cut site, guide) are compared
                    if allele_file_1_name != allele_file_2_name:
                        continue

                    df1 = pd.read_csv(allele_file_1, sep="\t")
                    df2 = pd.read_csv(allele_file_2, sep="\t")

                    #find a gap-free reference row for comparison (if it exists)
                    ungapped_1 = df1.loc[
                        df1['Reference_Sequence'].str.contains('-') == False,
                        'Reference_Sequence']
                    ungapped_2 = df2.loc[
                        df2['Reference_Sequence'].str.contains('-') == False,
                        'Reference_Sequence']

                    ref_seq_around_cut = ""
                    if len(ungapped_1) > 0:
                        ref_seq_around_cut = ungapped_1.iloc[0]
                    elif len(ungapped_2) > 0:
                        ref_seq_around_cut = ungapped_2.iloc[0]
                    else:
                        # Neither table has a gap-free reference row, so
                        # rebuild the window from the amplicon sequence
                        # around the cut site of the sgRNA named in the
                        # allele file name. This branch used to read an
                        # argument the parser never defines and an
                        # unassigned variable, so it always crashed.
                        # NOTE(review): the half-width is taken from the
                        # aligned row length, assuming every row spans the
                        # whole plotted window - confirm against the code
                        # that writes the allele tables.
                        window_rows = df1['Reference_Sequence']
                        if len(window_rows) == 0:
                            window_rows = df2['Reference_Sequence']
                        if len(window_rows) > 0:
                            offset = len(str(window_rows.iloc[0])) // 2
                            for sgRNA_interval, cut_point in zip(
                                    sgRNA_intervals, cut_points):
                                sgRNA_seq = consensus_sequence[
                                    sgRNA_interval[0]:sgRNA_interval[1]]
                                if sgRNA_seq in allele_file_1_name:
                                    window_start = max(
                                        0, cut_point - offset + 1)
                                    window_end = min(
                                        len(consensus_sequence),
                                        cut_point + offset + 1)
                                    ref_seq_around_cut = consensus_sequence[
                                        window_start:window_end]
                                    break

                    merged = pd.merge(df1,
                                      df2,
                                      on=[
                                          'Aligned_Sequence',
                                          'Reference_Sequence', 'Unedited',
                                          'n_deleted', 'n_inserted',
                                          'n_mutated'
                                      ],
                                      suffixes=('_' + args.sample_1_name,
                                                '_' + args.sample_2_name),
                                      how='outer')
                    quant_cols = [
                        '#Reads_' + args.sample_1_name,
                        '%Reads_' + args.sample_1_name,
                        '#Reads_' + args.sample_2_name,
                        '%Reads_' + args.sample_2_name
                    ]
                    # Alleles seen in only one sample count as zero reads.
                    merged[quant_cols] = merged[quant_cols].fillna(0)
                    # Pseudo-count that keeps the log ratio finite when an
                    # allele is absent from one sample.
                    lfc_error = 0.1
                    merged['each_LFC'] = np.log2(
                        ((merged['%Reads_' + args.sample_1_name] +
                          lfc_error) /
                         (merged['%Reads_' + args.sample_2_name] +
                          lfc_error)).astype(float)).replace(
                              [np.inf, np.nan], 0)
                    merged = merged.reset_index().set_index(
                        'Aligned_Sequence')
                    output_root = allele_file_1_name.replace(".txt", "")
                    merged.to_csv(_jp(output_root + ".txt"),
                                  sep="\t",
                                  index=None)
                    # '_top' lists the lowest log fold changes first,
                    # '_bottom' the highest.
                    for suffix, ascending in (("_top", True),
                                              ("_bottom", False)):
                        CRISPRessoPlot.plot_alleles_table_compare(
                            ref_seq_around_cut,
                            merged.sort_values(['each_LFC'],
                                               ascending=ascending),
                            args.sample_1_name,
                            args.sample_2_name,
                            _jp('3.' + output_root + suffix),
                            MIN_FREQUENCY=args.
                            min_frequency_alleles_around_cut_to_plot,
                            MAX_N_ROWS=args.
                            max_rows_alleles_around_cut_to_plot,
                            SAVE_ALSO_PNG=args.save_also_png)

        info('All Done!')
        print(CRISPRessoShared.get_crispresso_footer())
        sys.exit(0)

    except Exception as e:
        # The failure may predate argument parsing, so only consult
        # args.debug when it actually exists.
        debug_flag = False
        if 'args' in vars() and 'debug' in args:
            debug_flag = args.debug

        if debug_flag:
            traceback.print_exc(file=sys.stdout)

        error('\n\nERROR: %s' % e)
        sys.exit(-1)
Пример #2
0
def main():
    """Run the CRISPRessoCompare command-line tool.

    Parses the command line, loads the results found in two CRISPResso2
    output folders and, for every amplicon name present in both runs, writes
    into a ``CRISPRessoCompare_on_<name>`` directory:

    * plot 1: a bar chart of the modified/unmodified read percentages of
      each sample and of their difference;
    * plot 2 (one per modification type): the per-position difference in
      modification frequency and the Fisher exact test p-values, together
      with a tab-separated quantification table;
    * plot 3 (one pair per allele table present in both runs): the merged
      allele table sorted by log2 fold change in both directions.

    Unless ``--suppress_report`` is given an HTML report is produced, and the
    collected run metadata is pickled to ``CRISPResso2Compare_info.pickle``.

    This function never returns normally: it calls ``sys.exit(0)`` on
    success and ``sys.exit(-1)`` when any ``Exception`` is raised. In the
    failure case the message is logged and the traceback is printed only if
    ``--debug`` was given.
    """
    try:
        description = [
            '~~~CRISPRessoCompare~~~',
            '-Comparison of two CRISPResso analyses-'
        ]
        compare_header = r'''
 ___________________________
| __ __      __      __  __ |
|/  /  \|\/||__) /\ |__)|_  |
|\__\__/|  ||   /--\| \ |__ |
|___________________________|
        '''
        compare_header = CRISPRessoShared.get_crispresso_header(
            description, compare_header)
        print(compare_header)

        parser = argparse.ArgumentParser(
            description='CRISPRessoCompare Parameters',
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser.add_argument(
            'crispresso_output_folder_1',
            type=str,
            help='First output folder with CRISPResso analysis')
        parser.add_argument(
            'crispresso_output_folder_2',
            type=str,
            help='Second output folder with CRISPResso analysis')

        #OPTIONALS
        parser.add_argument('-n', '--name', help='Output name', default='')
        parser.add_argument('-n1', '--sample_1_name', help='Sample 1 name')
        parser.add_argument('-n2', '--sample_2_name', help='Sample 2 name')
        parser.add_argument('-o', '--output_folder', help='', default='')
        parser.add_argument(
            '--min_frequency_alleles_around_cut_to_plot',
            type=float,
            help=
            'Minimum %% reads required to report an allele in the alleles table plot.',
            default=0.2)
        parser.add_argument(
            '--max_rows_alleles_around_cut_to_plot',
            type=int,
            help='Maximum number of rows to report in the alleles table plot. ',
            default=50)
        parser.add_argument('--suppress_report',
                            help='Suppress output report',
                            action='store_true')
        parser.add_argument(
            '--place_report_in_output_folder',
            help=
            'If true, report will be written inside the CRISPResso output folder. By default, the report will be written one directory up from the report output.',
            action='store_true')
        parser.add_argument('--debug',
                            help='Show debug messages',
                            action='store_true')

        args = parser.parse_args()
        debug_flag = args.debug

        #check that the CRISPResso output is present and fill amplicon_info
        quantification_file_1, amplicon_names_1, amplicon_info_1 = CRISPRessoShared.check_output_folder(
            args.crispresso_output_folder_1)
        quantification_file_2, amplicon_names_2, amplicon_info_2 = CRISPRessoShared.check_output_folder(
            args.crispresso_output_folder_2)

        # NOTE: unpickling can execute arbitrary code; only point this tool
        # at output folders that come from a trusted source.
        run_info_1_file = os.path.join(args.crispresso_output_folder_1,
                                       'CRISPResso2_info.pickle')
        if not os.path.isfile(run_info_1_file):
            raise CRISPRessoShared.OutputFolderIncompleteException(
                'The folder %s is not a valid CRISPResso2 output folder. Cannot find run data at %s'
                % (args.crispresso_output_folder_1, run_info_1_file))
        with open(run_info_1_file, 'rb') as run_info_fh:
            run_info_1 = cp.load(run_info_fh)

        run_info_2_file = os.path.join(args.crispresso_output_folder_2,
                                       'CRISPResso2_info.pickle')
        if not os.path.isfile(run_info_2_file):
            raise CRISPRessoShared.OutputFolderIncompleteException(
                'The folder %s is not a valid CRISPResso2 output folder. Cannot find run data at %s'
                % (args.crispresso_output_folder_2, run_info_2_file))
        with open(run_info_2_file, 'rb') as run_info_fh:
            run_info_2 = cp.load(run_info_fh)

        # Sample names: command line first, then the name stored in the run
        # info, then a generic placeholder.
        sample_1_name = args.sample_1_name
        if args.sample_1_name is None:
            sample_1_name = "Sample 1"
            if 'name' in run_info_1 and run_info_1['name'] != '':
                sample_1_name = run_info_1['name']

        sample_2_name = args.sample_2_name
        if args.sample_2_name is None:
            sample_2_name = "Sample 2"
            if 'name' in run_info_2 and run_info_2['name'] != '':
                sample_2_name = run_info_2['name']

        def get_name_from_folder(folder):
            """Return the folder's base name without the 'CRISPResso_on_' tag."""
            return os.path.basename(os.path.abspath(folder)).replace(
                'CRISPResso_on_', '')

        if not args.name:
            database_id = '%s_VS_%s' % (
                get_name_from_folder(args.crispresso_output_folder_1),
                get_name_from_folder(args.crispresso_output_folder_2))
        else:
            database_id = args.name

        OUTPUT_DIRECTORY = 'CRISPRessoCompare_on_%s' % database_id

        if args.output_folder:
            OUTPUT_DIRECTORY = os.path.join(
                os.path.abspath(args.output_folder), OUTPUT_DIRECTORY)

        def _jp(filename):
            """Return the path of ``filename`` inside the output directory."""
            return os.path.join(OUTPUT_DIRECTORY, filename)

        try:
            info('Creating Folder %s' % OUTPUT_DIRECTORY)
            os.makedirs(OUTPUT_DIRECTORY)
            info('Done!')
        except OSError:
            # Best effort: an existing folder is reused rather than treated
            # as fatal.
            warn('Folder %s already exists.' % OUTPUT_DIRECTORY)

        log_filename = _jp('CRISPRessoCompare_RUNNING_LOG.txt')
        logging.getLogger().addHandler(logging.FileHandler(log_filename))

        with open(log_filename, 'w+') as outfile:
            outfile.write(
                '[Command used]:\nCRISPRessoCompare %s\n\n[Execution log]:\n' %
                ' '.join(sys.argv))

        crispresso2Compare_info_file = os.path.join(
            OUTPUT_DIRECTORY, 'CRISPResso2Compare_info.pickle')
        #keep track of all information for this run to be pickled and saved at the end of the run
        crispresso2_info = {}
        crispresso2_info['version'] = CRISPRessoShared.__version__
        crispresso2_info['args'] = deepcopy(args)

        crispresso2_info['log_filename'] = os.path.basename(log_filename)

        crispresso2_info['summary_plot_names'] = []
        crispresso2_info['summary_plot_titles'] = {}
        crispresso2_info['summary_plot_labels'] = {}
        crispresso2_info['summary_plot_datas'] = {}

        # PNG copies are only written when a report will be generated.
        save_png = not args.suppress_report

        #LOAD DATA
        amplicon_names_in_both = [
            amplicon_name for amplicon_name in amplicon_names_1
            if amplicon_name in amplicon_names_2
        ]
        n_refs = len(amplicon_names_in_both)

        def get_plot_title_with_ref_name(plotTitle, refName):
            """Append the amplicon name to the title when comparing several."""
            if n_refs > 1:
                return (plotTitle + ": " + refName)
            return plotTitle

        def first_ungapped_reference(allele_df):
            """Return the first Reference_Sequence without '-', else None."""
            # '== False' (rather than '~') keeps rows whose match result is
            # missing out of the selection, exactly as a plain comparison does.
            ungapped = allele_df.loc[
                allele_df['Reference_Sequence'].str.contains('-') == False]
            if len(ungapped) > 0:
                return ungapped['Reference_Sequence'].iloc[0]
            return None

        for amplicon_name in amplicon_names_in_both:
            profile_1 = parse_profile(
                amplicon_info_1[amplicon_name]['quantification_file'])
            profile_2 = parse_profile(
                amplicon_info_2[amplicon_name]['quantification_file'])

            amplicon_plot_name = amplicon_name + "."
            if n_refs == 1 and amplicon_name == "Reference":
                amplicon_plot_name = ""

            # The first profile column must be identical in both runs. Any
            # failure while comparing is reported as a length mismatch. An
            # explicit check is used so it is not stripped under 'python -O'.
            try:
                same_positions = bool(
                    np.all(profile_1[:, 0] == profile_2[:, 0]))
            except Exception:
                same_positions = False
            if not same_positions:
                raise DifferentAmpliconLengthException(
                    'Different amplicon lengths for the two amplicons.')

            len_amplicon = profile_1.shape[0]
            effect_vector_any_1 = profile_1[:, 1]
            effect_vector_any_2 = profile_2[:, 1]
            # Cut points and sgRNA intervals are taken from the first run.
            cut_points = run_info_1['refs'][amplicon_name]['sgRNA_cut_points']
            sgRNA_intervals = run_info_1['refs'][amplicon_name][
                'sgRNA_intervals']

            #Quantification comparison barchart
            fig = plt.figure(figsize=(30, 15))
            n_groups = 2

            N_TOTAL_1 = float(amplicon_info_1[amplicon_name]['Reads_aligned'])
            N_UNMODIFIED_1 = float(
                amplicon_info_1[amplicon_name]['Unmodified'])
            N_MODIFIED_1 = float(amplicon_info_1[amplicon_name]['Modified'])

            N_TOTAL_2 = float(amplicon_info_2[amplicon_name]['Reads_aligned'])
            N_UNMODIFIED_2 = float(
                amplicon_info_2[amplicon_name]['Unmodified'])
            N_MODIFIED_2 = float(amplicon_info_2[amplicon_name]['Modified'])

            means_sample_1 = np.array([N_UNMODIFIED_1, N_MODIFIED_1
                                       ]) / N_TOTAL_1 * 100
            means_sample_2 = np.array([N_UNMODIFIED_2, N_MODIFIED_2
                                       ]) / N_TOTAL_2 * 100

            ax1 = fig.add_subplot(1, 2, 1)

            index = np.arange(n_groups)
            bar_width = 0.35
            opacity = 0.4

            ax1.bar(index,
                    means_sample_1,
                    bar_width,
                    alpha=opacity,
                    color=(0, 0, 1, 0.4),
                    label=sample_1_name)

            ax1.bar(index + bar_width,
                    means_sample_2,
                    bar_width,
                    alpha=opacity,
                    color=(1, 0, 0, 0.4),
                    label=sample_2_name)

            plt.ylabel('% Sequences')
            plt.title(
                get_plot_title_with_ref_name(
                    '%s VS %s' % (sample_1_name, sample_2_name),
                    amplicon_name))
            plt.xticks(index + bar_width / 2.0, ('Unmodified', 'Modified'))
            plt.legend()
            plt.tight_layout()

            ax2 = fig.add_subplot(1, 2, 2)
            ax2.bar(index,
                    means_sample_1 - means_sample_2,
                    bar_width + 0.35,
                    alpha=opacity,
                    color=(0, 1, 1, 0.4),
                    label='')

            plt.ylabel('% Sequences Difference')
            plt.title(
                get_plot_title_with_ref_name(
                    '%s - %s' % (sample_1_name, sample_2_name), amplicon_name))
            plt.xticks(index, ['Unmodified', 'Modified'])

            plt.tight_layout()
            plot_name = '1.' + amplicon_plot_name + 'Editing_comparison'
            plt.savefig(_jp(plot_name) + '.pdf', bbox_inches='tight')
            if save_png:
                plt.savefig(_jp(plot_name) + '.png', bbox_inches='tight')
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)

            crispresso2_info['summary_plot_names'].append(plot_name)
            crispresso2_info['summary_plot_titles'][
                plot_name] = 'Editing efficiency comparison'
            crispresso2_info['summary_plot_labels'][
                plot_name] = 'Figure 1: Comparison for amplicon ' + amplicon_name + '; Left: Percentage of modified and unmodified reads in each sample; Right: relative percentage of modified and unmodified reads'
            output_1 = os.path.join(args.crispresso_output_folder_1,
                                    run_info_1['report_filename'])
            # Each report is looked up in its own run's output folder.
            output_2 = os.path.join(args.crispresso_output_folder_2,
                                    run_info_2['report_filename'])
            crispresso2_info['summary_plot_datas'][plot_name] = []
            if os.path.isfile(output_1):
                crispresso2_info['summary_plot_datas'][plot_name].append(
                    (sample_1_name + ' output',
                     os.path.relpath(output_1, OUTPUT_DIRECTORY)))
            if os.path.isfile(output_2):
                crispresso2_info['summary_plot_datas'][plot_name].append(
                    (sample_2_name + ' output',
                     os.path.relpath(output_2, OUTPUT_DIRECTORY)))

            mod_file_1 = amplicon_info_1[amplicon_name][
                'modification_count_file']
            amp_seq_1, mod_freqs_1 = CRISPRessoShared.parse_count_file(
                mod_file_1)
            mod_file_2 = amplicon_info_2[amplicon_name][
                'modification_count_file']
            amp_seq_2, mod_freqs_2 = CRISPRessoShared.parse_count_file(
                mod_file_2)
            consensus_sequence = amp_seq_1
            if amp_seq_2 != consensus_sequence:
                raise DifferentAmpliconLengthException(
                    'Different amplicon lengths for the two amplicons.')

            # Same x tick positions for both panels of every plot 2 figure.
            tick_step = max(3,
                            (len_amplicon / 6) - (len_amplicon / 6) % 5)
            position_ticks = np.arange(0, len_amplicon,
                                       tick_step).astype(int)

            for mod in [
                    'Insertions', 'Deletions', 'Substitutions',
                    'All_modifications'
            ]:
                mod_name = mod
                if mod == "All_modifications":
                    mod_name = "Combined modifications (insertions, deletions and substitutions)"

                mod_counts_1 = np.array(mod_freqs_1[mod], dtype=float)
                tot_counts_1 = np.array(mod_freqs_1['Total'], dtype=float)
                unmod_counts_1 = tot_counts_1 - mod_counts_1

                mod_counts_2 = np.array(mod_freqs_2[mod], dtype=float)
                tot_counts_2 = np.array(mod_freqs_2['Total'], dtype=float)
                unmod_counts_2 = tot_counts_2 - mod_counts_2

                # One 2x2 Fisher exact test per position; positions where all
                # four counts are zero get a p-value of 1 without testing.
                fisher_results = [
                    stats.fisher_exact([[z[0], z[1]], [z[2], z[3]]])
                    if max(z) > 0 else [nan, 1.0]
                    for z in zip(mod_counts_1, unmod_counts_1, mod_counts_2,
                                 unmod_counts_2)
                ]
                oddsratios, pvalues = [a for a, b in fisher_results
                                       ], [b for a, b in fisher_results]

                # One labelled row per quantity, one column per position.
                table_rows = [
                    [sample_1_name + '_' + mod] + list(mod_counts_1),
                    [sample_1_name + '_total'] + list(tot_counts_1),
                    [sample_2_name + '_' + mod] + list(mod_counts_2),
                    [sample_2_name + '_total'] + list(tot_counts_2),
                    ['odds_ratios'] + list(oddsratios),
                    ['pvalues'] + list(pvalues),
                ]
                colnames = ['Reference'] + list(consensus_sequence)
                mod_df = pd.DataFrame(table_rows, columns=colnames)
                #write to file
                mod_filename = _jp(amplicon_plot_name + mod +
                                   "_quantification.txt")
                mod_df.to_csv(mod_filename, sep='\t', index=None)

                #plot
                fig = plt.figure(figsize=(20, 10))
                ax1 = fig.add_subplot(2, 1, 1)

                diff = np.divide(mod_counts_1, tot_counts_1) - np.divide(
                    mod_counts_2, tot_counts_2)
                diff_plot = ax1.plot(diff,
                                     color=(0, 1, 0, 0.4),
                                     lw=3,
                                     label='Difference')
                ax1.set_title(
                    get_plot_title_with_ref_name(
                        '%s: %s - %s' % (mod, sample_1_name, sample_2_name),
                        amplicon_name))
                ax1.set_xticks(position_ticks)
                ax1.set_ylabel('Sequences Difference %')
                ax1.set_xlim(xmin=0, xmax=len_amplicon - 1)

                # Replace exact zeros by the smallest non-zero p-value so
                # that -log10 stays finite.
                pvalues = np.array(pvalues)
                min_nonzero = np.min(pvalues[np.nonzero(pvalues)])
                pvalues[pvalues == 0] = min_nonzero
                ax2 = fig.add_subplot(2, 1, 2)
                pval_plot = ax2.plot(-1 * np.log10(pvalues),
                                     color=(1, 0, 0, 0.4),
                                     lw=2,
                                     label='-log10 P-value')
                ax2.set_ylabel('-log10 P-value')
                ax2.set_xlim(xmin=0, xmax=len_amplicon - 1)
                ax2.set_xticks(position_ticks)
                ax2.set_xlabel('Reference amplicon position (bp)')

                #bonferroni correction: 0.01 divided by the number of positions
                corrected_p = -1 * np.log10(
                    0.01 / float(len(consensus_sequence)))
                cutoff_plot = ax2.plot([0, len(consensus_sequence)],
                                       [corrected_p, corrected_p],
                                       color='k',
                                       dashes=(5, 10),
                                       label='Bonferronni corrected cutoff')

                # Line handles that will appear in the shared legend.
                plots = diff_plot + pval_plot + cutoff_plot

                diff_y_min, diff_y_max = ax1.get_ylim()
                p_y_min, p_y_max = ax2.get_ylim()
                if cut_points:
                    for idx, cut_point in enumerate(cut_points):
                        # Only the first line of each kind is labelled so the
                        # legend has a single entry for it.
                        if idx == 0:
                            plot_cleavage = ax1.plot(
                                [cut_point, cut_point],
                                [diff_y_min, diff_y_max],
                                '--k',
                                lw=2,
                                label='Predicted cleavage position')
                            ax2.plot([cut_point, cut_point],
                                     [p_y_min, p_y_max],
                                     '--k',
                                     lw=2,
                                     label='Predicted cleavage position')
                            plots = plots + plot_cleavage
                        else:
                            ax1.plot([cut_point, cut_point],
                                     [diff_y_min, diff_y_max],
                                     '--k',
                                     lw=2,
                                     label='_nolegend_')
                            # The p-value panel uses its own y-range.
                            ax2.plot([cut_point, cut_point],
                                     [p_y_min, p_y_max],
                                     '--k',
                                     lw=2,
                                     label='_nolegend_')

                    for idx, sgRNA_int in enumerate(sgRNA_intervals):
                        if idx == 0:
                            p2 = ax1.plot([sgRNA_int[0], sgRNA_int[1]],
                                          [diff_y_min, diff_y_min],
                                          lw=10,
                                          c=(0, 0, 0, 0.15),
                                          label='sgRNA')
                            ax2.plot([sgRNA_int[0], sgRNA_int[1]],
                                     [p_y_min, p_y_min],
                                     lw=10,
                                     c=(0, 0, 0, 0.15),
                                     label='sgRNA')
                            plots = plots + p2
                        else:
                            ax1.plot([sgRNA_int[0], sgRNA_int[1]],
                                     [diff_y_min, diff_y_min],
                                     lw=10,
                                     c=(0, 0, 0, 0.15),
                                     label='_nolegend_')
                            ax2.plot([sgRNA_int[0], sgRNA_int[1]],
                                     [p_y_min, p_y_min],
                                     lw=10,
                                     c=(0, 0, 0, 0.15),
                                     label='_nolegend_')

                labs = [p.get_label() for p in plots]
                lgd = plt.legend(plots,
                                 labs,
                                 loc='upper center',
                                 bbox_to_anchor=(0.5, -0.2),
                                 ncol=1,
                                 fancybox=True,
                                 shadow=False)

                plot_name = '2.' + amplicon_plot_name + mod + '_quantification'
                plt.savefig(_jp(plot_name + '.pdf'),
                            bbox_inches='tight',
                            bbox_extra_artists=(lgd, ))
                if save_png:
                    plt.savefig(_jp(plot_name + '.png'),
                                bbox_inches='tight',
                                bbox_extra_artists=(lgd, ))
                plt.close(fig)
                crispresso2_info['summary_plot_names'].append(plot_name)
                crispresso2_info['summary_plot_titles'][
                    plot_name] = mod_name + ' locations'
                crispresso2_info['summary_plot_labels'][
                    plot_name] = mod_name + ' location comparison for amplicon ' + amplicon_name + '; Top: percent difference; Bottom: p-value.'
                crispresso2_info['summary_plot_datas'][plot_name] = [
                    (mod_name + ' quantification',
                     os.path.basename(mod_filename))
                ]

            #create merged heatmaps for each cut site
            allele_files_1 = amplicon_info_1[amplicon_name]['allele_files']
            allele_files_2 = amplicon_info_2[amplicon_name]['allele_files']
            for allele_file_1 in allele_files_1:
                #get file part of path
                allele_file_1_name = os.path.split(allele_file_1)[1]
                for allele_file_2 in allele_files_2:
                    allele_file_2_name = os.path.split(allele_file_2)[1]
                    #only files with the same name (same amplicon, cut site, guide) are compared
                    if allele_file_1_name != allele_file_2_name:
                        continue

                    df1 = pd.read_csv(allele_file_1, sep="\t")
                    df2 = pd.read_csv(allele_file_2, sep="\t")

                    #find an ungapped reference for comparison (if it exists),
                    #looking in the first table and then in the second
                    ref_seq_around_cut = first_ungapped_reference(df1)
                    if ref_seq_around_cut is None:
                        ref_seq_around_cut = first_ungapped_reference(df2)
                    if ref_seq_around_cut is None:
                        #otherwise rebuild the window from the amplicon, around
                        #the cut point of the sgRNA named in the file name
                        ref_seq_around_cut = ""
                        plot_offset = getattr(args,
                                              'offset_around_cut_to_plot',
                                              None)
                        if plot_offset is None:
                            # NOTE(review): this parser defines no
                            # offset_around_cut_to_plot option, so the
                            # half-window is inferred from the table rows,
                            # assuming each row spans 2 * offset characters
                            # -- confirm against the code writing the tables.
                            row_widths = pd.concat([
                                df1['Reference_Sequence'],
                                df2['Reference_Sequence']
                            ]).str.len()
                            plot_offset = 0
                            if len(row_widths) > 0:
                                plot_offset = int(row_widths.max()) // 2
                        for sgRNA_interval, cut_point in zip(
                                sgRNA_intervals, cut_points):
                            sgRNA_seq = consensus_sequence[
                                sgRNA_interval[0]:sgRNA_interval[1]]
                            if sgRNA_seq in allele_file_1_name:
                                window_start = max(
                                    0, cut_point - plot_offset + 1)
                                window_end = min(
                                    len(consensus_sequence),
                                    cut_point + plot_offset + 1)
                                ref_seq_around_cut = consensus_sequence[
                                    window_start:window_end]
                                break

                    # Outer join: alleles seen in only one sample are kept
                    # and their missing read counts are set to 0 below.
                    merged = pd.merge(df1,
                                      df2,
                                      on=[
                                          'Aligned_Sequence',
                                          'Reference_Sequence', 'Unedited',
                                          'n_deleted', 'n_inserted',
                                          'n_mutated'
                                      ],
                                      suffixes=('_' + sample_1_name,
                                                '_' + sample_2_name),
                                      how='outer')
                    quant_cols = [
                        '#Reads_' + sample_1_name,
                        '%Reads_' + sample_1_name,
                        '#Reads_' + sample_2_name,
                        '%Reads_' + sample_2_name
                    ]
                    merged[quant_cols] = merged[quant_cols].fillna(0)
                    # Pseudo-count added to both percentages so the ratio is
                    # defined when an allele is absent from one sample.
                    lfc_error = 0.1
                    merged['each_LFC'] = np.log2(
                        ((merged['%Reads_' + sample_1_name] + lfc_error) /
                         (merged['%Reads_' + sample_2_name] + lfc_error)
                         ).astype(float)).replace([np.inf, np.nan], 0)
                    merged = merged.reset_index().set_index(
                        'Aligned_Sequence')
                    output_root = allele_file_1_name.replace(".txt", "")
                    allele_comparison_file = _jp(output_root + '.txt')
                    merged.to_csv(allele_comparison_file,
                                  sep="\t",
                                  index=None)

                    allele_label_base = (
                        'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '
                        +
                        'The proportion and number of reads is shown for each sample on the right, with the values for '
                        + sample_1_name + ' followed by the values for ' +
                        sample_2_name +
                        '. Alleles are sorted for enrichment in ')

                    # Same table plotted twice: ascending each_LFC first
                    # ('_top'), then descending ('_bottom').
                    for suffix, ascending, enriched_name in (
                        ('_top', True, sample_1_name),
                        ('_bottom', False, sample_2_name)):
                        plot_name = '3.' + output_root + suffix
                        CRISPRessoPlot.plot_alleles_table_compare(
                            ref_seq_around_cut,
                            merged.sort_values(['each_LFC'],
                                               ascending=ascending),
                            sample_1_name,
                            sample_2_name,
                            _jp(plot_name),
                            MIN_FREQUENCY=args.
                            min_frequency_alleles_around_cut_to_plot,
                            MAX_N_ROWS=args.
                            max_rows_alleles_around_cut_to_plot,
                            SAVE_ALSO_PNG=save_png)
                        crispresso2_info['summary_plot_names'].append(
                            plot_name)
                        crispresso2_info['summary_plot_titles'][
                            plot_name] = 'Alleles enriched in ' + enriched_name
                        crispresso2_info['summary_plot_labels'][
                            plot_name] = allele_label_base + enriched_name + '.'
                        crispresso2_info['summary_plot_datas'][plot_name] = [
                            ('Allele comparison table',
                             os.path.basename(allele_comparison_file))
                        ]

        if not args.suppress_report:
            if args.place_report_in_output_folder:
                # NOTE(review): the file name says "Batch" although this is
                # the Compare tool; left unchanged in case something relies
                # on it.
                report_name = _jp("CRISPResso2Batch_report.html")
            else:
                report_name = OUTPUT_DIRECTORY + '.html'
            CRISPRessoReport.make_compare_report_from_folder(
                report_name, crispresso2_info, OUTPUT_DIRECTORY, _ROOT)
            crispresso2_info['report_location'] = report_name
            crispresso2_info['report_filename'] = os.path.basename(report_name)

        with open(crispresso2Compare_info_file, 'wb') as info_fh:
            cp.dump(crispresso2_info, info_fh)

        info('Analysis Complete!')
        print(CRISPRessoShared.get_crispresso_footer())
        sys.exit(0)

    except Exception as e:
        # The failure may predate argument parsing, so 'args' is looked up
        # defensively before reading its debug flag.
        debug_flag = False
        if 'args' in vars() and 'debug' in args:
            debug_flag = args.debug

        if debug_flag:
            traceback.print_exc(file=sys.stdout)

        error('\n\nERROR: %s' % e)
        sys.exit(-1)