Example #1
File: function.py  Project: taiyunkim/PAD
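# Assumed imports for this file, not shown on this page: os, numpy,
# django.conf.settings, multiprocessing.Pool, and pybedtools.BedTool
# imported under the alias bt.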
def createJaccardDistribution(file_name1, file_name2, gene_database_name):
    path = os.path.join(settings.MEDIA_ROOT, 'peak_file_db',
                        str(gene_database_name))
    file_name1 = os.path.join(path, file_name1)
    file_name2 = os.path.join(path, file_name2)

    # Create shuffled BED files of file1 and file2
    file1 = bt(file_name1)
    file2 = bt(file_name2)

    genome_chrom_size = os.path.join(settings.MEDIA_ROOT, 'genome_chrom_sizes')

    randomised_files = []
    for rep in range(10000):
        shuffled_file1 = file1.shuffle(g=genome_chrom_size +
                                       '/mm9.chrom.sizes',
                                       chrom=True)
        shuffled_file2 = file2.shuffle(g=genome_chrom_size +
                                       '/mm9.chrom.sizes',
                                       chrom=True)
        randomised_files.append((shuffled_file1, shuffled_file2))

    # set multiprocessing
    pool = Pool(processes=4)
    bootstrapped_jaccard = pool.map(
        bootstrapRandom, randomised_files)  # returns list of jaccard indices
    # close() must be called before join()
    pool.close()
    pool.join()

    return bootstrapped_jaccard
Example #2
def createJaccardDistribution(file_name1,
                              file_name2,
                              genome_chrom_size,
                              bootstrap_num=10000,
                              process_num=4):

    global file1, file2, genome_chrom_sizes
    # Create shuffled BED files of file1 and file2
    file1 = bt(file_name1)
    file2 = bt(file_name2)
    genome_chrom_sizes = genome_chrom_size

    # set multiprocessing
    pool = Pool(processes=process_num)
    bootstrapped_jaccard = pool.map(
        bootstrapRandom,
        range(bootstrap_num))  # returns list of jaccard indices
    pool.close()
    pool.join()

    return numpy.sort(bootstrapped_jaccard)
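Both versions of createJaccardDistribution map a bootstrapRandom helper over the worker pool, but the helper itself does not appear on this page. A minimal sketch consistent with the second version, which passes an iteration index and reads the module globals file1, file2 and genome_chrom_sizes set above (the first version would instead receive an already-shuffled pair and only compute the jaccard):

def bootstrapRandom(i):
    # shuffle both peak sets across the genome, keeping every interval on
    # its original chromosome
    shuffled1 = file1.shuffle(g=genome_chrom_sizes, chrom=True)
    shuffled2 = file2.shuffle(g=genome_chrom_sizes, chrom=True)
    # jaccard() requires sorted input
    return shuffled1.sort().jaccard(shuffled2.sort())['jaccard']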
Example #3
def calculate_jaccard_index(file_paths):
    file_one, file_two = sorted(file_paths)
    bed_file_one = bt(file_one)
    bed_file_two = bt(file_two)
    jaccard = bed_file_one.jaccard(bed_file_two)
    jaccard_index = jaccard['jaccard']
    if math.isnan(jaccard_index):
        # valid jaccard values lie in [0, 1], so NaN is encoded as -1
        jaccard_index = -1

    proxdis = file_one.split("_")[-2]
    cutoff = file_one.split("_")[-1]

    return (file_one.split("/")[-1], file_two.split("/")[-1],
            proxdis, cutoff, str(jaccard_index))
def parse_annotations(f, genome):
    """
    Parse the annotations and write them into the db.
    """
    feat = bt(f)
    logging.info('processing: %s' % f.name)
    chunk_size = 1024
    con = Connection()
    seqdb = con['seqdb']
    features = seqdb.features
    strains = seqdb.genomestrains
    small_feature_chunks = ichunked(feat, chunk_size)
    try:
        for chunk in small_feature_chunks:
            feature_list = []
            strain_list = []
            for a in chunk:
                feature = {
                    'seqid': a[0],
                    'source': a[1],
                    'type': a[2],
                    'start': int(a[3]),
                    'end': int(a[4]),
                    'score': a[5],
                    'strand': a[6],
                    'phase': a[7],
                    'attributes': a.attrs
                }
                feature['attributes']['genome'] = genome
                feature_list.append(feature)
                try:
                    strain = feature['attributes']['Strain']
                    strain_list.append(strain)
                except KeyError:
                    # this feature carries no Strain attribute
                    pass
            strain_list = list(set(strain_list))
            strains.update(
                {'genome': genome},
                {'$addToSet': {'strains': {'$each': strain_list}}}
            )
            features.insert(feature_list, safe=True)
    except pybedtools.cbedtools.MalformedBedLineError:
        logging.error("GFF file is not formatted correctly, please validate it!")
        sys.exit(1)
    logging.info('finished processing file')
    return
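ichunked is called above but not defined in this snippet; more_itertools ships an ichunked with this behavior. If that dependency is unavailable, a minimal stand-in could be:

from itertools import islice

def ichunked(iterable, n):
    # yield successive chunks of up to n items from any iterable
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk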
def snps_by_location(args):
    """
    Group and annotate SNPs by their location.

    Best is to use snpEff or other tools, but for quick course results
    this function can be used.
    """
    t0 = time.time()

    # save annotations and snps as single temp bedtools files
    annotations = bt(chain(*args.annot)).saveas().fn
    snps = bt(chain(*args.snps)).saveas().fn

    # group features by type
    featuretypes = ['gene', 'CDS', 'exon', 'five_prime_UTR', 'three_prime_UTR']
    # use multiprocessing to subset each type in a separate process
    pool = multiprocessing.Pool(len(featuretypes))
    annotation_list = repeat(annotations, times=len(featuretypes))
    results = pool.map(subset_features, zip(featuretypes, annotation_list))
    features_by_type = {ftype: feature for (ftype, feature) in results}

    # clean up features_by_type by removing empty bts from dict
    for k in list(features_by_type.keys()):  # copy keys; entries are deleted below
        if (not bt(features_by_type[k]).count()):
            del features_by_type[k]

    available_features = features_by_type.keys()

    # create intron intervals if there are gene annotations
    if ('gene' in available_features):
        genes = features_by_type['gene']

        # if there are exons use them to determine introns
        if ('exon' in available_features):
            exons = features_by_type['exon']
            introns = bt(genes).subtract(bt(exons))
            introns = introns.sort().merge().remove_invalid()
            features_by_type['intron'] = introns.saveas().fn
            # after getting introns, exons are not needed any more
            del features_by_type['exon']

        # otherwise use 'CDS' and UTR information
        else:
            needed_set = set(['CDS', 'three_prime_UTR', 'five_prime_UTR'])
            if (needed_set.issubset(features_by_type.keys())):
                cdss = features_by_type['CDS']
                utr3s = features_by_type['three_prime_UTR']
                utr5s = features_by_type['five_prime_UTR']
                introns = bt(genes).subtract(bt(cdss)).subtract(bt(utr3s))
                introns = introns.subtract(bt(utr5s)).sort().merge()
                introns = introns.remove_invalid()
                features_by_type['intron'] = introns.saveas().fn

    # group snps by feature overlap
    pool_size = len(features_by_type.keys())
    pool = multiprocessing.Pool(pool_size)
    snps_list = [snps for i in range(pool_size)]
    features_list = repeat(features_by_type, times=pool_size)
    results = pool.map(subset_snps, zip(features_by_type.keys(),
                       features_list, snps_list))
    snps_by_location = {ftype: feature for (ftype, feature) in results}

    # remove 'gene' snps, they're not needed anymore
    try:
        del snps_by_location['gene']
    except KeyError:
        # there were no 'gene' snps
        pass

    # remove duplicate SNPs
    available_locs = snps_by_location.keys()
    if ('intron' in available_locs):
        intronic = bt(snps_by_location['intron'])
        if ('CDS' in available_locs):
            cdss = bt(snps_by_location['CDS'])
            intronic = intronic.intersect(cdss, v=True)
        if ('five_prime_UTR' in available_locs):
            utr5s = bt(snps_by_location['five_prime_UTR'])
            intronic = intronic.intersect(utr5s, v=True)
        if ('three_prime_UTR' in available_locs):
            utr3s = bt(snps_by_location['three_prime_UTR'])
            intronic = intronic.intersect(utr3s, v=True)
        snps_by_location['intron'] = intronic.saveas().fn

    if ('CDS' in available_locs):
        cdss = bt(snps_by_location['CDS'])
        if ('five_prime_UTR' in available_locs):
            utr5s = bt(snps_by_location['five_prime_UTR'])
            utr5s = utr5s.intersect(cdss, v=True)
            snps_by_location['five_prime_UTR'] = utr5s.saveas().fn
        if ('three_prime_UTR' in available_locs):
            utr3s = bt(snps_by_location['three_prime_UTR'])
            utr3s = utr3s.intersect(cdss, v=True)
            snps_by_location['three_prime_UTR'] = utr3s.saveas().fn

    # annotate snps
    data_dir = os.path.dirname(args.snps[0].name)
    pool_size = len(snps_by_location.keys())
    pool = multiprocessing.Pool(pool_size)
    dd_list = repeat(data_dir, times=pool_size)
    pool.map(annotate_snps, zip(snps_by_location.items(), dd_list))

    t1 = time.time()
    t = t1 - t0
    logging.info("time elapsed: %d" % t)
def subset_snps(args):
    # pool.map passes the arguments as a single tuple
    (featuretype, features, snps) = args
    logging.info("subsetting snps in " + featuretype)
    s = bt(snps)
    snps_in_location = s.intersect(bt(features[featuretype]),
                                   u=True).saveas().fn
    return (featuretype, snps_in_location)
def subset_features(args):
    # pool.map passes the arguments as a single tuple
    (featuretype, annotations) = args
    logging.info("subsetting features in " + featuretype)
    a = bt(annotations)
    features_of_type = a.filter(featuretype_filter,
                                featuretype).saveas().fn
    return (featuretype, features_of_type)
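featuretype_filter is referenced here but not shown. For GFF input the usual pybedtools idiom is a predicate on the third column; .filter() calls it once per interval, passing featuretype as the extra argument, and keeps the intervals for which it returns True. A sketch:

def featuretype_filter(feature, featuretype):
    # GFF column 3 holds the feature type ('gene', 'exon', 'CDS', ...)
    return feature[2] == featuretype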


def annotate_snps(args):
    # pool.map passes the arguments as a single nested tuple
    ((loc, snps), data_dir) = args
    annotated_snps = bt(snps).each(annotate_location, loc)
    fname = os.path.join(data_dir, loc + "_snps.gff")
    annotated_snps.saveas(fname)
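annotate_location is not shown either. .each() passes every interval through the callback together with the extra argument and collects the returned intervals, so a minimal version might tag each SNP with its location class (the attribute key is a guess):

def annotate_location(feature, location):
    # record the location class as a GFF attribute (hypothetical key name)
    feature.attrs['location'] = location
    return feature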


Example #9
File: views.py  Project: taiyunkim/PAD
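# Assumed imports for this view, not shown on this page: os, re, json, math,
# time, django.conf.settings, django.shortcuts.render and redirect,
# scipy.cluster.hierarchy.linkage and dendrogram, scipy.spatial.distance as
# ssd, pybedtools.BedTool as bt, plus the project's Peaks_db / Peaks_db_file
# models, VariableInputForm, and helper functions.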
def tfClassifyResult(request):
    # cutoff value from post
    cutoff = int(request.session['cut_off'])
    pvalue = float(request.session['pvalue'])

    peak_database_names = request.session['peak_database_names']
    gene_database_name = request.session['gene_database_name']
    peakfile_names = request.session['peakfile_names']
    heatmap = request.session['heatmap']

    peakfile_choices = tuple([(x, x) for x in peakfile_names])
    form = VariableInputForm(request.POST or None,
                             request.FILES or None,
                             initial={
                                 'cut_off': cutoff,
                                 'selected_peaks': '\n'.join(peak_database_names),
                                 'uploaded_peak_File': peakfile_names,
                                 'pvalue': pvalue,
                             })
    # leave previous choices at field
    form.fields['uploaded_peak_File'].choices = peakfile_choices

    if form.is_valid():
        # new user input form is submitted
        cleaned_data = form.cleaned_data
        cutoff = cleaned_data.get('cut_off')
        pvalue = cleaned_data.get('pvalue')
        new_peak_File = request.FILES.getlist('new_peak_File')
        peakfile_names = request.POST.getlist('uploaded_peak_File')

        createPeakToBedFile(new_peak_File, str(request.session.session_key),
                            gene_database_name)
        for name in new_peak_File:
            peakfile_names.append(name.name)

        request.session['cut_off'] = cutoff
        request.session['pvalue'] = pvalue

        peak_name_strings = cleaned_data.get('selected_peaks')
        peak_database_names = peak_name_strings.rstrip().split()

        request.session['peak_database_names'] = peak_database_names
        request.session['gene_database_name'] = gene_database_name
        request.session['peakfile_names'] = peakfile_names
        request.session['heatmap'] = cleaned_data.get('heatmap')
        # redirect to this page with different parameters
        return redirect('/result')

    # split each user-uploaded peak file into proximal and distal sets
    fname = peakfile_names
    user_uploaded_filename = peakfile_names
    for name in peakfile_names:
        full_path = os.path.join(settings.MEDIA_ROOT, 'users_peak_files',
                                 str(request.session.session_key),
                                 str(gene_database_name), '')
        # read the original file and route each peak line by gene distance
        with open(full_path + name, 'rb') as orig_file, \
                open(full_path + name + '_proximal_' + str(cutoff),
                     'w') as prox_file, \
                open(full_path + name + '_distal_' + str(cutoff),
                     'w') as dist_file:
            for line in orig_file:
                # each line is assumed to end in a gene-distance column
                gene_dist = re.search(r'\d+\s\n', line)
                if int(gene_dist.group(0)) <= int(cutoff):
                    prox_file.write(line)
                else:
                    dist_file.write(line)

    fname = fname + peak_database_names
    fname.sort()
    matrix_size = len(fname)

    def zero_matrix(n):
        # n x n matrix of zeros with independent rows
        return [[0] * n for _ in range(n)]

    proximal_matrix = zero_matrix(matrix_size)
    proximal_pval_matrix = zero_matrix(matrix_size)
    proximal_dist_matrix = zero_matrix(matrix_size)
    distal_matrix = zero_matrix(matrix_size)
    distal_pval_matrix = zero_matrix(matrix_size)
    distal_dist_matrix = zero_matrix(matrix_size)

    path = os.path.join(settings.MEDIA_ROOT, 'peak_file_db',
                        str(gene_database_name), str(cutoff))
    user_path = os.path.join(settings.MEDIA_ROOT, 'users_peak_files',
                             str(request.session.session_key),
                             str(gene_database_name))

    start_time = time.time()
    # FOR PROXIMAL
    proximal_fc_limit = [float("inf"), float("-inf")]
    for i in range(matrix_size):
        file_one_user_uploaded = True
        if fname[i] not in user_uploaded_filename:
            full_filename_i = Peaks_db.objects.get(fileID=fname[i])
            full_filename_i = full_filename_i.origFile + '_proximal_' + str(
                cutoff)
            f1 = Peaks_db_file.objects.filter(
                filename=full_filename_i).values('path')
            f1 = os.path.join(f1[0]['path'], full_filename_i)
            file_one_user_uploaded = False
        else:
            full_filename_i = fname[i] + '_proximal_' + str(cutoff)

            f1 = os.path.join(user_path, full_filename_i)

        if not file_one_user_uploaded:
            jaccard_indices = getPrecomputedJaccardValuePerFile(
                full_filename_i)

        for j in range(matrix_size):
            file_two_user_uploaded = True
            if fname[j] not in user_uploaded_filename:
                full_filename_j = Peaks_db.objects.get(fileID=fname[j])
                full_filename_j = full_filename_j.origFile + '_proximal_' + str(
                    cutoff)

                f2 = Peaks_db_file.objects.filter(
                    filename=full_filename_j).values('path')
                f2 = os.path.join(f2[0]['path'], full_filename_j)
                file_two_user_uploaded = False
            else:
                full_filename_j = fname[j] + '_proximal_' + str(cutoff)

                f2 = os.path.join(user_path, full_filename_j)
            if i == j:
                # diagonal entry; symmetric cells were already filled on
                # earlier rows, so the inner loop stops at the diagonal
                proximal_matrix[i][j] = calculateJaccardFC(1, "proximal")
                proximal_pval_matrix[i][j] = 0
                proximal_dist_matrix[i][j] = 0
                break
            elif file_one_user_uploaded:
                f1 = os.path.join(user_path,
                                  fname[i] + '_proximal_' + str(cutoff))
            elif file_two_user_uploaded:
                f2 = os.path.join(user_path,
                                  fname[j] + '_proximal_' + str(cutoff))

            if file_one_user_uploaded or file_two_user_uploaded:
                # use jaccard index
                file1 = bt(f1)
                file2 = bt(f2)
                result = file1.jaccard(
                    file2)  # This is where the jaccard is calculated
                jaccard_index = result['jaccard']
            else:
                jaccard_index = jaccard_indices[full_filename_j]

            if math.isnan(jaccard_index) or jaccard_index < 0:
                proximal_matrix[i][j] = 0
                proximal_matrix[j][i] = 0

                proximal_pval_matrix[i][j] = 1
                proximal_pval_matrix[j][i] = 1

                proximal_dist_matrix[i][j] = 1
                proximal_dist_matrix[j][i] = 1
            else:
                jaccard_fc = calculateJaccardFC(jaccard_index, "proximal")
                proximal_matrix[i][j] = jaccard_fc
                proximal_matrix[j][i] = jaccard_fc

                jaccard_pval = calculateJaccardPval(jaccard_index, "proximal")
                proximal_pval_matrix[i][j] = jaccard_pval
                proximal_pval_matrix[j][i] = jaccard_pval

                proximal_dist_matrix[i][j] = 1 - jaccard_index
                proximal_dist_matrix[j][i] = 1 - jaccard_index

                # update the fold-change limits only when a fold change was
                # actually computed for this pair
                if jaccard_fc > proximal_fc_limit[1]:
                    proximal_fc_limit[1] = jaccard_fc
                elif jaccard_fc < proximal_fc_limit[0]:
                    proximal_fc_limit[0] = jaccard_fc

    # FOR DISTAL
    distal_fc_limit = [float("inf"), float("-inf")]
    for i in range(matrix_size):
        file_one_user_uploaded = True
        if fname[i] not in user_uploaded_filename:
            full_filename_i = Peaks_db.objects.get(fileID=fname[i])
            full_filename_i = full_filename_i.origFile + '_distal_' + str(
                cutoff)

            f1 = Peaks_db_file.objects.filter(
                filename=full_filename_i).values('path')
            f1 = os.path.join(f1[0]['path'], full_filename_i)
            file_one_user_uploaded = False
        else:
            full_filename_i = fname[i] + '_distal_' + str(cutoff)

            f1 = os.path.join(user_path, full_filename_i)

        if not file_one_user_uploaded:
            jaccard_indices = getPrecomputedJaccardValuePerFile(
                full_filename_i)

        for j in range(matrix_size):
            file_two_user_uploaded = True
            if fname[j] not in user_uploaded_filename:
                full_filename_j = Peaks_db.objects.get(fileID=fname[j])
                full_filename_j = full_filename_j.origFile + '_distal_' + str(
                    cutoff)

                f2 = Peaks_db_file.objects.filter(
                    filename=full_filename_j).values('path')
                f2 = os.path.join(f2[0]['path'], full_filename_j)
                file_two_user_uploaded = False
            else:
                full_filename_j = fname[j] + '_distal_' + str(cutoff)

                f2 = os.path.join(user_path, full_filename_j)
            if i == j:
                # diagonal entry; symmetric cells were already filled on
                # earlier rows, so the inner loop stops at the diagonal
                distal_matrix[i][j] = calculateJaccardFC(1, "distal")
                distal_pval_matrix[i][j] = 0
                distal_dist_matrix[i][j] = 0
                break
            elif file_one_user_uploaded:
                f1 = os.path.join(user_path,
                                  fname[i] + '_distal_' + str(cutoff))
            elif file_two_user_uploaded:
                f2 = os.path.join(user_path,
                                  fname[j] + '_distal_' + str(cutoff))

            if file_one_user_uploaded or file_two_user_uploaded:
                # use jaccard index
                file1 = bt(f1)
                file2 = bt(f2)
                result = file1.jaccard(
                    file2)  # This is where the jaccard is calculated
                jaccard_index = result['jaccard']
            else:
                jaccard_index = jaccard_indices[full_filename_j]

            if math.isnan(jaccard_index) or jaccard_index < 0:
                distal_matrix[i][j] = 0
                distal_matrix[j][i] = 0

                distal_pval_matrix[i][j] = 1
                distal_pval_matrix[j][i] = 1

                distal_dist_matrix[i][j] = 1
                distal_dist_matrix[j][i] = 1
            else:
                jaccard_fc = calculateJaccardFC(jaccard_index, "distal")
                distal_matrix[i][j] = jaccard_fc
                distal_matrix[j][i] = jaccard_fc

                jaccard_pval = calculateJaccardPval(jaccard_index, "distal")
                distal_pval_matrix[i][j] = jaccard_pval
                distal_pval_matrix[j][i] = jaccard_pval

                distal_dist_matrix[i][j] = 1 - jaccard_index
                distal_dist_matrix[j][i] = 1 - jaccard_index

                # update the fold-change limits only when a fold change was
                # actually computed for this pair
                if jaccard_fc > distal_fc_limit[1]:
                    distal_fc_limit[1] = jaccard_fc
                elif jaccard_fc < distal_fc_limit[0]:
                    distal_fc_limit[0] = jaccard_fc

    ########
    # build dendrograms and order the heatmap matrices by the chosen style
    proximal_dendrogram = {}
    distal_dendrogram = {}
    if heatmap == 'Independent':
        proximal_dist_vector = ssd.squareform(proximal_dist_matrix)
        proximal_linkage_matrix = linkage(proximal_dist_vector, "single",
                                          'euclidean')
        proximal_dendrogram = dendrogram(proximal_linkage_matrix, labels=fname)

        distal_dist_vector = ssd.squareform(distal_dist_matrix)
        distal_linkage_matrix = linkage(distal_dist_vector, "single",
                                        "euclidean")
        distal_dendrogram = dendrogram(distal_linkage_matrix, labels=fname)

        # fname = names of the files (ivl)
        f_order = proximal_dendrogram['ivl']
        # reorder the matrices to match the new dendrogram order
        ordered_proximal_matrix = zero_matrix(matrix_size)
        ordered_proximal_pval_matrix = zero_matrix(matrix_size)
        # for each file in the dendrogram order, find its index in the
        # original matrix and copy that value into the reordered matrix
        for i, f_name1 in enumerate(f_order):
            index1 = fname.index(f_name1)
            for j, f_name2 in enumerate(f_order):
                index2 = fname.index(f_name2)

                if proximal_pval_matrix[index1][index2] <= pvalue:
                    ordered_proximal_matrix[i][j] = proximal_matrix[index1][
                        index2]
                else:
                    ordered_proximal_matrix[i][j] = float("-inf")

                ordered_proximal_pval_matrix[i][j] = 'p-value: {:1.5f}'.format(
                    proximal_pval_matrix[index1][index2])

        f_order = distal_dendrogram['ivl']
        ordered_distal_matrix = zero_matrix(matrix_size)
        ordered_distal_pval_matrix = zero_matrix(matrix_size)
        for i, f_name1 in enumerate(f_order):
            index1 = fname.index(f_name1)
            for j, f_name2 in enumerate(f_order):
                index2 = fname.index(f_name2)

                if distal_pval_matrix[index1][index2] <= pvalue:
                    ordered_distal_matrix[i][j] = distal_matrix[index1][index2]
                else:
                    ordered_distal_matrix[i][j] = float("-inf")

                ordered_distal_pval_matrix[i][j] = 'p-value: {:1.5f}'.format(
                    distal_pval_matrix[index1][index2])

        p_new_ls = proximal_dendrogram['ivl']
        d_new_ls = distal_dendrogram['ivl']
    else:
        f_order = []
        p_new_ls = []
        d_new_ls = []
        if heatmap == 'Follow proximal':
            proximal_dist_vector = ssd.squareform(proximal_dist_matrix)
            proximal_linkage_matrix = linkage(proximal_dist_vector, "single",
                                              'euclidean')
            proximal_dendrogram = dendrogram(proximal_linkage_matrix,
                                             labels=fname)
            distal_dendrogram = "None"
            f_order = proximal_dendrogram['ivl']
            p_new_ls = f_order
            d_new_ls = f_order
        elif heatmap == 'Follow distal':
            distal_dist_vector = ssd.squareform(distal_dist_matrix)
            distal_linkage_matrix = linkage(distal_dist_vector, "single",
                                            "euclidean")
            distal_dendrogram = dendrogram(distal_linkage_matrix, labels=fname)
            proximal_dendrogram = "None"
            f_order = distal_dendrogram['ivl']
            p_new_ls = f_order
            d_new_ls = f_order
        ordered_proximal_matrix = zero_matrix(matrix_size)
        ordered_proximal_pval_matrix = zero_matrix(matrix_size)
        ordered_distal_matrix = zero_matrix(matrix_size)
        ordered_distal_pval_matrix = zero_matrix(matrix_size)
        for i, f_name1 in enumerate(f_order):
            index1 = fname.index(f_name1)
            for j, f_name2 in enumerate(f_order):
                index2 = fname.index(f_name2)

                if proximal_pval_matrix[index1][index2] <= pvalue:
                    ordered_proximal_matrix[i][j] = proximal_matrix[index1][
                        index2]
                else:
                    ordered_proximal_matrix[i][j] = float("-inf")

                if distal_pval_matrix[index1][index2] <= pvalue:
                    ordered_distal_matrix[i][j] = distal_matrix[index1][index2]
                else:
                    ordered_distal_matrix[i][j] = float("-inf")

                ordered_proximal_pval_matrix[i][j] = 'p-value: {:1.5f}'.format(
                    proximal_pval_matrix[index1][index2])
                ordered_distal_pval_matrix[i][j] = 'p-value: {:1.5f}'.format(
                    distal_pval_matrix[index1][index2])

    proc_time = time.time() - start_time

    json_data = json.dumps({
        'p_filename': p_new_ls,
        'd_filename': d_new_ls,
        'matrix_size': matrix_size,
        'proximal_matrix': ordered_proximal_matrix,
        'proximal_pval_matrix': ordered_proximal_pval_matrix,
        'proximal_dendrogram': proximal_dendrogram,
        'distal_matrix': ordered_distal_matrix,
        'distal_pval_matrix': ordered_distal_pval_matrix,
        'distal_dendrogram': distal_dendrogram,
        'proxdist_fc_limit': [proximal_fc_limit, distal_fc_limit],
        'proc_time': proc_time
    })
    table = Peaks_db.objects.all().values('protein', 'fileID', 'num_peaks',
                                          'cells', 'labs', 'year')
    context = {
        'form': form,
        'table': table,
        'peakfile_names': peakfile_names,
        'json_data': json_data,
        'proximal_dendrogram': proximal_dendrogram,
        'distal_dendrogram': distal_dendrogram
    }

    return render(request, 'tfClassify.html', context)
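calculateJaccardFC and calculateJaccardPval are used throughout this view but defined elsewhere in the project. Given that createJaccardDistribution (Example #2) returns a sorted bootstrap distribution of jaccard values, one plausible reconstruction is an empirical right-tail p-value plus a log2 fold change over the bootstrap mean. The lookup table and both formulas below are guesses, not the project's code:

import numpy

# hypothetical: sorted bootstrap distributions per region, e.g. the output of
# createJaccardDistribution from Example #2 (stand-in data shown here)
bootstrap_distributions = {
    'proximal': numpy.sort(numpy.random.beta(1, 20, 10000)),
    'distal': numpy.sort(numpy.random.beta(1, 30, 10000)),
}

def calculateJaccardPval(jaccard_index, region):
    # empirical right-tail p-value: the fraction of bootstrap jaccard
    # values at least as large as the observed one
    dist = bootstrap_distributions[region]
    idx = numpy.searchsorted(dist, jaccard_index, side='left')
    return (len(dist) - idx) / float(len(dist))

def calculateJaccardFC(jaccard_index, region):
    # log2 fold change of the observed index over the bootstrap mean
    dist = bootstrap_distributions[region]
    return numpy.log2(jaccard_index / numpy.mean(dist))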