def output_data(data):
    """
    Print a tab-delimited table with one row per tsr and one value column per file.

    :param data: sequence of (filename, file_data) pairs; the rows are driven by the
        first pair's file_data and every file_data is indexed by tsr
    :type data: list
    :return: None, the table is emitted through print_tab_delimited
    """
    # Header: the fixed region columns followed by the basename of every file
    basenames = [path.split("/")[-1] for path, _ in data]
    print_tab_delimited(
        ["Chromosome", "Left", "Right", "Name", "Strand"] + basenames)

    tables = [file_data for _, file_data in data]

    # NOTE(review): every item yielded by the first file_data is unpacked into two
    # values and only the second one is used as the tsr -- confirm against the caller.
    for _, tsr in data[0][1]:
        row = list(tsr)
        row.extend(table[tsr] for table in tables)
        print_tab_delimited(row)
def output_pausing_distances(pausing_distances, sequencing_files):
    """
    Print one row per index, holding that index's value from every list.

    :param pausing_distances: one indexable collection of values per sequencing file;
        the number of rows is taken from the first collection
    :type pausing_distances: list
    :param sequencing_files: paths of the sequencing files, used for the header
    :type sequencing_files: list
    :return: None, the table is emitted through print_tab_delimited
    """
    # Header: only the basename of each sequencing file is shown
    basenames = [path.split("/")[-1] for path in sequencing_files]
    print_tab_delimited(["Transcript Length"] + basenames)

    row_count = len(pausing_distances[0])
    for length in range(row_count):
        values = [distances[length] for distances in pausing_distances]
        print_tab_delimited([length, *values])
def output_data(combined_dict, sequencing_files, upstream_distance, interval_size):
    """
    Print the combined values, labelling each row with its real position.

    :param combined_dict: dictionary whose values are lists holding one row of data
        each; rows are printed in the dictionary's iteration order
    :type combined_dict: dict
    :param sequencing_files: paths of the sequencing files, used for the header
    :type sequencing_files: list
    :param upstream_distance: the first row is labelled with the negative of this value
    :param interval_size: amount added to the position label after every row
    :return: None, the table is emitted through print_tab_delimited
    """
    # Header: only the basename of each sequencing file is shown
    header = ["Position"]
    header.extend(path.split("/")[-1] for path in sequencing_files)
    print_tab_delimited(header)

    # The keys of combined_dict are not printed; the label is rebuilt from the
    # upstream distance and advanced by one interval per row.
    real_position = -upstream_distance
    for values in combined_dict.values():
        print_tab_delimited([real_position] + values)
        real_position += interval_size
def output_data(avgs_dict, region_length):
    """
    Print the averages for every position of the region, one row per position.

    :param avgs_dict: dictionary whose keys become the column headers and whose values
        are indexable by the row number (0 .. region_length - 1)
    :type avgs_dict: dict
    :param region_length: number of rows to print
    :type region_length: int
    :return: None, the table is emitted through print_tab_delimited
    """
    columns = list(avgs_dict.keys())
    print_tab_delimited(["Position"] + columns)

    # Labels start at minus half of the region length and never take the value 0.
    # NOTE(review): under Python 3 the true division makes every label a float
    # (e.g. -10.0) -- confirm that this is the intended output format.
    position = -(region_length / 2)
    for row_index in range(region_length):
        if position == 0:
            position += 1

        averages = [avgs_dict[column][row_index] for column in columns]
        print_tab_delimited([position] + averages)
        position += 1
def run_sequence_searches(regions_file, searching_sequences, region_length):
    """
    Search every region of a bed file for the requested sequences and print the results.

    For each bed line a record with the keys "Sequence", "Region" and "Motifs" is built,
    find_sequences is called on it once per search, and finally one tab-delimited row per
    gene is printed: the bed fields followed by the value stored for every motif.

    :param regions_file: path of the bed file; every line must split into exactly six
        whitespace-separated fields, the fourth one being the gene name
    :type regions_file: str
    :param searching_sequences: searches to run, each a 3-item entry whose first item is
        the sequence; it is iterated several times, so it must not be a one-shot iterator
    :type searching_sequences: list
    :param region_length: forwarded unchanged to find_sequences
    :return: None, the table is emitted through print_tab_delimited
    :raises ValueError: if a bed line does not have exactly six fields or a search does
        not have exactly three items
    """
    # 1. Read in the contents of the bed file
    with open(regions_file) as bed_handle:
        bed_lines = bed_handle.readlines()

    fasta_file = run_getfasta(regions_file)
    try:
        fasta_sequences = read_fasta(fasta_file)
    finally:
        # Remove the intermediate fasta file even when reading it fails
        remove_files(fasta_file)

    # Keys are gene names; a gene name that occurs twice keeps only its last line.
    master_dict = {}
    for i, line in enumerate(bed_lines):
        region = line.split()
        # Unpacking keeps the check that the line has exactly six fields
        _, _, _, gene_name, _, _ = region

        master_dict[gene_name] = {
            # Sequences are matched to bed lines by their index
            "Sequence": fasta_sequences[i],
            "Region": region,
            "Motifs": {sequence: False for sequence, _, _ in searching_sequences},
        }

    for record in master_dict.values():
        for search in searching_sequences:
            find_sequences(record, search, region_length)

    # Output the results
    print_tab_delimited(
        ["Chromosome", "Left", "Right", "Gene", "Score", "Strand"] +
        [search[0] for search in searching_sequences])

    for record in master_dict.values():
        print_tab_delimited(record["Region"] + list(record["Motifs"].values()))
def output_data(pausing_distances):
    """
    Regroup the per-file results by gene and print one row per gene.

    :param pausing_distances: iterable of (ret_dict, seq_filename) pairs, where
        ret_dict maps a gene name to the value computed for that file
    :type pausing_distances: list
    :return: None, the table is emitted through print_tab_delimited
    """
    # gene name -> {sequencing filename -> value}
    values_by_gene = {}
    for ret_dict, seq_filename in pausing_distances:
        for gene_name, value in ret_dict.items():
            values_by_gene.setdefault(gene_name, {})[seq_filename] = value

    header_printed = False
    for gene_name, values_by_file in values_by_gene.items():
        if not header_printed:
            # The header is built from the files recorded for the first gene.
            # NOTE(review): a gene that is absent from one of the files produces a
            # shorter row than the header -- confirm that this cannot happen.
            basenames = [path.split("/")[-1] for path in values_by_file]
            print_tab_delimited(["Gene"] + basenames)
            header_printed = True

        print_tab_delimited([gene_name] + list(values_by_file.values()))
def output_metaplot_data(averages, region_length, prime_name):
    """
    Print the metaplot averages of every file side by side, one row per position.

    :param averages: averages list from the metaplots programs, made of
        (averages data, filename) pairs
    :type averages: list
    :param region_length: length of the region
    :type region_length: int
    :param prime_name: either "five prime" or "three prime"; when it is empty the
        column names carry no prime label
    :type prime_name: str
    :return: None, the table is emitted through print_tab_delimited
    """
    avgs_data, files = zip(*averages)

    # Line up the same position of every file and flatten the values into one row
    merged_list = [
        list(chain.from_iterable(same_position))
        for same_position in zip(*avgs_data)
    ]

    # Every file contributes a sense and a divergent column
    header = ["Position"]
    for file in files:
        label = file.split("/")[-1]
        if prime_name:
            # Include a space before the prime name
            label = label + " " + prime_name

        header.append(label + " sense strand")
        header.append(label + " divergent strand")

    print_tab_delimited(header)

    half_length = region_length / 2
    for i, base_list in enumerate(merged_list):
        position = i - half_length

        # Non-negative positions are shifted by one, so no row is labelled 0
        if position >= 0:
            position += 1

        print_tab_delimited([position] + base_list)
def output_data(expanded_regions):
    """
    Print every expanded region as one tab-delimited row.

    :param expanded_regions: iterable of regions, each handed unchanged to
        print_tab_delimited
    :return: None
    """
    for expanded_region in expanded_regions:
        print_tab_delimited(expanded_region)