def test_02_RunEntropyOnUnique(self):
    """Run entropy analysis on the uniqued alignment and compare with the expected file.

    The analysis writes 'entropy.txt' into the test output directory; the
    test passes when that file matches ``self.expected_result``.
    """
    entropy_path = os.path.join(self.output_directory_path, 'entropy.txt')

    entropy_analysis(
        self.unique_alignment,
        output_file=entropy_path,
        uniqued=True,
        verbose=False
    )

    matches_expected = files_are_the_same(self.expected_result, entropy_path)
    self.assertTrue(matches_expected)
def vis_freq_curve(fasta_file_path, output_file=None, x_limit=20, display=False,
                   freq_from_defline=None, entropy_output_file=None, verbose=False,
                   mini=False, title=None):
    """Plot unique-sequence frequencies and per-position entropy for a FASTA file.

    Every entry of the file is read through ``u.SequenceSource`` and a
    frequency is extracted from its id with ``freq_from_defline``.  The same
    file is then passed to ``entropy_analysis`` (with ``uniqued=True``) and
    the returned values are drawn as a bar chart.

    Args:
        fasta_file_path: Path handed to ``u.SequenceSource`` and
            ``entropy_analysis``.
        output_file: When truthy, the figure is saved there with
            ``plt.savefig``.
        x_limit: Number of leading frequencies shown in the frequency panel;
            a shorter list is padded with zeros up to this length.
        display: When truthy, the figure is shown with ``plt.show``.
        freq_from_defline: Callable that takes an entry id and returns its
            frequency.  The default takes the first '|'-separated field that
            starts with 'freq' and converts the text after ':' to ``int``
            (e.g. ``'seq|freq:42'`` -> ``42``).
        entropy_output_file: Forwarded to ``entropy_analysis`` as
            ``output_file``.
        verbose: Forwarded to ``entropy_analysis``.
        mini: When truthy, draw a 2x2 inch single-panel figure containing only
            the entropy bars with the title text overlaid; otherwise draw the
            two-panel figure (frequencies on top, entropy below).
        title: Title text.  In mini mode the literal ``'title'`` is used when
            it is missing; otherwise a title containing the file's base name
            is used.

    Raises:
        SystemExit: If ``freq_from_defline`` fails for any entry (a message is
            printed to standard output first).
    """
    if freq_from_defline is None:
        def freq_from_defline(defline):
            return int([t.split(':')[1] for t in defline.split('|') if t.startswith('freq')][0])

    fasta = u.SequenceSource(fasta_file_path)

    frequency_list = []
    # The loop relies on next(fasta) returning a falsy value once the file is
    # exhausted -- NOTE(review): confirm against SequenceSource.
    while next(fasta):
        try:
            frequency_list.append(freq_from_defline(fasta.id))
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not reported as a malformed defline.
            print('frequency info can not be read from defline.')
            sys.exit()

    # First x_limit frequencies in file order, zero-padded to exactly x_limit
    # entries when the file holds fewer sequences.
    padding = [0] * max(0, x_limit - len(frequency_list))
    frequency_list_to_plot = frequency_list[0:x_limit] + padding

    entropy_values = entropy_analysis(fasta_file_path, output_file=entropy_output_file,
                                      verbose=verbose, uniqued=True)

    if mini:
        plt.figure(figsize=(2, 2))
        plt.subplots_adjust(left=0.01, bottom=0, top=1, right=1)
        plt.subplot(1, 1, 1)
        plt.grid(False)
        plt.xticks([])
        plt.yticks([])
        ax = plt.gca()
        plt.setp(ax, frame_on=False)

        y_maximum = 1.1
        x_maximum = len(entropy_values)
        ind = np.arange(len(entropy_values))

        # The title is drawn in the middle of the plotting area.
        text_x, text_y = x_maximum / 2, y_maximum / 2
        plt.text(text_x, text_y, title if title else 'title',
                 horizontalalignment='center', verticalalignment='center',
                 backgroundcolor='white', fontsize=40, color='red')

        plt.ylim(ymax=y_maximum)
        plt.xlim(xmax=x_maximum)
        plt.bar(ind, entropy_values, color='black', lw=0.5)
    else:
        plt.figure(figsize=(24, 10))

        # Upper panel: frequency of each unique sequence, in file order.
        plt.subplots_adjust(left=0.05, bottom=0.15, top=0.95, right=0.99)
        plt.subplot(2, 1, 1)
        plt.grid(True)
        plt.rcParams.update({'axes.linewidth': 0.9})
        plt.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
        # NOTE(review): these ticks are replaced by the xticks call further
        # down on this same subplot -- likely redundant.
        plt.xticks(list(range(0, len(entropy_values), 5)), rotation=90, size='x-small')
        plt.plot(frequency_list_to_plot, lw=3, c='black')
        plt.xlabel('Order in the File', size='x-large')
        plt.ylabel('Frequency of the Unique Sequence', size='x-large')

        if title:
            plt.title(title)
        else:
            plt.title('Frequency Distribution of Unique Sequences in %s'
                      % os.path.basename(fasta_file_path))

        plt.ylim(ymin=-max(frequency_list_to_plot) * 0.05,
                 ymax=max(frequency_list_to_plot) * 1.05)
        plt.xlim(xmin=-0.05, xmax=x_limit - 1)
        # Tick positions are 0-based, the labels shown are 1-based.
        plt.xticks(list(range(0, x_limit)), [str(i) for i in range(1, x_limit + 1)],
                   rotation=90, size='small')

        # Lower panel: one entropy bar per value returned by entropy_analysis.
        plt.subplot(2, 1, 2)
        plt.subplots_adjust(left=0.05, bottom=0.1, top=0.95, right=0.99)
        try:
            plt.grid(axis='y')
        except Exception:
            # Fall back to a full grid when the axis keyword is rejected
            # (presumably older matplotlib -- TODO confirm).
            plt.grid(True)
        plt.rcParams.update({'axes.linewidth': 0.9})
        plt.rc('grid', color='0.40', linestyle='-', linewidth=0.1)

        # 10% headroom above the tallest bar; 1.1 when that would be below 1.
        y_maximum = max(entropy_values) * 1.1
        y_maximum = 1.1 if y_maximum < 1 else y_maximum
        ind = np.arange(len(entropy_values))

        plt.bar(ind, entropy_values, color='black', lw=0.5)
        plt.xlim([0, len(entropy_values)])
        plt.ylim([0, y_maximum])
        plt.xticks(list(range(0, len(entropy_values), 5)), rotation=90, size='x-small')
        plt.xlabel('Position in the Alignment', size='x-large')
        plt.ylabel('Shannon Entropy', size='x-large')

    if output_file:
        plt.savefig(output_file)
    if display:
        plt.show()

    plt.clf()
    plt.close('all')
def test_01_RunWeightedEntropy(self):
    """Run quality-weighted entropy analysis and compare with the expected file.

    A quality dictionary is built from the quality scores file and the
    alignment, summarised into a quality stats dictionary, and that is passed
    to ``entropy_analysis`` with ``weighted=True``.  The test passes when the
    resulting 'entropy.txt' matches ``self.expected_result``.
    """
    in_output_dir = lambda file_name: os.path.join(self.output_directory_path, file_name)

    entropy_path = in_output_dir('entropy.txt')

    quals_dict = get_quals_dict(
        self.qual_scores_file,
        self.alignment,
        output_file_path=in_output_dir('QUALS_DICT'),
        verbose=False
    )
    qual_stats_dict = get_qual_stats_dict(
        quals_dict,
        output_file_path=in_output_dir('QUAL_STATS_DICT'),
        verbose=False
    )

    entropy_analysis(
        self.alignment,
        output_file=entropy_path,
        verbose=False,
        weighted=True,
        qual_stats_dict=qual_stats_dict
    )

    matches_expected = files_are_the_same(self.expected_result, entropy_path)
    self.assertTrue(matches_expected)
def vis_freq_curve(fasta_file_path, output_file=None, x_limit=20, display=False,
                   freq_from_defline=None, entropy_output_file=None, verbose=False,
                   mini=False, title=None):
    """Plot unique-sequence frequencies and per-position entropy for a FASTA file.

    Every entry of the file is read through ``u.SequenceSource`` and a
    frequency is extracted from its id with ``freq_from_defline``.  The same
    file is then passed to ``entropy_analysis`` (with ``uniqued=True``) and
    the returned values are drawn as a bar chart.

    Only constructs valid on both Python 2 and Python 3 are used here
    (``print(...)`` with a single argument, the builtin ``next``).

    Args:
        fasta_file_path: Path handed to ``u.SequenceSource`` and
            ``entropy_analysis``.
        output_file: When truthy, the figure is saved there with
            ``plt.savefig``.
        x_limit: Number of leading frequencies shown in the frequency panel;
            a shorter list is padded with zeros up to this length.
        display: When truthy, the figure is shown with ``plt.show``.
        freq_from_defline: Callable that takes an entry id and returns its
            frequency.  The default takes the first '|'-separated field that
            starts with 'freq' and converts the text after ':' to ``int``
            (e.g. ``'seq|freq:42'`` -> ``42``).
        entropy_output_file: Forwarded to ``entropy_analysis`` as
            ``output_file``.
        verbose: Forwarded to ``entropy_analysis``.
        mini: When truthy, draw a 2x2 inch single-panel figure containing only
            the entropy bars with the title text overlaid; otherwise draw the
            two-panel figure (frequencies on top, entropy below).
        title: Title text.  In mini mode the literal ``'title'`` is used when
            it is missing; otherwise a title containing the file's base name
            is used.

    Raises:
        SystemExit: If ``freq_from_defline`` fails for any entry (a message is
            printed to standard output first).
    """
    if freq_from_defline is None:
        def freq_from_defline(defline):
            return int([t.split(':')[1] for t in defline.split('|') if t.startswith('freq')][0])

    fasta = u.SequenceSource(fasta_file_path)

    frequency_list = []
    # Builtin next() instead of fasta.next(): it dispatches to .next() on
    # Python 2 and to .__next__() on Python 3.  The loop relies on a falsy
    # return value at end of file -- NOTE(review): confirm against
    # SequenceSource.
    while next(fasta):
        try:
            frequency_list.append(freq_from_defline(fasta.id))
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not reported as a malformed defline.
            print('frequency info can not be read from defline.')
            sys.exit()

    # First x_limit frequencies in file order, zero-padded to exactly x_limit
    # entries when the file holds fewer sequences.
    padding = [0] * max(0, x_limit - len(frequency_list))
    frequency_list_to_plot = frequency_list[0:x_limit] + padding

    entropy_values = entropy_analysis(fasta_file_path, output_file=entropy_output_file,
                                      verbose=verbose, uniqued=True)

    if mini:
        plt.figure(figsize=(2, 2))
        plt.subplots_adjust(left=0.01, bottom=0, top=1, right=1)
        plt.subplot(1, 1, 1)
        plt.grid(False)
        plt.xticks([])
        plt.yticks([])
        ax = plt.gca()
        plt.setp(ax, frame_on=False)

        y_maximum = 1.1
        x_maximum = len(entropy_values)
        ind = np.arange(len(entropy_values))

        # The title is drawn in the middle of the plotting area.
        text_x, text_y = x_maximum / 2, y_maximum / 2
        plt.text(text_x, text_y, title if title else 'title',
                 horizontalalignment='center', verticalalignment='center',
                 backgroundcolor='white', fontsize=40, color='red')

        plt.ylim(ymax=y_maximum)
        plt.xlim(xmax=x_maximum)
        plt.bar(ind, entropy_values, color='black', lw=0.5)
    else:
        plt.figure(figsize=(24, 10))

        # Upper panel: frequency of each unique sequence, in file order.
        plt.subplots_adjust(left=0.05, bottom=0.15, top=0.95, right=0.99)
        plt.subplot(2, 1, 1)
        plt.grid(True)
        plt.rcParams.update({'axes.linewidth': 0.9})
        plt.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
        # NOTE(review): these ticks are replaced by the xticks call further
        # down on this same subplot -- likely redundant.
        plt.xticks(list(range(0, len(entropy_values), 5)), rotation=90, size='x-small')
        plt.plot(frequency_list_to_plot, lw=3, c='black')
        plt.xlabel('Order in the File', size='x-large')
        plt.ylabel('Frequency of the Unique Sequence', size='x-large')

        if title:
            plt.title(title)
        else:
            plt.title('Frequency Distribution of Unique Sequences in %s'
                      % os.path.basename(fasta_file_path))

        plt.ylim(ymin=-max(frequency_list_to_plot) * 0.05,
                 ymax=max(frequency_list_to_plot) * 1.05)
        plt.xlim(xmin=-0.05, xmax=x_limit - 1)
        # Tick positions are 0-based, the labels shown are 1-based.
        plt.xticks(list(range(0, x_limit)), [str(i) for i in range(1, x_limit + 1)],
                   rotation=90, size='small')

        # Lower panel: one entropy bar per value returned by entropy_analysis.
        plt.subplot(2, 1, 2)
        plt.subplots_adjust(left=0.05, bottom=0.1, top=0.95, right=0.99)
        try:
            plt.grid(axis='y')
        except Exception:
            # Fall back to a full grid when the axis keyword is rejected
            # (presumably older matplotlib -- TODO confirm).
            plt.grid(True)
        plt.rcParams.update({'axes.linewidth': 0.9})
        plt.rc('grid', color='0.40', linestyle='-', linewidth=0.1)

        # 10% headroom above the tallest bar; 1.1 when that would be below 1.
        y_maximum = max(entropy_values) * 1.1
        y_maximum = 1.1 if y_maximum < 1 else y_maximum
        ind = np.arange(len(entropy_values))

        plt.bar(ind, entropy_values, color='black', lw=0.5)
        plt.xlim([0, len(entropy_values)])
        plt.ylim([0, y_maximum])
        plt.xticks(list(range(0, len(entropy_values), 5)), rotation=90, size='x-small')
        plt.xlabel('Position in the Alignment', size='x-large')
        plt.ylabel('Shannon Entropy', size='x-large')

    if output_file:
        plt.savefig(output_file)
    if display:
        plt.show()

    plt.clf()
    plt.close('all')
def test_02_RunEntropyOnUnique(self):
    """Check entropy analysis of the uniqued alignment against the expected result.

    ``entropy_analysis`` is asked to write 'entropy.txt' into the test output
    directory; that file must be the same as ``self.expected_result``.
    """
    result_path = os.path.join(self.output_directory_path, 'entropy.txt')

    analysis_options = {
        'output_file': result_path,
        'uniqued': True,
        'verbose': False,
    }
    entropy_analysis(self.unique_alignment, **analysis_options)

    self.assertTrue(files_are_the_same(self.expected_result, result_path))