示例#1
0
def run_experiment(shoals=30,
                   fishes=2,
                   replicas_top=0,
                   replicas_bottom=0,
                   follow_refugia_force=0.2):
    """Run a batch of headless simulations and show a histogram of the result.

    ``shoals`` is forwarded positionally to ``headless_simulations``; the
    remaining arguments are forwarded unchanged as keyword arguments of the
    same name.

    Returns:
        The value returned by ``headless_simulations`` (the same data that
        is handed to ``plot_histogram``).
    """
    simulation_options = {
        'fishes': fishes,
        'replicas_top': replicas_top,
        'replicas_bottom': replicas_bottom,
        'follow_refugia_force': follow_refugia_force,
    }
    frequencies = headless_simulations(shoals, **simulation_options)

    figure, axes = plt.subplots(1, 1, constrained_layout=True)
    # Title reads "<bottom replicas>: <top replicas>, group size <fishes>".
    title = f'{replicas_bottom}: {replicas_top}, group size {fishes}'
    plot_histogram(axes, frequencies, title)
    figure.show()
    return frequencies
示例#2
0
def show_hist_colors(x, colors):
  """
  Draw one histogram of x per entry in colors and place each figure window
  on screen: the column is the index modulo three, the first three windows
  sit at y=0 and every later one at y=600.
  """

  columns_per_row = 3
  window_width, window_height = 600, 500

  for index, color in enumerate(colors):

    figure = plot_histogram(x, bins=None, color=color, y_log_scale=False, x_log_scale=False, context='None', title='', plot_path=None, name='hist', show_plot=False)

    # positioning
    column = index % columns_per_row
    top = 600 if index >= columns_per_row else 0
    figure.canvas.manager.window.setGeometry(column * window_width, top, window_width, window_height)
示例#3
0
  def analyze_damaged_files(self):
    """
    Analyze damaged files.

    Plots the energy, sample-count and damaged-score histograms (each one
    only when its list is non-empty) and prints how many files were
    classified as too short, too weak or damaged. For
    SpeechCommandsDataset only, it additionally prints the speaker
    overview, copies the damaged wav files and writes the four info files.
    The damaged-file score plot is always produced at the end.
    """

    # histograms, each skipped when there is nothing to plot
    if len(self.file_energy_list) > 0:
      plot_histogram(
        self.file_energy_list,
        bins=np.logspace(np.log10(0.0001), np.log10(10000), 50),
        y_log_scale=True,
        x_log_scale=True,
        context='None',
        title='Energy',
        plot_path=self.plot_paths['z_score'],
        name='energy_hist_n-{}'.format(self.dataset_cfg['n_examples']))

    if len(self.file_num_sample_list) > 0:
      plot_histogram(
        self.file_num_sample_list,
        bins=20,
        y_log_scale=True,
        context='None',
        title='Num Samples',
        plot_path=self.plot_paths['z_score'],
        name='num_sample_hist_n-{}'.format(self.dataset_cfg['n_examples']))

    if len(self.damaged_score_list) > 0:
      plot_histogram(
        self.damaged_score_list,
        bins=50,
        y_log_scale=True,
        context='None',
        title='Damaged Score',
        plot_path=self.plot_paths['z_score'],
        name='z_score_hist_n-{}'.format(self.dataset_cfg['n_examples']))

    # summary counts
    print("\n--Analyze damaged files of ", self.__class__.__name__)
    print("too short files num: {}".format(len(self.short_file_list)))
    print("too weak files num: {}".format(len(self.weak_file_list)))
    print("damaged files num: {}".format(len(self.damaged_file_list)))

    # speaker overview and file exports, speech commands dataset only
    if self.__class__.__name__ == 'SpeechCommandsDataset':

      # flatten the nested file lists, then strip everything but the speaker part
      nested_files = np.array(self.set_audio_files, dtype='object')
      flat_files = list(np.concatenate(np.concatenate(nested_files)))
      all_speakers_files = [
        re.sub(r'(\./)|(\w+/)|(\w+--)|(_nohash_[0-9]+.wav)', '', file_name)
        for file_name in flat_files
      ]

      # unique speakers
      all_speakers = np.unique(all_speakers_files)

      # print info
      print("number of audio files: ", len(all_speakers_files))
      print("speakers: ", all_speakers)
      print("number of speakers: ", len(all_speakers))

      # copy every damaged wav next to the plots (the score is not needed here)
      for wav, _score in self.damaged_file_list:
        copyfile(wav, self.plot_paths['damaged_files'] + wav.split('/')[-1])

      # one entry per line in each info file
      with open(self.info_file_damaged, 'w') as f:
        for entry in self.damaged_file_list:
          print(entry, file=f)
      with open(self.info_file_short, 'w') as f:
        for entry in self.short_file_list:
          print(entry, file=f)
      with open(self.info_file_weak, 'w') as f:
        for entry in self.weak_file_list:
          print(entry, file=f)
      with open(self.info_file_strong, 'w') as f:
        for entry in self.strong_file_list:
          print(entry, file=f)

    # broken file info
    plot_damaged_file_score(
      self.damaged_score_list,
      plot_path=self.plot_paths['z_score'],
      name='z_score_n-{}'.format(self.dataset_cfg['n_examples']),
      enable_plot=True)
示例#4
0
import functions as f
import os
import constant
import joblib
import plots

# Driver script: only the two plotting calls below are active. The
# commented-out lines are other steps (merge, prune, train, inspect) that are
# currently disabled; they are kept so a step can be re-enabled by
# uncommenting it.

# Disabled: merge results.
# f.merge_results()

# Disabled: read the dataset at CURR_PATH, prune it and save the pruned frame.
# df = f.read_dataset(constant.CURR_PATH)
# pruned_df = f.prune_dataset(df)
# f.save_pruned(pruned_df, header=True)

# Disabled: train the model.
# f.train_model()

# Active: produce the plots (the histogram call receives constant.PLOT_PATH).
plots.plot_vnfs_inst()
plots.plot_histogram(constant.PLOT_PATH)

# Disabled: count bins of the pruned data and print the best parameters of
# the model stored in 'rf_mod-e.sav'.
# f.count_bins(constant.PRUNED_PATH)
# rf = joblib.load('rf_mod-e.sav')
# print(rf.best_params_)
示例#5
0
def get_taus(alignment_set, bas_reader, reference_information,
             output_directory, movie_length, movie_limited_threshold,
             first_adapter_template_position, second_adapter_template_position,
             template_min_start, template_max_start, coarse_grain_binsize,
             subsample_to, is_legacy):
    """Perform tau analysis, by reference
       Generate the following plots:
            1) template start histogram
            2) start time histogram
            3) template-start position by start-time
            4) survival by template-position
            5) termination rate by template-position
            6) termination rates w/ 1st, 2nd, and RC-circle tau fits
       Save a summary CSV storing tau values, by reference

       Args:
           alignment_set: alignment data set; its first resource reader
               supplies the frame rate and its ``index`` is used to select
               the alignments of each reference.
           bas_reader: forwarded to ``get_template_positions``.
           reference_information: iterable of rows with 'ID', 'FullName'
               and 'SMRTBellSize'.
           output_directory: where ``results_summary.csv``, the pickles and
               the plots are written.
           movie_length: if falsy, the start-time based plots are skipped;
               also forwarded to ``get_template_positions`` and ``fit_taus``.
           movie_limited_threshold: converted to ``float`` and forwarded to
               ``get_template_positions``.
           first_adapter_template_position: template position of the first
               adapter; ``None`` means half of the reference's SMRTBellSize.
           second_adapter_template_position: template position of the second
               adapter; ``None`` means the reference's SMRTBellSize.
           template_min_start, template_max_start: template-start window,
               forwarded to the helpers and used in plot descriptors.
           coarse_grain_binsize: forwarded to ``get_template_positions`` and
               ``calculate_termination_rate``.
           subsample_to: forwarded to ``get_template_positions``.
           is_legacy: selects the index column ('RefGroupID' vs 'tId') and
               disables the polymerase-rate plots when true.

       Returns:
           None. All results are written to ``output_directory``.

       When an adapter position is passed as ``None`` it is resolved
       separately for every reference, so a default derived from one
       reference is never reused for another. The function arguments
       themselves are not modified.
    """
    framerate = alignment_set.resourceReaders()[0].readGroupTable.FrameRate[0]

    # set up results file by writing header
    results_file = os.path.join(output_directory, 'results_summary.csv')
    # NOTE(review): binary mode with csv.DictWriter is the Python 2
    # convention; on Python 3 the csv module expects a text-mode file opened
    # with newline=''. Left unchanged because the target interpreter (and the
    # mode save_taus uses for the same file) is not visible here -- confirm.
    with open(results_file, 'wb') as results_handle:
        fieldnames = [
            'referenceName', 'nTotalAlignments', 'nConsideredAlignments',
            'nCohortAlignments', 'percentMovieLimited', 'tau1', 'tau2', 'tauRC'
        ]
        writer = csv.DictWriter(results_handle, fieldnames=fieldnames)
        writer.writeheader()

    for ref in reference_information:
        logging.info(
            ('Processing alignments from reference ' + str(ref['FullName'])))
        # row in recarray w/ 'ID', 'FullName', and 'SMRTBellSize'
        if is_legacy:
            aln_to_ref_indices = np.flatnonzero(
                alignment_set.index['RefGroupID'] == int(ref['ID']))
        else:
            aln_to_ref_indices = np.flatnonzero(
                alignment_set.index['tId'] == int(ref['ID']))

        if len(aln_to_ref_indices) > 0:  # check that there are alignments
            max_start_time = 5
            (template_position_info, pol_rates) = get_template_positions(
                alignment_set,
                aln_to_ref_indices, bas_reader, ref, movie_length,
                float(movie_limited_threshold), template_min_start,
                template_max_start, max_start_time, framerate,
                coarse_grain_binsize, subsample_to, is_legacy)
            # Persist the intermediate results. The files are opened in a
            # `with` block so the handles are closed (and flushed) right away.
            # NOTE(review): these paths are built by plain concatenation, so
            # output_directory must end with a path separator for the pickles
            # to land inside it (results_file above uses os.path.join).
            info_pickle_path = (output_directory + 'info_' +
                                str(ref['FullName']) + '.pkl')
            with open(info_pickle_path, 'wb') as info_handle:
                pickle.dump(template_position_info, info_handle)
            rates_pickle_path = (output_directory + 'rates_' +
                                 str(ref['FullName']) + '.pkl')
            with open(rates_pickle_path, 'wb') as rates_handle:
                pickle.dump(pol_rates, rates_handle)
            # remove alignments that started late
            clean_termination_info = clean_template_positions(
                template_position_info, ref, template_min_start,
                template_max_start, max_start_time)
            # generate template start histogram
            plot_descriptors = ['templateStartHistogram']
            pl.plot_histogram(
                data=template_position_info['tStart'],
                output_directory=output_directory,
                title=ref['FullName'],
                xlabel='Template start (bp)',
                ylabel='Counts',
                bins=[0, np.max(template_position_info['tStart']),
                      10],  # bases binwidth
                descriptors=plot_descriptors)
            plot_descriptors = ['templateStartJustifiedHistogram']
            pl.plot_histogram(
                data=template_position_info['tStartJustified'],
                output_directory=output_directory,
                title=ref['FullName'],
                xlabel='Template start (bp)',
                ylabel='Counts',
                bins=[
                    0,
                    np.max(template_position_info['tStartJustified']), 10
                ],  # bases binwidth
                descriptors=plot_descriptors)

            if movie_length:
                # generate start time histogram
                plot_descriptors = ['startTimeHistogram']
                pl.plot_histogram(
                    data=template_position_info['startTime'],
                    output_directory=output_directory,
                    title=ref['FullName'],
                    xlabel='Start time (min)',
                    ylabel='Counts',
                    bins=[0, movie_length, 1],  # minute binwidth
                    descriptors=plot_descriptors)
                # generate tStart vs. start time plots, if time info exists
                plot_descriptors = ['tStart_vs_startTime_unjustified']
                pl.plot_line_plot(x=template_position_info['startTime'],
                                  y=template_position_info['tStart'],
                                  output_directory=output_directory,
                                  title=ref['FullName'],
                                  datapoint_mode='markers',
                                  xlabel='Start time (min)',
                                  ylabel='Template start position (bp)',
                                  yaxis_range=None,
                                  descriptors=plot_descriptors)
                plot_descriptors = ['tStart_vs_startTime_justified']
                pl.plot_line_plot(x=template_position_info['startTime'],
                                  y=template_position_info['tStartJustified'],
                                  output_directory=output_directory,
                                  title=ref['FullName'],
                                  datapoint_mode='markers',
                                  xlabel='Start time (min)',
                                  ylabel='Template start position (bp)',
                                  yaxis_range=None,
                                  descriptors=plot_descriptors)

                if not is_legacy and pol_rates:
                    # plot pol rates
                    xyz = []
                    for tpos in pol_rates:
                        rate, count = pol_rates[tpos]
                        xyz.append((tpos, rate, count))
                    xyz.sort(key=lambda tup: tup[0])  # sort for line plot
                    tpos, rate, count = zip(*xyz)
                    plot_descriptors = [
                        'polrate_by_template_position', 'tStartRange',
                        str(template_min_start),
                        str(template_max_start)
                    ]
                    pl.plot_line_plot(x=tpos,
                                      y=rate,
                                      output_directory=output_directory,
                                      title=ref['FullName'],
                                      datapoint_mode='lines',
                                      xlabel='Template position (bp)',
                                      ylabel='Mean pol rate (bases/sec)',
                                      yaxis_range=[0, 4],
                                      descriptors=plot_descriptors)
                    plot_descriptors = [
                        'numalns_by_template_position', 'tStartRange',
                        str(template_min_start),
                        str(template_max_start)
                    ]
                    pl.plot_line_plot(x=tpos,
                                      y=count,
                                      output_directory=output_directory,
                                      title=ref['FullName'],
                                      datapoint_mode='lines',
                                      xlabel='Template position (bp)',
                                      ylabel='Number of alignments',
                                      yaxis_range=[0,
                                                   len(aln_to_ref_indices)],
                                      descriptors=plot_descriptors)

            minimum_alignment_number = 500
            if len(clean_termination_info) > minimum_alignment_number:
                # produce survival curve
                (template_start_position, template_end_position,
                 survival) = calculate_survival(clean_termination_info)
                # plot survival curve
                plot_descriptors = [
                    'survival_by_template_position', 'tStartRange',
                    str(template_min_start),
                    str(template_max_start)
                ]
                pl.plot_line_plot(x=template_end_position,
                                  y=survival,
                                  output_directory=output_directory,
                                  title=ref['FullName'],
                                  datapoint_mode='lines',
                                  xlabel='Template position (bp)',
                                  ylabel='Fraction survivors left',
                                  yaxis_range=[0, 1],
                                  descriptors=plot_descriptors)
                # produce termination rates
                (template_position,
                 termination_rate) = calculate_termination_rate(
                     template_end_position, survival, coarse_grain_binsize)
                # plot termination rates
                plot_descriptors = [
                    'termination_rates', 'tStartRange',
                    str(template_min_start),
                    str(template_max_start)
                ]
                pl.plot_line_plot(x=template_position,
                                  y=termination_rate,
                                  output_directory=output_directory,
                                  title=ref['FullName'],
                                  datapoint_mode='lines',
                                  xlabel='Template position (bp)',
                                  ylabel='Termination rate (per bp)',
                                  yaxis_range=None,
                                  descriptors=plot_descriptors)
                # plot taus
                plot_descriptors = [
                    'termination_taus', 'tStartRange',
                    str(template_min_start),
                    str(template_max_start)
                ]
                pl.plot_line_plot(x=template_position[termination_rate != 0],
                                  y=np.divide(
                                      1,
                                      termination_rate[termination_rate != 0],
                                      dtype=float),
                                  output_directory=output_directory,
                                  title=ref['FullName'],
                                  datapoint_mode='lines',
                                  xlabel='Template position (bp)',
                                  ylabel='Tau (bp)',
                                  yaxis_range=None,
                                  descriptors=plot_descriptors)
                # fit termination rates to get taus
                # Resolve the adapter positions for THIS reference only. The
                # function arguments are deliberately not reassigned: doing
                # so would make a default derived from the first reference's
                # SMRTBellSize stick for every later reference.
                first_adapter_position = first_adapter_template_position
                if first_adapter_position is None:
                    first_adapter_position = np.divide(
                        ref['SMRTBellSize'], 2, dtype=int)
                second_adapter_position = second_adapter_template_position
                if second_adapter_position is None:
                    second_adapter_position = ref['SMRTBellSize']
                tau_1, tau_2, tau_rc = fit_taus(
                    template_position, termination_rate,
                    int(first_adapter_position),
                    int(second_adapter_position), movie_length)
                logging.info(('\n\nTau1 is ' + str(tau_1) + ' bases\n'
                              'Tau2 is ' + str(tau_2) + ' bases\n'
                              'TauRC is ' + str(tau_rc) + ' bases\n'))
                # plot termination rate with fits
                plot_descriptors = ['fitted_termination_rates']
                pl.plot_fitted_line_plot(x=template_position,
                                         y=termination_rate,
                                         t_1=[
                                             np.min(template_position),
                                             first_adapter_position,
                                             np.divide(1., tau_1, dtype=float)
                                         ],
                                         t_2=[
                                             first_adapter_position,
                                             second_adapter_position,
                                             np.divide(1., tau_2, dtype=float)
                                         ],
                                         t_rc=[
                                             second_adapter_position,
                                             np.max(template_position),
                                             np.divide(1., tau_rc, dtype=float)
                                         ],
                                         output_directory=output_directory,
                                         title=ref['FullName'],
                                         xlabel='Template position (bp)',
                                         ylabel='Termination rate (per bp)',
                                         descriptors=plot_descriptors)
                # plot termination taus with fits
                plot_descriptors = ['fitted_termination_taus']
                pl.plot_fitted_line_plot(
                    x=template_position[termination_rate != 0],
                    y=np.divide(1.,
                                termination_rate[termination_rate != 0],
                                dtype=float),
                    t_1=[
                        np.min(template_position),
                        first_adapter_position, tau_1
                    ],
                    t_2=[
                        first_adapter_position,
                        second_adapter_position, tau_2
                    ],
                    t_rc=[
                        second_adapter_position,
                        np.max(template_position), tau_rc
                    ],
                    output_directory=output_directory,
                    title=ref['FullName'],
                    xlabel='Template position (bp)',
                    ylabel='Termination tau (bp)',
                    descriptors=plot_descriptors)
                taus = (tau_1, tau_2, tau_rc)
                save_taus(alignment_set, aln_to_ref_indices,
                          template_position_info, clean_termination_info, ref,
                          taus, results_file, fieldnames)
            else:
                logging.info(('Less than ' + str(minimum_alignment_number) +
                              ' alignments exist against reference ' +
                              str(ref['FullName']) + '. Analysis skipped.'))
        else:
            logging.info(('No alignments produced on reference ' +
                          str(ref['FullName']) + '.'))

    return None
示例#6
0
def main():
    """Build the sea bass / salmon sample populations and report on them.

    Prints the probability sums, the per-class probabilities, the
    informative and non-informative posteriors and the training accuracy,
    then draws the plots selected by the module-level ``show_post_plots``,
    ``show_pdfs`` and ``show_hist`` flags.
    """
    global show_hist, show_pdfs, show_post_plots, threshold

    rule = "----------------------------------------------------------------------------------------------------------"

    def section(title):
        # Every report section opens with a rule followed by its title.
        print(rule)
        print(title)

    x_seabass = ctrl.create_sample_population(30, 3, 400)
    x_salmon = ctrl.create_sample_population(32, 2, 600)

    # One data object per class plus one for the combined data set
    sea_bass = DataObject(x_seabass)
    salmon = DataObject(x_salmon)
    total = DataObject(ctrl.concatenate_data_sets(x_seabass, x_salmon))

    # Check p-sums
    section("Sums probabilities")
    sum_sea_bass = np.sum(sea_bass.get_probabilities()[0:] *
                          sea_bass.get_bin_width())
    sum_salmon = np.sum(salmon.get_probabilities()[0:] *
                        salmon.get_bin_width())
    print("Seabass: %f" % sum_sea_bass)
    print("Salmon: %f" % sum_salmon)
    print(rule)

    # ToDo: calculate decision vector
    # ToDo: calculate Accuracy
    # ToDo: Test samples to trained Model

    section("Probabilities")
    p_sea_bass = sea_bass.get_probabilities()
    p_salmon = salmon.get_probabilities()
    print("Seabass: %s" % str(p_sea_bass))
    print("Salmon: %s" % str(p_salmon))
    print(rule)

    # Informative posteriors (the probabilities are fetched again first)
    section("Informative posteriors")
    p_sea_bass = sea_bass.get_probabilities()
    p_salmon = salmon.get_probabilities()
    post_sea_bass = ctrl.calc_posterior(p_sea_bass, p_salmon,
                                        sea_bass.get_data_set(),
                                        salmon.get_data_set())
    post_salmon = ctrl.calc_posterior(p_salmon, p_sea_bass,
                                      salmon.get_data_set(),
                                      sea_bass.get_data_set())
    print("Seabass: %s" % str(post_sea_bass))
    print("Salmon: %s" % str(post_salmon))
    print(rule)

    # Non-informative posteriors
    section("Non-Informative posteriors")
    post_sea_bass_non_inf = ctrl.calc_posterior_non_inf(p_sea_bass, p_salmon)
    post_salmon_non_inf = ctrl.calc_posterior_non_inf(p_salmon, p_sea_bass)
    print("Seabass: %s" % str(post_sea_bass_non_inf))
    print("Salmon: %s" % str(post_salmon_non_inf))
    print(rule)

    # Ground-truth vector for the training step
    ground_truth_vec = ctrl.create_ground_truth_vector(x_salmon, x_seabass)

    # Train model
    section("Training of Model")
    result_training = ctrl.train_model(post_salmon_non_inf,
                                       total.get_data_set(),
                                       salmon.get_bin_width(),
                                       salmon.get_bin_center(),
                                       ground_truth_vec)

    print("Accuracy: %s" % str(result_training))
    print(rule)

    # Plots, each group controlled by its own flag
    if show_post_plots:
        bin_center_total = total.get_bin_center()
        plots.plot_posteriors(post_sea_bass, post_salmon, bin_center_total,
                              "Seabass", "Salmon")
        plots.plot_posteriors_non_inf(post_sea_bass_non_inf,
                                      post_salmon_non_inf, bin_center_total,
                                      "Seabass", "Salmon")

    if show_pdfs:
        plots.plot_pdf(total.get_bin_center(), sea_bass.get_probabilities(),
                       salmon.get_probabilities(), "Seabass", "Salmon")

    if show_hist:
        # Same order as the data objects were created: sea bass, salmon, total
        for data_object in (sea_bass, salmon, total):
            plots.plot_histogram(data_object.get_frequencies(),
                                 data_object.get_bins())