def plot_spectrogram_from_magnitudes(self, experiment_id):
    '''
    Plot a spectrogram on behalf of an experiment, then print
    the experiment.

    The experiment is either passed in directly, or named by a
    string that is resolved through self.find_experiment().

    NOTE(review): the spectrogram data comes from
    DSPUtils.get_spectrogram__from_treatment(-40, 5), i.e. from
    fixed arguments that do not depend on the resolved experiment.
    This looks like work in progress; confirm before relying on it.

    @param experiment_id: identifier of an experiment, or the
        experiment itself
    @type experiment_id: {str | Experiment}
    @raise TypeError: if experiment_id is neither a string nor
        an Experiment instance
    @raise ValueError: if a string identifier cannot be resolved
    '''
    given_by_name = isinstance(experiment_id, str)

    # Reject unsupported argument types up front:
    if not given_by_name and not isinstance(experiment_id, Experiment):
        raise TypeError(
            f"Value '{experiment_id}' must be string or Experiment instance"
        )

    if given_by_name:
        # Got a signal_treatment as identifier:
        experiment = self.find_experiment(experiment_id)
    else:
        experiment = experiment_id

    if experiment is None:
        raise ValueError(
            f"Could not find experiment given '{experiment_id}'")

    info = DSPUtils.get_spectrogram__from_treatment(-40, 5)

    # TODO: the plotter should be given the experiment's framerate
    # (e.g. Plotter(experiment.framerate)) once it is available.
    spectro_plotter = Plotter()
    spectro_plotter.plot_spectrogram_from_magnitudes(
        info['freq_labels'],
        info['time_labels'],
        info['spectrogram'])

    print(experiment)
def perform_training(sc: SparkContext, params_dict: dict):
    """Train k-means models over a range of k and pick one by cost ratio.

    The training data is read as a text file in which every line is a
    comma-separated list of floats (one signal window per line).
    For each k in ``range(min_num_of_clusters, max_num_of_clusters)`` a
    model is trained and its cost is compared with the cost of the
    previous k. The first model whose ``cost / previous_cost`` exceeds
    ``boundary_ratio`` is returned. If no model qualifies, a fallback
    model built from a single sampled data row is returned.

    Recognised keys of ``params_dict`` (all optional):

    * ``normal_ekg_data_path``: path handed to ``sc.textFile``
    * ``min_num_of_clusters``: first k to try (default 5)
    * ``max_num_of_clusters``: exclusive upper bound for k (default 20)
    * ``boundary_ratio``: cost-ratio threshold (default 0.8)

    :param sc: context used to load the training data
    :param params_dict: training parameters, see above
    :return: the selected model
    """
    normal_ekg_data_path = params_dict.get('normal_ekg_data_path')
    min_num_of_clusters = int(params_dict.get('min_num_of_clusters', 5))
    max_num_of_clusters = int(params_dict.get('max_num_of_clusters', 20))
    # The ratio is a fraction (default 0.8), so it must be parsed as a
    # float: int() would truncate 0.8 to 0 and reject the string '0.8'.
    boundary_ratio = float(params_dict.get('boundary_ratio', 0.8))

    ekg_rdd_data = sc.textFile(normal_ekg_data_path).map(
        lambda line: np.array([float(val) for val in line.split(',')]))

    k_range = range(min_num_of_clusters, max_num_of_clusters)

    # Starting from an infinite cost makes the first ratio 0.0, so the
    # first k can only be selected if boundary_ratio is negative.
    prev_cost = float(np.inf)
    # Fallback in case no k satisfies the ratio criterion:
    final_km = KMeansModel(ekg_rdd_data.takeSample(False, 1))
    cost_ratios = []
    found_best = False

    for k in k_range:
        km = KMeans.train(ekg_rdd_data, k)
        # cost equals to sum of squared distances of samples
        # to the nearest cluster centre
        cost = km.computeCost(ekg_rdd_data)
        ratio = cost / prev_cost
        prev_cost = cost
        cost_ratios.append(ratio)
        # Keep training after a match so that the elbow plot
        # covers the whole k range:
        if ratio > boundary_ratio and not found_best:
            final_km = km
            found_best = True

    Plotter.plot_elbow(cost_ratios, k_range)
    return final_km
def main(args: argparse.Namespace):
    """
    Plot the given two columns of the csv at the given path

    :param args: The parsed command line arguments; reads
        ``input_csv``, ``col1_name``, ``col2_name`` and ``limit``
    :raises FileNotFoundError: if ``args.input_csv`` does not exist
    """
    input_csv_path = Path(args.input_csv)

    # Validate with an explicit exception: an assert would be
    # stripped when Python runs with -O.
    if not input_csv_path.exists():
        raise FileNotFoundError(
            f"Input csv path does not exist: {input_csv_path}")

    csv_df = pd.read_csv(input_csv_path)
    plotter = Plotter(csv_df)
    plotter.plot(args.col1_name, args.col2_name, args.limit)
def plot_data(Q: tuple, policy: tuple) -> None:
    """Draw the blackjack value plot derived from the first entry of ``Q``.

    ``Q[0]`` is converted with ``POLICY.Q_to_plot`` and handed to a
    ``Plotter``, which renders it over player sums 11..21 and dealer
    cards 1..10.

    NOTE(review): ``policy`` is accepted but not used by this function,
    so no policy plot is produced here.
    """
    # Format data
    values_to_plot = POLICY.Q_to_plot(Q[0])

    # Set plotters
    value_plotter = Plotter(values_to_plot)
    axis_text = FigureText(
        "Player's Current Sum",
        "Dealer's Showing Card",
        "State Value",
    )

    # Plot blackjack values (prediction)
    player_sums = np.arange(11, 22)
    dealer_cards = np.arange(1, 11)
    value_plotter.plot_blackjack_values(
        text=axis_text,
        x_range=player_sums,
        y_range=dealer_cards,
        figsize=(10, 10),
    )
def run_ekg_consumer(config_dict: dict):
    """Train a clustering model and start the anomaly check on a topic.

    Steps: initialise Spark, set up logging, train a model with the
    clusterer selected by ``clustering_mode``, plot and log the cluster
    centers, then hand the model to an ``AnomalyDetector`` which runs
    its DStream check on ``topic``.

    Recognised keys of ``config_dict`` and their defaults:
    ``spark_home`` ('C:/spark-2.3.3-bin-hadoop2.7'), ``logging_file``
    (None), ``topic`` ('ekg-stream'), ``clustering_mode`` (None),
    ``normal_ekg_data_file`` (None), ``min_num_of_clusters`` (5),
    ``max_num_of_clusters`` (20), ``batch_duration`` (1) and
    ``training_duration`` (20).

    NOTE(review): ``normal_ekg_data_file`` is appended to the current
    working directory by plain string concatenation, so it has to be
    a string (a missing key raises TypeError) and presumably has to
    start with a path separator; confirm against the configuration.
    """
    findspark.init(
        config_dict.get('spark_home', 'C:/spark-2.3.3-bin-hadoop2.7'))
    sc = SparkContext(appName='SparkEKGConsumer')

    logger = initialize_logger(config_dict.get('logging_file'))

    topic = config_dict.get('topic', 'ekg-stream')
    clustering_mode = config_dict.get('clustering_mode')

    normal_ekg_file_path = os.getcwd() + config_dict.get('normal_ekg_data_file')

    clustering_params_dict = {
        # basic clusterer params
        'normal_ekg_data_path': normal_ekg_file_path,
        'min_num_of_clusters': config_dict.get('min_num_of_clusters', 5),
        'max_num_of_clusters': config_dict.get('max_num_of_clusters', 20),
        # streaming clusterer params
        'batch_duration': int(config_dict.get('batch_duration', 1)),
        'training_duration': int(config_dict.get('training_duration', 20)),
    }

    clusterer = ClustererFactory.get_clusterer(clustering_mode)
    model = clusterer.perform_training(sc, clustering_params_dict)

    centers = model.clusterCenters
    Plotter.plot_cluster_centers(centers)

    logger.info(f"Number of clusters in a model: {len(centers)!s}")
    for position, center in enumerate(centers):
        logger.info(f"Cluster center {position!s} : \n{center!s}\n")

    # Spark DStream
    detector = AnomalyDetector(model)
    AnomalyDetector.perform_anomality_check_dstream(detector, sc, topic)
def check_anomality(self, signal_window):
    """Classify one signal window as anomalous or normal.

    The window is assigned to a cluster with ``self.model.predict``;
    the Euclidean distance between the window and that cluster's
    center decides the verdict: a distance greater than 2 is reported
    as an anomaly. The window is also plotted through
    ``Plotter.plot_distances``, whose return value is reported as the
    location the signal was saved to.

    :param signal_window: the signal window to check
    :return: multi-line text with the distance, the verdict and the
        path of the saved plot
    """
    center_index = self.model.predict(signal_window)
    nearest_center = self.model.centers[center_index]
    dist = distance.euclidean(nearest_center, signal_window)

    report = ('Euclidean distance between closest cluster center and signal: '
              + str(dist) + '\n')

    is_anomaly = bool(dist > 2)
    if is_anomaly:
        report += 'ANOMALY DETECTED!\n'
    else:
        report += 'Heartbeat is in normal range \n'

    saved_to = Plotter.plot_distances(
        nearest_center, signal_window, dist, is_anomaly=is_anomaly)

    return report + 'Signal saved to: ' + saved_to
import pickle

from cube.model import Cube
from mcts.solver import Solver
from performance.effectiveness import generate_random_cube
from plotting.plotter import Plotter

if __name__ == '__main__':
    # Demo: load a trained net, solve a randomly generated cube and
    # plot the cube followed by the result of each solver move.
    # NOTE: unpickling can execute arbitrary code; only load net
    # files that come from a trusted source.
    with open('./nets/trained_net500.pkl', 'rb') as net_file:
        net = pickle.load(net_file)

    solver = Solver(net)
    cube = generate_random_cube(iterations=6)
    moves = solver.solve(cube)

    # First frame is the starting cube; every further frame is built
    # from cube.change_by(move) for one move of the solution.
    sequence = [Cube(cube)]
    sequence.extend(Cube(cube.change_by(move)) for move in moves)

    # To write an animation instead of showing the frames, use
    # Plotter().save_sequence(sequence, 'demo.gif')
    Plotter().plot_sequence(sequence)
def _ensure_prerequisites(self,
                          infiles,
                          actions,
                          framerate,
                          threshold_db,
                          low_freq,
                          high_freq,
                          nfft,
                          outdir
                          ):
    '''
    Check that everything needed for the requested actions is
    available, and log an error for the first requirement that
    is not met.

    Side effects: creates outdir if it is given and does not exist;
    sets self.plotter if a plotting action is requested; sets
    self.framerate to self.DEFAULT_FRAMERATE if no framerate is given.

    @param infiles: input file path(s); a single string is
        treated as a one-element list
    @type infiles: {str | [str]}
    @param actions: names of the tasks to accomplish
    @type actions: [str]
    @param framerate: framerate, or None to use the default
    @type framerate: {None | int}
    @param threshold_db: noise gate threshold; required for
        the 'spectro' and 'melspectro' actions
    @type threshold_db: {None | int}
    @param low_freq: low end of bandpass filter; required for
        the 'spectro' and 'melspectro' actions
    @type low_freq: {None | int}
    @param high_freq: high end of bandpass filter; required for
        the 'spectro' and 'melspectro' actions
    @type high_freq: {None | int}
    @param nfft: time bin size, or None to use the default
    @type nfft: {None | int}
    @param outdir: output directory, created if missing
    @type outdir: {None | str}
    @return: True if all prerequisites are met, else False
    @rtype: bool
    '''

    # Normalize a single path to a list *before* the checks below
    # iterate over infiles; iterating a bare string would test its
    # individual characters.
    if isinstance(infiles, str):
        infiles = [infiles]

    # Prerequisites:

    if outdir is not None and not os.path.exists(outdir):
        os.makedirs(outdir)

    if 'labelmask' in actions:
        # Need true framerate and spectrogram bin size
        # to compute time ranges in spectrogram:
        if nfft is None:
            self.log.warn(f"Assuming default time bin nfft of {self.NFFT}!\n"
                          "If this is wrong, label allignment will be wrong")
        if framerate is None:
            self.log.warn(f"Assuming default framerate of {self.DEFAULT_FRAMERATE}!\n"
                          "If this is wrong, label allignment will be wrong")

        # A .txt file must either be given in the list of infiles,
        # or must be in the directory of one of the infiles.
        # The sibling path must be tested with exists(): a Path
        # object is always truthy, whether or not the file is there.
        sibling_txt_exists = any(
            Path(filename).parent.joinpath(Path(filename).stem + '.txt').exists()
            for filename in infiles)
        txt_given = any(filename.endswith('.txt') for filename in infiles)
        if not sibling_txt_exists and not txt_given:
            self.log.err("For creating a label mask, must have a Raven selection file")
            return False

    if 'spectro' in actions or 'melspectro' in actions:
        if not any(filename.endswith('.wav') for filename in infiles):
            self.log.err("For creating a spectrogram, a .wav file must be provided")
            return False

        if framerate is not None:
            self.log.warn(f"Framerate was provided, but will be ignore: using framerate from .wav file.")

        if threshold_db is None or low_freq is None or high_freq is None:
            # One message string: the pieces are joined by implicit
            # concatenation, not passed as separate arguments.
            self.log.err("This module always does bandwidth prefilter, and noise gating\n"
                         "In future turning those off may be added. For now,\n"
                         "set low_freq to 0 and high_freq to something high\n"
                         "and set threshold_db to something like -100 to achieve "
                         "the same goal"
                         )
            return False

    if 'plot' in actions or 'plotexcerpts' in actions or 'plothits' in actions:
        # We either need a .pickle file that must be
        # a spectrogram, or we need a .wav file that will
        # be turned into a spectrogram to be plotted
        if not any(filename.endswith(('.pickle', '.wav'))
                   for filename in infiles):
            self.log.err("To plot something, there must be either a .pickle spectrogram file\n"
                         "or a .wav file"
                         )
            return False
        self.plotter = Plotter()

    if framerate is None:
        self.framerate = self.DEFAULT_FRAMERATE

    if nfft is None:
        # Only reported here: rebinding the local nfft would not
        # be visible to the caller.
        self.log.info(f"Assuming NFFT == {Spectrogrammer.NFFT}")

    return True
) parser.add_argument('--actions', nargs='+', choices=['spectro', 'melspectro','cleanspectro','plot', 'plotexcerpts','plothits', 'labelmask'], help="Which tasks to accomplish (repeatable)" ) parser.add_argument('infiles', nargs='+', help="Input .wav/.pickle file(s)" ) args = parser.parse_args(); Spectrogrammer(args.infiles, args.actions, outdir=args.outdir, low_freq=args.min_freq, high_freq=args.max_freq, start_sec=args.start, end_sec=args.end, # If no_normalize is True, don't normalize: normalize=not args.no_normalize, framerate=args.framerate, spectrogram_freq_cap=args.freq_cap, nfft=args.nfft ) # Keep charts up till user kills the windows: Plotter.block_till_figs_dismissed()
def run(self, rhoNo=1, phiNo=0, tfNo=1):
    """Render one dataset with three models, plot and summarise the results.

    A reference spline model, a direct spline model and a third model
    (selected by the local ``choice``) are rendered. Pixel colors of
    all three are plotted, the direct and the third result are each
    compared against the reference, and a summary of both comparisons
    is printed.

    NOTE(review): ``choice`` is fixed to 0, so the third model is
    always the voxel model; the other branches are currently unused.
    """
    dataset = Dataset(rhoNo, phiNo, tfNo)
    texDimSize = 128
    renderer = Renderer(self.eye, self.screen)

    rho, phi, tf = dataset.rho, dataset.phi, dataset.tf

    phiPlane = SplinePlane(phi, self.splineInterval, self.intersectTolerance)
    bbox = phiPlane.createBoundingBox()

    plotter = Plotter(self.splineInterval)
    refPlot = plotter.refSplineModelPlotter
    directPlot = plotter.directSplineModelPlotter
    voxelPlot = plotter.voxelModelPlotter
    paramPlot = plotter.paramPlotter

    for splinePlot in (refPlot, directPlot):
        splinePlot.plotGrid(phi.evaluate, 10, 10)

    paramPlot.plotGrid(10, 10)
    paramPlot.plotScalarField(rho, tf)

    for modelPlot in (refPlot, directPlot, voxelPlot):
        modelPlot.plotBoundingBox(bbox)

    # Creating models
    refModel = SplineModel(tf, phiPlane, rho, self.refTolerance)
    directModel = SplineModel(tf, phiPlane, rho)

    baseScalars = refModel.generateScalarMatrix(
        bbox, texDimSize, texDimSize, self.voxelizationTolerance)
    baseTexture = Texture2D(baseScalars)
    plotter.plotScalarTexture(baseTexture)

    voxelModel = VoxelModel(tf, baseTexture, bbox)

    choice = 0

    if choice == 0:
        model, modelType = voxelModel, ModelType.VOXEL
    elif choice == 1:
        model = BoundaryAccurateModel(tf, directModel, voxelModel)
        modelType = ModelType.BOUNDARYACCURATE
    elif choice == 2:
        voxelWidth = bbox.getHeight() / float(texDimSize)
        criterion = GeometricCriterion(self.screen.pixelWidth, voxelWidth)
        model = HybridModel(tf, directModel, voxelModel, criterion)
        modelType = ModelType.HYBRID
    else:
        # Level-of-detail textures: halve the side length down to 2.
        # NOTE: relies on Python 2 integer division of ints.
        lodTextures = [baseTexture]
        size = texDimSize / 2
        while size >= 2:
            lodScalars = refModel.generateScalarMatrix(
                bbox, size, size, self.voxelizationTolerance)
            lodTextures.append(Texture2D(lodScalars))
            size /= 2
        model = VoxelLodModel(tf, lodTextures, bbox, self.screen.pixelWidth)
        modelType = ModelType.VOXEL

    # Rendering
    refData = RenderData(ModelType.REFERENCE, self.viewRayDeltaRef)
    refData.renderResult = renderer.render(
        refModel, self.viewRayDeltaRef, refPlot)

    directData = RenderData(ModelType.DIRECT, self.viewRayDelta)
    directData.renderResult = renderer.render(
        directModel, self.viewRayDelta, directPlot)

    modelData = RenderData(modelType, self.viewRayDelta, texSize=texDimSize)
    modelData.renderResult = renderer.render(
        model, self.viewRayDelta, voxelPlot)

    # Plotting
    refColors = refData.renderResult.colors
    directColors = directData.renderResult.colors
    modelColors = modelData.renderResult.colors

    plotter.pixelReferencePlot.plotPixelColors(refColors)
    plotter.pixelDirectPlot.plotPixelColors(directColors)
    plotter.pixelVoxelizedPlot.plotPixelColors(modelColors)

    directDiffs = colordiff.compare(refColors, directColors)
    modelDiffs = colordiff.compare(refColors, modelColors)

    plotter.pixelDirectDiffPlot.plotPixelColorDiffs(directDiffs)
    plotter.pixelVoxelizedDiffPlot.plotPixelColorDiffs(modelDiffs)

    plotter.draw()

    # Printing
    Summary(directData, directDiffs).printData()
    print("")
    Summary(modelData, modelDiffs).printData()
def __init__(
        self,
        infile,
        amplitude_cutoff=-40,  # dB of peak
        low_freq=10,  # Hz
        high_freq=50,  # Hz
        spectrogram_freq_cap=60,  # Hz
        normalize=True,
        logfile=None,
        framerate=None,  # Only used for testing.
        spectrogram_dest=None,
        outdir=None,
        outfile=None,
        testing=False):
    '''
    Read a .wav file, band-pass filter it, noise-gate it, and
    write the result to a new .wav file.

    After completion, property percent_zeroed will contain
    the percentage of audio that was too low in amplitude
    to keep.

    During its work instances of this class may produce
    plots of results. By default those will not be plotted.
    To plot some or all, call add_task(plot_name) on the
    PlotterTasks class before creating this instance:

    Available plots are:
       o 'gated_wave_excerpt',
       o 'samples_plus_envelope',
       o 'spectrogram_excerpts',
       o 'low_pass_filter'

         PlotterTasks.add_task(<plotName>, **kwargs)

    @param infile: path to .wav file to be gated
        Can leave at None, if testing is True
    @type infile: str
    @param amplitude_cutoff: dB attenuation from maximum
        amplitude below which voltage is set to zero.
        If value of zero is passed in, no noise gating done.
        Only frequency gating.
    @type amplitude_cutoff: int
    @param low_freq: low end of front end bandpass filter
    @type low_freq: int
    @param high_freq: high end of front end bandpass filter
    @type high_freq: int
    @param spectrogram_freq_cap: passed through to amplitude_gate()
    @type spectrogram_freq_cap: int
    @param normalize: whether to run the samples through
        self.normalize() before gating
    @type normalize: bool
    @param logfile: file where to write logs; Default: stdout
    @type logfile: str
    @param framerate: normally extracted from the .wav file.
        Can be set here for testing. Samples/sec
    @type framerate: int
    @param spectrogram_dest: optionally a file to which a
        spectrogram of the entire noise-gated result is written.
        If None, no spectrogram is created. If a directory,
        a name will be constructed by appending '_spectrogram'
        to the input file root, with extension .pickle
    @type spectrogram_dest: {None | str}
    @param outdir: where gated, normalized .wav will be written.
        If None: same outdir as input wav file.
    @type outdir: str
    @param outfile: where gated, normalized .wav will be written.
        If None: same outdir as input wav file, using infile root,
        and adding '_gated'
    @type outfile: str
    @param testing: whether or not unittests are being run. If
        true, __init__() does not initiate any action, allowing
        the unittests to call individual methods.
    @type testing: bool
    @raise IOError: if the outfile cannot be opened for writing,
        or the .wav file cannot be read
    '''

    if not testing:
        if outdir is None:
            outdir = os.path.dirname(infile)

        # Make sure the outfile can be opened for writing,
        # before going into lengthy computations:

        # Replace input wav file outdir with specified outdir:
        (path, ext) = os.path.splitext(infile)
        fileroot = os.path.basename(path)
        if outfile is None:
            outfile = f"{os.path.join(outdir, fileroot)}_gated{ext}"

        if spectrogram_dest is not None and os.path.isdir(spectrogram_dest):
            spectrogram_dest = \
                f"{os.path.join(spectrogram_dest, fileroot)}_spectrogram.pickle"

        try:
            # NOTE: opening with 'wb' truncates an existing outfile.
            with open(outfile, 'wb') as _fd:
                pass
        except Exception as e:
            # Keep the original exception as the cause:
            raise IOError(
                f"Outfile cannot be access for writing; doing nothing: {repr(e)}"
            ) from e

    AmplitudeGater.log = LoggingService(logfile=logfile)

    self.percent_zeroed = None

    # For testing; usually framerate is read from .wav file:
    self.framerate = framerate

    if not testing:
        try:
            self.log.info("Reading .wav file...")
            (self.framerate, samples) = wavfile.read(infile)
            self.log.info("Done reading .wav file.")
        except Exception as e:
            raise IOError(f"Cannot read .wav file {infile}: {repr(e)}") from e

    self.plotter = Plotter()

    if testing:
        self.recording_length_hhmmss = "<unknown>"
    else:
        num_samples = samples.size
        recording_length_secs = num_samples / self.framerate
        self.recording_length_hhmmss = str(
            datetime.timedelta(seconds=recording_length_secs))

    # In testing mode no .wav file is read, so the framerate may
    # still be None; only derive samples/msec when it is known:
    self.samples_per_msec = (None if self.framerate is None
                             else round(self.framerate / 1000.))

    if testing:
        return

    samples_float = samples.astype(float)
    # Free memory:
    samples = None

    # Normalize:
    if normalize:
        normed_samples = self.normalize(samples_float)
    else:
        normed_samples = samples_float.copy()

    # Free memory:
    samples_float = None

    self.log.info("Taking abs val of values...")
    samples_abs = np.abs(normed_samples)
    self.log.info("Done taking abs val of values.")

    # Free memory:
    normed_samples = None

    # Before doing anything else, cut frequencies that
    # would not hold elephant call; gets rid of them
    # damn birds:
    self.log.info(f"Filtering unwanted frequencies ...")
    freq_gated_samples = self.frequency_gate(samples_abs,
                                             low_freq=low_freq,
                                             high_freq=high_freq)
    self.log.info(f"Done filtering unwanted frequencies.")

    # Free memory:
    samples_abs = None

    self.log.info(f"Computing abs val of filtered freqs...")
    freq_gated_samples_abs = np.abs(freq_gated_samples)
    self.log.info(f"Done computing abs val of filtered freqs.")

    if amplitude_cutoff != 0:
        # Noise gate: zero out anything whose amplitude is
        # below the threshold derived from amplitude_cutoff:
        gated_samples = self.amplitude_gate(
            freq_gated_samples_abs,
            amplitude_cutoff,
            spectrogram_freq_cap=spectrogram_freq_cap,
            spectrogram_dest=spectrogram_dest)
    else:
        # No noise gating requested: keep the frequency
        # gated signal as is.
        gated_samples = freq_gated_samples

    # Result back to int16:
    gated_samples = gated_samples.astype(np.int16)

    if outfile is not None and not testing:
        # Write out the result:
        self.log.info(f"Writing {outfile}...")
        wavfile.write(outfile, self.framerate, gated_samples)
        self.log.info(f"Done writing {outfile}...")

    # Make samples available to caller via
    # property 'gated_samples':
    self._gated_samples = gated_samples
    # Same with path to gated outfile:
    self._gated_outfile = outfile

    if PlotterTasks.has_task('gated_wave_excerpt'):
        # Show a series of up to 100 array elements from an
        # interesting area, not a flat line:
        start_indx = self.find_busy_array_section(gated_samples)
        # Clamp to the array end, so that the x values and the
        # sample slice always have the same length:
        end_indx = min(start_indx + 100, len(gated_samples))

        self.log.info(
            f"Plotting a 100 long series of result from {start_indx}...")
        self.plotter.plot(
            np.arange(start_indx, end_indx),
            gated_samples[start_indx:end_indx],
            title=f"Amplitude-Gated {os.path.basename(infile)}",
            xlabel='Sample Index',
            ylabel='Voltage')

    print('Done')
class AmplitudeGater(object): ''' classdocs ''' spectrogram_freq_cap = 50 # Hz. DEFAULT_FILTER_ORDER = 5 FFT_WIDTH = 2**12 # 4096 FRONT_END_HIGH_PASS_FREQ = 10 # Hz #****FRONT_END_LOW_PASS_FREQ = 40 # Hz FRONT_END_LOW_PASS_FREQ = 50 # Hz #------------------------------------ # Constructor #------------------- def __init__( self, infile, amplitude_cutoff=-40, # dB of peak low_freq=10, # high_freq=50, # spectrogram_freq_cap=60, # Hz normalize=True, logfile=None, framerate=None, # Only used for testing. spectrogram_dest=None, outdir=None, outfile=None, testing=False): ''' After completion, property percent_zeroed will contain the percentage of audio that was too low in amplitude to keep. During its work instances of this class may produce plots of results. By default those will not be plotted. To plot some or all, call add_task(plot_name) on the PlotterTasks class before creating this instance: Available plots are: o 'gated_wave_excerpt', o 'samples_plus_envelope', o 'spectrogram_excerpts', o 'low_pass_filter' PlotterTasks.add_task(<plotName>, **kwargs) @param infile: path to .wav file to be gated Can leave at None, if testing is True @type infile: str @param amplitude_cutoff: dB attenuation from maximum amplitude below which voltage is set to zero. If value of zero is passed in, no noise gating done. Only frequency gating. @type amplitude_cutoff: int @param low_freq: low end of front end bandpass filter @type low_freq: int @param high_freq: low end of front end bandpass filter @type high_freq: int @param framerate: normally extracted from the .wav file. Can be set here for testing. Samples/sec @type framerate: int @param spectrogram_dest: optionally a file to which a spectrogram of the entire noise-gated result is written. If None, no spectrogram is created. 
If a directory, a name will be constructed by appending '_spectrogram' to the input file before the extension @type spectrogram_dest: {None | str} @param logfile: file where to write logs; Default: stdout @type logfile: str @param logging_period: number of seconds between reporting envelope placement progress. @type logging_period: int @param outdir: where gated, normalized .wav will be written. If None: same outdir as input wav file. @type: outdir str @param outfile: where gated, normalized .wav will be written. If None: same outdir as input wav file, using infile root, and adding '_gated' @type: outfile str @param testing: whether or not unittests are being run. If true, __init__() does not initiate any action, allowing the unittests to call individual methods. @type testing: bool ''' if not testing: if outdir is None: outdir = os.path.dirname(infile) # Make sure the outfile can be opened for writing, # before going into lengthy computations: # Replace input wav file outdir with specified outdir: (path, ext) = os.path.splitext(infile) fileroot = os.path.basename(path) if outfile is None: outfile = f"{os.path.join(outdir, fileroot)}_gated{ext}" if spectrogram_dest is not None and os.path.isdir( spectrogram_dest): spectrogram_dest =\ f"{os.path.join(spectrogram_dest, fileroot)}_spectrogram.pickle" try: with open(outfile, 'wb') as _fd: pass except Exception as e: raise IOError( f"Outfile cannot be access for writing; doing nothing: {repr(e)}" ) AmplitudeGater.log = LoggingService(logfile=logfile) self.percent_zeroed = None # For testing; usually framerate is read from .wav file: self.framerate = framerate if not testing: try: self.log.info("Reading .wav file...") (self.framerate, samples) = wavfile.read(infile) self.log.info("Done reading .wav file.") except Exception as e: raise IOError(f"Cannot read .wav file {infile}: {repr(e)}") self.plotter = Plotter() if testing: self.recording_length_hhmmss = "<unknown>" else: num_samples = samples.size recording_length_secs 
= num_samples / self.framerate self.recording_length_hhmmss = str( datetime.timedelta(seconds=recording_length_secs)) self.samples_per_msec = round(self.framerate / 1000.) if testing: return samples_float = samples.astype(float) # Free memory: samples = None #************ #print(f"Pid {os.getpid()}: gating: about to norm") #************ # Normalize: if normalize: normed_samples = self.normalize(samples_float) else: normed_samples = samples_float.copy() #************ #print(f"Pid {os.getpid()}: gating: done norm") #************ # Free memory: samples_float = None self.log.info("Taking abs val of values...") samples_abs = np.abs(normed_samples) self.log.info("Done taking abs val of values.") # Free memory: normed_samples = None # Before doing anything else, cut frequencies that # would not hold elephant call; gets rid of them # damn birds: #************ #print(f"Pid {os.getpid()}: gating: about to freq gate") #************ self.log.info(f"Filtering unwanted frequencies ...") freq_gated_samples = self.frequency_gate(samples_abs, low_freq=low_freq, high_freq=high_freq) self.log.info(f"Done filtering unwanted frequencies.") #************ #print(f"Pid {os.getpid()}: gating: done freq gate") #************ # Free memory: samples_abs = None self.log.info(f"Computing abs val of filtered freqs...") freq_gated_samples_abs = np.abs(freq_gated_samples) self.log.info(f"Done computing abs val of filtered freqs.") # Free memory: samples_abs = None if amplitude_cutoff != 0: # Noise gate: Chop off anything with amplitude above amplitude_cutoff: #************ #print(f"Pid {os.getpid()}: gating: calling amplitude_gate") #************ gated_samples = self.amplitude_gate( freq_gated_samples_abs, amplitude_cutoff, spectrogram_freq_cap=spectrogram_freq_cap, spectrogram_dest=spectrogram_dest) #************ #print(f"Pid {os.getpid()}: gating: return from amplitude_gate)") #************ else: gated_samples = freq_gated_samples # Result back to int16: gated_samples = 
gated_samples.astype(np.int16) if outfile is not None and not testing: # Write out the result: self.log.info(f"Writing {outfile}...") wavfile.write(outfile, self.framerate, gated_samples) self.log.info(f"Done writing {outfile}...") # Make samples available to caller via # property 'gated_samples' (see below for its definition): self._gated_samples = gated_samples # Same with path to gated outfile: self._gated_outfile = outfile if PlotterTasks.has_task('gated_wave_excerpt'): # Find a series of 100 array elements where at least # the first is not zero. Just to show an interesting # area, not a flat line. The nonzero() function returns # a *tuple* of indices where arr is not zero. Therefore # the two [0][0] to get the first non-zero: start_indx = self.find_busy_array_section(gated_samples) end_indx = start_indx + 100 self.log.info( f"Plotting a 100 long series of result from {start_indx}...") self.plotter.plot( np.arange(start_indx, end_indx), gated_samples[start_indx:end_indx], title=f"Amplitude-Gated {os.path.basename(infile)}", xlabel='Sample Index', ylabel='Voltage') print('Done') #------------------------------------ # frequency_gate #------------------- def frequency_gate(self, samples_raw, low_freq, high_freq): ''' Input absolute values of time domain voltages. 
@param samples_raw: absolute values of input voltages @type samples_raw: int @param low_freq: lowest frequency of front end bandpass filter @type low_freq: int @param high_freq: highest frequency of front end bandpass filter @type high_freq: int ''' self.log.info( f"Applying front end band pass filter ({low_freq}Hz to {high_freq}Hz)" ) samples_band_passed = self.freq_filter( samples_raw, [low_freq, high_freq], pass_spec='bandpass', title="Frontend freq filter: "\ f"{low_freq}Hz to {high_freq}Hz" ) self.log.info("Done applying front end bandpass pass filter.") return samples_band_passed #------------------------------------ # amplitude_gate #------------------- def amplitude_gate( self, samples_abs, threshold_db, order=None, spectrogram_dest=None, spectrogram_freq_cap=150, # Hz ): ''' Given an array of raw audio samples, generate a noise-gated array of the same length. Optionally, create a full spectrogram into a .npy file. Optionally, plot 30-second spectrograms from 18 subsections of the gated samples. Procedure: o Normalize audio to fill 32 bits. o On the resulting signal, find all samples that are threshold-db below the root mean square (rms) peak of the envelope. The value must be negative. o At these very low-voltage times, set the original audio to zero. This takes signal areas that are clearly too low to be significant out of the picture, removing some noise. The result is the noise-gated signal, which will be returned. o Optionally: if spectrogram_dest is a file path destination, create a spectrogram over the full duration, and save it to that path as a DataFrame All frequencies above spectrogram_freq_cap are removed from the spectrogram before saving. o Optionally: plot 18 30-sec spectrograms from times evenly spaced across the total recording. Times and freqs in those plots correspond to the true times in the recording. 
@param samples_abs: raw audio @type samples_abs: np.array(int) @param threshold_db: voltage below which signal is set to zero; specified as dB below peak voltage: db FS. @type threshold_db: negative int @param order: polynomial of Butterworth filter. Default can be set with AmplitudeGater.DEFAULT_FILTER_ORDER @type order: int @param spectrogram_dest: optionally: file name where spectrogram is stored @type spectrogram_dest: str @param spectrogram_freq_cap: optionally: frequency above which all frequencies are removed from spectrogram. @type spectrogram_freq_cap: int ''' # Don't want to open the gate *during* a burst. # So make a low-pass filter that only roughly envelops if order is None: order = AmplitudeGater.DEFAULT_FILTER_ORDER #*********** # Since we are now putting a bandpass filter # ahead of this point, we no longer need to use # the envelope. # self.log.info(f"Applying low pass filter (cutoff {envelope_cutoff_freq})...") # envelope = self.freq_filter(samples_abs, # envelope_cutoff_freq, # pass_spec='lowpass', # title=f"Envelope lowpass filter: {envelope_cutoff_freq}Hz" # ) # self.log.info("Done applying low pass filter.") envelope = samples_abs #*********** if PlotterTasks.has_task('samples_plus_envelope') is not None: # Show a 1 second wide window: desired_secs = 1 duration_in_samples = self.framerate * desired_secs # Take the shown sample not right at the start: desired_start_point_secs = 10 start_point_in_samples = self.framerate * desired_start_point_secs self.plotter.over_plot( samples_abs[start_point_in_samples:start_point_in_samples + duration_in_samples], 'ABS(samples)', xlabel=f"Time in samples at {self.framerate} samples/sec") self.plotter.over_plot( envelope[start_point_in_samples:start_point_in_samples + duration_in_samples], f"Env Order {order}", xlabel=f"Time in samples at {self.framerate} samples/sec") # Compute the threshold below which we # set amplitude to 0. It's threshold_db of max # value. 
Note that for a normalized array # that max val == 1.0 #************ #print(f"Pid {os.getpid()}: compute RMS") #************ # max_voltage = np.amax(envelope) rms = np.sqrt(np.mean(np.square(envelope))) self.log.info(f"Signal RMS: {rms}") #************ #print(f"Pid {os.getpid()}: done compute RMS") #************ # Compute threshold_db of max voltage: #Vthresh = max_voltage * 10**(threshold_db/20) Vthresh = rms * 10**(threshold_db / 20) self.log.info(f"Cutoff threshold amplitude: {Vthresh}") # Zero out all amplitudes below threshold: self.log.info("Zeroing sub-threshold values...") #************ #print(f"Pid {os.getpid()}: mask ops") #************ mask_for_where_non_zero = 1 * np.ma.masked_greater(envelope, Vthresh).mask gated_samples = envelope * mask_for_where_non_zero self.percent_zeroed = 100 * gated_samples[gated_samples == 0].size / gated_samples.size self.log.info(f"Zeroed {self.percent_zeroed:.2f}% of signal.") #************ #print(f"Pid {os.getpid()}: done mask ops") #************ if spectrogram_dest: # Get a combined frequency x time matrix. The matrix values will # be complex: (freq_labels, time_labels, freq_time_dB) = self.make_spectrogram(gated_samples) if spectrogram_freq_cap is not None: self.log.info( f"Removing frequencies above {spectrogram_freq_cap}Hz...") # Remove all frequencies above, and including # spectrogram_freq_cap: (new_freq_labels, capped_spectrogram) = self.filter_spectrogram( freq_labels, freq_time_dB, [(None, spectrogram_freq_cap)]) self.log.info( f"Done removing frequencies above {spectrogram_freq_cap}Hz." 
#------------------------------------
# filter_spectrogram
#-------------------

def filter_spectrogram(self, freq_labels, freq_time, freq_bands):
    '''
    Given a spectrogram, return a new spectrogram that retains
    only the rows whose frequency falls within the given bands.

    freq_time is a matrix in which each row holds the contributions
    of one frequency over time. freq_labels holds the frequency of
    each row, i.e. the y-axis labels.

    freq_bands is a sequence of (min_freq, out_freq) intervals;
    a row is retained for a band if min_freq <= f < out_freq.
    A None lower bound means 0, a None upper bound means "no
    upper limit". Example: to retain 0 <= f < 5, 10 <= f < 20,
    and f >= 40:

         [(None, 5), (10, 20), (40, None)]

    The rows selected by each band are appended in the order in
    which the bands are listed. A (None, None) band retains every row.

    Note: rows are removed from the spectrogram; its width
    (number of time columns) does not change.

    @param freq_labels: frequency of each spectrogram row
    @type freq_labels: np.array[float]
    @param freq_time: 2d array of energy contributions
    @type freq_time: np.array(rows x cols) {float || int || complex}
    @param freq_bands: bands of frequencies to retain
    @type freq_bands: [({float | int | None}, {float | int | None})]
    @return: reduced frequency labels, and the correspondingly
        reduced spectrogram
    @rtype: (np.array(n), np.array(n, m))
    '''

    # Start with a result that has the same number of
    # columns as the input, but no rows; same for the labels:
    (_num_rows, num_cols) = freq_time.shape
    kept_rows = np.empty((0, num_cols))
    kept_labels = np.empty((0, ))

    for (min_freq, out_freq) in freq_bands:
        if min_freq is None and out_freq is None:
            # Degenerate band: keep every row. (Previously this
            # band was skipped, which kept nothing for it.)
            band_indices = np.arange(len(freq_labels))
        else:
            lower = 0 if min_freq is None else min_freq
            if out_freq is None:
                # No upper bound: use a ceiling above the
                # highest frequency present:
                upper = np.amax(freq_labels) + 1.
            else:
                upper = out_freq
            # np.where returns a tuple, therefore the [0]:
            band_indices = np.where(
                np.logical_and(freq_labels >= lower,
                               freq_labels < upper))[0]

        # Append the rows (axis=0) and labels of this band:
        kept_rows = np.vstack(
            (kept_rows, np.take(freq_time, band_indices, axis=0)))
        kept_labels = np.hstack(
            (kept_labels, np.take(freq_labels, band_indices)))

    return (kept_labels, kept_rows)

#------------------------------------
# freq_filter
#-------------------

def freq_filter(self, data, cutoffs, pass_spec='bandpass', title=None):
    '''
    Given a voltage sequence and one or two frequency cutoffs,
    return a filtered version of the sequence.

    If cutoffs is a sequence of two frequencies, a bandpass
    filter is constructed. Else, pass_spec must be either
    'lowpass' or 'highpass'. A one-element sequence is treated
    like a single cutoff.

    The filter is a Chebyshev type II filter of order
    self.DEFAULT_FILTER_ORDER with 60dB minimum stopband
    attenuation, applied as second-order sections.

    @param data: the signal to filter
    @type data: np.array
    @param cutoffs: one or two cutoff frequencies
    @type cutoffs: {int | [int] | [int,int]}
    @param pass_spec: 'bandpass', 'lowpass', or 'highpass'
    @type pass_spec: str
    @param title: title for the filter response plot, which is
        produced if PlotterTasks reports a 'filter_response' task.
    @type title: {None | str}
    @return: the filtered signal
    @rtype: np.array
    @raise ValueError: if pass_spec and the number of cutoffs
        do not fit together.
    '''

    nyq = 0.5 * self.framerate

    # Unwrap a single cutoff passed inside a sequence:
    if isinstance(cutoffs, (list, tuple)) and len(cutoffs) == 1:
        cutoffs = cutoffs[0]

    if isinstance(cutoffs, (list, tuple)):
        # Two critical freqs were passed:
        if pass_spec != 'bandpass':
            raise ValueError(
                f"Asked for {pass_spec}, but passed two critical freqs.")
        if len(cutoffs) != 2:
            raise ValueError(
                "Asked for bandpass filter, but did not specify two critical freqs"
            )
        normal_cutoffs = [cutoffs[0] / nyq, cutoffs[1] / nyq]
    else:
        # Only one critical freq. pass_spec must
        # be lowpass or highpass:
        if pass_spec not in ('lowpass', 'highpass'):
            raise ValueError(
                f"Passed one critical freq, but pass_spec was '{pass_spec}' instead of lowpass or highpass"
            )
        normal_cutoffs = cutoffs / nyq

    # Second-order-sections form of the filter, with minimum
    # attenuation in the stopband of 60dB:
    sos = iirfilter(self.DEFAULT_FILTER_ORDER,
                    normal_cutoffs,
                    rs=60,
                    btype=pass_spec,
                    analog=False,
                    ftype='cheby2',
                    output='sos')

    if PlotterTasks.has_task('filter_response') is not None:
        self.plotter.plot_frequency_response(sos, self.framerate,
                                             cutoffs, title)

    return sosfilt(sos, data)

#------------------------------------
# butter_lowpass_filter
#-------------------

def butter_lowpass_filter(self, data, cutoff, order=4):
    '''
    Not used.

    Low-pass filter data with a Butterworth filter.

    @param data: the signal to filter
    @type data: np.array
    @param cutoff: cutoff frequency
    @type cutoff: {int | float}
    @param order: filter order
    @type order: int
    @return: the filtered signal
    @rtype: np.array
    '''

    nyq = 0.5 * self.framerate
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    envelope = lfilter(b, a, data)

    if PlotterTasks.has_task('filter_response') is not None:
        # NOTE(review): freq_filter() calls plot_frequency_response
        # with (sos, framerate, cutoffs, title); the argument list
        # here differs -- confirm against the plotter's signature.
        self.plotter.plot_frequency_response(b, a, cutoff, order)

        # NOTE(review): assumed to belong to the plotting branch;
        # confirm against the original indentation.
        # Plot a piece of envelope, roughly from the middle. The
        # end is clamped so that x and y always have equal length:
        mid_env_index = round(envelope.size / 2)
        end_index = min(mid_env_index + 100, envelope.size)
        self.plotter.plot(np.arange(mid_env_index, end_index),
                          envelope[mid_env_index:end_index],
                          f"Envelope {mid_env_index} to {end_index}",
                          "Time", "Amplitude")

    return envelope

#------------------------------------
# suppress_small_voltages
#-------------------

def suppress_small_voltages(self, volt_vec, thresh_volt, padding_secs):
    '''
    Return a copy of volt_vec in which runs of elements smaller
    than thresh_volt are set to zero, except that the first and
    last 'padding' elements of each such run are left alone.
    Runs too short to accommodate the padding on both sides
    are not zeroed at all.

    Strategy:
       o Build a boolean mask that is True wherever the
         voltage is below thresh_volt.
       o Repeatedly hand the mask to narrow_mask_segment(),
         which shrinks one run of True values by the padding
         at each end, and returns where to continue.
       o Fill the positions still masked with 0.

    Example with thresh_volt == 5 and a padding of 2 samples:
         [9, 1, 1, 1, 1, 1, 1, 9]
     ==> [9, 1, 1, 0, 0, 1, 1, 9]

    @param volt_vec: the voltages
    @type volt_vec: np.array
    @param thresh_volt: voltage below which elements are
        candidates for zeroing
    @type thresh_volt: {int | float}
    @param padding_secs: duration to leave untouched at the
        start and end of each below-threshold run; converted
        to samples via samples_from_secs()
    @type padding_secs: {int | float}
    @return: array of same length as volt_vec with the
        selected elements set to 0
    @rtype: np.array
    '''

    padding = self.samples_from_secs(padding_secs)

    # Mask is True where we will zero out the voltage:
    volt_mask = volt_vec < thresh_volt

    next_pos = 0
    while next_pos is not None:
        # A None position signals that the whole
        # mask has been processed:
        (volt_mask, next_pos) = self.narrow_mask_segment(
            volt_mask, next_pos, padding)

    # Do the zeroing:
    return ma.masked_array(volt_vec, volt_mask).filled(0)

#------------------------------------
# narrow_mask_segment
#-------------------

def narrow_mask_segment(self, mask, ptr_into_mask, padding):
    '''
    Find the next run of True values in mask at or after
    ptr_into_mask, and set the first and last 'padding'
    elements of that run to False. If the run is too short
    for that, the entire run is set to False.

    The mask is modified in place.

    @param mask: True where elements are to be zeroed
    @type mask: np.array[bool]
    @param ptr_into_mask: index at which to start searching
    @type ptr_into_mask: int
    @param padding: number of elements to protect at each
        end of the run
    @type padding: int
    @return: the mask, and the index from which to continue
        with the next call; None if the end of the mask
        was reached without finding another run.
    @rtype: (np.array[bool], {int | None})
    '''

    mask_len = mask.size
    if ptr_into_mask >= mask_len:
        # Erroneous arg, or end of mask:
        return (mask, None)

    # Skip over False values to the start of the next run:
    run_start = ptr_into_mask
    while run_start < mask_len and not mask[run_start]:
        run_start += 1
    if run_start >= mask_len:
        # No further run:
        return (mask, None)

    # Find the end (exclusive) of the run of True values:
    run_end = run_start
    while run_end < mask_len and mask[run_end]:
        run_end += 1

    # Left side: is the run long enough for the padding?
    if run_end - run_start < padding:
        # Just don't zero at all for this run:
        mask[run_start:run_end] = False
    else:
        # Don't zero the padding samples:
        mask[run_start:min(run_start + padding, mask_len)] = False

    # The zeroing now starts after the padding, but
    # never beyond the end of the run:
    run_start = min(run_start + padding, run_end)

    # Right side: stop zeroing a bit earlier than where
    # the last below-threshold element sits:
    if run_end - run_start <= padding:
        # Nothing left to zero:
        mask[run_start:run_end] = False
    else:
        mask[run_end - padding:run_end] = False
        run_end = run_end - padding

    return (mask, run_end)

#------------------------------------
# normalize
#-------------------

def normalize(self, samples):
    '''
    Scale samples linearly so that they occupy the full
    dynamic range of int16: -2**15 to 2**15 - 1
    (-32768 to 32767).

    Formula for the new intensity of each sample:

       I = ((I - min) * (new_max - new_min) / (max - min)) + new_min

    The computation is done in float64, so integer input
    cannot overflow. The result is a float64 array whose values
    lie within the int16 range; cast as needed. A signal
    whose samples are all equal cannot be scaled, and is
    returned unchanged (as float64).

    @param samples: samples from .wav file
    @type samples: np.array
    @return: a new array with normalized values
    @rtype: np.array('float64')
    '''

    new_min = -2**15
    new_max = 2**15 - 1

    # Work in float to avoid wrap-around of narrow int types:
    float_samples = np.asarray(samples, dtype=np.float64)
    min_val = np.amin(float_samples)
    max_val = np.amax(float_samples)

    self.log.info("Begin normalization...")
    val_range = max_val - min_val
    if val_range == 0:
        # Constant signal; avoid division by zero:
        normed_samples = float_samples.copy()
    else:
        normed_samples = ((float_samples - min_val) *
                          (new_max - new_min) / val_range) + new_min
    self.log.info("Done normalization.")

    return normed_samples

#------------------------------------
# make_sinewave
#-------------------

def make_sinewave(self, freq):
    '''
    Return points 0, 0.1, 0.2, ... up to (excluding) freq,
    together with the sine of each point.

    @param freq: upper limit of the point sequence
    @type freq: {int | float}
    @return: the points, and their sine values
    @rtype: (np.array, np.array)
    '''

    time = np.arange(0, freq, 0.1)
    return (time, np.sin(time))

#------------------------------------
# db_from_sample
#-------------------

def db_from_sample(self, sample):
    '''
    Return 20 * log10 of the given value(s).
    '''
    return 20 * np.log10(sample)

#------------------------------------
# samples_from_msecs
#-------------------

def samples_from_msecs(self, msecs):
    '''
    Convert milliseconds to a number of samples,
    using self.samples_per_msec.
    '''
    return msecs * self.samples_per_msec

#------------------------------------
# samples_from_secs
#-------------------

def samples_from_secs(self, secs):
    '''
    Possibly fractional seconds turned into samples.
    Fractional samples are rounded up.

    @param secs: number of seconds to convert
    @type secs: {int | float}
    @return: number of corresponding samples
    @rtype: int
    '''
    return math.ceil(secs * self.framerate)

#------------------------------------
# msecs_from_samples
#-------------------

def msecs_from_samples(self, num_samples):
    '''
    Convert a number of samples to milliseconds; the
    inverse of samples_from_msecs().

    @param num_samples: number of samples
    @type num_samples: {int | float}
    @return: corresponding number of milliseconds
    @rtype: float
    '''
    return num_samples / self.samples_per_msec

#------------------------------------
# get_max_db
#------------------

def get_max_db(self, npa):
    '''
    Return 20 * log10 of the largest value in npa.

    @param npa: values to examine
    @type npa: np.array
    @return: the maximum, expressed in dB
    @rtype: float
    '''
    # ndarray has no amax() method; use the numpy function:
    return 20 * np.log10(np.amax(npa))

#------------------------------------
# export_snippet
#-------------------

def export_snippet(self,
                   samples,
                   start_sample,
                   end_sample,
                   filename,
                   to_int16=True):
    '''
    Write part of the samples to a two-col CSV. The first
    column is the index within the snippet (starting at 0),
    the second column is the sample value.

    @param samples: sample array
    @type samples: np.array
    @param start_sample: index of first sample to export
    @type start_sample: int
    @param end_sample: index of sample after the last one exported
    @type end_sample: int
    @param filename: output file name
    @type filename: str
    @param to_int16: whether or not to convert samples to
        16 bit signed int before writing
    @type to_int16: bool
    '''

    snippet = samples[start_sample:end_sample]
    if to_int16:
        snippet = snippet.astype(np.int16)
    with open(filename, 'w') as fd:
        fd.writelines(f"{indx},{val}\n"
                      for (indx, val) in enumerate(snippet))

#------------------------------------
# make_spectrogram
#-------------------

def make_spectrogram(self, data):
    '''
    Given data, compute a spectrogram.

    The short-time Fourier transform of data is computed with
    segments of length self.FFT_WIDTH at sampling rate
    self.framerate; all other stft() settings are left at
    their defaults. The magnitudes of the complex result are
    then passed through DSPUtils.spectrogram_to_db().

    Returns a three-tuple: an array of sample frequencies,
    an array of segment times, and a 2D array of the
    converted STFT magnitudes: frequencies x segment times.

    @param data: the time/amplitude data
    @type data: np.array([float])
    @return: (frequency_labels, time_labels, spectrogram_matrix)
    @rtype: (np.array, np.array, np.array)
    '''

    self.log.info("Creating spectrogram...")
    (freq_labels, time_labels, complex_freq_by_time) = stft(
        data, self.framerate, nperseg=self.FFT_WIDTH)
    self.log.info("Done creating spectrogram.")

    magnitudes = np.absolute(complex_freq_by_time)
    freq_time = DSPUtils.spectrogram_to_db(magnitudes)
    return (freq_labels, time_labels, freq_time)

#------------------------------------
# make_inverse_spectrogram
#-------------------

def make_inverse_spectrogram(self, spectrogram):
    '''
    Invert a short-time Fourier transform back to the time
    domain, using self.framerate and segments of length
    self.FFT_WIDTH.

    @param spectrogram: the STFT to invert
    @type spectrogram: np.array
    @return: the reconstructed voltages, cast to int16
    @rtype: np.array('int16')
    '''

    self.log.info("Inverting spectrogram to time domain...")
    (_time, voltage) = istft(spectrogram,
                             self.framerate,
                             nperseg=self.FFT_WIDTH)
    self.log.info("Done inverting spectrogram to time domain.")

    # Return just the voltages:
    return voltage.astype(np.int16)

#------------------------------------
# find_busy_array_section
#-------------------

def find_busy_array_section(self, arr):
    '''
    Return the index of the first element of arr that
    starts a sequence of three consecutive values greater
    than zero. If there is no such sequence, 0 is returned.

    @param arr: values to search
    @type arr: np.array
    @return: index of the start of the first 'busy' section
    @rtype: int
    '''

    # Last index at which three elements still fit:
    last_start = len(arr) - 3

    for candidate in np.nonzero(arr)[0]:
        if candidate > last_start:
            # Indices are ascending: none of the remaining
            # ones can start a run of three.
            break
        if arr[candidate] > 0 and \
           arr[candidate + 1] > 0 and \
           arr[candidate + 2] > 0:
            return candidate

    # Nothing found, return start of array:
    return 0

#------------------------------------
# Readers/Writers
#-------------------

@property
def gated_samples(self):
    '''
    The value of self._gated_samples.
    '''
    return self._gated_samples

@property
def gated_outfile(self):
    '''
    The value of self._gated_outfile.
    '''
    return self._gated_outfile