def processTrainingSet(self, basedir="sounds/train/", signalword="oknavsa", savedir="data/"):
    """Build the labelled feature set from the WAV clips in *basedir*, save it, then train.

    Every ``.wav`` file in *basedir* is classified by its (lower-cased) file
    name: names starting with *signalword* are treated as signal, names
    starting with ``"random"`` or ``"background"`` as non-signal, and all
    other clips are skipped. Each accepted clip is read through a
    ``Trigger`` instance, split into subsamples, and one feature row per
    subsample (from ``self.getFeatures``) is collected.

    Side effects:
        * ``self.YXtot`` is replaced by a numpy array of all feature rows.
        * ``self.keywordDurations`` is replaced by a numpy array holding the
          durations of the signal subsamples only.
        * Both arrays are written to ``imagedata_<Nfreq>_<Ntime>.npy`` and
          ``metadata_<Nfreq>_<Ntime>.npy`` inside *savedir*.
        * ``self.trainAndTest()`` is called.

    Args:
        basedir: Directory containing the training clips and, optionally,
            a ``thresh.dat`` file with ``<clip name>,<integer threshold>``
            lines giving a per-clip ``THRESHOLD`` for the trigger.
        signalword: File-name prefix identifying signal clips
            (matched case-insensitively).
        savedir: Directory the two ``.npy`` files are written to.

    Returns:
        A tuple ``(fnames, durations, YXtest)`` where ``fnames`` and
        ``durations`` are the per-subsample clip names and durations indexed
        by the ``idx_test`` value returned from ``self.trainAndTest()``, and
        ``YXtest`` is the second value returned by that call.
    """
    # Threshold used for clips that have no entry in thresh.dat.
    defaultThreshold = 600

    clips = [clip for clip in os.listdir(basedir) if clip.endswith(".wav")]

    # Per-clip thresholds are optional: a missing or unreadable file simply
    # means every clip uses the default. Malformed lines are skipped one by
    # one instead of discarding the whole table.
    bestThresh = {}
    try:
        with open(os.path.join(basedir, "thresh.dat"), "r") as fh:
            for line in fh:
                fields = line.strip().split(",")
                if len(fields) < 2:
                    continue
                try:
                    bestThresh[fields[0].strip()] = int(fields[1])
                except ValueError:
                    continue
    except (IOError, OSError):
        bestThresh = {}

    signalPrefix = signalword.lower()
    nonSignalPrefixes = ("random", "background")

    tr = Trigger()
    self.YXtot = []
    self.keywordDurations = []
    fnames = []
    durations = []

    with warnings.catch_warnings():
        # Silence every warning category while the clips are processed.
        warnings.filterwarnings("ignore", category=Warning)
        for clip in clips:
            lowered = clip.lower()
            if lowered.startswith(signalPrefix):
                isSignal = True
            elif lowered.startswith(nonSignalPrefixes):
                isSignal = False
            else:
                # Neither a signal nor a known non-signal clip: ignore it.
                continue

            tr.setParams({"THRESHOLD": bestThresh.get(clip, defaultThreshold)})
            tr.readWav(os.path.join(basedir, clip))
            subsamples = tr.getSubsamples()
            framerate = tr.getFramerate()
            if self.verbosity > 1:
                print("Loading clip %s (isSignal: %i) ==> %i subsamples" % (clip, isSignal, len(subsamples)))

            for ss in subsamples:
                duration = self.getSampleDuration(ss, framerate)
                if isSignal:
                    self.keywordDurations.append(duration)
                self.YXtot.append(self.getFeatures(ss, framerate, isSignal))
                # fnames/durations stay row-aligned with self.YXtot so they
                # can be indexed with idx_test below.
                fnames.append(clip)
                durations.append(duration)

    self.YXtot = np.array(self.YXtot)
    self.keywordDurations = np.array(self.keywordDurations)

    outputname = os.path.join(savedir, "imagedata_%i_%i.npy" % (self.Nfreq, self.Ntime))
    outputname_meta = os.path.join(savedir, "metadata_%i_%i.npy" % (self.Nfreq, self.Ntime))
    np.save(outputname, self.YXtot)
    np.save(outputname_meta, self.keywordDurations)
    if self.verbosity > 1:
        print("made %s and %s" % (outputname, outputname_meta))

    idx_test, YXtest = self.trainAndTest()
    return np.array(fnames)[idx_test], np.array(durations)[idx_test], YXtest