class SavedJson: """Dict that saves to disk on modifications""" def __init__(self, filename): super().__init__() self.filename = filename self.fiti = Fitipy(self.filename) self.data = self.fiti.read().dict() def __getitem__(self, item): return self.data[item] def __setitem__(self, key, value): changed = self.data.get(key, ...) != value self.data[key] = value if changed: self.fiti.write().dict(self.data) def __delitem__(self, key): del self.data[key] self.fiti.write().dict(self.data) def get(self, k): return self.data.get(k) def update(self, data=None, **kwargs): self.data.update(data, **kwargs) self.fiti.write().dict(self.data)
class FilesystemService(ServicePlugin):
    """Service plugin exposing simple filesystem helpers rooted at a base folder."""

    def __init__(self, rt, root=None):
        ServicePlugin.__init__(self, rt)
        # Default root is the user config folder; make sure it exists
        self.root = root or expanduser(rt.paths.user_config)
        self.fiti = Fitipy(self.root)
        if not self.isdir(''):
            self.mkdir('')

    def path(self, *path):
        """Absolute-ish path for *path* joined onto the root folder."""
        return join(self.root, *path)

    def read(self, *path) -> FitiReader:
        """Fitipy reader for a file below the root."""
        return self.fiti.read(*path)

    def write(self, *path) -> FitiWriter:
        """Fitipy writer for a file below the root."""
        return self.fiti.write(*path)

    def subdir(self, *path):
        """A new FilesystemService rooted at a subfolder of this one."""
        return FilesystemService(self.rt, self.path(*path))

    def open(self, *path, mode='r'):
        """Open a file below the root (thin wrapper over builtin open)."""
        return open(self.path(*path), mode)

    def isfile(self, *path):
        return isfile(self.path(*path))

    def isdir(self, *path):
        return isdir(self.path(*path))

    def mkdir(self, *path):
        makedirs(self.path(*path), exist_ok=True)
def __init__(self, parser=None):
    """Parse CLI args, prepare the output folder, and build model + callbacks.

    Fixes vs. original:
    - 'No such file' error now reports args.samples_file (the file that was
      checked) instead of args.invert_samples, which is a flag.
    - Removed a no-op single-argument os.path.join around the epoch file path.
    """
    parser = parser or ArgumentParser()
    add_to_parser(parser, self.usage, True)
    args = TrainData.parse_args(parser)
    # process_args may return a replacement namespace or mutate in place
    self.args = args = self.process_args(args) or args
    if args.invert_samples and not args.samples_file:
        parser.error(
            'You must specify --samples-file when using --invert-samples')
    if args.samples_file and not isfile(args.samples_file):
        parser.error('No such file: ' + args.samples_file)
    if not 0.0 <= args.sensitivity <= 1.0:
        parser.error('sensitivity must be between 0.0 and 1.0')

    # All outputs live in <folder>/<model-name-without-extension>/
    output_folder = os.path.join(args.folder, splitext(args.model)[0])
    if not os.path.exists(output_folder):
        print('Creating output folder:', output_folder)
        os.makedirs(output_folder)
    args.model = os.path.join(output_folder, args.model)

    inject_params(args.model)
    save_params(args.model)
    self.train, self.test = self.load_data(self.args)
    set_loss_bias(1.0 - args.sensitivity)
    params = ModelParams(skip_acc=args.no_validation,
                         extra_metrics=args.extra_metrics)
    self.model = create_model(args.model, params)
    self.model.summary()

    # Imported lazily: keras is slow to load and only needed for training
    from keras.callbacks import ModelCheckpoint, TensorBoard
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)
    # Epoch counter persists across runs in <model-base>.epoch
    epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
    self.epoch = epoch_fiti.read().read(0, int)

    def on_epoch_end(_a, _b):
        self.epoch += 1
        epoch_fiti.write().write(self.epoch, str)

    self.model_base = splitext(self.args.model)[0]

    if args.samples_file:
        self.samples, self.hash_to_ind = self.load_sample_data(
            args.samples_file, self.train)
    else:
        self.samples = set()
        self.hash_to_ind = {}

    self.callbacks = [
        checkpoint,
        TensorBoard(log_dir=self.model_base + '.logs'),
        LambdaCallback(on_epoch_end=on_epoch_end)
    ]
class SampledTrainer(Trainer):
    """Trainer that repeatedly folds the hardest (misclassified) data points
    into its training set, persisting the chosen sample hashes to disk."""

    def __init__(self):
        parser = create_parser(usage)
        super().__init__(parser)
        if self.args.invert_samples:
            parser.error('--invert-samples should be left blank')
        # Default the samples file next to the model when not given
        chosen = self.args.samples_file or '{model_base}.samples.json'
        self.args.samples_file = chosen.format(model_base=self.model_base)
        self.samples, self.hash_to_ind = self.load_sample_data(
            self.args.samples_file, self.train)
        # Sampling metrics live under the TensorBoard log folder
        self.metrics_fiti = Fitipy(self.model_base + '.logs',
                                   'sampling-metrics.txt')

    def write_sampling_metrics(self, predicted):
        """Append (fraction sampled, accuracy) for this cycle to the metrics file."""
        labels = self.train[1]
        matches = (predicted > 0.5) == (labels > 0.5)
        correct = float(sum(matches) / len(labels))
        print('Successfully calculated: {0:.3%}'.format(correct))
        history = self.metrics_fiti.read().lines()
        history.append('{}\t{}'.format(
            len(self.samples) / len(labels), correct))
        self.metrics_fiti.write().lines(history)

    def choose_new_samples(self, predicted):
        """Hashes of up to num_sample_chunk misclassified, not-yet-chosen points."""
        failed = set()
        for inp, pred, target in zip(self.train[0], predicted, self.train[1]):
            if (pred > 0.5) != (target > 0.5):
                failed.add(calc_sample_hash(inp, target))
        remaining = failed - self.samples
        print('Remaining failed samples:', len(remaining))
        return islice(remaining, self.args.num_sample_chunk)

    def run(self):
        """Alternate between harvesting hard samples and fitting on them."""
        print('Writing to:', self.args.samples_file)
        print('Writing metrics to:', self.metrics_fiti.path)
        for _ in range(self.args.cycles):
            print('Calculating on whole dataset...')
            predicted = self.model.predict(self.train[0])
            self.samples.update(self.choose_new_samples(predicted))
            Fitipy(self.args.samples_file).write().set(self.samples)
            print('Added', self.args.num_sample_chunk, 'samples')
            self.write_sampling_metrics(predicted)
            self.model.fit(*self.sampled_data,
                           batch_size=self.args.batch_size,
                           epochs=self.epoch + self.args.epochs,
                           callbacks=self.callbacks,
                           initial_epoch=self.epoch,
                           validation_data=self.test)
def __init__(self, args):
    """Validate args, then build the model, data, and training callbacks.

    Fix vs. original: the 'No such file' message concatenated
    ``args.invert_samples or args.samples_file``; invert_samples is a flag,
    so a truthy value produced ``str + bool`` (TypeError) and was the wrong
    value to show anyway. The check is on samples_file, so report that.
    """
    super().__init__(args)
    if args.invert_samples and not args.samples_file:
        raise ValueError(
            'You must specify --samples-file when using --invert-samples')
    if args.samples_file and not isfile(args.samples_file):
        raise ValueError('No such file: ' + args.samples_file)
    if not 0.0 <= args.sensitivity <= 1.0:
        raise ValueError('sensitivity must be between 0.0 and 1.0')

    inject_params(args.model)
    save_params(args.model)

    params = ModelParams(skip_acc=args.no_validation,
                         extra_metrics=args.extra_metrics,
                         loss_bias=1.0 - args.sensitivity,
                         freeze_till=args.freeze_till)
    self.model = create_model(args.model, params)
    self.train, self.test = self.load_data(self.args)

    # Imported lazily: keras is slow to load and only needed for training
    from keras.callbacks import ModelCheckpoint, TensorBoard
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)
    # Epoch counter persists across runs in <model-base>.epoch
    epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
    self.epoch = epoch_fiti.read().read(0, int)

    def on_epoch_end(_a, _b):
        self.epoch += 1
        epoch_fiti.write().write(self.epoch, str)

    self.model_base = splitext(self.args.model)[0]

    if args.samples_file:
        self.samples, self.hash_to_ind = self.load_sample_data(
            args.samples_file, self.train)
    else:
        self.samples = set()
        self.hash_to_ind = {}

    self.callbacks = [
        checkpoint,
        TensorBoard(log_dir=self.model_base + '.logs'),
        LambdaCallback(on_epoch_end=on_epoch_end)
    ]
def __init__(self):
    """Set up audio buffers, model, listener, callbacks, and data iterators.

    Minor fixes vs. original: dropped the redundant ``iter()`` around
    ``cycle(...)`` (cycle already returns an iterator), and named the unused
    callback parameters ``_a``/``_b`` to match the sibling trainers.
    """
    parser = create_parser(usage)
    self.args = args = TrainData.parse_args(parser)
    # Rolling buffers for incoming audio and per-sample label values
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
    self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

    params = ModelParams(skip_acc=args.no_validation,
                         extra_metrics=args.extra_metrics,
                         loss_bias=1.0 - args.sensitivity)
    self.model = create_model(args.model, params)
    # No runner needed; the listener is only used for feature extraction here
    self.listener = Listener('', args.chunk_size, runner_cls=lambda x: None)

    # Imported lazily: keras is slow to load and only needed for training
    from keras.callbacks import ModelCheckpoint, TensorBoard
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)
    # Epoch counter persists across runs in <model-base>.epoch
    epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
    self.epoch = epoch_fiti.read().read(0, int)

    def on_epoch_end(_a, _b):
        self.epoch += 1
        epoch_fiti.write().write(self.epoch, str)

    self.model_base = splitext(self.args.model)[0]
    self.callbacks = [
        checkpoint,
        TensorBoard(log_dir=self.model_base + '.logs'),
        LambdaCallback(on_epoch_end=on_epoch_end)
    ]

    self.data = TrainData.from_both(args.tags_file, args.tags_folder,
                                    args.folder)
    pos_files, neg_files = self.data.train_files
    # Endless round-robin iterators over the training files
    self.neg_files_it = cycle(neg_files)
    self.pos_files_it = cycle(pos_files)
class TrainSampledScript(TrainScript):
    """Script variant of sampled training: repeatedly add the hardest
    (misclassified) data points to the training pool and fit on them."""

    usage = Usage('''
        Train a model, sampling data points with the highest loss from a
        larger dataset

        :-c --cycles int 200
            Number of sampling cycles of size {epoch} to run

        :-n --num-sample-chunk int 50
            Number of new samples to introduce at a time between training cycles

        :-sf --samples-file str -
            Json file to write selected samples to.
            Default = {model_base}.samples.json

        :-is --invert-samples
            Unused parameter

        ...
    ''') | TrainScript.usage

    def __init__(self, args):
        super().__init__(args)
        if self.args.invert_samples:
            # Sampled training is incompatible with inverted sampling
            raise ValueError('--invert-samples should be left blank')
        # Default the samples file next to the model when not given
        self.args.samples_file = (self.args.samples_file or
                                  '{model_base}.samples.json').format(
            model_base=self.model_base)
        self.samples, self.hash_to_ind = self.load_sample_data(
            self.args.samples_file, self.train)
        # Sampling metrics are stored under the TensorBoard log folder
        self.metrics_fiti = Fitipy(self.model_base + '.logs',
                                   'sampling-metrics.txt')

    def write_sampling_metrics(self, predicted):
        """Append (fraction of data sampled, accuracy) for this cycle."""
        correct = float(
            sum((predicted > 0.5) == (self.train[1] > 0.5)) /
            len(self.train[1]))
        print('Successfully calculated: {0:.3%}'.format(correct))
        lines = self.metrics_fiti.read().lines()
        lines.append('{}\t{}'.format(
            len(self.samples) / len(self.train[1]), correct))
        self.metrics_fiti.write().lines(lines)

    def choose_new_samples(self, predicted):
        """Yield up to num_sample_chunk hashes of misclassified points
        that are not already in the sample pool."""
        failed_samples = {
            calc_sample_hash(inp, target)
            for i, (inp, pred, target) in enumerate(
                zip(self.train[0], predicted, self.train[1]))
            if (pred > 0.5) != (target > 0.5)
        }
        remaining_failed_samples = failed_samples - self.samples
        print('Remaining failed samples:', len(remaining_failed_samples))
        return islice(remaining_failed_samples, self.args.num_sample_chunk)

    def run(self):
        """Alternate between harvesting hard samples and training on them."""
        print('Writing to:', self.args.samples_file)
        print('Writing metrics to:', self.metrics_fiti.path)
        for _ in range(self.args.cycles):
            print('Calculating on whole dataset...')
            predicted = self.model.predict(self.train[0])
            self.samples.update(self.choose_new_samples(predicted))
            # Persist chosen hashes so a restart resumes the same pool
            Fitipy(self.args.samples_file).write().set(self.samples)
            # NOTE(review): may overstate the count when fewer than
            # num_sample_chunk new samples remained
            print('Added', self.args.num_sample_chunk, 'samples')
            self.write_sampling_metrics(predicted)
            self.model.fit(*self.sampled_data,
                           batch_size=self.args.batch_size,
                           epochs=self.epoch + self.args.epochs,
                           callbacks=self.callbacks,
                           initial_epoch=self.epoch,
                           validation_data=self.test)
def main():
    """Interactive tkinter tool: play each .wav in a folder and tag it (0-100)
    with a slider, saving tags through Fitipy."""
    args = create_parser(usage).parse_args()
    filenames = glob(join(args.folder, '*.wav'))
    shuffle(filenames)
    wav_id = -1  # index of the wav currently being tagged; -1 = none yet
    stream = None
    # stop_event doubles as a handshake with the playback thread:
    # cleared = "please stop playback", set = "idle / stop acknowledged"
    stop_event = Event()
    stop_event.set()
    p = PyAudio()
    atexit.register(p.terminate)

    def play_audio(audio_file):
        """Stop any current playback, then stream audio_file on a daemon thread."""
        nonlocal stream
        if stream:
            # Request stop and wait for the writer thread to acknowledge
            stop_event.clear()
            stop_event.wait()
            stream.stop_stream()
            stream.close()
            stream = None
        audio = load_audio(audio_file)[-pr.buffer_samples:]
        # Volume normalization heuristic: mean + 4*std, capped at the true peak
        audio /= 2 * min(audio.mean() + 4 * audio.std(), abs(audio).max())
        stream = p.open(format=paFloat32, channels=1, rate=pr.sample_rate,
                        output=True)
        stream.start_stream()

        def write_audio():
            data = audio.astype('float32').tostring()
            chunk_size = 1024
            for pos in range(chunk_size, len(data) + chunk_size, chunk_size):
                if not stop_event.is_set():
                    # Stop requested mid-playback: acknowledge and bail out
                    stop_event.set()
                    return
                stream.write(data[pos - chunk_size:pos])
            # Playback done: idle until a stop is requested, then acknowledge
            while stop_event.is_set():
                sleep(chunk_size / pr.sample_rate)
            stop_event.set()

        Thread(target=write_audio, daemon=True).start()

    tags_file = Fitipy(args.tags_file)
    tags = tags_file.read().dict()

    def submit():
        """Save the slider value for the current wav and advance to the next."""
        nonlocal wav_id
        if wav_id >= 0:
            # Tags are keyed by the wav's base name without extension
            tags[basename(splitext(filenames[wav_id])[0])] = float(
                slider.get())
            tags_file.write().dict(tags)
        # NOTE(review): no bounds check — advancing past the last file raises
        # IndexError; confirm whether that is the intended exit behavior
        wav_id += 1
        play_audio(filenames[wav_id])

    submit()  # start playing the first file before building the UI
    master = Tk()
    label = Label(master, text='0')
    label.pack()

    def on_slider_change(x):
        # Mirror the slider's value into the label as an integer string
        label['text'] = str(int(float(x)))

    slider = Scale(master, from_=0, to=100, command=on_slider_change)
    slider.pack()
    Button(master, text='Submit', command=submit).pack()
    Button(master, text='Replay',
           command=lambda: play_audio(filenames[wav_id])).pack()
    mainloop()
    stream.stop_stream()
    stream.close()
def main():
    """Poll a POP3 inbox forever, tag new emails by topic, and record them
    as Events, de-duplicating near-identical messages.

    Fixes vs. original:
    - URL-stripping regex was missing a ``+`` quantifier, so it removed only
      a single character after ``https?://`` instead of the whole URL.
    - The mailbox-shrank resync wrote num_seen without the ``str`` converter
      used by every other write, inconsistent with ``read(0, int)``.
    - ``yaml.load`` without a Loader is deprecated (and an error on modern
      PyYAML); the auth file is plain scalars, so safe_load suffices.
    - The inner loop no longer shadows the ``email`` account address.
    """
    args = create_parser(usage).parse_args()
    num_seen_file = Fitipy(args.cache_file + '.num')

    # Build (or load) the cached topic keyword list
    topics_cache = args.cache_file + '.topics.json'
    if not isfile(topics_cache):
        print('Generating topics...')
        with open(topics_cache, 'w') as f:
            json.dump(get_keywords_uiuc(), f)
    with open(topics_cache) as f:
        topics = json.load(f)

    num_seen = num_seen_file.read().read(0, int)

    with open(args.auth_file) as f:
        auth = yaml.safe_load(f)
    email = auth['username']
    password = auth['password']
    # Default the POP3 host from the email's domain when not configured
    server = auth.get('pop3_host', 'pop3.' + email.split('@')[-1])
    client = StatelessClass(EmailReceiver, email=email, password=password,
                            server=server)  # type: EmailReceiver

    print('Waiting for emails...')
    while True:
        num_messages = len(client.get_list())
        if num_messages < num_seen:
            # Mailbox shrank (messages deleted); resync the counter
            num_seen = num_messages
            num_seen_file.write().write(num_seen, str)
        if num_messages <= num_seen:
            time.sleep(1)
            continue
        for msg_id in range(num_seen + 1, num_messages + 1):
            msg = client.get_email(msg_id)
            print('Found new email from {} titled {}.'.format(
                msg['From'], msg['Subject']))
            email_txt = '\n'.join(msg['text'])
            email_txt = BeautifulSoup(email_txt).text
            # Strip whole URLs from the body before topic matching
            email_txt = re.sub(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', '',
                               email_txt)

            freq = calc_freq(email_txt, topics)
            tags = relevant_topics(freq)
            print('Found the following tags:', ', '.join(tags))

            # Skip emails nearly identical to one already recorded
            events = Event.find()
            matched_events = [
                event for event in events
                if event.get('emailSrc') and SequenceMatcher(
                    a=event['emailSrc'], b=email_txt).ratio() > 0.9
            ]
            if matched_events:
                print('Ignoring, similar to {} other emails'.format(
                    len(matched_events)))
            else:
                Event.add({
                    'name': msg['Subject'],
                    'description': email_txt,
                    'location': '',
                    'time': int(time.time()),
                    'tags': tags,
                    'emailSrc': email_txt
                })
            num_seen += 1
            num_seen_file.write().write(num_seen, str)