class SavedJson:
    """Dict that saves to disk on modifications"""

    def __init__(self, filename):
        super().__init__()
        self.filename = filename
        self.fiti = Fitipy(self.filename)
        self.data = self.fiti.read().dict()

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Ellipsis is a sentinel no real value equals, so a missing
        # key always counts as a change
        changed = self.data.get(key, ...) != value
        self.data[key] = value
        if changed:
            self.fiti.write().dict(self.data)

    def __delitem__(self, key):
        del self.data[key]
        self.fiti.write().dict(self.data)

    def get(self, k):
        return self.data.get(k)

    def update(self, data=None, **kwargs):
        # dict.update(None) raises TypeError, so substitute an empty dict
        self.data.update(data or {}, **kwargs)
        self.fiti.write().dict(self.data)
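A minimal usage sketch for SavedJson, assuming the fitipy package is installed and that Fitipy's read().dict() returns an empty dict for a missing file, as the snippets above rely on ('settings.json' is a hypothetical filename):

settings = SavedJson('settings.json')
settings['volume'] = 0.8           # persisted to disk immediately
print(settings.get('volume'))      # -> 0.8
del settings['volume']             # deletion is persisted too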
Example #2
class FilesystemService(ServicePlugin):
    def __init__(self, rt, root=None):
        ServicePlugin.__init__(self, rt)
        self.root = root or expanduser(rt.paths.user_config)
        self.fiti = Fitipy(self.root)

        # Ensure the root directory exists
        if not self.isdir(''):
            self.mkdir('')

    def read(self, *path) -> FitiReader:
        return self.fiti.read(*path)

    def write(self, *path) -> FitiWriter:
        return self.fiti.write(*path)

    def subdir(self, *path):
        return FilesystemService(self.rt, join(self.root, *path))

    def open(self, *path, mode='r'):
        return open(join(self.root, *path), mode)

    def isfile(self, *path):
        return isfile(join(self.root, *path))

    def isdir(self, *path):
        return isdir(join(self.root, *path))

    def mkdir(self, *path):
        makedirs(self.path(*path), exist_ok=True)

    def path(self, *path):
        return join(self.root, *path)
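A rough standalone sketch of the read/write delegation above, using Fitipy directly with the same root-plus-relative-path API the class exposes (assuming fitipy exposes Fitipy at the top level; 'demo_root' and 'state.json' are hypothetical names):

from os import makedirs
from fitipy import Fitipy

root = 'demo_root'
makedirs(root, exist_ok=True)
fiti = Fitipy(root)
fiti.write('state.json').dict({'ready': True})   # like FilesystemService.write
print(fiti.read('state.json').dict())            # -> {'ready': True}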
Example #3
    def __init__(self, parser=None):
        parser = parser or ArgumentParser()
        add_to_parser(parser, self.usage, True)
        args = TrainData.parse_args(parser)
        self.args = args = self.process_args(args) or args

        if args.invert_samples and not args.samples_file:
            parser.error(
                'You must specify --samples-file when using --invert-samples')
        if args.samples_file and not isfile(args.samples_file):
            parser.error('No such file: ' + args.samples_file)
        if not 0.0 <= args.sensitivity <= 1.0:
            parser.error('sensitivity must be between 0.0 and 1.0')

        output_folder = os.path.join(args.folder, splitext(args.model)[0])
        if not os.path.exists(output_folder):
            print('Creating output folder:', output_folder)
            os.makedirs(output_folder)

        args.model = os.path.join(output_folder, args.model)

        inject_params(args.model)
        save_params(args.model)
        self.train, self.test = self.load_data(self.args)

        set_loss_bias(1.0 - args.sensitivity)
        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics)
        self.model = create_model(args.model, params)
        self.model.summary()

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_file = splitext(args.model)[0] + '.epoch'
        epoch_fiti = Fitipy(epoch_file)
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(_epoch, _logs):  # Keras passes (epoch, logs); both unused
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        if args.samples_file:
            self.samples, self.hash_to_ind = self.load_sample_data(
                args.samples_file, self.train)
        else:
            self.samples = set()
            self.hash_to_ind = {}

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs'),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]
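An isolated, runnable sketch of the epoch-persistence pattern above (assuming fitipy exposes Fitipy at the top level; 'model.net.epoch' is a hypothetical filename):

from fitipy import Fitipy

epoch_fiti = Fitipy('model.net.epoch')
epoch = epoch_fiti.read().read(0, int)  # defaults to 0 when the file is missing
epoch += 1                              # one more epoch completed
epoch_fiti.write().write(epoch, str)    # persist as text for the next run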
Example #4
class SampledTrainer(Trainer):
    def __init__(self):
        parser = create_parser(usage)
        super().__init__(parser)
        if self.args.invert_samples:
            parser.error('--invert-samples should be left blank')
        self.args.samples_file = (self.args.samples_file
                                  or '{model_base}.samples.json').format(
                                      model_base=self.model_base)
        self.samples, self.hash_to_ind = self.load_sample_data(
            self.args.samples_file, self.train)
        self.metrics_fiti = Fitipy(self.model_base + '.logs',
                                   'sampling-metrics.txt')

    def write_sampling_metrics(self, predicted):
        # Fraction of samples whose thresholded prediction matches the label
        correct = float(
            sum((predicted > 0.5) == (self.train[1] > 0.5)) /
            len(self.train[1]))
        print('Accuracy on training data: {0:.3%}'.format(correct))

        lines = self.metrics_fiti.read().lines()
        lines.append('{}\t{}'.format(
            len(self.samples) / len(self.train[1]), correct))
        self.metrics_fiti.write().lines(lines)

    def choose_new_samples(self, predicted):
        failed_samples = {
            calc_sample_hash(inp, target)
            for inp, pred, target in zip(self.train[0], predicted,
                                         self.train[1])
            if (pred > 0.5) != (target > 0.5)
        }
        remaining_failed_samples = failed_samples - self.samples
        print('Remaining failed samples:', len(remaining_failed_samples))
        # Take up to num_sample_chunk new hard samples (set order is arbitrary)
        return islice(remaining_failed_samples, self.args.num_sample_chunk)

    def run(self):
        print('Writing to:', self.args.samples_file)
        print('Writing metrics to:', self.metrics_fiti.path)
        for _ in range(self.args.cycles):
            print('Calculating on whole dataset...')
            predicted = self.model.predict(self.train[0])

            num_before = len(self.samples)
            self.samples.update(self.choose_new_samples(predicted))
            Fitipy(self.args.samples_file).write().set(self.samples)
            print('Added', len(self.samples) - num_before, 'samples')

            self.write_sampling_metrics(predicted)

            self.model.fit(*self.sampled_data,
                           batch_size=self.args.batch_size,
                           epochs=self.epoch + self.args.epochs,
                           callbacks=self.callbacks,
                           initial_epoch=self.epoch,
                           validation_data=self.test)
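A standalone sketch of the misclassification test that drives choose_new_samples, using plain NumPy (the arrays are illustrative):

import numpy as np

targets = np.array([0, 1, 0, 1, 1, 0, 1, 0], dtype=float)
predicted = np.array([0.1, 0.9, 0.7, 0.4, 0.8, 0.2, 0.6, 0.3])
# Indices where the thresholded prediction disagrees with the label
failed = np.where((predicted > 0.5) != (targets > 0.5))[0]
print('misclassified indices:', failed)  # -> [2 3]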
Example #5
    def __init__(self, args):
        super().__init__(args)

        if args.invert_samples and not args.samples_file:
            raise ValueError(
                'You must specify --samples-file when using --invert-samples')
        if args.samples_file and not isfile(args.samples_file):
            raise ValueError('No such file: ' + args.samples_file)
        if not 0.0 <= args.sensitivity <= 1.0:
            raise ValueError('sensitivity must be between 0.0 and 1.0')

        inject_params(args.model)
        save_params(args.model)
        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics,
                             loss_bias=1.0 - args.sensitivity,
                             freeze_till=args.freeze_till)
        self.model = create_model(args.model, params)
        self.train, self.test = self.load_data(self.args)

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(_epoch, _logs):  # Keras passes (epoch, logs); both unused
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        if args.samples_file:
            self.samples, self.hash_to_ind = self.load_sample_data(
                args.samples_file, self.train)
        else:
            self.samples = set()
            self.hash_to_ind = {}

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs'),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]
Example #6
    def __init__(self):
        parser = create_parser(usage)
        self.args = args = TrainData.parse_args(parser)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
        self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics,
                             loss_bias=1.0 - args.sensitivity)
        self.model = create_model(args.model, params)
        self.listener = Listener('',
                                 args.chunk_size,
                                 runner_cls=lambda x: None)

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(_epoch, _logs):  # Keras passes (epoch, logs); both unused
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs'),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]

        self.data = TrainData.from_both(args.tags_file, args.tags_folder,
                                        args.folder)
        pos_files, neg_files = self.data.train_files
        self.neg_files_it = cycle(neg_files)  # cycle() already returns an iterator
        self.pos_files_it = cycle(pos_files)
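A runnable sketch of the endless file iterators above (filenames are hypothetical); itertools.cycle repeats a finite list forever, so training can keep drawing positive and negative files indefinitely:

from itertools import cycle

pos_files_it = cycle(['yes-1.wav', 'yes-2.wav'])
neg_files_it = cycle(['no-1.wav'])
print(next(pos_files_it), next(neg_files_it), next(pos_files_it))
# -> yes-1.wav no-1.wav yes-2.wav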
Example #7
class TrainSampledScript(TrainScript):
    usage = Usage('''
        Train a model, sampling data points with the highest loss from a larger dataset

        :-c --cycles int 200
            Number of sampling cycles of size {epoch} to run

        :-n --num-sample-chunk int 50
            Number of new samples to introduce at a time between training cycles

        :-sf --samples-file str -
            Json file to write selected samples to.
            Default = {model_base}.samples.json

        :-is --invert-samples
            Unused parameter
        ...
    ''') | TrainScript.usage

    def __init__(self, args):
        super().__init__(args)
        if self.args.invert_samples:
            raise ValueError('--invert-samples should be left blank')
        self.args.samples_file = (self.args.samples_file
                                  or '{model_base}.samples.json').format(
                                      model_base=self.model_base)
        self.samples, self.hash_to_ind = self.load_sample_data(
            self.args.samples_file, self.train)
        self.metrics_fiti = Fitipy(self.model_base + '.logs',
                                   'sampling-metrics.txt')

    def write_sampling_metrics(self, predicted):
        # Fraction of samples whose thresholded prediction matches the label
        correct = float(
            sum((predicted > 0.5) == (self.train[1] > 0.5)) /
            len(self.train[1]))
        print('Accuracy on training data: {0:.3%}'.format(correct))

        lines = self.metrics_fiti.read().lines()
        lines.append('{}\t{}'.format(
            len(self.samples) / len(self.train[1]), correct))
        self.metrics_fiti.write().lines(lines)

    def choose_new_samples(self, predicted):
        failed_samples = {
            calc_sample_hash(inp, target)
            for inp, pred, target in zip(self.train[0], predicted,
                                         self.train[1])
            if (pred > 0.5) != (target > 0.5)
        }
        remaining_failed_samples = failed_samples - self.samples
        print('Remaining failed samples:', len(remaining_failed_samples))
        # Take up to num_sample_chunk new hard samples (set order is arbitrary)
        return islice(remaining_failed_samples, self.args.num_sample_chunk)

    def run(self):
        print('Writing to:', self.args.samples_file)
        print('Writing metrics to:', self.metrics_fiti.path)
        for _ in range(self.args.cycles):
            print('Calculating on whole dataset...')
            predicted = self.model.predict(self.train[0])

            num_before = len(self.samples)
            self.samples.update(self.choose_new_samples(predicted))
            Fitipy(self.args.samples_file).write().set(self.samples)
            print('Added', len(self.samples) - num_before, 'samples')

            self.write_sampling_metrics(predicted)

            self.model.fit(*self.sampled_data,
                           batch_size=self.args.batch_size,
                           epochs=self.epoch + self.args.epochs,
                           callbacks=self.callbacks,
                           initial_epoch=self.epoch,
                           validation_data=self.test)
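A short sketch of the tab-separated metrics file appended to by write_sampling_metrics above, using the same Fitipy lines() API (paths and values are illustrative, and the logs folder is assumed to exist):

from fitipy import Fitipy

metrics_fiti = Fitipy('model.net.logs', 'sampling-metrics.txt')
lines = metrics_fiti.read().lines()
lines.append('{}\t{}'.format(0.125, 0.873))  # sampled fraction, accuracy
metrics_fiti.write().lines(lines)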
Example #8
def main():
    args = create_parser(usage).parse_args()
    filenames = glob(join(args.folder, '*.wav'))
    shuffle(filenames)
    wav_id = -1
    stream = None
    stop_event = Event()
    stop_event.set()
    p = PyAudio()
    atexit.register(p.terminate)

    def play_audio(audio_file):
        nonlocal stream
        if stream:
            stop_event.clear()
            stop_event.wait()
            stream.stop_stream()
            stream.close()
            stream = None
        audio = load_audio(audio_file)[-pr.buffer_samples:]
        # Normalize volume: the quieter of (mean + 4*std) and the peak maps to 0.5
        audio /= 2 * min(audio.mean() + 4 * audio.std(), abs(audio).max())
        stream = p.open(format=paFloat32,
                        channels=1,
                        rate=pr.sample_rate,
                        output=True)
        stream.start_stream()

        def write_audio():
            data = audio.astype('float32').tobytes()  # tostring() is deprecated
            chunk_size = 1024
            for pos in range(chunk_size, len(data) + chunk_size, chunk_size):
                if not stop_event.is_set():
                    # Another playback cleared the event; acknowledge and stop
                    stop_event.set()
                    return
                stream.write(data[pos - chunk_size:pos])
            # Playback finished; idle until the next play_audio clears the event,
            # then set it again to acknowledge
            while stop_event.is_set():
                sleep(chunk_size / pr.sample_rate)
            stop_event.set()

        Thread(target=write_audio, daemon=True).start()

    tags_file = Fitipy(args.tags_file)
    tags = tags_file.read().dict()

    def submit():
        nonlocal wav_id
        if wav_id >= 0:
            tags[basename(splitext(filenames[wav_id])[0])] = float(
                slider.get())
            tags_file.write().dict(tags)
        wav_id += 1
        play_audio(filenames[wav_id])

    submit()

    master = Tk()
    label = Label(master, text='0')
    label.pack()

    def on_slider_change(x):
        label['text'] = str(int(float(x)))

    slider = Scale(master, from_=0, to=100, command=on_slider_change)
    slider.pack()

    Button(master, text='Submit', command=submit).pack()
    Button(master,
           text='Replay',
           command=lambda: play_audio(filenames[wav_id])).pack()
    mainloop()
    stream.stop_stream()
    stream.close()
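A standalone sketch of the loudness normalization used in play_audio above (random data stands in for a real recording):

import numpy as np

audio = np.random.randn(16000)
audio /= 2 * min(audio.mean() + 4 * audio.std(), np.abs(audio).max())
# The quieter of (mean + 4*std) and the absolute peak now sits at 0.5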
Example #9
def main():
    args = create_parser(usage).parse_args()
    num_seen_file = Fitipy(args.cache_file + '.num')
    topics_cache = args.cache_file + '.topics.json'
    if not isfile(topics_cache):
        print('Generating topics...')
        with open(topics_cache, 'w') as f:
            json.dump(get_keywords_uiuc(), f)
    with open(topics_cache) as f:
        topics = json.load(f)

    num_seen = num_seen_file.read().read(0, int)
    with open(args.auth_file) as f:
        auth = yaml.safe_load(f)  # safe_load avoids constructing arbitrary objects
    email = auth['username']
    password = auth['password']
    server = auth.get('pop3_host', 'pop3.' + email.split('@')[-1])
    client = StatelessClass(EmailReceiver,
                            email=email,
                            password=password,
                            server=server)  # type: EmailReceiver

    print('Waiting for emails...')
    while True:
        num_messages = len(client.get_list())
        if num_messages < num_seen:
            # The mailbox shrank (messages were deleted); resync the counter
            num_seen = num_messages
            num_seen_file.write().write(num_seen, str)
        if num_messages <= num_seen:
            time.sleep(1)
            continue
        for msg_id in range(num_seen + 1, num_messages + 1):
            email = client.get_email(msg_id)
            print('Found new email from {} titled {}.'.format(
                email['From'], email['Subject']))
            email_txt = '\n'.join(email['text'])
            email_txt = BeautifulSoup(email_txt, 'html.parser').text
            # '+' makes the pattern strip whole URLs, not just the scheme
            email_txt = re.sub(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', '',
                               email_txt)

            freq = calc_freq(email_txt, topics)
            tags = relevant_topics(freq)
            print('Found the following tags:', ', '.join(tags))
            events = Event.find()
            matched_events = [
                event for event in events if event.get('emailSrc') and
                SequenceMatcher(a=event['emailSrc'], b=email_txt).ratio() > 0.9
            ]
            if matched_events:
                print('Ignoring, similar to {} other emails'.format(
                    len(matched_events)))
            else:
                Event.add({
                    'name': email['Subject'],
                    'description': email_txt,
                    'location': '',
                    'time': int(time.time()),
                    'tags': tags,
                    'emailSrc': email_txt
                })
            num_seen += 1
            num_seen_file.write().write(num_seen, str)
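An isolated sketch of the near-duplicate check above: two emails are treated as the same event when their SequenceMatcher ratio exceeds 0.9 (the strings are illustrative):

from difflib import SequenceMatcher

a = 'Robotics club meets Friday at 5pm in room 101.'
b = 'Robotics club meets Friday at 6pm in room 101.'
print(SequenceMatcher(a=a, b=b).ratio() > 0.9)  # -> True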